def test_s3_file_manager_read():
    """Exercise S3FileManager.read: download once, serve repeat reads from the
    local cache, and remove the temp file on delete_local_temp()."""
    state = {"called": 0}
    expected_body = b"bar"

    class S3Mock(mock.MagicMock):
        def download_file(self, *_args, **kwargs):
            # The manager must hit S3 exactly once; a second call here means
            # the read cache is broken.
            state["called"] += 1
            assert state["called"] == 1
            state["bucket"] = kwargs.get("Bucket")
            state["key"] = kwargs.get("Key")
            target = kwargs.get("Filename")
            state["file_name"] = target
            with open(target, "wb") as out:
                out.write(expected_body)

    manager = S3FileManager(S3Mock(), "some-bucket", "some-key")
    handle = S3FileHandle("some-bucket", "some-key/kdjfkjdkfjkd")

    with manager.read(handle) as stream:
        assert stream.read() == expected_body

    # The mock recorded the bucket/key it was asked for; they must match the handle.
    assert state["bucket"] == handle.s3_bucket
    assert state["key"] == handle.s3_key

    # Second read: must be served from the cached temp file (the mock would
    # assert if download_file were invoked again).
    with manager.read(handle) as stream:
        assert stream.read() == expected_body

    assert os.path.exists(state["file_name"])

    manager.delete_local_temp()

    assert not os.path.exists(state["file_name"])
def test_s3_file_manager_read():
    # NOTE(review): this function name duplicates an earlier definition in the
    # file; at import time this one shadows it, so pytest only collects this
    # copy. Consider renaming or deleting one of them.
    state = {"called": 0}
    payload = "bar".encode()

    class S3Mock(mock.MagicMock):
        def download_file(self, *_args, **kwargs):
            state["called"] += 1
            # Fail fast if the file manager ever downloads more than once.
            assert state["called"] == 1
            state["bucket"] = kwargs.get("Bucket")
            state["key"] = kwargs.get("Key")
            local_path = kwargs.get("Filename")
            state["file_name"] = local_path
            with open(local_path, "wb") as fh:
                fh.write(payload)

    file_manager = S3FileManager(S3Mock(), "some-bucket", "some-key")
    file_handle = S3FileHandle("some-bucket", "some-key/kdjfkjdkfjkd")

    # First read triggers the (mocked) S3 download.
    with file_manager.read(file_handle) as file_obj:
        assert file_obj.read() == payload

    assert state["bucket"] == file_handle.s3_bucket
    assert state["key"] == file_handle.s3_key

    # Second read is expected to come from the local cache.
    with file_manager.read(file_handle) as file_obj:
        assert file_obj.read() == payload

    assert os.path.exists(state["file_name"])

    file_manager.delete_local_temp()

    # delete_local_temp() must remove the cached file from disk.
    assert not os.path.exists(state["file_name"])
def test_successful_file_handle_to_s3(mock_s3_bucket):
    """Run the file_handle_to_s3 solid end to end: seed an object in the mock
    bucket, execute the pipeline, then verify the uploaded bytes and the single
    materialization event's path/label metadata."""
    foo_bytes = "foo".encode()
    mock_s3_bucket.Object("some-key/foo").put(Body=foo_bytes)

    handle = S3FileHandle(mock_s3_bucket.name, "some-key/foo")
    run_config = {
        "solids": {
            "file_handle_to_s3": {"config": {"Bucket": mock_s3_bucket.name, "Key": "some-key"}}
        },
        "resources": {"file_manager": {"config": {"s3_bucket": mock_s3_bucket.name}}},
    }
    result = execute_pipeline(create_file_handle_pipeline(handle), run_config=run_config)

    assert result.success

    # The solid must have copied the handle's contents to the configured key.
    assert mock_s3_bucket.Object(key="some-key").get()["Body"].read() == foo_bytes

    materializations = result.result_for_solid("file_handle_to_s3").materializations_during_compute
    assert len(materializations) == 1
    entries = materializations[0].metadata_entries
    assert len(entries) == 1
    assert entries[0].entry_data.path == "s3://{bucket}/some-key".format(bucket=mock_s3_bucket.name)
    assert entries[0].label == "some-key"
# Example #4
def test_s3_file_manager_read(mock_s3_resource, mock_s3_bucket):
    """Read a seeded object through S3FileManager, then prove the second read
    comes from the local cache by deleting the remote object first."""
    expected = "bar".encode()
    s3_obj = mock_s3_bucket.Object("some-key/foo")
    s3_obj.put(Body=expected)

    manager = S3FileManager(mock_s3_resource.meta.client, mock_s3_bucket.name, "some-key")
    handle = S3FileHandle(mock_s3_bucket.name, "some-key/foo")

    with manager.read(handle) as stream:
        assert stream.read() == expected

    # With the remote object gone, only the local cache can satisfy this read.
    s3_obj.delete()
    with manager.read(handle) as stream:
        assert stream.read() == expected
# Example #5
def test_successful_file_handle_to_s3(mock_s3_bucket):
    """Execute the file_handle_to_s3 job in process and verify both the
    uploaded object's contents and the single ASSET_MATERIALIZATION event."""
    foo_bytes = b"foo"
    mock_s3_bucket.Object("some-key/foo").put(Body=foo_bytes)

    handle = S3FileHandle(mock_s3_bucket.name, "some-key/foo")
    result = create_file_handle_job(handle).execute_in_process(
        run_config={
            "solids": {
                "file_handle_to_s3": {
                    "config": {"Bucket": mock_s3_bucket.name, "Key": "some-key"}
                }
            },
            "resources": {
                "file_manager": {"config": {"s3_bucket": mock_s3_bucket.name}}
            },
        },
    )

    assert result.success

    # The job must have written the handle's bytes to the configured key.
    assert mock_s3_bucket.Object(key="some-key").get()["Body"].read() == foo_bytes

    materializations = []
    for event in result.events_for_node("file_handle_to_s3"):
        if event.event_type == DagsterEventType.ASSET_MATERIALIZATION:
            materializations.append(event.step_materialization_data.materialization)

    assert len(materializations) == 1
    entries = materializations[0].metadata_entries
    assert len(entries) == 1
    assert entries[0].entry_data.path == f"s3://{mock_s3_bucket.name}/some-key"
    assert entries[0].label == "some-key"