def test_s3_file_manager_read():
    """Reading through S3FileManager downloads from S3 exactly once, serves
    repeat reads from the local cache, and delete_local_temp removes the
    cached file."""
    download_info = {"called": 0}
    payload = "bar".encode()

    class S3Mock(mock.MagicMock):
        # Fake S3 client: record the request args and write the payload
        # to the path the manager asked for.
        def download_file(self, *_args, **kwargs):
            download_info["called"] += 1
            # The manager must hit S3 only on the very first read.
            assert download_info["called"] == 1
            download_info["bucket"] = kwargs.get("Bucket")
            download_info["key"] = kwargs.get("Key")
            target_path = kwargs.get("Filename")
            download_info["file_name"] = target_path
            with open(target_path, "wb") as out:
                out.write(payload)

    manager = S3FileManager(S3Mock(), "some-bucket", "some-key")
    handle = S3FileHandle("some-bucket", "some-key/kdjfkjdkfjkd")

    with manager.read(handle) as stream:
        assert stream.read() == payload

    assert download_info["bucket"] == handle.s3_bucket
    assert download_info["key"] == handle.s3_key

    # Second read: must come from the local cache (no second download —
    # the mock would fail the called == 1 assertion otherwise).
    with manager.read(handle) as stream:
        assert stream.read() == payload

    assert os.path.exists(download_info["file_name"])

    manager.delete_local_temp()

    assert not os.path.exists(download_info["file_name"])
def test_s3_file_manager_read():
    """S3FileManager.read downloads once, caches locally for repeat reads,
    and delete_local_temp cleans the cached file up."""
    seen = {'called': 0}
    expected_bytes = 'bar'.encode()

    class S3Mock(mock.MagicMock):
        # Stand-in client: capture Bucket/Key/Filename and materialize
        # the expected bytes at the requested location.
        def download_file(self, *_args, **kwargs):
            seen['called'] += 1
            # A second call would mean the cache was bypassed.
            assert seen['called'] == 1
            seen['bucket'] = kwargs.get('Bucket')
            seen['key'] = kwargs.get('Key')
            local_path = kwargs.get('Filename')
            seen['file_name'] = local_path
            with open(local_path, 'wb') as out_file:
                out_file.write(expected_bytes)

    s3_mock = S3Mock()
    file_manager = S3FileManager(s3_mock, 'some-bucket', 'some-key')
    file_handle = S3FileHandle('some-bucket', 'some-key/kdjfkjdkfjkd')

    with file_manager.read(file_handle) as stream:
        assert stream.read() == expected_bytes

    assert seen['bucket'] == file_handle.s3_bucket
    assert seen['key'] == file_handle.s3_key

    # Repeat read — served from the cached local file, not S3.
    with file_manager.read(file_handle) as stream:
        assert stream.read() == expected_bytes

    assert os.path.exists(seen['file_name'])

    file_manager.delete_local_temp()

    assert not os.path.exists(seen['file_name'])
# Example #3
def test_s3_file_manager_write(mock_s3_resource, mock_s3_bucket):
    """write_data uploads the bytes under the base key; passing ext appends
    the extension to the generated key."""
    manager = S3FileManager(mock_s3_resource.meta.client, mock_s3_bucket.name, "some-key")
    payload = b"foo"

    # Plain write: round-trip the bytes through the (mocked) bucket.
    handle = manager.write_data(payload)
    assert mock_s3_bucket.Object(handle.s3_key).get()["Body"].read() == payload

    # Write with an extension: key gains a ".foo" suffix, content unchanged.
    handle = manager.write_data(payload, ext="foo")
    assert handle.s3_key.endswith(".foo")
    assert mock_s3_bucket.Object(handle.s3_key).get()["Body"].read() == payload
# Example #4
def test_s3_file_manager_write():
    """write_data issues exactly one put_object call and returns an
    S3FileHandle scoped to the manager's bucket and base key."""
    client = mock.MagicMock()
    file_manager = S3FileManager(client, 'some-bucket', 'some-key')

    handle = file_manager.write_data('foo'.encode())

    # The handle points inside the configured bucket/base-key namespace.
    assert isinstance(handle, S3FileHandle)
    assert handle.s3_bucket == 'some-bucket'
    assert handle.s3_key.startswith('some-key/')

    # Exactly one upload happened.
    assert client.put_object.call_count == 1
# Example #5
def test_unzip_file_handle_on_fake_s3():
    """End-to-end: a solid zips bytes into S3 via the file_manager resource,
    then unzip_file_handle extracts the archive member back into S3; both
    objects must exist in the bucket afterwards."""
    foo_bytes = b"foo"

    @solid(required_resource_keys={"file_manager"}, output_defs=[OutputDefinition(S3FileHandle)])
    def write_zipped_file_to_s3_store(context):
        # Build a zip on local disk, then push its raw bytes to S3.
        with get_temp_file_name() as zip_file_name:
            write_zip_file_to_disk(zip_file_name, "an_archive_member", foo_bytes)
            with open(zip_file_name, "rb") as ff:
                return context.resources.file_manager.write_data(ff.read())

    # Uses mock S3
    # https://github.com/spulec/moto/issues/3292
    client = boto3.client("s3", region_name="us-east-1")
    client.create_bucket(Bucket="some-bucket")
    manager = S3FileManager(s3_session=client, s3_bucket="some-bucket", s3_base_key="dagster")

    resource_defs = {
        "s3": ResourceDefinition.hardcoded_resource(client),
        "file_manager": ResourceDefinition.hardcoded_resource(manager),
        "io_manager": s3_pickle_io_manager,
    }

    @pipeline(mode_defs=[ModeDefinition(resource_defs=resource_defs)])
    def do_test_unzip_file_handle_s3():
        return unzip_file_handle(write_zipped_file_to_s3_store())

    run_config = {
        "resources": {"io_manager": {"config": {"s3_bucket": "some-bucket"}}},
        "solids": {
            "unzip_file_handle": {"inputs": {"archive_member": {"value": "an_archive_member"}}}
        },
    }
    result = execute_pipeline(do_test_unzip_file_handle_s3, run_config=run_config)

    assert result.success

    zipped = result.result_for_solid("write_zipped_file_to_s3_store").output_value()
    unzipped = result.result_for_solid("unzip_file_handle").output_value()
    bucket_keys = [obj["Key"] for obj in client.list_objects(Bucket="some-bucket")["Contents"]]

    # Both the archive and the extracted member landed in the bucket.
    assert zipped.s3_key in bucket_keys
    assert unzipped.s3_key in bucket_keys
# Example #6
def test_s3_file_manager_read(mock_s3_resource, mock_s3_bucket):
    """read downloads the object on first access and serves subsequent
    reads from the local cache even after the remote object is gone."""
    payload = b"bar"
    stored_object = mock_s3_bucket.Object("some-key/foo")
    stored_object.put(Body=payload)

    manager = S3FileManager(mock_s3_resource.meta.client, mock_s3_bucket.name, "some-key")
    handle = S3FileHandle(mock_s3_bucket.name, "some-key/foo")

    with manager.read(handle) as stream:
        assert stream.read() == payload

    # Delete the remote object; a second read must still succeed because
    # it hits the local cache rather than S3.
    stored_object.delete()
    with manager.read(handle) as stream:
        assert stream.read() == payload
def test_s3_file_manager_write():
    """Each write_data call does one put_object and yields a handle under
    the base key; ext='foo' appends a '.foo' suffix to the key."""
    client = mock.MagicMock()
    manager = S3FileManager(client, "some-bucket", "some-key")
    payload = b"foo"

    # First write: no extension.
    first = manager.write_data(payload)
    assert isinstance(first, S3FileHandle)
    assert first.s3_bucket == "some-bucket"
    assert first.s3_key.startswith("some-key/")
    assert client.put_object.call_count == 1

    # Second write: extension shows up as the key suffix.
    second = manager.write_data(payload, ext="foo")
    assert isinstance(second, S3FileHandle)
    assert second.s3_bucket == "some-bucket"
    assert second.s3_key.startswith("some-key/")
    assert second.s3_key.endswith(".foo")
    assert client.put_object.call_count == 2
# Example #8
def test_unzip_file_handle_on_fake_s3():
    """Legacy-API variant (system_storage_defs / 'storage' run config):
    zip bytes into S3 through the file_manager resource, then let
    unzip_file_handle extract the member back into S3."""
    foo_bytes = 'foo'.encode()

    @solid(required_resource_keys={'file_manager'},
           output_defs=[OutputDefinition(S3FileHandle)])
    def write_zipped_file_to_s3_store(context):
        # Write a zip archive to a temp file, then upload its bytes.
        with get_temp_file_name() as zip_file_name:
            write_zip_file_to_disk(zip_file_name, 'an_archive_member',
                                   foo_bytes)
            with open(zip_file_name, 'rb') as ff:
                return context.resources.file_manager.write_data(ff.read())

    # Uses mock S3
    client = boto3.client('s3')
    client.create_bucket(Bucket='some-bucket')
    manager = S3FileManager(
        s3_session=client, s3_bucket='some-bucket', s3_base_key='dagster'
    )

    mode = ModeDefinition(
        resource_defs={
            's3': ResourceDefinition.hardcoded_resource(client),
            'file_manager': ResourceDefinition.hardcoded_resource(manager),
        },
        system_storage_defs=[s3_system_storage],
    )

    @pipeline(mode_defs=[mode])
    def do_test_unzip_file_handle_s3():
        return unzip_file_handle(write_zipped_file_to_s3_store())

    run_config = {
        'storage': {'s3': {'config': {'s3_bucket': 'some-bucket'}}},
        'solids': {
            'unzip_file_handle': {
                'inputs': {'archive_member': {'value': 'an_archive_member'}}
            }
        },
    }
    result = execute_pipeline(do_test_unzip_file_handle_s3, run_config=run_config)

    assert result.success

    zipped = result.result_for_solid('write_zipped_file_to_s3_store').output_value()
    unzipped = result.result_for_solid('unzip_file_handle').output_value()
    bucket_keys = [
        obj['Key'] for obj in client.list_objects(Bucket='some-bucket')['Contents']
    ]

    # Both the zip and the extracted member must exist in the bucket.
    assert zipped.s3_key in bucket_keys
    assert unzipped.s3_key in bucket_keys