Example #1
0
def test_adls2_file_manager_write(storage_account, file_system):
    """Verify ``write_data`` uploads once per call and returns a matching handle.

    The ADLS2 client is a ``MagicMock`` whose ``get_file_client`` always hands
    back the same file mock, so uploads are counted on that single object.
    Two writes are made: one without an extension and one with ``ext='foo'``.
    """
    upload_target = mock.MagicMock()
    client = mock.MagicMock()
    client.account_name = storage_account
    client.get_file_client.return_value = upload_target
    manager = ADLS2FileManager(client, file_system, 'some-key')

    payload = b'foo'

    # First write: no extension requested.
    plain_handle = manager.write_data(payload)

    assert isinstance(plain_handle, ADLS2FileHandle)
    assert plain_handle.account == storage_account
    assert plain_handle.file_system == file_system
    assert plain_handle.key.startswith('some-key/')
    assert upload_target.upload_data.call_count == 1

    # Second write: the requested extension must show up as the key suffix.
    ext_handle = manager.write_data(payload, ext='foo')

    assert isinstance(ext_handle, ADLS2FileHandle)
    assert ext_handle.account == storage_account
    assert ext_handle.file_system == file_system
    assert ext_handle.key.startswith('some-key/')
    assert ext_handle.key.endswith('.foo')
    assert upload_target.upload_data.call_count == 2
Example #2
0
def test_adls2_file_manager_read(storage_account, file_system):
    """Verify ``read`` downloads a handle's content once and then serves it from cache.

    The mocked client records the ``file_system`` / ``file_path`` keyword
    arguments it is asked for, and the mocked file client fails the test if
    ``download_file`` is invoked more than once. Local temp files are removed
    in a ``finally`` block so a failing assertion does not leave them behind.
    """
    state = {'called': 0}
    bar_bytes = b'bar'

    class DownloadMock(mock.MagicMock):
        def readinto(self, fileobj):
            # Stand-in for the real downloader: emit the canned payload.
            fileobj.write(bar_bytes)

    class FileMock(mock.MagicMock):
        def download_file(self):
            state['called'] += 1
            # A second download means the second read was not served from cache.
            assert state['called'] == 1
            return DownloadMock(file=self)

    class ADLS2Mock(mock.MagicMock):
        def get_file_client(self, *_args, **kwargs):
            # Record what the file manager asked for so it can be checked below.
            file_path = kwargs['file_path']
            state['file_system'] = kwargs['file_system']
            state['file_path'] = file_path
            return FileMock(file_path=file_path)

    adls2_mock = ADLS2Mock()
    file_manager = ADLS2FileManager(adls2_mock, file_system, 'some-key')
    file_handle = ADLS2FileHandle(
        storage_account, file_system, 'some-key/kdjfkjdkfjkd'
    )

    try:
        with file_manager.read(file_handle) as file_obj:
            assert file_obj.read() == bar_bytes

        assert state['file_system'] == file_handle.file_system
        assert state['file_path'] == file_handle.key

        # Read again: the content must come from the local cache, not a new download.
        with file_manager.read(file_handle) as file_obj:
            assert file_obj.read() == bar_bytes

        assert state['called'] == 1
    finally:
        # Always clean up local temp copies, even when an assertion above fails.
        file_manager.delete_local_temp()
Example #3
0
def test_depends_on_adls2_resource_file_manager(storage_account, file_system):
    """Run a two-solid pipeline that passes a file handle through the file manager.

    ``emit_file`` writes ``bar_bytes`` via the ``file_manager`` resource;
    ``accept_file`` copies the resulting handle to a local temp file and checks
    its content. Afterwards the fake client must hold exactly one key for
    ``file_system``, shaped like ``some-prefix/<uuid>``.
    """
    bar_bytes = b"bar"

    @solid(
        output_defs=[OutputDefinition(ADLS2FileHandle)],
        required_resource_keys={"file_manager"},
    )
    def emit_file(context):
        return context.resources.file_manager.write_data(bar_bytes)

    @solid(
        input_defs=[InputDefinition("file_handle", ADLS2FileHandle)],
        required_resource_keys={"file_manager"},
    )
    def accept_file(context, file_handle):
        local_path = context.resources.file_manager.copy_handle_to_local_temp(
            file_handle
        )
        assert isinstance(local_path, str)
        # Use a context manager so the temp file is closed deterministically
        # instead of relying on garbage collection.
        with open(local_path, "rb") as local_file:
            assert local_file.read() == bar_bytes

    adls2_fake_resource = FakeADLS2Resource(storage_account)
    adls2_fake_file_manager = ADLS2FileManager(
        adls2_client=adls2_fake_resource.adls2_client,
        file_system=file_system,
        prefix="some-prefix",
    )

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "adls2": ResourceDefinition.hardcoded_resource(
                        adls2_fake_resource
                    ),
                    "file_manager": ResourceDefinition.hardcoded_resource(
                        adls2_fake_file_manager
                    ),
                },
            )
        ]
    )
    def adls2_file_manager_test():
        accept_file(emit_file())

    # NOTE(review): "file_manager" is a hardcoded resource here, so this config
    # is presumably not consumed by it -- confirm before relying on it.
    result = execute_pipeline(
        adls2_file_manager_test,
        run_config={
            "resources": {
                "file_manager": {
                    "config": {
                        "adls2_file_system": file_system
                    }
                }
            }
        },
    )

    assert result.success

    keys_in_bucket = set(
        adls2_fake_resource.adls2_client.file_systems[file_system].keys()
    )

    # Only emit_file writes, so exactly one object should exist.
    assert len(keys_in_bucket) == 1

    file_key = next(iter(keys_in_bucket))
    # Split on the last "/" only: everything before it is the prefix, the
    # final component is the generated name.
    prefix, _, name = file_key.rpartition("/")

    assert prefix == "some-prefix"

    # uuid.UUID raises ValueError if the final component is not a valid UUID.
    assert uuid.UUID(name)