import uuid

from dagster import (
    InputDefinition,
    Int,
    ModeDefinition,
    OutputDefinition,
    ResourceDefinition,
    execute_pipeline,
    pipeline,
    solid,
)

# Test helpers such as create_s3_fake_resource, create_s3_key,
# create_file_handle_pipeline, get_temp_file_handle_with_data,
# get_temp_file_name, and write_zip_file_to_disk, along with the
# file_handle_to_s3 and unzip_file_handle solids, S3FileHandle,
# s3_system_storage, and s3_plus_default_storage_defs, come from
# dagster_aws's S3 modules and test utilities; exact import paths
# depend on the dagster / dagster-aws version in use.


def test_successful_file_handle_to_s3():
    foo_bytes = 'foo'.encode()
    with get_temp_file_handle_with_data(foo_bytes) as temp_file_handle:
        s3_fake_resource = create_s3_fake_resource()
        result = execute_pipeline(
            create_file_handle_pipeline(temp_file_handle, s3_fake_resource),
            environment_dict={
                'solids': {
                    'file_handle_to_s3': {
                        'config': {
                            'Bucket': 'some-bucket',
                            'Key': 'some-key'
                        }
                    }
                }
            },
        )

        assert result.success

        assert s3_fake_resource.session.mock_extras.upload_fileobj.call_count == 1

        assert (s3_fake_resource.session.get_object(
            'some-bucket', 'some-key')['Body'].read() == foo_bytes)

        materializations = result.result_for_solid(
            'file_handle_to_s3').materializations_during_compute
        assert len(materializations) == 1
        assert len(materializations[0].metadata_entries) == 1
        assert (materializations[0].metadata_entries[0].entry_data.path ==
                's3://some-bucket/some-key')
        assert materializations[0].metadata_entries[0].label == 'some-key'


def test_successful_file_handle_to_s3_with_configs():
    foo_bytes = 'foo'.encode()
    with get_temp_file_handle_with_data(foo_bytes) as temp_file_handle:
        s3_fake_resource = create_s3_fake_resource()

        result = execute_pipeline(
            create_file_handle_pipeline(temp_file_handle, s3_fake_resource),
            environment_dict={
                'solids': {
                    'file_handle_to_s3': {
                        'config': {
                            'Bucket': 'some-bucket',
                            'Key': 'some-key',
                            'CacheControl': 'some-value',
                        }
                    }
                }
            },
        )

        assert result.success

        s3_fake_resource.session.mock_extras.put_object.assert_called_once_with(
            CacheControl='some-value'
        )
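

# The two tests above rely on a pipeline factory from the test utilities. A
# minimal sketch of create_file_handle_pipeline, assuming it feeds the handle
# into the library's file_handle_to_s3 solid through a hardcoded 's3' resource
# (the emit_temp_handle solid name is hypothetical):
def create_file_handle_pipeline(temp_file_handle, s3_resource):
    @solid
    def emit_temp_handle(_):
        # emit the in-memory file handle under test as this solid's output
        return temp_file_handle

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={
                's3': ResourceDefinition.hardcoded_resource(s3_resource)
            }
        )
    ])
    def file_handle_pipeline():
        return file_handle_to_s3(emit_temp_handle())

    return file_handle_pipeline
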
def test_depends_on_s3_resource_intermediates():
    @solid(
        input_defs=[
            InputDefinition('num_one', Int),
            InputDefinition('num_two', Int)
        ],
        output_defs=[OutputDefinition(Int)],
    )
    def add_numbers(_, num_one, num_two):
        return num_one + num_two

    s3_fake_resource = create_s3_fake_resource()

    @pipeline(mode_defs=[
        ModeDefinition(
            system_storage_defs=s3_plus_default_storage_defs,
            resource_defs={
                's3': ResourceDefinition.hardcoded_resource(s3_fake_resource)
            },
        )
    ])
    def s3_internal_pipeline():
        return add_numbers()

    result = execute_pipeline(
        s3_internal_pipeline,
        environment_dict={
            'solids': {
                'add_numbers': {
                    'inputs': {
                        'num_one': {
                            'value': 2
                        },
                        'num_two': {
                            'value': 4
                        }
                    }
                }
            },
            'storage': {
                's3': {
                    'config': {
                        's3_bucket': 'some-bucket'
                    }
                }
            },
        },
    )

    assert result.success
    assert result.result_for_solid('add_numbers').output_value() == 6

    assert 'some-bucket' in s3_fake_resource.session.buckets

    expected_keys = {create_s3_key(result.run_id, 'add_numbers.compute', 'result')}

    assert set(s3_fake_resource.session.buckets['some-bucket'].keys()) == expected_keys
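

# The expected key above comes from a create_s3_key test helper. One plausible
# sketch, assuming the S3 intermediate store lays keys out as
# 'dagster/storage/<run_id>/intermediates/<step_key>/<output_name>' (the exact
# prefix is an assumption, not confirmed by these tests):
def create_s3_key(run_id, step_key, output_name):
    return 'dagster/storage/{run_id}/intermediates/{step_key}/{output_name}'.format(
        run_id=run_id, step_key=step_key, output_name=output_name
    )
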
def test_unzip_file_handle_on_fake_s3():
    foo_bytes = 'foo'.encode()

    @solid(output_defs=[OutputDefinition(S3FileHandle)])
    def write_zipped_file_to_s3_store(context):
        with get_temp_file_name() as zip_file_name:
            write_zip_file_to_disk(zip_file_name, 'an_archive_member',
                                   foo_bytes)
            with open(zip_file_name, 'rb') as ff:
                s3_file_handle = context.file_manager.write_data(ff.read())
                return s3_file_handle

    s3_fake_resource = create_s3_fake_resource()

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={
                's3': ResourceDefinition.hardcoded_resource(s3_fake_resource)
            },
            system_storage_defs=[s3_system_storage],
        )
    ])
    def do_test_unzip_file_handle_s3():
        return unzip_file_handle(write_zipped_file_to_s3_store())

    result = execute_pipeline(
        do_test_unzip_file_handle_s3,
        environment_dict={
            'storage': {
                's3': {
                    'config': {
                        's3_bucket': 'some-bucket'
                    }
                }
            },
            'solids': {
                'unzip_file_handle': {
                    'inputs': {
                        'archive_member': {
                            'value': 'an_archive_member'
                        }
                    }
                }
            },
        },
    )

    assert result.success

    zipped_s3_file = result.result_for_solid(
        'write_zipped_file_to_s3_store').output_value()
    unzipped_s3_file = result.result_for_solid(
        'unzip_file_handle').output_value()

    assert zipped_s3_file.s3_key in s3_fake_resource.session.buckets[
        'some-bucket']
    assert unzipped_s3_file.s3_key in s3_fake_resource.session.buckets[
        'some-bucket']
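

# write_zipped_file_to_s3_store above depends on a small zip-writing helper. A
# self-contained sketch of write_zip_file_to_disk, assuming it creates a fresh
# archive containing a single named member that holds the given bytes:
import zipfile


def write_zip_file_to_disk(zip_file_path, archive_member, data):
    with zipfile.ZipFile(zip_file_path, 'w') as zip_file:
        # ZipFile.writestr accepts bytes for the member payload
        zip_file.writestr(archive_member, data)
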
def test_depends_on_s3_resource_file_manager():
    bar_bytes = 'bar'.encode()

    @solid(output_defs=[OutputDefinition(S3FileHandle)])
    def emit_file(context):
        return context.file_manager.write_data(bar_bytes)

    @solid(input_defs=[InputDefinition('file_handle', S3FileHandle)])
    def accept_file(context, file_handle):
        local_path = context.file_manager.copy_handle_to_local_temp(file_handle)
        assert isinstance(local_path, str)
        # read via a context manager so the local temp file is closed promptly
        with open(local_path, 'rb') as local_file:
            assert local_file.read() == bar_bytes

    s3_fake_resource = create_s3_fake_resource()

    @pipeline(
        mode_defs=[
            ModeDefinition(
                system_storage_defs=s3_plus_default_storage_defs,
                resource_defs={'s3': ResourceDefinition.hardcoded_resource(s3_fake_resource)},
            )
        ]
    )
    def s3_file_manager_test():
        accept_file(emit_file())

    result = execute_pipeline(
        s3_file_manager_test,
        environment_dict={'storage': {'s3': {'config': {'s3_bucket': 'some-bucket'}}}},
    )

    assert result.success

    keys_in_bucket = set(s3_fake_resource.session.buckets['some-bucket'].keys())

    for step_key, output_name in [
        ('emit_file.compute', 'result'),
        ('accept_file.compute', 'result'),
    ]:
        keys_in_bucket.remove(create_s3_key(result.run_id, step_key, output_name))

    assert len(keys_in_bucket) == 1

    file_key = list(keys_in_bucket)[0]
    comps = file_key.split('/')

    assert '/'.join(comps[:-1]) == 'dagster/runs/{run_id}/files/managed'.format(
        run_id=result.run_id
    )

    assert uuid.UUID(comps[-1])
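

# Every test above leans on create_s3_fake_resource. A minimal in-memory
# stand-in, consistent with how the tests poke at the fake session but
# hypothetical in its details (the real fake ships with the test utilities):
import io
from collections import defaultdict
from unittest import mock


class FakeS3Session:
    def __init__(self):
        self.buckets = defaultdict(dict)     # bucket name -> {key: payload bytes}
        self.mock_extras = mock.MagicMock()  # records calls and pass-through kwargs

    def upload_fileobj(self, fileobj, bucket, key, **extras):
        self.mock_extras.upload_fileobj(**extras)
        self.buckets[bucket][key] = fileobj.read()

    def put_object(self, Bucket, Key, Body=b'', **extras):
        self.mock_extras.put_object(**extras)
        self.buckets[Bucket][Key] = Body.read() if hasattr(Body, 'read') else Body

    def get_object(self, bucket, key):
        # mirror boto3's get_object response shape closely enough for the tests
        return {'Body': io.BytesIO(self.buckets[bucket][key])}


class FakeS3Resource:
    # hypothetical wrapper exposing the session the way the tests access it
    def __init__(self):
        self.session = FakeS3Session()


def create_s3_fake_resource():
    return FakeS3Resource()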