def test_successful_file_handle_to_s3():
    """A file handle uploaded through file_handle_to_s3 lands in the fake bucket
    and emits exactly one materialization pointing at the s3:// path."""
    payload = 'foo'.encode()
    with get_temp_file_handle_with_data(payload) as handle:
        fake_s3 = create_s3_fake_resource()
        pipeline_result = execute_pipeline(
            create_file_handle_pipeline(handle, fake_s3),
            environment_dict={
                'solids': {
                    'file_handle_to_s3': {
                        'config': {'Bucket': 'some-bucket', 'Key': 'some-key'}
                    }
                }
            },
        )

        assert pipeline_result.success
        # The fake client records upload calls; exactly one object was pushed.
        assert fake_s3.mock_extras.upload_fileobj.call_count == 1
        assert fake_s3.get_object('some-bucket', 'some-key')['Body'].read() == payload

        mats = pipeline_result.result_for_solid(
            'file_handle_to_s3').materializations_during_compute
        assert len(mats) == 1
        entries = mats[0].metadata_entries
        assert len(entries) == 1
        # The single metadata entry carries the full S3 URI and is labeled by key.
        assert entries[0].entry_data.path == 's3://some-bucket/some-key'
        assert entries[0].label == 'some-key'
def test_depends_on_s3_resource_file_manager():
    """File-manager round trip on the fake S3 resource.

    One solid writes bytes via ``context.file_manager``; a downstream solid
    copies the handle to a local temp path and verifies the contents. Also
    checks that the bucket ends up containing exactly the two intermediate
    outputs plus one file-manager object under ``dagster/storage/<run_id>/files``.
    """
    bar_bytes = 'bar'.encode()

    @solid(output_defs=[OutputDefinition(S3FileHandle)])
    def emit_file(context):
        # Writes through the s3-backed file manager, returning an S3FileHandle.
        return context.file_manager.write_data(bar_bytes)

    @solid(input_defs=[InputDefinition('file_handle', S3FileHandle)])
    def accept_file(context, file_handle):
        local_path = context.file_manager.copy_handle_to_local_temp(file_handle)
        assert isinstance(local_path, str)
        # Fix: use a context manager so the temp file is closed deterministically
        # instead of leaking the handle until GC.
        with open(local_path, 'rb') as local_file:
            assert local_file.read() == bar_bytes

    s3_fake_resource = create_s3_fake_resource()

    @pipeline(mode_defs=[
        ModeDefinition(
            system_storage_defs=s3_plus_default_storage_defs,
            resource_defs={
                's3': ResourceDefinition.hardcoded_resource(s3_fake_resource)
            },
        )
    ])
    def s3_file_manager_test():
        accept_file(emit_file())

    result = execute_pipeline(
        s3_file_manager_test,
        environment_dict={
            'storage': {'s3': {'config': {'s3_bucket': 'some-bucket'}}}
        },
    )
    assert result.success

    # Remove the two step-output intermediates; exactly one key must remain:
    # the object written by the file manager.
    keys_in_bucket = set(s3_fake_resource.buckets['some-bucket'].keys())
    for step_key, output_name in [
        ('emit_file.compute', 'result'),
        ('accept_file.compute', 'result'),
    ]:
        keys_in_bucket.remove(create_s3_key(result.run_id, step_key, output_name))

    assert len(keys_in_bucket) == 1
    file_key = next(iter(keys_in_bucket))
    comps = file_key.split('/')
    # File-manager objects live under dagster/storage/<run_id>/files/<uuid>.
    assert '/'.join(comps[:-1]) == 'dagster/storage/{run_id}/files'.format(
        run_id=result.run_id)
    assert uuid.UUID(comps[-1])
def test_depends_on_s3_resource_intermediates():
    """S3 intermediate storage persists each step output as an object keyed by
    run id, step key, and output name — and nothing else."""

    @solid(
        input_defs=[
            InputDefinition('num_one', Int),
            InputDefinition('num_two', Int),
        ],
        output_defs=[OutputDefinition(Int)],
    )
    def add_numbers(_, num_one, num_two):
        return num_one + num_two

    fake_s3 = create_s3_fake_resource()

    @pipeline(mode_defs=[
        ModeDefinition(
            system_storage_defs=s3_plus_default_storage_defs,
            resource_defs={
                's3': ResourceDefinition.hardcoded_resource(fake_s3)
            },
        )
    ])
    def s3_internal_pipeline():
        return add_numbers()

    run_result = execute_pipeline(
        s3_internal_pipeline,
        environment_dict={
            'solids': {
                'add_numbers': {
                    'inputs': {
                        'num_one': {'value': 2},
                        'num_two': {'value': 4},
                    }
                }
            },
            'storage': {'s3': {'config': {'s3_bucket': 'some-bucket'}}},
        },
    )

    assert run_result.success
    assert run_result.result_for_solid('add_numbers').output_value() == 6

    assert 'some-bucket' in fake_s3.buckets
    # The bucket should contain exactly the intermediate for the one compute step.
    expected_keys = {
        create_s3_key(run_result.run_id, step_key, output_name)
        for step_key, output_name in [('add_numbers.compute', 'result')]
    }
    assert set(fake_s3.buckets['some-bucket'].keys()) == expected_keys
def test_unzip_file_handle_on_fake_s3():
    """unzip_file_handle extracts an archive member from a zip stored on the
    fake S3 resource; both the zipped and unzipped handles end up in the bucket."""
    member_bytes = 'foo'.encode()

    @solid(output_defs=[OutputDefinition(S3FileHandle)])
    def write_zipped_file_to_s3_store(context):
        # Build a one-member zip on local disk, then push its bytes through
        # the s3-backed file manager.
        with get_temp_file_name() as zip_file_name:
            write_zip_file_to_disk(zip_file_name, 'an_archive_member', member_bytes)
            with open(zip_file_name, 'rb') as ff:
                return context.file_manager.write_data(ff.read())

    fake_s3 = create_s3_fake_resource()

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={
                's3': ResourceDefinition.hardcoded_resource(fake_s3)
            },
            system_storage_defs=[s3_system_storage],
        )
    ])
    def do_test_unzip_file_handle_s3():
        return unzip_file_handle(write_zipped_file_to_s3_store())

    run_config = {
        'storage': {'s3': {'config': {'s3_bucket': 'some-bucket'}}},
        'solids': {
            'unzip_file_handle': {
                'inputs': {'archive_member': {'value': 'an_archive_member'}}
            }
        },
    }
    result = execute_pipeline(do_test_unzip_file_handle_s3, environment_dict=run_config)
    assert result.success

    zipped_handle = result.result_for_solid('write_zipped_file_to_s3_store').output_value()
    unzipped_handle = result.result_for_solid('unzip_file_handle').output_value()
    # Both the original zip and the extracted member were written to the bucket.
    assert zipped_handle.s3_key in fake_s3.buckets['some-bucket']
    assert unzipped_handle.s3_key in fake_s3.buckets['some-bucket']