def test_file_system_intermediate_store_with_composite_type_storage_plugin():
    """Type storage plugins do not support composite dagster types.

    With a plugin registered for RuntimeString, ``set_value`` against every
    composite shape built from String must raise NotImplementedCheckError.
    """
    run_id = make_new_run_id()
    intermediate_store = build_fs_intermediate_store(
        DagsterInstance.ephemeral().intermediates_directory,
        run_id=run_id,
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            [(RuntimeString, FancyStringFilesystemTypeStoragePlugin)]
        ),
    )

    # List[String]
    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(
                ['hello'], context, resolve_dagster_type(List[String]), ['obj_name']
            )

    # Optional[String]
    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(
                ['hello'], context, resolve_dagster_type(Optional[String]), ['obj_name']
            )

    # List[Optional[String]]
    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(
                ['hello'], context, resolve_dagster_type(List[Optional[String]]), ['obj_name']
            )

    # Optional[List[String]]
    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(
                ['hello'], context, resolve_dagster_type(Optional[List[String]]), ['obj_name']
            )
def test_file_system_intermediate_store_with_composite_type_storage_plugin(): run_id = str(uuid.uuid4()) # FIXME need a dedicated test bucket intermediate_store = FileSystemIntermediateStore( run_id=run_id, type_storage_plugin_registry=TypeStoragePluginRegistry( {RuntimeString.inst(): FancyStringFilesystemTypeStoragePlugin}), ) with yield_empty_pipeline_context(run_id=run_id) as context: with pytest.raises(check.NotImplementedCheckError): intermediate_store.set_value(['hello'], context, resolve_to_runtime_type(List[String]), ['obj_name']) with yield_empty_pipeline_context(run_id=run_id) as context: with pytest.raises(check.NotImplementedCheckError): intermediate_store.set_value(['hello'], context, resolve_to_runtime_type( Optional[String]), ['obj_name']) with yield_empty_pipeline_context(run_id=run_id) as context: with pytest.raises(check.NotImplementedCheckError): intermediate_store.set_value( ['hello'], context, resolve_to_runtime_type(List[Optional[String]]), ['obj_name']) with yield_empty_pipeline_context(run_id=run_id) as context: with pytest.raises(check.NotImplementedCheckError): intermediate_store.set_value( ['hello'], context, resolve_to_runtime_type(Optional[List[String]]), ['obj_name'])
def test_custom_read_write_mode(s3_bucket):
    """Round-trip a LessSimpleDataFrame through S3 intermediate storage.

    The custom type's serialization must preserve the data, and the object URI
    must use the s3:// scheme.

    Fix: cleanup now runs inside the pipeline context. The original referenced
    ``context`` in a ``finally`` outside the ``with``, which raises NameError if
    context creation fails and otherwise uses the context after it has exited.
    """
    run_id = make_new_run_id()
    intermediate_storage = S3IntermediateStorage(run_id=run_id, s3_bucket=s3_bucket)
    data_frame = [OrderedDict({"foo": "1", "bar": "1"}), OrderedDict({"foo": "2", "bar": "2"})]

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(LessSimpleDataFrame),
                StepOutputHandle("data_frame"),
                data_frame,
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle("data_frame"))
            assert (
                intermediate_storage.get_intermediate(
                    context,
                    resolve_dagster_type(LessSimpleDataFrame),
                    StepOutputHandle("data_frame"),
                ).obj
                == data_frame
            )
            # URI scheme confirms the object actually landed in S3.
            assert intermediate_storage.uri_for_paths(["data_frame"]).startswith("s3://")
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("data_frame"))
def test_s3_intermediate_storage_with_custom_serializer(s3_bucket):
    """The LowercaseString custom serializer writes "FOO" as the raw S3 bytes
    while ``get_intermediate`` yields back "foo".
    """
    run_id = make_new_run_id()
    intermediate_storage = S3IntermediateStorage(run_id=run_id, s3_bucket=s3_bucket)

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, LowercaseString, StepOutputHandle("foo"), "foo"
            )
            # Fetch the raw object via boto3 to prove the serializer ran on write.
            assert (
                intermediate_storage.object_store.s3.get_object(
                    Bucket=intermediate_storage.object_store.bucket,
                    Key=os.path.join(intermediate_storage.root, "intermediates", "foo", "result"),
                )["Body"]
                .read()
                .decode("utf-8")
                == "FOO"
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle("foo"))
            assert (
                intermediate_storage.get_intermediate(
                    context, LowercaseString, StepOutputHandle("foo")
                ).obj
                == "foo"
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("foo"))
def test_gcs_intermediate_storage_with_custom_prefix(gcs_bucket):
    """A custom ``gcs_prefix`` must be reflected in the storage root and in
    object URIs.

    Fix: cleanup now runs inside the pipeline context. The original referenced
    ``context`` in a ``finally`` outside the ``with``, which raises NameError if
    context creation fails and otherwise uses the context after it has exited.
    """
    run_id = make_new_run_id()
    intermediate_storage = GCSIntermediateStorage(
        run_id=run_id, gcs_bucket=gcs_bucket, gcs_prefix="custom_prefix"
    )
    assert intermediate_storage.root == "/".join(["custom_prefix", "storage", run_id])
    obj_name = "true"

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, RuntimeBool, StepOutputHandle(obj_name), True
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle(obj_name))
            # The prefix must appear in the gs:// URI.
            assert intermediate_storage.uri_for_paths([obj_name]).startswith(
                "gs://%s/custom_prefix" % gcs_bucket
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle(obj_name))
def test_custom_read_write_mode(storage_account, file_system):
    """Round-trip a LessSimpleDataFrame through ADLS2 intermediate storage.

    Fix: cleanup now runs inside the pipeline context. The original's outer
    ``finally`` referenced both ``context`` and ``intermediate_storage`` — names
    bound only inside the ``with`` — so any failure before they were assigned
    raised NameError instead of the real error.
    """
    run_id = make_new_run_id()
    data_frame = [OrderedDict({"foo": "1", "bar": "1"}), OrderedDict({"foo": "2", "bar": "2"})]

    with yield_empty_pipeline_context(run_id=run_id) as context:
        intermediate_storage = ADLS2IntermediateStorage(
            adls2_client=get_adls2_client(storage_account),
            blob_client=get_blob_client(storage_account),
            run_id=run_id,
            file_system=file_system,
        )
        try:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(LessSimpleDataFrame),
                StepOutputHandle("data_frame"),
                data_frame,
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle("data_frame"))
            assert (
                intermediate_storage.get_intermediate(
                    context,
                    resolve_dagster_type(LessSimpleDataFrame),
                    StepOutputHandle("data_frame"),
                ).obj
                == data_frame
            )
            # abfss:// scheme confirms the object landed in ADLS2.
            assert intermediate_storage.uri_for_paths(["data_frame"]).startswith("abfss://")
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("data_frame"))
def test_file_system_intermediate_store_with_base_dir():
    """A filesystem store rooted at an explicit ``base_dir`` lays objects out
    under ``<base_dir>/dagster/runs/<run_id>/files`` and round-trips a bool.
    """
    run_id = str(uuid.uuid4())
    try:
        tempdir = tempfile.mkdtemp()
        intermediate_store = FileSystemIntermediateStore(run_id=run_id, base_dir=tempdir)
        assert intermediate_store.root == os.path.join(
            tempdir, 'dagster', 'runs', run_id, 'files'
        )
        with yield_empty_pipeline_context(run_id=run_id) as context:
            try:
                intermediate_store.set_object(True, context, RuntimeBool.inst(), ['true'])
                assert intermediate_store.has_object(context, ['true'])
                assert intermediate_store.get_object(context, RuntimeBool.inst(), ['true']) is True
            finally:
                # Best-effort removal of the store's own directory.
                try:
                    shutil.rmtree(intermediate_store.root)
                except seven.FileNotFoundError:
                    pass
    finally:
        # Always remove the temp dir as well.
        # NOTE(review): if mkdtemp itself raises, ``tempdir`` is unbound here
        # and this would NameError — confirm whether that path matters.
        try:
            shutil.rmtree(tempdir)
        except seven.FileNotFoundError:
            pass
def test_file_system_intermediate_store_composite_types_with_custom_serializer_for_inner_type(
):
    """A List whose inner member type has a custom serializer round-trips
    through the filesystem store: written as List(LowercaseString), read back
    as List(Bool), yielding the original values.
    """
    run_id = str(uuid.uuid4())
    intermediate_store = FileSystemIntermediateStore(run_id=run_id)
    assert intermediate_store.root == os.path.join(
        seven.get_system_temp_directory(), 'dagster', 'runs', run_id, 'files'
    )
    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(
                ['foo', 'bar'],
                context,
                resolve_to_runtime_type(List(LowercaseString)).inst(),
                ['list'],
            )
            assert intermediate_store.has_object(context, ['list'])
            assert intermediate_store.get_object(
                context, resolve_to_runtime_type(List(Bool)).inst(), ['list']
            ) == ['foo', 'bar']
        finally:
            # Best-effort cleanup of the store's directory.
            try:
                shutil.rmtree(intermediate_store.root)
            except seven.FileNotFoundError:
                pass
def test_s3_intermediate_store_with_custom_serializer(s3_bucket): run_id = str(uuid.uuid4()) # FIXME need a dedicated test bucket intermediate_store = S3IntermediateStore(run_id=run_id, s3_bucket=s3_bucket) with yield_empty_pipeline_context(run_id=run_id) as context: try: intermediate_store.set_object('foo', context, LowercaseString.inst(), ['foo']) assert ( intermediate_store.object_store.s3.get_object( Bucket=intermediate_store.object_store.bucket, Key='/'.join([intermediate_store.root] + ['foo']), )['Body'] .read() .decode('utf-8') == 'FOO' ) assert intermediate_store.has_object(context, ['foo']) assert ( intermediate_store.get_object(context, LowercaseString.inst(), ['foo']).obj == 'foo' ) finally: intermediate_store.rm_object(context, ['foo'])
def test_custom_read_write_mode(storage_account, file_system):
    """Round-trip a LessSimpleDataFrame through the ADLS2 intermediate store.

    Fix: cleanup now runs inside the pipeline context. The original's outer
    ``finally`` referenced ``context`` and ``intermediate_store`` — names bound
    only inside the ``with`` — so an early failure raised NameError instead of
    the real error.
    """
    run_id = make_new_run_id()
    data_frame = [OrderedDict({'foo': '1', 'bar': '1'}), OrderedDict({'foo': '2', 'bar': '2'})]

    with yield_empty_pipeline_context(run_id=run_id) as context:
        intermediate_store = ADLS2IntermediateStore(
            adls2_client=get_adls2_client(storage_account),
            blob_client=get_blob_client(storage_account),
            run_id=run_id,
            file_system=file_system,
        )
        try:
            intermediate_store.set_object(
                data_frame, context, resolve_dagster_type(LessSimpleDataFrame), ['data_frame']
            )
            assert intermediate_store.has_object(context, ['data_frame'])
            assert (
                intermediate_store.get_object(
                    context, resolve_dagster_type(LessSimpleDataFrame), ['data_frame']
                ).obj
                == data_frame
            )
            # abfss:// scheme confirms the object landed in ADLS2.
            assert intermediate_store.uri_for_paths(['data_frame']).startswith('abfss://')
        finally:
            intermediate_store.rm_object(context, ['data_frame'])
def test_adls2_intermediate_store(storage_account, file_system):
    """ADLS2 store round-trips a bool, and ``copy_object_from_run`` copies it
    into a second run's store.

    Fix: cleanup now runs inside the pipeline context. The original referenced
    ``context`` in a ``finally`` outside the ``with``, which raises NameError if
    context creation fails and otherwise uses the context after it has exited.
    """
    run_id = make_new_run_id()
    run_id_2 = make_new_run_id()

    intermediate_store = ADLS2IntermediateStore(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )
    assert intermediate_store.root == '/'.join(['dagster', 'storage', run_id])

    intermediate_store_2 = ADLS2IntermediateStore(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id_2,
        file_system=file_system,
    )
    assert intermediate_store_2.root == '/'.join(['dagster', 'storage', run_id_2])

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(True, context, RuntimeBool, ['true'])
            assert intermediate_store.has_object(context, ['true'])
            assert intermediate_store.get_object(context, RuntimeBool, ['true']).obj is True
            assert intermediate_store.uri_for_paths(['true']).startswith('abfss://')

            # Cross-run copy into the second store.
            intermediate_store_2.copy_object_from_run(context, run_id, ['true'])
            assert intermediate_store_2.has_object(context, ['true'])
            assert intermediate_store_2.get_object(context, RuntimeBool, ['true']).obj is True
        finally:
            intermediate_store.rm_object(context, ['true'])
            intermediate_store_2.rm_object(context, ['true'])
def test_adls2_intermediate_store_with_custom_prefix(storage_account, file_system):
    """A custom ``prefix`` must appear in both the store root and object URIs.

    Fix: cleanup now runs inside the pipeline context. The original referenced
    ``context`` in a ``finally`` outside the ``with``, which raises NameError if
    context creation fails and otherwise uses the context after it has exited.
    """
    run_id = make_new_run_id()
    intermediate_store = ADLS2IntermediateStore(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
        prefix='custom_prefix',
    )
    assert intermediate_store.root == '/'.join(['custom_prefix', 'storage', run_id])

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(True, context, RuntimeBool, ['true'])
            assert intermediate_store.has_object(context, ['true'])
            # The prefix must appear in the abfss:// URI.
            assert intermediate_store.uri_for_paths(['true']).startswith(
                'abfss://{fs}@{account}.dfs.core.windows.net/custom_prefix'.format(
                    account=storage_account, fs=file_system
                )
            )
        finally:
            intermediate_store.rm_object(context, ['true'])
def test_adls2_intermediate_store_with_custom_serializer(storage_account, file_system):
    """LowercaseString's serializer stores "FOO" as the raw ADLS2 file contents
    while ``get_object`` yields back "foo".
    """
    run_id = make_new_run_id()
    intermediate_store = ADLS2IntermediateStore(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )
    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object('foo', context, LowercaseString, ['foo'])
            # Read the raw file through the ADLS2 client to verify the
            # serialized (uppercased) bytes.
            assert (
                intermediate_store.object_store.file_system_client.get_file_client(
                    '/'.join([intermediate_store.root] + ['foo']),
                )
                .download_file()
                .readall()
                .decode('utf-8')
                == 'FOO'
            )
            assert intermediate_store.has_object(context, ['foo'])
            assert intermediate_store.get_object(context, LowercaseString, ['foo']).obj == 'foo'
        finally:
            intermediate_store.rm_object(context, ['foo'])
def test_adls2_intermediate_store_composite_types_with_custom_serializer_for_inner_type(
    storage_account, file_system
):
    """A List whose inner member type has a custom serializer round-trips
    through the ADLS2 store: written as List[LowercaseString], read back as
    List[Bool], yielding the original values.
    """
    run_id = make_new_run_id()
    intermediate_store = ADLS2IntermediateStore(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )
    obj_name = 'list'
    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(
                ['foo', 'bar'],
                context,
                resolve_dagster_type(List[LowercaseString]),
                [obj_name],
            )
            assert intermediate_store.has_object(context, [obj_name])
            assert intermediate_store.get_object(
                context, resolve_dagster_type(List[Bool]), [obj_name]
            ).obj == ['foo', 'bar']
        finally:
            intermediate_store.rm_object(context, [obj_name])
def test_s3_intermediate_store(s3_bucket):
    """Older-API variant: S3 store round-trips a bool, and
    ``copy_object_from_prev_run`` copies it into a second run's store.

    Fix: cleanup now runs inside the pipeline context. The original referenced
    ``context`` in a ``finally`` outside the ``with``, which raises NameError if
    context creation fails and otherwise uses the context after it has exited.
    """
    run_id = str(uuid.uuid4())
    run_id_2 = str(uuid.uuid4())
    # FIXME need a dedicated test bucket
    intermediate_store = S3IntermediateStore(run_id=run_id, s3_bucket=s3_bucket)
    assert intermediate_store.root == '/'.join(['dagster', 'storage', run_id])
    intermediate_store_2 = S3IntermediateStore(run_id=run_id_2, s3_bucket=s3_bucket)
    assert intermediate_store_2.root == '/'.join(['dagster', 'storage', run_id_2])

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(True, context, RuntimeBool.inst(), ['true'])
            assert intermediate_store.has_object(context, ['true'])
            assert intermediate_store.get_object(context, RuntimeBool.inst(), ['true']).obj is True
            assert intermediate_store.uri_for_paths(['true']).startswith('s3://')

            # Cross-run copy into the second store.
            intermediate_store_2.copy_object_from_prev_run(context, run_id, ['true'])
            assert intermediate_store_2.has_object(context, ['true'])
            assert (
                intermediate_store_2.get_object(context, RuntimeBool.inst(), ['true']).obj is True
            )
        finally:
            intermediate_store.rm_object(context, ['true'])
            intermediate_store_2.rm_object(context, ['true'])
def test_s3_intermediate_storage_with_custom_serializer(s3_bucket):
    """Duplicate formatting variant of the custom-serializer test: the raw S3
    bytes hold the serialized ("FOO") form; ``get_intermediate`` returns 'foo'.
    """
    run_id = make_new_run_id()
    intermediate_storage = S3IntermediateStorage(run_id=run_id, s3_bucket=s3_bucket)
    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, LowercaseString, StepOutputHandle('foo'), 'foo'
            )
            # Fetch the raw object via boto3 to prove the serializer ran on write.
            assert (
                intermediate_storage.object_store.s3.get_object(
                    Bucket=intermediate_storage.object_store.bucket,
                    Key=os.path.join(intermediate_storage.root, 'intermediates', 'foo', 'result'),
                )['Body'].read().decode('utf-8')
                == 'FOO'
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle('foo'))
            assert (
                intermediate_storage.get_intermediate(
                    context, LowercaseString, StepOutputHandle('foo')
                ).obj
                == 'foo'
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle('foo'))
def test_gcs_intermediate_storage_with_type_storage_plugin(gcs_bucket):
    """A registered type storage plugin handles set/get for RuntimeString.

    NOTE(review): the result of ``get_intermediate`` is compared to "hello"
    without ``.obj`` — the plugin path appears to return the bare value;
    confirm against the plugin API.
    """
    run_id = make_new_run_id()
    intermediate_storage = GCSIntermediateStorage(
        run_id=run_id,
        gcs_bucket=gcs_bucket,
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            [(RuntimeString, FancyStringGCSTypeStoragePlugin)]
        ),
    )
    obj_name = "obj_name"
    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, RuntimeString, StepOutputHandle(obj_name), "hello"
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle(obj_name))
            assert (
                intermediate_storage.get_intermediate(
                    context, RuntimeString, StepOutputHandle(obj_name)
                )
                == "hello"
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle(obj_name))
def test_file_system_intermediate_store():
    """The default filesystem store roots under the system temp directory and
    supports set/has/get/rm; ``rm_object`` returns None and is safe to call
    repeatedly or on keys that were never written.
    """
    run_id = str(uuid.uuid4())
    intermediate_store = FileSystemIntermediateStore(run_id=run_id)
    assert intermediate_store.root == os.path.join(
        seven.get_system_temp_directory(), 'dagster', 'runs', run_id, 'files'
    )
    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(True, context, RuntimeBool.inst(), ['true'])
            assert intermediate_store.has_object(context, ['true'])
            assert intermediate_store.get_object(context, RuntimeBool.inst(), ['true']) is True
            assert intermediate_store.uri_for_paths(['true']).startswith('file:///')
            # rm_object is idempotent: removing twice, or removing a key that
            # never existed, still returns None.
            assert intermediate_store.rm_object(context, ['true']) is None
            assert intermediate_store.rm_object(context, ['true']) is None
            assert intermediate_store.rm_object(context, ['dslkfhjsdflkjfs']) is None
        finally:
            # Best-effort cleanup of the store's directory.
            try:
                shutil.rmtree(intermediate_store.root)
            except seven.FileNotFoundError:
                pass
def test_gcs_intermediate_storage_composite_types_with_custom_serializer_for_inner_type(
    gcs_bucket
):
    """A List whose inner member type has a custom serializer round-trips
    through GCS intermediate storage: written as List[LowercaseString], read
    back as List[Bool], yielding the original values.
    """
    run_id = make_new_run_id()
    intermediate_storage = GCSIntermediateStorage(run_id=run_id, gcs_bucket=gcs_bucket)
    obj_name = "list"
    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(List[LowercaseString]),
                StepOutputHandle(obj_name),
                ["foo", "bar"],
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle(obj_name))
            assert intermediate_storage.get_intermediate(
                context, resolve_dagster_type(List[Bool]), StepOutputHandle(obj_name)
            ).obj == ["foo", "bar"]
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle(obj_name))
def test_gcs_intermediate_store(gcs_bucket):
    """GCS store round-trips a bool, and ``copy_object_from_prev_run`` copies it
    into a second run's store.

    Fix: cleanup now runs inside the pipeline context. The original referenced
    ``context`` in a ``finally`` outside the ``with``, which raises NameError if
    context creation fails and otherwise uses the context after it has exited.
    """
    run_id = str(uuid.uuid4())
    run_id_2 = str(uuid.uuid4())

    intermediate_store = GCSIntermediateStore(run_id=run_id, gcs_bucket=gcs_bucket)
    assert intermediate_store.root == '/'.join(['dagster', 'storage', run_id])
    intermediate_store_2 = GCSIntermediateStore(run_id=run_id_2, gcs_bucket=gcs_bucket)
    assert intermediate_store_2.root == '/'.join(['dagster', 'storage', run_id_2])

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(True, context, RuntimeBool, ['true'])
            assert intermediate_store.has_object(context, ['true'])
            assert intermediate_store.get_object(context, RuntimeBool, ['true']).obj is True
            assert intermediate_store.uri_for_paths(['true']).startswith('gs://')

            # Cross-run copy into the second store.
            intermediate_store_2.copy_object_from_prev_run(context, run_id, ['true'])
            assert intermediate_store_2.has_object(context, ['true'])
            assert intermediate_store_2.get_object(context, RuntimeBool, ['true']).obj is True
        finally:
            intermediate_store.rm_object(context, ['true'])
            intermediate_store_2.rm_object(context, ['true'])
def test_s3_intermediate_storage_with_type_storage_plugin(s3_bucket):
    """A registered type storage plugin handles set/get for RuntimeString via S3.

    NOTE(review): the result of ``get_intermediate`` is compared to 'hello'
    without ``.obj`` — the plugin path appears to return the bare value;
    confirm against the plugin API.
    """
    run_id = make_new_run_id()
    intermediate_storage = S3IntermediateStorage(
        run_id=run_id,
        s3_bucket=s3_bucket,
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            [(RuntimeString, FancyStringS3TypeStoragePlugin)]
        ),
    )
    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, RuntimeString, StepOutputHandle('obj_name'), 'hello'
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle('obj_name'))
            assert (
                intermediate_storage.get_intermediate(
                    context, RuntimeString, StepOutputHandle('obj_name')
                )
                == 'hello'
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle('obj_name'))
def test_adls2_intermediate_storage_with_custom_prefix(storage_account, file_system):
    """A custom ``prefix`` must appear in both the storage root and object URIs.

    Fix: cleanup now runs inside the pipeline context. The original referenced
    ``context`` in a ``finally`` outside the ``with``, which raises NameError if
    context creation fails and otherwise uses the context after it has exited.
    """
    run_id = make_new_run_id()
    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
        prefix="custom_prefix",
    )
    assert intermediate_storage.root == "/".join(["custom_prefix", "storage", run_id])

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, RuntimeBool, StepOutputHandle("true"), True
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle("true"))
            # The prefix must appear in the abfss:// URI.
            assert intermediate_storage.uri_for_paths(["true"]).startswith(
                "abfss://{fs}@{account}.dfs.core.windows.net/custom_prefix".format(
                    account=storage_account, fs=file_system
                )
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("true"))
def test_adls2_intermediate_store_with_type_storage_plugin(
    storage_account, file_system
):
    """A registered type storage plugin handles set_value/get_value for
    RuntimeString through the ADLS2 store.

    NOTE(review): this registers FancyStringS3TypeStoragePlugin in an ADLS2
    test — presumably deliberate reuse of the S3 plugin; confirm.
    """
    run_id = make_new_run_id()
    intermediate_store = ADLS2IntermediateStore(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            [(RuntimeString, FancyStringS3TypeStoragePlugin)]
        ),
    )
    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_value('hello', context, RuntimeString, ['obj_name'])
            assert intermediate_store.has_object(context, ['obj_name'])
            assert intermediate_store.get_value(context, RuntimeString, ['obj_name']) == 'hello'
        finally:
            intermediate_store.rm_object(context, ['obj_name'])
def test_adls2_intermediate_storage_composite_types_with_custom_serializer_for_inner_type(
    storage_account, file_system
):
    """A List whose inner member type has a custom serializer round-trips
    through ADLS2 intermediate storage: written as List[LowercaseString], read
    back as List[Bool], yielding the original values.
    """
    run_id = make_new_run_id()
    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )
    obj_name = "list"
    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(List[LowercaseString]),
                StepOutputHandle(obj_name),
                ["foo", "bar"],
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle(obj_name))
            assert intermediate_storage.get_intermediate(
                context, resolve_dagster_type(List[Bool]), StepOutputHandle(obj_name)
            ).obj == ["foo", "bar"]
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle(obj_name))
def test_adls2_intermediate_storage_with_custom_serializer(
    storage_account, file_system
):
    """LowercaseString's serializer stores "FOO" as the raw ADLS2 file contents
    while ``get_intermediate`` yields back "foo".
    """
    run_id = make_new_run_id()
    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )
    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, LowercaseString, StepOutputHandle("foo"), "foo"
            )
            # Read the raw file through the ADLS2 client to verify the
            # serialized (uppercased) bytes.
            assert (
                intermediate_storage.object_store.file_system_client.get_file_client(
                    os.path.join(
                        *[intermediate_storage.root, "intermediates", "foo", "result"]
                    ),
                ).download_file().readall().decode("utf-8")
                == "FOO"
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle("foo"))
            assert (
                intermediate_storage.get_intermediate(
                    context, LowercaseString, StepOutputHandle("foo")
                ).obj
                == "foo"
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("foo"))
def test_adls2_intermediate_storage_with_type_storage_plugin(
    storage_account, file_system
):
    """A registered type storage plugin handles set/get for RuntimeString
    through ADLS2 intermediate storage.

    NOTE(review): this registers FancyStringS3TypeStoragePlugin in an ADLS2
    test — presumably deliberate reuse of the S3 plugin; confirm. Also, the
    ``get_intermediate`` result is compared to "hello" without ``.obj`` — the
    plugin path appears to return the bare value.
    """
    run_id = make_new_run_id()
    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            [(RuntimeString, FancyStringS3TypeStoragePlugin)]
        ),
    )
    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, RuntimeString, StepOutputHandle("obj_name"), "hello"
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle("obj_name"))
            assert (
                intermediate_storage.get_intermediate(
                    context, RuntimeString, StepOutputHandle("obj_name")
                )
                == "hello"
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("obj_name"))
def test_custom_read_write_mode(s3_bucket):
    """Older-API variant: round-trip a LessSimpleDataFrame through the S3
    intermediate store.

    Fix: cleanup now runs inside the pipeline context. The original referenced
    ``context`` in a ``finally`` outside the ``with``, which raises NameError if
    context creation fails and otherwise uses the context after it has exited.
    """
    run_id = make_new_run_id()
    intermediate_store = S3IntermediateStore(run_id=run_id, s3_bucket=s3_bucket)
    data_frame = [
        OrderedDict({'foo': '1', 'bar': '1'}),
        OrderedDict({'foo': '2', 'bar': '2'}),
    ]

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(
                data_frame, context, resolve_dagster_type(LessSimpleDataFrame), ['data_frame']
            )
            assert intermediate_store.has_object(context, ['data_frame'])
            assert (
                intermediate_store.get_object(
                    context, resolve_dagster_type(LessSimpleDataFrame), ['data_frame']
                ).obj
                == data_frame
            )
            # s3:// scheme confirms the object actually landed in S3.
            assert intermediate_store.uri_for_paths(['data_frame']).startswith('s3://')
        finally:
            intermediate_store.rm_object(context, ['data_frame'])
def test_s3_intermediate_store(s3_bucket):
    """S3 store round-trips a bool, and ``copy_object_from_run`` copies it into
    a second run's store.

    Fix: cleanup now runs inside the pipeline context. The original referenced
    ``context`` in a ``finally`` outside the ``with``, which raises NameError if
    context creation fails and otherwise uses the context after it has exited.
    """
    run_id = make_new_run_id()
    run_id_2 = make_new_run_id()

    intermediate_store = S3IntermediateStore(run_id=run_id, s3_bucket=s3_bucket)
    assert intermediate_store.root == '/'.join(['dagster', 'storage', run_id])
    intermediate_store_2 = S3IntermediateStore(run_id=run_id_2, s3_bucket=s3_bucket)
    assert intermediate_store_2.root == '/'.join(['dagster', 'storage', run_id_2])

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(True, context, RuntimeBool, ['true'])
            assert intermediate_store.has_object(context, ['true'])
            assert intermediate_store.get_object(context, RuntimeBool, ['true']).obj is True
            assert intermediate_store.uri_for_paths(['true']).startswith('s3://')

            # Cross-run copy into the second store.
            intermediate_store_2.copy_object_from_run(context, run_id, ['true'])
            assert intermediate_store_2.has_object(context, ['true'])
            assert intermediate_store_2.get_object(context, RuntimeBool, ['true']).obj is True
        finally:
            intermediate_store.rm_object(context, ['true'])
            intermediate_store_2.rm_object(context, ['true'])
def test_gcs_intermediate_store_with_custom_serializer(gcs_bucket):
    """LowercaseString's serializer stores "FOO" as the raw GCS blob contents
    while ``get_object`` yields back "foo".
    """
    run_id = make_new_run_id()
    intermediate_store = GCSIntermediateStore(run_id=run_id, gcs_bucket=gcs_bucket)
    obj_name = 'foo'
    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object('foo', context, LowercaseString, [obj_name])
            # Download the raw blob to verify the serialized (uppercased) bytes.
            bucket_obj = intermediate_store.object_store.client.get_bucket(
                intermediate_store.object_store.bucket
            )
            blob = bucket_obj.blob('/'.join([intermediate_store.root] + [obj_name]))
            file_obj = BytesIO()
            blob.download_to_file(file_obj)
            file_obj.seek(0)
            assert file_obj.read().decode('utf-8') == 'FOO'
            assert intermediate_store.has_object(context, [obj_name])
            assert intermediate_store.get_object(context, LowercaseString, [obj_name]).obj == 'foo'
        finally:
            intermediate_store.rm_object(context, [obj_name])
def test_file_system_intermediate_storage_with_composite_type_storage_plugin():
    """Type storage plugins do not support composite dagster types.

    With a plugin registered for RuntimeString, ``set_intermediate`` against
    every composite shape built from String must raise NotImplementedCheckError.
    """
    run_id, _, intermediate_storage = define_intermediate_storage(
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            [(RuntimeString, FancyStringFilesystemTypeStoragePlugin)]
        ),
    )

    # List[String]
    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(List[String]),
                StepOutputHandle('obj_name'),
                ['hello']
            )

    # Optional[String]
    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(Optional[String]),
                StepOutputHandle('obj_name'),
                ['hello'],
            )

    # List[Optional[String]]
    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(List[Optional[String]]),
                StepOutputHandle('obj_name'),
                ['hello'],
            )

    # Optional[List[String]]
    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(Optional[List[String]]),
                StepOutputHandle('obj_name'),
                ['hello'],
            )