def test_file_system_intermediate_store_with_composite_type_storage_plugin():
    run_id = make_new_run_id()

    intermediate_store = build_fs_intermediate_store(
        DagsterInstance.ephemeral().intermediates_directory,
        run_id=run_id,
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            [(RuntimeString, FancyStringFilesystemTypeStoragePlugin)]
        ),
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(
                ['hello'], context, resolve_dagster_type(List[String]), ['obj_name']
            )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(
                ['hello'], context, resolve_dagster_type(Optional[String]), ['obj_name']
            )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(
                ['hello'], context, resolve_dagster_type(List[Optional[String]]), ['obj_name']
            )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(
                ['hello'], context, resolve_dagster_type(Optional[List[String]]), ['obj_name']
            )
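
# The examples in this file are collected from Dagster's intermediate-store and
# intermediate-storage test suites across several releases, so helper names and
# module paths vary from snippet to snippet. A minimal sketch of the imports the
# newer (intermediate storage) examples assume; the module paths follow
# historical Dagster layouts and may differ by release:
import os

import pytest

from dagster import Bool, DagsterInstance, List, Optional, String, check
from dagster.core.execution.plan.objects import StepOutputHandle
from dagster.core.storage.type_storage import TypeStoragePlugin, TypeStoragePluginRegistry
from dagster.core.types.dagster_type import resolve_dagster_type
from dagster.core.utils import make_new_run_id
from dagster.utils.test import yield_empty_pipeline_context
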
def test_file_system_intermediate_store_with_composite_type_storage_plugin():
    run_id = str(uuid.uuid4())

    intermediate_store = FileSystemIntermediateStore(
        run_id=run_id,
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            {RuntimeString.inst(): FancyStringFilesystemTypeStoragePlugin}),
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(['hello'], context,
                                         resolve_to_runtime_type(List[String]),
                                         ['obj_name'])

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(['hello'], context,
                                         resolve_to_runtime_type(
                                             Optional[String]), ['obj_name'])

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(
                ['hello'], context,
                resolve_to_runtime_type(List[Optional[String]]), ['obj_name'])

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(
                ['hello'], context,
                resolve_to_runtime_type(Optional[List[String]]), ['obj_name'])
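
# The two variants above are the same test from different Dagster releases:
# resolve_to_runtime_type was later renamed resolve_dagster_type, the .inst()
# calls on runtime types were dropped, and TypeStoragePluginRegistry changed
# from taking a dict of types to plugins to taking a list of (type, plugin)
# pairs.
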
def test_custom_read_write_mode(s3_bucket):
    run_id = make_new_run_id()
    intermediate_storage = S3IntermediateStorage(run_id=run_id, s3_bucket=s3_bucket)
    data_frame = [OrderedDict({"foo": "1", "bar": "1"}), OrderedDict({"foo": "2", "bar": "2"})]
    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(LessSimpleDataFrame),
                StepOutputHandle("data_frame"),
                data_frame,
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("data_frame"))
            assert (
                intermediate_storage.get_intermediate(
                    context,
                    resolve_dagster_type(LessSimpleDataFrame),
                    StepOutputHandle("data_frame"),
                ).obj
                == data_frame
            )
            assert intermediate_storage.uri_for_paths(["data_frame"]).startswith("s3://")

    finally:
        intermediate_storage.rm_intermediate(context, StepOutputHandle("data_frame"))
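
# LessSimpleDataFrame and its serialization are not defined in these snippets.
# A sketch of a plausible definition, modeled on Dagster's own tests and
# assuming the SerializationStrategy and usable_as_dagster_type APIs of the
# same era; the text-mode read_mode="r"/write_mode="w" (instead of the default
# binary modes) is the "custom read write mode" the test exercises:
import csv

from dagster import usable_as_dagster_type
from dagster.core.types.marshal import SerializationStrategy


class CsvSerializationStrategy(SerializationStrategy):
    def __init__(self):
        # Text mode rather than the default binary mode, since csv works on str.
        super(CsvSerializationStrategy, self).__init__(
            "csv_strategy", read_mode="r", write_mode="w"
        )

    def serialize(self, value, write_file_obj):
        fieldnames = value[0]
        writer = csv.DictWriter(write_file_obj, fieldnames)
        writer.writeheader()
        writer.writerows(value)

    def deserialize(self, read_file_obj):
        reader = csv.DictReader(read_file_obj)
        return LessSimpleDataFrame([row for row in reader])


@usable_as_dagster_type(
    name="LessSimpleDataFrame",
    description="A naive representation of a data frame, e.g. as returned by csv.DictReader.",
    serialization_strategy=CsvSerializationStrategy(),
)
class LessSimpleDataFrame(list):
    pass
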
def test_s3_intermediate_storage_with_custom_serializer(s3_bucket):
    run_id = make_new_run_id()

    intermediate_storage = S3IntermediateStorage(run_id=run_id, s3_bucket=s3_bucket)

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, LowercaseString, StepOutputHandle("foo"), "foo"
            )

            assert (
                intermediate_storage.object_store.s3.get_object(
                    Bucket=intermediate_storage.object_store.bucket,
                    Key=os.path.join(intermediate_storage.root, "intermediates", "foo", "result"),
                )["Body"]
                .read()
                .decode("utf-8")
                == "FOO"
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("foo"))
            assert (
                intermediate_storage.get_intermediate(
                    context, LowercaseString, StepOutputHandle("foo")
                ).obj
                == "foo"
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("foo"))
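
# LowercaseString is likewise assumed. The assertions above require a
# serializer that uppercases on write (the stored bytes read back as "FOO")
# and lowercases on read (get_intermediate returns "foo"); a sketch, assuming
# Dagster's SerializationStrategy and create_any_type APIs:
from dagster.core.types.dagster_type import create_any_type
from dagster.core.types.marshal import SerializationStrategy


class UppercaseSerializationStrategy(SerializationStrategy):
    def serialize(self, value, write_file_obj):
        # Uppercase on the way out...
        return write_file_obj.write(value.upper().encode("utf-8"))

    def deserialize(self, read_file_obj):
        # ...lowercase on the way back in.
        return read_file_obj.read().decode("utf-8").lower()


LowercaseString = create_any_type(
    "LowercaseString",
    serialization_strategy=UppercaseSerializationStrategy("uppercase"),
)
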
def test_gcs_intermediate_storage_with_custom_prefix(gcs_bucket):
    run_id = make_new_run_id()

    intermediate_storage = GCSIntermediateStorage(run_id=run_id,
                                                  gcs_bucket=gcs_bucket,
                                                  gcs_prefix="custom_prefix")
    assert intermediate_storage.root == "/".join(
        ["custom_prefix", "storage", run_id])

    obj_name = "true"

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:

            intermediate_storage.set_intermediate(context, RuntimeBool,
                                                  StepOutputHandle(obj_name),
                                                  True)

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle(obj_name))
            assert intermediate_storage.uri_for_paths([obj_name]).startswith(
                "gs://%s/custom_prefix" % gcs_bucket)

    finally:
        intermediate_storage.rm_intermediate(context,
                                             StepOutputHandle(obj_name))
def test_custom_read_write_mode(storage_account, file_system):
    run_id = make_new_run_id()
    data_frame = [OrderedDict({"foo": "1", "bar": "1"}), OrderedDict({"foo": "2", "bar": "2"})]
    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:
            intermediate_storage = ADLS2IntermediateStorage(
                adls2_client=get_adls2_client(storage_account),
                blob_client=get_blob_client(storage_account),
                run_id=run_id,
                file_system=file_system,
            )
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(LessSimpleDataFrame),
                StepOutputHandle("data_frame"),
                data_frame,
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("data_frame"))
            assert (
                intermediate_storage.get_intermediate(
                    context,
                    resolve_dagster_type(LessSimpleDataFrame),
                    StepOutputHandle("data_frame"),
                ).obj
                == data_frame
            )
            assert intermediate_storage.uri_for_paths(["data_frame"]).startswith("abfss://")

    finally:
        intermediate_storage.rm_intermediate(context, StepOutputHandle("data_frame"))
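
# get_adls2_client and get_blob_client are test helpers that never appear in
# these snippets. A hedged sketch, assuming dagster_azure's create_adls2_client
# and create_blob_client factories; get_azure_credential is a hypothetical
# helper standing in for however the test environment supplies a shared key:
from dagster_azure.adls2 import create_adls2_client
from dagster_azure.blob import create_blob_client


def get_adls2_client(storage_account):
    creds = get_azure_credential()  # hypothetical credential lookup
    return create_adls2_client(storage_account, creds)


def get_blob_client(storage_account):
    creds = get_azure_credential()  # hypothetical credential lookup
    return create_blob_client(storage_account, creds)
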
def test_file_system_intermediate_store_with_base_dir():
    run_id = str(uuid.uuid4())

    try:
        tempdir = tempfile.mkdtemp()

        intermediate_store = FileSystemIntermediateStore(run_id=run_id,
                                                         base_dir=tempdir)
        assert intermediate_store.root == os.path.join(tempdir, 'dagster',
                                                       'runs', run_id, 'files')

        with yield_empty_pipeline_context(run_id=run_id) as context:
            try:
                intermediate_store.set_object(True, context,
                                              RuntimeBool.inst(), ['true'])
                assert intermediate_store.has_object(context, ['true'])
                assert intermediate_store.get_object(
                    context, RuntimeBool.inst(), ['true']) is True

            finally:
                try:
                    shutil.rmtree(intermediate_store.root)
                except seven.FileNotFoundError:
                    pass
    finally:
        try:
            shutil.rmtree(tempdir)
        except seven.FileNotFoundError:
            pass
def test_file_system_intermediate_store_composite_types_with_custom_serializer_for_inner_type():
    run_id = str(uuid.uuid4())

    intermediate_store = FileSystemIntermediateStore(run_id=run_id)
    assert intermediate_store.root == os.path.join(
        seven.get_system_temp_directory(), 'dagster', 'runs', run_id, 'files')

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(
                ['foo', 'bar'],
                context,
                resolve_to_runtime_type(List(LowercaseString)).inst(),
                ['list'],
            )
            assert intermediate_store.has_object(context, ['list'])
            assert intermediate_store.get_object(
                context,
                resolve_to_runtime_type(List(Bool)).inst(),
                ['list']) == ['foo', 'bar']

        finally:
            try:
                shutil.rmtree(intermediate_store.root)
            except seven.FileNotFoundError:
                pass
def test_s3_intermediate_store_with_custom_serializer(s3_bucket):
    run_id = str(uuid.uuid4())

    # FIXME need a dedicated test bucket
    intermediate_store = S3IntermediateStore(run_id=run_id, s3_bucket=s3_bucket)

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object('foo', context, LowercaseString.inst(), ['foo'])

            assert (
                intermediate_store.object_store.s3.get_object(
                    Bucket=intermediate_store.object_store.bucket,
                    Key='/'.join([intermediate_store.root] + ['foo']),
                )['Body']
                .read()
                .decode('utf-8')
                == 'FOO'
            )

            assert intermediate_store.has_object(context, ['foo'])
            assert (
                intermediate_store.get_object(context, LowercaseString.inst(), ['foo']).obj == 'foo'
            )
        finally:
            intermediate_store.rm_object(context, ['foo'])
def test_custom_read_write_mode(storage_account, file_system):
    run_id = make_new_run_id()
    data_frame = [OrderedDict({'foo': '1', 'bar': '1'}), OrderedDict({'foo': '2', 'bar': '2'})]
    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:
            intermediate_store = ADLS2IntermediateStore(
                adls2_client=get_adls2_client(storage_account),
                blob_client=get_blob_client(storage_account),
                run_id=run_id,
                file_system=file_system,
            )
            intermediate_store.set_object(
                data_frame, context, resolve_dagster_type(LessSimpleDataFrame), ['data_frame']
            )

            assert intermediate_store.has_object(context, ['data_frame'])
            assert (
                intermediate_store.get_object(
                    context, resolve_dagster_type(LessSimpleDataFrame), ['data_frame']
                ).obj
                == data_frame
            )
            assert intermediate_store.uri_for_paths(['data_frame']).startswith('abfss://')

    finally:
        intermediate_store.rm_object(context, ['data_frame'])
def test_adls2_intermediate_store(storage_account, file_system):
    run_id = make_new_run_id()
    run_id_2 = make_new_run_id()

    intermediate_store = ADLS2IntermediateStore(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )
    assert intermediate_store.root == '/'.join(['dagster', 'storage', run_id])

    intermediate_store_2 = ADLS2IntermediateStore(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id_2,
        file_system=file_system,
    )
    assert intermediate_store_2.root == '/'.join(['dagster', 'storage', run_id_2])

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:

            intermediate_store.set_object(True, context, RuntimeBool, ['true'])

            assert intermediate_store.has_object(context, ['true'])
            assert intermediate_store.get_object(context, RuntimeBool, ['true']).obj is True
            assert intermediate_store.uri_for_paths(['true']).startswith('abfss://')

            intermediate_store_2.copy_object_from_run(context, run_id, ['true'])
            assert intermediate_store_2.has_object(context, ['true'])
            assert intermediate_store_2.get_object(context, RuntimeBool, ['true']).obj is True
    finally:
        intermediate_store.rm_object(context, ['true'])
        intermediate_store_2.rm_object(context, ['true'])
def test_adls2_intermediate_store_with_custom_prefix(storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_store = ADLS2IntermediateStore(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
        prefix='custom_prefix',
    )
    assert intermediate_store.root == '/'.join(['custom_prefix', 'storage', run_id])

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:

            intermediate_store.set_object(True, context, RuntimeBool, ['true'])

            assert intermediate_store.has_object(context, ['true'])
            assert intermediate_store.uri_for_paths(['true']).startswith(
                'abfss://{fs}@{account}.dfs.core.windows.net/custom_prefix'.format(
                    account=storage_account, fs=file_system
                )
            )

    finally:
        intermediate_store.rm_object(context, ['true'])
def test_adls2_intermediate_store_with_custom_serializer(storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_store = ADLS2IntermediateStore(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object('foo', context, LowercaseString, ['foo'])

            assert (
                intermediate_store.object_store.file_system_client.get_file_client(
                    '/'.join([intermediate_store.root] + ['foo']),
                )
                .download_file()
                .readall()
                .decode('utf-8')
                == 'FOO'
            )

            assert intermediate_store.has_object(context, ['foo'])
            assert intermediate_store.get_object(context, LowercaseString, ['foo']).obj == 'foo'
        finally:
            intermediate_store.rm_object(context, ['foo'])
def test_adls2_intermediate_store_composite_types_with_custom_serializer_for_inner_type(
    storage_account, file_system
):
    run_id = make_new_run_id()

    intermediate_store = ADLS2IntermediateStore(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )

    obj_name = 'list'

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(
                ['foo', 'bar'], context, resolve_dagster_type(List[LowercaseString]), [obj_name],
            )
            assert intermediate_store.has_object(context, [obj_name])
            assert intermediate_store.get_object(
                context, resolve_dagster_type(List[Bool]), [obj_name]
            ).obj == ['foo', 'bar']

        finally:
            intermediate_store.rm_object(context, [obj_name])
def test_s3_intermediate_store(s3_bucket):
    run_id = str(uuid.uuid4())
    run_id_2 = str(uuid.uuid4())

    # FIXME need a dedicated test bucket
    intermediate_store = S3IntermediateStore(run_id=run_id, s3_bucket=s3_bucket)
    assert intermediate_store.root == '/'.join(['dagster', 'storage', run_id])

    intermediate_store_2 = S3IntermediateStore(run_id=run_id_2, s3_bucket=s3_bucket)
    assert intermediate_store_2.root == '/'.join(['dagster', 'storage', run_id_2])

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:

            intermediate_store.set_object(True, context, RuntimeBool.inst(), ['true'])

            assert intermediate_store.has_object(context, ['true'])
            assert intermediate_store.get_object(context, RuntimeBool.inst(), ['true']).obj is True
            assert intermediate_store.uri_for_paths(['true']).startswith('s3://')

            intermediate_store_2.copy_object_from_prev_run(context, run_id, ['true'])
            assert intermediate_store_2.has_object(context, ['true'])
            assert (
                intermediate_store_2.get_object(context, RuntimeBool.inst(), ['true']).obj is True
            )
    finally:
        intermediate_store.rm_object(context, ['true'])
        intermediate_store_2.rm_object(context, ['true'])
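
# Note: copy_object_from_prev_run here and copy_object_from_run in the ADLS2
# example above and the later S3 example appear to be the same operation under
# names from different releases.
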
def test_gcs_intermediate_storage_with_type_storage_plugin(gcs_bucket):
    run_id = make_new_run_id()

    intermediate_storage = GCSIntermediateStorage(
        run_id=run_id,
        gcs_bucket=gcs_bucket,
        type_storage_plugin_registry=TypeStoragePluginRegistry([
            (RuntimeString, FancyStringGCSTypeStoragePlugin)
        ]),
    )

    obj_name = "obj_name"

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(context, RuntimeString,
                                                  StepOutputHandle(obj_name),
                                                  "hello")

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle(obj_name))
            assert (intermediate_storage.get_intermediate(
                context, RuntimeString, StepOutputHandle(obj_name)) == "hello")

        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle(obj_name))
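
# FancyStringGCSTypeStoragePlugin (like its S3 and filesystem counterparts) is
# assumed here. In Dagster's tests the "fancy" plugins demonstrate per-type
# custom storage by encoding the string value into the object key itself,
# which is also why the assertion above compares get_intermediate(...)
# directly instead of unwrapping .obj. A sketch modeled on those plugins;
# method bodies are illustrative:
class FancyStringGCSTypeStoragePlugin(TypeStoragePlugin):  # pylint: disable=no-init
    @classmethod
    def compatible_with_storage_def(cls, _):
        # Not needed for these tests.
        raise NotImplementedError()

    @classmethod
    def set_intermediate_object(
        cls, intermediate_storage, context, dagster_type, step_output_handle, value
    ):
        # Store an empty object whose key ends with the value itself.
        paths = ["intermediates", step_output_handle.step_key, step_output_handle.output_name, value]
        key = intermediate_storage.object_store.key_for_paths([intermediate_storage.root] + paths)
        return intermediate_storage.object_store.set_object(
            key, "", dagster_type.serialization_strategy
        )

    @classmethod
    def get_intermediate_object(
        cls, intermediate_storage, context, dagster_type, step_output_handle
    ):
        # Recover the value from the last component of the stored key.
        paths = ["intermediates", step_output_handle.step_key, step_output_handle.output_name]
        blobs = intermediate_storage.object_store.client.list_blobs(
            intermediate_storage.object_store.bucket,
            prefix=intermediate_storage.object_store.key_for_paths([intermediate_storage.root] + paths),
        )
        return list(blobs)[0].name.split("/")[-1]
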
def test_file_system_intermediate_store():
    run_id = str(uuid.uuid4())

    intermediate_store = FileSystemIntermediateStore(run_id=run_id)
    assert intermediate_store.root == os.path.join(
        seven.get_system_temp_directory(), 'dagster', 'runs', run_id, 'files')

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(True, context, RuntimeBool.inst(),
                                          ['true'])
            assert intermediate_store.has_object(context, ['true'])
            assert intermediate_store.get_object(context, RuntimeBool.inst(),
                                                 ['true']) is True
            assert intermediate_store.uri_for_paths(
                ['true']).startswith('file:///')
            assert intermediate_store.rm_object(context, ['true']) is None
            assert intermediate_store.rm_object(context, ['true']) is None
            assert intermediate_store.rm_object(context,
                                                ['dslkfhjsdflkjfs']) is None
        finally:
            try:
                shutil.rmtree(intermediate_store.root)
            except seven.FileNotFoundError:
                pass
def test_gcs_intermediate_storage_composite_types_with_custom_serializer_for_inner_type(
        gcs_bucket):
    run_id = make_new_run_id()

    intermediate_storage = GCSIntermediateStorage(run_id=run_id,
                                                  gcs_bucket=gcs_bucket)

    obj_name = "list"

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(List[LowercaseString]),
                StepOutputHandle(obj_name),
                ["foo", "bar"],
            )
            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle(obj_name))
            assert intermediate_storage.get_intermediate(
                context, resolve_dagster_type(List[Bool]),
                StepOutputHandle(obj_name)).obj == ["foo", "bar"]

        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle(obj_name))
def test_gcs_intermediate_store(gcs_bucket):
    run_id = str(uuid.uuid4())
    run_id_2 = str(uuid.uuid4())

    intermediate_store = GCSIntermediateStore(run_id=run_id, gcs_bucket=gcs_bucket)
    assert intermediate_store.root == '/'.join(['dagster', 'storage', run_id])

    intermediate_store_2 = GCSIntermediateStore(run_id=run_id_2, gcs_bucket=gcs_bucket)
    assert intermediate_store_2.root == '/'.join(['dagster', 'storage', run_id_2])

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:

            intermediate_store.set_object(True, context, RuntimeBool, ['true'])

            assert intermediate_store.has_object(context, ['true'])
            assert intermediate_store.get_object(context, RuntimeBool, ['true']).obj is True
            assert intermediate_store.uri_for_paths(['true']).startswith('gs://')

            intermediate_store_2.copy_object_from_prev_run(context, run_id, ['true'])
            assert intermediate_store_2.has_object(context, ['true'])
            assert intermediate_store_2.get_object(context, RuntimeBool, ['true']).obj is True
    finally:
        intermediate_store.rm_object(context, ['true'])
        intermediate_store_2.rm_object(context, ['true'])
def test_s3_intermediate_storage_with_type_storage_plugin(s3_bucket):
    run_id = make_new_run_id()

    intermediate_storage = S3IntermediateStorage(
        run_id=run_id,
        s3_bucket=s3_bucket,
        type_storage_plugin_registry=TypeStoragePluginRegistry([
            (RuntimeString, FancyStringS3TypeStoragePlugin)
        ]),
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:

            intermediate_storage.set_intermediate(context, RuntimeString,
                                                  StepOutputHandle('obj_name'),
                                                  'hello')

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle('obj_name'))
            assert (intermediate_storage.get_intermediate(
                context, RuntimeString,
                StepOutputHandle('obj_name')) == 'hello')

        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle('obj_name'))
def test_adls2_intermediate_storage_with_custom_prefix(storage_account,
                                                       file_system):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
        prefix="custom_prefix",
    )
    assert intermediate_storage.root == "/".join(
        ["custom_prefix", "storage", run_id])

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:

            intermediate_storage.set_intermediate(context, RuntimeBool,
                                                  StepOutputHandle("true"),
                                                  True)

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle("true"))
            assert intermediate_storage.uri_for_paths(["true"]).startswith(
                "abfss://{fs}@{account}.dfs.core.windows.net/custom_prefix".format(
                    account=storage_account, fs=file_system))

    finally:
        intermediate_storage.rm_intermediate(context, StepOutputHandle("true"))
def test_adls2_intermediate_store_with_type_storage_plugin(
        storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_store = ADLS2IntermediateStore(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
        type_storage_plugin_registry=TypeStoragePluginRegistry([
            (RuntimeString, FancyStringS3TypeStoragePlugin)
        ]),
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_value('hello', context, RuntimeString,
                                         ['obj_name'])

            assert intermediate_store.has_object(context, ['obj_name'])
            assert intermediate_store.get_value(context, RuntimeString,
                                                ['obj_name']) == 'hello'

        finally:
            intermediate_store.rm_object(context, ['obj_name'])
def test_adls2_intermediate_storage_composite_types_with_custom_serializer_for_inner_type(
        storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )

    obj_name = "list"

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(List[LowercaseString]),
                StepOutputHandle(obj_name),
                ["foo", "bar"],
            )
            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle(obj_name))
            assert intermediate_storage.get_intermediate(
                context, resolve_dagster_type(List[Bool]),
                StepOutputHandle(obj_name)).obj == ["foo", "bar"]

        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle(obj_name))
def test_adls2_intermediate_storage_with_custom_serializer(
        storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(context, LowercaseString,
                                                  StepOutputHandle("foo"),
                                                  "foo")

            file_client = intermediate_storage.object_store.file_system_client.get_file_client(
                os.path.join(intermediate_storage.root, "intermediates",
                             "foo", "result"))
            assert file_client.download_file().readall().decode("utf-8") == "FOO"

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle("foo"))
            assert (intermediate_storage.get_intermediate(
                context, LowercaseString,
                StepOutputHandle("foo")).obj == "foo")
        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle("foo"))
def test_adls2_intermediate_storage_with_type_storage_plugin(
        storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
        type_storage_plugin_registry=TypeStoragePluginRegistry([
            (RuntimeString, FancyStringS3TypeStoragePlugin)
        ]),
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(context, RuntimeString,
                                                  StepOutputHandle("obj_name"),
                                                  "hello")

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle("obj_name"))
            assert (intermediate_storage.get_intermediate(
                context, RuntimeString,
                StepOutputHandle("obj_name")) == "hello")

        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle("obj_name"))
def test_custom_read_write_mode(s3_bucket):
    run_id = make_new_run_id()
    intermediate_store = S3IntermediateStore(run_id=run_id,
                                             s3_bucket=s3_bucket)
    data_frame = [OrderedDict({'foo': '1', 'bar': '1'}),
                  OrderedDict({'foo': '2', 'bar': '2'})]
    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:
            intermediate_store.set_object(
                data_frame, context, resolve_dagster_type(LessSimpleDataFrame),
                ['data_frame'])

            assert intermediate_store.has_object(context, ['data_frame'])
            assert (intermediate_store.get_object(
                context, resolve_dagster_type(LessSimpleDataFrame),
                ['data_frame']).obj == data_frame)
            assert intermediate_store.uri_for_paths(
                ['data_frame']).startswith('s3://')

    finally:
        intermediate_store.rm_object(context, ['data_frame'])
def test_s3_intermediate_store(s3_bucket):
    run_id = make_new_run_id()
    run_id_2 = make_new_run_id()

    intermediate_store = S3IntermediateStore(run_id=run_id,
                                             s3_bucket=s3_bucket)
    assert intermediate_store.root == '/'.join(['dagster', 'storage', run_id])

    intermediate_store_2 = S3IntermediateStore(run_id=run_id_2,
                                               s3_bucket=s3_bucket)
    assert intermediate_store_2.root == '/'.join(
        ['dagster', 'storage', run_id_2])

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:

            intermediate_store.set_object(True, context, RuntimeBool, ['true'])

            assert intermediate_store.has_object(context, ['true'])
            assert intermediate_store.get_object(context, RuntimeBool,
                                                 ['true']).obj is True
            assert intermediate_store.uri_for_paths(
                ['true']).startswith('s3://')

            intermediate_store_2.copy_object_from_run(context, run_id,
                                                      ['true'])
            assert intermediate_store_2.has_object(context, ['true'])
            assert intermediate_store_2.get_object(context, RuntimeBool,
                                                   ['true']).obj is True
    finally:
        intermediate_store.rm_object(context, ['true'])
        intermediate_store_2.rm_object(context, ['true'])
def test_gcs_intermediate_store_with_custom_serializer(gcs_bucket):
    run_id = make_new_run_id()

    intermediate_store = GCSIntermediateStore(run_id=run_id,
                                              gcs_bucket=gcs_bucket)

    obj_name = 'foo'

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object('foo', context, LowercaseString,
                                          [obj_name])

            bucket_obj = intermediate_store.object_store.client.get_bucket(
                intermediate_store.object_store.bucket)
            blob = bucket_obj.blob('/'.join([intermediate_store.root] +
                                            [obj_name]))
            file_obj = BytesIO()
            blob.download_to_file(file_obj)
            file_obj.seek(0)

            assert file_obj.read().decode('utf-8') == 'FOO'

            assert intermediate_store.has_object(context, [obj_name])
            assert intermediate_store.get_object(context, LowercaseString,
                                                 [obj_name]).obj == 'foo'
        finally:
            intermediate_store.rm_object(context, [obj_name])
def test_file_system_intermediate_storage_with_composite_type_storage_plugin():
    run_id, _, intermediate_storage = define_intermediate_storage(
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            [(RuntimeString, FancyStringFilesystemTypeStoragePlugin)]))

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_storage.set_intermediate(
                context, resolve_dagster_type(List[String]),
                StepOutputHandle('obj_name'), ['hello'])

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(Optional[String]),
                StepOutputHandle('obj_name'),
                ['hello'],
            )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(List[Optional[String]]),
                StepOutputHandle('obj_name'),
                ['hello'],
            )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(Optional[List[String]]),
                StepOutputHandle('obj_name'),
                ['hello'],
            )
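
# In all four cases above the registry holds a plugin for the inner String type
# while the value is stored under a composite type (List, Optional, or a
# nesting of the two). Type storage plugins do not compose through container
# types, so the registry's composite-override check raises
# check.NotImplementedCheckError instead of silently bypassing the plugin.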