Example #1
def test_adls2_intermediate_storage_with_custom_prefix(storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
        prefix="custom_prefix",
    )
    assert intermediate_storage.root == "/".join(["custom_prefix", "storage", run_id])

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:

            intermediate_storage.set_intermediate(
                context, RuntimeBool, StepOutputHandle("true"), True
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("true"))
            assert intermediate_storage.uri_for_paths(["true"]).startswith(
                "abfss://{fs}@{account}.dfs.core.windows.net/custom_prefix".format(
                    account=storage_account, fs=file_system
                )
            )

    finally:
        intermediate_storage.rm_intermediate(context, StepOutputHandle("true"))
Example #2
def test_pipeline_step_key_subset_execution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = env_with_fs({'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}})
    result = execute_pipeline(pipeline_def, run_config=run_config, instance=instance)

    assert result.success

    intermediates_manager = IntermediateStoreIntermediatesManager(
        build_fs_intermediate_store(instance.intermediates_directory, result.run_id)
    )
    assert (
        intermediates_manager.get_intermediate(None, Int, StepOutputHandle('add_one.compute')).obj
        == 4
    )
    assert (
        intermediates_manager.get_intermediate(None, Int, StepOutputHandle('add_two.compute')).obj
        == 6
    )

    ## re-execute add_two

    pipeline_reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=result.run_id,
        run_config=run_config,
        step_keys_to_execute=['add_two.compute'],
        instance=instance,
    )

    assert pipeline_reexecution_result.success

    step_events = pipeline_reexecution_result.step_event_list
    assert step_events

    intermediates_manager = IntermediateStoreIntermediatesManager(
        build_fs_intermediate_store(instance.intermediates_directory, result.run_id)
    )
    assert (
        intermediates_manager.get_intermediate(None, Int, StepOutputHandle('add_one.compute')).obj
        == 4
    )
    assert (
        intermediates_manager.get_intermediate(None, Int, StepOutputHandle('add_two.compute')).obj
        == 6
    )

    assert not get_step_output_event(step_events, 'add_one.compute')
    assert get_step_output_event(step_events, 'add_two.compute')

    with pytest.raises(
        DagsterExecutionStepNotFoundError, match='Execution plan does not contain step'
    ):
        reexecute_pipeline(
            pipeline_def,
            parent_run_id=result.run_id,
            run_config=run_config,
            step_keys_to_execute=['nope.compute'],
            instance=instance,
        )
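
The helper fixtures used above are not shown on this page. Below is a minimal sketch of what define_addy_pipeline and env_with_fs plausibly look like, reconstructed only from how the test calls them (an add_one solid feeding an add_two solid, plus filesystem intermediate storage merged into the run config). The solid bodies, helper names, and import paths are assumptions, not the actual fixture sources.

# Sketch only -- reconstructed from usage, written against the 0.8-era Dagster API.
from dagster import InputDefinition, Int, OutputDefinition, lambda_solid, pipeline
from dagster.utils import merge_dicts


def define_addy_pipeline():
    @lambda_solid(input_defs=[InputDefinition("num", Int)], output_def=OutputDefinition(Int))
    def add_one(num):
        return num + 1

    @lambda_solid(input_defs=[InputDefinition("num", Int)], output_def=OutputDefinition(Int))
    def add_two(num):
        return num + 2

    @pipeline
    def addy_pipeline():
        add_two(add_one())

    return addy_pipeline


def env_with_fs(run_config):
    # The tests read intermediates back from instance.intermediates_directory, so the helper
    # presumably layers filesystem storage over the given config ("intermediate_storage"
    # rather than "storage" in newer releases, as in Example #23).
    return merge_dicts(run_config, {"storage": {"filesystem": {}}})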
Example #3
def test_gcs_intermediate_storage_composite_types_with_custom_serializer_for_inner_type(
        gcs_bucket):
    run_id = make_new_run_id()

    intermediate_storage = GCSIntermediateStorage(run_id=run_id,
                                                  gcs_bucket=gcs_bucket)

    obj_name = "list"

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(List[LowercaseString]),
                StepOutputHandle(obj_name),
                ["foo", "bar"],
            )
            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle(obj_name))
            assert intermediate_storage.get_intermediate(
                context, resolve_dagster_type(List[Bool]),
                StepOutputHandle(obj_name)).obj == ["foo", "bar"]

        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle(obj_name))
Example #4
def test_adls2_intermediate_storage_composite_types_with_custom_serializer_for_inner_type(
    storage_account, file_system
):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )

    obj_name = "list"

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(List[LowercaseString]),
                StepOutputHandle(obj_name),
                ["foo", "bar"],
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle(obj_name))
            assert intermediate_storage.get_intermediate(
                context, resolve_dagster_type(List[Bool]), StepOutputHandle(obj_name)
            ).obj == ["foo", "bar"]

        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle(obj_name))
Example #5
def test_adls2_intermediate_storage_with_custom_serializer(storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, LowercaseString, StepOutputHandle("foo"), "foo"
            )

            assert (
                intermediate_storage.object_store.file_system_client.get_file_client(
                    os.path.join(*[intermediate_storage.root, "intermediates", "foo", "result"]),
                )
                .download_file()
                .readall()
                .decode("utf-8")
                == "FOO"
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("foo"))
            assert (
                intermediate_storage.get_intermediate(
                    context, LowercaseString, StepOutputHandle("foo")
                ).obj
                == "foo"
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("foo"))
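
LowercaseString is defined in shared test helpers rather than in the snippet. Its behavior can be read off the assertions: the bytes stored in ADLS2 are uppercase ("FOO"), while get_intermediate returns the lowercase value ("foo"). A sketch of such a type built on a custom SerializationStrategy follows; the class name and import locations are assumptions and may differ across Dagster versions.

# Sketch only -- a dagster type whose serialization strategy uppercases on write
# and lowercases on read, matching the "FOO" / "foo" assertions above.
from dagster.core.types.dagster_type import create_any_type  # assumed import location
from dagster.core.types.marshal import SerializationStrategy  # assumed import location


class UppercaseSerializationStrategy(SerializationStrategy):
    def serialize(self, value, write_file_obj):
        write_file_obj.write(value.upper().encode("utf-8"))

    def deserialize(self, read_file_obj):
        return read_file_obj.read().decode("utf-8").lower()


LowercaseString = create_any_type(
    "LowercaseString",
    serialization_strategy=UppercaseSerializationStrategy("uppercase"),
)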
Example #6
def test_gcs_intermediate_storage_with_custom_prefix(gcs_bucket):
    run_id = make_new_run_id()

    intermediate_storage = GCSIntermediateStorage(run_id=run_id,
                                                  gcs_bucket=gcs_bucket,
                                                  gcs_prefix="custom_prefix")
    assert intermediate_storage.root == "/".join(
        ["custom_prefix", "storage", run_id])

    obj_name = "true"

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:

            intermediate_storage.set_intermediate(context, RuntimeBool,
                                                  StepOutputHandle(obj_name),
                                                  True)

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle(obj_name))
            assert intermediate_storage.uri_for_paths([obj_name]).startswith(
                "gs://%s/custom_prefix" % gcs_bucket)

    finally:
        intermediate_storage.rm_intermediate(context,
                                             StepOutputHandle(obj_name))
Example #7
def test_adls2_intermediate_storage_with_type_storage_plugin(storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            [(RuntimeString, FancyStringS3TypeStoragePlugin)]
        ),
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, RuntimeString, StepOutputHandle("obj_name"), "hello"
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("obj_name"))
            assert (
                intermediate_storage.get_intermediate(
                    context, RuntimeString, StepOutputHandle("obj_name")
                )
                == "hello"
            )

        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("obj_name"))
Example #8
def test_successful_one_part_execute_plan(graphql_context, snapshot):
    instance = graphql_context.instance
    environment_dict = csv_hello_world_solids_config_fs_storage()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=csv_hello_world, environment_dict=environment_dict)
    selector = infer_pipeline_selector(graphql_context, 'csv_hello_world')

    result = execute_dagster_graphql(
        graphql_context,
        EXECUTE_PLAN_QUERY,
        variables={
            'executionParams': {
                'selector': selector,
                'runConfigData': environment_dict,
                'stepKeys': ['sum_solid.compute'],
                'executionMetadata': {
                    'runId': pipeline_run.run_id
                },
                'mode': 'default',
            }
        },
    )

    query_result = result.data['executePlan']

    assert query_result['__typename'] == 'ExecutePlanSuccess'
    assert query_result['pipeline']['name'] == 'csv_hello_world'
    assert query_result['hasFailures'] is False

    step_events = query_result['stepEvents']

    assert [se['__typename'] for se in step_events] == [
        'ExecutionStepStartEvent',
        'ExecutionStepInputEvent',
        'ExecutionStepOutputEvent',
        'ObjectStoreOperationEvent',
        'ExecutionStepSuccessEvent',
    ]

    assert step_events[1]['step']['key'] == 'sum_solid.compute'
    assert step_events[2]['outputName'] == 'result'

    expected_value_repr = (
        '''[OrderedDict([('num1', '1'), ('num2', '2'), ('sum', 3)]), '''
        '''OrderedDict([('num1', '3'), ('num2', '4'), ('sum', 7)])]''')

    assert step_events[3]['step']['key'] == 'sum_solid.compute'
    assert step_events[4]['step']['key'] == 'sum_solid.compute'

    snapshot.assert_match(clean_log_messages(result.data))

    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        pipeline_run.run_id)
    intermediates_manager = IntermediateStoreIntermediatesManager(store)
    assert intermediates_manager.has_intermediate(
        None, StepOutputHandle('sum_solid.compute'))
    assert (str(
        intermediates_manager.get_intermediate(
            None, PoorMansDataFrame,
            StepOutputHandle('sum_solid.compute')).obj) == expected_value_repr)
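
The pipeline under test, csv_hello_world, is a dagster-graphql fixture that is not shown here. The expected_value_repr above implies a sum_solid that receives CSV rows as OrderedDicts (num1/num2 stay strings) and appends an integer sum column. A rough sketch of such a solid is below; PoorMansDataFrame is replaced by a bare stand-in, since the real type also knows how to hydrate itself from the CSV path supplied in the run config.

# Sketch only -- mirrors what the asserted output implies, not the actual fixture.
from copy import deepcopy

from dagster import InputDefinition, OutputDefinition, PythonObjectDagsterType, solid

# Stand-in for the real PoorMansDataFrame type (a list of OrderedDict rows).
PoorMansDataFrame = PythonObjectDagsterType(python_type=list, name="PoorMansDataFrame")


@solid(
    input_defs=[InputDefinition("num", PoorMansDataFrame)],
    output_defs=[OutputDefinition(PoorMansDataFrame)],
)
def sum_solid(_, num):
    sum_df = deepcopy(num)
    for row in sum_df:
        # num1/num2 remain strings, which is why the repr shows ('num1', '1') but ('sum', 3).
        row["sum"] = int(row["num1"]) + int(row["num2"])
    return sum_df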
Example #9
def _create_step_events_for_output(step_context, output):
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.inst_param(output, "output", Output)

    step = step_context.step
    step_output = step.step_output_named(output.output_name)

    version = resolve_step_output_versions(
        step_context.execution_plan,
        step_context.environment_config,
        step_context.mode_def,
    )[StepOutputHandle(step_context.step.key, output.output_name)]

    for output_event in _type_checked_step_output_event_sequence(
            step_context, output, version):
        yield output_event

    step_output_handle = StepOutputHandle.from_step(
        step=step, output_name=output.output_name)

    for evt in _set_intermediates(step_context, step_output,
                                  step_output_handle, output, version):
        yield evt

    for evt in _create_output_materializations(step_context,
                                               output.output_name,
                                               output.value):
        yield evt
Example #10
def test_resolve_memoized_execution_plan_yes_stored_results():
    speculative_execution_plan = create_execution_plan(versioned_pipeline)
    step_output_handle = StepOutputHandle("versioned_solid_no_input.compute",
                                          "result")

    instance = DagsterInstance.ephemeral()
    instance.get_addresses_for_step_output_versions = mock.MagicMock(
        return_value={
            (versioned_pipeline.name, step_output_handle): "some_address"
        })

    memoized_execution_plan = instance.resolve_memoized_execution_plan(
        speculative_execution_plan, run_config={}, mode="default")

    assert memoized_execution_plan.step_keys_to_execute == [
        "versioned_solid_takes_input.compute"
    ]

    expected_handle = StepOutputHandle(
        step_key="versioned_solid_no_input.compute", output_name="result")

    assert memoized_execution_plan.step_dict[
        "versioned_solid_takes_input.compute"].step_input_dict[
            "intput"].addresses == {
                expected_handle: "some_address"
            }
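
versioned_pipeline comes from the memoization test module and is not shown here. Based on the step keys and the "intput" input name addressed above (and the @solid(version=...) style visible in Example #30), it plausibly looks like the following; the concrete versions and return values are illustrative assumptions.

# Sketch only -- two versioned solids wired in sequence.
from dagster import pipeline, solid


@solid(version="abc")
def versioned_solid_no_input(_):
    return 6


@solid(version="123")
def versioned_solid_takes_input(_, intput):  # the test addresses this input by the name "intput"
    return 2 * intput


@pipeline
def versioned_pipeline():
    versioned_solid_takes_input(versioned_solid_no_input())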
Example #11
def test_pipeline_step_key_subset_execution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = env_with_fs(
        {"solids": {
            "add_one": {
                "inputs": {
                    "num": {
                        "value": 3
                    }
                }
            }
        }})
    result = execute_pipeline(pipeline_def,
                              run_config=run_config,
                              instance=instance)

    assert result.success

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id)
    assert (intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_one.compute")).obj == 4)
    assert (intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_two.compute")).obj == 6)

    ## re-execute add_two

    pipeline_reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=result.run_id,
        run_config=run_config,
        step_keys_to_execute=["add_two.compute"],
        instance=instance,
    )

    assert pipeline_reexecution_result.success

    step_events = pipeline_reexecution_result.step_event_list
    assert step_events

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id)
    assert (intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_one.compute")).obj == 4)
    assert (intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_two.compute")).obj == 6)

    assert not get_step_output_event(step_events, "add_one.compute")
    assert get_step_output_event(step_events, "add_two.compute")

    with pytest.raises(DagsterExecutionStepNotFoundError,
                       match="Execution plan does not contain step"):
        reexecute_pipeline(
            pipeline_def,
            parent_run_id=result.run_id,
            run_config=run_config,
            step_keys_to_execute=["nope.compute"],
            instance=instance,
        )
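
get_step_output_event (and get_step_output, used in the plan-execution examples below) are small helpers that pick a step's STEP_OUTPUT event out of an event list. They are not shown on this page; a sketch of the usual shape of such a helper, assuming the standard DagsterEvent attributes:

# Sketch only -- helper shape inferred from how the tests call it.
from dagster import DagsterEventType


def get_step_output_event(events, step_key, output_name="result"):
    for event in events:
        if (
            event.event_type == DagsterEventType.STEP_OUTPUT
            and event.step_key == step_key
            and event.step_output_data.output_name == output_name
        ):
            return event
    return None


# get_step_output in the later examples performs essentially the same lookup.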
Example #12
def test_s3_intermediate_storage_with_type_storage_plugin(s3_bucket):
    run_id = make_new_run_id()

    intermediate_storage = S3IntermediateStorage(
        run_id=run_id,
        s3_bucket=s3_bucket,
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            [(RuntimeString, FancyStringS3TypeStoragePlugin)]
        ),
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:

            intermediate_storage.set_intermediate(
                context, RuntimeString, StepOutputHandle("obj_name"), "hello"
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("obj_name"))
            assert (
                intermediate_storage.get_intermediate(
                    context, RuntimeString, StepOutputHandle("obj_name")
                )
                == "hello"
            )

        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("obj_name"))
Example #13
def test_s3_intermediate_storage_with_custom_serializer(s3_bucket):
    run_id = make_new_run_id()

    intermediate_storage = S3IntermediateStorage(run_id=run_id, s3_bucket=s3_bucket)

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, LowercaseString, StepOutputHandle("foo"), "foo"
            )

            assert (
                intermediate_storage.object_store.s3.get_object(
                    Bucket=intermediate_storage.object_store.bucket,
                    Key=os.path.join(intermediate_storage.root, "intermediates", "foo", "result"),
                )["Body"]
                .read()
                .decode("utf-8")
                == "FOO"
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("foo"))
            assert (
                intermediate_storage.get_intermediate(
                    context, LowercaseString, StepOutputHandle("foo")
                ).obj
                == "foo"
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("foo"))
Example #14
def test_s3_pipeline_with_custom_prefix(s3_bucket):
    s3_prefix = "custom_prefix"

    pipe = define_inty_pipeline(should_throw=False)
    run_config = {"storage": {"s3": {"config": {"s3_bucket": s3_bucket, "s3_prefix": s3_prefix}}}}

    pipeline_run = PipelineRun(pipeline_name=pipe.name, run_config=run_config)
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(pipe, run_config=run_config,)
    assert result.success

    execution_plan = create_execution_plan(pipe, run_config)
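    # Re-open a pipeline context for the completed run so the S3 resource can be built
    # and the intermediates written by execute_pipeline above can be read back.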
    with scoped_pipeline_context(execution_plan, run_config, pipeline_run, instance,) as context:
        intermediates_manager = S3IntermediateStorage(
            run_id=result.run_id,
            s3_bucket=s3_bucket,
            s3_prefix=s3_prefix,
            s3_session=context.scoped_resources_builder.build(required_resource_keys={"s3"}).s3,
        )
        assert intermediates_manager.root == "/".join(["custom_prefix", "storage", result.run_id])
        assert (
            intermediates_manager.get_intermediate(
                context, Int, StepOutputHandle("return_one.compute")
            ).obj
            == 1
        )
        assert (
            intermediates_manager.get_intermediate(
                context, Int, StepOutputHandle("add_one.compute")
            ).obj
            == 2
        )
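
define_inty_pipeline is another shared fixture, reused by several of the S3, ADLS2, and filesystem examples. From the step keys these tests reference (return_one.compute, add_one.compute) and the should_throw flag, a plausible sketch is shown below; the throwing solid and the pipeline name are assumptions.

# Sketch only -- reconstructed from the step keys the tests reference.
from dagster import InputDefinition, Int, OutputDefinition, lambda_solid, pipeline


def define_inty_pipeline(should_throw=True):
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid(input_defs=[InputDefinition("num", Int)], output_def=OutputDefinition(Int))
    def add_one(num):
        return num + 1

    @lambda_solid
    def user_throw_exception():
        raise Exception("whoops")

    @pipeline
    def inty_pipeline():
        add_one(return_one())
        if should_throw:
            user_throw_exception()

    return inty_pipeline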
Example #15
def test_gcs_intermediate_storage_with_type_storage_plugin(gcs_bucket):
    run_id = make_new_run_id()

    intermediate_storage = GCSIntermediateStorage(
        run_id=run_id,
        gcs_bucket=gcs_bucket,
        type_storage_plugin_registry=TypeStoragePluginRegistry([
            (RuntimeString, FancyStringGCSTypeStoragePlugin)
        ]),
    )

    obj_name = 'obj_name'

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(context, RuntimeString,
                                                  StepOutputHandle(obj_name),
                                                  'hello')

            assert intermediate_storage.has_intermediate(
                context, StepOutputHandle(obj_name))
            assert (intermediate_storage.get_intermediate(
                context, RuntimeString, StepOutputHandle(obj_name)) == 'hello')

        finally:
            intermediate_storage.rm_intermediate(context,
                                                 StepOutputHandle(obj_name))
Example #16
def test_custom_read_write_mode(storage_account, file_system):
    run_id = make_new_run_id()
    data_frame = [OrderedDict({"foo": "1", "bar": "1"}), OrderedDict({"foo": "2", "bar": "2"})]
    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:
            intermediate_storage = ADLS2IntermediateStorage(
                adls2_client=get_adls2_client(storage_account),
                blob_client=get_blob_client(storage_account),
                run_id=run_id,
                file_system=file_system,
            )
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(LessSimpleDataFrame),
                StepOutputHandle("data_frame"),
                data_frame,
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("data_frame"))
            assert (
                intermediate_storage.get_intermediate(
                    context,
                    resolve_dagster_type(LessSimpleDataFrame),
                    StepOutputHandle("data_frame"),
                ).obj
                == data_frame
            )
            assert intermediate_storage.uri_for_paths(["data_frame"]).startswith("abfss://")

    finally:
        intermediate_storage.rm_intermediate(context, StepOutputHandle("data_frame"))
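
The name test_custom_read_write_mode and the LessSimpleDataFrame type point at a serialization strategy that opens files in text mode ("r"/"w") instead of the default binary mode. A sketch of such a type is below; the CSV handling, class names, and import locations are assumptions and the real fixture may differ.

# Sketch only -- a text-mode SerializationStrategy round-tripping OrderedDict rows via CSV.
import csv
from collections import OrderedDict

from dagster.core.types.dagster_type import create_any_type  # assumed import location
from dagster.core.types.marshal import SerializationStrategy  # assumed import location


class CsvSerializationStrategy(SerializationStrategy):
    def __init__(self):
        # Text-mode read/write is the point of the test; the default strategy uses "rb"/"wb".
        super(CsvSerializationStrategy, self).__init__(
            "csv_strategy", read_mode="r", write_mode="w"
        )

    def serialize(self, value, write_file_obj):
        writer = csv.DictWriter(write_file_obj, fieldnames=value[0].keys())
        writer.writeheader()
        writer.writerows(value)

    def deserialize(self, read_file_obj):
        return [OrderedDict(row) for row in csv.DictReader(read_file_obj)]


LessSimpleDataFrame = create_any_type(
    "LessSimpleDataFrame", serialization_strategy=CsvSerializationStrategy()
)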
Example #17
def test_successful_one_part_execute_plan(graphql_context, snapshot):
    instance = graphql_context.instance
    run_config = csv_hello_world_solids_config_fs_storage()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=csv_hello_world, run_config=run_config)
    selector = infer_pipeline_selector(graphql_context, "csv_hello_world")

    result = execute_dagster_graphql(
        graphql_context,
        EXECUTE_PLAN_QUERY,
        variables={
            "executionParams": {
                "selector": selector,
                "runConfigData": run_config,
                "stepKeys": ["sum_solid.compute"],
                "executionMetadata": {
                    "runId": pipeline_run.run_id
                },
                "mode": "default",
            },
        },
    )

    query_result = result.data["executePlan"]

    assert query_result["__typename"] == "ExecutePlanSuccess"
    assert query_result["pipeline"]["name"] == "csv_hello_world"
    assert query_result["hasFailures"] is False

    step_events = query_result["stepEvents"]

    assert [se["__typename"] for se in step_events] == [
        "ExecutionStepStartEvent",
        "ExecutionStepInputEvent",
        "ExecutionStepOutputEvent",
        "ObjectStoreOperationEvent",
        "ExecutionStepSuccessEvent",
    ]

    assert step_events[1]["stepKey"] == "sum_solid.compute"
    assert step_events[2]["outputName"] == "result"

    expected_value_repr = (
        """[OrderedDict([('num1', '1'), ('num2', '2'), ('sum', 3)]), """
        """OrderedDict([('num1', '3'), ('num2', '4'), ('sum', 7)])]""")

    assert step_events[3]["stepKey"] == "sum_solid.compute"
    assert step_events[4]["stepKey"] == "sum_solid.compute"

    snapshot.assert_match(clean_log_messages(result.data))

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id)
    assert intermediate_storage.has_intermediate(
        None, StepOutputHandle("sum_solid.compute"))
    assert (str(
        intermediate_storage.get_intermediate(
            None, PoorMansDataFrame,
            StepOutputHandle("sum_solid.compute")).obj) == expected_value_repr)
Example #18
def test_using_s3_for_subplan(s3_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {"storage": {"s3": {"config": {"s3_bucket": s3_bucket}}}}

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    assert execution_plan.get_step_by_key("return_one.compute")

    step_keys = ["return_one.compute"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one.compute")
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(["return_one.compute"]),
        run_config,
        pipeline_run,
        instance,
    ) as context:

        intermediates_manager = S3IntermediateStorage(
            s3_bucket,
            run_id,
            s3_session=context.scoped_resources_builder.build(required_resource_keys={"s3"},).s3,
        )
        step_output_handle = StepOutputHandle("return_one.compute")
        assert intermediates_manager.has_intermediate(context, step_output_handle)
        assert intermediates_manager.get_intermediate(context, Int, step_output_handle).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one.compute"]),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, "add_one.compute")
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(["add_one.compute"]), run_config, pipeline_run, instance,
    ) as context:
        step_output_handle = StepOutputHandle("add_one.compute")
        assert intermediates_manager.has_intermediate(context, step_output_handle)
        assert intermediates_manager.get_intermediate(context, Int, step_output_handle).obj == 2
Example #19
def test_execution_plan_reexecution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    environment_dict = env_with_fs(
        {'solids': {
            'add_one': {
                'inputs': {
                    'num': {
                        'value': 3
                    }
                }
            }
        }})
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        instance=instance,
    )

    assert result.success

    intermediates_manager = IntermediateStoreIntermediatesManager(
        build_fs_intermediate_store(instance.intermediates_directory,
                                    result.run_id))
    assert (intermediates_manager.get_intermediate(
        None, Int, StepOutputHandle('add_one.compute')).obj == 4)
    assert (intermediates_manager.get_intermediate(
        None, Int, StepOutputHandle('add_two.compute')).obj == 6)

    ## re-execute add_two

    execution_plan = create_execution_plan(pipeline_def,
                                           environment_dict=environment_dict)

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        environment_dict=environment_dict,
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    step_events = execute_plan(
        execution_plan.build_subset_plan(['add_two.compute']),
        environment_dict=environment_dict,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    intermediates_manager = IntermediateStoreIntermediatesManager(
        build_fs_intermediate_store(instance.intermediates_directory,
                                    result.run_id))
    assert (intermediates_manager.get_intermediate(
        None, Int, StepOutputHandle('add_one.compute')).obj == 4)
    assert (intermediates_manager.get_intermediate(
        None, Int, StepOutputHandle('add_two.compute')).obj == 6)

    assert not get_step_output_event(step_events, 'add_one.compute')
    assert get_step_output_event(step_events, 'add_two.compute')
Example #20
def test_execution_plan_reexecution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = env_with_fs(
        {"solids": {
            "add_one": {
                "inputs": {
                    "num": {
                        "value": 3
                    }
                }
            }
        }})
    result = execute_pipeline(
        pipeline_def,
        run_config=run_config,
        instance=instance,
    )

    assert result.success

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id)
    assert (intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_one.compute")).obj == 4)
    assert (intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_two.compute")).obj == 6)

    ## re-execute add_two

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    step_events = execute_plan(
        execution_plan.build_subset_plan(["add_two.compute"]),
        run_config=run_config,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id)
    assert (intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_one.compute")).obj == 4)
    assert (intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_two.compute")).obj == 6)

    assert not get_step_output_event(step_events, "add_one.compute")
    assert get_step_output_event(step_events, "add_two.compute")
Example #21
def test_adls2_pipeline_with_custom_prefix(storage_account, file_system):
    adls2_prefix = 'custom_prefix'

    pipe = define_inty_pipeline(should_throw=False)
    run_config = {
        'resources': {
            'adls2': {
                'config': {
                    'storage_account': storage_account,
                    'credential': get_azure_credential()
                }
            }
        },
        'storage': {
            'adls2': {
                'config': {
                    'adls2_file_system': file_system,
                    'adls2_prefix': adls2_prefix
                }
            }
        },
    }

    pipeline_run = PipelineRun(pipeline_name=pipe.name, run_config=run_config)
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(
        pipe,
        run_config=run_config,
    )
    assert result.success

    execution_plan = create_execution_plan(pipe, run_config)
    with scoped_pipeline_context(
            execution_plan,
            run_config,
            pipeline_run,
            instance,
    ) as context:
        resource = context.scoped_resources_builder.build(
            required_resource_keys={'adls2'}).adls2
        store = ADLS2IntermediateStore(
            run_id=result.run_id,
            file_system=file_system,
            prefix=adls2_prefix,
            adls2_client=resource.adls2_client,
            blob_client=resource.blob_client,
        )
        intermediates_manager = IntermediateStoreIntermediatesManager(store)
        assert store.root == '/'.join(
            ['custom_prefix', 'storage', result.run_id])
        assert (intermediates_manager.get_intermediate(
            context, Int, StepOutputHandle('return_one.compute')).obj == 1)
        assert (intermediates_manager.get_intermediate(
            context, Int, StepOutputHandle('add_one.compute')).obj == 2)
Example #22
def test_resolve_step_output_versions_no_external_dependencies():
    speculative_execution_plan = create_execution_plan(versioned_pipeline)
    versions = resolve_step_output_versions_for_test(
        speculative_execution_plan, run_config={}, mode="default")

    assert (versions[StepOutputHandle(
        "versioned_solid_no_input.compute",
        "result")] == versioned_pipeline_expected_step1_output_version())
    assert (versions[StepOutputHandle(
        "versioned_solid_takes_input.compute",
        "result")] == versioned_pipeline_expected_step2_output_version())
Example #23
def test_using_intermediate_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:

        run_config = {"intermediate_storage": {"filesystem": {}}}

        pipeline = reconstructable(define_inty_pipeline)

        execution_plan = create_execution_plan(pipeline, run_config=run_config)
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(), execution_plan=execution_plan
        )

        assert execution_plan.get_step_by_key("return_one.compute")

        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["return_one.compute"]),
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            )
        )

        intermediate_storage = build_fs_intermediate_storage(
            instance.intermediates_directory, pipeline_run.run_id
        )

        assert get_step_output(return_one_step_events, "return_one.compute")
        assert intermediate_storage.has_intermediate(None, StepOutputHandle("return_one.compute"))
        assert (
            intermediate_storage.get_intermediate(
                None, Int, StepOutputHandle("return_one.compute")
            ).obj
            == 1
        )

        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["add_one.compute"]),
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            )
        )

        assert get_step_output(add_one_step_events, "add_one.compute")
        assert intermediate_storage.has_intermediate(None, StepOutputHandle("add_one.compute"))
        assert (
            intermediate_storage.get_intermediate(
                None, Int, StepOutputHandle("add_one.compute")
            ).obj
            == 2
        )
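
Multiprocess plan execution (here and in Example #26) requires a reconstructable pipeline: child processes must be able to re-import the pipeline, which is why reconstructable wraps a module-scope function such as define_inty_pipeline rather than a pipeline object built inline. A minimal sketch of the same pattern driven through execute_pipeline, assuming the define_inty_pipeline fixture (sketched after Example #14) is importable at module scope:

# Sketch only -- the reconstructable + multiprocess pattern used by these tests.
from dagster import execute_pipeline, reconstructable
from dagster.core.test_utils import instance_for_test


def run_inty_pipeline_multiprocess():
    with instance_for_test() as instance:
        return execute_pipeline(
            reconstructable(define_inty_pipeline),
            run_config={
                "intermediate_storage": {"filesystem": {}},
                "execution": {"multiprocess": {}},
            },
            instance=instance,
        )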
Example #24
def test_file_system_intermediate_storage_composite_types():
    _, _, intermediate_storage = define_intermediate_storage()

    assert intermediate_storage.set_intermediate(
        None, List[Bool], StepOutputHandle('return_true_lst.compute'), [True])

    assert intermediate_storage.has_intermediate(
        None, StepOutputHandle('return_true_lst.compute'))

    assert intermediate_storage.get_intermediate(
        None, List[Bool],
        StepOutputHandle('return_true_lst.compute')).obj == [True]
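
define_intermediate_storage is shared by this example and Example #28, which unpacks it as run_id, instance, intermediate_storage. A sketch of its likely shape, built from the same build_fs_intermediate_storage call the other filesystem examples use; the import locations are assumptions.

# Sketch only -- shape inferred from how the two filesystem examples unpack the result.
from dagster import DagsterInstance
from dagster.core.storage.intermediate_storage import build_fs_intermediate_storage  # assumed location
from dagster.core.utils import make_new_run_id  # assumed location


def define_intermediate_storage():
    run_id = make_new_run_id()
    instance = DagsterInstance.ephemeral()
    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, run_id=run_id
    )
    return run_id, instance, intermediate_storage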
Example #25
def test_output_handles_from_execution_plan():
    execution_plan = create_execution_plan(
        define_pipeline(), run_config={"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}},
    )

    assert output_handles_from_execution_plan(execution_plan) == set()
    assert output_handles_from_execution_plan(
        execution_plan.build_subset_plan(["add_two.compute", "add_three.compute"])
    ) == {StepOutputHandle("add_one.compute", "result")}
    assert output_handles_from_execution_plan(
        execution_plan.build_subset_plan(["add_three.compute"])
    ) == {StepOutputHandle("add_two.compute", "result")}
Example #26
def test_using_file_system_for_subplan_multiprocessing():

    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.local_temp()

    pipeline = reconstructable(define_inty_pipeline)

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline.get_definition(), execution_plan=execution_plan
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['return_one.compute']),
            instance,
            environment_dict=dict(environment_dict, execution={'multiprocess': {}}),
            pipeline_run=pipeline_run,
        )
    )

    store = build_fs_intermediate_store(instance.intermediates_directory, pipeline_run.run_id)
    intermediates_manager = IntermediateStoreIntermediatesManager(store)

    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert intermediates_manager.has_intermediate(None, StepOutputHandle('return_one.compute'))
    assert (
        intermediates_manager.get_intermediate(
            None, Int, StepOutputHandle('return_one.compute')
        ).obj
        == 1
    )

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            instance,
            environment_dict=dict(environment_dict, execution={'multiprocess': {}}),
            pipeline_run=pipeline_run,
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    assert intermediates_manager.has_intermediate(None, StepOutputHandle('add_one.compute'))
    assert (
        intermediates_manager.get_intermediate(None, Int, StepOutputHandle('add_one.compute')).obj
        == 2
    )
Example #27
def test_address_operation_using_intermediates_file_system():
    with seven.TemporaryDirectory() as tmpdir_path:
        output_address = os.path.join(tmpdir_path, "solid1.output")
        output_value = 5

        instance = DagsterInstance.ephemeral()
        intermediate_storage = build_fs_intermediate_storage(
            instance.intermediates_directory, run_id="some_run_id")

        object_operation_result = intermediate_storage.set_intermediate_to_address(
            context=None,
            dagster_type=Int,
            step_output_handle=StepOutputHandle("solid1.compute"),
            value=output_value,
            address=output_address,
        )

        assert object_operation_result.key == output_address
        assert object_operation_result.obj == output_value

        assert (intermediate_storage.get_intermediate_from_address(
            context=None,
            dagster_type=Int,
            step_output_handle=StepOutputHandle("solid1.compute"),
            address=output_address,
        ).obj == output_value)

        with pytest.raises(
                DagsterAddressIOError,
                match="No such file or directory",
        ):
            intermediate_storage.set_intermediate_to_address(
                context=None,
                dagster_type=Int,
                step_output_handle=StepOutputHandle("solid1.compute"),
                value=1,
                address="invalid_address",
            )

        with pytest.raises(
                DagsterAddressIOError,
                match="No such file or directory",
        ):
            intermediate_storage.get_intermediate_from_address(
                context=None,
                dagster_type=Int,
                step_output_handle=StepOutputHandle("solid1.compute"),
                address=os.path.join(tmpdir_path, "invalid.output"),
            )
Example #28
def test_file_system_intermediate_storage_composite_types_with_custom_serializer_for_inner_type(
):
    run_id, instance, intermediate_storage = define_intermediate_storage()

    with yield_empty_pipeline_context(run_id=run_id,
                                      instance=instance) as context:

        intermediate_storage.set_intermediate(
            context, resolve_dagster_type(List[LowercaseString]),
            StepOutputHandle('baz'), ['list'])
        assert intermediate_storage.has_intermediate(context,
                                                     StepOutputHandle('baz'))
        assert intermediate_storage.get_intermediate(
            context, resolve_dagster_type(List[Bool]),
            StepOutputHandle('baz')).obj == ['list']
Example #29
def test_using_intermediates_to_override():
    pipeline = define_inty_pipeline()

    run_config = {
        'storage': {
            'filesystem': {}
        },
        'intermediate_storage': {
            'in_memory': {}
        }
    }
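    # Both keys are supplied on purpose: the intermediate_storage config overrides the legacy
    # storage config, so the filesystem store checked below should stay empty.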

    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(
        pipeline,
        run_config=run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)
    assert execution_plan.get_step_by_key('return_one.compute')

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['return_one.compute']),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        pipeline_run.run_id)
    intermediates_manager = IntermediateStoreIntermediatesManager(store)
    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert not intermediates_manager.has_intermediate(
        None, StepOutputHandle('return_one.compute'))
Example #30
def test_addresses_for_version(version_storing_context):
    @solid(version="abc")
    def solid1(_):
        yield Output(5, address="some_address")

    @solid(version="123")
    def solid2(_, _input1):
        pass

    @pipeline
    def my_pipeline():
        solid2(solid1())

    with version_storing_context() as ctx:
        instance, _ = ctx
        execute_pipeline(instance=instance, pipeline=my_pipeline)

        step_output_handle = StepOutputHandle("solid1.compute", "result")
        output_version = resolve_step_output_versions(
            create_execution_plan(my_pipeline), run_config={},
            mode="default")[step_output_handle]
        assert instance.get_addresses_for_step_output_versions({
            ("my_pipeline", step_output_handle):
            output_version
        }) == {
            ("my_pipeline", step_output_handle): "some_address"
        }