コード例 #1
0
ファイル: test_io_manager.py プロジェクト: zuik/dagster
def test_my_io_manager_load_input():
    """Check that ``MyIOManager.load_input`` returns a value seeded in ``storage_dict``.

    The value is placed directly into the manager's ``storage_dict`` under the
    tuple ``("123", "abc")`` -- the same two strings that are given as
    ``step_key`` and ``name`` to the upstream ``OutputContext`` below.
    """
    manager = MyIOManager()
    manager.storage_dict[("123", "abc")] = 5

    # The input context only carries a reference to the upstream output.
    upstream = OutputContext(name="abc", step_key="123")
    load_context = InputContext(upstream_output=upstream)

    loaded = manager.load_input(load_context)
    assert loaded == 5
コード例 #2
0
def test_handle_output_then_load_input():
    """Write a DataFrame with ``SnowflakeIOManager`` and read it back.

    A temporary table is created from a one-row template frame, a second
    one-row frame is passed to ``handle_output``, and ``load_input`` is then
    expected to return a frame equal to the one that was written.
    """
    config = generate_snowflake_config()
    manager = SnowflakeIOManager()

    # Template frame: only used to create the temporary table
    # ("just to get the types right").
    type_template = DataFrame([{"col1": "a", "col2": 1}])
    # The frame that is actually handed to the IO manager.
    contents2 = DataFrame([{"col1": "b", "col2": 2}])

    with temporary_snowflake_table(type_template) as table_name:
        write_context = OutputContext(
            step_key="a",
            name="result",
            pipeline_name="fake_pipeline",
            metadata={"table": f"public.{table_name}"},
            resource_config=config,
        )

        # Iterate the result of handle_output to completion (exhaust the iterator).
        for _ in manager.handle_output(write_context, contents2):
            pass

        read_context = InputContext(
            upstream_output=write_context,
            resource_config=config,
        )
        input_value = manager.load_input(read_context)
        assert input_value.equals(contents2), f"{input_value}\n\n{contents2}"
コード例 #3
0
def test_s3_object_manager_execution(mock_s3_bucket):
    """Run the ``return_one`` and ``add_one`` steps and load their outputs from S3.

    Each step is executed on its own via a subset plan.  After each step, a
    separately constructed ``PickledObjectS3ObjectManager`` (configured with the
    same bucket name) is asked to load the step's output, which is expected to
    be ``1`` and ``2`` respectively.

    Args:
        mock_s3_bucket: Fixture whose ``name`` attribute is used as the bucket.
    """
    pipeline_def = define_inty_pipeline()
    run_config = {
        "resources": {
            "object_manager": {"config": {"s3_bucket": mock_s3_bucket.name}},
        }
    }
    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)
    assert execution_plan.get_step_by_key("return_one")

    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        run_config=run_config,
    )

    def run_single_step(step_key):
        # Execute a subset plan containing just this step; collect all events.
        subset_plan = execution_plan.build_subset_plan([step_key])
        return list(
            execute_plan(
                subset_plan,
                run_config=run_config,
                pipeline_run=pipeline_run,
                instance=instance,
            )
        )

    def input_context_for(step_key):
        # Build an InputContext whose upstream output is the given step's output.
        handle = StepOutputHandle(step_key)
        return InputContext(
            pipeline_name=pipeline_def.name,
            solid_def=pipeline_def.solid_def_named(step_key),
            config={},
            metadata={},
            upstream_output=OutputContext(
                step_key=handle.step_key,
                name=handle.output_name,
                pipeline_name=pipeline_def.name,
                run_id=run_id,
                metadata={},
                mapping_key=None,
                config=None,
                solid_def=pipeline_def.solid_def_named(step_key),
            ),
        )

    # --- first step: return_one ---
    return_one_events = run_single_step("return_one")
    assert get_step_output(return_one_events, "return_one")

    object_manager = PickledObjectS3ObjectManager(
        mock_s3_bucket.name,
        construct_s3_client(max_attempts=5),
        s3_prefix="dagster",
    )
    return_one_context = input_context_for("return_one")
    assert object_manager.load_input(return_one_context) == 1

    # --- second step: add_one ---
    add_one_events = run_single_step("add_one")
    add_one_context = input_context_for("add_one")

    assert get_step_output(add_one_events, "add_one")
    assert object_manager.load_input(add_one_context) == 2
コード例 #4
0
ファイル: test_io_manager.py プロジェクト: zuik/dagster
def test_my_io_manager_handle_output():
    """Check that ``MyIOManager.handle_output`` records the value in ``storage_dict``.

    After handling the value ``5`` for an output context with ``step_key="123"``
    and ``name="abc"``, the entry ``storage_dict[("123", "abc")]`` must equal 5.
    """
    manager = MyIOManager()
    output_context = OutputContext(name="abc", step_key="123")

    manager.handle_output(output_context, 5)

    stored = manager.storage_dict[("123", "abc")]
    assert stored == 5
コード例 #5
0
def test_gcs_pickle_io_manager_execution(gcs_bucket):
    """Run the ``return_one`` and ``add_one`` steps and load their outputs from GCS.

    Each step is executed on its own via a subset plan.  After each step, a
    separately constructed ``PickledObjectGCSIOManager`` (given the same bucket)
    is asked to load the step's output, which is expected to be ``1`` and ``2``
    respectively.

    Args:
        gcs_bucket: Fixture value used both in the run config and when
            constructing the IO manager.
    """
    pipeline_def = define_inty_pipeline()
    run_config = {
        "resources": {"io_manager": {"config": {"gcs_bucket": gcs_bucket}}},
    }
    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)
    assert execution_plan.get_step_by_key("return_one")

    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        run_config=run_config,
    )

    def run_single_step(step_key):
        # Execute a subset plan containing just this step; collect all events.
        subset_plan = execution_plan.build_subset_plan([step_key])
        return list(
            execute_plan(
                subset_plan,
                run_config=run_config,
                pipeline_run=pipeline_run,
                instance=instance,
            )
        )

    def input_context_for(step_key):
        # Build an InputContext whose upstream output is the given step's
        # output; a fresh DagsterLogManager is created for every context.
        handle = StepOutputHandle(step_key)
        return InputContext(
            pipeline_name=pipeline_def.name,
            solid_def=pipeline_def.solid_def_named(step_key),
            upstream_output=OutputContext(
                step_key=handle.step_key,
                name=handle.output_name,
                pipeline_name=pipeline_def.name,
                run_id=run_id,
                solid_def=pipeline_def.solid_def_named(step_key),
            ),
            log_manager=DagsterLogManager(
                run_id=pipeline_run.run_id,
                logging_tags={},
                loggers=[],
            ),
        )

    # --- first step: return_one ---
    return_one_events = run_single_step("return_one")
    assert get_step_output(return_one_events, "return_one")

    io_manager = PickledObjectGCSIOManager(gcs_bucket, storage.Client())
    return_one_context = input_context_for("return_one")
    assert io_manager.load_input(return_one_context) == 1

    # --- second step: add_one ---
    add_one_events = run_single_step("add_one")
    add_one_context = input_context_for("add_one")

    assert get_step_output(add_one_events, "add_one")
    assert io_manager.load_input(add_one_context) == 2
コード例 #6
0
def test_adls2_object_manager_execution(storage_account, file_system,
                                        credential):
    """Run the ``return_one`` and ``add_one`` steps and load their outputs from ADLS2.

    Each step is executed on its own via a subset plan.  After each step, a
    separately constructed ``PickledObjectADLS2ObjectManager`` (built from the
    same file system, storage account and credential) is asked to load the
    step's output, which is expected to be ``1`` and ``2`` respectively.

    Args:
        storage_account: Storage account passed to the ``adls2`` resource config
            and to the client factory functions.
        file_system: File system passed to the ``object_manager`` resource
            config and to the object manager.
        credential: Value placed under the ``"key"`` entry of the credential
            config and passed to the client factory functions.
    """
    pipeline_def = define_inty_pipeline()

    object_manager_resource = {"config": {"adls2_file_system": file_system}}
    adls2_resource = {
        "config": {
            "storage_account": storage_account,
            "credential": {"key": credential},
        }
    }
    run_config = {
        "resources": {
            "object_manager": object_manager_resource,
            "adls2": adls2_resource,
        }
    }
    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)
    assert execution_plan.get_step_by_key("return_one")

    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        run_config=run_config,
    )

    def run_single_step(step_key):
        # Execute a subset plan containing just this step; collect all events.
        subset_plan = execution_plan.build_subset_plan([step_key])
        return list(
            execute_plan(
                subset_plan,
                run_config=run_config,
                pipeline_run=pipeline_run,
                instance=instance,
            )
        )

    def input_context_for(step_key):
        # Build an InputContext whose upstream output is the given step's output.
        handle = StepOutputHandle(step_key)
        return InputContext(
            pipeline_name=pipeline_def.name,
            solid_def=pipeline_def.solid_def_named(step_key),
            upstream_output=OutputContext(
                step_key=handle.step_key,
                name=handle.output_name,
                pipeline_name=pipeline_def.name,
                run_id=run_id,
                solid_def=pipeline_def.solid_def_named(step_key),
            ),
        )

    # --- first step: return_one ---
    return_one_events = run_single_step("return_one")
    assert get_step_output(return_one_events, "return_one")
    return_one_context = input_context_for("return_one")

    object_manager = PickledObjectADLS2ObjectManager(
        file_system=file_system,
        adls2_client=create_adls2_client(storage_account, credential),
        blob_client=create_blob_client(storage_account, credential),
    )
    assert object_manager.load_input(return_one_context) == 1

    # --- second step: add_one ---
    add_one_events = run_single_step("add_one")
    add_one_context = input_context_for("add_one")

    assert get_step_output(add_one_events, "add_one")
    assert object_manager.load_input(add_one_context) == 2