예제 #1
0
def test_adls2_object_manager_execution(storage_account, file_system,
                                        credential):
    """Execute the inty pipeline one step at a time and read each output back.

    Runs ``return_one`` and then ``add_one`` as separate subset plans under the
    same run. After each step it asserts that the step produced an output and
    that a directly constructed ``PickledObjectADLS2ObjectManager`` loads the
    expected value for it (1, then 2).

    Args:
        storage_account: Placed in the ``adls2`` resource config and used to
            build the ADLS2 and blob clients (presumably a pytest fixture).
        file_system: Used as ``adls2_file_system`` in the run config and as
            the manager's ``file_system`` argument.
        credential: Used as the ``key`` credential in the run config and
            passed to the client factories.
    """
    inty_pipeline = define_inty_pipeline()

    adls2_config = {
        "storage_account": storage_account,
        "credential": {"key": credential},
    }
    run_config = {
        "resources": {
            "object_manager": {"config": {"adls2_file_system": file_system}},
            "adls2": {"config": adls2_config},
        }
    }

    run_id = make_new_run_id()

    plan = create_execution_plan(inty_pipeline, run_config=run_config)
    assert plan.get_step_by_key("return_one")

    instance = DagsterInstance.ephemeral()
    run = PipelineRun(
        pipeline_name=inty_pipeline.name,
        run_id=run_id,
        run_config=run_config,
    )

    def run_single_step(step_key):
        # Execute a subset plan holding only ``step_key`` and collect the
        # events it yields.
        return list(
            execute_plan(
                plan.build_subset_plan([step_key]),
                run_config=run_config,
                pipeline_run=run,
                instance=instance,
            )
        )

    def input_context_for(step_key):
        # Build an InputContext whose upstream output is the output handle
        # of ``step_key`` within this run.
        handle = StepOutputHandle(step_key)
        return InputContext(
            pipeline_name=inty_pipeline.name,
            solid_def=inty_pipeline.solid_def_named(step_key),
            upstream_output=OutputContext(
                step_key=handle.step_key,
                name=handle.output_name,
                pipeline_name=inty_pipeline.name,
                run_id=run_id,
                solid_def=inty_pipeline.solid_def_named(step_key),
            ),
        )

    # --- return_one -------------------------------------------------------
    return_one_events = run_single_step("return_one")
    assert get_step_output(return_one_events, "return_one")
    return_one_context = input_context_for("return_one")

    manager = PickledObjectADLS2ObjectManager(
        file_system=file_system,
        adls2_client=create_adls2_client(storage_account, credential),
        blob_client=create_blob_client(storage_account, credential),
    )
    assert manager.load_input(return_one_context) == 1

    # --- add_one ----------------------------------------------------------
    add_one_events = run_single_step("add_one")
    add_one_context = input_context_for("add_one")

    assert get_step_output(add_one_events, "add_one")
    assert manager.load_input(add_one_context) == 2
def get_blob_client(storage_account):
    """Return ``create_blob_client`` for ``storage_account``.

    The credential passed along is the ``"key"`` entry of the mapping
    returned by ``get_azure_credential()``.
    """
    return create_blob_client(storage_account, get_azure_credential()["key"])
예제 #3
0
def test_adls2_pickle_io_manager_execution(storage_account, file_system,
                                           credential):
    """Execute the inty job one step at a time and read each output back.

    Runs ``return_one`` and then ``add_one`` as separate subset plans under the
    same run. After each step it asserts that the step produced an output and
    that a directly constructed ``PickledObjectADLS2IOManager`` loads the
    expected value for it (1, then 2).

    Args:
        storage_account: Placed in the ``adls2`` resource config and used to
            build the ADLS2 and blob clients (presumably a pytest fixture).
        file_system: Used as ``adls2_file_system`` in the run config and as
            the manager's ``file_system`` argument.
        credential: Used as the ``key`` credential in the run config and
            passed to the client factories.
    """
    inty_job = define_inty_job()

    adls2_config = {
        "storage_account": storage_account,
        "credential": {"key": credential},
    }
    run_config = {
        "resources": {
            "io_manager": {"config": {"adls2_file_system": file_system}},
            "adls2": {"config": adls2_config},
        }
    }

    run_id = make_new_run_id()

    resolved = ResolvedRunConfig.build(inty_job, run_config=run_config)
    plan = ExecutionPlan.build(InMemoryPipeline(inty_job), resolved)
    assert plan.get_step_by_key("return_one")

    instance = DagsterInstance.ephemeral()
    run = PipelineRun(
        pipeline_name=inty_job.name,
        run_id=run_id,
        run_config=run_config,
    )

    def run_single_step(step_key):
        # Execute a subset plan holding only ``step_key`` and collect the
        # events it yields. A fresh InMemoryPipeline wrapper is built for
        # every execution.
        return list(
            execute_plan(
                plan.build_subset_plan([step_key], inty_job, resolved),
                pipeline=InMemoryPipeline(inty_job),
                run_config=run_config,
                pipeline_run=run,
                instance=instance,
            )
        )

    def input_context_for(step_key, **output_kwargs):
        # Build an input context whose upstream output is the "result"
        # output of ``step_key`` in this run; extra keyword arguments are
        # forwarded to build_output_context unchanged.
        return build_input_context(
            upstream_output=build_output_context(
                step_key=step_key,
                name="result",
                run_id=run_id,
                **output_kwargs,
            )
        )

    # --- return_one -------------------------------------------------------
    return_one_events = run_single_step("return_one")
    assert get_step_output(return_one_events, "return_one")
    return_one_context = input_context_for("return_one")

    manager = PickledObjectADLS2IOManager(
        file_system=file_system,
        adls2_client=create_adls2_client(storage_account, credential),
        blob_client=create_blob_client(storage_account, credential),
    )
    assert manager.load_input(return_one_context) == 1

    # --- add_one ----------------------------------------------------------
    add_one_events = run_single_step("add_one")
    # NOTE(review): only this context passes mapping_key="foo"; the
    # return_one context above does not. Confirm this is intentional.
    add_one_context = input_context_for("add_one", mapping_key="foo")

    assert get_step_output(add_one_events, "add_one")
    assert manager.load_input(add_one_context) == 2