Example #1
def _has_run(self, run_id):
    # Mock has_run: the first call registers the run and reports it missing;
    # the second call reports it present and resets the flag.
    global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
    # pylint: disable=protected-access
    if not self._run_storage.has_run(
            run_id) and not MOCK_HAS_RUN_CALLED:
        self._run_storage.add_run(
            PipelineRun(pipeline_name="foo_pipeline", run_id=run_id))
        MOCK_HAS_RUN_CALLED = True
        return False
    elif self._run_storage.has_run(run_id) and MOCK_HAS_RUN_CALLED:
        MOCK_HAS_RUN_CALLED = False
        return True
    else:
        return False
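The snippet above mutates a module-level MOCK_HAS_RUN_CALLED flag that is not shown. A minimal sketch of the assumed surrounding wiring, with install_mock_has_run being a hypothetical helper:

import types

# Assumed module-level flag toggled by the mock above.
MOCK_HAS_RUN_CALLED = False

# Hypothetical helper: bind the mock onto an object exposing _run_storage, so
# the first has-run check registers the run and reports it missing, and the
# second check reports it present.
def install_mock_has_run(target):
    target._has_run = types.MethodType(_has_run, target)  # pylint: disable=protected-access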
Example #2
def test_retry_deferral():
    events = execute_plan(
        create_execution_plan(define_retry_limit_pipeline()),
        pipeline_run=PipelineRun(pipeline_name='retry_limits', run_id='42'),
        retries=Retries(RetryMode.DEFERRED),
        instance=DagsterInstance.local_temp(),
    )
    events_by_type = defaultdict(list)
    for ev in events:
        events_by_type[ev.event_type].append(ev)

    assert len(events_by_type[DagsterEventType.STEP_START]) == 2
    assert len(events_by_type[DagsterEventType.STEP_UP_FOR_RETRY]) == 2
    assert DagsterEventType.STEP_RESTARTED not in events_by_type
    assert DagsterEventType.STEP_SUCCESS not in events_by_type
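define_retry_limit_pipeline is not shown; a sketch consistent with the assertions above (two steps, each deferring one retry), assuming the legacy @solid/@pipeline API:

from dagster import RetryRequested, pipeline, solid

# Hypothetical reconstruction: two solids that each raise RetryRequested, so a
# deferred-retry run emits two STEP_START and two STEP_UP_FOR_RETRY events.
def define_retry_limit_pipeline():
    @solid
    def default_max(_context):
        raise RetryRequested()

    @solid
    def three_max(_context):
        raise RetryRequested(max_retries=3)

    @pipeline
    def retry_limits():
        default_max()
        three_max()

    return retry_limits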
Example #3
def _setup_test_two_handler_log_mgr():
    test_formatter = logging.Formatter(fmt="%(levelname)s :: %(message)s")

    test_info_handler = logging.StreamHandler(sys.stdout)
    test_info_handler.setLevel("INFO")
    test_info_handler.setFormatter(test_formatter)

    test_warn_handler = logging.StreamHandler(sys.stdout)
    test_warn_handler.setLevel("WARN")
    test_warn_handler.setFormatter(test_formatter)

    return DagsterLogManager.create(
        loggers=[],
        handlers=[test_info_handler, test_warn_handler],
        pipeline_run=PipelineRun(pipeline_name="system", run_id="123"),
    )
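A possible usage of the helper above, showing why two handlers at different levels are created: an INFO message reaches only the first handler, while a warning reaches both.

# Hypothetical usage of the two-handler log manager (both handlers write to
# stdout with the shared formatter).
log_manager = _setup_test_two_handler_log_mgr()
log_manager.info("emitted once, by the INFO handler")
log_manager.warning("emitted twice, by the INFO and WARN handlers")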
Example #4
def test_adls2_pipeline_with_custom_prefix(storage_account, file_system):
    adls2_prefix = 'custom_prefix'

    pipe = define_inty_pipeline(should_throw=False)
    environment_dict = {
        'resources': {
            'adls2': {
                'config': {'storage_account': storage_account, 'credential': get_azure_credential()}
            }
        },
        'storage': {
            'adls2': {'config': {'adls2_file_system': file_system, 'adls2_prefix': adls2_prefix}}
        },
    }

    pipeline_run = PipelineRun(pipeline_name=pipe.name, environment_dict=environment_dict)
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(pipe, environment_dict=environment_dict,)
    assert result.success

    execution_plan = create_execution_plan(pipe, environment_dict)
    with scoped_pipeline_context(
        execution_plan, environment_dict, pipeline_run, instance,
    ) as context:
        resource = context.scoped_resources_builder.build(required_resource_keys={'adls2'}).adls2
        store = ADLS2IntermediateStore(
            run_id=result.run_id,
            file_system=file_system,
            prefix=adls2_prefix,
            adls2_client=resource.adls2_client,
            blob_client=resource.blob_client,
        )
        intermediates_manager = IntermediateStoreIntermediatesManager(store)
        assert store.root == '/'.join(['custom_prefix', 'storage', result.run_id])
        assert (
            intermediates_manager.get_intermediate(
                context, Int, StepOutputHandle('return_one.compute')
            ).obj
            == 1
        )
        assert (
            intermediates_manager.get_intermediate(
                context, Int, StepOutputHandle('add_one.compute')
            ).obj
            == 2
        )
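Several of these examples rely on a define_inty_pipeline helper that is not shown. A minimal sketch consistent with its usage (return_one produces 1, add_one produces 2, and should_throw gates an always-failing solid), assuming the legacy @solid/@pipeline API; the real helper presumably also attaches the mode and resource definitions each test configures.

from dagster import InputDefinition, Int, OutputDefinition, pipeline, solid

# Hypothetical reconstruction of the shared test pipeline.
def define_inty_pipeline(should_throw=True):
    @solid(output_defs=[OutputDefinition(Int)])
    def return_one(_context):
        return 1

    @solid(input_defs=[InputDefinition("num", Int)],
           output_defs=[OutputDefinition(Int)])
    def add_one(_context, num):
        return num + 1

    @solid
    def user_throw_exception(_context):
        raise Exception("whoops")

    @pipeline
    def basic_external_plan_execution():
        add_one(return_one())
        if should_throw:
            user_throw_exception()

    return basic_external_plan_execution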
Example #5
def test_retry_deferral():
    with instance_for_test() as instance:
        events = execute_plan(
            create_execution_plan(define_retry_limit_pipeline()),
            pipeline_run=PipelineRun(pipeline_name="retry_limits",
                                     run_id="42"),
            retry_mode=RetryMode.DEFERRED,
            instance=instance,
        )
        events_by_type = defaultdict(list)
        for ev in events:
            events_by_type[ev.event_type].append(ev)

        assert len(events_by_type[DagsterEventType.STEP_START]) == 2
        assert len(events_by_type[DagsterEventType.STEP_UP_FOR_RETRY]) == 2
        assert DagsterEventType.STEP_RESTARTED not in events_by_type
        assert DagsterEventType.STEP_SUCCESS not in events_by_type
Example #6
def test_s3_pipeline_with_custom_prefix(s3_bucket):
    s3_prefix = 'custom_prefix'

    pipe = define_inty_pipeline(should_throw=False)
    environment_dict = {
        'storage': {
            's3': {
                'config': {
                    's3_bucket': s3_bucket,
                    's3_prefix': s3_prefix
                }
            }
        }
    }

    pipeline_run = PipelineRun(pipeline_name=pipe.name,
                               environment_dict=environment_dict)
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(
        pipe,
        environment_dict=environment_dict,
    )
    assert result.success

    execution_plan = create_execution_plan(pipe, environment_dict)
    with scoped_pipeline_context(
            execution_plan,
            environment_dict,
            pipeline_run,
            instance,
    ) as context:
        store = S3IntermediateStore(
            run_id=result.run_id,
            s3_bucket=s3_bucket,
            s3_prefix=s3_prefix,
            s3_session=context.scoped_resources_builder.build(
                required_resource_keys={'s3'}).s3,
        )
        intermediates_manager = IntermediateStoreIntermediatesManager(store)
        assert store.root == '/'.join(
            ['custom_prefix', 'storage', result.run_id])
        assert (intermediates_manager.get_intermediate(
            context, Int, StepOutputHandle('return_one.compute')).obj == 1)
        assert (intermediates_manager.get_intermediate(
            context, Int, StepOutputHandle('add_one.compute')).obj == 2)
Example #7
def test_s3_pipeline_with_custom_prefix(mock_s3_bucket):
    s3_prefix = "custom_prefix"

    pipe = define_inty_pipeline(should_throw=False)
    run_config = {
        "intermediate_storage": {
            "s3": {
                "config": {
                    "s3_bucket": mock_s3_bucket.name,
                    "s3_prefix": s3_prefix
                }
            }
        }
    }

    pipeline_run = PipelineRun(pipeline_name=pipe.name, run_config=run_config)
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(
        pipe,
        run_config=run_config,
    )
    assert result.success

    execution_plan = create_execution_plan(pipe, run_config)
    with scoped_pipeline_context(
            execution_plan,
            InMemoryPipeline(pipe),
            run_config,
            pipeline_run,
            instance,
    ) as context:
        intermediates_manager = S3IntermediateStorage(
            run_id=result.run_id,
            s3_bucket=mock_s3_bucket.name,
            s3_prefix=s3_prefix,
            s3_session=context.scoped_resources_builder.build(
                required_resource_keys={"s3"}).s3,
        )
        assert intermediates_manager.root == "/".join(
            ["custom_prefix", "storage", result.run_id])
        assert (intermediates_manager.get_intermediate(
            context, Int, StepOutputHandle("return_one")).obj == 1)
        assert (intermediates_manager.get_intermediate(
            context, Int, StepOutputHandle("add_one")).obj == 2)
Example #8
def test_multiline_logging_complex():
    msg = "DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0"
    dagster_event = DagsterEvent(
        event_type_value="STEP_FAILURE",
        pipeline_name="error_monster",
        step_key="start.materialization.output.result.0",
        solid_handle=NodeHandle("start", None),
        step_kind_value="MATERIALIZATION_THUNK",
        logging_tags={
            "pipeline": "error_monster",
            "step_key": "start.materialization.output.result.0",
            "solid": "start",
            "solid_definition": "emit_num",
        },
        event_specific_data=StepFailureData(
            error=SerializableErrorInfo(
                message=
                "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n",
                stack=["a stack message"],
                cls_name="FileNotFoundError",
            ),
            user_failure_data=None,
        ),
    )

    with _setup_logger(DAGSTER_DEFAULT_LOGGER) as (captured_results, logger):

        dl = DagsterLogManager.create(loggers=[logger],
                                      pipeline_run=PipelineRun(
                                          run_id="123",
                                          pipeline_name="error_monster"))
        dl.log_dagster_event(logging.INFO, msg, dagster_event)

    expected_results = [
        "error_monster - 123 - STEP_FAILURE - DagsterEventType.STEP_FAILURE for step "
        "start.materialization.output.result.0",
        "",
        "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'",
        "",
        "Stack Trace:",
        "a stack message",
    ]

    assert captured_results[0].split("\n") == expected_results
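_setup_logger is another helper that is not shown; a hypothetical stand-in yields the list of captured messages plus a stdlib logger wired to capture them, and registers any custom numeric levels (such as the {"FOO": 3} mapping used in a later example):

import logging
from contextlib import contextmanager

# Hypothetical stand-in for _setup_logger: capture every formatted message
# handed to the logger, at any level, and clean up the handler afterwards.
@contextmanager
def _setup_logger(name, custom_levels=None):
    for level_name, level_no in (custom_levels or {}).items():
        logging.addLevelName(level_no, level_name)

    captured_results = []

    class _Capture(logging.Handler):
        def emit(self, record):
            captured_results.append(record.getMessage())

    logger = logging.getLogger(name)
    logger.setLevel(1)  # let everything through, including custom low levels
    handler = _Capture(level=1)
    logger.addHandler(handler)
    try:
        yield captured_results, logger
    finally:
        logger.removeHandler(handler)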
Example #9
def test_adls2_pipeline_with_custom_prefix(storage_account, file_system):
    adls2_prefix = "custom_prefix"

    pipe = define_inty_pipeline(should_throw=False)
    run_config = {
        "resources": {
            "adls2": {
                "config": {"storage_account": storage_account, "credential": get_azure_credential()}
            }
        },
        "storage": {
            "adls2": {"config": {"adls2_file_system": file_system, "adls2_prefix": adls2_prefix}}
        },
    }

    pipeline_run = PipelineRun(pipeline_name=pipe.name, run_config=run_config)
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(pipe, run_config=run_config,)
    assert result.success

    execution_plan = create_execution_plan(pipe, run_config)
    with scoped_pipeline_context(execution_plan, run_config, pipeline_run, instance,) as context:
        resource = context.scoped_resources_builder.build(required_resource_keys={"adls2"}).adls2
        intermediate_storage = ADLS2IntermediateStorage(
            run_id=result.run_id,
            file_system=file_system,
            prefix=adls2_prefix,
            adls2_client=resource.adls2_client,
            blob_client=resource.blob_client,
        )
        assert intermediate_storage.root == "/".join(["custom_prefix", "storage", result.run_id])
        assert (
            intermediate_storage.get_intermediate(
                context, Int, StepOutputHandle("return_one.compute")
            ).obj
            == 1
        )
        assert (
            intermediate_storage.get_intermediate(
                context, Int, StepOutputHandle("add_one.compute")
            ).obj
            == 2
        )
Example #10
def test_capture_handler_log_records():
    capture_handler = CaptureHandler()

    dl = DagsterLogManager.create(
        loggers=[],
        handlers=[capture_handler],
        pipeline_run=PipelineRun(run_id="123456", pipeline_name="pipeline"),
    ).with_tags(step_key="some_step")

    dl.info("info")
    dl.critical("critical error", extra={"foo": "bar"})

    assert len(capture_handler.captured) == 2

    captured_info_record = capture_handler.captured[0]
    assert captured_info_record.name == "dagster"
    assert captured_info_record.msg == "pipeline - 123456 - some_step - info"
    assert captured_info_record.levelno == logging.INFO

    captured_critical_record = capture_handler.captured[1]
    assert captured_critical_record.name == "dagster"
    assert captured_critical_record.msg == "pipeline - 123456 - some_step - critical error"
    assert captured_critical_record.levelno == logging.CRITICAL
    assert captured_critical_record.foo == "bar"
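CaptureHandler is not defined in this collection; a minimal hypothetical version consistent with the assertions above is a handler that simply stores every LogRecord it receives.

import logging

# Hypothetical CaptureHandler: keep the raw LogRecords so tests can inspect
# name, msg, levelno, and any attributes injected via extra.
class CaptureHandler(logging.Handler):
    def __init__(self, level=logging.NOTSET):
        super().__init__(level)
        self.captured = []

    def emit(self, record):
        self.captured.append(record)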
Example #11
def test_s3_pickle_io_manager_execution(mock_s3_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {
        "resources": {
            "io_manager": {
                "config": {
                    "s3_bucket": mock_s3_bucket.name
                }
            }
        }
    }

    run_id = make_new_run_id()

    resolved_run_config = ResolvedRunConfig.build(pipeline_def,
                                                  run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def),
                                         resolved_run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=pipeline_def.name,
                               run_id=run_id,
                               run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys, pipeline_def,
                                             resolved_run_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(return_one_step_events, "return_one")

    io_manager = PickledObjectS3IOManager(mock_s3_bucket.name,
                                          construct_s3_client(max_attempts=5),
                                          s3_prefix="dagster")
    step_output_handle = StepOutputHandle("return_one")
    context = build_input_context(upstream_output=build_output_context(
        step_key=step_output_handle.step_key,
        name=step_output_handle.output_name,
        run_id=run_id,
    ))
    assert io_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline_def,
                                             resolved_run_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    step_output_handle = StepOutputHandle("add_one")
    context = build_input_context(upstream_output=build_output_context(
        step_key=step_output_handle.step_key,
        name=step_output_handle.output_name,
        run_id=run_id,
    ))

    assert get_step_output(add_one_step_events, "add_one")
    assert io_manager.load_input(context) == 2
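get_step_output, used throughout these examples, is also not shown; a sketch of the assumed behavior, returning the STEP_OUTPUT event for a given step key and output name (or None):

from dagster import DagsterEventType

# Hypothetical helper: scan the emitted events for the STEP_OUTPUT event of a
# particular step and output.
def get_step_output(step_events, step_key, output_name="result"):
    for step_event in step_events:
        if (
            step_event.event_type == DagsterEventType.STEP_OUTPUT
            and step_event.step_key == step_key
            and step_event.step_output_data.output_name == output_name
        ):
            return step_event
    return None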
Example #12
def test_logging_integer_log_levels():
    with _setup_logger("test", {"FOO": 3}) as (_captured_results, logger):

        dl = DagsterLogManager.create(loggers=[logger],
                                      pipeline_run=PipelineRun(run_id="123"))
        dl.log(3, "test")  # pylint: disable=no-member
Example #13
def test_using_s3_for_subplan(mock_s3_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {
        "intermediate_storage": {
            "s3": {
                "config": {
                    "s3_bucket": mock_s3_bucket.name
                }
            }
        }
    }

    run_id = make_new_run_id()

    environment_config = EnvironmentConfig.build(pipeline_def,
                                                 run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def),
                                         environment_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=pipeline_def.name,
                               run_id=run_id,
                               run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys, pipeline_def,
                                             environment_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(return_one_step_events, "return_one")
    with scoped_pipeline_context(
            execution_plan.build_subset_plan(["return_one"], pipeline_def,
                                             environment_config),
            InMemoryPipeline(pipeline_def),
            run_config,
            pipeline_run,
            instance,
    ) as context:

        intermediates_manager = S3IntermediateStorage(
            mock_s3_bucket.name,
            run_id,
            s3_session=context.scoped_resources_builder.build(
                required_resource_keys={"s3"}, ).s3,
        )
        step_output_handle = StepOutputHandle("return_one")
        assert intermediates_manager.has_intermediate(context,
                                                      step_output_handle)
        assert intermediates_manager.get_intermediate(
            context, Int, step_output_handle).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline_def,
                                             environment_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(add_one_step_events, "add_one")
    with scoped_pipeline_context(
            execution_plan.build_subset_plan(["add_one"], pipeline_def,
                                             environment_config),
            InMemoryPipeline(pipeline_def),
            run_config,
            pipeline_run,
            instance,
    ) as context:
        step_output_handle = StepOutputHandle("add_one")
        assert intermediates_manager.has_intermediate(context,
                                                      step_output_handle)
        assert intermediates_manager.get_intermediate(
            context, Int, step_output_handle).obj == 2
Example #14
def test_s3_asset_store_execution(mock_s3_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {
        "resources": {
            "asset_store": {
                "config": {
                    "s3_bucket": mock_s3_bucket.name
                }
            }
        }
    }

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    assert execution_plan.get_step_by_key("return_one.compute")

    step_keys = ["return_one.compute"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=pipeline_def.name,
                               run_id=run_id,
                               run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(return_one_step_events, "return_one.compute")

    asset_store = PickledObjectS3AssetStore(mock_s3_bucket.name,
                                            s3_prefix="dagster")
    step_output_handle = StepOutputHandle("return_one.compute")
    context = AssetStoreContext(
        step_output_handle.step_key,
        step_output_handle.output_name,
        {},
        pipeline_def.name,
        pipeline_def.solid_def_named("return_one"),
        run_id,
    )
    assert asset_store.get_asset(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one.compute"]),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    step_output_handle = StepOutputHandle("add_one.compute")
    context = AssetStoreContext(
        step_output_handle.step_key,
        step_output_handle.output_name,
        {},
        pipeline_def.name,
        pipeline_def.solid_def_named("add_one"),
        run_id,
    )

    assert get_step_output(add_one_step_events, "add_one.compute")
    assert asset_store.get_asset(context) == 2
Example #15
def test_using_adls2_for_subplan(storage_account, file_system):
    pipeline_def = define_inty_pipeline()

    environment_dict = {
        'resources': {
            'adls2': {
                'config': {'storage_account': storage_account, 'credential': get_azure_credential()}
            }
        },
        'storage': {'adls2': {'config': {'adls2_file_system': file_system}}},
    }

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name, run_id=run_id, environment_dict=environment_dict
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, 'return_one.compute')
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(['return_one.compute']),
        environment_dict,
        pipeline_run,
        instance,
    ) as context:

        resource = context.scoped_resources_builder.build(required_resource_keys={'adls2'}).adls2
        store = ADLS2IntermediateStore(
            file_system=file_system,
            run_id=run_id,
            adls2_client=resource.adls2_client,
            blob_client=resource.blob_client,
        )
        intermediates_manager = IntermediateStoreIntermediatesManager(store)
        step_output_handle = StepOutputHandle('return_one.compute')
        assert intermediates_manager.has_intermediate(context, step_output_handle)
        assert intermediates_manager.get_intermediate(context, Int, step_output_handle).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(['add_one.compute']),
        environment_dict,
        pipeline_run,
        instance,
    ) as context:
        step_output_handle = StepOutputHandle('add_one.compute')
        assert intermediates_manager.has_intermediate(context, step_output_handle)
        assert intermediates_manager.get_intermediate(context, Int, step_output_handle).obj == 2
Example #16
def test_gcs_pickle_io_manager_execution(gcs_bucket):
    inty_job = define_inty_job()

    run_config = {
        "resources": {
            "io_manager": {
                "config": {
                    "gcs_bucket": gcs_bucket,
                }
            }
        }
    }

    run_id = make_new_run_id()

    resolved_run_config = ResolvedRunConfig.build(inty_job,
                                                  run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(inty_job),
                                         resolved_run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=inty_job.name,
                               run_id=run_id,
                               run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys, inty_job,
                                             resolved_run_config),
            pipeline=InMemoryPipeline(inty_job),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(return_one_step_events, "return_one")

    io_manager = PickledObjectGCSIOManager(gcs_bucket, storage.Client())
    step_output_handle = StepOutputHandle("return_one")
    context = build_input_context(upstream_output=build_output_context(
        step_key=step_output_handle.step_key,
        name=step_output_handle.output_name,
        run_id=run_id,
    ))
    assert io_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], inty_job,
                                             resolved_run_config),
            pipeline=InMemoryPipeline(inty_job),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    step_output_handle = StepOutputHandle("add_one")
    context = build_input_context(upstream_output=build_output_context(
        step_key=step_output_handle.step_key,
        name=step_output_handle.output_name,
        run_id=run_id,
    ))

    assert get_step_output(add_one_step_events, "add_one")
    assert io_manager.load_input(context) == 2
Example #17
def test_using_s3_for_subplan(s3_bucket):
    pipeline_def = define_inty_pipeline()

    environment_dict = {
        'storage': {
            's3': {
                'config': {
                    's3_bucket': s3_bucket
                }
            }
        }
    }

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def,
                                           environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=pipeline_def.name,
                               run_id=run_id,
                               environment_dict=environment_dict)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(return_one_step_events, 'return_one.compute')
    with scoped_pipeline_context(
            execution_plan.build_subset_plan(['return_one.compute']),
            environment_dict,
            pipeline_run,
            instance,
    ) as context:

        store = S3IntermediateStore(
            s3_bucket,
            run_id,
            s3_session=context.scoped_resources_builder.build(
                required_resource_keys={'s3'}, ).s3,
        )
        intermediates_manager = IntermediateStoreIntermediatesManager(store)
        step_output_handle = StepOutputHandle('return_one.compute')
        assert intermediates_manager.has_intermediate(context,
                                                      step_output_handle)
        assert intermediates_manager.get_intermediate(
            context, Int, step_output_handle).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(add_one_step_events, 'add_one.compute')
    with scoped_pipeline_context(
            execution_plan.build_subset_plan(['add_one.compute']),
            environment_dict,
            pipeline_run,
            instance,
    ) as context:
        step_output_handle = StepOutputHandle('add_one.compute')
        assert intermediates_manager.has_intermediate(context,
                                                      step_output_handle)
        assert intermediates_manager.get_intermediate(
            context, Int, step_output_handle).obj == 2
Example #18
def test_adls2_object_manager_execution(storage_account, file_system,
                                        credential):
    pipeline_def = define_inty_pipeline()

    run_config = {
        "resources": {
            "object_manager": {
                "config": {
                    "adls2_file_system": file_system
                }
            },
            "adls2": {
                "config": {
                    "storage_account": storage_account,
                    "credential": {
                        "key": credential
                    }
                }
            },
        }
    }

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=pipeline_def.name,
                               run_id=run_id,
                               run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(return_one_step_events, "return_one")
    step_output_handle = StepOutputHandle("return_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("return_one"),
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            solid_def=pipeline_def.solid_def_named("return_one"),
        ),
    )

    object_manager = PickledObjectADLS2ObjectManager(
        file_system=file_system,
        adls2_client=create_adls2_client(storage_account, credential),
        blob_client=create_blob_client(storage_account, credential),
    )
    assert object_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"]),
            pipeline_run=pipeline_run,
            run_config=run_config,
            instance=instance,
        ))

    step_output_handle = StepOutputHandle("add_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("add_one"),
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            solid_def=pipeline_def.solid_def_named("add_one"),
        ),
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert object_manager.load_input(context) == 2
Example #19
def test_s3_object_manager_execution(mock_s3_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {"resources": {"object_manager": {"config": {"s3_bucket": mock_s3_bucket.name}}}}

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")

    object_manager = PickledObjectS3ObjectManager(
        mock_s3_bucket.name, construct_s3_client(max_attempts=5), s3_prefix="dagster"
    )
    step_output_handle = StepOutputHandle("return_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("return_one"),
        config={},
        metadata={},
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            metadata={},
            mapping_key=None,
            config=None,
            solid_def=pipeline_def.solid_def_named("return_one"),
        ),
    )
    assert object_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"]),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    step_output_handle = StepOutputHandle("add_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("add_one"),
        config={},
        metadata={},
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            metadata={},
            mapping_key=None,
            config=None,
            solid_def=pipeline_def.solid_def_named("add_one"),
        ),
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert object_manager.load_input(context) == 2
Example #20
def test_gcs_pickle_io_manager_execution(gcs_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {
        "resources": {
            "io_manager": {
                "config": {
                    "gcs_bucket": gcs_bucket,
                }
            }
        }
    }

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=pipeline_def.name,
                               run_id=run_id,
                               run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(return_one_step_events, "return_one")

    io_manager = PickledObjectGCSIOManager(gcs_bucket, storage.Client())
    step_output_handle = StepOutputHandle("return_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("return_one"),
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            solid_def=pipeline_def.solid_def_named("return_one"),
        ),
        log_manager=DagsterLogManager(run_id=pipeline_run.run_id,
                                      logging_tags={},
                                      loggers=[]),
    )
    assert io_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"]),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    step_output_handle = StepOutputHandle("add_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("add_one"),
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            solid_def=pipeline_def.solid_def_named("add_one"),
        ),
        log_manager=DagsterLogManager(run_id=pipeline_run.run_id,
                                      logging_tags={},
                                      loggers=[]),
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert io_manager.load_input(context) == 2
Example #21
def test_using_adls2_for_subplan(storage_account, file_system):
    pipeline_def = define_inty_pipeline()

    run_config = {
        "resources": {
            "adls2": {
                "config": {
                    "storage_account": storage_account,
                    "credential": get_azure_credential()
                }
            }
        },
        "intermediate_storage": {
            "adls2": {
                "config": {
                    "adls2_file_system": file_system
                }
            }
        },
    }

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=pipeline_def.name,
                               run_id=run_id,
                               run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(return_one_step_events, "return_one")
    with scoped_pipeline_context(
            execution_plan.build_subset_plan(["return_one"]),
            run_config,
            pipeline_run,
            instance,
    ) as context:

        resource = context.scoped_resources_builder.build(
            required_resource_keys={"adls2"}).adls2
        intermediate_storage = ADLS2IntermediateStorage(
            file_system=file_system,
            run_id=run_id,
            adls2_client=resource.adls2_client,
            blob_client=resource.blob_client,
        )
        step_output_handle = StepOutputHandle("return_one")
        assert intermediate_storage.has_intermediate(context,
                                                     step_output_handle)
        assert intermediate_storage.get_intermediate(
            context, Int, step_output_handle).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"]),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(add_one_step_events, "add_one")
    with scoped_pipeline_context(
            execution_plan.build_subset_plan(["add_one"]),
            run_config,
            pipeline_run,
            instance,
    ) as context:
        step_output_handle = StepOutputHandle("add_one")
        assert intermediate_storage.has_intermediate(context,
                                                     step_output_handle)
        assert intermediate_storage.get_intermediate(
            context, Int, step_output_handle).obj == 2
Example #22
def test_adls2_pickle_io_manager_execution(storage_account, file_system,
                                           credential):
    job = define_inty_job()

    run_config = {
        "resources": {
            "io_manager": {
                "config": {
                    "adls2_file_system": file_system
                }
            },
            "adls2": {
                "config": {
                    "storage_account": storage_account,
                    "credential": {
                        "key": credential
                    }
                }
            },
        }
    }

    run_id = make_new_run_id()

    resolved_run_config = ResolvedRunConfig.build(job, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(job),
                                         resolved_run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=job.name,
                               run_id=run_id,
                               run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys, job,
                                             resolved_run_config),
            pipeline=InMemoryPipeline(job),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(return_one_step_events, "return_one")
    context = build_input_context(upstream_output=build_output_context(
        step_key="return_one",
        name="result",
        run_id=run_id,
    ))

    io_manager = PickledObjectADLS2IOManager(
        file_system=file_system,
        adls2_client=create_adls2_client(storage_account, credential),
        blob_client=create_blob_client(storage_account, credential),
    )
    assert io_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], job,
                                             resolved_run_config),
            pipeline=InMemoryPipeline(job),
            pipeline_run=pipeline_run,
            run_config=run_config,
            instance=instance,
        ))

    context = build_input_context(upstream_output=build_output_context(
        step_key="add_one",
        name="result",
        run_id=run_id,
        mapping_key="foo",
    ))

    assert get_step_output(add_one_step_events, "add_one")
    assert io_manager.load_input(context) == 2