def _has_run(self, run_id):
    global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
    # pylint: disable=protected-access
    if not self._run_storage.has_run(run_id) and not MOCK_HAS_RUN_CALLED:
        self._run_storage.add_run(PipelineRun(pipeline_name="foo_pipeline", run_id=run_id))
        MOCK_HAS_RUN_CALLED = True
        return False
    elif self._run_storage.has_run(run_id) and MOCK_HAS_RUN_CALLED:
        MOCK_HAS_RUN_CALLED = False
        return True
    else:
        return False

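# A minimal usage sketch for the mock above (an assumption, not taken from the original
# source): patch it over DagsterInstance.has_run so the first check misses and registers
# the run, while the immediately following check hits. Assumes the module-level
# MOCK_HAS_RUN_CALLED flag starts out False.
from unittest import mock

from dagster import DagsterInstance


def example_mocked_has_run_usage():
    with mock.patch.object(DagsterInstance, "has_run", _has_run):
        instance = DagsterInstance.ephemeral()
        assert not instance.has_run("42")  # first call adds the run and reports a miss
        assert instance.has_run("42")  # second call sees the run that was just added
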
def test_retry_deferral():
    events = execute_plan(
        create_execution_plan(define_retry_limit_pipeline()),
        pipeline_run=PipelineRun(pipeline_name='retry_limits', run_id='42'),
        retries=Retries(RetryMode.DEFERRED),
        instance=DagsterInstance.local_temp(),
    )
    events_by_type = defaultdict(list)
    for ev in events:
        events_by_type[ev.event_type].append(ev)

    assert len(events_by_type[DagsterEventType.STEP_START]) == 2
    assert len(events_by_type[DagsterEventType.STEP_UP_FOR_RETRY]) == 2
    # Deferred retries only mark steps as up-for-retry; nothing restarts or succeeds
    # within this plan execution.
    assert DagsterEventType.STEP_RESTARTED not in events_by_type
    assert DagsterEventType.STEP_SUCCESS not in events_by_type

def _setup_test_two_handler_log_mgr():
    test_formatter = logging.Formatter(fmt="%(levelname)s :: %(message)s")

    test_info_handler = logging.StreamHandler(sys.stdout)
    test_info_handler.setLevel("INFO")
    test_info_handler.setFormatter(test_formatter)

    test_warn_handler = logging.StreamHandler(sys.stdout)
    test_warn_handler.setLevel("WARN")
    test_warn_handler.setFormatter(test_formatter)

    return DagsterLogManager.create(
        loggers=[],
        handlers=[test_info_handler, test_warn_handler],
        pipeline_run=PipelineRun(pipeline_name="system", run_id="123"),
    )

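# A minimal usage sketch (an assumption, not part of the original tests): with the manager
# built above, an INFO record is emitted only by the INFO-level handler, while a WARNING
# record is emitted by both handlers, each formatted as "LEVEL :: message".
def example_two_handler_usage():
    log_manager = _setup_test_two_handler_log_mgr()
    log_manager.info("only the INFO handler prints this")
    log_manager.warning("both handlers print this")
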
def test_adls2_pipeline_with_custom_prefix(storage_account, file_system):
    adls2_prefix = 'custom_prefix'

    pipe = define_inty_pipeline(should_throw=False)
    environment_dict = {
        'resources': {
            'adls2': {
                'config': {'storage_account': storage_account, 'credential': get_azure_credential()}
            }
        },
        'storage': {
            'adls2': {'config': {'adls2_file_system': file_system, 'adls2_prefix': adls2_prefix}}
        },
    }

    pipeline_run = PipelineRun(pipeline_name=pipe.name, environment_dict=environment_dict)
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(pipe, environment_dict=environment_dict)
    assert result.success

    execution_plan = create_execution_plan(pipe, environment_dict)
    with scoped_pipeline_context(
        execution_plan,
        environment_dict,
        pipeline_run,
        instance,
    ) as context:
        resource = context.scoped_resources_builder.build(required_resource_keys={'adls2'}).adls2
        store = ADLS2IntermediateStore(
            run_id=result.run_id,
            file_system=file_system,
            prefix=adls2_prefix,
            adls2_client=resource.adls2_client,
            blob_client=resource.blob_client,
        )
        intermediates_manager = IntermediateStoreIntermediatesManager(store)
        assert store.root == '/'.join(['custom_prefix', 'storage', result.run_id])
        assert (
            intermediates_manager.get_intermediate(
                context, Int, StepOutputHandle('return_one.compute')
            ).obj
            == 1
        )
        assert (
            intermediates_manager.get_intermediate(
                context, Int, StepOutputHandle('add_one.compute')
            ).obj
            == 2
        )

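# These tests rely on a `define_inty_pipeline` helper that is not shown in this section. A
# minimal sketch of its likely shape (an assumption; the real helper presumably also attaches
# the storage- or resource-specific ModeDefinition each backend needs). It matches the step
# keys and intermediate values asserted above: return_one yields 1, add_one yields 2.
from dagster import pipeline, solid


def define_inty_pipeline_sketch(should_throw=True):
    @solid
    def return_one(_):
        return 1

    @solid
    def add_one(_, num):
        return num + 1

    @solid
    def user_throw_exception(_):
        raise Exception("whoops")

    @pipeline
    def inty_pipeline():
        add_one(return_one())
        if should_throw:
            user_throw_exception()

    return inty_pipeline
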
def test_retry_deferral():
    with instance_for_test() as instance:
        events = execute_plan(
            create_execution_plan(define_retry_limit_pipeline()),
            pipeline_run=PipelineRun(pipeline_name="retry_limits", run_id="42"),
            retry_mode=RetryMode.DEFERRED,
            instance=instance,
        )
        events_by_type = defaultdict(list)
        for ev in events:
            events_by_type[ev.event_type].append(ev)

        assert len(events_by_type[DagsterEventType.STEP_START]) == 2
        assert len(events_by_type[DagsterEventType.STEP_UP_FOR_RETRY]) == 2
        # Deferred retries only mark steps as up-for-retry; nothing restarts or succeeds
        # within this plan execution.
        assert DagsterEventType.STEP_RESTARTED not in events_by_type
        assert DagsterEventType.STEP_SUCCESS not in events_by_type

def test_s3_pipeline_with_custom_prefix(s3_bucket):
    s3_prefix = 'custom_prefix'

    pipe = define_inty_pipeline(should_throw=False)
    environment_dict = {
        'storage': {'s3': {'config': {'s3_bucket': s3_bucket, 's3_prefix': s3_prefix}}}
    }

    pipeline_run = PipelineRun(pipeline_name=pipe.name, environment_dict=environment_dict)
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(pipe, environment_dict=environment_dict)
    assert result.success

    execution_plan = create_execution_plan(pipe, environment_dict)
    with scoped_pipeline_context(
        execution_plan,
        environment_dict,
        pipeline_run,
        instance,
    ) as context:
        store = S3IntermediateStore(
            run_id=result.run_id,
            s3_bucket=s3_bucket,
            s3_prefix=s3_prefix,
            s3_session=context.scoped_resources_builder.build(required_resource_keys={'s3'}).s3,
        )
        intermediates_manager = IntermediateStoreIntermediatesManager(store)
        assert store.root == '/'.join(['custom_prefix', 'storage', result.run_id])
        assert (
            intermediates_manager.get_intermediate(
                context, Int, StepOutputHandle('return_one.compute')
            ).obj
            == 1
        )
        assert (
            intermediates_manager.get_intermediate(
                context, Int, StepOutputHandle('add_one.compute')
            ).obj
            == 2
        )

def test_s3_pipeline_with_custom_prefix(mock_s3_bucket):
    s3_prefix = "custom_prefix"

    pipe = define_inty_pipeline(should_throw=False)
    run_config = {
        "intermediate_storage": {
            "s3": {"config": {"s3_bucket": mock_s3_bucket.name, "s3_prefix": s3_prefix}}
        }
    }

    pipeline_run = PipelineRun(pipeline_name=pipe.name, run_config=run_config)
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(pipe, run_config=run_config)
    assert result.success

    execution_plan = create_execution_plan(pipe, run_config)
    with scoped_pipeline_context(
        execution_plan,
        InMemoryPipeline(pipe),
        run_config,
        pipeline_run,
        instance,
    ) as context:
        intermediates_manager = S3IntermediateStorage(
            run_id=result.run_id,
            s3_bucket=mock_s3_bucket.name,
            s3_prefix=s3_prefix,
            s3_session=context.scoped_resources_builder.build(required_resource_keys={"s3"}).s3,
        )
        assert intermediates_manager.root == "/".join(["custom_prefix", "storage", result.run_id])
        assert (
            intermediates_manager.get_intermediate(
                context, Int, StepOutputHandle("return_one")
            ).obj
            == 1
        )
        assert (
            intermediates_manager.get_intermediate(context, Int, StepOutputHandle("add_one")).obj
            == 2
        )

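# The `mock_s3_bucket` fixture used above is not shown in this section. A minimal sketch of a
# plausible implementation (an assumption, not the project's actual conftest), backed by moto
# (pre-5.x `mock_s3`) so no real AWS credentials or network access are required:
import boto3
import pytest
from moto import mock_s3


@pytest.fixture
def mock_s3_bucket():
    # Everything created inside the mock_s3 context lives only in memory.
    with mock_s3():
        yield boto3.resource("s3").create_bucket(Bucket="test-bucket")
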
def test_multiline_logging_complex():
    msg = "DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0"
    dagster_event = DagsterEvent(
        event_type_value="STEP_FAILURE",
        pipeline_name="error_monster",
        step_key="start.materialization.output.result.0",
        solid_handle=NodeHandle("start", None),
        step_kind_value="MATERIALIZATION_THUNK",
        logging_tags={
            "pipeline": "error_monster",
            "step_key": "start.materialization.output.result.0",
            "solid": "start",
            "solid_definition": "emit_num",
        },
        event_specific_data=StepFailureData(
            error=SerializableErrorInfo(
                message="FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n",
                stack=["a stack message"],
                cls_name="FileNotFoundError",
            ),
            user_failure_data=None,
        ),
    )

    with _setup_logger(DAGSTER_DEFAULT_LOGGER) as (captured_results, logger):
        dl = DagsterLogManager.create(
            loggers=[logger],
            pipeline_run=PipelineRun(run_id="123", pipeline_name="error_monster"),
        )
        dl.log_dagster_event(logging.INFO, msg, dagster_event)

    expected_results = [
        "error_monster - 123 - STEP_FAILURE - DagsterEventType.STEP_FAILURE for step "
        "start.materialization.output.result.0",
        "",
        "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'",
        "",
        "Stack Trace:",
        "a stack message",
    ]

    assert captured_results[0].split("\n") == expected_results

def test_adls2_pipeline_with_custom_prefix(storage_account, file_system):
    adls2_prefix = "custom_prefix"

    pipe = define_inty_pipeline(should_throw=False)
    run_config = {
        "resources": {
            "adls2": {
                "config": {"storage_account": storage_account, "credential": get_azure_credential()}
            }
        },
        "storage": {
            "adls2": {"config": {"adls2_file_system": file_system, "adls2_prefix": adls2_prefix}}
        },
    }

    pipeline_run = PipelineRun(pipeline_name=pipe.name, run_config=run_config)
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(pipe, run_config=run_config)
    assert result.success

    execution_plan = create_execution_plan(pipe, run_config)
    with scoped_pipeline_context(execution_plan, run_config, pipeline_run, instance) as context:
        resource = context.scoped_resources_builder.build(required_resource_keys={"adls2"}).adls2
        intermediate_storage = ADLS2IntermediateStorage(
            run_id=result.run_id,
            file_system=file_system,
            prefix=adls2_prefix,
            adls2_client=resource.adls2_client,
            blob_client=resource.blob_client,
        )
        assert intermediate_storage.root == "/".join(["custom_prefix", "storage", result.run_id])
        assert (
            intermediate_storage.get_intermediate(
                context, Int, StepOutputHandle("return_one.compute")
            ).obj
            == 1
        )
        assert (
            intermediate_storage.get_intermediate(
                context, Int, StepOutputHandle("add_one.compute")
            ).obj
            == 2
        )

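# `get_azure_credential` is referenced by the ADLS2 tests but not defined in this section. A
# minimal sketch of a plausible implementation (an assumption; the environment variable name
# is illustrative):
import os


def get_azure_credential_sketch():
    # Shared-key credential for the test storage account, read from the environment so the
    # ADLS2-backed tests can authenticate without hard-coding secrets.
    try:
        return {"key": os.environ["AZURE_STORAGE_ACCOUNT_KEY"]}
    except KeyError:
        raise Exception("AZURE_STORAGE_ACCOUNT_KEY must be set for ADLS2 tests")
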
def test_capture_handler_log_records():
    capture_handler = CaptureHandler()

    dl = DagsterLogManager.create(
        loggers=[],
        handlers=[capture_handler],
        pipeline_run=PipelineRun(run_id="123456", pipeline_name="pipeline"),
    ).with_tags(step_key="some_step")

    dl.info("info")
    dl.critical("critical error", extra={"foo": "bar"})

    assert len(capture_handler.captured) == 2

    captured_info_record = capture_handler.captured[0]
    assert captured_info_record.name == "dagster"
    assert captured_info_record.msg == "pipeline - 123456 - some_step - info"
    assert captured_info_record.levelno == logging.INFO

    captured_critical_record = capture_handler.captured[1]
    assert captured_critical_record.name == "dagster"
    assert captured_critical_record.msg == "pipeline - 123456 - some_step - critical error"
    assert captured_critical_record.levelno == logging.CRITICAL
    assert captured_critical_record.foo == "bar"

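# `CaptureHandler` is used above but defined elsewhere. A minimal sketch consistent with how
# the test uses it (an assumption): a logging.Handler that stores every record it receives so
# assertions can inspect them afterwards.
import logging


class CaptureHandlerSketch(logging.Handler):
    def __init__(self):
        super().__init__()
        self.captured = []

    def emit(self, record):
        self.captured.append(record)
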
def test_s3_pickle_io_manager_execution(mock_s3_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {"resources": {"io_manager": {"config": {"s3_bucket": mock_s3_bucket.name}}}}

    run_id = make_new_run_id()

    resolved_run_config = ResolvedRunConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), resolved_run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys, pipeline_def, resolved_run_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")

    io_manager = PickledObjectS3IOManager(
        mock_s3_bucket.name, construct_s3_client(max_attempts=5), s3_prefix="dagster"
    )
    step_output_handle = StepOutputHandle("return_one")
    context = build_input_context(
        upstream_output=build_output_context(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            run_id=run_id,
        )
    )
    assert io_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline_def, resolved_run_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    step_output_handle = StepOutputHandle("add_one")
    context = build_input_context(
        upstream_output=build_output_context(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            run_id=run_id,
        )
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert io_manager.load_input(context) == 2

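# `get_step_output` is shared by many of these tests but not defined in this section. A
# minimal sketch of what it presumably does (an assumption): scan the event list for the
# STEP_OUTPUT event emitted by the given step.
from dagster import DagsterEventType


def get_step_output_sketch(step_events, step_key, output_name="result"):
    for step_event in step_events:
        if (
            step_event.event_type == DagsterEventType.STEP_OUTPUT
            and step_event.step_key == step_key
            and step_event.step_output_data.output_name == output_name
        ):
            return step_event
    return None
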
def test_logging_integer_log_levels():
    with _setup_logger("test", {"FOO": 3}) as (_captured_results, logger):
        dl = DagsterLogManager.create(loggers=[logger], pipeline_run=PipelineRun(run_id="123"))
        dl.log(3, "test")  # pylint: disable=no-member

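# `_setup_logger` is used by the logging tests above but defined elsewhere. A minimal sketch
# consistent with its usage (an assumption): a context manager that yields a list of captured
# messages plus a logger whose records land in that list, optionally registering extra integer
# log levels such as {"FOO": 3}.
import logging
from contextlib import contextmanager


@contextmanager
def _setup_logger_sketch(name, log_levels=None):
    captured_messages = []

    class _Capture(logging.Handler):
        def emit(self, record):
            captured_messages.append(record.getMessage())

    for level_name, level_value in (log_levels or {}).items():
        logging.addLevelName(level_value, level_name)

    test_logger = logging.getLogger(name)
    test_logger.setLevel(1)  # accept even very low custom integer levels
    handler = _Capture()
    test_logger.addHandler(handler)
    try:
        yield captured_messages, test_logger
    finally:
        test_logger.removeHandler(handler)
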
def test_using_s3_for_subplan(mock_s3_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {"intermediate_storage": {"s3": {"config": {"s3_bucket": mock_s3_bucket.name}}}}

    run_id = make_new_run_id()

    environment_config = EnvironmentConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), environment_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys, pipeline_def, environment_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(["return_one"], pipeline_def, environment_config),
        InMemoryPipeline(pipeline_def),
        run_config,
        pipeline_run,
        instance,
    ) as context:
        intermediates_manager = S3IntermediateStorage(
            mock_s3_bucket.name,
            run_id,
            s3_session=context.scoped_resources_builder.build(
                required_resource_keys={"s3"},
            ).s3,
        )
        step_output_handle = StepOutputHandle("return_one")
        assert intermediates_manager.has_intermediate(context, step_output_handle)
        assert intermediates_manager.get_intermediate(context, Int, step_output_handle).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline_def, environment_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, "add_one")
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(["add_one"], pipeline_def, environment_config),
        InMemoryPipeline(pipeline_def),
        run_config,
        pipeline_run,
        instance,
    ) as context:
        step_output_handle = StepOutputHandle("add_one")
        assert intermediates_manager.has_intermediate(context, step_output_handle)
        assert intermediates_manager.get_intermediate(context, Int, step_output_handle).obj == 2

def test_s3_asset_store_execution(mock_s3_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {"resources": {"asset_store": {"config": {"s3_bucket": mock_s3_bucket.name}}}}

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    assert execution_plan.get_step_by_key("return_one.compute")

    step_keys = ["return_one.compute"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one.compute")

    asset_store = PickledObjectS3AssetStore(mock_s3_bucket.name, s3_prefix="dagster")
    step_output_handle = StepOutputHandle("return_one.compute")
    context = AssetStoreContext(
        step_output_handle.step_key,
        step_output_handle.output_name,
        {},
        pipeline_def.name,
        pipeline_def.solid_def_named("return_one"),
        run_id,
    )
    assert asset_store.get_asset(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one.compute"]),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    step_output_handle = StepOutputHandle("add_one.compute")
    context = AssetStoreContext(
        step_output_handle.step_key,
        step_output_handle.output_name,
        {},
        pipeline_def.name,
        pipeline_def.solid_def_named("add_one"),
        run_id,
    )

    assert get_step_output(add_one_step_events, "add_one.compute")
    assert asset_store.get_asset(context) == 2

def test_using_adls2_for_subplan(storage_account, file_system):
    pipeline_def = define_inty_pipeline()

    environment_dict = {
        'resources': {
            'adls2': {
                'config': {'storage_account': storage_account, 'credential': get_azure_credential()}
            }
        },
        'storage': {'adls2': {'config': {'adls2_file_system': file_system}}},
    }

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name, run_id=run_id, environment_dict=environment_dict
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, 'return_one.compute')
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(['return_one.compute']),
        environment_dict,
        pipeline_run,
        instance,
    ) as context:
        resource = context.scoped_resources_builder.build(required_resource_keys={'adls2'}).adls2
        store = ADLS2IntermediateStore(
            file_system=file_system,
            run_id=run_id,
            adls2_client=resource.adls2_client,
            blob_client=resource.blob_client,
        )
        intermediates_manager = IntermediateStoreIntermediatesManager(store)
        step_output_handle = StepOutputHandle('return_one.compute')
        assert intermediates_manager.has_intermediate(context, step_output_handle)
        assert intermediates_manager.get_intermediate(context, Int, step_output_handle).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(['add_one.compute']),
        environment_dict,
        pipeline_run,
        instance,
    ) as context:
        step_output_handle = StepOutputHandle('add_one.compute')
        assert intermediates_manager.has_intermediate(context, step_output_handle)
        assert intermediates_manager.get_intermediate(context, Int, step_output_handle).obj == 2

def test_gcs_pickle_io_manager_execution(gcs_bucket):
    inty_job = define_inty_job()

    run_config = {"resources": {"io_manager": {"config": {"gcs_bucket": gcs_bucket}}}}

    run_id = make_new_run_id()

    resolved_run_config = ResolvedRunConfig.build(inty_job, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(inty_job), resolved_run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=inty_job.name, run_id=run_id, run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys, inty_job, resolved_run_config),
            pipeline=InMemoryPipeline(inty_job),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")

    io_manager = PickledObjectGCSIOManager(gcs_bucket, storage.Client())
    step_output_handle = StepOutputHandle("return_one")
    context = build_input_context(
        upstream_output=build_output_context(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            run_id=run_id,
        )
    )
    assert io_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], inty_job, resolved_run_config),
            pipeline=InMemoryPipeline(inty_job),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    step_output_handle = StepOutputHandle("add_one")
    context = build_input_context(
        upstream_output=build_output_context(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            run_id=run_id,
        )
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert io_manager.load_input(context) == 2

def test_using_s3_for_subplan(s3_bucket):
    pipeline_def = define_inty_pipeline()

    environment_dict = {'storage': {'s3': {'config': {'s3_bucket': s3_bucket}}}}

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name, run_id=run_id, environment_dict=environment_dict
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, 'return_one.compute')
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(['return_one.compute']),
        environment_dict,
        pipeline_run,
        instance,
    ) as context:
        store = S3IntermediateStore(
            s3_bucket,
            run_id,
            s3_session=context.scoped_resources_builder.build(
                required_resource_keys={'s3'},
            ).s3,
        )
        intermediates_manager = IntermediateStoreIntermediatesManager(store)
        step_output_handle = StepOutputHandle('return_one.compute')
        assert intermediates_manager.has_intermediate(context, step_output_handle)
        assert intermediates_manager.get_intermediate(context, Int, step_output_handle).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(['add_one.compute']),
        environment_dict,
        pipeline_run,
        instance,
    ) as context:
        step_output_handle = StepOutputHandle('add_one.compute')
        assert intermediates_manager.has_intermediate(context, step_output_handle)
        assert intermediates_manager.get_intermediate(context, Int, step_output_handle).obj == 2

def test_adls2_object_manager_execution(storage_account, file_system, credential):
    pipeline_def = define_inty_pipeline()

    run_config = {
        "resources": {
            "object_manager": {"config": {"adls2_file_system": file_system}},
            "adls2": {
                "config": {"storage_account": storage_account, "credential": {"key": credential}}
            },
        }
    }

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")

    step_output_handle = StepOutputHandle("return_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("return_one"),
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            solid_def=pipeline_def.solid_def_named("return_one"),
        ),
    )

    object_manager = PickledObjectADLS2ObjectManager(
        file_system=file_system,
        adls2_client=create_adls2_client(storage_account, credential),
        blob_client=create_blob_client(storage_account, credential),
    )
    assert object_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"]),
            pipeline_run=pipeline_run,
            run_config=run_config,
            instance=instance,
        )
    )

    step_output_handle = StepOutputHandle("add_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("add_one"),
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            solid_def=pipeline_def.solid_def_named("add_one"),
        ),
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert object_manager.load_input(context) == 2

def test_s3_object_manager_execution(mock_s3_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {"resources": {"object_manager": {"config": {"s3_bucket": mock_s3_bucket.name}}}}

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")

    object_manager = PickledObjectS3ObjectManager(
        mock_s3_bucket.name, construct_s3_client(max_attempts=5), s3_prefix="dagster"
    )
    step_output_handle = StepOutputHandle("return_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("return_one"),
        config={},
        metadata={},
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            metadata={},
            mapping_key=None,
            config=None,
            solid_def=pipeline_def.solid_def_named("return_one"),
        ),
    )
    assert object_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"]),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    step_output_handle = StepOutputHandle("add_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("add_one"),
        config={},
        metadata={},
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            metadata={},
            mapping_key=None,
            config=None,
            solid_def=pipeline_def.solid_def_named("add_one"),
        ),
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert object_manager.load_input(context) == 2

def test_gcs_pickle_io_manager_execution(gcs_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {"resources": {"io_manager": {"config": {"gcs_bucket": gcs_bucket}}}}

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")

    io_manager = PickledObjectGCSIOManager(gcs_bucket, storage.Client())
    step_output_handle = StepOutputHandle("return_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("return_one"),
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            solid_def=pipeline_def.solid_def_named("return_one"),
        ),
        log_manager=DagsterLogManager(run_id=pipeline_run.run_id, logging_tags={}, loggers=[]),
    )
    assert io_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"]),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    step_output_handle = StepOutputHandle("add_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("add_one"),
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            solid_def=pipeline_def.solid_def_named("add_one"),
        ),
        log_manager=DagsterLogManager(run_id=pipeline_run.run_id, logging_tags={}, loggers=[]),
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert io_manager.load_input(context) == 2

def test_using_adls2_for_subplan(storage_account, file_system):
    pipeline_def = define_inty_pipeline()

    run_config = {
        "resources": {
            "adls2": {
                "config": {"storage_account": storage_account, "credential": get_azure_credential()}
            }
        },
        "intermediate_storage": {"adls2": {"config": {"adls2_file_system": file_system}}},
    }

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(["return_one"]),
        run_config,
        pipeline_run,
        instance,
    ) as context:
        resource = context.scoped_resources_builder.build(required_resource_keys={"adls2"}).adls2
        intermediate_storage = ADLS2IntermediateStorage(
            file_system=file_system,
            run_id=run_id,
            adls2_client=resource.adls2_client,
            blob_client=resource.blob_client,
        )
        step_output_handle = StepOutputHandle("return_one")
        assert intermediate_storage.has_intermediate(context, step_output_handle)
        assert intermediate_storage.get_intermediate(context, Int, step_output_handle).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"]),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, "add_one")
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(["add_one"]),
        run_config,
        pipeline_run,
        instance,
    ) as context:
        step_output_handle = StepOutputHandle("add_one")
        assert intermediate_storage.has_intermediate(context, step_output_handle)
        assert intermediate_storage.get_intermediate(context, Int, step_output_handle).obj == 2

def test_adls2_pickle_io_manager_execution(storage_account, file_system, credential):
    job = define_inty_job()

    run_config = {
        "resources": {
            "io_manager": {"config": {"adls2_file_system": file_system}},
            "adls2": {
                "config": {"storage_account": storage_account, "credential": {"key": credential}}
            },
        }
    }

    run_id = make_new_run_id()

    resolved_run_config = ResolvedRunConfig.build(job, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(job), resolved_run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=job.name, run_id=run_id, run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys, job, resolved_run_config),
            pipeline=InMemoryPipeline(job),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")

    context = build_input_context(
        upstream_output=build_output_context(
            step_key="return_one",
            name="result",
            run_id=run_id,
        )
    )

    io_manager = PickledObjectADLS2IOManager(
        file_system=file_system,
        adls2_client=create_adls2_client(storage_account, credential),
        blob_client=create_blob_client(storage_account, credential),
    )
    assert io_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], job, resolved_run_config),
            pipeline=InMemoryPipeline(job),
            pipeline_run=pipeline_run,
            run_config=run_config,
            instance=instance,
        )
    )

    context = build_input_context(
        upstream_output=build_output_context(
            step_key="add_one",
            name="result",
            run_id=run_id,
            mapping_key="foo",
        )
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert io_manager.load_input(context) == 2