def test_fan_out_should_skip_step():
    """A downstream step skips exactly when the optional upstream output it needs was not emitted."""

    @solid(
        output_defs=[
            OutputDefinition(Int, "out_1", is_required=False),
            OutputDefinition(Int, "out_2", is_required=False),
            OutputDefinition(Int, "out_3", is_required=False),
        ]
    )
    def foo(_):
        # Emit only the first of the three optional outputs.
        yield Output(1, "out_1")

    @solid
    def bar(_, input_arg):
        return input_arg

    @pipeline
    def optional_outputs():
        foo_res = foo()  # pylint: disable=no-member
        bar.alias("bar_1")(input_arg=foo_res.out_1)
        bar.alias("bar_2")(input_arg=foo_res.out_2)
        bar.alias("bar_3")(input_arg=foo_res.out_3)

    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name="optional_outputs", run_id=make_new_run_id())

    def plan_for(step_key):
        # One-step execution plan for the given step key.
        return create_execution_plan(optional_outputs, step_keys_to_execute=[step_key])

    # Run only "foo" so the instance records which outputs were actually produced.
    execute_plan(
        plan_for("foo"),
        InMemoryPipeline(optional_outputs),
        instance,
        pipeline_run,
    )

    # "bar_1" consumes the emitted output, so it must not be skipped.
    assert not should_skip_step(plan_for("bar_1"), instance, pipeline_run.run_id)
    # "bar_2" and "bar_3" consume outputs that were never yielded, so they skip.
    assert should_skip_step(plan_for("bar_2"), instance, pipeline_run.run_id)
    assert should_skip_step(plan_for("bar_3"), instance, pipeline_run.run_id)
def execute_step_command(input_json):
    """Execute a set of pipeline steps described by a serialized ``ExecuteStepArgs`` payload.

    Deserializes ``input_json``, loads the pipeline run and reconstructable pipeline,
    optionally verifies the steps, and either emits a ``StepExecutionSkipped`` marker
    or runs the plan and echoes each serialized event to stdout.
    """
    try:
        # Route SIGTERM through the SIGINT handler so termination behaves like Ctrl-C.
        # signal.signal only works on the main thread; ValueError means we are not on it.
        signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
    except ValueError:
        warnings.warn((
            "Unexpected error attempting to manage signal handling on thread {thread_name}. "
            "You should not invoke this API (execute_step) from threads "
            "other than the main thread.").format(
                thread_name=threading.current_thread().name))

    # Parse and validate the serialized arguments.
    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteStepArgs)

    # Use the referenced instance if one was provided, otherwise the ambient instance.
    with (DagsterInstance.from_ref(args.instance_ref)
          if args.instance_ref else DagsterInstance.get()) as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        check.inst(
            pipeline_run,
            PipelineRun,
            "Pipeline run with id '{}' not found for step execution".format(
                args.pipeline_run_id),
        )
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
        retries = Retries.from_config(args.retries_dict)

        # Optional pre-flight check; bail out silently if verification fails.
        if args.should_verify_step:
            success = verify_step(instance, pipeline_run, retries, args.step_keys_to_execute)
            if not success:
                return

        # Build the plan against the same solid subset / config / mode as the run.
        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute),
            run_config=pipeline_run.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=pipeline_run.mode,
        )

        buff = []

        # Flag that the step execution is skipped
        if should_skip_step(execution_plan, instance=instance, run_id=pipeline_run.run_id):
            click.echo(serialize_dagster_namedtuple(StepExecutionSkipped()))
            return

        # Buffer serialized events and emit them all at once after execution completes.
        for event in execute_plan_iterator(
            execution_plan,
            pipeline_run,
            instance,
            run_config=pipeline_run.run_config,
            retries=retries,
        ):
            buff.append(serialize_dagster_namedtuple(event))

        for line in buff:
            click.echo(line)
def execute_step_command(input_json):
    """Execute a set of pipeline steps described by a serialized ``ExecuteStepArgs`` payload.

    Newer variant of the step command: interrupts are handled via ``capture_interrupts``,
    retry state comes from ``args.known_state``, and the retry policy is passed as
    ``retry_mode``. Emits either a ``StepExecutionSkipped`` marker or each serialized
    execution event to stdout.
    """
    with capture_interrupts():
        # Parse and validate the serialized arguments.
        args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteStepArgs)

        # Use the referenced instance if one was provided, otherwise the ambient instance.
        with (DagsterInstance.from_ref(args.instance_ref)
              if args.instance_ref else DagsterInstance.get()) as instance:
            pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
            check.inst(
                pipeline_run,
                PipelineRun,
                "Pipeline run with id '{}' not found for step execution".format(
                    args.pipeline_run_id),
            )

            # Optional pre-flight check; bail out silently if verification fails.
            if args.should_verify_step:
                success = verify_step(
                    instance,
                    pipeline_run,
                    args.known_state.get_retry_state(),
                    args.step_keys_to_execute,
                )
                if not success:
                    return

            recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)

            # Build the plan against the same solid subset / config / mode as the run,
            # threading through known_state so prior step results are visible.
            execution_plan = create_execution_plan(
                recon_pipeline.subset_for_execution_from_existing_pipeline(
                    pipeline_run.solids_to_execute),
                run_config=pipeline_run.run_config,
                step_keys_to_execute=args.step_keys_to_execute,
                mode=pipeline_run.mode,
                known_state=args.known_state,
            )

            buff = []

            # Flag that the step execution is skipped
            if should_skip_step(execution_plan, instance=instance, run_id=pipeline_run.run_id):
                click.echo(serialize_dagster_namedtuple(StepExecutionSkipped()))
                return

            # Buffer serialized events and emit them all at once after execution completes.
            for event in execute_plan_iterator(
                execution_plan,
                pipeline_run,
                instance,
                run_config=pipeline_run.run_config,
                retry_mode=args.retry_mode,
            ):
                buff.append(serialize_dagster_namedtuple(event))

            for line in buff:
                click.echo(line)
def invoke_steps_within_python_operator(invocation_args, ts, dag_run, **kwargs):  # pylint: disable=unused-argument
    """Run a subset of Dagster pipeline steps from within an Airflow PythonOperator.

    Registers (or re-attaches to) a managed run keyed by the Airflow ``dag_run.run_id``,
    builds an execution plan for the configured step keys, and executes it. Raises
    ``AirflowSkipException`` when Dagster determines the steps should be skipped, so
    Airflow marks the task as skipped rather than failed.

    Returns the list of Dagster events from ``execute_plan``.

    NOTE(review): if ``invocation_args.instance_ref`` is falsy, the whole body is a
    no-op and the function implicitly returns ``None`` — confirm callers expect that.
    """
    # Unpack the invocation payload into locals.
    mode = invocation_args.mode
    pipeline_name = invocation_args.pipeline_name
    step_keys = invocation_args.step_keys
    instance_ref = invocation_args.instance_ref
    run_config = invocation_args.run_config
    recon_repo = invocation_args.recon_repo
    pipeline_snapshot = invocation_args.pipeline_snapshot
    execution_plan_snapshot = invocation_args.execution_plan_snapshot
    parent_pipeline_snapshot = invocation_args.parent_pipeline_snapshot

    # The Airflow dag run id doubles as the Dagster run id, tying retries of the
    # same dag run back to the same managed Dagster run.
    run_id = dag_run.run_id

    instance = DagsterInstance.from_ref(instance_ref) if instance_ref else None
    if instance:
        with instance:
            # Tag the run with the Airflow logical date when available.
            tags = {AIRFLOW_EXECUTION_DATE_STR: ts} if ts else {}

            # Idempotently register the run; safe to call from multiple operators
            # belonging to the same dag run.
            pipeline_run = instance.register_managed_run(
                pipeline_name=pipeline_name,
                run_id=run_id,
                run_config=run_config,
                mode=mode,
                solids_to_execute=None,
                step_keys_to_execute=None,
                tags=tags,
                root_run_id=None,
                parent_run_id=None,
                pipeline_snapshot=pipeline_snapshot,
                execution_plan_snapshot=execution_plan_snapshot,
                parent_pipeline_snapshot=parent_pipeline_snapshot,
            )

            recon_pipeline = recon_repo.get_reconstructable_pipeline(
                pipeline_name).subset_for_execution_from_existing_pipeline(
                    pipeline_run.solids_to_execute)

            execution_plan = create_execution_plan(
                recon_pipeline,
                run_config=run_config,
                step_keys_to_execute=step_keys,
                mode=mode,
            )

            # Let Airflow record a "skipped" state instead of running empty steps.
            if should_skip_step(execution_plan, instance, pipeline_run.run_id):
                raise AirflowSkipException(
                    "Dagster emitted skip event, skipping execution in Airflow"
                )

            events = execute_plan(execution_plan, recon_pipeline, instance,
                                  pipeline_run, run_config=run_config)

            # Surface Dagster failures/skips as Airflow task outcomes.
            check_events_for_failures(events)
            check_events_for_skips(events)

            return events
def _should_skip(self, pipeline_run):
    """Return whether the configured step keys should be skipped for *pipeline_run*."""
    reconstructable = self.recon_repo.get_reconstructable_pipeline(self.pipeline_name)
    # Narrow to the same solid subset the run was launched with before planning.
    subset = reconstructable.subset_for_execution_from_existing_pipeline(
        pipeline_run.solids_to_execute
    )
    plan = create_execution_plan(
        subset,
        run_config=self.run_config,
        step_keys_to_execute=self.step_keys,
        mode=self.mode,
    )
    return should_skip_step(plan, instance=self.instance, run_id=pipeline_run.run_id)
def test_configured_input_should_skip_step():
    """should_skip_step must agree with execute_pipeline when one of a solid's
    inputs is satisfied from run config rather than an upstream output."""
    called = {}

    @solid(output_defs=[OutputDefinition(is_required=False)])
    def one(_):
        yield Output(1)

    @solid
    def solid_should_not_skip(_, input_one, input_two):  # pylint: disable=unused-argument
        called["yup"] = True

    @pipeline
    def my_pipeline():
        solid_should_not_skip(one())

    # input_two comes from config, input_one from the optional upstream output.
    run_config = {
        "solids": {
            "solid_should_not_skip": {
                "inputs": {
                    "input_two": {
                        "value": "2"
                    }
                }
            }
        }
    }

    # Full in-process execution runs the solid; the flag proves it was not skipped.
    execute_pipeline(my_pipeline, run_config=run_config)
    assert called.get("yup")

    # ensure should_skip_step behaves the same as execute_pipeline
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name="my_pipeline", run_id=make_new_run_id())

    def plan_for(step_key):
        # Single-step plan under the same run config.
        return create_execution_plan(
            my_pipeline,
            step_keys_to_execute=[step_key],
            run_config=run_config,
        )

    execute_plan(
        plan_for("one"),
        InMemoryPipeline(my_pipeline),
        instance,
        pipeline_run,
        run_config=run_config,
    )

    assert not should_skip_step(
        plan_for("solid_should_not_skip"),
        instance,
        pipeline_run.run_id,
    )
def test_fan_in_should_skip_step():
    """A fan-in step skips only when ALL of its upstream optional outputs were skipped;
    a single materialized source is enough for it to run."""

    @lambda_solid
    def one():
        return 1

    @solid(output_defs=[OutputDefinition(is_required=False)])
    def skip(_):
        # Generator solid that yields nothing: its optional output is never emitted.
        return
        yield  # pylint: disable=unreachable

    @solid
    def fan_in(_context, items):
        return items

    @composite_solid(output_defs=[OutputDefinition(is_required=False)])
    def composite_all_upstream_skip():
        # Both fan-in sources skip.
        return fan_in([skip(), skip()])

    @composite_solid(output_defs=[OutputDefinition(is_required=False)])
    def composite_one_upstream_skip():
        # One source materializes, the other skips.
        return fan_in([one(), skip()])

    @pipeline
    def optional_outputs_composite():
        composite_all_upstream_skip()
        composite_one_upstream_skip()

    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name="optional_outputs_composite",
                               run_id=make_new_run_id())

    # NOTE(review): execute_plan here is called without an InMemoryPipeline argument,
    # unlike the other tests in this file — presumably this snippet targets a
    # different execute_plan signature; confirm against the dagster version in use.
    execute_plan(
        create_execution_plan(
            optional_outputs_composite,
            step_keys_to_execute=[
                "composite_all_upstream_skip.skip",
                "composite_all_upstream_skip.skip_2",
            ],
        ),
        instance,
        pipeline_run,
    )

    # skip when all the step's sources weren't yield
    assert should_skip_step(
        create_execution_plan(
            optional_outputs_composite,
            step_keys_to_execute=["composite_all_upstream_skip.fan_in"],
        ),
        instance,
        pipeline_run.run_id,
    )

    execute_plan(
        create_execution_plan(
            optional_outputs_composite,
            step_keys_to_execute=[
                "composite_one_upstream_skip.one",
                "composite_one_upstream_skip.skip",
            ],
        ),
        instance,
        pipeline_run,
    )

    # do not skip when some of the sources exist
    assert not should_skip_step(
        create_execution_plan(
            optional_outputs_composite,
            step_keys_to_execute=["composite_one_upstream_skip.fan_in"],
        ),
        instance,
        pipeline_run.run_id,
    )