def execute(pipeline_context, execution_plan, step_keys_to_execute=None):
    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    step_key_set = None if step_keys_to_execute is None else set(step_keys_to_execute)

    yield DagsterEvent.engine_event(
        pipeline_context,
        'Executing steps in process (pid: {pid})'.format(pid=os.getpid()),
        event_specific_data=EngineEventData.in_process(os.getpid(), step_key_set),
    )

    with time_execution_scope() as timer_result:
        check.param_invariant(
            isinstance(pipeline_context.executor_config, ExecutorConfig),
            'pipeline_context',
            'Expected executor_config to be ExecutorConfig got {}'.format(
                pipeline_context.executor_config
            ),
        )

        failed_or_skipped_steps = set()

        step_levels = execution_plan.topological_step_levels()

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        for step_level in step_levels:
            for step in step_level:
                if step_key_set and step.key not in step_key_set:
                    continue

                step_context = pipeline_context.for_step(step)

                failed_inputs = []
                for step_input in step.step_inputs:
                    failed_inputs.extend(
                        failed_or_skipped_steps.intersection(step_input.dependency_keys)
                    )

                if failed_inputs:
                    step_context.log.info(
                        (
                            'Dependencies for step {step} failed: {failed_inputs}. Not executing.'
                        ).format(step=step.key, failed_inputs=failed_inputs)
                    )
                    failed_or_skipped_steps.add(step.key)
                    yield DagsterEvent.step_skipped_event(step_context)
                    continue

                uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                    step_context, step
                )
                if uncovered_inputs:
                    # In partial pipeline execution, we may end up here without having validated the
                    # missing dependent outputs were optional
                    _assert_missing_inputs_optional(uncovered_inputs, execution_plan, step.key)

                    step_context.log.info(
                        (
                            'Not all inputs covered for {step}. Not executing. Output missing for '
                            'inputs: {uncovered_inputs}'
                        ).format(uncovered_inputs=uncovered_inputs, step=step.key)
                    )
                    failed_or_skipped_steps.add(step.key)
                    yield DagsterEvent.step_skipped_event(step_context)
                    continue

                for step_event in check.generator(dagster_event_sequence_for_step(step_context)):
                    check.inst(step_event, DagsterEvent)
                    if step_event.is_step_failure:
                        failed_or_skipped_steps.add(step.key)
                    yield step_event

    yield DagsterEvent.engine_event(
        pipeline_context,
        'Finished steps in process (pid: {pid}) in {duration_ms}'.format(
            pid=os.getpid(), duration_ms=format_duration(timer_result.millis)
        ),
        event_specific_data=EngineEventData.in_process(os.getpid(), step_key_set),
    )

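# Illustrative sketch only: a stripped-down model of the skip-propagation pattern the
# execute() generator above uses when walking topological_step_levels(). The names
# STEPS, run_step, and execute_levels are hypothetical stand-ins, not Dagster APIs.
STEPS = {
    'extract': [],               # step key -> keys of upstream dependencies
    'transform': ['extract'],
    'load': ['transform'],
}


def run_step(key):
    # Pretend the first step fails so the skip propagation is visible.
    return key != 'extract'


def execute_levels(levels):
    failed_or_skipped = set()
    for level in levels:
        for key in level:
            # A step whose upstream dependency failed or was skipped is itself skipped.
            if failed_or_skipped.intersection(STEPS[key]):
                print('skipping {} (upstream failed or skipped)'.format(key))
                failed_or_skipped.add(key)
                continue
            if run_step(key):
                print('{} succeeded'.format(key))
            else:
                print('{} failed'.format(key))
                failed_or_skipped.add(key)


# 'extract' fails, so 'transform' and 'load' are skipped without running.
execute_levels([['extract'], ['transform'], ['load']])
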
def inner_plan_execution_iterator(pipeline_context, execution_plan, retries):
    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(retries, 'retries', Retries)

    for event in copy_required_intermediates_for_execution(pipeline_context, execution_plan):
        yield event

    # It would be good to implement a reference tracking algorithm here to
    # garbage collect results that are no longer needed by any steps
    # https://github.com/dagster-io/dagster/issues/811
    active_execution = execution_plan.start(retries=retries)
    while not active_execution.is_complete:
        step = active_execution.get_next_step()

        step_context = pipeline_context.for_step(step)

        missing_resources = [
            resource_key
            for resource_key in step_context.required_resource_keys
            if not hasattr(step_context.resources, resource_key)
        ]
        check.invariant(
            len(missing_resources) == 0,
            (
                'Expected step context for solid {solid_name} to have all required resources, but '
                'missing {missing_resources}.'
            ).format(solid_name=step_context.solid.name, missing_resources=missing_resources),
        )

        with pipeline_context.instance.compute_log_manager.watch(
            step_context.pipeline_run, step_context.step.key
        ):  # capture all of the logs for this step
            uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                step_context, step
            )
            if uncovered_inputs:
                # In partial pipeline execution, we may end up here without having validated the
                # missing dependent outputs were optional
                _assert_missing_inputs_optional(uncovered_inputs, execution_plan, step.key)

                step_context.log.info(
                    (
                        'Not all inputs covered for {step}. Not executing. Output missing for '
                        'inputs: {uncovered_inputs}'
                    ).format(uncovered_inputs=uncovered_inputs, step=step.key)
                )
                yield DagsterEvent.step_skipped_event(step_context)
                active_execution.mark_skipped(step.key)
            else:
                for step_event in check.generator(
                    _dagster_event_sequence_for_step(step_context, retries)
                ):
                    check.inst(step_event, DagsterEvent)
                    yield step_event
                    active_execution.handle_event(step_event)

            active_execution.verify_complete(pipeline_context, step.key)

        # process skips from failures or uncovered inputs
        for event in active_execution.skipped_step_events_iterator(pipeline_context):
            yield event

def execute(pipeline_context, execution_plan):
    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

    yield DagsterEvent.engine_event(
        pipeline_context,
        'Executing steps in process (pid: {pid})'.format(pid=os.getpid()),
        event_specific_data=EngineEventData.in_process(
            os.getpid(), execution_plan.step_keys_to_execute
        ),
    )

    with time_execution_scope() as timer_result:
        check.param_invariant(
            isinstance(pipeline_context.executor_config, ExecutorConfig),
            'pipeline_context',
            'Expected executor_config to be ExecutorConfig got {}'.format(
                pipeline_context.executor_config
            ),
        )

        for event in copy_required_intermediates_for_execution(pipeline_context, execution_plan):
            yield event

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        active_execution = execution_plan.start()
        while not active_execution.is_complete:
            steps = active_execution.get_steps_to_execute(limit=1)
            check.invariant(
                len(steps) == 1, 'Invariant Violation: expected step to be available to execute'
            )
            step = steps[0]
            step_context = pipeline_context.for_step(step)
            check.invariant(
                all(
                    hasattr(step_context.resources, resource_key)
                    for resource_key in step_context.required_resource_keys
                ),
                'expected step context to have all required resources',
            )

            with mirror_step_io(step_context):  # capture all of the logs for this step
                uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                    step_context, step
                )
                if uncovered_inputs:
                    # In partial pipeline execution, we may end up here without having validated the
                    # missing dependent outputs were optional
                    _assert_missing_inputs_optional(uncovered_inputs, execution_plan, step.key)

                    step_context.log.info(
                        (
                            'Not all inputs covered for {step}. Not executing. Output missing for '
                            'inputs: {uncovered_inputs}'
                        ).format(uncovered_inputs=uncovered_inputs, step=step.key)
                    )
                    yield DagsterEvent.step_skipped_event(step_context)
                    active_execution.mark_skipped(step.key)
                    continue

                step_success = None
                for step_event in check.generator(dagster_event_sequence_for_step(step_context)):
                    check.inst(step_event, DagsterEvent)
                    yield step_event
                    if step_event.is_step_failure:
                        step_success = False
                    elif step_event.is_step_success:
                        step_success = True

                if step_success is True:
                    active_execution.mark_success(step.key)
                elif step_success is False:
                    active_execution.mark_failed(step.key)
                else:
                    pipeline_context.log.error(
                        'Step {key} finished without success or failure event, assuming failure.'.format(
                            key=step.key
                        )
                    )
                    active_execution.mark_failed(step.key)

            # process skips from failures or uncovered inputs
            for event in active_execution.skipped_step_events_iterator(pipeline_context):
                yield event

    yield DagsterEvent.engine_event(
        pipeline_context,
        'Finished steps in process (pid: {pid}) in {duration_ms}'.format(
            pid=os.getpid(), duration_ms=format_duration(timer_result.millis)
        ),
        event_specific_data=EngineEventData.in_process(
            os.getpid(), execution_plan.step_keys_to_execute
        ),
    )

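# Illustrative sketch only: a minimal model of the "active execution" bookkeeping the
# execute() generators above drive via ExecutionPlan.start(). ToyActiveExecution and its
# outcome strings are hypothetical stand-ins, not the real Dagster object; real skips are
# surfaced lazily through skipped_step_events_iterator rather than eagerly as done here.
class ToyActiveExecution(object):
    def __init__(self, deps):
        self._deps = deps            # step key -> set of upstream step keys
        self._pending = set(deps)    # steps not yet resolved
        self._resolved = {}          # step key -> 'success' | 'failed' | 'skipped'

    @property
    def is_complete(self):
        return not self._pending

    def get_steps_to_execute(self, limit=1):
        # A step is ready once every upstream dependency has succeeded.
        ready = [
            key
            for key in self._pending
            if all(self._resolved.get(dep) == 'success' for dep in self._deps[key])
        ]
        return ready[:limit]

    def _mark(self, key, outcome):
        self._pending.discard(key)
        self._resolved[key] = outcome

    def mark_success(self, key):
        self._mark(key, 'success')

    def mark_failed(self, key):
        self._mark(key, 'failed')
        self._skip_downstream(key)

    def mark_skipped(self, key):
        self._mark(key, 'skipped')
        self._skip_downstream(key)

    def _skip_downstream(self, key):
        # Transitively skip anything that can no longer run.
        for candidate in list(self._pending):
            if key in self._deps[candidate] and candidate in self._pending:
                self.mark_skipped(candidate)


# Example: if 'extract' fails, 'transform' and 'load' are skipped and the run completes.
toy = ToyActiveExecution({'extract': set(), 'transform': {'extract'}, 'load': {'transform'}})
toy.mark_failed(toy.get_steps_to_execute()[0])  # the only ready step is 'extract'
assert toy.is_complete
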
def execute(pipeline_context, execution_plan, step_keys_to_execute=None):
    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    step_key_set = None if step_keys_to_execute is None else set(step_keys_to_execute)

    check.param_invariant(
        isinstance(pipeline_context.executor_config, ExecutorConfig),
        'pipeline_context',
        'Expected executor_config to be ExecutorConfig got {}'.format(
            pipeline_context.executor_config
        ),
    )

    failed_or_skipped_steps = set()

    step_levels = execution_plan.topological_step_levels()

    intermediates_manager = pipeline_context.intermediates_manager

    # It would be good to implement a reference tracking algorithm here so we could
    # garbage collect results that are no longer needed by any steps
    # https://github.com/dagster-io/dagster/issues/811
    for step_level in step_levels:
        for step in step_level:
            if step_key_set and step.key not in step_key_set:
                continue

            step_context = pipeline_context.for_step(step)

            failed_inputs = [
                step_input.prev_output_handle.step_key
                for step_input in step.step_inputs
                if step_input.prev_output_handle.step_key in failed_or_skipped_steps
            ]
            if failed_inputs:
                step_context.log.info(
                    (
                        'Dependencies for step {step} failed: {failed_inputs}. Not executing.'
                    ).format(step=step.key, failed_inputs=failed_inputs)
                )
                failed_or_skipped_steps.add(step.key)
                yield DagsterEvent.step_skipped_event(step_context)
                continue

            uncovered_inputs = intermediates_manager.uncovered_inputs(step_context, step)
            if uncovered_inputs:
                # In partial pipeline execution, we may end up here without having validated the
                # missing dependent outputs were optional
                _assert_missing_inputs_optional(uncovered_inputs, execution_plan, step.key)

                step_context.log.info(
                    (
                        'Not all inputs covered for {step}. Not executing. Output missing for '
                        'inputs: {uncovered_inputs}'
                    ).format(uncovered_inputs=uncovered_inputs, step=step.key)
                )
                failed_or_skipped_steps.add(step.key)
                yield DagsterEvent.step_skipped_event(step_context)
                continue

            input_values = _create_input_values(step_context, intermediates_manager)

            for step_event in check.generator(
                execute_step_in_memory(step_context, input_values, intermediates_manager)
            ):
                check.inst(step_event, DagsterEvent)
                if step_event.is_step_failure:
                    failed_or_skipped_steps.add(step.key)
                yield step_event