def ExecutionPlanSnapshot(self, request, _context):
    execution_plan_args = deserialize_json_to_dagster_namedtuple(
        request.serialized_execution_plan_snapshot_args
    )
    check.inst_param(execution_plan_args, 'execution_plan_args', ExecutionPlanSnapshotArgs)
    recon_pipeline = (
        recon_pipeline_from_origin(execution_plan_args.pipeline_origin).subset_for_execution(
            execution_plan_args.solid_selection
        )
        if execution_plan_args.solid_selection
        else recon_pipeline_from_origin(execution_plan_args.pipeline_origin)
    )
    execution_plan_snapshot = snapshot_from_execution_plan(
        create_execution_plan(
            pipeline=recon_pipeline,
            run_config=execution_plan_args.run_config,
            mode=execution_plan_args.mode,
            step_keys_to_execute=execution_plan_args.step_keys_to_execute,
        ),
        execution_plan_args.pipeline_snapshot_id,
    )
    return api_pb2.ExecutionPlanSnapshotReply(
        serialized_execution_plan_snapshot=serialize_dagster_namedtuple(execution_plan_snapshot)
    )

def launch_run(self, instance, run, external_pipeline):
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    recon_pipeline = recon_pipeline_from_origin(external_pipeline.get_origin())
    execute_run(recon_pipeline, run, self._instance)
    return run

def execute_list_versions_command(instance: DagsterInstance, kwargs: Dict[str, Any]) -> None:
    check.inst_param(instance, "instance", DagsterInstance)
    config = list(
        check.opt_tuple_param(kwargs.get("config"), "config", default=tuple(), of_type=str)
    )
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")
    if preset and config:
        raise click.UsageError("Can not use --preset with --config.")
    pipeline_origin = get_pipeline_or_job_python_origin_from_kwargs(kwargs)
    pipeline = recon_pipeline_from_origin(pipeline_origin)
    run_config = get_run_config_from_file_list(config)
    memoized_plan = create_execution_plan(
        pipeline,
        run_config=run_config,
        mode=mode,
        instance_ref=instance.get_ref(),
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    add_step_to_table(memoized_plan)

def execute_step_with_structured_logs_command(input_json):
    # Route SIGTERM to the current SIGINT handler so termination interrupts like Ctrl-C.
    signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteStepArgs)
    with (
        DagsterInstance.from_ref(args.instance_ref)
        if args.instance_ref
        else DagsterInstance.get()
    ) as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            ),
            run_config=args.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=args.mode,
        )
        retries = Retries.from_config(args.retries_dict)
        buff = []
        for event in execute_plan_iterator(
            execution_plan,
            pipeline_run,
            instance,
            run_config=args.run_config,
            retries=retries,
        ):
            buff.append(serialize_dagster_namedtuple(event))
        for line in buff:
            click.echo(line)

def launch_run(self, input_json):
    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteRunArgs)
    recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
    # Ensure all keys set by celery/bin/celery.py are cleared so they don't affect
    # nested configuration.
    for key in [
        'CELERY_BROKER_URL',
        'CELERY_BROKER_READ_URL',
        'CELERY_BROKER_WRITE_URL',
        'CELERY_RESULT_BACKEND',
    ]:
        os.environ.pop(key, None)
    with (
        DagsterInstance.from_ref(args.instance_ref)
        if args.instance_ref
        else DagsterInstance.get()
    ) as instance:
        buffer = []

        def send_to_buffer(event):
            buffer.append(serialize_dagster_namedtuple(event))

        _execute_run_command_body(
            self.request.id, recon_pipeline, args.pipeline_run_id, instance, send_to_buffer
        )
        for line in buffer:
            print(line)

def _execute_step_command_body(
    args: ExecuteStepArgs, instance: DagsterInstance, pipeline_run: PipelineRun
):
    single_step_key = (
        args.step_keys_to_execute[0]
        if args.step_keys_to_execute and len(args.step_keys_to_execute) == 1
        else None
    )
    try:
        check.inst(
            pipeline_run,
            PipelineRun,
            "Pipeline run with id '{}' not found for step execution".format(
                args.pipeline_run_id
            ),
        )
        if args.should_verify_step:
            success = verify_step(
                instance,
                pipeline_run,
                check.not_none(args.known_state).get_retry_state(),
                args.step_keys_to_execute,
            )
            if not success:
                return
        recon_pipeline = recon_pipeline_from_origin(
            args.pipeline_origin
        ).subset_for_execution_from_existing_pipeline(pipeline_run.solids_to_execute)
        execution_plan = create_execution_plan(
            recon_pipeline,
            run_config=pipeline_run.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=pipeline_run.mode,
            known_state=args.known_state,
        )
        yield from execute_plan_iterator(
            execution_plan,
            recon_pipeline,
            pipeline_run,
            instance,
            run_config=pipeline_run.run_config,
            retry_mode=args.retry_mode,
        )
    except (KeyboardInterrupt, DagsterExecutionInterruptedError):
        yield instance.report_engine_event(
            message="Step execution terminated by interrupt",
            pipeline_run=pipeline_run,
            step_key=single_step_key,
        )
        raise
    except Exception:
        yield instance.report_engine_event(
            "An exception was thrown during step execution that is likely a framework "
            "error, rather than an error in user code.",
            pipeline_run,
            EngineEventData.engine_error(
                serializable_error_info_from_exc_info(sys.exc_info())
            ),
            step_key=single_step_key,
        )
        raise

def execute_run_with_structured_logs_command(input_json):
    try:
        signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
    except ValueError:
        warnings.warn(
            (
                "Unexpected error attempting to manage signal handling on thread {thread_name}. "
                "You should not invoke this API (execute_run_with_structured_logs) from threads "
                "other than the main thread."
            ).format(thread_name=threading.current_thread().name)
        )
    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteRunArgs)
    recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
    with (
        DagsterInstance.from_ref(args.instance_ref)
        if args.instance_ref
        else DagsterInstance.get()
    ) as instance:
        buffer = []

        def send_to_buffer(event):
            buffer.append(serialize_dagster_namedtuple(event))

        _execute_run_command_body(recon_pipeline, args.pipeline_run_id, instance, send_to_buffer)
        for line in buffer:
            click.echo(line)

def execute_run_command(input_json):
    with capture_interrupts():
        args = deserialize_as(input_json, ExecuteRunArgs)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
        with (
            DagsterInstance.from_ref(args.instance_ref)
            if args.instance_ref
            else DagsterInstance.get()
        ) as instance:
            buffer = []

            def send_to_buffer(event):
                buffer.append(serialize_dagster_namedtuple(event))

            return_code = _execute_run_command_body(
                recon_pipeline,
                args.pipeline_run_id,
                instance,
                send_to_buffer,
                set_exit_code_on_failure=args.set_exit_code_on_failure or False,
            )
            for line in buffer:
                click.echo(line)
            if return_code != 0:
                sys.exit(return_code)

def execute_step_command(input_json):
    try:
        signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
    except ValueError:
        warnings.warn(
            (
                "Unexpected error attempting to manage signal handling on thread {thread_name}. "
                "You should not invoke this API (execute_step) from threads "
                "other than the main thread."
            ).format(thread_name=threading.current_thread().name)
        )
    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteStepArgs)
    with (
        DagsterInstance.from_ref(args.instance_ref)
        if args.instance_ref
        else DagsterInstance.get()
    ) as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        check.inst(
            pipeline_run,
            PipelineRun,
            "Pipeline run with id '{}' not found for step execution".format(args.pipeline_run_id),
        )
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
        retries = Retries.from_config(args.retries_dict)
        if args.should_verify_step:
            success = verify_step(instance, pipeline_run, retries, args.step_keys_to_execute)
            if not success:
                return
        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            ),
            run_config=pipeline_run.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=pipeline_run.mode,
        )
        buff = []
        # Signal that step execution is being skipped and return without running the plan.
        if should_skip_step(execution_plan, instance=instance, run_id=pipeline_run.run_id):
            click.echo(serialize_dagster_namedtuple(StepExecutionSkipped()))
            return
        for event in execute_plan_iterator(
            execution_plan,
            pipeline_run,
            instance,
            run_config=pipeline_run.run_config,
            retries=retries,
        ):
            buff.append(serialize_dagster_namedtuple(event))
        for line in buff:
            click.echo(line)

def execute_scaffold_command(cli_args, print_fn):
    external_pipeline = get_external_pipeline_from_kwargs(cli_args, DagsterInstance.get())
    # We should move this to use external pipeline
    # https://github.com/dagster-io/dagster/issues/2556
    pipeline = recon_pipeline_from_origin(external_pipeline.get_origin())
    skip_non_required = cli_args['print_only_required']
    do_scaffold_command(pipeline.get_definition(), print_fn, skip_non_required)

def execute_run_command(input_file, output_file):
    args = check.inst(read_unary_input(input_file), ExecuteRunArgs)
    recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
    return _execute_run_command_body(
        output_file,
        recon_pipeline,
        args.pipeline_run_id,
        args.instance_ref,
    )

def execute_execute_command(instance, kwargs):
    check.inst_param(instance, "instance", DagsterInstance)
    config = list(check.opt_tuple_param(kwargs.get("config"), "config", default=(), of_type=str))
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")
    if preset and config:
        raise click.UsageError("Can not use --preset with --config.")
    tags = get_tags_from_args(kwargs)
    pipeline_origin = get_pipeline_python_origin_from_kwargs(kwargs)
    pipeline = recon_pipeline_from_origin(pipeline_origin)
    solid_selection = get_solid_selection_from_args(kwargs)
    result = do_execute_command(pipeline, instance, config, mode, tags, solid_selection, preset)
    if not result.success:
        raise click.ClickException("Pipeline run {} resulted in failure.".format(result.run_id))
    return result

def test_reconstruct_from_origin():
    origin = PipelinePythonOrigin(
        pipeline_name="foo_pipe",
        repository_origin=RepositoryPythonOrigin(
            executable_path="my_python",
            code_pointer=FileCodePointer(
                python_file="foo.py",
                fn_name="bar",
                working_directory="/",
            ),
            container_image="my_image",
            entry_point=DEFAULT_DAGSTER_ENTRY_POINT,
            container_context={"docker": {"registry": "my_reg"}},
        ),
    )
    recon_pipeline = recon_pipeline_from_origin(origin)
    assert recon_pipeline.pipeline_name == origin.pipeline_name
    assert recon_pipeline.repository.pointer == origin.repository_origin.code_pointer
    assert recon_pipeline.repository.container_image == origin.repository_origin.container_image
    assert recon_pipeline.repository.executable_path == origin.repository_origin.executable_path
    assert (
        recon_pipeline.repository.container_context == origin.repository_origin.container_context
    )

def execute_execute_command(env, cli_args, mode=None, tags=None):
    external_pipeline = get_external_pipeline_from_kwargs(cli_args)
    # We should move this to use external pipeline
    # https://github.com/dagster-io/dagster/issues/2556
    pipeline = recon_pipeline_from_origin(external_pipeline.handle.get_origin())
    solid_selection = get_solid_selection_from_args(cli_args)
    return do_execute_command(pipeline, env, mode, tags, solid_selection)

def execute_scaffold_command(cli_args, print_fn, using_job_op_graph_apis=False):
    pipeline_origin = get_pipeline_or_job_python_origin_from_kwargs(
        cli_args, using_job_op_graph_apis
    )
    pipeline = recon_pipeline_from_origin(pipeline_origin)
    skip_non_required = cli_args["print_only_required"]
    do_scaffold_command(pipeline.get_definition(), print_fn, skip_non_required)

def execute_step_command(input_json):
    with capture_interrupts():
        args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteStepArgs)
        with (
            DagsterInstance.from_ref(args.instance_ref)
            if args.instance_ref
            else DagsterInstance.get()
        ) as instance:
            pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
            check.inst(
                pipeline_run,
                PipelineRun,
                "Pipeline run with id '{}' not found for step execution".format(
                    args.pipeline_run_id
                ),
            )
            if args.should_verify_step:
                success = verify_step(
                    instance,
                    pipeline_run,
                    args.known_state.get_retry_state(),
                    args.step_keys_to_execute,
                )
                if not success:
                    return
            recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
            execution_plan = create_execution_plan(
                recon_pipeline.subset_for_execution_from_existing_pipeline(
                    pipeline_run.solids_to_execute
                ),
                run_config=pipeline_run.run_config,
                step_keys_to_execute=args.step_keys_to_execute,
                mode=pipeline_run.mode,
                known_state=args.known_state,
            )
            buff = []
            # Signal that step execution is being skipped and return without running the plan.
            if should_skip_step(execution_plan, instance=instance, run_id=pipeline_run.run_id):
                click.echo(serialize_dagster_namedtuple(StepExecutionSkipped()))
                return
            for event in execute_plan_iterator(
                execution_plan,
                pipeline_run,
                instance,
                run_config=pipeline_run.run_config,
                retry_mode=args.retry_mode,
            ):
                buff.append(serialize_dagster_namedtuple(event))
            for line in buff:
                click.echo(line)

def execution_plan_snapshot_command(args):
    check.inst_param(args, 'args', ExecutionPlanSnapshotArgs)
    recon_pipeline = (
        recon_pipeline_from_origin(args.pipeline_origin).subset_for_execution(
            args.solid_selection
        )
        if args.solid_selection
        else recon_pipeline_from_origin(args.pipeline_origin)
    )
    return snapshot_from_execution_plan(
        create_execution_plan(
            pipeline=recon_pipeline,
            run_config=args.run_config,
            mode=args.mode,
            step_keys_to_execute=args.step_keys_to_execute,
        ),
        args.pipeline_snapshot_id,
    )

def execution_plan_snapshot_command(args):
    check.inst_param(args, 'args', ExecutionPlanSnapshotArgs)
    recon_pipeline = (
        recon_pipeline_from_origin(args.pipeline_origin).subset_for_execution(
            args.solid_selection
        )
        if args.solid_selection
        else recon_pipeline_from_origin(args.pipeline_origin)
    )
    try:
        return snapshot_from_execution_plan(
            create_execution_plan(
                pipeline=recon_pipeline,
                run_config=args.run_config,
                mode=args.mode,
                step_keys_to_execute=args.step_keys_to_execute,
            ),
            args.pipeline_snapshot_id,
        )
    except:  # pylint: disable=bare-except
        return ExecutionPlanSnapshotErrorData(
            error=serializable_error_info_from_exc_info(sys.exc_info())
        )

def execute_run_command(input_file, output_file):
    args = check.inst(read_unary_input(input_file), ExecuteRunArgs)
    recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
    with DagsterInstance.from_ref(args.instance_ref) as instance:
        with ipc_write_stream(output_file) as ipc_stream:

            def send_to_stream(event):
                ipc_stream.send(event)

            return _execute_run_command_body(
                recon_pipeline, args.pipeline_run_id, instance, send_to_stream
            )

def execute_run_command(input_file, output_file):
    # Ensure that interrupts from the run launcher only happen inside user code or specially
    # designated checkpoints.
    with delay_interrupts():
        args = check.inst(read_unary_input(input_file), ExecuteRunArgs)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
        with DagsterInstance.from_ref(args.instance_ref) as instance:
            with ipc_write_stream(output_file) as ipc_stream:

                def send_to_stream(event):
                    ipc_stream.send(event)

                return _execute_run_command_body(
                    recon_pipeline, args.pipeline_run_id, instance, send_to_stream
                )

def execute_list_versions_command(instance, kwargs): check.inst_param(instance, "instance", DagsterInstance) config = list( check.opt_tuple_param(kwargs.get("config"), "config", default=(), of_type=str)) preset = kwargs.get("preset") mode = kwargs.get("mode") if preset and config: raise click.UsageError("Can not use --preset with --config.") pipeline_origin = get_pipeline_python_origin_from_kwargs(kwargs) pipeline = recon_pipeline_from_origin(pipeline_origin) run_config = get_run_config_from_file_list(config) pipeline_def = pipeline.get_definition() pipeline_name = pipeline_def.name execution_plan = create_execution_plan(pipeline.get_definition(), run_config=run_config, mode=mode) step_output_versions = resolve_step_output_versions( execution_plan, environment_config=EnvironmentConfig.build(pipeline_def, run_config=run_config, mode=mode), mode_def=pipeline_def.get_mode_definition(mode), ) step_output_addresses = instance.get_addresses_for_step_output_versions({ (pipeline_name, step_output_handle): version for step_output_handle, version in step_output_versions.items() if version }) table = [] for step_output_handle, version in step_output_versions.items(): address = step_output_addresses.get( (pipeline_name, step_output_handle), "None") table.append([ "{key}.{output}".format(key=step_output_handle.step_key, output=step_output_handle.output_name), version, address, ]) table_str = tabulate(table, headers=["Step Output", "Version", "Address"], tablefmt="github") click.echo(table_str)
def test_origin_id(user_process_api):
    with RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable, python_file=__file__, attribute="the_repo"
        ),
        location_name="the_location",
        user_process_api=user_process_api,
    ) as handle:
        host_location = RepositoryLocation.from_handle(handle)
        external_pipeline = host_location.get_repository("the_repo").get_full_external_pipeline(
            "the_pipe"
        )
        recon_pipeline = recon_pipeline_from_origin(external_pipeline.get_origin())
        assert external_pipeline.get_origin_id() == recon_pipeline.get_origin_id()

def test_origin_id():
    host_location = PythonEnvRepositoryLocation(
        RepositoryLocationHandle.create_python_env_location(
            loadable_target_origin=LoadableTargetOrigin(
                executable_path=sys.executable, python_file=__file__, attribute="the_repo"
            ),
            location_name="the_location",
        )
    )
    external_pipeline = host_location.get_repository("the_repo").get_full_external_pipeline(
        "the_pipe"
    )
    recon_pipeline = recon_pipeline_from_origin(external_pipeline.get_origin())
    assert external_pipeline.get_origin_id() == recon_pipeline.get_origin_id()

def _synchronously_execute_run_within_hosted_user_process(
    graphene_info,
    repository_location_name,
    repository_name,
    run_id,
):
    run_info_or_error = get_run_execution_info_for_created_run_or_error(
        graphene_info, repository_location_name, repository_name, run_id
    )
    if not isinstance(run_info_or_error, RunExecutionInfo):
        # If it is not a success, the return value is the dauphin error.
        return run_info_or_error
    external_pipeline, pipeline_run = run_info_or_error
    recon_pipeline = recon_pipeline_from_origin(external_pipeline.get_origin())
    execute_run(recon_pipeline, pipeline_run, graphene_info.context.instance)
    return graphene_info.schema.type_named("ExecuteRunInProcessSuccess")(
        run=graphene_info.schema.type_named("PipelineRun")(pipeline_run)
    )

def execute_execute_command(env_file_list, cli_args, instance=None, mode=None, tags=None):
    check.opt_list_param(env_file_list, 'env_file_list', of_type=str)
    check.opt_inst_param(instance, 'instance', DagsterInstance)
    instance = instance if instance else DagsterInstance.get()
    external_pipeline = get_external_pipeline_from_kwargs(cli_args, instance)
    # We should move this to use external pipeline
    # https://github.com/dagster-io/dagster/issues/2556
    pipeline = recon_pipeline_from_origin(external_pipeline.handle.get_origin())
    solid_selection = get_solid_selection_from_args(cli_args)
    return do_execute_command(pipeline, instance, env_file_list, mode, tags, solid_selection)

def execute_execute_command_with_preset(preset_name, cli_args, instance, _mode):
    external_pipeline = get_external_pipeline_from_kwargs(cli_args, instance)
    # We should move this to use external pipeline
    # https://github.com/dagster-io/dagster/issues/2556
    pipeline = recon_pipeline_from_origin(external_pipeline.handle.get_origin())
    tags = get_tags_from_args(cli_args)
    solid_selection = get_solid_selection_from_args(cli_args)
    return execute_pipeline(
        pipeline,
        preset=preset_name,
        instance=DagsterInstance.get(),
        raise_on_error=False,
        tags=tags,
        solid_selection=solid_selection,
    )

def execute_list_versions_command(instance, kwargs): check.inst_param(instance, "instance", DagsterInstance) config = list( check.opt_tuple_param(kwargs.get("config"), "config", default=(), of_type=str)) preset = kwargs.get("preset") mode = kwargs.get("mode") if preset and config: raise click.UsageError("Can not use --preset with --config.") pipeline_origin = get_pipeline_python_origin_from_kwargs(kwargs) pipeline = recon_pipeline_from_origin(pipeline_origin) run_config = get_run_config_from_file_list(config) environment_config = EnvironmentConfig.build(pipeline.get_definition(), run_config, mode=mode) execution_plan = ExecutionPlan.build(pipeline, environment_config) step_output_versions = resolve_step_output_versions( pipeline.get_definition(), execution_plan, environment_config) memoized_plan = resolve_memoized_execution_plan(execution_plan, pipeline.get_definition(), run_config, instance, environment_config) # the step keys that we need to execute are those which do not have their inputs populated. step_keys_not_stored = set(memoized_plan.step_keys_to_execute) table = [] for step_output_handle, version in step_output_versions.items(): table.append([ "{key}.{output}".format(key=step_output_handle.step_key, output=step_output_handle.output_name), version, "stored" if step_output_handle.step_key not in step_keys_not_stored else "to-be-recomputed", ]) table_str = tabulate( table, headers=["Step Output", "Version", "Status of Output"], tablefmt="github") click.echo(table_str)
def execute_run_with_structured_logs_command(input_json):
    signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteRunArgs)
    recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
    with (
        DagsterInstance.from_ref(args.instance_ref)
        if args.instance_ref
        else DagsterInstance.get()
    ) as instance:
        buffer = []

        def send_to_buffer(event):
            buffer.append(serialize_dagster_namedtuple(event))

        _execute_run_command_body(recon_pipeline, args.pipeline_run_id, instance, send_to_buffer)
        for line in buffer:
            click.echo(line)

def ExternalPipelineSubsetSnapshot(self, request, _context):
    pipeline_subset_snapshot_args = deserialize_json_to_dagster_namedtuple(
        request.serialized_pipeline_subset_snapshot_args
    )
    check.inst_param(
        pipeline_subset_snapshot_args,
        'pipeline_subset_snapshot_args',
        PipelineSubsetSnapshotArgs,
    )
    return api_pb2.ExternalPipelineSubsetSnapshotReply(
        serialized_external_pipeline_subset_result=serialize_dagster_namedtuple(
            get_external_pipeline_subset_result(
                recon_pipeline_from_origin(pipeline_subset_snapshot_args.pipeline_origin),
                pipeline_subset_snapshot_args.solid_selection,
            )
        )
    )

def execute_run_command(input_json):
    with capture_interrupts():
        args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteRunArgs)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
        with (
            DagsterInstance.from_ref(args.instance_ref)
            if args.instance_ref
            else DagsterInstance.get()
        ) as instance:
            buffer = []

            def send_to_buffer(event):
                buffer.append(serialize_dagster_namedtuple(event))

            _execute_run_command_body(
                recon_pipeline, args.pipeline_run_id, instance, send_to_buffer
            )
            for line in buffer:
                click.echo(line)

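# Hedged usage sketch (illustrative; not taken from the snippets above). Every function
# in this collection rebuilds a ReconstructablePipeline from a serialized origin via
# recon_pipeline_from_origin before executing anything. Below is a minimal round trip of
# that pattern, assuming a dagster version where ReconstructablePipeline exposes
# get_python_origin() (older releases use get_origin()); `example_pipeline` is
# hypothetical user code, defined at module scope as reconstructable() requires.
from dagster import execute_pipeline, pipeline, reconstructable, solid
from dagster.core.definitions.reconstructable import recon_pipeline_from_origin


@solid
def do_nothing(_context):
    pass


@pipeline
def example_pipeline():
    do_nothing()


def origin_round_trip_example():
    recon = reconstructable(example_pipeline)  # serializable wrapper around the definition
    origin = recon.get_python_origin()  # executable path + code pointer + pipeline name
    rebuilt = recon_pipeline_from_origin(origin)  # the call shared by every snippet above
    assert rebuilt.pipeline_name == recon.pipeline_name
    # The rebuilt pipeline can be executed just like the original wrapper.
    execute_pipeline(rebuilt)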