def launch_scheduled_runs(
    instance,
    logger,
    end_datetime_utc,
    max_catchup_runs=DEFAULT_MAX_CATCHUP_RUNS,
    debug_crash_flags=None,
):
    schedules = [
        s
        for s in instance.all_stored_job_state(job_type=JobType.SCHEDULE)
        if s.status == JobStatus.RUNNING
    ]

    if not schedules:
        logger.info("Not checking for any runs since no schedules have been started.")
        return

    schedule_names = ", ".join([schedule.job_name for schedule in schedules])
    logger.info(f"Checking for new runs for the following schedules: {schedule_names}")

    for schedule_state in schedules:
        error_info = None
        try:
            origin = schedule_state.origin.external_repository_origin.repository_location_origin
            with origin.create_handle() as repo_location_handle:
                repo_location = repo_location_handle.create_location()
                launch_scheduled_runs_for_schedule(
                    instance,
                    logger,
                    schedule_state,
                    repo_location,
                    end_datetime_utc,
                    max_catchup_runs,
                    (debug_crash_flags.get(schedule_state.job_name) if debug_crash_flags else None),
                )
        except Exception:  # pylint: disable=broad-except
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            logger.error(
                f"Scheduler caught an error for schedule {schedule_state.job_name} : {error_info.to_string()}"
            )
        yield error_info
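# A minimal sketch (not from the original source) of the capture pattern every
# snippet in this collection shares: run code in a try block and convert any
# exception into a picklable SerializableErrorInfo via
# serializable_error_info_from_exc_info(sys.exc_info()). The import path matches
# the dagster versions these snippets come from; treat it as an assumption.
import sys

from dagster.utils.error import serializable_error_info_from_exc_info


def capture_error_sketch():
    try:
        raise RuntimeError("boom")
    except Exception:  # pylint: disable=broad-except
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        # error_info.message holds the formatted exception line, and
        # error_info.to_string() renders the full stack trace, as the
        # logger.error calls above and below rely on.
        return error_info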
def get_partition_names(args):
    check.inst_param(args, "args", PartitionNamesArgs)
    recon_repo = recon_repository_from_origin(args.repository_origin)
    definition = recon_repo.get_definition()
    partition_set_def = definition.get_partition_set_def(args.partition_set_name)
    try:
        with user_code_error_boundary(
            PartitionExecutionError,
            lambda: "Error occurred during the execution of the partition generation function for "
            "partition set {partition_set_name}".format(partition_set_name=partition_set_def.name),
        ):
            return ExternalPartitionNamesData(
                partition_names=partition_set_def.get_partition_names()
            )
    except PartitionExecutionError:
        return ExternalPartitionExecutionErrorData(
            serializable_error_info_from_exc_info(sys.exc_info())
        )
def execute_pipeline(self, repository_container, pipeline, pipeline_run):
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    try:
        return execute_pipeline_with_metadata(
            pipeline,
            create_typed_environment(pipeline, pipeline_run.config),
            execution_metadata=ExecutionMetadata(
                pipeline_run.run_id, event_callback=pipeline_run.handle_new_event
            ),
            throw_on_user_error=False,
        )
    except:  # pylint: disable=W0702
        pipeline_run.handle_new_event(
            build_synthetic_pipeline_error_record(
                pipeline_run.run_id,
                serializable_error_info_from_exc_info(sys.exc_info()),
                pipeline.name,
            )
        )
def test_syntax_error_serialized_message():
    serialized_error = None
    try:
        eval(  # pylint: disable=eval-used
            """
foo = bar
"""
        )
    except SyntaxError:
        serialized_error = serializable_error_info_from_exc_info(sys.exc_info())

    assert serialized_error
    assert (
        serialized_error.message
        == """  File "<string>", line 2
    foo = bar
        ^
SyntaxError: invalid syntax
"""
    )
def resolve_evaluationResult(self, graphene_info):
    if self._job_state.status != JobStatus.RUNNING:
        return None

    if self._job_state.job_type != JobType.SCHEDULE:
        return None

    repository_origin = self._job_state.origin.external_repository_origin
    if not graphene_info.context.has_repository_location(
        repository_origin.repository_location_origin.location_name
    ):
        return None

    repository_location = graphene_info.context.get_repository_location(
        repository_origin.repository_location_origin.location_name
    )
    if not repository_location.has_repository(repository_origin.repository_name):
        return None

    repository = repository_location.get_repository(repository_origin.repository_name)
    external_schedule = repository.get_external_schedule(self._job_state.name)
    timezone_str = external_schedule.execution_timezone
    if not timezone_str:
        timezone_str = "UTC"

    next_tick_datetime = next(external_schedule.execution_time_iterator(self._timestamp))
    schedule_time = to_timezone(pendulum.instance(next_tick_datetime), timezone_str)
    try:
        schedule_data = repository_location.get_external_schedule_execution_data(
            instance=graphene_info.context.instance,
            repository_handle=repository.handle,
            schedule_name=external_schedule.name,
            scheduled_execution_time=schedule_time,
        )
    except Exception:  # pylint: disable=broad-except
        schedule_data = serializable_error_info_from_exc_info(sys.exc_info())

    return GrapheneTickEvaluation(schedule_data)
def get_partition_config(args):
    check.inst_param(args, 'args', PartitionArgs)
    recon_repo = recon_repository_from_origin(args.repository_origin)
    definition = recon_repo.get_definition()
    partition_set_def = definition.get_partition_set_def(args.partition_set_name)
    partition = partition_set_def.get_partition(args.partition_name)
    try:
        with user_code_error_boundary(
            PartitionExecutionError,
            lambda: 'Error occurred during the evaluation of the `run_config_for_partition` '
            'function for partition set {partition_set_name}'.format(
                partition_set_name=partition_set_def.name
            ),
        ):
            run_config = partition_set_def.run_config_for_partition(partition)
            return ExternalPartitionConfigData(name=partition.name, run_config=run_config)
    except PartitionExecutionError:
        return ExternalPartitionExecutionErrorData(
            serializable_error_info_from_exc_info(sys.exc_info())
        )
def list_repositories_command(args):
    check.inst_param(args, "args", ListRepositoriesInput)
    python_file, module_name, working_directory, attribute = (
        args.python_file,
        args.module_name,
        args.working_directory,
        args.attribute,
    )
    try:
        loadable_targets = get_loadable_targets(
            python_file, module_name, working_directory, attribute
        )
        return ListRepositoriesResponse(
            [
                LoadableRepositorySymbol(
                    attribute=lt.attribute,
                    repository_name=repository_def_from_target_def(lt.target_definition).name,
                )
                for lt in loadable_targets
            ]
        )
    except Exception:  # pylint: disable=broad-except
        return serializable_error_info_from_exc_info(sys.exc_info())
def partition_tags_command(args):
    check.inst_param(args, 'args', PartitionApiCommandArgs)
    recon_repo = recon_repository_from_origin(args.repository_origin)
    definition = recon_repo.get_definition()
    partition_set_def = definition.get_partition_set_def(args.partition_set_name)
    partition = partition_set_def.get_partition(args.partition_name)
    try:
        with user_code_error_boundary(
            PartitionExecutionError,
            lambda: 'Error occurred during the evaluation of the `tags_for_partition` function for '
            'partition set {partition_set_name}'.format(partition_set_name=partition_set_def.name),
        ):
            tags = partition_set_def.tags_for_partition(partition)
            return ExternalPartitionTagsData(name=partition.name, tags=tags)
    except PartitionExecutionError:
        return ExternalPartitionExecutionErrorData(
            serializable_error_info_from_exc_info(sys.exc_info())
        )
def test_pipeline_with_invalid_definition_snapshot_api_grpc():
    with get_bar_repo_handle() as repo_handle:
        pipeline_handle = PipelineHandle("bar", repo_handle)

        try:
            _test_pipeline_subset_grpc(pipeline_handle, ["fail_subset"])
        except DagsterUserCodeProcessError:
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            assert re.match(
                (
                    r".*DagsterInvalidSubsetError[\s\S]*"
                    r"The attempted subset \['fail_subset'\] for pipeline bar results in an invalid pipeline"
                ),
                error_info.message,
            )
            assert re.match(
                (
                    r".*DagsterInvalidDefinitionError[\s\S]*"
                    r'add a dagster_type_loader for the type "InputTypeWithoutHydration"'
                ),
                error_info.cause.message,
            )
def get_external_pipeline_subset_result(
    recon_pipeline: ReconstructablePipeline, solid_selection: Optional[List[str]]
):
    check.inst_param(recon_pipeline, "recon_pipeline", ReconstructablePipeline)
    check.opt_list_param(solid_selection, "solid_selection", str)
    if solid_selection:
        try:
            sub_pipeline = recon_pipeline.subset_for_execution(solid_selection)
            definition = sub_pipeline.get_definition()
        except DagsterInvalidSubsetError:
            return ExternalPipelineSubsetResult(
                success=False, error=serializable_error_info_from_exc_info(sys.exc_info())
            )
    else:
        definition = recon_pipeline.get_definition()
    external_pipeline_data = external_pipeline_data_from_def(definition)
    return ExternalPipelineSubsetResult(
        success=True, external_pipeline_data=external_pipeline_data
    )
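# A hypothetical caller-side sketch for get_external_pipeline_subset_result above:
# the success flag tells the host process whether to read external_pipeline_data
# or to surface the serialized error captured in the user-code process. The
# function name is invented for illustration.
def handle_subset_result_sketch(result):
    if result.success:
        return result.external_pipeline_data
    # result.error is the SerializableErrorInfo built in the except branch above
    raise Exception("Subset selection failed:\n{}".format(result.error.to_string()))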
def _execute_command_in_child_process(event_queue, command):
    """Wraps the execution of a ChildProcessCommand.

    Handles errors and communicates across a queue with the parent process."""
    check.inst_param(command, "command", ChildProcessCommand)

    with capture_interrupts():
        pid = os.getpid()
        event_queue.put(ChildProcessStartEvent(pid=pid))
        try:
            for step_event in command.execute():
                event_queue.put(step_event)
            event_queue.put(ChildProcessDoneEvent(pid=pid))
        except (Exception, KeyboardInterrupt):  # pylint: disable=broad-except
            event_queue.put(
                ChildProcessSystemErrorEvent(
                    pid=pid, error_info=serializable_error_info_from_exc_info(sys.exc_info())
                )
            )
def core_execute_run(recon_pipeline, pipeline_run, instance):
    check.inst_param(recon_pipeline, "recon_pipeline", ReconstructablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)

    try:
        yield from execute_run_iterator(recon_pipeline, pipeline_run, instance)
    except (KeyboardInterrupt, DagsterExecutionInterruptedError):
        yield from _report_run_failed_if_not_finished(instance, pipeline_run.run_id)
        yield instance.report_engine_event(
            message="Pipeline execution terminated by interrupt",
            pipeline_run=pipeline_run,
        )
    except Exception:  # pylint: disable=broad-except
        yield instance.report_engine_event(
            "An exception was thrown during execution that is likely a framework error, "
            "rather than an error in user code.",
            pipeline_run,
            EngineEventData.engine_error(serializable_error_info_from_exc_info(sys.exc_info())),
        )
        yield from _report_run_failed_if_not_finished(instance, pipeline_run.run_id)
def get_external_execution_plan_snapshot(recon_pipeline, args):
    check.inst_param(recon_pipeline, "recon_pipeline", ReconstructablePipeline)
    check.inst_param(args, "args", ExecutionPlanSnapshotArgs)
    try:
        pipeline = (
            recon_pipeline.subset_for_execution(args.solid_selection)
            if args.solid_selection
            else recon_pipeline
        )
        return snapshot_from_execution_plan(
            create_execution_plan(
                pipeline=pipeline,
                run_config=args.run_config,
                mode=args.mode,
                step_keys_to_execute=args.step_keys_to_execute,
            ),
            args.pipeline_snapshot_id,
        )
    except:  # pylint: disable=bare-except
        return ExecutionPlanSnapshotErrorData(
            error=serializable_error_info_from_exc_info(sys.exc_info())
        )
def schedule_execution_data_command(args):
    recon_repo = recon_repository_from_origin(args.repository_origin)
    definition = recon_repo.get_definition()
    schedule_def = definition.get_schedule_def(args.schedule_name)
    instance = DagsterInstance.from_ref(args.instance_ref)
    schedule_context = ScheduleExecutionContext(instance)
    try:
        with user_code_error_boundary(
            ScheduleExecutionError,
            lambda: 'Error occurred during the execution of run_config_fn for schedule '
            '{schedule_name}'.format(schedule_name=schedule_def.name),
        ):
            run_config = schedule_def.get_run_config(schedule_context)
            schedule_execution_data = ExternalScheduleExecutionData(run_config=run_config)
    except ScheduleExecutionError:
        schedule_execution_data = ExternalScheduleExecutionData(
            error=serializable_error_info_from_exc_info(sys.exc_info())
        )
    return schedule_execution_data
def execute_pipeline(self, _, pipeline, pipeline_run, instance, raise_on_error):
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    try:
        event_list = []
        for event in execute_run_iterator(pipeline, pipeline_run, instance):
            event_list.append(event)
        return PipelineExecutionResult(pipeline, pipeline_run.run_id, event_list, lambda: None)
    except Exception:  # pylint: disable=broad-except
        if raise_on_error:
            six.reraise(*sys.exc_info())
        instance.handle_new_event(
            build_synthetic_pipeline_error_record(
                pipeline_run.run_id,
                serializable_error_info_from_exc_info(sys.exc_info()),
                pipeline.name,
            )
        )
def execution_plan_snapshot_command(args):
    check.inst_param(args, 'args', ExecutionPlanSnapshotArgs)

    recon_pipeline = (
        recon_pipeline_from_origin(args.pipeline_origin).subset_for_execution(
            args.solid_selection
        )
        if args.solid_selection
        else recon_pipeline_from_origin(args.pipeline_origin)
    )

    try:
        return snapshot_from_execution_plan(
            create_execution_plan(
                pipeline=recon_pipeline,
                run_config=args.run_config,
                mode=args.mode,
                step_keys_to_execute=args.step_keys_to_execute,
            ),
            args.pipeline_snapshot_id,
        )
    except:  # pylint: disable=bare-except
        return ExecutionPlanSnapshotErrorData(
            error=serializable_error_info_from_exc_info(sys.exc_info())
        )
def _execute_command_in_child_process(queue, command):
    '''Wraps the execution of a ChildProcessCommand.

    Handles errors and communicates across a queue with the parent process.'''
    check.inst_param(command, 'command', ChildProcessCommand)

    pid = os.getpid()
    queue.put(ChildProcessStartEvent(pid=pid))
    try:
        for step_event in command.execute():
            queue.put(step_event)
        queue.put(ChildProcessDoneEvent(pid=pid))
    except Exception:  # pylint: disable=broad-except
        queue.put(
            ChildProcessSystemErrorEvent(
                pid=pid, error_info=serializable_error_info_from_exc_info(sys.exc_info())
            )
        )
    finally:
        queue.close()
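# A hedged parent-side companion to the two _execute_command_in_child_process
# variants above: a hypothetical generator that drains the event queue, yields
# ordinary step events, stops on ChildProcessDoneEvent, and surfaces the
# SerializableErrorInfo carried by ChildProcessSystemErrorEvent. The event
# classes are the ones used above; this loop itself is illustrative only.
def _drain_child_events_sketch(event_queue):
    while True:
        event = event_queue.get()
        if isinstance(event, ChildProcessStartEvent):
            continue  # the child announced its pid; nothing else to do here
        if isinstance(event, ChildProcessDoneEvent):
            return  # clean shutdown
        if isinstance(event, ChildProcessSystemErrorEvent):
            # re-raise in the parent using the error captured in the child
            raise Exception(
                "Child process {pid} failed:\n{err}".format(
                    pid=event.pid, err=event.error_info.to_string()
                )
            )
        yield event  # an ordinary step event produced by command.execute()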
def _load_handle(self, location_name):
    existing_handle = self._location_handle_dict.get(location_name)
    if existing_handle:
        existing_handle.cleanup()
        del self._location_handle_dict[location_name]

    if self._location_error_dict.get(location_name):
        del self._location_error_dict[location_name]

    origin = self._location_origin_dict[location_name]
    try:
        handle = RepositoryLocationHandle.create_from_repository_location_origin(origin)
        self._location_handle_dict[location_name] = handle
    except Exception:  # pylint: disable=broad-except
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        self._location_error_dict[location_name] = error_info
        warnings.warn(
            "Error loading repository location {location_name}:{error_string}".format(
                location_name=location_name, error_string=error_info.to_string()
            )
        )
def get_external_schedule_execution(
    recon_repo,
    instance_ref,
    schedule_name,
    scheduled_execution_timestamp,
    scheduled_execution_timezone,
):
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)
    definition = recon_repo.get_definition()
    schedule_def = definition.get_schedule_def(schedule_name)
    with DagsterInstance.from_ref(instance_ref) as instance:
        scheduled_execution_time = (
            pendulum.from_timestamp(
                scheduled_execution_timestamp,
                tz=scheduled_execution_timezone,
            )
            if scheduled_execution_timestamp
            else None
        )
        schedule_context = ScheduleExecutionContext(instance, scheduled_execution_time)
        try:
            with user_code_error_boundary(
                ScheduleExecutionError,
                lambda: "Error occurred during the execution function for schedule "
                "{schedule_name}".format(schedule_name=schedule_def.name),
            ):
                return ExternalScheduleExecutionData.from_execution_data(
                    schedule_def.get_execution_data(schedule_context)
                )
        except ScheduleExecutionError:
            return ExternalScheduleExecutionErrorData(
                serializable_error_info_from_exc_info(sys.exc_info())
            )
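# A minimal sketch of the user_code_error_boundary pattern used by the partition
# and schedule commands above, assuming the boundary re-raises anything thrown by
# user code as the given framework error class so the caller only needs to catch
# one type. run_config_fn here is an invented stand-in for real user code; the
# import paths match the dagster versions these snippets come from.
import sys

from dagster.core.errors import ScheduleExecutionError, user_code_error_boundary
from dagster.utils.error import serializable_error_info_from_exc_info


def boundary_sketch(run_config_fn):
    try:
        with user_code_error_boundary(
            ScheduleExecutionError,
            lambda: "Error occurred during the execution of run_config_fn",
        ):
            # any exception raised in here surfaces as ScheduleExecutionError
            return run_config_fn()
    except ScheduleExecutionError:
        # hand back a serializable error instead of letting it propagate
        return serializable_error_info_from_exc_info(sys.exc_info())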
def get_external_pipeline_subset_or_raise(graphene_info, pipeline_name, solid_subset):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.str_param(pipeline_name, 'pipeline_name')
    check.opt_list_param(solid_subset, 'solid_subset', of_type=str)

    from dagster_graphql.schema.errors import DauphinInvalidSubsetError

    full_pipeline = get_external_pipeline_or_raise(graphene_info, pipeline_name)

    if solid_subset is None:
        return full_pipeline

    for solid_name in solid_subset:
        if not full_pipeline.has_solid_invocation(solid_name):
            raise UserFacingGraphQLError(
                DauphinInvalidSubsetError(
                    message='Solid "{solid_name}" does not exist in "{pipeline_name}"'.format(
                        solid_name=solid_name, pipeline_name=pipeline_name
                    ),
                    pipeline=graphene_info.schema.type_named('Pipeline')(full_pipeline),
                )
            )

    try:
        return graphene_info.context.get_external_pipeline_subset(pipeline_name, solid_subset)
    except DagsterInvalidDefinitionError:
        # This handles the case where a subset is constructed such that an unsatisfied
        # input cannot be hydrated from config. Currently this is only relevant for
        # the in-process case. Once we add the out-of-process case, we will communicate
        # this error through the communication channel and change which exception
        # is thrown.
        raise UserFacingGraphQLError(
            DauphinInvalidSubsetError(
                message=serializable_error_info_from_exc_info(sys.exc_info()).message,
                pipeline=graphene_info.schema.type_named('Pipeline')(full_pipeline),
            )
        )
def test_construct_log_string_with_error():
    try:
        raise ValueError("some error")
    except ValueError:
        error = serializable_error_info_from_exc_info(sys.exc_info())

    step_failure_event = DagsterEvent(
        event_type_value="STEP_FAILURE",
        pipeline_name="my_pipeline",
        step_key="solid2.compute",
        solid_handle=SolidHandle("solid2", None),
        step_kind_value="COMPUTE",
        logging_tags={},
        event_specific_data=StepFailureData(error=error, user_failure_data=None),
        message='Execution of step "solid2.compute" failed.',
        pid=54348,
    )
    message_props = {"dagster_event": step_failure_event, "pipeline_name": "my_pipeline"}
    synth_props = {
        "orig_message": step_failure_event.message,
        "run_id": "f79a8a93-27f1-41b5-b465-b35d0809b26d",
    }
    log_string = construct_log_string(
        message_props=message_props, logging_tags={}, synth_props=synth_props
    )
    expected_start = textwrap.dedent(
        """
        my_pipeline - f79a8a93-27f1-41b5-b465-b35d0809b26d - 54348 - STEP_FAILURE - Execution of step "solid2.compute" failed.

        ValueError: some error

          File "
        """
    ).strip()
    assert log_string.startswith(expected_start)
def execute_pipeline(self, repository_container, pipeline, pipeline_run, throw_on_user_error):
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    try:
        return execute_pipeline(
            pipeline,
            pipeline_run.config,
            execution_metadata=ExecutionMetadata(
                pipeline_run.run_id, event_callback=pipeline_run.handle_new_event
            ),
            throw_on_user_error=throw_on_user_error,
        )
    except:  # pylint: disable=W0702
        if throw_on_user_error:
            six.reraise(*sys.exc_info())
        pipeline_run.handle_new_event(
            build_synthetic_pipeline_error_record(
                pipeline_run.run_id,
                serializable_error_info_from_exc_info(sys.exc_info()),
                pipeline.name,
            )
        )
def get_solid_subset_pipeline_definition(graphene_info, selector):
    orig_pipeline = get_pipeline_definition(graphene_info, selector.name)
    for solid_name in selector.solid_subset:
        if not orig_pipeline.has_solid_named(solid_name):
            raise UserFacingGraphQLError(
                graphene_info.schema.type_named('InvalidSubsetError')(
                    message='Solid "{solid_name}" does not exist in "{pipeline_name}"'.format(
                        solid_name=solid_name, pipeline_name=selector.name
                    ),
                    pipeline=graphene_info.schema.type_named('Pipeline').from_pipeline_def(
                        orig_pipeline
                    ),
                )
            )
    try:
        return orig_pipeline.build_sub_pipeline(selector.solid_subset)
    except DagsterInvalidDefinitionError:
        raise UserFacingGraphQLError(
            graphene_info.schema.type_named('InvalidSubsetError')(
                message=serializable_error_info_from_exc_info(sys.exc_info()).message,
                pipeline=graphene_info.schema.type_named('Pipeline').from_pipeline_def(
                    orig_pipeline
                ),
            )
        )
def construct_step_failure_event_and_handle(pipeline_run, step_key, err, instance):
    step_failure_event = DagsterEvent(
        event_type_value=DagsterEventType.STEP_FAILURE.value,
        pipeline_name=pipeline_run.pipeline_name,
        step_key=step_key,
        event_specific_data=StepFailureData(
            error=serializable_error_info_from_exc_info(sys.exc_info()),
            user_failure_data=UserFailureData(label="K8sError"),
        ),
    )
    event_record = EventLogEntry(
        user_message=str(err),
        level=logging.ERROR,
        pipeline_name=pipeline_run.pipeline_name,
        run_id=pipeline_run.run_id,
        error_info=None,
        step_key=step_key,
        timestamp=time.time(),
        dagster_event=step_failure_event,
    )
    instance.handle_new_event(event_record)
    return step_failure_event
def run_iteration(self, curr_time):
    for daemon in self.daemons:
        if (not daemon.last_iteration_time) or (
            (curr_time - daemon.last_iteration_time).total_seconds()
            >= daemon.interval_seconds
        ):
            daemon.last_iteration_time = curr_time
            daemon.last_iteration_exception = None
            try:
                daemon.run_iteration()
            except Exception:  # pylint: disable=broad-except
                error_info = serializable_error_info_from_exc_info(sys.exc_info())
                daemon.last_iteration_exception = error_info
                self._logger.error(
                    "Caught error in {}:\n{}".format(daemon.daemon_type(), error_info)
                )

    if (not self._last_heartbeat_time) or (
        (curr_time - self._last_heartbeat_time).total_seconds()
        >= DAEMON_HEARTBEAT_INTERVAL_SECONDS
    ):
        self._last_heartbeat_time = curr_time
        for daemon in self.daemons:
            self._add_heartbeat(daemon)
def _load_handle(self, location_name):
    existing_handle = self._location_handle_dict.get(location_name)
    if existing_handle:
        # We don't clean up here anymore because we want these to last while being
        # used in other requests
        # existing_handle.cleanup()
        del self._location_handle_dict[location_name]

    if self._location_error_dict.get(location_name):
        del self._location_error_dict[location_name]

    origin = self._location_origin_dict[location_name]
    try:
        handle = origin.create_handle()
        self._location_handle_dict[location_name] = handle
    except Exception:  # pylint: disable=broad-except
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        self._location_error_dict[location_name] = error_info
        warnings.warn(
            "Error loading repository location {location_name}:{error_string}".format(
                location_name=location_name, error_string=error_info.to_string()
            )
        )
def start_run(self, execute_run_args):
    check.inst_param(execute_run_args, "execute_run_args", ExecuteExternalPipelineArgs)

    with DagsterInstance.from_ref(execute_run_args.instance_ref) as instance:
        try:
            res = self._query(
                "StartRun",
                api_pb2.StartRunRequest,
                serialized_execute_run_args=serialize_dagster_namedtuple(execute_run_args),
            )
            return deserialize_json_to_dagster_namedtuple(res.serialized_start_run_result)
        except Exception:  # pylint: disable=broad-except
            pipeline_run = instance.get_run_by_id(execute_run_args.pipeline_run_id)
            instance.report_engine_event(
                message="Unexpected error in IPC client",
                pipeline_run=pipeline_run,
                engine_event_data=EngineEventData.engine_error(
                    serializable_error_info_from_exc_info(sys.exc_info())
                ),
            )
            raise
def _run_iteration(
    self, instance, daemon_uuid, daemon_shutdown_event, grpc_server_registry, until=None
):
    # Build a list of any exceptions encountered during the iteration.
    # Once the iteration completes, this is copied to last_iteration_exceptions
    # which is used in the heartbeats. This guarantees that heartbeats contain the full
    # list of errors raised.
    self._current_iteration_exceptions = []

    daemon_generator = self.run_iteration(instance, grpc_server_registry)

    try:
        while (not daemon_shutdown_event.is_set()) and (
            not until or pendulum.now("UTC") < until
        ):
            try:
                result = check.opt_inst(
                    next(daemon_generator), tuple([SerializableErrorInfo, CompletedIteration])
                )
                if isinstance(result, CompletedIteration):
                    self._last_iteration_exceptions = self._current_iteration_exceptions
                    self._current_iteration_exceptions = []
                elif result:
                    self._current_iteration_exceptions.append(result)
            except StopIteration:
                self._last_iteration_exceptions = self._current_iteration_exceptions
                break
            except Exception:  # pylint: disable=broad-except
                error_info = serializable_error_info_from_exc_info(sys.exc_info())
                self._logger.error("Caught error:\n{}".format(error_info))
                self._current_iteration_exceptions.append(error_info)
                self._last_iteration_exceptions = self._current_iteration_exceptions
                break
            finally:
                self._check_add_heartbeat(instance, daemon_uuid)
    finally:
        # cleanup the generator if it was stopped part-way through
        daemon_generator.close()
def test_construct_log_string_with_user_code_error():
    try:
        with user_code_error_boundary(
            DagsterUserCodeExecutionError, lambda: "Error occurred while eating a banana"
        ):
            raise ValueError("some error")
    except DagsterUserCodeExecutionError:
        error = serializable_error_info_from_exc_info(sys.exc_info())

    log_string = make_log_string(error, error_source=ErrorSource.USER_CODE_ERROR)
    expected_start = textwrap.dedent(
        """
        my_pipeline - f79a8a93-27f1-41b5-b465-b35d0809b26d - 54348 - STEP_FAILURE - Execution of step "solid2" failed.

        dagster.core.errors.DagsterUserCodeExecutionError: Error occurred while eating a banana:

        ValueError: some error

        Stack Trace:
          File "
        """
    ).strip()
    assert log_string.startswith(expected_start)
def test_failure_error_display_string():
    try:
        with user_code_error_boundary(
            DagsterUserCodeExecutionError, lambda: "Error occurred while doing the thing"
        ):
            raise ValueError("some error")
    except DagsterUserCodeExecutionError:
        step_failure_data = StepFailureData(
            error=serializable_error_info_from_exc_info(sys.exc_info()),
            user_failure_data=None,
            error_source=ErrorSource.USER_CODE_ERROR,
        )

    assert step_failure_data.error_display_string.startswith(
        """
dagster.core.errors.DagsterUserCodeExecutionError: Error occurred while doing the thing:

ValueError: some error

Stack Trace:
  File "
""".strip()
    )
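# A hedged sketch of the cause chaining the two tests above depend on: when user
# code raises inside user_code_error_boundary, the captured SerializableErrorInfo
# is expected to describe the framework error and to carry the original user
# exception under .cause. The message and cause.message fields appear in the
# assertions above; cls_name is an additional assumption about the namedtuple's
# shape, so treat this test as illustrative rather than part of the source.
def test_error_info_cause_chain_sketch():
    try:
        with user_code_error_boundary(
            DagsterUserCodeExecutionError, lambda: "Error occurred in user code"
        ):
            raise ValueError("some error")
    except DagsterUserCodeExecutionError:
        error_info = serializable_error_info_from_exc_info(sys.exc_info())

    assert error_info.cls_name == "DagsterUserCodeExecutionError"
    assert "some error" in error_info.cause.message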