def _run_iteration(self, instance, curr_time, daemon_uuid):
    """Drive ``self.run_iteration(instance)`` to completion, collecting its errors.

    Every truthy value produced by the generator (validated as an optional
    ``SerializableErrorInfo``) is appended to ``_current_iteration_exceptions``.
    When the generator is exhausted, or anything raises while stepping it, that
    list is published as ``_last_iteration_exceptions`` and the pass ends.

    Args:
        instance: Passed to ``self.run_iteration`` and ``self._check_add_heartbeat``.
        curr_time: Forwarded unchanged to ``self._check_add_heartbeat``.
        daemon_uuid: Forwarded unchanged to ``self._check_add_heartbeat``.
    """
    # Errors seen during this pass. They are only published to
    # _last_iteration_exceptions once the pass is over, so a heartbeat never
    # reports a partial list.
    self._current_iteration_exceptions = []

    is_first_pass = not self._last_heartbeat_time
    steps = self.run_iteration(instance)

    finished = False
    while not finished:
        try:
            yielded = check.opt_inst(next(steps), SerializableErrorInfo)
            if yielded:
                self._current_iteration_exceptions.append(yielded)
        except StopIteration:
            self._last_iteration_exceptions = self._current_iteration_exceptions
            finished = True
        except Exception:  # pylint: disable=broad-except
            caught = serializable_error_info_from_exc_info(sys.exc_info())
            self._logger.error("Caught error:\n{}".format(caught))
            self._current_iteration_exceptions.append(caught)
            self._last_iteration_exceptions = self._current_iteration_exceptions
            finished = True
        finally:
            # On the very first pass there is no previous pass whose errors
            # could be reported, so the heartbeat is deferred until the pass
            # is over. On later passes it is checked after every step.
            if not is_first_pass:
                self._check_add_heartbeat(instance, curr_time, daemon_uuid)

    if is_first_pass:
        self._check_add_heartbeat(instance, curr_time, daemon_uuid)
def run_daemon_loop(
    self,
    instance_ref,
    daemon_uuid,
    daemon_shutdown_event,
    gen_workspace,
    heartbeat_interval_seconds,
    error_interval_seconds,
    until=None,
):
    """Step ``self.core_loop`` until shutdown is requested or ``until`` has passed.

    Truthy values yielded by the core loop, and any exception raised while
    stepping it, are pushed onto the left of ``self._errors`` together with the
    current UTC time. After an exception the core loop generator is closed and
    a fresh one is started. ``self._check_add_heartbeat`` is called after every
    step; a failure there is logged and does not stop the loop.

    Args:
        instance_ref: Reference handed to ``DagsterInstance.from_ref``.
        daemon_uuid: Forwarded to ``self._check_add_heartbeat``.
        daemon_shutdown_event: Object whose ``is_set()`` ends the loop when true.
        gen_workspace: Callable taking the instance and returning a context
            manager that provides an ``IWorkspace``.
        heartbeat_interval_seconds: Forwarded to ``self._check_add_heartbeat``.
        error_interval_seconds: Forwarded to ``self._check_add_heartbeat``.
        until: Optional cut-off compared against ``pendulum.now("UTC")``; when
            falsy the loop has no time limit.
    """
    from dagster.core.telemetry_upload import uploading_logging_thread

    def should_continue():
        # Stop as soon as shutdown is requested; otherwise honour the cut-off.
        if daemon_shutdown_event.is_set():
            return False
        return not until or pendulum.now("UTC") < until

    def report_heartbeat(instance):
        # A heartbeat failure must not take down the loop, so it is only logged.
        try:
            self._check_add_heartbeat(
                instance,
                daemon_uuid,
                heartbeat_interval_seconds,
                error_interval_seconds,
            )
        except Exception:
            self._logger.error(
                "Failed to add heartbeat: \n{}".format(
                    serializable_error_info_from_exc_info(sys.exc_info())
                )
            )

    # Each loop runs in its own thread with its own instance and IWorkspace
    with DagsterInstance.from_ref(instance_ref) as instance:
        with uploading_logging_thread():
            with gen_workspace(instance) as workspace:
                check.inst_param(workspace, "workspace", IWorkspace)

                loop = self.core_loop(instance, workspace)
                try:
                    while should_continue():
                        try:
                            yielded = check.opt_inst(next(loop), SerializableErrorInfo)
                            if yielded:
                                self._errors.appendleft((yielded, pendulum.now("UTC")))
                        except StopIteration:
                            self._logger.error(
                                "Daemon loop finished without raising an error - "
                                "daemon loops should run forever until they are interrupted."
                            )
                            break
                        except Exception:
                            failure = serializable_error_info_from_exc_info(sys.exc_info())
                            self._logger.error(
                                "Caught error, daemon loop will restart:\n{}".format(failure)
                            )
                            self._errors.appendleft((failure, pendulum.now("UTC")))
                            # Discard the failed generator and start over.
                            loop.close()
                            loop = self.core_loop(instance, workspace)
                        finally:
                            report_heartbeat(instance)
                finally:
                    # cleanup the generator if it was stopped part-way through
                    loop.close()
def get_run_records(
    self,
    filters: PipelineRunsFilter = None,
    limit: int = None,
    order_by: str = None,
    ascending: bool = False,
    cursor: str = None,
    bucket_by: Optional[Union[JobBucket, TagBucket]] = None,
) -> List[RunRecord]:
    """Query the runs table and convert each fetched row into a ``RunRecord``.

    Args:
        filters: Optional ``PipelineRunsFilter``; an empty filter is used when
            omitted.
        limit: Optional integer forwarded to ``self._runs_query``.
        order_by: Forwarded to ``self._runs_query``.
        ascending: Forwarded to ``self._runs_query``.
        cursor: Forwarded to ``self._runs_query``.
        bucket_by: Forwarded to ``self._runs_query``.

    Returns:
        One ``RunRecord`` per row returned by ``self.fetchall``. ``start_time``
        and ``end_time`` are ``None`` when the row does not carry them.
    """
    filters = check.opt_inst_param(
        filters, "filters", PipelineRunsFilter, default=PipelineRunsFilter()
    )
    check.opt_int_param(limit, "limit")

    # only fetch columns we use to build RunRecord
    selected = ["id", "run_body", "create_timestamp", "update_timestamp"]
    if self.has_run_stats_index_cols():
        selected.extend(("start_time", "end_time"))

    fetched = self.fetchall(
        self._runs_query(
            filters=filters,
            limit=limit,
            columns=selected,
            order_by=order_by,
            ascending=ascending,
            cursor=cursor,
            bucket_by=bucket_by,
        )
    )

    records = []
    for row in fetched:
        storage_id = check.int_param(row["id"], "id")
        run = deserialize_as(check.str_param(row["run_body"], "run_body"), PipelineRun)
        created = check.inst(row["create_timestamp"], datetime)
        updated = check.inst(row["update_timestamp"], datetime)

        started = None
        if "start_time" in row:
            started = check.opt_inst(row["start_time"], float)

        ended = None
        if "end_time" in row:
            ended = check.opt_inst(row["end_time"], float)

        records.append(
            RunRecord(
                storage_id=storage_id,
                pipeline_run=run,
                create_timestamp=created,
                update_timestamp=updated,
                start_time=started,
                end_time=ended,
            )
        )
    return records
def run_iteration(self, curr_time):
    """Run one controller pass, interleaving the steps of every due daemon.

    A daemon is due when its type has no entry in ``_last_heartbeat_times`` or
    when at least ``daemon.interval_seconds`` have elapsed since its entry in
    ``_last_iteration_times``. Due daemons are stepped round-robin, one
    ``next()`` at a time, until each generator is exhausted or raises.

    Args:
        curr_time: Time of this pass; subtracted from the stored iteration
            times and forwarded to ``_initialize_daemon_iteration`` and
            ``_check_add_heartbeat``.
    """
    # Local import keeps this method self-contained; deque gives O(1) rotation
    # where list.pop(0) shifts every remaining element.
    from collections import deque

    first_controller_iteration = not self._last_heartbeat_times

    # Queue of (daemon, generator) pairs still to be stepped.
    pending = deque()
    for daemon in self.daemons:
        daemon_type = daemon.daemon_type()
        if (daemon_type not in self._last_heartbeat_times) or (
            (curr_time - self._last_iteration_times[daemon_type]).total_seconds()
            >= daemon.interval_seconds
        ):
            self._initialize_daemon_iteration(daemon_type, curr_time)
            pending.append((daemon, daemon.run_iteration()))

    # Call next on each daemon generator function, rotating through the daemons.
    while pending:
        daemon, generator = pending.popleft()
        daemon_type = daemon.daemon_type()
        try:
            error_info = check.opt_inst(next(generator), SerializableErrorInfo)
            if error_info:
                self._current_iteration_exceptions[daemon_type].append(error_info)
        except StopIteration:
            # daemon has completed an iteration, don't add the generator back.
            # We've completed an iteration, so errors can be reported in heartbeat
            self._last_iteration_exceptions[daemon_type] = self._current_iteration_exceptions[
                daemon_type
            ]
        except Exception:  # pylint: disable=broad-except
            # log errors in daemon
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            self._logger.error("Caught error in {}:\n{}".format(daemon_type, error_info))
            self._current_iteration_exceptions[daemon_type].append(error_info)
            # The iteration stopped short, so errors can be reported in heartbeat
            self._last_iteration_exceptions[daemon_type] = self._current_iteration_exceptions[
                daemon_type
            ]
        else:
            # append to the back, so other daemons will execute next
            pending.append((daemon, generator))

        if not first_controller_iteration:
            # wait until first iteration completes, otherwise heartbeats may be
            # reported before errors occur
            self._check_add_heartbeat(daemon_type, curr_time)

    # All initial iterations are now complete, we can report all heartbeats
    if first_controller_iteration:
        for daemon in self.daemons:
            self._check_add_heartbeat(daemon.daemon_type(), curr_time)
def _run_iteration( self, instance, daemon_uuid, daemon_shutdown_event, workspace, heartbeat_interval_seconds, error_interval_seconds, until=None, ): # Clear out the workspace locations after each iteration workspace.cleanup() daemon_generator = self.run_iteration(instance, workspace) try: while (not daemon_shutdown_event.is_set()) and ( not until or pendulum.now("UTC") < until): try: result = check.opt_inst(next(daemon_generator), SerializableErrorInfo) if result: self._errors.append((result, pendulum.now("UTC"))) except StopIteration: break except Exception: # pylint: disable=broad-except error_info = serializable_error_info_from_exc_info( sys.exc_info()) self._logger.error("Caught error:\n{}".format(error_info)) self._errors.append((error_info, pendulum.now("UTC"))) break finally: try: self._check_add_heartbeat( instance, daemon_uuid, heartbeat_interval_seconds, error_interval_seconds, ) except Exception: # pylint: disable=broad-except self._logger.error( "Failed to add heartbeat: \n{}".format( serializable_error_info_from_exc_info( sys.exc_info()))) finally: # cleanup the generator if it was stopped part-way through daemon_generator.close()
def _run_iteration(self, instance, daemon_uuid, daemon_shutdown_event, workspace, until=None): # Build a list of any exceptions encountered during the iteration. # Once the iteration completes, this is copied to last_iteration_exceptions # which is used in the heartbeats. This guarantees that heartbeats contain the full # list of errors raised. self._current_iteration_exceptions = [] # Clear out the workspace locations after each iteration workspace.cleanup() daemon_generator = self.run_iteration(instance, workspace) try: while (not daemon_shutdown_event.is_set()) and ( not until or pendulum.now("UTC") < until ): try: result = check.opt_inst( next(daemon_generator), tuple([SerializableErrorInfo, CompletedIteration]) ) if isinstance(result, CompletedIteration): self._last_iteration_exceptions = self._current_iteration_exceptions self._current_iteration_exceptions = [] elif result: self._current_iteration_exceptions.append(result) except StopIteration: self._last_iteration_exceptions = self._current_iteration_exceptions break except Exception: # pylint: disable=broad-except error_info = serializable_error_info_from_exc_info(sys.exc_info()) self._logger.error("Caught error:\n{}".format(error_info)) self._current_iteration_exceptions.append(error_info) self._last_iteration_exceptions = self._current_iteration_exceptions break finally: try: self._check_add_heartbeat(instance, daemon_uuid) except Exception: # pylint: disable=broad-except self._logger.error( "Failed to add heartbeat: \n{}".format( serializable_error_info_from_exc_info(sys.exc_info()) ) ) finally: # cleanup the generator if it was stopped part-way through daemon_generator.close()
def _run_iteration(self, instance, daemon_uuid, daemon_shutdown_event, grpc_server_registry, until=None): # Build a list of any exceptions encountered during the iteration. # Once the iteration completes, this is copied to last_iteration_exceptions # which is used in the heartbeats. This guarantees that heartbeats contain the full # list of errors raised. self._current_iteration_exceptions = [] daemon_generator = self.run_iteration(instance, daemon_shutdown_event, grpc_server_registry) while (not daemon_shutdown_event.is_set()) and ( not until or pendulum.now("UTC") < until): try: result = check.opt_inst( next(daemon_generator), tuple([SerializableErrorInfo, CompletedIteration])) if isinstance(result, CompletedIteration): self._last_iteration_exceptions = self._current_iteration_exceptions self._current_iteration_exceptions = [] elif result: self._current_iteration_exceptions.append(result) except StopIteration: self._last_iteration_exceptions = self._current_iteration_exceptions break except Exception: # pylint: disable=broad-except error_info = serializable_error_info_from_exc_info( sys.exc_info()) self._logger.error("Caught error:\n{}".format(error_info)) self._current_iteration_exceptions.append(error_info) self._last_iteration_exceptions = self._current_iteration_exceptions break finally: self._check_add_heartbeat(instance, daemon_uuid)