Beispiel #1
0
    def _run_iteration(self, instance, curr_time, daemon_uuid):
        """Run ``self.run_iteration`` to completion, collecting errors and heartbeating.

        Each value produced by the generator must be ``None`` or a
        ``SerializableErrorInfo``. Yielded errors, and any exception raised while
        stepping the generator, accumulate in ``_current_iteration_exceptions``.
        Once the generator stops (by finishing or by raising) that list is
        published as ``_last_iteration_exceptions``, so heartbeats always carry
        the complete error list of a finished iteration.

        Args:
            instance: Forwarded to ``self.run_iteration`` and ``self._check_add_heartbeat``.
            curr_time: Forwarded to ``self._check_add_heartbeat``.
            daemon_uuid: Forwarded to ``self._check_add_heartbeat``.
        """
        self._current_iteration_exceptions = []
        # A falsy last-heartbeat time means nothing has been reported yet, i.e.
        # this is the very first iteration.
        has_prior_heartbeat = bool(self._last_heartbeat_time)

        steps = self.run_iteration(instance)

        finished = False
        while not finished:
            try:
                yielded = check.opt_inst(next(steps), SerializableErrorInfo)
                if yielded:
                    self._current_iteration_exceptions.append(yielded)
            except StopIteration:
                # Clean finish: publish what was gathered during this pass.
                self._last_iteration_exceptions = self._current_iteration_exceptions
                finished = True
            except Exception:  # pylint: disable=broad-except
                caught = serializable_error_info_from_exc_info(sys.exc_info())
                self._logger.error("Caught error:\n{}".format(caught))
                self._current_iteration_exceptions.append(caught)
                # The pass was cut short, but its errors are still final.
                self._last_iteration_exceptions = self._current_iteration_exceptions
                finished = True
            finally:
                if has_prior_heartbeat:
                    # Heartbeats report the errors of the previous iteration, so
                    # from the second iteration on one is attempted after every
                    # step of the generator.
                    self._check_add_heartbeat(instance, curr_time, daemon_uuid)

        if not has_prior_heartbeat:
            # First iteration: hold the heartbeat back until the pass is done so
            # that it includes every error raised.
            self._check_add_heartbeat(instance, curr_time, daemon_uuid)
Beispiel #2
0
    def run_daemon_loop(
        self,
        instance_ref,
        daemon_uuid,
        daemon_shutdown_event,
        gen_workspace,
        heartbeat_interval_seconds,
        error_interval_seconds,
        until=None,
    ):
        """Step ``self.core_loop`` until shutdown is requested or ``until`` is reached.

        Values yielded by the core loop must be ``None`` or a
        ``SerializableErrorInfo``; errors are pushed, with a UTC timestamp, onto
        the front of ``self._errors``. If the core loop raises, the error is
        logged and recorded, the generator is closed and a fresh one is started.
        If the core loop ends without raising, that is logged as an error and
        the method returns. A heartbeat is attempted after every step; heartbeat
        failures are logged and otherwise ignored.

        Args:
            instance_ref: Reference used to build the ``DagsterInstance`` for this loop.
            daemon_uuid: Forwarded to ``self._check_add_heartbeat``.
            daemon_shutdown_event: Object whose ``is_set()`` stops the loop when true.
            gen_workspace: Callable taking the instance and returning a context
                manager that produces an ``IWorkspace``.
            heartbeat_interval_seconds: Forwarded to ``self._check_add_heartbeat``.
            error_interval_seconds: Forwarded to ``self._check_add_heartbeat``.
            until: Optional cut-off compared against ``pendulum.now("UTC")``;
                a falsy value means no cut-off.
        """
        from dagster.core.telemetry_upload import uploading_logging_thread

        def _keep_running():
            if daemon_shutdown_event.is_set():
                return False
            return not until or pendulum.now("UTC") < until

        # Each loop runs in its own thread with its own instance and IWorkspace
        with DagsterInstance.from_ref(instance_ref) as instance, uploading_logging_thread():
            with gen_workspace(instance) as workspace:
                check.inst_param(workspace, "workspace", IWorkspace)

                def _heartbeat_best_effort():
                    # A failing heartbeat must not take the daemon loop down.
                    try:
                        self._check_add_heartbeat(
                            instance,
                            daemon_uuid,
                            heartbeat_interval_seconds,
                            error_interval_seconds,
                        )
                    except Exception:
                        heartbeat_error = serializable_error_info_from_exc_info(sys.exc_info())
                        self._logger.error("Failed to add heartbeat: \n{}".format(heartbeat_error))

                loop_gen = self.core_loop(instance, workspace)

                try:
                    while _keep_running():
                        try:
                            yielded = check.opt_inst(next(loop_gen), SerializableErrorInfo)
                            if yielded:
                                self._errors.appendleft((yielded, pendulum.now("UTC")))
                        except StopIteration:
                            self._logger.error(
                                "Daemon loop finished without raising an error - daemon loops should run forever until they are interrupted."
                            )
                            break
                        except Exception:
                            caught = serializable_error_info_from_exc_info(sys.exc_info())
                            self._logger.error(
                                "Caught error, daemon loop will restart:\n{}".format(caught)
                            )
                            self._errors.appendleft((caught, pendulum.now("UTC")))
                            # Discard the failed generator and start over with a new one.
                            loop_gen.close()
                            loop_gen = self.core_loop(instance, workspace)
                        finally:
                            _heartbeat_best_effort()
                finally:
                    # cleanup the generator if it was stopped part-way through
                    loop_gen.close()
Beispiel #3
0
    def get_run_records(
        self,
        filters: PipelineRunsFilter = None,
        limit: int = None,
        order_by: str = None,
        ascending: bool = False,
        cursor: str = None,
        bucket_by: Optional[Union[JobBucket, TagBucket]] = None,
    ) -> List[RunRecord]:
        """Query stored runs and return them as ``RunRecord`` objects.

        Args:
            filters: Optional run filter; an empty ``PipelineRunsFilter`` is used
                when omitted.
            limit: Optional integer forwarded to ``self._runs_query``.
            order_by: Forwarded to ``self._runs_query``.
            ascending: Forwarded to ``self._runs_query``.
            cursor: Forwarded to ``self._runs_query``.
            bucket_by: Forwarded to ``self._runs_query``.

        Returns:
            One ``RunRecord`` per fetched row, in the order the rows come back.
        """
        filters = check.opt_inst_param(
            filters, "filters", PipelineRunsFilter, default=PipelineRunsFilter()
        )
        check.opt_int_param(limit, "limit")

        # only fetch columns we use to build RunRecord
        selected_columns = ["id", "run_body", "create_timestamp", "update_timestamp"]
        if self.has_run_stats_index_cols():
            selected_columns.extend(["start_time", "end_time"])

        runs_query = self._runs_query(
            filters=filters,
            limit=limit,
            columns=selected_columns,
            order_by=order_by,
            ascending=ascending,
            cursor=cursor,
            bucket_by=bucket_by,
        )

        records = []
        for row in self.fetchall(runs_query):
            storage_id = check.int_param(row["id"], "id")
            run = deserialize_as(check.str_param(row["run_body"], "run_body"), PipelineRun)
            created_at = check.inst(row["create_timestamp"], datetime)
            updated_at = check.inst(row["update_timestamp"], datetime)

            # start/end times are only selected when the stats index columns
            # exist, so fall back to None when the row does not carry them.
            if "start_time" in row:
                started_at = check.opt_inst(row["start_time"], float)
            else:
                started_at = None
            if "end_time" in row:
                ended_at = check.opt_inst(row["end_time"], float)
            else:
                ended_at = None

            records.append(
                RunRecord(
                    storage_id=storage_id,
                    pipeline_run=run,
                    create_timestamp=created_at,
                    update_timestamp=updated_at,
                    start_time=started_at,
                    end_time=ended_at,
                )
            )
        return records
Beispiel #4
0
    def run_iteration(self, curr_time):
        """Run one interleaved iteration of every daemon that is due.

        A daemon is due when it has no recorded heartbeat yet, or when at least
        ``daemon.interval_seconds`` have elapsed since its last iteration time.
        Due daemons are stepped round-robin, one ``next()`` at a time, so a
        slow daemon does not block the others. Values yielded by a daemon must
        be ``None`` or a ``SerializableErrorInfo``; yielded errors and raised
        exceptions are collected per daemon type and published to
        ``_last_iteration_exceptions`` once that daemon's iteration ends.

        Heartbeats are attempted after every step, except during the first
        controller iteration, where they are deferred until every daemon has
        finished so that they include all errors.

        Args:
            curr_time: Timestamp of this controller iteration; must support
                subtraction against the stored iteration times, yielding an
                object with ``total_seconds()``.
        """
        # Function-local import: deque gives O(1) rotation at both ends.
        from collections import deque

        first_controller_iteration = not self._last_heartbeat_times

        pending = deque()  # (daemon, generator) pairs still mid-iteration
        for daemon in self.daemons:
            daemon_type = daemon.daemon_type()
            # NOTE(review): membership is tested against _last_heartbeat_times
            # while the lookup uses _last_iteration_times - presumably both are
            # populated for a daemon before its first heartbeat; confirm.
            if (daemon_type not in self._last_heartbeat_times) or (
                (curr_time - self._last_iteration_times[daemon_type]).total_seconds()
                >= daemon.interval_seconds
            ):
                self._initialize_daemon_iteration(daemon_type, curr_time)
                pending.append((daemon, daemon.run_iteration()))

        # Call next on each daemon generator function, rotating through the daemons.
        while pending:
            daemon, generator = pending.popleft()
            daemon_type = daemon.daemon_type()
            try:
                error_info = check.opt_inst(next(generator), SerializableErrorInfo)
                if error_info:
                    self._current_iteration_exceptions[daemon_type].append(error_info)
            except StopIteration:
                # daemon has completed an iteration, don't add the generator back
                # We've completed an iteration, so errors can be reported in heartbeat
                self._last_iteration_exceptions[daemon_type] = self._current_iteration_exceptions[
                    daemon_type
                ]
            except Exception:  # pylint: disable=broad-except
                # log errors in daemon
                error_info = serializable_error_info_from_exc_info(sys.exc_info())
                self._logger.error("Caught error in {}:\n{}".format(daemon_type, error_info))
                self._current_iteration_exceptions[daemon_type].append(error_info)
                # The iteration stopped short, so errors can be reported in heartbeat
                self._last_iteration_exceptions[daemon_type] = self._current_iteration_exceptions[
                    daemon_type
                ]
            else:
                # append to the back, so other daemons will execute next
                pending.append((daemon, generator))

            if not first_controller_iteration:
                # wait until first iteration completes, otherwise heartbeats may be reported before
                # errors occur
                self._check_add_heartbeat(daemon_type, curr_time)

        # All initial iterations are now complete, we can report all heartbeats
        if first_controller_iteration:
            for daemon in self.daemons:
                self._check_add_heartbeat(daemon.daemon_type(), curr_time)
Beispiel #5
0
    def _run_iteration(
        self,
        instance,
        daemon_uuid,
        daemon_shutdown_event,
        workspace,
        heartbeat_interval_seconds,
        error_interval_seconds,
        until=None,
    ):
        """Step ``self.run_iteration`` once through, recording errors and heartbeating.

        ``workspace.cleanup()`` is called before the generator is created. Each
        value the generator yields must be ``None`` or a
        ``SerializableErrorInfo``; errors (yielded or raised) are appended to
        ``self._errors`` together with the current UTC time. Stepping stops when
        the generator finishes or raises, when ``daemon_shutdown_event`` is set,
        or when ``until`` has been reached. A heartbeat is attempted after every
        step; heartbeat failures are logged and otherwise ignored. The generator
        is always closed before returning.

        Args:
            instance: Forwarded to ``self.run_iteration`` and ``self._check_add_heartbeat``.
            daemon_uuid: Forwarded to ``self._check_add_heartbeat``.
            daemon_shutdown_event: Object whose ``is_set()`` stops the loop when true.
            workspace: Cleaned up first, then forwarded to ``self.run_iteration``.
            heartbeat_interval_seconds: Forwarded to ``self._check_add_heartbeat``.
            error_interval_seconds: Forwarded to ``self._check_add_heartbeat``.
            until: Optional cut-off compared against ``pendulum.now("UTC")``;
                a falsy value means no cut-off.
        """
        # Clear out the workspace locations after each iteration
        workspace.cleanup()

        iteration = self.run_iteration(instance, workspace)

        try:
            while True:
                if daemon_shutdown_event.is_set():
                    break
                if until and not pendulum.now("UTC") < until:
                    break

                try:
                    yielded = check.opt_inst(next(iteration), SerializableErrorInfo)
                    if yielded:
                        self._errors.append((yielded, pendulum.now("UTC")))
                except StopIteration:
                    break
                except Exception:  # pylint: disable=broad-except
                    caught = serializable_error_info_from_exc_info(sys.exc_info())
                    self._logger.error("Caught error:\n{}".format(caught))
                    self._errors.append((caught, pendulum.now("UTC")))
                    break
                finally:
                    # Runs after every step, including the final one.
                    try:
                        self._check_add_heartbeat(
                            instance,
                            daemon_uuid,
                            heartbeat_interval_seconds,
                            error_interval_seconds,
                        )
                    except Exception:  # pylint: disable=broad-except
                        heartbeat_error = serializable_error_info_from_exc_info(sys.exc_info())
                        self._logger.error("Failed to add heartbeat: \n{}".format(heartbeat_error))
        finally:
            # cleanup the generator if it was stopped part-way through
            iteration.close()
Beispiel #6
0
    def _run_iteration(self, instance, daemon_uuid, daemon_shutdown_event, workspace, until=None):
        """Step ``self.run_iteration``, tracking errors per completed iteration.

        The generator may yield ``None``, a ``SerializableErrorInfo`` or a
        ``CompletedIteration`` marker. Errors accumulate in
        ``_current_iteration_exceptions``. A ``CompletedIteration`` publishes
        the accumulated list as ``_last_iteration_exceptions`` and starts a new
        empty one, which guarantees heartbeats see the full error list of a
        finished iteration. The list is also published when the generator
        finishes or raises, which ends the loop. The loop additionally stops
        once ``daemon_shutdown_event`` is set or ``until`` has been reached. A
        heartbeat is attempted after every step; heartbeat failures are logged
        and otherwise ignored. The generator is always closed before returning.

        Args:
            instance: Forwarded to ``self.run_iteration`` and ``self._check_add_heartbeat``.
            daemon_uuid: Forwarded to ``self._check_add_heartbeat``.
            daemon_shutdown_event: Object whose ``is_set()`` stops the loop when true.
            workspace: Cleaned up first, then forwarded to ``self.run_iteration``.
            until: Optional cut-off compared against ``pendulum.now("UTC")``;
                a falsy value means no cut-off.
        """

        def _publish_errors():
            self._last_iteration_exceptions = self._current_iteration_exceptions

        self._current_iteration_exceptions = []

        # Clear out the workspace locations after each iteration
        workspace.cleanup()

        iteration = self.run_iteration(instance, workspace)

        try:
            while True:
                if daemon_shutdown_event.is_set():
                    break
                if until and not pendulum.now("UTC") < until:
                    break

                try:
                    yielded = check.opt_inst(
                        next(iteration), (SerializableErrorInfo, CompletedIteration)
                    )
                    if isinstance(yielded, CompletedIteration):
                        # One logical iteration is done: publish and start afresh.
                        _publish_errors()
                        self._current_iteration_exceptions = []
                    elif yielded:
                        self._current_iteration_exceptions.append(yielded)
                except StopIteration:
                    _publish_errors()
                    break
                except Exception:  # pylint: disable=broad-except
                    caught = serializable_error_info_from_exc_info(sys.exc_info())
                    self._logger.error("Caught error:\n{}".format(caught))
                    self._current_iteration_exceptions.append(caught)
                    _publish_errors()
                    break
                finally:
                    # Runs after every step, including the final one.
                    try:
                        self._check_add_heartbeat(instance, daemon_uuid)
                    except Exception:  # pylint: disable=broad-except
                        heartbeat_error = serializable_error_info_from_exc_info(sys.exc_info())
                        self._logger.error("Failed to add heartbeat: \n{}".format(heartbeat_error))
        finally:
            # cleanup the generator if it was stopped part-way through
            iteration.close()
Beispiel #7
0
    def _run_iteration(self,
                       instance,
                       daemon_uuid,
                       daemon_shutdown_event,
                       grpc_server_registry,
                       until=None):
        """Step ``self.run_iteration``, tracking errors per completed iteration.

        The generator may yield ``None``, a ``SerializableErrorInfo`` or a
        ``CompletedIteration`` marker. Errors accumulate in
        ``_current_iteration_exceptions``. A ``CompletedIteration`` publishes
        the accumulated list as ``_last_iteration_exceptions`` and starts a new
        empty one, which guarantees heartbeats see the full error list of a
        finished iteration. The list is also published when the generator
        finishes or raises, which ends the loop. The loop additionally stops
        once ``daemon_shutdown_event`` is set or ``until`` has been reached.
        ``self._check_add_heartbeat`` is called after every step.

        The generator is always closed before returning, so one that is
        abandoned part-way (shutdown, ``until`` reached, or a heartbeat failure)
        runs its own cleanup immediately rather than at garbage collection.

        Args:
            instance: Forwarded to ``self.run_iteration`` and ``self._check_add_heartbeat``.
            daemon_uuid: Forwarded to ``self._check_add_heartbeat``.
            daemon_shutdown_event: Object whose ``is_set()`` stops the loop when
                true; also forwarded to ``self.run_iteration``.
            grpc_server_registry: Forwarded to ``self.run_iteration``.
            until: Optional cut-off compared against ``pendulum.now("UTC")``;
                a falsy value means no cut-off.

        Raises:
            Exception: Whatever ``self._check_add_heartbeat`` raises propagates.
        """
        # Build a list of any exceptions encountered during the iteration.
        # Once the iteration completes, this is copied to last_iteration_exceptions
        # which is used in the heartbeats. This guarantees that heartbeats contain the full
        # list of errors raised.
        self._current_iteration_exceptions = []
        daemon_generator = self.run_iteration(instance, daemon_shutdown_event,
                                              grpc_server_registry)

        try:
            while (not daemon_shutdown_event.is_set()) and (
                    not until or pendulum.now("UTC") < until):
                try:
                    result = check.opt_inst(
                        next(daemon_generator),
                        (SerializableErrorInfo, CompletedIteration))
                    if isinstance(result, CompletedIteration):
                        self._last_iteration_exceptions = self._current_iteration_exceptions
                        self._current_iteration_exceptions = []
                    elif result:
                        self._current_iteration_exceptions.append(result)
                except StopIteration:
                    self._last_iteration_exceptions = self._current_iteration_exceptions
                    break
                except Exception:  # pylint: disable=broad-except
                    error_info = serializable_error_info_from_exc_info(
                        sys.exc_info())
                    self._logger.error("Caught error:\n{}".format(error_info))
                    self._current_iteration_exceptions.append(error_info)
                    self._last_iteration_exceptions = self._current_iteration_exceptions
                    break
                finally:
                    # NOTE(review): a heartbeat failure propagates out of this
                    # method; kept as-is because callers may rely on it -
                    # confirm whether it should be logged and swallowed.
                    self._check_add_heartbeat(instance, daemon_uuid)
        finally:
            # cleanup the generator if it was stopped part-way through
            daemon_generator.close()