Exemple #1
0
    def test_add_heartbeat(self, storage):
        """
        Verify that a heartbeat can be inserted and that a second heartbeat
        for the same daemon type replaces it instead of adding a new entry.
        """
        self._skip_in_memory(storage)

        # Insert: the first heartbeat for the sensor daemon type yields one entry.
        first_heartbeat = DaemonHeartbeat(
            timestamp=pendulum.from_timestamp(1000).float_timestamp,
            daemon_type=SensorDaemon.daemon_type(),
            daemon_id=None,
            errors=[],
        )
        storage.add_daemon_heartbeat(first_heartbeat)
        entry_count = len(storage.get_daemon_heartbeats())
        assert entry_count == 1
        heartbeats_by_type = storage.get_daemon_heartbeats()
        assert heartbeats_by_type[SensorDaemon.daemon_type()] == first_heartbeat

        # Update: a later heartbeat with the same daemon type overwrites the first.
        replacement_heartbeat = DaemonHeartbeat(
            timestamp=pendulum.from_timestamp(2000).float_timestamp,
            daemon_type=SensorDaemon.daemon_type(),
            daemon_id=None,
            errors=[],
        )
        storage.add_daemon_heartbeat(replacement_heartbeat)
        entry_count = len(storage.get_daemon_heartbeats())
        assert entry_count == 1
        heartbeats_by_type = storage.get_daemon_heartbeats()
        assert heartbeats_by_type[SensorDaemon.daemon_type()] == replacement_heartbeat
Exemple #2
0
    def test_add_heartbeat(self, storage):
        """
        Verify insert-then-update of a heartbeat keyed by the daemon type
        "foobar": the store must hold exactly one entry after each write.
        """
        import pendulum

        self._skip_in_memory(storage)

        # Insert: the first write creates the single "foobar" entry.
        first_heartbeat = DaemonHeartbeat(
            timestamp=pendulum.from_timestamp(1000),
            daemon_type="foobar",
            daemon_id=None,
            info=None,
        )
        storage.add_daemon_heartbeat(first_heartbeat)
        entry_count = len(storage.get_daemon_heartbeats())
        assert entry_count == 1
        heartbeats_by_type = storage.get_daemon_heartbeats()
        assert heartbeats_by_type["foobar"] == first_heartbeat

        # Update: the second write replaces the entry rather than adding one.
        replacement_heartbeat = DaemonHeartbeat(
            timestamp=pendulum.from_timestamp(2000),
            daemon_type="foobar",
            daemon_id=None,
            info=None,
        )
        storage.add_daemon_heartbeat(replacement_heartbeat)
        entry_count = len(storage.get_daemon_heartbeats())
        assert entry_count == 1
        heartbeats_by_type = storage.get_daemon_heartbeats()
        assert heartbeats_by_type["foobar"] == replacement_heartbeat
Exemple #3
0
 def _add_heartbeat(self, daemon):
     """
     Record a heartbeat for the given daemon on ``self._instance``.

     The heartbeat is built from the current UTC time and the daemon's
     class name; its two remaining positional fields are passed as None.
     """
     heartbeat = DaemonHeartbeat(
         pendulum.now("UTC"),
         type(daemon).__name__,
         None,
         None,
     )
     self._instance.add_daemon_heartbeat(heartbeat)
Exemple #4
0
    def _check_add_heartbeat(self, instance, curr_time, daemon_uuid):
        """
        Write a heartbeat for this daemon unless one was written too recently.

        Returns without doing anything when a previous heartbeat time is
        recorded on this object and fewer than
        DAEMON_HEARTBEAT_INTERVAL_SECONDS have elapsed since then. Otherwise
        it warns if the stored heartbeat carries a different daemon id,
        remembers ``curr_time`` and stores a new heartbeat on ``instance``.
        """
        previous_time = self._last_heartbeat_time
        if previous_time:
            elapsed_seconds = (curr_time - previous_time).total_seconds()
            if elapsed_seconds < DAEMON_HEARTBEAT_INTERVAL_SECONDS:
                return

        daemon_type = self.daemon_type()
        stored_heartbeat = instance.get_daemon_heartbeats().get(daemon_type)

        # Only warn once this object has already sent a heartbeat itself;
        # on the very first heartbeat a differing stored id is not reported.
        if (previous_time and stored_heartbeat
                and stored_heartbeat.daemon_id != daemon_uuid):
            takeover_message = (
                "Taking over from another {} daemon process. If this "
                "message reoccurs, you may have multiple daemons running which is not supported. "
                "Last heartbeat daemon id: {}, "
                "Current daemon_id: {}"
            ).format(daemon_type, stored_heartbeat.daemon_id, daemon_uuid)
            self._logger.warning(takeover_message)

        self._last_heartbeat_time = curr_time

        new_heartbeat = DaemonHeartbeat(
            curr_time.float_timestamp,
            daemon_type,
            daemon_uuid,
            errors=self._last_iteration_exceptions,
        )
        instance.add_daemon_heartbeat(new_heartbeat)
 def test_get_individual_daemons(self, graphql_context):
     """
     Store one sensor heartbeat and check the per-daemon health payload
     returned by INDIVIDUAL_DAEMON_QUERY for all three daemon entries.
     """
     if graphql_context.instance.is_ephemeral:
         pytest.skip(
             "The daemon isn't compatible with an in-memory instance")

     sensor_heartbeat = DaemonHeartbeat(
         timestamp=100.0,
         daemon_type=DaemonType.SENSOR,
         daemon_id=None,
         error=None,
     )
     graphql_context.instance.add_daemon_heartbeat(sensor_heartbeat)

     results = execute_dagster_graphql(graphql_context,
                                       INDIVIDUAL_DAEMON_QUERY)

     # Only the sensor daemon has a heartbeat; the other two report no data.
     expected_sensor = {
         "daemonType": "SENSOR",
         "required": True,
         "healthy": False,
         "lastHeartbeatTime": 100.0,
     }
     expected_run_coordinator = {
         "daemonType": "QUEUED_RUN_COORDINATOR",
         "required": False,
         "healthy": None,
         "lastHeartbeatTime": None,
     }
     expected_scheduler = {
         "daemonType": "SCHEDULER",
         "required": False,
         "healthy": None,
         "lastHeartbeatTime": None,
     }
     assert results.data == {
         "instance": {
             "daemonHealth": {
                 "sensor": expected_sensor,
                 "run_coordinator": expected_run_coordinator,
                 "scheduler": expected_scheduler,
             }
         }
     }
Exemple #6
0
    def _check_add_heartbeat(self, daemon_type, curr_time):
        """
        Add a heartbeat for the given daemon type when one is due.

        A heartbeat is due when none has been recorded for ``daemon_type`` on
        this object yet, or when at least DAEMON_HEARTBEAT_INTERVAL_SECONDS
        have elapsed since the recorded one. Before writing, a warning is
        logged if the stored heartbeat carries a different daemon id.
        """
        if daemon_type in self._last_heartbeat_times:
            seconds_since_last = (
                curr_time - self._last_heartbeat_times[daemon_type]
            ).total_seconds()
            if seconds_since_last < DAEMON_HEARTBEAT_INTERVAL_SECONDS:
                # Too soon since the previous heartbeat for this type.
                return

        stored_heartbeat = self._instance.get_daemon_heartbeats().get(daemon_type)

        # Skip the takeover warning on the first heartbeat for this type.
        sent_before = daemon_type in self._last_heartbeat_times
        if sent_before and stored_heartbeat and stored_heartbeat.daemon_id != self._daemon_uuid:
            takeover_message = (
                "Taking over from another {} daemon process. If this "
                "message reoccurs, you may have multiple daemons running which is not supported. "
                "Last heartbeat daemon id: {}, "
                "Current daemon_id: {}"
            ).format(daemon_type.value, stored_heartbeat.daemon_id, self._daemon_uuid)
            self._logger.warning(takeover_message)

        self._last_heartbeat_times[daemon_type] = curr_time

        # The stored timestamp is taken from a fresh clock read, not curr_time.
        new_heartbeat = DaemonHeartbeat(
            pendulum.now("UTC").float_timestamp,
            daemon_type,
            self._daemon_uuid,
            self._last_iteration_exceptions[daemon_type],
        )
        self._instance.add_daemon_heartbeat(new_heartbeat)
Exemple #7
0
def debug_daemon_heartbeats(instance):
    """
    Write a sensor-daemon heartbeat to ``instance``, read it back, and print
    the written and the returned timestamp for comparison.
    """
    sensor_daemon = SensorDaemon()
    timestamp = pendulum.now("UTC").float_timestamp

    heartbeat = DaemonHeartbeat(timestamp, sensor_daemon.daemon_type(), None, None)
    instance.add_daemon_heartbeat(heartbeat)

    stored_heartbeat = instance.get_daemon_heartbeats()[sensor_daemon.daemon_type()]
    returned_timestamp = stored_heartbeat.timestamp

    report = f"Written timestamp: {timestamp}\nRead timestamp: {returned_timestamp}"
    print(report)  # pylint: disable=print-call
Exemple #8
0
def debug_daemon_heartbeats(instance):
    """
    Write a sensor-daemon heartbeat to ``instance``, read it back, and print
    the written and the returned timestamp for comparison.

    Args:
        instance: object exposing ``add_daemon_heartbeat`` and
            ``get_daemon_heartbeats``; also passed to the SensorDaemon
            constructor.
    """
    daemon = SensorDaemon(instance, interval_seconds=DEFAULT_DAEMON_INTERVAL_SECONDS)
    timestamp = pendulum.now("UTC").float_timestamp
    instance.add_daemon_heartbeat(DaemonHeartbeat(timestamp, daemon.daemon_type(), None, None))
    returned_timestamp = instance.get_daemon_heartbeats()[daemon.daemon_type()].timestamp
    # Fixed the "timetstamp" typo in the printed label.
    print(  # pylint: disable=print-call
        f"Written timestamp: {timestamp}\nRead timestamp: {returned_timestamp}"
    )
Exemple #9
0
    def _check_add_heartbeat(
        self, instance, daemon_uuid, heartbeat_interval_seconds, error_interval_seconds
    ):
        """
        Prune expired errors, then write a heartbeat if one is due.

        Entries are popped from the end of ``self._errors`` while their
        timestamp is older than ``error_interval_seconds`` ago. The method
        then returns early if the previous heartbeat written by this object
        is less than ``heartbeat_interval_seconds`` old. Otherwise it logs an
        error when the stored heartbeat has a different daemon id, stores a
        new heartbeat carrying the remaining errors, and emits a DAEMON_ALIVE
        telemetry action at most once per TELEMETRY_LOGGING_INTERVAL seconds.
        """
        error_max_time = pendulum.now("UTC").subtract(seconds=error_interval_seconds)

        # The last element is treated as the earliest error; drop while expired.
        while self._errors:
            _, oldest_timestamp = self._errors[-1]
            if oldest_timestamp < error_max_time:
                self._errors.pop()
            else:
                break

        curr_time = pendulum.now("UTC")

        previous_time = self._last_heartbeat_time
        if previous_time:
            elapsed_seconds = (curr_time - previous_time).total_seconds()
            if elapsed_seconds < heartbeat_interval_seconds:
                return

        daemon_type = self.daemon_type()
        stored_heartbeat = instance.get_daemon_heartbeats().get(daemon_type)

        # Only report a conflict once this object has sent a heartbeat itself.
        if previous_time and stored_heartbeat and stored_heartbeat.daemon_id != daemon_uuid:
            conflict_message = (
                "Another {} daemon is still sending heartbeats. You likely have multiple "
                "daemon processes running at once, which is not supported. "
                "Last heartbeat daemon id: {}, "
                "Current daemon_id: {}"
            ).format(daemon_type, stored_heartbeat.daemon_id, daemon_uuid)
            self._logger.error(conflict_message)

        self._last_heartbeat_time = curr_time

        remaining_errors = [error for error, _timestamp in self._errors]
        instance.add_daemon_heartbeat(
            DaemonHeartbeat(
                curr_time.float_timestamp,
                daemon_type,
                daemon_uuid,
                errors=remaining_errors,
            )
        )

        last_log_time = self._last_log_time
        telemetry_due = (
            not last_log_time
            or (curr_time - last_log_time).total_seconds() >= TELEMETRY_LOGGING_INTERVAL
        )
        if telemetry_due:
            log_action(
                instance,
                DAEMON_ALIVE,
                metadata={"DAEMON_SESSION_ID": get_telemetry_daemon_session_id()},
            )
            self._last_log_time = curr_time
Exemple #10
0
 def _add_heartbeat(self, daemon):
     """
     Record a heartbeat for the given daemon on ``self._instance``.

     The heartbeat carries the current UTC float timestamp, the daemon's
     type, a None daemon id, and the daemon's last iteration exception.
     """
     heartbeat = DaemonHeartbeat(
         pendulum.now("UTC").float_timestamp,
         daemon.daemon_type(),
         None,
         daemon.last_iteration_exception,
     )
     self._instance.add_daemon_heartbeat(heartbeat)
Exemple #11
0
    def test_wipe_heartbeats(self, storage):
        """
        Verify that wiping daemon heartbeats removes a previously added one.
        """
        self._skip_in_memory(storage)

        added_heartbeat = DaemonHeartbeat(
            timestamp=pendulum.from_timestamp(1000).float_timestamp,
            daemon_type=SensorDaemon.daemon_type(),
            daemon_id=None,
            errors=[],
        )
        storage.add_daemon_heartbeat(added_heartbeat)
        storage.wipe_daemon_heartbeats()

        # Without this check the test would pass even if the wipe did nothing.
        assert storage.get_daemon_heartbeats() == {}
Exemple #12
0
 def get_daemon_heartbeats(self):
     """
     Return a dict mapping each stored row's ``daemon_type`` to a
     DaemonHeartbeat built from that row's columns.
     """
     heartbeats_by_type = {}
     with self.connect() as conn:
         select_all = db.select(DaemonHeartbeatsTable.columns)
         # Rows are consumed while the connection is still open.
         for row in conn.execute(select_all):
             heartbeats_by_type[row.daemon_type] = DaemonHeartbeat(
                 timestamp=row.timestamp,
                 daemon_type=row.daemon_type,
                 daemon_id=row.daemon_id,
                 info=row.info,
             )
         return heartbeats_by_type
Exemple #13
0
    def test_wipe_heartbeats(self, storage):
        """
        Verify that wiping daemon heartbeats leaves the storage with none.
        """
        self._skip_in_memory(storage)

        sensor_heartbeat = DaemonHeartbeat(
            timestamp=pendulum.from_timestamp(1000),
            daemon_type=DaemonType.SENSOR,
            daemon_id=None,
            info=None,
        )
        storage.add_daemon_heartbeat(sensor_heartbeat)
        storage.wipe_daemon_heartbeats()

        # After the wipe the storage must report an empty mapping.
        remaining_heartbeats = storage.get_daemon_heartbeats()
        assert remaining_heartbeats == {}
Exemple #14
0
    def test_wipe_heartbeats(self, storage):
        """
        Verify that wiping daemon heartbeats removes a previously added one.

        Skipped when ``self.can_delete_runs()`` reports that the storage
        cannot delete.
        """
        self._skip_in_memory(storage)

        if not self.can_delete_runs():
            pytest.skip("storage cannot delete")

        added_heartbeat = DaemonHeartbeat(
            timestamp=pendulum.from_timestamp(1000).float_timestamp,
            daemon_type=SensorDaemon.daemon_type(),
            daemon_id=None,
            errors=[],
        )
        storage.add_daemon_heartbeat(added_heartbeat)
        storage.wipe_daemon_heartbeats()

        # Without this check the test would pass even if the wipe did nothing.
        assert storage.get_daemon_heartbeats() == {}
Exemple #15
0
    def _check_add_heartbeat(self, instance, daemon_uuid,
                             heartbeat_interval_seconds,
                             error_interval_seconds):
        """
        Prune expired errors, then write a heartbeat if one is due.

        Entries are popped from the end of ``self._errors`` while their
        timestamp is older than ``error_interval_seconds`` ago. The method
        then returns early if the previous heartbeat written by this object
        is less than ``heartbeat_interval_seconds`` old. Otherwise it warns
        when the stored heartbeat has a different daemon id, stores a new
        heartbeat carrying the remaining errors, and logs a DAEMON_ALIVE
        action at most once per TELEMETRY_LOGGING_INTERVAL seconds.
        """
        error_max_time = pendulum.now("UTC").subtract(
            seconds=error_interval_seconds)

        # The last element is treated as the earliest error; drop while expired.
        while self._errors:
            _, oldest_timestamp = self._errors[-1]
            if oldest_timestamp < error_max_time:
                self._errors.pop()
            else:
                break

        curr_time = pendulum.now("UTC")

        previous_time = self._last_heartbeat_time
        if previous_time:
            elapsed_seconds = (curr_time - previous_time).total_seconds()
            if elapsed_seconds < heartbeat_interval_seconds:
                return

        daemon_type = self.daemon_type()
        stored_heartbeat = instance.get_daemon_heartbeats().get(daemon_type)

        # Only warn once this object has already sent a heartbeat itself.
        if (previous_time and stored_heartbeat
                and stored_heartbeat.daemon_id != daemon_uuid):
            takeover_message = (
                "Taking over from another {} daemon process. If this "
                "message reoccurs, you may have multiple daemons running which is not supported. "
                "Last heartbeat daemon id: {}, "
                "Current daemon_id: {}"
            ).format(daemon_type, stored_heartbeat.daemon_id, daemon_uuid)
            self._logger.warning(takeover_message)

        self._last_heartbeat_time = curr_time

        remaining_errors = [error for error, _timestamp in self._errors]
        instance.add_daemon_heartbeat(
            DaemonHeartbeat(
                curr_time.float_timestamp,
                daemon_type,
                daemon_uuid,
                errors=remaining_errors,
            ))

        last_log_time = self._last_log_time
        telemetry_due = (
            not last_log_time
            or (curr_time - last_log_time).total_seconds() >=
            TELEMETRY_LOGGING_INTERVAL)
        if telemetry_due:
            log_action(instance, DAEMON_ALIVE)
            self._last_log_time = curr_time
Exemple #16
0
    def _check_add_heartbeat(self, instance, daemon_uuid):
        """
        Write a heartbeat if one is due, or immediately on the first error.

        The DAEMON_HEARTBEAT_INTERVAL_SECONDS throttle is bypassed the first
        time ``self._last_iteration_exceptions`` is non-empty, so that a
        failing daemon is not reported as healthy until the next interval.
        """
        # Truthy only while there are exceptions that were never reported.
        must_report_error = self._last_iteration_exceptions and not self._first_error_logged

        curr_time = pendulum.now("UTC")
        previous_time = self._last_heartbeat_time

        if must_report_error:
            self._first_error_logged = True
        elif (
            previous_time
            and (curr_time - previous_time).total_seconds() < DAEMON_HEARTBEAT_INTERVAL_SECONDS
        ):
            # Nothing new to report and the previous heartbeat is recent enough.
            return

        daemon_type = self.daemon_type()
        stored_heartbeat = instance.get_daemon_heartbeats().get(daemon_type)

        # Skip the takeover warning on the first heartbeat from this object.
        if previous_time and stored_heartbeat and stored_heartbeat.daemon_id != daemon_uuid:
            takeover_message = (
                "Taking over from another {} daemon process. If this "
                "message reoccurs, you may have multiple daemons running which is not supported. "
                "Last heartbeat daemon id: {}, "
                "Current daemon_id: {}"
            ).format(daemon_type, stored_heartbeat.daemon_id, daemon_uuid)
            self._logger.warning(takeover_message)

        self._last_heartbeat_time = curr_time

        new_heartbeat = DaemonHeartbeat(
            curr_time.float_timestamp,
            daemon_type,
            daemon_uuid,
            errors=self._last_iteration_exceptions,
        )
        instance.add_daemon_heartbeat(new_heartbeat)
Exemple #17
0
    def _check_add_heartbeat(self, instance, daemon_uuid,
                             heartbeat_interval_seconds,
                             error_interval_seconds):
        """
        Trim the recorded errors, then write a heartbeat if one is due.

        ``self._errors`` is first cut to its leading
        DAEMON_HEARTBEAT_ERROR_LIMIT entries and then filtered to those whose
        timestamp is within ``error_interval_seconds`` of now. The method
        returns early if the previous heartbeat written by this object is
        less than ``heartbeat_interval_seconds`` old; otherwise it warns when
        the stored heartbeat has a different daemon id and stores a new
        heartbeat carrying the remaining errors.
        """
        error_max_time = pendulum.now("UTC").subtract(
            seconds=error_interval_seconds)

        # Cap the number of entries first, then discard the expired ones.
        self._errors = self._errors[:DAEMON_HEARTBEAT_ERROR_LIMIT]
        recent_errors = []
        for error, recorded_at in self._errors:
            if recorded_at >= error_max_time:
                recent_errors.append((error, recorded_at))
        self._errors = recent_errors

        curr_time = pendulum.now("UTC")

        previous_time = self._last_heartbeat_time
        if previous_time:
            elapsed_seconds = (curr_time - previous_time).total_seconds()
            if elapsed_seconds < heartbeat_interval_seconds:
                return

        daemon_type = self.daemon_type()
        stored_heartbeat = instance.get_daemon_heartbeats().get(daemon_type)

        # Only warn once this object has already sent a heartbeat itself.
        if (previous_time and stored_heartbeat
                and stored_heartbeat.daemon_id != daemon_uuid):
            takeover_message = (
                "Taking over from another {} daemon process. If this "
                "message reoccurs, you may have multiple daemons running which is not supported. "
                "Last heartbeat daemon id: {}, "
                "Current daemon_id: {}"
            ).format(daemon_type, stored_heartbeat.daemon_id, daemon_uuid)
            self._logger.warning(takeover_message)

        self._last_heartbeat_time = curr_time

        remaining_errors = [error for error, _timestamp in self._errors]
        instance.add_daemon_heartbeat(
            DaemonHeartbeat(
                curr_time.float_timestamp,
                daemon_type,
                daemon_uuid,
                errors=remaining_errors,
            ))
 def test_get_daemon_error(self, graphql_context):
     """
     Store a sensor heartbeat that carries an error and check that
     DAEMON_HEALTH_QUERY returns the error message for the sensor daemon.
     """
     if graphql_context.instance.is_ephemeral:
         pytest.skip(
             "The daemon isn't compatible with an in-memory instance")

     heartbeat_error = SerializableErrorInfo(message="foobar",
                                             stack=[],
                                             cls_name=None,
                                             cause=None)
     failing_heartbeat = DaemonHeartbeat(
         timestamp=100.0,
         daemon_type=DaemonType.SENSOR,
         daemon_id=None,
         error=heartbeat_error,
     )
     graphql_context.instance.add_daemon_heartbeat(failing_heartbeat)

     results = execute_dagster_graphql(graphql_context, DAEMON_HEALTH_QUERY)

     sensor_health = results.data["instance"]["daemonHealth"]["sensor"]
     assert sensor_health == {
         "lastHeartbeatError": {
             "message": "foobar"
         },
     }
Exemple #19
0
    def _check_add_heartbeat(self, daemon, curr_time):
        """
        Add a heartbeat for the given daemon when one is due.

        A heartbeat is due when none has been recorded for the daemon's type
        on this object yet, or when at least
        DAEMON_HEARTBEAT_INTERVAL_SECONDS have elapsed since the recorded one.
        """
        daemon_type = daemon.daemon_type()

        if daemon_type in self._last_heartbeat_times:
            seconds_since_last = (
                curr_time - self._last_heartbeat_times[daemon_type]
            ).total_seconds()
            if seconds_since_last < DAEMON_HEARTBEAT_INTERVAL_SECONDS:
                # Too soon since the previous heartbeat for this type.
                return

        self._last_heartbeat_times[daemon_type] = curr_time

        # The stored timestamp is taken from a fresh clock read, not curr_time.
        new_heartbeat = DaemonHeartbeat(
            pendulum.now("UTC").float_timestamp,
            daemon.daemon_type(),
            None,
            daemon.last_iteration_exception,
        )
        self._instance.add_daemon_heartbeat(new_heartbeat)