コード例 #1
0
 def resolve_allDaemonStatuses(self, _graphene_info):
     """Return a ``GrapheneDaemonStatus`` for every daemon type the instance requires.

     The daemon types come from ``self._instance.get_required_daemon_types()``
     and the statuses are fetched with ``ignore_errors=True``.
     """
     required_types = self._instance.get_required_daemon_types()
     statuses_by_type = get_daemon_statuses(
         self._instance,
         daemon_types=required_types,
         ignore_errors=True,
     )

     wrapped_statuses = []
     for daemon_status in statuses_by_type.values():
         wrapped_statuses.append(GrapheneDaemonStatus(daemon_status))
     return wrapped_statuses
コード例 #2
0
def test_multiple_error_daemon(monkeypatch):
    """Check that several errors yielded by a daemon loop appear on its heartbeat.

    ``SensorDaemon.core_loop`` is replaced by a generator that yields two
    ``SerializableErrorInfo`` objects and then idles. The test polls until the
    sensor daemon status is unhealthy with exactly two heartbeat errors, and
    raises if that has not happened after roughly 10 seconds.
    """
    with instance_for_test() as instance:
        from dagster.daemon.daemon import SensorDaemon

        def _yield_two_errors(_, _instance, _workspace):
            # NOTE(review): the positional arguments are presumably
            # (message, stack, cls_name, cause) -- confirm against
            # SerializableErrorInfo.
            for message in ("foobar", "bizbuz"):
                yield SerializableErrorInfo(message, None, None, None)

            # Keep the loop alive afterwards without reporting anything else.
            while True:
                yield
                time.sleep(0.5)

        monkeypatch.setattr(SensorDaemon, "core_loop", _yield_two_errors)

        started_at = pendulum.now("UTC")

        interval = 1

        controller_context = daemon_controller_from_instance(
            instance,
            workspace_load_target=EmptyWorkspaceTarget(),
            heartbeat_interval_seconds=interval,
        )
        with controller_context as controller:
            while True:
                now = pendulum.now("UTC")

                daemons_live = all_daemons_live(
                    instance, heartbeat_interval_seconds=interval
                )
                if daemons_live:
                    # The yielded errors must not have stopped the daemon.
                    controller.check_daemon_threads()
                    controller.check_daemon_heartbeats()

                    statuses = get_daemon_statuses(
                        instance,
                        [SensorDaemon.daemon_type()],
                        now.float_timestamp,
                    )
                    status = statuses[SensorDaemon.daemon_type()]

                    if status.healthy == False:
                        errors = status.last_heartbeat.errors
                        if len(errors) == 2:
                            # The error yielded last is expected first.
                            assert errors[0].message.strip() == "bizbuz"
                            assert errors[1].message.strip() == "foobar"
                            break

                elapsed_seconds = (now - started_at).total_seconds()
                if elapsed_seconds > 10:
                    raise Exception("timed out waiting for heartbeat error")

                time.sleep(0.5)
コード例 #3
0
def test_thread_die_daemon(monkeypatch):
    """Check that ``check_daemon_threads`` raises once the sensor loop has died.

    ``SensorDaemon.core_loop`` is replaced by a generator that records that it
    ran and then raises ``KeyboardInterrupt``. Once that has happened and the
    scheduler daemon reports healthy, the test keeps calling
    ``controller.check_daemon_threads()`` until it raises, and checks the
    exception text names the SENSOR thread. It gives up after roughly 20 seconds.
    """
    with instance_for_test(overrides={}) as instance:
        from dagster.daemon.daemon import SchedulerDaemon, SensorDaemon

        loop_state = {"ran": False}

        def _interrupting_core_loop(_, _instance, _workspace):
            loop_state["ran"] = True
            raise KeyboardInterrupt
            # The unreachable yield keeps this function a generator.
            yield  # pylint: disable=unreachable

        monkeypatch.setattr(SensorDaemon, "core_loop", _interrupting_core_loop)

        interval = 1

        started_at = pendulum.now("UTC")
        controller_context = daemon_controller_from_instance(
            instance,
            workspace_load_target=EmptyWorkspaceTarget(),
            heartbeat_interval_seconds=interval,
        )
        with controller_context as controller:
            while True:
                now = pendulum.now("UTC")

                statuses = get_daemon_statuses(
                    instance,
                    [SchedulerDaemon.daemon_type()],
                    now.float_timestamp,
                    heartbeat_interval_seconds=interval,
                )
                scheduler_status = statuses[SchedulerDaemon.daemon_type()]

                if loop_state["ran"] and scheduler_status.healthy:
                    try:
                        # Should eventually throw since the sensor thread is interrupted
                        controller.check_daemon_threads()
                    except Exception as exc:
                        expected_text = "Stopping dagster-daemon process since the following threads are no longer running: ['SENSOR']"
                        assert expected_text in str(exc)
                        break

                elapsed_seconds = (now - started_at).total_seconds()
                if elapsed_seconds > 20:
                    raise Exception("timed out waiting for check_daemons to fail")

                time.sleep(0.5)
コード例 #4
0
ファイル: __init__.py プロジェクト: trevenrawr/dagster
def debug_heartbeat_dump_command():
    # Echo the status of every daemon type required by the current instance.
    with DagsterInstance.get() as instance:
        statuses_by_type = get_daemon_statuses(
            instance, instance.get_required_daemon_types()
        )
        # get_daemon_statuses is indexed by daemon type elsewhere in this code
        # base, so iterating it directly would yield the type keys only; echo
        # the status objects instead.
        for daemon_status in statuses_by_type.values():
            click.echo(daemon_status)
コード例 #5
0
def test_warn_multiple_daemons(capsys):
    """Check when the "another daemon is still sending heartbeats" warning is printed.

    Three phases, all against the same test instance:

    1. A single controller runs until all daemons are live; the warning must
       not be in captured stdout.
    2. After that controller exits, a second one runs until the sensor
       heartbeat timestamp changes; again no warning is expected.
    3. While the second controller is still running, a third one is started
       and the test waits (up to roughly 60 seconds) for the warning to appear.
    """
    from dagster.daemon.daemon import SensorDaemon

    duplicate_warning = "Another SENSOR daemon is still sending heartbeats"

    with instance_for_test() as instance:
        started_at = pendulum.now("UTC")

        interval = 1

        def _start_controller():
            # Every phase uses a controller configured the same way.
            return daemon_controller_from_instance(
                instance,
                workspace_load_target=EmptyWorkspaceTarget(),
                heartbeat_interval_seconds=interval,
            )

        def _sensor_status(at_time):
            # Status of the sensor daemon evaluated at the given time.
            statuses = get_daemon_statuses(
                instance,
                [SensorDaemon.daemon_type()],
                at_time.float_timestamp,
                heartbeat_interval_seconds=interval,
            )
            return statuses[SensorDaemon.daemon_type()]

        with _start_controller():
            while True:
                now = pendulum.now("UTC")

                if all_daemons_live(instance, heartbeat_interval_seconds=interval):
                    first_output = capsys.readouterr()
                    assert duplicate_warning not in first_output.out
                    break

                elapsed_seconds = (now - started_at).total_seconds()
                if elapsed_seconds > 10:
                    raise Exception("timed out waiting for daemon status")

                time.sleep(0.5)

            # Discard whatever else was printed while the first controller ran.
            capsys.readouterr()

        started_at = pendulum.now("UTC")

        status = _sensor_status(now)
        previous_heartbeat_time = status.last_heartbeat.timestamp

        # No warning when a second controller starts up again
        with _start_controller():
            while True:
                now = pendulum.now("UTC")

                status = _sensor_status(now)

                if (
                    status.last_heartbeat
                    and status.last_heartbeat.timestamp != previous_heartbeat_time
                ):
                    second_output = capsys.readouterr()
                    assert duplicate_warning not in second_output.out
                    break

                elapsed_seconds = (now - started_at).total_seconds()
                if elapsed_seconds > 10:
                    raise Exception("timed out waiting for new daemon status")

                time.sleep(0.5)

            status = _sensor_status(now)
            previous_heartbeat_time = status.last_heartbeat.timestamp

            # Starting up a controller while one is running produces the warning though
            with _start_controller():
                # Wait for heartbeats while two controllers are running at once and there will
                # be a warning
                started_at = pendulum.now("UTC")

                while True:
                    now = pendulum.now("UTC")

                    overlapping_output = capsys.readouterr()
                    if duplicate_warning in overlapping_output.out:
                        break

                    elapsed_seconds = (now - started_at).total_seconds()
                    if elapsed_seconds > 60:
                        raise Exception("timed out waiting for heartbeats")

                    time.sleep(5)
コード例 #6
0
def test_error_daemon(monkeypatch):
    """Check how repeated daemon errors are reported on the heartbeat and later cleared.

    ``SensorDaemon.core_loop`` is replaced by a loop that, while
    ``should_raise_errors`` is true, sleeps briefly and raises a
    ``DagsterInvariantViolationError`` whose message is ``"foobar:<n>"`` with an
    increasing ``n``. The test then verifies that:

    * the daemon threads and heartbeats keep passing the controller's checks
      despite the errors;
    * the status is unhealthy, but healthy when queried with
      ``ignore_errors=True``;
    * the heartbeat holds exactly five errors once more than five have been
      raised, numbered consecutively in descending order, and the numbers keep
      advancing over time;
    * after the loop stops raising, the heartbeat's error list becomes empty.

    Each polling phase raises if it has not finished after roughly 15 seconds.
    """
    with instance_for_test() as instance:
        from dagster.daemon.daemon import SensorDaemon

        # Read by run_loop_error through its closure; flipped to False below.
        should_raise_errors = True

        # Mutable counter so the nested function can update it.
        error_count = {"count": 0}

        def run_loop_error(_, _instance, _workspace):
            if should_raise_errors:
                time.sleep(0.5)
                error_count["count"] = error_count["count"] + 1
                raise DagsterInvariantViolationError("foobar:" + str(error_count["count"]))

            while True:
                yield
                time.sleep(0.5)

        def _get_error_number(error):
            # Recover <n> from a "foobar:<n>" error message.
            error_message = error.message.strip()
            return int(error_message.split("foobar:")[1])

        monkeypatch.setattr(SensorDaemon, "core_loop", run_loop_error)

        heartbeat_interval_seconds = 1

        # Run only the sensor daemon under the controller.
        gen_daemons = lambda instance: [SensorDaemon()]

        init_time = pendulum.now("UTC")
        with daemon_controller_from_instance(
            instance,
            workspace_load_target=EmptyWorkspaceTarget(),
            heartbeat_interval_seconds=heartbeat_interval_seconds,
            gen_daemons=gen_daemons,
            error_interval_seconds=10,
        ) as controller:
            while True:
                now = pendulum.now("UTC")

                # Only inspect errors once the daemon is heartbeating at all.
                if get_daemon_statuses(
                    instance,
                    [SensorDaemon.daemon_type()],
                    heartbeat_interval_seconds=heartbeat_interval_seconds,
                    ignore_errors=True,
                )[SensorDaemon.daemon_type()].healthy:
                    # Despite error, daemon should still be running
                    controller.check_daemon_threads()
                    controller.check_daemon_heartbeats()

                    status = get_daemon_statuses(
                        instance,
                        [SensorDaemon.daemon_type()],
                        now.float_timestamp,
                        heartbeat_interval_seconds=heartbeat_interval_seconds,
                    )[SensorDaemon.daemon_type()]

                    assert status.healthy == False

                    # Wait until the heartbeat holds at least five errors and
                    # the first listed one is numbered above 5.
                    if len(status.last_heartbeat.errors) >= 5:

                        first_error_number = _get_error_number(status.last_heartbeat.errors[0])

                        if first_error_number > 5:

                            # Verify error numbers decrease consecutively
                            assert [
                                _get_error_number(error) for error in status.last_heartbeat.errors
                            ] == list(range(first_error_number, first_error_number - 5, -1))

                            # Unhealthy when errors count, healthy when ignored.
                            assert not get_daemon_statuses(
                                instance,
                                [SensorDaemon.daemon_type()],
                                curr_time_seconds=now.float_timestamp,
                                heartbeat_interval_seconds=heartbeat_interval_seconds,
                            )[SensorDaemon.daemon_type()].healthy
                            assert get_daemon_statuses(
                                instance,
                                [SensorDaemon.daemon_type()],
                                curr_time_seconds=now.float_timestamp,
                                heartbeat_interval_seconds=heartbeat_interval_seconds,
                                ignore_errors=True,
                            )[SensorDaemon.daemon_type()].healthy

                            # Give the loop time to raise further errors.
                            time.sleep(3)

                            status = get_daemon_statuses(
                                instance,
                                [SensorDaemon.daemon_type()],
                                now.float_timestamp,
                                heartbeat_interval_seconds=heartbeat_interval_seconds,
                            )[SensorDaemon.daemon_type()]

                            # The number of stored errors stays at 5 while the
                            # error numbers themselves keep increasing.
                            assert len(status.last_heartbeat.errors) == 5

                            new_first_error_number = _get_error_number(
                                status.last_heartbeat.errors[0]
                            )

                            assert new_first_error_number > first_error_number

                            break

                if (now - init_time).total_seconds() > 15:
                    raise Exception("timed out waiting for heartbeat error")

                time.sleep(0.5)

            # Once the sensor no longer raises errors, they should return to 0 once
            # enough time passes
            should_raise_errors = False
            init_time = pendulum.now("UTC")

            while True:
                now = pendulum.now("UTC")

                status = get_daemon_statuses(
                    instance,
                    [SensorDaemon.daemon_type()],
                    now.float_timestamp,
                    heartbeat_interval_seconds=heartbeat_interval_seconds,
                )[SensorDaemon.daemon_type()]

                # Done once the heartbeat no longer carries any errors.
                if len(status.last_heartbeat.errors) == 0:
                    break

                if (now - init_time).total_seconds() > 15:
                    raise Exception("timed out waiting for heartbeat errors to return to 0")

                time.sleep(0.5)
コード例 #7
0
 def resolve_daemonStatus(self, _graphene_info, daemon_type):
     """Return the ``GrapheneDaemonStatus`` for the single requested daemon type.

     ``daemon_type`` is validated as a string, and the status is fetched with
     ``ignore_errors=True``.
     """
     check.str_param(daemon_type, "daemon_type")

     statuses = get_daemon_statuses(
         self._instance,
         daemon_types=[daemon_type],
         ignore_errors=True,
     )
     requested_status = statuses[daemon_type]
     return GrapheneDaemonStatus(requested_status)