Beispiel #1
0
def test_healthy():
    """Check daemon health/liveness before and after one controller iteration.

    Uses an instance configured with the QueuedRunCoordinator. The daemons
    must not be reported healthy or live before the controller has run an
    iteration, must be reported healthy and live right after it, and must
    not be reported healthy or live when evaluated 100 seconds later.
    """
    queued_coordinator_overrides = {
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
        },
    }

    with instance_for_test(overrides=queued_coordinator_overrides) as instance:
        init_time = pendulum.now("UTC")
        start_ts = init_time.float_timestamp
        # Evaluation time the test expects to fall outside the tolerated window.
        stale_ts = start_ts + 100

        controller = DagsterDaemonController(instance)

        # Before any iteration has run.
        assert not all_daemons_healthy(instance, curr_time_seconds=start_ts)
        assert not all_daemons_live(instance, curr_time_seconds=start_ts)

        controller.run_iteration(init_time)

        # Immediately after one iteration, evaluated at the iteration time.
        assert all_daemons_healthy(instance, curr_time_seconds=start_ts)
        assert all_daemons_live(instance, curr_time_seconds=start_ts)

        # Same stored state, evaluated far enough in the future.
        assert not all_daemons_healthy(instance, curr_time_seconds=stale_ts)
        assert not all_daemons_live(instance, curr_time_seconds=stale_ts)
Beispiel #2
0
def test_heartbeat():
    """Check that daemon health follows a running daemon and then goes stale.

    The instance must be unhealthy before the daemon starts, healthy five
    seconds after it starts, and unhealthy again once the clock is frozen
    past the heartbeat interval plus tolerance (with 5 seconds of slack).
    """
    with instance_for_test() as instance:
        assert all_daemons_healthy(instance) is False

        with start_daemon():
            # Fixed wait before checking health of the started daemon.
            time.sleep(5)
            assert all_daemons_healthy(instance) is True

        stale_offset_seconds = (
            DEFAULT_HEARTBEAT_INTERVAL_SECONDS
            + DEFAULT_DAEMON_HEARTBEAT_TOLERANCE_SECONDS
            + 5
        )
        future_moment = pendulum.now().add(seconds=stale_offset_seconds)

        # Freeze pendulum's clock at the future moment for the final check.
        with pendulum.test(future_moment):
            assert all_daemons_healthy(instance) is False
Beispiel #3
0
def test_heartbeat(tmpdir):
    """Check daemon health transitions for an instance rooted in ``tmpdir``.

    The instance must be unhealthy before the daemon starts, healthy five
    seconds after it starts, and unhealthy again once the clock is frozen
    past the heartbeat interval plus tolerance (with 5 seconds of slack).
    """
    home_dir = tmpdir.strpath

    with setup_instance(home_dir, "") as instance:
        assert all_daemons_healthy(instance) is False

        with start_daemon():
            # Fixed wait before checking health of the started daemon.
            time.sleep(5)
            assert all_daemons_healthy(instance) is True

        stale_offset_seconds = (
            DEFAULT_HEARTBEAT_INTERVAL_SECONDS
            + DEFAULT_DAEMON_HEARTBEAT_TOLERANCE_SECONDS
            + 5
        )
        future_moment = pendulum.now().add(seconds=stale_offset_seconds)

        # Freeze pendulum's clock at the future moment for the final check.
        with pendulum.test(future_moment):
            assert all_daemons_healthy(instance) is False
Beispiel #4
0
def health_check_command():
    # Report whether all daemons of the current Dagster instance are healthy.
    # Prints a status line and exits with status 1 when they are not.
    with DagsterInstance.get() as instance:
        healthy = all_daemons_healthy(instance)

        if not healthy:
            click.echo("Daemon not healthy")
            sys.exit(1)

        click.echo("Daemon healthy")
Beispiel #5
0
def liveness_check_command():
    # Report whether the daemons of the current Dagster instance are running.
    # Prints a status line and exits with status 1 when the check fails.
    #
    # NOTE(review): this liveness command calls all_daemons_healthy and prints
    # "Daemon healthy" on success; confirm whether a liveness-specific check
    # (e.g. all_daemons_live) and message were intended here.
    with DagsterInstance.get() as instance:
        check_passed = all_daemons_healthy(instance)

        if not check_passed:
            click.echo("Daemon(s) not running")
            sys.exit(1)

        click.echo("Daemon healthy")
Beispiel #6
0
def health_check_command():
    # Deprecated health check: emits a deprecation warning, then reports
    # whether all daemons of the current Dagster instance are healthy.
    # Exits with status 1 when they are not.
    warnings.warn("health-check is deprecated. Use liveness-check instead.")

    with DagsterInstance.get() as instance:
        healthy = all_daemons_healthy(instance)

        if not healthy:
            click.echo("Daemon not healthy")
            sys.exit(1)

        click.echo("Daemon healthy")
Beispiel #7
0
def test_healthy():
    """Poll a running daemon controller until the instance is healthy and live.

    Uses an instance configured with the QueuedRunCoordinator. Before the
    controller starts, the daemons must not be healthy or live. Once they
    are both (within a 10 second budget), the controller's daemons are
    checked and the same state must be reported as not healthy and not live
    when evaluated 100 seconds in the future.
    """
    queued_coordinator_overrides = {
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
        },
    }

    with instance_for_test(overrides=queued_coordinator_overrides) as instance:
        init_time = pendulum.now("UTC")
        start_ts = init_time.float_timestamp

        assert not all_daemons_healthy(instance, curr_time_seconds=start_ts)
        assert not all_daemons_live(instance, curr_time_seconds=start_ts)

        with daemon_controller_from_instance(
            instance, wait_for_processes_on_exit=True
        ) as controller:

            # Wait (polling every half second) for the daemons to come up.
            while True:
                now = pendulum.now("UTC")
                now_ts = now.float_timestamp

                ready = all_daemons_healthy(
                    instance, curr_time_seconds=now_ts
                ) and all_daemons_live(instance, curr_time_seconds=now_ts)
                if ready:
                    break

                if (now - init_time).total_seconds() > 10:
                    raise Exception(
                        "timed out waiting for instance to become healthy")

                time.sleep(0.5)

            controller.check_daemons()

            # Evaluate the recorded state well after the successful poll.
            stale_ts = now_ts + 100
            assert not all_daemons_healthy(instance, curr_time_seconds=stale_ts)
            assert not all_daemons_live(instance, curr_time_seconds=stale_ts)
Beispiel #8
0
def test_healthy_with_different_daemons():
    """Check that a differently configured instance is not reported healthy.

    One controller iteration is run against a default test instance; a
    second instance configured with the QueuedRunCoordinator must then not
    be reported healthy at that same time.
    """
    with instance_for_test() as default_instance:
        init_time = pendulum.now("UTC")
        default_controller = DagsterDaemonController(default_instance)
        default_controller.run_iteration(init_time)

    queued_coordinator_overrides = {
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
        },
    }

    with instance_for_test(overrides=queued_coordinator_overrides) as queued_instance:
        assert not all_daemons_healthy(queued_instance, curr_time=init_time)
Beispiel #9
0
def test_monitoring():
    """Check daemons are healthy with run monitoring and a Docker run launcher.

    The instance enables run monitoring with a 5 second poll interval and
    uses the DockerRunLauncher from ``dagster_docker``; five seconds after
    the daemon starts, all daemons must be healthy.
    """
    monitoring_config = {
        "run_monitoring": {"enabled": True, "poll_interval_seconds": 5},
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": {},
        },
    }

    with instance_for_test(monitoring_config) as instance:
        with start_daemon():
            # Fixed wait before checking health of the started daemon.
            time.sleep(5)
            assert all_daemons_healthy(instance)
def test_healthy_with_different_daemons():
    """Check that a second instance is not healthy or live from another's controller.

    While a daemon controller created from a default test instance is
    active, a separate instance configured with the QueuedRunCoordinator
    must be reported as neither healthy nor live at the current time.
    """
    queued_coordinator_overrides = {
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
        },
    }

    with instance_for_test() as instance, DagsterDaemonController.create_from_instance(instance):
        with instance_for_test(overrides=queued_coordinator_overrides) as other_instance:
            now_ts = pendulum.now("UTC").float_timestamp

            assert not all_daemons_healthy(other_instance, curr_time_seconds=now_ts)
            assert not all_daemons_live(other_instance, curr_time_seconds=now_ts)
Beispiel #11
0
def test_error_daemon(monkeypatch):
    """Check that a daemon whose iteration raises is live but not healthy.

    ``SensorDaemon.run_iteration`` is replaced with a generator that raises
    ``DagsterInvariantViolationError("foobar")``. The test polls (up to 10
    seconds) until all daemons are live, then verifies that the sensor
    daemon's status is unhealthy, that its last heartbeat carries exactly
    one error with the expected message, and that the instance as a whole
    is live but not healthy.

    Args:
        monkeypatch: pytest fixture used to patch ``SensorDaemon.run_iteration``.

    Raises:
        Exception: if the daemons are not live within 10 seconds.
    """
    with instance_for_test() as instance:
        from dagster.daemon.daemon import SensorDaemon

        def run_iteration_error(_, _instance, _daemon_shutdown_event,
                                _grpc_server_registry):
            raise DagsterInvariantViolationError("foobar")
            # The unreachable yield makes this a generator function, so the
            # error is raised when the result is iterated, not when called.
            yield  # pylint: disable=unreachable

        monkeypatch.setattr(SensorDaemon, "run_iteration", run_iteration_error)

        init_time = pendulum.now("UTC")
        with daemon_controller_from_instance(
                instance, wait_for_processes_on_exit=True) as controller:
            while True:
                now = pendulum.now("UTC")

                if all_daemons_live(instance):
                    # Despite error, daemon should still be running
                    controller.check_daemons()

                    status = get_daemon_status(instance,
                                               SensorDaemon.daemon_type(),
                                               now.float_timestamp)

                    # Identity check: require an actual False, not merely a
                    # falsy value.
                    assert status.healthy is False
                    assert len(status.last_heartbeat.errors) == 1
                    assert (
                        status.last_heartbeat.errors[0].message.strip() ==
                        "dagster.core.errors.DagsterInvariantViolationError: foobar"
                    )
                    assert not all_daemons_healthy(
                        instance, curr_time_seconds=now.float_timestamp)
                    assert all_daemons_live(
                        instance, curr_time_seconds=now.float_timestamp)
                    break

                if (now - init_time).total_seconds() > 10:
                    raise Exception("timed out waiting for heartbeat error")

                time.sleep(0.5)
Beispiel #12
0
def test_error_daemon(monkeypatch):
    """Check that a daemon whose iteration raises is live but not healthy.

    ``SensorDaemon.run_iteration`` is replaced with a generator that raises
    ``DagsterInvariantViolationError("foobar")``. After a single controller
    iteration, the sensor daemon's status must be unhealthy, its last
    heartbeat must carry exactly one error with the expected message, and
    the instance as a whole must be live but not healthy.

    Args:
        monkeypatch: pytest fixture used to patch ``SensorDaemon.run_iteration``.
    """
    with instance_for_test(overrides={}) as instance:
        from dagster.daemon.daemon import SensorDaemon

        def run_iteration_error(_):
            raise DagsterInvariantViolationError("foobar")
            # The unreachable yield makes this a generator function, so the
            # error is raised when the result is iterated, not when called.
            yield  # pylint: disable=unreachable

        monkeypatch.setattr(SensorDaemon, "run_iteration", run_iteration_error)
        controller = DagsterDaemonController(instance)
        init_time = pendulum.now("UTC")
        controller.run_iteration(init_time)

        status = get_daemon_status(instance, SensorDaemon.daemon_type(),
                                   init_time.float_timestamp)
        # Identity check: require an actual False, not merely a falsy value.
        assert status.healthy is False
        assert len(status.last_heartbeat.errors) == 1
        assert (status.last_heartbeat.errors[0].message.strip() ==
                "dagster.core.errors.DagsterInvariantViolationError: foobar")
        assert not all_daemons_healthy(
            instance, curr_time_seconds=init_time.float_timestamp)
        assert all_daemons_live(instance,
                                curr_time_seconds=init_time.float_timestamp)
def test_healthy():
    """Poll a controller with a 1 second heartbeat interval until healthy and live.

    Uses an instance configured with the QueuedRunCoordinator. Before the
    controller starts, the daemons must not be healthy or live. Once they
    are both (within a 10 second budget), the controller's threads and
    heartbeats are checked, and the same state must be reported as not
    healthy and not live when evaluated just past the default heartbeat
    tolerance.
    """
    queued_coordinator_overrides = {
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
        },
    }

    with instance_for_test(overrides=queued_coordinator_overrides) as instance:
        init_time = pendulum.now("UTC")

        interval = 1

        def _check_kwargs(timestamp):
            # Keyword arguments shared by every health/liveness query below.
            return {
                "curr_time_seconds": timestamp,
                "heartbeat_interval_seconds": interval,
            }

        start_ts = init_time.float_timestamp
        assert not all_daemons_healthy(instance, **_check_kwargs(start_ts))
        assert not all_daemons_live(instance, **_check_kwargs(start_ts))

        with daemon_controller_from_instance(
            instance, heartbeat_interval_seconds=interval
        ) as controller:

            # Wait (polling every half second) for the daemons to come up.
            while True:
                now = pendulum.now("UTC")
                now_ts = now.float_timestamp

                ready = all_daemons_healthy(
                    instance, **_check_kwargs(now_ts)
                ) and all_daemons_live(instance, **_check_kwargs(now_ts))
                if ready:
                    break

                if (now - init_time).total_seconds() > 10:
                    raise Exception(
                        "timed out waiting for instance to become healthy")

                time.sleep(0.5)

            controller.check_daemon_threads()
            controller.check_daemon_heartbeats()

            # One second past the default tolerance, measured from the
            # timestamp of the successful poll.
            stale_ts = now_ts + DEFAULT_DAEMON_HEARTBEAT_TOLERANCE_SECONDS + 1

            assert not all_daemons_healthy(instance, **_check_kwargs(stale_ts))
            assert not all_daemons_live(instance, **_check_kwargs(stale_ts))