Esempio n. 1
0
def test_healthy():
    """Check daemon health and liveness around a single controller iteration.

    With a QueuedRunCoordinator configured, the instance must report its
    daemons as neither healthy nor live before any iteration has run, as both
    healthy and live at the iteration time, and as neither again when queried
    100 seconds after that time.
    """
    coordinator_overrides = {
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
        },
    }

    with instance_for_test(overrides=coordinator_overrides) as instance:
        start = pendulum.now("UTC")
        start_seconds = start.float_timestamp
        stale_seconds = start_seconds + 100

        controller = DagsterDaemonController(instance)

        # No iteration has run yet.
        assert not all_daemons_healthy(instance, curr_time_seconds=start_seconds)
        assert not all_daemons_live(instance, curr_time_seconds=start_seconds)

        controller.run_iteration(start)

        # Queried at the iteration time itself.
        assert all_daemons_healthy(instance, curr_time_seconds=start_seconds)
        assert all_daemons_live(instance, curr_time_seconds=start_seconds)

        # Queried 100 seconds after the only iteration.
        assert not all_daemons_healthy(instance, curr_time_seconds=stale_seconds)
        assert not all_daemons_live(instance, curr_time_seconds=stale_seconds)
Esempio n. 2
0
def test_different_intervals(caplog):
    """Check that the scheduler and run-coordinator daemons iterate independently.

    The run coordinator is configured with ``dequeue_interval_seconds: 5``.
    Both daemons must run on the first controller iteration; an iteration
    5 seconds later must advance only the run coordinator; an iteration
    30 seconds after the start must advance both.
    """
    overrides = {
        "scheduler": {
            "module": "dagster.core.scheduler",
            "class": "DagsterDaemonScheduler",
        },
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
            "config": {"dequeue_interval_seconds": 5},
        },
    }

    with instance_for_test(overrides=overrides) as instance:
        start = pendulum.now("UTC")
        controller = DagsterDaemonController(instance)

        # Constructing the controller must log exactly one record listing the daemons.
        expected_startup_record = (
            "dagster-daemon",
            logging.INFO,
            "instance is configured with the following daemons: ['QueuedRunCoordinatorDaemon', 'SchedulerDaemon', 'SensorDaemon']",
        )
        assert caplog.record_tuples == [expected_startup_record]

        controller.run_iteration(start)

        scheduler = controller.get_daemon(SchedulerDaemon.__name__)
        coordinator = controller.get_daemon(QueuedRunCoordinatorDaemon.__name__)

        # First iteration: both daemons run.
        assert scheduler
        assert scheduler.last_iteration_time == start
        assert _scheduler_ran(caplog)

        assert coordinator
        assert coordinator.last_iteration_time == start
        assert _run_coordinator_ran(caplog)
        caplog.clear()

        # 5 seconds later: only the run coordinator iterates again.
        after_five = start + datetime.timedelta(seconds=5)
        controller.run_iteration(after_five)

        assert scheduler.last_iteration_time == start
        assert not _scheduler_ran(caplog)

        assert coordinator.last_iteration_time == after_five
        assert _run_coordinator_ran(caplog)
        caplog.clear()

        # 30 seconds after the start: both daemons iterate again.
        after_thirty = start + datetime.timedelta(seconds=30)
        controller.run_iteration(after_thirty)

        assert scheduler.last_iteration_time == after_thirty
        assert _scheduler_ran(caplog)

        assert coordinator.last_iteration_time == after_thirty
        assert _run_coordinator_ran(caplog)
Esempio n. 3
0
def run_command():
    """Drive a DagsterDaemonController in an endless loop.

    Builds a controller for the instance returned by ``DagsterInstance.get()``
    and runs one iteration with the current UTC time every half second.
    This function never returns on its own.
    """
    with DagsterInstance.get() as instance:
        controller = DagsterDaemonController(instance)

        while True:
            controller.run_iteration(pendulum.now("UTC"))
            time.sleep(0.5)
Esempio n. 4
0
def run_command():
    """Drive a DagsterDaemonController in an endless loop.

    Refuses to start when the instance returned by ``DagsterInstance.get()``
    is ephemeral; otherwise runs one controller iteration with the current
    UTC time every half second. This function never returns on its own.

    Raises:
        Exception: if ``instance.is_ephemeral`` is true.
    """
    with DagsterInstance.get() as instance:
        if instance.is_ephemeral:
            ephemeral_message = (
                "dagster-daemon can't run using an in-memory instance. Make sure "
                "the DAGSTER_HOME environment variable has been set correctly and that "
                "you have created a dagster.yaml file there."
            )
            raise Exception(ephemeral_message)

        controller = DagsterDaemonController(instance)

        while True:
            controller.run_iteration(pendulum.now("UTC"))
            time.sleep(0.5)
Esempio n. 5
0
def test_healthy_with_different_daemons():
    """Check health reporting for an instance configured with a run coordinator.

    A controller is iterated once against a default test instance. A second
    test instance, created with a QueuedRunCoordinator override, must then
    not report all daemons healthy at that same time.
    """
    with instance_for_test() as default_instance:
        init_time = pendulum.now("UTC")
        default_controller = DagsterDaemonController(default_instance)
        default_controller.run_iteration(init_time)

    coordinator_overrides = {
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
        },
    }

    with instance_for_test(overrides=coordinator_overrides) as queued_instance:
        assert not all_daemons_healthy(queued_instance, curr_time=init_time)
Esempio n. 6
0
def test_empty_instance():
    """Constructing a controller for a default test instance must raise.

    The raised exception's message has to match
    "No daemons configured on the DagsterInstance".
    """
    expected_message = re.escape("No daemons configured on the DagsterInstance")

    with instance_for_test() as instance:
        with pytest.raises(Exception, match=expected_message):
            DagsterDaemonController(instance)
Esempio n. 7
0
def run_command():
    """Run the daemon controller and periodically check on its daemons.

    Refuses to start on an ephemeral instance. Otherwise it opens a
    DagsterDaemonController and loops forever, sleeping one second at a time
    and calling ``controller.check_daemons()`` once at least
    ``2 * DAEMON_HEARTBEAT_TOLERANCE_SECONDS`` have elapsed since the
    previous check.

    Raises:
        Exception: if ``instance.is_ephemeral`` is true.
    """
    with capture_interrupts(), DagsterInstance.get() as instance:
        if instance.is_ephemeral:
            raise Exception(
                "dagster-daemon can't run using an in-memory instance. Make sure "
                "the DAGSTER_HOME environment variable has been set correctly and that "
                "you have created a dagster.yaml file there."
            )

        daemons = create_daemons_from_instance(instance)
        with DagsterDaemonController(instance, daemons) as controller:
            last_check = pendulum.now("UTC")

            while True:
                # Give a daemon a long stretch of being unhealthy before
                # check_daemons() gets a chance to act on a hung or failed one.
                with raise_interrupts_as(KeyboardInterrupt):
                    time.sleep(1)

                    elapsed_seconds = (pendulum.now("UTC") - last_check).total_seconds()
                    if elapsed_seconds < 2 * DAEMON_HEARTBEAT_TOLERANCE_SECONDS:
                        continue

                controller.check_daemons()
                last_check = pendulum.now("UTC")
Esempio n. 8
0
def test_error_daemon(monkeypatch):
    """A daemon whose iteration raises must be reported as unhealthy.

    ``SensorDaemon.run_iteration`` is patched to raise
    ``DagsterInvariantViolationError("foobar")``. After one controller
    iteration the sensor daemon's status must be unhealthy and its last
    heartbeat must carry the error message.
    """
    with instance_for_test(overrides={}) as instance:
        from dagster.daemon.daemon import SensorDaemon

        def run_iteration_error(_):
            raise DagsterInvariantViolationError("foobar")

        monkeypatch.setattr(SensorDaemon, "run_iteration", run_iteration_error)
        controller = DagsterDaemonController(instance)
        init_time = pendulum.now("UTC")
        controller.run_iteration(init_time)

        status = get_daemon_status(
            instance, SensorDaemon.daemon_type(), init_time.float_timestamp
        )
        # Identity check: the status must be explicitly False, not merely falsy.
        assert status.healthy is False
        assert (
            status.last_heartbeat.error.message.strip()
            == "dagster.core.errors.DagsterInvariantViolationError: foobar"
        )
def test_warn_multiple_daemons(capsys):
    """Check when the takeover warning for the sensor daemon is printed.

    Two controllers are built on the same instance. The first iteration of
    each, at the initial time, must not print the warning to stdout. When the
    first controller iterates again 100 seconds later, the warning must appear.
    """
    takeover_warning = "Taking over from another SENSOR daemon process"

    with instance_for_test() as instance:
        first_time = pendulum.now("UTC")
        later_time = first_time.add(seconds=100)

        first_controller = DagsterDaemonController(instance)
        first_controller.run_iteration(first_time)
        assert takeover_warning not in capsys.readouterr().out

        second_controller = DagsterDaemonController(instance)
        second_controller.run_iteration(first_time)
        assert takeover_warning not in capsys.readouterr().out

        first_controller.run_iteration(later_time)
        assert takeover_warning in capsys.readouterr().out
Esempio n. 10
0
def test_required():
    """``DagsterDaemonController.required`` must be truthy with a QueuedRunCoordinator."""
    coordinator_overrides = {
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
        },
    }

    with instance_for_test(overrides=coordinator_overrides) as instance:
        assert DagsterDaemonController.required(instance)
Esempio n. 11
0
def test_multiple_error_daemon(monkeypatch):
    """Every error yielded by a daemon iteration must end up on its heartbeat.

    ``SensorDaemon.run_iteration`` is patched with a generator yielding two
    ``SerializableErrorInfo`` objects. After one controller iteration the
    sensor daemon must be unhealthy and its last heartbeat must list both
    errors in the order they were yielded.
    """
    with instance_for_test(overrides={}) as instance:
        from dagster.daemon.daemon import SensorDaemon

        def run_iteration_error(_):
            # Positional args, per the original note: message, stack, cls_name, cause.
            yield SerializableErrorInfo("foobar", None, None, None)
            yield SerializableErrorInfo("bizbuz", None, None, None)

        monkeypatch.setattr(SensorDaemon, "run_iteration", run_iteration_error)
        controller = DagsterDaemonController(instance)
        init_time = pendulum.now("UTC")
        controller.run_iteration(init_time)

        status = get_daemon_status(
            instance, SensorDaemon.daemon_type(), init_time.float_timestamp
        )
        # Identity check: the status must be explicitly False, not merely falsy.
        assert status.healthy is False
        assert len(status.last_heartbeat.errors) == 2
        assert status.last_heartbeat.errors[0].message.strip() == "foobar"
        assert status.last_heartbeat.errors[1].message.strip() == "bizbuz"
Esempio n. 12
0
def test_scheduler_instance():
    """With a DagsterDaemonScheduler configured, the controller holds two daemons.

    One of those daemons must be a ``SchedulerDaemon``.
    """
    scheduler_overrides = {
        "scheduler": {
            "module": "dagster.core.scheduler",
            "class": "DagsterDaemonScheduler",
        },
    }

    with instance_for_test(overrides=scheduler_overrides) as instance:
        controller_daemons = DagsterDaemonController(instance).daemons

        assert len(controller_daemons) == 2
        assert any(isinstance(candidate, SchedulerDaemon) for candidate in controller_daemons)
Esempio n. 13
0
def test_backfill_instance():
    """With ``backfill.daemon_enabled`` set, the controller holds three daemons.

    One of those daemons must be a ``BackfillDaemon``.
    """
    backfill_overrides = {"backfill": {"daemon_enabled": True}}

    with instance_for_test(overrides=backfill_overrides) as instance:
        with DagsterDaemonController.create_from_instance(instance) as controller:
            controller_daemons = controller.daemons

            assert len(controller_daemons) == 3
            assert any(isinstance(candidate, BackfillDaemon) for candidate in controller_daemons)
Esempio n. 14
0
def test_error_daemon(monkeypatch):
    """A daemon whose iteration raises is unhealthy but still counts as live.

    ``SensorDaemon.run_iteration`` is patched with a generator that raises
    ``DagsterInvariantViolationError("foobar")`` on first use. After one
    controller iteration the sensor daemon's heartbeat must carry exactly that
    error, the instance must not report all daemons healthy, and it must still
    report all daemons live.
    """
    with instance_for_test(overrides={}) as instance:
        from dagster.daemon.daemon import SensorDaemon

        def run_iteration_error(_):
            raise DagsterInvariantViolationError("foobar")
            # The unreachable yield keeps this function a generator.
            yield  # pylint: disable=unreachable

        monkeypatch.setattr(SensorDaemon, "run_iteration", run_iteration_error)
        controller = DagsterDaemonController(instance)
        init_time = pendulum.now("UTC")
        controller.run_iteration(init_time)

        init_seconds = init_time.float_timestamp
        status = get_daemon_status(instance, SensorDaemon.daemon_type(), init_seconds)
        # Identity check: the status must be explicitly False, not merely falsy.
        assert status.healthy is False
        assert len(status.last_heartbeat.errors) == 1
        assert (
            status.last_heartbeat.errors[0].message.strip()
            == "dagster.core.errors.DagsterInvariantViolationError: foobar"
        )
        assert not all_daemons_healthy(instance, curr_time_seconds=init_seconds)
        assert all_daemons_live(instance, curr_time_seconds=init_seconds)
Esempio n. 15
0
def test_run_coordinator_instance():
    """With a QueuedRunCoordinator configured, the controller holds three daemons.

    One of those daemons must be a ``QueuedRunCoordinatorDaemon``.
    """
    coordinator_overrides = {
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
        },
    }

    with instance_for_test(overrides=coordinator_overrides) as instance:
        with DagsterDaemonController.create_from_instance(instance) as controller:
            controller_daemons = controller.daemons

            assert len(controller_daemons) == 3
            assert any(
                isinstance(candidate, QueuedRunCoordinatorDaemon)
                for candidate in controller_daemons
            )
Esempio n. 16
0
def test_run_coordinator_instance():
    """With a QueuedRunCoordinator configured, the controller holds one daemon.

    That single daemon must be a ``QueuedRunCoordinatorDaemon``.
    """
    coordinator_overrides = {
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
        },
    }

    with instance_for_test(overrides=coordinator_overrides) as instance:
        controller_daemons = DagsterDaemonController(instance).daemons

        assert len(controller_daemons) == 1
        assert isinstance(controller_daemons[0], QueuedRunCoordinatorDaemon)
Esempio n. 17
0
def run_command():
    """Run the daemon controller and periodically check on its daemons.

    Refuses to start on an ephemeral instance. Otherwise it opens a
    DagsterDaemonController and loops forever, sleeping for
    ``2 * DAEMON_HEARTBEAT_TOLERANCE_SECONDS`` before each call to
    ``controller.check_daemons()``.

    Raises:
        Exception: if ``instance.is_ephemeral`` is true.
    """
    with DagsterInstance.get() as instance:
        if instance.is_ephemeral:
            raise Exception(
                "dagster-daemon can't run using an in-memory instance. Make sure "
                "the DAGSTER_HOME environment variable has been set correctly and that "
                "you have created a dagster.yaml file there."
            )

        daemons = create_daemons_from_instance(instance)
        with DagsterDaemonController(instance, daemons) as controller:
            while True:
                # Give a daemon a long stretch of being unhealthy before
                # check_daemons() gets a chance to act on a hung or failed one.
                time.sleep(2 * DAEMON_HEARTBEAT_TOLERANCE_SECONDS)
                controller.check_daemons()
Esempio n. 18
0
def test_set_sensor_interval(caplog):
    """Wait for the sensor daemon to run once with a 5-second interval configured.

    Starts a controller on an instance whose ``sensor_settings`` set
    ``interval_seconds`` to 5 and polls every half second until
    ``_sensor_ran(caplog)`` equals 1.

    Raises:
        Exception: if that has not happened after more than 10 seconds.
    """
    sensor_overrides = {"sensor_settings": {"interval_seconds": 5}}

    with instance_for_test(overrides=sensor_overrides) as instance:
        wait_started = pendulum.now("UTC")

        with DagsterDaemonController.create_from_instance(instance):
            # Poll until the sensor daemon is reported to have run exactly once.
            while _sensor_ran(caplog) != 1:
                waited_seconds = (pendulum.now("UTC") - wait_started).total_seconds()
                if waited_seconds > 10:
                    raise Exception("Timed out waiting for sensor daemon to execute")

                time.sleep(0.5)
Esempio n. 19
0
def test_different_intervals(caplog):
    """Check that running daemons iterate at different rates.

    The run coordinator is configured with ``dequeue_interval_seconds: 5``.
    Phase one polls until ``_run_coordinator_ran(caplog)`` equals 3 and then
    requires ``_scheduler_ran(caplog)`` to equal 1. Phase two polls until
    ``_scheduler_ran(caplog)`` equals 2 and then requires the run-coordinator
    count to exceed 2. Each phase gives up after more than 45 seconds.
    """
    overrides = {
        "scheduler": {
            "module": "dagster.core.scheduler",
            "class": "DagsterDaemonScheduler",
        },
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
            "config": {"dequeue_interval_seconds": 5},
        },
    }

    with instance_for_test(overrides=overrides) as instance:
        phase_started = pendulum.now("UTC")
        with DagsterDaemonController.create_from_instance(instance):
            # Phase one: three run-coordinator runs against a single scheduler run.
            while True:
                poll_time = pendulum.now("UTC")
                if _run_coordinator_ran(caplog) == 3:
                    assert _scheduler_ran(caplog) == 1
                    break

                if (poll_time - phase_started).total_seconds() > 45:
                    raise Exception("Timed out waiting for run queue daemon to execute twice")

                time.sleep(0.5)

            # Phase two: restart the clock and wait for the scheduler's second run.
            phase_started = pendulum.now("UTC")
            while True:
                poll_time = pendulum.now("UTC")

                if _scheduler_ran(caplog) == 2:
                    assert _run_coordinator_ran(caplog) > 2
                    break

                if (poll_time - phase_started).total_seconds() > 45:
                    raise Exception("Timed out waiting for schedule daemon to execute twice")

                time.sleep(0.5)
def test_healthy_with_different_daemons():
    """Check health reporting for an instance configured with a run coordinator.

    While a controller is running on a default test instance, a second test
    instance created with a QueuedRunCoordinator override must report its
    daemons as neither all healthy nor all live at the current time.
    """
    coordinator_overrides = {
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
        },
    }

    with instance_for_test() as instance:
        with DagsterDaemonController.create_from_instance(instance):
            with instance_for_test(overrides=coordinator_overrides) as other_instance:
                now_seconds = pendulum.now("UTC").float_timestamp

                assert not all_daemons_healthy(other_instance, curr_time_seconds=now_seconds)
                assert not all_daemons_live(other_instance, curr_time_seconds=now_seconds)
Esempio n. 21
0
def test_healthy():
    """Check ``controller.daemon_healthy`` around a single iteration.

    With a QueuedRunCoordinator configured, the controller must report
    unhealthy before any iteration, healthy at the iteration time, and
    unhealthy again when queried 60 seconds after that time.
    """
    coordinator_overrides = {
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
        },
    }

    with instance_for_test(overrides=coordinator_overrides) as instance:
        start = pendulum.now("UTC")
        stale_time = start + datetime.timedelta(seconds=60)

        controller = DagsterDaemonController(instance)

        # No iteration has run yet.
        assert not controller.daemon_healthy(instance, curr_time=start)

        controller.run_iteration(start)
        assert controller.daemon_healthy(instance, curr_time=start)

        # Queried 60 seconds after the only iteration.
        assert not controller.daemon_healthy(instance, curr_time=stale_time)
def test_thread_die_daemon(monkeypatch):
    """``check_daemons`` must fail once the sensor daemon's iteration has died.

    ``SensorDaemon.run_iteration`` is patched to record that it ran and then
    raise ``KeyboardInterrupt``. The test polls every half second until the
    patched iteration has run and the scheduler daemon is healthy, then
    expects ``controller.check_daemons()`` to raise with a message naming
    ``['SENSOR']``.

    Raises:
        Exception: if ``check_daemons`` does not raise, or if the condition
            is not reached after more than 10 seconds.
    """
    with instance_for_test(overrides={}) as instance:
        from dagster.daemon.daemon import SchedulerDaemon, SensorDaemon

        # Mutable cell so the patched method can report back to the test.
        iteration_ran = {"ran": False}

        def run_iteration_error(_, _instance):
            iteration_ran["ran"] = True
            raise KeyboardInterrupt
            # The unreachable yield keeps this function a generator.
            yield  # pylint: disable=unreachable

        monkeypatch.setattr(SensorDaemon, "run_iteration", run_iteration_error)

        expected_fragment = "Stopping dagster-daemon process since the following threads are no longer sending heartbeats: ['SENSOR']"

        wait_started = pendulum.now("UTC")
        with DagsterDaemonController.create_from_instance(instance) as controller:
            while True:
                poll_time = pendulum.now("UTC")

                scheduler_status = get_daemon_status(
                    instance, SchedulerDaemon.daemon_type(), poll_time.float_timestamp
                )

                if iteration_ran["ran"] and scheduler_status.healthy:
                    try:
                        # Expected to raise: the sensor thread was interrupted.
                        controller.check_daemons()
                    except Exception as err:  # pylint: disable=broad-except
                        assert expected_fragment in str(err)
                        break

                    # Reached only when check_daemons() returned normally.
                    raise Exception("check_daemons should fail if a thread has died")

                if (poll_time - wait_started).total_seconds() > 10:
                    raise Exception("timed out waiting for heartbeat error")

                time.sleep(0.5)
def test_healthy():
    """Check daemon health and liveness before, during and after a controller run.

    Before a controller starts, the instance must report its daemons as
    neither all healthy nor all live. Once a running controller makes both
    checks pass, ``check_daemons()`` is called and both checks must fail
    again for a time 100 seconds later.

    Raises:
        Exception: if the instance is not healthy and live after more than
            10 seconds.
    """
    coordinator_overrides = {
        "run_coordinator": {
            "module": "dagster.core.run_coordinator.queued_run_coordinator",
            "class": "QueuedRunCoordinator",
        },
    }

    with instance_for_test(overrides=coordinator_overrides) as instance:
        wait_started = pendulum.now("UTC")
        started_seconds = wait_started.float_timestamp

        # No controller is running yet.
        assert not all_daemons_healthy(instance, curr_time_seconds=started_seconds)
        assert not all_daemons_live(instance, curr_time_seconds=started_seconds)

        with DagsterDaemonController.create_from_instance(instance) as controller:
            while True:
                poll_time = pendulum.now("UTC")
                poll_seconds = poll_time.float_timestamp

                healthy_and_live = all_daemons_healthy(
                    instance, curr_time_seconds=poll_seconds
                ) and all_daemons_live(instance, curr_time_seconds=poll_seconds)

                if healthy_and_live:
                    controller.check_daemons()

                    stale_seconds = poll_seconds + 100
                    assert not all_daemons_healthy(instance, curr_time_seconds=stale_seconds)
                    assert not all_daemons_live(instance, curr_time_seconds=stale_seconds)
                    break

                if (poll_time - wait_started).total_seconds() > 10:
                    raise Exception("timed out waiting for instance to become healthy")

                time.sleep(0.5)
def test_multiple_error_daemon(monkeypatch):
    """Every error yielded by a daemon iteration must end up on its heartbeat.

    ``SensorDaemon.run_iteration`` is patched with a generator yielding two
    ``SerializableErrorInfo`` objects. Once the running controller's daemons
    are all live, ``check_daemons()`` must succeed, the sensor daemon must be
    unhealthy, and its last heartbeat must list both errors in order.

    Raises:
        Exception: if the daemons are not all live after more than 10 seconds.
    """
    with instance_for_test(overrides={}) as instance:
        from dagster.daemon.daemon import SensorDaemon

        def run_iteration_error(_, _instance):
            # Positional args, per the original note: message, stack, cls_name, cause.
            yield SerializableErrorInfo("foobar", None, None, None)
            yield SerializableErrorInfo("bizbuz", None, None, None)

        monkeypatch.setattr(SensorDaemon, "run_iteration", run_iteration_error)

        init_time = pendulum.now("UTC")

        with DagsterDaemonController.create_from_instance(instance) as controller:
            while True:

                now = pendulum.now("UTC")

                if all_daemons_live(instance):

                    # Despite error, daemon should still be running
                    controller.check_daemons()

                    status = get_daemon_status(
                        instance, SensorDaemon.daemon_type(), now.float_timestamp
                    )

                    # Identity check: the status must be explicitly False, not merely falsy.
                    assert status.healthy is False
                    assert len(status.last_heartbeat.errors) == 2
                    assert status.last_heartbeat.errors[0].message.strip() == "foobar"
                    assert status.last_heartbeat.errors[1].message.strip() == "bizbuz"
                    break

                if (now - init_time).total_seconds() > 10:
                    raise Exception("timed out waiting for heartbeat error")

                time.sleep(0.5)
def test_error_daemon(monkeypatch):
    """A daemon whose iteration raises is unhealthy but still counts as live.

    ``SensorDaemon.run_iteration`` is patched with a generator that raises
    ``DagsterInvariantViolationError("foobar")`` on first use. Once the
    running controller's daemons are all live, ``check_daemons()`` must
    succeed, the sensor heartbeat must carry exactly that error, and the
    instance must report not-all-healthy while still reporting all-live.

    Raises:
        Exception: if the daemons are not all live after more than 10 seconds.
    """
    with instance_for_test(overrides={}) as instance:
        from dagster.daemon.daemon import SensorDaemon

        def run_iteration_error(_, _instance):
            raise DagsterInvariantViolationError("foobar")
            # The unreachable yield keeps this function a generator.
            yield  # pylint: disable=unreachable

        monkeypatch.setattr(SensorDaemon, "run_iteration", run_iteration_error)

        init_time = pendulum.now("UTC")
        with DagsterDaemonController.create_from_instance(instance) as controller:
            while True:
                now = pendulum.now("UTC")

                if all_daemons_live(instance):
                    # Despite error, daemon should still be running
                    controller.check_daemons()

                    status = get_daemon_status(
                        instance, SensorDaemon.daemon_type(), now.float_timestamp
                    )

                    # Identity check: the status must be explicitly False, not merely falsy.
                    assert status.healthy is False
                    assert len(status.last_heartbeat.errors) == 1
                    assert (
                        status.last_heartbeat.errors[0].message.strip()
                        == "dagster.core.errors.DagsterInvariantViolationError: foobar"
                    )
                    assert not all_daemons_healthy(instance, curr_time_seconds=now.float_timestamp)
                    assert all_daemons_live(instance, curr_time_seconds=now.float_timestamp)
                    break

                if (now - init_time).total_seconds() > 10:
                    raise Exception("timed out waiting for heartbeat error")

                time.sleep(0.5)
def test_warn_multiple_daemons(capsys):
    """Check when the takeover warning for the sensor daemon is printed.

    Three stages on one instance:

    1. A single controller runs until all daemons are live; no warning.
    2. After it exits, a second controller runs until the sensor heartbeat
       timestamp changes; still no warning.
    3. While the second controller is still running, a third is started; the
       warning must show up on stdout within 120 seconds.

    Raises:
        Exception: if any stage exceeds its time limit.
    """
    from dagster.daemon.daemon import SensorDaemon

    takeover_warning = "Taking over from another SENSOR daemon process"

    with instance_for_test() as instance:
        stage_started = pendulum.now("UTC")

        # Stage 1: a lone controller must come up without the warning.
        with DagsterDaemonController.create_from_instance(instance):
            while True:
                poll_time = pendulum.now("UTC")

                if all_daemons_live(instance):
                    assert takeover_warning not in capsys.readouterr().out
                    break

                if (poll_time - stage_started).total_seconds() > 10:
                    raise Exception("timed out waiting for daemon status")

                time.sleep(0.5)

            # Drain anything printed so far.
            capsys.readouterr()

        stage_started = pendulum.now("UTC")

        sensor_status = get_daemon_status(
            instance, SensorDaemon.daemon_type(), poll_time.float_timestamp
        )
        previous_heartbeat = sensor_status.last_heartbeat.timestamp

        # Stage 2: no warning when a second controller starts up again
        with DagsterDaemonController.create_from_instance(instance):
            while True:
                poll_time = pendulum.now("UTC")

                sensor_status = get_daemon_status(
                    instance, SensorDaemon.daemon_type(), poll_time.float_timestamp
                )

                heartbeat = sensor_status.last_heartbeat
                if heartbeat and heartbeat.timestamp != previous_heartbeat:
                    assert takeover_warning not in capsys.readouterr().out
                    break

                if (poll_time - stage_started).total_seconds() > 10:
                    raise Exception("timed out waiting for new daemon status")

                time.sleep(0.5)

            sensor_status = get_daemon_status(
                instance, SensorDaemon.daemon_type(), poll_time.float_timestamp
            )
            previous_heartbeat = sensor_status.last_heartbeat.timestamp

            # Stage 3: starting a controller while one is running produces the warning
            with DagsterDaemonController.create_from_instance(instance):
                # Two controllers now run at once; wait for their heartbeats
                # to trigger the warning.
                stage_started = pendulum.now("UTC")

                while True:
                    poll_time = pendulum.now("UTC")

                    if takeover_warning in capsys.readouterr().out:
                        break

                    if (poll_time - stage_started).total_seconds() > 120:
                        raise Exception("timed out waiting for heartbeats")

                    time.sleep(5)