Code Example #1 (test_bad_load): launching scheduled runs for an unloadable schedule origin creates no runs and no ticks, and the load error is written to the captured output.
def test_bad_load(capfd):
    with schedule_instance() as instance:
        fake_origin = _get_unloadable_schedule_origin()
        initial_datetime = pendulum.datetime(
            year=2019, month=2, day=27, hour=23, minute=59, second=59,
        )
        with pendulum.test(initial_datetime):
            schedule_state = ScheduleState(
                fake_origin, ScheduleStatus.RUNNING, "0 0 * * *", pendulum.now("UTC").timestamp(),
            )
            instance.add_schedule_state(schedule_state)

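        # One second later it is midnight UTC, but the unloadable origin still produces no runs or ticks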
        initial_datetime = initial_datetime.add(seconds=1)
        with pendulum.test(initial_datetime):
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

            assert instance.get_runs_count() == 0

            ticks = instance.get_schedule_ticks(fake_origin.get_id())

            assert len(ticks) == 0

            captured = capfd.readouterr()
            assert "Scheduler failed for also_doesnt_exist" in captured.out
            assert "doesnt_exist not found at module scope" in captured.out

        initial_datetime = initial_datetime.add(days=1)
        with pendulum.test(initial_datetime):
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(fake_origin.get_id())
            assert len(ticks) == 0
Code Example #2 (test_schedule_without_timezone): a schedule that does not specify an execution_timezone is never run, and the scheduler logs the reason.
def test_schedule_without_timezone(external_repo_context, capfd):
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("daily_schedule_without_timezone")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = pendulum.datetime(year=2019, month=2, day=27, hour=0, minute=0, second=0)

        with pendulum.test(initial_datetime):

            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

            assert instance.get_runs_count() == 0

            ticks = instance.get_schedule_ticks(schedule_origin.get_id())

            assert len(ticks) == 0

            captured = capfd.readouterr()

            assert (
                "Scheduler could not run for daily_schedule_without_timezone as it did not specify "
                "an execution_timezone in its definition." in captured.out
            )

        initial_datetime = initial_datetime.add(days=1)
        with pendulum.test(initial_datetime):
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0
Code Example #3 (test_bad_env_fn): a run_config_fn that raises produces a FAILURE tick and no runs.
def test_bad_env_fn(external_repo_context, capfd):
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("bad_env_fn_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = pendulum.datetime(year=2019, month=2, day=27, hour=0, minute=0, second=0)
        with pendulum.test(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1

            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.FAILURE,
                None,
                "Error occurred during the execution of run_config_fn for "
                "schedule bad_env_fn_schedule",
            )

            captured = capfd.readouterr()

            assert "Failed to fetch schedule data for bad_env_fn_schedule: " in captured.out

            assert (
                "Error occurred during the execution of run_config_fn for "
                "schedule bad_env_fn_schedule" in captured.out
            )
Code Example #4 (test_skip): a should_execute that returns False records a SKIPPED tick and launches no run.
def test_skip(external_repo_context, capfd):
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("skip_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = pendulum.datetime(
            year=2019, month=2, day=27, hour=0, minute=0, second=0,
        ).in_tz("US/Central")
        with pendulum.test(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0], external_schedule, initial_datetime, ScheduleTickStatus.SKIPPED, None,
            )

            captured = capfd.readouterr()
            assert (
                captured.out
                == """2019-02-26 18:00:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: skip_schedule
2019-02-26 18:00:00 - dagster-scheduler - INFO - Launching run for skip_schedule at 2019-02-27 00:00:00+0000
2019-02-26 18:00:00 - dagster-scheduler - INFO - should_execute returned False for skip_schedule, skipping
"""
            )
Code Example #5 (test_run_scheduled_on_time_boundary): a schedule started exactly at its execution time launches a run on its first tick.
def test_run_scheduled_on_time_boundary(external_repo_context):
    with instance_with_schedules(external_repo_context) as (instance,
                                                            external_repo):
        external_schedule = external_repo.get_external_schedule(
            "simple_schedule")

        schedule_origin = external_schedule.get_origin()
        initial_datetime = pendulum.datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
        )
        with pendulum.test(initial_datetime):
            # Start schedule exactly at midnight
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                  pendulum.now("UTC"))

            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
Code Example #6 (test_with_incorrect_scheduler): launch_scheduled_runs raises DagsterInvariantViolationError when the instance is not configured with a compatible scheduler.
def test_with_incorrect_scheduler():
    with instance_for_test() as instance:
        with pytest.raises(DagsterInvariantViolationError):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
Code Example #7 (test_launch_failure): with a run launcher that fails (ExplodingRunLauncher), the run is still created, the tick is recorded as SUCCESS, and the launch failure is logged.
def test_launch_failure(external_repo_context, capfd):
    with central_timezone():
        with instance_with_schedules(
                external_repo_context,
                overrides={
                    "run_launcher": {
                        "module": "dagster.core.test_utils",
                        "class": "ExplodingRunLauncher",
                    },
                },
        ) as (instance, external_repo):
            external_schedule = external_repo.get_external_schedule(
                "simple_schedule")

            schedule_origin = external_schedule.get_origin()
            initial_datetime = datetime(
                year=2019,
                month=2,
                day=27,
                hour=0,
                minute=0,
                second=0,
                tzinfo=get_utc_timezone(),
            )

            with freeze_time(initial_datetime):
                instance.start_schedule_and_update_storage_state(
                    external_schedule)

                launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                      get_current_datetime_in_utc())

                assert instance.get_runs_count() == 1

                run = instance.get_runs()[0]

                validate_run_started(run,
                                     initial_datetime,
                                     "2019-02-26",
                                     expected_success=False)

                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 1
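                # The tick is still SUCCESS: the run was created even though the launcher failed to launch it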
                validate_tick(
                    ticks[0],
                    external_schedule,
                    initial_datetime,
                    ScheduleTickStatus.SUCCESS,
                    run.run_id,
                )

                captured = capfd.readouterr()
                assert (
                    captured.out ==
                    """2019-02-26 18:00:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-26 18:00:00 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-27 00:00:00+0000
2019-02-26 18:00:00 - dagster-scheduler - ERROR - Run {run_id} created successfully but failed to launch.
""".format(run_id=instance.get_runs()[0].run_id))
Code Example #8 (_test_launch_scheduled_runs_in_subprocess): helper that calls launch_scheduled_runs in a subprocess at a frozen pendulum time, passing through debug crash flags.
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime,
                                              debug_crash_flags):
    with DagsterInstance.from_ref(instance_ref) as instance:
        with pendulum.test(execution_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
                debug_crash_flags=debug_crash_flags,
            )
Code Example #9 (_test_launch_scheduled_runs_in_subprocess): the same helper written with freeze_time and get_current_datetime_in_utc instead of pendulum.
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime,
                                              debug_crash_flags):
    with DagsterInstance.from_ref(instance_ref) as instance:
        with freeze_time(execution_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                get_current_datetime_in_utc(),
                debug_crash_flags=debug_crash_flags,
            )
Code Example #10 (test_wrong_config): a schedule that emits invalid run config still launches a run; the run fails with DagsterInvalidConfigError in its engine events.
def test_wrong_config(external_repo_context, capfd):
    with instance_with_schedules(external_repo_context) as (instance,
                                                            external_repo):
        external_schedule = external_repo.get_external_schedule(
            "wrong_config_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                  get_current_datetime_in_utc())

            assert instance.get_runs_count() == 1

            wait_for_all_runs_to_start(instance)

            run = instance.get_runs()[0]

            validate_run_started(run,
                                 initial_datetime,
                                 "2019-02-26",
                                 expected_success=False)

            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
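            # The tick is SUCCESS because a run was created; the config error surfaces in the run's engine events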
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.SUCCESS,
                run.run_id,
            )

            run_logs = instance.all_logs(run.run_id)

            assert any(
                "DagsterInvalidConfigError" in event.dagster_event.message
                and event.dagster_event_type == DagsterEventType.ENGINE_EVENT
                for event in run_logs
            )

            captured = capfd.readouterr()

            assert "Failed to fetch execution plan for wrong_config_schedule" in captured.out
            assert "Error in config for pipeline the_pipeline" in captured.out
            assert 'Missing required field "solids" at the root.' in captured.out
Code Example #11 (test_no_started_schedules): when no schedules have been started, nothing is launched and the scheduler logs that it is not checking for runs.
def test_no_started_schedules(external_repo_context, capfd):
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        schedule_origin = external_schedule.get_origin()

        launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
        assert instance.get_runs_count() == 0

        ticks = instance.get_schedule_ticks(schedule_origin.get_id())
        assert len(ticks) == 0

        captured = capfd.readouterr()

        assert "Not checking for any runs since no schedules have been started." in captured.out
Code Example #12 (test_bad_should_execute): a should_execute that raises records a FAILURE tick and launches no run.
def test_bad_should_execute(external_repo_context, capfd):
    with instance_with_schedules(external_repo_context) as (instance,
                                                            external_repo):
        external_schedule = external_repo.get_external_schedule(
            "bad_should_execute_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                  get_current_datetime_in_utc())

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1

            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.FAILURE,
                None,
                "Error occurred during the execution of should_execute for "
                "schedule bad_should_execute_schedule",
            )

            captured = capfd.readouterr()
            assert (
                "Failed to fetch schedule data for bad_should_execute_schedule: "
            ) in captured.out

            assert ("Error occurred during the execution of should_execute "
                    "for schedule bad_should_execute_schedule" in captured.out)

            assert "Exception: bananas" in captured.out
Code Example #13 (test_hourly_dst_spring_forward): an hourly schedule keeps firing every hour across the spring-forward DST transition.
def test_hourly_dst_spring_forward(external_repo_context):
    # Verify that an hourly schedule still runs hourly during the spring DST transition
    with instance_with_schedules(external_repo_context) as (
            instance,
            external_repo,
    ):
        # 1AM CST
        freeze_datetime = pendulum.create(2019,
                                          3,
                                          10,
                                          1,
                                          0,
                                          0,
                                          tz="US/Central").in_tz("US/Pacific")

        with pendulum.test(freeze_datetime):
            external_schedule = external_repo.get_external_schedule(
                "hourly_central_time_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

        freeze_datetime = freeze_datetime.add(hours=2)

        # DST has now happened, 2 hours later it is 4AM CST
        # Should be 3 runs: 1AM CST, 3AM CST, 4AM CST
        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )

            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 3

            expected_datetimes_utc = [
                pendulum.create(2019, 3, 10, 4, 0, 0,
                                tz="US/Central").in_tz("UTC"),
                pendulum.create(2019, 3, 10, 3, 0, 0,
                                tz="US/Central").in_tz("UTC"),
                pendulum.create(2019, 3, 10, 1, 0, 0,
                                tz="US/Central").in_tz("UTC"),
            ]

            for i in range(3):
                validate_tick(
                    ticks[i],
                    external_schedule,
                    expected_datetimes_utc[i],
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[i].run_id,
                )

                validate_run_started(
                    instance.get_runs()[i],
                    expected_datetimes_utc[i],
                    partition_time=expected_datetimes_utc[i].in_tz(
                        "US/Central").subtract(hours=1),
                    partition_fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
                )

            # Verify idempotence
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 3
Code Example #14 (test_different_days_in_different_timezones): a daily 11PM US/Central schedule fires based on its own timezone, even though it is already the next day in UTC.
def test_different_days_in_different_timezones(external_repo_context):
    with instance_with_schedules(external_repo_context) as (
            instance,
            external_repo,
    ):
        freeze_datetime = pendulum.create(2019,
                                          2,
                                          27,
                                          22,
                                          59,
                                          59,
                                          tz="US/Central").in_tz("US/Pacific")
        with pendulum.test(freeze_datetime):
            # Runs every day at 11PM (CST)
            external_schedule = external_repo.get_external_schedule(
                "daily_late_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

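        # Two seconds later it is just past 11PM on Feb 27 in US/Central (already Feb 28 in UTC), so the schedule fires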
        freeze_datetime = freeze_datetime.add(seconds=2)
        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )

            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1

            expected_datetime = pendulum.create(year=2019,
                                                month=2,
                                                day=27,
                                                hour=23,
                                                tz="US/Central").in_tz("UTC")

            validate_tick(
                ticks[0],
                external_schedule,
                expected_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                expected_datetime,
                pendulum.create(2019, 2, 26, tz="US/Central"),
            )

            # Verify idempotence
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
Code Example #15 (test_non_utc_timezone_run): a daily US/Central schedule fires at midnight Central time, with the expected scheduler log output.
def test_non_utc_timezone_run(external_repo_context, capfd):
    # Verify that schedule runs at the expected time in a non-UTC timezone
    with instance_with_schedules(external_repo_context) as (
            instance,
            external_repo,
    ):
        freeze_datetime = pendulum.create(2019,
                                          2,
                                          27,
                                          23,
                                          59,
                                          59,
                                          tz="US/Central").in_tz("US/Pacific")
        with pendulum.test(freeze_datetime):
            external_schedule = external_repo.get_external_schedule(
                "daily_central_time_schedule")

            schedule_origin = external_schedule.get_origin()

            instance.start_schedule_and_update_storage_state(external_schedule)

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

            captured = capfd.readouterr()

            assert (
                captured.out ==
                """2019-02-27 21:59:59 - dagster-scheduler - INFO - Checking for new runs for the following schedules: daily_central_time_schedule
2019-02-27 21:59:59 - dagster-scheduler - INFO - No new runs for daily_central_time_schedule
""")
        freeze_datetime = freeze_datetime.add(seconds=2)
        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )

            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1

            expected_datetime = pendulum.create(year=2019,
                                                month=2,
                                                day=28,
                                                tz="US/Central").in_tz("UTC")

            validate_tick(
                ticks[0],
                external_schedule,
                expected_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                expected_datetime,
                pendulum.create(2019, 2, 27, tz="US/Central"),
            )

            captured = capfd.readouterr()

            assert (
                captured.out ==
                """2019-02-27 22:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: daily_central_time_schedule
2019-02-27 22:00:01 - dagster-scheduler - INFO - Launching run for daily_central_time_schedule at 2019-02-28 00:00:00-0600
2019-02-27 22:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {run_id} for daily_central_time_schedule
""".format(run_id=instance.get_runs()[0].run_id))

            # Verify idempotence
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
Code Example #16 (test_differing_timezones): US/Central and US/Eastern schedules each fire at their own midnight, one hour apart.
def test_differing_timezones(external_repo_context):
    # Two schedules, one using US/Central, the other on US/Eastern
    with instance_with_schedules(external_repo_context) as (
            instance,
            external_repo,
    ):
        freeze_datetime = pendulum.create(2019,
                                          2,
                                          27,
                                          23,
                                          59,
                                          59,
                                          tz="US/Eastern").in_tz("US/Pacific")
        with pendulum.test(freeze_datetime):
            external_schedule = external_repo.get_external_schedule(
                "daily_central_time_schedule")
            external_eastern_schedule = external_repo.get_external_schedule(
                "daily_eastern_time_schedule")

            schedule_origin = external_schedule.get_origin()
            eastern_origin = external_eastern_schedule.get_origin()

            instance.start_schedule_and_update_storage_state(external_schedule)
            instance.start_schedule_and_update_storage_state(
                external_eastern_schedule)

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

            ticks = instance.get_schedule_ticks(eastern_origin.get_id())
            assert len(ticks) == 0

            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

            ticks = instance.get_schedule_ticks(eastern_origin.get_id())
            assert len(ticks) == 0

        # Past midnight eastern time, the eastern timezone schedule will run, but not the central timezone
        freeze_datetime = freeze_datetime.add(minutes=1)
        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )

            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(eastern_origin.get_id())
            assert len(ticks) == 1

            expected_datetime = pendulum.create(year=2019,
                                                month=2,
                                                day=28,
                                                tz="US/Eastern").in_tz("UTC")

            validate_tick(
                ticks[0],
                external_eastern_schedule,
                expected_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                expected_datetime,
                pendulum.create(2019, 2, 27, tz="US/Eastern"),
            )

        # Past midnight central time, the central timezone schedule will now run
        freeze_datetime = freeze_datetime.add(hours=1)
        with pendulum.test(freeze_datetime):

            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )

            assert instance.get_runs_count() == 2
            ticks = instance.get_schedule_ticks(eastern_origin.get_id())
            assert len(ticks) == 1

            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1

            expected_datetime = pendulum.create(year=2019,
                                                month=2,
                                                day=28,
                                                tz="US/Central").in_tz("UTC")

            validate_tick(
                ticks[0],
                external_schedule,
                expected_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                expected_datetime,
                pendulum.create(2019, 2, 27, tz="US/Central"),
            )

            # Verify idempotence
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 2
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS

            ticks = instance.get_schedule_ticks(eastern_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
Code Example #17 (test_multiple_schedules_on_different_time_ranges): a daily and an hourly schedule are evaluated together; an hour later only the hourly schedule fires again.
def test_multiple_schedules_on_different_time_ranges(external_repo_context,
                                                     capfd):
    with instance_with_schedules(external_repo_context) as (instance,
                                                            external_repo):
        external_schedule = external_repo.get_external_schedule(
            "simple_schedule")
        external_hourly_schedule = external_repo.get_external_schedule(
            "simple_hourly_schedule")
        initial_datetime = pendulum.datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
        ).in_tz("US/Central")
        with pendulum.test(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)
            instance.start_schedule_and_update_storage_state(
                external_hourly_schedule)

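        # Two seconds later both schedules have passed their midnight UTC execution time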
        initial_datetime = initial_datetime.add(seconds=2)
        with pendulum.test(initial_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )

            assert instance.get_runs_count() == 2
            ticks = instance.get_schedule_ticks(
                external_schedule.get_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS

            hourly_ticks = instance.get_schedule_ticks(
                external_hourly_schedule.get_origin_id())
            assert len(hourly_ticks) == 1
            assert hourly_ticks[0].status == ScheduleTickStatus.SUCCESS

            captured = capfd.readouterr()

            assert (
                captured.out ==
                """2019-02-27 18:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_hourly_schedule, simple_schedule
2019-02-27 18:00:01 - dagster-scheduler - INFO - Launching run for simple_hourly_schedule at 2019-02-28 00:00:00+0000
2019-02-27 18:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {first_run_id} for simple_hourly_schedule
2019-02-27 18:00:01 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-28 00:00:00+0000
2019-02-27 18:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule
""".format(
                    first_run_id=instance.get_runs()[1].run_id,
                    second_run_id=instance.get_runs()[0].run_id,
                ))

        initial_datetime = initial_datetime.add(hours=1)
        with pendulum.test(initial_datetime):
            launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                  pendulum.now("UTC"))

            assert instance.get_runs_count() == 3

            ticks = instance.get_schedule_ticks(
                external_schedule.get_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS

            hourly_ticks = instance.get_schedule_ticks(
                external_hourly_schedule.get_origin_id())
            assert len(hourly_ticks) == 2
            assert (len([
                tick for tick in hourly_ticks
                if tick.status == ScheduleTickStatus.SUCCESS
            ]) == 2)

            captured = capfd.readouterr()
            assert (
                captured.out ==
                """2019-02-27 19:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_hourly_schedule, simple_schedule
2019-02-27 19:00:01 - dagster-scheduler - INFO - Launching run for simple_hourly_schedule at 2019-02-28 01:00:00+0000
2019-02-27 19:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {third_run_id} for simple_hourly_schedule
2019-02-27 19:00:01 - dagster-scheduler - INFO - No new runs for simple_schedule
""".format(third_run_id=instance.get_runs()[0].run_id))
Code Example #18 (test_simple_schedule, freeze_time variant): full lifecycle of a daily schedule: nothing before the first tick, one run per tick, idempotent re-launch, and catch-up of two missed ticks after skipping ahead two days.
def test_simple_schedule(external_repo_context, capfd):
    with central_timezone():
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tzinfo=get_utc_timezone(),
        )
        with instance_with_schedules(external_repo_context) as (instance,
                                                                external_repo):
            with freeze_time(initial_datetime) as frozen_datetime:
                external_schedule = external_repo.get_external_schedule(
                    "simple_schedule")

                schedule_origin = external_schedule.get_origin()

                instance.start_schedule_and_update_storage_state(
                    external_schedule)

                assert instance.get_runs_count() == 0
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 0

                # launch_scheduled_runs does nothing before the first tick
                launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                      get_current_datetime_in_utc())
                assert instance.get_runs_count() == 0
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 0

                captured = capfd.readouterr()

                assert (
                    captured.out ==
                    """2019-02-27 17:59:59 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-27 17:59:59 - dagster-scheduler - INFO - No new runs for simple_schedule
""")

                # Move forward in time so we're past a tick
                frozen_datetime.tick(delta=timedelta(seconds=2))

                launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                      get_current_datetime_in_utc())

                assert instance.get_runs_count() == 1
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 1

                expected_datetime = datetime(year=2019,
                                             month=2,
                                             day=28,
                                             tzinfo=get_utc_timezone())

                validate_tick(
                    ticks[0],
                    external_schedule,
                    expected_datetime,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[0].run_id,
                )

                wait_for_all_runs_to_start(instance)
                validate_run_started(instance.get_runs()[0], expected_datetime,
                                     "2019-02-27")

                captured = capfd.readouterr()

                assert (
                    captured.out ==
                    """2019-02-27 18:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-27 18:00:01 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-28 00:00:00+0000
2019-02-27 18:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {run_id} for simple_schedule
""".format(run_id=instance.get_runs()[0].run_id))

                # Verify idempotence
                launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                      get_current_datetime_in_utc())
                assert instance.get_runs_count() == 1
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 1
                assert ticks[0].status == ScheduleTickStatus.SUCCESS

                # Verify advancing in time but not going past a tick doesn't add any new runs
                frozen_datetime.tick(delta=timedelta(seconds=2))
                launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                      get_current_datetime_in_utc())
                assert instance.get_runs_count() == 1
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 1
                assert ticks[0].status == ScheduleTickStatus.SUCCESS

                capfd.readouterr()

                # Traveling two more days in the future before running results in two new ticks
                frozen_datetime.tick(delta=timedelta(days=2))
                launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                      get_current_datetime_in_utc())
                assert instance.get_runs_count() == 3
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 3
                assert (len([
                    tick for tick in ticks
                    if tick.status == ScheduleTickStatus.SUCCESS
                ]) == 3)

                runs_by_partition = {
                    run.tags[PARTITION_NAME_TAG]: run
                    for run in instance.get_runs()
                }

                assert "2019-02-28" in runs_by_partition
                assert "2019-03-01" in runs_by_partition

                captured = capfd.readouterr()

                assert (
                    captured.out ==
                    """2019-03-01 18:00:03 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-03-01 18:00:03 - dagster-scheduler - INFO - Launching 2 runs for simple_schedule at the following times: 2019-03-01 00:00:00+0000, 2019-03-02 00:00:00+0000
2019-03-01 18:00:03 - dagster-scheduler - INFO - Completed scheduled launch of run {first_run_id} for simple_schedule
2019-03-01 18:00:03 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule
""".format(
                        first_run_id=instance.get_runs()[1].run_id,
                        second_run_id=instance.get_runs()[0].run_id,
                    ))

                # Check idempotence again
                launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                      get_current_datetime_in_utc())
                assert instance.get_runs_count() == 3
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 3
Code Example #19 (test_simple_schedule, pendulum variant): the same lifecycle test written with pendulum.test for time travel.
def test_simple_schedule(external_repo_context, capfd):
    freeze_datetime = pendulum.datetime(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
    ).in_tz("US/Central")
    with instance_with_schedules(external_repo_context) as (instance,
                                                            external_repo):
        with pendulum.test(freeze_datetime):
            external_schedule = external_repo.get_external_schedule(
                "simple_schedule")

            schedule_origin = external_schedule.get_origin()

            instance.start_schedule_and_update_storage_state(external_schedule)

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

            # launch_scheduled_runs does nothing before the first tick
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

            captured = capfd.readouterr()

            assert (
                captured.out ==
                """2019-02-27 17:59:59 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-27 17:59:59 - dagster-scheduler - INFO - No new runs for simple_schedule
""")

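        # Advance two seconds, past the schedule's midnight UTC execution time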
        freeze_datetime = freeze_datetime.add(seconds=2)
        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )

            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1

            expected_datetime = pendulum.datetime(year=2019, month=2, day=28)

            validate_tick(
                ticks[0],
                external_schedule,
                expected_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                execution_time=pendulum.datetime(2019, 2, 28),
                partition_time=pendulum.datetime(2019, 2, 27),
            )

            captured = capfd.readouterr()

            assert (
                captured.out ==
                """2019-02-27 18:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-27 18:00:01 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-28 00:00:00+0000
2019-02-27 18:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {run_id} for simple_schedule
""".format(run_id=instance.get_runs()[0].run_id))

            # Verify idempotence
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS

        # Verify advancing in time but not going past a tick doesn't add any new runs
        freeze_datetime = freeze_datetime.add(seconds=2)
        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS

        freeze_datetime = freeze_datetime.add(days=2)
        with pendulum.test(freeze_datetime):
            capfd.readouterr()

            # Traveling two more days in the future before running results in two new ticks
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 3
            assert len([
                tick for tick in ticks
                if tick.status == ScheduleTickStatus.SUCCESS
            ]) == 3

            runs_by_partition = {
                run.tags[PARTITION_NAME_TAG]: run
                for run in instance.get_runs()
            }

            assert "2019-02-28" in runs_by_partition
            assert "2019-03-01" in runs_by_partition

            captured = capfd.readouterr()

            assert (
                captured.out ==
                """2019-03-01 18:00:03 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-03-01 18:00:03 - dagster-scheduler - INFO - Launching 2 runs for simple_schedule at the following times: 2019-03-01 00:00:00+0000, 2019-03-02 00:00:00+0000
2019-03-01 18:00:03 - dagster-scheduler - INFO - Completed scheduled launch of run {first_run_id} for simple_schedule
2019-03-01 18:00:03 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule
""".format(
                    first_run_id=instance.get_runs()[1].run_id,
                    second_run_id=instance.get_runs()[0].run_id,
                ))

            # Check idempotence again
            launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                  pendulum.now("UTC"))
            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 3
Code Example #20 (test_execute_during_dst_transition_spring_forward): a daily 2:30AM US/Central schedule skips the day on which 2:30AM never occurs during the spring-forward transition.
def test_execute_during_dst_transition_spring_forward(external_repo_context):
    # Verify that a daily schedule that is supposed to execute at a time that is skipped
    # by the DST transition does not execute for that day
    with instance_with_schedules(external_repo_context) as (
            instance,
            external_repo,
    ):
        # Day before DST
        freeze_datetime = pendulum.create(2019, 3, 9, 0, 0, 0,
                                          tz="US/Central").in_tz("US/Pacific")

        with pendulum.test(freeze_datetime):
            external_schedule = external_repo.get_external_schedule(
                "daily_dst_transition_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

        freeze_datetime = freeze_datetime.add(days=3)

        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )

            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 2
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 2

            # skipped 3/10 since 2:30AM never happened
            expected_datetimes_utc = [
                pendulum.create(2019, 3, 11, 2, 30, 0,
                                tz="US/Central").in_tz("UTC"),
                pendulum.create(2019, 3, 9, 2, 30, 0,
                                tz="US/Central").in_tz("UTC"),
            ]

            expected_partition_times = [
                pendulum.create(2019, 3, 10, tz="US/Central"),
                pendulum.create(2019, 3, 8, tz="US/Central"),
            ]

            for i in range(2):
                validate_tick(
                    ticks[i],
                    external_schedule,
                    expected_datetimes_utc[i],
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[i].run_id,
                )

                validate_run_started(
                    instance.get_runs()[i],
                    expected_datetimes_utc[i],
                    partition_time=expected_partition_times[i],
                )

            # Verify idempotence
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 2
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 2
Code Example #21 (test_bad_schedule_mixed_with_good_schedule): a schedule whose should_execute raises does not prevent a healthy schedule from launching its runs.
def test_bad_schedule_mixed_with_good_schedule(external_repo_context):
    with instance_with_schedules(external_repo_context) as (instance,
                                                            external_repo):
        good_schedule = external_repo.get_external_schedule("simple_schedule")
        bad_schedule = external_repo.get_external_schedule(
            "bad_should_execute_schedule_on_odd_days")

        good_origin = good_schedule.get_origin()
        bad_origin = bad_schedule.get_origin()
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime) as frozen_datetime:
            instance.start_schedule_and_update_storage_state(good_schedule)
            instance.start_schedule_and_update_storage_state(bad_schedule)

            launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                  get_current_datetime_in_utc())

            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            validate_run_started(instance.get_runs()[0], initial_datetime,
                                 "2019-02-26")

            good_ticks = instance.get_schedule_ticks(good_origin.get_id())
            assert len(good_ticks) == 1
            validate_tick(
                good_ticks[0],
                good_schedule,
                initial_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            bad_ticks = instance.get_schedule_ticks(bad_origin.get_id())
            assert len(bad_ticks) == 1

            assert bad_ticks[0].status == ScheduleTickStatus.FAILURE

            assert ("Error occurred during the execution of should_execute "
                    "for schedule bad_should_execute_schedule"
                    in bad_ticks[0].error.message)

            frozen_datetime.tick(delta=timedelta(days=1))

            new_now = get_current_datetime_in_utc()

            launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                  get_current_datetime_in_utc())

            assert instance.get_runs_count() == 3
            wait_for_all_runs_to_start(instance)

            good_schedule_runs = instance.get_runs(
                filters=PipelineRunsFilter.for_schedule(good_schedule))
            assert len(good_schedule_runs) == 2
            validate_run_started(good_schedule_runs[0], new_now, "2019-02-27")

            good_ticks = instance.get_schedule_ticks(good_origin.get_id())
            assert len(good_ticks) == 2
            validate_tick(
                good_ticks[0],
                good_schedule,
                new_now,
                ScheduleTickStatus.SUCCESS,
                good_schedule_runs[0].run_id,
            )

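            # On the next (even-numbered) day should_execute no longer raises, so the bad schedule launches a run too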
            bad_schedule_runs = instance.get_runs(
                filters=PipelineRunsFilter.for_schedule(bad_schedule))
            assert len(bad_schedule_runs) == 1
            validate_run_started(bad_schedule_runs[0], new_now, "2019-02-27")

            bad_ticks = instance.get_schedule_ticks(bad_origin.get_id())
            assert len(bad_ticks) == 2
            validate_tick(
                bad_ticks[0],
                bad_schedule,
                new_now,
                ScheduleTickStatus.SUCCESS,
                bad_schedule_runs[0].run_id,
            )
Code Example #22 (test_bad_load, freeze_time variant): an unloadable schedule origin records FAILURE ticks and logs the error, but launches no runs.
def test_bad_load(capfd):
    with schedule_instance() as instance:
        working_directory = os.path.dirname(__file__)
        recon_repo = ReconstructableRepository.for_file(
            __file__, "doesnt_exist", working_directory)
        schedule = recon_repo.get_reconstructable_schedule("also_doesnt_exist")
        fake_origin = schedule.get_origin()

        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime) as frozen_datetime:
            schedule_state = ScheduleState(
                fake_origin,
                ScheduleStatus.RUNNING,
                "0 0 * * *",
                get_timestamp_from_utc_datetime(get_current_datetime_in_utc()),
            )
            instance.add_schedule_state(schedule_state)

            frozen_datetime.tick(delta=timedelta(seconds=1))

            launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                  get_current_datetime_in_utc())

            assert instance.get_runs_count() == 0

            ticks = instance.get_schedule_ticks(fake_origin.get_id())

            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.FAILURE
            assert ticks[0].timestamp == get_timestamp_from_utc_datetime(
                get_current_datetime_in_utc())
            assert "doesnt_exist not found at module scope in file" in ticks[
                0].error.message

            captured = capfd.readouterr()
            assert "Error launching scheduled run" in captured.out
            assert "doesnt_exist not found at module scope" in captured.out

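            # A day later the schedule ticks again, still fails to load, and records a second FAILURE tick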
            frozen_datetime.tick(delta=timedelta(days=1))

            launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                  get_current_datetime_in_utc())

            assert instance.get_runs_count() == 0

            ticks = instance.get_schedule_ticks(fake_origin.get_id())

            assert len(ticks) == 2
            assert ticks[0].status == ScheduleTickStatus.FAILURE
            assert ticks[0].timestamp == get_timestamp_from_utc_datetime(
                get_current_datetime_in_utc())
            assert "doesnt_exist not found at module scope in file" in ticks[
                0].error.message

            captured = capfd.readouterr()
            assert "Error launching scheduled run" in captured.out
            assert "doesnt_exist not found at module scope" in captured.out
Code Example #23 (test_hourly_dst_fall_back): an hourly schedule fires every hour across the fall-back DST transition, including both 1AM CDT and 1AM CST.
def test_hourly_dst_fall_back(external_repo_context):
    # Verify that an hourly schedule still runs hourly during the fall DST transition
    with instance_with_schedules(external_repo_context) as (
            instance,
            external_repo,
    ):
        # 12:30 AM CST
        freeze_datetime = pendulum.create(2019,
                                          11,
                                          3,
                                          0,
                                          30,
                                          0,
                                          tz="US/Central").in_tz("US/Pacific")

        with pendulum.test(freeze_datetime):
            external_schedule = external_repo.get_external_schedule(
                "hourly_central_time_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

        freeze_datetime = freeze_datetime.add(hours=4)

        # DST has now happened, 4 hours later it is 3:30AM CST
        # Should be 4 runs: 1AM CDT, 1AM CST, 2AM CST, 3AM CST
        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )

            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 4
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 4

            expected_datetimes_utc = [
                pendulum.create(2019, 11, 3, 9, 0, 0, tz="UTC"),
                pendulum.create(2019, 11, 3, 8, 0, 0, tz="UTC"),
                pendulum.create(2019, 11, 3, 7, 0, 0, tz="UTC"),
                pendulum.create(2019, 11, 3, 6, 0, 0, tz="UTC"),
            ]

            expected_ct_times = [
                "2019-11-03T03:00:00-06:00",  # 3 AM CST
                "2019-11-03T02:00:00-06:00",  # 2 AM CST
                "2019-11-03T01:00:00-06:00",  # 1 AM CST
                "2019-11-03T01:00:00-05:00",  # 1 AM CDT
            ]

            for i in range(4):
                assert (expected_datetimes_utc[i].in_tz(
                    "US/Central").isoformat() == expected_ct_times[i])

                validate_tick(
                    ticks[i],
                    external_schedule,
                    expected_datetimes_utc[i],
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[i].run_id,
                )

                validate_run_started(
                    instance.get_runs()[i],
                    expected_datetimes_utc[i],
                    partition_time=expected_datetimes_utc[i].in_tz(
                        "US/Central").subtract(hours=1),
                    partition_fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
                )

            # Verify idempotence
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 4
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 4
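
The four expected tick times above hinge on the fact that the 1 AM hour occurs twice in US/Central on 2019-11-03. A minimal sketch of that arithmetic, assuming the pendulum 2.x API (the example itself uses the older pendulum.create spelling):

import pendulum

# 06:00 and 07:00 UTC both land on 1 AM local time on the fall-back day:
# first with the CDT offset (-05:00), then with the CST offset (-06:00).
first_1am = pendulum.datetime(2019, 11, 3, 6, 0, 0, tz="UTC").in_tz("US/Central")
second_1am = pendulum.datetime(2019, 11, 3, 7, 0, 0, tz="UTC").in_tz("US/Central")
assert first_1am.isoformat() == "2019-11-03T01:00:00-05:00"   # 1 AM CDT
assert second_1am.isoformat() == "2019-11-03T01:00:00-06:00"  # 1 AM CST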
Code example #24
def test_bad_schedules_mixed_with_good_schedule(external_repo_context, capfd):
    with instance_with_schedules(external_repo_context) as (instance,
                                                            external_repo):
        good_schedule = external_repo.get_external_schedule("simple_schedule")
        bad_schedule = external_repo.get_external_schedule(
            "bad_should_execute_schedule_on_odd_days")

        good_origin = good_schedule.get_origin()
        bad_origin = bad_schedule.get_origin()
        unloadable_origin = _get_unloadable_schedule_origin()
        initial_datetime = pendulum.datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
        )
        with pendulum.test(initial_datetime):
            instance.start_schedule_and_update_storage_state(good_schedule)
            instance.start_schedule_and_update_storage_state(bad_schedule)

            unloadable_schedule_state = ScheduleState(
                unloadable_origin,
                ScheduleStatus.RUNNING,
                "0 0 * * *",
                pendulum.now("UTC").timestamp(),
            )
            instance.add_schedule_state(unloadable_schedule_state)

            launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                  pendulum.now("UTC"))

            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                execution_time=initial_datetime,
                partition_time=pendulum.datetime(2019, 2, 26),
            )

            good_ticks = instance.get_schedule_ticks(good_origin.get_id())
            assert len(good_ticks) == 1
            validate_tick(
                good_ticks[0],
                good_schedule,
                initial_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            bad_ticks = instance.get_schedule_ticks(bad_origin.get_id())
            assert len(bad_ticks) == 1

            assert bad_ticks[0].status == ScheduleTickStatus.FAILURE

            assert ("Error occurred during the execution of should_execute "
                    "for schedule bad_should_execute_schedule"
                    in bad_ticks[0].error.message)

            unloadable_ticks = instance.get_schedule_ticks(
                unloadable_origin.get_id())
            assert len(unloadable_ticks) == 0

            captured = capfd.readouterr()
            assert "Scheduler failed for also_doesnt_exist" in captured.out
            assert "doesnt_exist not found at module scope" in captured.out

        initial_datetime = initial_datetime.add(days=1)
        with pendulum.test(initial_datetime):
            new_now = pendulum.now("UTC")
            launch_scheduled_runs(instance, get_default_scheduler_logger(),
                                  new_now)

            assert instance.get_runs_count() == 3
            wait_for_all_runs_to_start(instance)

            good_schedule_runs = instance.get_runs(
                filters=PipelineRunsFilter.for_schedule(good_schedule))
            assert len(good_schedule_runs) == 2
            validate_run_started(
                good_schedule_runs[0],
                execution_time=new_now,
                partition_time=pendulum.datetime(2019, 2, 27),
            )

            good_ticks = instance.get_schedule_ticks(good_origin.get_id())
            assert len(good_ticks) == 2
            validate_tick(
                good_ticks[0],
                good_schedule,
                new_now,
                ScheduleTickStatus.SUCCESS,
                good_schedule_runs[0].run_id,
            )

            bad_schedule_runs = instance.get_runs(
                filters=PipelineRunsFilter.for_schedule(bad_schedule))
            assert len(bad_schedule_runs) == 1
            validate_run_started(
                bad_schedule_runs[0],
                execution_time=new_now,
                partition_time=pendulum.datetime(2019, 2, 27),
            )

            bad_ticks = instance.get_schedule_ticks(bad_origin.get_id())
            assert len(bad_ticks) == 2
            validate_tick(
                bad_ticks[0],
                bad_schedule,
                new_now,
                ScheduleTickStatus.SUCCESS,
                bad_schedule_runs[0].run_id,
            )

            unloadable_ticks = instance.get_schedule_ticks(
                unloadable_origin.get_id())
            assert len(unloadable_ticks) == 0

            captured = capfd.readouterr()
            assert "Scheduler failed for also_doesnt_exist" in captured.out
            assert "doesnt_exist not found at module scope" in captured.out
Code example #25
def test_daily_dst_fall_back(external_repo_context):
    # Verify that a daily schedule still runs once per day during the fall DST transition
    with instance_with_schedules(external_repo_context) as (
            instance,
            external_repo,
    ):
        # Midnight CT on the morning of the fall-back transition (the clocks go back at 2 AM)
        freeze_datetime = pendulum.create(2019,
                                          11,
                                          3,
                                          0,
                                          0,
                                          0,
                                          tz="US/Central").in_tz("US/Pacific")

        with pendulum.test(freeze_datetime):
            external_schedule = external_repo.get_external_schedule(
                "daily_central_time_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

        freeze_datetime = freeze_datetime.add(days=2)

        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )

            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 3

            # The UTC offset changes by one hour after the transition; the schedule still
            # runs daily at the same wall-clock time in CT
            expected_datetimes_utc = [
                pendulum.create(2019, 11, 5, 6, 0, 0, tz="UTC"),
                pendulum.create(2019, 11, 4, 6, 0, 0, tz="UTC"),
                pendulum.create(2019, 11, 3, 5, 0, 0, tz="UTC"),
            ]

            expected_partition_times = [
                pendulum.create(2019, 11, 4, tz="US/Central"),
                pendulum.create(2019, 11, 3, tz="US/Central"),
                pendulum.create(2019, 11, 2, tz="US/Central"),
            ]

            for i in range(3):
                validate_tick(
                    ticks[i],
                    external_schedule,
                    expected_datetimes_utc[i],
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[i].run_id,
                )

                validate_run_started(
                    instance.get_runs()[i],
                    expected_datetimes_utc[i],
                    partition_time=expected_partition_times[i],
                )

            # Verify idempotence
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 3
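
The expected UTC times in the example above move from 05:00 to 06:00 UTC because midnight in US/Central picks up a different offset once the clocks fall back. A quick check of that offset change, assuming pendulum 2.x:

import pendulum

midnight_cdt = pendulum.datetime(2019, 11, 3, 0, 0, 0, tz="US/Central")  # still CDT (-05:00)
midnight_cst = pendulum.datetime(2019, 11, 4, 0, 0, 0, tz="US/Central")  # back on CST (-06:00)
assert midnight_cdt.in_tz("UTC") == pendulum.datetime(2019, 11, 3, 5, 0, 0, tz="UTC")
assert midnight_cst.in_tz("UTC") == pendulum.datetime(2019, 11, 4, 6, 0, 0, tz="UTC")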
Code example #26
def test_max_catchup_runs(capfd):
    initial_datetime = datetime(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
        tzinfo=get_utc_timezone(),
    )
    with central_timezone():
        with instance_with_schedules(grpc_repo) as (instance, external_repo):
            with freeze_time(initial_datetime) as frozen_datetime:
                external_schedule = external_repo.get_external_schedule(
                    "simple_schedule")
                schedule_origin = external_schedule.get_origin()
                instance.start_schedule_and_update_storage_state(
                    external_schedule)

                # Advance five days: the frozen clock is now March 4 at 11:59 PM
                frozen_datetime.tick(delta=timedelta(days=5))

                launch_scheduled_runs(
                    instance,
                    get_default_scheduler_logger(),
                    get_current_datetime_in_utc(),
                    max_catchup_runs=2,
                )

                assert instance.get_runs_count() == 2
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 2

                first_datetime = datetime(year=2019,
                                          month=3,
                                          day=4,
                                          tzinfo=get_utc_timezone())

                wait_for_all_runs_to_start(instance)

                validate_tick(
                    ticks[0],
                    external_schedule,
                    first_datetime,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[0].run_id,
                )
                validate_run_started(instance.get_runs()[0], first_datetime,
                                     "2019-03-03")

                second_datetime = datetime(year=2019,
                                           month=3,
                                           day=3,
                                           tzinfo=get_utc_timezone())

                validate_tick(
                    ticks[1],
                    external_schedule,
                    second_datetime,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[1].run_id,
                )

                validate_run_started(instance.get_runs()[1], second_datetime,
                                     "2019-03-02")

                captured = capfd.readouterr()
                assert (
                    captured.out ==
                    """2019-03-04 17:59:59 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-03-04 17:59:59 - dagster-scheduler - WARNING - simple_schedule has fallen behind, only launching 2 runs
2019-03-04 17:59:59 - dagster-scheduler - INFO - Launching 2 runs for simple_schedule at the following times: 2019-03-03 00:00:00+0000, 2019-03-04 00:00:00+0000
2019-03-04 17:59:59 - dagster-scheduler - INFO - Completed scheduled launch of run {first_run_id} for simple_schedule
2019-03-04 17:59:59 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule
""".format(
                        first_run_id=instance.get_runs()[1].run_id,
                        second_run_id=instance.get_runs()[0].run_id,
                    ))
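
The max_catchup_runs=2 assertion above (five missed midnights, but only the March 3 and March 4 runs launched) boils down to enumerating the missed cron times and keeping only the most recent ones. This is an illustrative sketch of that capping behavior using the croniter package, not dagster's actual implementation; the helper name missed_cron_times is made up here:

from datetime import datetime, timezone

from croniter import croniter


def missed_cron_times(cron_string, last_tick, now, max_catchup_runs):
    # Enumerate every cron time after the last tick, up to "now",
    # then keep only the final max_catchup_runs entries.
    itr = croniter(cron_string, last_tick)
    times = []
    next_time = itr.get_next(datetime)
    while next_time <= now:
        times.append(next_time)
        next_time = itr.get_next(datetime)
    return times[-max_catchup_runs:]


last_tick = datetime(2019, 2, 27, 23, 59, 59, tzinfo=timezone.utc)
now = datetime(2019, 3, 4, 23, 59, 59, tzinfo=timezone.utc)
times = missed_cron_times("0 0 * * *", last_tick, now, max_catchup_runs=2)
assert [t.day for t in times] == [3, 4]  # only March 3 and March 4 survive the cap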
Code example #27
def test_execute_during_dst_transition_fall_back(external_repo_context):
    with instance_with_schedules(external_repo_context) as (
            instance,
            external_repo,
    ):
        # A schedule that runs daily during a time that occurs twice during a fall DST transition
        # only executes once for that day
        freeze_datetime = pendulum.create(2019,
                                          11,
                                          2,
                                          0,
                                          0,
                                          0,
                                          tz="US/Central").in_tz("US/Pacific")

        with pendulum.test(freeze_datetime):
            external_schedule = external_repo.get_external_schedule(
                "daily_dst_transition_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

        freeze_datetime = freeze_datetime.add(days=3)

        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )

            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 3

            expected_datetimes_utc = [
                pendulum.create(2019, 11, 4, 8, 30, 0, tz="UTC"),
                pendulum.create(2019, 11, 3, 8, 30, 0, tz="UTC"),
                pendulum.create(2019, 11, 2, 7, 30, 0, tz="UTC"),
            ]

            expected_partition_times = [
                pendulum.create(2019, 11, 3, tz="US/Central"),
                pendulum.create(2019, 11, 2, tz="US/Central"),
                pendulum.create(2019, 11, 1, tz="US/Central"),
            ]

            for i in range(3):
                validate_tick(
                    ticks[i],
                    external_schedule,
                    expected_datetimes_utc[i],
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[i].run_id,
                )

                validate_run_started(
                    instance.get_runs()[i],
                    expected_datetimes_utc[i],
                    partition_time=expected_partition_times[i],
                )

            # Verify idempotence
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 3
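
Another way to see the fall-back transition that the example above runs across: the local day 2019-11-03 in US/Central is 25 hours long. A one-assert sketch, assuming pendulum 2.x:

import pendulum

day_start = pendulum.datetime(2019, 11, 3, tz="US/Central")
next_day = pendulum.datetime(2019, 11, 4, tz="US/Central")
assert (next_day - day_start).in_hours() == 25  # the fall-back day has an extra hour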
Code example #28
def test_max_catchup_runs(capfd):
    initial_datetime = pendulum.datetime(year=2019,
                                         month=2,
                                         day=27,
                                         hour=23,
                                         minute=59,
                                         second=59).in_tz("US/Central")
    with instance_with_schedules(grpc_repo) as (instance, external_repo):
        with pendulum.test(initial_datetime):
            external_schedule = external_repo.get_external_schedule(
                "simple_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

        initial_datetime = initial_datetime.add(days=5)
        with pendulum.test(initial_datetime):
            # Day is now March 4 at 11:59PM
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
                max_catchup_runs=2,
            )

            assert instance.get_runs_count() == 2
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 2

            first_datetime = pendulum.datetime(year=2019, month=3, day=4)

            wait_for_all_runs_to_start(instance)

            validate_tick(
                ticks[0],
                external_schedule,
                first_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
            validate_run_started(
                instance.get_runs()[0],
                execution_time=first_datetime,
                partition_time=pendulum.datetime(2019, 3, 3),
            )

            second_datetime = pendulum.datetime(year=2019, month=3, day=3)

            validate_tick(
                ticks[1],
                external_schedule,
                second_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[1].run_id,
            )

            validate_run_started(
                instance.get_runs()[1],
                execution_time=second_datetime,
                partition_time=pendulum.datetime(2019, 3, 2),
            )

            captured = capfd.readouterr()
            assert (
                captured.out ==
                """2019-03-04 17:59:59 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-03-04 17:59:59 - dagster-scheduler - WARNING - simple_schedule has fallen behind, only launching 2 runs
2019-03-04 17:59:59 - dagster-scheduler - INFO - Launching 2 runs for simple_schedule at the following times: 2019-03-03 00:00:00+0000, 2019-03-04 00:00:00+0000
2019-03-04 17:59:59 - dagster-scheduler - INFO - Completed scheduled launch of run {first_run_id} for simple_schedule
2019-03-04 17:59:59 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule
""".format(
                    first_run_id=instance.get_runs()[1].run_id,
                    second_run_id=instance.get_runs()[0].run_id,
                ))
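
The 17:59:59 timestamps in the expected log output above are just the frozen instant, 2019-03-04 23:59:59 UTC, rendered in US/Central, which is presumably why code example #26 wraps the same assertions in central_timezone(). A sketch of the conversion, assuming pendulum 2.x:

import pendulum

frozen = pendulum.datetime(2019, 3, 4, 23, 59, 59, tz="UTC")
assert frozen.in_tz("US/Central").format("YYYY-MM-DD HH:mm:ss") == "2019-03-04 17:59:59"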