def test_run_scheduled_on_time_boundary(external_repo_context):
    """Launch ``simple_schedule`` at the same frozen instant it is started.

    Time is frozen at 2019-02-27 00:00:00 UTC. The test expects exactly one
    run and exactly one tick, and that tick must have status SUCCESS.
    """
    with instance_with_schedules(external_repo_context) as (instance, repo):
        schedule = repo.get_external_schedule("simple_schedule")
        origin = schedule.get_origin()

        midnight = datetime(
            year=2019, month=2, day=27, hour=0, minute=0, second=0, tzinfo=get_utc_timezone(),
        )
        with freeze_time(midnight):
            # The schedule is started exactly at midnight, i.e. right on the
            # boundary the scheduler is about to evaluate.
            instance.start_schedule_and_update_storage_state(schedule)

            scheduler_logger = get_default_scheduler_logger()
            launch_scheduled_runs(instance, scheduler_logger, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 1

            recorded_ticks = instance.get_schedule_ticks(origin.get_id())
            assert len(recorded_ticks) == 1
            assert recorded_ticks[0].status == ScheduleTickStatus.SUCCESS
Beispiel #2
0
def test_bad_env_fn(external_repo_context, capfd):
    """Run ``bad_env_fn_schedule`` and expect a FAILURE tick and no runs.

    The tick must carry the ``run_config_fn`` error message, and the same
    message must show up in the output captured through ``capfd``.
    """
    expected_error = (
        "Error occurred during the execution of run_config_fn for "
        "schedule bad_env_fn_schedule"
    )

    with instance_with_schedules(external_repo_context) as (instance, repo):
        schedule = repo.get_external_schedule("bad_env_fn_schedule")
        origin = schedule.get_origin()
        frozen_now = pendulum.datetime(year=2019, month=2, day=27, hour=0, minute=0, second=0)

        with pendulum.test(frozen_now):
            instance.start_schedule_and_update_storage_state(schedule)

            scheduler_logger = get_default_scheduler_logger()
            launch_scheduled_runs(instance, scheduler_logger, pendulum.now("UTC"))

            # No run is created; the failure is recorded on the tick instead.
            assert instance.get_runs_count() == 0
            recorded_ticks = instance.get_schedule_ticks(origin.get_id())
            assert len(recorded_ticks) == 1

            validate_tick(
                recorded_ticks[0],
                schedule,
                frozen_now,
                ScheduleTickStatus.FAILURE,
                None,
                expected_error,
            )

            stdout = capfd.readouterr().out

            assert "Failed to fetch schedule data for bad_env_fn_schedule: " in stdout
            assert expected_error in stdout
Beispiel #3
0
def test_skip(external_repo_context, capfd):
    """Run ``skip_schedule`` and expect a SKIPPED tick, no runs, and exact log output.

    Time is frozen at 2019-02-27 00:00 UTC expressed in US/Central, which is
    why the expected log lines are stamped 2019-02-26 18:00:00.
    """
    expected_output = """2019-02-26 18:00:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: skip_schedule
2019-02-26 18:00:00 - dagster-scheduler - INFO - Launching run for skip_schedule at 2019-02-27 00:00:00+0000
2019-02-26 18:00:00 - dagster-scheduler - INFO - should_execute returned False for skip_schedule, skipping
"""

    with instance_with_schedules(external_repo_context) as (instance, repo):
        schedule = repo.get_external_schedule("skip_schedule")
        origin = schedule.get_origin()

        utc_midnight = pendulum.datetime(year=2019, month=2, day=27, hour=0, minute=0, second=0)
        frozen_now = utc_midnight.in_tz("US/Central")

        with pendulum.test(frozen_now):
            instance.start_schedule_and_update_storage_state(schedule)

            scheduler_logger = get_default_scheduler_logger()
            launch_scheduled_runs(instance, scheduler_logger, pendulum.now("UTC"))

            assert instance.get_runs_count() == 0
            recorded_ticks = instance.get_schedule_ticks(origin.get_id())
            assert len(recorded_ticks) == 1
            validate_tick(
                recorded_ticks[0], schedule, frozen_now, ScheduleTickStatus.SKIPPED, None,
            )

            # The scheduler output is compared verbatim, not by substring.
            stdout = capfd.readouterr().out
            assert stdout == expected_output
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Rebuild the instance from ``instance_ref`` and run the scheduler once.

    Time is frozen at ``execution_datetime`` and ``debug_crash_flags`` is
    forwarded to ``launch_scheduled_runs`` unchanged.

    NOTE(review): the name suggests this is the target of a child process in
    crash-recovery tests -- confirm against the callers.
    """
    with DagsterInstance.from_ref(instance_ref) as instance, freeze_time(execution_datetime):
        frozen_now = get_current_datetime_in_utc()
        launch_scheduled_runs(instance, frozen_now, debug_crash_flags=debug_crash_flags)
Beispiel #5
0
def test_bad_load(capfd):
    """Run the scheduler against a schedule whose origin cannot be loaded.

    A RUNNING schedule state is stored for an unloadable origin one second
    before midnight. The scheduler is then run at midnight and again a day
    later; both passes must produce no runs and no ticks, and the first pass
    must log the load failure.
    """
    with schedule_instance() as instance:
        unloadable_origin = _get_unloadable_schedule_origin()

        current_time = pendulum.datetime(
            year=2019, month=2, day=27, hour=23, minute=59, second=59,
        )
        with pendulum.test(current_time):
            started_at = pendulum.now("UTC").timestamp()
            state = ScheduleState(
                unloadable_origin, ScheduleStatus.RUNNING, "0 0 * * *", started_at,
            )
            instance.add_schedule_state(state)

        # Step to midnight so the "0 0 * * *" cron schedule becomes due.
        current_time = current_time.add(seconds=1)
        with pendulum.test(current_time):
            scheduler_logger = get_default_scheduler_logger()
            launch_scheduled_runs(instance, scheduler_logger, pendulum.now("UTC"))

            assert instance.get_runs_count() == 0

            recorded_ticks = instance.get_schedule_ticks(unloadable_origin.get_id())
            assert len(recorded_ticks) == 0

            stdout = capfd.readouterr().out
            assert "Scheduler failed for also_doesnt_exist" in stdout
            assert "doesnt_exist not found at module scope" in stdout

        # A day later the result must be the same: still no runs, no ticks.
        current_time = current_time.add(days=1)
        with pendulum.test(current_time):
            scheduler_logger = get_default_scheduler_logger()
            launch_scheduled_runs(instance, scheduler_logger, pendulum.now("UTC"))

            assert instance.get_runs_count() == 0
            recorded_ticks = instance.get_schedule_ticks(unloadable_origin.get_id())
            assert len(recorded_ticks) == 0
Beispiel #6
0
def test_launch_failure(external_repo_context):
    """Run ``simple_schedule`` with ``ExplodingRunLauncher`` as the run launcher.

    The test expects one run to exist (validated with
    ``expected_success=False``) and one tick with status SUCCESS that points
    at that run.
    """
    launcher_overrides = {
        "run_launcher": {"module": "dagster.core.test_utils", "class": "ExplodingRunLauncher"},
    }

    with instance_with_schedules(external_repo_context, overrides=launcher_overrides) as (
        instance,
        repo,
    ):
        schedule = repo.get_external_schedule("simple_schedule")
        origin = schedule.get_origin()

        midnight = datetime(
            year=2019, month=2, day=27, hour=0, minute=0, second=0, tzinfo=get_utc_timezone(),
        )

        with freeze_time(midnight):
            instance.start_schedule_and_update_storage_state(schedule)

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            # The run record is created even though launching it fails.
            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            validate_run_started(run, midnight, "2019-02-26", expected_success=False)

            recorded_ticks = instance.get_schedule_ticks(origin.get_id())
            assert len(recorded_ticks) == 1
            validate_tick(
                recorded_ticks[0], schedule, midnight, ScheduleTickStatus.SUCCESS, run.run_id,
            )
Beispiel #7
0
def test_bad_should_execute(external_repo_context):
    """Run ``bad_should_execute_schedule`` and expect a FAILURE tick and no runs.

    The tick is validated against the ``should_execute`` error message.
    """
    expected_error = (
        "Error occurred during the execution of should_execute for "
        "schedule bad_should_execute_schedule"
    )

    with instance_with_schedules(external_repo_context) as (instance, repo):
        schedule = repo.get_external_schedule("bad_should_execute_schedule")
        origin = schedule.get_origin()

        midnight = datetime(
            year=2019, month=2, day=27, hour=0, minute=0, second=0, tzinfo=get_utc_timezone(),
        )
        with freeze_time(midnight):
            instance.start_schedule_and_update_storage_state(schedule)

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 0
            recorded_ticks = instance.get_schedule_ticks(origin.get_id())
            assert len(recorded_ticks) == 1

            validate_tick(
                recorded_ticks[0],
                schedule,
                midnight,
                ScheduleTickStatus.FAILURE,
                None,
                expected_error,
            )
Beispiel #8
0
def test_run_scheduled_on_time_boundary(external_repo_context):
    """Launch ``simple_schedule`` at the same frozen instant it is started.

    Time is frozen with pendulum at 2019-02-27 00:00:00. The test expects one
    run and one job tick whose status is SUCCESS.
    """
    with instance_with_schedules(external_repo_context) as (instance, repo):
        schedule = repo.get_external_schedule("simple_schedule")
        origin = schedule.get_external_origin()

        midnight = pendulum.datetime(year=2019, month=2, day=27, hour=0, minute=0, second=0)
        with pendulum.test(midnight):
            # The schedule is started exactly at midnight, i.e. right on the
            # boundary the scheduler is about to evaluate.
            instance.start_schedule_and_update_storage_state(schedule)

            scheduler_logger = logger()
            launch_scheduled_runs(instance, scheduler_logger, pendulum.now("UTC"))

            assert instance.get_runs_count() == 1

            recorded_ticks = instance.get_job_ticks(origin.get_id())
            assert len(recorded_ticks) == 1
            assert recorded_ticks[0].status == JobTickStatus.SUCCESS
Beispiel #9
0
def test_schedule_without_timezone(external_repo_context, capfd):
    """Run ``daily_schedule_without_timezone`` and expect it to be refused.

    The scheduler is run at the frozen start time and again one day later.
    Both passes must yield no runs and no ticks, and the first pass must log
    the missing ``execution_timezone`` message.
    """
    with instance_with_schedules(external_repo_context) as (instance, repo):
        schedule = repo.get_external_schedule("daily_schedule_without_timezone")
        origin = schedule.get_origin()
        current_time = pendulum.datetime(year=2019, month=2, day=27, hour=0, minute=0, second=0)

        with pendulum.test(current_time):
            instance.start_schedule_and_update_storage_state(schedule)

            scheduler_logger = get_default_scheduler_logger()
            launch_scheduled_runs(instance, scheduler_logger, pendulum.now("UTC"))

            assert instance.get_runs_count() == 0

            recorded_ticks = instance.get_schedule_ticks(origin.get_id())
            assert len(recorded_ticks) == 0

            stdout = capfd.readouterr().out
            expected_message = (
                "Scheduler could not run for daily_schedule_without_timezone as it did not specify "
                "an execution_timezone in its definition."
            )
            assert expected_message in stdout

        # One day later the outcome must be unchanged.
        current_time = current_time.add(days=1)
        with pendulum.test(current_time):
            scheduler_logger = get_default_scheduler_logger()
            launch_scheduled_runs(instance, scheduler_logger, pendulum.now("UTC"))

            assert instance.get_runs_count() == 0
            recorded_ticks = instance.get_schedule_ticks(origin.get_id())
            assert len(recorded_ticks) == 0
def test_with_incorrect_scheduler():
    """Expect ``DagsterInvariantViolationError`` on a plain ``instance_for_test()`` instance.

    NOTE(review): presumably the default test instance is not configured with
    the scheduler that ``launch_scheduled_runs`` requires -- confirm.
    """
    with instance_for_test() as instance, pytest.raises(DagsterInvariantViolationError):
        scheduler_logger = get_default_scheduler_logger()
        launch_scheduled_runs(instance, scheduler_logger, pendulum.now("UTC"))
def test_launch_failure(external_repo_context, capfd):
    """Run ``simple_schedule`` with ``ExplodingRunLauncher`` under ``central_timezone()``.

    The test expects one run (validated with ``expected_success=False``), one
    SUCCESS tick pointing at that run, and scheduler output that matches the
    expected log text exactly, including the "failed to launch" error line.
    """
    with central_timezone():
        launcher_overrides = {
            "run_launcher": {"module": "dagster.core.test_utils", "class": "ExplodingRunLauncher"},
        }
        with instance_with_schedules(external_repo_context, overrides=launcher_overrides) as (
            instance,
            repo,
        ):
            schedule = repo.get_external_schedule("simple_schedule")
            origin = schedule.get_origin()

            midnight = datetime(
                year=2019, month=2, day=27, hour=0, minute=0, second=0, tzinfo=get_utc_timezone(),
            )

            with freeze_time(midnight):
                instance.start_schedule_and_update_storage_state(schedule)

                scheduler_logger = get_default_scheduler_logger()
                launch_scheduled_runs(instance, scheduler_logger, get_current_datetime_in_utc())

                # The run record is created even though launching it fails.
                assert instance.get_runs_count() == 1
                run = instance.get_runs()[0]
                validate_run_started(run, midnight, "2019-02-26", expected_success=False)

                recorded_ticks = instance.get_schedule_ticks(origin.get_id())
                assert len(recorded_ticks) == 1
                validate_tick(
                    recorded_ticks[0], schedule, midnight, ScheduleTickStatus.SUCCESS, run.run_id,
                )

                stdout = capfd.readouterr().out
                # The run id is fetched again from storage for the expected text.
                expected_output = """2019-02-26 18:00:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-26 18:00:00 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-27 00:00:00+0000
2019-02-26 18:00:00 - dagster-scheduler - ERROR - Run {run_id} created successfully but failed to launch.
""".format(run_id=instance.get_runs()[0].run_id)
                assert stdout == expected_output
Beispiel #12
0
def test_launch_failure(external_repo_context, capfd):
    """Run ``simple_schedule`` with ``ExplodingRunLauncher`` (pendulum / job-tick API).

    Time is frozen at 2019-02-27 00:00 UTC expressed in US/Central. The test
    expects one run (validated with ``expected_success=False``), one SUCCESS
    job tick pointing at that run, and SchedulerDaemon output that matches the
    expected log text exactly.
    """
    launcher_overrides = {
        "run_launcher": {"module": "dagster.core.test_utils", "class": "ExplodingRunLauncher"},
    }

    with instance_with_schedules(external_repo_context, overrides=launcher_overrides) as (
        instance,
        repo,
    ):
        schedule = repo.get_external_schedule("simple_schedule")
        origin = schedule.get_external_origin()

        utc_midnight = pendulum.datetime(year=2019, month=2, day=27, hour=0, minute=0, second=0)
        frozen_now = utc_midnight.in_tz("US/Central")

        with pendulum.test(frozen_now):
            instance.start_schedule_and_update_storage_state(schedule)

            scheduler_logger = logger()
            launch_scheduled_runs(instance, scheduler_logger, pendulum.now("UTC"))

            # The run record is created even though launching it fails.
            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]

            validate_run_started(
                run,
                execution_time=frozen_now,
                partition_time=pendulum.datetime(2019, 2, 26),
                expected_success=False,
            )

            recorded_ticks = instance.get_job_ticks(origin.get_id())
            assert len(recorded_ticks) == 1
            validate_tick(recorded_ticks[0], schedule, frozen_now, JobTickStatus.SUCCESS, run.run_id)

            stdout = capfd.readouterr().out
            # The run id is fetched again from storage for the expected text.
            expected_output = """2019-02-26 18:00:00 - SchedulerDaemon - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-26 18:00:00 - SchedulerDaemon - INFO - Launching run for simple_schedule at 2019-02-27 00:00:00+0000
2019-02-26 18:00:00 - SchedulerDaemon - ERROR - Run {run_id} created successfully but failed to launch.
""".format(run_id=instance.get_runs()[0].run_id)
            assert stdout == expected_output
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Rebuild the instance from ``instance_ref`` and run the scheduler once.

    Time is frozen with pendulum at ``execution_datetime`` and
    ``debug_crash_flags`` is forwarded to ``launch_scheduled_runs`` unchanged.

    NOTE(review): the name suggests this is the target of a child process in
    crash-recovery tests -- confirm against the callers.
    """
    with DagsterInstance.from_ref(instance_ref) as instance, pendulum.test(execution_datetime):
        scheduler_logger = get_default_scheduler_logger()
        frozen_now = pendulum.now("UTC")
        launch_scheduled_runs(
            instance, scheduler_logger, frozen_now, debug_crash_flags=debug_crash_flags,
        )
def test_wrong_config(external_repo_context, capfd):
    """Run ``wrong_config_schedule`` and check how the config error is reported.

    The test expects one run (validated with ``expected_success=False``), one
    SUCCESS tick pointing at it, at least one ENGINE_EVENT in the run's log
    that mentions ``DagsterInvalidConfigError``, and matching messages in the
    captured scheduler output.
    """
    with instance_with_schedules(external_repo_context) as (instance, repo):
        schedule = repo.get_external_schedule("wrong_config_schedule")
        origin = schedule.get_origin()

        midnight = datetime(
            year=2019, month=2, day=27, hour=0, minute=0, second=0, tzinfo=get_utc_timezone(),
        )
        with freeze_time(midnight):
            instance.start_schedule_and_update_storage_state(schedule)

            scheduler_logger = get_default_scheduler_logger()
            launch_scheduled_runs(instance, scheduler_logger, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 1

            wait_for_all_runs_to_start(instance)

            run = instance.get_runs()[0]
            validate_run_started(run, midnight, "2019-02-26", expected_success=False)

            recorded_ticks = instance.get_schedule_ticks(origin.get_id())
            assert len(recorded_ticks) == 1
            validate_tick(
                recorded_ticks[0], schedule, midnight, ScheduleTickStatus.SUCCESS, run.run_id,
            )

            # Every log record is inspected; the config error must appear as
            # an engine event on the run.
            config_error_events = []
            for record in instance.all_logs(run.run_id):
                if (
                    "DagsterInvalidConfigError" in record.dagster_event.message
                    and record.dagster_event_type == DagsterEventType.ENGINE_EVENT
                ):
                    config_error_events.append(record)
            assert len(config_error_events) > 0

            stdout = capfd.readouterr().out

            assert "Failed to fetch execution plan for wrong_config_schedule" in stdout
            assert "Error in config for pipeline the_pipeline" in stdout
            assert 'Missing required field "solids" at the root.' in stdout
Beispiel #15
0
def test_wrong_config(external_repo_context, capfd):
    """Run ``wrong_config_schedule`` (pendulum / job-tick API) and check error reporting.

    The test expects one run (validated with ``expected_success=False``), one
    SUCCESS job tick pointing at it, at least one ENGINE_EVENT in the run's
    log that mentions ``DagsterInvalidConfigError``, and matching messages in
    the captured scheduler output.
    """
    with instance_with_schedules(external_repo_context) as (instance, repo):
        schedule = repo.get_external_schedule("wrong_config_schedule")
        origin = schedule.get_external_origin()

        midnight = pendulum.datetime(year=2019, month=2, day=27, hour=0, minute=0, second=0)
        with pendulum.test(midnight):
            instance.start_schedule_and_update_storage_state(schedule)

            scheduler_logger = logger()
            launch_scheduled_runs(instance, scheduler_logger, pendulum.now("UTC"))

            assert instance.get_runs_count() == 1

            wait_for_all_runs_to_start(instance)

            run = instance.get_runs()[0]
            validate_run_started(
                run,
                execution_time=midnight,
                partition_time=pendulum.datetime(2019, 2, 26),
                expected_success=False,
            )

            recorded_ticks = instance.get_job_ticks(origin.get_id())
            assert len(recorded_ticks) == 1
            validate_tick(recorded_ticks[0], schedule, midnight, JobTickStatus.SUCCESS, run.run_id)

            # Every log record is inspected; the config error must appear as
            # an engine event on the run.
            config_error_events = []
            for record in instance.all_logs(run.run_id):
                if (
                    "DagsterInvalidConfigError" in record.dagster_event.message
                    and record.dagster_event_type == DagsterEventType.ENGINE_EVENT
                ):
                    config_error_events.append(record)
            assert len(config_error_events) > 0

            stdout = capfd.readouterr().out

            assert "Failed to fetch execution plan for wrong_config_schedule" in stdout
            assert "Error in config for pipeline the_pipeline" in stdout
            assert 'Missing required config entry "solids" at the root.' in stdout
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Rebuild the instance from ``instance_ref`` and run the scheduler once.

    Time is frozen with pendulum at ``execution_datetime`` and
    ``debug_crash_flags`` is forwarded unchanged. ``cleanup_test_instance`` is
    always called afterwards, even if the scheduler raises.

    NOTE(review): the name suggests this is the target of a child process in
    crash-recovery tests -- confirm against the callers.
    """
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(execution_datetime):
                scheduler_logger = logger()
                frozen_now = pendulum.now("UTC")
                launch_scheduled_runs(
                    instance, scheduler_logger, frozen_now, debug_crash_flags=debug_crash_flags,
                )
        finally:
            cleanup_test_instance(instance)
Beispiel #17
0
def test_no_started_schedules(external_repo_context, capfd):
    """Run the scheduler without starting any schedule.

    The test expects no runs, no ticks for ``simple_schedule``, and a log line
    stating that nothing was checked.
    """
    with instance_with_schedules(external_repo_context) as (instance, repo):
        schedule = repo.get_external_schedule("simple_schedule")
        origin = schedule.get_origin()

        # Time is not frozen here and the schedule is never started.
        scheduler_logger = get_default_scheduler_logger()
        launch_scheduled_runs(instance, scheduler_logger, pendulum.now("UTC"))
        assert instance.get_runs_count() == 0

        recorded_ticks = instance.get_schedule_ticks(origin.get_id())
        assert len(recorded_ticks) == 0

        stdout = capfd.readouterr().out
        assert "Not checking for any runs since no schedules have been started." in stdout
Beispiel #18
0
def test_bad_load():
    """Run the scheduler against a schedule whose repository target does not exist.

    A RUNNING schedule state is stored for the unloadable origin one second
    before midnight. The scheduler is run just after midnight and again one
    day later. Each pass is expected to add one FAILURE tick stamped with the
    frozen current time, carrying the "not found at module scope" error, and
    to create no runs.
    """
    with schedule_instance() as instance:
        this_dir = os.path.dirname(__file__)
        broken_repo = ReconstructableRepository.for_file(__file__, "doesnt_exist", this_dir)
        broken_schedule = broken_repo.get_reconstructable_schedule("also_doesnt_exist")
        unloadable_origin = broken_schedule.get_origin()

        start_time = datetime(
            year=2019, month=2, day=27, hour=23, minute=59, second=59, tzinfo=get_utc_timezone(),
        )
        with freeze_time(start_time) as clock:
            started_at = get_timestamp_from_utc_datetime(get_current_datetime_in_utc())
            state = ScheduleState(
                unloadable_origin, ScheduleStatus.RUNNING, "0 0 * * *", started_at,
            )
            instance.add_schedule_state(state)

            # Step to midnight so the "0 0 * * *" cron schedule becomes due.
            clock.tick(delta=timedelta(seconds=1))

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 0

            recorded_ticks = instance.get_schedule_ticks(unloadable_origin.get_id())

            assert len(recorded_ticks) == 1
            latest_tick = recorded_ticks[0]
            assert latest_tick.status == ScheduleTickStatus.FAILURE
            assert latest_tick.timestamp == get_timestamp_from_utc_datetime(
                get_current_datetime_in_utc()
            )
            assert "doesnt_exist not found at module scope in file" in latest_tick.error.message

            clock.tick(delta=timedelta(days=1))

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 0

            recorded_ticks = instance.get_schedule_ticks(unloadable_origin.get_id())

            assert len(recorded_ticks) == 2
            # Index 0 is compared against the new current time, so the ticks
            # are presumably returned newest-first -- confirm.
            latest_tick = recorded_ticks[0]
            assert latest_tick.status == ScheduleTickStatus.FAILURE
            assert latest_tick.timestamp == get_timestamp_from_utc_datetime(
                get_current_datetime_in_utc()
            )
            assert "doesnt_exist not found at module scope in file" in latest_tick.error.message
Beispiel #19
0
def test_wrong_config(external_repo_context):
    """Run ``wrong_config_schedule`` and check the config error lands in the run log.

    The test expects one run (validated with ``expected_success=False``), one
    SUCCESS tick pointing at it, and at least one ENGINE_EVENT in the run's
    log that mentions ``DagsterInvalidConfigError``.
    """
    with instance_with_schedules(external_repo_context) as (instance, repo):
        schedule = repo.get_external_schedule("wrong_config_schedule")
        origin = schedule.get_origin()

        midnight = datetime(
            year=2019, month=2, day=27, hour=0, minute=0, second=0, tzinfo=get_utc_timezone(),
        )
        with freeze_time(midnight):
            instance.start_schedule_and_update_storage_state(schedule)

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 1

            wait_for_all_runs_to_start(instance)

            run = instance.get_runs()[0]
            validate_run_started(run, midnight, "2019-02-26", expected_success=False)

            recorded_ticks = instance.get_schedule_ticks(origin.get_id())
            assert len(recorded_ticks) == 1
            validate_tick(
                recorded_ticks[0], schedule, midnight, ScheduleTickStatus.SUCCESS, run.run_id,
            )

            # Every log record is inspected; the config error must appear as
            # an engine event on the run.
            config_error_events = []
            for record in instance.all_logs(run.run_id):
                if (
                    "DagsterInvalidConfigError" in record.dagster_event.message
                    and record.dagster_event_type == DagsterEventType.ENGINE_EVENT
                ):
                    config_error_events.append(record)
            assert len(config_error_events) > 0
Beispiel #20
0
def test_skip(external_repo_context):
    """Run ``skip_schedule`` at a frozen midnight UTC and expect a SKIPPED tick.

    No run may be created; exactly one tick is expected.
    """
    with instance_with_schedules(external_repo_context) as (instance, repo):
        schedule = repo.get_external_schedule("skip_schedule")
        origin = schedule.get_origin()

        midnight = datetime(
            year=2019, month=2, day=27, hour=0, minute=0, second=0, tzinfo=get_utc_timezone(),
        )
        with freeze_time(midnight):
            instance.start_schedule_and_update_storage_state(schedule)

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 0

            recorded_ticks = instance.get_schedule_ticks(origin.get_id())
            assert len(recorded_ticks) == 1
            validate_tick(recorded_ticks[0], schedule, midnight, ScheduleTickStatus.SKIPPED, None)
def test_bad_should_execute(external_repo_context, capfd):
    """Run ``bad_should_execute_schedule`` and expect a FAILURE tick and no runs.

    The tick must carry the ``should_execute`` error message, and the captured
    scheduler output must contain that message and "Exception: bananas".
    """
    expected_error = (
        "Error occurred during the execution of should_execute for "
        "schedule bad_should_execute_schedule"
    )

    with instance_with_schedules(external_repo_context) as (instance, repo):
        schedule = repo.get_external_schedule("bad_should_execute_schedule")
        origin = schedule.get_origin()

        midnight = datetime(
            year=2019, month=2, day=27, hour=0, minute=0, second=0, tzinfo=get_utc_timezone(),
        )
        with freeze_time(midnight):
            instance.start_schedule_and_update_storage_state(schedule)

            scheduler_logger = get_default_scheduler_logger()
            launch_scheduled_runs(instance, scheduler_logger, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 0
            recorded_ticks = instance.get_schedule_ticks(origin.get_id())
            assert len(recorded_ticks) == 1

            validate_tick(
                recorded_ticks[0],
                schedule,
                midnight,
                ScheduleTickStatus.FAILURE,
                None,
                expected_error,
            )

            stdout = capfd.readouterr().out

            assert "Failed to fetch schedule data for bad_should_execute_schedule: " in stdout
            assert expected_error in stdout
            assert "Exception: bananas" in stdout
Beispiel #22
0
def test_max_catchup_runs():
    """Run the scheduler five days late with ``max_catchup_runs=2``.

    ``simple_schedule`` is started at 2019-02-27 23:59:59 UTC and the clock is
    then advanced five days. Exactly two runs and two ticks are expected: the
    first for March 4 (partition "2019-03-03") and the second for March 3
    (partition "2019-03-02").
    """
    start_time = datetime(
        year=2019, month=2, day=27, hour=23, minute=59, second=59, tzinfo=get_utc_timezone(),
    )
    with instance_with_schedules(grpc_repo) as (instance, repo), freeze_time(start_time) as clock:
        schedule = repo.get_external_schedule("simple_schedule")
        origin = schedule.get_origin()
        instance.start_schedule_and_update_storage_state(schedule)

        # The clock now reads March 4, 23:59:59.
        clock.tick(delta=timedelta(days=5))

        launch_scheduled_runs(instance, get_current_datetime_in_utc(), max_catchup_runs=2)

        assert instance.get_runs_count() == 2
        recorded_ticks = instance.get_schedule_ticks(origin.get_id())
        assert len(recorded_ticks) == 2

        march_fourth = datetime(year=2019, month=3, day=4, tzinfo=get_utc_timezone())

        wait_for_all_runs_to_start(instance)

        # Runs are re-read from storage for every check, as in the original.
        newest_run_id = instance.get_runs()[0].run_id
        validate_tick(
            recorded_ticks[0], schedule, march_fourth, ScheduleTickStatus.SUCCESS, newest_run_id,
        )
        validate_run_started(instance.get_runs()[0], march_fourth, "2019-03-03")

        march_third = datetime(year=2019, month=3, day=3, tzinfo=get_utc_timezone())

        older_run_id = instance.get_runs()[1].run_id
        validate_tick(
            recorded_ticks[1], schedule, march_third, ScheduleTickStatus.SUCCESS, older_run_id,
        )
        validate_run_started(instance.get_runs()[1], march_third, "2019-03-02")
Beispiel #23
0
def test_multiple_schedules_on_different_time_ranges(external_repo_context):
    """Run ``simple_schedule`` and ``simple_hourly_schedule`` side by side.

    Both are started at 2019-02-27 23:59:59 UTC. Two seconds later each must
    have produced one SUCCESS tick (two runs in total). One hour after that,
    only the hourly schedule gains a second SUCCESS tick (three runs in total).
    """
    with instance_with_schedules(external_repo_context) as (instance, repo):
        daily = repo.get_external_schedule("simple_schedule")
        hourly = repo.get_external_schedule("simple_hourly_schedule")

        start_time = datetime(
            year=2019, month=2, day=27, hour=23, minute=59, second=59, tzinfo=get_utc_timezone(),
        )
        with freeze_time(start_time) as clock:
            instance.start_schedule_and_update_storage_state(daily)
            instance.start_schedule_and_update_storage_state(hourly)

            # Cross midnight: both schedules are expected to fire.
            clock.tick(delta=timedelta(seconds=2))
            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 2

            daily_ticks = instance.get_schedule_ticks(daily.get_origin_id())
            assert len(daily_ticks) == 1
            assert daily_ticks[0].status == ScheduleTickStatus.SUCCESS

            hourly_ticks = instance.get_schedule_ticks(hourly.get_origin_id())
            assert len(hourly_ticks) == 1
            assert hourly_ticks[0].status == ScheduleTickStatus.SUCCESS

            # One hour on: only the hourly schedule is expected to fire again.
            clock.tick(delta=timedelta(hours=1))
            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 3

            daily_ticks = instance.get_schedule_ticks(daily.get_origin_id())
            assert len(daily_ticks) == 1
            assert daily_ticks[0].status == ScheduleTickStatus.SUCCESS

            hourly_ticks = instance.get_schedule_ticks(hourly.get_origin_id())
            assert len(hourly_ticks) == 2
            successful_hourly = [
                tick for tick in hourly_ticks if tick.status == ScheduleTickStatus.SUCCESS
            ]
            assert len(successful_hourly) == 2
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Rebuild the instance from ``instance_ref`` and drain one scheduler pass.

    A ``ProcessGrpcServerRegistry`` is opened for the pass and time is frozen
    with pendulum at ``execution_datetime``. The result of
    ``launch_scheduled_runs`` is consumed with ``list(...)``, and
    ``cleanup_test_instance`` always runs afterwards.

    NOTE(review): the name suggests this is the target of a child process in
    crash-recovery tests -- confirm against the callers.
    """
    with DagsterInstance.from_ref(instance_ref) as instance, ProcessGrpcServerRegistry(
        wait_for_processes_on_exit=True
    ) as registry:
        try:
            with pendulum.test(execution_datetime):
                scheduler_logger = logger()
                frozen_now = pendulum.now("UTC")
                pending = launch_scheduled_runs(
                    instance,
                    registry,
                    scheduler_logger,
                    frozen_now,
                    debug_crash_flags=debug_crash_flags,
                )
                # Exhaust the returned iterable while time is still frozen.
                list(pending)
        finally:
            cleanup_test_instance(instance)
Beispiel #25
0
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime,
                                              debug_crash_flags):
    """Run one scheduler iteration against the instance rebuilt from ``instance_ref``.

    A test daemon workspace is created from ``workspace_load_target()`` for the
    instance, the clock is frozen at ``execution_datetime`` with
    ``pendulum.test``, and ``launch_scheduled_runs`` is drained with
    ``debug_crash_flags`` forwarded unchanged. ``cleanup_test_instance`` runs
    afterwards whether or not the iteration raised.
    """
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            workspace_cm = create_test_daemon_workspace(
                workspace_load_target(), instance)
            with workspace_cm as workspace, pendulum.test(execution_datetime):
                scheduler_log = logger()
                now_utc = pendulum.now("UTC")
                # Exhaust the iterator so every step of the iteration executes.
                for _ in launch_scheduled_runs(
                        instance,
                        workspace,
                        scheduler_log,
                        now_utc,
                        debug_crash_flags=debug_crash_flags,
                ):
                    pass
        finally:
            cleanup_test_instance(instance)
Beispiel #26
0
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime,
                                              debug_crash_flags):
    """Run one scheduler iteration against the instance rebuilt from ``instance_ref``.

    A ``DynamicWorkspace`` backed by a fresh ``ProcessGrpcServerRegistry`` is
    opened, the clock is frozen at ``execution_datetime`` with
    ``pendulum.test``, and ``launch_scheduled_runs`` is drained with
    ``debug_crash_flags`` forwarded unchanged. ``cleanup_test_instance`` runs
    afterwards whether or not the iteration raised.
    """
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with ProcessGrpcServerRegistry() as registry:
                with DynamicWorkspace(registry) as workspace, \
                        pendulum.test(execution_datetime):
                    scheduler_log = logger()
                    now_utc = pendulum.now("UTC")
                    # Exhaust the iterator so every step of the iteration
                    # executes.
                    for _ in launch_scheduled_runs(
                            instance,
                            workspace,
                            scheduler_log,
                            now_utc,
                            debug_crash_flags=debug_crash_flags,
                    ):
                        pass
        finally:
            cleanup_test_instance(instance)
Beispiel #27
0
def test_differing_timezones(instance, workspace, external_repo):
    # Two daily schedules, one evaluated in US/Central and one in US/Eastern.
    # The frozen clock starts one second before midnight Eastern (expressed in
    # US/Pacific) and is then moved forward past each timezone's midnight in turn.

    def run_scheduler_iteration():
        # Drain the scheduler iterator at the currently frozen "now".
        for _ in launch_scheduled_runs(
            instance,
            workspace,
            logger(),
            pendulum.now("UTC"),
        ):
            pass

    def central_ticks():
        return instance.get_ticks(central_origin.get_id(), central_schedule.selector_id)

    def eastern_ticks():
        return instance.get_ticks(eastern_origin.get_id(), eastern_schedule.selector_id)

    frozen_at = to_timezone(
        create_pendulum_time(2019, 2, 27, 23, 59, 59, tz="US/Eastern"), "US/Pacific"
    )
    with pendulum.test(frozen_at):
        central_schedule = external_repo.get_external_schedule("daily_central_time_schedule")
        eastern_schedule = external_repo.get_external_schedule("daily_eastern_time_schedule")

        central_origin = central_schedule.get_external_origin()
        eastern_origin = eastern_schedule.get_external_origin()

        instance.start_schedule(central_schedule)
        instance.start_schedule(eastern_schedule)

        # Freshly started schedules have no runs and no ticks.
        assert instance.get_runs_count() == 0
        assert len(central_ticks()) == 0
        assert len(eastern_ticks()) == 0

        # Still before midnight in both timezones, so an iteration launches nothing.
        run_scheduler_iteration()
        assert instance.get_runs_count() == 0
        assert len(central_ticks()) == 0
        assert len(eastern_ticks()) == 0

    # One minute later it is past midnight Eastern but not yet midnight Central:
    # only the Eastern schedule is expected to fire.
    frozen_at = frozen_at.add(minutes=1)
    with pendulum.test(frozen_at):
        run_scheduler_iteration()

        assert instance.get_runs_count() == 1
        ticks = eastern_ticks()
        assert len(ticks) == 1

        eastern_midnight_utc = to_timezone(
            create_pendulum_time(year=2019, month=2, day=28, tz="US/Eastern"), "UTC"
        )

        validate_tick(
            ticks[0],
            eastern_schedule,
            eastern_midnight_utc,
            TickStatus.SUCCESS,
            [launched.run_id for launched in instance.get_runs()],
        )

        assert len(central_ticks()) == 0

        wait_for_all_runs_to_start(instance)
        validate_run_started(
            instance,
            instance.get_runs()[0],
            eastern_midnight_utc,
            create_pendulum_time(2019, 2, 27, tz="US/Eastern"),
        )

    # Another hour on, midnight Central has passed as well.
    frozen_at = frozen_at.add(hours=1)
    with pendulum.test(frozen_at):
        run_scheduler_iteration()

        assert instance.get_runs_count() == 2
        assert len(eastern_ticks()) == 1

        ticks = central_ticks()
        assert len(ticks) == 1

        central_midnight_utc = to_timezone(
            create_pendulum_time(year=2019, month=2, day=28, tz="US/Central"), "UTC"
        )

        # The run at index 0 is the one checked against the Central tick.
        validate_tick(
            ticks[0],
            central_schedule,
            central_midnight_utc,
            TickStatus.SUCCESS,
            [instance.get_runs()[0].run_id],
        )

        wait_for_all_runs_to_start(instance)
        validate_run_started(
            instance,
            instance.get_runs()[0],
            central_midnight_utc,
            create_pendulum_time(2019, 2, 27, tz="US/Central"),
        )

        # Idempotence: a second iteration at the same instant adds nothing.
        run_scheduler_iteration()
        assert instance.get_runs_count() == 2

        ticks = central_ticks()
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.SUCCESS

        ticks = eastern_ticks()
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.SUCCESS
Beispiel #28
0
def test_execute_during_dst_transition_fall_back(instance, workspace, external_repo):
    # A daily schedule whose execution time occurs twice on the day of the fall
    # DST transition must still yield exactly one tick and one run for that day.

    def run_scheduler_iteration():
        # Drain the scheduler iterator at the currently frozen "now".
        for _ in launch_scheduled_runs(
            instance,
            workspace,
            logger(),
            pendulum.now("UTC"),
        ):
            pass

    def schedule_ticks():
        return instance.get_ticks(origin.get_id(), dst_schedule.selector_id)

    start_time = to_timezone(
        create_pendulum_time(2019, 11, 2, 0, 0, 0, tz="US/Central"), "US/Pacific"
    )

    with pendulum.test(start_time):
        dst_schedule = external_repo.get_external_schedule(
            "daily_dst_transition_schedule_doubled_time"
        )
        origin = dst_schedule.get_external_origin()
        instance.start_schedule(dst_schedule)

        assert instance.get_runs_count() == 0
        assert len(schedule_ticks()) == 0

    # Jump three days ahead and let a single iteration catch up on every missed tick.
    with pendulum.test(start_time.add(days=3)):
        run_scheduler_iteration()

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        ticks = schedule_ticks()
        assert len(ticks) == 3

        # (tick time in UTC, partition start in US/Central), listed newest first
        # to line up with the index order of ``ticks`` and ``instance.get_runs()``.
        expectations = [
            (
                create_pendulum_time(2019, 11, 4, 7, 30, 0, tz="UTC"),
                create_pendulum_time(2019, 11, 3, tz="US/Central"),
            ),
            (
                create_pendulum_time(2019, 11, 3, 7, 30, 0, tz="UTC"),
                create_pendulum_time(2019, 11, 2, tz="US/Central"),
            ),
            (
                create_pendulum_time(2019, 11, 2, 6, 30, 0, tz="UTC"),
                create_pendulum_time(2019, 11, 1, tz="US/Central"),
            ),
        ]

        for position, (tick_time_utc, partition_start) in enumerate(expectations):
            validate_tick(
                ticks[position],
                dst_schedule,
                tick_time_utc,
                TickStatus.SUCCESS,
                [instance.get_runs()[position].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[position],
                tick_time_utc,
                partition_time=partition_start,
            )

        # Idempotence: a second iteration at the same instant adds nothing.
        run_scheduler_iteration()
        assert instance.get_runs_count() == 3
        assert len(schedule_ticks()) == 3
Beispiel #29
0
def test_execute_during_dst_transition_spring_forward(instance, workspace, external_repo):
    # A daily schedule whose nominal execution time (02:30 US/Central on the
    # surrounding days) is expected to tick at 03:00 US/Central on March 10, so
    # that every one of the three days still gets exactly one tick and one run.
    # The clock starts at midnight US/Central on March 9.

    def run_scheduler_iteration():
        # Drain the scheduler iterator at the currently frozen "now".
        for _ in launch_scheduled_runs(
            instance,
            workspace,
            logger(),
            pendulum.now("UTC"),
        ):
            pass

    def schedule_ticks():
        return instance.get_ticks(origin.get_id(), dst_schedule.selector_id)

    start_time = to_timezone(
        create_pendulum_time(2019, 3, 9, 0, 0, 0, tz="US/Central"), "US/Pacific"
    )

    with pendulum.test(start_time):
        dst_schedule = external_repo.get_external_schedule(
            "daily_dst_transition_schedule_skipped_time"
        )
        origin = dst_schedule.get_external_origin()
        instance.start_schedule(dst_schedule)

        assert instance.get_runs_count() == 0
        assert len(schedule_ticks()) == 0

    # Jump three days ahead and let a single iteration catch up on every missed tick.
    with pendulum.test(start_time.add(days=3)):
        run_scheduler_iteration()

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        ticks = schedule_ticks()
        assert len(ticks) == 3

        # (tick time converted to UTC, partition start in US/Central), listed
        # newest first to line up with the index order of ``ticks`` and
        # ``instance.get_runs()``.
        expectations = [
            (
                to_timezone(create_pendulum_time(2019, 3, 11, 2, 30, 0, tz="US/Central"), "UTC"),
                create_pendulum_time(2019, 3, 10, tz="US/Central"),
            ),
            (
                to_timezone(create_pendulum_time(2019, 3, 10, 3, 0, 0, tz="US/Central"), "UTC"),
                create_pendulum_time(2019, 3, 9, tz="US/Central"),
            ),
            (
                to_timezone(create_pendulum_time(2019, 3, 9, 2, 30, 0, tz="US/Central"), "UTC"),
                create_pendulum_time(2019, 3, 8, tz="US/Central"),
            ),
        ]

        # The partition set must contain a partition for each of the three days.
        partition_names = the_repo.get_partition_set_def(
            "daily_dst_transition_schedule_skipped_time_partitions"
        ).get_partition_names()

        for expected_name in ("2019-03-08", "2019-03-09", "2019-03-10"):
            assert expected_name in partition_names

        for position, (tick_time_utc, partition_start) in enumerate(expectations):
            validate_tick(
                ticks[position],
                dst_schedule,
                tick_time_utc,
                TickStatus.SUCCESS,
                [instance.get_runs()[position].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[position],
                tick_time_utc,
                partition_time=partition_start,
            )

        # Idempotence: a second iteration at the same instant adds nothing.
        run_scheduler_iteration()
        assert instance.get_runs_count() == 3
        assert len(schedule_ticks()) == 3
Beispiel #30
0
def test_daily_dst_fall_back(instance, workspace, external_repo):
    # A daily midnight US/Central schedule must keep firing once per day across
    # the fall DST transition. The clock starts at midnight US/Central on Nov 3.

    def run_scheduler_iteration():
        # Drain the scheduler iterator at the currently frozen "now".
        for _ in launch_scheduled_runs(
            instance,
            workspace,
            logger(),
            pendulum.now("UTC"),
        ):
            pass

    def schedule_ticks():
        return instance.get_ticks(origin.get_id(), daily_schedule.selector_id)

    start_time = to_timezone(
        create_pendulum_time(2019, 11, 3, 0, 0, 0, tz="US/Central"), "US/Pacific"
    )

    with pendulum.test(start_time):
        daily_schedule = external_repo.get_external_schedule("daily_central_time_schedule")
        origin = daily_schedule.get_external_origin()
        instance.start_schedule(daily_schedule)

        assert instance.get_runs_count() == 0
        assert len(schedule_ticks()) == 0

    # Jump two days ahead and let a single iteration catch up on every missed tick.
    with pendulum.test(start_time.add(days=2)):
        run_scheduler_iteration()

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        ticks = schedule_ticks()
        assert len(ticks) == 3

        # (tick time in UTC, partition start in US/Central), listed newest first
        # to line up with the index order of ``ticks`` and ``instance.get_runs()``.
        # The UTC hour shifts from 05:00 to 06:00 after the transition while the
        # schedule keeps the same wall-clock time in US/Central.
        expectations = [
            (
                create_pendulum_time(2019, 11, 5, 6, 0, 0, tz="UTC"),
                create_pendulum_time(2019, 11, 4, tz="US/Central"),
            ),
            (
                create_pendulum_time(2019, 11, 4, 6, 0, 0, tz="UTC"),
                create_pendulum_time(2019, 11, 3, tz="US/Central"),
            ),
            (
                create_pendulum_time(2019, 11, 3, 5, 0, 0, tz="UTC"),
                create_pendulum_time(2019, 11, 2, tz="US/Central"),
            ),
        ]

        for position, (tick_time_utc, partition_start) in enumerate(expectations):
            validate_tick(
                ticks[position],
                daily_schedule,
                tick_time_utc,
                TickStatus.SUCCESS,
                [instance.get_runs()[position].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[position],
                tick_time_utc,
                partition_time=partition_start,
            )

        # Idempotence: a second iteration at the same instant adds nothing.
        run_scheduler_iteration()
        assert instance.get_runs_count() == 3
        assert len(schedule_ticks()) == 3