def test_bad_load(capfd):
    # Verify the scheduler surfaces a useful error (rather than crashing) when a
    # schedule's origin points at code that can no longer be loaded.
    with schedule_instance() as instance:
        fake_origin = _get_unloadable_schedule_origin()
        initial_datetime = pendulum.datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
        )
        with pendulum.test(initial_datetime):
            # Insert a RUNNING schedule state directly, bypassing the normal
            # start path (which would fail to load the unloadable origin).
            schedule_state = ScheduleState(
                fake_origin,
                ScheduleStatus.RUNNING,
                "0 0 * * *",
                pendulum.now("UTC").timestamp(),
            )
            instance.add_schedule_state(schedule_state)
        initial_datetime = initial_datetime.add(seconds=1)
        with pendulum.test(initial_datetime):
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            # The load failure must produce no runs and no ticks...
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(fake_origin.get_id())
            assert len(ticks) == 0
            # ...but the error must appear in the scheduler's output.
            captured = capfd.readouterr()
            assert "Scheduler failed for also_doesnt_exist" in captured.out
            assert "doesnt_exist not found at module scope" in captured.out
        initial_datetime = initial_datetime.add(days=1)
        with pendulum.test(initial_datetime):
            # A day later the origin is still unloadable: still no runs or ticks.
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(fake_origin.get_id())
            assert len(ticks) == 0
def test_schedule_without_timezone(external_repo_context, capfd):
    """A schedule that declares no execution_timezone never runs; the scheduler logs why."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("daily_schedule_without_timezone")
        schedule_origin = external_schedule.get_origin()
        start_time = pendulum.datetime(year=2019, month=2, day=27, hour=0, minute=0, second=0)

        with pendulum.test(start_time):
            instance.start_schedule_and_update_storage_state(external_schedule)
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

            # Neither a run nor a tick should have been created.
            assert instance.get_runs_count() == 0
            assert len(instance.get_schedule_ticks(schedule_origin.get_id())) == 0

            captured = capfd.readouterr()
            assert (
                "Scheduler could not run for daily_schedule_without_timezone as it did not specify "
                "an execution_timezone in its definition." in captured.out
            )

        # A full day later nothing has changed.
        with pendulum.test(start_time.add(days=1)):
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            assert instance.get_runs_count() == 0
            assert len(instance.get_schedule_ticks(schedule_origin.get_id())) == 0
def test_bad_env_fn(external_repo_context, capfd):
    """A run_config_fn that raises yields a FAILURE tick, no run, and an error in the output."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        schedule = external_repo.get_external_schedule("bad_env_fn_schedule")
        origin = schedule.get_origin()
        frozen_time = pendulum.datetime(year=2019, month=2, day=27, hour=0, minute=0, second=0)

        with pendulum.test(frozen_time):
            instance.start_schedule_and_update_storage_state(schedule)
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

            # No run is launched, but a failed tick is recorded.
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                schedule,
                frozen_time,
                ScheduleTickStatus.FAILURE,
                None,
                "Error occurred during the execution of run_config_fn for "
                "schedule bad_env_fn_schedule",
            )

            captured = capfd.readouterr()
            assert "Failed to fetch schedule data for bad_env_fn_schedule: " in captured.out
            assert (
                "Error occurred during the execution of run_config_fn for "
                "schedule bad_env_fn_schedule" in captured.out
            )
def test_skip(external_repo_context, capfd):
    # should_execute returning False produces a SKIPPED tick and launches no run.
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("skip_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = pendulum.datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
        ).in_tz("US/Central")
        with pendulum.test(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            # No run launched, but the tick is recorded as SKIPPED.
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.SKIPPED,
                None,
            )
            # Exact scheduler output (timestamps rendered in the local US/Central zone).
            captured = capfd.readouterr()
            assert (
                captured.out
                == """2019-02-26 18:00:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: skip_schedule 2019-02-26 18:00:00 - dagster-scheduler - INFO - Launching run for skip_schedule at 2019-02-27 00:00:00+0000 2019-02-26 18:00:00 - dagster-scheduler - INFO - should_execute returned False for skip_schedule, skipping """
            )
def test_run_scheduled_on_time_boundary(external_repo_context):
    """Starting a schedule exactly on its cron boundary launches a run for that tick."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        schedule = external_repo.get_external_schedule("simple_schedule")
        origin_id = schedule.get_origin().get_id()
        midnight = pendulum.datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
        )
        with pendulum.test(midnight):
            # Start the schedule exactly at midnight, then evaluate immediately.
            instance.start_schedule_and_update_storage_state(schedule)
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(origin_id)
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
def test_with_incorrect_scheduler():
    """launch_scheduled_runs raises if the instance lacks the expected scheduler config."""
    with instance_for_test() as instance:
        with pytest.raises(DagsterInvariantViolationError):
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
def test_launch_failure(external_repo_context, capfd):
    # When the run launcher explodes, the run record is still created and the tick
    # still counts as SUCCESS; the launch failure is surfaced as an ERROR log line.
    with central_timezone():
        with instance_with_schedules(
            external_repo_context,
            overrides={
                # Launcher that always fails, to exercise the error path.
                "run_launcher": {
                    "module": "dagster.core.test_utils",
                    "class": "ExplodingRunLauncher",
                },
            },
        ) as (instance, external_repo):
            external_schedule = external_repo.get_external_schedule("simple_schedule")
            schedule_origin = external_schedule.get_origin()
            initial_datetime = datetime(
                year=2019,
                month=2,
                day=27,
                hour=0,
                minute=0,
                second=0,
                tzinfo=get_utc_timezone(),
            )
            with freeze_time(initial_datetime):
                instance.start_schedule_and_update_storage_state(external_schedule)
                launch_scheduled_runs(instance, get_default_scheduler_logger(), get_current_datetime_in_utc())
                # The run exists even though launching it failed.
                assert instance.get_runs_count() == 1
                run = instance.get_runs()[0]
                validate_run_started(run, initial_datetime, "2019-02-26", expected_success=False)
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 1
                # Tick is SUCCESS: run creation succeeded, only the launch failed.
                validate_tick(
                    ticks[0],
                    external_schedule,
                    initial_datetime,
                    ScheduleTickStatus.SUCCESS,
                    run.run_id,
                )
                captured = capfd.readouterr()
                assert (
                    captured.out
                    == """2019-02-26 18:00:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule 2019-02-26 18:00:00 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-27 00:00:00+0000 2019-02-26 18:00:00 - dagster-scheduler - ERROR - Run {run_id} created successfully but failed to launch. """.format(run_id=instance.get_runs()[0].run_id)
                )
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Subprocess target: rehydrate the instance and evaluate schedules at a frozen time."""
    with DagsterInstance.from_ref(instance_ref) as instance:
        with pendulum.test(execution_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
                debug_crash_flags=debug_crash_flags,
            )
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Subprocess target: rehydrate the instance and evaluate schedules at a frozen time.

    NOTE(review): this shares a name with the pendulum-based helper defined earlier
    in this file; the later definition shadows the earlier one at import time —
    confirm only one is intended to be live.
    """
    with DagsterInstance.from_ref(instance_ref) as instance:
        with freeze_time(execution_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                get_current_datetime_in_utc(),
                debug_crash_flags=debug_crash_flags,
            )
def test_wrong_config(external_repo_context, capfd):
    # A schedule whose run config is invalid still creates a run (which then fails),
    # records a SUCCESS tick, and logs the config error to both the event log and stdout.
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("wrong_config_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)
            launch_scheduled_runs(instance, get_default_scheduler_logger(), get_current_datetime_in_utc())
            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            run = instance.get_runs()[0]
            validate_run_started(run, initial_datetime, "2019-02-26", expected_success=False)
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            # Tick is SUCCESS because run creation itself succeeded.
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.SUCCESS,
                run.run_id,
            )
            # The run's event log records the config failure as an engine event.
            run_logs = instance.all_logs(run.run_id)
            assert (len([
                event
                for event in run_logs
                if ("DagsterInvalidConfigError" in event.dagster_event.message
                    and event.dagster_event_type == DagsterEventType.ENGINE_EVENT)
            ]) > 0)
            captured = capfd.readouterr()
            assert "Failed to fetch execution plan for wrong_config_schedule" in captured.out
            assert "Error in config for pipeline the_pipeline" in captured.out
            assert 'Missing required field "solids" at the root.' in captured.out
def test_no_started_schedules(external_repo_context, capfd):
    """With no schedules started, the scheduler does nothing and says so."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        schedule = external_repo.get_external_schedule("simple_schedule")
        origin_id = schedule.get_origin().get_id()

        launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

        assert instance.get_runs_count() == 0
        assert len(instance.get_schedule_ticks(origin_id)) == 0
        assert (
            "Not checking for any runs since no schedules have been started."
            in capfd.readouterr().out
        )
def test_bad_should_execute(external_repo_context, capfd):
    # A should_execute fn that raises produces a FAILURE tick (no run), and the
    # underlying exception text appears in the scheduler's output.
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("bad_should_execute_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)
            launch_scheduled_runs(instance, get_default_scheduler_logger(), get_current_datetime_in_utc())
            # No run launched; the tick records the failure and its message.
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.FAILURE,
                None,
                "Error occurred during the execution of should_execute for "
                "schedule bad_should_execute_schedule",
            )
            captured = capfd.readouterr()
            assert (
                "Failed to fetch schedule data for bad_should_execute_schedule: "
            ) in captured.out
            assert ("Error occurred during the execution of should_execute "
                    "for schedule bad_should_execute_schedule" in captured.out)
            # "bananas" is the message raised by the test schedule's should_execute.
            assert "Exception: bananas" in captured.out
def test_hourly_dst_spring_forward(external_repo_context):
    # Verify that an hourly schedule still runs hourly during the spring DST transition
    with instance_with_schedules(external_repo_context) as (
        instance,
        external_repo,
    ):
        # 1AM CST
        freeze_datetime = pendulum.create(2019, 3, 10, 1, 0, 0, tz="US/Central").in_tz("US/Pacific")
        with pendulum.test(freeze_datetime):
            external_schedule = external_repo.get_external_schedule("hourly_central_time_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0
        freeze_datetime = freeze_datetime.add(hours=2)
        # DST has now happened, 2 hours later it is 4AM CST
        # Should be 3 runs: 1AM CST, 3AM CST, 4AM CST
        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            wait_for_all_runs_to_start(instance)
            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 3
            # Ticks paired newest-first with their expected times; 2AM CST was skipped by DST.
            expected_datetimes_utc = [
                pendulum.create(2019, 3, 10, 4, 0, 0, tz="US/Central").in_tz("UTC"),
                pendulum.create(2019, 3, 10, 3, 0, 0, tz="US/Central").in_tz("UTC"),
                pendulum.create(2019, 3, 10, 1, 0, 0, tz="US/Central").in_tz("UTC"),
            ]
            for i in range(3):
                validate_tick(
                    ticks[i],
                    external_schedule,
                    expected_datetimes_utc[i],
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[i].run_id,
                )
                # Each run's partition is the hour before its execution time.
                validate_run_started(
                    instance.get_runs()[i],
                    expected_datetimes_utc[i],
                    partition_time=expected_datetimes_utc[i].in_tz("US/Central").subtract(hours=1),
                    partition_fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
                )
            # Verify idempotence
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 3
def test_different_days_in_different_timezones(external_repo_context):
    # A schedule firing at 11PM US/Central is already "tomorrow" in some zones;
    # verify the tick time and partition stay consistent with the schedule's zone.
    with instance_with_schedules(external_repo_context) as (
        instance,
        external_repo,
    ):
        freeze_datetime = pendulum.create(2019, 2, 27, 22, 59, 59, tz="US/Central").in_tz("US/Pacific")
        with pendulum.test(freeze_datetime):
            # Runs every day at 11PM (CST)
            external_schedule = external_repo.get_external_schedule("daily_late_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0
            # One second before 11PM CST: nothing to launch yet.
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0
        freeze_datetime = freeze_datetime.add(seconds=2)
        with pendulum.test(freeze_datetime):
            # Now past 11PM CST: exactly one run launches.
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            expected_datetime = pendulum.create(year=2019, month=2, day=27, hour=23, tz="US/Central").in_tz("UTC")
            validate_tick(
                ticks[0],
                external_schedule,
                expected_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
            wait_for_all_runs_to_start(instance)
            # Partition is the previous day in the schedule's own timezone.
            validate_run_started(
                instance.get_runs()[0],
                expected_datetime,
                pendulum.create(2019, 2, 26, tz="US/Central"),
            )
            # Verify idempotence
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
def test_non_utc_timezone_run(external_repo_context, capfd):
    # Verify that schedule runs at the expected time in a non-UTC timezone
    with instance_with_schedules(external_repo_context) as (
        instance,
        external_repo,
    ):
        freeze_datetime = pendulum.create(2019, 2, 27, 23, 59, 59, tz="US/Central").in_tz("US/Pacific")
        with pendulum.test(freeze_datetime):
            external_schedule = external_repo.get_external_schedule("daily_central_time_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0
            # One second before midnight CST: nothing launches yet.
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0
            captured = capfd.readouterr()
            assert (
                captured.out
                == """2019-02-27 21:59:59 - dagster-scheduler - INFO - Checking for new runs for the following schedules: daily_central_time_schedule 2019-02-27 21:59:59 - dagster-scheduler - INFO - No new runs for daily_central_time_schedule """
            )
        freeze_datetime = freeze_datetime.add(seconds=2)
        with pendulum.test(freeze_datetime):
            # Now past midnight CST: the daily run launches.
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            expected_datetime = pendulum.create(year=2019, month=2, day=28, tz="US/Central").in_tz("UTC")
            validate_tick(
                ticks[0],
                external_schedule,
                expected_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                expected_datetime,
                pendulum.create(2019, 2, 27, tz="US/Central"),
            )
            captured = capfd.readouterr()
            assert (
                captured.out
                == """2019-02-27 22:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: 
daily_central_time_schedule 2019-02-27 22:00:01 - dagster-scheduler - INFO - Launching run for daily_central_time_schedule at 2019-02-28 00:00:00-0600 2019-02-27 22:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {run_id} for daily_central_time_schedule """.format(run_id=instance.get_runs()[0].run_id)
            )
            # Verify idempotence
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
def test_differing_timezones(external_repo_context):
    # Two schedules, one using US/Central, the other on US/Eastern
    with instance_with_schedules(external_repo_context) as (
        instance,
        external_repo,
    ):
        freeze_datetime = pendulum.create(2019, 2, 27, 23, 59, 59, tz="US/Eastern").in_tz("US/Pacific")
        with pendulum.test(freeze_datetime):
            external_schedule = external_repo.get_external_schedule("daily_central_time_schedule")
            external_eastern_schedule = external_repo.get_external_schedule("daily_eastern_time_schedule")
            schedule_origin = external_schedule.get_origin()
            eastern_origin = external_eastern_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)
            instance.start_schedule_and_update_storage_state(external_eastern_schedule)
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0
            ticks = instance.get_schedule_ticks(eastern_origin.get_id())
            assert len(ticks) == 0
            # One second before midnight Eastern: neither schedule fires.
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0
            ticks = instance.get_schedule_ticks(eastern_origin.get_id())
            assert len(ticks) == 0
        # Past midnight eastern time, the eastern timezone schedule will run, but not the central timezone
        freeze_datetime = freeze_datetime.add(minutes=1)
        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(eastern_origin.get_id())
            assert len(ticks) == 1
            expected_datetime = pendulum.create(year=2019, month=2, day=28, tz="US/Eastern").in_tz("UTC")
            validate_tick(
                ticks[0],
                external_eastern_schedule,
                expected_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
            # The central schedule still has no ticks.
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0
            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                expected_datetime,
                pendulum.create(2019, 2, 27, tz="US/Eastern"),
            )
        # Past midnight central time, the central timezone schedule will now run
        freeze_datetime = freeze_datetime.add(hours=1)
        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 2
            ticks = instance.get_schedule_ticks(eastern_origin.get_id())
            assert len(ticks) == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            expected_datetime = pendulum.create(year=2019, month=2, day=28, tz="US/Central").in_tz("UTC")
            validate_tick(
                ticks[0],
                external_schedule,
                expected_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                expected_datetime,
                pendulum.create(2019, 2, 27, tz="US/Central"),
            )
            # Verify idempotence
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 2
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
            ticks = instance.get_schedule_ticks(eastern_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
def test_multiple_schedules_on_different_time_ranges(external_repo_context, capfd):
    # A daily and an hourly schedule evaluated together: both fire at midnight,
    # then an hour later only the hourly schedule gets a new tick.
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        external_hourly_schedule = external_repo.get_external_schedule("simple_hourly_schedule")
        initial_datetime = pendulum.datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
        ).in_tz("US/Central")
        with pendulum.test(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)
            instance.start_schedule_and_update_storage_state(external_hourly_schedule)
        initial_datetime = initial_datetime.add(seconds=2)
        with pendulum.test(initial_datetime):
            # Just past midnight: both schedules fire once each.
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 2
            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
            hourly_ticks = instance.get_schedule_ticks(external_hourly_schedule.get_origin_id())
            assert len(hourly_ticks) == 1
            assert hourly_ticks[0].status == ScheduleTickStatus.SUCCESS
            captured = capfd.readouterr()
            assert (
                captured.out
                == """2019-02-27 18:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_hourly_schedule, simple_schedule 2019-02-27 18:00:01 - dagster-scheduler - INFO - Launching run for simple_hourly_schedule at 2019-02-28 00:00:00+0000 2019-02-27 18:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {first_run_id} for simple_hourly_schedule 2019-02-27 18:00:01 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-28 00:00:00+0000 2019-02-27 18:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule """.format(
                    first_run_id=instance.get_runs()[1].run_id,
                    second_run_id=instance.get_runs()[0].run_id,
                )
            )
        initial_datetime = initial_datetime.add(hours=1)
        with pendulum.test(initial_datetime):
            # An hour later only the hourly schedule launches a new run.
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
            hourly_ticks = instance.get_schedule_ticks(external_hourly_schedule.get_origin_id())
            assert len(hourly_ticks) == 2
            assert (len([
                tick for tick in hourly_ticks if tick.status == ScheduleTickStatus.SUCCESS
            ]) == 2)
            captured = capfd.readouterr()
            assert (
                captured.out
                == """2019-02-27 19:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_hourly_schedule, simple_schedule 2019-02-27 19:00:01 - dagster-scheduler - INFO - Launching run for simple_hourly_schedule at 2019-02-28 01:00:00+0000 2019-02-27 19:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {third_run_id} for simple_hourly_schedule 2019-02-27 19:00:01 - dagster-scheduler - INFO - No new runs for simple_schedule """.format(third_run_id=instance.get_runs()[0].run_id)
            )
def test_simple_schedule(external_repo_context, capfd):
    # End-to-end happy path for a daily schedule, driven by freeze_time:
    # no-op before the first tick, one run per tick, idempotent re-evaluation,
    # and catch-up launches after skipping days.
    with central_timezone():
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tzinfo=get_utc_timezone(),
        )
        with instance_with_schedules(external_repo_context) as (instance, external_repo):
            with freeze_time(initial_datetime) as frozen_datetime:
                external_schedule = external_repo.get_external_schedule("simple_schedule")
                schedule_origin = external_schedule.get_origin()
                instance.start_schedule_and_update_storage_state(external_schedule)
                assert instance.get_runs_count() == 0
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 0
                # launch_scheduled_runs does nothing before the first tick
                launch_scheduled_runs(instance, get_default_scheduler_logger(), get_current_datetime_in_utc())
                assert instance.get_runs_count() == 0
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 0
                captured = capfd.readouterr()
                assert (
                    captured.out
                    == """2019-02-27 17:59:59 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule 2019-02-27 17:59:59 - dagster-scheduler - INFO - No new runs for simple_schedule """
                )
                # Move forward in time so we're past a tick
                frozen_datetime.tick(delta=timedelta(seconds=2))
                launch_scheduled_runs(instance, get_default_scheduler_logger(), get_current_datetime_in_utc())
                assert instance.get_runs_count() == 1
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 1
                expected_datetime = datetime(year=2019, month=2, day=28, tzinfo=get_utc_timezone())
                validate_tick(
                    ticks[0],
                    external_schedule,
                    expected_datetime,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[0].run_id,
                )
                wait_for_all_runs_to_start(instance)
                validate_run_started(instance.get_runs()[0], expected_datetime, "2019-02-27")
                captured = capfd.readouterr()
                assert (
                    captured.out
                    == """2019-02-27 18:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule 
2019-02-27 18:00:01 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-28 00:00:00+0000 2019-02-27 18:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {run_id} for simple_schedule """.format(run_id=instance.get_runs()[0].run_id)
                )
                # Verify idempotence
                launch_scheduled_runs(instance, get_default_scheduler_logger(), get_current_datetime_in_utc())
                assert instance.get_runs_count() == 1
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 1
                assert ticks[0].status == ScheduleTickStatus.SUCCESS
                # Verify advancing in time but not going past a tick doesn't add any new runs
                frozen_datetime.tick(delta=timedelta(seconds=2))
                launch_scheduled_runs(instance, get_default_scheduler_logger(), get_current_datetime_in_utc())
                assert instance.get_runs_count() == 1
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 1
                assert ticks[0].status == ScheduleTickStatus.SUCCESS
                # Drain captured output before the next phase.
                capfd.readouterr()
                # Traveling two more days in the future before running results in two new ticks
                frozen_datetime.tick(delta=timedelta(days=2))
                launch_scheduled_runs(instance, get_default_scheduler_logger(), get_current_datetime_in_utc())
                assert instance.get_runs_count() == 3
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 3
                assert (len([
                    tick for tick in ticks if tick.status == ScheduleTickStatus.SUCCESS
                ]) == 3)
                runs_by_partition = {
                    run.tags[PARTITION_NAME_TAG]: run for run in instance.get_runs()
                }
                assert "2019-02-28" in runs_by_partition
                assert "2019-03-01" in runs_by_partition
                captured = capfd.readouterr()
                assert (
                    captured.out
                    == """2019-03-01 18:00:03 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule 2019-03-01 18:00:03 - dagster-scheduler - INFO - Launching 2 runs for simple_schedule at the following times: 2019-03-01 00:00:00+0000, 2019-03-02 00:00:00+0000 2019-03-01 18:00:03 - dagster-scheduler - INFO - Completed scheduled launch 
of run {first_run_id} for simple_schedule 2019-03-01 18:00:03 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule """.format(
                        first_run_id=instance.get_runs()[1].run_id,
                        second_run_id=instance.get_runs()[0].run_id,
                    )
                )
                # Check idempotence again
                launch_scheduled_runs(instance, get_default_scheduler_logger(), get_current_datetime_in_utc())
                assert instance.get_runs_count() == 3
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 3
def test_simple_schedule(external_repo_context, capfd):
    # End-to-end happy path for a daily schedule, driven by pendulum time-freezing.
    # NOTE(review): this redefines test_simple_schedule (a freeze_time-based version
    # exists earlier in this file); the later definition shadows the earlier one at
    # import time - confirm only one is intended to be live.
    freeze_datetime = pendulum.datetime(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
    ).in_tz("US/Central")
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        with pendulum.test(freeze_datetime):
            external_schedule = external_repo.get_external_schedule("simple_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0
            # launch_scheduled_runs does nothing before the first tick
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0
            captured = capfd.readouterr()
            assert (
                captured.out
                == """2019-02-27 17:59:59 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule 2019-02-27 17:59:59 - dagster-scheduler - INFO - No new runs for simple_schedule """
            )
        freeze_datetime = freeze_datetime.add(seconds=2)
        with pendulum.test(freeze_datetime):
            # Now past the first midnight tick: exactly one run launches.
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            expected_datetime = pendulum.datetime(year=2019, month=2, day=28)
            validate_tick(
                ticks[0],
                external_schedule,
                expected_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                execution_time=pendulum.datetime(2019, 2, 28),
                partition_time=pendulum.datetime(2019, 2, 27),
            )
            captured = capfd.readouterr()
            assert (
                captured.out
                == """2019-02-27 18:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule 2019-02-27 18:00:01 
- dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-28 00:00:00+0000 2019-02-27 18:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {run_id} for simple_schedule """.format(run_id=instance.get_runs()[0].run_id)
            )
            # Verify idempotence
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
        # Verify advancing in time but not going past a tick doesn't add any new runs
        freeze_datetime = freeze_datetime.add(seconds=2)
        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
        freeze_datetime = freeze_datetime.add(days=2)
        with pendulum.test(freeze_datetime):
            # Drain captured output before the catch-up phase.
            capfd.readouterr()
            # Traveling two more days in the future before running results in two new ticks
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
            )
            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 3
            assert len([
                tick for tick in ticks if tick.status == ScheduleTickStatus.SUCCESS
            ]) == 3
            runs_by_partition = {
                run.tags[PARTITION_NAME_TAG]: run for run in instance.get_runs()
            }
            assert "2019-02-28" in runs_by_partition
            assert "2019-03-01" in runs_by_partition
            captured = capfd.readouterr()
            assert (
                captured.out
                == """2019-03-01 18:00:03 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule 2019-03-01 18:00:03 - dagster-scheduler - INFO - Launching 2 runs for simple_schedule at the following times: 2019-03-01 00:00:00+0000, 2019-03-02 00:00:00+0000 2019-03-01 18:00:03 - dagster-scheduler - 
INFO - Completed scheduled launch of run {first_run_id} for simple_schedule 2019-03-01 18:00:03 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule """.format(
                    first_run_id=instance.get_runs()[1].run_id,
                    second_run_id=instance.get_runs()[0].run_id,
                )
            )
            # Check idempotence again
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 3
def test_execute_during_dst_transition_spring_forward(external_repo_context):
    """A daily schedule aimed at a wall-clock time skipped by the spring-forward
    DST transition should not launch a run for that day.
    """
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        # Freeze time to the day before the US spring-forward transition.
        frozen_time = pendulum.create(2019, 3, 9, 0, 0, 0, tz="US/Central").in_tz("US/Pacific")
        with pendulum.test(frozen_time):
            external_schedule = external_repo.get_external_schedule("daily_dst_transition_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

            # Nothing should be launched before the scheduler runs.
            assert instance.get_runs_count() == 0
            assert len(instance.get_schedule_ticks(schedule_origin.get_id())) == 0

        # Jump three days ahead, past the transition, then run the scheduler.
        frozen_time = frozen_time.add(days=3)
        with pendulum.test(frozen_time):
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            wait_for_all_runs_to_start(instance)

            # Only two runs: 3/10 is skipped because 2:30AM never occurred that day.
            assert instance.get_runs_count() == 2
            schedule_ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(schedule_ticks) == 2

            expected_execution_times = [
                pendulum.create(2019, 3, 11, 2, 30, 0, tz="US/Central").in_tz("UTC"),
                pendulum.create(2019, 3, 9, 2, 30, 0, tz="US/Central").in_tz("UTC"),
            ]
            expected_partitions = [
                pendulum.create(2019, 3, 10, tz="US/Central"),
                pendulum.create(2019, 3, 8, tz="US/Central"),
            ]

            # Ticks and runs are returned newest-first, matching the expected lists.
            for tick, run, execution_time, partition_time in zip(
                schedule_ticks, instance.get_runs(), expected_execution_times, expected_partitions
            ):
                validate_tick(
                    tick,
                    external_schedule,
                    execution_time,
                    ScheduleTickStatus.SUCCESS,
                    run.run_id,
                )
                validate_run_started(run, execution_time, partition_time=partition_time)

            # A second pass at the same frozen time must launch nothing new (idempotence).
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            assert instance.get_runs_count() == 2
            assert len(instance.get_schedule_ticks(schedule_origin.get_id())) == 2
def test_bad_schedule_mixed_with_good_schedule(external_repo_context):
    """Verify that one failing schedule does not block a healthy schedule.

    The "bad" schedule raises inside should_execute on odd days; its failure is
    recorded as a FAILURE tick while the "good" schedule keeps launching runs.
    Uses freezegun's freeze_time to control the clock.
    """
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        good_schedule = external_repo.get_external_schedule("simple_schedule")
        bad_schedule = external_repo.get_external_schedule("bad_should_execute_schedule_on_odd_days")

        good_origin = good_schedule.get_origin()
        bad_origin = bad_schedule.get_origin()

        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime) as frozen_datetime:
            instance.start_schedule_and_update_storage_state(good_schedule)
            instance.start_schedule_and_update_storage_state(bad_schedule)

            launch_scheduled_runs(
                instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
            )

            # Only the good schedule produced a run (2/27 is an odd day for the bad one).
            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            validate_run_started(instance.get_runs()[0], initial_datetime, "2019-02-26")

            good_ticks = instance.get_schedule_ticks(good_origin.get_id())
            assert len(good_ticks) == 1
            validate_tick(
                good_ticks[0],
                good_schedule,
                initial_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            # The bad schedule records a FAILURE tick with the should_execute error.
            bad_ticks = instance.get_schedule_ticks(bad_origin.get_id())
            assert len(bad_ticks) == 1

            assert bad_ticks[0].status == ScheduleTickStatus.FAILURE

            assert (
                "Error occurred during the execution of should_execute "
                "for schedule bad_should_execute_schedule" in bad_ticks[0].error.message
            )

            # Advance one day: now the bad schedule's should_execute passes too.
            frozen_datetime.tick(delta=timedelta(days=1))

            new_now = get_current_datetime_in_utc()
            launch_scheduled_runs(
                instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
            )

            # One new run from each schedule on top of the original good run.
            assert instance.get_runs_count() == 3
            wait_for_all_runs_to_start(instance)

            good_schedule_runs = instance.get_runs(
                filters=PipelineRunsFilter.for_schedule(good_schedule)
            )
            assert len(good_schedule_runs) == 2
            validate_run_started(good_schedule_runs[0], new_now, "2019-02-27")

            good_ticks = instance.get_schedule_ticks(good_origin.get_id())
            assert len(good_ticks) == 2
            validate_tick(
                good_ticks[0],
                good_schedule,
                new_now,
                ScheduleTickStatus.SUCCESS,
                good_schedule_runs[0].run_id,
            )

            bad_schedule_runs = instance.get_runs(
                filters=PipelineRunsFilter.for_schedule(bad_schedule)
            )
            assert len(bad_schedule_runs) == 1
            validate_run_started(bad_schedule_runs[0], new_now, "2019-02-27")

            bad_ticks = instance.get_schedule_ticks(bad_origin.get_id())
            assert len(bad_ticks) == 2
            validate_tick(
                bad_ticks[0],
                bad_schedule,
                new_now,
                ScheduleTickStatus.SUCCESS,
                bad_schedule_runs[0].run_id,
            )
# NOTE(review): this module defines test_bad_load twice (a pendulum-based version
# appears earlier in the file). The later definition below shadows the earlier one
# at import time, so only this version is collected by pytest — confirm which of
# the two is meant to survive the freeze_time -> pendulum migration.
def test_bad_load(capfd):
    """Verify scheduler behavior when a schedule's origin cannot be loaded.

    The origin points at a repository name that does not exist in this file, so
    every tick fails; the failure is recorded as a FAILURE tick and logged, and
    no runs are ever launched.
    """
    with schedule_instance() as instance:
        working_directory = os.path.dirname(__file__)
        recon_repo = ReconstructableRepository.for_file(
            __file__, "doesnt_exist", working_directory
        )
        schedule = recon_repo.get_reconstructable_schedule("also_doesnt_exist")
        fake_origin = schedule.get_origin()

        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime) as frozen_datetime:
            # Register the unloadable schedule as RUNNING directly in storage.
            schedule_state = ScheduleState(
                fake_origin,
                ScheduleStatus.RUNNING,
                "0 0 * * *",
                get_timestamp_from_utc_datetime(get_current_datetime_in_utc()),
            )
            instance.add_schedule_state(schedule_state)

            # Cross midnight so the cron boundary fires.
            frozen_datetime.tick(delta=timedelta(seconds=1))

            launch_scheduled_runs(
                instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
            )

            assert instance.get_runs_count() == 0

            ticks = instance.get_schedule_ticks(fake_origin.get_id())

            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.FAILURE
            assert ticks[0].timestamp == get_timestamp_from_utc_datetime(
                get_current_datetime_in_utc()
            )
            assert "doesnt_exist not found at module scope in file" in ticks[
                0
            ].error.message

            captured = capfd.readouterr()
            assert "Error launching scheduled run" in captured.out
            assert "doesnt_exist not found at module scope" in captured.out

            # A day later the load fails again: a second FAILURE tick, still no runs.
            frozen_datetime.tick(delta=timedelta(days=1))

            launch_scheduled_runs(
                instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
            )
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(fake_origin.get_id())
            assert len(ticks) == 2
            assert ticks[0].status == ScheduleTickStatus.FAILURE
            assert ticks[0].timestamp == get_timestamp_from_utc_datetime(
                get_current_datetime_in_utc()
            )
            assert "doesnt_exist not found at module scope in file" in ticks[
                0
            ].error.message

            captured = capfd.readouterr()
            assert "Error launching scheduled run" in captured.out
            assert "doesnt_exist not found at module scope" in captured.out
def test_hourly_dst_fall_back(external_repo_context):
    """Verify that an hourly schedule keeps firing every real hour across the
    fall-back DST transition — i.e. the repeated 1AM wall-clock hour yields two
    distinct runs (1AM CDT and 1AM CST).
    """
    # Verify that an hourly schedule still runs hourly during the fall DST transition
    with instance_with_schedules(external_repo_context) as (
        instance,
        external_repo,
    ):
        # 12:30 AM CST
        freeze_datetime = pendulum.create(2019, 11, 3, 0, 30, 0, tz="US/Central").in_tz(
            "US/Pacific"
        )

        with pendulum.test(freeze_datetime):
            external_schedule = external_repo.get_external_schedule(
                "hourly_central_time_schedule"
            )
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

        freeze_datetime = freeze_datetime.add(hours=4)

        # DST has now happened, 4 hours later it is 3:30AM CST
        # Should be 4 runs: 1AM CDT, 1AM CST, 2AM CST, 3AM CST
        with pendulum.test(freeze_datetime):
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 4
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 4

            # Newest-first execution times in UTC; each UTC hour is distinct even
            # though two of them render as "1AM" in Central time.
            expected_datetimes_utc = [
                pendulum.create(2019, 11, 3, 9, 0, 0, tz="UTC"),
                pendulum.create(2019, 11, 3, 8, 0, 0, tz="UTC"),
                pendulum.create(2019, 11, 3, 7, 0, 0, tz="UTC"),
                pendulum.create(2019, 11, 3, 6, 0, 0, tz="UTC"),
            ]

            expected_ct_times = [
                "2019-11-03T03:00:00-06:00",  # 3 AM CST
                "2019-11-03T02:00:00-06:00",  # 2 AM CST
                "2019-11-03T01:00:00-06:00",  # 1 AM CST
                "2019-11-03T01:00:00-05:00",  # 1 AM CDT
            ]

            for i in range(4):
                # Sanity-check the Central-time rendering, including UTC offset.
                assert (
                    expected_datetimes_utc[i].in_tz("US/Central").isoformat()
                    == expected_ct_times[i]
                )

                validate_tick(
                    ticks[i],
                    external_schedule,
                    expected_datetimes_utc[i],
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[i].run_id,
                )

                # Partition is the hour before execution, formatted with tz offset.
                validate_run_started(
                    instance.get_runs()[i],
                    expected_datetimes_utc[i],
                    partition_time=expected_datetimes_utc[i]
                    .in_tz("US/Central")
                    .subtract(hours=1),
                    partition_fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
                )

            # Verify idempotence
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            assert instance.get_runs_count() == 4
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 4
def test_bad_schedules_mixed_with_good_schedule(external_repo_context, capfd):
    """Verify that neither a should_execute-raising schedule nor an entirely
    unloadable schedule blocks a healthy schedule from launching runs.

    Pendulum-based counterpart of test_bad_schedule_mixed_with_good_schedule,
    adding an unloadable origin whose failure is only surfaced in the log output
    (no ticks are ever written for it).
    """
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        good_schedule = external_repo.get_external_schedule("simple_schedule")
        bad_schedule = external_repo.get_external_schedule("bad_should_execute_schedule_on_odd_days")

        good_origin = good_schedule.get_origin()
        bad_origin = bad_schedule.get_origin()
        unloadable_origin = _get_unloadable_schedule_origin()
        initial_datetime = pendulum.datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
        )
        with pendulum.test(initial_datetime):
            instance.start_schedule_and_update_storage_state(good_schedule)
            instance.start_schedule_and_update_storage_state(bad_schedule)

            # The unloadable schedule can only be registered directly in storage.
            unloadable_schedule_state = ScheduleState(
                unloadable_origin,
                ScheduleStatus.RUNNING,
                "0 0 * * *",
                pendulum.now("UTC").timestamp(),
            )
            instance.add_schedule_state(unloadable_schedule_state)

            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

            # Only the good schedule launched (2/27 is an odd day for the bad one).
            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                execution_time=initial_datetime,
                partition_time=pendulum.datetime(2019, 2, 26),
            )

            good_ticks = instance.get_schedule_ticks(good_origin.get_id())
            assert len(good_ticks) == 1
            validate_tick(
                good_ticks[0],
                good_schedule,
                initial_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            bad_ticks = instance.get_schedule_ticks(bad_origin.get_id())
            assert len(bad_ticks) == 1

            assert bad_ticks[0].status == ScheduleTickStatus.FAILURE

            assert (
                "Error occurred during the execution of should_execute "
                "for schedule bad_should_execute_schedule" in bad_ticks[0].error.message
            )

            # The unloadable schedule never gets far enough to record a tick;
            # its failure shows up only in the captured scheduler output.
            unloadable_ticks = instance.get_schedule_ticks(unloadable_origin.get_id())
            assert len(unloadable_ticks) == 0

            captured = capfd.readouterr()
            assert "Scheduler failed for also_doesnt_exist" in captured.out
            assert "doesnt_exist not found at module scope" in captured.out

        initial_datetime = initial_datetime.add(days=1)
        with pendulum.test(initial_datetime):
            new_now = pendulum.now("UTC")
            launch_scheduled_runs(instance, get_default_scheduler_logger(), new_now)

            # One new run from each loadable schedule on top of the original run.
            assert instance.get_runs_count() == 3
            wait_for_all_runs_to_start(instance)

            good_schedule_runs = instance.get_runs(
                filters=PipelineRunsFilter.for_schedule(good_schedule)
            )
            assert len(good_schedule_runs) == 2
            validate_run_started(
                good_schedule_runs[0],
                execution_time=new_now,
                partition_time=pendulum.datetime(2019, 2, 27),
            )

            good_ticks = instance.get_schedule_ticks(good_origin.get_id())
            assert len(good_ticks) == 2
            validate_tick(
                good_ticks[0],
                good_schedule,
                new_now,
                ScheduleTickStatus.SUCCESS,
                good_schedule_runs[0].run_id,
            )

            bad_schedule_runs = instance.get_runs(
                filters=PipelineRunsFilter.for_schedule(bad_schedule)
            )
            assert len(bad_schedule_runs) == 1
            validate_run_started(
                bad_schedule_runs[0],
                execution_time=new_now,
                partition_time=pendulum.datetime(2019, 2, 27),
            )

            bad_ticks = instance.get_schedule_ticks(bad_origin.get_id())
            assert len(bad_ticks) == 2
            validate_tick(
                bad_ticks[0],
                bad_schedule,
                new_now,
                ScheduleTickStatus.SUCCESS,
                bad_schedule_runs[0].run_id,
            )

            unloadable_ticks = instance.get_schedule_ticks(unloadable_origin.get_id())
            assert len(unloadable_ticks) == 0

            captured = capfd.readouterr()
            assert "Scheduler failed for also_doesnt_exist" in captured.out
            assert "doesnt_exist not found at module scope" in captured.out
def test_daily_dst_fall_back(external_repo_context):
    """A daily schedule should fire exactly once per day across the fall-back
    DST transition, keeping the same Central-time execution hour throughout.
    """
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        # Freeze time to the night before the US fall-back transition.
        frozen_time = pendulum.create(2019, 11, 3, 0, 0, 0, tz="US/Central").in_tz("US/Pacific")
        with pendulum.test(frozen_time):
            external_schedule = external_repo.get_external_schedule("daily_central_time_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

            # Nothing should be launched before the scheduler runs.
            assert instance.get_runs_count() == 0
            assert len(instance.get_schedule_ticks(schedule_origin.get_id())) == 0

        # Jump two days ahead, past the transition, then run the scheduler.
        frozen_time = frozen_time.add(days=2)
        with pendulum.test(frozen_time):
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 3
            schedule_ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(schedule_ticks) == 3

            # The UTC execution hour shifts by one after the transition while the
            # Central-time execution hour stays fixed.
            expected_execution_times = [
                pendulum.create(2019, 11, 5, 6, 0, 0, tz="UTC"),
                pendulum.create(2019, 11, 4, 6, 0, 0, tz="UTC"),
                pendulum.create(2019, 11, 3, 5, 0, 0, tz="UTC"),
            ]
            expected_partitions = [
                pendulum.create(2019, 11, 4, tz="US/Central"),
                pendulum.create(2019, 11, 3, tz="US/Central"),
                pendulum.create(2019, 11, 2, tz="US/Central"),
            ]

            # Ticks and runs are returned newest-first, matching the expected lists.
            for tick, run, execution_time, partition_time in zip(
                schedule_ticks, instance.get_runs(), expected_execution_times, expected_partitions
            ):
                validate_tick(
                    tick,
                    external_schedule,
                    execution_time,
                    ScheduleTickStatus.SUCCESS,
                    run.run_id,
                )
                validate_run_started(run, execution_time, partition_time=partition_time)

            # A second pass at the same frozen time must launch nothing new (idempotence).
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            assert instance.get_runs_count() == 3
            assert len(instance.get_schedule_ticks(schedule_origin.get_id())) == 3
# NOTE(review): this module defines test_max_catchup_runs twice; the later
# pendulum-based definition shadows this freeze_time version at import time, so
# pytest only collects the other one — confirm this copy is meant to be removed
# as part of the freeze_time -> pendulum migration.
def test_max_catchup_runs(capfd):
    """Verify that when the scheduler falls several days behind, it launches at
    most max_catchup_runs of the missed ticks (the most recent ones) and logs a
    warning about falling behind.
    """
    initial_datetime = datetime(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
        tzinfo=get_utc_timezone(),
    )
    with central_timezone():
        with instance_with_schedules(grpc_repo) as (instance, external_repo):
            with freeze_time(initial_datetime) as frozen_datetime:
                external_schedule = external_repo.get_external_schedule("simple_schedule")
                schedule_origin = external_schedule.get_origin()
                instance.start_schedule_and_update_storage_state(external_schedule)

                # Day is now March 4 at 11:59PM
                frozen_datetime.tick(delta=timedelta(days=5))

                launch_scheduled_runs(
                    instance,
                    get_default_scheduler_logger(),
                    get_current_datetime_in_utc(),
                    max_catchup_runs=2,
                )

                # Five daily ticks were missed but only the two most recent launch.
                assert instance.get_runs_count() == 2
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 2

                first_datetime = datetime(year=2019, month=3, day=4, tzinfo=get_utc_timezone())

                wait_for_all_runs_to_start(instance)

                validate_tick(
                    ticks[0],
                    external_schedule,
                    first_datetime,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[0].run_id,
                )
                validate_run_started(instance.get_runs()[0], first_datetime, "2019-03-03")

                second_datetime = datetime(year=2019, month=3, day=3, tzinfo=get_utc_timezone())

                validate_tick(
                    ticks[1],
                    external_schedule,
                    second_datetime,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[1].run_id,
                )

                validate_run_started(instance.get_runs()[1], second_datetime, "2019-03-02")

                captured = capfd.readouterr()

                # Exact log output, including the fallen-behind warning; timestamps
                # render in Central time via the central_timezone() context.
                assert (
                    captured.out
                    == """2019-03-04 17:59:59 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-03-04 17:59:59 - dagster-scheduler - WARNING - simple_schedule has fallen behind, only launching 2 runs
2019-03-04 17:59:59 - dagster-scheduler - INFO - Launching 2 runs for simple_schedule at the following times: 2019-03-03 00:00:00+0000, 2019-03-04 00:00:00+0000
2019-03-04 17:59:59 - dagster-scheduler - INFO - Completed scheduled launch of run {first_run_id} for simple_schedule
2019-03-04 17:59:59 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule
""".format(
                        first_run_id=instance.get_runs()[1].run_id,
                        second_run_id=instance.get_runs()[0].run_id,
                    )
                )
def test_execute_during_dst_transition_fall_back(external_repo_context):
    """A daily schedule whose execution time falls in the fall-back DST
    transition window should still execute exactly once for that day.
    """
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        # Freeze time to the day before the US fall-back transition.
        frozen_time = pendulum.create(2019, 11, 2, 0, 0, 0, tz="US/Central").in_tz("US/Pacific")
        with pendulum.test(frozen_time):
            external_schedule = external_repo.get_external_schedule("daily_dst_transition_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

            # Nothing should be launched before the scheduler runs.
            assert instance.get_runs_count() == 0
            assert len(instance.get_schedule_ticks(schedule_origin.get_id())) == 0

        # Jump three days ahead, past the transition, then run the scheduler.
        frozen_time = frozen_time.add(days=3)
        with pendulum.test(frozen_time):
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            wait_for_all_runs_to_start(instance)

            # Exactly one run per day — the transition day does not double up.
            assert instance.get_runs_count() == 3
            schedule_ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(schedule_ticks) == 3

            expected_execution_times = [
                pendulum.create(2019, 11, 4, 8, 30, 0, tz="UTC"),
                pendulum.create(2019, 11, 3, 8, 30, 0, tz="UTC"),
                pendulum.create(2019, 11, 2, 7, 30, 0, tz="UTC"),
            ]
            expected_partitions = [
                pendulum.create(2019, 11, 3, tz="US/Central"),
                pendulum.create(2019, 11, 2, tz="US/Central"),
                pendulum.create(2019, 11, 1, tz="US/Central"),
            ]

            # Ticks and runs are returned newest-first, matching the expected lists.
            for tick, run, execution_time, partition_time in zip(
                schedule_ticks, instance.get_runs(), expected_execution_times, expected_partitions
            ):
                validate_tick(
                    tick,
                    external_schedule,
                    execution_time,
                    ScheduleTickStatus.SUCCESS,
                    run.run_id,
                )
                validate_run_started(run, execution_time, partition_time=partition_time)

            # A second pass at the same frozen time must launch nothing new (idempotence).
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            assert instance.get_runs_count() == 3
            assert len(instance.get_schedule_ticks(schedule_origin.get_id())) == 3
# NOTE(review): this is the second definition of test_max_catchup_runs in this
# module; it shadows the earlier freeze_time-based version, which pytest will
# therefore never collect — presumably part of an in-progress freeze_time ->
# pendulum migration; confirm the duplicate is intended to be deleted.
def test_max_catchup_runs(capfd):
    """Verify that when the scheduler falls several days behind, it launches at
    most max_catchup_runs of the missed ticks (the most recent ones) and logs a
    warning about falling behind. Pendulum-based version.
    """
    initial_datetime = pendulum.datetime(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
    ).in_tz("US/Central")
    with instance_with_schedules(grpc_repo) as (instance, external_repo):
        with pendulum.test(initial_datetime):
            external_schedule = external_repo.get_external_schedule("simple_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

        initial_datetime = initial_datetime.add(days=5)
        with pendulum.test(initial_datetime):
            # Day is now March 4 at 11:59PM
            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                pendulum.now("UTC"),
                max_catchup_runs=2,
            )

            # Five daily ticks were missed but only the two most recent launch.
            assert instance.get_runs_count() == 2
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 2

            first_datetime = pendulum.datetime(year=2019, month=3, day=4)

            wait_for_all_runs_to_start(instance)

            validate_tick(
                ticks[0],
                external_schedule,
                first_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
            validate_run_started(
                instance.get_runs()[0],
                execution_time=first_datetime,
                partition_time=pendulum.datetime(2019, 3, 3),
            )

            second_datetime = pendulum.datetime(year=2019, month=3, day=3)

            validate_tick(
                ticks[1],
                external_schedule,
                second_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[1].run_id,
            )

            validate_run_started(
                instance.get_runs()[1],
                execution_time=second_datetime,
                partition_time=pendulum.datetime(2019, 3, 2),
            )

            captured = capfd.readouterr()

            # Exact log output, including the fallen-behind warning; timestamps
            # render in the Central-time zone the test froze time in.
            assert (
                captured.out
                == """2019-03-04 17:59:59 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-03-04 17:59:59 - dagster-scheduler - WARNING - simple_schedule has fallen behind, only launching 2 runs
2019-03-04 17:59:59 - dagster-scheduler - INFO - Launching 2 runs for simple_schedule at the following times: 2019-03-03 00:00:00+0000, 2019-03-04 00:00:00+0000
2019-03-04 17:59:59 - dagster-scheduler - INFO - Completed scheduled launch of run {first_run_id} for simple_schedule
2019-03-04 17:59:59 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule
""".format(
                    first_run_id=instance.get_runs()[1].run_id,
                    second_run_id=instance.get_runs()[0].run_id,
                )
            )