def test_run_scheduled_on_time_boundary(external_repo_context):
    """A schedule started exactly on its cron boundary fires a run for that tick."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime):
            # Start schedule exactly at midnight
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(
                instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
            )

            # Exactly one run and one successful tick for the midnight boundary
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
def test_bad_env_fn(external_repo_context, capfd):
    """A schedule whose run_config_fn raises records a FAILURE tick and launches no run."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("bad_env_fn_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = pendulum.datetime(year=2019, month=2, day=27, hour=0, minute=0, second=0)
        with pendulum.test(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

            # The tick fails before any run can be created
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1

            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.FAILURE,
                None,
                "Error occurred during the execution of run_config_fn for "
                "schedule bad_env_fn_schedule",
            )

            # The same error is surfaced in the scheduler's stdout
            captured = capfd.readouterr()
            assert "Failed to fetch schedule data for bad_env_fn_schedule: " in captured.out
            assert (
                "Error occurred during the execution of run_config_fn for "
                "schedule bad_env_fn_schedule" in captured.out
            )
def test_skip(external_repo_context, capfd):
    """should_execute returning False yields a SKIPPED tick, no run, and a log line saying so."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("skip_schedule")
        schedule_origin = external_schedule.get_origin()
        # Freeze time viewed from US/Central so the logged timestamps below are deterministic
        initial_datetime = pendulum.datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
        ).in_tz("US/Central")
        with pendulum.test(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

            # Skipped: the tick exists but no run was launched
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1

            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.SKIPPED,
                None,
            )

            captured = capfd.readouterr()
            assert (
                captured.out
                == """2019-02-26 18:00:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: skip_schedule
2019-02-26 18:00:00 - dagster-scheduler - INFO - Launching run for skip_schedule at 2019-02-27 00:00:00+0000
2019-02-26 18:00:00 - dagster-scheduler - INFO - should_execute returned False for skip_schedule, skipping
"""
            )
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Subprocess entry point: run one scheduler iteration with wall-clock time frozen."""
    with DagsterInstance.from_ref(instance_ref) as instance, freeze_time(execution_datetime):
        launch_scheduled_runs(
            instance,
            get_current_datetime_in_utc(),
            debug_crash_flags=debug_crash_flags,
        )
def test_bad_load(capfd):
    """An unloadable schedule origin logs errors but records no ticks and launches no runs."""
    with schedule_instance() as instance:
        fake_origin = _get_unloadable_schedule_origin()
        initial_datetime = pendulum.datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
        )
        with pendulum.test(initial_datetime):
            # Register a RUNNING state for the fake origin so the scheduler will attempt it
            schedule_state = ScheduleState(
                fake_origin,
                ScheduleStatus.RUNNING,
                "0 0 * * *",
                pendulum.now("UTC").timestamp(),
            )
            instance.add_schedule_state(schedule_state)

        # Advance one second past midnight so the "0 0 * * *" schedule is due
        initial_datetime = initial_datetime.add(seconds=1)
        with pendulum.test(initial_datetime):
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(fake_origin.get_id())
            assert len(ticks) == 0

            captured = capfd.readouterr()
            assert "Scheduler failed for also_doesnt_exist" in captured.out
            assert "doesnt_exist not found at module scope" in captured.out

        # A day later the origin is still unloadable; still no ticks or runs
        initial_datetime = initial_datetime.add(days=1)
        with pendulum.test(initial_datetime):
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(fake_origin.get_id())
            assert len(ticks) == 0
def test_launch_failure(external_repo_context):
    """A run whose launch fails still produces a run record and a SUCCESS tick."""
    with instance_with_schedules(
        external_repo_context,
        overrides={
            # ExplodingRunLauncher raises on every launch attempt
            "run_launcher": {"module": "dagster.core.test_utils", "class": "ExplodingRunLauncher",},
        },
    ) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            # The run was created even though its launch failed
            assert instance.get_runs_count() == 1

            run = instance.get_runs()[0]
            validate_run_started(run, initial_datetime, "2019-02-26", expected_success=False)

            # The tick is still SUCCESS: the scheduler created the run; launching failed downstream
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.SUCCESS,
                run.run_id,
            )
def test_bad_should_execute(external_repo_context):
    """A schedule whose should_execute raises records a FAILURE tick and launches no run."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("bad_should_execute_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            # The failure happens before a run can be created
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1

            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.FAILURE,
                None,
                "Error occurred during the execution of should_execute for "
                "schedule bad_should_execute_schedule",
            )
def test_run_scheduled_on_time_boundary(external_repo_context):
    """A schedule started exactly on its cron boundary fires a run for that tick (job-tick API)."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        schedule_origin = external_schedule.get_external_origin()
        initial_datetime = pendulum.datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
        )
        with pendulum.test(initial_datetime):
            # Start schedule exactly at midnight
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, logger(), pendulum.now("UTC"))

            # One run and one successful job tick for the midnight boundary
            assert instance.get_runs_count() == 1
            ticks = instance.get_job_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == JobTickStatus.SUCCESS
def test_schedule_without_timezone(external_repo_context, capfd):
    """A schedule with no execution_timezone never runs; the scheduler logs why."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("daily_schedule_without_timezone")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = pendulum.datetime(year=2019, month=2, day=27, hour=0, minute=0, second=0)

        with pendulum.test(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

            # No run and no tick is recorded at all
            assert instance.get_runs_count() == 0

            ticks = instance.get_schedule_ticks(schedule_origin.get_id())

            assert len(ticks) == 0

            captured = capfd.readouterr()

            assert (
                "Scheduler could not run for daily_schedule_without_timezone as it did not specify "
                "an execution_timezone in its definition." in captured.out
            )

        # A day later it still refuses to run
        initial_datetime = initial_datetime.add(days=1)
        with pendulum.test(initial_datetime):
            launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0
def test_with_incorrect_scheduler():
    """launch_scheduled_runs raises DagsterInvariantViolationError on a misconfigured instance."""
    with instance_for_test() as instance, pytest.raises(DagsterInvariantViolationError):
        launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))
def test_launch_failure(external_repo_context, capfd):
    """A run whose launch fails still yields a run record, a SUCCESS tick, and an ERROR log line."""
    with central_timezone():
        with instance_with_schedules(
            external_repo_context,
            overrides={
                # ExplodingRunLauncher raises on every launch attempt
                "run_launcher": {
                    "module": "dagster.core.test_utils",
                    "class": "ExplodingRunLauncher",
                },
            },
        ) as (instance, external_repo):
            external_schedule = external_repo.get_external_schedule("simple_schedule")
            schedule_origin = external_schedule.get_origin()
            initial_datetime = datetime(
                year=2019,
                month=2,
                day=27,
                hour=0,
                minute=0,
                second=0,
                tzinfo=get_utc_timezone(),
            )
            with freeze_time(initial_datetime):
                instance.start_schedule_and_update_storage_state(external_schedule)

                launch_scheduled_runs(
                    instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                )

                # The run exists even though launching it failed
                assert instance.get_runs_count() == 1

                run = instance.get_runs()[0]

                validate_run_started(run, initial_datetime, "2019-02-26", expected_success=False)

                # The tick is SUCCESS: run creation succeeded; the launch failure happened after
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 1
                validate_tick(
                    ticks[0],
                    external_schedule,
                    initial_datetime,
                    ScheduleTickStatus.SUCCESS,
                    run.run_id,
                )

                captured = capfd.readouterr()
                assert (
                    captured.out
                    == """2019-02-26 18:00:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-26 18:00:00 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-27 00:00:00+0000
2019-02-26 18:00:00 - dagster-scheduler - ERROR - Run {run_id} created successfully but failed to launch.
""".format(run_id=instance.get_runs()[0].run_id)
                )
def test_launch_failure(external_repo_context, capfd):
    """A run whose launch fails still yields a run record, a SUCCESS job tick, and an ERROR log line."""
    with instance_with_schedules(
        external_repo_context,
        overrides={
            # ExplodingRunLauncher raises on every launch attempt
            "run_launcher": {
                "module": "dagster.core.test_utils",
                "class": "ExplodingRunLauncher",
            },
        },
    ) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        schedule_origin = external_schedule.get_external_origin()
        # Freeze time viewed from US/Central so the logged timestamps below are deterministic
        initial_datetime = pendulum.datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
        ).in_tz("US/Central")

        with pendulum.test(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, logger(), pendulum.now("UTC"))

            # The run exists even though launching it failed
            assert instance.get_runs_count() == 1

            run = instance.get_runs()[0]

            validate_run_started(
                run,
                execution_time=initial_datetime,
                partition_time=pendulum.datetime(2019, 2, 26),
                expected_success=False,
            )

            # The tick is SUCCESS: run creation succeeded; the launch failure happened after
            ticks = instance.get_job_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                JobTickStatus.SUCCESS,
                run.run_id,
            )

            captured = capfd.readouterr()
            assert (
                captured.out
                == """2019-02-26 18:00:00 - SchedulerDaemon - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-26 18:00:00 - SchedulerDaemon - INFO - Launching run for simple_schedule at 2019-02-27 00:00:00+0000
2019-02-26 18:00:00 - SchedulerDaemon - ERROR - Run {run_id} created successfully but failed to launch.
""".format(run_id=instance.get_runs()[0].run_id)
            )
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Subprocess entry point: run one scheduler iteration with pendulum time frozen."""
    with DagsterInstance.from_ref(instance_ref) as instance, pendulum.test(execution_datetime):
        launch_scheduled_runs(
            instance,
            get_default_scheduler_logger(),
            pendulum.now("UTC"),
            debug_crash_flags=debug_crash_flags,
        )
def test_wrong_config(external_repo_context, capfd):
    """Invalid run config still creates a run; the run fails with DagsterInvalidConfigError."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("wrong_config_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(
                instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
            )

            # The run is created despite the bad config; it fails during execution
            assert instance.get_runs_count() == 1

            wait_for_all_runs_to_start(instance)

            run = instance.get_runs()[0]

            validate_run_started(run, initial_datetime, "2019-02-26", expected_success=False)

            # The tick is SUCCESS: the scheduler created the run; the config error surfaces later
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.SUCCESS,
                run.run_id,
            )

            # The config error appears in the run's event log as an engine event
            run_logs = instance.all_logs(run.run_id)
            assert (
                len(
                    [
                        event
                        for event in run_logs
                        if (
                            "DagsterInvalidConfigError" in event.dagster_event.message
                            and event.dagster_event_type == DagsterEventType.ENGINE_EVENT
                        )
                    ]
                )
                > 0
            )

            captured = capfd.readouterr()

            assert "Failed to fetch execution plan for wrong_config_schedule" in captured.out
            assert "Error in config for pipeline the_pipeline" in captured.out
            assert 'Missing required field "solids" at the root.' in captured.out
def test_wrong_config(external_repo_context, capfd):
    """Invalid run config still creates a run; the run fails with DagsterInvalidConfigError (job-tick API)."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("wrong_config_schedule")
        schedule_origin = external_schedule.get_external_origin()
        initial_datetime = pendulum.datetime(year=2019, month=2, day=27, hour=0, minute=0, second=0)

        with pendulum.test(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, logger(), pendulum.now("UTC"))

            # The run is created despite the bad config; it fails during execution
            assert instance.get_runs_count() == 1

            wait_for_all_runs_to_start(instance)

            run = instance.get_runs()[0]

            validate_run_started(
                run,
                execution_time=initial_datetime,
                partition_time=pendulum.datetime(2019, 2, 26),
                expected_success=False,
            )

            # The tick is SUCCESS: the scheduler created the run; the config error surfaces later
            ticks = instance.get_job_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                JobTickStatus.SUCCESS,
                run.run_id,
            )

            # The config error appears in the run's event log as an engine event
            run_logs = instance.all_logs(run.run_id)
            assert (
                len(
                    [
                        event
                        for event in run_logs
                        if (
                            "DagsterInvalidConfigError" in event.dagster_event.message
                            and event.dagster_event_type == DagsterEventType.ENGINE_EVENT
                        )
                    ]
                )
                > 0
            )

            captured = capfd.readouterr()

            assert "Failed to fetch execution plan for wrong_config_schedule" in captured.out
            assert "Error in config for pipeline the_pipeline" in captured.out
            assert 'Missing required config entry "solids" at the root.' in captured.out
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Subprocess entry point: run the scheduler once, always cleaning up the test instance."""
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(execution_datetime):
                launch_scheduled_runs(
                    instance, logger(), pendulum.now("UTC"), debug_crash_flags=debug_crash_flags
                )
        finally:
            # Cleanup must happen even if the scheduler iteration crashes
            cleanup_test_instance(instance)
def test_no_started_schedules(external_repo_context, capfd):
    """When no schedules are started, the scheduler does nothing and logs that it skipped checking."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        schedule_origin = external_schedule.get_origin()

        launch_scheduled_runs(instance, get_default_scheduler_logger(), pendulum.now("UTC"))

        # Nothing was launched and no ticks were recorded
        assert instance.get_runs_count() == 0
        assert len(instance.get_schedule_ticks(schedule_origin.get_id())) == 0

        output = capfd.readouterr().out
        assert "Not checking for any runs since no schedules have been started." in output
def test_bad_load():
    """An unloadable schedule records one FAILURE tick per attempt and launches no runs."""
    with schedule_instance() as instance:
        working_directory = os.path.dirname(__file__)
        # Point at a repository name that does not exist in this file
        recon_repo = ReconstructableRepository.for_file(__file__, "doesnt_exist", working_directory)
        schedule = recon_repo.get_reconstructable_schedule("also_doesnt_exist")
        fake_origin = schedule.get_origin()
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime) as frozen_datetime:
            # Register a RUNNING state so the scheduler will attempt the broken origin
            schedule_state = ScheduleState(
                fake_origin,
                ScheduleStatus.RUNNING,
                "0 0 * * *",
                get_timestamp_from_utc_datetime(get_current_datetime_in_utc()),
            )
            instance.add_schedule_state(schedule_state)

            # Cross midnight so the "0 0 * * *" schedule is due
            frozen_datetime.tick(delta=timedelta(seconds=1))

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 0

            ticks = instance.get_schedule_ticks(fake_origin.get_id())

            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.FAILURE
            assert ticks[0].timestamp == get_timestamp_from_utc_datetime(
                get_current_datetime_in_utc()
            )
            assert "doesnt_exist not found at module scope in file" in ticks[0].error.message

            # Next day: a second failed attempt adds a second FAILURE tick
            frozen_datetime.tick(delta=timedelta(days=1))

            launch_scheduled_runs(instance, get_current_datetime_in_utc())
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(fake_origin.get_id())

            assert len(ticks) == 2
            assert ticks[0].status == ScheduleTickStatus.FAILURE
            assert ticks[0].timestamp == get_timestamp_from_utc_datetime(
                get_current_datetime_in_utc()
            )
            assert "doesnt_exist not found at module scope in file" in ticks[0].error.message
def test_wrong_config(external_repo_context):
    """Invalid run config still creates a run; the config error lands in the run's event log."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("wrong_config_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            # The run is created despite the bad config; it fails during execution
            assert instance.get_runs_count() == 1

            wait_for_all_runs_to_start(instance)

            run = instance.get_runs()[0]

            validate_run_started(run, initial_datetime, "2019-02-26", expected_success=False)

            # The tick is SUCCESS: the scheduler created the run; the config error surfaces later
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.SUCCESS,
                run.run_id,
            )

            # The config error appears in the run's event log as an engine event
            run_logs = instance.all_logs(run.run_id)

            assert (
                len(
                    [
                        event
                        for event in run_logs
                        if (
                            "DagsterInvalidConfigError" in event.dagster_event.message
                            and event.dagster_event_type == DagsterEventType.ENGINE_EVENT
                        )
                    ]
                )
                > 0
            )
def test_skip(external_repo_context):
    """should_execute returning False yields a SKIPPED tick and no run."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("skip_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            # Skipped: the tick exists but no run was launched
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.SKIPPED,
                None,
            )
def test_bad_should_execute(external_repo_context, capfd):
    """A should_execute that raises records a FAILURE tick; the error is echoed to stdout."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("bad_should_execute_schedule")
        schedule_origin = external_schedule.get_origin()
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(
                instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
            )

            # The failure happens before a run can be created
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1

            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.FAILURE,
                None,
                "Error occurred during the execution of should_execute for "
                "schedule bad_should_execute_schedule",
            )

            # The same error (including the original exception text) appears in stdout
            captured = capfd.readouterr()
            assert ("Failed to fetch schedule data for bad_should_execute_schedule: ") in captured.out
            assert (
                "Error occurred during the execution of should_execute "
                "for schedule bad_should_execute_schedule" in captured.out
            )
            assert "Exception: bananas" in captured.out
def test_max_catchup_runs():
    """max_catchup_runs caps backfilled ticks after downtime, keeping the most recent ones."""
    initial_datetime = datetime(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
        tzinfo=get_utc_timezone(),
    )
    with instance_with_schedules(grpc_repo) as (instance, external_repo):
        with freeze_time(initial_datetime) as frozen_datetime:
            external_schedule = external_repo.get_external_schedule("simple_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

            # Day is now March 4 at 11:59PM
            frozen_datetime.tick(delta=timedelta(days=5))

            # Five daily ticks were missed, but only 2 may be caught up
            launch_scheduled_runs(instance, get_current_datetime_in_utc(), max_catchup_runs=2)

            assert instance.get_runs_count() == 2
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 2

            # Most recent missed tick (March 4) comes first
            first_datetime = datetime(year=2019, month=3, day=4, tzinfo=get_utc_timezone())

            wait_for_all_runs_to_start(instance)

            validate_tick(
                ticks[0],
                external_schedule,
                first_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
            validate_run_started(instance.get_runs()[0], first_datetime, "2019-03-03")

            # Second-most-recent missed tick (March 3); earlier ticks are dropped
            second_datetime = datetime(year=2019, month=3, day=3, tzinfo=get_utc_timezone())

            validate_tick(
                ticks[1],
                external_schedule,
                second_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[1].run_id,
            )

            validate_run_started(instance.get_runs()[1], second_datetime, "2019-03-02")
def test_multiple_schedules_on_different_time_ranges(external_repo_context):
    """A daily and an hourly schedule tick independently as time advances."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        external_hourly_schedule = external_repo.get_external_schedule("simple_hourly_schedule")
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime) as frozen_datetime:
            instance.start_schedule_and_update_storage_state(external_schedule)
            instance.start_schedule_and_update_storage_state(external_hourly_schedule)

            # Cross midnight: both the daily and the hourly schedule are due
            frozen_datetime.tick(delta=timedelta(seconds=2))

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 2
            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS

            hourly_ticks = instance.get_schedule_ticks(external_hourly_schedule.get_origin_id())
            assert len(hourly_ticks) == 1
            assert hourly_ticks[0].status == ScheduleTickStatus.SUCCESS

            # One hour later: only the hourly schedule is due again
            frozen_datetime.tick(delta=timedelta(hours=1))

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 3

            # Daily schedule is unchanged
            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS

            hourly_ticks = instance.get_schedule_ticks(external_hourly_schedule.get_origin_id())
            assert len(hourly_ticks) == 2
            assert (
                len([tick for tick in hourly_ticks if tick.status == ScheduleTickStatus.SUCCESS])
                == 2
            )
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Subprocess entry point: run one scheduler iteration against a gRPC server registry."""
    with DagsterInstance.from_ref(instance_ref) as instance:
        with ProcessGrpcServerRegistry(wait_for_processes_on_exit=True) as grpc_server_registry:
            try:
                with pendulum.test(execution_datetime):
                    # launch_scheduled_runs is a generator; drain it so all work executes
                    for _ in launch_scheduled_runs(
                        instance,
                        grpc_server_registry,
                        logger(),
                        pendulum.now("UTC"),
                        debug_crash_flags=debug_crash_flags,
                    ):
                        pass
            finally:
                cleanup_test_instance(instance)
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Subprocess entry point: run one scheduler iteration inside a test daemon workspace."""
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with create_test_daemon_workspace(
                workspace_load_target(), instance
            ) as workspace, pendulum.test(execution_datetime):
                # launch_scheduled_runs is a generator; drain it so all work executes
                for _ in launch_scheduled_runs(
                    instance,
                    workspace,
                    logger(),
                    pendulum.now("UTC"),
                    debug_crash_flags=debug_crash_flags,
                ):
                    pass
        finally:
            cleanup_test_instance(instance)
def _test_launch_scheduled_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Subprocess entry point: run one scheduler iteration against a dynamic gRPC workspace."""
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with ProcessGrpcServerRegistry() as grpc_server_registry:
                with DynamicWorkspace(grpc_server_registry) as workspace, pendulum.test(
                    execution_datetime
                ):
                    # launch_scheduled_runs is a generator; drain it so all work executes
                    for _ in launch_scheduled_runs(
                        instance,
                        workspace,
                        logger(),
                        pendulum.now("UTC"),
                        debug_crash_flags=debug_crash_flags,
                    ):
                        pass
        finally:
            cleanup_test_instance(instance)
def test_differing_timezones(instance, workspace, external_repo):
    """Schedules in different timezones each fire at their own local midnight."""
    # Two schedules, one using US/Central, the other on US/Eastern
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 2, 27, 23, 59, 59, tz="US/Eastern"), "US/Pacific"
    )
    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule("daily_central_time_schedule")
        external_eastern_schedule = external_repo.get_external_schedule(
            "daily_eastern_time_schedule"
        )

        schedule_origin = external_schedule.get_external_origin()
        eastern_origin = external_eastern_schedule.get_external_origin()

        instance.start_schedule(external_schedule)
        instance.start_schedule(external_eastern_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0
        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 0

        # Before either midnight: nothing fires
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0
        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 0

    # Past midnight eastern time, the eastern timezone schedule will run, but not the central timezone
    freeze_datetime = freeze_datetime.add(minutes=1)

    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        assert instance.get_runs_count() == 1

        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 1

        expected_datetime = to_timezone(
            create_pendulum_time(year=2019, month=2, day=28, tz="US/Eastern"), "UTC"
        )

        validate_tick(
            ticks[0],
            external_eastern_schedule,
            expected_datetime,
            TickStatus.SUCCESS,
            [run.run_id for run in instance.get_runs()],
        )

        # Central schedule has not fired yet
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

        wait_for_all_runs_to_start(instance)
        validate_run_started(
            instance,
            instance.get_runs()[0],
            expected_datetime,
            create_pendulum_time(2019, 2, 27, tz="US/Eastern"),
        )

    # Past midnight central time, the central timezone schedule will now run
    freeze_datetime = freeze_datetime.add(hours=1)
    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        assert instance.get_runs_count() == 2
        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 1

        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 1

        expected_datetime = to_timezone(
            create_pendulum_time(year=2019, month=2, day=28, tz="US/Central"), "UTC"
        )

        validate_tick(
            ticks[0],
            external_schedule,
            expected_datetime,
            TickStatus.SUCCESS,
            [instance.get_runs()[0].run_id],
        )

        wait_for_all_runs_to_start(instance)
        validate_run_started(
            instance,
            instance.get_runs()[0],
            expected_datetime,
            create_pendulum_time(2019, 2, 27, tz="US/Central"),
        )

        # Verify idempotence: re-running at the same instant creates nothing new
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 2
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.SUCCESS
        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.SUCCESS
def test_execute_during_dst_transition_fall_back(instance, workspace, external_repo):
    """During the fall-back DST transition, a time that occurs twice fires only once per day."""
    # A schedule that runs daily during a time that occurs twice during a fall DST transition
    # only executes once for that day
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 11, 2, 0, 0, 0, tz="US/Central"), "US/Pacific"
    )

    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule(
            "daily_dst_transition_schedule_doubled_time"
        )
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    # Advance past the Nov 3 fall-back transition: exactly three daily runs expected
    freeze_datetime = freeze_datetime.add(days=3)

    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3

        # Note the UTC offset shift across the transition (6:30 UTC before, 7:30 UTC after)
        expected_datetimes_utc = [
            create_pendulum_time(2019, 11, 4, 7, 30, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 3, 7, 30, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 2, 6, 30, 0, tz="UTC"),
        ]

        expected_partition_times = [
            create_pendulum_time(2019, 11, 3, tz="US/Central"),
            create_pendulum_time(2019, 11, 2, tz="US/Central"),
            create_pendulum_time(2019, 11, 1, tz="US/Central"),
        ]

        for i in range(3):
            validate_tick(
                ticks[i],
                external_schedule,
                expected_datetimes_utc[i],
                TickStatus.SUCCESS,
                [instance.get_runs()[i].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[i],
                expected_datetimes_utc[i],
                partition_time=expected_partition_times[i],
            )

        # Verify idempotence: re-running at the same instant creates nothing new
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3
def test_execute_during_dst_transition_spring_forward(instance, workspace, external_repo):
    """During the spring-forward DST transition, a skipped wall-clock time still yields one run per day."""
    # Verify that a daily schedule that is supposed to execute at a time that is skipped
    # by the DST transition does not execute for that day

    # Day before DST
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 3, 9, 0, 0, 0, tz="US/Central"), "US/Pacific"
    )

    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule(
            "daily_dst_transition_schedule_skipped_time"
        )
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    # Advance past the Mar 10 spring-forward transition: three daily runs expected
    freeze_datetime = freeze_datetime.add(days=3)

    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3

        # On Mar 10 the 2:30 AM wall-clock time does not exist; the tick lands at 3:00 instead
        expected_datetimes_utc = [
            to_timezone(create_pendulum_time(2019, 3, 11, 2, 30, 0, tz="US/Central"), "UTC"),
            to_timezone(create_pendulum_time(2019, 3, 10, 3, 00, 0, tz="US/Central"), "UTC"),
            to_timezone(create_pendulum_time(2019, 3, 9, 2, 30, 0, tz="US/Central"), "UTC"),
        ]

        expected_partition_times = [
            create_pendulum_time(2019, 3, 10, tz="US/Central"),
            create_pendulum_time(2019, 3, 9, tz="US/Central"),
            create_pendulum_time(2019, 3, 8, tz="US/Central"),
        ]

        partition_set_def = the_repo.get_partition_set_def(
            "daily_dst_transition_schedule_skipped_time_partitions"
        )

        partition_names = partition_set_def.get_partition_names()

        # Every day around the transition has a partition
        assert "2019-03-08" in partition_names
        assert "2019-03-09" in partition_names
        assert "2019-03-10" in partition_names

        for i in range(3):
            validate_tick(
                ticks[i],
                external_schedule,
                expected_datetimes_utc[i],
                TickStatus.SUCCESS,
                [instance.get_runs()[i].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[i],
                expected_datetimes_utc[i],
                partition_time=expected_partition_times[i],
            )

        # Verify idempotence: re-running at the same instant creates nothing new
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3
def test_daily_dst_fall_back(instance, workspace, external_repo):
    """A daily schedule still runs exactly once per day across the fall-back DST transition."""
    # Verify that a daily schedule still runs once per day during the fall DST transition

    # Night before DST
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 11, 3, 0, 0, 0, tz="US/Central"), "US/Pacific"
    )

    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule("daily_central_time_schedule")
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    # Advance past the Nov 3 fall-back transition: three daily runs expected
    freeze_datetime = freeze_datetime.add(days=2)

    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3

        # UTC time changed by one hour after the transition, still running daily at the same
        # time in CT
        expected_datetimes_utc = [
            create_pendulum_time(2019, 11, 5, 6, 0, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 4, 6, 0, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 3, 5, 0, 0, tz="UTC"),
        ]

        expected_partition_times = [
            create_pendulum_time(2019, 11, 4, tz="US/Central"),
            create_pendulum_time(2019, 11, 3, tz="US/Central"),
            create_pendulum_time(2019, 11, 2, tz="US/Central"),
        ]

        for i in range(3):
            validate_tick(
                ticks[i],
                external_schedule,
                expected_datetimes_utc[i],
                TickStatus.SUCCESS,
                [instance.get_runs()[i].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[i],
                expected_datetimes_utc[i],
                partition_time=expected_partition_times[i],
            )

        # Verify idempotence: re-running at the same instant creates nothing new
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3