def get_date_range_partitions(current_time=None):
    """Build one Partition per interval between a start time and an end time.

    NOTE(review): this reads several names from an enclosing scope (`start`,
    `end`, `timezone`, `fmt`, `delta_range`, `delta_amount`, `inclusive`) —
    presumably it is defined inside a partition-set factory; confirm against
    the surrounding file.

    Args:
        current_time (Optional[datetime.datetime]): Used as the end of the
            range when no explicit `end` is configured; defaults to "now" in
            the definition timezone.

    Returns:
        List[Partition]: one Partition per interval. By default the final
        (incomplete) interval is dropped; `inclusive` keeps it.
    """
    check.opt_inst_param(current_time, "current_time", datetime.datetime)
    # Fall back to UTC when the definition specifies no timezone.
    tz = timezone if timezone else "UTC"
    _start = (
        to_timezone(start, tz)
        if isinstance(start, PendulumDateTime)
        else pendulum.instance(start, tz=tz)
    )

    # End-of-range precedence: explicit end > caller-supplied current_time > now.
    if end:
        _end = end
    elif current_time:
        _end = current_time
    else:
        _end = pendulum.now(tz)

    # coerce to the definition timezone
    if isinstance(_end, PendulumDateTime):
        _end = to_timezone(_end, tz)
    else:
        _end = pendulum.instance(_end, tz=tz)

    period = pendulum.period(_start, _end)
    date_names = [
        Partition(value=current, name=current.strftime(fmt))
        for current in period.range(delta_range, delta_amount)
    ]

    # We don't include the last element here by default since we only want
    # fully completed intervals, and the _end time is in the middle of the interval
    # represented by the last element of date_names
    if inclusive:
        return date_names

    return date_names[:-1]
def _create_scheduler_run(
    instance,
    schedule_time,
    repo_location,
    external_schedule,
    external_pipeline,
    run_request,
):
    """Create (but do not launch) a NOT_STARTED run for one scheduled tick.

    Also emits a SCHEDULED_RUN_CREATED telemetry action with hashed
    identifiers before creating the run.
    """
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_request.run_config,
        external_schedule.mode,
        step_keys_to_execute=None,
        known_state=None,
    )

    base_tags = external_pipeline.tags or {}
    check_tags(base_tags, "pipeline_tags")

    # Tags from the run request take precedence over pipeline-level tags.
    merged_tags = merge_dicts(base_tags, run_request.tags)
    merged_tags[SCHEDULED_EXECUTION_TIME_TAG] = to_timezone(schedule_time, "UTC").isoformat()
    if run_request.run_key:
        merged_tags[RUN_KEY_TAG] = run_request.run_key

    # Telemetry: names are hashed so no user identifiers leave the instance.
    log_action(
        instance,
        SCHEDULED_RUN_CREATED,
        metadata={
            "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
            "SCHEDULE_NAME_HASH": hash_name(external_schedule.name),
            "repo_hash": hash_name(repo_location.name),
            "pipeline_name_hash": hash_name(external_pipeline.name),
        },
    )

    return instance.create_run(
        pipeline_name=external_schedule.pipeline_name,
        run_id=None,
        run_config=run_request.run_config,
        mode=external_schedule.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_pipeline.solid_selection,
        status=PipelineRunStatus.NOT_STARTED,
        root_run_id=None,
        parent_run_id=None,
        tags=merged_tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=plan.execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
def test_partitions_for_monthly_schedule_decorators_without_timezone(
        partition_months_offset: int):
    """A @monthly_schedule with no timezone should evaluate partitions in UTC,
    offset backwards by `partition_months_offset` months."""
    # Freeze the clock at 2019-03-27 00:01:01 UTC (viewed from US/Eastern).
    with pendulum.test(
            to_timezone(create_pendulum_time(2019, 3, 27, 0, 1, 1, tz="UTC"),
                        "US/Eastern")):
        context_without_time = build_schedule_context()

        start_date = datetime(year=2019, month=1, day=1)

        @monthly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_month=3,
            start_date=start_date,
            execution_time=time(9, 30),
            partition_months_offset=partition_months_offset,
        )
        def monthly_foo_schedule(monthly_time):
            return {"monthly_time": monthly_time.isoformat()}

        # Evaluate with an explicit scheduled execution time (Mar 3, 9:30 UTC).
        valid_monthly_time = create_pendulum_time(year=2019,
                                                  month=3,
                                                  day=3,
                                                  hour=9,
                                                  minute=30,
                                                  tz="UTC")
        context_with_valid_time = build_schedule_context(
            scheduled_execution_time=valid_monthly_time)

        execution_data = monthly_foo_schedule.evaluate_tick(
            context_with_valid_time)
        assert execution_data.run_requests
        assert len(execution_data.run_requests) == 1
        # Partition is the first of the month, shifted back by the offset.
        assert execution_data.run_requests[0].run_config == {
            "monthly_time":
            create_pendulum_time(
                year=2019, month=3, day=1,
                tz="UTC").subtract(months=partition_months_offset).isoformat()
        }

        # Without an explicit time, the frozen clock yields the same partition.
        execution_data = monthly_foo_schedule.evaluate_tick(
            context_without_time)
        assert execution_data.run_requests
        assert len(execution_data.run_requests) == 1
        assert execution_data.run_requests[0].run_config == {
            "monthly_time":
            create_pendulum_time(
                year=2019, month=3, day=1,
                tz="UTC").subtract(months=partition_months_offset).isoformat()
        }

        _check_partitions(
            monthly_foo_schedule,
            3 - partition_months_offset,
            pendulum.instance(start_date, tz="UTC"),
            DEFAULT_MONTHLY_FORMAT,
            relativedelta(months=1),
        )
def test_partitions_for_hourly_schedule_decorators_without_timezone(
        partition_hours_offset: int):
    """An @hourly_schedule with no timezone should evaluate partitions in UTC,
    offset backwards by `partition_hours_offset` hours."""
    # Freeze the clock at 2019-02-27 00:01:01 UTC (viewed from US/Eastern).
    with pendulum.test(
            to_timezone(create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="UTC"),
                        "US/Eastern")):
        context_without_time = build_schedule_context()

        start_date = datetime(year=2019, month=1, day=1)

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=start_date,
            execution_time=time(hour=0, minute=25),
            partition_hours_offset=partition_hours_offset,
        )
        def hourly_foo_schedule(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        _check_partitions(
            hourly_foo_schedule,
            HOURS_UNTIL_FEBRUARY_27 + 1 - partition_hours_offset,
            pendulum.instance(start_date, tz="UTC"),
            DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE,
            relativedelta(hours=1),
        )

        # With no scheduled time in the context, the frozen clock determines
        # the partition (midnight Feb 27 UTC, shifted back by the offset).
        execution_data = hourly_foo_schedule.evaluate_tick(
            context_without_time)
        assert execution_data.run_requests
        assert len(execution_data.run_requests) == 1
        assert execution_data.run_requests[0].run_config == {
            "hourly_time":
            create_pendulum_time(
                year=2019, month=2, day=27,
                tz="UTC").subtract(hours=partition_hours_offset).isoformat()
        }

        # With an explicit scheduled execution time (Jan 27, 01:25 UTC).
        valid_time = create_pendulum_time(year=2019,
                                          month=1,
                                          day=27,
                                          hour=1,
                                          minute=25,
                                          tz="UTC")
        context_with_valid_time = build_schedule_context(
            scheduled_execution_time=valid_time)

        execution_data = hourly_foo_schedule.evaluate_tick(
            context_with_valid_time)
        assert execution_data.run_requests
        assert len(execution_data.run_requests) == 1
        assert execution_data.run_requests[0].run_config == {
            "hourly_time":
            create_pendulum_time(
                year=2019, month=1, day=27, hour=1,
                tz="UTC").subtract(hours=partition_hours_offset).isoformat()
        }
def test_run_record_timestamps():
    """Run records created under a frozen clock should stamp both start_time and
    end_time with that clock's epoch timestamp."""
    with get_instance() as instance:
        freeze_datetime = to_timezone(
            create_pendulum_time(2019, 11, 2, 0, 0, 0, tz="US/Central"), "US/Pacific"
        )
        with pendulum.test(freeze_datetime):
            result = my_job.execute_in_process(instance=instance)
            records = instance.get_run_records(filters=PipelineRunsFilter(run_ids=[result.run_id]))
            assert len(records) == 1
            record = records[0]
            # Compare against the frozen clock instead of the hard-coded epoch
            # value 1572670800.0 (same instant): this keeps the intent visible
            # and matches the sibling storage-suite test's style.
            assert record.start_time == freeze_datetime.timestamp()
            assert record.end_time == freeze_datetime.timestamp()
def _get_existing_run_for_request(instance, external_schedule, schedule_time, run_request):
    """Return the run already created for this scheduled tick, or None.

    Runs are matched by the schedule's identifying tags plus the UTC
    scheduled-execution-time tag (and the run key, when the request carries
    one), so a re-evaluated tick does not launch a duplicate run.

    Args:
        instance: the DagsterInstance to query.
        external_schedule: the schedule whose tags identify the run.
        schedule_time: the tick's scheduled execution time (tz-aware).
        run_request: the RunRequest, possibly carrying a run_key.
    """
    tags = merge_dicts(
        PipelineRun.tags_for_schedule(external_schedule),
        {
            SCHEDULED_EXECUTION_TIME_TAG: to_timezone(schedule_time, "UTC").isoformat(),
        },
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key
    runs_filter = RunsFilter(tags=tags)
    existing_runs = instance.get_runs(runs_filter)
    # Idiomatic emptiness check (was `if not len(existing_runs)`).
    if not existing_runs:
        return None
    return existing_runs[0]
def _create_scheduler_run(
    instance,
    schedule_time,
    repo_location,
    external_schedule,
    external_pipeline,
    run_request,
):
    """Create a NOT_STARTED run for a single scheduled tick without launching it."""
    run_config = run_request.run_config

    execution_plan_snapshot = repo_location.get_external_execution_plan(
        external_pipeline,
        run_config,
        external_schedule.mode,
        step_keys_to_execute=None,
        known_state=None,
    ).execution_plan_snapshot

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")

    # Tags from the run request win over pipeline-level tags on collisions.
    tags = merge_dicts(pipeline_tags, run_request.tags)
    tags[SCHEDULED_EXECUTION_TIME_TAG] = to_timezone(schedule_time, "UTC").isoformat()
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    return instance.create_run(
        pipeline_name=external_schedule.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=external_schedule.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_pipeline.solid_selection,
        status=PipelineRunStatus.NOT_STARTED,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
def resolve_evaluationResult(self, graphene_info):
    """Resolve the schedule-execution data for this future tick.

    Returns None unless the instigator is a RUNNING schedule whose repository
    location and repository are still present in the workspace. Otherwise,
    computes the next tick time after self._timestamp in the schedule's
    execution timezone and asks the repository location to evaluate it.
    """
    if self._job_state.status != InstigatorStatus.RUNNING:
        return None

    if self._job_state.job_type != InstigatorType.SCHEDULE:
        return None

    repository_origin = self._job_state.origin.external_repository_origin
    if not graphene_info.context.has_repository_location(
            repository_origin.repository_location_origin.location_name):
        return None

    repository_location = graphene_info.context.get_repository_location(
        repository_origin.repository_location_origin.location_name)
    if not repository_location.has_repository(
            repository_origin.repository_name):
        return None

    repository = repository_location.get_repository(
        repository_origin.repository_name)
    external_schedule = repository.get_external_schedule(
        self._job_state.name)
    # Default to UTC when the schedule has no explicit execution timezone.
    timezone_str = external_schedule.execution_timezone
    if not timezone_str:
        timezone_str = "UTC"

    next_tick_datetime = next(
        external_schedule.execution_time_iterator(self._timestamp))
    schedule_time = to_timezone(pendulum.instance(next_tick_datetime),
                                timezone_str)
    try:
        schedule_data = repository_location.get_external_schedule_execution_data(
            instance=graphene_info.context.instance,
            repository_handle=repository.handle,
            schedule_name=external_schedule.name,
            scheduled_execution_time=schedule_time,
        )
    except Exception:
        # Surface evaluation failures to the client as serializable error
        # info rather than raising through the GraphQL layer.
        schedule_data = serializable_error_info_from_exc_info(
            sys.exc_info())

    return GrapheneTickEvaluation(schedule_data)
def test_run_record_timestamps(self, storage):
    """Run records should capture start/end times from the (frozen) wall clock,
    regardless of the run-storage implementation under test."""
    assert storage
    self._skip_in_memory(storage)

    @op
    def a():
        pass

    @job
    def my_job():
        a()

    with tempfile.TemporaryDirectory() as temp_dir:
        # Reuse the storage's bound instance when it has one; otherwise build
        # an ephemeral instance around the storage under test.
        if storage._instance:  # pylint: disable=protected-access
            instance = storage._instance  # pylint: disable=protected-access
        else:
            instance = DagsterInstance(
                instance_type=InstanceType.EPHEMERAL,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=storage,
                event_storage=InMemoryEventLogStorage(),
                compute_log_manager=NoOpComputeLogManager(),
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=SyncInMemoryRunLauncher(),
            )

        freeze_datetime = to_timezone(
            create_pendulum_time(2019, 11, 2, 0, 0, 0, tz="US/Central"), "US/Pacific"
        )

        with pendulum.test(freeze_datetime):
            result = my_job.execute_in_process(instance=instance)
            records = instance.get_run_records(
                filters=PipelineRunsFilter(run_ids=[result.run_id])
            )
            assert len(records) == 1
            record = records[0]
            # Both timestamps equal the frozen clock: the in-process run
            # starts and finishes while time is frozen.
            assert record.start_time == freeze_datetime.timestamp()
            assert record.end_time == freeze_datetime.timestamp()
def _test_backfill_in_subprocess(instance_ref, debug_crash_flags):
    """Run one backfill-daemon iteration against a reconstructed instance under
    a frozen clock, cleaning up the test instance regardless of outcome."""
    frozen_time = to_timezone(
        create_pendulum_time(
            year=2021,
            month=2,
            day=17,
        ),
        "US/Central",
    )
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(frozen_time), create_test_daemon_workspace() as workspace:
                # Drain the iterator so the full backfill iteration executes.
                for _ in execute_backfill_iteration(
                    instance,
                    workspace,
                    get_default_daemon_logger("BackfillDaemon"),
                    debug_crash_flags=debug_crash_flags,
                ):
                    pass
        finally:
            cleanup_test_instance(instance)
def test_differing_timezones(instance, workspace, external_repo):
    """Schedules in different timezones fire independently at their own local
    midnights: the Eastern schedule runs an hour before the Central one."""
    # Two schedules, one using US/Central, the other on US/Eastern
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 2, 27, 23, 59, 59, tz="US/Eastern"), "US/Pacific"
    )
    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule("daily_central_time_schedule")
        external_eastern_schedule = external_repo.get_external_schedule(
            "daily_eastern_time_schedule"
        )

        schedule_origin = external_schedule.get_external_origin()
        eastern_origin = external_eastern_schedule.get_external_origin()

        instance.start_schedule(external_schedule)
        instance.start_schedule(external_eastern_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 0

        # One second before midnight Eastern: neither schedule should fire.
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 0

    # Past midnight eastern time, the eastern timezone schedule will run, but not the central timezone
    freeze_datetime = freeze_datetime.add(minutes=1)
    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        assert instance.get_runs_count() == 1
        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 1

        expected_datetime = to_timezone(
            create_pendulum_time(year=2019, month=2, day=28, tz="US/Eastern"), "UTC"
        )

        validate_tick(
            ticks[0],
            external_eastern_schedule,
            expected_datetime,
            TickStatus.SUCCESS,
            [run.run_id for run in instance.get_runs()],
        )

        # The Central schedule still has no ticks at this point.
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

        wait_for_all_runs_to_start(instance)
        validate_run_started(
            instance,
            instance.get_runs()[0],
            expected_datetime,
            create_pendulum_time(2019, 2, 27, tz="US/Eastern"),
        )

    # Past midnight central time, the central timezone schedule will now run
    freeze_datetime = freeze_datetime.add(hours=1)
    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        assert instance.get_runs_count() == 2
        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 1

        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 1

        expected_datetime = to_timezone(
            create_pendulum_time(year=2019, month=2, day=28, tz="US/Central"), "UTC"
        )

        validate_tick(
            ticks[0],
            external_schedule,
            expected_datetime,
            TickStatus.SUCCESS,
            [instance.get_runs()[0].run_id],
        )

        wait_for_all_runs_to_start(instance)
        validate_run_started(
            instance,
            instance.get_runs()[0],
            expected_datetime,
            create_pendulum_time(2019, 2, 27, tz="US/Central"),
        )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 2
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.SUCCESS

        ticks = instance.get_ticks(eastern_origin.get_id(), external_eastern_schedule.selector_id)
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.SUCCESS
def test_execute_during_dst_transition_fall_back(instance, workspace, external_repo):
    """Daily schedule at a wall-clock time that occurs twice during the fall
    DST transition should still execute only once for that day."""
    # A schedule that runs daily during a time that occurs twice during a fall DST transition
    # only executes once for that day
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 11, 2, 0, 0, 0, tz="US/Central"), "US/Pacific"
    )
    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule(
            "daily_dst_transition_schedule_doubled_time"
        )
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    # Advance past three daily ticks, spanning the Nov 3 fall-back transition.
    freeze_datetime = freeze_datetime.add(days=3)
    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3

        # Note the UTC offset shift: pre-transition tick lands at 06:30 UTC,
        # post-transition ticks at 07:30 UTC.
        expected_datetimes_utc = [
            create_pendulum_time(2019, 11, 4, 7, 30, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 3, 7, 30, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 2, 6, 30, 0, tz="UTC"),
        ]

        expected_partition_times = [
            create_pendulum_time(2019, 11, 3, tz="US/Central"),
            create_pendulum_time(2019, 11, 2, tz="US/Central"),
            create_pendulum_time(2019, 11, 1, tz="US/Central"),
        ]

        for i in range(3):
            validate_tick(
                ticks[i],
                external_schedule,
                expected_datetimes_utc[i],
                TickStatus.SUCCESS,
                [instance.get_runs()[i].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[i],
                expected_datetimes_utc[i],
                partition_time=expected_partition_times[i],
            )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3
def test_execute_during_dst_transition_spring_forward(instance, workspace, external_repo):
    """Daily schedule at a wall-clock time skipped by the spring-forward DST
    transition should execute at the first existing time instead."""
    # Verify that a daily schedule that is supposed to execute at a time that is skipped
    # by the DST transition does not execute for that day
    # Day before DST
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 3, 9, 0, 0, 0, tz="US/Central"), "US/Pacific"
    )
    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule(
            "daily_dst_transition_schedule_skipped_time"
        )
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    # Advance past three daily ticks, spanning the Mar 10 spring-forward.
    freeze_datetime = freeze_datetime.add(days=3)
    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3

        # On Mar 10 the 2:30 AM local time does not exist, so the tick fires
        # at 3:00 AM local instead.
        expected_datetimes_utc = [
            to_timezone(create_pendulum_time(2019, 3, 11, 2, 30, 0, tz="US/Central"), "UTC"),
            to_timezone(create_pendulum_time(2019, 3, 10, 3, 00, 0, tz="US/Central"), "UTC"),
            to_timezone(create_pendulum_time(2019, 3, 9, 2, 30, 0, tz="US/Central"), "UTC"),
        ]

        expected_partition_times = [
            create_pendulum_time(2019, 3, 10, tz="US/Central"),
            create_pendulum_time(2019, 3, 9, tz="US/Central"),
            create_pendulum_time(2019, 3, 8, tz="US/Central"),
        ]

        partition_set_def = the_repo.get_partition_set_def(
            "daily_dst_transition_schedule_skipped_time_partitions"
        )
        partition_names = partition_set_def.get_partition_names()

        assert "2019-03-08" in partition_names
        assert "2019-03-09" in partition_names
        assert "2019-03-10" in partition_names

        for i in range(3):
            validate_tick(
                ticks[i],
                external_schedule,
                expected_datetimes_utc[i],
                TickStatus.SUCCESS,
                [instance.get_runs()[i].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[i],
                expected_datetimes_utc[i],
                partition_time=expected_partition_times[i],
            )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3
def test_daily_dst_fall_back(instance, workspace, external_repo):
    """A daily schedule should still fire exactly once per day across the fall
    DST transition, at the same local (Central) time each day."""
    # Verify that a daily schedule still runs once per day during the fall DST transition
    # Night before DST
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 11, 3, 0, 0, 0, tz="US/Central"), "US/Pacific"
    )
    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule("daily_central_time_schedule")
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    freeze_datetime = freeze_datetime.add(days=2)
    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3

        # UTC time changed by one hour after the transition, still running daily at the same
        # time in CT
        expected_datetimes_utc = [
            create_pendulum_time(2019, 11, 5, 6, 0, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 4, 6, 0, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 3, 5, 0, 0, tz="UTC"),
        ]

        expected_partition_times = [
            create_pendulum_time(2019, 11, 4, tz="US/Central"),
            create_pendulum_time(2019, 11, 3, tz="US/Central"),
            create_pendulum_time(2019, 11, 2, tz="US/Central"),
        ]

        for i in range(3):
            validate_tick(
                ticks[i],
                external_schedule,
                expected_datetimes_utc[i],
                TickStatus.SUCCESS,
                [instance.get_runs()[i].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[i],
                expected_datetimes_utc[i],
                partition_time=expected_partition_times[i],
            )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3
def test_hourly_dst_fall_back(instance, workspace, external_repo):
    """An hourly schedule should fire every UTC hour across the fall DST
    transition — including both the CDT and CST occurrences of 1 AM."""
    # Verify that an hourly schedule still runs hourly during the fall DST transition
    # 12:30 AM CST
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 11, 3, 0, 30, 0, tz="US/Central"), "US/Pacific"
    )
    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule("hourly_central_time_schedule")
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    freeze_datetime = freeze_datetime.add(hours=4)

    # DST has now happened, 4 hours later it is 3:30AM CST
    # Should be 4 runs: 1AM CDT, 1AM CST, 2AM CST, 3AM CST
    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 4
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 4

        expected_datetimes_utc = [
            create_pendulum_time(2019, 11, 3, 9, 0, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 3, 8, 0, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 3, 7, 0, 0, tz="UTC"),
            create_pendulum_time(2019, 11, 3, 6, 0, 0, tz="UTC"),
        ]

        expected_ct_times = [
            "2019-11-03T03:00:00-06:00",  # 3 AM CST
            "2019-11-03T02:00:00-06:00",  # 2 AM CST
            "2019-11-03T01:00:00-06:00",  # 1 AM CST
            "2019-11-03T01:00:00-05:00",  # 1 AM CDT
        ]

        for i in range(4):
            # Sanity-check the UTC ticks against their Central renderings,
            # covering the repeated 1 AM wall-clock hour.
            assert (
                to_timezone(expected_datetimes_utc[i], "US/Central").isoformat()
                == expected_ct_times[i]
            )

            validate_tick(
                ticks[i],
                external_schedule,
                expected_datetimes_utc[i],
                TickStatus.SUCCESS,
                [instance.get_runs()[i].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[i],
                expected_datetimes_utc[i],
                partition_time=to_timezone(expected_datetimes_utc[i], "US/Central").subtract(
                    hours=1
                ),
                partition_fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 4
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 4
def test_hourly_dst_spring_forward(instance, workspace, external_repo):
    """An hourly schedule should fire every UTC hour across the spring DST
    transition — the nonexistent 2 AM local hour yields no tick."""
    # Verify that an hourly schedule still runs hourly during the spring DST transition
    # 1AM CST
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 3, 10, 1, 0, 0, tz="US/Central"), "US/Pacific"
    )
    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule("hourly_central_time_schedule")
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    freeze_datetime = freeze_datetime.add(hours=2)

    # DST has now happened, 2 hours later it is 4AM CST
    # Should be 3 runs: 1AM CST, 3AM CST, 4AM CST
    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3

        # 2 AM Central does not exist on 3/10, hence the 1 AM → 3 AM gap.
        expected_datetimes_utc = [
            to_timezone(create_pendulum_time(2019, 3, 10, 4, 0, 0, tz="US/Central"), "UTC"),
            to_timezone(create_pendulum_time(2019, 3, 10, 3, 0, 0, tz="US/Central"), "UTC"),
            to_timezone(create_pendulum_time(2019, 3, 10, 1, 0, 0, tz="US/Central"), "UTC"),
        ]

        for i in range(3):
            validate_tick(
                ticks[i],
                external_schedule,
                expected_datetimes_utc[i],
                TickStatus.SUCCESS,
                [instance.get_runs()[i].run_id],
            )

            validate_run_started(
                instance,
                instance.get_runs()[i],
                expected_datetimes_utc[i],
                partition_time=to_timezone(expected_datetimes_utc[i], "US/Central").subtract(
                    hours=1
                ),
                partition_fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 3
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 3
def schedule_execution_time_iterator(
        start_timestamp: float, cron_schedule: str,
        execution_timezone: Optional[str]) -> Iterator[datetime.datetime]:
    """Yield successive execution times for a cron schedule.

    Starts at the first tick >= start_timestamp and yields timezone-aware
    datetimes localized to execution_timezone (UTC when None). Common fixed
    intervals (hourly/daily/weekly/monthly) are advanced with pendulum
    arithmetic for speed and DST-correctness; anything else falls back to
    croniter iteration.

    Args:
        start_timestamp: epoch seconds to start iterating from.
        cron_schedule: a standard 5-field cron string.
        execution_timezone: IANA timezone name, or None for UTC.
    """
    timezone_str = execution_timezone if execution_timezone else "UTC"

    utc_datetime = pytz.utc.localize(
        datetime.datetime.utcfromtimestamp(start_timestamp))
    start_datetime = utc_datetime.astimezone(pytz.timezone(timezone_str))

    date_iter = croniter(cron_schedule, start_datetime)

    # Go back one iteration so that the next iteration is the first time that is >= start_datetime
    # and matches the cron schedule
    next_date = date_iter.get_prev(datetime.datetime)

    check.invariant(is_valid_cron_string(cron_schedule))

    cron_parts, _ = croniter.expand(cron_schedule)

    # Classify each of the 5 cron fields: a single concrete value vs. "*".
    is_numeric = [len(part) == 1 and part[0] != "*" for part in cron_parts]
    is_wildcard = [len(part) == 1 and part[0] == "*" for part in cron_parts]

    delta_fn = None
    should_hour_change = False

    # Special-case common intervals (hourly/daily/weekly/monthly) since croniter iteration can be
    # much slower than adding a fixed interval
    if all(is_numeric[0:3]) and all(is_wildcard[3:]):  # monthly
        delta_fn = lambda d, num: d.add(months=num)
        should_hour_change = False
    elif all(is_numeric[0:2]) and is_numeric[4] and all(
            is_wildcard[2:4]):  # weekly
        delta_fn = lambda d, num: d.add(weeks=num)
        should_hour_change = False
    elif all(is_numeric[0:2]) and all(is_wildcard[2:]):  # daily
        delta_fn = lambda d, num: d.add(days=num)
        should_hour_change = False
    elif is_numeric[0] and all(is_wildcard[1:]):  # hourly
        delta_fn = lambda d, num: d.add(hours=num)
        should_hour_change = True
    else:
        delta_fn = None
        should_hour_change = False

    if delta_fn:
        # Use pendulums for intervals when possible
        next_date = to_timezone(pendulum.instance(next_date), timezone_str)
        while True:
            curr_hour = next_date.hour

            next_date_cand = delta_fn(next_date, 1)
            new_hour = next_date_cand.hour

            if not should_hour_change and new_hour != curr_hour:
                # If the hour changes during a daily/weekly/monthly schedule, it
                # indicates that the time shifted due to falling in a time that doesn't
                # exist due to a DST transition (for example, 2:30AM CST on 3/10/2019).
                # Instead, execute at the first time that does exist (the start of the hour),
                # but return to the original hour for all subsequent executions so that the
                # hour doesn't stay different permanently.
                check.invariant(new_hour == curr_hour + 1)
                yield next_date_cand.replace(minute=0)

                # Skip two intervals ahead so the following tick lands back on
                # the originally-configured hour.
                next_date_cand = delta_fn(next_date, 2)
                check.invariant(next_date_cand.hour == curr_hour)

            next_date = next_date_cand
            yield next_date
    else:
        # Otherwise fall back to croniter
        while True:
            next_date = to_timezone(
                pendulum.instance(date_iter.get_next(datetime.datetime)),
                timezone_str)
            yield next_date
def test_non_utc_timezone_run(instance, workspace, external_repo):
    """A daily schedule in US/Central should fire exactly at its local
    midnight, not at UTC midnight."""
    # Verify that schedule runs at the expected time in a non-UTC timezone
    freeze_datetime = to_timezone(
        create_pendulum_time(2019, 2, 27, 23, 59, 59, tz="US/Central"), "US/Pacific"
    )
    with pendulum.test(freeze_datetime):
        external_schedule = external_repo.get_external_schedule("daily_central_time_schedule")

        schedule_origin = external_schedule.get_external_origin()

        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

        # One second before Central midnight: nothing should fire yet.
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        assert instance.get_runs_count() == 0
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 0

    freeze_datetime = freeze_datetime.add(seconds=2)
    with pendulum.test(freeze_datetime):
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

        assert instance.get_runs_count() == 1
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 1

        expected_datetime = to_timezone(
            create_pendulum_time(year=2019, month=2, day=28, tz="US/Central"), "UTC"
        )

        validate_tick(
            ticks[0],
            external_schedule,
            expected_datetime,
            TickStatus.SUCCESS,
            [run.run_id for run in instance.get_runs()],
        )

        wait_for_all_runs_to_start(instance)
        validate_run_started(
            instance,
            instance.get_runs()[0],
            expected_datetime,
            create_pendulum_time(2019, 2, 27, tz="US/Central"),
        )

        # Verify idempotence
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )
        assert instance.get_runs_count() == 1
        ticks = instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.SUCCESS
def test_failure_recovery_before_run_created(external_repo_context, crash_location,
                                             crash_signal, capfd):
    # Verify that if the scheduler crashes or is interrupted before a run is created,
    # it will create exactly one tick/run when it is re-launched
    with instance_with_schedules(external_repo_context) as (
            instance,
            _grpc_server_registry,
            external_repo,
    ):
        initial_datetime = to_timezone(
            create_pendulum_time(year=2019,
                                 month=2,
                                 day=27,
                                 hour=0,
                                 minute=0,
                                 second=0,
                                 tz="UTC"),
            "US/Central",
        )

        frozen_datetime = initial_datetime.add()

        external_schedule = external_repo.get_external_schedule(
            "simple_schedule")
        with pendulum.test(frozen_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            # Configure the subprocess to crash at the given location/signal.
            debug_crash_flags = {
                external_schedule.name: {
                    crash_location: crash_signal
                }
            }

            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, debug_crash_flags],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)

            # The injected crash should make the subprocess exit abnormally.
            assert scheduler_process.exitcode != 0

            captured = capfd.readouterr()
            assert (
                captured.out.replace("\r\n", "\n") ==
                """2019-02-26 18:00:00 - SchedulerDaemon - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-26 18:00:00 - SchedulerDaemon - INFO - Evaluating schedule `simple_schedule` at 2019-02-27 00:00:00+0000
""")

            # The tick was started but no run was created before the crash.
            ticks = instance.get_job_ticks(
                external_schedule.get_external_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == JobTickStatus.STARTED

            assert instance.get_runs_count() == 0

        frozen_datetime = frozen_datetime.add(minutes=5)
        with pendulum.test(frozen_datetime):
            # Re-launch without crash flags: the interrupted tick is resumed
            # and exactly one run is created.
            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, None],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)
            assert scheduler_process.exitcode == 0

            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                execution_time=initial_datetime,
                partition_time=create_pendulum_time(2019, 2, 26),
            )

            ticks = instance.get_job_ticks(
                external_schedule.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                JobTickStatus.SUCCESS,
                [instance.get_runs()[0].run_id],
            )

            captured = capfd.readouterr()
            assert (
                captured.out.replace("\r\n", "\n") ==
                """2019-02-26 18:05:00 - SchedulerDaemon - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-26 18:05:00 - SchedulerDaemon - INFO - Evaluating schedule `simple_schedule` at 2019-02-27 00:00:00+0000
2019-02-26 18:05:00 - SchedulerDaemon - INFO - Resuming previously interrupted schedule execution
2019-02-26 18:05:00 - SchedulerDaemon - INFO - Completed scheduled launch of run {run_id} for simple_schedule
""".format(run_id=instance.get_runs()[0].run_id))
def test_failure_before_run_created(crash_location, crash_signal, capfd):
    # Verify sensor daemon recovery: a crash before a run is created must not
    # advance the sensor's cursor — the next evaluation uses the last
    # *successful* tick's time and still produces exactly one run.
    frozen_datetime = to_timezone(
        create_pendulum_time(year=2019, month=2, day=28, hour=0, minute=0, second=1, tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors() as (
        instance,
        _grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(frozen_datetime):
            external_sensor = external_repo.get_external_sensor("simple_sensor")
            instance.add_instigator_state(
                InstigatorState(
                    external_sensor.get_external_origin(),
                    InstigatorType.SENSOR,
                    InstigatorStatus.RUNNING,
                )
            )

            # create a tick
            launch_process = spawn_ctx.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, None],
            )
            launch_process.start()
            launch_process.join(timeout=60)
            ticks = instance.get_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            # First evaluation skips (odd timestamp — see the parity check below).
            assert ticks[0].status == TickStatus.SKIPPED
            # Drain captured output so later assertions see only fresh logs.
            capfd.readouterr()

            # create a starting tick, but crash
            debug_crash_flags = {external_sensor.name: {crash_location: crash_signal}}
            launch_process = spawn_ctx.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime.add(seconds=31), debug_crash_flags],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            # The injected crash must terminate the subprocess abnormally.
            assert launch_process.exitcode != 0

            capfd.readouterr()

            ticks = instance.get_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            # The crashed evaluation left its tick in STARTED with no run.
            assert ticks[0].status == TickStatus.STARTED
            assert not int(ticks[0].timestamp) % 2  # skip condition for simple_sensor
            assert instance.get_runs_count() == 0

            # create another tick, but ensure that the last evaluation time used is from the first,
            # successful tick rather than the failed tick
            launch_process = spawn_ctx.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime.add(seconds=62), None],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            assert launch_process.exitcode == 0
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]

            assert (
                get_logger_output_from_capfd(capfd, "dagster.daemon.SensorDaemon")
                == f"""2019-02-27 18:01:03 -0600 - dagster.daemon.SensorDaemon - INFO - Checking for new runs for sensor: simple_sensor
2019-02-27 18:01:03 -0600 - dagster.daemon.SensorDaemon - INFO - Launching run for simple_sensor
2019-02-27 18:01:03 -0600 - dagster.daemon.SensorDaemon - INFO - Completed launch of run {run.run_id} for simple_sensor"""
            )

            ticks = instance.get_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 3
            assert ticks[0].status == TickStatus.SUCCESS
def test_partitions_for_hourly_schedule_decorators_with_timezone(partition_hours_offset: int):
    # Verify @hourly_schedule partition generation when an execution_timezone
    # is set: naive start dates are interpreted in that timezone, and start
    # dates given in another timezone are converted to it.
    with pendulum.test(create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")):
        start_date = datetime(year=2019, month=1, day=1)

        # You can specify a start date with no timezone and it will be assumed to be
        # in the execution timezone

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=start_date,
            execution_time=time(hour=0, minute=25),
            execution_timezone="US/Central",
            partition_hours_offset=partition_hours_offset,
        )
        def hourly_central_schedule(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        assert hourly_central_schedule.execution_timezone == "US/Central"

        # Expected count: every hour from the start date up to the frozen
        # "now", plus one, minus the configured offset.
        _check_partitions(
            hourly_central_schedule,
            HOURS_UNTIL_FEBRUARY_27 + 1 - partition_hours_offset,
            pendulum.instance(start_date, tz="US/Central"),
            DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            relativedelta(hours=1),
        )

        # Evaluating a tick at a valid execution time (minute=25 matches
        # execution_time) must yield exactly one run request whose run config
        # reflects the partition `partition_hours_offset` hours earlier.
        valid_time = create_pendulum_time(year=2019, month=1, day=27, hour=1, minute=25, tz="US/Central")
        context_with_valid_time = build_schedule_context(scheduled_execution_time=valid_time)

        execution_data = hourly_central_schedule.evaluate_tick(context_with_valid_time)
        assert execution_data.run_requests
        assert len(execution_data.run_requests) == 1
        assert execution_data.run_requests[0].run_config == {
            "hourly_time": create_pendulum_time(year=2019, month=1, day=27, hour=1, tz="US/Central")
            .subtract(hours=partition_hours_offset)
            .isoformat()
        }

        # You can specify a start date in a different timezone and it will be transformed into the
        # execution timezone
        start_date_with_different_timezone = create_pendulum_time(2019, 1, 1, 0, tz="US/Pacific")

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=start_date_with_different_timezone,
            execution_time=time(hour=0, minute=25),
            execution_timezone="US/Central",
            partition_hours_offset=partition_hours_offset,
        )
        def hourly_central_schedule_with_timezone_start_time(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        _check_partitions(
            hourly_central_schedule_with_timezone_start_time,
            HOURS_UNTIL_FEBRUARY_27
            - 2  # start date is two hours later since it's in PT
            + 1
            - partition_hours_offset,
            to_timezone(start_date_with_different_timezone, "US/Central"),
            DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            relativedelta(hours=1),
        )
def schedule_execution_time_iterator(
    start_timestamp: float, cron_schedule: str, execution_timezone: Optional[str]
) -> Iterator[datetime.datetime]:
    """Yield an endless sequence of execution times for ``cron_schedule``,
    beginning with the first time that is >= ``start_timestamp``.

    Args:
        start_timestamp: POSIX timestamp marking the earliest allowed execution.
        cron_schedule: a standard five-field cron string.
        execution_timezone: IANA timezone name for the yielded datetimes;
            defaults to "UTC" when None.

    Yields:
        Timezone-aware datetimes (pendulum instances) in ascending order.
    """
    timezone_str = execution_timezone if execution_timezone else "UTC"

    start_datetime = pendulum.from_timestamp(start_timestamp, tz=timezone_str)
    date_iter = croniter(cron_schedule, start_datetime)

    # Go back one iteration so that the next iteration is the first time that is >= start_datetime
    # and matches the cron schedule
    next_date = to_timezone(
        pendulum.instance(date_iter.get_prev(datetime.datetime)), timezone_str
    )

    cron_parts = cron_schedule.split(" ")

    # A valid cron string always has exactly five whitespace-separated fields.
    check.invariant(len(cron_parts) == 5)

    is_numeric = [part.isnumeric() for part in cron_parts]

    delta_fn = None

    # Special-case common intervals (hourly/daily/weekly/monthly) since croniter iteration can be
    # much slower than adding a fixed interval
    if cron_schedule.endswith(" * *") and all(is_numeric[0:3]):  # monthly
        delta_fn = lambda d, num: d.add(months=num)
        should_hour_change = False
    elif (
        all(is_numeric[0:2]) and is_numeric[4] and cron_parts[2] == "*" and cron_parts[3] == "*"
    ):  # weekly
        delta_fn = lambda d, num: d.add(weeks=num)
        should_hour_change = False
    elif all(is_numeric[0:2]) and cron_schedule.endswith(" * * *"):  # daily
        delta_fn = lambda d, num: d.add(days=num)
        should_hour_change = False
    elif is_numeric[0] and cron_schedule.endswith(" * * * *"):  # hourly
        delta_fn = lambda d, num: d.add(hours=num)
        should_hour_change = True

    while True:
        if delta_fn:
            # Fast path: advance by a fixed interval instead of asking croniter.
            curr_hour = next_date.hour

            next_date_cand = delta_fn(next_date, 1)
            new_hour = next_date_cand.hour

            if not should_hour_change and new_hour != curr_hour:
                # If the hour changes during a daily/weekly/monthly schedule, it
                # indicates that the time shifted due to falling in a time that doesn't
                # exist due to a DST transition (for example, 2:30AM CST on 3/10/2019).
                # Instead, execute at the first time that does exist (the start of the hour),
                # but return to the original hour for all subsequent executions so that the
                # hour doesn't stay different permanently.

                check.invariant(new_hour == curr_hour + 1)
                yield next_date_cand.replace(minute=0)

                # Skip ahead two intervals from the pre-transition time so the
                # following execution lands back on the original hour.
                next_date_cand = delta_fn(next_date, 2)
                check.invariant(next_date_cand.hour == curr_hour)

            next_date = next_date_cand
        else:
            # General path: let croniter compute the next matching time.
            next_date = to_timezone(
                pendulum.instance(date_iter.get_next(datetime.datetime)), timezone_str
            )

        yield next_date
def test_failure_recovery_before_run_created(instance, external_repo, crash_location, crash_signal):
    """Crash the scheduler before a run is created; a subsequent relaunch must
    resume the interrupted tick and produce exactly one tick and one run."""

    def _run_scheduler_subprocess(evaluation_time, crash_flags):
        # Execute the scheduler loop in a subprocess so an injected crash
        # signal kills only the daemon, not the test process.
        proc = spawn_ctx.Process(
            target=_test_launch_scheduled_runs_in_subprocess,
            args=[instance.get_ref(), evaluation_time, crash_flags],
        )
        proc.start()
        proc.join(timeout=60)
        return proc

    def _schedule_ticks():
        return instance.get_ticks(
            external_schedule.get_external_origin_id(), external_schedule.selector_id
        )

    # Midnight UTC on 2019-02-27, viewed from US/Central.
    start_time = to_timezone(
        create_pendulum_time(year=2019, month=2, day=27, hour=0, minute=0, second=0, tz="UTC"),
        "US/Central",
    )
    # Copy so `start_time` stays untouched for the later validations.
    frozen_time = start_time.add()
    external_schedule = external_repo.get_external_schedule("simple_schedule")

    with pendulum.test(frozen_time):
        instance.start_schedule(external_schedule)

        crashed = _run_scheduler_subprocess(
            frozen_time, {external_schedule.name: {crash_location: crash_signal}}
        )
        # The injected signal must have terminated the daemon abnormally.
        assert crashed.exitcode != 0

        interrupted_ticks = _schedule_ticks()
        assert len(interrupted_ticks) == 1
        assert interrupted_ticks[0].status == TickStatus.STARTED
        assert instance.get_runs_count() == 0

    # Relaunch five minutes later without crash flags.
    frozen_time = frozen_time.add(minutes=5)
    with pendulum.test(frozen_time):
        recovered = _run_scheduler_subprocess(frozen_time, None)
        assert recovered.exitcode == 0

        assert instance.get_runs_count() == 1
        wait_for_all_runs_to_start(instance)
        validate_run_exists(
            instance.get_runs()[0],
            execution_time=start_time,
            partition_time=create_pendulum_time(2019, 2, 26),
        )

        resolved_ticks = _schedule_ticks()
        assert len(resolved_ticks) == 1
        validate_tick(
            resolved_ticks[0],
            external_schedule,
            start_time,
            TickStatus.SUCCESS,
            [instance.get_runs()[0].run_id],
        )
def test_different_days_in_different_timezones(instance, workspace, external_repo):
    """Exercise a schedule that fires daily at 11PM US/Central while the frozen
    clock is observed in US/Pacific: no tick before the boundary, exactly one
    tick/run after it, and re-evaluating is idempotent."""

    def _evaluate():
        # launch_scheduled_runs yields lazily; drain it to perform the work.
        list(
            launch_scheduled_runs(
                instance,
                workspace,
                logger(),
                pendulum.now("UTC"),
            )
        )

    def _current_ticks():
        return instance.get_ticks(schedule_origin.get_id(), external_schedule.selector_id)

    # One second before 11PM Central, expressed in US/Pacific.
    current_time = to_timezone(
        create_pendulum_time(2019, 2, 27, 22, 59, 59, tz="US/Central"), "US/Pacific"
    )
    with pendulum.test(current_time):
        # Runs every day at 11PM (CST)
        external_schedule = external_repo.get_external_schedule("daily_late_schedule")
        schedule_origin = external_schedule.get_external_origin()
        instance.start_schedule(external_schedule)

        assert instance.get_runs_count() == 0
        assert len(_current_ticks()) == 0

        # Still before the boundary: evaluation produces nothing.
        _evaluate()
        assert instance.get_runs_count() == 0
        assert len(_current_ticks()) == 0

    # Step two seconds forward, crossing 11PM Central.
    current_time = current_time.add(seconds=2)
    with pendulum.test(current_time):
        _evaluate()
        assert instance.get_runs_count() == 1
        ticks_after_boundary = _current_ticks()
        assert len(ticks_after_boundary) == 1

        expected_datetime = to_timezone(
            create_pendulum_time(year=2019, month=2, day=27, hour=23, tz="US/Central"), "UTC"
        )

        validate_tick(
            ticks_after_boundary[0],
            external_schedule,
            expected_datetime,
            TickStatus.SUCCESS,
            [instance.get_runs()[0].run_id],
        )

        wait_for_all_runs_to_start(instance)
        validate_run_started(
            instance,
            instance.get_runs()[0],
            expected_datetime,
            create_pendulum_time(2019, 2, 26, tz="US/Central"),
        )

        # Verify idempotence
        _evaluate()
        assert instance.get_runs_count() == 1
        final_ticks = _current_ticks()
        assert len(final_ticks) == 1
        assert final_ticks[0].status == TickStatus.SUCCESS
def test_failure_after_run_launched(crash_location, crash_signal, capfd):
    # Verify that when the sensor daemon crashes *after* launching a run, a
    # relaunch does not duplicate the run: the run-key dedupe kicks in and the
    # follow-up tick is SKIPPED.
    frozen_datetime = to_timezone(
        create_pendulum_time(
            year=2019,
            month=2,
            day=28,
            hour=0,
            minute=0,
            second=0,
            tz="UTC",
        ),
        "US/Central",
    )
    with instance_with_sensors() as (
        instance,
        _grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(frozen_datetime):
            external_sensor = external_repo.get_external_sensor("run_key_sensor")
            instance.add_instigator_state(
                InstigatorState(
                    external_sensor.get_external_origin(),
                    InstigatorType.SENSOR,
                    InstigatorStatus.RUNNING,
                )
            )

            # create a run, launch but crash
            debug_crash_flags = {external_sensor.name: {crash_location: crash_signal}}
            launch_process = spawn_ctx.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, debug_crash_flags],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            # The injected crash must terminate the subprocess abnormally.
            assert launch_process.exitcode != 0

            ticks = instance.get_ticks(external_sensor.get_external_origin_id())

            # Tick is stuck in STARTED, but the run itself already exists.
            assert len(ticks) == 1
            assert ticks[0].status == TickStatus.STARTED
            assert instance.get_runs_count() == 1

            run = instance.get_runs()[0]
            wait_for_all_runs_to_start(instance)
            # The launched run carries the sensor-name and run-key tags used
            # later for dedupe.
            assert run.tags.get(SENSOR_NAME_TAG) == "run_key_sensor"
            assert run.tags.get(RUN_KEY_TAG) == "only_once"
            # Drain output so the next assertion sees only fresh logs.
            capfd.readouterr()

            launch_process = spawn_ctx.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime.add(seconds=1), None],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            assert launch_process.exitcode == 0
            wait_for_all_runs_to_start(instance)

            # Still exactly one run: the run key prevented a duplicate launch.
            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            captured = capfd.readouterr()

            assert (
                'Skipping 1 run for sensor run_key_sensor already completed with run keys: ["only_once"]'
                in captured.out
            )

            ticks = instance.get_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            assert ticks[0].status == TickStatus.SKIPPED
def test_failure_after_run_created_before_run_launched(external_repo_context, crash_location, crash_signal, capfd):
    # Verify that if the sensor daemon crashes after a run was created but
    # before it was launched, the relaunch picks up the already-created run
    # (matched by run key) instead of creating a second one.
    frozen_datetime = to_timezone(
        create_pendulum_time(year=2019, month=2, day=28, hour=0, minute=0, second=0, tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
        instance,
        _grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(frozen_datetime):
            external_sensor = external_repo.get_external_sensor("run_key_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )

            # create a starting tick, but crash
            debug_crash_flags = {external_sensor.name: {crash_location: crash_signal}}
            launch_process = multiprocessing.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, debug_crash_flags],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            # The injected crash must terminate the subprocess abnormally.
            assert launch_process.exitcode != 0

            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())

            assert len(ticks) == 1
            assert ticks[0].status == JobTickStatus.STARTED
            assert instance.get_runs_count() == 1

            run = instance.get_runs()[0]
            # Run was created, but hasn't launched yet
            assert run.status == PipelineRunStatus.NOT_STARTED
            assert run.tags.get(SENSOR_NAME_TAG) == "run_key_sensor"
            assert run.tags.get(RUN_KEY_TAG) == "only_once"

            # clear output
            capfd.readouterr()

            launch_process = multiprocessing.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime.add(seconds=1), None],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            assert launch_process.exitcode == 0
            wait_for_all_runs_to_start(instance)

            # Still exactly one run — the pre-existing run was reused.
            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            captured = capfd.readouterr()

            assert (
                f"Run {run.run_id} already created with the run key `only_once` for run_key_sensor"
                in captured.out
            )

            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            assert ticks[0].status == JobTickStatus.SUCCESS
def test_non_utc_timezone_run(external_repo_context, capfd):
    # Verify that schedule runs at the expected time in a non-UTC timezone
    with instance_with_schedules(external_repo_context) as (
        instance,
        workspace,
        external_repo,
    ):
        # One second before midnight US/Central, observed from US/Pacific
        # (hence the 21:59:59 timestamps in the expected log output).
        freeze_datetime = to_timezone(
            create_pendulum_time(2019, 2, 27, 23, 59, 59, tz="US/Central"), "US/Pacific"
        )
        with pendulum.test(freeze_datetime):
            external_schedule = external_repo.get_external_schedule("daily_central_time_schedule")

            schedule_origin = external_schedule.get_external_origin()

            instance.start_schedule_and_update_storage_state(external_schedule)

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

            # Before the schedule boundary: evaluating produces no runs/ticks.
            list(
                launch_scheduled_runs(
                    instance,
                    workspace,
                    logger(),
                    pendulum.now("UTC"),
                )
            )

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

            captured = capfd.readouterr()

            assert (
                captured.out
                == """2019-02-27 21:59:59 - SchedulerDaemon - INFO - Checking for new runs for the following schedules: daily_central_time_schedule
2019-02-27 21:59:59 - SchedulerDaemon - INFO - No new runs for daily_central_time_schedule
"""
            )

        # Step past midnight US/Central.
        freeze_datetime = freeze_datetime.add(seconds=2)
        with pendulum.test(freeze_datetime):
            list(
                launch_scheduled_runs(
                    instance,
                    workspace,
                    logger(),
                    pendulum.now("UTC"),
                )
            )

            assert instance.get_runs_count() == 1
            ticks = instance.get_job_ticks(schedule_origin.get_id())
            assert len(ticks) == 1

            # The tick time is midnight US/Central expressed in UTC.
            expected_datetime = to_timezone(
                create_pendulum_time(year=2019, month=2, day=28, tz="US/Central"), "UTC"
            )

            validate_tick(
                ticks[0],
                external_schedule,
                expected_datetime,
                JobTickStatus.SUCCESS,
                [run.run_id for run in instance.get_runs()],
            )

            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                expected_datetime,
                create_pendulum_time(2019, 2, 27, tz="US/Central"),
            )

            captured = capfd.readouterr()
            assert (
                captured.out
                == """2019-02-27 22:00:01 - SchedulerDaemon - INFO - Checking for new runs for the following schedules: daily_central_time_schedule
2019-02-27 22:00:01 - SchedulerDaemon - INFO - Evaluating schedule `daily_central_time_schedule` at 2019-02-28 00:00:00-0600
2019-02-27 22:00:01 - SchedulerDaemon - INFO - Completed scheduled launch of run {run_id} for daily_central_time_schedule
""".format(run_id=instance.get_runs()[0].run_id)
            )

            # Verify idempotence
            list(
                launch_scheduled_runs(
                    instance,
                    workspace,
                    logger(),
                    pendulum.now("UTC"),
                )
            )
            assert instance.get_runs_count() == 1
            ticks = instance.get_job_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == JobTickStatus.SUCCESS
def _create_scheduler_run(
    instance,
    logger,
    schedule_time,
    repo_location,
    external_schedule,
    external_pipeline,
    run_request,
):
    """Create the pipeline run for one scheduled tick.

    Fetches the execution plan for the run request; if that fails, the run is
    still entered into the run DB — with FAILURE status — so the failure is
    visible, and the errors are reported as engine events and logged.

    Returns:
        Tuple of (created run, list of serialized execution-plan errors —
        empty on success).
    """
    run_config = run_request.run_config
    schedule_tags = run_request.tags

    execution_plan_errors = []
    execution_plan_snapshot = None

    try:
        external_execution_plan = repo_location.get_external_execution_plan(
            external_pipeline,
            run_config,
            external_schedule.mode,
            step_keys_to_execute=None,
            known_state=None,
        )
        execution_plan_snapshot = external_execution_plan.execution_plan_snapshot
    except Exception:  # pylint: disable=broad-except
        # Deliberately broad: any failure building the plan is captured so the
        # run can still be recorded (as FAILURE) rather than silently dropped.
        execution_plan_errors.append(serializable_error_info_from_exc_info(sys.exc_info()))

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    # Schedule-provided tags take precedence over pipeline tags on collision.
    tags = merge_dicts(pipeline_tags, schedule_tags)

    # Record the scheduled time in UTC so downstream consumers see a
    # canonical timezone.
    tags[SCHEDULED_EXECUTION_TIME_TAG] = to_timezone(schedule_time, "UTC").isoformat()
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    # If the run was scheduled correctly but there was an error creating its
    # run config, enter it into the run DB with a FAILURE status
    possibly_invalid_pipeline_run = instance.create_run(
        pipeline_name=external_schedule.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=external_schedule.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_pipeline.solid_selection,
        status=(
            PipelineRunStatus.FAILURE
            if len(execution_plan_errors) > 0
            else PipelineRunStatus.NOT_STARTED
        ),
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )

    if len(execution_plan_errors) > 0:
        # Surface each error as an engine event on the run, then mark the run
        # failed and log a summary for the daemon's output.
        for error in execution_plan_errors:
            instance.report_engine_event(
                error.message,
                possibly_invalid_pipeline_run,
                EngineEventData.engine_error(error),
            )
        instance.report_run_failed(possibly_invalid_pipeline_run)
        error_string = "\n".join([error.to_string() for error in execution_plan_errors])
        logger.error(f"Failed to fetch execution plan for {external_schedule.name}: {error_string}")
    return (possibly_invalid_pipeline_run, execution_plan_errors)