def build_tick(self, current_time, status=JobTickStatus.STARTED, run_id=None, error=None):
    return JobTickData(
        "my_schedule",
        "my_schedule",
        JobType.SCHEDULE,
        status,
        current_time,
        [run_id] if run_id else [],
        [],
        error,
    )
def build_sensor_tick(self, current_time, status=JobTickStatus.STARTED, run_id=None, error=None):
    return JobTickData(
        "my_sensor",
        "my_sensor",
        JobType.SENSOR,
        status,
        current_time,
        [run_id] if run_id else [],
        error,
    )
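# A minimal usage sketch for the builders above, assuming they live on a test
# class with a `self.instance` DagsterInstance fixture. The test name, the
# fixture, and the assertion that JobTick exposes the tick-data fields as
# properties are illustrative assumptions, not part of the original suite.
def test_tick_roundtrip(self):
    current_time = pendulum.now("UTC").timestamp()
    tick = self.instance.create_job_tick(
        self.build_tick(current_time, status=JobTickStatus.SUCCESS, run_id="12345")
    )
    assert tick.status == JobTickStatus.SUCCESS
    assert tick.run_ids == ["12345"]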
def launch_scheduled_execution(output_file, schedule_name, **kwargs):
    with ipc_write_stream(output_file) as stream:
        with DagsterInstance.get() as instance:
            repository_origin = get_repository_origin_from_kwargs(kwargs)
            job_origin = repository_origin.get_job_origin(schedule_name)

            # open the tick scope before we load any external artifacts so that
            # load errors are stored in DB
            with _schedule_tick_state(
                instance,
                stream,
                JobTickData(
                    job_origin_id=job_origin.get_id(),
                    job_name=schedule_name,
                    job_type=JobType.SCHEDULE,
                    status=JobTickStatus.STARTED,
                    timestamp=time.time(),
                ),
            ) as tick_context:
                with get_repository_location_from_kwargs(kwargs) as repo_location:
                    repo_dict = repo_location.get_repositories()
                    check.invariant(
                        repo_dict and len(repo_dict) == 1,
                        "Passed in arguments should reference exactly one repository, instead there are {num_repos}".format(
                            num_repos=len(repo_dict)
                        ),
                    )
                    external_repo = next(iter(repo_dict.values()))
                    check.invariant(
                        schedule_name
                        in [schedule.name for schedule in external_repo.get_external_schedules()],
                        "Could not find schedule named {schedule_name}".format(
                            schedule_name=schedule_name
                        ),
                    )
                    external_schedule = external_repo.get_external_schedule(schedule_name)
                    tick_context.update_with_status(status=JobTickStatus.STARTED)
                    _launch_scheduled_execution(
                        instance,
                        repo_location,
                        external_repo,
                        external_schedule,
                        tick_context,
                        stream,
                    )
def execute_sensor_iteration(instance, logger, location_manager, debug_crash_flags=None):
    check.inst_param(location_manager, "location_manager", RepositoryLocationManager)
    check.inst_param(instance, "instance", DagsterInstance)
    sensor_jobs = [
        s
        for s in instance.all_stored_job_state(job_type=JobType.SENSOR)
        if s.status == JobStatus.RUNNING
    ]
    if not sensor_jobs:
        logger.info("Not checking for any runs since no sensors have been started.")
        return

    for job_state in sensor_jobs:
        sensor_debug_crash_flags = (
            debug_crash_flags.get(job_state.job_name) if debug_crash_flags else None
        )
        error_info = None
        try:
            origin = job_state.origin.external_repository_origin.repository_location_origin
            repo_location = location_manager.get_location(origin)

            repo_name = job_state.origin.external_repository_origin.repository_name
            if not repo_location.has_repository(repo_name):
                raise DagsterSensorDaemonError(
                    f"Could not find repository {repo_name} in location {repo_location.name} to "
                    + f"run sensor {job_state.job_name}. If this repository no longer exists, you can "
                    + "turn off the sensor in the Dagit UI.",
                )

            external_repo = repo_location.get_repository(repo_name)
            if not external_repo.has_external_sensor(job_state.job_name):
                raise DagsterSensorDaemonError(
                    f"Could not find sensor {job_state.job_name} in repository {repo_name}. If this "
                    "sensor no longer exists, you can turn it off in the Dagit UI.",
                )

            now = pendulum.now("UTC")
            if _is_under_min_interval(job_state, now):
                continue

            tick = instance.create_job_tick(
                JobTickData(
                    job_origin_id=job_state.job_origin_id,
                    job_name=job_state.job_name,
                    job_type=JobType.SENSOR,
                    status=JobTickStatus.STARTED,
                    timestamp=now.timestamp(),
                )
            )

            _check_for_debug_crash(sensor_debug_crash_flags, "TICK_CREATED")

            external_sensor = external_repo.get_external_sensor(job_state.job_name)
            with SensorLaunchContext(
                external_sensor, job_state, tick, instance, logger
            ) as tick_context:
                _check_for_debug_crash(sensor_debug_crash_flags, "TICK_HELD")
                yield from _evaluate_sensor(
                    tick_context,
                    instance,
                    repo_location,
                    external_repo,
                    external_sensor,
                    job_state,
                    sensor_debug_crash_flags,
                )

            instance.purge_job_ticks(
                job_state.job_origin_id,
                tick_status=JobTickStatus.SKIPPED,
                before=now.subtract(days=7).timestamp(),  # keep the last 7 days
            )
        except Exception:  # pylint: disable=broad-except
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            logger.error(
                "Sensor daemon caught an error for sensor {sensor_name} : {error_info}".format(
                    sensor_name=job_state.job_name,
                    error_info=error_info.to_string(),
                )
            )
        yield error_info
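# The `debug_crash_flags` threaded through the iteration above maps a job name
# to checkpoint labels ("TICK_CREATED", "TICK_HELD"). A plausible sketch of the
# `_check_for_debug_crash` test helper, consistent with its call sites here but
# written under the assumption that each flag value is a signal number the test
# wants sent to the current process to simulate a daemon crash:
import os
import time

def _check_for_debug_crash(debug_crash_flags, key):
    if not debug_crash_flags:
        return
    kill_signal = debug_crash_flags.get(key)
    if not kill_signal:
        return
    os.kill(os.getpid(), kill_signal)  # e.g. signal.SIGINT, chosen by the test
    time.sleep(10)
    raise Exception("Process didn't terminate after sending crash signal")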
def execute_sensor_iteration(instance, logger, debug_crash_flags=None):
    check.inst_param(instance, "instance", DagsterInstance)
    sensor_jobs = [
        s
        for s in instance.all_stored_job_state(job_type=JobType.SENSOR)
        if s.status == JobStatus.RUNNING
    ]
    if not sensor_jobs:
        logger.info("Not checking for any runs since no sensors have been started.")
        return

    logger.info(
        "Checking for new runs for the following sensors: {sensor_names}".format(
            sensor_names=", ".join([job.job_name for job in sensor_jobs]),
        )
    )

    for job_state in sensor_jobs:
        sensor_debug_crash_flags = (
            debug_crash_flags.get(job_state.job_name) if debug_crash_flags else None
        )
        try:
            with RepositoryLocationHandle.create_from_repository_location_origin(
                job_state.origin.external_repository_origin.repository_location_origin
            ) as repo_location_handle:
                repo_location = RepositoryLocation.from_handle(repo_location_handle)

                repo_dict = repo_location.get_repositories()
                check.invariant(
                    len(repo_dict) == 1,
                    "Reconstructed repository location should have exactly one repository",
                )
                external_repo = next(iter(repo_dict.values()))
                if not external_repo.has_external_job(job_state.job_name):
                    continue

                now = pendulum.now("UTC")
                tick = instance.create_job_tick(
                    JobTickData(
                        job_origin_id=job_state.job_origin_id,
                        job_name=job_state.job_name,
                        job_type=JobType.SENSOR,
                        status=JobTickStatus.STARTED,
                        timestamp=now.timestamp(),
                    )
                )

                _check_for_debug_crash(sensor_debug_crash_flags, "TICK_CREATED")

                external_sensor = external_repo.get_external_sensor(job_state.job_name)
                with SensorLaunchContext(job_state, tick, instance, logger) as tick_context:
                    _check_for_debug_crash(sensor_debug_crash_flags, "TICK_HELD")
                    _evaluate_sensor(
                        tick_context,
                        instance,
                        repo_location,
                        external_repo,
                        external_sensor,
                        job_state,
                        sensor_debug_crash_flags,
                    )

                instance.purge_job_ticks(
                    job_state.job_origin_id,
                    tick_status=JobTickStatus.SKIPPED,
                    before=now.subtract(days=7),  # keep the last 7 days
                )
        except Exception:  # pylint: disable=broad-except
            logger.error(
                "Sensor failed for {sensor_name} : {error_info}".format(
                    sensor_name=job_state.job_name,
                    error_info=serializable_error_info_from_exc_info(sys.exc_info()).to_string(),
                )
            )
        yield
def launch_scheduled_execution(output_file, schedule_name, override_system_timezone, **kwargs):
    with (
        mock_system_timezone(override_system_timezone)
        if override_system_timezone
        else nullcontext()
    ):
        with ipc_write_stream(output_file) as stream:
            with DagsterInstance.get() as instance:
                repository_origin = get_repository_origin_from_kwargs(kwargs)
                job_origin = repository_origin.get_job_origin(schedule_name)

                # open the tick scope before we load any external artifacts so that
                # load errors are stored in DB
                with _schedule_tick_context(
                    instance,
                    stream,
                    JobTickData(
                        job_origin_id=job_origin.get_id(),
                        job_name=schedule_name,
                        job_type=JobType.SCHEDULE,
                        status=JobTickStatus.STARTED,
                        timestamp=time.time(),
                    ),
                ) as tick_context:
                    with get_repository_location_from_kwargs(kwargs) as repo_location:
                        repo_dict = repo_location.get_repositories()
                        check.invariant(
                            repo_dict and len(repo_dict) == 1,
                            "Passed in arguments should reference exactly one repository, instead there are {num_repos}".format(
                                num_repos=len(repo_dict)
                            ),
                        )
                        external_repo = next(iter(repo_dict.values()))
                        if schedule_name not in [
                            schedule.name
                            for schedule in external_repo.get_external_schedules()
                        ]:
                            raise DagsterInvariantViolationError(
                                "Could not find schedule named {schedule_name}".format(
                                    schedule_name=schedule_name
                                ),
                            )
                        external_schedule = external_repo.get_external_schedule(schedule_name)

                        # Validate that either the schedule has no timezone or it matches
                        # the system timezone
                        schedule_timezone = external_schedule.execution_timezone
                        if schedule_timezone:
                            system_timezone = pendulum.now().timezone.name
                            if system_timezone != external_schedule.execution_timezone:
                                raise DagsterInvariantViolationError(
                                    "Schedule {schedule_name} is set to execute in {schedule_timezone}, "
                                    "but this scheduler can only run in the system timezone, "
                                    "{system_timezone}. Use DagsterDaemonScheduler if you want to be able "
                                    "to execute schedules in arbitrary timezones.".format(
                                        schedule_name=external_schedule.name,
                                        schedule_timezone=schedule_timezone,
                                        system_timezone=system_timezone,
                                    ),
                                )

                        _launch_scheduled_executions(
                            instance, repo_location, external_repo, external_schedule, tick_context
                        )
def launch_scheduled_runs_for_schedule(
    instance,
    logger,
    schedule_state,
    repo_location,
    end_datetime_utc,
    max_catchup_runs,
    debug_crash_flags=None,
):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(schedule_state, "schedule_state", JobState)
    check.inst_param(end_datetime_utc, "end_datetime_utc", datetime.datetime)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)

    latest_tick = instance.get_latest_job_tick(schedule_state.job_origin_id)

    if not latest_tick:
        start_timestamp_utc = schedule_state.job_specific_data.start_timestamp
    elif latest_tick.status == JobTickStatus.STARTED:
        # Scheduler was interrupted while performing this tick, re-do it
        start_timestamp_utc = latest_tick.timestamp
    else:
        start_timestamp_utc = latest_tick.timestamp + 1

    schedule_name = schedule_state.job_name
    repo_dict = repo_location.get_repositories()
    check.invariant(
        len(repo_dict) == 1,
        "Reconstructed repository location should have exactly one repository",
    )
    external_repo = next(iter(repo_dict.values()))
    external_schedule = external_repo.get_external_schedule(schedule_name)

    timezone_str = external_schedule.execution_timezone
    if not timezone_str:
        timezone_str = pendulum.now().timezone.name
        logger.warn(
            f"Using the system timezone, {timezone_str}, for {external_schedule.name} as it did not specify "
            "an execution_timezone in its definition. Specifying an execution_timezone "
            "on all schedules will be required in the dagster 0.11.0 release."
        )

    end_datetime = end_datetime_utc.in_tz(timezone_str)

    tick_times = []
    for next_time in external_schedule.execution_time_iterator(start_timestamp_utc):
        if next_time.timestamp() > end_datetime.timestamp():
            break
        tick_times.append(next_time)

    if not tick_times:
        logger.info(f"No new runs for {schedule_name}")
        return

    if not external_schedule.partition_set_name and len(tick_times) > 1:
        logger.warning(f"{schedule_name} has no partition set, so not trying to catch up")
        tick_times = tick_times[-1:]
    elif len(tick_times) > max_catchup_runs:
        logger.warning(f"{schedule_name} has fallen behind, only launching {max_catchup_runs} runs")
        tick_times = tick_times[-max_catchup_runs:]

    if len(tick_times) == 1:
        tick_time = tick_times[0].strftime(_SCHEDULER_DATETIME_FORMAT)
        logger.info(f"Evaluating schedule `{schedule_name}` at {tick_time}")
    else:
        times = ", ".join([time.strftime(_SCHEDULER_DATETIME_FORMAT) for time in tick_times])
        logger.info(f"Evaluating schedule `{schedule_name}` at the following times: {times}")

    for tick_time in tick_times:
        schedule_time = pendulum.instance(tick_time).in_tz(timezone_str)
        schedule_timestamp = schedule_time.timestamp()

        if latest_tick and latest_tick.timestamp == schedule_timestamp:
            tick = latest_tick
            logger.info("Resuming previously interrupted schedule execution")
        else:
            tick = instance.create_job_tick(
                JobTickData(
                    job_origin_id=external_schedule.get_external_origin_id(),
                    job_name=schedule_name,
                    job_type=JobType.SCHEDULE,
                    status=JobTickStatus.STARTED,
                    timestamp=schedule_timestamp,
                )
            )

        _check_for_debug_crash(debug_crash_flags, "TICK_CREATED")

        with _ScheduleLaunchContext(tick, instance, logger) as tick_context:
            _check_for_debug_crash(debug_crash_flags, "TICK_HELD")
            _schedule_runs_at_time(
                instance,
                logger,
                repo_location,
                external_repo,
                external_schedule,
                schedule_time,
                tick_context,
                debug_crash_flags,
            )
def launch_scheduled_runs_for_schedule(
    instance,
    logger,
    schedule_state,
    repo_location,
    end_datetime_utc,
    max_catchup_runs,
    debug_crash_flags=None,
):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(schedule_state, "schedule_state", JobState)
    check.inst_param(end_datetime_utc, "end_datetime_utc", datetime.datetime)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)

    latest_tick = instance.get_latest_job_tick(schedule_state.job_origin_id)

    if not latest_tick:
        start_timestamp_utc = schedule_state.job_specific_data.start_timestamp
    elif latest_tick.status == JobTickStatus.STARTED:
        # Scheduler was interrupted while performing this tick, re-do it
        start_timestamp_utc = latest_tick.timestamp
    else:
        start_timestamp_utc = latest_tick.timestamp + 1

    schedule_name = schedule_state.job_name
    repo_name = schedule_state.origin.external_repository_origin.repository_name

    if not repo_location.has_repository(repo_name):
        raise DagsterSchedulerError(
            f"Could not find repository {repo_name} in location {repo_location.name} to "
            + f"run schedule {schedule_name}. If this repository no longer exists, you can "
            + "turn off the schedule in the Dagit UI.",
        )

    external_repo = repo_location.get_repository(repo_name)
    if not external_repo.has_external_schedule(schedule_name):
        raise DagsterSchedulerError(
            f"Could not find schedule {schedule_name} in repository {repo_name}. If this "
            "schedule no longer exists, you can turn it off in the Dagit UI.",
        )

    external_schedule = external_repo.get_external_schedule(schedule_name)

    timezone_str = external_schedule.execution_timezone
    if not timezone_str:
        timezone_str = "UTC"
        logger.warn(
            f"Using UTC as the timezone for {external_schedule.name} as it did not specify "
            "an execution_timezone in its definition."
        )

    tick_times = []
    for next_time in external_schedule.execution_time_iterator(start_timestamp_utc):
        if next_time.timestamp() > end_datetime_utc.timestamp():
            break
        tick_times.append(next_time)

    if not tick_times:
        logger.info(f"No new runs for {schedule_name}")
        return

    if not external_schedule.partition_set_name and len(tick_times) > 1:
        logger.warning(f"{schedule_name} has no partition set, so not trying to catch up")
        tick_times = tick_times[-1:]
    elif len(tick_times) > max_catchup_runs:
        logger.warning(f"{schedule_name} has fallen behind, only launching {max_catchup_runs} runs")
        tick_times = tick_times[-max_catchup_runs:]

    if len(tick_times) == 1:
        tick_time = tick_times[0].strftime(_SCHEDULER_DATETIME_FORMAT)
        logger.info(f"Evaluating schedule `{schedule_name}` at {tick_time}")
    else:
        times = ", ".join([time.strftime(_SCHEDULER_DATETIME_FORMAT) for time in tick_times])
        logger.info(f"Evaluating schedule `{schedule_name}` at the following times: {times}")

    for schedule_time in tick_times:
        schedule_timestamp = schedule_time.timestamp()
        if latest_tick and latest_tick.timestamp == schedule_timestamp:
            tick = latest_tick
            logger.info("Resuming previously interrupted schedule execution")
        else:
            tick = instance.create_job_tick(
                JobTickData(
                    job_origin_id=external_schedule.get_external_origin_id(),
                    job_name=schedule_name,
                    job_type=JobType.SCHEDULE,
                    status=JobTickStatus.STARTED,
                    timestamp=schedule_timestamp,
                )
            )

        _check_for_debug_crash(debug_crash_flags, "TICK_CREATED")

        with _ScheduleLaunchContext(tick, instance, logger) as tick_context:
            _check_for_debug_crash(debug_crash_flags, "TICK_HELD")
            yield from _schedule_runs_at_time(
                instance,
                logger,
                repo_location,
                external_repo,
                external_schedule,
                schedule_time,
                tick_context,
                debug_crash_flags,
            )
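# Because this newer variant yields (via `yield from _schedule_runs_at_time`),
# callers must drain the generator for any scheduling work to happen. A hedged
# sketch of a daemon-style call site; the function name, `schedule_states`
# iterable, and the max_catchup_runs value are assumptions for illustration:
def _run_scheduler_once(instance, logger, schedule_states, repo_location):
    end_datetime_utc = pendulum.now("UTC")
    for schedule_state in schedule_states:
        # list() exhausts the generator, forcing each tick to be evaluated
        list(
            launch_scheduled_runs_for_schedule(
                instance,
                logger,
                schedule_state,
                repo_location,
                end_datetime_utc,
                max_catchup_runs=5,
            )
        )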
def build_job_tick_data(
    self,
    current_time,
    status=JobTickStatus.STARTED,
    run_id=None,
    error=None,
    run_key=None,
):
    return JobTickData(
        "my_sensor",
        "my_sensor",
        JobType.SENSOR,
        status,
        current_time,
        run_id,
        error,
        run_key,
    )
def launch_scheduled_runs_for_schedule(
    instance,
    logger,
    schedule_state,
    repo_location,
    end_datetime_utc,
    max_catchup_runs,
    debug_crash_flags=None,
):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(schedule_state, "schedule_state", JobState)
    check.inst_param(end_datetime_utc, "end_datetime_utc", datetime.datetime)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)

    latest_tick = instance.get_latest_job_tick(schedule_state.job_origin_id)

    if not latest_tick:
        start_timestamp_utc = schedule_state.job_specific_data.start_timestamp
    elif latest_tick.status == JobTickStatus.STARTED:
        # Scheduler was interrupted while performing this tick, re-do it
        start_timestamp_utc = latest_tick.timestamp
    else:
        start_timestamp_utc = latest_tick.timestamp + 1

    schedule_name = schedule_state.job_name
    repo_dict = repo_location.get_repositories()
    check.invariant(
        len(repo_dict) == 1,
        "Reconstructed repository location should have exactly one repository",
    )
    external_repo = next(iter(repo_dict.values()))
    external_schedule = external_repo.get_external_schedule(schedule_name)

    timezone_str = external_schedule.execution_timezone
    if not timezone_str:
        logger.error(
            "Scheduler could not run for {schedule_name} as it did not specify "
            "an execution_timezone in its definition.".format(schedule_name=schedule_name)
        )
        return

    end_datetime = end_datetime_utc.in_tz(timezone_str)

    tick_times = []
    for next_time in external_schedule.execution_time_iterator(start_timestamp_utc):
        if next_time.timestamp() > end_datetime.timestamp():
            break
        tick_times.append(next_time)

    if not tick_times:
        logger.info("No new runs for {schedule_name}".format(schedule_name=schedule_name))
        return

    if len(tick_times) > max_catchup_runs:
        logger.warning(
            "{schedule_name} has fallen behind, only launching {max_catchup_runs} runs".format(
                schedule_name=schedule_name, max_catchup_runs=max_catchup_runs
            )
        )
        tick_times = tick_times[-max_catchup_runs:]

    if len(tick_times) == 1:
        logger.info(
            "Launching run for {schedule_name} at {time}".format(
                schedule_name=schedule_name,
                time=tick_times[0].strftime(_SCHEDULER_DATETIME_FORMAT),
            )
        )
    else:
        logger.info(
            "Launching {num_runs} runs for {schedule_name} at the following times: {times}".format(
                num_runs=len(tick_times),
                schedule_name=schedule_name,
                times=", ".join(
                    [time.strftime(_SCHEDULER_DATETIME_FORMAT) for time in tick_times]
                ),
            )
        )

    for tick_time in tick_times:
        schedule_time = pendulum.instance(tick_time).in_tz(timezone_str)
        schedule_timestamp = schedule_time.timestamp()

        if latest_tick and latest_tick.timestamp == schedule_timestamp:
            tick = latest_tick
            logger.info("Resuming previously interrupted schedule execution")
        else:
            tick = instance.create_job_tick(
                JobTickData(
                    job_origin_id=external_schedule.get_external_origin_id(),
                    job_name=schedule_name,
                    job_type=JobType.SCHEDULE,
                    status=JobTickStatus.STARTED,
                    timestamp=schedule_timestamp,
                )
            )

        _check_for_debug_crash(debug_crash_flags, "TICK_CREATED")

        with ScheduleTickHolder(tick, instance, logger) as tick_holder:
            _check_for_debug_crash(debug_crash_flags, "TICK_HELD")
            _schedule_run_at_time(
                instance,
                logger,
                repo_location,
                external_repo,
                external_schedule,
                schedule_time,
                tick_holder,
                debug_crash_flags,
            )