def get_current_instigator_state(
        self, stored_state: Optional["InstigatorState"]):
    """Return the instigator state that should be used for this schedule.

    A stored state wins unless it is an ``AUTOMATICALLY_RUNNING`` row while
    the schedule's default status is not ``RUNNING``; in that case, or when
    nothing is stored, a fresh state derived from the default status is
    returned.

    Args:
        stored_state: The state loaded from storage, or ``None``.

    Returns:
        Either ``stored_state`` itself or a newly built ``InstigatorState``.
    """
    from dagster.core.scheduler.instigation import (
        InstigatorState,
        InstigatorStatus,
        ScheduleInstigatorData,
    )

    runs_by_default = self.default_status == DefaultScheduleStatus.RUNNING

    # A lingering AUTOMATICALLY_RUNNING row is only meaningful while the code
    # still declares the schedule as running by default; once the default has
    # been changed in code, such a row is ignored.
    if stored_state and (
            runs_by_default
            or stored_state.status != InstigatorStatus.AUTOMATICALLY_RUNNING):
        return stored_state

    if runs_by_default:
        fallback_status = InstigatorStatus.AUTOMATICALLY_RUNNING
    else:
        fallback_status = InstigatorStatus.STOPPED

    return InstigatorState(
        self.get_external_origin(),
        InstigatorType.SCHEDULE,
        fallback_status,
        ScheduleInstigatorData(self.cron_schedule, start_timestamp=None),
    )
def get_current_instigator_state(
        self, stored_state: Optional["InstigatorState"]):
    """Return the instigator state that should be used for this sensor.

    A stored state wins unless it is an ``AUTOMATICALLY_RUNNING`` row while
    the sensor's default status is not ``RUNNING``; in that case, or when
    nothing is stored, a fresh state derived from the default status is
    returned.

    Args:
        stored_state: The state loaded from storage, or ``None``.

    Returns:
        Either ``stored_state`` itself or a newly built ``InstigatorState``.
    """
    from dagster.core.scheduler.instigation import (
        InstigatorState,
        InstigatorStatus,
        SensorInstigatorData,
    )

    runs_by_default = self.default_status == DefaultSensorStatus.RUNNING

    if stored_state:
        if runs_by_default:
            return stored_state
        # An AUTOMATICALLY_RUNNING row is ignored when the default status is
        # not DefaultSensorStatus.RUNNING: it indicates the sensor's default
        # has changed since the row was written.
        if stored_state.status != InstigatorStatus.AUTOMATICALLY_RUNNING:
            return stored_state

    fallback_status = (InstigatorStatus.AUTOMATICALLY_RUNNING
                       if runs_by_default else InstigatorStatus.STOPPED)
    return InstigatorState(
        self.get_external_origin(),
        InstigatorType.SENSOR,
        fallback_status,
        SensorInstigatorData(min_interval=self.min_interval_seconds),
    )
def test_unloadable_schedule(graphql_context):
    """Only the RUNNING unloadable schedule is reported, and it can be stopped."""
    instance = graphql_context.instance
    frozen_now = create_pendulum_time(
        year=2019, month=2, day=27, hour=23, minute=59, second=59)

    # The running state is built before time is frozen, so its start
    # timestamp comes from the real clock.
    running_state = InstigatorState(
        _get_unloadable_schedule_origin("unloadable_running"),
        InstigatorType.SCHEDULE,
        InstigatorStatus.RUNNING,
        ScheduleInstigatorData("0 0 * * *", pendulum.now("UTC").timestamp()),
    )
    stopped_origin = _get_unloadable_schedule_origin("unloadable_stopped")

    with pendulum.test(frozen_now):
        instance.add_instigator_state(running_state)
        stopped_state = InstigatorState(
            stopped_origin,
            InstigatorType.SCHEDULE,
            InstigatorStatus.STOPPED,
            ScheduleInstigatorData("0 0 * * *",
                                   pendulum.now("UTC").timestamp()),
        )
        instance.add_instigator_state(stopped_state)

    result = execute_dagster_graphql(graphql_context, GET_UNLOADABLE_QUERY)
    assert len(
        result.data["unloadableInstigationStatesOrError"]["results"]) == 1
    first_unloadable = result.data["unloadableInstigationStatesOrError"][
        "results"][0]
    assert first_unloadable["name"] == "unloadable_running"

    # Verify that we can stop the unloadable schedule
    stop_variables = {
        "scheduleOriginId": running_state.instigator_origin_id,
        "scheduleSelectorId": running_state.selector_id,
    }
    stop_result = execute_dagster_graphql(
        graphql_context, STOP_SCHEDULES_QUERY, variables=stop_variables)
    stopped_status = stop_result.data["stopRunningSchedule"]["scheduleState"][
        "status"]
    assert stopped_status == InstigatorStatus.STOPPED.value
def _create_new_schedule_state(self, instance, external_schedule):
    """Build a STOPPED state for ``external_schedule``, store it, and return it.

    Args:
        instance: Object whose ``add_job_state`` receives the new state.
        external_schedule: Supplies the external origin and cron schedule.

    Returns:
        The newly created ``InstigatorState``.
    """
    origin = external_schedule.get_external_origin()
    schedule_data = ScheduleInstigatorData(external_schedule.cron_schedule)
    new_state = InstigatorState(
        origin,
        InstigatorType.SCHEDULE,
        InstigatorStatus.STOPPED,
        schedule_data,
    )
    instance.add_job_state(new_state)
    return new_state
def test_get_unloadable_job(graphql_context):
    """Of a RUNNING and a STOPPED unloadable schedule, only the RUNNING one is listed."""
    instance = graphql_context.instance
    frozen_now = create_pendulum_time(
        year=2019, month=2, day=27, hour=23, minute=59, second=59)

    fixtures = (
        ("unloadable_running", InstigatorStatus.RUNNING),
        ("unloadable_stopped", InstigatorStatus.STOPPED),
    )
    with pendulum.test(frozen_now):
        for origin_name, status in fixtures:
            instance.add_job_state(
                InstigatorState(
                    _get_unloadable_schedule_origin(origin_name),
                    InstigatorType.SCHEDULE,
                    status,
                    ScheduleInstigatorData(
                        "0 0 * * *",
                        pendulum.now("UTC").timestamp(),
                    ),
                ))

    result = execute_dagster_graphql(graphql_context, GET_UNLOADABLE_QUERY)
    assert len(
        result.data["unloadableInstigationStatesOrError"]["results"]) == 1
    first_unloadable = result.data["unloadableInstigationStatesOrError"][
        "results"][0]
    assert first_unloadable["name"] == "unloadable_running"
def build_schedule(
    cls,
    schedule_name,
    cron_schedule,
    status=InstigatorStatus.STOPPED,
):
    """Build a schedule ``InstigatorState`` against the fake repo target.

    Args:
        schedule_name: Name used to look up the instigator origin.
        cron_schedule: Cron string stored in the schedule data.
        status: Status of the resulting state (STOPPED by default).

    Returns:
        An ``InstigatorState`` of type SCHEDULE with no start timestamp.
    """
    origin = cls.fake_repo_target().get_instigator_origin(schedule_name)
    schedule_data = ScheduleInstigatorData(cron_schedule,
                                           start_timestamp=None)
    return InstigatorState(origin, InstigatorType.SCHEDULE, status,
                           schedule_data)
def test_unloadable_sensor(graphql_context):
    """Only the RUNNING unloadable sensor is reported, and it can be stopped."""
    instance = graphql_context.instance

    running_state = InstigatorState(
        _get_unloadable_sensor_origin("unloadable_running"),
        InstigatorType.SENSOR,
        InstigatorStatus.RUNNING,
        SensorInstigatorData(min_interval=30, cursor=None),
    )
    stopped_origin = _get_unloadable_sensor_origin("unloadable_stopped")

    instance.add_instigator_state(running_state)
    stopped_state = InstigatorState(
        stopped_origin,
        InstigatorType.SENSOR,
        InstigatorStatus.STOPPED,
        SensorInstigatorData(min_interval=30, cursor=None),
    )
    instance.add_instigator_state(stopped_state)

    result = execute_dagster_graphql(graphql_context, GET_UNLOADABLE_QUERY)
    assert len(
        result.data["unloadableInstigationStatesOrError"]["results"]) == 1
    first_unloadable = result.data["unloadableInstigationStatesOrError"][
        "results"][0]
    assert first_unloadable["name"] == "unloadable_running"

    # Verify that we can stop the unloadable sensor
    stop_variables = {
        "jobOriginId": running_state.instigator_origin_id,
        "jobSelectorId": running_state.selector_id,
    }
    stop_result = execute_dagster_graphql(
        graphql_context, STOP_SENSORS_QUERY, variables=stop_variables)
    stopped_status = stop_result.data["stopSensor"]["instigationState"][
        "status"]
    assert stopped_status == InstigatorStatus.STOPPED.value
def get_default_instigation_state(self):
    """Return a STOPPED sensor ``InstigatorState`` for this sensor's origin.

    The state carries only the sensor's minimum interval as instigator data.
    """
    from dagster.core.scheduler.instigation import (
        InstigatorState,
        InstigatorStatus,
        SensorInstigatorData,
    )

    origin = self.get_external_origin()
    sensor_data = SensorInstigatorData(min_interval=self.min_interval_seconds)
    return InstigatorState(origin, InstigatorType.SENSOR,
                           InstigatorStatus.STOPPED, sensor_data)
def get_default_instigation_state(self):
    """Return a STOPPED schedule ``InstigatorState`` for this schedule's origin.

    The state carries the cron schedule and no start timestamp.
    """
    from dagster.core.scheduler.instigation import (
        InstigatorState,
        InstigatorStatus,
        ScheduleInstigatorData,
    )

    origin = self.get_external_origin()
    schedule_data = ScheduleInstigatorData(self.cron_schedule,
                                           start_timestamp=None)
    return InstigatorState(origin, InstigatorType.SCHEDULE,
                           InstigatorStatus.STOPPED, schedule_data)
def execute_cursor_command(sensor_name, cli_args, print_fn):
    """Set or clear the stored cursor of a sensor.

    Exactly one of ``cli_args["set"]`` / ``cli_args["delete"]`` must be
    truthy. If the sensor has no stored state, a STOPPED state carrying the
    cursor is added; otherwise the stored state's data is replaced, keeping
    its last tick timestamp and last run key when it has instigator data.

    Args:
        sensor_name: Name of the sensor to look up in the repository.
        cli_args: Mapping of CLI options; ``"set"``, ``"delete"`` and
            ``"repository"`` are read here and the whole mapping is passed
            on to load the repository location.
        print_fn: Callable that receives the confirmation message.

    Raises:
        click.UsageError: If neither or both of ``set`` / ``delete`` are given.
    """
    with DagsterInstance.get() as instance:
        with get_repository_location_from_kwargs(
                instance, version=dagster_version,
                kwargs=cli_args) as repo_location:
            if bool(cli_args.get("delete")) == bool(cli_args.get("set")):
                # must use one of delete/set
                raise click.UsageError(
                    "Must set cursor using `--set <value>` or use `--delete`")

            # With --delete, "set" is absent/falsy and the cursor is cleared.
            cursor_value = cli_args.get("set")

            external_repo = get_external_repository_from_repo_location(
                repo_location, cli_args.get("repository"))
            check_repo_and_scheduler(external_repo, instance)
            external_sensor = external_repo.get_external_sensor(sensor_name)
            job_state = instance.get_instigator_state(
                external_sensor.get_external_origin_id(),
                external_sensor.selector_id)

            if not job_state:
                instance.add_instigator_state(
                    InstigatorState(
                        external_sensor.get_external_origin(),
                        InstigatorType.SENSOR,
                        InstigatorStatus.STOPPED,
                        SensorInstigatorData(
                            min_interval=external_sensor.min_interval_seconds,
                            cursor=cursor_value),
                    ))
            else:
                # A stored state may have been created without instigator
                # data; only carry tick bookkeeping over when it exists so
                # the command does not fail with AttributeError.
                previous_data = job_state.instigator_data
                carried_over = {}
                if previous_data is not None:
                    carried_over = {
                        "last_tick_timestamp":
                        previous_data.last_tick_timestamp,
                        "last_run_key": previous_data.last_run_key,
                    }
                instance.update_instigator_state(
                    job_state.with_data(
                        SensorInstigatorData(
                            min_interval=external_sensor.min_interval_seconds,
                            cursor=cursor_value,
                            **carried_over,
                        )))

            if cursor_value:
                print_fn(
                    f'Set cursor state for sensor {external_sensor.name} to "{cursor_value}"'
                )
            else:
                print_fn(
                    f"Cleared cursor state for sensor {external_sensor.name}")
def test_sensor_next_ticks(graphql_context):
    """``nextTick`` is only reported once a running sensor has a tick."""
    repo_location = graphql_context.get_repository_location(
        main_repo_location_name())
    external_repository = repo_location.get_repository(main_repo_name())

    sensor_name = "always_no_config_sensor"
    external_sensor = external_repository.get_external_sensor(sensor_name)
    sensor_selector = infer_sensor_selector(graphql_context, sensor_name)

    def query_sensor():
        # Every phase of the test issues the same query.
        return execute_dagster_graphql(
            graphql_context,
            GET_SENSOR_QUERY,
            variables={"sensorSelector": sensor_selector},
        )

    # test default sensor off
    result = query_sensor()
    assert result.data
    assert result.data["sensorOrError"]["__typename"] == "Sensor"
    assert not result.data["sensorOrError"]["nextTick"]

    # test default sensor with no tick
    running_state = InstigatorState(
        external_sensor.get_external_origin(),
        InstigatorType.SENSOR,
        InstigatorStatus.RUNNING,
    )
    graphql_context.instance.add_instigator_state(running_state)

    result = query_sensor()
    assert result.data
    assert len(result.data["sensorOrError"]["sensorState"]["ticks"]) == 0
    assert result.data["sensorOrError"]["__typename"] == "Sensor"
    assert not result.data["sensorOrError"]["nextTick"]

    # test default sensor with last tick
    _create_tick(graphql_context)

    result = query_sensor()
    assert len(result.data["sensorOrError"]["sensorState"]["ticks"]) == 1
    assert result.data
    assert result.data["sensorOrError"]["__typename"] == "Sensor"
    assert result.data["sensorOrError"]["nextTick"]
def start_schedule(self, instance, external_schedule):
    """
    Updates the status of the given schedule to `InstigatorStatus.RUNNING` in schedule storage.

    This should not be overridden by subclasses.

    Args:
        instance (DagsterInstance): The current instance.
        external_schedule (ExternalSchedule): The schedule to start

    Returns:
        The started ``InstigatorState`` that was added or updated.

    Raises:
        DagsterSchedulerError: If the schedule is already running.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(external_schedule, "external_schedule", ExternalSchedule)

    stored_state = instance.get_instigator_state(
        external_schedule.get_external_origin_id(),
        external_schedule.selector_id)

    effective_state = external_schedule.get_current_instigator_state(
        stored_state)
    if effective_state.is_running:
        raise DagsterSchedulerError(
            "You have attempted to start schedule {name}, but it is already running"
            .format(name=external_schedule.name))

    # The start timestamp is the moment the schedule is turned on.
    running_data = ScheduleInstigatorData(
        external_schedule.cron_schedule,
        get_current_datetime_in_utc().timestamp(),
    )

    if stored_state:
        started_state = stored_state.with_status(
            InstigatorStatus.RUNNING).with_data(running_data)
        instance.update_instigator_state(started_state)
        return started_state

    started_state = InstigatorState(
        external_schedule.get_external_origin(),
        InstigatorType.SCHEDULE,
        InstigatorStatus.RUNNING,
        running_data,
    )
    instance.add_instigator_state(started_state)
    return started_state
def stop_schedule(self, instance, schedule_origin_id, schedule_selector_id,
                  external_schedule):
    """
    Updates the status of the given schedule to `InstigatorStatus.STOPPED` in schedule storage.

    This should not be overridden by subclasses.

    Args:
        instance: The instance whose instigator state is read and written.
        schedule_origin_id (string): The id of the schedule target to stop running.
        schedule_selector_id: Selector id passed with the origin id when
            looking up the stored state.
        external_schedule (Optional[ExternalSchedule]): The schedule to stop,
            or ``None`` when only a stored state is available.

    Returns:
        The stopped ``InstigatorState`` that was added or updated.

    Raises:
        DagsterSchedulerError: If the schedule is already stopped.
    """
    check.str_param(schedule_origin_id, "schedule_origin_id")
    check.opt_inst_param(external_schedule, "external_schedule",
                         ExternalSchedule)

    schedule_state = instance.get_instigator_state(schedule_origin_id,
                                                   schedule_selector_id)

    if (external_schedule and not external_schedule.
            get_current_instigator_state(schedule_state).is_running) or (
                schedule_state and not schedule_state.is_running):
        # external_schedule is optional: when this branch is reached through
        # the stored state alone, take the name from that state instead of
        # dereferencing None.
        schedule_name = (external_schedule.name if external_schedule else
                         schedule_state.instigator_name)
        raise DagsterSchedulerError(
            "You have attempted to stop schedule {name}, but it is already stopped"
            .format(name=schedule_name))

    if not schedule_state:
        # With nothing stored, the origin and cron schedule can only come
        # from the external schedule.
        assert external_schedule, (
            "external_schedule is required when no instigator state is stored"
        )
        stopped_schedule = InstigatorState(
            external_schedule.get_external_origin(),
            InstigatorType.SCHEDULE,
            InstigatorStatus.STOPPED,
            ScheduleInstigatorData(external_schedule.cron_schedule, ),
        )
        instance.add_instigator_state(stopped_schedule)
    else:
        # Rebuild the data from the stored cron schedule only, which drops
        # the stored start timestamp.
        stopped_schedule = schedule_state.with_status(
            InstigatorStatus.STOPPED).with_data(
                ScheduleInstigatorData(
                    cron_schedule=schedule_state.instigator_data.
                    cron_schedule, ))
        instance.update_instigator_state(stopped_schedule)

    return stopped_schedule
def test_sensor_tick_range(graphql_context):
    """Tick queries honour the ``dayRange`` / ``dayOffset`` window."""
    repo_location = graphql_context.get_repository_location(
        main_repo_location_name())
    external_repository = repo_location.get_repository(main_repo_name())

    sensor_name = "always_no_config_sensor"
    external_sensor = external_repository.get_external_sensor(sensor_name)
    sensor_selector = infer_sensor_selector(graphql_context, sensor_name)

    def fetch_ticks(day_range, day_offset):
        # Run the range query and return just the tick list.
        response = execute_dagster_graphql(
            graphql_context,
            GET_SENSOR_TICK_RANGE_QUERY,
            variables={
                "sensorSelector": sensor_selector,
                "dayRange": day_range,
                "dayOffset": day_offset,
            },
        )
        return response.data["sensorOrError"]["sensorState"]["ticks"]

    # test with no job state
    assert len(fetch_ticks(None, None)) == 0

    # turn the sensor on
    graphql_context.instance.add_instigator_state(
        InstigatorState(
            external_sensor.get_external_origin(),
            InstigatorType.SENSOR,
            InstigatorStatus.RUNNING,
        ))

    # One tick in each of the last three days, each an hour before "now".
    now = pendulum.now("US/Central")
    one = now.subtract(days=2).subtract(hours=1)
    two = now.subtract(days=1).subtract(hours=1)
    three = now.subtract(hours=1)
    for tick_time in (one, two, three):
        with pendulum.test(tick_time):
            _create_tick(graphql_context)

    assert len(fetch_ticks(None, None)) == 3

    last_day = fetch_ticks(1, None)
    assert len(last_day) == 1
    assert last_day[0]["timestamp"] == three.timestamp()

    day_before = fetch_ticks(1, 1)
    assert len(day_before) == 1
    assert day_before[0]["timestamp"] == two.timestamp()

    assert len(fetch_ticks(2, None)) == 2
def build_sensor(cls, sensor_name, status=InstigatorStatus.STOPPED):
    """Build a sensor ``InstigatorState`` against the fake repo target.

    Args:
        sensor_name: Name used to look up the instigator origin.
        status: Status of the resulting state (STOPPED by default).

    Returns:
        An ``InstigatorState`` of type SENSOR created without instigator data.
    """
    return InstigatorState(
        cls.fake_repo_target().get_instigator_origin(sensor_name),
        InstigatorType.SENSOR,
        status,
    )
def execute_sensor_iteration(instance,
                             logger,
                             workspace,
                             log_verbose_checks=True,
                             debug_crash_flags=None):
    """Run one evaluation pass over every running sensor in the workspace.

    This is a generator. If no sensor is running it yields once (``None``)
    and stops. Otherwise, for each running sensor it creates a STARTED tick,
    delegates to ``_evaluate_sensor`` (re-yielding whatever that yields), and
    then yields ``error_info``: ``None`` on success, or the serialized error
    if evaluating that sensor raised. Nothing is yielded for a sensor that
    is skipped because it is under its minimum interval.

    Args:
        instance (DagsterInstance): Source of stored instigator state; also
            receives new states and ticks.
        logger: Logger used for warnings, info and error messages.
        workspace (IWorkspace): Workspace whose snapshot is scanned for
            sensors.
        log_verbose_checks: When true, also log load errors, sensors whose
            RUNNING state can no longer be matched to the workspace, and the
            "no sensors" message.
        debug_crash_flags: Optional mapping from sensor name to the crash
            flags handed to ``_check_for_debug_crash`` and
            ``_evaluate_sensor``.
    """
    check.inst_param(workspace, "workspace", IWorkspace)
    check.inst_param(instance, "instance", DagsterInstance)

    # Index the workspace by location origin so stored states can be matched
    # back to a location further down.
    workspace_snapshot = {
        location_entry.origin: location_entry
        for location_entry in workspace.get_workspace_snapshot().values()
    }

    # All stored sensor states, keyed by origin id.
    all_sensor_states = {
        sensor_state.origin.get_id(): sensor_state
        for sensor_state in instance.all_instigator_state(
            instigator_type=InstigatorType.SENSOR)
    }

    # Collect every loadable sensor whose effective state (stored state
    # combined with its code-declared default) is running.
    sensors = {}
    for location_entry in workspace_snapshot.values():
        repo_location = location_entry.repository_location
        if repo_location:
            for repo in repo_location.get_repositories().values():
                for sensor in repo.get_external_sensors():
                    origin_id = sensor.get_external_origin().get_id()
                    if sensor.get_current_instigator_state(
                            all_sensor_states.get(origin_id)).is_running:
                        sensors[origin_id] = sensor
        elif location_entry.load_error and log_verbose_checks:
            logger.warning(
                f"Could not load location {location_entry.origin.location_name} to check for sensors due to the following error: {location_entry.load_error}"
            )

    if log_verbose_checks:
        # Stored RUNNING states that did not match any loaded sensor: warn
        # with the most specific reason available (location, repository, or
        # sensor missing).
        unloadable_sensor_states = {
            origin_id: sensor_state
            for origin_id, sensor_state in all_sensor_states.items()
            if origin_id not in sensors
            and sensor_state.status == InstigatorStatus.RUNNING
        }

        for sensor_state in unloadable_sensor_states.values():
            sensor_name = sensor_state.origin.instigator_name
            repo_location_origin = (
                sensor_state.origin.external_repository_origin.
                repository_location_origin)
            repo_location_name = repo_location_origin.location_name
            repo_name = sensor_state.origin.external_repository_origin.repository_name
            if (repo_location_origin not in workspace_snapshot
                    or not workspace_snapshot[repo_location_origin].
                    repository_location):
                logger.warning(
                    f"Sensor {sensor_name} was started from a location "
                    f"{repo_location_name} that can no longer be found in the workspace, or has "
                    "metadata that has changed since the sensor was started. You can turn off "
                    "this sensor in the Dagit UI from the Status tab.")
            elif not workspace_snapshot[
                    repo_location_origin].repository_location.has_repository(
                        repo_name):
                logger.warning(
                    f"Could not find repository {repo_name} in location {repo_location_name} to "
                    +
                    f"run sensor {sensor_name}. If this repository no longer exists, you can "
                    +
                    "turn off the sensor in the Dagit UI from the Status tab.",
                )
            else:
                logger.warning(
                    f"Could not find sensor {sensor_name} in repository {repo_name}. If this "
                    "sensor no longer exists, you can turn it off in the Dagit UI from the "
                    "Status tab.", )

    if not sensors:
        if log_verbose_checks:
            logger.info(
                "Not checking for any runs since no sensors have been started."
            )
        # Yield once so the consumer still gets a step from this iteration.
        yield
        return

    # A single timestamp is shared by every tick created in this pass.
    now = pendulum.now("UTC")

    for external_sensor in sensors.values():
        sensor_name = external_sensor.name
        sensor_debug_crash_flags = debug_crash_flags.get(
            sensor_name) if debug_crash_flags else None
        error_info = None
        try:
            sensor_state = all_sensor_states.get(
                external_sensor.get_external_origin().get_id())
            if not sensor_state:
                # Running with no stored state is only possible when the
                # sensor is declared running by default; persist an
                # AUTOMATICALLY_RUNNING state for it.
                assert external_sensor.default_status == DefaultSensorStatus.RUNNING
                sensor_state = InstigatorState(
                    external_sensor.get_external_origin(),
                    InstigatorType.SENSOR,
                    InstigatorStatus.AUTOMATICALLY_RUNNING,
                    SensorInstigatorData(
                        min_interval=external_sensor.min_interval_seconds),
                )
                instance.add_instigator_state(sensor_state)
            elif _is_under_min_interval(sensor_state, external_sensor, now):
                # Skipped entirely: no tick is created and nothing is yielded.
                continue

            tick = instance.create_tick(
                TickData(
                    instigator_origin_id=sensor_state.instigator_origin_id,
                    instigator_name=sensor_state.instigator_name,
                    instigator_type=InstigatorType.SENSOR,
                    status=TickStatus.STARTED,
                    timestamp=now.timestamp(),
                    selector_id=external_sensor.selector_id,
                ))

            # Crash checkpoint right after the tick has been created.
            _check_for_debug_crash(sensor_debug_crash_flags, "TICK_CREATED")

            with SensorLaunchContext(external_sensor, sensor_state, tick,
                                     instance, logger) as tick_context:
                # Crash checkpoint while the launch context is open.
                _check_for_debug_crash(sensor_debug_crash_flags, "TICK_HELD")
                yield from _evaluate_sensor(
                    tick_context,
                    instance,
                    workspace,
                    external_sensor,
                    sensor_state,
                    sensor_debug_crash_flags,
                )
        except Exception:
            # A failure in one sensor is logged and reported through the
            # yielded value; the loop continues with the next sensor.
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            logger.error(
                "Sensor daemon caught an error for sensor {sensor_name} : {error_info}"
                .format(
                    sensor_name=external_sensor.name,
                    error_info=error_info.to_string(),
                ))
        yield error_info
def test_failure_after_run_launched(crash_location, crash_signal, capfd):
    """A crash after the run was launched must not launch that run key again."""
    frozen_datetime = to_timezone(
        create_pendulum_time(year=2019,
                             month=2,
                             day=28,
                             hour=0,
                             minute=0,
                             second=0,
                             tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors() as (
            instance,
            _grpc_server_registry,
            external_repo,
    ), pendulum.test(frozen_datetime):

        def run_daemon_in_subprocess(at_time, crash_flags):
            # Run one sensor pass in a child process and wait for it to exit.
            process = spawn_ctx.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), at_time, crash_flags],
            )
            process.start()
            process.join(timeout=60)
            return process

        external_sensor = external_repo.get_external_sensor("run_key_sensor")
        running_state = InstigatorState(
            external_sensor.get_external_origin(),
            InstigatorType.SENSOR,
            InstigatorStatus.RUNNING,
        )
        instance.add_instigator_state(running_state)

        # create a run, launch but crash
        debug_crash_flags = {
            external_sensor.name: {
                crash_location: crash_signal
            }
        }
        crashed = run_daemon_in_subprocess(frozen_datetime, debug_crash_flags)
        assert crashed.exitcode != 0

        ticks = instance.get_ticks(external_sensor.get_external_origin_id())
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.STARTED
        assert instance.get_runs_count() == 1

        run = instance.get_runs()[0]
        wait_for_all_runs_to_start(instance)
        assert run.tags.get(SENSOR_NAME_TAG) == "run_key_sensor"
        assert run.tags.get(RUN_KEY_TAG) == "only_once"

        # Discard captured output so only the second pass is inspected.
        capfd.readouterr()

        recovered = run_daemon_in_subprocess(frozen_datetime.add(seconds=1),
                                             None)
        assert recovered.exitcode == 0

        wait_for_all_runs_to_start(instance)
        assert instance.get_runs_count() == 1
        run = instance.get_runs()[0]

        captured = capfd.readouterr()
        expected_message = 'Skipping 1 run for sensor run_key_sensor already completed with run keys: ["only_once"]'
        assert expected_message in captured.out

        ticks = instance.get_ticks(external_sensor.get_external_origin_id())
        assert len(ticks) == 2
        assert ticks[0].status == TickStatus.SKIPPED
def launch_scheduled_runs(
    instance,
    workspace,
    logger,
    end_datetime_utc,
    max_catchup_runs=DEFAULT_MAX_CATCHUP_RUNS,
    max_tick_retries=0,
    debug_crash_flags=None,
    log_verbose_checks=True,
):
    """Run one scheduler pass over every running schedule in the workspace.

    This is a generator. If no schedule is running it yields once (``None``)
    and stops. Otherwise, for each running schedule it delegates to
    ``launch_scheduled_runs_for_schedule`` (re-yielding whatever that yields)
    and then yields ``error_info``: ``None`` on success, or the serialized
    error if handling that schedule raised.

    Args:
        instance (DagsterInstance): Source of stored instigator state; also
            receives new states and deletions.
        workspace (IWorkspace): Workspace whose snapshot is scanned for
            schedules.
        logger: Logger used for debug, info, warning and error messages.
        end_datetime_utc: Passed through to the per-schedule launcher; its
            ``timestamp()`` is the start timestamp of newly created
            AUTOMATICALLY_RUNNING states.
        max_catchup_runs: Passed through to the per-schedule launcher.
        max_tick_retries: Passed through to the per-schedule launcher.
        debug_crash_flags: Optional mapping from schedule name to the crash
            flags passed to the per-schedule launcher.
        log_verbose_checks: When true, also log load errors, schedules whose
            RUNNING state can no longer be matched to the workspace, and the
            list of schedules being checked.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(workspace, "workspace", IWorkspace)

    # Index the workspace by location origin so stored states can be matched
    # back to a location further down.
    workspace_snapshot = {
        location_entry.origin: location_entry
        for location_entry in workspace.get_workspace_snapshot().values()
    }

    # All stored schedule states, keyed by origin id.
    all_schedule_states = {
        schedule_state.origin.get_id(): schedule_state
        for schedule_state in instance.all_instigator_state(instigator_type=InstigatorType.SCHEDULE)
    }

    # Collect every loadable schedule whose effective state (stored state
    # combined with its code-declared default) is running.
    schedules = {}
    for location_entry in workspace_snapshot.values():
        repo_location = location_entry.repository_location
        if repo_location:
            for repo in repo_location.get_repositories().values():
                for schedule in repo.get_external_schedules():
                    origin_id = schedule.get_external_origin().get_id()
                    if schedule.get_current_instigator_state(
                        all_schedule_states.get(origin_id)
                    ).is_running:
                        schedules[origin_id] = schedule
        elif location_entry.load_error and log_verbose_checks:
            logger.warning(
                f"Could not load location {location_entry.origin.location_name} to check for schedules due to the following error: {location_entry.load_error}"
            )

    # Remove any schedule states that were previously created with AUTOMATICALLY_RUNNING
    # and can no longer be found in the workspace (so that if they are later added
    # back again, their timestamps will start at the correct place)
    states_to_delete = {
        schedule_state
        for origin_id, schedule_state in all_schedule_states.items()
        if origin_id not in schedules
        and schedule_state.status == InstigatorStatus.AUTOMATICALLY_RUNNING
    }
    for state in states_to_delete:
        instance.schedule_storage.delete_instigator_state(
            state.instigator_origin_id, state.selector_id
        )

    if log_verbose_checks:
        # Stored RUNNING states that did not match any loaded schedule: warn
        # with the most specific reason available (location, repository, or
        # schedule missing).
        unloadable_schedule_states = {
            origin_id: schedule_state
            for origin_id, schedule_state in all_schedule_states.items()
            if origin_id not in schedules and schedule_state.status == InstigatorStatus.RUNNING
        }

        for schedule_state in unloadable_schedule_states.values():
            schedule_name = schedule_state.origin.instigator_name
            repo_location_origin = (
                schedule_state.origin.external_repository_origin.repository_location_origin
            )
            repo_location_name = repo_location_origin.location_name
            repo_name = schedule_state.origin.external_repository_origin.repository_name
            if (
                repo_location_origin not in workspace_snapshot
                or not workspace_snapshot[repo_location_origin].repository_location
            ):
                logger.warning(
                    f"Schedule {schedule_name} was started from a location "
                    f"{repo_location_name} that can no longer be found in the workspace, or has "
                    "metadata that has changed since the schedule was started. You can turn off "
                    "this schedule in the Dagit UI from the Status tab."
                )
            elif not workspace_snapshot[repo_location_origin].repository_location.has_repository(
                repo_name
            ):
                logger.warning(
                    f"Could not find repository {repo_name} in location {repo_location_name} to "
                    + f"run schedule {schedule_name}. If this repository no longer exists, you can "
                    + "turn off the schedule in the Dagit UI from the Status tab.",
                )
            else:
                logger.warning(
                    f"Could not find schedule {schedule_name} in repository {repo_name}. If this "
                    "schedule no longer exists, you can turn it off in the Dagit UI from the "
                    "Status tab.",
                )

    if not schedules:
        logger.debug("Not checking for any runs since no schedules have been started.")
        # Yield once so the consumer still gets a step from this iteration.
        yield
        return

    if log_verbose_checks:
        schedule_names = ", ".join([schedule.name for schedule in schedules.values()])
        logger.info(f"Checking for new runs for the following schedules: {schedule_names}")

    for external_schedule in schedules.values():
        error_info = None
        try:
            schedule_state = all_schedule_states.get(
                external_schedule.get_external_origin().get_id()
            )
            if not schedule_state:
                # Running with no stored state is only possible when the
                # schedule is declared running by default; persist an
                # AUTOMATICALLY_RUNNING state starting at end_datetime_utc.
                assert external_schedule.default_status == DefaultScheduleStatus.RUNNING
                schedule_state = InstigatorState(
                    external_schedule.get_external_origin(),
                    InstigatorType.SCHEDULE,
                    InstigatorStatus.AUTOMATICALLY_RUNNING,
                    ScheduleInstigatorData(
                        external_schedule.cron_schedule,
                        end_datetime_utc.timestamp(),
                    ),
                )
                instance.add_instigator_state(schedule_state)

            yield from launch_scheduled_runs_for_schedule(
                instance,
                logger,
                external_schedule,
                schedule_state,
                workspace,
                end_datetime_utc,
                max_catchup_runs,
                max_tick_retries,
                (
                    debug_crash_flags.get(schedule_state.instigator_name)
                    if debug_crash_flags
                    else None
                ),
                log_verbose_checks=log_verbose_checks,
            )
        except Exception:
            # A failure in one schedule is logged and reported through the
            # yielded value; the loop continues with the next schedule.
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            logger.error(
                f"Scheduler caught an error for schedule {external_schedule.name} : {error_info.to_string()}"
            )
        yield error_info
def test_sensor_ticks_filtered(graphql_context):
    """The ``statuses`` filter narrows the tick list to the requested status."""
    repo_location = graphql_context.get_repository_location(
        main_repo_location_name())
    external_repository = repo_location.get_repository(main_repo_name())

    sensor_name = "always_no_config_sensor"
    external_sensor = external_repository.get_external_sensor(sensor_name)
    sensor_selector = infer_sensor_selector(graphql_context, sensor_name)

    # turn the sensor on
    graphql_context.instance.add_instigator_state(
        InstigatorState(
            external_sensor.get_external_origin(),
            InstigatorType.SENSOR,
            InstigatorStatus.RUNNING,
        ))

    now = pendulum.now("US/Central")
    with pendulum.test(now):
        _create_tick(graphql_context)  # create a success tick

    # create a started tick, a skipped tick and a failed tick; only the
    # failed tick carries an error payload
    failure_error = SerializableErrorInfo(message="foobar",
                                          stack=[],
                                          cls_name=None,
                                          cause=None)
    manual_ticks = (
        (TickStatus.STARTED, {}),
        (TickStatus.SKIPPED, {}),
        (TickStatus.FAILURE, {"error": failure_error}),
    )
    for tick_status, extra_fields in manual_ticks:
        graphql_context.instance.create_tick(
            TickData(
                instigator_origin_id=external_sensor.get_external_origin().
                get_id(),
                instigator_name=sensor_name,
                instigator_type=InstigatorType.SENSOR,
                status=tick_status,
                timestamp=now.timestamp(),
                selector_id=external_sensor.selector_id,
                **extra_fields,
            ))

    result = execute_dagster_graphql(
        graphql_context,
        GET_TICKS_QUERY,
        variables={"sensorSelector": sensor_selector},
    )
    assert len(result.data["sensorOrError"]["sensorState"]["ticks"]) == 4

    for status_name in ("STARTED", "FAILURE", "SKIPPED"):
        result = execute_dagster_graphql(
            graphql_context,
            GET_TICKS_QUERY,
            variables={
                "sensorSelector": sensor_selector,
                "statuses": [status_name],
            },
        )
        matching_ticks = result.data["sensorOrError"]["sensorState"]["ticks"]
        assert len(matching_ticks) == 1
        assert matching_ticks[0]["status"] == status_name
def test_failure_before_run_created(crash_location, crash_signal, capfd):
    """A crash before any run is created must not lose the sensor's progress.

    Three sensor passes are run in subprocesses: a normal pass (tick
    SKIPPED), a pass that is made to crash via ``debug_crash_flags`` (tick
    left STARTED, no run created), and a final normal pass that launches
    exactly one run and ends in a SUCCESS tick.
    """
    frozen_datetime = to_timezone(
        create_pendulum_time(year=2019, month=2, day=28, hour=0, minute=0, second=1, tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors() as (
        instance,
        _grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(frozen_datetime):
            external_sensor = external_repo.get_external_sensor("simple_sensor")
            instance.add_instigator_state(
                InstigatorState(
                    external_sensor.get_external_origin(),
                    InstigatorType.SENSOR,
                    InstigatorStatus.RUNNING,
                )
            )

            # create a tick
            launch_process = spawn_ctx.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, None],
            )
            launch_process.start()
            launch_process.join(timeout=60)
            ticks = instance.get_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == TickStatus.SKIPPED
            # Discard output captured so far.
            capfd.readouterr()

            # create a starting tick, but crash
            debug_crash_flags = {external_sensor.name: {crash_location: crash_signal}}
            launch_process = spawn_ctx.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime.add(seconds=31), debug_crash_flags],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            # The injected crash must take the subprocess down.
            assert launch_process.exitcode != 0
            capfd.readouterr()

            # NOTE(review): ticks[0] is checked after every pass, so it
            # presumably holds the most recent tick - confirm against
            # instance.get_ticks.
            ticks = instance.get_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            assert ticks[0].status == TickStatus.STARTED
            assert not int(ticks[0].timestamp) % 2  # skip condition for simple_sensor
            assert instance.get_runs_count() == 0

            # create another tick, but ensure that the last evaluation time used is from the first,
            # successful tick rather than the failed tick
            launch_process = spawn_ctx.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime.add(seconds=62), None],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            assert launch_process.exitcode == 0
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            # NOTE(review): the expected log text is reproduced exactly as it
            # appears in this view, on one line. If the logger emits one
            # record per line, the separators before each timestamp should be
            # newlines - confirm against the original file.
            assert (
                get_logger_output_from_capfd(capfd, "dagster.daemon.SensorDaemon")
                == f"""2019-02-27 18:01:03 -0600 - dagster.daemon.SensorDaemon - INFO - Checking for new runs for sensor: simple_sensor 2019-02-27 18:01:03 -0600 - dagster.daemon.SensorDaemon - INFO - Launching run for simple_sensor 2019-02-27 18:01:03 -0600 - dagster.daemon.SensorDaemon - INFO - Completed launch of run {run.run_id} for simple_sensor"""
            )

            ticks = instance.get_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 3
            assert ticks[0].status == TickStatus.SUCCESS
def test_failure_after_run_created_before_run_launched(external_repo_context,
                                                       crash_location,
                                                       crash_signal, capfd):
    """A run created but not launched before a crash is picked up, not duplicated."""
    frozen_datetime = to_timezone(
        create_pendulum_time(year=2019,
                             month=2,
                             day=28,
                             hour=0,
                             minute=0,
                             second=0,
                             tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
            instance,
            _grpc_server_registry,
            external_repo,
    ), pendulum.test(frozen_datetime):

        def run_daemon_in_subprocess(at_time, crash_flags):
            # Run one sensor pass in a child process and wait for it to exit.
            process = multiprocessing.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), at_time, crash_flags],
            )
            process.start()
            process.join(timeout=60)
            return process

        external_sensor = external_repo.get_external_sensor("run_key_sensor")
        running_state = InstigatorState(
            external_sensor.get_external_origin(),
            InstigatorType.SENSOR,
            InstigatorStatus.RUNNING,
        )
        instance.add_job_state(running_state)

        # create a starting tick, but crash
        debug_crash_flags = {
            external_sensor.name: {
                crash_location: crash_signal
            }
        }
        crashed = run_daemon_in_subprocess(frozen_datetime, debug_crash_flags)
        assert crashed.exitcode != 0

        ticks = instance.get_job_ticks(
            external_sensor.get_external_origin_id())
        assert len(ticks) == 1
        assert ticks[0].status == TickStatus.STARTED

        assert instance.get_runs_count() == 1
        run = instance.get_runs()[0]
        # Run was created, but hasn't launched yet
        assert run.status == PipelineRunStatus.NOT_STARTED
        assert run.tags.get(SENSOR_NAME_TAG) == "run_key_sensor"
        assert run.tags.get(RUN_KEY_TAG) == "only_once"

        # clear output
        capfd.readouterr()

        recovered = run_daemon_in_subprocess(frozen_datetime.add(seconds=1),
                                             None)
        assert recovered.exitcode == 0

        wait_for_all_runs_to_start(instance)
        assert instance.get_runs_count() == 1
        run = instance.get_runs()[0]

        captured = capfd.readouterr()
        expected_message = f"Run {run.run_id} already created with the run key `only_once` for run_key_sensor"
        assert expected_message in captured.out

        ticks = instance.get_job_ticks(
            external_sensor.get_external_origin_id())
        assert len(ticks) == 2
        assert ticks[0].status == TickStatus.SUCCESS