def _wrapped_fn(context):
    """Collect recently modified files and delegate to the wrapped sensor ``fn``.

    Regular files directly inside ``directory_name`` whose modification time is
    strictly greater than ``context.last_completion_time`` (treated as 0 when
    falsy) are gathered as ``(filename, mtime)`` tuples and passed to ``fn``
    together with ``context``.

    ``directory_name``, ``fn`` and ``sensor_name`` are not defined in this
    function; they are read from the surrounding scope.

    Yields:
        A ``SkipReason`` when the directory does not exist; otherwise every
        item produced by ``fn`` when it returns a generator, or the single
        ``SkipReason`` / ``RunRequest`` it returned.

    Raises:
        DagsterInvariantViolationError: If ``fn`` returns anything other than
            a generator, a ``SkipReason``, a ``RunRequest`` or ``None``.
    """
    since = context.last_completion_time or 0

    if not os.path.isdir(directory_name):
        yield SkipReason(
            f"Could not find directory named {directory_name}.")
        return

    # Gather (name, mtime) for every plain file touched after `since`.
    fileinfo_since = []
    for entry_name in os.listdir(directory_name):
        entry_path = os.path.join(directory_name, entry_name)
        if os.path.isfile(entry_path):
            modified_at = os.stat(entry_path).st_mtime
            if modified_at > since:
                fileinfo_since.append((entry_name, modified_at))

    result = fn(context, fileinfo_since)

    if inspect.isgenerator(result):
        yield from result
        return

    if isinstance(result, (SkipReason, RunRequest)):
        yield result
        return

    if result is not None:
        raise DagsterInvariantViolationError(
            f"Error in sensor {sensor_name}: Sensor unexpectedly returned output "
            f"{result} of type {type(result)}. Should only return SkipReason or "
            "RunRequest objects.")
def skip_cursor_sensor(context):
    """Advance the integer cursor by one and skip.

    A falsy ``context.cursor`` starts the count at 1; otherwise the stored
    cursor is parsed with ``int`` and incremented. The new value is written
    back as a string via ``context.update_cursor``.

    Returns:
        An empty ``SkipReason``.
    """
    next_cursor = int(context.cursor) + 1 if context.cursor else 1
    context.update_cursor(str(next_cursor))
    return SkipReason()
def toy_file_sensor(_, modified_fileinfo):
    """Request one run per modified file, or skip when there are none.

    Args:
        _: First positional argument; unused.
        modified_fileinfo: Iterable of ``(filename, mtime)`` pairs.

    Yields:
        A single ``SkipReason`` when ``modified_fileinfo`` is empty or
        ``None``; otherwise one ``RunRequest`` per pair, keyed by
        ``"<filename>:<mtime>"`` and configuring the ``read_file`` solid with
        the directory and file name.

    ``directory_name`` is not defined in this function; it is read from the
    surrounding scope.
    """
    if not modified_fileinfo:
        yield SkipReason("No modified files")
        # Stop explicitly instead of falling through into the loop below:
        # there is nothing to request, and a None value is not iterable.
        return

    for filename, mtime in modified_fileinfo:
        yield RunRequest(
            run_key="{}:{}".format(filename, str(mtime)),
            run_config={
                "solids": {
                    "read_file": {
                        "config": {
                            "directory": directory_name,
                            "filename": filename,
                        }
                    }
                }
            },
        )
def custom_interval_sensor(_context):
    """Always skip: return an empty ``SkipReason`` and ignore the argument."""
    skip = SkipReason()
    return skip
def simple_sensor(context):
    """Request a run only when the last completion time is an odd integer.

    Returns:
        A ``RunRequest`` with no run key and empty config/tags when
        ``context.last_completion_time`` is truthy and its ``int`` value is
        odd; an empty ``SkipReason`` otherwise.
    """
    last_completion = context.last_completion_time
    if last_completion and int(last_completion) % 2:
        return RunRequest(run_key=None, run_config={}, tags={})
    return SkipReason()
def never_no_config_sensor(_):
    """Always skip with the message ``"never"``; the argument is ignored."""
    skip = SkipReason("never")
    return skip
def _wrapped_fn(context: SensorEvaluationContext):
    """Evaluate one tick of the run status sensor.

    If ``context.cursor`` is ``None`` or fails ``RunStatusSensorCursor.is_valid``,
    a fresh cursor is stored (storage id of the most recent event record, or -1
    when there is none, plus the current UTC timestamp) and a ``SkipReason`` is
    yielded.

    Otherwise up to five event records after the cursor are fetched in
    ascending order. For each one the matching run record is loaded; runs that
    are filtered out only advance the cursor, while the remaining ones are
    passed to ``run_status_sensor_fn`` before the cursor is advanced and a
    ``PipelineRunReaction`` is yielded.

    ``name``, ``pipeline_run_status``, ``pipeline_selection`` and
    ``run_status_sensor_fn`` are not defined in this function; they are read
    from the surrounding scope.

    Yields:
        ``SkipReason`` on cursor initialisation, or one ``PipelineRunReaction``
        per handled event record; its ``error`` is ``None`` unless the user
        function raised ``RunStatusSensorExecutionError``.
    """
    # initiate the cursor to (most recent event id, current timestamp) when:
    # * it's the first time starting the sensor
    # * or, the cursor isn't in a valid format (backcompat)
    if context.cursor is None or not RunStatusSensorCursor.is_valid(
            context.cursor):
        most_recent_event_records = list(
            context.instance.get_event_records(ascending=False, limit=1))
        # -1 is used as the record id when no event record was returned.
        most_recent_event_id = (most_recent_event_records[0].storage_id
                                if len(most_recent_event_records) == 1 else -1)
        new_cursor = RunStatusSensorCursor(
            update_timestamp=pendulum.now("UTC").isoformat(),
            record_id=most_recent_event_id,
        )
        context.update_cursor(new_cursor.to_json())
        yield SkipReason(
            f"Initiating {name}. Set cursor to {new_cursor}")
        return

    record_id, update_timestamp = RunStatusSensorCursor.from_json(
        context.cursor)

    # Fetch events after the cursor id
    # * we move the cursor forward to the latest visited event's id to avoid revisits
    # * when the daemon is down, because we persist the cursor info, we can go back to where we
    #   left off and backfill alerts for the qualified events (up to 5 at a time) during the downtime
    # Note: this is a cross-run query which requires extra handling in sqlite, see details in SqliteEventLogStorage.
    event_records = context.instance.get_event_records(
        EventRecordsFilter(
            after_cursor=RunShardedEventsCursor(
                id=record_id,
                run_updated_after=pendulum.parse(update_timestamp)),
            event_type=PIPELINE_RUN_STATUS_TO_EVENT_TYPE[
                pipeline_run_status],
        ),
        ascending=True,
        limit=5,
    )

    for event_record in event_records:
        event_log_entry = event_record.event_log_entry
        storage_id = event_record.storage_id

        # get run info
        run_records = context.instance.get_run_records(
            filters=PipelineRunsFilter(
                run_ids=[event_log_entry.run_id]))
        check.invariant(len(run_records) == 1)
        pipeline_run = run_records[0].pipeline_run
        # Rebinds `update_timestamp`: from here on it is the run record's
        # update timestamp (an object with `.isoformat()`), no longer the
        # string read from the cursor above.
        update_timestamp = run_records[0].update_timestamp

        # skip if any of the following happens:
        if (
                # the pipeline does not have a repository (manually executed)
                not pipeline_run.external_pipeline_origin or
                # the pipeline does not belong to the current repository
                pipeline_run.external_pipeline_origin.
                external_repository_origin.repository_name !=
                context.repository_name or
                # if pipeline is not selected
            (pipeline_selection and
             pipeline_run.pipeline_name not in pipeline_selection)):
            # Still advance the cursor so this event is not fetched again.
            context.update_cursor(
                RunStatusSensorCursor(
                    record_id=storage_id,
                    update_timestamp=update_timestamp.isoformat()).
                to_json())
            continue

        serializable_error = None

        try:
            with user_code_error_boundary(
                    RunStatusSensorExecutionError,
                    lambda:
                    f'Error occurred during the execution sensor "{name}".',
            ):
                # one user code invocation maps to one failure event
                run_status_sensor_fn(
                    RunStatusSensorContext(
                        sensor_name=name,
                        pipeline_run=pipeline_run,
                        dagster_event=event_log_entry.dagster_event,
                    ))
        except RunStatusSensorExecutionError as run_status_sensor_execution_error:
            # When the user code errors, we report the error to the sensor tick, not the original run.
            serializable_error = serializable_error_info_from_exc_info(
                run_status_sensor_execution_error.original_exc_info)

        # Advance the cursor past this event whether or not the user code failed.
        context.update_cursor(
            RunStatusSensorCursor(record_id=storage_id,
                                  update_timestamp=update_timestamp.
                                  isoformat()).to_json())

        # Yield PipelineRunReaction to indicate the execution success/failure.
        # The sensor machinery would
        # * report back to the original run if success
        # * update cursor and job state
        yield PipelineRunReaction(
            pipeline_run=pipeline_run,
            error=serializable_error,
        )