def _fn(context):
    # Parse the persisted cursor (the storage id of the last processed event), if any.
    after_cursor = None
    if context.cursor:
        try:
            after_cursor = int(context.cursor)
        except ValueError:
            after_cursor = None

    # Fetch the single most recent materialization of this asset after the cursor.
    event_records = context.instance.get_event_records(
        EventRecordsFilter(
            event_type=DagsterEventType.ASSET_MATERIALIZATION,
            asset_key=self._asset_key,
            after_cursor=after_cursor,
        ),
        ascending=False,
        limit=1,
    )

    if not event_records:
        return

    # Invoke the user-provided function, then advance the cursor past this event
    # so it is not revisited on the next evaluation.
    event_record = event_records[0]
    yield from materialization_fn(context, event_record.event_log_entry)
    context.update_cursor(str(event_record.storage_id))
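# A minimal usage sketch of the user-facing side: _fn above is what a decorator like
# the public @asset_sensor generates, so a user-defined sensor body only consumes the
# EventLogEntry that _fn fetched. Parameter names vary across Dagster versions, and
# "downstream_pipeline" is a hypothetical pipeline name used for illustration.
from dagster import AssetKey, RunRequest, asset_sensor


@asset_sensor(
    asset_key=AssetKey(["path", "to", "asset_one"]), pipeline_name="downstream_pipeline"
)
def asset_one_sensor(context, asset_event):
    # asset_event is the event_log_entry passed through materialization_fn; the cursor
    # is advanced to the record's storage_id after this body yields.
    yield RunRequest(run_key=str(asset_event.run_id))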
def test_asset_materialization(self, storage):
    asset_key = AssetKey(["path", "to", "asset_one"])

    @solid
    def materialize_one(_):
        yield AssetMaterialization(
            asset_key=asset_key,
            metadata={
                "text": "hello",
                "json": {"hello": "world"},
                "one_float": 1.0,
                "one_int": 1,
            },
        )
        yield Output(1)

    def _solids():
        materialize_one()

    with instance_for_test() as instance:
        if not storage._instance:  # pylint: disable=protected-access
            storage.register_instance(instance)

        events_one, _ = _synthesize_events(_solids, instance=instance)
        for event in events_one:
            storage.store_event(event)

        assert asset_key in set(storage.all_asset_keys())

        events = storage.get_asset_events(asset_key)
        assert len(events) == 1
        event = events[0]
        assert isinstance(event, EventLogEntry)
        assert (
            event.dagster_event.event_type_value
            == DagsterEventType.ASSET_MATERIALIZATION.value
        )

        records = storage.get_event_records(EventRecordsFilter(asset_key=asset_key))
        assert len(records) == 1
        record = records[0]
        assert isinstance(record, EventLogRecord)
        assert record.event_log_entry == event
def _wrapped_fn(context: SensorEvaluationContext):
    # Initialize the cursor to (most recent event id, current timestamp) when:
    # * it's the first time starting the sensor
    # * or the cursor isn't in a valid format (backcompat)
    if context.cursor is None or not RunStatusSensorCursor.is_valid(context.cursor):
        most_recent_event_records = list(
            context.instance.get_event_records(ascending=False, limit=1)
        )
        most_recent_event_id = (
            most_recent_event_records[0].storage_id
            if len(most_recent_event_records) == 1
            else -1
        )

        new_cursor = RunStatusSensorCursor(
            update_timestamp=pendulum.now("UTC").isoformat(),
            record_id=most_recent_event_id,
        )
        context.update_cursor(new_cursor.to_json())
        yield SkipReason(f"Initiating {name}. Set cursor to {new_cursor}")
        return

    record_id, update_timestamp = RunStatusSensorCursor.from_json(context.cursor)

    # Fetch events after the cursor id
    # * we move the cursor forward to the latest visited event's id to avoid revisits
    # * when the daemon is down, because we persist the cursor info, we can go back to
    #   where we left off and backfill alerts for the qualified events (up to 5 at a
    #   time) that occurred during the downtime
    # Note: this is a cross-run query which requires extra handling in sqlite, see
    # details in SqliteEventLogStorage.
    event_records = context.instance.get_event_records(
        EventRecordsFilter(
            after_cursor=RunShardedEventsCursor(
                id=record_id,
                run_updated_after=cast(datetime, pendulum.parse(update_timestamp)),
            ),
            event_type=PIPELINE_RUN_STATUS_TO_EVENT_TYPE[pipeline_run_status],
        ),
        ascending=True,
        limit=5,
    )

    for event_record in event_records:
        event_log_entry = event_record.event_log_entry
        storage_id = event_record.storage_id

        # get run info
        run_records = context.instance.get_run_records(
            filters=RunsFilter(run_ids=[event_log_entry.run_id])
        )

        # skip if we couldn't find the right run
        if len(run_records) != 1:
            # because we couldn't find the run, we use the event timestamp as the
            # approximate run update timestamp
            approximate_update_timestamp = utc_datetime_from_timestamp(
                event_log_entry.timestamp
            )
            context.update_cursor(
                RunStatusSensorCursor(
                    record_id=storage_id,
                    update_timestamp=approximate_update_timestamp.isoformat(),
                ).to_json()
            )
            continue

        pipeline_run = run_records[0].pipeline_run
        update_timestamp = run_records[0].update_timestamp

        # skip if any of the following is true:
        if (
            # the pipeline does not have a repository (manually executed)
            not pipeline_run.external_pipeline_origin
            or
            # the pipeline does not belong to the current repository
            pipeline_run.external_pipeline_origin.external_repository_origin.repository_name
            != context.repository_name
            or
            # the pipeline is not selected
            (pipeline_selection and pipeline_run.pipeline_name not in pipeline_selection)
            or
            # the job is not selected
            (
                job_selection
                and pipeline_run.pipeline_name not in map(lambda x: x.name, job_selection)
            )
        ):
            context.update_cursor(
                RunStatusSensorCursor(
                    record_id=storage_id, update_timestamp=update_timestamp.isoformat()
                ).to_json()
            )
            continue

        serializable_error = None

        try:
            with user_code_error_boundary(
                RunStatusSensorExecutionError,
                lambda: f'Error occurred during the execution of sensor "{name}".',
            ):
                # one user code invocation maps to one failure event
                run_status_sensor_fn(
                    RunStatusSensorContext(
                        sensor_name=name,
                        dagster_run=pipeline_run,
                        dagster_event=event_log_entry.dagster_event,
                        instance=context.instance,
                    )
                )
        except RunStatusSensorExecutionError as run_status_sensor_execution_error:
            # When the user code errors, we report the error to the sensor tick,
            # not to the original run.
            serializable_error = serializable_error_info_from_exc_info(
                run_status_sensor_execution_error.original_exc_info
            )

        context.update_cursor(
            RunStatusSensorCursor(
                record_id=storage_id, update_timestamp=update_timestamp.isoformat()
            ).to_json()
        )

        # Yield PipelineRunReaction to indicate the execution success/failure.
        # The sensor machinery will:
        # * report back to the original run on success
        # * update the cursor and job state
        yield PipelineRunReaction(
            pipeline_run=pipeline_run,
            run_status=pipeline_run_status,
            error=serializable_error,
        )
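# A sketch of the user-facing half that _wrapped_fn drives, assuming the public
# @run_status_sensor decorator of the same era (the pipeline_run_status parameter was
# later renamed). send_alert is a hypothetical helper standing in for real alerting.
from dagster import PipelineRunStatus, RunStatusSensorContext, run_status_sensor


def send_alert(message: str):
    print(message)  # placeholder for e-mail/Slack delivery


@run_status_sensor(pipeline_run_status=PipelineRunStatus.FAILURE)
def alert_on_failure(context: RunStatusSensorContext):
    # One invocation per status event fetched after the cursor; an exception raised
    # here is wrapped in RunStatusSensorExecutionError and reported on the sensor
    # tick, not on the original run.
    send_alert(
        f"Run {context.dagster_run.run_id} of {context.dagster_run.pipeline_name} failed"
    )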
def test_get_event_records_sqlite(self, storage):
    # test for sqlite only because sqlite requires special logic to handle cross-run queries
    if not isinstance(storage, SqliteEventLogStorage):
        pytest.skip()

    asset_key = AssetKey(["path", "to", "asset_one"])

    events = []

    def _append_event(event):
        events.append(event)

    @solid
    def materialize_one(_):
        yield AssetMaterialization(
            asset_key=asset_key,
            metadata={
                "text": "hello",
                "json": {"hello": "world"},
                "one_float": 1.0,
                "one_int": 1,
            },
        )
        yield Output(1)

    @pipeline(mode_defs=[_mode_def(_append_event)])
    def a_pipe():
        materialize_one()

    with instance_for_test() as instance:
        if not storage._instance:  # pylint: disable=protected-access
            storage.register_instance(instance)

        # first run
        execute_run(
            InMemoryPipeline(a_pipe),
            instance.create_run_for_pipeline(
                a_pipe,
                run_id="1",
                run_config={"loggers": {"callback": {}, "console": {}}},
            ),
            instance,
        )
        for event in events:
            storage.store_event(event)

        run_records = instance.get_run_records()
        assert len(run_records) == 1

        # all logs returned in descending order
        all_event_records = storage.get_event_records()
        assert _event_types([all_event_records[0].event_log_entry]) == [
            DagsterEventType.PIPELINE_SUCCESS
        ]
        assert _event_types([all_event_records[-1].event_log_entry]) == [
            DagsterEventType.PIPELINE_START
        ]

        # second run
        events = []
        execute_run(
            InMemoryPipeline(a_pipe),
            instance.create_run_for_pipeline(
                a_pipe,
                run_id="2",
                run_config={"loggers": {"callback": {}, "console": {}}},
            ),
            instance,
        )
        run_records = instance.get_run_records()
        assert len(run_records) == 2
        for event in events:
            storage.store_event(event)

        # third run
        events = []
        execute_run(
            InMemoryPipeline(a_pipe),
            instance.create_run_for_pipeline(
                a_pipe,
                run_id="3",
                run_config={"loggers": {"callback": {}, "console": {}}},
            ),
            instance,
        )
        run_records = instance.get_run_records()
        assert len(run_records) == 3
        for event in events:
            storage.store_event(event)

        # of_type
        filtered_records = storage.get_event_records(
            EventRecordsFilter(
                event_type=DagsterEventType.PIPELINE_SUCCESS,
                after_cursor=RunShardedEventsCursor(
                    id=0, run_updated_after=run_records[-1].update_timestamp
                ),  # events after first run
            ),
            ascending=True,
        )
        assert len(filtered_records) == 2
        assert _event_types([r.event_log_entry for r in filtered_records]) == [
            DagsterEventType.PIPELINE_SUCCESS,
            DagsterEventType.PIPELINE_SUCCESS,
        ]
        assert [r.event_log_entry.run_id for r in filtered_records] == ["2", "3"]
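# Why the sqlite test needs RunShardedEventsCursor, in sketch form: sqlite event logs
# are sharded per run, so integer storage ids are not globally ordered across runs.
# Cross-run queries therefore pair the id with a run-update watermark. Import paths
# follow the 0.13-era layout these tests use; `storage` is assumed to be a
# SqliteEventLogStorage, and successes_since is a hypothetical helper.
from dagster import DagsterEventType
from dagster.core.storage.event_log.base import EventRecordsFilter, RunShardedEventsCursor


def successes_since(storage, last_id, last_run_update):
    return storage.get_event_records(
        EventRecordsFilter(
            event_type=DagsterEventType.PIPELINE_SUCCESS,
            after_cursor=RunShardedEventsCursor(
                id=last_id, run_updated_after=last_run_update
            ),
        ),
        ascending=True,
    )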
def test_get_event_records(self, storage):
    if isinstance(storage, SqliteEventLogStorage):
        # sqlite is covered in test_get_event_records_sqlite
        pytest.skip()

    asset_key = AssetKey(["path", "to", "asset_one"])

    @solid
    def materialize_one(_):
        yield AssetMaterialization(
            asset_key=asset_key,
            metadata={
                "text": "hello",
                "json": {"hello": "world"},
                "one_float": 1.0,
                "one_int": 1,
            },
        )
        yield Output(1)

    def _solids():
        materialize_one()

    events, _ = _synthesize_events(_solids)
    for event in events:
        storage.store_event(event)

    all_records = storage.get_event_records()

    # all logs returned in descending order
    assert all_records
    min_record_num = all_records[-1].storage_id
    max_record_num = min_record_num + len(all_records) - 1
    assert [r.storage_id for r in all_records] == list(
        range(max_record_num, min_record_num - 1, -1)
    )
    assert _event_types([all_records[0].event_log_entry]) == [
        DagsterEventType.PIPELINE_SUCCESS
    ]
    assert _event_types([all_records[-1].event_log_entry]) == [
        DagsterEventType.PIPELINE_START
    ]

    # after cursor
    assert not list(
        filter(
            lambda r: r.storage_id <= 2,
            storage.get_event_records(EventRecordsFilter(after_cursor=2)),
        )
    )
    assert [
        i.storage_id
        for i in storage.get_event_records(
            EventRecordsFilter(after_cursor=min_record_num + 2), ascending=True, limit=2
        )
    ] == [min_record_num + 3, min_record_num + 4]
    assert [
        i.storage_id
        for i in storage.get_event_records(
            EventRecordsFilter(after_cursor=min_record_num + 2), ascending=False, limit=2
        )
    ] == [max_record_num, max_record_num - 1]

    filtered_records = storage.get_event_records(
        EventRecordsFilter(event_type=DagsterEventType.PIPELINE_SUCCESS)
    )
    assert _event_types([r.event_log_entry for r in filtered_records]) == [
        DagsterEventType.PIPELINE_SUCCESS
    ]
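# A paging sketch built on the semantics this test asserts: after_cursor is exclusive,
# so repeatedly passing the last storage_id seen walks the whole log without
# duplicates. This assumes a non-sqlite EventLogStorage (sqlite cross-run queries need
# the RunShardedEventsCursor shown earlier); iter_event_records is a hypothetical
# helper, not part of the storage API.
from dagster.core.storage.event_log.base import EventRecordsFilter


def iter_event_records(storage, batch_size=100):
    cursor = None
    while True:
        batch = storage.get_event_records(
            EventRecordsFilter(after_cursor=cursor), ascending=True, limit=batch_size
        )
        if not batch:
            return
        yield from batch
        # advance the exclusive cursor past the last record we have seen
        cursor = batch[-1].storage_id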