Example #1
def event_records(event_records_filter, **_kwargs):
    # Stub implementation of get_event_records: filters the in-memory
    # `asset_events` list (supplied by the enclosing test scope) by asset key
    # and cursor, returning EventLogRecords keyed by storage id.
    asset_key = event_records_filter.asset_key
    after_cursor = event_records_filter.after_cursor
    matching_events = [
        event
        for event in asset_events
        if asset_key.path[-1] == event[0]
        and (after_cursor is None or event[1] > after_cursor)
    ]
    return [
        EventLogRecord(storage_id=event[1], event_log_entry=None)
        for event in matching_events
    ]
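
For context, here is a minimal sketch of how such a stub could be exercised directly; the asset_events fixture and the SimpleNamespace stand-in for the filter object are assumptions for illustration, not part of the original test:

from types import SimpleNamespace

from dagster import AssetKey
from dagster.core.storage.event_log.base import EventLogRecord  # path may vary by dagster version

# Hypothetical in-memory event list of (asset_name, storage_id) tuples that the
# stub above closes over.
asset_events = [("asset_one", 1), ("asset_two", 2), ("asset_one", 3)]

# A stand-in exposing just the attributes the stub reads from the filter.
records = event_records(
    SimpleNamespace(asset_key=AssetKey("asset_one"), after_cursor=1)
)
assert [record.storage_id for record in records] == [3]

The actual run-sharded storage implementation that such a stub stands in for: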
    def get_event_records(
        self,
        event_records_filter: Optional[EventRecordsFilter] = None,
        limit: Optional[int] = None,
        ascending: bool = False,
    ) -> Iterable[EventLogRecord]:
        """Overridden method to enable cross-run event queries in sqlite.

        The record id in sqlite does not auto increment cross runs, so instead of fetching events
        after record id, we only fetch events whose runs updated after update_timestamp.
        """
        check.opt_inst_param(event_records_filter, "event_records_filter",
                             EventRecordsFilter)
        check.opt_int_param(limit, "limit")
        check.bool_param(ascending, "ascending")

        is_asset_query = event_records_filter and event_records_filter.event_type in (
            DagsterEventType.ASSET_MATERIALIZATION,
            DagsterEventType.ASSET_OBSERVATION,
        )
        if is_asset_query:
            # asset materializations and observations get mirrored into the index shard, so no
            # custom run shard-aware cursor logic needed
            return super(SqliteEventLogStorage, self).get_event_records(
                event_records_filter=event_records_filter,
                limit=limit,
                ascending=ascending)

        # Build the filter query once; it is executed against each run shard's connection below.
        query = db.select([SqlEventLogStorageTable.c.id, SqlEventLogStorageTable.c.event])
        # Look up stored details for the asset (e.g. wipe information), which feed into
        # the filter query below.
        if event_records_filter and event_records_filter.asset_key:
            asset_details = next(
                iter(self._get_assets_details([event_records_filter.asset_key]))
            )
        else:
            asset_details = None

        if not event_records_filter or not isinstance(
            event_records_filter.after_cursor, RunShardedEventsCursor
        ):
            warnings.warn("""
                Called `get_event_records` on a run-sharded event log storage with a query that
                is not run aware (e.g. not using a RunShardedEventsCursor).  This likely has poor
                performance characteristics.  Consider adding a RunShardedEventsCursor to your query
                or switching your instance configuration to use a non-run sharded event log storage
                (e.g. PostgresEventLogStorage, ConsolidatedSqliteEventLogStorage)
            """)

        query = self._apply_filter_to_query(
            query=query,
            event_records_filter=event_records_filter,
            asset_details=asset_details,
            apply_cursor_filters=False,  # run-sharded cursor filters don't really make sense
        )
        if limit:
            query = query.limit(limit)
        if ascending:
            query = query.order_by(SqlEventLogStorageTable.c.timestamp.asc())
        else:
            query = query.order_by(SqlEventLogStorageTable.c.timestamp.desc())

        # Workaround for the run-sharded sqlite storage to enable cross-run queries: get the
        # list of run ids whose events may satisfy the query, then open a run_connection for
        # each run id, one at a time.
        run_updated_after = (
            event_records_filter.after_cursor.run_updated_after
            if event_records_filter
            and isinstance(event_records_filter.after_cursor, RunShardedEventsCursor)
            else None
        )
        run_records = self._instance.get_run_records(
            filters=RunsFilter(updated_after=run_updated_after),
            order_by="update_timestamp",
            ascending=ascending,
        )

        event_records = []
        for run_record in run_records:
            run_id = run_record.pipeline_run.run_id
            with self.run_connection(run_id) as conn:
                results = conn.execute(query).fetchall()

            for row_id, json_str in results:
                try:
                    event_record = deserialize_json_to_dagster_namedtuple(json_str)
                    if not isinstance(event_record, EventLogEntry):
                        logging.warning(
                            "Could not resolve event record as EventLogEntry for id `{}`.".format(
                                row_id
                            )
                        )
                        continue
                    event_records.append(
                        EventLogRecord(storage_id=row_id, event_log_entry=event_record)
                    )
                    if limit and len(event_records) >= limit:
                        break
                except seven.JSONDecodeError:
                    logging.warning("Could not parse event record id `{}`.".format(row_id))

            if limit and len(event_records) >= limit:
                break

        return event_records[:limit]
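
For reference, here is a minimal sketch of how a caller might exercise this cross-run path. The instance value, the import paths, and the exact RunShardedEventsCursor arguments are assumptions inferred from the attribute accesses above, not something this example demonstrates; a non-asset event type is used so the run-sharded branch is taken rather than the early return to super():

import datetime

from dagster import DagsterEventType
from dagster.core.storage.event_log.base import (  # paths may vary by dagster version
    EventRecordsFilter,
    RunShardedEventsCursor,
)

# `instance` is assumed to be a DagsterInstance backed by SqliteEventLogStorage.
cursor = RunShardedEventsCursor(id=0, run_updated_after=datetime.datetime(2021, 1, 1))
records = instance.get_event_records(
    event_records_filter=EventRecordsFilter(
        event_type=DagsterEventType.RUN_SUCCESS,
        after_cursor=cursor,
    ),
    limit=10,
    ascending=True,
)
for record in records:
    print(record.storage_id)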