def has_asset_key(self, asset_key: AssetKey) -> bool:
    """Return whether ``asset_key`` is known and not currently wiped.

    The key counts as present when the asset key table holds a row for it
    (stored under either its current or its legacy string form) and one of
    the following is true:

    * the row's asset details record no wipe timestamp, or
    * the newest event-log timestamp recorded for this key is strictly later
      than the recorded wipe timestamp.

    Args:
        asset_key: The asset key to look up.

    Returns:
        True if the key exists and is not wiped, False otherwise.
    """
    check.inst_param(asset_key, "asset_key", AssetKey)

    # Rows may have been written with either serialization of the key.
    current_form = asset_key.to_string()
    legacy_form = asset_key.to_string(legacy=True)

    key_query = (
        db.select([AssetKeyTable.c.asset_key, AssetKeyTable.c.asset_details])
        .where(
            db.or_(
                AssetKeyTable.c.asset_key == current_form,
                AssetKeyTable.c.asset_key == legacy_form,
            )
        )
        .limit(1)
    )

    with self.index_connection() as conn:
        row = conn.execute(key_query).fetchone()
        if not row:
            return False

        asset_details: Optional[AssetDetails] = AssetDetails.from_db_string(row[1])
        if not asset_details or not asset_details.last_wipe_timestamp:
            # Never wiped: the row's existence is enough.
            return True

        # Filter on the event log table's own asset_key column. Filtering on
        # AssetKeyTable here would add that table to the FROM clause without a
        # join condition, so the query would return the newest timestamp of
        # *any* event instead of the newest one for this asset.
        latest_event_row = conn.execute(
            db.select([SqlEventLogStorageTable.c.timestamp])
            .where(
                db.or_(
                    SqlEventLogStorageTable.c.asset_key == current_form,
                    SqlEventLogStorageTable.c.asset_key == legacy_form,
                )
            )
            .order_by(SqlEventLogStorageTable.c.timestamp.desc())
            .limit(1)
        ).fetchone()
        if not latest_event_row:
            # Wiped and nothing recorded for the key at all.
            return False

        return utc_datetime_from_naive(latest_event_row[0]) > utc_datetime_from_timestamp(
            asset_details.last_wipe_timestamp
        )
def all_asset_keys(self):
    """Return every asset key in the asset key table that is not currently wiped.

    A key is treated as wiped when its asset details carry a wipe timestamp
    and the event log has either no timestamp for the key or a latest
    timestamp that is earlier than the wipe.

    Returns:
        A list of the asset keys that remain after removing the wiped ones.
    """
    with self.index_connection() as conn:
        key_rows = conn.execute(
            db.select([AssetKeyTable.c.asset_key, AssetKeyTable.c.asset_details])
        ).fetchall()

        known_keys = set()
        wipe_time_by_key = {}
        for key_row in key_rows:
            parsed_key = AssetKey.from_db_string(key_row[0])
            details: Optional[AssetDetails] = AssetDetails.from_db_string(key_row[1])
            known_keys.add(parsed_key)
            if details and details.last_wipe_timestamp:
                wipe_time_by_key[parsed_key] = details.last_wipe_timestamp

        still_wiped = set()
        if wipe_time_by_key:
            # One grouped query gives the newest event-log timestamp for each
            # key that has ever been wiped.
            newest_timestamp = db.func.max(SqlEventLogStorageTable.c.timestamp)
            wiped_key_strings = [key.to_string() for key in wipe_time_by_key]
            latest_rows = conn.execute(
                db.select([SqlEventLogStorageTable.c.asset_key, newest_timestamp])
                .where(SqlEventLogStorageTable.c.asset_key.in_(wiped_key_strings))
                .group_by(SqlEventLogStorageTable.c.asset_key)
                .order_by(newest_timestamp.asc())
            ).fetchall()
            latest_by_key = {
                AssetKey.from_db_string(latest_row[0]): latest_row[1]
                for latest_row in latest_rows
            }

            # A wiped key stays hidden until something newer than the wipe
            # shows up in the event log.
            for wiped_key, wipe_time in wipe_time_by_key.items():
                last_seen = latest_by_key.get(wiped_key)
                if not last_seen or utc_datetime_from_naive(
                    last_seen
                ) < utc_datetime_from_timestamp(wipe_time):
                    still_wiped.add(wiped_key)

    return list(known_keys - still_wiped)
def has_asset_key(self, asset_key: AssetKey) -> bool:
    """Return whether ``asset_key`` is known and not currently wiped.

    When the ``ASSET_KEY_INDEX_COLS`` secondary index is available, the
    answer comes from a single query against the asset key table: the key is
    present if a row exists whose wipe timestamp is NULL or older than its
    last materialization timestamp.

    Otherwise the wipe status is read from the serialized asset details, and
    the key is present when no wipe timestamp is recorded or when the newest
    event-log timestamp for the key is strictly later than the wipe.

    Args:
        asset_key: The asset key to look up.

    Returns:
        True if the key exists and is not wiped, False otherwise.
    """
    check.inst_param(asset_key, "asset_key", AssetKey)

    # Rows may have been written with either serialization of the key.
    current_form = asset_key.to_string()
    legacy_form = asset_key.to_string(legacy=True)
    matches_asset_key_row = db.or_(
        AssetKeyTable.c.asset_key == current_form,
        AssetKeyTable.c.asset_key == legacy_form,
    )

    if self.has_secondary_index(ASSET_KEY_INDEX_COLS):
        indexed_query = (
            db.select([AssetKeyTable.c.asset_key])
            .where(matches_asset_key_row)
            .where(
                db.or_(
                    # SQLAlchemy overloads `==` so this renders as IS NULL;
                    # do not rewrite it as a Python `is None` test.
                    AssetKeyTable.c.wipe_timestamp == None,  # noqa: E711
                    AssetKeyTable.c.last_materialization_timestamp
                    > AssetKeyTable.c.wipe_timestamp,
                )
            )
            .limit(1)
        )
        with self.index_connection() as conn:
            row = conn.execute(indexed_query).fetchone()
            return bool(row)

    # has not migrated, need to pull asset_details to get wipe status
    details_query = (
        db.select([AssetKeyTable.c.asset_key, AssetKeyTable.c.asset_details])
        .where(matches_asset_key_row)
        .limit(1)
    )

    with self.index_connection() as conn:
        row = conn.execute(details_query).fetchone()
        if not row:
            return False

        asset_details: Optional[AssetDetails] = AssetDetails.from_db_string(row[1])
        if not asset_details or not asset_details.last_wipe_timestamp:
            # Never wiped: the row's existence is enough.
            return True

        # Filter on the event log table's own asset_key column. Filtering on
        # AssetKeyTable here would add that table to the FROM clause without a
        # join condition, so the query would return the newest timestamp of
        # *any* event instead of the newest one for this asset.
        latest_event_row = conn.execute(
            db.select([SqlEventLogStorageTable.c.timestamp])
            .where(
                db.or_(
                    SqlEventLogStorageTable.c.asset_key == current_form,
                    SqlEventLogStorageTable.c.asset_key == legacy_form,
                )
            )
            .order_by(SqlEventLogStorageTable.c.timestamp.desc())
            .limit(1)
        ).fetchone()
        if not latest_event_row:
            # Wiped and nothing recorded for the key at all.
            return False

        return utc_datetime_from_naive(latest_event_row[0]) > utc_datetime_from_timestamp(
            asset_details.last_wipe_timestamp
        )
def _fetch_raw_asset_rows(self, asset_keys=None, prefix=None, limit=None, cursor=None):
    """Fetch one chunk of asset key table rows, leaving out keys that are still wiped.

    The query selects the serialized asset_key, last_materialization and
    asset_details columns, applies the given filters, and orders by asset
    key. Because wiped rows are removed after the fetch on the un-migrated
    path, the number of rows returned may be smaller than ``limit``.

    Args:
        asset_keys: Optional asset key filter, forwarded to
            ``_apply_asset_filter_to_query``.
        prefix: Optional prefix filter, forwarded likewise.
        limit: Optional row limit, forwarded likewise.
        cursor: Optional cursor, forwarded likewise.

    Returns:
        A tuple ``(rows, has_more, cursor)``. Each row is a tuple of
        serialized asset_key, materialization, and asset_details. With the
        ``ASSET_KEY_INDEX_COLS`` secondary index the wipe filter runs in SQL
        and the result is ``(rows, False, None)``. Otherwise ``has_more`` is
        truthy only when ``limit`` is set and the query returned exactly
        ``limit`` rows, and ``cursor`` is the asset_key column of the last
        fetched row (``None`` if nothing was fetched).
    """
    selected_columns = [
        AssetKeyTable.c.asset_key,
        AssetKeyTable.c.last_materialization,
        AssetKeyTable.c.asset_details,
    ]
    has_filters = any((asset_keys, prefix, limit, cursor))
    if self.has_asset_key_index_cols() and not has_filters:
        # if the schema has been migrated, fetch the last_materialization_timestamp to see if
        # we can lazily migrate the data table
        selected_columns += [
            AssetKeyTable.c.last_materialization_timestamp,
            AssetKeyTable.c.wipe_timestamp,
        ]

    ordered_query = db.select(selected_columns).order_by(AssetKeyTable.c.asset_key.asc())
    query = self._apply_asset_filter_to_query(ordered_query, asset_keys, prefix, limit, cursor)

    if self.has_secondary_index(ASSET_KEY_INDEX_COLS):
        # Wipe status lives in indexed columns, so the database can filter.
        not_wiped = db.or_(
            # SQLAlchemy overloads `==` so this renders as IS NULL.
            AssetKeyTable.c.wipe_timestamp == None,  # noqa: E711
            AssetKeyTable.c.last_materialization_timestamp > AssetKeyTable.c.wipe_timestamp,
        )
        query = query.where(not_wiped)
        with self.index_connection() as conn:
            fetched = conn.execute(query).fetchall()
        return fetched, False, None

    with self.index_connection() as conn:
        fetched = conn.execute(query).fetchall()

    kept_rows = OrderedDict()
    # Wiped keys whose stored materialization carries no usable timestamp;
    # these are resolved against the event log after the loop.
    unresolved_wipes = {}

    for raw_row in fetched:
        parsed_key = AssetKey.from_db_string(raw_row[0])
        if not parsed_key:
            continue

        details = AssetDetails.from_db_string(raw_row[2])
        wipe_time = details.last_wipe_timestamp if details else None
        if not wipe_time:
            # Never wiped: keep unconditionally.
            kept_rows[parsed_key] = raw_row
            continue

        stored = deserialize_json_to_dagster_namedtuple(raw_row[1]) if raw_row[1] else None
        is_event_entry = isinstance(stored, EventLogEntry)
        if is_event_entry and wipe_time > stored.timestamp:
            # this asset has not been materialized since being wiped, skip
            continue

        kept_rows[parsed_key] = raw_row
        if not is_event_entry:
            unresolved_wipes[parsed_key] = wipe_time

    if unresolved_wipes:
        fallback_times = self._fetch_backcompat_materialization_times(unresolved_wipes.keys())
        for wiped_key, wipe_time in unresolved_wipes.items():
            fallback_time = fallback_times.get(wiped_key)
            if not fallback_time or utc_datetime_from_naive(
                fallback_time
            ) < utc_datetime_from_timestamp(wipe_time):
                # remove rows that have not been materialized since being wiped
                kept_rows.pop(wiped_key)

    next_cursor = fetched[-1][0] if fetched else None
    has_more = limit and len(fetched) == limit

    return kept_rows.values(), has_more, next_cursor