def online_read(
    self,
    project: str,
    table: Union[FeatureTable, FeatureView],
    entity_keys: List[EntityKeyProto],
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Look up one Datastore row per entity key and decode its features.

    Args:
        project: Project name used as the first element of the row key path.
        table: Feature table / view whose ``name`` is used in the key path.
        entity_keys: Entity keys to read, one Datastore lookup each.

    Returns:
        One tuple per entity key, in input order: ``(event_ts, features)``
        when the row exists, ``(None, None)`` otherwise.
    """
    client = self._initialize_client()

    rows: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
    for entity_key in entity_keys:
        document_id = compute_datastore_entity_id(entity_key)
        row_key = client.key(
            "Project", project, "Table", table.name, "Row", document_id
        )
        entity = client.get(row_key)

        if entity is None:
            rows.append((None, None))
            continue

        # Each stored value is a serialized ValueProto keyed by feature name.
        features = {}
        for feature_name, serialized in entity["values"].items():
            proto = ValueProto()
            proto.ParseFromString(serialized)
            features[feature_name] = proto
        rows.append((entity["event_ts"], features))

    return rows
def online_read(
    self,
    config: RepoConfig,
    table: FeatureView,
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Read feature rows from DynamoDB, one ``get_item`` call per entity key.

    Args:
        config: Repo configuration; ``config.online_store`` must be a
            ``DynamoDBOnlineStoreConfig``.
        table: Feature view used to resolve the DynamoDB table name.
        entity_keys: Entity keys to read.
        requested_features: Accepted for interface compatibility; not used
            by this implementation.

    Returns:
        One tuple per entity key, in input order: ``(event_ts, features)``
        when the item exists, ``(None, None)`` otherwise. ``event_ts`` is
        parsed from the stored ISO-format string.
    """
    online_config = config.online_store
    assert isinstance(online_config, DynamoDBOnlineStoreConfig)
    dynamodb_resource = self._get_dynamodb_resource(online_config.region)

    # The table handle does not depend on the entity key, so build it once
    # instead of once per lookup.
    table_instance = dynamodb_resource.Table(_get_table_name(config, table))

    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
    for entity_key in entity_keys:
        entity_id = compute_entity_id(entity_key)
        with tracing_span(name="remote_call"):
            response = table_instance.get_item(Key={"entity_id": entity_id})
        value = response.get("Item")

        if value is None:
            result.append((None, None))
            continue

        res = {}
        for feature_name, value_bin in value["values"].items():
            val = ValueProto()
            # The stored value is a wrapper object; the raw bytes are
            # exposed through its ``.value`` attribute.
            val.ParseFromString(value_bin.value)
            res[feature_name] = val
        result.append((datetime.fromisoformat(value["event_ts"]), res))

    return result
def online_read(
    self,
    config: RepoConfig,
    table: Union[FeatureTable, FeatureView],
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Fetch and decode one Datastore row per entity key.

    Args:
        config: Repo configuration; ``config.online_store`` must be a
            ``DatastoreOnlineStoreConfig`` and ``config.project`` supplies
            the project element of the key path.
        table: Feature table / view whose ``name`` is used in the key path.
        entity_keys: Entity keys to read, one Datastore lookup each.
        requested_features: Accepted for interface compatibility; not used
            by this implementation.

    Returns:
        One tuple per entity key, in input order: ``(event_ts, features)``
        when the row exists, ``(None, None)`` otherwise.
    """
    online_config = config.online_store
    assert isinstance(online_config, DatastoreOnlineStoreConfig)
    client = self._get_client(online_config)
    feast_project = config.project

    rows: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
    for entity_key in entity_keys:
        document_id = compute_datastore_entity_id(entity_key)
        row_key = client.key(
            "Project", feast_project, "Table", table.name, "Row", document_id
        )
        entity = client.get(row_key)

        if entity is None:
            rows.append((None, None))
            continue

        # Decode every serialized ValueProto stored under "values".
        features = {}
        for feature_name, serialized in entity["values"].items():
            proto = ValueProto()
            proto.ParseFromString(serialized)
            features[feature_name] = proto
        rows.append((entity["event_ts"], features))

    return rows
def online_read(
    self,
    project: str,
    table: Union[FeatureTable, FeatureView],
    entity_key: EntityKeyProto,
) -> Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]:
    """Read every stored feature of a single entity from the SQL table.

    Args:
        project: Project name used to derive the table identifier.
        table: Feature table / view used to derive the table identifier.
        entity_key: Entity key whose rows should be read.

    Returns:
        ``(event_ts, features)`` where ``event_ts`` is taken from the last
        row fetched, or ``(None, None)`` when no row matches.
    """
    serialized_key = serialize_entity_key(entity_key)

    cursor = self._get_conn().cursor()
    cursor.execute(
        f"SELECT feature_name, value, event_ts FROM {_table_id(project, table)} WHERE entity_key = ?",
        (serialized_key,),
    )

    features = {}
    latest_ts = None
    for feature_name, serialized, row_ts in cursor.fetchall():
        proto = ValueProto()
        proto.ParseFromString(serialized)
        features[feature_name] = proto
        latest_ts = row_ts

    if features:
        return latest_ts, features
    return None, None
def online_read(
    self,
    config: RepoConfig,
    table: Union[FeatureTable, FeatureView],
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Read feature rows from DynamoDB, one ``get_item`` call per entity key.

    Args:
        config: Repo configuration; ``config.online_store`` must be a
            ``DynamoDBOnlineStoreConfig``. The table is addressed as
            ``"{config.project}.{table.name}"``.
        table: Feature table / view whose ``name`` forms the table name.
        entity_keys: Entity keys to read.
        requested_features: Accepted for interface compatibility; not used
            by this implementation.

    Returns:
        One tuple per entity key, in input order: ``(event_ts, features)``
        when the item exists, ``(None, None)`` otherwise.
    """
    online_config = config.online_store
    assert isinstance(online_config, DynamoDBOnlineStoreConfig)
    _, dynamodb_resource = self._initialize_dynamodb(online_config)

    # The table handle is the same for every key; create it once rather
    # than on every loop iteration.
    table_instance = dynamodb_resource.Table(f"{config.project}.{table.name}")

    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
    for entity_key in entity_keys:
        entity_id = compute_entity_id(entity_key)
        response = table_instance.get_item(Key={"entity_id": entity_id})
        value = response.get("Item")

        if value is None:
            result.append((None, None))
            continue

        res = {}
        for feature_name, value_bin in value["values"].items():
            val = ValueProto()
            # The stored value is a wrapper object; the raw bytes are
            # exposed through its ``.value`` attribute.
            val.ParseFromString(value_bin.value)
            res[feature_name] = val
        # NOTE(review): event_ts is returned exactly as stored, while the
        # return annotation promises a datetime. Confirm what the writer
        # stores before converting here.
        result.append((value["event_ts"], res))

    return result
def online_read(
    self,
    config: RepoConfig,
    table: Union[FeatureTable, FeatureView],
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Read the stored features of each entity key, one query per key.

    Args:
        config: Repo configuration; ``config.project`` is used to derive
            the table identifier and ``config`` is passed to ``_get_conn``.
        table: Feature table / view used to derive the table identifier.
        entity_keys: Entity keys to read.
        requested_features: Accepted for interface compatibility; not used
            by this implementation.

    Returns:
        One tuple per entity key, in input order: ``(event_ts, features)``
        where ``event_ts`` comes from the last row fetched for that key, or
        ``(None, None)`` when the key has no rows.
    """
    conn = self._get_conn(config)
    cur = conn.cursor()

    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []

    # The statement text only depends on the project and table, so build it
    # once; the entity key itself is always bound as a parameter.
    query = f"SELECT feature_name, value, event_ts FROM {_table_id(config.project, table)} WHERE entity_key = ?"

    for entity_key in entity_keys:
        entity_key_bin = serialize_entity_key(entity_key)
        cur.execute(query, (entity_key_bin,))

        res = {}
        res_ts = None
        for feature_name, val_bin, ts in cur.fetchall():
            val = ValueProto()
            val.ParseFromString(val_bin)
            res[feature_name] = val
            res_ts = ts

        if res:
            result.append((res_ts, res))
        else:
            result.append((None, None))

    return result
def _get_features_for_entity(
    self,
    values: List[ByteString],
    feature_view: str,
    requested_features: List[str],
) -> Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]:
    """Decode the raw values fetched for one entity into feature protos.

    ``values`` is paired positionally with ``requested_features``. The entry
    named ``"_ts:<feature_view>"`` must be present in ``requested_features``
    (a ``KeyError`` is raised otherwise); it is parsed as the event
    timestamp and removed from the feature mapping.

    Args:
        values: Raw values, aligned with ``requested_features``.
        feature_view: Feature view name used to build the timestamp key.
        requested_features: Names for ``values``, including the timestamp
            key.

    Returns:
        ``(timestamp, features)``, or ``(None, None)`` when no feature
        names remain after the timestamp entry is removed. Falsy raw values
        yield a default-constructed ``ValueProto``. The timestamp is built
        from the seconds component only via ``datetime.fromtimestamp``.
    """
    raw_by_name = dict(zip(requested_features, values))

    event_ts = Timestamp()
    raw_ts = raw_by_name.pop(f"_ts:{feature_view}")
    if raw_ts:
        event_ts.ParseFromString(raw_ts)

    features = {}
    for feature_name, raw_value in raw_by_name.items():
        proto = ValueProto()
        if raw_value:
            proto.ParseFromString(raw_value)
        features[feature_name] = proto

    if features:
        return datetime.fromtimestamp(event_ts.seconds), features
    return None, None
def online_read(
    self,
    config: RepoConfig,
    table: Union[FeatureTable, FeatureView],
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Read feature values for each entity key from Redis via ``HMGET``.

    Args:
        config: Repo configuration; ``config.online_store`` must be a
            ``RedisOnlineStoreConfig`` and ``config.project`` is used to
            build the Redis key.
        table: Feature table / view; its ``name`` namespaces the hash
            fields and its ``features`` supply the default feature list.
        entity_keys: Entity keys to read.
        requested_features: Feature names to read. When empty or ``None``,
            every feature of ``table`` is read. The list is never modified.

    Returns:
        One tuple per entity key, in input order: ``(timestamp, features)``,
        or ``(None, None)`` when there are no feature names to read.
        Features with no stored value map to a default-constructed
        ``ValueProto``. The timestamp is built from the seconds component
        only via ``datetime.fromtimestamp``.
    """
    online_store_config = config.online_store
    assert isinstance(online_store_config, RedisOnlineStoreConfig)

    client = self._get_client(online_store_config)
    feature_view = table.name
    project = config.project

    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []

    # Work on a private copy. Appending the timestamp key to the caller's
    # list would leak it to the caller and add one more bogus hashed key to
    # the HMGET request on every iteration.
    if requested_features:
        feature_names = list(requested_features)
    else:
        feature_names = [f.name for f in table.features]

    # The hash fields are identical for every entity, so build them once.
    # The timestamp field goes last, so its value is always values[-1].
    ts_key = f"_ts:{feature_view}"
    hset_keys = [_mmh3(f"{feature_view}:{k}") for k in feature_names]
    hset_keys.append(ts_key)

    for entity_key in entity_keys:
        redis_key_bin = _redis_key(project, entity_key)
        values = client.hmget(redis_key_bin, hset_keys)

        res_ts = Timestamp()
        ts_val = values[-1]
        if ts_val:
            res_ts.ParseFromString(ts_val)

        # zip stops at the shorter input, so the trailing timestamp value
        # is not paired with any feature name.
        res = {}
        for feature_name, val_bin in zip(feature_names, values):
            val = ValueProto()
            if val_bin:
                val.ParseFromString(val_bin)
            res[feature_name] = val

        if not res:
            result.append((None, None))
        else:
            timestamp = datetime.fromtimestamp(res_ts.seconds)
            result.append((timestamp, res))

    return result
def online_read(
    self,
    config: RepoConfig,
    table: FeatureView,
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Read all requested entities from PostgreSQL with a single query.

    Args:
        config: Repo configuration; ``config.project`` is used to derive
            the table identifier and ``config`` is passed to ``_get_conn``.
        table: Feature view used to derive the table identifier.
        entity_keys: Entity keys to read.
        requested_features: Accepted for interface compatibility; not used
            by this implementation.

    Returns:
        One tuple per entity key, in input order: ``(event_ts, features)``
        where ``event_ts`` comes from the last row seen for that key, or
        ``(None, None)`` when the key has no rows.
    """
    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []

    project = config.project
    with self._get_conn(config) as conn, conn.cursor() as cur:
        # Serialize every key up front so a single ANY(...) query can fetch
        # them all, saving round trips to PostgreSQL.
        serialized_keys = [
            serialize_entity_key(entity_key) for entity_key in entity_keys
        ]

        cur.execute(
            sql.SQL(
                """ SELECT entity_key, feature_name, value, event_ts FROM {} WHERE entity_key = ANY(%s); """
            ).format(sql.Identifier(_table_id(project, table))),
            (serialized_keys,),
        )
        fetched = cur.fetchall()

        # The rows come back in no guaranteed order, so group them by
        # entity key and then walk the keys in request order.
        rows_by_key = defaultdict(list)
        for row in fetched if fetched is not None else []:
            rows_by_key[row[0].tobytes()].append(row[1:])

        for serialized_key in serialized_keys:
            if serialized_key not in rows_by_key:
                result.append((None, None))
                continue

            features = {}
            for feature_name, value_bin, event_ts in rows_by_key[serialized_key]:
                proto = ValueProto()
                proto.ParseFromString(value_bin)
                features[feature_name] = proto
            result.append((event_ts, features))

    return result
def online_read(
    self,
    config: RepoConfig,
    table: FeatureView,
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Read all requested entities with a single ``IN (...)`` query.

    Args:
        config: Repo configuration; ``config.project`` is used to derive
            the table identifier and ``config`` is passed to ``_get_conn``.
        table: Feature view used to derive the table identifier.
        entity_keys: Entity keys to read.
        requested_features: Accepted for interface compatibility; not used
            by this implementation.

    Returns:
        One tuple per entity key, in input order: ``(event_ts, features)``
        where ``event_ts`` comes from the last row seen for that key, or
        ``(None, None)`` when the key has no rows.
    """
    conn = self._get_conn(config)
    cur = conn.cursor()

    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []

    # Serialize each key exactly once. The same bytes are used as query
    # parameters and as lookup keys below. Doing this outside the span also
    # keeps serialization time out of the "remote_call" measurement.
    serialized_keys = [
        serialize_entity_key(entity_key) for entity_key in entity_keys
    ]

    with tracing_span(name="remote_call"):
        # Fetch all entities in one go
        cur.execute(
            f"SELECT entity_key, feature_name, value, event_ts "
            f"FROM {_table_id(config.project, table)} "
            f"WHERE entity_key IN ({','.join('?' * len(serialized_keys))}) "
            f"ORDER BY entity_key",
            serialized_keys,
        )
        rows = cur.fetchall()

    # groupby only merges adjacent rows; the ORDER BY above guarantees all
    # rows of one entity key are contiguous.
    rows_by_key = {
        k: list(group) for k, group in itertools.groupby(rows, key=lambda r: r[0])
    }

    for entity_key_bin in serialized_keys:
        res = {}
        res_ts = None
        for _, feature_name, val_bin, ts in rows_by_key.get(entity_key_bin, []):
            val = ValueProto()
            val.ParseFromString(val_bin)
            res[feature_name] = val
            res_ts = ts

        if res:
            result.append((res_ts, res))
        else:
            result.append((None, None))

    return result
def online_read(
    self,
    config: RepoConfig,
    table: Union[FeatureTable, FeatureView],
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Batch-read Datastore rows for the given entity keys via ``get_multi``.

    Args:
        config: Repo configuration; ``config.online_store`` must be a
            ``DatastoreOnlineStoreConfig`` and ``config.project`` supplies
            the project element of the key path.
        table: Feature table / view whose ``name`` is used in the key path.
        entity_keys: Entity keys to read.
        requested_features: Accepted for interface compatibility; not used
            by this implementation.

    Returns:
        One tuple per entity key, in input order: ``(event_ts, features)``
        when the row exists, ``(None, None)`` otherwise.
    """
    online_config = config.online_store
    assert isinstance(online_config, DatastoreOnlineStoreConfig)
    client = self._get_client(online_config)
    feast_project = config.project

    keys: List[Key] = []
    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
    for entity_key in entity_keys:
        document_id = compute_entity_id(entity_key)
        keys.append(
            client.key(
                "Project", feast_project, "Table", table.name, "Row", document_id
            )
        )

    # NOTE: get_multi doesn't return values in the same order as the keys in
    # the request, and it may return fewer values than keys when some rows
    # are missing. Hence the lookup by key below.
    with tracing_span(name="remote_call"):
        entities = client.get_multi(keys)

    if entities is not None:
        entity_by_key = {entity.key: entity for entity in entities}
    else:
        entity_by_key = {}

    for key in keys:
        if key not in entity_by_key:
            result.append((None, None))
            continue

        entity = entity_by_key[key]
        features = {}
        for feature_name, serialized in entity["values"].items():
            proto = ValueProto()
            proto.ParseFromString(serialized)
            features[feature_name] = proto
        result.append((entity["event_ts"], features))

    return result
def online_read(
    self,
    config: RepoConfig,
    table: Union[FeatureTable, FeatureView],
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """Batch-read Datastore rows for the given entity keys via ``get_multi``.

    Args:
        config: Repo configuration; ``config.online_store`` must be a
            ``DatastoreOnlineStoreConfig`` and ``config.project`` supplies
            the project element of the key path.
        table: Feature table / view whose ``name`` is used in the key path.
        entity_keys: Entity keys to read.
        requested_features: Accepted for interface compatibility; not used
            by this implementation.

    Returns:
        Exactly one tuple per entity key, in input order:
        ``(event_ts, features)`` when the row exists, ``(None, None)``
        otherwise. Repeated entity keys each get their own entry.
    """
    online_config = config.online_store
    assert isinstance(online_config, DatastoreOnlineStoreConfig)
    client = self._get_client(online_config)
    feast_project = config.project

    keys: List[Key] = []
    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
    for entity_key in entity_keys:
        document_id = compute_entity_id(entity_key)
        key = client.key(
            "Project", feast_project, "Table", table.name, "Row", document_id
        )
        keys.append(key)

    values = client.get_multi(keys)

    # The response is not guaranteed to follow request order and omits
    # missing rows. Index it by key and walk the requested keys instead of
    # sorting with keys.index() and re-inserting placeholders. That approach
    # was quadratic, failed on a None response, and produced too few results
    # when a key was requested more than once.
    values_by_key = {v.key: v for v in values} if values is not None else {}

    for key in keys:
        value = values_by_key.get(key)
        if value is None:
            result.append((None, None))
            continue

        res = {}
        for feature_name, value_bin in value["values"].items():
            val = ValueProto()
            val.ParseFromString(value_bin)
            res[feature_name] = val
        result.append((value["event_ts"], res))

    return result
def online_read(
    self,
    config: RepoConfig,
    table: FeatureView,
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    """
    Retrieve feature values from the online DynamoDB store.

    Entity ids are read in batches of ``online_config.batch_size`` using
    ``batch_get_item``.

    Args:
        config: The RepoConfig for the current FeatureStore.
        table: Feast FeatureView.
        entity_keys: a list of entity keys that should be read from the FeatureStore.
        requested_features: Accepted for interface compatibility; not used
            by this implementation.

    Returns:
        One tuple per entity key, in input order: ``(event_ts, features)``
        when the item exists, ``(None, None)`` otherwise. ``event_ts`` is
        parsed from the stored ISO-format string.
    """
    online_config = config.online_store
    assert isinstance(online_config, DynamoDBOnlineStoreConfig)
    dynamodb_resource = self._get_dynamodb_resource(
        online_config.region, online_config.endpoint_url
    )
    table_instance = dynamodb_resource.Table(
        _get_table_name(online_config, config, table)
    )

    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
    entity_ids = [compute_entity_id(entity_key) for entity_key in entity_keys]
    batch_size = online_config.batch_size
    entity_ids_iter = iter(entity_ids)
    while True:
        batch = list(itertools.islice(entity_ids_iter, batch_size))
        # No more entity ids to read
        if not batch:
            break

        batch_entity_ids = {
            table_instance.name: {
                "Keys": [{"entity_id": entity_id} for entity_id in batch]
            }
        }
        with tracing_span(name="remote_call"):
            response = dynamodb_resource.batch_get_item(
                RequestItems=batch_entity_ids
            )
        response = response.get("Responses")
        table_responses = response.get(table_instance.name)

        # Collect this batch separately. Padding must be computed against
        # the batch's own length; measuring against the cumulative result
        # would skip padding for every batch after the first and misalign
        # the output.
        batch_result: List[
            Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]
        ] = []
        if table_responses:
            # The helper is expected to order the responses like
            # entity_ids, which the positional walk below depends on.
            table_responses = self._sort_dynamodb_response(
                table_responses, entity_ids
            )
            entity_idx = 0
            for tbl_res in table_responses:
                entity_id = tbl_res["entity_id"]
                # Every batch id skipped before this response was not found.
                while entity_id != batch[entity_idx]:
                    batch_result.append((None, None))
                    entity_idx += 1
                res = {}
                for feature_name, value_bin in tbl_res["values"].items():
                    val = ValueProto()
                    val.ParseFromString(value_bin.value)
                    res[feature_name] = val
                batch_result.append(
                    (datetime.fromisoformat(tbl_res["event_ts"]), res)
                )
                entity_idx += 1

        # Not all entities in a batch may have responses
        # Pad with remaining values in batch that were not found
        batch_size_nones = ((None, None),) * (len(batch) - len(batch_result))
        batch_result.extend(batch_size_nones)
        result.extend(batch_result)

    return result