# Imports assumed by the snippets below; the module paths follow Feast's
# Redis online store and may vary across Feast versions. EX_SECONDS and
# RedisOnlineStoreConfig are defined elsewhere in the same module.
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

from google.protobuf.timestamp_pb2 import Timestamp

from feast import utils
from feast.feature_table import FeatureTable
from feast.feature_view import FeatureView
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
from feast.protos.feast.types.Value_pb2 import Value as ValueProto
from feast.repo_config import RepoConfig
from feast.usage import tracing_span


def online_write_batch(
    self,
    config: RepoConfig,
    table: Union[FeatureTable, FeatureView],
    data: List[
        Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
    ],
    progress: Optional[Callable[[int], Any]],
) -> None:
    online_store_config = config.online_store
    assert isinstance(online_store_config, RedisOnlineStoreConfig)

    client = self._get_client(online_store_config)
    project = config.project
    feature_view = table.name

    ex = Timestamp()
    ex.seconds = EX_SECONDS
    ex_str = ex.SerializeToString()

    for entity_key, values, timestamp, created_ts in data:
        redis_key_bin = _redis_key(project, entity_key)
        ts = Timestamp()
        ts.seconds = int(utils.make_tzaware(timestamp).timestamp())

        # Build the hash fields fresh for every entity so fields from a
        # previous iteration cannot leak into this entity's write.
        entity_hset = {}
        entity_hset[f"_ts:{feature_view}"] = ts.SerializeToString()
        entity_hset[f"_ex:{feature_view}"] = ex_str

        for feature_name, val in values.items():
            f_key = _mmh3(f"{feature_view}:{feature_name}")
            entity_hset[f_key] = val.SerializeToString()

        # One blocking HSET round trip per entity; this per-entity latency is
        # what the pipelined rewrite further below removes.
        client.hset(redis_key_bin, mapping=entity_hset)
        if progress:
            progress(1)
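
# _redis_key and _mmh3 are not shown in this excerpt. A minimal sketch of
# what they plausibly look like, inferred from how they are used above (the
# exact serialization in Feast may differ):

import struct

import mmh3

from feast.infra.key_encoding_utils import serialize_entity_key


def _redis_key(project: str, entity_key: EntityKeyProto) -> bytes:
    # Storage key = serialized entity key followed by the project name.
    return serialize_entity_key(entity_key) + project.encode("utf-8")


def _mmh3(key: str) -> bytes:
    # Hash "<feature_view>:<feature_name>" down to a fixed-width field name,
    # keeping per-field memory overhead in the Redis hash small.
    return struct.pack("<Q", mmh3.hash(key, signed=False))
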
def online_read(
    self,
    config: RepoConfig,
    table: Union[FeatureTable, FeatureView],
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    online_store_config = config.online_store
    assert isinstance(online_store_config, RedisOnlineStoreConfig)

    client = self._get_client(online_store_config)
    feature_view = table.name
    project = config.project

    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []

    if not requested_features:
        requested_features = [f.name for f in table.features]

    # Compute the hashed field names and the timestamp field once, before the
    # loop; appending the timestamp field to requested_features inside the
    # loop would corrupt the lookup for every entity after the first. Copying
    # the list also avoids mutating the caller's argument.
    hset_keys = [_mmh3(f"{feature_view}:{k}") for k in requested_features]
    ts_key = f"_ts:{feature_view}"
    hset_keys.append(ts_key)
    requested_features = requested_features + [ts_key]

    for entity_key in entity_keys:
        redis_key_bin = _redis_key(project, entity_key)
        # One blocking HMGET round trip per entity.
        values = client.hmget(redis_key_bin, hset_keys)
        res_val = dict(zip(requested_features, values))

        res_ts = Timestamp()
        ts_val = res_val.pop(ts_key)
        if ts_val:
            res_ts.ParseFromString(ts_val)

        res = {}
        for feature_name, val_bin in res_val.items():
            val = ValueProto()
            if val_bin:
                val.ParseFromString(val_bin)
            res[feature_name] = val

        if not res:
            result.append((None, None))
        else:
            timestamp = datetime.fromtimestamp(res_ts.seconds)
            result.append((timestamp, res))
    return result
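
# A hypothetical call site, for illustration only; the entity and feature
# names ("driver_id", "conv_rate") are assumptions, not fixtures from this
# codebase. It makes the cost model of the version above concrete: every
# entity key costs one blocking HMGET round trip, which is exactly what the
# pipelined rewrite below eliminates.

def read_driver_features(store, config: RepoConfig, view: FeatureView) -> None:
    keys = [
        EntityKeyProto(
            join_keys=["driver_id"],
            entity_values=[ValueProto(int64_val=driver_id)],
        )
        for driver_id in (1001, 1002, 1003)
    ]
    # Three entities => three sequential Redis round trips in the version above.
    rows = store.online_read(config, view, keys, requested_features=["conv_rate"])
    for event_ts, features in rows:
        if features is None:
            print("missing entity")
        else:
            print(event_ts, features["conv_rate"].double_val)
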
def online_read(
    self,
    config: RepoConfig,
    table: FeatureView,
    entity_keys: List[EntityKeyProto],
    requested_features: Optional[List[str]] = None,
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
    online_store_config = config.online_store
    assert isinstance(online_store_config, RedisOnlineStoreConfig)

    client = self._get_client(online_store_config)
    feature_view = table.name
    project = config.project

    result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []

    if not requested_features:
        requested_features = [f.name for f in table.features]

    # Field names are computed once and shared across all entities; copying
    # requested_features avoids mutating the caller's list.
    hset_keys = [_mmh3(f"{feature_view}:{k}") for k in requested_features]
    ts_key = f"_ts:{feature_view}"
    hset_keys.append(ts_key)
    requested_features = requested_features + [ts_key]

    keys = []
    for entity_key in entity_keys:
        redis_key_bin = _redis_key(project, entity_key)
        keys.append(redis_key_bin)

    # Queue one HMGET per entity and flush them all in a single round trip,
    # instead of paying one round trip per entity.
    with client.pipeline() as pipe:
        for redis_key_bin in keys:
            pipe.hmget(redis_key_bin, hset_keys)
        with tracing_span(name="remote_call"):
            redis_values = pipe.execute()
    for values in redis_values:
        features = self._get_features_for_entity(
            values, feature_view, requested_features
        )
        result.append(features)
    return result
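
# _get_features_for_entity is referenced above but not shown in this excerpt.
# A sketch of it, reconstructed from the inline parsing logic of the
# non-pipelined online_read above (the real helper may differ in detail):

def _get_features_for_entity(
    self,
    values: List[Optional[bytes]],
    feature_view: str,
    requested_features: List[str],
) -> Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]:
    res_val = dict(zip(requested_features, values))

    res_ts = Timestamp()
    ts_val = res_val.pop(f"_ts:{feature_view}")
    if ts_val:
        res_ts.ParseFromString(ts_val)

    res = {}
    for feature_name, val_bin in res_val.items():
        val = ValueProto()
        if val_bin:
            val.ParseFromString(val_bin)
        res[feature_name] = val

    if not res:
        return None, None
    return datetime.fromtimestamp(res_ts.seconds), res
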
def online_write_batch(
    self,
    config: RepoConfig,
    table: FeatureView,
    data: List[
        Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
    ],
    progress: Optional[Callable[[int], Any]],
) -> None:
    online_store_config = config.online_store
    assert isinstance(online_store_config, RedisOnlineStoreConfig)

    client = self._get_client(online_store_config)
    project = config.project
    feature_view = table.name
    ts_key = f"_ts:{feature_view}"
    keys = []
    # Redis pipelining optimization: send multiple commands to the Redis
    # server without waiting for a reply after each one.
    with client.pipeline() as pipe:
        # First pass: check whether a previous record exists under each key
        # so that stale writes can be skipped below.
        # TODO: investigate whether check-and-set is a better approach than
        # pulling every entity timestamp and then setting; it may be
        # significantly slower but would avoid potential (rare) race
        # conditions.
        for entity_key, _, _, _ in data:
            redis_key_bin = _redis_key(project, entity_key)
            keys.append(redis_key_bin)
            pipe.hmget(redis_key_bin, ts_key)
        prev_event_timestamps = pipe.execute()
        # Flatten the list of lists: hmget takes a list of fields per key and
        # returns one list of values per key, here a single timestamp each.
        prev_event_timestamps = [i[0] for i in prev_event_timestamps]

        for redis_key_bin, prev_event_time, (_, values, timestamp, _) in zip(
            keys, prev_event_timestamps, data
        ):
            event_time_seconds = int(utils.make_tzaware(timestamp).timestamp())

            # Skip the write if the incoming event is not newer than the
            # features currently in the online store.
            if prev_event_time:
                prev_ts = Timestamp()
                prev_ts.ParseFromString(prev_event_time)
                if prev_ts.seconds and event_time_seconds <= prev_ts.seconds:
                    # TODO: somehow signal that the current record is not
                    # being overwritten?
                    if progress:
                        progress(1)
                    continue

            ts = Timestamp()
            ts.seconds = event_time_seconds
            entity_hset = dict()
            entity_hset[ts_key] = ts.SerializeToString()

            for feature_name, val in values.items():
                f_key = _mmh3(f"{feature_view}:{feature_name}")
                entity_hset[f_key] = val.SerializeToString()

            pipe.hset(redis_key_bin, mapping=entity_hset)
            # TODO: support expiring the entity / features in Redis;
            # otherwise entity features remain in Redis until cleaned up by a
            # separate process (e.g. client.expire on redis_key_bin based on
            # a TTL setting).
        results = pipe.execute()
        if progress:
            progress(len(results))
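
# The final TODO could plausibly be addressed with Redis key TTLs. A minimal
# sketch, assuming a hypothetical ttl_seconds setting (not an option shown in
# this excerpt); EXPIRE is queued on the same pipeline as the write, so the
# expiry costs no extra round trip.

from redis import Redis


def hset_with_ttl(
    client: Redis, key: bytes, mapping: Dict[bytes, bytes], ttl_seconds: int
) -> None:
    with client.pipeline() as pipe:
        pipe.hset(key, mapping=mapping)
        if ttl_seconds > 0:
            # Refresh the expiry on every write: hot keys stay resident,
            # abandoned keys age out without a separate cleanup process.
            pipe.expire(key, ttl_seconds)
        pipe.execute()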