예제 #1
0
파일: redis.py 프로젝트: qooba/feast
    def online_write_batch(
        self,
        config: RepoConfig,
        table: Union[FeatureTable, FeatureView],
        data: List[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime,
                         Optional[datetime]]],
        progress: Optional[Callable[[int], Any]],
    ) -> None:
        """Write one hash per entity containing its serialized feature values.

        For every row in ``data`` a hash mapping is built and sent with
        ``client.hset`` under the key produced by ``_redis_key``. The mapping
        holds:

        * ``_ts:<feature view name>`` - the row's event timestamp, serialized
        * ``_ex:<feature view name>`` - a serialized timestamp whose seconds
          are ``EX_SECONDS``
        * one field per feature, keyed by ``_mmh3("<view>:<feature>")``

        Args:
            config: Repo configuration; ``config.online_store`` must be a
                ``RedisOnlineStoreConfig``.
            table: Feature table/view whose ``name`` namespaces the fields.
            data: Rows of ``(entity_key, values, event_timestamp,
                created_timestamp)``. The created timestamp is not used.
            progress: Optional callback, invoked with ``1`` after each row is
                written.
        """
        online_store_config = config.online_store
        assert isinstance(online_store_config, RedisOnlineStoreConfig)

        client = self._get_client(online_store_config)
        project = config.project
        feature_view = table.name

        ts_key = f"_ts:{feature_view}"
        ex_key = f"_ex:{feature_view}"

        # The "_ex" payload is identical for every row, so serialize it once.
        ex = Timestamp()
        ex.seconds = EX_SECONDS
        ex_str = ex.SerializeToString()

        for entity_key, values, timestamp, _created_ts in data:
            redis_key_bin = _redis_key(project, entity_key)

            ts = Timestamp()
            ts.seconds = int(utils.make_tzaware(timestamp).timestamp())

            # Build a fresh mapping for every entity. Reusing a single dict
            # across iterations would carry feature fields from earlier rows
            # into the hashes of later rows that do not provide them.
            entity_hset = {ts_key: ts.SerializeToString(), ex_key: ex_str}
            for feature_name, val in values.items():
                f_key = _mmh3(f"{feature_view}:{feature_name}")
                entity_hset[f_key] = val.SerializeToString()

            client.hset(redis_key_bin, mapping=entity_hset)
            if progress:
                progress(1)
예제 #2
0
파일: redis.py 프로젝트: qooba/feast
    def online_read(
        self,
        config: RepoConfig,
        table: Union[FeatureTable, FeatureView],
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """Read the requested features of each entity from its hash.

        One ``hmget`` is issued per entity key, asking for the hashed field
        of every requested feature plus the ``_ts:<feature view name>`` field.

        Args:
            config: Repo configuration; ``config.online_store`` must be a
                ``RedisOnlineStoreConfig``.
            table: Feature table/view; its ``name`` namespaces the fields and
                its ``features`` are used when no features are requested.
            entity_keys: Entities to look up.
            requested_features: Feature names to read. When empty or ``None``
                every feature of ``table`` is read. The list is not modified.

        Returns:
            One tuple per entity key, in input order. Each tuple is
            ``(timestamp, {feature_name: ValueProto})``; fields missing from
            the hash yield a default-constructed ``ValueProto``. When there
            are no features to read the tuple is ``(None, None)``.
        """
        online_store_config = config.online_store
        assert isinstance(online_store_config, RedisOnlineStoreConfig)

        client = self._get_client(online_store_config)
        feature_view = table.name
        project = config.project

        result: List[Tuple[Optional[datetime],
                           Optional[Dict[str, ValueProto]]]] = []

        # Work on a private copy so the caller's list is never mutated.
        if requested_features:
            feature_names = list(requested_features)
        else:
            feature_names = [f.name for f in table.features]

        # The field list is the same for every entity: build it once, with
        # the timestamp field in the last position.
        ts_key = f"_ts:{feature_view}"
        hset_keys = [_mmh3(f"{feature_view}:{k}") for k in feature_names]
        hset_keys.append(ts_key)

        for entity_key in entity_keys:
            redis_key_bin = _redis_key(project, entity_key)
            values = client.hmget(redis_key_bin, hset_keys)

            # Last reply element corresponds to ts_key (appended last above).
            res_ts = Timestamp()
            ts_val = values[-1]
            if ts_val:
                res_ts.ParseFromString(ts_val)

            # zip stops at the end of feature_names, so the trailing
            # timestamp value is not treated as a feature.
            res = {}
            for feature_name, val_bin in zip(feature_names, values):
                val = ValueProto()
                if val_bin:
                    val.ParseFromString(val_bin)
                res[feature_name] = val

            if not res:
                result.append((None, None))
            else:
                # NOTE(review): fromtimestamp() without tz yields a naive
                # datetime in local time - confirm callers expect that.
                timestamp = datetime.fromtimestamp(res_ts.seconds)
                result.append((timestamp, res))
        return result
예제 #3
0
파일: redis.py 프로젝트: Shopify/feast
    def online_read(
        self,
        config: RepoConfig,
        table: FeatureView,
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """Read the requested features of each entity using one pipeline.

        An ``hmget`` per entity key is queued on a pipeline and all of them
        are executed together; each reply is then decoded by
        ``self._get_features_for_entity``.

        Args:
            config: Repo configuration; ``config.online_store`` must be a
                ``RedisOnlineStoreConfig``.
            table: Feature view; its ``name`` namespaces the fields and its
                ``features`` are used when no features are requested.
            entity_keys: Entities to look up.
            requested_features: Feature names to read. When empty or ``None``
                every feature of ``table`` is read. The list is not modified.

        Returns:
            One entry per entity key, in input order, as produced by
            ``self._get_features_for_entity``.
        """
        online_store_config = config.online_store
        assert isinstance(online_store_config, RedisOnlineStoreConfig)

        client = self._get_client(online_store_config)
        feature_view = table.name
        project = config.project

        result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []

        if not requested_features:
            requested_features = [f.name for f in table.features]

        hset_keys = [_mmh3(f"{feature_view}:{k}") for k in requested_features]

        ts_key = f"_ts:{feature_view}"
        hset_keys.append(ts_key)
        # Build a separate name list for decoding instead of appending to
        # `requested_features`: that list may belong to the caller, and
        # mutating it would leak the timestamp key into later calls.
        feature_names = [*requested_features, ts_key]

        keys = [_redis_key(project, entity_key) for entity_key in entity_keys]

        with client.pipeline() as pipe:
            for redis_key_bin in keys:
                pipe.hmget(redis_key_bin, hset_keys)
            with tracing_span(name="remote_call"):
                redis_values = pipe.execute()

        for values in redis_values:
            features = self._get_features_for_entity(
                values, feature_view, feature_names
            )
            result.append(features)
        return result
예제 #4
0
파일: redis.py 프로젝트: Shopify/feast
    def online_write_batch(
        self,
        config: RepoConfig,
        table: FeatureView,
        data: List[
            Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
        ],
        progress: Optional[Callable[[int], Any]],
    ) -> None:
        """Write rows to the store, skipping rows not newer than stored data.

        Runs in two pipeline round trips: the first fetches the stored
        ``_ts:<feature view name>`` field of every entity, the second writes
        the hash of each row whose event timestamp is strictly greater than
        the stored one (or that has no stored timestamp).

        Args:
            config: Repo configuration; ``config.online_store`` must be a
                ``RedisOnlineStoreConfig``.
            table: Feature view whose ``name`` namespaces the fields.
            data: Rows of ``(entity_key, values, event_timestamp,
                created_timestamp)``. The created timestamp is not used.
            progress: Optional callback. Called with ``1`` for every skipped
                row and once with the number of executed writes at the end.
        """
        store_config = config.online_store
        assert isinstance(store_config, RedisOnlineStoreConfig)

        redis_client = self._get_client(store_config)
        project_name = config.project

        view_name = table.name
        ts_field = f"_ts:{view_name}"

        # Pipelining: queue many commands and pay for a single round trip
        # instead of waiting for each reply individually.
        with redis_client.pipeline() as pipe:
            # Round trip 1: look up the currently stored event timestamps.
            # TODO: investigate if check and set is a better approach rather than pulling all entity ts and then setting
            # it may be significantly slower but avoids potential (rare) race conditions
            redis_keys = [
                _redis_key(project_name, entity_key) for entity_key, _, _, _ in data
            ]
            for key in redis_keys:
                pipe.hmget(key, ts_field)
            # Each `hmget` reply is a list (one element per requested field);
            # only one field was requested, so take the first element.
            stored_ts_bins = [reply[0] for reply in pipe.execute()]

            # Round trip 2: queue writes for rows that carry newer data.
            for key, stored_ts_bin, row in zip(redis_keys, stored_ts_bins, data):
                _, feature_values, event_ts, _ = row
                new_seconds = int(utils.make_tzaware(event_ts).timestamp())

                if stored_ts_bin:
                    stored_ts = Timestamp()
                    stored_ts.ParseFromString(stored_ts_bin)
                    not_newer = (
                        stored_ts.seconds and new_seconds <= stored_ts.seconds
                    )
                    if not_newer:
                        # TODO: somehow signal that it's not overwriting the current record?
                        if progress:
                            progress(1)
                        continue

                new_ts = Timestamp()
                new_ts.seconds = new_seconds
                payload = {ts_field: new_ts.SerializeToString()}
                payload.update(
                    (_mmh3(f"{view_name}:{name}"), proto.SerializeToString())
                    for name, proto in feature_values.items()
                )

                pipe.hset(key, mapping=payload)
                # TODO: support expiring the entity / features in Redis
                # otherwise entity features remain in redis until cleaned up in separate process
                # client.expire redis_key_bin based a ttl setting
            write_replies = pipe.execute()
            if progress:
                progress(len(write_replies))