コード例 #1
0
ファイル: local_sqlite.py プロジェクト: sachuin23/feast
    def online_read(
        self,
        project: str,
        table: Union[FeatureTable, FeatureView],
        entity_key: EntityKeyProto,
    ) -> Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]:
        entity_key_bin = serialize_entity_key(entity_key)

        conn = self._get_conn()
        cur = conn.cursor()
        cur.execute(
            f"SELECT feature_name, value, event_ts FROM {_table_id(project, table)} WHERE entity_key = ?",
            (entity_key_bin, ),
        )

        res = {}
        res_ts = None
        for feature_name, val_bin, ts in cur.fetchall():
            val = ValueProto()
            val.ParseFromString(val_bin)
            res[feature_name] = val
            res_ts = ts

        if not res:
            return None, None
        else:
            return res_ts, res
コード例 #2
0
ファイル: helpers.py プロジェクト: dmatrix/feast
def compute_entity_id(entity_key: EntityKeyProto) -> str:
    """
    Compute Entity id given Feast Entity Key for online stores.
    Remember that Entity here refers to `EntityKeyProto` which is used in some online stores to encode the keys.
    It has nothing to do with the Entity concept we have in Feast.
    """
    return mmh3.hash_bytes(serialize_entity_key(entity_key)).hex()
コード例 #3
0
    def online_read(
        self,
        config: RepoConfig,
        table: Union[FeatureTable, FeatureView],
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        pass
        conn = self._get_conn(config)
        cur = conn.cursor()

        result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []

        project = config.project
        for entity_key in entity_keys:
            entity_key_bin = serialize_entity_key(entity_key)

            cur.execute(
                f"SELECT feature_name, value, event_ts FROM {_table_id(project, table)} WHERE entity_key = ?",
                (entity_key_bin,),
            )

            res = {}
            res_ts = None
            for feature_name, val_bin, ts in cur.fetchall():
                val = ValueProto()
                val.ParseFromString(val_bin)
                res[feature_name] = val
                res_ts = ts

            if not res:
                result.append((None, None))
            else:
                result.append((res_ts, res))
        return result
コード例 #4
0
    def online_read(
        self,
        config: RepoConfig,
        table: FeatureView,
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        conn = self._get_conn(config)
        cur = conn.cursor()

        result: List[Tuple[Optional[datetime],
                           Optional[Dict[str, ValueProto]]]] = []

        with tracing_span(name="remote_call"):
            # Fetch all entities in one go
            cur.execute(
                f"SELECT entity_key, feature_name, value, event_ts "
                f"FROM {_table_id(config.project, table)} "
                f"WHERE entity_key IN ({','.join('?' * len(entity_keys))}) "
                f"ORDER BY entity_key",
                [
                    serialize_entity_key(entity_key)
                    for entity_key in entity_keys
                ],
            )
            rows = cur.fetchall()

        rows = {
            k: list(group)
            for k, group in itertools.groupby(rows, key=lambda r: r[0])
        }
        for entity_key in entity_keys:
            entity_key_bin = serialize_entity_key(entity_key)
            res = {}
            res_ts = None
            for _, feature_name, val_bin, ts in rows.get(entity_key_bin, []):
                val = ValueProto()
                val.ParseFromString(val_bin)
                res[feature_name] = val
                res_ts = ts

            if not res:
                result.append((None, None))
            else:
                result.append((res_ts, res))
        return result
コード例 #5
0
ファイル: gcp.py プロジェクト: smarthi/feast
def compute_datastore_entity_id(entity_key: EntityKeyProto) -> str:
    """
    Compute Datastore Entity id given Feast Entity Key.

    Remember that Datastore Entity is a concept from the Datastore data model, that has nothing to
    do with the Entity concept we have in Feast.
    """
    return mmh3.hash_bytes(serialize_entity_key(entity_key)).hex()
コード例 #6
0
ファイル: sqlite.py プロジェクト: qooba/feast
    def online_write_batch(
        self,
        config: RepoConfig,
        table: Union[FeatureTable, FeatureView],
        data: List[
            Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
        ],
        progress: Optional[Callable[[int], Any]],
    ) -> None:

        conn = self._get_conn(config)

        project = config.project

        with conn:
            for entity_key, values, timestamp, created_ts in data:
                entity_key_bin = serialize_entity_key(entity_key)
                timestamp = _to_naive_utc(timestamp)
                if created_ts is not None:
                    created_ts = _to_naive_utc(created_ts)

                for feature_name, val in values.items():
                    conn.execute(
                        f"""
                            UPDATE {_table_id(project, table)}
                            SET value = ?, event_ts = ?, created_ts = ?
                            WHERE (entity_key = ? AND feature_name = ?)
                        """,
                        (
                            # SET
                            val.SerializeToString(),
                            timestamp,
                            created_ts,
                            # WHERE
                            entity_key_bin,
                            feature_name,
                        ),
                    )

                    conn.execute(
                        f"""INSERT OR IGNORE INTO {_table_id(project, table)}
                            (entity_key, feature_name, value, event_ts, created_ts)
                            VALUES (?, ?, ?, ?, ?)""",
                        (
                            entity_key_bin,
                            feature_name,
                            val.SerializeToString(),
                            timestamp,
                            created_ts,
                        ),
                    )
                if progress:
                    progress(1)
コード例 #7
0
    def online_write_batch(
        self,
        project: str,
        table: Union[FeatureTable, FeatureView],
        data: List[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime,
                         Optional[datetime]]],
    ) -> None:
        conn = self._get_conn()

        with conn:
            for entity_key, values, timestamp, created_ts in data:
                for feature_name, val in values.items():
                    entity_key_bin = serialize_entity_key(entity_key)
                    timestamp = _to_naive_utc(timestamp)
                    if created_ts is not None:
                        created_ts = _to_naive_utc(created_ts)

                    conn.execute(
                        f"""
                            UPDATE {_table_id(project, table)}
                            SET value = ?, event_ts = ?, created_ts = ?
                            WHERE (event_ts < ? OR (event_ts = ? AND (created_ts IS NULL OR ? IS NULL OR created_ts < ?)))
                            AND (entity_key = ? AND feature_name = ?)
                        """,
                        (
                            # SET
                            val.SerializeToString(),
                            timestamp,
                            created_ts,
                            # WHERE
                            timestamp,
                            timestamp,
                            created_ts,
                            created_ts,
                            entity_key_bin,
                            feature_name,
                        ),
                    )

                    conn.execute(
                        f"""INSERT OR IGNORE INTO {_table_id(project, table)}
                            (entity_key, feature_name, value, event_ts, created_ts)
                            VALUES (?, ?, ?, ?, ?)""",
                        (
                            entity_key_bin,
                            feature_name,
                            val.SerializeToString(),
                            timestamp,
                            created_ts,
                        ),
                    )
コード例 #8
0
ファイル: postgres.py プロジェクト: feast-dev/feast
    def online_write_batch(
        self,
        config: RepoConfig,
        table: FeatureView,
        data: List[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime,
                         Optional[datetime]]],
        progress: Optional[Callable[[int], Any]],
    ) -> None:
        project = config.project

        with self._get_conn(config) as conn, conn.cursor() as cur:
            insert_values = []
            for entity_key, values, timestamp, created_ts in data:
                entity_key_bin = serialize_entity_key(entity_key)
                timestamp = _to_naive_utc(timestamp)
                if created_ts is not None:
                    created_ts = _to_naive_utc(created_ts)

                for feature_name, val in values.items():
                    insert_values.append((
                        entity_key_bin,
                        feature_name,
                        val.SerializeToString(),
                        timestamp,
                        created_ts,
                    ))
            # Controll the batch so that we can update the progress
            batch_size = 5000
            for i in range(0, len(insert_values), batch_size):
                cur_batch = insert_values[i:i + batch_size]
                execute_values(
                    cur,
                    sql.SQL(
                        """
                        INSERT INTO {}
                        (entity_key, feature_name, value, event_ts, created_ts)
                        VALUES %s
                        ON CONFLICT (entity_key, feature_name) DO
                        UPDATE SET
                            value = EXCLUDED.value,
                            event_ts = EXCLUDED.event_ts,
                            created_ts = EXCLUDED.created_ts;
                        """, ).format(sql.Identifier(_table_id(project,
                                                               table))),
                    cur_batch,
                    page_size=batch_size,
                )
                if progress:
                    progress(len(cur_batch))
コード例 #9
0
ファイル: postgres.py プロジェクト: feast-dev/feast
    def online_read(
        self,
        config: RepoConfig,
        table: FeatureView,
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        result: List[Tuple[Optional[datetime],
                           Optional[Dict[str, ValueProto]]]] = []

        project = config.project
        with self._get_conn(config) as conn, conn.cursor() as cur:
            # Collecting all the keys to a list allows us to make fewer round trips
            # to PostgreSQL
            keys = []
            for entity_key in entity_keys:
                keys.append(serialize_entity_key(entity_key))

            cur.execute(
                sql.SQL("""
                    SELECT entity_key, feature_name, value, event_ts
                    FROM {} WHERE entity_key = ANY(%s);
                    """).format(sql.Identifier(_table_id(project, table)), ),
                (keys, ),
            )

            rows = cur.fetchall()

            # Since we don't know the order returned from PostgreSQL we'll need
            # to construct a dict to be able to quickly look up the correct row
            # when we iterate through the keys since they are in the correct order
            values_dict = defaultdict(list)
            for row in rows if rows is not None else []:
                values_dict[row[0].tobytes()].append(row[1:])

            for key in keys:
                if key in values_dict:
                    value = values_dict[key]
                    res = {}
                    for feature_name, value_bin, event_ts in value:
                        val = ValueProto()
                        val.ParseFromString(value_bin)
                        res[feature_name] = val
                    result.append((event_ts, res))
                else:
                    result.append((None, None))

        return result
コード例 #10
0
ファイル: helpers.py プロジェクト: dmatrix/feast
def _redis_key(project: str, entity_key: EntityKeyProto) -> bytes:
    key: List[bytes] = [
        serialize_entity_key(entity_key),
        project.encode("utf-8")
    ]
    return b"".join(key)