コード例 #1
0
ファイル: dynamodb.py プロジェクト: Shopify/feast
    def online_read(
        self,
        config: RepoConfig,
        table: FeatureView,
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """Read the stored feature values for each entity key from DynamoDB.

        One ``get_item`` request is issued per entity key, each wrapped in a
        ``remote_call`` tracing span.

        Args:
            config: Repo configuration; its ``online_store`` must be a
                ``DynamoDBOnlineStoreConfig``.
            table: Feature view whose DynamoDB table is read.
            entity_keys: Entity keys to look up.
            requested_features: Not used by this implementation; every
                feature stored on a found item is returned.

        Returns:
            One tuple per entity key, in input order: ``(event_ts, features)``
            when an item exists, ``(None, None)`` otherwise.
        """
        online_config = config.online_store
        assert isinstance(online_config, DynamoDBOnlineStoreConfig)
        dynamodb_resource = self._get_dynamodb_resource(online_config.region)
        # The table handle does not depend on the entity key: build it once
        # instead of on every loop iteration.
        table_instance = dynamodb_resource.Table(_get_table_name(config, table))

        result: List[Tuple[Optional[datetime],
                           Optional[Dict[str, ValueProto]]]] = []
        for entity_key in entity_keys:
            entity_id = compute_entity_id(entity_key)
            with tracing_span(name="remote_call"):
                response = table_instance.get_item(
                    Key={"entity_id": entity_id})
            item = response.get("Item")

            if item is None:
                result.append((None, None))
                continue

            res = {}
            for feature_name, value_bin in item["values"].items():
                val = ValueProto()
                # The stored binary wrapper exposes the raw bytes as ``.value``.
                val.ParseFromString(value_bin.value)
                res[feature_name] = val
            # ``event_ts`` is read back as an ISO-format string.
            result.append((datetime.fromisoformat(item["event_ts"]), res))
        return result
コード例 #2
0
    def online_read(
        self,
        config: RepoConfig,
        table: Union[FeatureTable, FeatureView],
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """Read the stored feature values for each entity key from DynamoDB.

        One ``get_item`` request is issued per entity key against the table
        named ``{config.project}.{table.name}``.

        Args:
            config: Repo configuration; its ``online_store`` must be a
                ``DynamoDBOnlineStoreConfig``.
            table: Feature table or feature view whose table is read.
            entity_keys: Entity keys to look up.
            requested_features: Not used by this implementation; every
                feature stored on a found item is returned.

        Returns:
            One tuple per entity key, in input order: ``(event_ts, features)``
            when an item exists, ``(None, None)`` otherwise.
        """
        online_config = config.online_store
        assert isinstance(online_config, DynamoDBOnlineStoreConfig)
        _, dynamodb_resource = self._initialize_dynamodb(online_config)
        # The table handle does not depend on the entity key: build it once
        # instead of on every loop iteration.
        table_instance = dynamodb_resource.Table(
            f"{config.project}.{table.name}")

        result: List[Tuple[Optional[datetime],
                           Optional[Dict[str, ValueProto]]]] = []
        for entity_key in entity_keys:
            entity_id = compute_entity_id(entity_key)
            response = table_instance.get_item(Key={"entity_id": entity_id})
            item = response.get("Item")

            if item is None:
                result.append((None, None))
                continue

            res = {}
            for feature_name, value_bin in item["values"].items():
                val = ValueProto()
                val.ParseFromString(value_bin.value)
                res[feature_name] = val
            # NOTE(review): assumes ``event_ts`` was written as
            # ``str(datetime)``, as the DynamoDB writers shown alongside this
            # method do. Parse it so the declared ``datetime`` return type
            # holds instead of leaking the raw string to callers.
            result.append((datetime.fromisoformat(item["event_ts"]), res))
        return result
コード例 #3
0
ファイル: dynamodb.py プロジェクト: Shopify/feast
    def online_write_batch(
        self,
        config: RepoConfig,
        table: FeatureView,
        data: List[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime,
                         Optional[datetime]]],
        progress: Optional[Callable[[int], Any]],
    ) -> None:
        """Write a batch of feature rows to the feature view's DynamoDB table.

        Args:
            config: Repo configuration; its ``online_store`` must be a
                ``DynamoDBOnlineStoreConfig``.
            table: Feature view whose table receives the rows.
            data: ``(entity_key, features, event timestamp, created
                timestamp)`` tuples. The created timestamp is not stored.
            progress: Optional callback, invoked with ``1`` after each row is
                handed to the batch writer.
        """
        online_config = config.online_store
        assert isinstance(online_config, DynamoDBOnlineStoreConfig)
        resource = self._get_dynamodb_resource(online_config.region)
        target_table = resource.Table(_get_table_name(config, table))

        with target_table.batch_writer() as writer:
            for entity_key, features, timestamp, _created_ts in data:
                partition_key = compute_entity_id(entity_key)
                event_ts = str(utils.make_tzaware(timestamp))
                # Each feature value is stored as its serialized proto bytes.
                serialized_features = {
                    name: proto.SerializeToString()
                    for name, proto in features.items()
                }
                row = {
                    "entity_id": partition_key,  # PartitionKey
                    "event_ts": event_ts,
                    "values": serialized_features,
                }
                writer.put_item(Item=row)
                if progress:
                    progress(1)
コード例 #4
0
    def online_read(
        self,
        config: RepoConfig,
        table: Union[FeatureTable, FeatureView],
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """Read the stored feature values for each entity key from Datastore.

        One ``client.get`` call is made per entity key.

        Args:
            config: Repo configuration; its ``online_store`` must be a
                ``DatastoreOnlineStoreConfig``.
            table: Feature table or feature view whose rows are read.
            entity_keys: Entity keys to look up.
            requested_features: Not used by this implementation.

        Returns:
            One tuple per entity key, in input order: ``(event_ts, features)``
            when the row exists, ``(None, None)`` otherwise.
        """
        online_config = config.online_store
        assert isinstance(online_config, DatastoreOnlineStoreConfig)
        client = self._get_client(online_config)

        project_name = config.project

        rows: List[Tuple[Optional[datetime],
                         Optional[Dict[str, ValueProto]]]] = []
        for entity_key in entity_keys:
            row_id = compute_entity_id(entity_key)
            row_key = client.key(
                "Project", project_name,
                "Table", table.name,
                "Row", row_id,
            )
            entity = client.get(row_key)
            if entity is None:
                rows.append((None, None))
                continue

            features = {}
            for feature_name, raw_bytes in entity["values"].items():
                proto = ValueProto()
                proto.ParseFromString(raw_bytes)
                features[feature_name] = proto
            rows.append((entity["event_ts"], features))
        return rows
コード例 #5
0
def _insert_data_test_table(data, project, tbl_name, region):
    """Write test rows into the ``{project}.{tbl_name}`` DynamoDB table.

    Args:
        data: Iterable of ``(entity_key, features, timestamp, created_ts)``
            tuples. ``created_ts`` is not stored.
        project: Project name; forms the table-name prefix.
        tbl_name: Table name; forms the table-name suffix.
        region: AWS region passed to ``boto3.resource``.
    """
    dynamodb_resource = boto3.resource("dynamodb", region_name=region)
    table_instance = dynamodb_resource.Table(f"{project}.{tbl_name}")
    # Use one batch writer for the whole data set so puts are buffered into
    # batched requests; opening a writer per row flushed one request per row.
    # ``overwrite_by_pkeys`` keeps the previous last-write-wins outcome when
    # ``data`` repeats an entity id within a buffered batch.
    with table_instance.batch_writer(overwrite_by_pkeys=["entity_id"]) as batch:
        for entity_key, features, timestamp, _created_ts in data:
            entity_id = compute_entity_id(entity_key)
            batch.put_item(
                Item={
                    "entity_id": entity_id,
                    "event_ts": str(utils.make_tzaware(timestamp)),
                    "values":
                    {k: v.SerializeToString()
                     for k, v in features.items()},
                })
コード例 #6
0
ファイル: datastore.py プロジェクト: pyalex/feast
    def _write_minibatch(
        client,
        project: str,
        table: Union[FeatureTable, FeatureView],
        data: Sequence[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime,
                             Optional[datetime]]],
        progress: Optional[Callable[[int], Any]],
    ):
        """Write one mini-batch of feature rows to Datastore in a transaction.

        Args:
            client: Datastore client used to build keys and write entities.
            project: Project name used in each row's key path.
            table: Feature table or feature view; its ``name`` is part of the
                key path.
            data: ``(entity_key, features, event timestamp, created
                timestamp)`` tuples; the created timestamp may be ``None``.
            progress: Optional callback, invoked once with the number of
                entities written.
        """
        pending = []
        for entity_key, features, timestamp, created_ts in data:
            row_id = compute_entity_id(entity_key)
            row_key = client.key("Project", project, "Table", table.name,
                                 "Row", row_id)

            # None of the payload fields are indexed.
            unindexed = ("created_ts", "event_ts", "values")
            row = datastore.Entity(key=row_key, exclude_from_indexes=unindexed)

            payload = {
                "key": entity_key.SerializeToString(),
                "values": {
                    name: proto.SerializeToString()
                    for name, proto in features.items()
                },
                "event_ts": utils.make_tzaware(timestamp),
                "created_ts": (None if created_ts is None else
                               utils.make_tzaware(created_ts)),
            }
            row.update(payload)
            pending.append(row)

        with client.transaction():
            client.put_multi(pending)

        if progress:
            progress(len(pending))
コード例 #7
0
ファイル: datastore.py プロジェクト: pyalex/feast
    def online_read(
        self,
        config: RepoConfig,
        table: Union[FeatureTable, FeatureView],
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """Read feature values for all entity keys with one ``get_multi`` call.

        Args:
            config: Repo configuration; its ``online_store`` must be a
                ``DatastoreOnlineStoreConfig``.
            table: Feature table or feature view whose rows are read.
            entity_keys: Entity keys to look up.
            requested_features: Not used by this implementation.

        Returns:
            One tuple per entity key, in input order: ``(event_ts, features)``
            when the row was returned, ``(None, None)`` otherwise.
        """
        online_config = config.online_store
        assert isinstance(online_config, DatastoreOnlineStoreConfig)
        client = self._get_client(online_config)

        feast_project = config.project

        keys: List[Key] = []
        for entity_key in entity_keys:
            row_id = compute_entity_id(entity_key)
            keys.append(
                client.key("Project", feast_project, "Table", table.name,
                           "Row", row_id))

        # NOTE: get_multi doesn't return values in the same order as the keys
        # in the request, and it may return fewer values than keys when some
        # are missing, so index the response by key before re-ordering.
        with tracing_span(name="remote_call"):
            fetched = client.get_multi(keys)
        if fetched is None:
            by_key = {}
        else:
            by_key = {entity.key: entity for entity in fetched}

        result: List[Tuple[Optional[datetime],
                           Optional[Dict[str, ValueProto]]]] = []
        for key in keys:
            if key not in by_key:
                result.append((None, None))
                continue

            entity = by_key[key]
            features = {}
            for feature_name, raw_bytes in entity["values"].items():
                proto = ValueProto()
                proto.ParseFromString(raw_bytes)
                features[feature_name] = proto
            result.append((entity["event_ts"], features))

        return result
コード例 #8
0
ファイル: datastore.py プロジェクト: tedhtchang/feast
    def online_read(
        self,
        config: RepoConfig,
        table: Union[FeatureTable, FeatureView],
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """Read feature values for all entity keys with one ``get_multi`` call.

        Args:
            config: Repo configuration; its ``online_store`` must be a
                ``DatastoreOnlineStoreConfig``.
            table: Feature table or feature view whose rows are read.
            entity_keys: Entity keys to look up.
            requested_features: Not used by this implementation.

        Returns:
            Exactly one tuple per entity key, in input order:
            ``(event_ts, features)`` when the row was returned,
            ``(None, None)`` otherwise.
        """
        online_config = config.online_store
        assert isinstance(online_config, DatastoreOnlineStoreConfig)
        client = self._get_client(online_config)

        feast_project = config.project

        keys: List[Key] = []
        for entity_key in entity_keys:
            document_id = compute_entity_id(entity_key)
            key = client.key("Project", feast_project, "Table", table.name,
                             "Row", document_id)
            keys.append(key)

        values = client.get_multi(keys)

        # Index the response by key and walk the requested keys in order.
        # This replaces sorting/inserting via ``keys.index`` (quadratic, and
        # wrong when the same key is requested more than once) and guarantees
        # one result slot per requested key, also when nothing is returned.
        values_by_key = {v.key: v for v in values} if values is not None else {}

        result: List[Tuple[Optional[datetime],
                           Optional[Dict[str, ValueProto]]]] = []
        for key in keys:
            if key not in values_by_key:
                result.append((None, None))
                continue

            value = values_by_key[key]
            res = {}
            for feature_name, value_bin in value["values"].items():
                val = ValueProto()
                val.ParseFromString(value_bin)
                res[feature_name] = val
            result.append((value["event_ts"], res))

        return result
コード例 #9
0
 def _write_batch_non_duplicates(
     self,
     table_instance,
     data: List[
         Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
     ],
     progress: Optional[Callable[[int], Any]],
 ):
     """Write rows through a batch writer that de-duplicates on ``entity_id``.

     Args:
         table_instance: Table object providing ``batch_writer``.
         data: ``(entity_key, features, event timestamp, created timestamp)``
             tuples. The created timestamp is not stored.
         progress: Optional callback, invoked with ``1`` after each row is
             handed to the batch writer.
     """
     with table_instance.batch_writer(overwrite_by_pkeys=["entity_id"]) as writer:
         for entity_key, features, timestamp, _created_ts in data:
             partition_key = compute_entity_id(entity_key)
             event_ts = str(utils.make_tzaware(timestamp))
             # Each feature value is stored as its serialized proto bytes.
             serialized_features = {
                 name: proto.SerializeToString()
                 for name, proto in features.items()
             }
             row = {
                 "entity_id": partition_key,  # PartitionKey
                 "event_ts": event_ts,
                 "values": serialized_features,
             }
             writer.put_item(Item=row)
             if progress:
                 progress(1)
コード例 #10
0
    def online_read(
        self,
        config: RepoConfig,
        table: FeatureView,
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """
        Retrieve feature values from the online DynamoDB store.

        Entity ids are requested in chunks of ``online_config.batch_size``
        through ``batch_get_item``.

        Args:
            config: The RepoConfig for the current FeatureStore.
            table: Feast FeatureView.
            entity_keys: a list of entity keys that should be read from the FeatureStore.
            requested_features: Not used by this implementation.

        Returns:
            Exactly one tuple per entity key, in input order:
            ``(event_ts, features)`` when an item was returned for the key,
            ``(None, None)`` otherwise.
        """
        online_config = config.online_store
        assert isinstance(online_config, DynamoDBOnlineStoreConfig)
        dynamodb_resource = self._get_dynamodb_resource(
            online_config.region, online_config.endpoint_url
        )
        table_instance = dynamodb_resource.Table(
            _get_table_name(online_config, config, table)
        )

        result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
        entity_ids = [compute_entity_id(entity_key) for entity_key in entity_keys]
        batch_size = online_config.batch_size
        entity_ids_iter = iter(entity_ids)
        while True:
            batch = list(itertools.islice(entity_ids_iter, batch_size))
            # No more items to read
            if not batch:
                break
            batch_entity_ids = {
                table_instance.name: {
                    "Keys": [{"entity_id": entity_id} for entity_id in batch]
                }
            }
            with tracing_span(name="remote_call"):
                response = dynamodb_resource.batch_get_item(
                    RequestItems=batch_entity_ids
                )
            table_responses = (
                response.get("Responses", {}).get(table_instance.name) or []
            )
            # Resolve every id of *this* batch against the returned items.
            # The previous padding compared ``len(batch)`` with the length of
            # the accumulated ``result``, so ids missing from any batch after
            # the first were never padded and the output lost alignment with
            # ``entity_keys``.
            # NOTE(review): ``UnprocessedKeys`` in the response is not read
            # here, so such keys are reported as not found.
            found = {tbl_res["entity_id"]: tbl_res for tbl_res in table_responses}
            for entity_id in batch:
                tbl_res = found.get(entity_id)
                if tbl_res is None:
                    result.append((None, None))
                    continue
                res = {}
                for feature_name, value_bin in tbl_res["values"].items():
                    val = ValueProto()
                    val.ParseFromString(value_bin.value)
                    res[feature_name] = val
                result.append((datetime.fromisoformat(tbl_res["event_ts"]), res))
        return result