def test_order_by_creation_time(client):
    proc_time_fs = FeatureSet(
        "processing_time",
        features=[Feature("feature_value", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(proc_time_fs)
    time.sleep(10)  # give Feast Core a moment to register the feature set
    proc_time_fs = client.get_feature_set(name="processing_time", version=1)

    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)
    N_ROWS = 10
    incorrect_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": [i for i in range(N_ROWS)],
        "feature_value": ["WRONG"] * N_ROWS,
    })
    correct_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": [i for i in range(N_ROWS)],
        "feature_value": ["CORRECT"] * N_ROWS,
    })
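    # Both DataFrames share the same event timestamp; only the ingestion order
    # (and hence the creation time) differs, so retrieval should surface the
    # rows that were created last.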
    client.ingest(proc_time_fs, incorrect_df)
    time.sleep(10)  # ensure the second ingestion has a strictly later creation time
    client.ingest(proc_time_fs, correct_df)
    feature_retrieval_job = client.get_batch_features(
        entity_rows=incorrect_df[["datetime", "entity_id"]],
        feature_ids=["processing_time:1:feature_value"])
    output = feature_retrieval_job.to_dataframe()
    print(output.head())

    assert output["processing_time_v1_feature_value"].to_list() == ["CORRECT"
                                                                    ] * N_ROWS
Example #2
def test_update_featureset_update_featureset_and_ingest_second_subset(
        client, update_featureset_dataframe):
    subset_columns = [
        "datetime",
        "entity_id",
        "update_feature1",
        "update_feature3",
        "update_feature4",
    ]
    subset_df = update_featureset_dataframe.iloc[5:][subset_columns]
    update_fs = FeatureSet(
        "update_fs",
        entities=[Entity(name="entity_id", dtype=ValueType.INT64)],
        max_age=Duration(seconds=432000),
    )
    update_fs.infer_fields_from_df(subset_df)
    client.apply(update_fs)

    # Retry this ingestion until every row makes it into the buffer. This is
    # necessary because BigQuery streaming inserts cache table schemas, so
    # rows written shortly after a schema change may be dropped.
    while True:
        ingestion_id = client.ingest(feature_set=update_fs, source=subset_df)
        time.sleep(15)  # wait for rows to get written to bq
        rows_ingested = get_rows_ingested(client, update_fs, ingestion_id)
        if rows_ingested == len(subset_df):
            print(
                f"Number of rows successfully ingested: {rows_ingested}. Continuing."
            )
            break
        print(
            f"Number of rows successfully ingested: {rows_ingested}. Retrying ingestion."
        )
        time.sleep(30)

    def check():
        feature_retrieval_job = client.get_batch_features(
            entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[5:],
            feature_refs=[
                "update_feature1",
                "update_feature3",
                "update_feature4",
            ],
            project=PROJECT_NAME,
        )

        output = feature_retrieval_job.to_dataframe(
            timeout_sec=180).sort_values(by=["entity_id"])
        print(output.head())

        assert output["update_feature1"].to_list(
        ) == subset_df["update_feature1"].to_list()
        assert output["update_feature3"].to_list(
        ) == subset_df["update_feature3"].to_list()
        assert output["update_feature4"].to_list(
        ) == subset_df["update_feature4"].to_list()
        clean_up_remote_files(feature_retrieval_job.get_avro_files())

    wait_for(check, timedelta(minutes=5))
Example #3
def test_update_featureset_apply_featureset_and_ingest_first_subset(
        client, update_featureset_dataframe):
    subset_columns = [
        "datetime", "entity_id", "update_feature1", "update_feature2"
    ]
    subset_df = update_featureset_dataframe.iloc[:5][subset_columns]
    update_fs = FeatureSet(
        "update_fs",
        entities=[Entity(name="entity_id", dtype=ValueType.INT64)],
        max_age=Duration(seconds=432000),
    )
    update_fs.infer_fields_from_df(subset_df)
    client.apply(update_fs)

    client.ingest(feature_set=update_fs, source=subset_df)

    time.sleep(15)
    feature_retrieval_job = client.get_batch_features(
        entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[:5],
        feature_refs=[
            f"{PROJECT_NAME}/update_feature1",
            f"{PROJECT_NAME}/update_feature2",
        ],
    )

    output = feature_retrieval_job.to_dataframe().sort_values(by=["entity_id"])
    clean_up_remote_files(feature_retrieval_job.get_avro_files())
    print(output.head())

    assert output["update_feature1"].to_list(
    ) == subset_df["update_feature1"].to_list()
    assert output["update_feature2"].to_list(
    ) == subset_df["update_feature2"].to_list()
Example #4
    def get_entity(self, name: str, project: str = None) -> Entity:
        """
        Retrieves an entity.

        Args:
            project: Feast project that this entity belongs to
            name: Name of entity

        Returns:
            The specified entity. Raises an exception if none is found.
        """

        if self._telemetry_enabled:
            log_usage(
                "get_entity",
                self._telemetry_id,
                datetime.utcnow(),
                self.version(sdk_only=True),
            )
        if project is None:
            project = self.project

        try:
            get_entity_response = self._core_service.GetEntity(
                GetEntityRequest(project=project, name=name.strip()),
                metadata=self._get_grpc_metadata(),
            )  # type: GetEntityResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())
        entity = Entity.from_proto(get_entity_response.entity)

        return entity
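A minimal usage sketch for the method above; the client instance and the entity name are assumed for illustration:

    # Hypothetical call: look up "driver_id" in the client's default project.
    entity = client.get_entity("driver_id")
    print(entity.name)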
Example #5
    @classmethod
    def from_proto(cls, feature_set_proto: FeatureSetProto):
        """
        Creates a feature set from a protobuf representation of a feature set

        Args:
            feature_set_proto: A protobuf representation of a feature set

        Returns:
            A FeatureSet object built from the feature set protobuf
        """

        feature_set = cls(
            name=feature_set_proto.spec.name,
            features=[
                Feature.from_proto(feature)
                for feature in feature_set_proto.spec.features
            ],
            entities=[
                Entity.from_proto(entity)
                for entity in feature_set_proto.spec.entities
            ],
            # A proto3 default Duration (0s/0ns) means max_age was never set.
            max_age=(None
                     if feature_set_proto.spec.max_age.seconds == 0
                     and feature_set_proto.spec.max_age.nanos == 0
                     else feature_set_proto.spec.max_age),
            labels=feature_set_proto.spec.labels,
            source=(None if feature_set_proto.spec.source.type == 0 else
                    Source.from_proto(feature_set_proto.spec.source)),
            project=(None if len(feature_set_proto.spec.project) == 0 else
                     feature_set_proto.spec.project),
        )
        feature_set._status = feature_set_proto.meta.status  # type: ignore
        feature_set._created_timestamp = feature_set_proto.meta.created_timestamp
        return feature_set
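FeatureSet also exposes to_proto() (used in the ingest tests below), so the two methods round-trip; a minimal sketch, assuming an existing feature_set instance:

    # Serialize to protobuf and rebuild; the reconstructed object keeps its name.
    proto = feature_set.to_proto()
    rebuilt = FeatureSet.from_proto(proto)
    assert rebuilt.name == feature_set.name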
Example #6
def test_apply_entity_integration(test_registry):
    entity = Entity(
        name="driver_car_id",
        description="Car driver id",
        value_type=ValueType.STRING,
        labels={"team": "matchmaking"},
    )

    project = "project"

    # Register Entity
    test_registry.apply_entity(entity, project)

    entities = test_registry.list_entities(project)

    entity = entities[0]
    assert (len(entities) == 1 and entity.name == "driver_car_id"
            and entity.value_type == ValueType(ValueProto.ValueType.STRING)
            and entity.description == "Car driver id"
            and "team" in entity.labels
            and entity.labels["team"] == "matchmaking")

    entity = test_registry.get_entity("driver_car_id", project)
    assert (entity.name == "driver_car_id"
            and entity.value_type == ValueType(ValueProto.ValueType.STRING)
            and entity.description == "Car driver id"
            and "team" in entity.labels
            and entity.labels["team"] == "matchmaking")

    test_registry.teardown()

    # Will try to reload registry, which will fail because the file has been deleted
    with pytest.raises(FileNotFoundError):
        test_registry._get_registry_proto()
Example #7
    def get_entity(self, name: str, project: str = None) -> Entity:
        """
        Retrieves an entity.

        Args:
            project: Feast project that this entity belongs to
            name: Name of entity

        Returns:
            The specified entity. Raises an exception if none is found.
        """

        self._usage.log("get_entity")

        if project is None:
            project = self.project

        if self._use_object_store_registry:
            return self._registry.get_entity(name, project)
        else:
            try:
                get_entity_response = self._core_service.GetEntity(
                    GetEntityRequest(project=project, name=name.strip()),
                    metadata=self._get_grpc_metadata(),
                )  # type: GetEntityResponse
            except grpc.RpcError as e:
                raise grpc.RpcError(e.details())
            entity = Entity.from_proto(get_entity_response.entity)

            return entity
Example #8
    def test_feature_set_ingest_success(self, dataframe, test_client, mocker):
        test_client.set_project("project1")
        driver_fs = FeatureSet(
            "driver-feature-set", source=KafkaSource(brokers="kafka:9092", topic="test")
        )
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        # Register with Feast core
        test_client.apply(driver_fs)
        driver_fs = driver_fs.to_proto()
        driver_fs.meta.status = FeatureSetStatusProto.STATUS_READY

        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=driver_fs),
        )

        # Need to create a mock producer
        with patch("feast.client.get_producer"):
            # Ingest data into Feast
            test_client.ingest("driver-feature-set", dataframe)
Example #9
    def test_feature_set_ingest_throws_exception_if_kafka_down(
        self, dataframe, test_client, exception, mocker
    ):

        test_client.set_project("project1")
        driver_fs = FeatureSet(
            "driver-feature-set",
            source=KafkaSource(brokers="localhost:4412", topic="test"),
        )
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        # Register with Feast core
        test_client.apply(driver_fs)
        driver_fs = driver_fs.to_proto()
        driver_fs.meta.status = FeatureSetStatusProto.STATUS_READY

        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=driver_fs),
        )

        with pytest.raises(exception):
            test_client.ingest("driver-feature-set", dataframe)
Example #10
    def list_entities(self,
                      project: str = None,
                      labels: Dict[str, str] = dict()) -> List[Entity]:
        """
        Retrieve a list of entities from Feast Core

        Args:
            project: Filter entities based on project name
            labels: User-defined labels that these entities are associated with

        Returns:
            List of entities
        """

        if project is None:
            project = self.project

        if self._use_object_store_registry:
            return self._registry.list_entities(project)
        else:
            filter = ListEntitiesRequest.Filter(project=project, labels=labels)

            # Get latest entities from Feast Core
            entity_protos = self._core_service.ListEntities(
                ListEntitiesRequest(filter=filter),
                metadata=self._get_grpc_metadata(),
            )  # type: ListEntitiesResponse

            # Extract entities and return
            entities = []
            for entity_proto in entity_protos.entities:
                entity = Entity.from_proto(entity_proto)
                entity._client = self
                entities.append(entity)
            return entities
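A short usage sketch against the signature above; the client instance, project, and label values are illustrative:

    # Hypothetical call: list the entities in "default" owned by one team.
    for entity in client.list_entities(project="default", labels={"team": "matchmaking"}):
        print(entity.name)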
Example #11
    @classmethod
    def from_proto(cls, feature_set_proto: FeatureSetProto):
        """
        Creates a feature set from a protobuf representation of a feature set

        Args:
            feature_set_proto: A protobuf representation of a feature set

        Returns:
            A FeatureSet object built from the feature set protobuf
        """

        feature_set = cls(
            name=feature_set_proto.spec.name,
            features=[
                Feature.from_proto(feature)
                for feature in feature_set_proto.spec.features
            ],
            entities=[
                Entity.from_proto(entity)
                for entity in feature_set_proto.spec.entities
            ],
            max_age=feature_set_proto.spec.max_age,
            source=(None if feature_set_proto.spec.source.type == 0 else
                    Source.from_proto(feature_set_proto.spec.source)))
        feature_set._version = feature_set_proto.spec.version
        feature_set._status = feature_set_proto.meta.status
        feature_set._created_timestamp = feature_set_proto.meta.created_timestamp
        return feature_set
Example #12
def driver_entity():
    return Entity(
        name="driver_id",
        description="Driver entity for car rides",
        value_type=ValueType.STRING,
        labels={"team": "matchmaking", "common_key": "common_val"},
    )
Example #13
    def test_feature_set_ingest_fail_if_pending(self, dataframe, exception,
                                                client, mocker):
        with pytest.raises(exception):
            driver_fs = FeatureSet(
                "driver-feature-set",
                source=KafkaSource(brokers="kafka:9092", topic="test"),
            )
            driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
            driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
            driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
            driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

            # Register with Feast core
            client.apply(driver_fs)
            driver_fs = driver_fs.to_proto()
            driver_fs.meta.status = FeatureSetStatus.STATUS_PENDING

            mocker.patch.object(
                client._core_service_stub,
                "GetFeatureSet",
                return_value=GetFeatureSetResponse(feature_set=driver_fs),
            )

            # Need to create a mock producer
            with patch("feast.client.get_producer") as mocked_queue:
                # Ingest data into Feast
                client.ingest("driver-feature-set", dataframe, timeout=1)
Example #14
    def test_apply_entity_integration(self, test_feature_store):

        entity = Entity(
            name="driver_car_id",
            description="Car driver id",
            value_type=ValueType.STRING,
            labels={"team": "matchmaking"},
        )

        # Register Entity with Core
        test_feature_store.apply([entity])

        entities = test_feature_store.list_entities()

        entity = entities[0]
        assert (
            len(entities) == 1
            and entity.name == "driver_car_id"
            and entity.value_type == ValueType(ValueProto.ValueType.STRING)
            and entity.description == "Car driver id"
            and "team" in entity.labels
            and entity.labels["team"] == "matchmaking"
        )

        entity = test_feature_store.get_entity("driver_car_id")
        assert (
            entity.name == "driver_car_id"
            and entity.value_type == ValueType(ValueProto.ValueType.STRING)
            and entity.description == "Car driver id"
            and "team" in entity.labels
            and entity.labels["team"] == "matchmaking"
        )
Example #15
def customer_entity():
    return Entity(
        name="customer_id",
        description="Customer entity for rides",
        value_type=ValueType.STRING,
        labels={"team": "customer_service", "common_key": "common_val"},
    )
Example #16
def alltypes_entity():
    return Entity(
        name="alltypes_id",
        description="Driver entity for car rides",
        value_type=ValueType.STRING,
        labels={"cat": "alltypes"},
    )
Example #17
    def test_feature_set_ingest_success(self, dataframe, client, mocker):

        driver_fs = FeatureSet("driver-feature-set")
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        driver_fs.source = KafkaSource(topic="feature-topic",
                                       brokers="127.0.0.1")

        client._message_producer = MagicMock()
        client._message_producer.produce = MagicMock()

        # Register with Feast core
        client.apply(driver_fs)

        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(
                feature_set=driver_fs.to_proto()),
        )

        # Ingest data into Feast
        client.ingest("driver-feature-set", dataframe=dataframe)
Example #18
def prep_local_fs_and_fv() -> Iterator[Tuple[FeatureStore, FeatureView]]:
    with tempfile.NamedTemporaryFile(suffix=".parquet") as f:
        df = create_dataset()
        f.close()
        df.to_parquet(f.name)
        file_source = FileSource(
            file_format=ParquetFormat(),
            file_url=f"file://{f.name}",
            event_timestamp_column="ts",
            created_timestamp_column="created_ts",
            date_partition_column="",
            field_mapping={
                "ts_1": "ts",
                "id": "driver_id"
            },
        )
        fv = get_feature_view(file_source)
        e = Entity(name="driver_id",
                   description="id for driver",
                   value_type=ValueType.INT32)
        with tempfile.TemporaryDirectory() as repo_dir_name, \
                tempfile.TemporaryDirectory() as data_dir_name:
            config = RepoConfig(
                registry=str(Path(repo_dir_name) / "registry.db"),
                project=f"test_bq_correctness_{str(uuid.uuid4()).replace('-', '')}",
                provider="local",
                online_store=OnlineStoreConfig(local=LocalOnlineStoreConfig(
                    path=str(Path(data_dir_name) / "online_store.db"))),
            )
            fs = FeatureStore(config=config)
            fs.apply([fv, e])

            yield fs, fv
Example #19
    def test_entity_import_export_yaml(self):

        test_entity = Entity(
            name="car_driver_entity",
            description="Driver entity for car rides",
            value_type=ValueType.STRING,
            labels={"team": "matchmaking"},
        )

        # Create a string YAML representation of the entity
        string_yaml = test_entity.to_yaml()

        # Create a new entity object from the YAML string
        actual_entity_from_string = Entity.from_yaml(string_yaml)

        # Ensure equality is upheld to original entity
        assert test_entity == actual_entity_from_string
Example #20
def entity_create(filename, project):
    """
    Create or update an entity
    """

    entities = [Entity.from_dict(entity_dict) for entity_dict in yaml_loader(filename)]
    feast_client = Client()  # type: Client
    feast_client.apply(entities, project)
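A hedged invocation sketch, treating entity_create as a plain function; the filename and project are placeholders:

    # Hypothetical call: registers every entity document found in the file.
    entity_create("entities.yaml", "default")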
Example #21
def test_entity_class_contains_labels():
    entity = Entity(
        "my-entity",
        description="My entity",
        value_type=ValueType.STRING,
        labels={"key1": "val1", "key2": "val2"},
    )
    assert "key1" in entity.labels.keys() and entity.labels["key1"] == "val1"
    assert "key2" in entity.labels.keys() and entity.labels["key2"] == "val2"
Example #22
    def test_update_from_source_success(self, dataframe):
        fs = FeatureSet("driver-feature-set")
        fs.update_from_dataset(
            dataframe,
            column_mapping={
                "entity_id": Entity(name="entity", dtype=ValueType.INT64)
            },
        )
        assert len(fs.features) == 3 and fs.features[1].name == "feature_2"
Example #23
def test_apply_all_featuresets(client):
    client.set_project(PROJECT_NAME)

    file_fs1 = FeatureSet(
        "file_feature_set",
        features=[Feature("feature_value1", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(file_fs1)

    gcs_fs1 = FeatureSet(
        "gcs_feature_set",
        features=[Feature("feature_value2", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(gcs_fs1)

    proc_time_fs = FeatureSet(
        "processing_time",
        features=[Feature("feature_value3", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(proc_time_fs)

    add_cols_fs = FeatureSet(
        "additional_columns",
        features=[Feature("feature_value4", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(add_cols_fs)

    historical_fs = FeatureSet(
        "historical",
        features=[Feature("feature_value5", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(historical_fs)

    fs1 = FeatureSet(
        "feature_set_1",
        features=[Feature("feature_value6", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )

    fs2 = FeatureSet(
        "feature_set_2",
        features=[Feature("other_feature_value7", ValueType.INT64)],
        entities=[Entity("other_entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(fs1)
    client.apply(fs2)
Example #24
def test_hash():
    entity1 = Entity(name="my-entity", value_type=ValueType.STRING)
    entity2 = Entity(name="my-entity", value_type=ValueType.STRING)
    entity3 = Entity(name="my-entity", value_type=ValueType.FLOAT)
    entity4 = Entity(name="my-entity",
                     value_type=ValueType.FLOAT,
                     description="test")

    s1 = {entity1, entity2}
    assert len(s1) == 1

    s2 = {entity1, entity3}
    assert len(s2) == 2

    s3 = {entity3, entity4}
    assert len(s3) == 2

    s4 = {entity1, entity2, entity3, entity4}
    assert len(s4) == 3
Example #25
def prep_bq_fs_and_fv(
        bq_source_type: str) -> Iterator[Tuple[FeatureStore, FeatureView]]:
    client = bigquery.Client()
    gcp_project = client.project
    bigquery_dataset = "test_ingestion"
    dataset = bigquery.Dataset(f"{gcp_project}.{bigquery_dataset}")
    client.create_dataset(dataset, exists_ok=True)
    dataset.default_table_expiration_ms = 1000 * 60 * 60 * 24 * 14  # 2 weeks in milliseconds
    client.update_dataset(dataset, ["default_table_expiration_ms"])

    df = create_dataset()

    job_config = bigquery.LoadJobConfig()
    table_ref = f"{gcp_project}.{bigquery_dataset}.{bq_source_type}_correctness_{int(time.time_ns())}"
    query = f"SELECT * FROM `{table_ref}`"
    job = client.load_table_from_dataframe(df,
                                           table_ref,
                                           job_config=job_config)
    job.result()

    bigquery_source = BigQuerySource(
        table_ref=table_ref if bq_source_type == "table" else None,
        query=query if bq_source_type == "query" else None,
        event_timestamp_column="ts",
        created_timestamp_column="created_ts",
        date_partition_column="",
        field_mapping={
            "ts_1": "ts",
            "id": "driver_id"
        },
    )

    fv = driver_feature_view(bigquery_source)
    e = Entity(
        name="driver",
        description="id for driver",
        join_key="driver_id",
        value_type=ValueType.INT32,
    )
    with tempfile.TemporaryDirectory() as repo_dir_name:
        config = RepoConfig(
            registry=str(Path(repo_dir_name) / "registry.db"),
            project=f"test_bq_correctness_{str(uuid.uuid4()).replace('-', '')}",
            provider="gcp",
            online_store=DatastoreOnlineStoreConfig(
                namespace="integration_test"),
        )
        fs = FeatureStore(config=config)
        fs.apply([fv, e])

        yield fs, fv

        fs.teardown()
Example #26
def test_reapply_feature_view_success(test_feature_store, dataframe_source):
    with prep_file_source(df=dataframe_source,
                          event_timestamp_column="ts_1") as file_source:

        e = Entity(name="id",
                   join_keys=["id_join_key"],
                   value_type=ValueType.STRING)

        # Create Feature View
        fv1 = FeatureView(
            name="my_feature_view_1",
            schema=[Field(name="string_col", dtype=String)],
            entities=["id"],
            batch_source=file_source,
            ttl=timedelta(minutes=5),
        )

        # Register Feature View
        test_feature_store.apply([fv1, e])

        # Check Feature View
        fv_stored = test_feature_store.get_feature_view(fv1.name)
        assert len(fv_stored.materialization_intervals) == 0

        # Run materialization
        test_feature_store.materialize(datetime(2020, 1, 1),
                                       datetime(2021, 1, 1))

        # Check Feature View
        fv_stored = test_feature_store.get_feature_view(fv1.name)
        assert len(fv_stored.materialization_intervals) == 1

        # Apply again
        test_feature_store.apply([fv1])

        # Check Feature View
        fv_stored = test_feature_store.get_feature_view(fv1.name)
        assert len(fv_stored.materialization_intervals) == 1

        # Change and apply Feature View
        fv1 = FeatureView(
            name="my_feature_view_1",
            schema=[Field(name="int64_col", dtype=Int64)],
            entities=["id"],
            batch_source=file_source,
            ttl=timedelta(minutes=5),
        )
        test_feature_store.apply([fv1])

        # Check Feature View
        fv_stored = test_feature_store.get_feature_view(fv1.name)
        assert len(fv_stored.materialization_intervals) == 0

        test_feature_store.teardown()
Example #27
    def _apply_entity(self, project: str, entity: Entity):
        """
        Registers a single entity with Feast

        Args:
            project: Feast project to register the entity under
            entity: Entity that will be registered
        """

        entity.is_valid()
        entity_proto = entity.to_spec_proto()

        # Convert the entity to a request and send to Feast Core
        try:
            apply_entity_response = self._core_service.ApplyEntity(
                ApplyEntityRequest(project=project, spec=entity_proto),  # type: ignore
                timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
                metadata=self._get_grpc_metadata(),
            )  # type: ApplyEntityResponse
        except grpc.RpcError as e:
            raise grpc.RpcError(e.details())

        # Extract the returned entity
        applied_entity = Entity.from_proto(apply_entity_response.entity)

        # Deep copy from the returned entity to the local entity
        entity._update_from_entity(applied_entity)
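A hedged sketch of how this private helper is reached (normally through the public apply path; the client instance and entity are illustrative):

    # Hypothetical direct call: register one entity under the "default" project.
    client._apply_entity("default", Entity(name="zone_id", value_type=ValueType.INT64))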
Example #28
def test_entity_class_contains_tags():
    with pytest.deprecated_call():
        entity = Entity(
            "my-entity",
            description="My entity",
            value_type=ValueType.STRING,
            tags={
                "key1": "val1",
                "key2": "val2"
            },
        )
    assert "key1" in entity.tags.keys() and entity.tags["key1"] == "val1"
    assert "key2" in entity.tags.keys() and entity.tags["key2"] == "val2"
Example #29
def feature_stats_feature_set(client):
    fv_fs = FeatureSet(
        "feature_stats",
        features=[
            Feature("strings", ValueType.STRING),
            Feature("ints", ValueType.INT64),
            Feature("floats", ValueType.FLOAT),
        ],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(fv_fs)
    return fv_fs
Example #30
    def test_apply_feature_set_success(self, test_client):

        test_client.set_project("project1")

        # Create Feature Sets
        fs1 = FeatureSet("my-feature-set-1")
        fs1.add(Feature(name="fs1-my-feature-1", dtype=ValueType.INT64))
        fs1.add(Feature(name="fs1-my-feature-2", dtype=ValueType.STRING))
        fs1.add(Entity(name="fs1-my-entity-1", dtype=ValueType.INT64))

        fs2 = FeatureSet("my-feature-set-2")
        fs2.add(Feature(name="fs2-my-feature-1", dtype=ValueType.STRING_LIST))
        fs2.add(Feature(name="fs2-my-feature-2", dtype=ValueType.BYTES_LIST))
        fs2.add(Entity(name="fs2-my-entity-1", dtype=ValueType.INT64))

        # Register Feature Set with Core
        test_client.apply(fs1)
        test_client.apply(fs2)

        feature_sets = test_client.list_feature_sets()

        # List Feature Sets
        assert (
            len(feature_sets) == 2
            and feature_sets[0].name == "my-feature-set-1"
            and feature_sets[0].features[0].name == "fs1-my-feature-1"
            and feature_sets[0].features[0].dtype == ValueType.INT64
            and feature_sets[0].features[1].name == "fs1-my-feature-2"
            and feature_sets[0].features[1].dtype == ValueType.STRING
            and feature_sets[0].entities[0].name == "fs1-my-entity-1"
            and feature_sets[0].entities[0].dtype == ValueType.INT64
            and feature_sets[1].features[0].name == "fs2-my-feature-1"
            and feature_sets[1].features[0].dtype == ValueType.STRING_LIST
            and feature_sets[1].features[1].name == "fs2-my-feature-2"
            and feature_sets[1].features[1].dtype == ValueType.BYTES_LIST
            and feature_sets[1].entities[0].name == "fs2-my-entity-1"
            and feature_sets[1].entities[0].dtype == ValueType.INT64
        )