예제 #1
0
    def test_feature_set_ingest_throws_exception_if_kafka_down(
            self, dataframe, test_client, exception, mocker):
        """Expect ``exception`` when ingesting through the configured source.

        The feature set is registered through ``test_client``, the core
        stub's ``GetFeatureSet`` is patched to return it with READY status,
        and only the final ``ingest`` call is expected to raise.
        """
        test_client.set_project("project1")

        # NOTE(review): presumably nothing listens on localhost:4412, which
        # is what makes Kafka "down" here -- confirm against the test setup.
        kafka_source = KafkaSource(brokers="localhost:4412", topic="test")
        feature_set = FeatureSet("driver-feature-set", source=kafka_source)

        fields = (
            Feature(name="feature_1", dtype=ValueType.FLOAT),
            Feature(name="feature_2", dtype=ValueType.STRING),
            Feature(name="feature_3", dtype=ValueType.INT64),
            Entity(name="entity_id", dtype=ValueType.INT64),
        )
        for field in fields:
            feature_set.add(field)

        # Register with Feast core, then build the READY proto the stub
        # will hand back on lookup.
        test_client.apply(feature_set)
        ready_proto = feature_set.to_proto()
        ready_proto.meta.status = FeatureSetStatusProto.STATUS_READY

        stub_response = GetFeatureSetResponse(feature_set=ready_proto)
        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=stub_response,
        )

        with pytest.raises(exception):
            test_client.ingest("driver-feature-set", dataframe)
예제 #2
0
    def test_feature_set_ingest_fail_if_pending(self, dataframe, exception,
                                                test_client, mocker):
        """Expect ``exception`` when ingesting into a PENDING feature set.

        The feature set is registered through ``test_client`` and the core
        stub's ``GetFeatureSet`` is patched to return it with PENDING status.
        ``ingest`` is then called with ``timeout=1`` while
        ``feast.client.get_producer`` is patched out.

        Only the ``ingest`` call sits inside ``pytest.raises``: a failure in
        the setup steps now surfaces as a test error instead of being
        mistaken for the expected exception.
        """
        test_client.set_project("project1")
        driver_fs = FeatureSet(
            "driver-feature-set",
            source=KafkaSource(brokers="kafka:9092", topic="test"),
        )
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        # Register with Feast core
        test_client.apply(driver_fs)
        driver_fs = driver_fs.to_proto()
        driver_fs.meta.status = FeatureSetStatusProto.STATUS_PENDING

        mocker.patch.object(
            test_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=driver_fs),
        )

        # Need to create a mock producer
        with patch("feast.client.get_producer"):
            # Ingest data into Feast; this is the only call expected to fail.
            with pytest.raises(exception):
                test_client.ingest("driver-feature-set", dataframe, timeout=1)
예제 #3
0
    def test_feature_set_ingest_success(self, dataframe, client, mocker):
        """Ingest ``dataframe`` into a READY feature set without errors.

        The feature set is registered through ``client`` and the core stub's
        ``GetFeatureSet`` is patched to return it with READY status.
        ``feast.client.get_producer`` is patched for the duration of the
        ingest call. The test makes no explicit assertions; it passes when
        none of the calls raise.
        """
        client.set_project("project1")
        driver_fs = FeatureSet("driver-feature-set",
                               source=KafkaSource(brokers="kafka:9092",
                                                  topic="test"))
        driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT))
        driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING))
        driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64))
        driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64))

        # Register with Feast core
        client.apply(driver_fs)
        driver_fs = driver_fs.to_proto()
        driver_fs.meta.status = FeatureSetStatusProto.STATUS_READY

        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=driver_fs),
        )

        # Need to create a mock producer. The patched object itself is never
        # inspected, so it is not bound to a name.
        with patch("feast.client.get_producer"):
            # Ingest data into Feast
            client.ingest("driver-feature-set", dataframe)
예제 #4
0
    def test_feature_set_ingest_success(self, dataframe, client, mocker):
        """Ingest ``dataframe`` through ``client`` with a mocked producer.

        The client's ``_message_producer`` is replaced by a ``MagicMock``,
        the feature set is registered, and the core stub's ``GetFeatureSet``
        is patched to return the feature set's proto. The test makes no
        explicit assertions; it passes when none of the calls raise.
        """
        feature_set = FeatureSet("driver-feature-set")
        fields = (
            Feature(name="feature_1", dtype=ValueType.FLOAT),
            Feature(name="feature_2", dtype=ValueType.STRING),
            Feature(name="feature_3", dtype=ValueType.INT64),
            Entity(name="entity_id", dtype=ValueType.INT64),
        )
        for field in fields:
            feature_set.add(field)

        feature_set.source = KafkaSource(topic="feature-topic",
                                         brokers="127.0.0.1")

        # Swap in a fake producer before talking to the client.
        fake_producer = MagicMock()
        fake_producer.produce = MagicMock()
        client._message_producer = fake_producer

        # Register with Feast core
        client.apply(feature_set)

        stub_response = GetFeatureSetResponse(
            feature_set=feature_set.to_proto())
        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=stub_response,
        )

        # Ingest data into Feast
        client.ingest("driver-feature-set", dataframe=dataframe)
예제 #5
0
def test_sources_deduplicate_ingest_jobs(client):
    """Check how many ingest jobs run as feature-set sources change.

    All existing ingest jobs are stopped first. Two feature sets are then
    applied with the same source (one running job expected), one of them is
    moved to a different source (two expected), and finally moved back
    (one expected).

    NOTE: the polling has no timeout; each wait loops until the expected
    count is observed or the bounds assertion fails.
    """
    source = KafkaSource("localhost:9092", "feast-features")
    alt_source = KafkaSource("localhost:9092", "feast-data")

    def get_running_jobs():
        """Return the ingest jobs whose status is RUNNING."""
        return [
            job for job in client.list_ingest_jobs()
            if job.status == IngestionJobStatus.RUNNING
        ]

    def wait_for_running_jobs(expected, lower, upper):
        """Poll every second until exactly ``expected`` jobs are RUNNING.

        While waiting, the observed count must stay within
        ``[lower, upper]``.
        """
        while True:
            # Query once per iteration so that the exit check and the bounds
            # assertion look at the same snapshot of the job list.
            running = len(get_running_jobs())
            if running == expected:
                return
            assert lower <= running <= upper, (
                f"expected between {lower} and {upper} running ingest jobs, "
                f"found {running}")
            time.sleep(1)

    # stop all ingest jobs
    ingest_jobs = client.list_ingest_jobs()
    for ingest_job in ingest_jobs:
        client.stop_ingest_job(ingest_job)
    for ingest_job in ingest_jobs:
        ingest_job.wait(IngestionJobStatus.ABORTED)

    # register multiple feature sets with the same source
    # only one ingest job should be spawned due to ingest job deduplication
    cust_trans_fs = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    driver_fs = FeatureSet.from_yaml(f"{DIR_PATH}/basic/driver_fs.yaml")
    cust_trans_fs.source, driver_fs.source = source, source
    client.apply(cust_trans_fs)
    client.apply(driver_fs)

    wait_for_running_jobs(expected=1, lower=0, upper=1)

    # update feature sets with different sources, should spawn 2 ingest jobs
    driver_fs.source = alt_source
    client.apply(driver_fs)

    wait_for_running_jobs(expected=2, lower=1, upper=2)

    # update feature sets with same source again, should spawn only 1 ingest job
    driver_fs.source = source
    client.apply(driver_fs)

    wait_for_running_jobs(expected=1, lower=1, upper=2)
예제 #6
0
    def test_feature_set_ingest_failure(self, client, dataframe, exception):
        """Expect ``exception`` somewhere between field inference and ingest.

        Every step runs inside ``pytest.raises``, so the test passes when
        the expected exception is raised by any of them, not only by the
        final ``ingest`` call.
        """
        with pytest.raises(exception):
            # Create feature set
            feature_set = FeatureSet("driver-feature-set")
            kafka_source = KafkaSource(topic="feature-topic",
                                       brokers="fake.broker.com")
            feature_set.source = kafka_source

            # Replace the client's producer with a mock.
            fake_producer = MagicMock()
            fake_producer.produce = MagicMock()
            client._message_producer = fake_producer

            # Update based on dataset
            feature_set.infer_fields_from_df(dataframe)

            # Register with Feast core
            client.apply(feature_set)

            # Ingest data into Feast
            client.ingest(feature_set, dataframe=dataframe)
예제 #7
0
    def test_feature_set_types_success(self, client, dataframe, mocker):
        """Ingest ``dataframe`` into a feature set covering many value types.

        The feature set declares one feature per scalar and list
        ``ValueType`` listed below, plus a ``user_id`` entity. The client's
        producer is mocked and the core stub's ``GetFeatureSet`` is patched
        to return the feature set's proto. The test makes no explicit
        assertions; it passes when none of the calls raise.
        """
        # (feature name, value type) pairs, in declaration order.
        typed_columns = (
            ("float_feature", ValueType.FLOAT),
            ("int64_feature", ValueType.INT64),
            ("int32_feature", ValueType.INT32),
            ("string_feature", ValueType.STRING),
            ("bytes_feature", ValueType.BYTES),
            ("bool_feature", ValueType.BOOL),
            ("double_feature", ValueType.DOUBLE),
            ("float_list_feature", ValueType.FLOAT_LIST),
            ("int64_list_feature", ValueType.INT64_LIST),
            ("int32_list_feature", ValueType.INT32_LIST),
            ("string_list_feature", ValueType.STRING_LIST),
            ("bytes_list_feature", ValueType.BYTES_LIST),
            ("bool_list_feature", ValueType.BOOL_LIST),
            ("double_list_feature", ValueType.DOUBLE_LIST),
        )

        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name=column, dtype=value_type)
                for column, value_type in typed_columns
            ],
            max_age=Duration(seconds=3600),
        )

        all_types_fs.source = KafkaSource(topic="feature-topic",
                                          brokers="127.0.0.1")

        # Replace the client's producer with a mock.
        fake_producer = MagicMock()
        fake_producer.produce = MagicMock()
        client._message_producer = fake_producer

        # Register with Feast core
        client.apply(all_types_fs)

        stub_response = GetFeatureSetResponse(
            feature_set=all_types_fs.to_proto())
        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=stub_response,
        )

        # Ingest data into Feast
        client.ingest(all_types_fs, dataframe=dataframe)
예제 #8
0
    def test_feature_set_ingest_success(self, dataframe, client):
        """Ingest via the feature set and check the mocked producer was used.

        The feature set's ``_message_producer`` is replaced by a
        ``MagicMock``; after ``ingest`` the test asserts that its ``send``
        was called.
        """
        feature_set = FeatureSet("driver-feature-set")
        fields = (
            Feature(name="feature_1", dtype=ValueType.FLOAT),
            Feature(name="feature_2", dtype=ValueType.STRING),
            Feature(name="feature_3", dtype=ValueType.INT64),
            Entity(name="entity_id", dtype=ValueType.INT64),
        )
        for field in fields:
            feature_set.add(field)

        feature_set.source = KafkaSource(topic="feature-topic",
                                         brokers="127.0.0.1")

        # Swap in a fake producer so nothing is sent to a broker.
        fake_producer = MagicMock()
        fake_producer.send = MagicMock()
        feature_set._message_producer = fake_producer

        # Register with Feast core
        client.apply(feature_set)

        # Ingest data into Feast
        feature_set.ingest(dataframe=dataframe)

        # Make sure message producer is called (looked up on the feature
        # set again, exactly as the assertion has always done).
        feature_set._message_producer.send.assert_called()
예제 #9
0
    def test_feature_set_types_success(self, client, dataframe):
        """Ingest a many-typed feature set and check the producer was used.

        The feature set declares one feature per scalar and list
        ``ValueType`` listed below, plus a ``user_id`` entity. Its
        ``_message_producer`` is replaced by a ``MagicMock``; after
        ``ingest`` the test asserts that its ``send`` was called.
        """
        # (feature name, value type) pairs, in declaration order.
        typed_columns = (
            ("float_feature", ValueType.FLOAT),
            ("int64_feature", ValueType.INT64),
            ("int32_feature", ValueType.INT32),
            ("string_feature", ValueType.STRING),
            ("bytes_feature", ValueType.BYTES),
            ("bool_feature", ValueType.BOOL),
            ("double_feature", ValueType.DOUBLE),
            ("float_list_feature", ValueType.FLOAT_LIST),
            ("int64_list_feature", ValueType.INT64_LIST),
            ("int32_list_feature", ValueType.INT32_LIST),
            ("string_list_feature", ValueType.STRING_LIST),
            ("bytes_list_feature", ValueType.BYTES_LIST),
            ("bool_list_feature", ValueType.BOOL_LIST),
            ("double_list_feature", ValueType.DOUBLE_LIST),
        )

        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name=column, dtype=value_type)
                for column, value_type in typed_columns
            ],
            max_age=Duration(seconds=3600),
        )

        all_types_fs.source = KafkaSource(topic="feature-topic",
                                          brokers="127.0.0.1")

        # Swap in a fake producer so nothing is sent to a broker.
        fake_producer = MagicMock()
        fake_producer.send = MagicMock()
        all_types_fs._message_producer = fake_producer

        # Register with Feast core
        client.apply(all_types_fs)

        # Ingest data into Feast
        all_types_fs.ingest(dataframe=dataframe)

        # Make sure message producer is called (looked up on the feature
        # set again, exactly as the assertion has always done).
        all_types_fs._message_producer.send.assert_called()
예제 #10
0
    def test_feature_set_ingest_failure(self, client, dataframe, exception):
        """Expect ``exception`` somewhere between dataset update and ingest.

        Every step runs inside ``pytest.raises``, so the test passes when
        the expected exception is raised by any of them, not only by the
        final ``ingest`` call.
        """
        with pytest.raises(exception):
            # Create feature set
            feature_set = FeatureSet("driver-feature-set")
            kafka_source = KafkaSource(topic="feature-topic",
                                       brokers="fake.broker.com")
            feature_set.source = kafka_source

            # Swap in a fake producer on the feature set.
            fake_producer = MagicMock()
            fake_producer.send = MagicMock()
            feature_set._message_producer = fake_producer

            # Update based on dataset, mapping the "entity_id" column to an
            # INT64 entity named "entity".
            entity_mapping = {
                "entity_id": Entity(name="entity", dtype=ValueType.INT64)
            }
            feature_set.update_from_dataset(
                dataframe,
                column_mapping=entity_mapping,
            )

            # Register with Feast core
            client.apply(feature_set)

            # Ingest data into Feast
            feature_set.ingest(dataframe=dataframe)