예제 #1
0
파일: test_job.py 프로젝트: zdw520qq/feast
 def test_to_dataframe_local_file_staging_should_pass(
         self, retrieve_job, avro_data_path, mocker):
     """Check ``to_dataframe`` reads an Avro file staged on the local disk.

     ``GetJob`` on the serving stub is patched to report a finished download
     job whose single ``file://`` URI points at ``avro_data_path``; the
     dataframe returned by the job must equal ``TEST_DATA_FRAME`` (column
     order is ignored via ``check_like=True``).
     """
     finished_job = BatchRetrievalJob(
         id="123",
         type=JobType.JOB_TYPE_DOWNLOAD,
         status=JobStatus.JOB_STATUS_DONE,
         file_uris=[f"file://{avro_data_path}"],
         data_format=DataFormat.DATA_FORMAT_AVRO,
     )
     mocker.patch.object(
         retrieve_job.serving_stub,
         "GetJob",
         return_value=GetJobResponse(job=finished_job),
     )

     retrieved_df = retrieve_job.to_dataframe()

     assert_frame_equal(TEST_DATA_FRAME, retrieved_df, check_like=True)
예제 #2
0
파일: test_job.py 프로젝트: zdw520qq/feast
    @mock_s3
    def test_to_dataframe_s3_file_staging_should_pass(self, retrieve_job,
                                                      avro_data_path, mocker):
        """Check ``to_dataframe`` reads an Avro file staged in an S3 bucket.

        The Avro file at ``avro_data_path`` is uploaded to ``BUCKET`` and
        ``GetJob`` on the serving stub is patched to report a finished
        download job pointing at the resulting ``s3://`` URI. The dataframe
        returned by the job must equal ``TEST_DATA_FRAME`` (column order is
        ignored via ``check_like=True``).

        The ``mock_s3`` decorator (presumably moto's -- confirm against the
        file's imports) is required: without it ``create_bucket`` and
        ``upload_fileobj`` below would be issued against the real S3 service.
        """
        s3_client = boto3.client("s3")
        target = "test_proj/test_features.avro"
        s3_client.create_bucket(Bucket=BUCKET)
        with open(avro_data_path, "rb") as data:
            s3_client.upload_fileobj(data, BUCKET, target)

        mocker.patch.object(
            retrieve_job.serving_stub,
            "GetJob",
            return_value=GetJobResponse(job=BatchRetrievalJob(
                id="123",
                type=JobType.JOB_TYPE_DOWNLOAD,
                status=JobStatus.JOB_STATUS_DONE,
                file_uris=[f"s3://{BUCKET}/{target}"],
                data_format=DataFormat.DATA_FORMAT_AVRO,
            )),
        )
        retrieved_df = retrieve_job.to_dataframe()
        assert_frame_equal(TEST_DATA_FRAME, retrieved_df, check_like=True)
예제 #3
0
    def test_get_batch_features(self, mocked_client, mocker):
        """Check ``get_batch_features`` returns a finished job with the staged data.

        The core stub's ``GetFeatureSet`` and the serving stub's
        ``GetBatchFeatures``, ``GetJob`` and ``GetFeastServingInfo`` are all
        patched, and ``google.cloud.storage.Client`` is patched for the
        duration of the call. The serving mocks report a finished download job
        whose ``file://`` URI points at an Avro file written from
        ``expected_dataframe``; the test then asserts the job id/status and
        that the ``driver_id`` column read back matches what was staged.
        """
        mocked_client._serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel(""))
        mocked_client._core_service_stub = Core.CoreServiceStub(
            grpc.insecure_channel(""))

        mocker.patch.object(
            mocked_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=FeatureSetProto(
                spec=FeatureSetSpecProto(
                    name="driver",
                    project="driver_project",
                    entities=[
                        EntitySpecProto(name="driver",
                                        value_type=ValueProto.ValueType.INT64),
                        EntitySpecProto(
                            name="transaction",
                            value_type=ValueProto.ValueType.INT64,
                        ),
                    ],
                    features=[
                        FeatureSpecProto(
                            name="driver_id",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpecProto(
                            name="driver_name",
                            value_type=ValueProto.ValueType.STRING,
                        ),
                    ],
                ),
                meta=FeatureSetMetaProto(
                    status=FeatureSetStatusProto.STATUS_READY),
            )),
        )

        expected_dataframe = pd.DataFrame({
            "datetime": [datetime.utcnow() for _ in range(3)],
            "driver": [1001, 1002, 1003],
            "transaction": [1001, 1002, 1003],
            "driver_id": [1001, 1002, 1003],
        })

        # tempfile.mktemp() is deprecated: it only returns a name, which
        # another process may claim before we write to it. NamedTemporaryFile
        # creates the file atomically; delete=False keeps it on disk so that
        # pandavro can re-open it by path once the handle is closed.
        with tempfile.NamedTemporaryFile(delete=False) as staged_file:
            final_results = staged_file.name
        pandavro.to_avro(file_path_or_buffer=final_results,
                         df=expected_dataframe)

        # GetBatchFeatures and GetJob report the same finished download job.
        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetBatchFeatures",
            return_value=GetBatchFeaturesResponse(job=BatchRetrievalJob(
                id="123",
                type=JobType.JOB_TYPE_DOWNLOAD,
                status=JobStatus.JOB_STATUS_DONE,
                file_uris=[f"file://{final_results}"],
                data_format=DataFormat.DATA_FORMAT_AVRO,
            )),
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetJob",
            return_value=GetJobResponse(job=BatchRetrievalJob(
                id="123",
                type=JobType.JOB_TYPE_DOWNLOAD,
                status=JobStatus.JOB_STATUS_DONE,
                file_uris=[f"file://{final_results}"],
                data_format=DataFormat.DATA_FORMAT_AVRO,
            )),
        )

        mocker.patch.object(
            mocked_client._serving_service_stub,
            "GetFeastServingInfo",
            return_value=GetFeastServingInfoResponse(
                job_staging_location=f"file://{tempfile.mkdtemp()}/",
                type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
            ),
        )

        mocked_client.set_project("project1")
        # TODO: Abstract away GCS client and GCP dependency
        # NOTE: Feast Serving does not allow for feature references
        # that specify the same feature in the same request.
        with patch("google.cloud.storage.Client"):
            response = mocked_client.get_batch_features(
                entity_rows=pd.DataFrame({
                    # `pd.datetime` was a deprecated alias of
                    # `datetime.datetime` and no longer exists in pandas 2.x;
                    # call the stdlib class directly.
                    "datetime": [
                        datetime.now(tz=timezone("Asia/Singapore"))
                        for _ in range(3)
                    ],
                    "driver": [1001, 1002, 1003],
                    "transaction": [1001, 1002, 1003],
                }),
                feature_refs=["driver:driver_id", "driver_id"],
                project="driver_project",
            )

        # The returned object is used as a job handle: it exposes the id and
        # status from the mocked job and can materialise the staged data.
        assert response.id == "123"
        assert response.status == JobStatus.JOB_STATUS_DONE

        actual_dataframe = response.to_dataframe()

        assert actual_dataframe[["driver_id"
                                 ]].equals(expected_dataframe[["driver_id"]])
예제 #4
0
파일: test_job.py 프로젝트: zdw520qq/feast
class TestRetrievalJob:
    """Tests for ``RetrievalJob.to_dataframe`` against a patched ``GetJob`` call."""

    @fixture
    def retrieve_job(self):
        """Return a ``RetrievalJob`` wrapping a job proto in RUNNING state.

        The serving stub is built on an insecure channel with an empty
        target; each test patches ``GetJob`` on that stub to control what
        the job observes.
        """
        serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel(""))
        job_proto = JobProto(
            id="123",
            type=JobType.JOB_TYPE_DOWNLOAD,
            status=JobStatus.JOB_STATUS_RUNNING,
        )
        return RetrievalJob(job_proto, serving_service_stub)

    @fixture
    def avro_data_path(self):
        """Yield the path of an Avro file containing ``TEST_DATA_FRAME``.

        The file lives in a private temporary directory that is removed when
        the test finishes. This replaces ``tempfile.mktemp()``, which is
        deprecated because the returned name can be claimed by another
        process before it is written, and which left the file behind.
        """
        with tempfile.TemporaryDirectory() as staging_dir:
            final_results = f"{staging_dir}/test_features.avro"
            pandavro.to_avro(file_path_or_buffer=final_results,
                             df=TEST_DATA_FRAME)
            yield final_results

    def test_to_dataframe_local_file_staging_should_pass(
            self, retrieve_job, avro_data_path, mocker):
        """``to_dataframe`` reads an Avro file referenced by a ``file://`` URI."""
        mocker.patch.object(
            retrieve_job.serving_stub,
            "GetJob",
            return_value=GetJobResponse(job=BatchRetrievalJob(
                id="123",
                type=JobType.JOB_TYPE_DOWNLOAD,
                status=JobStatus.JOB_STATUS_DONE,
                file_uris=[f"file://{avro_data_path}"],
                data_format=DataFormat.DATA_FORMAT_AVRO,
            )),
        )
        retrieved_df = retrieve_job.to_dataframe()
        # check_like=True: column/index order is not part of the contract.
        assert_frame_equal(TEST_DATA_FRAME, retrieved_df, check_like=True)

    @mock_s3
    def test_to_dataframe_s3_file_staging_should_pass(self, retrieve_job,
                                                      avro_data_path, mocker):
        """``to_dataframe`` reads an Avro file referenced by an ``s3://`` URI.

        The Avro file is first uploaded to ``BUCKET`` under the ``mock_s3``
        decorator, then ``GetJob`` is patched to point at the uploaded key.
        """
        s3_client = boto3.client("s3")
        target = "test_proj/test_features.avro"
        s3_client.create_bucket(Bucket=BUCKET)
        with open(avro_data_path, "rb") as data:
            s3_client.upload_fileobj(data, BUCKET, target)

        mocker.patch.object(
            retrieve_job.serving_stub,
            "GetJob",
            return_value=GetJobResponse(job=BatchRetrievalJob(
                id="123",
                type=JobType.JOB_TYPE_DOWNLOAD,
                status=JobStatus.JOB_STATUS_DONE,
                file_uris=[f"s3://{BUCKET}/{target}"],
                data_format=DataFormat.DATA_FORMAT_AVRO,
            )),
        )
        retrieved_df = retrieve_job.to_dataframe()
        assert_frame_equal(TEST_DATA_FRAME, retrieved_df, check_like=True)

    @pytest.mark.parametrize(
        "job_proto,exception",
        [
            (
                # Finished job that carries an error message.
                GetJobResponse(job=BatchRetrievalJob(
                    id="123",
                    type=JobType.JOB_TYPE_DOWNLOAD,
                    status=JobStatus.JOB_STATUS_DONE,
                    data_format=DataFormat.DATA_FORMAT_AVRO,
                    error="Testing job failure",
                )),
                Exception,
            ),
            (
                # Finished job whose data format is not Avro.
                GetJobResponse(job=BatchRetrievalJob(
                    id="123",
                    type=JobType.JOB_TYPE_DOWNLOAD,
                    status=JobStatus.JOB_STATUS_DONE,
                    data_format=DataFormat.DATA_FORMAT_INVALID,
                )),
                Exception,
            ),
        ],
        ids=["when_retrieve_job_fails", "when_data_format_is_not_avro"],
    )
    def test_to_dataframe_s3_file_staging_should_raise(self, retrieve_job,
                                                       mocker, job_proto,
                                                       exception):
        """``to_dataframe`` raises for each parametrized ``GetJob`` response.

        ``job_proto`` is the ``GetJobResponse`` returned by the patched
        ``GetJob``; ``exception`` is the exception type expected from
        ``to_dataframe``.
        """
        mocker.patch.object(
            retrieve_job.serving_stub,
            "GetJob",
            return_value=job_proto,
        )
        with raises(exception):
            retrieve_job.to_dataframe()
예제 #5
0
    def test_get_batch_features(self, mock_client, mocker):
        """Check ``get_batch_features`` returns a finished job with the staged data.

        The core stub's ``GetFeatureSet`` and the serving stub's
        ``GetBatchFeatures``, ``GetJob`` and ``GetFeastServingInfo`` are all
        patched. The serving mocks report a finished download job whose
        ``file://`` URI points at an Avro file written from
        ``expected_dataframe``; the test then asserts the job id/status and
        that both ``customer_fs:1:*`` feature columns read back match what
        was staged.
        """
        mock_client._serving_service_stub = Serving.ServingServiceStub(
            grpc.insecure_channel("")
        )
        mock_client._core_service_stub = Core.CoreServiceStub(grpc.insecure_channel(""))

        mocker.patch.object(
            mock_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(
                feature_set=FeatureSetSpec(
                    name="customer_fs",
                    version=1,
                    entities=[
                        EntitySpec(
                            name="customer", value_type=ValueProto.ValueType.INT64
                        ),
                        EntitySpec(
                            name="transaction", value_type=ValueProto.ValueType.INT64
                        ),
                    ],
                    features=[
                        FeatureSpec(
                            name="customer_feature_1",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpec(
                            name="customer_feature_2",
                            value_type=ValueProto.ValueType.STRING,
                        ),
                    ],
                )
            ),
        )

        expected_dataframe = pd.DataFrame(
            {
                "datetime": [datetime.utcnow() for _ in range(3)],
                "customer": [1001, 1002, 1003],
                "transaction": [1001, 1002, 1003],
                "customer_fs:1:customer_feature_1": [1001, 1002, 1003],
                "customer_fs:1:customer_feature_2": [1001, 1002, 1003],
            }
        )

        # tempfile.mktemp() is deprecated: it only returns a name, which
        # another process may claim before we write to it. NamedTemporaryFile
        # creates the file atomically; delete=False keeps it on disk so that
        # to_avro can re-open it by path once the handle is closed.
        with tempfile.NamedTemporaryFile(delete=False) as staged_file:
            final_results = staged_file.name
        to_avro(file_path_or_buffer=final_results, df=expected_dataframe)

        # GetBatchFeatures and GetJob report the same finished download job.
        mocker.patch.object(
            mock_client._serving_service_stub,
            "GetBatchFeatures",
            return_value=GetBatchFeaturesResponse(
                job=BatchFeaturesJob(
                    id="123",
                    type=JobType.JOB_TYPE_DOWNLOAD,
                    status=JobStatus.JOB_STATUS_DONE,
                    file_uris=[f"file://{final_results}"],
                    data_format=DataFormat.DATA_FORMAT_AVRO,
                )
            ),
        )

        mocker.patch.object(
            mock_client._serving_service_stub,
            "GetJob",
            return_value=GetJobResponse(
                job=BatchFeaturesJob(
                    id="123",
                    type=JobType.JOB_TYPE_DOWNLOAD,
                    status=JobStatus.JOB_STATUS_DONE,
                    file_uris=[f"file://{final_results}"],
                    data_format=DataFormat.DATA_FORMAT_AVRO,
                )
            ),
        )

        mocker.patch.object(
            mock_client._serving_service_stub,
            "GetFeastServingInfo",
            return_value=GetFeastServingInfoResponse(
                job_staging_location=f"file://{tempfile.mkdtemp()}/",
                type=FeastServingType.FEAST_SERVING_TYPE_BATCH,
            ),
        )

        response = mock_client.get_batch_features(
            entity_rows=pd.DataFrame(
                {
                    # `pd.datetime` was a deprecated alias of
                    # `datetime.datetime` and no longer exists in pandas 2.x;
                    # call the stdlib class directly.
                    "datetime": [
                        datetime.now(tz=timezone("Asia/Singapore")) for _ in range(3)
                    ],
                    "customer": [1001, 1002, 1003],
                    "transaction": [1001, 1002, 1003],
                }
            ),
            feature_ids=[
                "customer_fs:1:customer_feature_1",
                "customer_fs:1:customer_feature_2",
            ],
        )  # type: Job

        # Separate asserts so a failure names the field that mismatched.
        assert response.id == "123"
        assert response.status == JobStatus.JOB_STATUS_DONE

        actual_dataframe = response.to_dataframe()

        feature_columns = [
            "customer_fs:1:customer_feature_1",
            "customer_fs:1:customer_feature_2",
        ]
        assert actual_dataframe[feature_columns].equals(
            expected_dataframe[feature_columns]
        )