Example #1
def test_list_entities_and_features(client):
    customer_entity = Entity("customer_id", ValueType.INT64)
    driver_entity = Entity("driver_id", ValueType.INT64)

    customer_feature_rating = Feature(name="rating", dtype=ValueType.FLOAT, labels={"key1":"val1"})
    customer_feature_cost = Feature(name="cost", dtype=ValueType.FLOAT)
    driver_feature_rating = Feature(name="rating", dtype=ValueType.FLOAT)
    driver_feature_cost = Feature(name="cost", dtype=ValueType.FLOAT, labels={"key1":"val1"})

    filter_by_project_entity_labels_expected = {
        "customer:rating": customer_feature_rating,
    }

    filter_by_project_entity_expected = {
        "driver:cost": driver_feature_cost,
        "driver:rating": driver_feature_rating,
    }

    filter_by_project_labels_expected = {
        "customer:rating": customer_feature_rating,
        "driver:cost": driver_feature_cost,
    }

    customer_fs = FeatureSet(
        "customer",
        features=[
            customer_feature_rating,
            customer_feature_cost
        ],
        entities=[customer_entity],
        max_age=Duration(seconds=100)
    )

    driver_fs = FeatureSet(
        "driver",
        features=[
            driver_feature_rating,
            driver_feature_cost
        ],
        entities=[driver_entity],
        max_age=Duration(seconds=100)
    )

    client.set_project(PROJECT_NAME)
    client.apply(customer_fs)
    client.apply(driver_fs)

    # Test for listing of features
    # Case 1: Filter by: project, entities and labels
    filter_by_project_entity_labels_actual = client.list_features_by_ref(
        project=PROJECT_NAME, entities=["customer_id"], labels={"key1": "val1"})

    # Case 2: Filter by: project, entities
    filter_by_project_entity_actual = client.list_features_by_ref(
        project=PROJECT_NAME, entities=["driver_id"])

    # Case 3: Filter by: project, labels
    filter_by_project_labels_actual = client.list_features_by_ref(
        project=PROJECT_NAME, labels={"key1": "val1"})

    assert set(filter_by_project_entity_labels_expected) == set(filter_by_project_entity_labels_actual)
    assert set(filter_by_project_entity_expected) == set(filter_by_project_entity_actual)
    assert set(filter_by_project_labels_expected) == set(filter_by_project_labels_actual)
Example #2
def create_fade_overlay_segment(start_time_seconds, end_time_seconds, fade_duration=0.5):
    # Create the fade-in animation, starting fade_duration before the segment.
    animation_start = transcoder.Overlay.Animation()
    animation_start.animation_fade = transcoder.Overlay.AnimationFade()
    animation_start.animation_fade.start_time_offset = Duration(
        seconds=int(start_time_seconds - fade_duration),
        nanos=get_nanos_from_seconds(start_time_seconds - fade_duration))
    animation_start.animation_fade.end_time_offset = Duration(
        seconds=int(start_time_seconds),
        nanos=get_nanos_from_seconds(start_time_seconds))
    animation_start.animation_fade.fade_type = transcoder.Overlay.FadeType.FADE_IN
    animation_start.animation_fade.xy = transcoder.Overlay.NormalizedCoordinate(x=0., y=0.)

    # Create the fade-out animation, ending fade_duration after the segment.
    animation_end = transcoder.Overlay.Animation()
    animation_end.animation_fade = transcoder.Overlay.AnimationFade()
    animation_end.animation_fade.start_time_offset = Duration(
        seconds=int(end_time_seconds),
        nanos=get_nanos_from_seconds(end_time_seconds))
    animation_end.animation_fade.end_time_offset = Duration(
        seconds=int(end_time_seconds + fade_duration),
        nanos=get_nanos_from_seconds(end_time_seconds + fade_duration))
    animation_end.animation_fade.fade_type = transcoder.Overlay.FadeType.FADE_OUT
    animation_end.animation_fade.xy = transcoder.Overlay.NormalizedCoordinate(x=0., y=0.)

    return [animation_start, animation_end]
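The overlay examples (#2 and #15) call a get_nanos_from_seconds helper that is not shown. A minimal sketch, assuming the helper only needs to express the fractional part of a float seconds value as protobuf nanos:

def get_nanos_from_seconds(seconds: float) -> int:
    # Fractional part of the float, in nanoseconds (hypothetical helper).
    return int((seconds - int(seconds)) * 1e9)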
Example #3
def test_multiple_featureset_joins(client):
    fs1 = FeatureSet(
        "feature_set_1",
        features=[Feature("feature_value", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )

    fs2 = FeatureSet(
        "feature_set_2",
        features=[Feature("other_feature_value", ValueType.INT64)],
        entities=[Entity("other_entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )

    client.apply(fs1)
    time.sleep(10)
    fs1 = client.get_feature_set(name="feature_set_1", version=1)

    client.apply(fs2)
    time.sleep(10)
    fs2 = client.get_feature_set(name="feature_set_2", version=1)

    N_ROWS = 10
    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)
    features_1_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": [i for i in range(N_ROWS)],
        "feature_value": [f"{i}" for i in range(N_ROWS)],
    })
    client.ingest(fs1, features_1_df)

    features_2_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "other_entity_id": [i for i in range(N_ROWS)],
        "other_feature_value": [i for i in range(N_ROWS)],
    })
    client.ingest(fs2, features_2_df)

    entity_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": [i for i in range(N_ROWS)],
        "other_entity_id": [N_ROWS - 1 - i for i in range(N_ROWS)],
    })
    feature_retrieval_job = client.get_batch_features(
        entity_rows=entity_df,
        feature_ids=[
            "feature_set_1:1:feature_value",
            "feature_set_2:1:other_feature_value"
        ])
    output = feature_retrieval_job.to_dataframe()
    print(output.head())

    assert output["entity_id"].to_list() == [
        int(i) for i in output["feature_set_1_v1_feature_value"].to_list()
    ]
    assert output["other_entity_id"].to_list(
    ) == output["feature_set_2_v1_other_feature_value"].to_list()
Example #4
    def test_list_feature_sets(self, mocked_client, mocker):
        mocker.patch.object(
            mocked_client,
            "_core_service_stub",
            return_value=Core.CoreServiceStub(grpc.insecure_channel("")),
        )

        feature_set_1_proto = FeatureSetProto(
            spec=FeatureSetSpecProto(
                project="test",
                name="driver_car",
                max_age=Duration(seconds=3600),
                labels={"key1": "val1", "key2": "val2"},
                features=[
                    FeatureSpecProto(
                        name="feature_1", value_type=ValueProto.ValueType.FLOAT
                    )
                ],
            )
        )
        feature_set_2_proto = FeatureSetProto(
            spec=FeatureSetSpecProto(
                project="test",
                name="driver_ride",
                max_age=Duration(seconds=3600),
                labels={"key1": "val1"},
                features=[
                    FeatureSpecProto(
                        name="feature_1", value_type=ValueProto.ValueType.FLOAT
                    )
                ],
            )
        )

        mocker.patch.object(
            mocked_client._core_service_stub,
            "ListFeatureSets",
            return_value=ListFeatureSetsResponse(
                feature_sets=[feature_set_1_proto, feature_set_2_proto]
            ),
        )

        feature_sets = mocked_client.list_feature_sets(labels={"key1": "val1"})
        assert len(feature_sets) == 2

        feature_set = feature_sets[0]
        assert (
            feature_set.name == "driver_car"
            and "key1" in feature_set.labels
            and feature_set.labels["key1"] == "val1"
            and "key2" in feature_set.labels
            and feature_set.labels["key2"] == "val2"
            and feature_set.fields["feature_1"].name == "feature_1"
            and feature_set.fields["feature_1"].dtype == ValueType.FLOAT
            and len(feature_set.features) == 1
        )
Example #5
def test_basic_register_feature_set_success(client):
    # Register feature set without project
    cust_trans_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    driver_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/driver_fs.yaml")
    client.apply(cust_trans_fs_expected)
    client.apply(driver_fs_expected)
    cust_trans_fs_actual = client.get_feature_set("customer_transactions")
    assert cust_trans_fs_actual == cust_trans_fs_expected
    driver_fs_actual = client.get_feature_set("driver")
    assert driver_fs_actual == driver_fs_expected

    # Register feature set with project
    cust_trans_fs_expected = FeatureSet.from_yaml(
        f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    client.set_project(PROJECT_NAME)
    client.apply(cust_trans_fs_expected)
    cust_trans_fs_actual = client.get_feature_set("customer_transactions",
                                                  project=PROJECT_NAME)
    assert cust_trans_fs_actual == cust_trans_fs_expected

    # Register feature set with labels
    driver_unlabelled_fs = FeatureSet(
        "driver_unlabelled",
        features=[
            Feature("rating", ValueType.FLOAT),
            Feature("cost", ValueType.FLOAT)
        ],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    driver_labeled_fs_expected = FeatureSet(
        "driver_labeled",
        features=[
            Feature("rating", ValueType.FLOAT),
            Feature("cost", ValueType.FLOAT)
        ],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
        labels={"key1": "val1"},
    )
    client.set_project(PROJECT_NAME)
    client.apply(driver_unlabelled_fs)
    client.apply(driver_labeled_fs_expected)
    driver_fs_actual = client.list_feature_sets(project=PROJECT_NAME,
                                                labels={"key1": "val1"})[0]
    assert driver_fs_actual == driver_labeled_fs_expected

    # reset client's project for other tests
    client.set_project()
Example #6
    def create_push_subscription(
            self,
            subscription_name: str,
            topic_name: str,
            endpoint: str,
            config: SubscriptionConfig = None) -> Subscription:
        subscriber: SubscriberWrapper = SubscriberClient()
        subscription_path = subscriber.subscription_path(
            self.project_id, subscription_name)
        topic_path = self.get_topic_path(topic_name)

        if not config:
            config = SubscriptionConfig()

        logging_extra = {
            "subscription_name": subscription_name,
            "topic_name": topic_name,
            "config": config.dict(),
        }

        with subscriber:
            try:
                try:
                    subscription = subscriber.get_subscription(
                        request={"subscription": subcription_path})
                    Logger.info("Push subscription exists", logging_extra)
                    return subscription
                except NotFound:
                    request = Subscription(
                        name=subscription_path,
                        topic=topic_path,
                        push_config=PushConfig(push_endpoint=endpoint),
                        ack_deadline_seconds=60,
                        expiration_policy=ExpirationPolicy(ttl=Duration(
                            seconds=config.expiration_days *
                            86400) if config.expiration_days else None),
                        retry_policy=RetryPolicy(),
                        message_retention_duration=Duration(
                            seconds=config.retention_days * 86400))
                    subscription = subscriber.create_subscription(
                        request=request)
                    Logger.info("Push subscription created", logging_extra)
                    return subscription

            except BaseException as ex:
                Logger.error("Failed to create push subscription",
                             exc_info=ex,
                             extra=logging_extra)
                raise ex
Example #7
    def test_feature_set_types_success(self, client, dataframe, mocker):

        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="float_feature", dtype=ValueType.FLOAT),
                Feature(name="int64_feature", dtype=ValueType.INT64),
                Feature(name="int32_feature", dtype=ValueType.INT32),
                Feature(name="string_feature", dtype=ValueType.STRING),
                Feature(name="bytes_feature", dtype=ValueType.BYTES),
                Feature(name="bool_feature", dtype=ValueType.BOOL),
                Feature(name="double_feature", dtype=ValueType.DOUBLE),
                Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
                Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
                Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
                Feature(name="string_list_feature", dtype=ValueType.STRING_LIST),
                Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
                Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST),
                Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST),
            ],
            max_age=Duration(seconds=3600),
        )

        # Register with Feast core
        client.apply(all_types_fs)

        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=all_types_fs.to_proto()),
        )

        # Ingest data into Feast
        client.ingest(all_types_fs, dataframe=dataframe)
Example #8
def group_findings_and_changes(source_name):
    """Demonstrates grouping all findings across an organization and
    associated changes."""
    i = 0
    # [START group_findings_with_changes]
    from datetime import timedelta

    from google.cloud import securitycenter
    from google.protobuf.duration_pb2 import Duration

    # Create a client.
    client = securitycenter.SecurityCenterClient()

    # source_name is the resource path for a source that has been
    # created previously (you can use list_sources to find a specific one).
    # Its format is:
    # source_name = "organizations/{organization_id}/sources/{source_id}"
    # e.g.:
    # source_name = "organizations/111122222444/sources/1234"

    # List assets and their state change the last 30 days
    compare_delta = timedelta(days=30)
    # Convert the timedelta to a Duration
    duration_proto = Duration()
    duration_proto.FromTimedelta(compare_delta)

    group_result_iterator = client.group_findings(
        source_name, group_by="state_change", compare_duration=duration_proto)
    for i, group_result in enumerate(group_result_iterator):
        print((i + 1), group_result)
    # [END group_findings_with_changes]
    return i
Example #9
    def to_proto(self):
        """Return estop_pb2.EstopEndpoint based on current member variables."""
        t_seconds = int(self.estop_timeout)
        t_nanos = int((self.estop_timeout - t_seconds) * 1e9)
        if self.estop_cut_power_timeout is None:
            return estop_pb2.EstopEndpoint(role=self.role, name=self._name,
                                           unique_id=self._unique_id,
                                           timeout=Duration(seconds=t_seconds, nanos=t_nanos))
        else:
            cpt_seconds = int(self.estop_cut_power_timeout)
            cpt_nanos = int((self.estop_cut_power_timeout - cpt_seconds) * 1e9)
            return estop_pb2.EstopEndpoint(role=self.role, name=self._name,
                                           unique_id=self._unique_id,
                                           timeout=Duration(seconds=t_seconds, nanos=t_nanos),
                                           cut_power_timeout=Duration(seconds=cpt_seconds,
                                                                      nanos=cpt_nanos))
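The int()/1e9 split in Example #9 is the standard way to encode a fractional float timeout as a protobuf Duration. A quick round-trip sketch of the same arithmetic:

from google.protobuf.duration_pb2 import Duration

timeout = 2.75  # fractional seconds
d = Duration(seconds=int(timeout), nanos=int((timeout - int(timeout)) * 1e9))
assert d.seconds == 2 and d.nanos == 750000000
assert d.seconds + d.nanos / 1e9 == timeout  # recovers the original float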
Example #10
def test_update_featureset_apply_featureset_and_ingest_first_subset(
        client, update_featureset_dataframe):
    subset_columns = [
        "datetime", "entity_id", "update_feature1", "update_feature2"
    ]
    subset_df = update_featureset_dataframe.iloc[:5][subset_columns]
    update_fs = FeatureSet(
        "update_fs",
        entities=[Entity(name="entity_id", dtype=ValueType.INT64)],
        max_age=Duration(seconds=432000),
    )
    update_fs.infer_fields_from_df(subset_df)
    client.apply(update_fs)

    client.ingest(feature_set=update_fs, source=subset_df)

    time.sleep(15)
    feature_retrieval_job = client.get_batch_features(
        entity_rows=update_featureset_dataframe[["datetime",
                                                 "entity_id"]].iloc[:5],
        feature_refs=[
            f"{PROJECT_NAME}/update_feature1",
            f"{PROJECT_NAME}/update_feature2",
        ],
    )

    output = feature_retrieval_job.to_dataframe().sort_values(by=["entity_id"])
    print(output.head())

    assert output["update_feature1"].to_list(
    ) == subset_df["update_feature1"].to_list()
    assert output["update_feature2"].to_list(
    ) == subset_df["update_feature2"].to_list()
Example #11
    def test_build(self):
        """Assert the Opt. model is built correctly"""

        model_builder = OptimizationModelBuilder(
            constraints=[CapacityConstraint()])
        problem = self.problem
        model = model_builder.build(problem)
        self.assertTrue(model, msg='Opt. model built incorrectly.')
        self.assertEqual(model.manager.GetNumberOfVehicles(),
                         len(self.vehicles),
                         msg='Number of vehicles in manager is incorrect.')
        self.assertEqual(model.manager.GetNumberOfIndices(),
                         len(self.vehicles) * 2 + len(self.stops) -
                         len(problem.depots),
                         msg='Number of indices in manager is incorrect.')
        self.assertTrue(model.solver, msg='Solver could not be instantiated.')
        self.assertTrue(model.search_parameters,
                        msg='Search params could not be built.')
        self.assertEqual(model.search_parameters.time_limit,
                         Duration(seconds=self.params.SEARCH_TIME_LIMIT),
                         msg='Time limit is incorrect in the search params.')
        self.assertEqual(
            model.search_parameters.solution_limit,
            self.params.SEARCH_SOLUTIONS_LIMIT,
            msg='Solutions limit is incorrect in the search params.')
        self.assertEqual(
            model.search_parameters.first_solution_strategy,
            FIRST_SOLUTION_STRATEGY[self.params.FIRST_SOLUTION_STRATEGY],
            msg='First solution strategy is incorrect in the search params.')
        self.assertEqual(
            model.search_parameters.local_search_metaheuristic,
            LOCAL_SEARCH_METAHEURISTIC[self.params.SEARCH_METAHEURISTIC],
            msg='Search metaheuristic is incorrect in the search params.')
        self.assertTrue(model.solver.HasDimension('capacity_constraint'),
                        msg='Capacity constraint not added.')
Example #12
    def __init__(
        self,
        name: str,
        entities: List[str],
        features: List[Feature],
        tags: Dict[str, str],
        ttl: Optional[Union[Duration, timedelta]],
        online: bool,
        input: BigQuerySource,
    ):
        cols = [entity for entity in entities] + [feat.name for feat in features]
        for col in cols:
            if input.field_mapping is not None and col in input.field_mapping.keys():
                raise ValueError(
                    f"The field {col} is mapped to {input.field_mapping[col]} for this data source. Please either remove this field mapping or use {input.field_mapping[col]} as the Entity or Feature name."
                )

        self.name = name
        self.entities = entities
        self.features = features
        self.tags = tags
        if isinstance(ttl, timedelta):
            proto_ttl = Duration()
            proto_ttl.FromTimedelta(ttl)
            self.ttl = proto_ttl
        else:
            self.ttl = ttl

        self.online = online
        self.input = input
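The constructor above accepts ttl either as a protobuf Duration or as a datetime.timedelta, normalizing the latter through FromTimedelta. A minimal sketch of that conversion in isolation:

from datetime import timedelta
from google.protobuf.duration_pb2 import Duration

proto_ttl = Duration()
proto_ttl.FromTimedelta(timedelta(hours=2))
assert proto_ttl.seconds == 7200 and proto_ttl.nanos == 0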
Example #13
    def test_begin_ok_exact_staleness(self):
        from google.protobuf.duration_pb2 import Duration
        from google.cloud.spanner_v1.proto.transaction_pb2 import (
            Transaction as TransactionPB, TransactionOptions)

        transaction_pb = TransactionPB(id=TXN_ID)
        database = _Database()
        api = database.spanner_api = self._make_spanner_api()
        api.begin_transaction.return_value = transaction_pb
        duration = self._makeDuration(seconds=SECONDS, microseconds=MICROS)
        session = _Session(database)
        snapshot = self._make_one(session,
                                  exact_staleness=duration,
                                  multi_use=True)

        txn_id = snapshot.begin()

        self.assertEqual(txn_id, TXN_ID)
        self.assertEqual(snapshot._transaction_id, TXN_ID)

        expected_duration = Duration(seconds=SECONDS, nanos=MICROS * 1000)
        expected_txn_options = TransactionOptions(
            read_only=TransactionOptions.ReadOnly(
                exact_staleness=expected_duration))

        api.begin_transaction.assert_called_once_with(
            session.name,
            expected_txn_options,
            metadata=[('google-cloud-resource-prefix', database.name)])
Example #14
    def _ingest_request(self):
        """Iterate through the metrics and create an IngestRequest."""
        self._update_service_info()
        request = IngestRequest(reporter=self._reporter)
        request.idempotency_key = self._generate_idempotency_key()
        start_time = Timestamp()
        start_time.GetCurrentTime()
        duration = Duration()
        duration.FromSeconds(self._intervals * self._flush_interval)
        for metric in self._runtime_metrics:
            metric_type = MetricKind.GAUGE
            if len(metric) == 3:
                key, value, metric_type = metric
            else:
                key, value = metric
            request.points.add(
                duration=duration,
                start=start_time,
                labels=self._labels,
                metric_name=key,
                double_value=value,
                kind=metric_type,
            )
        _log.debug("Metrics collected: %s", request)
        return request
Example #15
def create_static_overlay_segment(start_time_seconds, end_time_seconds):
    # Show the static overlay starting at the start offset.
    animation_start = transcoder.Overlay.Animation()
    animation_start.animation_static = transcoder.Overlay.AnimationStatic()
    animation_start.animation_static.start_time_offset = Duration(
        seconds=int(start_time_seconds),
        nanos=get_nanos_from_seconds(start_time_seconds))
    animation_start.animation_static.xy = transcoder.Overlay.NormalizedCoordinate(x=0., y=0.)

    # Remove the overlay at the end offset.
    animation_end = transcoder.Overlay.Animation()
    animation_end.animation_end = transcoder.Overlay.AnimationEnd()
    animation_end.animation_end.start_time_offset = Duration(
        seconds=int(end_time_seconds),
        nanos=get_nanos_from_seconds(end_time_seconds))

    return [animation_start, animation_end]
Example #16
    def test_basic(self) -> None:
        """
            Add another table to existing repo using partial apply API. Make sure both the table
            applied via CLI apply and the new table are passing RW test.
        """

        runner = CliRunner()
        with runner.local_repo(get_example_repo("example_feature_repo_1.py")) as store:

            driver_locations_source = BigQuerySource(
                table_ref="rh_prod.ride_hailing_co.drivers",
                event_timestamp_column="event_timestamp",
                created_timestamp_column="created_timestamp",
            )

            driver_locations_100 = FeatureView(
                name="driver_locations_100",
                entities=["driver"],
                ttl=Duration(seconds=86400 * 1),
                features=[
                    Feature(name="lat", dtype=ValueType.FLOAT),
                    Feature(name="lon", dtype=ValueType.STRING),
                    Feature(name="name", dtype=ValueType.STRING),
                ],
                online=True,
                input=driver_locations_source,
                tags={},
            )

            store.apply([driver_locations_100])

            basic_rw_test(store, view_name="driver_locations")
            basic_rw_test(store, view_name="driver_locations_100")
Example #17
def test_update_featureset_update_featureset_and_ingest_second_subset(
        client, update_featureset_dataframe):
    subset_columns = [
        "datetime",
        "entity_id",
        "update_feature1",
        "update_feature3",
        "update_feature4",
    ]
    subset_df = update_featureset_dataframe.iloc[5:][subset_columns]
    update_fs = FeatureSet(
        "update_fs",
        entities=[Entity(name="entity_id", dtype=ValueType.INT64)],
        max_age=Duration(seconds=432000),
    )
    update_fs.infer_fields_from_df(subset_df)
    client.apply(update_fs)

    # We keep retrying this ingestion until all values make it into the buffer.
    # This is a necessary step because bigquery streaming caches table schemas
    # and as a result, rows may be lost.
    while True:
        ingestion_id = client.ingest(feature_set=update_fs, source=subset_df)
        time.sleep(15)  # wait for rows to get written to bq
        rows_ingested = get_rows_ingested(client, update_fs, ingestion_id)
        if rows_ingested == len(subset_df):
            print(
                f"Number of rows successfully ingested: {rows_ingested}. Continuing."
            )
            break
        print(
            f"Number of rows successfully ingested: {rows_ingested}. Retrying ingestion."
        )
        time.sleep(30)

    def check():
        feature_retrieval_job = client.get_batch_features(
            entity_rows=update_featureset_dataframe[["datetime",
                                                     "entity_id"]].iloc[5:],
            feature_refs=[
                "update_feature1",
                "update_feature3",
                "update_feature4",
            ],
            project=PROJECT_NAME,
        )

        output = feature_retrieval_job.to_dataframe(
            timeout_sec=180).sort_values(by=["entity_id"])
        print(output.head())

        assert output["update_feature1"].to_list(
        ) == subset_df["update_feature1"].to_list()
        assert output["update_feature3"].to_list(
        ) == subset_df["update_feature3"].to_list()
        assert output["update_feature4"].to_list(
        ) == subset_df["update_feature4"].to_list()
        clean_up_remote_files(feature_retrieval_job.get_avro_files())

    wait_for(check, timedelta(minutes=5))
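Example #17 relies on a wait_for helper that is not shown in the snippet. A minimal sketch, assuming it simply retries a callable until it stops raising or the timeout elapses:

import time
from datetime import timedelta

def wait_for(fn, timeout: timedelta, interval_seconds: float = 5.0):
    # Retry fn until it succeeds or the deadline passes; re-raise the last error.
    deadline = time.monotonic() + timeout.total_seconds()
    while True:
        try:
            return fn()
        except Exception:
            if time.monotonic() >= deadline:
                raise
            time.sleep(interval_seconds)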
Example #18
    def to_proto(self) -> FeatureViewProto:
        """
        Converts a feature view object to its protobuf representation.

        Returns:
            FeatureViewProto protobuf
        """

        meta = FeatureViewMetaProto(
            created_timestamp=self.created_timestamp,
            last_updated_timestamp=self.last_updated_timestamp,
        )

        ttl_duration = None
        if self.ttl is not None:
            ttl_duration = Duration()
            ttl_duration.FromTimedelta(self.ttl)

        spec = FeatureViewSpecProto(
            name=self.name,
            entities=self.entities,
            features=[feature.to_proto() for feature in self.features],
            tags=self.tags,
            ttl=(ttl_duration if ttl_duration is not None else None),
            online=self.online,
            input=self.input.to_proto(),
        )

        return FeatureViewProto(spec=spec, meta=meta)
Example #19
def test_ingest_into_bq(
    feast_client: Client,
    customer_entity: Entity,
    driver_entity: Entity,
    bq_dataframe: pd.DataFrame,
    bq_dataset: str,
    pytestconfig,
):
    bq_project = pytestconfig.getoption("bq_project")
    bq_table_id = f"bq_staging_{datetime.now():%Y%m%d%H%M%s}"
    ft = FeatureTable(
        name="basic_featuretable",
        entities=["driver_id", "customer_id"],
        features=[
            Feature(name="dev_feature_float", dtype=ValueType.FLOAT),
            Feature(name="dev_feature_string", dtype=ValueType.STRING),
        ],
        max_age=Duration(seconds=3600),
        batch_source=BigQuerySource(
            table_ref=f"{bq_project}:{bq_dataset}.{bq_table_id}",
            event_timestamp_column="datetime",
            created_timestamp_column="timestamp",
        ),
    )

    # ApplyEntity
    feast_client.apply(customer_entity)
    feast_client.apply(driver_entity)

    # ApplyFeatureTable
    feast_client.apply(ft)
    feast_client.ingest(ft, bq_dataframe, timeout=120)

    bq_client = bigquery.Client(project=bq_project)

    # Poll BQ for table until the table has been created
    def try_get_table():
        try:
            table = bq_client.get_table(
                bigquery.TableReference(
                    bigquery.DatasetReference(bq_project, bq_dataset), bq_table_id
                )
            )
        except NotFound:
            return None, False
        else:
            return table, True

    wait_retry_backoff(
        retry_fn=try_get_table,
        timeout_secs=30,
        timeout_msg="Timed out trying to get bigquery table",
    )

    query_string = f"SELECT * FROM `{bq_project}.{bq_dataset}.{bq_table_id}`"

    job = bq_client.query(query_string)
    query_df = job.to_dataframe()

    assert_frame_equal(query_df, bq_dataframe)
Example #20
def test_list_point_in_time_changes():
    """Demonstrate listing assets along with their state changes."""
    from google.cloud import securitycenter_v1beta1 as securitycenter
    from google.protobuf.duration_pb2 import Duration
    from datetime import timedelta

    # [START demo_list_assets_changes]
    client = securitycenter.SecurityCenterClient()

    # ORGANIZATION_ID is the numeric ID of the organization (e.g. 123213123121)
    org_name = "organizations/{org_id}".format(org_id=ORGANIZATION_ID)
    project_filter = ("security_center_properties.resource_type=" +
                      '"google.cloud.resourcemanager.Project"')

    # List assets and their state change the last 30 days
    compare_delta = timedelta(days=30)
    # Convert the timedelta to a Duration
    duration_proto = Duration()
    duration_proto.FromTimedelta(compare_delta)
    # Call the API and print results.
    asset_iterator = client.list_assets(org_name,
                                        filter_=project_filter,
                                        compare_duration=duration_proto)
    i = -1  # guards the assert below when the iterator yields nothing
    for i, asset in enumerate(asset_iterator):
        print(i, asset)

    # [END demo_list_assets_changes]
    assert i > 0
Example #21
def get_or_create_subscription():
    conf = get_config()["google_pub_sub"]
    project_id, topic_id = conf["project_id"], conf["topic_id"]
    subscription_id = get_subs_name(conf["subscription"].get(
        "type", "schedule-consumer"))

    subscriber = pubsub_v1.SubscriberClient()
    publisher = pubsub_v1.PublisherClient()

    sub_path = subscriber.subscription_path(project_id, subscription_id)
    topic_path = publisher.topic_path(project_id, topic_id)

    try:
        subscriber.create_subscription(
            request={
                "name": sub_path,
                "topic": topic_path,
                "message_retention_duration": Duration(
                    seconds=conf["subscription"].get(
                        "message_retention_duration", 86400)),
                "ack_deadline_seconds":
                    conf["subscription"].get("ack_deadline_seconds", 300),
                "filter": f'attributes.mac = "{get_mac()}"',
            })
        logging.info(f"{sub_path} created")
    except AlreadyExists:
        logging.info(f"{sub_path} already exists")
        return sub_path

    return sub_path
Example #22
def test_remove_reservation_not_found(get_reservation):
    get_reservation.return_value = None
    processor = cg.EngineProcessor(
        'proj', 'p0', EngineContext(),
        qtypes.QuantumProcessor(schedule_frozen_period=Duration(seconds=10000)))
    with pytest.raises(ValueError):
        processor.remove_reservation('rid')
Example #23
def _list_groups(client):
    """List Error Groups from the last hour.

    This function provides a wrapper around making calls to the GAX
    API. It's used by the system tests to find the appropriate error group
    to verify the error was successfully reported.

    :type client: :class:`~google.cloud.error_reporting.client.Client`
    :param client: The client containing a project and credentials.

    :rtype: :class:`~google.gax.ResourceIterator`
    :returns: Iterable of :class:`~.error_stats_service_pb2.ErrorGroupStats`.
    """
    gax_api = error_stats_service_client.ErrorStatsServiceClient(
        credentials=client._credentials)
    project_name = gax_api.project_path(client.project)

    time_range = error_stats_service_pb2.QueryTimeRange()
    time_range.period = error_stats_service_pb2.QueryTimeRange.PERIOD_1_HOUR

    duration = Duration(seconds=60 * 60)

    return gax_api.list_group_stats(project_name,
                                    time_range,
                                    timed_count_duration=duration)
Example #24
def main():
    # Upload the Python files to run to GCS (uploading them manually in advance also works)
    storage_client: StorageClient = StorageClient(
        env['BUCKET_NAME'], env['PROJECT_ID'], env['STORAGE_CREDENTIAL_PATH'])
    main_python_file_uri: str = storage_client.upload_to_gcs(
        './master.py', 'dataproc/src')
    python_file_uris: List[str] = [
        storage_client.upload_to_gcs('./worker.py', 'dataproc/src'),
        storage_client.upload_to_gcs('./module/storage.py',
                                     'dataproc/src/module'),
    ]

    # Upload the input data to GCS (uploading it manually in advance also works)
    data_file_path: str = './data.txt'
    with open(data_file_path, 'w') as f:
        for sentence in SENTENCES:
            f.write(sentence + '\n')
    storage_client.upload_to_gcs(data_file_path, 'dataproc/input')
    os.remove(data_file_path)

    # Run the PySpark job
    with DataprocCluster(
            env['PROJECT_ID'],
            env['DATAPROC_CREDENTIAL_PATH'],
            cluster_name='test-cluster',
            creates_cluster=True,
            idle_delete_ttl=Duration(seconds=1000),
            pip_packages=
            'more-itertools==5.0.0 nltk==3.4.5 gensim==3.8.1 google-cloud-storage==1.20.0',
            environment_variables={
                'PROJECT_ID': env['PROJECT_ID'],
                'BUCKET_NAME': env['BUCKET_NAME']
            }) as cluster:
        cluster.submit_pyspark_job(main_python_file_uri, python_file_uris)
        print('do something')
Example #25
    def to_proto(self) -> FeatureViewProto:
        """
        Converts a feature view object to its protobuf representation.

        Returns:
            FeatureViewProto protobuf
        """

        meta = FeatureViewMetaProto(
            created_timestamp=self.created_timestamp,
            last_updated_timestamp=self.last_updated_timestamp,
            materialization_intervals=[],
        )
        for interval in self.materialization_intervals:
            interval_proto = MaterializationIntervalProto()
            interval_proto.start_time.FromDatetime(interval[0])
            interval_proto.end_time.FromDatetime(interval[1])
            meta.materialization_intervals.append(interval_proto)

        ttl_duration = None
        if self.ttl is not None:
            ttl_duration = Duration()
            ttl_duration.FromTimedelta(self.ttl)

        spec = FeatureViewSpecProto(
            name=self.name,
            entities=self.entities,
            features=[feature.to_proto() for feature in self.features],
            tags=self.tags,
            ttl=(ttl_duration if ttl_duration is not None else None),
            online=self.online,
            input=self.input.to_proto(),
        )

        return FeatureViewProto(spec=spec, meta=meta)
Example #26
def _ingest_test_getfeaturetable_mocked_resp(file_url: str,
                                             date_partition_col: str = ""):
    return GetFeatureTableResponse(table=FeatureTableProto(
        spec=FeatureTableSpecProto(
            name="ingest_featuretable",
            max_age=Duration(seconds=3600),
            features=[
                FeatureSpecProto(
                    name="dev_feature_float",
                    value_type=ValueProto.ValueType.FLOAT,
                ),
                FeatureSpecProto(
                    name="dev_feature_string",
                    value_type=ValueProto.ValueType.STRING,
                ),
            ],
            entities=["dev_entity"],
            batch_source=DataSourceProto(
                file_options=DataSourceProto.FileOptions(
                    file_format=ParquetFormat().to_proto(), file_url=file_url),
                event_timestamp_column="datetime",
                created_timestamp_column="timestamp",
                date_partition_column=date_partition_col,
            ),
        ),
        meta=FeatureTableMetaProto(),
    ))
Example #27
def alltypes_featuretable():
    batch_source = FileSource(
        file_format="parquet",
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )
    return FeatureTable(
        name="alltypes",
        entities=["alltypes_id"],
        features=[
            Feature(name="float_feature", dtype=ValueType.FLOAT),
            Feature(name="int64_feature", dtype=ValueType.INT64),
            Feature(name="int32_feature", dtype=ValueType.INT32),
            Feature(name="string_feature", dtype=ValueType.STRING),
            Feature(name="bytes_feature", dtype=ValueType.BYTES),
            Feature(name="bool_feature", dtype=ValueType.BOOL),
            Feature(name="double_feature", dtype=ValueType.DOUBLE),
            Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST),
            Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
            Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
            Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
            Feature(name="string_list_feature", dtype=ValueType.STRING_LIST),
            Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
            Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST),
        ],
        max_age=Duration(seconds=3600),
        batch_source=batch_source,
        labels={"cat": "alltypes"},
    )
Example #28
def test_order_by_creation_time(client):
    proc_time_fs = FeatureSet(
        "processing_time",
        features=[Feature("feature_value", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(proc_time_fs)
    time.sleep(10)
    proc_time_fs = client.get_feature_set(name="processing_time", version=1)

    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)
    N_ROWS = 10
    incorrect_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": [i for i in range(N_ROWS)],
        "feature_value": ["WRONG"] * N_ROWS,
    })
    correct_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": [i for i in range(N_ROWS)],
        "feature_value": ["CORRECT"] * N_ROWS,
    })
    client.ingest(proc_time_fs, incorrect_df)
    time.sleep(10)
    client.ingest(proc_time_fs, correct_df)
    feature_retrieval_job = client.get_batch_features(
        entity_rows=incorrect_df[["datetime", "entity_id"]],
        feature_ids=["processing_time:1:feature_value"])
    output = feature_retrieval_job.to_dataframe()
    print(output.head())

    assert output["processing_time_v1_feature_value"].to_list() == ["CORRECT"
                                                                    ] * N_ROWS
Example #29
    def test_get_feature_set(self, mocked_client, mocker):
        mocked_client._core_service_stub = Core.CoreServiceStub(
            grpc.insecure_channel("")
        )

        from google.protobuf.duration_pb2 import Duration

        mocker.patch.object(
            mocked_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(
                feature_set=FeatureSetProto(
                    spec=FeatureSetSpecProto(
                        name="my_feature_set",
                        max_age=Duration(seconds=3600),
                        labels={"key1": "val1", "key2": "val2"},
                        features=[
                            FeatureSpecProto(
                                name="my_feature_1",
                                value_type=ValueProto.ValueType.FLOAT,
                            ),
                            FeatureSpecProto(
                                name="my_feature_2",
                                value_type=ValueProto.ValueType.FLOAT,
                            ),
                        ],
                        entities=[
                            EntitySpecProto(
                                name="my_entity_1",
                                value_type=ValueProto.ValueType.INT64,
                            )
                        ],
                        source=Source(
                            type=SourceType.KAFKA,
                            kafka_source_config=KafkaSourceConfig(
                                bootstrap_servers="localhost:9092", topic="topic"
                            ),
                        ),
                    ),
                    meta=FeatureSetMetaProto(),
                )
            ),
        )
        mocked_client.set_project("my_project")
        feature_set = mocked_client.get_feature_set("my_feature_set")

        assert (
            feature_set.name == "my_feature_set"
            and "key1" in feature_set.labels
            and feature_set.labels["key1"] == "val1"
            and "key2" in feature_set.labels
            and feature_set.labels["key2"] == "val2"
            and feature_set.fields["my_feature_1"].name == "my_feature_1"
            and feature_set.fields["my_feature_1"].dtype == ValueType.FLOAT
            and feature_set.fields["my_entity_1"].name == "my_entity_1"
            and feature_set.fields["my_entity_1"].dtype == ValueType.INT64
            and len(feature_set.features) == 2
            and len(feature_set.entities) == 1
        )
Example #30
    def execute(self, context):
        features_df = self.bq.get_pandas_df(self.sql)
        fs = FeatureSet(
            self.feature_set_name,
            max_age=Duration(seconds=86400),
            entities=[Entity(name=self.entity_name, dtype=ValueType.INT64)])
        fs.infer_fields_from_df(features_df, replace_existing_features=True)
        self.feast_client.apply(fs)