def test_list_entities_and_features(client):
    customer_entity = Entity("customer_id", ValueType.INT64)
    driver_entity = Entity("driver_id", ValueType.INT64)

    customer_feature_rating = Feature(name="rating", dtype=ValueType.FLOAT, labels={"key1": "val1"})
    customer_feature_cost = Feature(name="cost", dtype=ValueType.FLOAT)
    driver_feature_rating = Feature(name="rating", dtype=ValueType.FLOAT)
    driver_feature_cost = Feature(name="cost", dtype=ValueType.FLOAT, labels={"key1": "val1"})

    filter_by_project_entity_labels_expected = dict(
        [("customer:rating", customer_feature_rating)]
    )
    filter_by_project_entity_expected = dict(
        [("driver:cost", driver_feature_cost), ("driver:rating", driver_feature_rating)]
    )
    filter_by_project_labels_expected = dict(
        [("customer:rating", customer_feature_rating), ("driver:cost", driver_feature_cost)]
    )

    customer_fs = FeatureSet(
        "customer",
        features=[customer_feature_rating, customer_feature_cost],
        entities=[customer_entity],
        max_age=Duration(seconds=100),
    )
    driver_fs = FeatureSet(
        "driver",
        features=[driver_feature_rating, driver_feature_cost],
        entities=[driver_entity],
        max_age=Duration(seconds=100),
    )

    client.set_project(PROJECT_NAME)
    client.apply(customer_fs)
    client.apply(driver_fs)

    # Test for listing of features
    # Case 1: Filter by: project, entities and labels
    filter_by_project_entity_labels_actual = client.list_features_by_ref(
        project=PROJECT_NAME, entities=["customer_id"], labels={"key1": "val1"}
    )

    # Case 2: Filter by: project, entities
    filter_by_project_entity_actual = client.list_features_by_ref(
        project=PROJECT_NAME, entities=["driver_id"]
    )

    # Case 3: Filter by: project, labels
    filter_by_project_labels_actual = client.list_features_by_ref(
        project=PROJECT_NAME, labels={"key1": "val1"}
    )

    assert set(filter_by_project_entity_labels_expected) == set(
        filter_by_project_entity_labels_actual
    )
    assert set(filter_by_project_entity_expected) == set(filter_by_project_entity_actual)
    assert set(filter_by_project_labels_expected) == set(filter_by_project_labels_actual)
def create_fade_overlay_segment(start_time_seconds, end_time_seconds, fade_duration=0.5):
    # create fade-in animation
    animation_start = transcoder.Overlay.Animation()
    animation_start.animation_fade = transcoder.Overlay.AnimationFade()
    animation_start.animation_fade.start_time_offset = Duration(
        seconds=int(start_time_seconds - fade_duration),
        nanos=get_nanos_from_seconds(start_time_seconds - fade_duration),
    )
    animation_start.animation_fade.end_time_offset = Duration(
        seconds=int(start_time_seconds),
        nanos=get_nanos_from_seconds(start_time_seconds),
    )
    animation_start.animation_fade.fade_type = transcoder.Overlay.FadeType.FADE_IN
    animation_start.animation_fade.xy = transcoder.Overlay.NormalizedCoordinate(x=0.0, y=0.0)

    # create fade-out animation
    animation_end = transcoder.Overlay.Animation()
    animation_end.animation_fade = transcoder.Overlay.AnimationFade()
    animation_end.animation_fade.start_time_offset = Duration(
        seconds=int(end_time_seconds),
        nanos=get_nanos_from_seconds(end_time_seconds),
    )
    animation_end.animation_fade.end_time_offset = Duration(
        seconds=int(end_time_seconds + fade_duration),
        nanos=get_nanos_from_seconds(end_time_seconds + fade_duration),
    )
    animation_end.animation_fade.fade_type = transcoder.Overlay.FadeType.FADE_OUT
    animation_end.animation_fade.xy = transcoder.Overlay.NormalizedCoordinate(x=0.0, y=0.0)

    return [animation_start, animation_end]
def test_multiple_featureset_joins(client):
    fs1 = FeatureSet(
        "feature_set_1",
        features=[Feature("feature_value", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    fs2 = FeatureSet(
        "feature_set_2",
        features=[Feature("other_feature_value", ValueType.INT64)],
        entities=[Entity("other_entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )

    client.apply(fs1)
    time.sleep(10)
    fs1 = client.get_feature_set(name="feature_set_1", version=1)

    client.apply(fs2)
    time.sleep(10)
    fs2 = client.get_feature_set(name="feature_set_2", version=1)

    N_ROWS = 10
    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)
    features_1_df = pd.DataFrame(
        {
            "datetime": [time_offset] * N_ROWS,
            "entity_id": [i for i in range(N_ROWS)],
            "feature_value": [f"{i}" for i in range(N_ROWS)],
        }
    )
    client.ingest(fs1, features_1_df)

    features_2_df = pd.DataFrame(
        {
            "datetime": [time_offset] * N_ROWS,
            "other_entity_id": [i for i in range(N_ROWS)],
            "other_feature_value": [i for i in range(N_ROWS)],
        }
    )
    client.ingest(fs2, features_2_df)

    entity_df = pd.DataFrame(
        {
            "datetime": [time_offset] * N_ROWS,
            "entity_id": [i for i in range(N_ROWS)],
            "other_entity_id": [N_ROWS - 1 - i for i in range(N_ROWS)],
        }
    )

    feature_retrieval_job = client.get_batch_features(
        entity_rows=entity_df,
        feature_ids=[
            "feature_set_1:1:feature_value",
            "feature_set_2:1:other_feature_value",
        ],
    )
    output = feature_retrieval_job.to_dataframe()
    print(output.head())

    assert output["entity_id"].to_list() == [
        int(i) for i in output["feature_set_1_v1_feature_value"].to_list()
    ]
    assert (
        output["other_entity_id"].to_list()
        == output["feature_set_2_v1_other_feature_value"].to_list()
    )
def test_list_feature_sets(self, mocked_client, mocker):
    mocker.patch.object(
        mocked_client,
        "_core_service_stub",
        return_value=Core.CoreServiceStub(grpc.insecure_channel("")),
    )

    feature_set_1_proto = FeatureSetProto(
        spec=FeatureSetSpecProto(
            project="test",
            name="driver_car",
            max_age=Duration(seconds=3600),
            labels={"key1": "val1", "key2": "val2"},
            features=[
                FeatureSpecProto(name="feature_1", value_type=ValueProto.ValueType.FLOAT)
            ],
        )
    )
    feature_set_2_proto = FeatureSetProto(
        spec=FeatureSetSpecProto(
            project="test",
            name="driver_ride",
            max_age=Duration(seconds=3600),
            labels={"key1": "val1"},
            features=[
                FeatureSpecProto(name="feature_1", value_type=ValueProto.ValueType.FLOAT)
            ],
        )
    )

    mocker.patch.object(
        mocked_client._core_service_stub,
        "ListFeatureSets",
        return_value=ListFeatureSetsResponse(
            feature_sets=[feature_set_1_proto, feature_set_2_proto]
        ),
    )

    feature_sets = mocked_client.list_feature_sets(labels={"key1": "val1"})
    assert len(feature_sets) == 2

    feature_set = feature_sets[0]
    assert (
        feature_set.name == "driver_car"
        and "key1" in feature_set.labels
        and feature_set.labels["key1"] == "val1"
        and "key2" in feature_set.labels
        and feature_set.labels["key2"] == "val2"
        and feature_set.fields["feature_1"].name == "feature_1"
        and feature_set.fields["feature_1"].dtype == ValueType.FLOAT
        and len(feature_set.features) == 1
    )
def test_basic_register_feature_set_success(client):
    # Register feature set without project
    cust_trans_fs_expected = FeatureSet.from_yaml(f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    driver_fs_expected = FeatureSet.from_yaml(f"{DIR_PATH}/basic/driver_fs.yaml")
    client.apply(cust_trans_fs_expected)
    client.apply(driver_fs_expected)
    cust_trans_fs_actual = client.get_feature_set("customer_transactions")
    assert cust_trans_fs_actual == cust_trans_fs_expected
    driver_fs_actual = client.get_feature_set("driver")
    assert driver_fs_actual == driver_fs_expected

    # Register feature set with project
    cust_trans_fs_expected = FeatureSet.from_yaml(f"{DIR_PATH}/basic/cust_trans_fs.yaml")
    client.set_project(PROJECT_NAME)
    client.apply(cust_trans_fs_expected)
    cust_trans_fs_actual = client.get_feature_set(
        "customer_transactions", project=PROJECT_NAME
    )
    assert cust_trans_fs_actual == cust_trans_fs_expected

    # Register feature set with labels
    driver_unlabelled_fs = FeatureSet(
        "driver_unlabelled",
        features=[Feature("rating", ValueType.FLOAT), Feature("cost", ValueType.FLOAT)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    driver_labeled_fs_expected = FeatureSet(
        "driver_labeled",
        features=[Feature("rating", ValueType.FLOAT), Feature("cost", ValueType.FLOAT)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
        labels={"key1": "val1"},
    )
    client.set_project(PROJECT_NAME)
    client.apply(driver_unlabelled_fs)
    client.apply(driver_labeled_fs_expected)
    driver_fs_actual = client.list_feature_sets(
        project=PROJECT_NAME, labels={"key1": "val1"}
    )[0]
    assert driver_fs_actual == driver_labeled_fs_expected

    # reset client's project for other tests
    client.set_project()
def create_push_subscription(
    self,
    subscription_name: str,
    topic_name: str,
    endpoint: str,
    config: SubscriptionConfig = None,
) -> Subscription:
    subscriber: SubscriberWrapper = SubscriberClient()
    subscription_path = subscriber.subscription_path(self.project_id, subscription_name)
    topic_path = self.get_topic_path(topic_name)
    if not config:
        config = SubscriptionConfig()
    logging_extra = {
        "subscription_name": subscription_name,
        "topic_name": topic_name,
        "config": config.dict(),
    }
    with subscriber:
        try:
            try:
                # Return the existing subscription if it is already present.
                subscription = subscriber.get_subscription(
                    request={"subscription": subscription_path}
                )
                Logger.info("Push subscription exists", logging_extra)
                return subscription
            except NotFound:
                request = Subscription(
                    name=subscription_path,
                    topic=topic_path,
                    push_config=PushConfig(push_endpoint=endpoint),
                    ack_deadline_seconds=60,
                    expiration_policy=ExpirationPolicy(
                        ttl=Duration(seconds=config.expiration_days * 86400)
                        if config.expiration_days
                        else None
                    ),
                    retry_policy=RetryPolicy(),
                    message_retention_duration=Duration(
                        seconds=config.retention_days * 86400
                    ),
                )
                subscription = subscriber.create_subscription(request=request)
                Logger.info("Push subscription created", logging_extra)
                return subscription
        except BaseException as ex:
            Logger.error(
                "Failed to create push subscription", exc_info=ex, extra=logging_extra
            )
            raise ex
def test_feature_set_types_success(self, client, dataframe, mocker):
    all_types_fs = FeatureSet(
        name="all_types",
        entities=[Entity(name="user_id", dtype=ValueType.INT64)],
        features=[
            Feature(name="float_feature", dtype=ValueType.FLOAT),
            Feature(name="int64_feature", dtype=ValueType.INT64),
            Feature(name="int32_feature", dtype=ValueType.INT32),
            Feature(name="string_feature", dtype=ValueType.STRING),
            Feature(name="bytes_feature", dtype=ValueType.BYTES),
            Feature(name="bool_feature", dtype=ValueType.BOOL),
            Feature(name="double_feature", dtype=ValueType.DOUBLE),
            Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
            Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
            Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
            Feature(name="string_list_feature", dtype=ValueType.STRING_LIST),
            Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
            Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST),
            Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST),
        ],
        max_age=Duration(seconds=3600),
    )

    # Register with Feast core
    client.apply(all_types_fs)

    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=all_types_fs.to_proto()),
    )

    # Ingest data into Feast
    client.ingest(all_types_fs, dataframe=dataframe)
def group_findings_and_changes(source_name):
    """Demonstrates grouping all findings across an organization and
    associated changes."""
    i = 0
    # [START group_findings_with_changes]
    from datetime import timedelta

    from google.cloud import securitycenter
    from google.protobuf.duration_pb2 import Duration

    # Create a client.
    client = securitycenter.SecurityCenterClient()

    # source_name is the resource path for a source that has been
    # created previously (you can use list_sources to find a specific one).
    # Its format is:
    # source_name = "organizations/{organization_id}/sources/{source_id}"
    # e.g.:
    # source_name = "organizations/111122222444/sources/1234"

    # List findings and their state changes over the last 30 days.
    compare_delta = timedelta(days=30)
    # Convert the timedelta to a Duration
    duration_proto = Duration()
    duration_proto.FromTimedelta(compare_delta)

    group_result_iterator = client.group_findings(
        source_name, group_by="state_change", compare_duration=duration_proto
    )
    for i, group_result in enumerate(group_result_iterator):
        print((i + 1), group_result)
    # [END group_findings_with_changes]
    return i
def to_proto(self):
    """Return estop_pb2.EstopEndpoint based on current member variables."""
    t_seconds = int(self.estop_timeout)
    t_nanos = int((self.estop_timeout - t_seconds) * 1e9)
    if self.estop_cut_power_timeout is None:
        return estop_pb2.EstopEndpoint(
            role=self.role,
            name=self._name,
            unique_id=self._unique_id,
            timeout=Duration(seconds=t_seconds, nanos=t_nanos),
        )
    else:
        cpt_seconds = int(self.estop_cut_power_timeout)
        cpt_nanos = int((self.estop_cut_power_timeout - cpt_seconds) * 1e9)
        return estop_pb2.EstopEndpoint(
            role=self.role,
            name=self._name,
            unique_id=self._unique_id,
            timeout=Duration(seconds=t_seconds, nanos=t_nanos),
            cut_power_timeout=Duration(seconds=cpt_seconds, nanos=cpt_nanos),
        )
def test_update_featureset_apply_featureset_and_ingest_first_subset(
    client, update_featureset_dataframe
):
    subset_columns = ["datetime", "entity_id", "update_feature1", "update_feature2"]
    subset_df = update_featureset_dataframe.iloc[:5][subset_columns]
    update_fs = FeatureSet(
        "update_fs",
        entities=[Entity(name="entity_id", dtype=ValueType.INT64)],
        max_age=Duration(seconds=432000),
    )
    update_fs.infer_fields_from_df(subset_df)
    client.apply(update_fs)

    client.ingest(feature_set=update_fs, source=subset_df)
    time.sleep(15)

    feature_retrieval_job = client.get_batch_features(
        entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[:5],
        feature_refs=[
            f"{PROJECT_NAME}/update_feature1",
            f"{PROJECT_NAME}/update_feature2",
        ],
    )
    output = feature_retrieval_job.to_dataframe().sort_values(by=["entity_id"])
    print(output.head())

    assert output["update_feature1"].to_list() == subset_df["update_feature1"].to_list()
    assert output["update_feature2"].to_list() == subset_df["update_feature2"].to_list()
def test_build(self):
    """Assert the Opt. model is built correctly"""
    model_builder = OptimizationModelBuilder(constraints=[CapacityConstraint()])
    problem = self.problem
    model = model_builder.build(problem)
    self.assertTrue(model, msg='Opt. model built incorrectly.')
    self.assertEqual(
        model.manager.GetNumberOfVehicles(),
        len(self.vehicles),
        msg='Number of vehicles in manager is incorrect.')
    self.assertEqual(
        model.manager.GetNumberOfIndices(),
        len(self.vehicles) * 2 + len(self.stops) - len(problem.depots),
        msg='Number of indices in manager is incorrect.')
    self.assertTrue(model.solver, msg='Solver could not be instantiated.')
    self.assertTrue(model.search_parameters, msg='Search params could not be built.')
    self.assertEqual(
        model.search_parameters.time_limit,
        Duration(seconds=self.params.SEARCH_TIME_LIMIT),
        msg='Time limit is incorrect in the search params.')
    self.assertEqual(
        model.search_parameters.solution_limit,
        self.params.SEARCH_SOLUTIONS_LIMIT,
        msg='Solutions limit is incorrect in the search params.')
    self.assertEqual(
        model.search_parameters.first_solution_strategy,
        FIRST_SOLUTION_STRATEGY[self.params.FIRST_SOLUTION_STRATEGY],
        msg='First solution strategy is incorrect in the search params.')
    self.assertEqual(
        model.search_parameters.local_search_metaheuristic,
        LOCAL_SEARCH_METAHEURISTIC[self.params.SEARCH_METAHEURISTIC],
        msg='Search metaheuristic is incorrect in the search params.')
    self.assertTrue(
        model.solver.HasDimension('capacity_constraint'),
        msg='Capacity constraint not added.')
def __init__(
    self,
    name: str,
    entities: List[str],
    features: List[Feature],
    tags: Dict[str, str],
    ttl: Optional[Union[Duration, timedelta]],
    online: bool,
    input: BigQuerySource,
):
    cols = [entity for entity in entities] + [feat.name for feat in features]
    for col in cols:
        if input.field_mapping is not None and col in input.field_mapping.keys():
            raise ValueError(
                f"The field {col} is mapped to {input.field_mapping[col]} for this data source. Please either remove this field mapping or use {input.field_mapping[col]} as the Entity or Feature name."
            )

    self.name = name
    self.entities = entities
    self.features = features
    self.tags = tags
    if isinstance(ttl, timedelta):
        proto_ttl = Duration()
        proto_ttl.FromTimedelta(ttl)
        self.ttl = proto_ttl
    else:
        self.ttl = ttl
    self.online = online
    self.input = input
def test_begin_ok_exact_staleness(self):
    from google.protobuf.duration_pb2 import Duration
    from google.cloud.spanner_v1.proto.transaction_pb2 import (
        Transaction as TransactionPB,
        TransactionOptions,
    )

    transaction_pb = TransactionPB(id=TXN_ID)
    database = _Database()
    api = database.spanner_api = self._make_spanner_api()
    api.begin_transaction.return_value = transaction_pb
    duration = self._makeDuration(seconds=SECONDS, microseconds=MICROS)
    session = _Session(database)
    snapshot = self._make_one(session, exact_staleness=duration, multi_use=True)

    txn_id = snapshot.begin()

    self.assertEqual(txn_id, TXN_ID)
    self.assertEqual(snapshot._transaction_id, TXN_ID)

    expected_duration = Duration(seconds=SECONDS, nanos=MICROS * 1000)
    expected_txn_options = TransactionOptions(
        read_only=TransactionOptions.ReadOnly(exact_staleness=expected_duration)
    )

    api.begin_transaction.assert_called_once_with(
        session.name,
        expected_txn_options,
        metadata=[('google-cloud-resource-prefix', database.name)],
    )
def _ingest_request(self):
    """Iterate through the metrics and create an IngestRequest"""
    self._update_service_info()
    request = IngestRequest(reporter=self._reporter)
    request.idempotency_key = self._generate_idempotency_key()

    start_time = Timestamp()
    start_time.GetCurrentTime()

    duration = Duration()
    duration.FromSeconds(self._intervals * self._flush_interval)

    for metric in self._runtime_metrics:
        metric_type = MetricKind.GAUGE
        if len(metric) == 3:
            key, value, metric_type = metric
        else:
            key, value = metric
        request.points.add(
            duration=duration,
            start=start_time,
            labels=self._labels,
            metric_name=key,
            double_value=value,
            kind=metric_type,
        )

    _log.debug("Metrics collected: %s", request)
    return request
def create_static_overlay_segment(start_time_seconds, end_time_seconds):
    # Static overlay that appears at start_time_seconds...
    animation_start = transcoder.Overlay.Animation()
    animation_start.animation_static = transcoder.Overlay.AnimationStatic()
    animation_start.animation_static.start_time_offset = Duration(
        seconds=int(start_time_seconds),
        nanos=get_nanos_from_seconds(start_time_seconds),
    )
    animation_start.animation_static.xy = transcoder.Overlay.NormalizedCoordinate(x=0.0, y=0.0)

    # ...and is removed again at end_time_seconds.
    animation_end = transcoder.Overlay.Animation()
    animation_end.animation_end = transcoder.Overlay.AnimationEnd()
    animation_end.animation_end.start_time_offset = Duration(
        seconds=int(end_time_seconds),
        nanos=get_nanos_from_seconds(end_time_seconds),
    )

    return [animation_start, animation_end]
def test_basic(self) -> None:
    """
    Add another table to an existing repo using the partial apply API. Make sure both the
    table applied via CLI apply and the new table pass the RW test.
    """
    runner = CliRunner()
    with runner.local_repo(get_example_repo("example_feature_repo_1.py")) as store:
        driver_locations_source = BigQuerySource(
            table_ref="rh_prod.ride_hailing_co.drivers",
            event_timestamp_column="event_timestamp",
            created_timestamp_column="created_timestamp",
        )

        driver_locations_100 = FeatureView(
            name="driver_locations_100",
            entities=["driver"],
            ttl=Duration(seconds=86400 * 1),
            features=[
                Feature(name="lat", dtype=ValueType.FLOAT),
                Feature(name="lon", dtype=ValueType.STRING),
                Feature(name="name", dtype=ValueType.STRING),
            ],
            online=True,
            input=driver_locations_source,
            tags={},
        )

        store.apply([driver_locations_100])

        basic_rw_test(store, view_name="driver_locations")
        basic_rw_test(store, view_name="driver_locations_100")
def test_update_featureset_update_featureset_and_ingest_second_subset(
    client, update_featureset_dataframe
):
    subset_columns = [
        "datetime",
        "entity_id",
        "update_feature1",
        "update_feature3",
        "update_feature4",
    ]
    subset_df = update_featureset_dataframe.iloc[5:][subset_columns]
    update_fs = FeatureSet(
        "update_fs",
        entities=[Entity(name="entity_id", dtype=ValueType.INT64)],
        max_age=Duration(seconds=432000),
    )
    update_fs.infer_fields_from_df(subset_df)
    client.apply(update_fs)

    # We keep retrying this ingestion until all values make it into the buffer.
    # This is a necessary step because bigquery streaming caches table schemas
    # and as a result, rows may be lost.
    while True:
        ingestion_id = client.ingest(feature_set=update_fs, source=subset_df)
        time.sleep(15)  # wait for rows to get written to bq
        rows_ingested = get_rows_ingested(client, update_fs, ingestion_id)
        if rows_ingested == len(subset_df):
            print(f"Number of rows successfully ingested: {rows_ingested}. Continuing.")
            break
        print(
            f"Number of rows successfully ingested: {rows_ingested}. Retrying ingestion."
        )
        time.sleep(30)

    def check():
        feature_retrieval_job = client.get_batch_features(
            entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[5:],
            feature_refs=["update_feature1", "update_feature3", "update_feature4"],
            project=PROJECT_NAME,
        )
        output = feature_retrieval_job.to_dataframe(timeout_sec=180).sort_values(
            by=["entity_id"]
        )
        print(output.head())

        assert output["update_feature1"].to_list() == subset_df["update_feature1"].to_list()
        assert output["update_feature3"].to_list() == subset_df["update_feature3"].to_list()
        assert output["update_feature4"].to_list() == subset_df["update_feature4"].to_list()
        clean_up_remote_files(feature_retrieval_job.get_avro_files())

    wait_for(check, timedelta(minutes=5))
def to_proto(self) -> FeatureViewProto:
    """
    Converts a feature view object to its protobuf representation.

    Returns:
        FeatureViewProto protobuf
    """
    meta = FeatureViewMetaProto(
        created_timestamp=self.created_timestamp,
        last_updated_timestamp=self.last_updated_timestamp,
    )

    # ttl is optional; initialize to None so an unset ttl is serialized as None
    # instead of raising a NameError below.
    ttl_duration = None
    if self.ttl is not None:
        ttl_duration = Duration()
        ttl_duration.FromTimedelta(self.ttl)

    spec = FeatureViewSpecProto(
        name=self.name,
        entities=self.entities,
        features=[feature.to_proto() for feature in self.features],
        tags=self.tags,
        ttl=(ttl_duration if ttl_duration is not None else None),
        online=self.online,
        input=self.input.to_proto(),
    )

    return FeatureViewProto(spec=spec, meta=meta)
def test_ingest_into_bq(
    feast_client: Client,
    customer_entity: Entity,
    driver_entity: Entity,
    bq_dataframe: pd.DataFrame,
    bq_dataset: str,
    pytestconfig,
):
    bq_project = pytestconfig.getoption("bq_project")
    bq_table_id = f"bq_staging_{datetime.now():%Y%m%d%H%M%s}"
    ft = FeatureTable(
        name="basic_featuretable",
        entities=["driver_id", "customer_id"],
        features=[
            Feature(name="dev_feature_float", dtype=ValueType.FLOAT),
            Feature(name="dev_feature_string", dtype=ValueType.STRING),
        ],
        max_age=Duration(seconds=3600),
        batch_source=BigQuerySource(
            table_ref=f"{bq_project}:{bq_dataset}.{bq_table_id}",
            event_timestamp_column="datetime",
            created_timestamp_column="timestamp",
        ),
    )

    # ApplyEntity
    feast_client.apply(customer_entity)
    feast_client.apply(driver_entity)

    # ApplyFeatureTable
    feast_client.apply(ft)
    feast_client.ingest(ft, bq_dataframe, timeout=120)

    bq_client = bigquery.Client(project=bq_project)

    # Poll BQ for table until the table has been created
    def try_get_table():
        try:
            table = bq_client.get_table(
                bigquery.TableReference(
                    bigquery.DatasetReference(bq_project, bq_dataset), bq_table_id
                )
            )
        except NotFound:
            return None, False
        else:
            return table, True

    wait_retry_backoff(
        retry_fn=try_get_table,
        timeout_secs=30,
        timeout_msg="Timed out trying to get bigquery table",
    )

    query_string = f"SELECT * FROM `{bq_project}.{bq_dataset}.{bq_table_id}`"

    job = bq_client.query(query_string)
    query_df = job.to_dataframe()

    assert_frame_equal(query_df, bq_dataframe)
def test_list_point_in_time_changes():
    """Demonstrate listing assets along with their state changes."""
    from google.cloud import securitycenter_v1beta1 as securitycenter
    from google.protobuf.duration_pb2 import Duration
    from datetime import timedelta

    # [START demo_list_assets_changes]
    client = securitycenter.SecurityCenterClient()

    # ORGANIZATION_ID is the numeric ID of the organization (e.g. 123213123121)
    org_name = "organizations/{org_id}".format(org_id=ORGANIZATION_ID)
    project_filter = (
        "security_center_properties.resource_type="
        + '"google.cloud.resourcemanager.Project"'
    )

    # List assets and their state changes over the last 30 days
    compare_delta = timedelta(days=30)
    # Convert the timedelta to a Duration
    duration_proto = Duration()
    duration_proto.FromTimedelta(compare_delta)

    # Call the API and print results.
    asset_iterator = client.list_assets(
        org_name, filter_=project_filter, compare_duration=duration_proto
    )
    for i, asset in enumerate(asset_iterator):
        print(i, asset)
    # [END demo_list_assets_changes]
    assert i > 0
def get_or_create_subscription():
    conf = get_config()["google_pub_sub"]
    project_id, topic_id = conf["project_id"], conf["topic_id"]
    subscription_id = get_subs_name(conf["subscription"].get("type", "schedule-consumer"))

    subscriber = pubsub_v1.SubscriberClient()
    publisher = pubsub_v1.PublisherClient()
    sub_path = subscriber.subscription_path(project_id, subscription_id)
    topic_path = publisher.topic_path(project_id, topic_id)

    try:
        subscriber.create_subscription(
            request={
                "name": sub_path,
                "topic": topic_path,
                "message_retention_duration": Duration(
                    seconds=conf["subscription"].get("message_retention_duration", 86400)
                ),
                "ack_deadline_seconds": conf["subscription"].get("ack_deadline_seconds", 300),
                "filter": f'attributes.mac = "{get_mac()}"',
            }
        )
        logging.info(f"{sub_path} created")
    except AlreadyExists:
        logging.info(f"{sub_path} already exists")
        return sub_path

    return sub_path
def test_remove_reservation_not_found(get_reservation):
    get_reservation.return_value = None
    processor = cg.EngineProcessor(
        'proj',
        'p0',
        EngineContext(),
        qtypes.QuantumProcessor(schedule_frozen_period=Duration(seconds=10000)),
    )
    with pytest.raises(ValueError):
        processor.remove_reservation('rid')
def _list_groups(client):
    """List Error Groups from the last hour.

    This function provides a wrapper around making calls to the GAX
    API. It's used by the system tests to find the appropriate error
    group to verify the error was successfully reported.

    :type client: :class:`~google.cloud.error_reporting.client.Client`
    :param client: The client containing a project and credentials.

    :rtype: :class:`~google.gax.ResourceIterator`
    :returns: Iterable of
        :class:`~.error_stats_service_pb2.ErrorGroupStats`.
    """
    gax_api = error_stats_service_client.ErrorStatsServiceClient(
        credentials=client._credentials
    )
    project_name = gax_api.project_path(client.project)

    time_range = error_stats_service_pb2.QueryTimeRange()
    time_range.period = error_stats_service_pb2.QueryTimeRange.PERIOD_1_HOUR

    duration = Duration(seconds=60 * 60)

    return gax_api.list_group_stats(
        project_name, time_range, timed_count_duration=duration
    )
def main():
    # Upload the Python files to run to GCS (they can also be uploaded manually in advance)
    storage_client: StorageClient = StorageClient(
        env['BUCKET_NAME'], env['PROJECT_ID'], env['STORAGE_CREDENTIAL_PATH'])
    main_python_file_uri: str = storage_client.upload_to_gcs('./master.py', 'dataproc/src')
    python_file_uris: List[str] = [
        storage_client.upload_to_gcs('./worker.py', 'dataproc/src'),
        storage_client.upload_to_gcs('./module/storage.py', 'dataproc/src/module'),
    ]

    # Upload the data to be processed to GCS (it can also be uploaded manually in advance)
    data_file_path: str = './data.txt'
    with open(data_file_path, 'w') as f:
        for sentence in SENTENCES:
            f.write(sentence + '\n')
    storage_client.upload_to_gcs(data_file_path, 'dataproc/input')
    os.remove(data_file_path)

    # Run the PySpark job
    with DataprocCluster(
            env['PROJECT_ID'],
            env['DATAPROC_CREDENTIAL_PATH'],
            cluster_name='test-cluster',
            creates_cluster=True,
            idle_delete_ttl=Duration(seconds=1000),
            pip_packages='more-itertools==5.0.0 nltk==3.4.5 gensim==3.8.1 google-cloud-storage==1.20.0',
            environment_variables={
                'PROJECT_ID': env['PROJECT_ID'],
                'BUCKET_NAME': env['BUCKET_NAME']
            }) as cluster:
        cluster.submit_pyspark_job(main_python_file_uri, python_file_uris)
        print('do something')
def to_proto(self) -> FeatureViewProto:
    """
    Converts a feature view object to its protobuf representation.

    Returns:
        FeatureViewProto protobuf
    """
    meta = FeatureViewMetaProto(
        created_timestamp=self.created_timestamp,
        last_updated_timestamp=self.last_updated_timestamp,
        materialization_intervals=[],
    )
    for interval in self.materialization_intervals:
        interval_proto = MaterializationIntervalProto()
        interval_proto.start_time.FromDatetime(interval[0])
        interval_proto.end_time.FromDatetime(interval[1])
        meta.materialization_intervals.append(interval_proto)

    ttl_duration = None
    if self.ttl is not None:
        ttl_duration = Duration()
        ttl_duration.FromTimedelta(self.ttl)

    spec = FeatureViewSpecProto(
        name=self.name,
        entities=self.entities,
        features=[feature.to_proto() for feature in self.features],
        tags=self.tags,
        ttl=(ttl_duration if ttl_duration is not None else None),
        online=self.online,
        input=self.input.to_proto(),
    )

    return FeatureViewProto(spec=spec, meta=meta)
def _ingest_test_getfeaturetable_mocked_resp(file_url: str, date_partition_col: str = ""):
    return GetFeatureTableResponse(
        table=FeatureTableProto(
            spec=FeatureTableSpecProto(
                name="ingest_featuretable",
                max_age=Duration(seconds=3600),
                features=[
                    FeatureSpecProto(
                        name="dev_feature_float",
                        value_type=ValueProto.ValueType.FLOAT,
                    ),
                    FeatureSpecProto(
                        name="dev_feature_string",
                        value_type=ValueProto.ValueType.STRING,
                    ),
                ],
                entities=["dev_entity"],
                batch_source=DataSourceProto(
                    file_options=DataSourceProto.FileOptions(
                        file_format=ParquetFormat().to_proto(), file_url=file_url
                    ),
                    event_timestamp_column="datetime",
                    created_timestamp_column="timestamp",
                    date_partition_column=date_partition_col,
                ),
            ),
            meta=FeatureTableMetaProto(),
        )
    )
def alltypes_featuretable():
    batch_source = FileSource(
        file_format="parquet",
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )
    return FeatureTable(
        name="alltypes",
        entities=["alltypes_id"],
        features=[
            Feature(name="float_feature", dtype=ValueType.FLOAT),
            Feature(name="int64_feature", dtype=ValueType.INT64),
            Feature(name="int32_feature", dtype=ValueType.INT32),
            Feature(name="string_feature", dtype=ValueType.STRING),
            Feature(name="bytes_feature", dtype=ValueType.BYTES),
            Feature(name="bool_feature", dtype=ValueType.BOOL),
            Feature(name="double_feature", dtype=ValueType.DOUBLE),
            Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST),
            Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
            Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
            Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
            Feature(name="string_list_feature", dtype=ValueType.STRING_LIST),
            Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
            Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST),
        ],
        max_age=Duration(seconds=3600),
        batch_source=batch_source,
        labels={"cat": "alltypes"},
    )
def test_order_by_creation_time(client):
    proc_time_fs = FeatureSet(
        "processing_time",
        features=[Feature("feature_value", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(proc_time_fs)
    time.sleep(10)
    proc_time_fs = client.get_feature_set(name="processing_time", version=1)

    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)
    N_ROWS = 10
    incorrect_df = pd.DataFrame(
        {
            "datetime": [time_offset] * N_ROWS,
            "entity_id": [i for i in range(N_ROWS)],
            "feature_value": ["WRONG"] * N_ROWS,
        }
    )
    correct_df = pd.DataFrame(
        {
            "datetime": [time_offset] * N_ROWS,
            "entity_id": [i for i in range(N_ROWS)],
            "feature_value": ["CORRECT"] * N_ROWS,
        }
    )
    client.ingest(proc_time_fs, incorrect_df)
    time.sleep(10)
    client.ingest(proc_time_fs, correct_df)

    feature_retrieval_job = client.get_batch_features(
        entity_rows=incorrect_df[["datetime", "entity_id"]],
        feature_ids=["processing_time:1:feature_value"],
    )
    output = feature_retrieval_job.to_dataframe()
    print(output.head())

    assert output["processing_time_v1_feature_value"].to_list() == ["CORRECT"] * N_ROWS
def test_get_feature_set(self, mocked_client, mocker):
    mocked_client._core_service_stub = Core.CoreServiceStub(grpc.insecure_channel(""))

    from google.protobuf.duration_pb2 import Duration

    mocker.patch.object(
        mocked_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(
            feature_set=FeatureSetProto(
                spec=FeatureSetSpecProto(
                    name="my_feature_set",
                    max_age=Duration(seconds=3600),
                    labels={"key1": "val1", "key2": "val2"},
                    features=[
                        FeatureSpecProto(
                            name="my_feature_1",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpecProto(
                            name="my_feature_2",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                    ],
                    entities=[
                        EntitySpecProto(
                            name="my_entity_1",
                            value_type=ValueProto.ValueType.INT64,
                        )
                    ],
                    source=Source(
                        type=SourceType.KAFKA,
                        kafka_source_config=KafkaSourceConfig(
                            bootstrap_servers="localhost:9092", topic="topic"
                        ),
                    ),
                ),
                meta=FeatureSetMetaProto(),
            )
        ),
    )
    mocked_client.set_project("my_project")
    feature_set = mocked_client.get_feature_set("my_feature_set")

    assert (
        feature_set.name == "my_feature_set"
        and "key1" in feature_set.labels
        and feature_set.labels["key1"] == "val1"
        and "key2" in feature_set.labels
        and feature_set.labels["key2"] == "val2"
        and feature_set.fields["my_feature_1"].name == "my_feature_1"
        and feature_set.fields["my_feature_1"].dtype == ValueType.FLOAT
        and feature_set.fields["my_entity_1"].name == "my_entity_1"
        and feature_set.fields["my_entity_1"].dtype == ValueType.INT64
        and len(feature_set.features) == 2
        and len(feature_set.entities) == 1
    )
def execute(self, context):
    features_df = self.bq.get_pandas_df(self.sql)

    fs = FeatureSet(
        self.feature_set_name,
        max_age=Duration(seconds=86400),
        entities=[Entity(name=self.entity_name, dtype=ValueType.INT64)],
    )
    fs.infer_fields_from_df(features_df, replace_existing_features=True)
    self.feast_client.apply(fs)