def _create_csr(self):
    """Create CSR protobuf

    Returns:
        CSR protobuf object
    """
    csr = cert_utils.create_csr(self._gateway_key, self._hw_id)
    duration = Duration()
    duration.FromTimedelta(datetime.timedelta(days=4))
    csr = CSR(
        id=Identity(gateway=Identity.Gateway(hardware_id=self._hw_id)),
        valid_time=duration,
        csr_der=csr.public_bytes(serialization.Encoding.DER),
    )
    return csr
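
# Illustrative sketch, not part of the original snippet above: that code depends on
# project-specific helpers (cert_utils, CSR, Identity). The standard protobuf piece is
# Duration.FromTimedelta, which splits a datetime.timedelta into whole seconds plus a
# nanosecond remainder. A minimal, self-contained example of just that call:
import datetime
from google.protobuf.duration_pb2 import Duration

_d = Duration()
_d.FromTimedelta(datetime.timedelta(days=4, milliseconds=250))
assert _d.seconds == 4 * 86400      # whole seconds
assert _d.nanos == 250_000_000      # fractional part, in nanoseconds
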
def test_historical_features(
    feast_client: Client, batch_source: Union[BigQuerySource, FileSource]
):
    customer_entity = Entity(
        name="user_id", description="Customer", value_type=ValueType.INT64
    )
    feast_client.apply_entity(customer_entity)

    max_age = Duration()
    max_age.FromSeconds(2 * 86400)

    transactions_feature_table = FeatureTable(
        name="transactions",
        entities=["user_id"],
        features=[
            Feature("daily_transactions", ValueType.DOUBLE),
            Feature("total_transactions", ValueType.DOUBLE),
        ],
        batch_source=batch_source,
        max_age=max_age,
    )

    feast_client.apply_feature_table(transactions_feature_table)

    transactions_df, customers_df = generate_data()
    feast_client.ingest(transactions_feature_table, transactions_df)

    feature_refs = ["transactions:daily_transactions"]

    job = feast_client.get_historical_features(feature_refs, customers_df)
    output_dir = job.get_output_file_uri()
    joined_df = read_parquet(output_dir)

    expected_joined_df = pd.DataFrame(
        {
            "event_timestamp": customers_df.event_timestamp.tolist(),
            "user_id": customers_df.user_id.tolist(),
            "transactions__daily_transactions": transactions_df.daily_transactions.tolist()
            + [None] * transactions_df.shape[0],
        }
    )

    assert_frame_equal(
        joined_df.sort_values(by=["user_id", "event_timestamp"]).reset_index(drop=True),
        expected_joined_df.sort_values(by=["user_id", "event_timestamp"]).reset_index(
            drop=True
        ),
    )
def test_build(self):
    """Assert the Opt. model is built correctly"""
    model_builder = OptimizationModelBuilder(
        constraints=[CapacityConstraint()])
    problem = self.problem
    model = model_builder.build(problem)
    self.assertTrue(model, msg='Opt. model built incorrectly.')
    self.assertEqual(model.manager.GetNumberOfVehicles(), len(self.vehicles),
                     msg='Number of vehicles in manager is incorrect.')
    self.assertEqual(model.manager.GetNumberOfIndices(),
                     len(self.vehicles) * 2 + len(self.stops) - len(problem.depots),
                     msg='Number of indices in manager is incorrect.')
    self.assertTrue(model.solver, msg='Solver could not be instantiated.')
    self.assertTrue(model.search_parameters,
                    msg='Search params could not be built.')
    self.assertEqual(model.search_parameters.time_limit,
                     Duration(seconds=self.params.SEARCH_TIME_LIMIT),
                     msg='Time limit is incorrect in the search params.')
    self.assertEqual(
        model.search_parameters.solution_limit,
        self.params.SEARCH_SOLUTIONS_LIMIT,
        msg='Solutions limit is incorrect in the search params.')
    self.assertEqual(
        model.search_parameters.first_solution_strategy,
        FIRST_SOLUTION_STRATEGY[self.params.FIRST_SOLUTION_STRATEGY],
        msg='First solution strategy is incorrect in the search params.')
    self.assertEqual(
        model.search_parameters.local_search_metaheuristic,
        LOCAL_SEARCH_METAHEURISTIC[self.params.SEARCH_METAHEURISTIC],
        msg='Search metaheuristic is incorrect in the search params.')
    self.assertTrue(model.solver.HasDimension('capacity_constraint'),
                    msg='Capacity constraint not added.')
def test_feature_set_types_success(self, client, dataframe, mocker):
    all_types_fs = FeatureSet(
        name="all_types",
        entities=[Entity(name="user_id", dtype=ValueType.INT64)],
        features=[
            Feature(name="float_feature", dtype=ValueType.FLOAT),
            Feature(name="int64_feature", dtype=ValueType.INT64),
            Feature(name="int32_feature", dtype=ValueType.INT32),
            Feature(name="string_feature", dtype=ValueType.STRING),
            Feature(name="bytes_feature", dtype=ValueType.BYTES),
            Feature(name="bool_feature", dtype=ValueType.BOOL),
            Feature(name="double_feature", dtype=ValueType.DOUBLE),
            Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
            Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
            Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
            Feature(name="string_list_feature", dtype=ValueType.STRING_LIST),
            Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
            Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST),
            Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST),
        ],
        max_age=Duration(seconds=3600),
    )

    # Register with Feast core
    client.apply(all_types_fs)

    mocker.patch.object(
        client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(feature_set=all_types_fs.to_proto()),
    )

    # Ingest data into Feast
    client.ingest(all_types_fs, dataframe=dataframe)
def _list_groups(client):
    """List Error Groups from the last hour.

    This function provides a wrapper around making calls to the GAX API. It's
    used by the system tests to find the appropriate error group to verify the
    error was successfully reported.

    :type client: :class:`~google.cloud.error_reporting.client.Client`
    :param client: The client containing a project and credentials.

    :rtype: :class:`~google.gax.ResourceIterator`
    :returns: Iterable of :class:`~.error_stats_service_pb2.ErrorGroupStats`.
    """
    gax_api = error_stats_service_client.ErrorStatsServiceClient(
        credentials=client._credentials)
    project_name = gax_api.project_path(client.project)

    time_range = error_stats_service_pb2.QueryTimeRange()
    time_range.period = error_stats_service_pb2.QueryTimeRange.PERIOD_1_HOUR

    duration = Duration(seconds=60 * 60)

    return gax_api.list_group_stats(
        project_name, time_range, timed_count_duration=duration)
def test_ingest_into_bq(
    feast_client: Client,
    customer_entity: Entity,
    driver_entity: Entity,
    bq_dataframe: pd.DataFrame,
    bq_dataset: str,
    pytestconfig,
):
    bq_project = pytestconfig.getoption("bq_project")
    bq_table_id = f"bq_staging_{datetime.now():%Y%m%d%H%M%s}"
    ft = FeatureTable(
        name="basic_featuretable",
        entities=["driver_id", "customer_id"],
        features=[
            Feature(name="dev_feature_float", dtype=ValueType.FLOAT),
            Feature(name="dev_feature_string", dtype=ValueType.STRING),
        ],
        max_age=Duration(seconds=3600),
        batch_source=BigQuerySource(
            table_ref=f"{bq_project}:{bq_dataset}.{bq_table_id}",
            event_timestamp_column="datetime",
            created_timestamp_column="timestamp",
        ),
    )

    # Apply entities
    feast_client.apply(customer_entity)
    feast_client.apply(driver_entity)

    # Apply feature table
    feast_client.apply(ft)

    feast_client.ingest(ft, bq_dataframe, timeout=120)

    bq_client = bigquery.Client(project=bq_project)

    # Poll BQ until the table has been created
    def try_get_table():
        try:
            table = bq_client.get_table(
                bigquery.TableReference(
                    bigquery.DatasetReference(bq_project, bq_dataset), bq_table_id
                )
            )
        except NotFound:
            return None, False
        else:
            return table, True

    wait_retry_backoff(
        retry_fn=try_get_table,
        timeout_secs=30,
        timeout_msg="Timed out trying to get bigquery table",
    )

    query_string = f"SELECT * FROM `{bq_project}.{bq_dataset}.{bq_table_id}`"

    job = bq_client.query(query_string)
    query_df = job.to_dataframe()

    assert_frame_equal(query_df, bq_dataframe)
def create_static_overlay_segment(start_time_seconds, end_time_seconds):
    animation_start = transcoder.Overlay.Animation()
    animation_start.animation_static = transcoder.Overlay.AnimationStatic()
    animation_start.animation_static.start_time_offset = Duration(
        seconds=int(start_time_seconds),
        nanos=get_nanos_from_seconds(start_time_seconds),
    )
    animation_start.animation_static.xy = transcoder.Overlay.NormalizedCoordinate(
        x=0.0, y=0.0
    )

    animation_end = transcoder.Overlay.Animation()
    animation_end.animation_end = transcoder.Overlay.AnimationEnd()
    animation_end.animation_end.start_time_offset = Duration(
        seconds=int(end_time_seconds),
        nanos=get_nanos_from_seconds(end_time_seconds),
    )

    return [animation_start, animation_end]
def test_basic(self) -> None:
    """
    Add another table to an existing repo using the partial apply API. Make
    sure both the table applied via CLI apply and the new table pass the RW
    test.
    """
    runner = CliRunner()
    with runner.local_repo(get_example_repo("example_feature_repo_1.py")) as store:

        driver_locations_source = BigQuerySource(
            table_ref="rh_prod.ride_hailing_co.drivers",
            event_timestamp_column="event_timestamp",
            created_timestamp_column="created_timestamp",
        )

        driver_locations_100 = FeatureView(
            name="driver_locations_100",
            entities=["driver"],
            ttl=Duration(seconds=86400 * 1),
            features=[
                Feature(name="lat", dtype=ValueType.FLOAT),
                Feature(name="lon", dtype=ValueType.STRING),
                Feature(name="name", dtype=ValueType.STRING),
            ],
            online=True,
            input=driver_locations_source,
            tags={},
        )

        store.apply([driver_locations_100])

        basic_rw_test(store, view_name="driver_locations")
        basic_rw_test(store, view_name="driver_locations_100")
def test_update_featureset_update_featureset_and_ingest_second_subset(
    client, update_featureset_dataframe
):
    subset_columns = [
        "datetime",
        "entity_id",
        "update_feature1",
        "update_feature3",
        "update_feature4",
    ]
    subset_df = update_featureset_dataframe.iloc[5:][subset_columns]
    update_fs = FeatureSet(
        "update_fs",
        entities=[Entity(name="entity_id", dtype=ValueType.INT64)],
        max_age=Duration(seconds=432000),
    )
    update_fs.infer_fields_from_df(subset_df)
    client.apply(update_fs)

    # We keep retrying this ingestion until all values make it into the buffer.
    # This is a necessary step because bigquery streaming caches table schemas
    # and as a result, rows may be lost.
    while True:
        ingestion_id = client.ingest(feature_set=update_fs, source=subset_df)
        time.sleep(15)  # wait for rows to get written to bq
        rows_ingested = get_rows_ingested(client, update_fs, ingestion_id)
        if rows_ingested == len(subset_df):
            print(
                f"Number of rows successfully ingested: {rows_ingested}. Continuing."
            )
            break
        print(
            f"Number of rows successfully ingested: {rows_ingested}. Retrying ingestion."
        )
        time.sleep(30)

    def check():
        feature_retrieval_job = client.get_batch_features(
            entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[5:],
            feature_refs=[
                "update_feature1",
                "update_feature3",
                "update_feature4",
            ],
            project=PROJECT_NAME,
        )

        output = feature_retrieval_job.to_dataframe(timeout_sec=180).sort_values(
            by=["entity_id"]
        )
        print(output.head())

        assert output["update_feature1"].to_list() == subset_df["update_feature1"].to_list()
        assert output["update_feature3"].to_list() == subset_df["update_feature3"].to_list()
        assert output["update_feature4"].to_list() == subset_df["update_feature4"].to_list()

        clean_up_remote_files(feature_retrieval_job.get_avro_files())

    wait_for(check, timedelta(minutes=5))
def test_begin_ok_exact_staleness(self):
    from google.protobuf.duration_pb2 import Duration
    from google.cloud.spanner_v1.proto.transaction_pb2 import (
        Transaction as TransactionPB, TransactionOptions)

    transaction_pb = TransactionPB(id=TXN_ID)
    database = _Database()
    api = database.spanner_api = self._make_spanner_api()
    api.begin_transaction.return_value = transaction_pb
    duration = self._makeDuration(seconds=SECONDS, microseconds=MICROS)
    session = _Session(database)
    snapshot = self._make_one(session, exact_staleness=duration, multi_use=True)

    txn_id = snapshot.begin()

    self.assertEqual(txn_id, TXN_ID)
    self.assertEqual(snapshot._transaction_id, TXN_ID)

    expected_duration = Duration(seconds=SECONDS, nanos=MICROS * 1000)
    expected_txn_options = TransactionOptions(
        read_only=TransactionOptions.ReadOnly(
            exact_staleness=expected_duration))

    api.begin_transaction.assert_called_once_with(
        session.name, expected_txn_options,
        metadata=[('google-cloud-resource-prefix', database.name)])
def test_update_featureset_apply_featureset_and_ingest_first_subset(
    client, update_featureset_dataframe
):
    subset_columns = ["datetime", "entity_id", "update_feature1", "update_feature2"]
    subset_df = update_featureset_dataframe.iloc[:5][subset_columns]
    update_fs = FeatureSet(
        "update_fs",
        entities=[Entity(name="entity_id", dtype=ValueType.INT64)],
        max_age=Duration(seconds=432000),
    )
    update_fs.infer_fields_from_df(subset_df)
    client.apply(update_fs)

    client.ingest(feature_set=update_fs, source=subset_df)
    time.sleep(15)

    feature_retrieval_job = client.get_batch_features(
        entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[:5],
        feature_refs=[
            f"{PROJECT_NAME}/update_feature1",
            f"{PROJECT_NAME}/update_feature2",
        ],
    )

    output = feature_retrieval_job.to_dataframe().sort_values(by=["entity_id"])
    print(output.head())

    assert output["update_feature1"].to_list() == subset_df["update_feature1"].to_list()
    assert output["update_feature2"].to_list() == subset_df["update_feature2"].to_list()
def get_or_create_subscription():
    conf = get_config()["google_pub_sub"]
    project_id, topic_id = conf["project_id"], conf["topic_id"]
    subscription_id = get_subs_name(
        conf["subscription"].get("type", "schedule-consumer"))

    subscriber = pubsub_v1.SubscriberClient()
    publisher = pubsub_v1.PublisherClient()
    sub_path = subscriber.subscription_path(project_id, subscription_id)
    topic_path = publisher.topic_path(project_id, topic_id)

    try:
        subscriber.create_subscription(
            request={
                "name": sub_path,
                "topic": topic_path,
                "message_retention_duration": Duration(
                    seconds=conf["subscription"].get(
                        "message_retention_duration", 86400)),
                "ack_deadline_seconds": conf["subscription"].get(
                    "ack_deadline_seconds", 300),
                "filter": f'attributes.mac = "{get_mac()}"',
            })
        logging.info(f"{sub_path} created")
    except AlreadyExists:
        logging.info(f"{sub_path} already exists")
        return sub_path
    return sub_path
def _ingest_test_getfeaturetable_mocked_resp(
    file_url: str, date_partition_col: str = ""
):
    return GetFeatureTableResponse(
        table=FeatureTableProto(
            spec=FeatureTableSpecProto(
                name="ingest_featuretable",
                max_age=Duration(seconds=3600),
                features=[
                    FeatureSpecProto(
                        name="dev_feature_float",
                        value_type=ValueProto.ValueType.FLOAT,
                    ),
                    FeatureSpecProto(
                        name="dev_feature_string",
                        value_type=ValueProto.ValueType.STRING,
                    ),
                ],
                entities=["dev_entity"],
                batch_source=DataSourceProto(
                    file_options=DataSourceProto.FileOptions(
                        file_format=ParquetFormat().to_proto(), file_url=file_url
                    ),
                    event_timestamp_column="datetime",
                    created_timestamp_column="timestamp",
                    date_partition_column=date_partition_col,
                ),
            ),
            meta=FeatureTableMetaProto(),
        )
    )
def alltypes_featuretable():
    batch_source = FileSource(
        file_format="parquet",
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )
    return FeatureTable(
        name="alltypes",
        entities=["alltypes_id"],
        features=[
            Feature(name="float_feature", dtype=ValueType.FLOAT),
            Feature(name="int64_feature", dtype=ValueType.INT64),
            Feature(name="int32_feature", dtype=ValueType.INT32),
            Feature(name="string_feature", dtype=ValueType.STRING),
            Feature(name="bytes_feature", dtype=ValueType.BYTES),
            Feature(name="bool_feature", dtype=ValueType.BOOL),
            Feature(name="double_feature", dtype=ValueType.DOUBLE),
            Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST),
            Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
            Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
            Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
            Feature(name="string_list_feature", dtype=ValueType.STRING_LIST),
            Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
            Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST),
        ],
        max_age=Duration(seconds=3600),
        batch_source=batch_source,
        labels={"cat": "alltypes"},
    )
def main():
    # Upload the Python files to run to GCS (uploading them manually in advance is also fine)
    storage_client: StorageClient = StorageClient(
        env['BUCKET_NAME'], env['PROJECT_ID'], env['STORAGE_CREDENTIAL_PATH'])
    main_python_file_uri: str = storage_client.upload_to_gcs(
        './master.py', 'dataproc/src')
    python_file_uris: List[str] = [
        storage_client.upload_to_gcs('./worker.py', 'dataproc/src'),
        storage_client.upload_to_gcs('./module/storage.py', 'dataproc/src/module'),
    ]

    # Upload the data to be processed to GCS (uploading it manually in advance is also fine)
    data_file_path: str = './data.txt'
    with open(data_file_path, 'w') as f:
        for sentence in SENTENCES:
            f.write(sentence + '\n')
    storage_client.upload_to_gcs(data_file_path, 'dataproc/input')
    os.remove(data_file_path)

    # Run the PySpark job
    with DataprocCluster(
            env['PROJECT_ID'],
            env['DATAPROC_CREDENTIAL_PATH'],
            cluster_name='test-cluster',
            creates_cluster=True,
            idle_delete_ttl=Duration(seconds=1000),
            pip_packages='more-itertools==5.0.0 nltk==3.4.5 gensim==3.8.1 google-cloud-storage==1.20.0',
            environment_variables={
                'PROJECT_ID': env['PROJECT_ID'],
                'BUCKET_NAME': env['BUCKET_NAME']
            }) as cluster:
        cluster.submit_pyspark_job(main_python_file_uri, python_file_uris)
        print('do something')
def test_remove_reservation_not_found(get_reservation):
    get_reservation.return_value = None
    processor = cg.EngineProcessor(
        'proj',
        'p0',
        EngineContext(),
        qtypes.QuantumProcessor(schedule_frozen_period=Duration(seconds=10000)))
    with pytest.raises(ValueError):
        processor.remove_reservation('rid')
def to_proto(self):
    """Return estop_pb2.EstopEndpoint based on current member variables."""
    t_seconds = int(self.estop_timeout)
    t_nanos = int((self.estop_timeout - t_seconds) * 1e9)
    if self.estop_cut_power_timeout is None:
        return estop_pb2.EstopEndpoint(role=self.role, name=self._name,
                                       unique_id=self._unique_id,
                                       timeout=Duration(seconds=t_seconds, nanos=t_nanos))
    else:
        cpt_seconds = int(self.estop_cut_power_timeout)
        cpt_nanos = int((self.estop_cut_power_timeout - cpt_seconds) * 1e9)
        return estop_pb2.EstopEndpoint(role=self.role, name=self._name,
                                       unique_id=self._unique_id,
                                       timeout=Duration(seconds=t_seconds, nanos=t_nanos),
                                       cut_power_timeout=Duration(seconds=cpt_seconds,
                                                                  nanos=cpt_nanos))
def test_order_by_creation_time(client):
    proc_time_fs = FeatureSet(
        "processing_time",
        features=[Feature("feature_value", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(proc_time_fs)
    time.sleep(10)
    proc_time_fs = client.get_feature_set(name="processing_time", version=1)

    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)
    N_ROWS = 10
    incorrect_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": [i for i in range(N_ROWS)],
        "feature_value": ["WRONG"] * N_ROWS,
    })
    correct_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": [i for i in range(N_ROWS)],
        "feature_value": ["CORRECT"] * N_ROWS,
    })

    # Ingest the incorrect rows first, then the correct ones; retrieval should
    # return the most recently created values for the same event timestamp.
    client.ingest(proc_time_fs, incorrect_df)
    time.sleep(10)
    client.ingest(proc_time_fs, correct_df)

    feature_retrieval_job = client.get_batch_features(
        entity_rows=incorrect_df[["datetime", "entity_id"]],
        feature_ids=["processing_time:1:feature_value"])
    output = feature_retrieval_job.to_dataframe()
    print(output.head())

    assert output["processing_time_v1_feature_value"].to_list() == ["CORRECT"] * N_ROWS
def test_get_feature_set(self, mocked_client, mocker):
    mocked_client._core_service_stub = Core.CoreServiceStub(
        grpc.insecure_channel("")
    )

    from google.protobuf.duration_pb2 import Duration

    mocker.patch.object(
        mocked_client._core_service_stub,
        "GetFeatureSet",
        return_value=GetFeatureSetResponse(
            feature_set=FeatureSetProto(
                spec=FeatureSetSpecProto(
                    name="my_feature_set",
                    max_age=Duration(seconds=3600),
                    labels={"key1": "val1", "key2": "val2"},
                    features=[
                        FeatureSpecProto(
                            name="my_feature_1",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                        FeatureSpecProto(
                            name="my_feature_2",
                            value_type=ValueProto.ValueType.FLOAT,
                        ),
                    ],
                    entities=[
                        EntitySpecProto(
                            name="my_entity_1",
                            value_type=ValueProto.ValueType.INT64,
                        )
                    ],
                    source=Source(
                        type=SourceType.KAFKA,
                        kafka_source_config=KafkaSourceConfig(
                            bootstrap_servers="localhost:9092", topic="topic"
                        ),
                    ),
                ),
                meta=FeatureSetMetaProto(),
            )
        ),
    )
    mocked_client.set_project("my_project")
    feature_set = mocked_client.get_feature_set("my_feature_set")

    assert (
        feature_set.name == "my_feature_set"
        and "key1" in feature_set.labels
        and feature_set.labels["key1"] == "val1"
        and "key2" in feature_set.labels
        and feature_set.labels["key2"] == "val2"
        and feature_set.fields["my_feature_1"].name == "my_feature_1"
        and feature_set.fields["my_feature_1"].dtype == ValueType.FLOAT
        and feature_set.fields["my_entity_1"].name == "my_entity_1"
        and feature_set.fields["my_entity_1"].dtype == ValueType.INT64
        and len(feature_set.features) == 2
        and len(feature_set.entities) == 1
    )
def bookings_feature_table_with_mapping(spark, client):
    schema = StructType([
        StructField("id", IntegerType()),
        StructField("datetime", TimestampType()),
        StructField("created_datetime", TimestampType()),
        StructField("total_completed_bookings", IntegerType()),
    ])
    df_data = [
        (
            8001,
            datetime(year=2020, month=9, day=1, tzinfo=utc),
            datetime(year=2020, month=9, day=1, tzinfo=utc),
            100,
        ),
        (
            8001,
            datetime(year=2020, month=9, day=2, tzinfo=utc),
            datetime(year=2020, month=9, day=2, tzinfo=utc),
            150,
        ),
        (
            8002,
            datetime(year=2020, month=9, day=2, tzinfo=utc),
            datetime(year=2020, month=9, day=2, tzinfo=utc),
            200,
        ),
    ]
    temp_dir, file_uri = create_temp_parquet_file(spark, "bookings", schema, df_data)

    file_source = FileSource(
        event_timestamp_column="datetime",
        created_timestamp_column="created_datetime",
        file_format=ParquetFormat(),
        file_url=file_uri,
        field_mapping={"id": "driver_id"},
    )
    features = [Feature("total_completed_bookings", ValueType.INT32)]
    max_age = Duration()
    max_age.FromSeconds(86400)
    feature_table = FeatureTable(
        "bookings", ["driver_id"], features, batch_source=file_source, max_age=max_age
    )
    yield client.apply(feature_table)
    shutil.rmtree(temp_dir)
def execute(self, context):
    features_df = self.bq.get_pandas_df(self.sql)

    fs = FeatureSet(
        self.feature_set_name,
        max_age=Duration(seconds=86400),
        entities=[Entity(name=self.entity_name, dtype=ValueType.INT64)])

    fs.infer_fields_from_df(features_df, replace_existing_features=True)
    self.feast_client.apply(fs)
def make_span_from_db(ret: Dict) -> Span:
    """
    Create a Span object from a Dict that came from MongoDB.

    :param ret: The Dict that came from MongoDB.
    :return: The Span object created from the given Dict.
    """
    duration = Duration()
    duration.FromMicroseconds(ret["duration"])
    start_time = Timestamp()
    start_time.FromDatetime(ret["startTime"])
    del ret["startTime"]
    del ret["duration"]
    span = ParseDict(
        ret, Span(duration=duration, start_time=start_time), ignore_unknown_fields=True
    )
    return span
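
# Illustrative sketch, not part of the original snippet above: the two protobuf
# well-known-type helpers it relies on are Duration.FromMicroseconds and
# Timestamp.FromDatetime. A minimal, standalone round trip showing their behavior:
from datetime import datetime
from google.protobuf.duration_pb2 import Duration
from google.protobuf.timestamp_pb2 import Timestamp

_dur = Duration()
_dur.FromMicroseconds(1_500_000)        # 1.5 s -> seconds=1, nanos=500000000
assert _dur.ToMicroseconds() == 1_500_000

_ts = Timestamp()
_ts.FromDatetime(datetime(2020, 9, 1))  # naive datetimes are interpreted as UTC
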
def to_proto(self) -> FeatureViewProto:
    """
    Converts a feature view object to its protobuf representation.

    Returns:
        A FeatureViewProto protobuf.
    """
    meta = FeatureViewMetaProto(materialization_intervals=[])
    if self.created_timestamp:
        meta.created_timestamp.FromDatetime(self.created_timestamp)
    if self.last_updated_timestamp:
        meta.last_updated_timestamp.FromDatetime(self.last_updated_timestamp)
    for interval in self.materialization_intervals:
        interval_proto = MaterializationIntervalProto()
        interval_proto.start_time.FromDatetime(interval[0])
        interval_proto.end_time.FromDatetime(interval[1])
        meta.materialization_intervals.append(interval_proto)

    ttl_duration = None
    if self.ttl is not None:
        ttl_duration = Duration()
        ttl_duration.FromTimedelta(self.ttl)

    batch_source_proto = self.batch_source.to_proto()
    batch_source_proto.data_source_class_type = f"{self.batch_source.__class__.__module__}.{self.batch_source.__class__.__name__}"

    stream_source_proto = None
    if self.stream_source:
        stream_source_proto = self.stream_source.to_proto()
        stream_source_proto.data_source_class_type = f"{self.stream_source.__class__.__module__}.{self.stream_source.__class__.__name__}"

    spec = FeatureViewSpecProto(
        name=self.name,
        entities=self.entities,
        features=[field.to_proto() for field in self.schema],
        description=self.description,
        tags=self.tags,
        owner=self.owner,
        ttl=(ttl_duration if ttl_duration is not None else None),
        online=self.online,
        batch_source=batch_source_proto,
        stream_source=stream_source_proto,
    )

    return FeatureViewProto(spec=spec, meta=meta)
def seconds_to_duration(seconds):
    """Return a protobuf Duration built from a number of seconds given as a float.

    Args:
        seconds (float): duration length in seconds
    """
    duration_seconds = int(seconds)
    duration_nanos = int((seconds - duration_seconds) * NSEC_PER_SEC)
    return Duration(seconds=duration_seconds, nanos=duration_nanos)
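
# Hedged usage sketch, not from the original source: it assumes the module above
# defines NSEC_PER_SEC = 10**9. With that assumption, the helper splits a float into
# whole seconds plus a nanosecond remainder; an equivalent, self-contained version:
from google.protobuf.duration_pb2 import Duration

NSEC_PER_SEC = 10 ** 9  # assumed constant, mirroring the snippet above

def _seconds_to_duration_example(seconds: float) -> Duration:
    whole = int(seconds)
    return Duration(seconds=whole, nanos=int((seconds - whole) * NSEC_PER_SEC))

assert _seconds_to_duration_example(1.5) == Duration(seconds=1, nanos=500_000_000)
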
def test_apply_all_featuresets(client):
    client.set_project(PROJECT_NAME)

    file_fs1 = FeatureSet(
        "file_feature_set",
        features=[Feature("feature_value1", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(file_fs1)

    gcs_fs1 = FeatureSet(
        "gcs_feature_set",
        features=[Feature("feature_value2", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(gcs_fs1)

    proc_time_fs = FeatureSet(
        "processing_time",
        features=[Feature("feature_value3", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(proc_time_fs)

    add_cols_fs = FeatureSet(
        "additional_columns",
        features=[Feature("feature_value4", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(add_cols_fs)

    historical_fs = FeatureSet(
        "historical",
        features=[Feature("feature_value5", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(historical_fs)

    fs1 = FeatureSet(
        "feature_set_1",
        features=[Feature("feature_value6", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )

    fs2 = FeatureSet(
        "feature_set_2",
        features=[Feature("other_feature_value7", ValueType.INT64)],
        entities=[Entity("other_entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(fs1)
    client.apply(fs2)
def detect_labels(video_client, file_handle, input_uri, l, t):
    EXCLUDE = ["nature", "aerial photography", "tree"]
    print("{} spawned".format(t))
    features = [videointelligence.Feature.LABEL_DETECTION]

    # Build up to ten 5-second segments, offset by 50 seconds per thread, stopping
    # once a segment would run past the video length l.
    s = []
    for j in range(10):
        s.append(
            videointelligence.VideoSegment(
                start_time_offset=Duration(seconds=0 + j * 5 + 50 * t),
                end_time_offset=Duration(seconds=(j + 1) * 5 + 50 * t),
            )
        )
        if (j + 1) * 5 + 50 * t >= l:
            break
    print("{} {} segments: ".format(t, len(s)))

    operation = video_client.annotate_video(
        request={
            "features": features,
            "input_uri": input_uri,
            "video_context": videointelligence.VideoContext(segments=s),
        }
    )
    result = operation.result(timeout=120)
    print("\nFinished processing thread {}.".format(t))

    for x in result.annotation_results:
        segment_labels = x.segment_label_annotations
        for i, segment_label in enumerate(segment_labels):
            if segment_label.entity.description in EXCLUDE:
                continue
            print("Video label description: {}".format(segment_label.entity.description))
            for category_entity in segment_label.category_entities:
                print(
                    "\tLabel category description: {}".format(category_entity.description)
                )

            for i, segment in enumerate(segment_label.segments):
                start_time = (
                    segment.segment.start_time_offset.seconds
                    + segment.segment.start_time_offset.microseconds / 1e6
                )
                end_time = (
                    segment.segment.end_time_offset.seconds
                    + segment.segment.end_time_offset.microseconds / 1e6
                )
                file_handle.write(
                    "{},{},{},{}\n".format(
                        segment_label.entity.description,
                        str(start_time),
                        str(end_time),
                        str(segment.confidence),
                    )
                )
    return None
def _trailing_metadata(self):
    from google.protobuf.duration_pb2 import Duration
    from google.rpc.error_details_pb2 import RetryInfo
    from grpc._common import cygrpc_metadata

    if self._commit_abort_retry_nanos is None:
        return cygrpc_metadata(())
    retry_info = RetryInfo(
        retry_delay=Duration(
            seconds=self._commit_abort_retry_seconds,
            nanos=self._commit_abort_retry_nanos))
    return cygrpc_metadata([
        ('google.rpc.retryinfo-bin', retry_info.SerializeToString()),
    ])
def create_daily_nearline_30_day_migration(project_id: str, description: str,
                                           source_bucket: str, sink_bucket: str,
                                           start_date: datetime):
    """Create a daily migration from a GCS bucket to a Nearline GCS bucket
    for objects untouched for 30 days."""

    client = storage_transfer.StorageTransferServiceClient()

    # The ID of the Google Cloud Platform Project that owns the job
    # project_id = 'my-project-id'

    # A useful description for your transfer job
    # description = 'My transfer job'

    # Google Cloud Storage source bucket name
    # source_bucket = 'my-gcs-source-bucket'

    # Google Cloud Storage destination bucket name
    # sink_bucket = 'my-gcs-destination-bucket'

    transfer_job_request = storage_transfer.CreateTransferJobRequest({
        'transfer_job': {
            'project_id': project_id,
            'description': description,
            'status': storage_transfer.TransferJob.Status.ENABLED,
            'schedule': {
                'schedule_start_date': {
                    'day': start_date.day,
                    'month': start_date.month,
                    'year': start_date.year
                }
            },
            'transfer_spec': {
                'gcs_data_source': {
                    'bucket_name': source_bucket,
                },
                'gcs_data_sink': {
                    'bucket_name': sink_bucket,
                },
                'object_conditions': {
                    'min_time_elapsed_since_last_modification': Duration(
                        seconds=2592000  # 30 days
                    )
                },
                'transfer_options': {
                    'delete_objects_from_source_after_transfer': True
                }
            }
        }
    })

    result = client.create_transfer_job(transfer_job_request)
    print(f'Created transferJob: {result.name}')
def Export(self, request, context):
    context.set_code(StatusCode.UNAVAILABLE)

    context.send_initial_metadata(
        (("google.rpc.retryinfo-bin", RetryInfo().SerializeToString()),)
    )
    context.set_trailing_metadata(
        (
            (
                "google.rpc.retryinfo-bin",
                RetryInfo(retry_delay=Duration(seconds=4)).SerializeToString(),
            ),
        )
    )

    return ExportLogsServiceResponse()
def bookings_feature_table(spark, client):
    schema = StructType([
        StructField("driver_id", IntegerType()),
        StructField("event_timestamp", TimestampType()),
        StructField("created_timestamp", TimestampType()),
        StructField("total_completed_bookings", IntegerType()),
    ])
    df_data = [
        (
            8001,
            datetime(year=2020, month=9, day=1),
            datetime(year=2020, month=9, day=1),
            100,
        ),
        (
            8001,
            datetime(year=2020, month=9, day=2),
            datetime(year=2020, month=9, day=2),
            150,
        ),
        (
            8002,
            datetime(year=2020, month=9, day=2),
            datetime(year=2020, month=9, day=2),
            200,
        ),
    ]
    temp_dir, file_uri = create_temp_parquet_file(spark, "bookings", schema, df_data)

    file_source = FileSource("event_timestamp", "created_timestamp", "parquet", file_uri)
    features = [Feature("total_completed_bookings", ValueType.INT32)]
    max_age = Duration()
    max_age.FromSeconds(86400)
    feature_table = FeatureTable(
        "bookings", ["driver_id"], features, batch_source=file_source, max_age=max_age
    )
    yield client.apply_feature_table(feature_table)
    shutil.rmtree(temp_dir)