Example No. 1
    def _create_csr(self):
        """Create CSR protobuf

        Returns:
             CSR protobuf object
        """
        # Build the X.509 CSR first, then wrap its DER bytes in the CSR protobuf
        # together with the gateway identity and the requested validity window.
        x509_csr = cert_utils.create_csr(self._gateway_key, self._hw_id)
        duration = Duration()
        duration.FromTimedelta(datetime.timedelta(days=4))
        return CSR(
            id=Identity(gateway=Identity.Gateway(hardware_id=self._hw_id)),
            valid_time=duration,
            csr_der=x509_csr.public_bytes(serialization.Encoding.DER),
        )
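Note: Duration.FromTimedelta fills the protobuf seconds/nanos fields from a Python timedelta. A minimal check of the conversion used above:

from google.protobuf.duration_pb2 import Duration
import datetime

valid_time = Duration()
valid_time.FromTimedelta(datetime.timedelta(days=4))
# Four days is 345600 whole seconds, so the nanos field stays zero.
assert valid_time.seconds == 4 * 86400 and valid_time.nanos == 0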
Example No. 2
def test_historical_features(
    feast_client: Client, batch_source: Union[BigQuerySource, FileSource]
):
    customer_entity = Entity(
        name="user_id", description="Customer", value_type=ValueType.INT64
    )
    feast_client.apply_entity(customer_entity)

    max_age = Duration()
    max_age.FromSeconds(2 * 86400)

    transactions_feature_table = FeatureTable(
        name="transactions",
        entities=["user_id"],
        features=[
            Feature("daily_transactions", ValueType.DOUBLE),
            Feature("total_transactions", ValueType.DOUBLE),
        ],
        batch_source=batch_source,
        max_age=max_age,
    )

    feast_client.apply_feature_table(transactions_feature_table)

    transactions_df, customers_df = generate_data()
    feast_client.ingest(transactions_feature_table, transactions_df)

    feature_refs = ["transactions:daily_transactions"]

    job = feast_client.get_historical_features(feature_refs, customers_df)
    output_dir = job.get_output_file_uri()
    joined_df = read_parquet(output_dir)

    expected_joined_df = pd.DataFrame(
        {
            "event_timestamp": customers_df.event_timestamp.tolist(),
            "user_id": customers_df.user_id.tolist(),
            "transactions__daily_transactions": transactions_df.daily_transactions.tolist()
            + [None] * transactions_df.shape[0],
        }
    )

    assert_frame_equal(
        joined_df.sort_values(by=["user_id", "event_timestamp"]).reset_index(drop=True),
        expected_joined_df.sort_values(by=["user_id", "event_timestamp"]).reset_index(
            drop=True
        ),
    )
Example No. 3
    def test_build(self):
        """Assert the Opt. model is built correctly"""

        model_builder = OptimizationModelBuilder(
            constraints=[CapacityConstraint()])
        problem = self.problem
        model = model_builder.build(problem)
        self.assertTrue(model, msg='Opt. model built incorrectly.')
        self.assertEqual(model.manager.GetNumberOfVehicles(),
                         len(self.vehicles),
                         msg='Number of vehicles in manager is incorrect.')
        self.assertEqual(model.manager.GetNumberOfIndices(),
                         len(self.vehicles) * 2 + len(self.stops) -
                         len(problem.depots),
                         msg='Number of indices in manager is incorrect.')
        self.assertTrue(model.solver, msg='Solver could not be instantiated.')
        self.assertTrue(model.search_parameters,
                        msg='Search params could not be built.')
        self.assertEqual(model.search_parameters.time_limit,
                         Duration(seconds=self.params.SEARCH_TIME_LIMIT),
                         msg='Time limit is incorrect in the search params.')
        self.assertEqual(
            model.search_parameters.solution_limit,
            self.params.SEARCH_SOLUTIONS_LIMIT,
            msg='Solutions limit is incorrect in the search params.')
        self.assertEqual(
            model.search_parameters.first_solution_strategy,
            FIRST_SOLUTION_STRATEGY[self.params.FIRST_SOLUTION_STRATEGY],
            msg='First solution strategy is incorrect in the search params.')
        self.assertEqual(
            model.search_parameters.local_search_metaheuristic,
            LOCAL_SEARCH_METAHEURISTIC[self.params.SEARCH_METAHEURISTIC],
            msg='Search metaheuristic is incorrect in the search params.')
        self.assertTrue(model.solver.HasDimension('capacity_constraint'),
                        msg='Capacity constraint not added.')
Example No. 4
    def test_feature_set_types_success(self, client, dataframe, mocker):

        all_types_fs = FeatureSet(
            name="all_types",
            entities=[Entity(name="user_id", dtype=ValueType.INT64)],
            features=[
                Feature(name="float_feature", dtype=ValueType.FLOAT),
                Feature(name="int64_feature", dtype=ValueType.INT64),
                Feature(name="int32_feature", dtype=ValueType.INT32),
                Feature(name="string_feature", dtype=ValueType.STRING),
                Feature(name="bytes_feature", dtype=ValueType.BYTES),
                Feature(name="bool_feature", dtype=ValueType.BOOL),
                Feature(name="double_feature", dtype=ValueType.DOUBLE),
                Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
                Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
                Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
                Feature(name="string_list_feature", dtype=ValueType.STRING_LIST),
                Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
                Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST),
                Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST),
            ],
            max_age=Duration(seconds=3600),
        )

        # Register with Feast core
        client.apply(all_types_fs)

        mocker.patch.object(
            client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(feature_set=all_types_fs.to_proto()),
        )

        # Ingest data into Feast
        client.ingest(all_types_fs, dataframe=dataframe)
Example No. 5
def _list_groups(client):
    """List Error Groups from the last 60 seconds.

    This class provides a wrapper around making calls to the GAX
    API. It's used by the system tests to find the appropriate error group
    to verify the error was successfully reported.

    :type client: :class:`~google.cloud.error_reporting.client.Client`
    :param client: The client containing a project and credentials.

    :rtype: :class:`~google.gax.ResourceIterator`
    :returns: Iterable of :class:`~.error_stats_service_pb2.ErrorGroupStats`.
    """
    gax_api = error_stats_service_client.ErrorStatsServiceClient(
        credentials=client._credentials)
    project_name = gax_api.project_path(client.project)

    time_range = error_stats_service_pb2.QueryTimeRange()
    time_range.period = error_stats_service_pb2.QueryTimeRange.PERIOD_1_HOUR

    duration = Duration(seconds=60 * 60)

    return gax_api.list_group_stats(project_name,
                                    time_range,
                                    timed_count_duration=duration)
Example No. 6
def test_ingest_into_bq(
    feast_client: Client,
    customer_entity: Entity,
    driver_entity: Entity,
    bq_dataframe: pd.DataFrame,
    bq_dataset: str,
    pytestconfig,
):
    bq_project = pytestconfig.getoption("bq_project")
    bq_table_id = f"bq_staging_{datetime.now():%Y%m%d%H%M%s}"
    ft = FeatureTable(
        name="basic_featuretable",
        entities=["driver_id", "customer_id"],
        features=[
            Feature(name="dev_feature_float", dtype=ValueType.FLOAT),
            Feature(name="dev_feature_string", dtype=ValueType.STRING),
        ],
        max_age=Duration(seconds=3600),
        batch_source=BigQuerySource(
            table_ref=f"{bq_project}:{bq_dataset}.{bq_table_id}",
            event_timestamp_column="datetime",
            created_timestamp_column="timestamp",
        ),
    )

    # ApplyEntity
    feast_client.apply(customer_entity)
    feast_client.apply(driver_entity)

    # ApplyFeatureTable
    feast_client.apply(ft)
    feast_client.ingest(ft, bq_dataframe, timeout=120)

    bq_client = bigquery.Client(project=bq_project)

    # Poll BQ for table until the table has been created
    def try_get_table():
        try:
            table = bq_client.get_table(
                bigquery.TableReference(
                    bigquery.DatasetReference(bq_project, bq_dataset), bq_table_id
                )
            )
        except NotFound:
            return None, False
        else:
            return table, True

    wait_retry_backoff(
        retry_fn=try_get_table,
        timeout_secs=30,
        timeout_msg="Timed out trying to get bigquery table",
    )

    query_string = f"SELECT * FROM `{bq_project}.{bq_dataset}.{bq_table_id}`"

    job = bq_client.query(query_string)
    query_df = job.to_dataframe()

    assert_frame_equal(query_df, bq_dataframe)
Example No. 7
def create_static_overlay_segment(start_time_seconds, end_time_seconds):
    # Static overlay that appears at start_time_seconds, pinned to the top-left corner.
    animation_start = transcoder.Overlay.Animation()
    animation_start.animation_static = transcoder.Overlay.AnimationStatic()
    animation_start.animation_static.start_time_offset = Duration(
        seconds=int(start_time_seconds),
        nanos=get_nanos_from_seconds(start_time_seconds),
    )
    animation_start.animation_static.xy = transcoder.Overlay.NormalizedCoordinate(x=0.0, y=0.0)

    # Animation that removes the overlay again at end_time_seconds.
    animation_end = transcoder.Overlay.Animation()
    animation_end.animation_end = transcoder.Overlay.AnimationEnd()
    animation_end.animation_end.start_time_offset = Duration(
        seconds=int(end_time_seconds),
        nanos=get_nanos_from_seconds(end_time_seconds),
    )

    return [animation_start, animation_end]
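Note: the helper get_nanos_from_seconds is not shown in this example. A minimal sketch, assuming it simply returns the fractional part of a float second count as nanoseconds:

def get_nanos_from_seconds(seconds):
    # Hypothetical helper: nanoseconds corresponding to the fractional part of `seconds`.
    return int((seconds - int(seconds)) * 1e9)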
Example No. 8
    def test_basic(self) -> None:
        """
            Add another table to existing repo using partial apply API. Make sure both the table
            applied via CLI apply and the new table are passing RW test.
        """

        runner = CliRunner()
        with runner.local_repo(get_example_repo("example_feature_repo_1.py")) as store:

            driver_locations_source = BigQuerySource(
                table_ref="rh_prod.ride_hailing_co.drivers",
                event_timestamp_column="event_timestamp",
                created_timestamp_column="created_timestamp",
            )

            driver_locations_100 = FeatureView(
                name="driver_locations_100",
                entities=["driver"],
                ttl=Duration(seconds=86400 * 1),
                features=[
                    Feature(name="lat", dtype=ValueType.FLOAT),
                    Feature(name="lon", dtype=ValueType.STRING),
                    Feature(name="name", dtype=ValueType.STRING),
                ],
                online=True,
                input=driver_locations_source,
                tags={},
            )

            store.apply([driver_locations_100])

            basic_rw_test(store, view_name="driver_locations")
            basic_rw_test(store, view_name="driver_locations_100")
Example No. 9
def test_update_featureset_update_featureset_and_ingest_second_subset(
        client, update_featureset_dataframe):
    subset_columns = [
        "datetime",
        "entity_id",
        "update_feature1",
        "update_feature3",
        "update_feature4",
    ]
    subset_df = update_featureset_dataframe.iloc[5:][subset_columns]
    update_fs = FeatureSet(
        "update_fs",
        entities=[Entity(name="entity_id", dtype=ValueType.INT64)],
        max_age=Duration(seconds=432000),
    )
    update_fs.infer_fields_from_df(subset_df)
    client.apply(update_fs)

    # We keep retrying this ingestion until all values make it into the buffer.
    # This is a necessary step because bigquery streaming caches table schemas
    # and as a result, rows may be lost.
    while True:
        ingestion_id = client.ingest(feature_set=update_fs, source=subset_df)
        time.sleep(15)  # wait for rows to get written to bq
        rows_ingested = get_rows_ingested(client, update_fs, ingestion_id)
        if rows_ingested == len(subset_df):
            print(
                f"Number of rows successfully ingested: {rows_ingested}. Continuing."
            )
            break
        print(
            f"Number of rows successfully ingested: {rows_ingested}. Retrying ingestion."
        )
        time.sleep(30)

    def check():
        feature_retrieval_job = client.get_batch_features(
            entity_rows=update_featureset_dataframe[["datetime",
                                                     "entity_id"]].iloc[5:],
            feature_refs=[
                "update_feature1",
                "update_feature3",
                "update_feature4",
            ],
            project=PROJECT_NAME,
        )

        output = feature_retrieval_job.to_dataframe(
            timeout_sec=180).sort_values(by=["entity_id"])
        print(output.head())

        assert output["update_feature1"].to_list(
        ) == subset_df["update_feature1"].to_list()
        assert output["update_feature3"].to_list(
        ) == subset_df["update_feature3"].to_list()
        assert output["update_feature4"].to_list(
        ) == subset_df["update_feature4"].to_list()
        clean_up_remote_files(feature_retrieval_job.get_avro_files())

    wait_for(check, timedelta(minutes=5))
Example No. 10
    def test_begin_ok_exact_staleness(self):
        from google.protobuf.duration_pb2 import Duration
        from google.cloud.spanner_v1.proto.transaction_pb2 import (
            Transaction as TransactionPB, TransactionOptions)

        transaction_pb = TransactionPB(id=TXN_ID)
        database = _Database()
        api = database.spanner_api = self._make_spanner_api()
        api.begin_transaction.return_value = transaction_pb
        duration = self._makeDuration(seconds=SECONDS, microseconds=MICROS)
        session = _Session(database)
        snapshot = self._make_one(session,
                                  exact_staleness=duration,
                                  multi_use=True)

        txn_id = snapshot.begin()

        self.assertEqual(txn_id, TXN_ID)
        self.assertEqual(snapshot._transaction_id, TXN_ID)

        expected_duration = Duration(seconds=SECONDS, nanos=MICROS * 1000)
        expected_txn_options = TransactionOptions(
            read_only=TransactionOptions.ReadOnly(
                exact_staleness=expected_duration))

        api.begin_transaction.assert_called_once_with(
            session.name,
            expected_txn_options,
            metadata=[('google-cloud-resource-prefix', database.name)])
Example No. 11
def test_update_featureset_apply_featureset_and_ingest_first_subset(
        client, update_featureset_dataframe):
    subset_columns = [
        "datetime", "entity_id", "update_feature1", "update_feature2"
    ]
    subset_df = update_featureset_dataframe.iloc[:5][subset_columns]
    update_fs = FeatureSet(
        "update_fs",
        entities=[Entity(name="entity_id", dtype=ValueType.INT64)],
        max_age=Duration(seconds=432000),
    )
    update_fs.infer_fields_from_df(subset_df)
    client.apply(update_fs)

    client.ingest(feature_set=update_fs, source=subset_df)

    time.sleep(15)
    feature_retrieval_job = client.get_batch_features(
        entity_rows=update_featureset_dataframe[["datetime",
                                                 "entity_id"]].iloc[:5],
        feature_refs=[
            f"{PROJECT_NAME}/update_feature1",
            f"{PROJECT_NAME}/update_feature2",
        ],
    )

    output = feature_retrieval_job.to_dataframe().sort_values(by=["entity_id"])
    print(output.head())

    assert output["update_feature1"].to_list(
    ) == subset_df["update_feature1"].to_list()
    assert output["update_feature2"].to_list(
    ) == subset_df["update_feature2"].to_list()
Example No. 12
def get_or_create_subscription():
    conf = get_config()["google_pub_sub"]
    project_id, topic_id = conf["project_id"], conf["topic_id"]
    subscription_id = get_subs_name(conf["subscription"].get(
        "type", "schedule-consumer"))

    subscriber = pubsub_v1.SubscriberClient()
    publisher = pubsub_v1.PublisherClient()

    sub_path = subscriber.subscription_path(project_id, subscription_id)
    topic_path = publisher.topic_path(project_id, topic_id)

    try:
        subscriber.create_subscription(
            request={
                "name": sub_path,
                "topic": topic_path,
                "message_retention_duration": Duration(
                    seconds=conf["subscription"].get("message_retention_duration", 86400)
                ),
                "ack_deadline_seconds": conf["subscription"].get("ack_deadline_seconds", 300),
                "filter": f'attributes.mac = "{get_mac()}"',
            }
        )
        logging.info(f"{sub_path} created")
    except AlreadyExists:
        logging.info(f"{sub_path} already exists")
        return sub_path

    return sub_path
Example No. 13
def _ingest_test_getfeaturetable_mocked_resp(file_url: str,
                                             date_partition_col: str = ""):
    return GetFeatureTableResponse(table=FeatureTableProto(
        spec=FeatureTableSpecProto(
            name="ingest_featuretable",
            max_age=Duration(seconds=3600),
            features=[
                FeatureSpecProto(
                    name="dev_feature_float",
                    value_type=ValueProto.ValueType.FLOAT,
                ),
                FeatureSpecProto(
                    name="dev_feature_string",
                    value_type=ValueProto.ValueType.STRING,
                ),
            ],
            entities=["dev_entity"],
            batch_source=DataSourceProto(
                file_options=DataSourceProto.FileOptions(
                    file_format=ParquetFormat().to_proto(), file_url=file_url),
                event_timestamp_column="datetime",
                created_timestamp_column="timestamp",
                date_partition_column=date_partition_col,
            ),
        ),
        meta=FeatureTableMetaProto(),
    ))
Example No. 14
def alltypes_featuretable():
    batch_source = FileSource(
        file_format="parquet",
        file_url="file://feast/*",
        event_timestamp_column="ts_col",
        created_timestamp_column="timestamp",
        date_partition_column="date_partition_col",
    )
    return FeatureTable(
        name="alltypes",
        entities=["alltypes_id"],
        features=[
            Feature(name="float_feature", dtype=ValueType.FLOAT),
            Feature(name="int64_feature", dtype=ValueType.INT64),
            Feature(name="int32_feature", dtype=ValueType.INT32),
            Feature(name="string_feature", dtype=ValueType.STRING),
            Feature(name="bytes_feature", dtype=ValueType.BYTES),
            Feature(name="bool_feature", dtype=ValueType.BOOL),
            Feature(name="double_feature", dtype=ValueType.DOUBLE),
            Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST),
            Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST),
            Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST),
            Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST),
            Feature(name="string_list_feature", dtype=ValueType.STRING_LIST),
            Feature(name="bytes_list_feature", dtype=ValueType.BYTES_LIST),
            Feature(name="bool_list_feature", dtype=ValueType.BOOL_LIST),
        ],
        max_age=Duration(seconds=3600),
        batch_source=batch_source,
        labels={"cat": "alltypes"},
    )
Example No. 15
def main():
    # Upload the python files to be executed to GCS (uploading them manually beforehand is also fine)
    storage_client: StorageClient = StorageClient(
        env['BUCKET_NAME'], env['PROJECT_ID'], env['STORAGE_CREDENTIAL_PATH'])
    main_python_file_uri: str = storage_client.upload_to_gcs(
        './master.py', 'dataproc/src')
    python_file_uris: List[str] = [
        storage_client.upload_to_gcs('./worker.py', 'dataproc/src'),
        storage_client.upload_to_gcs('./module/storage.py',
                                     'dataproc/src/module'),
    ]

    # Upload the data to be processed to GCS (uploading it manually beforehand is also fine)
    data_file_path: str = './data.txt'
    with open(data_file_path, 'w') as f:
        for sentence in SENTENCES:
            f.write(sentence + '\n')
    storage_client.upload_to_gcs(data_file_path, 'dataproc/input')
    os.remove(data_file_path)

    # Run the pyspark job
    with DataprocCluster(
            env['PROJECT_ID'],
            env['DATAPROC_CREDENTIAL_PATH'],
            cluster_name='test-cluster',
            creates_cluster=True,
            idle_delete_ttl=Duration(seconds=1000),
            pip_packages=
            'more-itertools==5.0.0 nltk==3.4.5 gensim==3.8.1 google-cloud-storage==1.20.0',
            environment_variables={
                'PROJECT_ID': env['PROJECT_ID'],
                'BUCKET_NAME': env['BUCKET_NAME']
            }) as cluster:
        cluster.submit_pyspark_job(main_python_file_uri, python_file_uris)
        print('do something')
Example No. 16
def test_remove_reservation_not_found(get_reservation):
    get_reservation.return_value = None
    processor = cg.EngineProcessor(
        'proj', 'p0', EngineContext(),
        qtypes.QuantumProcessor(schedule_frozen_period=Duration(seconds=10000)))
    with pytest.raises(ValueError):
        processor.remove_reservation('rid')
Example No. 17
    def to_proto(self):
        """Return estop_pb2.EstopEndpoint based on current member variables."""
        t_seconds = int(self.estop_timeout)
        t_nanos = int((self.estop_timeout - t_seconds) * 1e9)
        if self.estop_cut_power_timeout is None:
            return estop_pb2.EstopEndpoint(
                role=self.role, name=self._name, unique_id=self._unique_id,
                timeout=Duration(seconds=t_seconds, nanos=t_nanos))
        else:
            cpt_seconds = int(self.estop_cut_power_timeout)
            cpt_nanos = int((self.estop_cut_power_timeout - cpt_seconds) * 1e9)
            return estop_pb2.EstopEndpoint(
                role=self.role, name=self._name, unique_id=self._unique_id,
                timeout=Duration(seconds=t_seconds, nanos=t_nanos),
                cut_power_timeout=Duration(seconds=cpt_seconds, nanos=cpt_nanos))
Example No. 18
def test_order_by_creation_time(client):
    proc_time_fs = FeatureSet(
        "processing_time",
        features=[Feature("feature_value", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(proc_time_fs)
    time.sleep(10)
    proc_time_fs = client.get_feature_set(name="processing_time", version=1)

    time_offset = datetime.utcnow().replace(tzinfo=pytz.utc)
    N_ROWS = 10
    incorrect_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": [i for i in range(N_ROWS)],
        "feature_value": ["WRONG"] * N_ROWS,
    })
    correct_df = pd.DataFrame({
        "datetime": [time_offset] * N_ROWS,
        "entity_id": [i for i in range(N_ROWS)],
        "feature_value": ["CORRECT"] * N_ROWS,
    })
    client.ingest(proc_time_fs, incorrect_df)
    time.sleep(10)
    client.ingest(proc_time_fs, correct_df)
    feature_retrieval_job = client.get_batch_features(
        entity_rows=incorrect_df[["datetime", "entity_id"]],
        feature_ids=["processing_time:1:feature_value"])
    output = feature_retrieval_job.to_dataframe()
    print(output.head())

    assert output["processing_time_v1_feature_value"].to_list() == ["CORRECT"
                                                                    ] * N_ROWS
Example No. 19
    def test_get_feature_set(self, mocked_client, mocker):
        mocked_client._core_service_stub = Core.CoreServiceStub(
            grpc.insecure_channel("")
        )

        from google.protobuf.duration_pb2 import Duration

        mocker.patch.object(
            mocked_client._core_service_stub,
            "GetFeatureSet",
            return_value=GetFeatureSetResponse(
                feature_set=FeatureSetProto(
                    spec=FeatureSetSpecProto(
                        name="my_feature_set",
                        max_age=Duration(seconds=3600),
                        labels={"key1": "val1", "key2": "val2"},
                        features=[
                            FeatureSpecProto(
                                name="my_feature_1",
                                value_type=ValueProto.ValueType.FLOAT,
                            ),
                            FeatureSpecProto(
                                name="my_feature_2",
                                value_type=ValueProto.ValueType.FLOAT,
                            ),
                        ],
                        entities=[
                            EntitySpecProto(
                                name="my_entity_1",
                                value_type=ValueProto.ValueType.INT64,
                            )
                        ],
                        source=Source(
                            type=SourceType.KAFKA,
                            kafka_source_config=KafkaSourceConfig(
                                bootstrap_servers="localhost:9092", topic="topic"
                            ),
                        ),
                    ),
                    meta=FeatureSetMetaProto(),
                )
            ),
        )
        mocked_client.set_project("my_project")
        feature_set = mocked_client.get_feature_set("my_feature_set")

        assert (
            feature_set.name == "my_feature_set"
            and "key1" in feature_set.labels
            and feature_set.labels["key1"] == "val1"
            and "key2" in feature_set.labels
            and feature_set.labels["key2"] == "val2"
            and feature_set.fields["my_feature_1"].name == "my_feature_1"
            and feature_set.fields["my_feature_1"].dtype == ValueType.FLOAT
            and feature_set.fields["my_entity_1"].name == "my_entity_1"
            and feature_set.fields["my_entity_1"].dtype == ValueType.INT64
            and len(feature_set.features) == 2
            and len(feature_set.entities) == 1
        )
Example No. 20
def bookings_feature_table_with_mapping(spark, client):
    schema = StructType([
        StructField("id", IntegerType()),
        StructField("datetime", TimestampType()),
        StructField("created_datetime", TimestampType()),
        StructField("total_completed_bookings", IntegerType()),
    ])
    df_data = [
        (
            8001,
            datetime(year=2020, month=9, day=1, tzinfo=utc),
            datetime(year=2020, month=9, day=1, tzinfo=utc),
            100,
        ),
        (
            8001,
            datetime(year=2020, month=9, day=2, tzinfo=utc),
            datetime(year=2020, month=9, day=2, tzinfo=utc),
            150,
        ),
        (
            8002,
            datetime(year=2020, month=9, day=2, tzinfo=utc),
            datetime(year=2020, month=9, day=2, tzinfo=utc),
            200,
        ),
    ]
    temp_dir, file_uri = create_temp_parquet_file(spark, "bookings", schema,
                                                  df_data)

    file_source = FileSource(
        event_timestamp_column="datetime",
        created_timestamp_column="created_datetime",
        file_format=ParquetFormat(),
        file_url=file_uri,
        field_mapping={"id": "driver_id"},
    )
    features = [Feature("total_completed_bookings", ValueType.INT32)]
    max_age = Duration()
    max_age.FromSeconds(86400)
    feature_table = FeatureTable("bookings", ["driver_id"],
                                 features,
                                 batch_source=file_source,
                                 max_age=max_age)
    yield client.apply(feature_table)
    shutil.rmtree(temp_dir)
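Note: max_age.FromSeconds(86400) produces the same message as constructing the duration directly, so the two forms below are equivalent:

from google.protobuf.duration_pb2 import Duration

max_age = Duration()
max_age.FromSeconds(86400)
assert max_age == Duration(seconds=86400)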
Example No. 21
    def execute(self, context):
        features_df = self.bq.get_pandas_df(self.sql)
        fs = FeatureSet(
            self.feature_set_name,
            max_age=Duration(seconds=86400),
            entities=[Entity(name=self.entity_name, dtype=ValueType.INT64)])
        fs.infer_fields_from_df(features_df, replace_existing_features=True)
        self.feast_client.apply(fs)
Example No. 22
    def make_span_from_db(ret: Dict) -> Span:
        """
        Create a Span object from a Dict that came from MongoDB.

        :param ret: The Dict that came from MongoDB.
        :return: The Span object created from the given Dict.
        """
        duration = Duration()
        duration.FromMicroseconds(ret["duration"])
        start_time = Timestamp()
        start_time.FromDatetime(ret["startTime"])
        del ret["startTime"]
        del ret["duration"]
        span = ParseDict(
            ret, Span(duration=duration, start_time=start_time), ignore_unknown_fields=True
        )
        return span
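Note: Duration.FromMicroseconds converts an integer microsecond count into the seconds/nanos pair, so a stored duration of 1,500,000 microseconds becomes 1 second and 500,000,000 nanoseconds:

from google.protobuf.duration_pb2 import Duration

duration = Duration()
duration.FromMicroseconds(1_500_000)
assert duration.seconds == 1 and duration.nanos == 500_000_000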
Example No. 23
    def to_proto(self) -> FeatureViewProto:
        """
        Converts a feature view object to its protobuf representation.

        Returns:
            A FeatureViewProto protobuf.
        """
        meta = FeatureViewMetaProto(materialization_intervals=[])
        if self.created_timestamp:
            meta.created_timestamp.FromDatetime(self.created_timestamp)
        if self.last_updated_timestamp:
            meta.last_updated_timestamp.FromDatetime(
                self.last_updated_timestamp)
        for interval in self.materialization_intervals:
            interval_proto = MaterializationIntervalProto()
            interval_proto.start_time.FromDatetime(interval[0])
            interval_proto.end_time.FromDatetime(interval[1])
            meta.materialization_intervals.append(interval_proto)

        ttl_duration = None
        if self.ttl is not None:
            ttl_duration = Duration()
            ttl_duration.FromTimedelta(self.ttl)

        batch_source_proto = self.batch_source.to_proto()
        batch_source_proto.data_source_class_type = f"{self.batch_source.__class__.__module__}.{self.batch_source.__class__.__name__}"

        stream_source_proto = None
        if self.stream_source:
            stream_source_proto = self.stream_source.to_proto()
            stream_source_proto.data_source_class_type = f"{self.stream_source.__class__.__module__}.{self.stream_source.__class__.__name__}"

        spec = FeatureViewSpecProto(
            name=self.name,
            entities=self.entities,
            features=[field.to_proto() for field in self.schema],
            description=self.description,
            tags=self.tags,
            owner=self.owner,
            ttl=(ttl_duration if ttl_duration is not None else None),
            online=self.online,
            batch_source=batch_source_proto,
            stream_source=stream_source_proto,
        )

        return FeatureViewProto(spec=spec, meta=meta)
Example No. 24
def seconds_to_duration(seconds):
    """Return a protobuf Duration from number of seconds, as a float.

    Args:
      seconds (float): duration length
    """
    duration_seconds = int(seconds)
    duration_nanos = int((seconds - duration_seconds) * NSEC_PER_SEC)
    return Duration(seconds=duration_seconds, nanos=duration_nanos)
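A quick check of the helper above, assuming NSEC_PER_SEC is defined as 10**9 elsewhere in the module:

NSEC_PER_SEC = 10**9  # assumed value of the module-level constant

assert seconds_to_duration(1.5) == Duration(seconds=1, nanos=500_000_000)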
Example No. 25
def test_apply_all_featuresets(client):
    client.set_project(PROJECT_NAME)

    file_fs1 = FeatureSet(
        "file_feature_set",
        features=[Feature("feature_value1", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(file_fs1)

    gcs_fs1 = FeatureSet(
        "gcs_feature_set",
        features=[Feature("feature_value2", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(gcs_fs1)

    proc_time_fs = FeatureSet(
        "processing_time",
        features=[Feature("feature_value3", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(proc_time_fs)

    add_cols_fs = FeatureSet(
        "additional_columns",
        features=[Feature("feature_value4", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(add_cols_fs)

    historical_fs = FeatureSet(
        "historical",
        features=[Feature("feature_value5", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(historical_fs)

    fs1 = FeatureSet(
        "feature_set_1",
        features=[Feature("feature_value6", ValueType.STRING)],
        entities=[Entity("entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )

    fs2 = FeatureSet(
        "feature_set_2",
        features=[Feature("other_feature_value7", ValueType.INT64)],
        entities=[Entity("other_entity_id", ValueType.INT64)],
        max_age=Duration(seconds=100),
    )
    client.apply(fs1)
    client.apply(fs2)
Example No. 26
def detect_labels(video_client, file_handle, input_uri, l, t):
    EXCLUDE = ["nature", "aerial photography", "tree"]
    print("{} spawned".format(t))
    features = [videointelligence.Feature.LABEL_DETECTION]
    s = []
    # Split this thread's 50-second slice of the video into up to ten 5-second segments,
    # stopping once the segment end passes the video length `l`.
    for j in range(10):
        s.append(
            videointelligence.VideoSegment(
                start_time_offset=Duration(seconds=j * 5 + 50 * t),
                end_time_offset=Duration(seconds=(j + 1) * 5 + 50 * t),
            )
        )
        if (j + 1) * 5 + 50 * t >= l:
            break

    print("{} {} segments: ".format(t, len(s)))

    operation = video_client.annotate_video(
        request={
            "features": features,
            "input_uri": input_uri,
            "video_context": videointelligence.VideoContext(segments=s)
        }
    )
    result = operation.result(timeout=120)

    print("\nFinished processing thread {}.".format(t))

    # segment_labels = result.annotation_results[0].segment_label_annotations

    for x in result.annotation_results:
        segment_labels = x.segment_label_annotations
        for i, segment_label in enumerate(segment_labels):
            if segment_label.entity.description in EXCLUDE:
                continue

            print("Video label description: {}".format(segment_label.entity.description))
            category_desc = ""
            for category_entity in segment_label.category_entities:
                print(
                    "\tLabel category description: {}".format(category_entity.description)
                )

            for i, segment in enumerate(segment_label.segments):
                start_time = (
                    segment.segment.start_time_offset.seconds
                    + segment.segment.start_time_offset.microseconds / 1e6
                )
                end_time = (
                    segment.segment.end_time_offset.seconds
                    + segment.segment.end_time_offset.microseconds / 1e6
                )
                # positions = "{}s to {}s".format(start_time, end_time)
                # confidence = segment.confidence
                # print("\tSegment {}: {}".format(i, positions))
                # print("\tConfidence: {}".format(confidence))

                file_handle.write("{},{},{},{}\n".format(segment_label.entity.description, str(start_time), str(end_time), str(segment.confidence)))

    return None
Example No. 27
    def _trailing_metadata(self):
        from google.protobuf.duration_pb2 import Duration
        from google.rpc.error_details_pb2 import RetryInfo
        from grpc._common import cygrpc_metadata
        if self._commit_abort_retry_nanos is None:
            return cygrpc_metadata(())
        retry_info = RetryInfo(
            retry_delay=Duration(seconds=self._commit_abort_retry_seconds,
                                 nanos=self._commit_abort_retry_nanos))
        return cygrpc_metadata([('google.rpc.retryinfo-bin',
                                 retry_info.SerializeToString())])
Example No. 28
def create_daily_nearline_30_day_migration(project_id: str, description: str,
                                           source_bucket: str,
                                           sink_bucket: str,
                                           start_date: datetime):
    """Create a daily migration from a GCS bucket to a Nearline GCS bucket
    for objects untouched for 30 days."""

    client = storage_transfer.StorageTransferServiceClient()

    # The ID of the Google Cloud Platform Project that owns the job
    # project_id = 'my-project-id'

    # A useful description for your transfer job
    # description = 'My transfer job'

    # Google Cloud Storage source bucket name
    # source_bucket = 'my-gcs-source-bucket'

    # Google Cloud Storage destination bucket name
    # sink_bucket = 'my-gcs-destination-bucket'

    transfer_job_request = storage_transfer.CreateTransferJobRequest({
        'transfer_job': {
            'project_id': project_id,
            'description': description,
            'status': storage_transfer.TransferJob.Status.ENABLED,
            'schedule': {
                'schedule_start_date': {
                    'day': start_date.day,
                    'month': start_date.month,
                    'year': start_date.year
                }
            },
            'transfer_spec': {
                'gcs_data_source': {
                    'bucket_name': source_bucket,
                },
                'gcs_data_sink': {
                    'bucket_name': sink_bucket,
                },
                'object_conditions': {
                    'min_time_elapsed_since_last_modification':
                        Duration(seconds=2592000)  # 30 days
                },
                'transfer_options': {
                    'delete_objects_from_source_after_transfer': True
                }
            }
        }
    })

    result = client.create_transfer_job(transfer_job_request)
    print(f'Created transferJob: {result.name}')
Example No. 29
    def Export(self, request, context):
        context.set_code(StatusCode.UNAVAILABLE)

        context.send_initial_metadata(
            (("google.rpc.retryinfo-bin", RetryInfo().SerializeToString()), ))
        context.set_trailing_metadata(((
            "google.rpc.retryinfo-bin",
            RetryInfo(retry_delay=Duration(seconds=4)).SerializeToString(),
        ), ))

        return ExportLogsServiceResponse()
Example No. 30
def bookings_feature_table(spark, client):
    schema = StructType([
        StructField("driver_id", IntegerType()),
        StructField("event_timestamp", TimestampType()),
        StructField("created_timestamp", TimestampType()),
        StructField("total_completed_bookings", IntegerType()),
    ])
    df_data = [
        (
            8001,
            datetime(year=2020, month=9, day=1),
            datetime(year=2020, month=9, day=1),
            100,
        ),
        (
            8001,
            datetime(year=2020, month=9, day=2),
            datetime(year=2020, month=9, day=2),
            150,
        ),
        (
            8002,
            datetime(year=2020, month=9, day=2),
            datetime(year=2020, month=9, day=2),
            200,
        ),
    ]
    temp_dir, file_uri = create_temp_parquet_file(spark, "bookings", schema,
                                                  df_data)

    file_source = FileSource("event_timestamp", "created_timestamp", "parquet",
                             file_uri)
    features = [Feature("total_completed_bookings", ValueType.INT32)]
    max_age = Duration()
    max_age.FromSeconds(86400)
    feature_table = FeatureTable("bookings", ["driver_id"],
                                 features,
                                 batch_source=file_source,
                                 max_age=max_age)
    yield client.apply_feature_table(feature_table)
    shutil.rmtree(temp_dir)