Beispiel #1
0
def construct_universal_datasets(
        entities: Dict[str, List[Any]], start_time: datetime,
        end_time: datetime) -> Dict[str, pd.DataFrame]:
    """Generate the test dataframes for the given entities and time window.

    Args:
        entities: Mapping that must provide the keys ``"customer"`` and
            ``"driver"``; their values are handed to the
            ``driver_test_data`` generators.
        start_time: Start of the window passed to every generator.
        end_time: End of the window passed to every generator.

    Returns:
        A dict with the keys ``"customer"``, ``"driver"``, ``"orders"``,
        ``"global"`` and ``"entity"``. The ``"entity"`` value is the orders
        dataframe restricted to its id and event-timestamp columns.
    """
    customers = entities["customer"]
    drivers = entities["driver"]

    # The generators are invoked in a fixed order: customer, driver, orders,
    # global.
    customer_profiles = driver_test_data.create_customer_daily_profile_df(
        customers, start_time, end_time)
    driver_stats = driver_test_data.create_driver_hourly_stats_df(
        drivers, start_time, end_time)
    orders = driver_test_data.create_orders_df(
        customers=customers,
        drivers=drivers,
        start_date=start_time,
        end_date=end_time,
        order_count=20,
    )
    global_stats = driver_test_data.create_global_daily_stats_df(
        start_time, end_time)

    # The entity dataframe is a column subset of the generated orders.
    entity_columns = ["customer_id", "driver_id", "order_id", "event_timestamp"]
    entity_rows = orders[entity_columns]

    return {
        "customer": customer_profiles,
        "driver": driver_stats,
        "orders": orders,
        "global": global_stats,
        "entity": entity_rows,
    }
def generate_entities(date, infer_event_timestamp_col):
    """Create entity ids and an orders dataframe around ``date``.

    Args:
        date: Used as the end date; the start date is seven days earlier.
        infer_event_timestamp_col: Forwarded unchanged to
            ``driver_data.create_orders_df``.

    Returns:
        A tuple ``(customer_entities, driver_entities, end_date, orders_df,
        start_date)``.
    """
    one_week = timedelta(days=7)
    end_date = date
    start_date = end_date - one_week

    # Orders are requested for a wider window than [start_date, end_date]:
    # it opens 14 days before end_date and closes 7 days after it.
    orders_window_start = end_date - timedelta(days=14)
    orders_window_end = end_date + one_week

    customer_entities = list(range(1001, 1011))  # ids 1001..1010
    driver_entities = list(range(5001, 5011))  # ids 5001..5010

    orders_df = driver_data.create_orders_df(
        customer_entities,
        driver_entities,
        orders_window_start,
        orders_window_end,
        20,
        infer_event_timestamp_col=infer_event_timestamp_col,
    )
    return customer_entities, driver_entities, end_date, orders_df, start_date
Beispiel #3
0
def generate_entities(date, infer_event_timestamp_col):
    """Create entity ids and a large orders dataframe around ``date``.

    Args:
        date: Used as the end date; the start date is seven days earlier.
        infer_event_timestamp_col: Forwarded unchanged to
            ``driver_data.create_orders_df``.

    Returns:
        A tuple ``(customer_entities, driver_entities, end_date, orders_df,
        start_date)``.
    """
    end_date = date
    start_date = end_date - timedelta(days=7)

    # Orders are requested for a window reaching 365 days to either side of
    # end_date, far wider than [start_date, end_date].
    one_year = timedelta(days=365)
    orders_window_start = end_date - one_year
    orders_window_end = end_date + one_year

    customer_entities = list(range(1001, 1110))  # ids 1001..1109
    driver_entities = list(range(5001, 5110))  # ids 5001..5109

    orders_df = driver_data.create_orders_df(
        customers=customer_entities,
        drivers=driver_entities,
        start_date=orders_window_start,
        end_date=orders_window_end,
        order_count=1000,
        infer_event_timestamp_col=infer_event_timestamp_col,
    )
    return customer_entities, driver_entities, end_date, orders_df, start_date
Beispiel #4
0
def construct_universal_datasets(entities: UniversalEntities,
                                 start_time: datetime,
                                 end_time: datetime) -> UniversalDatasets:
    """Generate every test dataframe for the given entities and time window.

    Args:
        entities: Provides ``customer_vals``, ``driver_vals`` and
            ``location_vals``, which are handed to the ``driver_test_data``
            generators.
        start_time: Start of the window passed to every generator.
        end_time: End of the window passed to every generator.

    Returns:
        A ``UniversalDatasets`` holding the customer, driver, location,
        orders, global and field-mapping dataframes, plus an entity
        dataframe that is the orders dataframe restricted to its id and
        event-timestamp columns.
    """
    customers = entities.customer_vals
    drivers = entities.driver_vals
    locations = entities.location_vals

    # The generators are invoked in a fixed order: customer, driver,
    # location, orders, global, field mapping.
    customer_profiles = driver_test_data.create_customer_daily_profile_df(
        customers, start_time, end_time)
    driver_stats = driver_test_data.create_driver_hourly_stats_df(
        drivers, start_time, end_time)
    location_stats = driver_test_data.create_location_stats_df(
        locations, start_time, end_time)
    orders = driver_test_data.create_orders_df(
        customers=customers,
        drivers=drivers,
        locations=locations,
        start_date=start_time,
        end_date=end_time,
        order_count=20,
    )
    global_stats = driver_test_data.create_global_daily_stats_df(
        start_time, end_time)
    field_mapping = driver_test_data.create_field_mapping_df(
        start_time, end_time)

    # The entity dataframe is a column subset of the generated orders.
    entity_columns = [
        "customer_id",
        "driver_id",
        "order_id",
        "origin_id",
        "destination_id",
        "event_timestamp",
    ]
    entity_rows = orders[entity_columns]

    return UniversalDatasets(
        customer_df=customer_profiles,
        driver_df=driver_stats,
        location_df=location_stats,
        orders_df=orders,
        global_df=global_stats,
        field_mapping_df=field_mapping,
        entity_df=entity_rows,
    )
Beispiel #5
0
class Environment:
    """Shared test data plus lazily created feature views and orders table.

    The date window, entity id lists and dataframes below are class
    attributes, so they are computed once when the class body executes.
    The three accessor methods create their data source on first use via
    ``data_source_creator`` and cache the result on the instance.
    """

    # NOTE(review): annotated attributes are kept in their original relative
    # order in case a decorator not visible here (e.g. @dataclass) derives a
    # constructor from them -- confirm against the full file.
    name: str
    test_repo_config: TestRepoConfig
    feature_store: FeatureStore
    data_source: DataSource
    data_source_creator: DataSourceCreator

    # Current local time truncated to the hour.
    end_date = datetime.now().replace(minute=0, second=0, microsecond=0)
    start_date = end_date - timedelta(days=7)
    # Orders use a window reaching 365 days to either side of end_date.
    before_start_date = end_date - timedelta(days=365)
    after_end_date = end_date + timedelta(days=365)

    customer_entities = list(range(1001, 1110))
    customer_df = driver_test_data.create_customer_daily_profile_df(
        customer_entities, start_date, end_date)
    # Cache slot filled by customer_feature_view().
    _customer_feature_view: Optional[FeatureView] = None

    driver_entities = list(range(5001, 5110))
    driver_df = driver_test_data.create_driver_hourly_stats_df(
        driver_entities, start_date, end_date)
    # Cache slot filled by driver_stats_feature_view().
    _driver_stats_feature_view: Optional[FeatureView] = None

    orders_df = driver_test_data.create_orders_df(
        customers=customer_entities,
        drivers=driver_entities,
        start_date=before_start_date,
        end_date=after_end_date,
        order_count=1000,
    )
    # Cache slot filled by orders_table().
    _orders_table: Optional[str] = None

    def customer_feature_view(self) -> FeatureView:
        """Return the customer profile feature view, creating it on first use."""
        if self._customer_feature_view is not None:
            return self._customer_feature_view

        creator = self.data_source_creator
        table_name = creator.get_prefixed_table_name(
            self.name, "customer_profile")
        source = creator.create_data_source(
            table_name,
            self.customer_df,
            event_timestamp_column="event_timestamp",
            created_timestamp_column="created",
        )
        self._customer_feature_view = (
            create_customer_daily_profile_feature_view(source))
        return self._customer_feature_view

    def driver_stats_feature_view(self) -> FeatureView:
        """Return the driver hourly stats feature view, creating it on first use."""
        if self._driver_stats_feature_view is not None:
            return self._driver_stats_feature_view

        creator = self.data_source_creator
        table_name = creator.get_prefixed_table_name(
            self.name, "driver_hourly")
        source = creator.create_data_source(
            table_name,
            self.driver_df,
            event_timestamp_column="event_timestamp",
            created_timestamp_column="created",
        )
        self._driver_stats_feature_view = (
            create_driver_hourly_stats_feature_view(source))
        return self._driver_stats_feature_view

    def orders_table(self) -> Optional[str]:
        """Return the orders table reference, creating the data source if needed.

        The value is taken from the data source's ``table_ref`` attribute,
        or from ``table`` when ``table_ref`` is absent. If the data source
        has neither attribute the result is ``None`` and nothing is cached,
        so the next call creates a data source again.
        """
        if self._orders_table is not None:
            return self._orders_table

        creator = self.data_source_creator
        table_name = creator.get_prefixed_table_name(self.name, "orders")
        source = creator.create_data_source(
            table_name,
            self.orders_df,
            event_timestamp_column="event_timestamp",
            created_timestamp_column="created",
        )
        # First matching attribute wins; "table_ref" takes precedence.
        for attr in ("table_ref", "table"):
            if hasattr(source, attr):
                self._orders_table = getattr(source, attr)
                break
        return self._orders_table