def construct_universal_datasets(
    entities: Dict[str, List[Any]], start_time: datetime, end_time: datetime
) -> Dict[str, pd.DataFrame]:
    """Build the test DataFrames for the given entities and time window.

    Args:
        entities: Mapping that must contain the keys ``"customer"`` and
            ``"driver"``; their values are handed to the ``driver_test_data``
            generators.
        start_time: Forwarded to every generator as the start of the window.
        end_time: Forwarded to every generator as the end of the window.

    Returns:
        A dict with the keys ``"customer"``, ``"driver"``, ``"orders"``,
        ``"global"`` and ``"entity"``. The ``"entity"`` frame is the
        ``customer_id`` / ``driver_id`` / ``order_id`` / ``event_timestamp``
        column subset of the ``"orders"`` frame.
    """
    # The generators are invoked in the same sequence as before, in case they
    # share state (e.g. a random generator) -- TODO confirm.
    customer_ids = entities["customer"]
    customer_profiles = driver_test_data.create_customer_daily_profile_df(
        customer_ids, start_time, end_time
    )

    driver_ids = entities["driver"]
    driver_stats = driver_test_data.create_driver_hourly_stats_df(
        driver_ids, start_time, end_time
    )

    orders = driver_test_data.create_orders_df(
        customers=customer_ids,
        drivers=driver_ids,
        start_date=start_time,
        end_date=end_time,
        order_count=20,
    )
    global_stats = driver_test_data.create_global_daily_stats_df(
        start_time, end_time
    )

    entity_columns = ["customer_id", "driver_id", "order_id", "event_timestamp"]

    datasets = {
        "customer": customer_profiles,
        "driver": driver_stats,
        "orders": orders,
        "global": global_stats,
        "entity": orders[entity_columns],
    }
    return datasets
def generate_entities(date, infer_event_timestamp_col):
    """Create the entity id lists and an orders DataFrame anchored at ``date``.

    Args:
        date: Used as ``end_date``; every other date is derived from it.
        infer_event_timestamp_col: Forwarded unchanged to
            ``driver_data.create_orders_df``.

    Returns:
        The tuple ``(customer_entities, driver_entities, end_date, orders_df,
        start_date)``, where ``start_date`` is seven days before ``date``.
    """
    week = timedelta(days=7)
    end_date = date
    start_date = end_date - week

    # Ten consecutive ids per entity type: 1001..1010 and 5001..5010.
    customer_entities = list(range(1001, 1011))
    driver_entities = list(range(5001, 5011))

    # The orders call receives a wider range than [start_date, end_date]:
    # 14 days before `date` up to 7 days after it.
    orders_df = driver_data.create_orders_df(
        customer_entities,
        driver_entities,
        end_date - timedelta(days=14),
        end_date + week,
        20,
        infer_event_timestamp_col=infer_event_timestamp_col,
    )

    return customer_entities, driver_entities, end_date, orders_df, start_date
def generate_entities(date, infer_event_timestamp_col):
    """Create the entity id lists and an orders DataFrame anchored at ``date``.

    Args:
        date: Used as ``end_date``; every other date is derived from it.
        infer_event_timestamp_col: Forwarded unchanged to
            ``driver_data.create_orders_df``.

    Returns:
        The tuple ``(customer_entities, driver_entities, end_date, orders_df,
        start_date)``, where ``start_date`` is seven days before ``date``.
    """
    year = timedelta(days=365)
    end_date = date
    start_date = end_date - timedelta(days=7)

    # 109 consecutive ids per entity type: 1001..1109 and 5001..5109.
    customer_entities = list(range(1001, 1110))
    driver_entities = list(range(5001, 5110))

    # Orders are requested for a full year on either side of `date`, which is
    # much wider than the [start_date, end_date] week returned to the caller.
    order_kwargs = {
        "customers": customer_entities,
        "drivers": driver_entities,
        "start_date": end_date - year,
        "end_date": end_date + year,
        "order_count": 1000,
        "infer_event_timestamp_col": infer_event_timestamp_col,
    }
    orders_df = driver_data.create_orders_df(**order_kwargs)

    return customer_entities, driver_entities, end_date, orders_df, start_date
def construct_universal_datasets(
    entities: UniversalEntities, start_time: datetime, end_time: datetime
) -> UniversalDatasets:
    """Build the test DataFrames for ``entities`` and wrap them in a container.

    Args:
        entities: Provides ``customer_vals``, ``driver_vals`` and
            ``location_vals``, which are passed to the ``driver_test_data``
            generators.
        start_time: Forwarded to every generator as the start of the window.
        end_time: Forwarded to every generator as the end of the window.

    Returns:
        A ``UniversalDatasets`` holding the customer, driver, location, orders,
        global and field-mapping frames, plus ``entity_df``: the id and
        ``event_timestamp`` column subset of the orders frame.
    """
    # The generators are invoked in the same sequence as before, in case they
    # share state (e.g. a random generator) -- TODO confirm.
    customer_profiles = driver_test_data.create_customer_daily_profile_df(
        entities.customer_vals, start_time, end_time
    )
    driver_stats = driver_test_data.create_driver_hourly_stats_df(
        entities.driver_vals, start_time, end_time
    )
    location_stats = driver_test_data.create_location_stats_df(
        entities.location_vals, start_time, end_time
    )
    orders = driver_test_data.create_orders_df(
        customers=entities.customer_vals,
        drivers=entities.driver_vals,
        locations=entities.location_vals,
        start_date=start_time,
        end_date=end_time,
        order_count=20,
    )
    global_stats = driver_test_data.create_global_daily_stats_df(
        start_time, end_time
    )
    field_mapping = driver_test_data.create_field_mapping_df(start_time, end_time)

    entity_columns = [
        "customer_id",
        "driver_id",
        "order_id",
        "origin_id",
        "destination_id",
        "event_timestamp",
    ]
    entity_rows = orders[entity_columns]

    return UniversalDatasets(
        customer_df=customer_profiles,
        driver_df=driver_stats,
        location_df=location_stats,
        orders_df=orders,
        global_df=global_stats,
        field_mapping_df=field_mapping,
        entity_df=entity_rows,
    )
class Environment:
    """Test environment bundling a feature store with generated datasets.

    The DataFrames and date bounds below are class-level attributes, so they
    are computed once, when the class body is executed. The feature views and
    the orders table are created on first request and then cached on the
    instance.

    NOTE(review): the annotated fields without values (``name``,
    ``test_repo_config``, ...) are presumably populated by a decorator or by
    the code constructing the instance -- confirm against the full file.
    """

    name: str
    test_repo_config: TestRepoConfig
    feature_store: FeatureStore
    data_source: DataSource
    data_source_creator: DataSourceCreator

    # Current local time truncated to the hour; all other dates derive from it.
    end_date = datetime.now().replace(microsecond=0, second=0, minute=0)
    start_date = end_date - timedelta(days=7)
    # Wider bounds, used only for the orders data.
    before_start_date = end_date - timedelta(days=365)
    after_end_date = end_date + timedelta(days=365)

    # Customer ids 1001..1109 and their generated profile data.
    customer_entities = list(range(1001, 1110))
    customer_df = driver_test_data.create_customer_daily_profile_df(
        customer_entities, start_date, end_date
    )
    # Cache slot filled by customer_feature_view().
    _customer_feature_view: Optional[FeatureView] = None

    # Driver ids 5001..5109 and their generated stats data.
    driver_entities = list(range(5001, 5110))
    driver_df = driver_test_data.create_driver_hourly_stats_df(
        driver_entities, start_date, end_date
    )
    # Cache slot filled by driver_stats_feature_view().
    _driver_stats_feature_view: Optional[FeatureView] = None

    orders_df = driver_test_data.create_orders_df(
        customers=customer_entities,
        drivers=driver_entities,
        start_date=before_start_date,
        end_date=after_end_date,
        order_count=1000,
    )
    # Cache slot filled by orders_table().
    _orders_table: Optional[str] = None

    def customer_feature_view(self) -> FeatureView:
        """Return the customer profile feature view, creating it on first use."""
        if self._customer_feature_view is not None:
            return self._customer_feature_view

        creator = self.data_source_creator
        table_name = creator.get_prefixed_table_name(self.name, "customer_profile")
        source = creator.create_data_source(
            table_name,
            self.customer_df,
            event_timestamp_column="event_timestamp",
            created_timestamp_column="created",
        )
        self._customer_feature_view = create_customer_daily_profile_feature_view(
            source
        )
        return self._customer_feature_view

    def driver_stats_feature_view(self) -> FeatureView:
        """Return the driver hourly stats feature view, creating it on first use."""
        if self._driver_stats_feature_view is not None:
            return self._driver_stats_feature_view

        creator = self.data_source_creator
        table_name = creator.get_prefixed_table_name(self.name, "driver_hourly")
        source = creator.create_data_source(
            table_name,
            self.driver_df,
            event_timestamp_column="event_timestamp",
            created_timestamp_column="created",
        )
        self._driver_stats_feature_view = create_driver_hourly_stats_feature_view(
            source
        )
        return self._driver_stats_feature_view

    def orders_table(self) -> Optional[str]:
        """Return the orders table reference, creating the data source if needed.

        The reference is read from the created data source's ``table_ref``
        attribute, or from ``table`` when ``table_ref`` is absent. When the
        source has neither, nothing is cached and ``None`` is returned, so the
        next call creates the data source again.
        """
        if self._orders_table is not None:
            return self._orders_table

        creator = self.data_source_creator
        table_name = creator.get_prefixed_table_name(self.name, "orders")
        source = creator.create_data_source(
            table_name,
            self.orders_df,
            event_timestamp_column="event_timestamp",
            created_timestamp_column="created",
        )

        if hasattr(source, "table_ref"):
            self._orders_table = source.table_ref
        elif hasattr(source, "table"):
            self._orders_table = source.table
        return self._orders_table