def download_table_as_df(self, full_table_id, staging_location):
    """Download a BigQuery table as a Pandas DataFrame.

    The table is first extracted to a temporary CSV file in the staging
    location, then read back into memory.

    Args:
        full_table_id (str): fully qualified BigQuery table id
        staging_location (str): url to staging location (currently
            supports a folder in GCS)

    Returns:
        pandas.DataFrame: dataframe of the training dataset

    Raises:
        ValueError: if staging_location is not a GCS path
    """
    if not is_gs_path(staging_location):
        # Message references the actual parameter name (was "staging_uri").
        raise ValueError("staging_location must be a directory in GCS")

    # Millisecond timestamp makes the temp object name effectively unique,
    # so concurrent downloads do not clobber each other's staging files.
    temp_file_name = "temp_{}".format(int(round(time.time() * 1000)))
    staging_file_path = os.path.join(staging_location, temp_file_name)

    job_config = ExtractJobConfig()
    job_config.destination_format = DestinationFormat.CSV
    job = self.bq.extract_table(
        Table.from_string(full_table_id),
        staging_file_path,
        job_config=job_config,
    )

    # Block until the extract job completes (raises on job failure).
    job.result()

    # NOTE(review): the staging file is never deleted — presumably cleaned
    # up by a GCS lifecycle rule; confirm, or add explicit cleanup.
    return gcs_to_df(staging_file_path)
def from_csv(cls, path, entity, granularity, owner, staging_location=None,
             id_column=None, feature_columns=None, timestamp_column=None,
             timestamp_value=None, serving_store=None, warehouse_store=None):
    """Create an importer from a csv dataset.

    The file may live locally or in GCS. For a local file,
    staging_location must be provided so the data can be ingested.

    Args:
        path (str): path to csv file
        entity (str): entity id
        granularity (Granularity): granularity of data
        owner (str): owner
        staging_location (str, optional): Defaults to None. Staging
            location for ingesting a local csv file.
        id_column (str, optional): Defaults to None. Id column in the
            csv. If not set, will default to the `entity` argument.
        feature_columns ([str], optional): Defaults to None. Feature
            columns to ingest. If not set, the importer will by default
            ingest all available columns.
        timestamp_column (str, optional): Defaults to None. Timestamp
            column in the csv. If not set, defaults to timestamp value.
        timestamp_value (datetime, optional): Defaults to current
            datetime. Timestamp value to assign to all features in the
            dataset.
        serving_store (feast.sdk.resources.feature.DataStore): Defaults
            to None. Serving store to write the features in this
            instance to.
        warehouse_store (feast.sdk.resources.feature.DataStore):
            Defaults to None. Warehouse store to write the features in
            this instance to.

    Returns:
        Importer: the importer for the dataset provided.
    """
    # Resolve where the data will be read from during ingestion and
    # whether a local file must first be staged remotely.
    remote_path, require_staging = _get_remote_location(path, staging_location)
    options = {"format": "csv", "path": remote_path}

    # Load the dataset into memory for schema detection.
    df = gcs_to_df(path) if is_gs_path(path) else pd.read_csv(path)

    schema, features = _detect_schema_and_feature(
        entity, granularity, owner, id_column, feature_columns,
        timestamp_column, timestamp_value, serving_store,
        warehouse_store, df)

    import_spec = _create_import("file", options, entity, schema)
    properties = _properties("csv", len(df.index), require_staging,
                             remote_path)
    dataset_specs = _specs(import_spec, Entity(name=entity), features)
    return cls(dataset_specs, df, properties)