Exemple #1
0
    def archive_project(self, project):
        """
        Archives a project. Project will still continue to function for
        ingestion and retrieval, but will be in a read-only state. It will
        also not be visible from the Core API for management purposes.

        Args:
            project: Name of project to archive

        Raises:
            NotImplementedError: If the client is configured to use the
                object store registry.
            grpc.RpcError: If the ArchiveProject call fails. The raised error
                carries the details string of the original error, which is
                chained as its cause.
        """
        # Guard clause: projects only exist when talking to the core service.
        if self._use_object_store_registry:
            raise NotImplementedError(
                "Projects are not implemented for object store registry.")

        try:
            self._core_service.ArchiveProject(
                ArchiveProjectRequest(name=project),
                timeout=self._config.getint(opt.GRPC_CONNECTION_TIMEOUT),
                metadata=self._get_grpc_metadata(),
            )  # type: ArchiveProjectResponse
        except grpc.RpcError as e:
            # Chain explicitly so the original error (status code, trailing
            # metadata) stays reachable via __cause__.
            raise grpc.RpcError(e.details()) from e

        # revert to the default project
        # NOTE(review): this reads/writes `self._project` directly; confirm
        # that attribute is what the rest of the client uses to track the
        # active project.
        if self._project == project:
            self._project = opt().PROJECT
Exemple #2
0
def _init_config(path: str):
    """
    Returns a ConfigParser for a Feast configuration file.

    The directory that would contain the file is created if it is missing.
    If a file already exists at ``path`` it is read into the parser; this
    function does not write the file itself.

    Args:
        path: Path of the Feast configuration file

    Returns: ConfigParser of the Feast configuration file, with defaults
    preloaded and the Feast configuration section guaranteed to exist

    """
    # Create the configuration file directory if needed. A bare filename has
    # no directory component, in which case there is nothing to create.
    config_dir = os.path.dirname(path)
    if config_dir:
        os.makedirs(config_dir, exist_ok=True)

    # Build the parser with defaults preloaded, then overlay the file contents
    config = ConfigParser(defaults=opt().defaults(), allow_no_value=True)
    if os.path.exists(path):
        config.read(path)

    # Store all configuration in a single section
    if not config.has_section(CONFIG_FILE_SECTION):
        config.add_section(CONFIG_FILE_SECTION)

    return config
Exemple #3
0
    def set_project(self, project: Optional[str] = None):
        """
        Select the Feast project that this client treats as active.

        Args:
            project: Name of the project to activate. When omitted (``None``),
                the default project is used instead.
        """
        # Fall back to the default project only when no name was supplied.
        active_project = opt().PROJECT if project is None else project
        self._config.set(opt.PROJECT, active_project)
Exemple #4
0
    def ingest(
        self,
        feature_table: Union[str, FeatureTable],
        source: Union[pd.DataFrame, str],
        project: Optional[str] = None,
        chunk_size: int = 10000,
        max_workers: int = max(CPU_COUNT - 1, 1),
        timeout: int = int(opt().BATCH_INGESTION_PRODUCTION_TIMEOUT),
    ) -> None:
        """
        Batch load feature data into a FeatureTable.

        The feature table is looked up by name, the data in ``source`` is
        read and written to temporary file(s), and those file(s) are uploaded
        to the batch source (file or BigQuery) of the feature table.

        Args:
            feature_table (typing.Union[str, feast.feature_table.FeatureTable]):
                FeatureTable object or the string name of the feature table

            source (typing.Union[pd.DataFrame, str]):
                Either a file path or Pandas Dataframe to ingest into Feast
                Files that are currently supported:
                    * parquet
                    * csv
                    * json

            project: Feast project to locate FeatureTable. Defaults to the
                client's currently active project.

            chunk_size (int):
                Amount of rows to load and ingest at a time. Only passed on
                when the data is written without date partitions.

            max_workers (int):
                Number of worker processes to use to encode values. Only
                passed on when the data is written without date partitions.

            timeout (int):
                Timeout in seconds to wait for completion. Currently not
                referenced by this method's body; kept for interface
                compatibility.

        Raises:
            TypeError: If ``feature_table`` is neither a string nor a
                FeatureTable.
            Exception: If the feature table cannot be found, if it has no
                batch source, or if its file batch source does not use the
                parquet format.

        Examples:
            >>> import pandas as pd
            >>> from feast import Client
            >>>
            >>> client = Client(core_url="localhost:6565")
            >>> ft_df = pd.DataFrame(
            ...         {
            ...            "datetime": [pd.Timestamp.now()],
            ...            "driver": [1001],
            ...            "rating": [4.3],
            ...         }
            ...     )
            >>> client.set_project("project1")
            >>>
            >>> driver_ft = client.get_feature_table("driver")
            >>> client.ingest(driver_ft, ft_df)
        """

        if project is None:
            project = self.project

        # Resolve the feature table name; reject unsupported argument types
        # up front instead of failing later on an unbound local.
        if isinstance(feature_table, FeatureTable):
            name = feature_table.name
        elif isinstance(feature_table, str):
            name = feature_table
        else:
            raise TypeError(
                "feature_table must be a FeatureTable or the name of one, "
                f"got {type(feature_table).__name__}.")

        # Always work with the registered definition rather than the
        # (possibly stale) object supplied by the caller.
        fetched_feature_table: Optional[FeatureTable] = self.get_feature_table(
            name, project)
        if fetched_feature_table is None:
            raise Exception(f"FeatureTable, {name} cannot be found.")
        feature_table = fetched_feature_table

        batch_source = feature_table.batch_source
        is_file_source = isinstance(batch_source, FileSource)

        # Check 1) Only parquet file format for FeatureTable batch source is supported
        if batch_source is None or (
                is_file_source and not isinstance(
                    batch_source.file_options.file_format, ParquetFormat)):
            raise Exception(
                f"No suitable batch source found for FeatureTable, {name}. "
                "Only BATCH_FILE source with parquet format is supported for batch ingestion."
            )

        pyarrow_table, column_names = _read_table_from_source(source)
        # Check 2) Check if FeatureTable batch source field mappings can be found in provided source table
        _check_field_mappings(
            column_names,
            name,
            batch_source.event_timestamp_column,
            batch_source.field_mapping,
        )

        dir_path = None
        with_partitions = False
        if is_file_source and batch_source.date_partition_column:
            with_partitions = True
            dest_path = _write_partitioned_table_from_source(
                column_names,
                pyarrow_table,
                batch_source.date_partition_column,
                batch_source.event_timestamp_column,
            )
        else:
            dir_path, dest_path = _write_non_partitioned_table_from_source(
                column_names,
                pyarrow_table,
                chunk_size,
                max_workers,
            )

        try:
            if is_file_source:
                # Drop a trailing wildcard so the URL can be used as an
                # upload destination.
                file_url = batch_source.file_options.file_url.rstrip("*")
                _upload_to_file_source(file_url, with_partitions, dest_path,
                                       self._config)
            if isinstance(batch_source, BigQuerySource):
                bq_table_ref = batch_source.bigquery_options.table_ref
                feature_table_timestamp_column = (
                    batch_source.event_timestamp_column)

                _upload_to_bq_source(bq_table_ref,
                                     feature_table_timestamp_column, dest_path)
        finally:
            # Remove parquet file(s) that were created earlier
            # NOTE(review): only the non-partitioned output directory is
            # removed here; confirm whether the partitioned `dest_path` also
            # needs cleaning up.
            print("Removing temporary file(s)...")
            if dir_path:
                shutil.rmtree(dir_path)

        print(
            "Data has been successfully ingested into FeatureTable batch source."
        )