def cli(core_url, output_path):
    """Dump all Feast feature tables as a list of JSON-serializable dicts.

    Connects to Feast Core at ``core_url``, resolves each feature table's
    entities, features, and batch/stream source metadata, and either writes
    the result as JSON to ``output_path`` or prints the Python repr to
    stdout when ``output_path`` is None.
    """
    client = Client(core_url=core_url)

    tables = client.list_feature_tables()

    # sort tables by name for consistent outputs
    tables = sorted(tables, key=lambda x: x.name)

    parsed_tables = []

    for table in tables:
        batch_source = None
        stream_source = None
        # platform and name for constructing URN later on
        batch_source_platform = "unknown"
        stream_source_platform = "unknown"
        batch_source_name = "unknown"
        stream_source_name = "unknown"

        # a batch source is exactly one concrete type, so elif is safe here
        if isinstance(table.batch_source, BigQuerySource):
            batch_source = "BigQuerySource"
            batch_source_platform = "bigquery"
            batch_source_name = table.batch_source.bigquery_options.table_ref
        elif isinstance(table.batch_source, FileSource):
            batch_source = "FileSource"
            batch_source_platform = "file"

            # replace slashes because the react frontend can't parse them correctly
            batch_source_name = table.batch_source.file_options.file_url.replace(
                "/", "."
            )

            # replace redundant file prefix ("file://" after slash replacement)
            if batch_source_name.startswith("file:.."):
                batch_source_name = batch_source_name[7:]

        if isinstance(table.stream_source, KafkaSource):
            stream_source = "KafkaSource"
            stream_source_platform = "kafka"
            stream_source_name = table.stream_source.kafka_options.topic
        elif isinstance(table.stream_source, KinesisSource):
            stream_source = "KinesisSource"
            stream_source_platform = "kinesis"
            stream_source_name = f"{table.stream_source.kinesis_options.region}-{table.stream_source.kinesis_options.stream_name}"

        # hoisted: to_dict() serializes the whole table spec; call it once
        table_spec = table.to_dict()["spec"]
        # currently unused in MCE outputs, but useful for debugging
        stream_source_config = table_spec.get("streamSource")
        batch_source_config = table_spec["batchSource"]

        raw_entities = [
            client.get_entity(entity_name) for entity_name in table.entities
        ]
        raw_entities = sorted(raw_entities, key=lambda x: x.name)

        source_info = {
            "batch_source": batch_source,
            "stream_source": stream_source,
            "batch_source_config": batch_source_config,
            "stream_source_config": stream_source_config,
            "batch_source_platform": batch_source_platform,
            "stream_source_platform": stream_source_platform,
            "batch_source_name": batch_source_name,
            "stream_source_name": stream_source_name,
        }

        # sort entities by name for consistent outputs
        entities = sorted(
            [
                {
                    "name": x.name,
                    "type": x.value_type.name,
                    "description": x.description,
                    **source_info,
                }
                for x in raw_entities
            ],
            key=lambda x: x["name"],
        )

        # sort features by name for consistent outputs
        features = sorted(
            [
                {"name": x.name, "type": x.dtype.name, **source_info}
                for x in table.features
            ],
            key=lambda x: x["name"],
        )

        parsed_tables.append(
            {
                "name": table.name,
                "entities": entities,
                "features": features,
            }
        )

    if output_path is not None:
        with open(output_path, "w") as f:
            json.dump(parsed_tables, f)
    else:
        # NOTE: prints the Python repr, not JSON — preserved as-is
        print(parsed_tables)
class FeastExtractor(Extractor):
    """
    Extracts feature tables from Feast Core service. Since Feast is
    a metadata store (and not the database itself), it maps the following
    attributes:

     * a database is the name of a feast project
     * a table name is the name of the feature table
     * columns are features stored in the feature table
    """

    # Config key: logical name of the Feast instance (becomes the cluster part
    # of the emitted TableMetadata).
    FEAST_SERVICE_CONFIG_KEY = "instance_name"
    # Config key: Feast Core gRPC endpoint URL.
    FEAST_ENDPOINT_CONFIG_KEY = "endpoint"
    # Config key: whether to also emit programmatic descriptions per table.
    DESCRIBE_FEATURE_TABLES = "describe_feature_tables"
    DEFAULT_CONFIG = ConfigFactory.from_dict(
        {FEAST_SERVICE_CONFIG_KEY: "main", DESCRIBE_FEATURE_TABLES: True}
    )

    def init(self, conf: ConfigTree) -> None:
        """Initialize the extractor from config; connects a Feast client."""
        conf = conf.with_fallback(FeastExtractor.DEFAULT_CONFIG)
        self._feast_service = conf.get_string(FeastExtractor.FEAST_SERVICE_CONFIG_KEY)
        self._describe_feature_tables = conf.get_bool(
            FeastExtractor.DESCRIBE_FEATURE_TABLES
        )
        self._client = Client(
            core_url=conf.get_string(FeastExtractor.FEAST_ENDPOINT_CONFIG_KEY)
        )
        self._extract_iter: Union[None, Iterator] = None

    def get_scope(self) -> str:
        return "extractor.feast"

    def extract(self) -> Union[TableMetadata, None]:
        """
        For every feature table from Feast, multiple objects are extracted:

        1. TableMetadata with feature table description
        2. Programmatic Description of the feature table, containing
           metadata - date of creation and labels
        3. Programmatic Description with Batch Source specification
        4. (if applicable) Programmatic Description with Stream Source
           specification

        Returns None when the underlying iterator is exhausted.
        """
        # lazily build the iterator on first call
        if not self._extract_iter:
            self._extract_iter = self._get_extract_iter()
        try:
            return next(self._extract_iter)
        except StopIteration:
            return None

    def _get_extract_iter(self) -> Iterator[TableMetadata]:
        """Yield metadata for every feature table in every Feast project."""
        for project in self._client.list_projects():
            for feature_table in self._client.list_feature_tables(project=project):
                yield from self._extract_feature_table(project, feature_table)

    def _extract_feature_table(
        self, project: str, feature_table: FeatureTable
    ) -> Iterator[TableMetadata]:
        """Yield TableMetadata records for one feature table.

        Entities come first in the column list, then features; indexes are
        assigned in that combined order.
        """
        columns = []
        for index, entity_name in enumerate(feature_table.entities):
            entity = self._client.get_entity(entity_name, project=project)
            # NOTE(review): entities pass the raw value_type while features
            # below pass dtype.name — possibly inconsistent; preserved as-is.
            columns.append(
                ColumnMetadata(entity.name, entity.description, entity.value_type, index)
            )

        for index, feature in enumerate(feature_table.features):
            columns.append(
                ColumnMetadata(
                    feature.name,
                    None,
                    feature.dtype.name,
                    len(feature_table.entities) + index,
                )
            )

        yield TableMetadata(
            "feast",
            self._feast_service,
            project,
            feature_table.name,
            None,
            columns,
        )

        if self._describe_feature_tables:
            created_at = datetime.utcfromtimestamp(
                feature_table.created_timestamp.seconds
            )
            description = f"* Created at **{created_at}**\n"

            if feature_table.labels:
                description += "* Labels:\n"
                for key, value in feature_table.labels.items():
                    description += f"    * {key}: **{value}**\n"

            yield TableMetadata(
                "feast",
                self._feast_service,
                project,
                feature_table.name,
                description,
                description_source="feature_table_details",
            )

            yield TableMetadata(
                "feast",
                self._feast_service,
                project,
                feature_table.name,
                f'```\n{yaml.dump(feature_table.to_dict()["spec"]["batchSource"])}```',
                description_source="batch_source",
            )

            if feature_table.stream_source:
                yield TableMetadata(
                    "feast",
                    self._feast_service,
                    project,
                    feature_table.name,
                    f'```\n{yaml.dump(feature_table.to_dict()["spec"]["streamSource"])}```',
                    description_source="stream_source",
                )