import logging
import math
import multiprocessing
import time
from itertools import repeat
from multiprocessing import Pool, Process, Queue
from typing import Optional

import numpy as np
import pandas as pd
import pyarrow as pa
from tqdm import tqdm

_logger = logging.getLogger(__name__)

# NOTE: FeatureSet, DATETIME_COLUMN, _validate_dataframe, _encode_chunk,
# _encode_pa_chunks, _kafka_feature_row_chunk_producer and
# _kafka_feature_row_producer are assumed to be defined elsewhere in this module.


def ingest_kafka(
    feature_set: FeatureSet,
    dataframe: pd.DataFrame,
    max_workers: int,
    timeout: Optional[int] = None,
    chunk_size: int = 5000,
    disable_pbar: bool = False,
):
    """
    Ingest a pandas DataFrame to a Kafka topic for a Feature Set

    Args:
        feature_set: FeatureSet describing the DataFrame.
        dataframe: DataFrame to be ingested.
        max_workers: Maximum number of encoding workers.
        timeout: Maximum time to wait for ingestion to complete.
        chunk_size: Maximum number of rows per encoded chunk.
        disable_pbar: Flag to indicate if tqdm progress bar should be disabled.
    """
    pbar = tqdm(unit="rows", total=dataframe.shape[0], disable=disable_pbar)

    # Validate feature set schema
    _validate_dataframe(dataframe, feature_set)

    # Split dataframe into chunks of at most chunk_size rows
    # (chunk_size is clamped to a minimum of 100 rows)
    num_chunks = max(math.ceil(dataframe.shape[0] / max(chunk_size, 100)), 1)
    df_chunks = np.array_split(dataframe, num_chunks)

    # Create queue through which encoding and production will coordinate
    chunk_queue = Queue()

    # Create a context object to send and receive information across processes
    ctx = multiprocessing.Manager().dict(
        {"success_count": 0, "error_count": 0, "last_exception": ""}
    )

    # Create producer to push feature rows to Kafka
    ingestion_process = Process(
        target=_kafka_feature_row_chunk_producer,
        args=(
            chunk_queue,
            num_chunks,
            feature_set.get_kafka_source_brokers(),
            feature_set.get_kafka_source_topic(),
            ctx,
            pbar,
        ),
    )

    try:
        # Start ingestion process
        print(f"\nIngestion started for {feature_set.name}:{feature_set.version}")
        ingestion_process.start()

        # Create a pool of workers to convert df chunks into feature row chunks
        # and push them onto the queue for the ingestion process to pick up
        with Pool(processes=max_workers) as pool:
            chunks_done = 0
            while chunks_done < num_chunks:
                chunks_to = min(chunks_done + max_workers, len(df_chunks))
                results = pool.starmap_async(
                    _encode_chunk,
                    zip(df_chunks[chunks_done:chunks_to], repeat(feature_set)),
                )

                # Push feature row encoded chunks onto the queue
                for result in results.get():
                    chunk_queue.put(result)
                chunks_done += max_workers
    except Exception as ex:
        _logger.error(f"Exception occurred: {ex}")
    finally:
        # Wait for ingestion to complete, or time out
        ingestion_process.join(timeout=timeout)
        failed_message = (
            ""
            if ctx["error_count"] == 0
            else f"\nFail: {ctx['error_count']}/{dataframe.shape[0]}"
        )
        last_exception_message = (
            ""
            if ctx["last_exception"] == ""
            else f"\nLast exception:\n{ctx['last_exception']}"
        )
        print(
            f"\nIngestion statistics:"
            f"\nSuccess: {ctx['success_count']}/{dataframe.shape[0]}"
            f"{failed_message}"
            f"{last_exception_message}"
        )
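

# --- Illustrative usage of ingest_kafka (a minimal sketch, not part of the
# library). Assumes `my_feature_set` is an already-applied FeatureSet with a
# Kafka source whose datetime column is named "datetime"; the entity and
# feature column names below are hypothetical.
#
#   from datetime import datetime, timezone
#   import pandas as pd
#
#   df = pd.DataFrame(
#       {
#           "datetime": [datetime.now(timezone.utc)] * 3,
#           "driver_id": [1001, 1002, 1003],
#           "trips_today": [5, 2, 8],
#       }
#   )
#   ingest_kafka(
#       feature_set=my_feature_set,
#       dataframe=df,
#       max_workers=4,
#       timeout=600,
#   )
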

def ingest_table_to_kafka(
    feature_set: FeatureSet,
    table: pa.lib.Table,
    max_workers: int,
    chunk_size: int = 5000,
    disable_pbar: bool = False,
    timeout: Optional[int] = None,
) -> None:
    """
    Ingest a PyArrow Table to a Kafka topic for a Feature Set

    Args:
        feature_set: FeatureSet describing the PyArrow table.
        table: PyArrow table to be processed.
        max_workers: Maximum number of workers.
        chunk_size: Maximum size of each chunk when the PyArrow table is batched.
        disable_pbar: Flag to indicate if tqdm progress bar should be disabled.
        timeout: Maximum time before the method times out.
    """
    pbar = tqdm(unit="rows", total=table.num_rows, disable=disable_pbar)

    # Use a small DataFrame to validate the feature set schema
    ref_df = table.to_batches(max_chunksize=100)[0].to_pandas()
    df_datetime_dtype = ref_df[DATETIME_COLUMN].dtype

    # Validate feature set schema
    _validate_dataframe(ref_df, feature_set)

    # Create queue through which encoding and production will coordinate
    row_queue = Queue()

    # Create a context object to send and receive information across processes
    ctx = multiprocessing.Manager().dict(
        {"success_count": 0, "error_count": 0, "last_exception": ""}
    )

    # Create producer to push feature rows to Kafka
    ingestion_process = Process(
        target=_kafka_feature_row_producer,
        args=(
            row_queue,
            table.num_rows,
            feature_set.get_kafka_source_brokers(),
            feature_set.get_kafka_source_topic(),
            ctx,
            pbar,
        ),
    )

    try:
        # Start ingestion process
        print(f"\nIngestion started for {feature_set.name}:{feature_set.version}")
        ingestion_process.start()

        # Iterate over chunks in the table and yield feature rows
        for row in _encode_pa_chunks(
            tbl=table,
            fs=feature_set,
            max_workers=max_workers,
            chunk_size=chunk_size,
            df_datetime_dtype=df_datetime_dtype,
        ):
            # Push rows onto a queue for the production process to pick up
            row_queue.put(row)
            # Apply back-pressure so the queue does not grow unboundedly
            while row_queue.qsize() > chunk_size:
                time.sleep(0.1)
        # Signal the producer that all rows have been enqueued
        row_queue.put(None)
    except Exception as ex:
        _logger.error(f"Exception occurred: {ex}")
    finally:
        # Wait for the Kafka production to complete
        ingestion_process.join(timeout=timeout)
        failed_message = (
            ""
            if ctx["error_count"] == 0
            else f"\nFail: {ctx['error_count']}/{table.num_rows}"
        )
        last_exception_message = (
            ""
            if ctx["last_exception"] == ""
            else f"\nLast exception:\n{ctx['last_exception']}"
        )
        print(
            f"\nIngestion statistics:"
            f"\nSuccess: {ctx['success_count']}/{table.num_rows}"
            f"{failed_message}"
            f"{last_exception_message}"
        )
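

# --- Illustrative usage of ingest_table_to_kafka (a minimal sketch, not part
# of the library). Assumes `my_feature_set` is an already-applied FeatureSet
# with a Kafka source; the DataFrame `df` is the same hypothetical frame used
# in the ingest_kafka example above, converted to a PyArrow table.
#
#   import pyarrow as pa
#
#   table = pa.Table.from_pandas(df)
#   ingest_table_to_kafka(
#       feature_set=my_feature_set,
#       table=table,
#       max_workers=4,
#       chunk_size=5000,
#       timeout=600,
#   )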