Example #1
0
def _convert_arrow_to_proto(
    table: pyarrow.Table, feature_view: FeatureView
) -> List[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime,
                Optional[datetime]]]:
    """Convert an Arrow table into per-row proto tuples ready to write.

    Args:
        table: Source pyarrow table. Must contain a column for each entity
            of the feature view, each feature, the event timestamp column,
            and (if configured) the created timestamp column.
        feature_view: Feature view whose entities, features, and input
            timestamp columns describe which table columns to read.

    Returns:
        One tuple per table row:
        (entity key proto, {feature name: value proto}, event timestamp,
        created timestamp or None).
    """
    rows_to_write = []

    # Hoist all column-index lookups out of the per-row loop.
    # list.index() is O(n_columns) per call, which made the original loop
    # O(rows * columns); a dict lookup built once makes it O(rows).
    column_idx = {name: i for i, name in enumerate(table.column_names)}
    entity_idxs = [(name, column_idx[name]) for name in feature_view.entities]
    feature_idxs = [(f.name, column_idx[f.name]) for f in feature_view.features]
    event_timestamp_idx = column_idx[feature_view.input.event_timestamp_column]
    if feature_view.input.created_timestamp_column is not None:
        created_timestamp_idx = column_idx[
            feature_view.input.created_timestamp_column]
    else:
        created_timestamp_idx = None

    for row in zip(*table.to_pydict().values()):
        entity_key = EntityKeyProto()
        for entity_name, idx in entity_idxs:
            entity_key.entity_names.append(entity_name)
            entity_key.entity_values.append(
                python_value_to_proto_value(row[idx]))
        feature_dict = {
            name: python_value_to_proto_value(row[idx])
            for name, idx in feature_idxs
        }
        event_timestamp = row[event_timestamp_idx]
        created_timestamp = (
            row[created_timestamp_idx]
            if created_timestamp_idx is not None else None)

        rows_to_write.append(
            (entity_key, feature_dict, event_timestamp, created_timestamp))
    return rows_to_write
Example #2
0
def _convert_arrow_to_proto(
    table: pyarrow.Table,
    feature_view: FeatureView,
    join_keys: List[str],
) -> List[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime,
                Optional[datetime]]]:
    """Convert an Arrow table into per-row proto tuples ready to write.

    Args:
        table: Source pyarrow table. Must contain a column for each join
            key, each feature, the batch source's event timestamp column,
            and (if configured) its created timestamp column.
        feature_view: Feature view whose features and batch-source
            timestamp columns describe which table columns to read.
        join_keys: Names of the entity join-key columns.

    Returns:
        One tuple per table row:
        (entity key proto, {feature name: value proto}, event timestamp,
        created timestamp or None).
    """
    rows_to_write = []

    def _coerce_datetime(ts):
        """
        Depending on underlying time resolution, arrow to_pydict() sometimes returns pandas
        timestamp type (for nanosecond resolution), and sometimes you get standard python datetime
        (for microsecond resolution).

        While pandas timestamp class is a subclass of python datetime, it doesn't always behave the
        same way. We convert it to normal datetime so that consumers downstream don't have to deal
        with these quirks.
        """

        if isinstance(ts, pandas.Timestamp):
            return ts.to_pydatetime()
        return ts

    column_names_idx = {k: i for i, k in enumerate(table.column_names)}

    # Hoist loop-invariant index lookups out of the per-row loop; only the
    # row values themselves change from iteration to iteration.
    join_key_idxs = [(k, column_names_idx[k]) for k in join_keys]
    feature_idxs = [
        (f.name, column_names_idx[f.name], f.dtype)
        for f in feature_view.features
    ]
    event_timestamp_idx = column_names_idx[
        feature_view.batch_source.event_timestamp_column]
    created_timestamp_idx = (
        column_names_idx[feature_view.batch_source.created_timestamp_column]
        if feature_view.batch_source.created_timestamp_column
        else None
    )

    for row in zip(*table.to_pydict().values()):
        entity_key = EntityKeyProto()
        for join_key, idx in join_key_idxs:
            entity_key.join_keys.append(join_key)
            entity_key.entity_values.append(
                python_value_to_proto_value(row[idx]))
        feature_dict = {
            name: python_value_to_proto_value(row[idx], dtype)
            for name, idx, dtype in feature_idxs
        }
        event_timestamp = _coerce_datetime(row[event_timestamp_idx])
        created_timestamp = (
            _coerce_datetime(row[created_timestamp_idx])
            if created_timestamp_idx is not None
            else None
        )

        rows_to_write.append(
            (entity_key, feature_dict, event_timestamp, created_timestamp))
    return rows_to_write
Example #3
0
 def extract_batch(self, pa_table: pa.Table) -> dict:
     """Convert *pa_table* to a plain Python dict via pyarrow's ``to_pydict``."""
     batch = pa_table.to_pydict()
     return batch
Example #4
0
 def extract_row(self, pa_table: pa.Table) -> dict:
     """Convert *pa_table* to a dict and flatten it with ``_unnest``.

     ``_unnest`` is defined elsewhere in this module; presumably it collapses
     the single-row column lists into scalar values — verify against its
     definition.
     """
     as_columns = pa_table.to_pydict()
     return _unnest(as_columns)
Example #5
0
def assert_arrow_table_equals(actual: pyarrow.Table, expected: pyarrow.Table):
    """Assert that two Arrow tables hold equal data.

    Tables are compared via ``to_pydict``, i.e. by column names and Python
    values; intended for use inside tests.
    """
    actual_data = actual.to_pydict()
    expected_data = expected.to_pydict()
    assert actual_data == expected_data