Esempio n. 1
0
def add_per_vector_min_max_to_table_schema_metadata(
        table: pa.Table, per_vector_min_max: Dict[str, dict]) -> pa.Table:
    """Return ``table`` with the per-vector min/max dict in its schema metadata.

    The dict is wrapped as ``{_PER_VECTOR_MIN_MAX_KEY: per_vector_min_max}``,
    serialised with ``json.dumps`` and stored under
    ``_MAIN_WEBVIZ_METADATA_KEY``. Metadata entries already present on the
    table's schema are carried over into the new metadata dict.
    """
    current_meta = table.schema.metadata

    # Work on a copy so the dict obtained from the schema is never modified.
    merged_meta = dict(current_meta) if current_meta is not None else {}
    merged_meta[_MAIN_WEBVIZ_METADATA_KEY] = json.dumps(
        {_PER_VECTOR_MIN_MAX_KEY: per_vector_min_max})

    return table.replace_schema_metadata(merged_meta)
Esempio n. 2
0
def _update_metadata(table: pa.Table, new_metadata=None) -> pa.Table:
    """
    Serialise user-defined table-level metadata as JSON-encoded byte strings
    and merge it into the existing table metadata.

    Args:
        table: table whose schema metadata should be extended.
        new_metadata: mapping of metadata key to a JSON-serialisable value.
            ``None`` or an empty mapping leaves the table untouched.

    Returns:
        ``table`` itself when there is nothing to add, otherwise the table
        returned by ``replace_schema_metadata`` with the merged metadata.

    Raises:
        TypeError: if a value in ``new_metadata`` is not JSON-serialisable.
    """
    # with help from stackoverflow users 3519145 'thomas' and 289784 'suvayu'

    if not new_metadata:
        return table

    # ``schema.metadata`` is None for a table without metadata; start from an
    # empty dict in that case and never modify the dict handed out by pyarrow.
    merged_metadata = dict(table.schema.metadata or {})

    for key, value in new_metadata.items():
        # pyarrow hands existing metadata keys back as bytes. Normalise str
        # keys so an update replaces the existing entry instead of adding a
        # second entry with the same key.
        if isinstance(key, str):
            key = key.encode("utf-8")
        merged_metadata[key] = json.dumps(value).encode("utf-8")

    return table.replace_schema_metadata(merged_metadata)
Esempio n. 3
0
def table_cast(table: pa.Table, schema: pa.Schema):
    """Cast ``table`` to ``schema``, going through feature types when needed.

    Unlike a plain ``pa.Table.cast``, a schema mismatch is resolved by building
    ``Features`` from the target schema and delegating to
    ``cast_table_to_features``.

    Args:
        table (pa.Table): PyArrow table to cast
        schema (pa.Schema): target PyArrow schema.

    Returns:
        pa.Table: the cast table. When the schemas compare equal and only the
        schema metadata differs, the table with its metadata replaced; when
        nothing differs, ``table`` itself.
    """
    if table.schema != schema:
        # Imported here rather than at module level, as in the original code.
        from .features import Features

        target_features = Features.from_arrow_schema(schema)
        return cast_table_to_features(table, target_features)

    # Schemas compare equal; only the attached metadata may still differ.
    if table.schema.metadata != schema.metadata:
        return table.replace_schema_metadata(schema.metadata)

    return table