Beispiel #1
0
def _reconstruct_block(item):
    """
    Build a pandas Block from a serialized block description.

    ``item`` must provide the keys 'block' and 'placement'. The presence of
    one of the following keys selects the kind of Block that is created:

    - 'dictionary' (together with 'ordered'): a CategoricalBlock whose codes
      are the 'block' values
    - 'timezone': a DatetimeTZBlock with the matching tz-aware dtype
    - 'object': an ObjectBlock holding the unpickled 'block' payload

    Without any of these keys the 'block' values are wrapped as they are.
    """
    values = item['block']
    locs = item['placement']

    if 'dictionary' in item:
        categorical = pd.Categorical.from_codes(
            values, categories=item['dictionary'], ordered=item['ordered'])
        return _int.make_block(categorical, placement=locs,
                               klass=_int.CategoricalBlock)

    if 'timezone' in item:
        tz_dtype = _make_datetimetz(item['timezone'])
        return _int.make_block(values, placement=locs,
                               klass=_int.DatetimeTZBlock, dtype=tz_dtype)

    if 'object' in item:
        # NOTE(review): the payload is unpickled here; pickle must only be
        # fed data from a trusted source.
        return _int.make_block(builtin_pickle.loads(values),
                               placement=locs, klass=_int.ObjectBlock)

    return _int.make_block(values, placement=locs)
Beispiel #2
0
def _reconstruct_block(item):
    """
    Build a pandas Block from a serialized block description.

    The 'block' and 'placement' entries of ``item`` are always read. An
    additional 'dictionary' (plus 'ordered'), 'timezone' or 'object' key
    switches the result to a categorical, tz-aware datetime or object
    Block respectively; otherwise the values are wrapped unchanged.
    """
    import pandas.core.internals as _int

    values = item['block']
    # Keyword arguments for the single make_block call at the end
    options = {'placement': item['placement']}

    if 'dictionary' in item:
        # The stored values are the category codes
        values = _pandas_api.categorical_type.from_codes(
            values, categories=item['dictionary'], ordered=item['ordered'])
        options['klass'] = _int.CategoricalBlock
    elif 'timezone' in item:
        options['dtype'] = make_datetimetz(item['timezone'])
        options['klass'] = _int.DatetimeTZBlock
    elif 'object' in item:
        # NOTE(review): the payload is unpickled here; pickle must only be
        # fed data from a trusted source.
        values = builtin_pickle.loads(values)
        options['klass'] = _int.ObjectBlock

    return _int.make_block(values, **options)
Beispiel #3
0
def _reconstruct_block(item):
    """
    Build a pandas Block from a serialized block description.

    ``item`` must provide 'placement'; 'block' is optional (it is absent
    for the 'py_array' case). One of the following keys selects the kind
    of Block that is created:

    - 'dictionary' (together with 'ordered'): CategoricalBlock built from
      the 'block' codes
    - 'timezone': DatetimeTZBlock with the matching tz-aware dtype
    - 'object': ObjectBlock holding the unpickled 'block' payload
    - 'py_array': ExtensionBlock wrapping a pandas IntegerArray created
      from the first chunk of the given pyarrow array

    Without any of these keys the 'block' values are wrapped as they are.
    """
    import pandas.core.internals as _int

    block_arr = item.get('block')
    placement = item['placement']
    if 'dictionary' in item:
        cat = _pandas_api.categorical_type.from_codes(
            block_arr, categories=item['dictionary'], ordered=item['ordered'])
        block = _int.make_block(cat,
                                placement=placement,
                                klass=_int.CategoricalBlock)
    elif 'timezone' in item:
        dtype = make_datetimetz(item['timezone'])
        block = _int.make_block(block_arr,
                                placement=placement,
                                klass=_int.DatetimeTZBlock,
                                dtype=dtype)
    elif 'object' in item:
        # NOTE(review): the payload is unpickled here; pickle must only be
        # fed data from a trusted source.
        block = _int.make_block(builtin_pickle.loads(block_arr),
                                placement=placement,
                                klass=_int.ObjectBlock)
    elif 'py_array' in item:
        arr = item['py_array']
        # TODO have mechanism to know a method to create a
        # pandas ExtensionArray given the pyarrow type
        # Now hardcode here to create a pandas IntegerArray for the example
        # NOTE: only the first chunk is converted; further chunks are ignored
        arr = arr.chunk(0)
        buflist = arr.buffers()
        # The last buffer holds the values; honour the array's offset so
        # that sliced arrays yield the right window
        start = arr.offset
        data = np.frombuffer(
            buflist[-1],
            dtype=arr.type.to_pandas_dtype())[start:start + len(arr)]
        bitmask = buflist[0]
        if bitmask is not None:
            # The validity bitmap is shared with the unsliced parent, so it
            # must be read with the same offset as the data buffer;
            # otherwise the mask is misaligned for sliced arrays
            mask = pa.BooleanArray.from_buffers(pa.bool_(), len(arr),
                                                [None, bitmask],
                                                offset=start)
            mask = np.asarray(mask)
        else:
            # No bitmap means every value is valid
            mask = np.ones(len(arr), dtype=bool)
        # IntegerArray expects True for missing values, hence the inversion
        # of the validity mask
        block_arr = _pandas_api.pd.arrays.IntegerArray(data.copy(),
                                                       ~mask,
                                                       copy=False)
        # create ExtensionBlock
        block = _int.make_block(block_arr,
                                placement=placement,
                                klass=_int.ExtensionBlock)
    else:
        block = _int.make_block(block_arr, placement=placement)

    return block
Beispiel #4
0
def _load_pickle_from_buffer(data):
    """Unpickle and return the object stored in the buffer ``data``.

    ``data`` is wrapped in a memoryview, which is handed to the pickle
    loader.
    """
    # NOTE(review): pickle must only be fed data from a trusted source.
    return builtin_pickle.loads(memoryview(data))
def _load_pickle_from_buffer(data):
    """Unpickle and return the object stored in the buffer ``data``.

    ``data`` is wrapped in a memoryview. On Python 2 the view is converted
    to a bytes object before loading; otherwise the view itself is passed
    to the pickle loader.
    """
    view = memoryview(data)
    payload = view.tobytes() if six.PY2 else view
    # NOTE(review): pickle must only be fed data from a trusted source.
    return builtin_pickle.loads(payload)
Beispiel #6
0
def _reconstruct_block(item, columns=None, extension_columns=None):
    """
    Build a pandas Block out of an ``item`` dictionary.

    Depending on the keys found in ``item`` the result is a categorical
    Block ('dictionary'), a timezone-aware datetime Block ('timezone'), an
    object Block holding an unpickled payload ('object'), an ExtensionBlock
    ('py_array') or, when none of these keys is present, a Block wrapping
    the 'block' values as they are.

    Parameters
    ----------
    item : dict
        Always needs a 'placement' entry. 'block' holds the values for all
        cases except 'py_array', where the array to convert is stored under
        the 'py_array' key instead. The 'dictionary' case additionally
        reads the 'ordered' entry.
    columns : sequence, optional
        Column names, indexed by the (single) placement position. Only
        used for the 'py_array' case.
    extension_columns : dict, optional
        Mapping of column name to pandas dtype. Only used for the
        'py_array' case; the dtype has to implement ``__from_arrow__``.

    Returns
    -------
    pandas Block

    Raises
    ------
    ValueError
        If the dtype registered for a 'py_array' column does not have a
        ``__from_arrow__`` attribute.
    """
    import pandas.core.internals as _int

    values = item.get('block', None)
    locs = item['placement']

    if 'dictionary' in item:
        # The stored values are the category codes
        categorical = _pandas_api.categorical_type.from_codes(
            values, categories=item['dictionary'],
            ordered=item['ordered'])
        return _int.make_block(categorical, placement=locs,
                               klass=_int.CategoricalBlock)

    if 'timezone' in item:
        tz_dtype = make_datetimetz(item['timezone'])
        return _int.make_block(values, placement=locs,
                               klass=_int.DatetimeTZBlock,
                               dtype=tz_dtype)

    if 'object' in item:
        # NOTE(review): the payload is unpickled here; pickle must only be
        # fed data from a trusted source.
        return _int.make_block(builtin_pickle.loads(values),
                               placement=locs, klass=_int.ObjectBlock)

    if 'py_array' in item:
        # Extension columns are handled one column per block
        arrow_arr = item['py_array']
        assert len(locs) == 1
        column_name = columns[locs[0]]
        ext_dtype = extension_columns[column_name]
        if not hasattr(ext_dtype, '__from_arrow__'):
            raise ValueError("This column does not support to be converted "
                             "to a pandas ExtensionArray")
        converted = ext_dtype.__from_arrow__(arrow_arr)
        return _int.make_block(converted, placement=locs,
                               klass=_int.ExtensionBlock)

    return _int.make_block(values, placement=locs)
Beispiel #7
0
def _load_pickle_from_buffer(data):
    """Unpickle and return the object stored in the buffer ``data``.

    A memoryview of ``data`` is passed to the pickle loader directly,
    except on Python 2, where it is first converted to a bytes object.
    """
    # NOTE(review): pickle must only be fed data from a trusted source.
    view = memoryview(data)
    if not six.PY2:
        return builtin_pickle.loads(view)
    return builtin_pickle.loads(view.tobytes())