def _reconstruct_block(item):
    """
    Build a single pandas Block from a serialized ``item`` dictionary.

    Parameters
    ----------
    item : dict
        Must contain ``'block'`` (the values) and ``'placement'``. The
        presence of one of the optional keys below selects the block kind:

        * ``'dictionary'`` (together with ``'ordered'``): the values are
          treated as codes and wrapped in a pandas Categorical.
        * ``'timezone'``: a timezone-aware datetime block is created.
        * ``'object'``: the values are unpickled into an object block.

    Returns
    -------
    The block returned by ``_int.make_block``.
    """
    values = item['block']
    locs = item['placement']

    if 'dictionary' in item:
        # Dictionary-encoded data: ``values`` holds the category codes.
        categorical = pd.Categorical.from_codes(
            values,
            categories=item['dictionary'],
            ordered=item['ordered'],
        )
        return _int.make_block(categorical, placement=locs,
                               klass=_int.CategoricalBlock)

    if 'timezone' in item:
        tz_dtype = _make_datetimetz(item['timezone'])
        return _int.make_block(values, placement=locs,
                               klass=_int.DatetimeTZBlock, dtype=tz_dtype)

    if 'object' in item:
        # NOTE: unpickling can execute arbitrary code; the payload must
        # come from a trusted source.
        return _int.make_block(builtin_pickle.loads(values), placement=locs,
                               klass=_int.ObjectBlock)

    # No special marker: let pandas infer the block class from the values.
    return _int.make_block(values, placement=locs)
def _reconstruct_block(item):
    """
    Build a single pandas Block from a serialized ``item`` dictionary.

    Parameters
    ----------
    item : dict
        Must contain ``'block'`` (the values) and ``'placement'``. The
        presence of one of the optional keys below selects the block kind:

        * ``'dictionary'`` (together with ``'ordered'``): the values are
          treated as codes and wrapped in a pandas categorical.
        * ``'timezone'``: a timezone-aware datetime block is created.
        * ``'object'``: the values are unpickled into an object block.

    Returns
    -------
    pandas Block
    """
    import pandas.core.internals as _int

    values = item['block']
    locs = item['placement']

    if 'dictionary' in item:
        # Dictionary-encoded data: ``values`` holds the category codes.
        categorical = _pandas_api.categorical_type.from_codes(
            values,
            categories=item['dictionary'],
            ordered=item['ordered'],
        )
        return _int.make_block(categorical, placement=locs,
                               klass=_int.CategoricalBlock)

    if 'timezone' in item:
        tz_dtype = make_datetimetz(item['timezone'])
        return _int.make_block(values, placement=locs,
                               klass=_int.DatetimeTZBlock, dtype=tz_dtype)

    if 'object' in item:
        # NOTE: unpickling can execute arbitrary code; the payload must
        # come from a trusted source.
        return _int.make_block(builtin_pickle.loads(values), placement=locs,
                               klass=_int.ObjectBlock)

    # No special marker: let pandas infer the block class from the values.
    return _int.make_block(values, placement=locs)
def _reconstruct_block(item):
    """
    Build a single pandas Block from a serialized ``item`` dictionary.

    Parameters
    ----------
    item : dict
        Must contain ``'placement'`` and, for every kind except
        ``'py_array'``, ``'block'`` (the values). The presence of one of
        the optional keys below selects the block kind:

        * ``'dictionary'`` (together with ``'ordered'``): the values are
          treated as codes and wrapped in a pandas categorical.
        * ``'timezone'``: a timezone-aware datetime block is created.
        * ``'object'``: the values are unpickled into an object block.
        * ``'py_array'``: a chunked pyarrow array that is converted to a
          pandas ``IntegerArray`` and stored in an ExtensionBlock.

    Returns
    -------
    pandas Block
    """
    import pandas.core.internals as _int

    block_arr = item.get('block', None)
    placement = item['placement']
    if 'dictionary' in item:
        cat = _pandas_api.categorical_type.from_codes(
            block_arr, categories=item['dictionary'],
            ordered=item['ordered'])
        block = _int.make_block(cat, placement=placement,
                                klass=_int.CategoricalBlock)
    elif 'timezone' in item:
        dtype = make_datetimetz(item['timezone'])
        block = _int.make_block(block_arr, placement=placement,
                                klass=_int.DatetimeTZBlock,
                                dtype=dtype)
    elif 'object' in item:
        # NOTE: unpickling can execute arbitrary code; the payload must
        # come from a trusted source.
        block = _int.make_block(builtin_pickle.loads(block_arr),
                                placement=placement, klass=_int.ObjectBlock)
    elif 'py_array' in item:
        arr = item['py_array']
        # TODO have mechanism to know a method to create a
        # pandas ExtensionArray given the pyarrow type
        # Now hardcode here to create a pandas IntegerArray for the example
        np_dtype = arr.type.to_pandas_dtype()
        data_parts = []
        valid_parts = []
        # Convert every chunk, not only the first one, so that no rows are
        # dropped when the column is split over several chunks.
        for chunk in arr.iterchunks():
            n_rows = len(chunk)
            buflist = chunk.buffers()
            data_parts.append(
                np.frombuffer(buflist[-1], dtype=np_dtype)[
                    chunk.offset:chunk.offset + n_rows])
            bitmask = buflist[0]
            if bitmask is not None:
                # The validity bitmap is shared with the parent of a sliced
                # array, so it has to be read starting at the same offset
                # as the data; otherwise the mask is misaligned.
                valid = pa.BooleanArray.from_buffers(
                    pa.bool_(), n_rows, [None, bitmask],
                    offset=chunk.offset)
                valid = np.asarray(valid)
            else:
                # No validity bitmap means that every value is valid.
                valid = np.ones(n_rows, dtype=bool)
            valid_parts.append(valid)

        if data_parts:
            # np.concatenate always allocates a new array, so the result
            # does not alias the (read-only) Arrow buffers.
            data = np.concatenate(data_parts)
            valid = np.concatenate(valid_parts)
        else:
            data = np.empty(0, dtype=np_dtype)
            valid = np.ones(0, dtype=bool)

        # pandas expects a mask that is True for *missing* values.
        block_arr = _pandas_api.pd.arrays.IntegerArray(
            data, ~valid, copy=False)
        # create ExtensionBlock
        block = _int.make_block(block_arr, placement=placement,
                                klass=_int.ExtensionBlock)
    else:
        block = _int.make_block(block_arr, placement=placement)

    return block
def _load_pickle_from_buffer(data):
    """
    Unpickle an object from a buffer-like ``data`` without copying it.

    ``data`` is wrapped in a ``memoryview`` and handed to
    ``builtin_pickle.loads``. Unpickling can execute arbitrary code, so
    ``data`` must come from a trusted source.
    """
    return builtin_pickle.loads(memoryview(data))
def _load_pickle_from_buffer(data):
    """
    Unpickle an object from a buffer-like ``data``.

    ``data`` is wrapped in a ``memoryview``. When ``six.PY2`` is true the
    view is first converted to a bytes string with ``tobytes()``;
    otherwise the view is passed to ``builtin_pickle.loads`` directly.
    Unpickling can execute arbitrary code, so ``data`` must come from a
    trusted source.
    """
    view = memoryview(data)
    payload = view.tobytes() if six.PY2 else view
    return builtin_pickle.loads(payload)
def _reconstruct_block(item, columns=None, extension_columns=None):
    """
    Build a pandas Block from an ``item`` dictionary.

    Depending on which marker key is present in ``item``, the values are
    turned into a categorical block, a timezone-aware datetime block, an
    object block (via unpickling), an ExtensionBlock, or a plain block.

    Parameters
    ----------
    item : dict
        Always holds ``'placement'`` (the pandas block placement). For
        basic types it also holds ``'block'`` with the values. Additional
        keys select special handling: ``'dictionary'`` (with
        ``'ordered'``), ``'timezone'``, ``'object'`` or ``'py_array'``.
    columns :
        Column names of the table being constructed; indexed with the
        single placement position to look up the name of an extension
        column. Only used when ``item`` contains ``'py_array'``.
    extension_columns : dict
        Mapping of ``{column_name: pandas_dtype}`` for the columns that are
        converted to a pandas ExtensionBlock. Only used when ``item``
        contains ``'py_array'``.

    Returns
    -------
    pandas Block

    Raises
    ------
    ValueError
        If the pandas dtype registered for an extension column does not
        define ``__from_arrow__``.
    """
    import pandas.core.internals as _int

    values = item.get('block', None)
    locs = item['placement']

    if 'dictionary' in item:
        # Dictionary-encoded data: ``values`` holds the category codes.
        categorical = _pandas_api.categorical_type.from_codes(
            values,
            categories=item['dictionary'],
            ordered=item['ordered'],
        )
        return _int.make_block(categorical, placement=locs,
                               klass=_int.CategoricalBlock)

    if 'timezone' in item:
        tz_dtype = make_datetimetz(item['timezone'])
        return _int.make_block(values, placement=locs,
                               klass=_int.DatetimeTZBlock, dtype=tz_dtype)

    if 'object' in item:
        # NOTE: unpickling can execute arbitrary code; the payload must
        # come from a trusted source.
        return _int.make_block(builtin_pickle.loads(values), placement=locs,
                               klass=_int.ObjectBlock)

    if 'py_array' in item:
        # Extension column: delegate the conversion to the pandas dtype.
        arrow_values = item['py_array']
        assert len(locs) == 1
        column_name = columns[locs[0]]
        ext_dtype = extension_columns[column_name]
        if not hasattr(ext_dtype, '__from_arrow__'):
            raise ValueError(
                "This column does not support to be converted "
                "to a pandas ExtensionArray"
            )
        ext_values = ext_dtype.__from_arrow__(arrow_values)
        return _int.make_block(ext_values, placement=locs,
                               klass=_int.ExtensionBlock)

    # No special marker: let pandas infer the block class from the values.
    return _int.make_block(values, placement=locs)