def dataframe_to_serialized_dict(frame):
    """Decompose *frame* into a plain dict describing its internal blocks.

    Reads the block manager from ``frame._data`` and emits one dict per
    block, plus the manager's axes.

    Parameters
    ----------
    frame
        Object exposing a ``_data`` attribute with ``axes`` and ``blocks``
        (presumably a pandas DataFrame -- confirm against callers).

    Returns
    -------
    dict
        ``{'blocks': [...], 'axes': [...]}``. Every block entry carries
        ``'placement'`` and ``'block'``. Depending on the block it may also
        carry ``'timezone'`` (tz-aware datetimes), ``'dictionary'`` and
        ``'ordered'`` (categoricals), or ``'object'`` (object dtype, in
        which case ``'block'`` holds pickled bytes instead of the values).
    """
    manager = frame._data
    axes = list(manager.axes)

    serialized_blocks = []
    for blk in manager.blocks:
        payload = blk.values
        entry = {}

        if _pandas_api.is_datetimetz(payload.dtype):
            # Record the timezone separately, then unwrap to the underlying
            # values when the container exposes them.
            entry['timezone'] = pa.lib.tzinfo_to_string(payload.tz)
            if hasattr(payload, 'values'):
                payload = payload.values
        elif _pandas_api.is_categorical(payload):
            # Store categories and ordering alongside the integer codes.
            entry['dictionary'] = payload.categories
            entry['ordered'] = payload.ordered
            payload = payload.codes

        entry['placement'] = blk.mgr_locs.as_array
        entry['block'] = payload

        # Object arrays are pickled instead of being stored as-is; the
        # 'object' key marks such entries.
        if payload.dtype == np.dtype(object):
            entry['object'] = None
            entry['block'] = builtin_pickle.dumps(
                payload, protocol=builtin_pickle.HIGHEST_PROTOCOL)

        serialized_blocks.append(entry)

    return {'blocks': serialized_blocks, 'axes': axes}
def dataframe_to_types(df, preserve_index, columns=None):
    """Determine an Arrow type for each column selected from *df*.

    NOTE(review): a second function with this same name is defined later in
    this file; if both live in one module the later one takes precedence --
    confirm which is intended.

    Parameters
    ----------
    df
        Source frame, forwarded to ``_get_columns_to_convert`` and
        ``construct_metadata``.
    preserve_index
        Forwarded unchanged to ``_get_columns_to_convert`` and
        ``construct_metadata``.
    columns : optional
        Forwarded unchanged to ``_get_columns_to_convert``.

    Returns
    -------
    tuple
        ``(all_names, types, metadata)``. ``types`` has one entry per
        converted column, and ``metadata`` is whatever
        ``construct_metadata`` returns.
    """
    selection = _get_columns_to_convert(df, None, preserve_index, columns)
    (all_names, column_names, _, index_descriptors, index_columns,
     columns_to_convert, _) = selection

    types = []
    for col in columns_to_convert:
        raw = col.values
        if _pandas_api.is_categorical(raw):
            # Categoricals: take the type from a full array conversion.
            arrow_type = pa.array(col, from_pandas=True).type
        elif _pandas_api.is_extension_array_dtype(raw):
            # Extension arrays: converting an empty slice is enough to
            # obtain the type.
            arrow_type = pa.array(col.head(0), from_pandas=True).type
        else:
            # Try to derive the type from the ndarray/dtype alone, and
            # convert the data only when that yields nothing.
            raw, hint = get_datetimetz_type(raw, col.dtype, None)
            arrow_type = pa.lib._ndarray_to_arrow_type(raw, hint)
            if arrow_type is None:
                arrow_type = pa.array(col, from_pandas=True).type
        types.append(arrow_type)

    metadata = construct_metadata(
        columns_to_convert,
        df,
        column_names,
        index_columns,
        index_descriptors,
        preserve_index,
        types,
    )
    return all_names, types, metadata
def dataframe_to_types(df, preserve_index, columns=None):
    """Work out the Arrow type of each column that would be converted.

    NOTE(review): another function with this same name is defined earlier in
    this file; if both live in one module this later definition is the one
    that takes effect -- confirm which is intended.

    Parameters
    ----------
    df
        Source frame, passed through to ``_get_columns_to_convert`` and
        ``construct_metadata``.
    preserve_index
        Passed through to ``_get_columns_to_convert`` and
        ``construct_metadata``.
    columns : optional
        Passed through to ``_get_columns_to_convert``.

    Returns
    -------
    tuple
        ``(all_names, types, metadata)``, where ``types`` is a list with one
        entry per converted column and ``metadata`` is the result of
        ``construct_metadata``.
    """
    (all_names,
     column_names,
     index_descriptors,
     index_columns,
     columns_to_convert,
     _) = _get_columns_to_convert(df, None, preserve_index, columns)

    def _arrow_type_of(column):
        # Resolve the Arrow type for a single column.
        data = column.values
        if _pandas_api.is_categorical(data):
            return pa.array(column, from_pandas=True).type

        # Prefer deriving the type from the ndarray/dtype alone; only fall
        # back to converting the data when that gives no answer.
        data, hinted = get_datetimetz_type(data, column.dtype, None)
        inferred = pa.lib._ndarray_to_arrow_type(data, hinted)
        if inferred is not None:
            return inferred
        return pa.array(column, from_pandas=True).type

    types = [_arrow_type_of(column) for column in columns_to_convert]

    metadata = construct_metadata(
        df,
        column_names,
        index_columns,
        index_descriptors,
        preserve_index,
        types,
    )
    return all_names, types, metadata