コード例 #1
0
def dataframe_to_serialized_dict(frame):
    """Break a frame's internal blocks into a plain dict for serialization.

    Returns a dict with two keys:

    ``'blocks'``
        One dict per block of ``frame._data``. Every entry carries
        ``'placement'`` (``block.mgr_locs.as_array``) and ``'block'`` (the
        payload). Depending on the block it may also carry:

        * ``'timezone'`` -- for timezone-aware datetime blocks;
        * ``'dictionary'`` and ``'ordered'`` -- for categorical blocks, in
          which case the payload is the category codes;
        * ``'object'`` (always ``None``) -- marks that the payload had
          object dtype and was replaced by its pickled bytes.

    ``'axes'``
        A list of the block manager's axes.
    """
    manager = frame._data
    axis_list = list(manager.axes)
    serialized = []

    for blk in manager.blocks:
        data = blk.values
        entry = {}

        if _pandas_api.is_datetimetz(data.dtype):
            entry['timezone'] = pa.lib.tzinfo_to_string(data.tz)
            # Unwrap to the inner array when the container exposes one.
            if hasattr(data, 'values'):
                data = data.values
        elif _pandas_api.is_categorical(data):
            entry['dictionary'] = data.categories
            entry['ordered'] = data.ordered
            data = data.codes

        entry['placement'] = blk.mgr_locs.as_array
        entry['block'] = data

        # Object-dtype payloads are shipped as a pickle; the 'object' key
        # flags that the 'block' entry holds pickled bytes.
        if data.dtype == np.dtype(object):
            entry['object'] = None
            entry['block'] = builtin_pickle.dumps(
                data, protocol=builtin_pickle.HIGHEST_PROTOCOL)

        serialized.append(entry)

    return {'blocks': serialized, 'axes': axis_list}
コード例 #2
0
ファイル: pandas_compat.py プロジェクト: laurentgo/arrow
def _extract_index_level(table, result_table, descr,
                         field_name_to_metadata):
    """Extract one serialized index level and drop it from ``result_table``.

    Parameters
    ----------
    table
        Table in which the index column is looked up by field name
        (accessed through ``schema.get_field_index`` and ``column``;
        presumably a ``pyarrow.Table``).
    result_table
        Working table from which the index column is removed once it has
        been extracted.
    descr
        Mapping whose ``'field_name'`` entry names the serialized index
        column.
    field_name_to_metadata
        Mapping from field name to a metadata mapping; its ``'name'`` entry
        is the logical index name.

    Returns
    -------
    tuple
        ``(result_table, index_level, index_name)``. When the column is not
        present in ``table``, ``result_table`` is returned unchanged and
        both ``index_level`` and ``index_name`` are ``None``.
    """
    field_name = descr['field_name']
    logical_name = field_name_to_metadata[field_name]['name']
    index_name = _backwards_compatible_index_name(field_name, logical_name)
    i = table.schema.get_field_index(field_name)

    if i == -1:
        # The serialized index column was removed by the user.
        # Hand back ``result_table`` (not ``table``): a caller threading
        # ``result_table`` through successive calls must not lose the index
        # columns that earlier calls already removed from it.
        return result_table, None, None

    col = table.column(i)
    col_pandas = col.to_pandas()
    values = col_pandas.values
    if hasattr(values, 'flags') and not values.flags.writeable:
        # ARROW-1054: in pandas 0.19.2, factorize will reject
        # non-writeable arrays when calling MultiIndex.from_arrays
        values = values.copy()

    pd = _pandas_api.pd

    if _pandas_api.is_datetimetz(col_pandas.dtype):
        # Rebuild the tz-aware level by localizing the raw values to UTC
        # and converting to the column's timezone.
        index_level = (pd.Series(values).dt.tz_localize('utc')
                       .dt.tz_convert(col_pandas.dtype.tz))
    else:
        index_level = pd.Series(values, dtype=col_pandas.dtype)
    result_table = result_table.remove_column(
        result_table.schema.get_field_index(field_name)
    )
    return result_table, index_level, index_name
コード例 #3
0
ファイル: pandas_compat.py プロジェクト: youny626/arrow
def _extract_index_level(table, result_table, field_name,
                         field_name_to_metadata):
    """Extract one serialized index level and drop it from ``result_table``.

    Parameters
    ----------
    table
        Table in which the index column is looked up by ``field_name``
        (accessed through ``schema.get_field_index`` and ``column``;
        presumably a ``pyarrow.Table``).
    result_table
        Working table from which the index column is removed once it has
        been extracted.
    field_name
        Name of the serialized index column.
    field_name_to_metadata
        Mapping from field name to a metadata mapping; its ``'name'`` entry
        is the logical index name.

    Returns
    -------
    tuple
        ``(result_table, index_level, index_name)``. When the column is not
        present in ``table``, ``result_table`` is returned unchanged and
        both ``index_level`` and ``index_name`` are ``None``.
    """
    logical_name = field_name_to_metadata[field_name]['name']
    index_name = _backwards_compatible_index_name(field_name, logical_name)
    i = table.schema.get_field_index(field_name)

    if i == -1:
        # The serialized index column was removed by the user.
        # Hand back ``result_table`` (not ``table``): a caller threading
        # ``result_table`` through successive calls must not lose the index
        # columns that earlier calls already removed from it.
        return result_table, None, None

    col = table.column(i)
    col_pandas = col.to_pandas()
    values = col_pandas.values
    if hasattr(values, 'flags') and not values.flags.writeable:
        # ARROW-1054: in pandas 0.19.2, factorize will reject
        # non-writeable arrays when calling MultiIndex.from_arrays
        values = values.copy()

    pd = _pandas_api.pd

    if _pandas_api.is_datetimetz(col_pandas.dtype):
        # Rebuild the tz-aware level by localizing the raw values to UTC
        # and converting to the column's timezone.
        index_level = (pd.Series(values).dt.tz_localize('utc').dt.tz_convert(
            col_pandas.dtype.tz))
    else:
        index_level = pd.Series(values, dtype=col_pandas.dtype)
    result_table = result_table.remove_column(
        result_table.schema.get_field_index(field_name))
    return result_table, index_level, index_name
コード例 #4
0
def get_datetimetz_type(values, dtype, type_):
    """Pick the Arrow type for datetime64 ``values`` when none was supplied.

    ``values`` is always returned untouched. ``type_`` is returned as given
    unless ``values`` has a ``datetime64`` dtype and ``type_`` is ``None``;
    in that case it is replaced by a timestamp type built from ``dtype``'s
    unit and timezone (when ``dtype`` is timezone-aware) or by the type
    derived from the NumPy dtype of ``values``.

    Returns
    -------
    tuple
        ``(values, type_)``.
    """
    if values.dtype.type == np.datetime64:
        tz_aware = _pandas_api.is_datetimetz(dtype)
        if type_ is None:
            if tz_aware:
                # No user type passed: construct a tz-aware timestamp type.
                zone = dtype.tz
                type_ = pa.timestamp(dtype.unit, zone)
            else:
                # Trust the NumPy dtype.
                type_ = pa.from_numpy_dtype(values.dtype)

    return values, type_
コード例 #5
0
ファイル: pandas_compat.py プロジェクト: laurentgo/arrow
def get_datetimetz_type(values, dtype, type_):
    """Resolve the Arrow type to use for ``values``.

    Non-``datetime64`` input, or input accompanied by an explicit
    ``type_``, passes straight through. Otherwise the type is a timestamp
    carrying ``dtype``'s unit and timezone when ``dtype`` is timezone-aware,
    and the type derived from the NumPy dtype of ``values`` when it is not.

    Returns
    -------
    tuple
        ``(values, type_)``, with ``values`` unchanged.
    """
    not_datetime = values.dtype.type != np.datetime64
    if not_datetime:
        return values, type_

    has_timezone = _pandas_api.is_datetimetz(dtype)
    if type_ is not None:
        # A user-supplied type always wins.
        return values, type_

    if has_timezone:
        # Construct a tz-aware timestamp type from the pandas dtype.
        zone = dtype.tz
        resolution = dtype.unit
        resolved = pa.timestamp(resolution, zone)
    else:
        # Trust the NumPy dtype.
        resolved = pa.from_numpy_dtype(values.dtype)

    return values, resolved