Example #1
0
def unconvert(values, dtype, compress=None):

    as_is_ext = isinstance(values, ExtType) and values.code == 0

    if as_is_ext:
        values = values.data

    if dtype == np.object_:
        return np.array(values, dtype=object)

    dtype = pandas_dtype(dtype).base

    if not as_is_ext:
        values = values.encode('latin1')

    if compress == u'zlib':
        import zlib
        values = zlib.decompress(values)
        return np.frombuffer(values, dtype=dtype)

    elif compress == u'blosc':
        import blosc
        values = blosc.decompress(values)
        return np.frombuffer(values, dtype=dtype)

    # from a string
    return np.fromstring(values, dtype=dtype)
Example #2
0
def unconvert(values, dtype, compress=None):

    as_is_ext = isinstance(values, ExtType) and values.code == 0

    if as_is_ext:
        values = values.data

    if dtype == np.object_:
        return np.array(values, dtype=object)

    dtype = pandas_dtype(dtype).base

    if not as_is_ext:
        values = values.encode('latin1')

    if compress == u'zlib':
        import zlib
        values = zlib.decompress(values)
        return np.frombuffer(values, dtype=dtype)

    elif compress == u'blosc':
        import blosc
        values = blosc.decompress(values)
        return np.frombuffer(values, dtype=dtype)

    # from a string
    return np.fromstring(values, dtype=dtype)
Example #3
0
 def astype(self, dtype):
     dtype = pandas_dtype(dtype)
     if is_float_dtype(dtype) or is_integer_dtype(dtype):
         values = self._values.astype(dtype)
     elif is_object_dtype(dtype):
         values = self._values
     else:
         raise TypeError('Setting %s dtype to anything other than '
                         'float64 or object is not supported' %
                         self.__class__)
     return Index(values, name=self.name, dtype=dtype)
Example #4
0
 def astype(self, dtype):
     dtype = pandas_dtype(dtype)
     if is_float_dtype(dtype) or is_integer_dtype(dtype):
         values = self._values.astype(dtype)
     elif is_object_dtype(dtype):
         values = self._values
     else:
         raise TypeError('Setting %s dtype to anything other than '
                         'float64 or object is not supported' %
                         self.__class__)
     return Index(values, name=self.name, dtype=dtype)
Example #5
0
def unconvert(values, dtype, compress=None):

    as_is_ext = isinstance(values, ExtType) and values.code == 0

    if as_is_ext:
        values = values.data

    if is_categorical_dtype(dtype):
        return values

    elif is_object_dtype(dtype):
        return np.array(values, dtype=object)

    dtype = pandas_dtype(dtype).base

    if not as_is_ext:
        values = values.encode('latin1')

    if compress:
        if compress == u'zlib':
            _check_zlib()
            decompress = zlib.decompress
        elif compress == u'blosc':
            _check_blosc()
            decompress = blosc.decompress
        else:
            raise ValueError("compress must be one of 'zlib' or 'blosc'")

        try:
            return np.frombuffer(
                _move_into_mutable_buffer(decompress(values)),
                dtype=dtype,
            )
        except _BadMove as e:
            # Pull the decompressed data off of the `_BadMove` exception.
            # We don't just store this in the locals because we want to
            # minimize the risk of giving users access to a `bytes` object
            # whose data is also given to a mutable buffer.
            values = e.args[0]
            if len(values) > 1:
                # The empty string and single characters are memoized in many
                # string creating functions in the capi. This case should not
                # warn even though we need to make a copy because we are only
                # copying at most 1 byte.
                warnings.warn(
                    'copying data after decompressing; this may mean that'
                    ' decompress is caching its result',
                    PerformanceWarning,
                )
                # fall through to copying `np.fromstring`

    # Copy the string into a numpy array.
    return np.fromstring(values, dtype=dtype)
Example #6
0
 def astype(self, dtype, copy=True):
     dtype = pandas_dtype(dtype)
     if is_float_dtype(dtype):
         values = self._values.astype(dtype, copy=copy)
     elif is_integer_dtype(dtype):
         if self.hasnans:
             raise ValueError('cannot convert float NaN to integer')
         values = self._values.astype(dtype, copy=copy)
     elif is_object_dtype(dtype):
         values = self._values.astype('object', copy=copy)
     else:
         raise TypeError('Setting %s dtype to anything other than '
                         'float64 or object is not supported' %
                         self.__class__)
     return Index(values, name=self.name, dtype=dtype)
Example #7
0
def decode(obj):
    """
    Decoder for deserializing numpy data types.
    """

    typ = obj.get(u'typ')
    if typ is None:
        return obj
    elif typ == u'timestamp':
        return Timestamp(obj[u'value'], tz=obj[u'tz'], offset=obj[u'offset'])
    elif typ == u'nat':
        return NaT
    elif typ == u'period':
        return Period(ordinal=obj[u'ordinal'], freq=obj[u'freq'])
    elif typ == u'index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype,
                         obj.get(u'compress'))
        return globals()[obj[u'klass']](data, dtype=dtype, name=obj[u'name'])
    elif typ == u'range_index':
        return globals()[obj[u'klass']](obj[u'start'],
                                        obj[u'stop'],
                                        obj[u'step'],
                                        name=obj[u'name'])
    elif typ == u'multi_index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype,
                         obj.get(u'compress'))
        data = [tuple(x) for x in data]
        return globals()[obj[u'klass']].from_tuples(data, names=obj[u'names'])
    elif typ == u'period_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'])
        return globals()[obj[u'klass']](data, **d)
    elif typ == u'datetime_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False)
        result = globals()[obj[u'klass']](data, **d)
        tz = obj[u'tz']

        # reverse tz conversion
        if tz is not None:
            result = result.tz_localize('UTC').tz_convert(tz)
        return result

    elif typ == u'series':
        dtype = dtype_for(obj[u'dtype'])
        pd_dtype = pandas_dtype(dtype)
        np_dtype = pandas_dtype(dtype).base
        index = obj[u'index']
        result = globals()[obj[u'klass']](unconvert(obj[u'data'], dtype,
                                                    obj[u'compress']),
                                          index=index,
                                          dtype=np_dtype,
                                          name=obj[u'name'])
        tz = getattr(pd_dtype, 'tz', None)
        if tz:
            result = result.dt.tz_localize('UTC').dt.tz_convert(tz)
        return result

    elif typ == u'block_manager':
        axes = obj[u'axes']

        def create_block(b):
            values = unconvert(b[u'values'], dtype_for(b[u'dtype']),
                               b[u'compress']).reshape(b[u'shape'])

            # locs handles duplicate column names, and should be used instead
            # of items; see GH 9618
            if u'locs' in b:
                placement = b[u'locs']
            else:
                placement = axes[0].get_indexer(b[u'items'])
            return make_block(values=values,
                              klass=getattr(internals, b[u'klass']),
                              placement=placement,
                              dtype=b[u'dtype'])

        blocks = [create_block(b) for b in obj[u'blocks']]
        return globals()[obj[u'klass']](BlockManager(blocks, axes))
    elif typ == u'datetime':
        return parse(obj[u'data'])
    elif typ == u'datetime64':
        return np.datetime64(parse(obj[u'data']))
    elif typ == u'date':
        return parse(obj[u'data']).date()
    elif typ == u'timedelta':
        return timedelta(*obj[u'data'])
    elif typ == u'timedelta64':
        return np.timedelta64(int(obj[u'data']))
    # elif typ == 'sparse_series':
    #    dtype = dtype_for(obj['dtype'])
    #    return globals()[obj['klass']](
    #        unconvert(obj['sp_values'], dtype, obj['compress']),
    #        sparse_index=obj['sp_index'], index=obj['index'],
    #        fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
    # elif typ == 'sparse_dataframe':
    #    return globals()[obj['klass']](
    #        obj['data'], columns=obj['columns'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind']
    #    )
    # elif typ == 'sparse_panel':
    #    return globals()[obj['klass']](
    #        obj['data'], items=obj['items'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind'])
    elif typ == u'block_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'blocs'],
                                        obj[u'blengths'])
    elif typ == u'int_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'indices'])
    elif typ == u'ndarray':
        return unconvert(obj[u'data'], np.typeDict[obj[u'dtype']],
                         obj.get(u'compress')).reshape(obj[u'shape'])
    elif typ == u'np_scalar':
        if obj.get(u'sub_typ') == u'np_complex':
            return c2f(obj[u'real'], obj[u'imag'], obj[u'dtype'])
        else:
            dtype = dtype_for(obj[u'dtype'])
            try:
                return dtype(obj[u'data'])
            except:
                return dtype.type(obj[u'data'])
    elif typ == u'np_complex':
        return complex(obj[u'real'] + u'+' + obj[u'imag'] + u'j')
    elif isinstance(obj, (dict, list, set)):
        return obj
    else:
        return obj
Example #8
0
def decode(obj):
    """
    Decoder for deserializing numpy data types.
    """

    typ = obj.get(u'typ')
    if typ is None:
        return obj
    elif typ == u'timestamp':
        return Timestamp(obj[u'value'], tz=obj[u'tz'], offset=obj[u'offset'])
    elif typ == u'nat':
        return NaT
    elif typ == u'period':
        return Period(ordinal=obj[u'ordinal'], freq=obj[u'freq'])
    elif typ == u'index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype, obj.get(u'compress'))
        return globals()[obj[u'klass']](data, dtype=dtype, name=obj[u'name'])
    elif typ == u'range_index':
        return globals()[obj[u'klass']](obj[u'start'],
                                        obj[u'stop'],
                                        obj[u'step'],
                                        name=obj[u'name'])
    elif typ == u'multi_index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype, obj.get(u'compress'))
        data = [tuple(x) for x in data]
        return globals()[obj[u'klass']].from_tuples(data, names=obj[u'names'])
    elif typ == u'period_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'])
        return globals()[obj[u'klass']](data, **d)
    elif typ == u'datetime_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False)
        result = globals()[obj[u'klass']](data, **d)
        tz = obj[u'tz']

        # reverse tz conversion
        if tz is not None:
            result = result.tz_localize('UTC').tz_convert(tz)
        return result

    elif typ == u'series':
        dtype = dtype_for(obj[u'dtype'])
        pd_dtype = pandas_dtype(dtype)
        np_dtype = pandas_dtype(dtype).base
        index = obj[u'index']
        result = globals()[obj[u'klass']](unconvert(obj[u'data'], dtype,
                                                    obj[u'compress']),
                                          index=index,
                                          dtype=np_dtype,
                                          name=obj[u'name'])
        tz = getattr(pd_dtype, 'tz', None)
        if tz:
            result = result.dt.tz_localize('UTC').dt.tz_convert(tz)
        return result

    elif typ == u'block_manager':
        axes = obj[u'axes']

        def create_block(b):
            values = unconvert(b[u'values'], dtype_for(b[u'dtype']),
                               b[u'compress']).reshape(b[u'shape'])

            # locs handles duplicate column names, and should be used instead
            # of items; see GH 9618
            if u'locs' in b:
                placement = b[u'locs']
            else:
                placement = axes[0].get_indexer(b[u'items'])
            return make_block(values=values,
                              klass=getattr(internals, b[u'klass']),
                              placement=placement,
                              dtype=b[u'dtype'])

        blocks = [create_block(b) for b in obj[u'blocks']]
        return globals()[obj[u'klass']](BlockManager(blocks, axes))
    elif typ == u'datetime':
        return parse(obj[u'data'])
    elif typ == u'datetime64':
        return np.datetime64(parse(obj[u'data']))
    elif typ == u'date':
        return parse(obj[u'data']).date()
    elif typ == u'timedelta':
        return timedelta(*obj[u'data'])
    elif typ == u'timedelta64':
        return np.timedelta64(int(obj[u'data']))
    # elif typ == 'sparse_series':
    #    dtype = dtype_for(obj['dtype'])
    #    return globals()[obj['klass']](
    #        unconvert(obj['sp_values'], dtype, obj['compress']),
    #        sparse_index=obj['sp_index'], index=obj['index'],
    #        fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
    # elif typ == 'sparse_dataframe':
    #    return globals()[obj['klass']](
    #        obj['data'], columns=obj['columns'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind']
    #    )
    # elif typ == 'sparse_panel':
    #    return globals()[obj['klass']](
    #        obj['data'], items=obj['items'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind'])
    elif typ == u'block_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'blocs'],
                                        obj[u'blengths'])
    elif typ == u'int_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'indices'])
    elif typ == u'ndarray':
        return unconvert(obj[u'data'], np.typeDict[obj[u'dtype']],
                         obj.get(u'compress')).reshape(obj[u'shape'])
    elif typ == u'np_scalar':
        if obj.get(u'sub_typ') == u'np_complex':
            return c2f(obj[u'real'], obj[u'imag'], obj[u'dtype'])
        else:
            dtype = dtype_for(obj[u'dtype'])
            try:
                return dtype(obj[u'data'])
            except:
                return dtype.type(obj[u'data'])
    elif typ == u'np_complex':
        return complex(obj[u'real'] + u'+' + obj[u'imag'] + u'j')
    elif isinstance(obj, (dict, list, set)):
        return obj
    else:
        return obj