Exemple #1
0
    def __init__(self, values, items, ref_items, ndim=2):
        """
        Store a values array together with the labels that describe it.

        Parameters
        ----------
        values : ndarray
            Must have exactly `ndim` dimensions; arrays whose dtype type is a
            string type are re-wrapped with object dtype
        items : sequence
            One label per entry along the first axis of `values`
        ref_items : sequence
            Stored as `self.ref_items` after coercion to an Index
        ndim : int, default 2
            Required dimensionality of `values`
        """
        holds_strings = issubclass(values.dtype.type, basestring)
        if holds_strings:
            values = np.array(values, dtype=object)

        assert values.ndim == ndim
        assert len(values) == len(items)

        self.values, self.ndim = values, ndim
        self.items = _ensure_index(items)
        self.ref_items = _ensure_index(ref_items)
        self._check_integrity()
Exemple #2
0
    def get_indexer(self, target, method=None):
        """
        Compute the indexer and mask for `target` relative to this index.

        Parameters
        ----------
        target : Index
            Coerced to an Index before the lookup
        method : string, optional
            Fill method name; upper-cased before use, with 'ffill' treated as
            'pad' and 'bfill' treated as 'backfill'

        Returns
        -------
        (indexer, mask)
        """
        fill_method = method.upper() if method else method

        target = _ensure_index(target)

        # Map the short spellings onto the names passed to getFillVec
        canonical = {'FFILL': 'PAD', 'BFILL': 'BACKFILL'}
        fill_method = canonical.get(fill_method, fill_method)

        fill_vec, found = _tseries.getFillVec(self, target, self.indexMap,
                                              target.indexMap, fill_method)
        return fill_vec, found
Exemple #3
0
    def reindex_items(self, new_items):
        """
        Conform the items (axis 0) of this manager to `new_items`.

        Each block is reindexed with `reindex_items_from` and kept only if it
        still has at least one item. Requested items that the current items
        do not contain are collected into one float64 block filled with NaN.

        Parameters
        ----------
        new_items : Index or array-like
            Coerced to an Index

        Returns
        -------
        BlockManager
        """
        new_items = _ensure_index(new_items)
        if not self.is_consolidated():
            # Redo the whole operation on a consolidated manager
            return self.consolidate().reindex_items(new_items)

        # TODO: this part could be faster (!)
        new_items, _, mask = self.items.reindex(new_items)
        # Use ``~`` for the logical complement: unary minus on a boolean
        # ndarray raises TypeError on NumPy >= 1.13 (assumes mask is boolean,
        # which the boolean indexing below already requires).
        notmask = ~mask

        new_blocks = []
        for block in self.blocks:
            newb = block.reindex_items_from(new_items)
            if len(newb.items) > 0:
                new_blocks.append(newb)

        if notmask.any():
            extra_items = new_items[notmask]

            # Same shape as the manager except along the items axis
            block_shape = list(self.shape)
            block_shape[0] = len(extra_items)
            block_values = np.empty(block_shape, dtype=np.float64)
            block_values.fill(nan)
            na_block = make_block(block_values, extra_items, new_items)
            new_blocks.append(na_block)
            new_blocks = _consolidate(new_blocks, new_items)

        new_axes = list(self.axes)
        new_axes[0] = new_items

        return BlockManager(new_blocks, new_axes)
Exemple #4
0
    def _init_dict(self, data, axes, dtype=None):
        """
        Build a consolidated BlockManager from a dict of data.

        NOTE(review): this method looks half-refactored and cannot run as
        written; see the inline notes on `index`, `intersect` and
        `homogenized` below. `dtype` is accepted but not used in the body.

        Parameters
        ----------
        data : dict
        axes : sequence
            Only ``axes[0]`` (the items) is read here
        dtype : optional
            Currently unused

        Returns
        -------
        BlockManager (consolidated)
        """
        items = axes[0]

        # prefilter if items passed
        if items is not None:
            items = _ensure_index(items)
            data = dict((k, v) for k, v in data.iteritems() if k in items)
        else:
            items = Index(_try_sort(data.keys()))

        # figure out the index, if necessary
        # NOTE(review): `index` is assigned later in this function, so it is a
        # local name and this read happens before any assignment; it raises
        # UnboundLocalError. Presumably it was meant to come from `axes`.
        if index is None:
            index = extract_index(data)

        # don't force copy because getting jammed in an ndarray anyway
        # homogenized = _homogenize(data, index, columns, dtype)

        # NOTE(review): `intersect` is neither a parameter nor a local here;
        # unless a module-level name exists this raises NameError. The
        # filtered `items` computed above is not used after this point.
        data, index, columns = _homogenize(data, intersect=intersect)

        # segregates dtypes and forms blocks matching to columns
        # NOTE(review): `homogenized` is only assigned in the commented-out
        # line above, so it is not bound in this function.
        blocks = form_blocks(homogenized, index, columns)

        # consolidate for now
        mgr = BlockManager(blocks, [columns, index])
        return mgr.consolidate()
Exemple #5
0
    def __init__(self, levels, labels, sortorder=None):
        """
        Parameters
        ----------
        levels : sequence of array-likes
            Each entry is coerced to an Index
        labels : sequence of array-likes
            Each entry is converted to an int32 ndarray
        sortorder : int or None, default None
            Converted with ``int`` when given; stored as None otherwise
        """
        self.levels = list(map(_ensure_index, levels))
        self.labels = [np.asarray(codes, dtype=np.int32) for codes in labels]
        self.sortorder = None if sortorder is None else int(sortorder)
Exemple #6
0
    def set_axis(self, axis, value):
        """
        Replace the labels of one axis with `value` (coerced to an Index).

        Raises Exception if `value` does not have the same length as the
        axis it replaces. When axis 0 is replaced, every block is handed the
        new items via ``set_ref_items(..., maybe_rename=True)``.
        """
        expected = len(self.axes[axis])
        if len(value) != expected:
            raise Exception('Length mismatch (%d vs %d)'
                            % (len(value), expected))
        self.axes[axis] = _ensure_index(value)

        if axis != 0:
            return

        # The blocks hold a reference to the items axis and must see the
        # replacement as well
        for blk in self.blocks:
            blk.set_ref_items(self.items, maybe_rename=True)
Exemple #7
0
    def __init__(self, blocks, axes, skip_integrity_check=False):
        """
        Parameters
        ----------
        blocks : sequence
            Stored as given; each block's ``values.ndim`` must equal the
            number of axes
        axes : sequence
            Each entry is coerced to an Index
        skip_integrity_check : boolean, default False
            When True, ``_verify_integrity`` is not called
        """
        self.axes = list(map(_ensure_index, axes))
        self.blocks = blocks

        n_axes = len(axes)
        for blk in blocks:
            assert blk.values.ndim == n_axes

        if skip_integrity_check:
            return
        self._verify_integrity()
Exemple #8
0
    def _set_index(self, index):
        """
        Validate `index` and store it, coerced to an Index, as `self._index`.

        Raises
        ------
        TypeError
            If `index` is not an ndarray, Index, list or tuple
        AssertionError
            If `index` and this object differ in length
        """
        accepted = ndarray, Index, list, tuple
        if not isinstance(index, accepted):
            raise TypeError("Expected index to be in %s; was %s."
                            % (accepted, type(index)))

        same_length = len(self) == len(index)
        if not same_length:
            raise AssertionError('Lengths of index and values did not match!')

        self._index = _ensure_index(index)
Exemple #9
0
    def _reindex_axis(self, new_index, fill_method, axis, copy):
        """
        Reindex the underlying data along a single axis.

        Returns `self` when the axis already equals `new_index` and no copy
        was requested. Otherwise returns a new object built with
        `self._constructor`. Axis 0 goes through ``reindex_items``, which
        takes no fill method; other axes go through ``reindex_axis``.
        """
        new_index = _ensure_index(new_index)
        unchanged = self._data.axes[axis].equals(new_index)
        if unchanged and not copy:
            return self

        if axis != 0:
            new_data = self._data.reindex_axis(new_index, axis=axis, method=fill_method)
        else:
            new_data = self._data.reindex_items(new_index)
        return self._constructor(new_data)
Exemple #10
0
    def __setstate__(self, state):
        """
        Restore axes and blocks from pickled state.

        Only the first three entries of `state` are used (axis arrays, block
        values, block items); anything after them is ignored.
        """
        # discard anything after 3rd, support beta pickling format for a little
        # while longer
        ax_arrays, bvalues, bitems = state[:3]

        self.axes = [_ensure_index(arr) for arr in ax_arrays]
        # Every block is rebuilt against the items axis (axes[0])
        self.blocks = [make_block(block_values, block_items, self.axes[0])
                       for block_values, block_items in zip(bvalues, bitems)]
Exemple #11
0
    def __init__(self, levels, labels, sortorder=None, names=None,
                 consistent=None):
        """
        Parameters
        ----------
        levels : sequence of array-likes
            Each entry is coerced to an Index
        labels : sequence of array-likes
            Each entry is converted to an int32 ndarray
        sortorder : int or None, default None
            Converted with ``int`` when given; stored as None otherwise
        names : sequence, optional
            One name per level; defaults to 'level_0', 'level_1', ...
        consistent : optional
            Not used in this method
        """
        self.levels = list(map(_ensure_index, levels))
        self.labels = [np.asarray(codes, dtype=np.int32) for codes in labels]

        if names is not None:
            assert len(names) == self.nlevels
            self.names = list(names)
        else:
            self.names = ['level_%d' % i for i in range(self.nlevels)]

        self.sortorder = None if sortorder is None else int(sortorder)
Exemple #12
0
    def get_indexer(self, target, method=None):
        """
        Compute indexer and mask for a new index given the current index.

        The indexer is meant to be passed to ndarray.take to align the
        current data to the new index. The mask tells which labels of the new
        index were found in the current index.

        Parameters
        ----------
        target : Index
        method : {'pad', 'ffill', 'backfill', 'bfill'}
            pad / ffill: propagate LAST valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap

        Returns
        -------
        (indexer, mask) : (ndarray, ndarray)

        Notes
        -----
        This is a low-level method and probably should be used at your own risk

        Examples
        --------
        >>> indexer, mask = index.get_indexer(new_index)
        >>> new_values = cur_values.take(indexer)
        >>> new_values[~mask] = np.nan
        """
        how = method
        if how:
            how = how.upper()

        target = _ensure_index(target)

        # 'ffill' / 'bfill' are alternate spellings of 'pad' / 'backfill'
        how = {'FFILL': 'PAD', 'BFILL': 'BACKFILL'}.get(how, how)

        result = _tseries.getFillVec(self, target, self.indexMap,
                                     target.indexMap, how)
        indexer, mask = result
        return indexer, mask
Exemple #13
0
    def _interleave(self, items):
        """
        Assemble a single ndarray from the blocks, ordered by `items`.

        Every item of every block must be present in `items`, and the blocks
        together must cover all of `items`; both conditions are asserted.
        """
        common_dtype = _interleaved_dtype(self.blocks)
        items = _ensure_index(items)

        out = np.empty(self.shape, dtype=common_dtype)
        covered = np.zeros(len(items), dtype=bool)

        # By construction, all of the item should be covered by one of the
        # blocks
        for blk in self.blocks:
            positions, found = items.get_indexer(blk.items)
            assert found.all()
            out[positions] = blk.values
            covered[positions] = True
        assert covered.all()
        return out
Exemple #14
0
    def reindex(self, index=None, method=None, copy=True):
        """Conform Series to new Index

        Parameters
        ----------
        index : array-like
            Preferably an Index object (to avoid duplicating data)
        method : {'backfill', 'bfill', 'pad', 'ffill', None}
            Method to use for filling holes in reindexed Series
            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap
        copy : boolean, default True
            Return a new object, even if the passed indexes are the same

        Notes
        -----
        When some requested labels are missing, integer data is converted to
        float and boolean data to object before the holes are set to NaN.

        Returns
        -------
        reindexed : Series
        """
        if self.index.equals(index):
            return self.copy() if copy else self

        index = _ensure_index(index)
        if len(self.index) == 0:
            # Nothing to take from: every requested label is missing
            return Series(nan, index=index)

        new_index, fill_vec, mask = self.index.reindex(index, method=method)
        new_values = self.values.take(fill_vec)

        # Use ``~`` for the logical complement: unary minus on a boolean
        # ndarray raises TypeError on NumPy >= 1.13 (assumes mask is boolean).
        notmask = ~mask
        if notmask.any():
            value_type = new_values.dtype.type
            # np.integer covers every integer width, not only the platform
            # default that np.int_ matches; none of them can hold NaN.
            if issubclass(value_type, np.integer):
                new_values = new_values.astype(float)
            elif issubclass(value_type, np.bool_):
                new_values = new_values.astype(object)

            np.putmask(new_values, notmask, nan)

        return Series(new_values, index=new_index)
Exemple #15
0
    def union(self, other):
        """
        Form the union of two Index objects and sorts if possible

        Parameters
        ----------
        other : Index or array-like
            Must expose ``__iter__``; otherwise an Exception is raised

        Returns
        -------
        union : Index
            `self` when `other` is empty or equal to `self`; `other` coerced
            to an Index when `self` is empty
        """
        if not hasattr(other, '__iter__'):
            raise Exception('Input must be iterable!')

        # Nothing new to merge in
        if len(other) == 0:
            return self
        if self.equals(other):
            return self

        if len(self) > 0:
            return Index(_tseries.fast_unique_multiple([self, other]))
        return _ensure_index(other)
Exemple #16
0
    def _init_matrix(self, data, axes, dtype=None, copy=False):
        """
        Build a single-block BlockManager from array-like `data`.

        Parameters
        ----------
        data : array-like
            Prepared with ``_prep_ndarray``
        axes : sequence
            One entry per dimension; None entries get a default index of the
            matching length, others are coerced to an Index
        dtype : optional
            If given, the values are cast to it; a failed cast raises
            ValueError
        copy : boolean, default False
            Forwarded to ``_prep_ndarray``
        """
        values = _prep_ndarray(data, copy=copy)

        if dtype is not None:
            try:
                values = values.astype(dtype)
            except Exception:
                raise ValueError('failed to cast to %s' % dtype)

        dims = values.shape
        fixed_axes = [
            _default_index(dims[pos]) if axis is None else _ensure_index(axis)
            for pos, axis in enumerate(axes)
        ]

        # The one block spans all items, so the items axis is both its items
        # and its reference items
        first_axis = fixed_axes[0]
        return BlockManager([make_block(values, first_axis, first_axis)],
                            fixed_axes)
Exemple #17
0
    def _init_dict(self, data, axes, dtype=None):
        """
        Build a consolidated BlockManager from a dict of DataFrame-like values.

        The caller's dict is never modified: non-DataFrame values are coerced
        into a new dict rather than written back into `data`.

        Parameters
        ----------
        data : dict
            Values that are not DataFrame instances are wrapped in DataFrame
        axes : sequence of length 3
            (items, major, minor). A None `items` defaults to the (sorted if
            possible) dict keys; a None `major` / `minor` defaults to the
            union of the frames' indexes / columns
        dtype : optional
            If given, each frame is cast with ``astype`` after reindexing

        Returns
        -------
        BlockManager (consolidated)
        """
        items, major, minor = axes

        # prefilter if items passed
        if items is not None:
            items = _ensure_index(items)
            selected = [(k, v) for k, v in data.iteritems() if k in items]
        else:
            items = Index(_try_sort(data.keys()))
            selected = list(data.iteritems())

        # Coerce into a fresh dict; assigning into `data` here would write
        # into the caller's dict whenever items was not passed
        data = dict((k, v if isinstance(v, DataFrame) else DataFrame(v))
                    for k, v in selected)

        if major is None:
            major = _union_indexes([v.index for v in data.values()])

        if minor is None:
            minor = _union_indexes([v.columns for v in data.values()])

        axes = [items, major, minor]

        # homogenize: align every frame to (major, minor) and add a leading
        # length-1 axis to its values
        reshaped_data = {}
        for k, v in data.iteritems():
            v = v.reindex(index=major, columns=minor, copy=False)
            if dtype is not None:
                v = v.astype(dtype)
            values = v.values
            reshaped_data[k] = values.reshape((1,) + values.shape)

        # segregates dtypes and forms blocks matching to columns
        blocks = form_blocks(reshaped_data, axes)
        return BlockManager(blocks, axes).consolidate()
Exemple #18
0
    def reindex_axis(self, new_axis, method=None, axis=0):
        """
        Conform one axis of the manager to `new_axis`.

        Parameters
        ----------
        new_axis : Index or array-like
        method : optional
            Fill method forwarded to the current axis' ``reindex``; must be
            None when `axis` is 0 (asserted)
        axis : int, default 0
            Axis 0 is delegated to ``reindex_items``

        Returns
        -------
        BlockManager
        """
        if axis == 0:
            assert(method is None)
            return self.reindex_items(new_axis)

        new_axis = _ensure_index(new_axis)
        cur_axis = self.axes[axis]

        new_axis, indexer, mask = cur_axis.reindex(new_axis, method)

        # TODO: deal with length-0 case? or does it fall out?
        # Use ``~`` for the logical complement: unary minus on a boolean
        # ndarray raises TypeError on NumPy >= 1.13 (assumes mask is boolean).
        notmask = ~mask
        needs_masking = len(new_axis) > 0 and notmask.any()

        new_blocks = [
            block.reindex_axis(indexer, notmask, needs_masking, axis=axis)
            for block in self.blocks
        ]

        new_axes = list(self.axes)
        new_axes[axis] = new_axis
        return BlockManager(new_blocks, new_axes)
Exemple #19
0
 def __setstate__(self, state):
     """
     Restore the block from a pickled ``(items, ref_items, values)`` tuple.

     Both label sequences are coerced to Index objects; `ndim` is taken
     from the restored values.
     """
     raw_items, raw_ref_items, payload = state
     self.items = _ensure_index(raw_items)
     self.ref_items = _ensure_index(raw_ref_items)
     self.values, self.ndim = payload, payload.ndim
Exemple #20
0
 def __set__(self, obj, value):
     """Coerce `value` to an Index and store it on `obj` under `self.cache_field`."""
     coerced = _ensure_index(value)
     setattr(obj, self.cache_field, coerced)
Exemple #21
0
    def set_items_norename(self, value):
        """
        Replace the items axis (axis 0) with `value`, coerced to an Index.

        Every block receives the new items through
        ``set_ref_items(..., maybe_rename=False)``.
        """
        new_items = _ensure_index(value)
        self.axes[0] = new_items

        for blk in self.blocks:
            blk.set_ref_items(new_items, maybe_rename=False)