Beispiel #1
0
    def _merge_blocks(self, merge_chunks):
        """
        Stack the values of several blocks into one newly allocated block.

        merge_chunks -> [(_JoinUnit, Block)]

        Each block's values are run through ``com.take_fast`` with its
        unit's indexer along ``self.axis`` and written into consecutive
        slices (along axis 0) of a single output array.  The output length
        along ``self.axis`` comes from the first unit's indexer, or from the
        first block's shape when that indexer is None.  The output dtype is
        whatever ``_get_block_dtype`` returns for the participating blocks.

        Returns the result of ``make_block`` on the stacked values, with the
        blocks' items concatenated in chunk order.
        """
        first_unit, first_block = merge_chunks[0]
        first_indexer = first_unit.indexer

        shape = list(first_block.values.shape)
        if first_indexer is None:
            axis_len = shape[self.axis]
        else:
            axis_len = len(first_indexer)

        blocks = [blk for _, blk in merge_chunks]

        # axis 0 is assigned first; the self.axis assignment comes second
        shape[0] = sum(len(blk) for blk in blocks)
        shape[self.axis] = axis_len

        # Should use Fortran order??
        result = np.empty(shape, dtype=_get_block_dtype(blocks))

        start = 0
        for unit, blk in merge_chunks:
            stop = start + len(blk)
            # take_fast writes straight into this slice of the result
            com.take_fast(blk.values, unit.indexer,
                          None, False, axis=self.axis,
                          out=result[start:stop])
            start = stop

        # does not sort
        merged_items = _concat_indexes([blk.items for blk in blocks])
        return make_block(result, merged_items, self.result_items)
Beispiel #2
0
    def _merge_blocks(self, merge_chunks):
        """
        Stack the values of several blocks into one newly allocated block.

        merge_chunks -> [(_JoinUnit, Block)]

        Each block's values are run through ``com.take_fast`` along
        ``self.axis`` and written into consecutive slices (along axis 0) of
        a single output array that has the dtype of the first block's
        values.  A unit whose indexer is None is taken with
        ``np.arange(n)``, i.e. every position in order.

        Returns the result of ``make_block`` on the stacked values, with the
        blocks' items concatenated in chunk order.
        """
        first_unit, first_block = merge_chunks[0]
        first_indexer = first_unit.indexer

        shape = list(first_block.values.shape)
        if first_indexer is None:
            n = shape[self.axis]
        else:
            n = len(first_indexer)

        # axis 0 is assigned first; the self.axis assignment comes second
        shape[0] = sum(len(blk) for _, blk in merge_chunks)
        shape[self.axis] = n

        # Should use Fortran order??
        result = np.empty(shape, dtype=first_block.values.dtype)

        start = 0
        for unit, blk in merge_chunks:
            stop = start + len(blk)

            indexer = unit.indexer
            if indexer is None:
                # is this really faster than assigning to arr.flat?
                indexer = np.arange(n, dtype=np.int64)

            # write out the values to the result array
            com.take_fast(blk.values, indexer, None, False,
                          axis=self.axis, out=result[start:stop])
            start = stop

        # does not sort
        merged_items = _concat_indexes([blk.items for _, blk in merge_chunks])
        return make_block(result, merged_items, self.result_items)
Beispiel #3
0
    def _reindex_indexer_items(self, new_items, indexer, fill_value):
        """
        Build a new BlockManager whose items axis is ``new_items``.

        Parameters
        ----------
        new_items : index-like
            Items of the result; also used as the ref_items of every block
            that is created here.
        indexer : array-like of int
            Positions into ``self.items``, one per entry of ``new_items``.
            NOTE(review): presumably -1 marks an item that is absent from
            ``self.items`` -- confirm against ``com.take_1d``.
        fill_value : scalar
            Forwarded to ``self._make_na_block`` for items that were not
            found in any existing block.

        Returns
        -------
        BlockManager
            Built from the new blocks and ``[new_items] + self.axes[1:]``.
        """
        # TODO: less efficient than I'd like

        item_order = com.take_1d(self.items.values, indexer)

        # keep track of what items aren't found anywhere
        mask = np.zeros(len(item_order), dtype=bool)

        new_blocks = []
        for blk in self.blocks:
            blk_indexer = blk.items.get_indexer(item_order)
            selector = blk_indexer != -1
            # update with observed items
            mask |= selector

            if not selector.any():
                continue

            new_block_items = new_items.take(selector.nonzero()[0])
            new_values = com.take_fast(blk.values, blk_indexer[selector],
                                       None, False, axis=0)
            new_blocks.append(make_block(new_values, new_block_items,
                                         new_items))

        if not mask.all():
            # Logical negation must be spelled ``~``: unary minus on a
            # boolean ndarray raises TypeError in current NumPy.
            na_items = new_items[~mask]
            na_block = self._make_na_block(na_items, new_items,
                                           fill_value=fill_value)
            new_blocks.append(na_block)
            new_blocks = _consolidate(new_blocks, new_items)

        return BlockManager(new_blocks, [new_items] + self.axes[1:])
Beispiel #4
0
 def take(self, indexer, axis=1, fill_value=np.nan):
     """
     Take along a non-item axis and wrap the result in a new block.

     ``indexer``, ``axis`` and ``fill_value`` are forwarded to
     ``com.take_fast``; the new block keeps this block's ``items`` and
     ``ref_items``.

     Raises AssertionError when ``axis`` is less than 1.
     """
     if axis < 1:
         message = 'axis must be at least 1, got %d' % axis
         raise AssertionError(message)

     taken = com.take_fast(self.values, indexer, None, None,
                           axis=axis, fill_value=fill_value)
     return make_block(taken, self.items, self.ref_items)
Beispiel #5
0
 def reindex_axis(self, indexer, mask, needs_masking, axis=0, fill_value=np.nan):
     """
     Reindex using pre-computed indexer information

     When the block holds any values they are reindexed by
     ``com.take_fast``.  When the block is empty (``values.size == 0``) a
     fresh array is allocated instead, shaped like the block but with
     ``len(indexer)`` entries along ``axis``, using ``np.empty``'s default
     dtype and filled with ``fill_value``.

     Returns a new block with this block's ``items`` and ``ref_items``.
     """
     if self.values.size > 0:
         reindexed = com.take_fast(self.values, indexer, mask, needs_masking,
                                   axis=axis, fill_value=fill_value)
         return make_block(reindexed, self.items, self.ref_items)

     # nothing to take from: build an all-fill array of the target shape
     target_shape = list(self.shape)
     target_shape[axis] = len(indexer)
     reindexed = np.empty(target_shape)
     reindexed.fill(fill_value)
     return make_block(reindexed, self.items, self.ref_items)
Beispiel #6
0
    def reindex_indexer(self, new_axis, indexer, axis=1):
        """
        pandas-indexer with -1's only

        Applies ``indexer`` along ``axis`` to the values of every block via
        ``common.take_fast`` and returns a new BlockManager in which
        ``new_axis`` replaces the axis at position ``axis``.

        Raises NotImplementedError for ``axis == 0``.
        """
        if axis == 0:
            raise NotImplementedError

        axes = list(self.axes)
        axes[axis] = new_axis

        def _reindexed(blk):
            # same items, values re-taken along the requested axis
            taken = common.take_fast(blk.values, indexer, None,
                                     False, axis=axis)
            return make_block(taken, blk.items, self.items)

        return BlockManager([_reindexed(blk) for blk in self.blocks], axes)
Beispiel #7
0
    def take(self, indexer, axis=1, pandas_indexer=False):
        """
        Take ``indexer`` along ``axis`` from every block and from the axis.

        With ``pandas_indexer`` true the block values go through
        ``common.take_fast``; otherwise the array's own ``take`` method is
        used.  The axis at position ``axis`` is replaced by its own
        ``take(indexer)``.

        Returns a new BlockManager.  Raises NotImplementedError for
        ``axis == 0``.
        """
        if axis == 0:
            raise NotImplementedError

        if pandas_indexer:
            def take_values(arr):
                return common.take_fast(arr, indexer,
                                        None, False, axis=axis)
        else:
            def take_values(arr):
                return arr.take(indexer, axis=axis)

        axes = list(self.axes)
        axes[axis] = self.axes[axis].take(indexer)

        blocks = [make_block(take_values(blk.values), blk.items, self.items)
                  for blk in self.blocks]
        return BlockManager(blocks, axes)
Beispiel #8
0
    def take(self, indexer, axis=1):
        """
        Take ``indexer`` along ``axis`` from every block and from the axis.

        ``indexer`` is converted to an int32 array and validated: any entry
        equal to -1, or greater than or equal to the axis length, raises
        ``Exception``.  Block values are taken with ``com.take_fast``.

        Returns a new BlockManager.  Raises NotImplementedError for
        ``axis == 0``.
        """
        if axis == 0:
            raise NotImplementedError

        indexer = np.asarray(indexer, dtype="i4")

        axis_len = len(self.axes[axis])
        # NOTE(review): the message says "nonzero", but the check rejects
        # -1 and values >= axis_len; 0 is accepted.
        rejected = (indexer == -1) | (indexer >= axis_len)
        if rejected.any():
            raise Exception("Indices must be nonzero and less than the axis length")

        axes = list(self.axes)
        axes[axis] = self.axes[axis].take(indexer)

        def _taken(blk):
            values = com.take_fast(blk.values, indexer, None, False, axis=axis)
            return make_block(values, blk.items, self.items)

        return BlockManager([_taken(blk) for blk in self.blocks], axes)
Beispiel #9
0
    def _merge_blocks(self, lblk, rblk):
        """
        Stack a left and a right block into one newly allocated block.

        The left block's values fill the first ``len(lblk.items)`` rows of
        the output and the right block's values fill the rest.  Each side is
        run through ``common.take_fast`` along ``self.axis`` with
        ``self.lindexer`` / ``self.rindexer``; a side whose indexer is None
        is taken with ``np.arange(n)``, i.e. every position in order.  The
        output has the dtype of the left block's values.

        Returns the result of ``make_block`` with the left items followed by
        the right items (not sorted).
        """
        left_indexer = self.lindexer
        right_indexer = self.rindexer

        if left_indexer is None:
            n = lblk.values.shape[self.axis]
        else:
            n = len(left_indexer)
        n_left = len(lblk.items)
        n_right = len(rblk.items)

        shape = list(lblk.shape)
        shape[0] = n_left + n_right
        shape[self.axis] = n

        result = np.empty(shape, dtype=lblk.values.dtype)

        sides = (
            (lblk, left_indexer, result[:n_left]),
            (rblk, right_indexer, result[n_left:]),
        )
        for blk, side_indexer, target in sides:
            if side_indexer is None:
                # is this really faster than assigning to arr.flat?
                side_indexer = np.arange(n, dtype='i4')
            # write out the values to the result array
            common.take_fast(blk.values, side_indexer, None, False,
                             axis=self.axis, out=target)

        # does not sort
        merged_items = lblk.items.append(rblk.items)
        return make_block(result, merged_items, self.result_items)
Beispiel #10
0
    def take(self, indexer, axis=1):
        """
        Take ``indexer`` along ``axis`` from every block and from the axis.

        ``indexer`` is converted to an int32 array and validated: any entry
        equal to -1, or greater than or equal to the axis length, raises
        ``Exception``.  Block values are taken with ``com.take_fast``.

        Returns a new BlockManager.  Raises NotImplementedError for
        ``axis == 0``.
        """
        if axis == 0:
            raise NotImplementedError

        indexer = np.asarray(indexer, dtype='i4')

        axis_len = len(self.axes[axis])
        # NOTE(review): the message says "nonzero", but the check rejects
        # -1 and values >= axis_len; 0 is accepted.
        is_missing = indexer == -1
        is_too_large = indexer >= axis_len
        if (is_missing | is_too_large).any():
            msg = 'Indices must be nonzero and less than the axis length'
            raise Exception(msg)

        axes = list(self.axes)
        axes[axis] = self.axes[axis].take(indexer)

        blocks = [
            make_block(
                com.take_fast(blk.values, indexer, None, False, axis=axis),
                blk.items,
                self.items,
            )
            for blk in self.blocks
        ]
        return BlockManager(blocks, axes)
Beispiel #11
0
    def _reindex_indexer_items(self, new_items, indexer, fill_value):
        """
        Build a new BlockManager whose items axis is ``new_items``.

        Parameters
        ----------
        new_items : index-like
            Items of the result; also used as the ref_items of every block
            that is created here.
        indexer : array-like of int
            Positions into ``self.items``, one per entry of ``new_items``.
            NOTE(review): presumably -1 marks an item that is absent from
            ``self.items`` -- confirm against ``com.take_1d``.
        fill_value : scalar
            Forwarded to ``self._make_na_block`` for items that were not
            found in any existing block.

        Returns
        -------
        BlockManager
            Built from the new blocks and ``[new_items] + self.axes[1:]``.
        """
        # TODO: less efficient than I'd like

        item_order = com.take_1d(self.items.values, indexer)

        # keep track of what items aren't found anywhere
        mask = np.zeros(len(item_order), dtype=bool)

        new_blocks = []
        for blk in self.blocks:
            blk_indexer = blk.items.get_indexer(item_order)
            selector = blk_indexer != -1
            # update with observed items
            mask |= selector

            if not selector.any():
                continue

            new_block_items = new_items.take(selector.nonzero()[0])
            new_values = com.take_fast(blk.values,
                                       blk_indexer[selector],
                                       None,
                                       False,
                                       axis=0)
            new_blocks.append(
                make_block(new_values, new_block_items, new_items))

        if not mask.all():
            # Logical negation must be spelled ``~``: unary minus on a
            # boolean ndarray raises TypeError in current NumPy.
            na_items = new_items[~mask]
            na_block = self._make_na_block(na_items,
                                           new_items,
                                           fill_value=fill_value)
            new_blocks.append(na_block)
            new_blocks = _consolidate(new_blocks, new_items)

        return BlockManager(new_blocks, [new_items] + self.axes[1:])
Beispiel #12
0
 def take(self, indexer, axis=1, fill_value=np.nan):
     """
     Take along a non-item axis and wrap the result in a new block.

     ``indexer``, ``axis`` and ``fill_value`` are forwarded to
     ``com.take_fast``; the new block keeps this block's ``items`` and
     ``ref_items``.

     Raises AssertionError when ``axis`` is less than 1.
     """
     # Raise explicitly instead of using an ``assert`` statement: asserts
     # are removed when Python runs with -O, which would silently drop the
     # axis check.  The exception type is unchanged for callers.
     if axis < 1:
         raise AssertionError('axis must be at least 1, got %d' % axis)
     new_values = com.take_fast(self.values, indexer, None,
                                None, axis=axis,
                                fill_value=fill_value)
     return make_block(new_values, self.items, self.ref_items)
Beispiel #13
0
 def take(self, indexer, axis=1, fill_value=np.nan):
     """
     Take along a non-item axis and wrap the result in a new block.

     ``indexer``, ``axis`` and ``fill_value`` are forwarded to
     ``com.take_fast``; the new block keeps this block's ``items`` and
     ``ref_items``.

     Raises AssertionError when ``axis`` is less than 1.
     """
     # Raise explicitly instead of using an ``assert`` statement: asserts
     # are removed when Python runs with -O, which would silently drop the
     # axis check.  The exception type is unchanged for callers.
     if axis < 1:
         raise AssertionError('axis must be at least 1, got %d' % axis)
     new_values = com.take_fast(self.values, indexer, None,
                                None, axis=axis,
                                fill_value=fill_value)
     return make_block(new_values, self.items, self.ref_items)