Example #1
    def _get_group_keys(self):
        """

        Parameters
        ----------

        Returns
        -------

        """
        if self.left_index:
            if isinstance(self.left.index, MultiIndex):
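                # levels hold the unique values and labels hold integer
                # positions, so take() materializes the actual key values
                # for each level of the MultiIndex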
                left_keys = [
                    lev.values.take(lab) for lev, lab in zip(
                        self.left.index.levels, self.left.index.labels)
                ]
            else:
                left_keys = [self.left.index.values]
        else:
            left_keys = self.left_join_keys

        if self.right_index:
            if isinstance(self.right.index, MultiIndex):
                right_keys = [
                    lev.values.take(lab) for lev, lab in zip(
                        self.right.index.levels, self.right.index.labels)
                ]
            else:
                right_keys = [self.right.index.values]
        else:
            right_keys = self.right_join_keys

        assert (len(left_keys) == len(right_keys))

        left_labels = []
        right_labels = []
        group_sizes = []

        for lk, rk in zip(left_keys, right_keys):
            llab, rlab, count = _factorize_objects(lk, rk, sort=self.sort)

            left_labels.append(llab)
            right_labels.append(rlab)
            group_sizes.append(count)

        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        max_groups = 1L
        for x in group_sizes:
            max_groups *= long(x)

        if max_groups > 2**63:  # pragma: no cover
            raise Exception('Combinatorial explosion! (boom)')

        left_group_key, right_group_key, max_groups = \
            _factorize_int64(left_group_key, right_group_key,
                             sort=self.sort)
        return left_group_key, right_group_key, max_groups
Example #2
    def _get_group_keys(self):
        """

        Parameters
        ----------

        Returns
        -------

        """
        if self.left_index:
            if isinstance(self.left.index, MultiIndex):
                left_keys = [lev.values.take(lab)
                             for lev, lab in zip(self.left.index.levels,
                                                 self.left.index.labels)]
            else:
                left_keys = [self.left.index.values]
        else:
            left_keys = self.left_join_keys

        if self.right_index:
            if isinstance(self.right.index, MultiIndex):
                right_keys = [lev.values.take(lab)
                              for lev, lab in zip(self.right.index.levels,
                                                  self.right.index.labels)]
            else:
                right_keys = [self.right.index.values]
        else:
            right_keys = self.right_join_keys

        assert(len(left_keys) == len(right_keys))

        left_labels = []
        right_labels = []
        group_sizes = []

        for lk, rk in zip(left_keys, right_keys):
            llab, rlab, count = _factorize_objects(lk, rk, sort=self.sort)

            left_labels.append(llab)
            right_labels.append(rlab)
            group_sizes.append(count)

        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        max_groups = 1L
        for x in group_sizes:
            max_groups *= long(x)

        if max_groups > 2**63:  # pragma: no cover
            raise Exception('Combinatorial explosion! (boom)')

        left_group_key, right_group_key, max_groups = \
            _factorize_int64(left_group_key, right_group_key,
                             sort=self.sort)
        return left_group_key, right_group_key, max_groups
Example #3
    def _make_selectors(self):
        new_levels = self.new_index_levels

        # make the mask
        group_index = get_group_index(self.sorted_labels[:-1], [len(x) for x in new_levels])

        group_index = _ensure_platform_int(group_index)

        group_mask = np.zeros(self.full_shape[0], dtype=bool)
        group_mask.put(group_index, True)

        stride = self.index.levshape[self.level]
        selector = self.sorted_labels[-1] + stride * group_index
        mask = np.zeros(np.prod(self.full_shape), dtype=bool)
        mask.put(selector, True)

        # compress labels
        unique_groups = np.arange(self.full_shape[0])[group_mask]
        compressor = group_index.searchsorted(unique_groups)

        if mask.sum() < len(self.index):
            raise ReshapeError("Index contains duplicate entries, " "cannot reshape")

        self.group_mask = group_mask
        self.group_index = group_index
        self.mask = mask
        self.unique_groups = unique_groups
        self.compressor = compressor
Example #4
    def _make_selectors(self):
        new_levels = self.new_index_levels

        # make the mask
        group_index = get_group_index(self.sorted_labels[:-1],
                                      [len(x) for x in new_levels])

        comp_index, obs_ids = _compress_group_index(group_index)
        ngroups = len(obs_ids)

        comp_index = _ensure_platform_int(comp_index)
        stride = self.index.levshape[self.level]
        self.full_shape = ngroups, stride

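        # selector is each row's flat position in the (ngroups x stride)
        # result; duplicate positions indicate duplicate index entries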
        selector = self.sorted_labels[-1] + stride * comp_index
        mask = np.zeros(np.prod(self.full_shape), dtype=bool)
        mask.put(selector, True)

        if mask.sum() < len(self.index):
            raise ReshapeError('Index contains duplicate entries, '
                               'cannot reshape')

        self.group_index = comp_index
        self.mask = mask
        self.unique_groups = obs_ids
        self.compressor = comp_index.searchsorted(np.arange(ngroups))
Example #6
    def _make_selectors(self):
        new_levels = self.new_index_levels

        # make the mask
        group_index = get_group_index(self.sorted_labels,
                                      [len(x) for x in new_levels])

        group_mask = np.zeros(self.full_shape[0], dtype=bool)
        group_mask.put(group_index, True)

        stride = self.index.levshape[self.level]
        selector = self.sorted_labels[-1] + stride * group_index
        mask = np.zeros(np.prod(self.full_shape), dtype=bool)
        mask.put(selector, True)

        # compress labels
        unique_groups = np.arange(self.full_shape[0])[group_mask]
        compressor = group_index.searchsorted(unique_groups)

        if mask.sum() < len(self.index):
            raise ReshapeError('Index contains duplicate entries, '
                               'cannot reshape')

        self.group_mask = group_mask
        self.group_index = group_index
        self.mask = mask
        self.unique_groups = unique_groups
        self.compressor = compressor
Example #7
def _get_multiindex_indexer(join_keys, index, sort=False):
    shape = []
    labels = []
    for level, key in zip(index.levels, join_keys):
        llab, rlab, count = _factorize_objects(level, key, sort=False)
        labels.append(rlab)
        shape.append(count)

    left_group_key = get_group_index(labels, shape)
    right_group_key = get_group_index(index.labels, shape)

    left_group_key, right_group_key, max_groups = _factorize_int64(left_group_key, right_group_key, sort=False)

    left_indexer, right_indexer = lib.left_outer_join(
        left_group_key.astype("i4"), right_group_key.astype("i4"), max_groups, sort=False
    )

    return left_indexer, right_indexer
Example #8
def _get_multiindex_indexer(join_keys, index, sort=False):
    shape = []
    labels = []
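    # express each join key in the integer coding of the corresponding
    # index level so both sides can be compared as label arrays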
    for level, key in zip(index.levels, join_keys):
        llab, rlab, count = _factorize_keys(level, key, sort=False)
        labels.append(rlab)
        shape.append(count)

    left_group_key = get_group_index(labels, shape)
    right_group_key = get_group_index(index.labels, shape)

    left_group_key, right_group_key, max_groups = _factorize_keys(left_group_key, right_group_key, sort=False)

    left_indexer, right_indexer = algos.left_outer_join(
        com._ensure_int64(left_group_key), com._ensure_int64(right_group_key), max_groups, sort=False
    )

    return left_indexer, right_indexer
Example #9
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """

    Parameters
    ----------

    Returns
    -------

    """
    if len(left_keys) != len(right_keys):
        raise AssertionError('left_keys and right_keys must be the same length')

    left_labels = []
    right_labels = []
    group_sizes = []

    for lk, rk in zip(left_keys, right_keys):
        llab, rlab, count = _factorize_keys(lk, rk, sort=sort)

        left_labels.append(llab)
        right_labels.append(rlab)
        group_sizes.append(count)

    max_groups = long(1)
    for x in group_sizes:
        max_groups *= long(x)

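    # if the cartesian product of level sizes would overflow int64, a flat
    # group index cannot be represented; factorize tuples of labels instead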
    if max_groups > 2**63:  # pragma: no cover
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(lib.fast_zip(left_labels),
                            lib.fast_zip(right_labels))
    else:
        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        left_group_key, right_group_key, max_groups = \
            _factorize_keys(left_group_key, right_group_key, sort=sort)

    # preserve left frame order if how == 'left' and sort == False
    kwargs = {'sort': sort} if how == 'left' else {}
    join_func = _join_functions[how]
    return join_func(left_group_key, right_group_key, max_groups, **kwargs)
Example #10
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """

    Parameters
    ----------

    Returns
    -------

    """
    if len(left_keys) != len(right_keys):
        raise AssertionError('left_keys and right_keys must be the same length')

    left_labels = []
    right_labels = []
    group_sizes = []

    for lk, rk in zip(left_keys, right_keys):
        llab, rlab, count = _factorize_keys(lk, rk, sort=sort)

        left_labels.append(llab)
        right_labels.append(rlab)
        group_sizes.append(count)

    max_groups = long(1)
    for x in group_sizes:
        max_groups *= long(x)

    if max_groups > 2 ** 63:  # pragma: no cover
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(lib.fast_zip(left_labels),
                            lib.fast_zip(right_labels))
    else:
        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        left_group_key, right_group_key, max_groups = \
            _factorize_keys(left_group_key, right_group_key, sort=sort)

    # preserve left frame order if how == 'left' and sort == False
    kwargs = {'sort': sort} if how == 'left' else {}
    join_func = _join_functions[how]
    return join_func(left_group_key, right_group_key, max_groups, **kwargs)
Example #11
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """

    Parameters
    ----------

    Returns
    -------

    """
    if len(left_keys) != len(right_keys):
        raise AssertionError('left_keys and right_keys must be the same length')

    left_labels = []
    right_labels = []
    group_sizes = []

    for lk, rk in zip(left_keys, right_keys):
        llab, rlab, count = _factorize_keys(lk, rk, sort=sort)

        left_labels.append(llab)
        right_labels.append(rlab)
        group_sizes.append(count)

    max_groups = 1L
    for x in group_sizes:
        max_groups *= long(x)

    if max_groups > 2 ** 63:  # pragma: no cover
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(lib.fast_zip(left_labels),
                            lib.fast_zip(right_labels))
    else:
        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        left_group_key, right_group_key, max_groups = \
            _factorize_keys(left_group_key, right_group_key, sort=sort)

    join_func = _join_functions[how]
    return join_func(left_group_key, right_group_key, max_groups)
Example #12
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """

    Parameters
    ----------

    Returns
    -------

    """
    if len(left_keys) != len(right_keys):
        raise AssertionError('left_keys and right_keys must be the same length')

    left_labels = []
    right_labels = []
    group_sizes = []

    for lk, rk in zip(left_keys, right_keys):
        llab, rlab, count = _factorize_keys(lk, rk, sort=sort)

        left_labels.append(llab)
        right_labels.append(rlab)
        group_sizes.append(count)

    max_groups = long(1)
    for x in group_sizes:
        max_groups *= long(x)

    if max_groups > 2**63:  # pragma: no cover
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(lib.fast_zip(left_labels),
                            lib.fast_zip(right_labels))
    else:
        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        left_group_key, right_group_key, max_groups = \
            _factorize_keys(left_group_key, right_group_key, sort=sort)

    join_func = _join_functions[how]
    return join_func(left_group_key, right_group_key, max_groups)
Example #13
def _get_multiindex_indexer(join_keys, index, sort=False):
    shape = []
    labels = []
    for level, key in zip(index.levels, join_keys):
        llab, rlab, count = _factorize_keys(level, key, sort=False)
        labels.append(rlab)
        shape.append(count)

    left_group_key = get_group_index(labels, shape)
    right_group_key = get_group_index(index.labels, shape)

    left_group_key, right_group_key, max_groups = \
        _factorize_keys(left_group_key, right_group_key,
                        sort=False)

    left_indexer, right_indexer = \
        algos.left_outer_join(com._ensure_int64(left_group_key),
                              com._ensure_int64(right_group_key),
                              max_groups, sort=False)

    return left_indexer, right_indexer
Example #14
File: merge.py Project: lahi/pandas
def _get_multiindex_indexer(join_keys, index, sort=False):
    shape = []
    labels = []
    for level, key in zip(index.levels, join_keys):
        llab, rlab, count = _factorize_objects(level, key, sort=False)
        labels.append(rlab)
        shape.append(count)

    left_group_key = get_group_index(labels, shape)
    right_group_key = get_group_index(index.labels, shape)

    left_group_key, right_group_key, max_groups = \
        _factorize_int64(left_group_key, right_group_key,
                         sort=False)

    left_indexer, right_indexer = \
        lib.left_outer_join(left_group_key.astype('i4'),
                            right_group_key.astype('i4'),
                            max_groups, sort=False)

    return left_indexer, right_indexer
Example #15
    def _get_group_keys(self):
        """

        Parameters
        ----------

        Returns
        -------

        """
        left_keys = self.left_join_keys
        right_keys = self.right_join_keys

        assert(len(left_keys) == len(right_keys))

        left_labels = []
        right_labels = []
        group_sizes = []

        for lk, rk in zip(left_keys, right_keys):
            llab, rlab, count = _factorize_keys(lk, rk, sort=self.sort)

            left_labels.append(llab)
            right_labels.append(rlab)
            group_sizes.append(count)

        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        max_groups = 1L
        for x in group_sizes:
            max_groups *= long(x)

        if max_groups > 2**63:  # pragma: no cover
            raise Exception('Combinatorial explosion! (boom)')

        left_group_key, right_group_key, max_groups = \
            _factorize_keys(left_group_key, right_group_key,
                             sort=self.sort)
        return left_group_key, right_group_key, max_groups
Example #16
File: merge.py Project: lahi/pandas
    def _get_group_keys(self):
        """

        Parameters
        ----------

        Returns
        -------

        """
        left_keys = self.left_join_keys
        right_keys = self.right_join_keys

        assert (len(left_keys) == len(right_keys))

        left_labels = []
        right_labels = []
        group_sizes = []

        for lk, rk in zip(left_keys, right_keys):
            llab, rlab, count = _factorize_objects(lk, rk, sort=self.sort)

            left_labels.append(llab)
            right_labels.append(rlab)
            group_sizes.append(count)

        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        max_groups = 1L
        for x in group_sizes:
            max_groups *= long(x)

        if max_groups > 2**63:  # pragma: no cover
            raise Exception('Combinatorial explosion! (boom)')

        left_group_key, right_group_key, max_groups = \
            _factorize_int64(left_group_key, right_group_key,
                             sort=self.sort)
        return left_group_key, right_group_key, max_groups
Example #17
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """

    Parameters
    ----------

    Returns
    -------

    """
    assert(len(left_keys) == len(right_keys))

    left_labels = []
    right_labels = []
    group_sizes = []

    for lk, rk in zip(left_keys, right_keys):
        llab, rlab, count = _factorize_keys(lk, rk, sort=sort)

        left_labels.append(llab)
        right_labels.append(rlab)
        group_sizes.append(count)

    left_group_key = get_group_index(left_labels, group_sizes)
    right_group_key = get_group_index(right_labels, group_sizes)

    max_groups = 1L
    for x in group_sizes:
        max_groups *= long(x)

    if max_groups > 2 ** 63:  # pragma: no cover
        raise MergeError('Combinatorial explosion! (boom)')

    left_group_key, right_group_key, max_groups = \
        _factorize_keys(left_group_key, right_group_key, sort=sort)

    join_func = _join_functions[how]
    return join_func(left_group_key, right_group_key, max_groups)
Example #18
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """

    Parameters
    ----------

    Returns
    -------

    """
    assert (len(left_keys) == len(right_keys))

    left_labels = []
    right_labels = []
    group_sizes = []

    for lk, rk in zip(left_keys, right_keys):
        llab, rlab, count = _factorize_keys(lk, rk, sort=sort)

        left_labels.append(llab)
        right_labels.append(rlab)
        group_sizes.append(count)

    left_group_key = get_group_index(left_labels, group_sizes)
    right_group_key = get_group_index(right_labels, group_sizes)

    max_groups = 1L
    for x in group_sizes:
        max_groups *= long(x)

    if max_groups > 2**63:  # pragma: no cover
        raise MergeError('Combinatorial explosion! (boom)')

    left_group_key, right_group_key, max_groups = \
        _factorize_keys(left_group_key, right_group_key, sort=sort)

    join_func = _join_functions[how]
    return join_func(left_group_key, right_group_key, max_groups)
Example #19
    def _make_sorted_values_labels(self):
        v = self.level

        labs = self.index.labels
        levs = self.index.levels
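        # move the level being unstacked to the innermost position so it
        # varies fastest within each group after sorting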
        to_sort = labs[:v] + labs[v + 1:] + [labs[v]]
        sizes = [len(x) for x in levs[:v] + levs[v + 1:] + [levs[v]]]

        group_index = get_group_index(to_sort, sizes)
        comp_index, obs_ids = _compress_group_index(group_index)
        ngroups = len(obs_ids)

        indexer = algos.groupsort_indexer(comp_index, ngroups)[0]
        indexer = _ensure_platform_int(indexer)

        self.sorted_values = com.take_2d(self.values, indexer, axis=0)
        self.sorted_labels = [l.take(indexer) for l in to_sort]
Example #20
    def _make_sorted_values_labels(self):
        v = self.level

        labs = self.index.labels
        levs = self.index.levels
        to_sort = labs[:v] + labs[v + 1:] + [labs[v]]
        sizes = [len(x) for x in levs[:v] + levs[v + 1:] + [levs[v]]]

        group_index = get_group_index(to_sort, sizes)
        comp_index, obs_ids = _compress_group_index(group_index)
        ngroups = len(obs_ids)

        indexer = lib.groupsort_indexer(comp_index, ngroups)[0]
        indexer = _ensure_platform_int(indexer)

        self.sorted_values = com.take_2d(self.values, indexer, axis=0)
        self.sorted_labels = [l.take(indexer) for l in to_sort]
Example #21
    def _make_sorted_values_labels(self):
        v = self.level

        labs = self.index.labels
        levs = self.index.levels
        to_sort = labs[:v] + labs[v + 1 :] + [labs[v]]
        sizes = [len(x) for x in levs[:v] + levs[v + 1 :] + [levs[v]]]

        group_index = get_group_index(to_sort, sizes)
        max_groups = np.prod(sizes)
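        # only compress to observed groups when the full cartesian product
        # of level sizes is large; small spaces can be sorted directly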
        if max_groups > 1000000:
            comp_index, obs_ids = _compress_group_index(group_index)
            ngroups = len(obs_ids)
        else:
            comp_index, ngroups = group_index, max_groups

        indexer = lib.groupsort_indexer(comp_index, ngroups)[0]
        indexer = _ensure_platform_int(indexer)

        self.sorted_values = self.values.take(indexer, axis=0)
        self.sorted_labels = [l.take(indexer) for l in to_sort]
Example #22
def get_compressed_ids(labels, sizes):
    # no overflow
    if com._long_prod(sizes) < 2 ** 63:
        group_index = get_group_index(labels, sizes)
        comp_index, obs_ids = _compress_group_index(group_index)
    else:
        n = len(labels[0])
        mask = np.zeros(n, dtype=bool)
        for v in labels:
            mask |= v < 0

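        # compress the longest prefix of levels whose combined size still
        # fits in int64 into a single level of observed ids, and repeat
        # until the remaining product fits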
        while com._long_prod(sizes) >= 2 ** 63:
            i = len(sizes)
            while com._long_prod(sizes[:i]) >= 2 ** 63:
                i -= 1

            rem_index, rem_ids = get_compressed_ids(labels[:i], sizes[:i])
            sizes = [len(rem_ids)] + sizes[i:]
            labels = [rem_index] + labels[i:]

        return get_compressed_ids(labels, sizes)

    return comp_index, obs_ids
Example #23
def get_compressed_ids(labels, sizes):
    # no overflow
    if com._long_prod(sizes) < 2**63:
        group_index = get_group_index(labels, sizes)
        comp_index, obs_ids = _compress_group_index(group_index)
    else:
        n = len(labels[0])
        mask = np.zeros(n, dtype=bool)
        for v in labels:
            mask |= v < 0

        while com._long_prod(sizes) >= 2**63:
            i = len(sizes)
            while com._long_prod(sizes[:i]) >= 2**63:
                i -= 1

            rem_index, rem_ids = get_compressed_ids(labels[:i], sizes[:i])
            sizes = [len(rem_ids)] + sizes[i:]
            labels = [rem_index] + labels[i:]

        return get_compressed_ids(labels, sizes)

    return comp_index, obs_ids
Example #24
def _unstack_multiple(data, clocs):
    if len(clocs) == 0:
        return data

    # NOTE: This doesn't deal with hierarchical columns yet

    index = data.index

    clocs = [index._get_level_number(i) for i in clocs]

    rlocs = [i for i in range(index.nlevels) if i not in clocs]

    clevels = [index.levels[i] for i in clocs]
    clabels = [index.labels[i] for i in clocs]
    cnames = [index.names[i] for i in clocs]
    rlevels = [index.levels[i] for i in rlocs]
    rlabels = [index.labels[i] for i in rlocs]
    rnames = [index.names[i] for i in rlocs]

    shape = [len(x) for x in clevels]
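    # flatten the selected column levels into one group index; the observed
    # ids are later decomposed back into per-level labels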
    group_index = get_group_index(clabels, shape)

    comp_ids, obs_ids = _compress_group_index(group_index, sort=False)
    recons_labels = decons_group_index(obs_ids, shape)

    dummy_index = MultiIndex(levels=rlevels + [obs_ids],
                             labels=rlabels + [comp_ids],
                             names=rnames + ['__placeholder__'])

    if isinstance(data, Series):
        dummy = Series(data.values, index=dummy_index)
        unstacked = dummy.unstack('__placeholder__')
        new_levels = clevels
        new_names = cnames
        new_labels = recons_labels
    else:
        if isinstance(data.columns, MultiIndex):
            result = data
            for i in range(len(clocs)):
                val = clocs[i]
                result = result.unstack(val)
                clocs = [val if i > val else val - 1 for val in clocs]

            return result

        dummy = DataFrame(data.values, index=dummy_index,
                          columns=data.columns)

        unstacked = dummy.unstack('__placeholder__')
        if isinstance(unstacked, Series):
            unstcols = unstacked.index
        else:
            unstcols = unstacked.columns
        new_levels = [unstcols.levels[0]] + clevels
        new_names = [data.columns.name] + cnames

        new_labels = [unstcols.labels[0]]
        for rec in recons_labels:
            new_labels.append(rec.take(unstcols.labels[-1]))

    new_columns = MultiIndex(levels=new_levels, labels=new_labels,
                             names=new_names)

    if isinstance(unstacked, Series):
        unstacked.index = new_columns
    else:
        unstacked.columns = new_columns

    return unstacked
Example #25
def _unstack_multiple(data, clocs):
    if len(clocs) == 0:
        return data

    # NOTE: This doesn't deal with hierarchical columns yet

    index = data.index

    clocs = [index._get_level_number(i) for i in clocs]

    rlocs = [i for i in range(index.nlevels) if i not in clocs]

    clevels = [index.levels[i] for i in clocs]
    clabels = [index.labels[i] for i in clocs]
    cnames = [index.names[i] for i in clocs]
    rlevels = [index.levels[i] for i in rlocs]
    rlabels = [index.labels[i] for i in rlocs]
    rnames = [index.names[i] for i in rlocs]

    shape = [len(x) for x in clevels]
    group_index = get_group_index(clabels, shape)

    comp_ids, obs_ids = _compress_group_index(group_index, sort=False)
    recons_labels = decons_group_index(obs_ids, shape)

    dummy_index = MultiIndex(levels=rlevels + [obs_ids],
                             labels=rlabels + [comp_ids],
                             names=rnames + ['__placeholder__'])

    if isinstance(data, Series):
        dummy = Series(data.values, index=dummy_index)
        unstacked = dummy.unstack('__placeholder__')
        new_levels = clevels
        new_names = cnames
        new_labels = recons_labels
    else:
        if isinstance(data.columns, MultiIndex):
            result = data
            for i in range(len(clocs)):
                val = clocs[i]
                result = result.unstack(val)
                clocs = [val if i > val else val - 1 for val in clocs]

            return result

        dummy = DataFrame(data.values, index=dummy_index, columns=data.columns)

        unstacked = dummy.unstack('__placeholder__')
        if isinstance(unstacked, Series):
            unstcols = unstacked.index
        else:
            unstcols = unstacked.columns
        new_levels = [unstcols.levels[0]] + clevels
        new_names = [data.columns.name] + cnames

        new_labels = [unstcols.labels[0]]
        for rec in recons_labels:
            new_labels.append(rec.take(unstcols.labels[-1]))

    new_columns = MultiIndex(levels=new_levels,
                             labels=new_labels,
                             names=new_names)

    if isinstance(unstacked, Series):
        unstacked.index = new_columns
    else:
        unstacked.columns = new_columns

    return unstacked
Example #26
def get_compressed_ids(labels, sizes):
    from pandas.core.groupby import get_group_index

    ids = get_group_index(labels, sizes, sort=True, xnull=False)
    return _compress_group_index(ids, sort=True)
Example #28
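    # round-trip check: decomposing the flat group index should recover the
    # original per-level labels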
    def testit(label_list, shape):
        group_index = get_group_index(label_list, shape, sort=True, xnull=True)
        label_list2 = decons_group_index(group_index, shape)

        for a, b in zip(label_list, label_list2):
            assert (np.array_equal(a, b))
Example #29

import numpy as np

import pandas._tseries as lib
from pandas.core.groupby import get_group_index

# df is assumed to be a DataFrame with columns 'A', 'B' and 'C'
grouped = df.groupby(['A', 'B'])

# per-level integer labels and level sizes for the grouping keys
label_list = [ping.labels for ping in grouped.groupings]
shape = [len(ping.ids) for ping in grouped.groupings]

# fold the per-level labels into a single flat group index
group_index = get_group_index(label_list, shape).astype('i4')
ngroups = np.prod(shape)

# sort so that members of each group are contiguous; groupsort_indexer
# also returns group counts, which are not needed here
indexer = lib.groupsort_indexer(group_index, ngroups)[0]

values = df['C'].values.take(indexer)
group_index = group_index.take(indexer)

f = lambda x: x.std(ddof=1)

grouper = lib.Grouper(df['C'], np.ndarray.std, group_index, ngroups)
result = grouper.get_result()

expected = grouped.std()
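
The snippets above all rely on get_group_index to fold several per-level label
arrays into one flat integer key. As a point of reference, here is a minimal
sketch of that arithmetic using only NumPy; naive_group_index is a hypothetical
helper, not part of pandas, and it assumes all labels are non-negative.

import numpy as np

def naive_group_index(label_list, shape):
    # row-major flattening: identical to indexing a C-ordered array of the
    # given shape, which is what the flat group keys above represent
    return np.ravel_multi_index(label_list, dims=shape)

labels_a = np.array([0, 0, 1, 2, 1])
labels_b = np.array([1, 0, 2, 0, 2])
print(naive_group_index([labels_a, labels_b], [3, 3]))  # [1 0 5 6 5]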