Ejemplo n.º 1
0
    def _make_labels(self):
        if self._was_factor:  # pragma: no cover
            raise Exception('Should not call this method grouping by level')
        else:
            values = self.grouper
            if values.dtype != np.object_:
                values = values.astype('O')

            # khash
            rizer = lib.Factorizer(len(values))
            labels, counts = rizer.factorize(values, sort=False)

            uniques = Index(rizer.uniques, name=self.name)
            if self.sort and len(counts) > 0:
                sorter = uniques.argsort()
                reverse_indexer = np.empty(len(sorter), dtype=np.int32)
                reverse_indexer.put(sorter, np.arange(len(sorter)))

                mask = labels < 0
                labels = reverse_indexer.take(labels)
                np.putmask(labels, mask, -1)

                uniques = uniques.take(sorter)
                counts = counts.take(sorter)

            self._labels = labels
            self._group_index = uniques
            self._counts = counts
Ejemplo n.º 2
0
    def _set_grouper(self, obj, sort=False):
        """
        given an object and the specifications, setup the internal grouper
        for this particular specification

        Parameters
        ----------
        obj : the subject object
        sort : bool, default False
            whether the resulting grouper should be sorted
        """

        if self.key is not None and self.level is not None:
            raise ValueError(
                "The Grouper cannot specify both a key and a level!")

        # Keep self.grouper value before overriding
        if self._grouper is None:
            self._grouper = self.grouper

        # the key must be a valid info item
        if self.key is not None:
            key = self.key
            # The 'on' is already defined
            if getattr(self.grouper, "name", None) == key and isinstance(
                    obj, ABCSeries):
                ax = self._grouper.take(obj.index)
            else:
                if key not in obj._info_axis:
                    raise KeyError(
                        "The grouper name {0} is not found".format(key))
                ax = Index(obj[key], name=key)

        else:
            ax = obj._get_axis(self.axis)
            if self.level is not None:
                level = self.level

                # if a level is given it must be a mi level or
                # equivalent to the axis name
                if isinstance(ax, MultiIndex):
                    level = ax._get_level_number(level)
                    ax = Index(ax._get_level_values(level),
                               name=ax.names[level])

                else:
                    if level not in (0, ax.name):
                        raise ValueError(
                            "The level {0} is not valid".format(level))

        # possibly sort
        if (self.sort or sort) and not ax.is_monotonic:
            # use stable sort to support first, last, nth
            indexer = self.indexer = ax.argsort(kind="mergesort")
            ax = ax.take(indexer)
            obj = obj.take(indexer, axis=self.axis, is_copy=False)

        self.obj = obj
        self.grouper = ax
        return self.grouper
Ejemplo n.º 3
0
    def _set_grouper(self, obj, sort=False):
        """
        given an object and the specifications, setup the internal grouper
        for this particular specification

        Parameters
        ----------
        obj : the subject object
        sort : bool, default False
            whether the resulting grouper should be sorted
        """

        if self.key is not None and self.level is not None:
            raise ValueError(
                "The Grouper cannot specify both a key and a level!")

        # Keep self.grouper value before overriding
        if self._grouper is None:
            self._grouper = self.grouper

        # the key must be a valid info item
        if self.key is not None:
            key = self.key
            # The 'on' is already defined
            if (getattr(self.grouper, 'name', None) == key and
                    isinstance(obj, ABCSeries)):
                ax = self._grouper.take(obj.index)
            else:
                if key not in obj._info_axis:
                    raise KeyError(
                        "The grouper name {0} is not found".format(key))
                ax = Index(obj[key], name=key)

        else:
            ax = obj._get_axis(self.axis)
            if self.level is not None:
                level = self.level

                # if a level is given it must be a mi level or
                # equivalent to the axis name
                if isinstance(ax, MultiIndex):
                    level = ax._get_level_number(level)
                    ax = Index(ax._get_level_values(level),
                               name=ax.names[level])

                else:
                    if level not in (0, ax.name):
                        raise ValueError(
                            "The level {0} is not valid".format(level))

        # possibly sort
        if (self.sort or sort) and not ax.is_monotonic:
            # use stable sort to support first, last, nth
            indexer = self.indexer = ax.argsort(kind='mergesort')
            ax = ax.take(indexer)
            obj = obj._take(indexer, axis=self.axis, is_copy=False)

        self.obj = obj
        self.grouper = ax
        return self.grouper
Ejemplo n.º 4
0
def _sort_labels(uniques, left, right):
    if not isinstance(uniques, np.ndarray):
        # tuplesafe
        uniques = Index(uniques).values

    sorter = uniques.argsort()

    reverse_indexer = np.empty(len(sorter), dtype=np.int32)
    reverse_indexer.put(sorter, np.arange(len(sorter)))
    return reverse_indexer.take(left), reverse_indexer.take(right)
Ejemplo n.º 5
0
def _sort_labels(uniques, left, right):
    if not isinstance(uniques, np.ndarray):
        # tuplesafe
        uniques = Index(uniques).values

    sorter = uniques.argsort()

    reverse_indexer = np.empty(len(sorter), dtype=np.int32)
    reverse_indexer.put(sorter, np.arange(len(sorter)))
    return reverse_indexer.take(left), reverse_indexer.take(right)
Ejemplo n.º 6
0
def unique_with_labels(values):
    from pandas.core.index import Index
    rizer = lib.Factorizer(len(values))
    labels, _ = rizer.factorize(values, sort=False)
    uniques = Index(rizer.uniques)

    try:
        sorter = uniques.argsort()
        reverse_indexer = np.empty(len(sorter), dtype='i4')
        reverse_indexer.put(sorter, np.arange(len(sorter)))
        labels = reverse_indexer.take(labels)
        uniques = uniques.take(sorter)
    except TypeError:
        pass

    return uniques, labels
Ejemplo n.º 7
0
def _sort_labels(uniques, left, right):
    if not isinstance(uniques, np.ndarray):
        # tuplesafe
        uniques = Index(uniques).values

    sorter = uniques.argsort()

    reverse_indexer = np.empty(len(sorter), dtype=np.int64)
    reverse_indexer.put(sorter, np.arange(len(sorter)))

    new_left = reverse_indexer.take(com._ensure_platform_int(left))
    np.putmask(new_left, left == -1, -1)

    new_right = reverse_indexer.take(com._ensure_platform_int(right))
    np.putmask(new_right, right == -1, -1)

    return new_left, new_right