예제 #1
0
파일: matrix.py 프로젝트: pedrot/pandas
    def _reindex_columns(self, columns):
        """
        Return a new DataMatrix conformed to the requested ``columns``.

        Requested labels that also appear in ``self.objects.columns`` are
        delegated to ``self.objects._reindex_columns``; the remaining labels
        are looked up in ``self.columns`` and the matching columns are taken
        from ``self.values``.

        Parameters
        ----------
        columns : Index or sequence of labels
            Wrapped in an ``Index`` when it is not one already.

        Returns
        -------
        DataMatrix
            Uses ``self.index`` as its index. When ``columns`` is empty only
            the index is carried over.
        """
        if len(columns) == 0:
            return DataMatrix(index=self.index)

        if not isinstance(columns, Index):
            columns = Index(columns)

        if self.objects is not None:
            # Split the request: labels owned by the objects matrix are
            # handled there, everything else is handled below.
            object_columns = columns.intersection(self.objects.columns)
            columns = columns - object_columns

            objects = self.objects._reindex_columns(object_columns)
        else:
            objects = None

        # Nothing to take values from: build the result from labels alone.
        if len(columns) > 0 and len(self.columns) == 0:
            return DataMatrix(index=self.index, columns=columns,
                              objects=objects)

        indexer, mask = common.get_indexer(self.columns, columns, None)
        mat = self.values.take(indexer, axis=1)

        # ``~`` is the supported inversion for boolean arrays; unary minus on
        # a boolean array raises TypeError in current NumPy releases.
        notmask = ~mask
        if len(mask) > 0 and notmask.any():
            # Integer (of any width) and boolean data are upcast to float
            # before nulling -- presumably so the nulled entries can hold NaN.
            if issubclass(mat.dtype.type, (np.integer, np.bool_)):
                mat = mat.astype(float)

            common.null_out_axis(mat, notmask, 1)

        return DataMatrix(mat, index=self.index, columns=columns,
                          objects=objects)
예제 #2
0
    def _reindex_index(self, index, method):
        """
        Return a new DataMatrix whose rows are conformed to ``index``.

        Parameters
        ----------
        index : Index or sequence of labels
            Wrapped in an ``Index`` when it is not one already. If it is the
            very same object as ``self.index`` a plain copy is returned.
        method : object
            Passed through unchanged as the third argument of
            ``common.get_indexer``.

        Returns
        -------
        DataMatrix
            Keeps ``self.columns``; ``self.objects`` is reindexed alongside
            when it exists and has at least one column.
        """
        if index is self.index:
            return self.copy()

        if not isinstance(index, Index):
            index = Index(index)

        # No existing rows to draw from: build the result from labels alone.
        if len(self.index) == 0:
            return DataMatrix(index=index, columns=self.columns)

        indexer, mask = common.get_indexer(self.index, index, method)
        mat = self.values.take(indexer, axis=0)

        # ``~`` is the supported inversion for boolean arrays; unary minus on
        # a boolean array raises TypeError in current NumPy releases.
        notmask = ~mask
        if len(index) > 0 and notmask.any():
            # Integer (of any width) and boolean data are upcast to float
            # before nulling -- presumably so the nulled entries can hold NaN.
            if issubclass(mat.dtype.type, (np.integer, np.bool_)):
                mat = mat.astype(float)

            common.null_out_axis(mat, notmask, 0)

        if self.objects is not None and len(self.objects.columns) > 0:
            newObjects = self.objects.reindex(index)
        else:
            newObjects = None

        return DataMatrix(mat,
                          index=index,
                          columns=self.columns,
                          objects=newObjects)
예제 #3
0
파일: matrix.py 프로젝트: pedrot/pandas
    def _reindex_index(self, index, method):
        """
        Return a new DataMatrix whose rows are conformed to ``index``.

        Parameters
        ----------
        index : Index or sequence of labels
            Wrapped in an ``Index`` when it is not one already. If it is the
            very same object as ``self.index`` a plain copy is returned.
        method : object
            Passed through unchanged as the third argument of
            ``common.get_indexer``.

        Returns
        -------
        DataMatrix
            Keeps ``self.columns``; ``self.objects`` is reindexed alongside
            when it exists and has at least one column.
        """
        if index is self.index:
            return self.copy()

        if not isinstance(index, Index):
            index = Index(index)

        # No existing rows to draw from: build the result from labels alone.
        if len(self.index) == 0:
            return DataMatrix(index=index, columns=self.columns)

        indexer, mask = common.get_indexer(self.index, index, method)
        mat = self.values.take(indexer, axis=0)

        # ``~`` is the supported inversion for boolean arrays; unary minus on
        # a boolean array raises TypeError in current NumPy releases.
        notmask = ~mask
        if len(index) > 0 and notmask.any():
            # Integer (of any width) and boolean data are upcast to float
            # before nulling -- presumably so the nulled entries can hold NaN.
            if issubclass(mat.dtype.type, (np.integer, np.bool_)):
                mat = mat.astype(float)

            common.null_out_axis(mat, notmask, 0)

        if self.objects is not None and len(self.objects.columns) > 0:
            newObjects = self.objects.reindex(index)
        else:
            newObjects = None

        return DataMatrix(mat, index=index, columns=self.columns,
                          objects=newObjects)
예제 #4
0
    def transform(self, applyfunc):
        """
        Apply ``applyfunc`` to every group and scatter the per-group results
        back into a copy of the grouped Series.

        Parameters
        ----------
        applyfunc : function
            Called once per group with that group's sub-Series.

        Note
        ----
        This does not aggregate: whatever ``applyfunc`` returns for a group
        is written at that group's positions in the output, so it should be
        another Series aligned with the group it received. Before the call,
        each group is given a ``groupName`` attribute holding its key.

        Example
        -------
        Passing ``lambda x: (x - mean(x)) / std(x)`` standardizes the values
        within each group.

        Returns
        -------
        Series
            Copy of the grouped object with each group's values replaced by
            the corresponding result of ``applyfunc``.
        """
        source = self.obj
        output = source.copy()

        for key, piece in self:
            # XXX
            piece.groupName = key
            transformed = applyfunc(piece)

            # Locate the group's labels within the full index, then write
            # the transformed values at those positions.
            positions, _ = common.get_indexer(source.index, piece.index, None)
            np.put(output, positions, transformed)

        return output
예제 #5
0
    def _reindex_columns(self, columns):
        """
        Return a new DataMatrix conformed to the requested ``columns``.

        Requested labels that also appear in ``self.objects.columns`` are
        delegated to ``self.objects._reindex_columns``; the remaining labels
        are looked up in ``self.columns`` and the matching columns are taken
        from ``self.values``.

        Parameters
        ----------
        columns : Index or sequence of labels
            Wrapped in an ``Index`` when it is not one already.

        Returns
        -------
        DataMatrix
            Uses ``self.index`` as its index. When ``columns`` is empty only
            the index is carried over.
        """
        if len(columns) == 0:
            return DataMatrix(index=self.index)

        if not isinstance(columns, Index):
            columns = Index(columns)

        if self.objects is not None:
            # Split the request: labels owned by the objects matrix are
            # handled there, everything else is handled below.
            object_columns = columns.intersection(self.objects.columns)
            columns = columns - object_columns

            objects = self.objects._reindex_columns(object_columns)
        else:
            objects = None

        # Nothing to take values from: build the result from labels alone.
        if len(columns) > 0 and len(self.columns) == 0:
            return DataMatrix(index=self.index,
                              columns=columns,
                              objects=objects)

        indexer, mask = common.get_indexer(self.columns, columns, None)
        mat = self.values.take(indexer, axis=1)

        # ``~`` is the supported inversion for boolean arrays; unary minus on
        # a boolean array raises TypeError in current NumPy releases.
        notmask = ~mask
        if len(mask) > 0 and notmask.any():
            # Integer (of any width) and boolean data are upcast to float
            # before nulling -- presumably so the nulled entries can hold NaN.
            if issubclass(mat.dtype.type, (np.integer, np.bool_)):
                mat = mat.astype(float)

            common.null_out_axis(mat, notmask, 1)

        return DataMatrix(mat,
                          index=self.index,
                          columns=columns,
                          objects=objects)
예제 #6
0
def _reorder_columns(mat, current, desired):
    """
    Take columns of ``mat`` in the order given by ``desired``.

    Column positions come from ``common.get_indexer(current, desired, None)``;
    only the indexer entries selected by the returned mask are used, so the
    result can have fewer columns than ``desired`` has labels.
    """
    positions, found = common.get_indexer(current, desired, None)
    selected = positions[found]
    return mat.take(selected, axis=1)
예제 #7
0
파일: matrix.py 프로젝트: pedrot/pandas
def _reorder_columns(mat, current, desired):
    """
    Take columns of ``mat`` in the order given by ``desired``.

    Column positions come from ``common.get_indexer(current, desired, None)``;
    only the indexer entries selected by the returned mask are used, so the
    result can have fewer columns than ``desired`` has labels.
    """
    positions, found = common.get_indexer(current, desired, None)
    selected = positions[found]
    return mat.take(selected, axis=1)
예제 #8
0
    def transform(self, func):
        """
        Apply ``func`` to each group's sub-DataFrame and reassemble the
        results into a DataFrame labelled like the grouped object.

        This does not aggregate: the values produced for a group are written
        back at that group's positions, so the result for each subframe
        should be another DataFrame matching the subframe's shape.

        Parameters
        ----------
        func : function
            Function to apply to each subframe. It is first tried through
            ``subframe.apply(func, axis=self.axis)``; if that raises,
            ``func(subframe)`` is called instead.

        Returns
        -------
        DataFrame
            Built with the index and columns of the grouped object.

        Raises
        ------
        ValueError
            If ``self.axis`` is neither 0 nor 1.

        Note
        ----
        Each subframe is endowed the attribute 'groupName' in case
        you need to know which group you are working on.

        Example
        --------
        >>> grouped = df.groupby(lambda x: mapping[x])
        >>> grouped.transform(lambda x: (x - x.mean()) / x.std())
        """
        if self.axis not in (0, 1):
            # Previously surfaced as an incidental NameError on ``trans``.
            raise ValueError('axis must be 0 or 1, got %r' % (self.axis,))

        by_rows = self.axis == 0

        def trans(x):
            # Grouping along columns works on the transposed array so that
            # group positions can always be assigned along the first axis.
            return x if by_rows else x.T

        # DataMatrix objects?
        result_values = trans(np.empty_like(self.obj.values))

        # ``items()`` is available on both Python 2 and 3 mappings, whereas
        # ``iteritems()`` exists only on Python 2.
        for val, group in self.groups.items():
            if not isinstance(group, list): # pragma: no cover
                group = list(group)

            if by_rows:
                subframe = self.obj.reindex(group)
                indexer, _ = common.get_indexer(self.obj.index,
                                                subframe.index, None)
            else:
                subframe = self.obj.reindex(columns=group)
                indexer, _ = common.get_indexer(self.obj.columns,
                                                subframe.columns, None)
            subframe.groupName = val

            try:
                res = subframe.apply(func, axis=self.axis)
            except Exception: # pragma: no cover
                # Deliberate fallback: hand the whole subframe to ``func``.
                res = func(subframe)

            result_values[indexer] = trans(res.values)

        # Undo the working transpose (a no-op when grouping along rows).
        result_values = trans(result_values)

        return DataFrame(result_values, index=self.obj.index,
                         columns=self.obj.columns)