Example #1
    def _forward_dataset_grouped(self, ds):
        mdata = []  # list of samples array pieces
        if self.__axis == 'samples':
            col = ds.sa
            axis = 0
        elif self.__axis == 'features':
            col = ds.fa
            axis = 1
        else:
            raise RuntimeError("This should not have happened!")

        attrs = dict(zip(col.keys(), [[] for i in col]))

        # create a dictionary of all unique elements in all attributes this
        # mapper should operate on
        self.__attrcombs = dict(
            zip(self.__uattrs, [col[attr].unique for attr in self.__uattrs]))
        # let it generate all combinations of unique elements in any attr
        for comb in _orthogonal_permutations(self.__attrcombs):
            selector = reduce(np.multiply, [
                array_whereequal(col[attr].value, value)
                for attr, value in comb.items()
            ])
            # process the samples
            if axis == 0:
                samples = ds.samples[selector]
            else:
                samples = ds.samples[:, selector]

            # check if there were any samples for such a combination,
            # if not -- warning and skip the rest of the loop body
            if not len(samples):
                warning(
                    'There were no samples for combination %s. It might be '
                    'a sign of an imbalanced dataset %s.' % (comb, ds))
                continue

            fxed_samples = self.__smart_apply_along_axis(samples)
            mdata.append(fxed_samples)
            if self.__attrfx is not None:
                # and now all sample attributes
                fxed_attrs = [
                    self.__attrfx(col[attr].value[selector]) for attr in col
                ]
                for i, attr in enumerate(col):
                    attrs[attr].append(fxed_attrs[i])

        if axis == 0:
            mdata = np.vstack(mdata)
        else:
            mdata = np.vstack(np.transpose(mdata))
        return mdata, attrs
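
All four variants lean on two helpers, _orthogonal_permutations and array_whereequal, and on the builtin reduce (which must be imported from functools under Python 3). The helpers are not shown on this page; below is a minimal sketch of what they presumably do, inferred only from how the method uses them, not taken from the actual module:

import numpy as np
from itertools import product

def _orthogonal_permutations(a_dict):
    # Presumed behavior: yield one dict per element of the Cartesian
    # product of the unique values collected for each attribute.
    keys = list(a_dict.keys())
    for values in product(*(a_dict[k] for k in keys)):
        yield dict(zip(keys, values))

def array_whereequal(a, value):
    # Presumed behavior: boolean mask marking where `a` equals `value`.
    # Such masks multiply elementwise, which is why the
    # reduce(np.multiply, ...) above acts as a logical AND across attrs.
    return np.asarray(a) == value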
Example #2
    def _forward_dataset_grouped(self, ds):
        mdata = [] # list of samples array pieces
        if self.__axis == 'samples':
            col = ds.sa
            axis = 0
        elif self.__axis == 'features':
            col = ds.fa
            axis = 1
        else:
            raise RuntimeError("This should not have happened!")

        attrs = dict(zip(col.keys(), [[] for i in col]))

        # create a dictionary of all unique elements in all attributes this
        # mapper should operate on
        self.__attrcombs = dict(zip(self.__uattrs,
                                [col[attr].unique for attr in self.__uattrs]))
        # let it generate all combinations of unique elements in any attr
        for comb in _orthogonal_permutations(self.__attrcombs):
            selector = reduce(np.multiply,
                                [array_whereequal(col[attr].value, value)
                                 for attr, value in comb.items()])
            # process the samples
            if axis == 0:
                samples = ds.samples[selector]
            else:
                samples = ds.samples[:, selector]

            # check if there were any samples for such a combination,
            # if not -- warning and skip the rest of the loop body
            if not len(samples):
                warning('There were no samples for combination %s. It might be '
                        'a sign of an imbalanced dataset %s.' % (comb, ds))
                continue

            fxed_samples = np.apply_along_axis(self.__fx, axis, samples,
                                               *self.__fxargs)
            mdata.append(fxed_samples)
            if self.__attrfx is not None:
                # and now all sample attributes
                fxed_attrs = [self.__attrfx(col[attr].value[selector])
                                    for attr in col]
                for i, attr in enumerate(col):
                    attrs[attr].append(fxed_attrs[i])

        if axis == 0:
            mdata = np.vstack(mdata)
        else:
            mdata = np.vstack(np.transpose(mdata))
        return mdata, attrs
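
Example #2 inlines the reduction as a plain np.apply_along_axis call instead of the __smart_apply_along_axis wrapper used by the other variants. A self-contained sketch of that collapsing step, assuming fx=np.mean and no extra fxargs:

import numpy as np

samples = np.arange(12.0).reshape(4, 3)          # 4 samples x 3 features
selector = np.array([True, False, True, False])  # mask for one attribute combination

# Collapse the selected rows to a single row, one feature at a time,
# mirroring the axis == 0 branch above.
collapsed = np.apply_along_axis(np.mean, 0, samples[selector])
print(collapsed)  # [3. 4. 5.]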
Example #3
    def _forward_dataset_grouped(self, ds):
        mdata = [] # list of samples array pieces
        if self.__axis == 'samples':
            col = ds.sa
            axis = 0
        elif self.__axis == 'features':
            col = ds.fa
            axis = 1
        else:
            raise RuntimeError("This should not have happened!")

        attrs = dict(zip(col.keys(), [[] for i in col]))

        # create a dictionary of all unique elements in all attributes this
        # mapper should operate on
        self.__attrcombs = dict(zip(self.__uattrs,
                                [col[attr].unique for attr in self.__uattrs]))
        # let it generate all combinations of unique elements in any attr
        order = self.order
        order_keys = []
        for comb in _orthogonal_permutations(self.__attrcombs):
            selector = reduce(np.multiply,
                                [array_whereequal(col[attr].value, value)
                                 for attr, value in comb.items()])

            # process the samples
            if axis == 0:
                samples = ds.samples[selector]
            else:
                samples = ds.samples[:, selector]

            # check if there were any samples for such a combination,
            # if not -- warning and skip the rest of the loop body
            if not len(samples):
                warning('There were no samples for combination %s. It might be '
                        'a sign of an imbalanced dataset %s.' % (comb, ds))
                continue

            fxed_samples = self.__smart_apply_along_axis(samples)
            mdata.append(fxed_samples)
            if self.__attrfx is not None:
                # and now all sample attributes
                for i, attr in enumerate(col):
                    fxed_attr = self.__attrfx(col[attr].value[selector])
                    attrs[attr].append(fxed_attr)
            # if requested, collect the information needed to order the groups
            if order == 'uattrs':
                # reverse order as per the docstring -- most of the time
                # uattrs=['targets', 'chunks'] is used, and chunks are then
                # expected to be grouped together.
                order_keys.append([comb[a] for a in self.__uattrs[::-1]])
            elif order == 'occurrence':
                # First index should be sufficient since we are dealing
                # with unique non-overlapping groups here (AFAIK ;) )
                order_keys.append(np.where(selector)[0][0])

        if order:
            # reorder the groups using the collected "order_keys"
            order_idxs = argsort(order_keys)
            mdata = [mdata[i] for i in order_idxs]
            # and attributes
            attrs = {k: [v[i] for i in order_idxs]
                     for k, v in attrs.items()}

        if axis == 0:
            mdata = np.vstack(mdata)
        else:
            mdata = np.vstack(np.transpose(mdata))
        return mdata, attrs
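
Example #3 adds the order handling: every processed group records a sort key, and the collected keys are then used to reorder both the stacked data and the attributes. The argsort used here is presumably a sequence-friendly variant rather than np.argsort; a hypothetical stand-in and a worked example of the 'uattrs' case:

def argsort(seq):
    # Hypothetical stand-in: indices that would sort `seq`
    # (lexicographically for the list-valued 'uattrs' keys).
    return sorted(range(len(seq)), key=seq.__getitem__)

# Keys collected as [chunk, target] per group (uattrs reversed, as above):
order_keys = [['c1', 'tB'], ['c0', 'tA'], ['c0', 'tB']]
print(argsort(order_keys))  # [1, 2, 0] -- both c0 groups come first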
Example #4
    def _forward_dataset_grouped(self, ds):
        mdata = [] # list of samples array pieces
        if self.__axis == 'samples':
            col = ds.sa
            axis = 0
        elif self.__axis == 'features':
            col = ds.fa
            axis = 1
        else:
            raise RuntimeError("This should not have happened!")

        attrs = dict(zip(col.keys(), [[] for i in col]))

        # create a dictionary of all unique elements in all attributes this
        # mapper should operate on
        self.__attrcombs = dict(zip(self.__uattrs,
                                [col[attr].unique for attr in self.__uattrs]))
        # let it generate all combinations of unique elements in any attr
        order = self.order
        order_keys = []
        for comb in _orthogonal_permutations(self.__attrcombs):
            selector = reduce(np.multiply,
                                [array_whereequal(col[attr].value, value)
                                 for attr, value in comb.items()])

            # process the samples
            if axis == 0:
                samples = ds.samples[selector]
            else:
                samples = ds.samples[:, selector]

            # check if there were any samples for such a combination,
            # if not -- warning and skip the rest of the loop body
            if not len(samples):
                warning('There were no samples for combination %s. It might be '
                        'a sign of an imbalanced dataset %s.' % (comb, ds))
                continue

            fxed_samples = self.__smart_apply_along_axis(samples)
            mdata.append(fxed_samples)
            if self.__attrfx is not None:
                # and now all sample attributes
                for i, attr in enumerate(col):
                    fxed_attr = self.__attrfx(col[attr].value[selector])
                    attrs[attr].append(fxed_attr)
            # if requested, collect the information needed to order the groups
            if order == 'uattrs':
                # reverse order as per the docstring -- most of the time
                # uattrs=['targets', 'chunks'] is used, and chunks are then
                # expected to be grouped together.
                order_keys.append([comb[a] for a in self.__uattrs[::-1]])
            elif order == 'occurrence':
                # First index should be sufficient since we are dealing
                # with unique non-overlapping groups here (AFAIK ;) )
                order_keys.append(np.where(selector)[0][0])

        if order:
            # reorder the groups using the collected "order_keys"
            order_idxs = argsort(order_keys)
            mdata = [mdata[i] for i in order_idxs]
            # and attributes
            attrs = {k: [v[i] for i in order_idxs]
                     for k, v in attrs.items()}

        if axis == 0:
            mdata = np.vstack(mdata)
        else:
            mdata = np.vstack(np.transpose(mdata))
        return mdata, attrs
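
Example #4 is the Python 3 port of #3 (dict.items() instead of iteritems(), with the redundant list() wrappers from the 2to3 conversion cleaned up above). Taken together, the pattern every variant implements is: group rows by each unique combination of attribute values, reduce each group with fx, and stack the results. A self-contained, hypothetical re-creation of the samples-axis path (all names below are illustrative, not from the module):

import numpy as np
from functools import reduce
from itertools import product

def forward_grouped(samples, attrs, uattrs, fx=np.mean):
    # attrs maps attribute names to 1-d arrays, one value per row of samples
    pieces = []
    for values in product(*(np.unique(attrs[a]) for a in uattrs)):
        selector = reduce(np.multiply,
                          [attrs[a] == v for a, v in zip(uattrs, values)])
        group = samples[selector]
        if not len(group):
            continue  # empty combination: the mapper warns and skips
        pieces.append(np.apply_along_axis(fx, 0, group))
    return np.vstack(pieces)

samples = np.arange(8.0).reshape(4, 2)
attrs = {'targets': np.array(['a', 'a', 'b', 'b'])}
print(forward_grouped(samples, attrs, ['targets']))
# [[1. 2.]
#  [5. 6.]]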