def _forward_dataset_grouped(self, ds):
    """Forward-map ``ds`` by collapsing groups of unique attribute combinations.

    Groups are built from all combinations of unique values of the
    attributes named in ``self.__uattrs``, taken along the configured
    axis ('samples' -> ``ds.sa`` / axis 0, 'features' -> ``ds.fa`` /
    axis 1).  Each group of samples is collapsed with
    ``self.__smart_apply_along_axis``.

    Returns
    -------
    tuple(ndarray, dict)
      Stacked per-group results, and a dict mapping every attribute name
      in the collection to a list of per-group collapsed attribute
      values (lists stay empty when ``self.__attrfx`` is None).
    """
    mdata = []  # list of samples array pieces
    if self.__axis == 'samples':
        col = ds.sa
        axis = 0
    elif self.__axis == 'features':
        col = ds.fa
        axis = 1
    else:
        raise RuntimeError("This should not have happened!")

    attrs = dict(zip(col.keys(), [[] for i in col]))

    # create a dictionary for all unique elements in all attribute this
    # mapper should operate on
    self.__attrcombs = dict(zip(self.__uattrs,
                                [col[attr].unique for attr in self.__uattrs]))
    # let it generate all combinations of unique elements in any attr
    for comb in _orthogonal_permutations(self.__attrcombs):
        # FIX: dict.iteritems() is Python-2-only -- use items() (as the
        # modernized variant of this method in this file already does)
        selector = reduce(np.multiply,
                          [array_whereequal(col[attr].value, value)
                           for attr, value in comb.items()])

        # process the samples
        if axis == 0:
            samples = ds.samples[selector]
        else:
            samples = ds.samples[:, selector]

        # check if there were any samples for such a combination,
        # if not -- warning and skip the rest of the loop body
        if not len(samples):
            warning('There were no samples for combination %s. It might be '
                    'a sign of a disbalanced dataset %s.' % (comb, ds))
            continue

        fxed_samples = self.__smart_apply_along_axis(samples)
        mdata.append(fxed_samples)

        # FIX: idiomatic identity test instead of "not ... is None"
        if self.__attrfx is not None:
            # and now all samples attributes
            fxed_attrs = [self.__attrfx(col[attr].value[selector])
                          for attr in col]
            for i, attr in enumerate(col):
                attrs[attr].append(fxed_attrs[i])

    if axis == 0:
        mdata = np.vstack(mdata)
    else:
        mdata = np.vstack(np.transpose(mdata))
    return mdata, attrs
def _forward_dataset_grouped(self, ds):
    """Forward-map ``ds`` by collapsing groups of unique attribute combinations.

    Groups are built from all combinations of unique values of the
    attributes named in ``self.__uattrs`` along the configured axis
    ('samples' -> ``ds.sa`` / axis 0, 'features' -> ``ds.fa`` / axis 1).
    Each group is reduced with ``np.apply_along_axis(self.__fx, ...)``.

    Returns
    -------
    tuple(ndarray, dict)
      Stacked per-group results, and a dict mapping every attribute name
      to a list of per-group collapsed attribute values (lists stay
      empty when ``self.__attrfx`` is None).
    """
    mdata = []  # list of samples array pieces
    if self.__axis == 'samples':
        col = ds.sa
        axis = 0
    elif self.__axis == 'features':
        col = ds.fa
        axis = 1
    else:
        raise RuntimeError("This should not have happened!")

    attrs = dict(zip(col.keys(), [[] for i in col]))

    # create a dictionary for all unique elements in all attribute this
    # mapper should operate on
    self.__attrcombs = dict(zip(self.__uattrs,
                                [col[attr].unique for attr in self.__uattrs]))
    # let it generate all combinations of unique elements in any attr
    for comb in _orthogonal_permutations(self.__attrcombs):
        # FIX: dict.iteritems() is Python-2-only -- use items() (as the
        # modernized variant of this method in this file already does)
        selector = reduce(np.multiply,
                          [array_whereequal(col[attr].value, value)
                           for attr, value in comb.items()])

        # process the samples
        if axis == 0:
            samples = ds.samples[selector]
        else:
            samples = ds.samples[:, selector]

        # check if there were any samples for such a combination,
        # if not -- warning and skip the rest of the loop body
        if not len(samples):
            warning('There were no samples for combination %s. It might be '
                    'a sign of a disbalanced dataset %s.' % (comb, ds))
            continue

        fxed_samples = np.apply_along_axis(self.__fx, axis, samples,
                                           *self.__fxargs)
        mdata.append(fxed_samples)

        # FIX: idiomatic identity test instead of "not ... is None"
        if self.__attrfx is not None:
            # and now all samples attributes
            fxed_attrs = [self.__attrfx(col[attr].value[selector])
                          for attr in col]
            for i, attr in enumerate(col):
                attrs[attr].append(fxed_attrs[i])

    if axis == 0:
        mdata = np.vstack(mdata)
    else:
        mdata = np.vstack(np.transpose(mdata))
    return mdata, attrs
def _forward_dataset_grouped(self, ds):
    """Forward-map ``ds`` by collapsing groups of unique attribute combinations.

    Groups are built from all combinations of unique values of the
    attributes named in ``self.__uattrs`` along the configured axis
    ('samples' -> ``ds.sa`` / axis 0, 'features' -> ``ds.fa`` / axis 1).
    When ``self.order`` is set, output groups are reordered:
    'uattrs' -- sorted by the (reversed) uattrs values of each group;
    'occurrence' -- sorted by first appearance in the input.

    Returns
    -------
    tuple(ndarray, dict)
      Stacked per-group results, and a dict mapping every attribute name
      to a list of per-group collapsed attribute values (lists stay
      empty when ``self.__attrfx`` is None).
    """
    mdata = []  # list of samples array pieces
    if self.__axis == 'samples':
        col = ds.sa
        axis = 0
    elif self.__axis == 'features':
        col = ds.fa
        axis = 1
    else:
        raise RuntimeError("This should not have happened!")

    attrs = dict(zip(col.keys(), [[] for i in col]))

    # create a dictionary for all unique elements in all attribute this
    # mapper should operate on
    self.__attrcombs = dict(zip(self.__uattrs,
                                [col[attr].unique for attr in self.__uattrs]))
    # let it generate all combinations of unique elements in any attr
    order = self.order
    order_keys = []
    for comb in _orthogonal_permutations(self.__attrcombs):
        # FIX: dict.iteritems() is Python-2-only -- use items() (as the
        # modernized variant of this method in this file already does)
        selector = reduce(np.multiply,
                          [array_whereequal(col[attr].value, value)
                           for attr, value in comb.items()])

        # process the samples
        if axis == 0:
            samples = ds.samples[selector]
        else:
            samples = ds.samples[:, selector]

        # check if there were any samples for such a combination,
        # if not -- warning and skip the rest of the loop body
        if not len(samples):
            warning('There were no samples for combination %s. It might be '
                    'a sign of a disbalanced dataset %s.' % (comb, ds))
            continue

        fxed_samples = self.__smart_apply_along_axis(samples)
        mdata.append(fxed_samples)

        # FIX: idiomatic identity test instead of "not ... is None"
        if self.__attrfx is not None:
            # and now all samples attributes
            for i, attr in enumerate(col):
                fxed_attr = self.__attrfx(col[attr].value[selector])
                attrs[attr].append(fxed_attr)

        # possibly take care about collecting information to have groups ordered
        if order == 'uattrs':
            # reverse order as per docstring -- most of the time we have
            # used uattrs=['targets', 'chunks'] and did expect chunks being
            # groupped together.
            order_keys.append([comb[a] for a in self.__uattrs[::-1]])
        elif order == 'occurrence':
            # First index should be sufficient since we are dealing
            # with unique non-overlapping groups here (AFAIK ;) )
            order_keys.append(np.where(selector)[0][0])

    if order:
        # reorder our groups using collected "order_keys"
        order_idxs = argsort(order_keys)
        mdata = [mdata[i] for i in order_idxs]
        # and attributes
        # FIX: dict.iteritems() is Python-2-only -- use items()
        attrs = dict((k, [v[i] for i in order_idxs])
                     for k, v in attrs.items())

    if axis == 0:
        mdata = np.vstack(mdata)
    else:
        mdata = np.vstack(np.transpose(mdata))
    return mdata, attrs
def _forward_dataset_grouped(self, ds):
    """Collapse dataset samples (or features) within attribute-defined groups.

    Every combination of unique values of the attributes named in
    ``self.__uattrs`` defines one group along the configured axis
    ('samples' -> ``ds.sa`` / axis 0, 'features' -> ``ds.fa`` / axis 1);
    each group is reduced via ``self.__smart_apply_along_axis``.  If
    ``self.order`` is set, the resulting groups are reordered: 'uattrs'
    sorts by the (reversed) per-group attribute values, 'occurrence'
    sorts by first appearance in the input.

    Returns
    -------
    tuple(ndarray, dict)
      Stacked per-group results, plus a dict mapping each attribute name
      to its list of per-group collapsed values (empty lists when
      ``self.__attrfx`` is None).
    """
    # pick the collection and numeric axis we operate along
    if self.__axis == 'samples':
        col, axis = ds.sa, 0
    elif self.__axis == 'features':
        col, axis = ds.fa, 1
    else:
        raise RuntimeError("This should not have happened!")

    pieces = []                       # collapsed samples, one entry per group
    attrs = {key: [] for key in col.keys()}

    # unique values of every attribute this mapper groups by
    self.__attrcombs = {attr: col[attr].unique for attr in self.__uattrs}

    order = self.order
    order_keys = []

    # visit every combination of unique attribute values
    for combo in _orthogonal_permutations(self.__attrcombs):
        masks = [array_whereequal(col[attr].value, value)
                 for attr, value in combo.items()]
        selector = reduce(np.multiply, masks)

        # slice out this group's samples along the chosen axis
        if axis == 0:
            samples = ds.samples[selector]
        else:
            samples = ds.samples[:, selector]

        # empty group: warn and move on to the next combination
        if not len(samples):
            warning('There were no samples for combination %s. It might be '
                    'a sign of a disbalanced dataset %s.' % (combo, ds))
            continue

        pieces.append(self.__smart_apply_along_axis(samples))

        if self.__attrfx is not None:
            # collapse every attribute of the collection for this group
            for attr in col:
                attrs[attr].append(self.__attrfx(col[attr].value[selector]))

        # record a sort key if the caller asked for ordered groups
        if order == 'uattrs':
            # reversed so the last uattr varies slowest -- e.g. with
            # uattrs=['targets', 'chunks'] chunks end up grouped together
            order_keys.append([combo[a] for a in self.__uattrs[::-1]])
        elif order == 'occurrence':
            # first selected index suffices: groups are unique and
            # non-overlapping
            order_keys.append(np.where(selector)[0][0])

    if order:
        # permute groups (and their attribute lists) by the collected keys
        idxs = argsort(order_keys)
        pieces = [pieces[i] for i in idxs]
        attrs = {k: [v[i] for i in idxs] for k, v in attrs.items()}

    if axis == 0:
        mdata = np.vstack(pieces)
    else:
        mdata = np.vstack(np.transpose(pieces))
    return mdata, attrs