def collect(self):
    """Collect all data for the tracker into ``self.data``.

    Data is stored in a multi-level dictionary (DataTree).

    If ``getDataPaths`` reports that the tracker is a function,
    ``getData`` is called once with an empty path and the result is
    stored under the single path ``("all",)``. Otherwise ``getData`` is
    called for every combination of the filtered data paths and each
    result that is not ``None`` is stored as a leaf under its path.

    Returns:
        ``self.data`` after collecting over data paths. ``None`` when
        the tracker is a function, or when no tracks are available
        (a warning is logged in the latter case).

    Raises:
        ValueError: if the number of nesting levels in the data paths
            exceeds ``MAX_PATH_NESTING``.
    """
    self.data = odict()

    self.debug("%s: collecting data paths." % (self.tracker))
    is_function, datapaths = self.getDataPaths(self.tracker)
    self.debug("%s: collected data paths." % (self.tracker))

    # if function, no datapaths
    if is_function:
        d = self.getData(())
        # save in data tree as leaf
        DataTree.setLeaf(self.data, ("all",), d)
        self.debug("%s: collecting data finished for function."
                   % (self.tracker))
        return

    # if no tracks, error
    if len(datapaths) == 0 or len(datapaths[0]) == 0:
        self.warn("%s: no tracks found - no output" % self.tracker)
        return

    self.debug("%s: filtering data paths." % (self.tracker))
    # filter data paths
    datapaths = self.filterDataPaths(datapaths)
    self.debug("%s: filtered data paths." % (self.tracker))

    # if no tracks, error
    if len(datapaths) == 0 or len(datapaths[0]) == 0:
        self.warn("%s: no tracks remain after filtering - no output"
                  % self.tracker)
        return

    self.debug("%s: building all_paths" % (self.tracker))
    if len(datapaths) > MAX_PATH_NESTING:
        # Report the nesting depth that was actually tested; all_paths
        # does not exist yet at this point.
        msg = ("%s: number of nesting in data paths too large: %i"
               % (self.tracker, len(datapaths)))
        self.warn(msg)
        raise ValueError(msg)

    # one path per combination of labels, one label taken from each level
    all_paths = list(itertools.product(*datapaths))
    self.debug("%s: collecting data started for %i data paths"
               % (self.tracker, len(all_paths)))

    self.data = odict()
    for path in all_paths:
        d = self.getData(path)

        # ignore empty data sets
        if d is None:
            continue

        # save in data tree as leaf
        DataTree.setLeaf(self.data, path, d)

    self.debug("%s: collecting data finished for %i data paths"
               % (self.tracker, len(all_paths)))
    return self.data
def transform(self, data, path):
    """Build all pairwise combinations of the entries in ``data``.

    For every unordered pair of keys ``(n1, n2)`` in ``data`` (in key
    order), a new branch named ``"<n1> x <n2>"`` is created that holds
    two leaves, ``n1`` and ``n2``. The values are taken from the first
    field in ``self.fields`` that is present in ``data[n1]``; the same
    field is then read from ``data[n2]``.

    Args:
        data: mapping of label -> mapping of field -> sized value.
        path: path of ``data`` within the full tree (not used here).

    Returns:
        A new ordered dictionary with the pairwise branches. It is
        empty when ``data`` has fewer than two entries.

    Raises:
        KeyError: if none of ``self.fields`` is present in an entry, or
            if the chosen field is missing from the paired entry.
        ValueError: if the two values of a pair differ in length.
    """
    debug("%s: called" % str(self))

    # Materialise the keys: they are accessed by position below, and
    # dictionary views do not support indexing or slicing.
    vals = list(data.keys())
    new_data = odict()

    for x1, n1 in enumerate(vals[:-1]):
        # find the first field that fits
        for field in self.fields:
            if field in data[n1]:
                d1 = data[n1][field]
                break
        else:
            raise KeyError(
                "could not find any match from '%s' in '%s'"
                % (str(data[n1].keys()), str(self.fields)))

        for n2 in vals[x1 + 1:]:
            try:
                d2 = data[n2][field]
            except KeyError:
                raise KeyError("no field %s in '%s'"
                               % (field, str(data[n2])))

            # check if array?
            if len(d1) != len(d2):
                raise ValueError(
                    "length of elements not equal: %i != %i"
                    % (len(d1), len(d2)))

            pair_label = "%s x %s" % (n1, n2)
            DataTree.setLeaf(new_data, (pair_label, n1), d1)
            DataTree.setLeaf(new_data, (pair_label, n2), d2)

    return new_data
def __call__(self, data):
    """Apply ``self.transform`` to every sub-tree ``self.nlevels`` above the leaves.

    The per-level labels of ``data`` are obtained from
    ``DataTree.getPaths``; every combination of the labels of all but
    the last ``self.nlevels`` levels is used as a path. For each path
    the sub-tree found there is passed to ``self.transform`` and
    replaced by the result. A falsy sub-tree is skipped. A falsy result
    causes the branch to be removed and a warning to be logged.

    Args:
        data: the data tree to transform; it is modified in place,
            except when the path is empty, in which case the result of
            ``transform`` becomes the new root.

    Returns:
        The transformed data tree.

    Raises:
        NotImplementedError: if ``self.nlevels`` has not been set.
        AssertionError: if the tree has fewer levels than
            ``self.nlevels``.
    """
    if self.nlevels is None:
        raise NotImplementedError(
            "incomplete implementation of %s" % str(self))

    labels = DataTree.getPaths(data)
    debug("transform: started with paths: %s" % labels)
    assert len(labels) >= self.nlevels, \
        "expected at least %i levels - got %i" % (self.nlevels,
                                                  len(labels))

    # paths to the sub-trees that sit self.nlevels above the leaves
    paths = list(itertools.product(*labels[:-self.nlevels]))
    for path in paths:
        work = DataTree.getLeaf(data, path)
        if not work:
            continue

        new_data = self.transform(work, path)
        if new_data:
            if path:
                DataTree.setLeaf(data, path, new_data)
            else:
                # set new root
                data = new_data
        else:
            warn("no data at %s - removing branch" % str(path))
            DataTree.removeLeaf(data, path)

    debug("transform: finished with paths: %s"
          % DataTree.getPaths(data))

    return data
def collect(self):
    """Collect all data for the tracker into ``self.data``.

    Data is stored in a multi-level dictionary (DataTree).

    If ``getDataPaths`` reports that the tracker is a function,
    ``getData`` is called once with an empty path and the result is
    stored under the single path ``("all",)``. Otherwise ``getData`` is
    called for every combination of the filtered data paths and each
    result that is not ``None`` is stored as a leaf under its path.

    Returns:
        None. The collected data is available in ``self.data``.

    Raises:
        ValueError: if there are no tracks, either initially or after
            filtering the data paths.
    """
    self.data = odict()

    is_function, datapaths = self.getDataPaths(self.tracker)

    # if function, no datapaths
    if is_function:
        d = self.getData(())
        # save in data tree as leaf
        DataTree.setLeaf(self.data, ("all",), d)
        self.debug("%s: collecting data finished for function."
                   % (self.tracker))
        return

    # if no tracks, error
    if len(datapaths) == 0 or len(datapaths[0]) == 0:
        self.warn("%s: no tracks found - no output" % self.tracker)
        raise ValueError("no tracks found from %s" % self.tracker)

    # filter data paths
    datapaths = self.filterDataPaths(datapaths)

    # if no tracks, error
    if len(datapaths) == 0 or len(datapaths[0]) == 0:
        self.warn("%s: no tracks remain after filtering - no output"
                  % self.tracker)
        raise ValueError("no tracks found from %s" % self.tracker)

    # one path per combination of labels, one label taken from each level
    all_paths = list(itertools.product(*datapaths))
    self.debug("%s: collecting data started for %i data paths"
               % (self.tracker, len(all_paths)))

    self.data = odict()
    for path in all_paths:
        d = self.getData(path)

        # Ignore empty data sets. An identity test is required here:
        # "== None" is evaluated elementwise by array-like results and
        # cannot be used as a condition.
        if d is None:
            continue

        # save in data tree as leaf
        DataTree.setLeaf(self.data, path, d)

    self.debug("%s: collecting data finished for %i data paths"
               % (self.tracker, len(all_paths)))
def transform(self, data, path):
    """Add multiple-testing adjusted P-values to every leaf dictionary.

    The values stored under ``self.pval`` are gathered from all
    sub-trees one level above the leaves, adjusted together in a single
    call to R's ``p.adjust`` using ``self.method``, and written back
    next to the originals under the key ``"P-adjust"``. An entry that
    was a sequence receives a list of adjusted values of the same
    length; an entry that was a scalar receives a single value.

    Args:
        data: the data tree to process; it is updated in place.
        path: path of ``data`` within the full tree (not used here).

    Returns:
        ``data`` with the ``"P-adjust"`` entries added.
    """
    from rpy2.robjects import r as R

    labels = DataTree.getPaths(data)
    paths = list(itertools.product(*labels[:-1]))

    # For each path: the number of P-values it contributed, or None if
    # it contributed a single scalar. None (rather than 0) marks
    # scalars so that an empty sequence is not mistaken for one.
    lengths = []
    values = []
    for leaf_path in paths:
        work = DataTree.getLeaf(data, leaf_path)
        pvalues = work[self.pval]
        try:
            num = len(pvalues)
        except TypeError:
            # no length: a single scalar P-value
            lengths.append(None)
            values.append(pvalues)
        else:
            lengths.append(num)
            values.extend(pvalues)

    padj = list(R["p.adjust"](values, method=self.method))

    # Hand the adjusted values back in the order they were gathered.
    offset = 0
    for leaf_path, num in zip(paths, lengths):
        if num is None:
            new_values = padj[offset]
            offset += 1
        else:
            new_values = padj[offset:offset + num]
            offset += num

        if leaf_path:
            work = odict(DataTree.getLeaf(data, leaf_path))
            work["P-adjust"] = new_values
            DataTree.setLeaf(data, leaf_path, work)
        else:
            data["P-adjust"] = new_values

    return data