Exemple #1
0
    def collect(self):
        '''Collect all data of the tracker into ``self.data``.

        Data is stored in a multi-level dictionary (DataTree).

        If the tracker is a function, it is called once with an empty
        path and its result is stored under the single path ``("all",)``.
        Otherwise the data paths are filtered, expanded into their full
        cartesian product and :meth:`getData` is called once per
        combination; combinations yielding ``None`` are skipped.

        Returns ``self.data`` after a complete collection over data paths.
        Returns ``None`` (leaving whatever was collected in ``self.data``)
        for function trackers and when no tracks are found or remain
        after filtering.

        Raises ValueError if the data paths have more than
        ``MAX_PATH_NESTING`` levels.
        '''

        self.data = odict()

        self.debug("%s: collecting data paths." % (self.tracker))
        is_function, datapaths = self.getDataPaths(self.tracker)
        self.debug("%s: collected data paths." % (self.tracker))

        # if function, no datapaths
        if is_function:
            d = self.getData(())

            # save in data tree as leaf
            DataTree.setLeaf(self.data, ("all",), d)

            self.debug("%s: collecting data finished for function." % (self.tracker))
            return

        # if no tracks, warn and give up without output
        if len(datapaths) == 0 or len(datapaths[0]) == 0:
            self.warn("%s: no tracks found - no output" % self.tracker)
            return

        self.debug("%s: filtering data paths." % (self.tracker))
        # filter data paths
        datapaths = self.filterDataPaths(datapaths)
        self.debug("%s: filtered data paths." % (self.tracker))

        # filtering may have removed everything
        if len(datapaths) == 0 or len(datapaths[0]) == 0:
            self.warn("%s: no tracks remain after filtering - no output" % self.tracker)
            return

        self.debug("%s: building all_paths" % (self.tracker))
        if len(datapaths) > MAX_PATH_NESTING:
            # Report the nesting depth itself: all_paths has not been built
            # at this point, so it must not be referenced in the message.
            msg = "%s: number of nesting in data paths too large: %i" % (
                self.tracker, len(datapaths))
            self.warn(msg)
            raise ValueError(msg)

        # one path per combination of labels across all nesting levels
        all_paths = list(itertools.product(*datapaths))
        self.debug("%s: collecting data started for %i data paths" %
                   (self.tracker, len(all_paths)))

        self.data = odict()
        for path in all_paths:

            d = self.getData(path)

            # ignore empty data sets
            if d is None:
                continue

            # save in data tree as leaf
            DataTree.setLeaf(self.data, path, d)

        self.debug("%s: collecting data finished for %i data paths" %
                   (self.tracker, len(all_paths)))
        return self.data
    def transform(self, data, path):
        '''Build a tree holding the values of every pair of entries in *data*.

        For each unordered pair ``(n1, n2)`` of keys in *data* (in key
        order), the result contains a branch ``"<n1> x <n2>"`` with two
        leaves, ``n1`` and ``n2``, holding the respective values.

        The values are taken from the first name in ``self.fields`` that
        is present in ``data[n1]``; the same field is then read from
        ``data[n2]``.

        *path* is not used by this method.

        Returns the new tree (empty if *data* has fewer than two keys).

        Raises KeyError if none of ``self.fields`` is present in an entry
        or if the paired entry lacks the selected field, and ValueError
        if the two value sequences differ in length.
        '''

        debug("%s: called" % str(self))

        # materialize the keys so they can be sliced (key views are not
        # indexable on Python 3)
        vals = list(data.keys())
        new_data = odict()

        for x1, n1 in enumerate(vals[:-1]):
            # find the first field that fits
            for field in self.fields:
                if field in data[n1]:
                    d1 = data[n1][field]
                    break
            else:
                raise KeyError("could not find any match from '%s' in '%s'" %
                               (str(data[n1].keys()), str(self.fields)))

            for n2 in vals[x1 + 1:]:
                try:
                    d2 = data[n2][field]
                except KeyError:
                    raise KeyError("no field %s in '%s'" % (field, str(data[n2])))

                # check if array?
                if len(d1) != len(d2):
                    raise ValueError("length of elements not equal: %i != %i" %
                                     (len(d1), len(d2)))

                pair = "%s x %s" % (n1, n2)
                DataTree.setLeaf(new_data, (pair, n1), d1)
                DataTree.setLeaf(new_data, (pair, n2), d2)

        return new_data
    def __call__(self, data):
        '''Apply :meth:`transform` to the branches of *data*.

        The paths to work on are the cartesian product of all label
        levels reported by ``DataTree.getPaths`` except the last
        ``self.nlevels`` ones (presumably one label sequence per nesting
        level - verify against DataTree). For each path the branch is
        fetched, passed to :meth:`transform`, and replaced by the result.
        Empty branches are skipped; branches for which :meth:`transform`
        returns nothing are removed with a warning. If the path is empty,
        the transformed result becomes the new root.

        Returns the (possibly replaced) data tree.

        Raises NotImplementedError if ``self.nlevels`` is ``None``, i.e.
        the class did not declare how many levels it consumes.
        '''

        # identity test: None is a singleton and must not go through __eq__
        if self.nlevels is None:
            raise NotImplementedError("incomplete implementation of %s" % str(self))

        labels = DataTree.getPaths(data)
        debug("transform: started with paths: %s" % labels)
        assert len(labels) >= self.nlevels, \
            "expected at least %i levels - got %i" % (self.nlevels, len(labels))

        paths = list(itertools.product(*labels[:-self.nlevels]))
        for path in paths:
            work = DataTree.getLeaf(data, path)
            if not work:
                continue
            new_data = self.transform(work, path)
            if new_data:
                if path:
                    DataTree.setLeaf(data, path, new_data)
                else:
                    # set new root
                    data = new_data
            else:
                warn("no data at %s - removing branch" % str(path))
                DataTree.removeLeaf(data, path)

        debug("transform: finished with paths: %s" % DataTree.getPaths(data))

        return data
    def collect(self):
        """Collect all data of the tracker into ``self.data``.

        Data is stored in a multi-level dictionary (DataTree).

        If the tracker is a function, it is called once with an empty
        path and its result is stored under the single path ``("all",)``.
        Otherwise the data paths are filtered, expanded into their full
        cartesian product and :meth:`getData` is called once per
        combination; combinations yielding ``None`` are skipped.

        The method returns nothing; results are left in ``self.data``.

        Raises ValueError if no tracks are found or none remain after
        filtering.
        """

        self.data = odict()

        is_function, datapaths = self.getDataPaths(self.tracker)

        # if function, no datapaths
        if is_function:
            d = self.getData(())

            # save in data tree as leaf
            DataTree.setLeaf(self.data, ("all",), d)

            self.debug("%s: collecting data finished for function." % (self.tracker))
            return

        # if no tracks, error
        if len(datapaths) == 0 or len(datapaths[0]) == 0:
            self.warn("%s: no tracks found - no output" % self.tracker)
            raise ValueError("no tracks found from %s" % self.tracker)

        # filter data paths
        datapaths = self.filterDataPaths(datapaths)

        # filtering may have removed everything
        if len(datapaths) == 0 or len(datapaths[0]) == 0:
            self.warn("%s: no tracks remain after filtering - no output" % self.tracker)
            raise ValueError("no tracks found from %s" % self.tracker)

        # one path per combination of labels across all nesting levels
        all_paths = list(itertools.product(*datapaths))
        self.debug("%s: collecting data started for %i data paths" % (self.tracker, len(all_paths)))

        self.data = odict()
        for path in all_paths:

            d = self.getData(path)

            # ignore empty data sets; use an identity test because ``==``
            # on array-like results compares element-wise and cannot be
            # used as a condition
            if d is None:
                continue

            # save in data tree as leaf
            DataTree.setLeaf(self.data, path, d)

        self.debug("%s: collecting data finished for %i data paths" % (self.tracker, len(all_paths)))
    def transform(self, data, path):
        """Add multiple-testing adjusted p-values to every branch of *data*.

        The values stored under the key ``self.pval`` are gathered from
        all branches one level above the leaves, adjusted together in a
        single call to R's ``p.adjust`` with ``method=self.method``, and
        written back to each branch under the key ``"P-adjust"``. A branch
        holding a sequence of p-values receives a list of the same length;
        a branch holding a single scalar receives a single value.

        *path* is not used by this method.

        Returns *data*, modified in place.
        """

        from rpy2.robjects import r as R

        # Per branch: number of p-values contributed, or None when the
        # branch holds a single scalar. None rather than 0 marks scalars
        # so that an empty sequence is not mistaken for one.
        lengths, values = [], []

        labels = DataTree.getPaths(data)
        paths = list(itertools.product(*labels[:-1]))

        for subpath in paths:
            pvalues = DataTree.getLeaf(data, subpath)[self.pval]
            try:
                count = len(pvalues)
            except TypeError:
                # scalar p-value
                lengths.append(None)
                values.append(pvalues)
            else:
                lengths.append(count)
                values.extend(pvalues)

        # adjust all p-values jointly, then convert to a plain list
        padj = list(R["p.adjust"](values, method=self.method))

        # hand the adjusted values back in the order they were gathered
        offset = 0
        for subpath, count in zip(paths, lengths):

            if count is None:
                new_values = padj[offset]
                offset += 1
            else:
                new_values = padj[offset:offset + count]
                offset += count

            if subpath:
                work = odict(DataTree.getLeaf(data, subpath))
                work["P-adjust"] = new_values
                DataTree.setLeaf(data, subpath, work)
            else:
                # empty path: the values live directly at the root
                data["P-adjust"] = new_values

        return data