def run(self):
        """Compute pairwise dissimilarity indices between subsamples of two samples.

        Steps:
          1. Unpickle the two gzip-compressed sample objects named by
             ``self.sampleobjfile1`` and ``self.sampleobjfile2``.
          2. Draw a subsample from each with
             ``iseqlib.samplingSample(sample, self.size)``.
          3. Build two parallel count vectors over the union of the clone
             headers of both subsamples (0 where a clone is absent).
          4. For every method name in ``self.options.similarityIndices`` call
             R's ``vegan::vegdist`` on the 2-row count matrix and store the
             first element of the result in a ``PairSamplingStats`` object.
          5. Write the stats object, gzip-pickled, to
             ``<self.outdir>/<self.id>.pickle``.

        Raises:
            ValueError: if the two count vectors differ in length.
        """
        # Function-scope import (same style as the rpy2 imports below).
        # closing() guarantees every gzip handle is closed, so the output
        # pickle is fully flushed even on interpreters without refcounting.
        from contextlib import closing

        with closing(gzip.open(self.sampleobjfile1, "rb")) as handle:
            sample1 = pickle.load(handle)
        with closing(gzip.open(self.sampleobjfile2, "rb")) as handle:
            sample2 = pickle.load(handle)

        # Progress line: size, then name (file basename up to first '.') and
        # total of each sample. Single-argument form prints the same text as
        # the old print statement on Python 2.
        name1 = os.path.basename(self.sampleobjfile1).split('.')[0]
        name2 = os.path.basename(self.sampleobjfile2).split('.')[0]
        print("%s %s %s %s %s" % (self.size, name1, sample1.total, name2, sample2.total))

        # Sampling "size" sequences from each sample:
        subsample1 = iseqlib.samplingSample(sample1, self.size)
        subsample2 = iseqlib.samplingSample(sample2, self.size)

        # Stats container for the current pair of subsamples
        stats = PairSamplingStats()

        # Count vectors over the union of clones of subsample1 and subsample2.
        # Position i in both vectors refers to the same clone header.
        counts1 = []
        counts2 = []
        for header, seq in subsample1.seqs.iteritems():
            counts1.append(seq.count)
            if header in subsample2.seqs:
                counts2.append(subsample2.seqs[header].count)
            else:
                counts2.append(0)
        # Clones seen only in subsample2
        for header, seq in subsample2.seqs.iteritems():
            if header not in subsample1.seqs:
                counts1.append(0)
                counts2.append(seq.count)

        if len(counts1) != len(counts2):
            raise ValueError("The count vectors of two compared samples must have equal length. Found %d and %d\n" %(len(counts1), len(counts2)))

        ## Initialize the R interface
        import rpy2.rinterface as rinterface
        rinterface.set_initoptions(('rpy2', '--no-save'))
        rinterface.initr()

        import rpy2.robjects as robjs
        from rpy2.robjects.packages import importr
        vegan = importr('vegan')
        vegdist = vegan.vegdist

        # Concatenate into a new list (counts1 is left untouched); with
        # nrow=2 and byrow=True the first matrix row holds subsample1's counts
        # and the second row subsample2's.
        counts = counts1 + counts2
        rcountsVec = robjs.IntVector(counts)  # convert python list into R vector
        rcountsMatrix = robjs.r['matrix'](rcountsVec, nrow=2, byrow=True)

        # Method names are passed straight through to vegdist's ``method``
        # argument, e.g. "bray", "horn", "mountford", "chao", "jaccard", ...
        for index in self.options.similarityIndices:
            dissimilarity = vegdist(rcountsMatrix, method=index)
            stats[index] = dissimilarity[0]

        # Write to temp file:
        picklefile = os.path.join(self.outdir, "%d.pickle" % self.id)
        with closing(gzip.open(picklefile, "wb")) as handle:
            pickle.dump(stats, handle)
 def run(self):
     """Subsample a pickled sample, filter it by genes, and pickle the result.

     Reads the gzip-pickled sample from ``self.samplefile``, draws a
     subsample of ``self.size`` using
     ``iseqlib.samplingSample_weightedUniq`` when ``self.options.uniq`` is
     set and ``iseqlib.samplingSample`` otherwise, passes it through
     ``iseqlib.filterSampleByGenes`` with ``self.options.vs`` and
     ``self.options.js``, and writes the result gzip-pickled to
     ``self.outfile``.
     """
     # Function-scope import; closing() makes sure both gzip handles are
     # closed (and the output flushed) instead of relying on garbage
     # collection.
     from contextlib import closing

     with closing(gzip.open(self.samplefile, "rb")) as infile:
         sample = pickle.load(infile)

     if self.options.uniq:
         subsample = iseqlib.samplingSample_weightedUniq(sample, self.size)
     else:
         subsample = iseqlib.samplingSample(sample, self.size)

     # Filtering if selected Vs and/or selected Js were specified
     subsample = iseqlib.filterSampleByGenes(subsample, self.options.vs, self.options.js)

     with closing(gzip.open(self.outfile, "wb")) as outfile:
         pickle.dump(subsample, outfile)
Beispiel #3
0
    def run(self):
        """Subsample a pickled sample, filter it, and schedule the analyses.

        Reads the gzip-pickled sample from ``self.samplefile``, draws a
        subsample of ``self.size`` (``iseqlib.samplingSample_weightedUniq``
        when ``self.options.uniq`` is set, ``iseqlib.samplingSample``
        otherwise), filters it with ``iseqlib.filterSampleByGenes`` using
        ``self.options.vs`` and ``self.options.js``, writes it gzip-pickled to
        ``<globalTempDir>/<sample.name>.pickle`` and adds an ``Analyses`` child
        target for that file.
        """
        # Function-scope import; closing() makes sure the gzip handles are
        # closed, so the pickle is fully written before the child target
        # is registered.
        from contextlib import closing

        globalTempDir = self.getGlobalTempDir()

        # Sampling
        with closing(gzip.open(self.samplefile, "rb")) as infile:
            sample = pickle.load(infile)
        if self.options.uniq:
            # Was the bare name ``size``, which is undefined in this scope and
            # raised NameError whenever options.uniq was set.
            subsample = iseqlib.samplingSample_weightedUniq(sample, self.size)
        else:
            subsample = iseqlib.samplingSample(sample, self.size)

        # Filtering if selected Vs and/or selected Js were specified
        subsample = iseqlib.filterSampleByGenes(subsample, self.options.vs, self.options.js)

        picklefile = os.path.join(globalTempDir, "%s.pickle" % sample.name)
        with closing(gzip.open(picklefile, "wb")) as outfile:
            pickle.dump(subsample, outfile)
        self.addChildTarget(Analyses(picklefile, self.outdir, self.options))
    def run(self):
        """Compute diversity statistics for one subsample and pickle them.

        Reads the gzip-pickled sample from ``self.sampleobjfile``, draws a
        subsample with ``iseqlib.samplingSample(sample, self.size)`` and fills
        a ``SingleSamplingStats`` object with:

          * ``uniqClones``  - ``len(subsample.seqs)``;
          * one entry per name in ``self.options.diversityIndices`` (other
            than 'uniqClones' and 'fisherAlpha'), computed by R's
            ``vegan::diversity`` on the clone counts;
          * ``fisherAlpha`` - first element of ``vegan::fisher.alpha`` on the
            clone counts (always computed, whatever the index list holds).

        The stats object is written gzip-pickled to
        ``<self.outdir>/<self.id>.pickle``.
        """
        # Function-scope import (same style as the rpy2 imports below).
        # closing() guarantees the gzip handles are closed and the output
        # pickle is fully flushed.
        from contextlib import closing

        with closing(gzip.open(self.sampleobjfile, "rb")) as handle:
            sample = pickle.load(handle)

        # Sampling "size" sequences from sample:
        subsample = iseqlib.samplingSample(sample, self.size)

        # Stats container for the current subsample
        stats = SingleSamplingStats()

        # 1/ Number of uniq clones:
        stats.uniqClones = len(subsample.seqs)

        # 2/ Diversity indices, computed from the list of clone sizes
        counts = [seq.count for seq in subsample.seqs.values()]

        ## Initialize the R interface
        import rpy2.rinterface as rinterface
        rinterface.set_initoptions(('rpy2', '--no-save'))
        rinterface.initr()

        import rpy2.robjects as robjs
        from rpy2.robjects.packages import importr
        vegan = importr('vegan')
        rcounts = robjs.IntVector(counts)  # convert python list into R vector

        ## Diversity indices, e.g. 'simpson', 'invsimpson', 'shannon'
        for index in self.options.diversityIndices:
            # These two are not vegan.diversity index names; they are
            # filled in separately above and below.
            if index in ('uniqClones', 'fisherAlpha'):
                continue
            rval = vegan.diversity(rcounts, index)
            stats[index] = rval[0]

        ## Fisher Alpha:
        rfisher = vegan.fisher_alpha(rcounts)
        stats.fisherAlpha = rfisher[0]

        # Write to temp file:
        picklefile = os.path.join(self.outdir, "%d.pickle" % self.id)
        with closing(gzip.open(picklefile, "wb")) as handle:
            pickle.dump(stats, handle)