Ejemplo n.º 1
0
    def run(self):
        """Produce the diversity summary tables and schedule group t-tests.

        Calls ``self.load_indir()`` and then passes ``self.name2obj`` to
        ``tabcommon.table`` twice, once for ``diversity.txt`` and once for
        ``diversity.tex`` inside ``self.outdir``. When ``self.group2samples``
        holds at least two groups, one ``DiversityTtest`` child target is
        added per entry of ``self.indices`` and a ``DiversityTtestSummary``
        is registered as the follow-on target.
        """
        self.load_indir()
        samplings = self.name2obj
        groups = self.group2samples

        # Group averages only exist when groups were supplied; otherwise an
        # empty dict is handed to the table writer.
        group_avrs = libcommon.get_group_avr(samplings, groups) if groups else {}

        # Print out summary table (plain-text file).
        tabcommon.table(samplings,
                        os.path.join(self.outdir, "diversity.txt"),
                        self.indices, group_avrs, groups)
        # Same table for the .tex file; the trailing True presumably switches
        # on LaTeX formatting -- confirm against tabcommon.table.
        tabcommon.table(samplings,
                        os.path.join(self.outdir, "diversity.tex"),
                        self.indices, group_avrs, groups, True)

        # Group comparisons need at least two groups.
        if not groups or len(groups) < 2:
            return

        comparison_dir = os.path.join(self.outdir, "group_comparisons")
        system("mkdir -p %s" % comparison_dir)
        ttest_dir = os.path.join(self.getGlobalTempDir(), "ttests")
        system("mkdir -p %s" % ttest_dir)

        # For each diversity index, each pair of groups, perform ttest
        # and draw plot.
        for index in self.indices:
            child = DiversityTtest(ttest_dir, index, groups, samplings,
                                   self.matched, self.plotfmt, comparison_dir)
            self.addChildTarget(child)

        # The summary is a follow-on target built from the same two
        # directories the children were given.
        summary = DiversityTtestSummary(ttest_dir, comparison_dir, self.pval)
        self.setFollowOnTarget(summary)
Ejemplo n.º 2
0
    def run(self):
        """Summarize diversity indices per index and per sampling size.

        Steps, in order:

        1. Remove ``self.workdir`` (``rm -Rf``).
        2. Load every gzip-compressed pickle found at
           ``<self.indir>/<sample name>/<size>.<rest>``; the part of the file
           name before the first ``.`` is parsed as the sampling size. The
           loaded objects are indexed both as name -> size -> sampling and
           as size -> name -> sampling.
        3. Create ``<opts.outdir>/diversity/{txt,tex}`` (and ``pdf`` when
           ``opts.makeplots`` is set).
        4. For each index in ``opts.diversity`` call ``rf_diversity_table``
           and, when plotting is enabled, ``rfplot.draw_rarefaction``.
        5. For each sampling size call ``tabcommon.table`` once for the
           ``.txt`` file and once for the ``.tex`` file.
        6. Register ``libcommon.CleanupDir(self.indir)`` as the follow-on
           target.

        Raises:
            ValueError: if a file name under a sample directory does not
                start with an integer before its first ``.``.
        """
        system("rm -Rf %s" % self.workdir)
        opts = self.options
        indices = opts.diversity
        name2size2sampling = {}
        size2name2sampling = {}
        for name in os.listdir(self.indir):
            sampledir = os.path.join(self.indir, name)
            size2sampling = name2size2sampling[name] = {}
            for filename in os.listdir(sampledir):
                size = long(filename.split(".")[0])
                filepath = os.path.join(sampledir, filename)
                # NOTE: unpickling can execute arbitrary code, so self.indir
                # must only ever contain files written by this pipeline.
                # Close the gzip handle explicitly so that one descriptor is
                # not leaked per sampling file.
                handle = gzip.open(filepath, "rb")
                try:
                    sampling = pickle.load(handle)
                finally:
                    handle.close()
                size2sampling[size] = sampling
                size2name2sampling.setdefault(size, {})[name] = sampling

        outdir = os.path.join(opts.outdir, "diversity")
        txtdir = os.path.join(outdir, "txt")
        system("mkdir -p %s" % txtdir)
        texdir = os.path.join(outdir, "tex")
        system("mkdir -p %s" % texdir)
        pdfdir = os.path.join(outdir, "pdf")
        if opts.makeplots:
            # The pdf directory is only needed when plots are requested.
            system("mkdir -p %s" % pdfdir)

        # Summary table for each index:
        g2s = opts.group2samples
        groups = g2s.keys() if g2s else None

        for index in indices:
            rftabfile = os.path.join(txtdir, "rf_%s.txt" % index)
            rf_diversity_table(name2size2sampling, rftabfile, index)
            if opts.makeplots:
                rfplotfile = os.path.join(pdfdir, "rf_%s" % index)
                rfplot.draw_rarefaction(name2size2sampling, groups, index,
                                        rfplotfile, opts.plotformat, opts.dpi)

        # Summary table for each sampling size:
        for size, name2sampling in size2name2sampling.iteritems():
            g2avr = {}
            if g2s:
                g2avr = libcommon.get_group_avr(name2sampling, g2s)
            txtfile = os.path.join(txtdir, "diversity_%d.txt" % size)
            tabcommon.table(name2sampling, txtfile, indices, g2avr, g2s)
            texfile = os.path.join(texdir, "diversity_%d.tex" % size)
            # The trailing True presumably selects LaTeX output -- confirm
            # against tabcommon.table.
            tabcommon.table(name2sampling, texfile, indices, g2avr, g2s, True)
        self.setFollowOnTarget(libcommon.CleanupDir(self.indir))
Ejemplo n.º 3
0
 def run(self):
     """Write the clone-size tables and, optionally, the matching plots.

     Calls ``self.load_indir()`` and reads ``self.name2obj``, which must be
     non-empty. For each of six fixed attributes a table is written through
     ``tabcommon.table`` to ``<self.outdir>/txt_tables/<attr>.txt``. When
     ``opts.makeplots`` is set, a figure per attribute is drawn under
     ``<self.outdir>/figures``: the per-sample plot when there are fewer than
     100 samples or ``opts.cs_force_all`` is set, otherwise the averaged plot
     provided group averages are available.
     """
     self.load_indir()
     samples = self.name2obj
     opts = self.opts
     assert len(samples) > 0
     # Any one object serves as the template for the column fields.
     first_obj = samples.values()[0]
     sample_count = len(samples)
     groups = opts.group2samples

     group_rows = None
     group_avrs = None
     if groups:
         group_rows = libcommon.get_group_avr(samples, groups)
         if opts.makeplots:
             # Re-key each entry by its own first element, keeping the
             # second element as the value.
             group_avrs = dict((entry[0], entry[1])
                               for _group, entry in group_rows.iteritems())

     txtdir = os.path.join(self.outdir, "txt_tables")
     system("mkdir -p %s" % txtdir)
     plotdir = None
     if opts.makeplots:
         plotdir = os.path.join(self.outdir, "figures")
         system("mkdir -p %s" % plotdir)

     for attr in ('numclones', 'counts', 'topfreqs', 'numclones_cumul',
                  'counts_cumul', 'topfreqs_cumul'):
         colfields = get_attr_colfields(attr, first_obj)
         tabcommon.table(samples, os.path.join(txtdir, "%s.txt" % attr),
                         colfields, group_rows, groups,
                         keyattr=attr, islist=True)
         if not opts.makeplots:
             continue

         plotfile = os.path.join(plotdir, attr)
         if sample_count < 100 or opts.cs_force_all:
             csplot.draw_clonesize_dist(samples, attr, plotfile,
                                        opts.plotformat, opts.dpi)
         elif group_avrs:
             # NOTE(review): group_avrs is only used as a flag here; the
             # averaged plot still receives the per-sample mapping. Confirm
             # against csplot.draw_clonesize_dist_avr that this is intended.
             csplot.draw_clonesize_dist_avr(samples, attr, plotfile,
                                            opts.plotformat, opts.dpi)