コード例 #1
0
ファイル: aimseq.py プロジェクト: ngannguyen/aimseqtk
    def run(self):
        """Tabulate repertoire sizes for all samples, then queue rarefaction.

        Every entry ``<name>`` of ``self.sampledir`` is expected to hold a
        gzip-compressed pickle at ``<sampledir>/<name>/<name>``.  The loaded
        samples are averaged per group with ``libcommon.get_group_avr`` and
        passed twice to ``repsize.repsize_table``: once for "clonesize.txt"
        and once, with a trailing ``True`` flag, for "clonesize.tex" (both
        under ``self.options.outdir``).  Finally a
        ``diversity.DiversityRarefaction`` child target is added.
        """
        self.logToMaster("RepSize\n")
        stime = time.time()
        name2sample = {}
        for sam in os.listdir(self.sampledir):
            filepath = os.path.join(self.sampledir, sam, sam)
            # NOTE: unpickling can execute arbitrary code; sampledir must only
            # contain files written by this pipeline.
            handle = gzip.open(filepath, 'rb')
            try:
                # Close explicitly so a large sample directory cannot exhaust
                # file descriptors while waiting for garbage collection.
                name2sample[sam] = pickle.load(handle)
            finally:
                handle.close()
        logger.info("RepSize, done loading %d samples in %.4f s." %
                    (len(name2sample), (time.time() - stime)))
        stime = time.time()

        # Get summary of samples' sizes:
        group2samples = self.options.group2samples
        group2avr = libcommon.get_group_avr(name2sample, group2samples)
        logger.info("RepSize, done computing group_avr in %.4f s." %
                    (time.time() - stime))

        txtfile = os.path.join(self.options.outdir, "clonesize.txt")
        repsize.repsize_table(name2sample, txtfile, group2avr, group2samples)
        texfile = os.path.join(self.options.outdir, "clonesize.tex")
        # The extra True presumably selects LaTeX output -- confirm against
        # repsize.repsize_table.
        repsize.repsize_table(name2sample, texfile, group2avr, group2samples,
                              True)
        self.addChildTarget(diversity.DiversityRarefaction(self.sampledir,
                                                           self.options))
コード例 #2
0
ファイル: diversity.py プロジェクト: ngannguyen/aimseqtk
    def run(self):
        """Write the diversity summary tables and schedule group t-tests.

        After ``load_indir()`` the objects in ``self.name2obj`` are written
        to "diversity.txt" and "diversity.tex" under ``self.outdir`` via
        ``tabcommon.table``.  When at least two groups are defined, one
        ``DiversityTtest`` child target is added per diversity index and a
        ``DiversityTtestSummary`` follow-on target is set.
        """
        self.load_indir()
        samplings = self.name2obj
        groups = self.group2samples

        # Group averages are only computed when groups were supplied.
        averages = libcommon.get_group_avr(samplings, groups) if groups else {}

        # Print out summary table (second call passes an extra True flag).
        txt_path = os.path.join(self.outdir, "diversity.txt")
        tabcommon.table(samplings, txt_path, self.indices, averages, groups)
        tex_path = os.path.join(self.outdir, "diversity.tex")
        tabcommon.table(samplings, tex_path, self.indices, averages, groups,
                        True)

        # Group comparisons need at least two groups.
        if not groups or len(groups) < 2:
            return

        # For each diversity index, each pair of groups, perform ttest
        # and draw plot
        comparison_dir = os.path.join(self.outdir, "group_comparisons")
        system("mkdir -p %s" % comparison_dir)
        ttest_dir = os.path.join(self.getGlobalTempDir(), "ttests")
        system("mkdir -p %s" % ttest_dir)
        for diversity_index in self.indices:
            child = DiversityTtest(ttest_dir, diversity_index, groups,
                                   samplings, self.matched, self.plotfmt,
                                   comparison_dir)
            self.addChildTarget(child)
        summary = DiversityTtestSummary(ttest_dir, comparison_dir, self.pval)
        self.setFollowOnTarget(summary)
コード例 #3
0
ファイル: diversity.py プロジェクト: ngannguyen/aimseqtk
    def run(self):
        """Summarise rarefaction samplings per index and per sampling size.

        ``self.indir`` is expected to contain one directory per sample; each
        file inside is a gzip-compressed pickle whose name starts with the
        sampling size followed by a "." (e.g. ``<size>.<anything>``).  The
        samplings are indexed both by sample name and by size, then:

        * for every diversity index in ``options.diversity`` a table
          "rf_<index>.txt" is written and, if ``options.makeplots`` is set,
          a rarefaction plot is drawn;
        * for every sampling size "diversity_<size>.txt" and
          "diversity_<size>.tex" tables are written.

        Finally ``libcommon.CleanupDir(self.indir)`` is set as the follow-on
        target.
        """
        system("rm -Rf %s" % self.workdir)
        opts = self.options
        indices = opts.diversity
        name2size2sampling = {}
        size2name2sampling = {}
        for name in os.listdir(self.indir):
            sampledir = os.path.join(self.indir, name)
            size2sampling = name2size2sampling[name] = {}
            for filename in os.listdir(sampledir):
                # The sampling size is the part of the name before the first "."
                size = long(filename.split(".")[0])
                filepath = os.path.join(sampledir, filename)
                # NOTE: unpickling can execute arbitrary code; indir must only
                # contain files written by this pipeline.
                handle = gzip.open(filepath, "rb")
                try:
                    # Close explicitly: there is one file per sample per size,
                    # so leaked handles add up quickly.
                    sampling = pickle.load(handle)
                finally:
                    handle.close()
                size2sampling[size] = sampling
                size2name2sampling.setdefault(size, {})[name] = sampling

        outdir = os.path.join(opts.outdir, "diversity")
        txtdir = os.path.join(outdir, "txt")
        system("mkdir -p %s" % txtdir)
        texdir = os.path.join(outdir, "tex")
        system("mkdir -p %s" % texdir)
        pdfdir = os.path.join(outdir, "pdf")
        if opts.makeplots:
            system("mkdir -p %s" % pdfdir)

        # Summary table for each index:
        groups = None
        g2s = opts.group2samples
        if g2s:
            groups = g2s.keys()

        for index in indices:
            rftabfile = os.path.join(txtdir, "rf_%s.txt" % index)
            rf_diversity_table(name2size2sampling, rftabfile, index)
            if opts.makeplots:
                rfplotfile = os.path.join(pdfdir, "rf_%s" % index)
                rfplot.draw_rarefaction(name2size2sampling, groups, index,
                                        rfplotfile, opts.plotformat, opts.dpi)
        # Summary table for each sampling size:
        for size, name2sampling in size2name2sampling.iteritems():
            g2avr = {}
            if g2s:
                g2avr = libcommon.get_group_avr(name2sampling, g2s)
            txtfile = os.path.join(txtdir, "diversity_%d.txt" % size)
            tabcommon.table(name2sampling, txtfile, indices, g2avr, g2s)
            texfile = os.path.join(texdir, "diversity_%d.tex" % size)
            tabcommon.table(name2sampling, texfile, indices, g2avr, g2s, True)
        self.setFollowOnTarget(libcommon.CleanupDir(self.indir))
コード例 #4
0
    def test_get_group_avr(self):
        """get_group_avr returns one (label, average-object) pair per group.

        The expected numbers are written as the arithmetic the fixture is
        presumably built from (fixture set-up is not shown here -- confirm).
        """
        averages = lcommon.get_group_avr(self.name2obj, self.group2names)
        self.assertEqual(len(averages), 2)
        self.assertEqual(sorted(averages.keys()), ['g1', 'g2'])

        # (group key, expected label, [(attribute, expected value), ...])
        expectations = [
            ('g1', 'g1_Avr', [
                ('size', (10 + 5) / 2),
                ('numclone', (2 + 4) / 2),
                ('clones', [(5 + 3) / 2, (0 + 2) / 2]),
            ]),
            ('g2', 'g2_Avr', [
                ('size', 15),
                ('numclone', 1),
                ('clones', None),
            ]),
        ]
        for group, label, attr_values in expectations:
            (name, avr) = averages[group]
            self.assertEqual(name, label)
            for attr, wanted in attr_values:
                self.assertEqual(getattr(avr, attr), wanted)
コード例 #5
0
 def test_sort_objs_by_group(self):
     """sort_objs_by_group orders (name, obj) pairs group by group.

     With the third argument True, each group's average entry is expected
     right after that group's samples; with False only the samples appear.
     """
     averages = lcommon.get_group_avr(self.name2obj, self.group2names)

     # Including the per-group average entries
     with_avr = lcommon.sort_objs_by_group(self.name2obj, self.group2names,
                                           True, averages)
     self.assertEqual(len(with_avr), 5)
     expected_names = ['sam1', 'sam2', 'g1_Avr', 'sam3', 'g2_Avr']
     self.assertEqual(expected_names, [pair[0] for pair in with_avr])
     samples = self.name2obj
     expected_objs = [
         samples['sam1'],
         samples['sam2'],
         averages['g1'][1],
         samples['sam3'],
         averages['g2'][1],
     ]
     self.assertEqual(expected_objs, [pair[1] for pair in with_avr])

     # Without adding average group
     without_avr = lcommon.sort_objs_by_group(self.name2obj,
                                              self.group2names, False,
                                              averages)
     self.assertEqual(len(without_avr), 3)
     expected_names = ['sam1', 'sam2', 'sam3']
     self.assertEqual(expected_names, [pair[0] for pair in without_avr])
     samples = self.name2obj
     expected_objs = [samples['sam1'], samples['sam2'], samples['sam3']]
     self.assertEqual(expected_objs, [pair[1] for pair in without_avr])
コード例 #6
0
ファイル: clonesize.py プロジェクト: ngannguyen/aimseqtk
 def run(self):
     """Write one clone-size table per attribute and optionally plot it.

     After ``load_indir()`` a text table "<attr>.txt" is written under
     ``<outdir>/txt_tables`` for each of six clone-size attributes.  When
     ``opts.makeplots`` is set, a figure per attribute is drawn under
     ``<outdir>/figures``: the per-sample plot when there are fewer than
     100 samples or ``opts.cs_force_all`` is set, otherwise the averaged
     plot, and only if group averages are available.
     """
     self.load_indir()
     name2obj = self.name2obj
     opts = self.opts
     assert len(name2obj) > 0
     first_obj = name2obj.values()[0]
     sample_count = len(name2obj)

     groups = opts.group2samples
     group2pair = None    # group -> (label, average object)
     label2avr = None     # label -> average object, only built for plots
     if groups:
         group2pair = libcommon.get_group_avr(name2obj, groups)
         if opts.makeplots:
             label2avr = dict((pair[0], pair[1])
                              for pair in group2pair.values())

     table_dir = os.path.join(self.outdir, "txt_tables")
     system("mkdir -p %s" % table_dir)
     figure_dir = None
     if opts.makeplots:
         figure_dir = os.path.join(self.outdir, "figures")
         system("mkdir -p %s" % figure_dir)

     for attr in ('numclones', 'counts', 'topfreqs', 'numclones_cumul',
                  'counts_cumul', 'topfreqs_cumul'):
         table_path = os.path.join(table_dir, "%s.txt" % attr)
         columns = get_attr_colfields(attr, first_obj)
         tabcommon.table(name2obj, table_path, columns, group2pair, groups,
                         keyattr=attr, islist=True)
         if not opts.makeplots:
             continue

         figure_path = os.path.join(figure_dir, attr)
         if sample_count < 100 or opts.cs_force_all:
             draw = csplot.draw_clonesize_dist
         elif label2avr:
             # NOTE(review): label2avr is only truth-tested here; the
             # averaged plot still receives name2obj -- confirm whether
             # draw_clonesize_dist_avr should be given the averages.
             draw = csplot.draw_clonesize_dist_avr
         else:
             continue
         draw(name2obj, attr, figure_path, opts.plotformat, opts.dpi)