def run(self):
    """Load all pickled samples, write clone-size tables, queue rarefaction.

    Reads one gzipped pickle per entry of ``self.sampledir`` (expected at
    ``<sampledir>/<name>/<name>``), computes per-group averages with
    ``libcommon.get_group_avr`` and writes ``clonesize.txt`` and
    ``clonesize.tex`` into ``self.options.outdir``.  Finally schedules
    ``diversity.DiversityRarefaction`` as a child target.
    """
    self.logToMaster("RepSize\n")
    stime = time.time()

    name2sample = {}
    for sam in os.listdir(self.sampledir):
        filepath = os.path.join(self.sampledir, sam, sam)
        # NOTE: unpickling executes arbitrary code; these files are assumed
        # to be produced by this pipeline and therefore trusted.
        # The handle is closed explicitly so that a directory with many
        # samples does not exhaust file descriptors.
        with gzip.open(filepath, 'rb') as handle:
            name2sample[sam] = pickle.load(handle)
    logger.info("RepSize, done loading %d samples in %.4f s." %
                (len(name2sample), (time.time() - stime)))
    stime = time.time()

    # Get summary of samples' sizes:
    group2samples = self.options.group2samples
    group2avr = libcommon.get_group_avr(name2sample, group2samples)
    logger.info("RepSize, done computing group_avr in %.4f s." %
                (time.time() - stime))

    txtfile = os.path.join(self.options.outdir, "clonesize.txt")
    repsize.repsize_table(name2sample, txtfile, group2avr, group2samples)
    texfile = os.path.join(self.options.outdir, "clonesize.tex")
    # The trailing True presumably selects LaTeX output -- confirm against
    # repsize.repsize_table.
    repsize.repsize_table(name2sample, texfile, group2avr, group2samples,
                          True)

    self.addChildTarget(diversity.DiversityRarefaction(self.sampledir,
                                                       self.options))
def run(self):
    """Write the diversity summary tables and fan out group t-tests.

    After ``self.load_indir()`` populates ``self.name2obj``, a plain-text
    and a ``.tex`` table are written to ``self.outdir``.  When at least two
    groups are defined, one ``DiversityTtest`` child is queued per index in
    ``self.indices`` and ``DiversityTtestSummary`` is set as the follow-on.
    """
    self.load_indir()
    samplings = self.name2obj
    groups = self.group2samples

    # Summary table (group averages only exist when groups are defined).
    group_avrs = {}
    if groups:
        group_avrs = libcommon.get_group_avr(samplings, groups)
    txt_path = os.path.join(self.outdir, "diversity.txt")
    tabcommon.table(samplings, txt_path, self.indices, group_avrs, groups)
    tex_path = os.path.join(self.outdir, "diversity.tex")
    tabcommon.table(samplings, tex_path, self.indices, group_avrs, groups,
                    True)

    # Pairwise group comparisons need at least two groups.
    if not groups or len(groups) < 2:
        return

    # For each diversity index, each pair of groups: t-test and plot.
    cmp_dir = os.path.join(self.outdir, "group_comparisons")
    system("mkdir -p %s" % cmp_dir)
    ttest_dir = os.path.join(self.getGlobalTempDir(), "ttests")
    system("mkdir -p %s" % ttest_dir)
    for idx in self.indices:
        child = DiversityTtest(ttest_dir, idx, groups, samplings,
                               self.matched, self.plotfmt, cmp_dir)
        self.addChildTarget(child)
    summary = DiversityTtestSummary(ttest_dir, cmp_dir, self.pval)
    self.setFollowOnTarget(summary)
def run(self):
    """Summarise rarefaction samplings into per-index and per-size tables.

    Removes ``self.workdir``, then loads every gzipped pickle found under
    ``self.indir/<name>/``.  The part of each file name before the first
    ``.`` is parsed as the sampling size.  Output goes to
    ``<opts.outdir>/diversity/{txt,tex,pdf}``:

    * one ``rf_<index>.txt`` table (and, if ``opts.makeplots``, one
      rarefaction plot) per diversity index;
    * one ``diversity_<size>.txt`` / ``.tex`` table per sampling size.

    ``libcommon.CleanupDir(self.indir)`` is set as the follow-on target.
    """
    system("rm -Rf %s" % self.workdir)
    opts = self.options
    indices = opts.diversity

    # Index the samplings both ways: name -> size -> obj, size -> name -> obj
    name2size2sampling = {}
    size2name2sampling = {}
    for name in os.listdir(self.indir):
        sampledir = os.path.join(self.indir, name)
        size2sampling = {}
        name2size2sampling[name] = size2sampling
        for filename in os.listdir(sampledir):
            size = long(filename.split(".")[0])
            filepath = os.path.join(sampledir, filename)
            # NOTE: unpickling executes arbitrary code; inputs are assumed
            # to be produced by this pipeline.  Close each handle promptly:
            # there is one file per (sample, size) pair.
            with gzip.open(filepath, "rb") as handle:
                sampling = pickle.load(handle)
            size2sampling[size] = sampling
            size2name2sampling.setdefault(size, {})[name] = sampling

    outdir = os.path.join(opts.outdir, "diversity")
    txtdir = os.path.join(outdir, "txt")
    system("mkdir -p %s" % txtdir)
    texdir = os.path.join(outdir, "tex")
    system("mkdir -p %s" % texdir)
    pdfdir = os.path.join(outdir, "pdf")
    if opts.makeplots:
        system("mkdir -p %s" % pdfdir)

    # Summary table for each index:
    g2s = opts.group2samples
    groups = None
    if g2s:
        groups = g2s.keys()
    for index in indices:
        rftabfile = os.path.join(txtdir, "rf_%s.txt" % index)
        rf_diversity_table(name2size2sampling, rftabfile, index)
        if opts.makeplots:
            rfplotfile = os.path.join(pdfdir, "rf_%s" % index)
            rfplot.draw_rarefaction(name2size2sampling, groups, index,
                                    rfplotfile, opts.plotformat, opts.dpi)

    # Summary table for each sampling size:
    for size, name2sampling in size2name2sampling.iteritems():
        g2avr = {}
        if g2s:
            g2avr = libcommon.get_group_avr(name2sampling, g2s)
        txtfile = os.path.join(txtdir, "diversity_%d.txt" % size)
        tabcommon.table(name2sampling, txtfile, indices, g2avr, g2s)
        texfile = os.path.join(texdir, "diversity_%d.tex" % size)
        tabcommon.table(name2sampling, texfile, indices, g2avr, g2s, True)

    self.setFollowOnTarget(libcommon.CleanupDir(self.indir))
def test_get_group_avr(self):
    """get_group_avr returns one (name, average-object) pair per group."""
    averages = lcommon.get_group_avr(self.name2obj, self.group2names)
    self.assertEqual(len(averages), 2)
    self.assertEqual(sorted(averages.keys()), ['g1', 'g2'])

    # Group g1: attributes are compared against the mean of two members.
    # NOTE(review): the expected values use "/", which is integer division
    # only under Python 2 -- confirm before porting.
    g1_label, g1_avr = averages['g1']
    self.assertEqual(g1_label, 'g1_Avr')
    self.assertEqual(g1_avr.size, (10 + 5) / 2)
    self.assertEqual(g1_avr.numclone, (2 + 4) / 2)
    self.assertEqual(g1_avr.clones, [(5 + 3) / 2, (0 + 2) / 2])

    # Group g2: compared against literal values; clones is expected None.
    g2_label, g2_avr = averages['g2']
    self.assertEqual(g2_label, 'g2_Avr')
    self.assertEqual(g2_avr.size, 15)
    self.assertEqual(g2_avr.numclone, 1)
    self.assertEqual(g2_avr.clones, None)
def test_sort_objs_by_group(self):
    """sort_objs_by_group orders (name, obj) pairs group by group.

    Checked twice: with the group-average entries appended after each
    group's members, and without them.
    """
    by_name = self.name2obj
    group2avr = lcommon.get_group_avr(by_name, self.group2names)

    # With the average entry following each group's samples.
    with_avr = lcommon.sort_objs_by_group(by_name, self.group2names, True,
                                          group2avr)
    self.assertEqual(len(with_avr), 5)
    expected_names = ['sam1', 'sam2', 'g1_Avr', 'sam3', 'g2_Avr']
    self.assertEqual(expected_names, [pair[0] for pair in with_avr])
    expected_objs = [
        by_name['sam1'],
        by_name['sam2'],
        group2avr['g1'][1],
        by_name['sam3'],
        group2avr['g2'][1],
    ]
    self.assertEqual(expected_objs, [pair[1] for pair in with_avr])

    # Without adding average group
    without_avr = lcommon.sort_objs_by_group(by_name, self.group2names,
                                             False, group2avr)
    self.assertEqual(len(without_avr), 3)
    expected_names = ['sam1', 'sam2', 'sam3']
    self.assertEqual(expected_names, [pair[0] for pair in without_avr])
    expected_objs = [by_name[key] for key in ('sam1', 'sam2', 'sam3')]
    self.assertEqual(expected_objs, [pair[1] for pair in without_avr])
def run(self):
    """Write one clone-size table per attribute and, optionally, plots.

    ``self.load_indir()`` fills ``self.name2obj``; at least one object is
    required.  For every attribute in the fixed list below a text table is
    written to ``<outdir>/txt_tables``.  When ``opts.makeplots`` is set, a
    figure per attribute is drawn into ``<outdir>/figures``: the
    per-sample plot if there are fewer than 100 samples or
    ``opts.cs_force_all`` is set, otherwise the ``_avr`` variant when group
    averages are available.
    """
    self.load_indir()
    name2obj = self.name2obj
    opts = self.opts
    assert len(name2obj) > 0
    first_obj = name2obj.values()[0]
    sample_count = len(name2obj)

    groups = opts.group2samples
    group2pair = None   # group -> (average name, average object)
    avr_by_name = None  # average name -> average object (plots only)
    if groups:
        group2pair = libcommon.get_group_avr(name2obj, groups)
        if opts.makeplots:
            avr_by_name = dict((pair[0], pair[1])
                               for pair in group2pair.values())

    attrs = ['numclones', 'counts', 'topfreqs',
             'numclones_cumul', 'counts_cumul', 'topfreqs_cumul']
    txtdir = os.path.join(self.outdir, "txt_tables")
    system("mkdir -p %s" % txtdir)
    plotdir = None
    if opts.makeplots:
        plotdir = os.path.join(self.outdir, "figures")
        system("mkdir -p %s" % plotdir)

    draw_all = sample_count < 100 or opts.cs_force_all
    for attr in attrs:
        table_path = os.path.join(txtdir, "%s.txt" % attr)
        columns = get_attr_colfields(attr, first_obj)
        tabcommon.table(name2obj, table_path, columns, group2pair, groups,
                        keyattr=attr, islist=True)
        if not opts.makeplots:
            continue
        plot_path = os.path.join(plotdir, attr)
        if draw_all:
            csplot.draw_clonesize_dist(name2obj, attr, plot_path,
                                       opts.plotformat, opts.dpi)
        elif avr_by_name:
            # NOTE(review): the averages dict is only tested for truthiness
            # here; name2obj is what gets passed on -- confirm that this is
            # what draw_clonesize_dist_avr expects.
            csplot.draw_clonesize_dist_avr(name2obj, attr, plot_path,
                                           opts.plotformat, opts.dpi)