def run(self, debug=False):
    """Correct windows for every contig in parallel, then bundle CNV inputs.

    Each contig in ``self.contigs`` is submitted to ``self.win_correct`` on a
    process pool. Afterwards, for every contig that has a
    ``<chrom>.nbinom.arg`` file in ``self.indir``, the three tab-separated
    values on its first line are read and a ``CNVdata`` object is filled per
    sample with those values, the sample's ploidy for that contig, and the
    windows read from the sample's ``<chrom>.W<win>S<shift>.fixdep.gz`` file.
    The resulting ``{sample: CNVdata}`` mapping is saved through ``SaveLoad``
    to ``<indir>/<chrom>.cnv.args``.

    Args:
        debug: When true, the ``.nbinom.arg`` files are kept instead of being
            removed once they have been read.
    """
    pool = Pool(
        processes=cpu(use_mem=3221225472, cpu_limit=len(self.contigs)))
    for chrom in self.contigs:
        # NOTE: the AsyncResult is discarded, so an exception raised inside
        # win_correct is not re-raised here; a contig whose worker produced
        # no .nbinom.arg file is simply skipped below.
        pool.apply_async(self.win_correct, args=(chrom, ))
    pool.close()
    pool.join()

    for chrom in self.contigs:
        nbarg = os.path.join(self.indir, "%s.nbinom.arg" % chrom)
        if not os.path.isfile(nbarg):
            continue
        # Context manager so the handle is closed even if the line does not
        # unpack into exactly three fields.
        with smart_open(nbarg) as f_in:
            trials, best_probability, devi = (
                f_in.readline().strip().split("\t"))
        if not debug:
            os.remove(nbarg)

        cnvdata = dict()
        for sample in self.samples:
            record = CNVdata()
            cnvdata[sample] = record
            record.trials = int(trials)
            record.best_probability = float(best_probability)
            record.min_devi = float(devi)
            record.ploid = int(self.chrom_stat[sample][chrom].ploid)
            record.regions = list()
            record.data = list()

            dep_data = os.path.join(
                self.indir, sample,
                "%s.W%dS%d.fixdep.gz" % (chrom, self.CorrectWinLen,
                                         self.CorrectShiftLen))
            if not os.path.isfile(dep_data):
                continue
            with smart_open(dep_data) as f_in:
                for line in f_in:
                    if line.startswith("#"):
                        continue
                    # Use a separate name for the file's chrom column: the
                    # outer ``chrom`` must stay the contig being processed,
                    # since it is still needed for the remaining samples and
                    # for the output file name.
                    region_chrom, start, stop, deps = (
                        line.strip().split("\t"))
                    record.regions.append(
                        [region_chrom, int(start), int(stop)])
                    record.data.append(int(deps))

        c_stat = SaveLoad(os.path.join(self.indir, "%s.cnv.args" % chrom))
        c_stat.save(cnvdata)
def run(self, debug=False):
    """Correct windows for every contig in parallel, then bundle CNV inputs.

    Each contig in ``self.contigs`` is submitted to ``self.win_correct`` on a
    process pool. Afterwards, for every contig that has a
    ``<chrom>.nbinom.arg`` file in ``self.indir``, the three tab-separated
    values on its first line are read and a ``CNVdata`` object is filled per
    sample with those values, the sample's ploidy for that contig, and the
    windows read from the sample's ``<chrom>.W<win>S<shift>.fixdep.gz`` file.
    The resulting ``{sample: CNVdata}`` mapping is saved through ``SaveLoad``
    to ``<indir>/<chrom>.cnv.args``.

    Args:
        debug: When true, the ``.nbinom.arg`` files are kept instead of being
            removed once they have been read.
    """
    pool = Pool(
        processes=cpu(use_mem=3221225472, cpu_limit=len(self.contigs)))
    for chrom in self.contigs:
        # NOTE: the AsyncResult is discarded, so an exception raised inside
        # win_correct is not re-raised here; a contig whose worker produced
        # no .nbinom.arg file is simply skipped below.
        pool.apply_async(self.win_correct, args=(chrom, ))
    pool.close()
    pool.join()

    for chrom in self.contigs:
        nbarg = os.path.join(self.indir, "%s.nbinom.arg" % chrom)
        if not os.path.isfile(nbarg):
            continue
        # Context manager so the handle is closed even if the line does not
        # unpack into exactly three fields.
        with smart_open(nbarg) as f_in:
            trials, best_probability, devi = (
                f_in.readline().strip().split("\t"))
        if not debug:
            os.remove(nbarg)

        cnvdata = dict()
        for sample in self.samples:
            record = CNVdata()
            cnvdata[sample] = record
            record.trials = int(trials)
            record.best_probability = float(best_probability)
            record.min_devi = float(devi)
            record.ploid = int(self.chrom_stat[sample][chrom].ploid)
            record.regions = list()
            record.data = list()

            dep_data = os.path.join(
                self.indir, sample,
                "%s.W%dS%d.fixdep.gz" % (chrom, self.CorrectWinLen,
                                         self.CorrectShiftLen))
            if not os.path.isfile(dep_data):
                continue
            with smart_open(dep_data) as f_in:
                for line in f_in:
                    if line.startswith("#"):
                        continue
                    # Use a separate name for the file's chrom column: the
                    # outer ``chrom`` must stay the contig being processed,
                    # since it is still needed for the remaining samples and
                    # for the output file name.
                    region_chrom, start, stop, deps = (
                        line.strip().split("\t"))
                    record.regions.append(
                        [region_chrom, int(start), int(stop)])
                    record.data.append(int(deps))

        c_stat = SaveLoad(os.path.join(self.indir, "%s.cnv.args" % chrom))
        c_stat.save(cnvdata)
def bedAnalysis(**kwargs):
    """Run the region analysis over every interval of a BED file.

    Keyword Args:
        bed: Path of the tab-separated BED file (chrom, start, stop, ...).
        reference: Path passed as first argument to ``RegionAnalysis``.
        db: Path passed as second argument to ``RegionAnalysis``.
        outdir: Directory receiving ``win.gc``, ``pos.gc`` and ``chrom.stat``.
        winlen: Optional window length, converted with ``int`` (default 200).
        siftlen: Optional shift length, converted with ``int`` (default 20).
        depthfile: Comma-separated list of depth files; entries that are not
            existing files are dropped.

    Returns:
        A 3-tuple with the ``fname`` attributes of the ``win.gc``, ``pos.gc``
        and ``chrom.stat`` ``SaveLoad`` objects, in that order.

    Raises:
        KeyError: If one of the mandatory keyword arguments is missing.
    """
    global pos_gc, win_gc
    bed = os.path.abspath(kwargs["bed"])
    reference = os.path.abspath(kwargs["reference"])
    db = os.path.abspath(kwargs["db"])
    outdir = os.path.abspath(kwargs["outdir"])
    winlen = int(kwargs.get("winlen", 200))
    siftlen = int(kwargs.get("siftlen", 20))
    depth_f = [
        os.path.abspath(i) for i in kwargs["depthfile"].split(",")
        if os.path.isfile(i)
    ]
    model = RegionAnalysis(reference, db)
    try:
        bed_gc_out = SaveLoad(os.path.join(outdir, "win.gc"))
        pos_gc_out = SaveLoad(os.path.join(outdir, "pos.gc"))
        chrom_stat = SaveLoad(os.path.join(outdir, "chrom.stat"))
        with smart_open(bed) as f_in:
            for line in f_in:
                record = line.strip()
                # Blank and comment lines carry no interval; without this
                # guard they register a bogus chrom key in pos_gc and then
                # fail on rows[1] / int().
                if not record or record.startswith("#"):
                    continue
                rows = record.split("\t")
                chrom = str(rows[0])
                if chrom not in pos_gc:
                    pos_gc[chrom] = dict()
                start = int(rows[1])
                stop = int(rows[2]) + 1
                try:
                    model.analysis(chrom, start, stop, winlen, siftlen)
                except ValueError:
                    # Intervals rejected by the model are skipped.
                    continue
        bed_gc_out.save(win_gc)
        pos_gc_out.save(pos_gc)
        chrom_stat.save(model.chrom_stat(depth_f))
    finally:
        # Explicit finalizer call kept from the original code; it now sits in
        # ``finally`` so it also runs when an error aborts the analysis.
        # NOTE(review): presumably this releases handles held by the model;
        # a close()/context-manager API on RegionAnalysis would be cleaner.
        model.__del__()
    return bed_gc_out.fname, pos_gc_out.fname, chrom_stat.fname
def bedAnalysis(**kwargs):
    """Run the region analysis over every interval of a BED file.

    Keyword Args:
        bed: Path of the tab-separated BED file (chrom, start, stop, ...).
        reference: Path passed as first argument to ``RegionAnalysis``.
        db: Path passed as second argument to ``RegionAnalysis``.
        outdir: Directory receiving ``win.gc``, ``pos.gc`` and ``chrom.stat``.
        winlen: Optional window length, converted with ``int`` (default 200).
        siftlen: Optional shift length, converted with ``int`` (default 20).
        depthfile: Comma-separated list of depth files; entries that are not
            existing files are dropped.

    Returns:
        A 3-tuple with the ``fname`` attributes of the ``win.gc``, ``pos.gc``
        and ``chrom.stat`` ``SaveLoad`` objects, in that order.

    Raises:
        KeyError: If one of the mandatory keyword arguments is missing.
    """
    global pos_gc, win_gc
    bed = os.path.abspath(kwargs["bed"])
    reference = os.path.abspath(kwargs["reference"])
    db = os.path.abspath(kwargs["db"])
    outdir = os.path.abspath(kwargs["outdir"])
    winlen = int(kwargs.get("winlen", 200))
    siftlen = int(kwargs.get("siftlen", 20))
    depth_f = [
        os.path.abspath(i) for i in kwargs["depthfile"].split(",")
        if os.path.isfile(i)
    ]
    model = RegionAnalysis(reference, db)
    try:
        bed_gc_out = SaveLoad(os.path.join(outdir, "win.gc"))
        pos_gc_out = SaveLoad(os.path.join(outdir, "pos.gc"))
        chrom_stat = SaveLoad(os.path.join(outdir, "chrom.stat"))
        with smart_open(bed) as f_in:
            for line in f_in:
                record = line.strip()
                # Blank and comment lines carry no interval; without this
                # guard they register a bogus chrom key in pos_gc and then
                # fail on rows[1] / int().
                if not record or record.startswith("#"):
                    continue
                rows = record.split("\t")
                chrom = str(rows[0])
                if chrom not in pos_gc:
                    pos_gc[chrom] = dict()
                start = int(rows[1])
                stop = int(rows[2]) + 1
                try:
                    model.analysis(chrom, start, stop, winlen, siftlen)
                except ValueError:
                    # Intervals rejected by the model are skipped.
                    continue
        bed_gc_out.save(win_gc)
        pos_gc_out.save(pos_gc)
        chrom_stat.save(model.chrom_stat(depth_f))
    finally:
        # Explicit finalizer call kept from the original code; it now sits in
        # ``finally`` so it also runs when an error aborts the analysis.
        # NOTE(review): presumably this releases handles held by the model;
        # a close()/context-manager API on RegionAnalysis would be cleaner.
        model.__del__()
    return bed_gc_out.fname, pos_gc_out.fname, chrom_stat.fname