Ejemplo n.º 1
0
 def run(self, debug=False):
     """Correct windows for every contig in parallel, then save per-contig CNV inputs.

     First ``self.win_correct`` is dispatched once per contig on a worker
     pool. Afterwards, for each contig that has a ``<chrom>.nbinom.arg``
     file in ``self.indir``, the three tab-separated values on its first
     line are read, a ``CNVdata`` record is filled for every sample from
     the sample's ``<chrom>.W<win>S<shift>.fixdep.gz`` file, and the
     resulting ``{sample: CNVdata}`` dict is saved to
     ``<chrom>.cnv.args`` through ``SaveLoad``.

     Args:
         debug: When true, the ``.nbinom.arg`` files are kept on disk
             instead of being removed after they are read.
     """
     # 3221225472 == 3 * 1024**3; presumably a memory budget in bytes used
     # by cpu() to size the pool -- confirm against cpu().
     pool = Pool(
         processes=cpu(use_mem=3221225472, cpu_limit=len(self.contigs)))
     # NOTE(review): the apply_async results are discarded, so (if Pool is
     # multiprocessing.Pool) a worker exception is never re-raised here;
     # a failed contig only shows up as a missing .nbinom.arg file below.
     for chrom in self.contigs:
         pool.apply_async(self.win_correct, args=(chrom, ))
     pool.close()
     pool.join()

     for chrom in self.contigs:
         nbarg = os.path.join(self.indir, "%s.nbinom.arg" % chrom)
         if not os.path.isfile(nbarg):
             # No fitted parameters for this contig: nothing to save.
             continue
         # Context manager so the handle is closed even if the line does
         # not unpack into exactly three fields.
         with smart_open(nbarg) as f_in:
             trials, best_probability, devi = (
                 f_in.readline().strip().split("\t"))
         if not debug:
             os.remove(nbarg)

         cnvdata = dict()
         for sample in self.samples:
             sample_data = CNVdata()
             sample_data.trials = int(trials)
             sample_data.best_probability = float(best_probability)
             sample_data.min_devi = float(devi)
             sample_data.ploid = int(self.chrom_stat[sample][chrom].ploid)
             sample_data.regions = list()
             sample_data.data = list()
             cnvdata[sample] = sample_data

             dep_data = os.path.join(
                 self.indir, sample, "%s.W%dS%d.fixdep.gz" %
                 (chrom, self.CorrectWinLen, self.CorrectShiftLen))
             if not os.path.isfile(dep_data):
                 # The sample keeps its (empty) record in the output.
                 continue
             with smart_open(dep_data) as f_in:
                 for line in f_in:
                     if line.startswith("#"):
                         continue
                     # Use a separate name for the file's chromosome
                     # column: rebinding ``chrom`` here would leak into
                     # the next sample's chrom_stat lookup and into the
                     # output file name below.
                     region_chrom, start, stop, deps = (
                         line.strip().split("\t"))
                     start = int(start)
                     stop = int(stop)
                     deps = int(deps)
                     sample_data.regions.append(
                         [region_chrom, start, stop])
                     sample_data.data.append(deps)

         c_stat = SaveLoad(os.path.join(self.indir, "%s.cnv.args" % chrom))
         c_stat.save(cnvdata)
Ejemplo n.º 2
0
	def run(self, debug=False):
		"""Run window correction per contig in a pool, then write ``<chrom>.cnv.args``.

		``self.win_correct`` is submitted once per contig. After the pool has
		finished, each contig with a ``<chrom>.nbinom.arg`` file in
		``self.indir`` gets one ``CNVdata`` record per sample, populated from
		the three tab-separated values in that file and from the sample's
		``<chrom>.W<win>S<shift>.fixdep.gz`` depth file. The per-sample dict
		is saved via ``SaveLoad``.

		Args:
			debug: If true, keep the ``.nbinom.arg`` files; otherwise they are
				deleted once read.
		"""
		# 3221225472 == 3 * 1024**3; presumably bytes of memory that cpu()
		# uses to limit the worker count -- confirm against cpu().
		pool = Pool(processes=cpu(use_mem=3221225472, cpu_limit=len(self.contigs)))
		# NOTE(review): results of apply_async are dropped, so (if Pool is
		# multiprocessing.Pool) errors raised in workers are not propagated;
		# such contigs are skipped below when their .nbinom.arg is absent.
		for chrom in self.contigs:
			pool.apply_async(self.win_correct, args=(chrom,))
		pool.close()
		pool.join()

		for chrom in self.contigs:
			nbarg = os.path.join(self.indir, "%s.nbinom.arg" % chrom)
			if not os.path.isfile(nbarg):
				continue
			# ``with`` guarantees the handle is released even when the first
			# line does not split into exactly three fields.
			with smart_open(nbarg) as f_in:
				trials, best_probability, devi = f_in.readline().strip().split("\t")
			if not debug:
				os.remove(nbarg)

			cnvdata = dict()
			for sample in self.samples:
				record = CNVdata()
				record.trials = int(trials)
				record.best_probability = float(best_probability)
				record.min_devi = float(devi)
				record.ploid = int(self.chrom_stat[sample][chrom].ploid)
				record.regions = list()
				record.data = list()
				cnvdata[sample] = record

				dep_name = "%s.W%dS%d.fixdep.gz" % (chrom, self.CorrectWinLen, self.CorrectShiftLen)
				dep_data = os.path.join(self.indir, sample, dep_name)
				if not os.path.isfile(dep_data):
					# Sample stays in the output with empty regions/data.
					continue
				with smart_open(dep_data) as f_in:
					for line in f_in:
						if line.startswith("#"):
							continue
						# Do not rebind ``chrom``: the outer loop variable is
						# still needed for the following samples and for the
						# output file name.
						row_chrom, start, stop, deps = line.strip().split("\t")
						start = int(start)
						stop = int(stop)
						deps = int(deps)
						record.regions.append([row_chrom, start, stop])
						record.data.append(deps)

			c_stat = SaveLoad(os.path.join(self.indir, "%s.cnv.args" % chrom))
			c_stat.save(cnvdata)
Ejemplo n.º 3
0
def bedAnalysis(**kwargs):
    """Analyse every interval of a BED file and save the collected statistics.

    Each data line of the BED file is passed to ``RegionAnalysis.analysis``;
    afterwards the module-level ``win_gc`` and ``pos_gc`` mappings and the
    model's chromosome statistics are saved into ``outdir`` as ``win.gc``,
    ``pos.gc`` and ``chrom.stat``.

    Keyword Args:
        bed: Path of the tab-separated interval file (chrom, start, stop in
            the first three columns).
        reference: Path passed as first argument to ``RegionAnalysis``.
        db: Path passed as second argument to ``RegionAnalysis``.
        outdir: Directory that receives the three output files.
        winlen: Optional; converted with ``int``, defaults to 200.
        siftlen: Optional; converted with ``int``, defaults to 20.
        depthfile: Comma-separated paths; entries that are not existing
            files are dropped before being given to ``model.chrom_stat``.

    Returns:
        The ``fname`` attributes of the ``win.gc``, ``pos.gc`` and
        ``chrom.stat`` ``SaveLoad`` objects, in that order.
    """
    global pos_gc, win_gc
    bed = os.path.abspath(kwargs["bed"])
    reference = os.path.abspath(kwargs["reference"])
    db = os.path.abspath(kwargs["db"])
    outdir = os.path.abspath(kwargs["outdir"])
    winlen = int(kwargs.get("winlen", 200))
    siftlen = int(kwargs.get("siftlen", 20))
    depth_f = [os.path.abspath(i) for i in kwargs["depthfile"].split(",") if os.path.isfile(i)]
    model = RegionAnalysis(reference, db)
    try:
        bed_gc_out = SaveLoad(os.path.join(outdir, "win.gc"))
        pos_gc_out = SaveLoad(os.path.join(outdir, "pos.gc"))
        chrom_stat = SaveLoad(os.path.join(outdir, "chrom.stat"))
        with smart_open(bed) as f_in:
            for line in f_in:
                # Blank lines and '#' comments carry no interval; without
                # this guard they registered a junk key in pos_gc and then
                # crashed on the missing/invalid coordinate columns.
                if not line.strip() or line.startswith("#"):
                    continue
                rows = line.strip().split("\t")
                chrom = str(rows[0])
                if chrom not in pos_gc:
                    pos_gc[chrom] = dict()
                start = int(rows[1])
                # NOTE(review): stop is shifted by one before analysis;
                # presumably to make the end coordinate inclusive -- confirm
                # against RegionAnalysis.analysis.
                stop = int(rows[2]) + 1
                try:
                    model.analysis(chrom, start, stop, winlen, siftlen)
                except ValueError:
                    # Intervals the model rejects are skipped.
                    continue
        bed_gc_out.save(win_gc)
        pos_gc_out.save(pos_gc)
        chrom_stat.save(model.chrom_stat(depth_f))
    finally:
        # Explicit finalizer call kept from the original, but now also run
        # when an error escapes so the model is always cleaned up.
        model.__del__()
    return bed_gc_out.fname, pos_gc_out.fname, chrom_stat.fname
Ejemplo n.º 4
0
def bedAnalysis(**kwargs):
    """Feed each BED interval to ``RegionAnalysis`` and persist the results.

    The module-level ``win_gc`` / ``pos_gc`` mappings and the model's
    chromosome statistics are written to ``win.gc``, ``pos.gc`` and
    ``chrom.stat`` inside ``outdir`` once the whole BED file has been read.

    Keyword Args:
        bed: Tab-separated interval file; the first three columns are read
            as chrom, start and stop.
        reference: First argument for ``RegionAnalysis``.
        db: Second argument for ``RegionAnalysis``.
        outdir: Output directory for the three saved files.
        winlen: Optional integer-convertible value, default 200.
        siftlen: Optional integer-convertible value, default 20.
        depthfile: Comma-separated file paths; only paths that exist as
            files are forwarded to ``model.chrom_stat``.

    Returns:
        A 3-tuple with the ``fname`` of the ``win.gc``, ``pos.gc`` and
        ``chrom.stat`` ``SaveLoad`` objects.
    """
    global pos_gc, win_gc
    bed = os.path.abspath(kwargs["bed"])
    reference = os.path.abspath(kwargs["reference"])
    db = os.path.abspath(kwargs["db"])
    outdir = os.path.abspath(kwargs["outdir"])
    winlen = int(kwargs.get("winlen", 200))
    siftlen = int(kwargs.get("siftlen", 20))
    depth_f = [
        os.path.abspath(i) for i in kwargs["depthfile"].split(",")
        if os.path.isfile(i)
    ]
    model = RegionAnalysis(reference, db)
    try:
        bed_gc_out = SaveLoad(os.path.join(outdir, "win.gc"))
        pos_gc_out = SaveLoad(os.path.join(outdir, "pos.gc"))
        chrom_stat = SaveLoad(os.path.join(outdir, "chrom.stat"))
        with smart_open(bed) as f_in:
            for line in f_in:
                stripped = line.strip()
                # Skip blank lines and '#' comments: they hold no interval
                # and previously polluted pos_gc before raising on the
                # missing or non-numeric coordinate columns.
                if not stripped or line.startswith("#"):
                    continue
                rows = stripped.split("\t")
                chrom = str(rows[0])
                if chrom not in pos_gc:
                    pos_gc[chrom] = dict()
                start = int(rows[1])
                # NOTE(review): the +1 presumably turns the end coordinate
                # into an inclusive bound -- verify against
                # RegionAnalysis.analysis.
                stop = int(rows[2]) + 1
                try:
                    model.analysis(chrom, start, stop, winlen, siftlen)
                except ValueError:
                    # The model refused this interval; move on to the next.
                    continue
        bed_gc_out.save(win_gc)
        pos_gc_out.save(pos_gc)
        chrom_stat.save(model.chrom_stat(depth_f))
    finally:
        # The original explicit finalizer call, moved into ``finally`` so
        # the model is released on the error path as well.
        model.__del__()
    return bed_gc_out.fname, pos_gc_out.fname, chrom_stat.fname