Ejemplo n.º 1
0
	def __init__(self, **kwargs):
		"""Collect per-chromosome CNV results and set up the annotation helpers.

		Keyword Args:
			indir: Directory searched for ``chr*.cnv.args`` files and per-sample
				``<chrom>.W<win>S<shift>.fixdep.gz`` files (required).
			dbdir: Root directory of the annotation databases (required).
			correct_win_len: Window length; falls back to 30 when missing,
				``None`` or 0.
			correct_shift_len: Shift length; falls back to 25 when missing,
				``None`` or 0.
			chrom: Optional comma-separated chromosome names to keep.
			sample: Optional comma-separated sample names to keep.
			transdb: Optional path handed to ``HGVS``; defaults to
				``<dbdir>/transdb/ncbi_anno_rel104.dbref.db``.
			reference: Optional reference path; defaults to
				``<dbdir>/aln_db/hg19/hg19_chM_male_mask.fa``.

		Raises:
			KeyError: If ``indir`` or ``dbdir`` is not supplied.
		"""
		# NOTE: the input directory is stored under the attribute name ``outdir``.
		self.outdir = os.path.abspath(kwargs["indir"])

		# The fallback has to be applied before int() as well as after it:
		# int(None) raises TypeError, so an unset option could never reach
		# the default. A value of 0 still falls back to the default.
		self.win_len = int(kwargs.get("correct_win_len") or 0) or 30
		self.shift_len = int(kwargs.get("correct_shift_len") or 0) or 25
		self.contral_wins = int(100.0 / self.shift_len + 0.5) + 1

		# Optional filters; sets give constant-time membership tests in the loops.
		chrom_arg = kwargs.get("chrom")
		sample_arg = kwargs.get("sample")
		wanted_chroms = set(str(chrom_arg).split(",")) if chrom_arg else None
		wanted_samples = set(str(sample_arg).split(",")) if sample_arg else None

		seen_samples = set()
		seen_contigs = []
		self.cnvdata = defaultdict(dict)  # sample -> chrom -> loaded CNV entry
		self.sample_win_data = defaultdict(dict)  # chrom -> sample -> depth file path
		for args_path in glob(os.path.join(self.outdir, "chr*.cnv.args")):
			# "<chrom>.cnv.args" -> "<chrom>" (the chrom part may itself contain dots).
			chrom = ".".join(os.path.basename(args_path).split(".")[0:-2])
			if wanted_chroms is not None and chrom not in wanted_chroms:
				continue
			chrom_cnv = SaveLoad(args_path).load()
			seen_contigs.append(chrom)
			for sample in chrom_cnv.keys():
				dep_f = os.path.join(
					self.outdir, sample,
					"%s.W%iS%i.fixdep.gz" % (chrom, self.win_len, self.shift_len))
				# Only register depth files that also have a ".tbi" companion.
				# NOTE(review): this happens before the sample filter, so depth
				# files of filtered-out samples are registered too - presumably
				# intentional; confirm.
				if os.path.isfile(dep_f) and os.path.isfile(dep_f + '.tbi'):
					self.sample_win_data[chrom][sample] = dep_f
				if wanted_samples is not None and sample not in wanted_samples:
					continue
				seen_samples.add(sample)
				self.cnvdata[sample][chrom] = chrom_cnv[sample]
		self.samples = sorted(seen_samples)
		self.contigs = sorted(seen_contigs, key=_chrom_valued)

		databases = os.path.abspath(kwargs["dbdir"])
		# Treat a missing *or empty/None* transdb as "use the default", matching
		# how ``reference`` is handled; abspath(None) would raise TypeError.
		transdb_arg = kwargs.get("transdb")
		if transdb_arg:
			t_db = os.path.abspath(transdb_arg)
		else:
			t_db = os.path.join(databases, "transdb", "ncbi_anno_rel104.dbref.db")

		# Register every "<dbdir>/<name>/*.cnvdb.config" under its directory name
		# in the module-level _AnnotationDB mapping.
		for db in glob(os.path.join(databases, "*", "*.cnvdb.config")):
			db = os.path.abspath(db)
			dbname = os.path.basename(os.path.dirname(db))
			_AnnotationDB[dbname].add(db)

		reference_arg = kwargs.get("reference")
		if reference_arg:
			self.reference = os.path.abspath(reference_arg)
		else:
			self.reference = os.path.join(databases, 'aln_db/hg19/hg19_chM_male_mask.fa')
		self.DBAnno = CNVAnnotation(self.reference, _AnnotationDB)
		self.HGVS = HGVS(t_db)