def __init__(self, **kwargs):
    """Load per-chromosome CNV results and set up the annotation helpers.

    Keyword arguments read:
        indir: directory scanned for ``chr*.cnv.args`` files (required).
        correct_win_len: stored as ``self.win_len`` and used in the
            ``*.fixdep.gz`` file name; 30 when missing, empty or 0.
        correct_shift_len: stored as ``self.shift_len`` and used in the
            ``*.fixdep.gz`` file name; 25 when missing, empty or 0.
        chrom: optional comma-separated names; when given, only files
            whose derived chromosome name is listed are loaded.
        sample: optional comma-separated names; when given, only these
            samples are recorded in ``self.cnvdata`` / ``self.samples``.
        dbdir: database root directory (required).
        transdb: optional path passed to ``HGVS``; defaults to
            ``<dbdir>/transdb/ncbi_anno_rel104.dbref.db``.
        reference: optional reference path; defaults to
            ``<dbdir>/aln_db/hg19/hg19_chM_male_mask.fa``.

    Raises:
        KeyError: if ``indir`` or ``dbdir`` is not supplied.
    """
    # NOTE(review): this method was reconstructed from a source whose
    # indentation had been flattened; the nesting of the loops below is
    # the most plausible reading -- confirm against the upstream file.
    self.outdir = os.path.abspath(kwargs["indir"])

    # Apply the fallback *before* int() so that None / "" no longer raise;
    # a value that converts to 0 still yields the default, as before.
    self.win_len = int(kwargs.get("correct_win_len") or 0) or 30
    self.shift_len = int(kwargs.get("correct_shift_len") or 0) or 25
    # 100 / shift_len rounded to the nearest integer, plus one.
    self.contral_wins = int(100.0 / self.shift_len + 0.5) + 1

    chrom_arg = kwargs.get("chrom")
    sample_arg = kwargs.get("sample")
    # Only used for membership tests, hence sets. None means "no filter".
    chroms = set(str(chrom_arg).split(",")) if chrom_arg else None
    samples = set(str(sample_arg).split(",")) if sample_arg else None

    all_samples = set()
    contigs = []
    self.cnvdata = defaultdict(dict)          # sample -> chrom -> data
    self.sample_win_data = defaultdict(dict)  # chrom -> sample -> depth file

    for cnv_file in glob(os.path.join(self.outdir, "chr*.cnv.args")):
        # Drop the last two dot-separated parts of the file name.
        chrom = ".".join(os.path.basename(cnv_file).split(".")[0:-2])
        if chroms is not None and chrom not in chroms:
            continue
        chrom_data = SaveLoad(cnv_file).load()
        contigs.append(chrom)
        for sample in chrom_data.keys():
            dep_f = os.path.join(
                self.outdir, sample,
                "%s.W%iS%i.fixdep.gz" % (chrom, self.win_len, self.shift_len))
            # The depth file is registered for every sample that has both
            # the file and its .tbi companion, before the sample filter.
            # NOTE(review): filter assumed to sit at loop level, not under
            # the isfile check -- confirm.
            if os.path.isfile(dep_f) and os.path.isfile(dep_f + '.tbi'):
                self.sample_win_data[chrom][sample] = dep_f
            if samples is not None and sample not in samples:
                continue
            all_samples.add(sample)
            self.cnvdata[sample][chrom] = chrom_data[sample]

    self.samples = sorted(all_samples)
    self.contigs = sorted(contigs, key=_chrom_valued)

    databases = os.path.abspath(kwargs["dbdir"])

    # Treat a present-but-empty transdb (e.g. None) like a missing one
    # instead of passing it to os.path.abspath.
    transdb_arg = kwargs.get("transdb")
    if transdb_arg:
        t_db = os.path.abspath(transdb_arg)
    else:
        t_db = os.path.join(databases, "transdb", "ncbi_anno_rel104.dbref.db")

    # Register every "<dbdir>/<name>/*.cnvdb.config" under its directory
    # name. _AnnotationDB is defined outside this method and is mutated
    # in place here.
    for db_config in glob(os.path.join(databases, "*", "*.cnvdb.config")):
        db_config = os.path.abspath(db_config)
        dbname = os.path.basename(os.path.dirname(db_config))
        _AnnotationDB[dbname].add(db_config)

    reference_arg = kwargs.get("reference")
    if reference_arg:
        self.reference = os.path.abspath(reference_arg)
    else:
        self.reference = os.path.join(
            databases, 'aln_db/hg19/hg19_chM_male_mask.fa')

    self.DBAnno = CNVAnnotation(self.reference, _AnnotationDB)
    self.HGVS = HGVS(t_db)