regex["NRNA"] = opt.normaltransre regex["SDNA"] = opt.tumordnare regex["TRNA"] = opt.tumortransre progress = ProgressText() base = os.path.split(os.path.abspath(opt.counts))[0] TRNA = {} NRNA = {} GDNA = {} SDNA = {} from chromreg import ChromLabelRegistry chrreg = ChromLabelRegistry() labels = list(map(str, list(range(1, 100)))) + ["X", "Y", "MT"] chrreg.add_labels(opt.counts, labels) chrreg.default_chrom_order() chrorder = lambda l: chrreg.chrom_order(chrreg.label2chrom(opt.counts, l)) progress.stage("Parsing read-counts") f = open(opt.counts, mode='rt', encoding='utf8') reader = csv.DictReader(f, delimiter='\t') types2files = defaultdict(set) files2types = defaultdict(set) for row in reader: key = (row['CHROM'], row['POS']) filename = row['AlignedReads'] for k in row: if k.endswith('Count') and row[k] != "":
if r.get('INFO:INDEL'): continue if len(ref) != 1: continue if not re.search(r'^[ACGT](,[ACGT])*$', alt): continue # for h in r: # if r.get(h): # usedsnvheaders.add(h) snvkey = (filename, chr, locus, ref, alt) if snvkey not in snvdata: snvdata[snvkey] = r progress.update() progress.done() chrreg = ChromLabelRegistry() for snvfile in snvchroms: chrreg.add_labels(snvfile, snvchroms[snvfile]) snvdata1 = {} for (sf, chr, locus, ref, alt), r in snvdata.iteritems(): chrom = chrreg.label2chrom(sf, chr) assert (chrom) snvkey = (chrom, locus, ref, alt) if snvkey not in snvdata1: snvdata1[snvkey] = (chrom, locus, ref, alt, r) for bamfile in opt.alignments: chrreg.add_bamlabels(bamfile)
# Input/output formats must agree: VCF output needs VCF input, and any
# non-VCF input must be written as TSV.
if outfile.endswith('.vcf'):
    assert filename.endswith('.vcf')
if not filename.endswith('.vcf'):
    assert outfile.endswith('.tsv')

print("Reading", filename, "...", end=' ')
sys.stdout.flush()

# Choose the reader from the (case-insensitive) file extension.
extension = filename.rsplit('.', 1)[-1].lower()
if extension == 'vcf':
    fileheader, chrlab, snvdata = ReadVCF(filename)
else:
    fileheader, chrlab, snvdata = ReadTSV(filename)

from chromreg import ChromLabelRegistry

chrreg = ChromLabelRegistry()
chrreg.add_labels(filename, chrlab)

# Walk the records from last to first so deleting one never shifts the
# indices still to be visited.  Records on numbered or sex chromosomes are
# kept, with field 0 replaced by the registry's chromosome and field 1
# converted to int; every other record is dropped.
# Note: chrlab is rebound to a single record's label inside this loop.
for i in reversed(range(len(snvdata))):
    record = snvdata[i]
    chrlab = record[0]
    chrom = chrreg.label2chrom(filename, chrlab)
    if chrreg.isnumberedchrom(chrom) or chrreg.issexchrom(chrom):
        record[0] = chrom
        record[1] = int(record[1])
    else:
        del snvdata[i]

# Expected chromosome labels from UCSC file...
# Extra numeric chromosomes might make this more robust for organisms
# with more chromosomes? Doesn't hurt normal (human) case either way.
continue if len(ref) != 1: continue if not re.search(r'^[ACGT](,[ACGT])*$', alt): continue # for h in r: # if r.get(h): # usedsnvheaders.add(h) snvkey = (filename, chr, locus, ref, alt) if snvkey not in snvdata: snvdata[snvkey] = r progress.update() progress.done() chrreg = ChromLabelRegistry() for snvfile in snvchroms: chrreg.add_labels(snvfile,snvchroms[snvfile]) snvdata1 = {} for (sf, chr, locus, ref, alt), r in snvdata.iteritems(): chrom = chrreg.label2chrom(sf,chr) assert(chrom) snvkey = (chrom,locus,ref,alt) if snvkey not in snvdata1: snvdata1[snvkey] = (chrom,locus,ref,alt,r) for bamfile in opt.alignments: chrreg.add_bamlabels(bamfile)
opt, args = parser.parse_args() regex = {} regex["GDNA"] = opt.normaldnare regex["NRNA"] = opt.normaltransre regex["SDNA"] = opt.tumordnare regex["TRNA"] = opt.tumortransre progress = ProgressText() base = os.path.split(os.path.abspath(opt.counts))[0] TRNA = {}; NRNA = {}; GDNA = {}; SDNA = {} from chromreg import ChromLabelRegistry chrreg = ChromLabelRegistry() labels = map(str,range(1,100)) + ["X","Y","MT"] chrreg.add_labels(opt.counts,labels) chrreg.default_chrom_order() chrorder = lambda l: chrreg.chrom_order(chrreg.label2chrom(opt.counts,l)) progress.stage("Parsing read-counts") f = open(opt.counts, 'r') reader = csv.DictReader(f, delimiter='\t') types2files = defaultdict(set) files2types = defaultdict(set) for row in reader: key = (row['CHROM'],row['POS']) filename = row['AlignedReads'] for k in row: if k.endswith('Count') and row[k] != "":