sys.exit(0) else: opt, args = parser.parse_args() break progress = None if not opt.output: opt.quiet = True if opt.maxreads == None: opt.maxreads = 1e+20 progress = ProgressText(quiet=opt.quiet) from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile progress.stage("Read SNV data", len(opt.snvs)) snvheaders = filter(None, """ CHROM POS REF ALT """.split()) snvdata = {} # extrasnvheaders = [] # usedsnvheaders = set() snvchroms = defaultdict(set) for filename in opt.snvs: base, extn = filename.rsplit('.', 1) extn = extn.lower() if extn == 'csv': snvs = CSVFileTable(filename=filename) elif extn == 'vcf':
# NOTE(review): collapsed excerpt — Python 3 form of the read-count parsing
# section (compare the `map(...)+list` Py2 copy elsewhere in this file).
# Registers chromosome labels 1..99, X, Y, MT for the counts file, then streams
# the tab-delimited counts table with csv.DictReader, coercing *Count columns
# to int and *Sc columns to float when non-empty, and files the first row seen
# per (CHROM, POS) into the GDNA bucket when the AlignedReads filename matches
# regex["GDNA"] (the `regex` table and the NRNA/TRNA/SDNA handling continue
# outside this fragment). The handle `f` is not closed in the visible span —
# presumably closed (or leaked) after the loop; TODO confirm against the full
# source.
base = os.path.split(os.path.abspath(opt.counts))[0] TRNA = {} NRNA = {} GDNA = {} SDNA = {} from chromreg import ChromLabelRegistry chrreg = ChromLabelRegistry() labels = list(map(str, list(range(1, 100)))) + ["X", "Y", "MT"] chrreg.add_labels(opt.counts, labels) chrreg.default_chrom_order() chrorder = lambda l: chrreg.chrom_order(chrreg.label2chrom(opt.counts, l)) progress.stage("Parsing read-counts") f = open(opt.counts, mode='rt', encoding='utf8') reader = csv.DictReader(f, delimiter='\t') types2files = defaultdict(set) files2types = defaultdict(set) for row in reader: key = (row['CHROM'], row['POS']) filename = row['AlignedReads'] for k in row: if k.endswith('Count') and row[k] != "": row[k] = int(row[k]) if k.endswith('Sc') and row[k] != "": row[k] = float(row[k]) if re.search(regex["GDNA"], filename) and key not in GDNA: GDNA[key] = row types2files["GDNA"].add(filename)
# NOTE(review): collapsed excerpt — Python 3 (2to3-converted) SNV-reading
# preamble: exits cleanly on user cancel, parses command-line options, forces
# quiet progress when no -o output file is given, then dispatches each SNV
# input file to a table reader by lower-cased extension (.csv/.vcf/...; the
# elif chain continues past this fragment). `snvheaders` uses the
# list-comprehension form of filter(None, ...) so it is a real list.
# UserCancelledError, `parser`, `ProgressText`, and the pysamimport/dataset
# modules are defined outside the visible span.
except UserCancelledError: sys.exit(0) else: opt, args = parser.parse_args() break progress = None if not opt.output: opt.quiet = True progress = ProgressText(quiet=opt.quiet) from pysamimport import pysam from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile progress.stage("Read SNV data", len(opt.snvs)) snvheaders = [_f for _f in """ CHROM POS REF ALT """.split() if _f] snvdata = {} extrasnvheaders = [] usedsnvheaders = set() for filename in opt.snvs: base, extn = filename.rsplit('.', 1) extn = extn.lower() if extn == 'csv': snvs = CSVFileTable(filename=filename) elif extn == 'vcf': snvs = VCFFile(filename=filename)
ReadCounts Files (-c): %s Matrix Output (-M): %s Min. Reads (-m): %s%s Quiet (-q): %s Outfile File (-o): %s Command-Line: readCountsMatrix %s """ % (", ".join(opt.counts), None if not matrix else opt.matrix, opt.minreads, "" if opt.matrix not in ("Ref:Var", "Ref;Var") or opt.minreads == 0 else " (ignored)", opt.quiet, opt.output, cmdargs)
# NOTE(review): collapsed excerpt of readCountsMatrix — the text above is the
# tail of a triple-quoted execution-log template opened before this fragment
# (no comment may precede it without landing inside the string literal). Two
# spots to verify against the full source: the condition tests a bare name
# `matrix` rather than `opt.matrix` — presumably bound earlier, else this is a
# NameError; and the "(ignored)" note fires for both "Ref:Var" and "Ref;Var"
# spellings, suggesting both separators are accepted for -M; TODO confirm.
# Below: log the settings, then read each ReadCounts input file by extension
# into `vafmatrix` (keyed per read group); `txtheaders` must stay in sync with
# the readCounts .txt column layout per the inline NOTE.
progress.message(execution_log) from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable progress.stage("Read ReadCounts input files", len(opt.counts)) headers = "CHROM POS REF ALT ReadGroup RefCount SNVCount GoodReads".split() # NOTE: This *MUST* correspond to the columns in the readCounts .txt file output txtheaders = "CHROM POS REF ALT ReadGroup SNVCountForward SNVCountReverse RefCountForward RefCountReverse SNVCount RefCount GoodReads".split( ) allrg = set() vafmatrix = defaultdict(dict) for filename in opt.counts: base, extn = filename.rsplit('.', 1) extn = extn.lower() if extn == 'csv': counts = CSVFileTable(filename=filename) elif extn == 'vcf': counts = VCFFile(filename=filename) elif extn == 'tsv':
except UserCancelledError: sys.exit(0) else: opt, args = parser.parse_args() break progress = None if not opt.output: opt.quiet = True progress = ProgressText(quiet=opt.quiet) import pysam from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile progress.stage("Read SNP data", len(opt.snps)) snpheaders = filter(None, """ CHROM POS REF ALT """.split()) snpdata = {} extrasnpheaders = [] usedsnpheaders = set() for filename in opt.snps: base, extn = filename.rsplit('.', 1) extn = extn.lower() if extn == 'csv': snps = CSVFileTable(filename=filename) elif extn == 'vcf': snps = VCFFile(filename=filename)
# NOTE(review): collapsed excerpt — Python 2 form of the read-count parsing
# section (a Py3-converted copy exists elsewhere in this file):
# `map(str,range(1,100)) + ["X","Y","MT"]` relies on map() returning a list
# and raises TypeError under Python 3; the bare open() / filter() usage is
# likewise Py2-style, and `f` is never closed in the visible span. Left as-is:
# modernizing this copy in place could be wrong if it is the deliberately-Py2
# variant — the converted sibling is the one to run under Python 3.
# Body: register chromosome labels 1..99/X/Y/MT, stream the tab-delimited
# counts table, coerce *Count -> int and *Sc -> float when non-empty, and file
# the first row per (CHROM, POS) into GDNA/NRNA/... buckets by matching the
# AlignedReads filename against the `regex` table.
regex["TRNA"] = opt.tumortransre progress = ProgressText() base = os.path.split(os.path.abspath(opt.counts))[0] TRNA = {}; NRNA = {}; GDNA = {}; SDNA = {} from chromreg import ChromLabelRegistry chrreg = ChromLabelRegistry() labels = map(str,range(1,100)) + ["X","Y","MT"] chrreg.add_labels(opt.counts,labels) chrreg.default_chrom_order() chrorder = lambda l: chrreg.chrom_order(chrreg.label2chrom(opt.counts,l)) progress.stage("Parsing read-counts") f = open(opt.counts, 'r') reader = csv.DictReader(f, delimiter='\t') types2files = defaultdict(set) files2types = defaultdict(set) for row in reader: key = (row['CHROM'],row['POS']) filename = row['AlignedReads'] for k in row: if k.endswith('Count') and row[k] != "": row[k] = int(row[k]) if k.endswith('Sc') and row[k] != "": row[k] = float(row[k]) if re.search(regex["GDNA"],filename) and key not in GDNA: GDNA[key] = row; types2files["GDNA"].add(filename); files2types[filename].add("GDNA") if re.search(regex["NRNA"],filename) and key not in NRNA:
else: opt, args = parser.parse_args() break opts.mates = False progress = None if not opt.output: opt.quiet = True progress = ProgressText(quiet=opt.quiet) import pysam from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile progress.stage("Read SNP data", len(opt.snps)) snpheaders = [_f for _f in """ CHROM POS REF ALT """.split() if _f] snvdata = {} snvchroms = defaultdict(set) extrasnpheaders = [] usedsnpheaders = set() for filename in opt.snps: filename0 = filename base, extn = filename.rsplit('.', 1) extn = extn.lower() tempfilename = None if opt.exoncoords: if extn != 'vcf':
except UserCancelledError: sys.exit(0) else: opt, args = parser.parse_args() break progress = None if not opt.output: opt.quiet = True progress = ProgressText(quiet=opt.quiet) from pysamimport import pysam from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile progress.stage("Read SNV data", len(opt.snvs)) snvheaders = filter( None, """ CHROM POS REF ALT """.split(), ) snvdata = {} extrasnvheaders = [] usedsnvheaders = set() for filename in opt.snvs: base, extn = filename.rsplit(".", 1) extn = extn.lower() if extn == "csv":
# NOTE(review): collapsed excerpt — Python 3 (converted) form of the
# SNP/junction count reader: builds `sumkeys`, the list of summable count
# column names (whitespace-split, str.strip applied, empties dropped via the
# list-comprehension form of filter), then dispatches each counts file to a
# table reader by lower-cased extension (.csv/.tsv/.xls/.xlsx; the else branch
# continues past this fragment). `opt`, `ProgressText`, and the dataset module
# are defined outside the visible span.
break progress = None if not opt.output: opt.quiet = True progress = ProgressText(quiet=opt.quiet) sumkeys = [ _f for _f in map( str.strip, """ SNPJuncIntronCount SNPJuncNoIntronCount NoSNPJuncIntronCount NoSNPJuncNoIntronCount SNPMateCount NoSNPMateCount SNPCount NoSNPCount MatesCount NotMatesCount IntronCount NoIntronCount SpanningReads RemovedDuplicateReads SNPLociReads""" .split()) if _f ] countdata = defaultdict(dict) progress.stage("Read SNP/Junction counts") from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable countheaders = None for filename in opt.counts: base, extn = filename.rsplit('.', 1) path, base = os.path.split(base) extn = extn.lower() if extn == 'csv': counts = CSVFileTable(filename=filename) elif extn == 'tsv': counts = TSVFileTable(filename=filename) elif extn == 'xls': counts = XLSFileTable(filename=filename) elif extn == 'xlsx': counts = XLSXFileTable(filename=filename) else:
except UserCancelledError: sys.exit(0) else: opt, args = parser.parse_args() break progress = None if not opt.output: opt.quiet = True progress = ProgressText(quiet=opt.quiet) sumkeys = filter(None, map(str.strip, """ SNPJuncIntronCount SNPJuncNoIntronCount NoSNPJuncIntronCount NoSNPJuncNoIntronCount SNPMateCount NoSNPMateCount SNPCount NoSNPCount MatesCount NotMatesCount IntronCount NoIntronCount SpanningReads RemovedDuplicateReads SNPLociReads""".split())) countdata = defaultdict(dict) progress.stage("Read SNP/Junction counts") from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable countheaders = None for filename in opt.counts: base, extn = filename.rsplit('.', 1) path, base = os.path.split(base) extn = extn.lower() if extn == 'csv': counts = CSVFileTable(filename=filename) elif extn == 'tsv': counts = TSVFileTable(filename=filename) elif extn == 'xls': counts = XLSFileTable(filename=filename) elif extn == 'xlsx': counts = XLSXFileTable(filename=filename) else: