def ReadTSV(filename):
    """Load an SNV table and return its header line, chromosomes and rows.

    The table reader is chosen from the (case-insensitive) extension of
    *filename*: ``csv``, ``tsv``, ``xls``, ``xlsx`` or ``txt``.  For ``txt``
    files the required headers are handed to the reader as its ``headers``
    argument.

    Args:
        filename: Path of the SNV file to read.

    Returns:
        A 3-tuple ``(header_lines, chrom, snvdata)`` where

        * ``header_lines`` is a one-element list holding the table headers
          joined with tabs,
        * ``chrom`` is the set of distinct values found in the first
          (``CHROM``) column,
        * ``snvdata`` is a list with one list per row, holding that row's
          values in header order (looked up with ``row.get``).

    Raises:
        RuntimeError: If the extension is missing or not one of the
            supported ones, if a required header (``CHROM``, ``POS``,
            ``REF``, ``ALT``) is absent, or if those headers are not the
            first four columns in that order.
    """
    snvheaders = ["CHROM", "POS", "REF", "ALT"]

    # rpartition never fails to unpack: a name without any '.' yields an
    # empty separator, which is mapped to "no extension" so it is reported
    # through the same RuntimeError as any other unsupported extension.
    _, dot, suffix = filename.rpartition('.')
    extn = suffix.lower() if dot else ''

    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'tsv':
        snvs = TSVFileTable(filename=filename)
    elif extn == 'xls':
        snvs = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        snvs = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        snvs = TXTFileTable(filename=filename, headers=snvheaders)
    else:
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)

    headers = snvs.headers()
    for h in snvheaders:
        if h not in headers:
            raise RuntimeError(
                "Required header: %s missing from SNV file %s" % (h, filename))

    # Explicit check rather than `assert`, so it still runs under `python -O`.
    # The chromosome is read from column 0 below, so the order matters.
    if headers[:4] != snvheaders:
        raise RuntimeError(
            "Required headers %s must be the first four columns of SNV file %s"
            % (" ".join(snvheaders), filename))

    chrom = set()
    snvdata = []
    for r in snvs:
        row = [r.get(h) for h in headers]
        chrom.add(row[0])
        snvdata.append(row)

    return ["\t".join(headers)], chrom, snvdata
snvs = CSVFileTable(filename=filename) elif extn == 'vcf': snvs = VCFFile(filename=filename) elif extn == 'tsv': snvs = TSVFileTable(filename=filename) elif extn == 'xls': snvs = XLSFileTable(filename=filename) elif extn == 'xlsx': snvs = XLSXFileTable(filename=filename) elif extn == 'txt': snvs = TXTFileTable(filename=filename, headers=snvheaders) else: raise RuntimeError("Unexpected SNV file extension: %s" % filename) for h in snvheaders: if h not in snvs.headers(): raise RuntimeError("Required header: %s missing from SNV file %s" % (h, filename)) for h in snvs.headers(): if h in snvheaders: continue # if h not in extrasnvheaders: # extrasnvheaders.append(h) for r in snvs: chr = r[snvheaders[0]].strip() snvchroms[filename].add(chr) locus = int(r[snvheaders[1]].strip()) ref = r[snvheaders[2]].strip() alt = r[snvheaders[3]].strip() if r.get('INFO:INDEL'):
snvs = CSVFileTable(filename=filename) elif extn == 'vcf': snvs = VCFFile(filename=filename) elif extn == 'tsv': snvs = TSVFileTable(filename=filename) elif extn == 'xls': snvs = XLSFileTable(filename=filename) elif extn == 'xlsx': snvs = XLSXFileTable(filename=filename) elif extn == 'txt': snvs = TXTFileTable(filename=filename, headers=snvheaders) else: raise RuntimeError("Unexpected SNV file extension: %s" % filename) for h in snvheaders: if h not in snvs.headers(): raise RuntimeError( "Required header: %s missing from SNV file %s" % (h, filename)) for h in snvs.headers(): if h in snvheaders: continue # if h not in extrasnvheaders: # extrasnvheaders.append(h) for r in snvs: chr = r[snvheaders[0]].strip() snvchroms[filename].add(chr) locus = int(r[snvheaders[1]].strip()) ref = r[snvheaders[2]].strip() alt = r[snvheaders[3]].strip()
elif extn == 'vcf': counts = VCFFile(filename=filename) elif extn == 'tsv': counts = TSVFileTable(filename=filename) elif extn == 'xls': counts = XLSFileTable(filename=filename) elif extn == 'xlsx': counts = XLSXFileTable(filename=filename) elif extn == 'txt': counts = TXTFileTable(filename=filename, headers=txtheaders) else: raise RuntimeError("Unexpected ReadCounts file extension: %s" % filename) for h in headers: if h not in counts.headers(): raise RuntimeError( "Required header: %s missing from ReadCounts file %s" % (h, filename)) for r in counts: try: chr = str(int(float(r[headers[0]]))) except ValueError: chr = r[headers[0]].strip() locus = int(float(r[headers[1]])) ref = r[headers[2]].strip() alt = r[headers[3]].strip() snvkey = (filename, chr, locus, ref, alt) rg = r[headers[4]].strip() nref = int(float(r[headers[5]]))
base, extn = filename.rsplit('.', 1) path, base = os.path.split(base) extn = extn.lower() if extn == 'csv': counts = CSVFileTable(filename=filename) elif extn == 'tsv': counts = TSVFileTable(filename=filename) elif extn == 'xls': counts = XLSFileTable(filename=filename) elif extn == 'xlsx': counts = XLSXFileTable(filename=filename) else: raise RuntimeError("Unexpected count file extension: %s" % filename) if countheaders == None: countheaders = counts.headers() else: assert countheaders == counts.headers() assert 'CHROM' in countheaders assert 'POS' in countheaders assert 'REF' in countheaders assert 'ALT' in countheaders assert 'Junctions' in countheaders for r in counts: for k in list(r.keys()): if r.get(k) in ("", None): del r[k] chr = r['CHROM'] pos = r['POS'] ref = r['REF']
base, extn = filename.rsplit('.', 1) path, base = os.path.split(base) extn = extn.lower() if extn == 'csv': counts = CSVFileTable(filename=filename) elif extn == 'tsv': counts = TSVFileTable(filename=filename) elif extn == 'xls': counts = XLSFileTable(filename=filename) elif extn == 'xlsx': counts = XLSXFileTable(filename=filename) else: raise RuntimeError("Unexpected count file extension: %s" % filename) if countheaders == None: countheaders = counts.headers() else: assert countheaders == counts.headers() assert 'CHROM' in countheaders assert 'POS' in countheaders assert 'REF' in countheaders assert 'ALT' in countheaders assert 'Junctions' in countheaders for r in counts: for k in r.keys(): if r.get(k) in ("", None): del r[k] chr = r['CHROM'] pos = r['POS'] ref = r['REF']