예제 #1
0
def ReadTSV(filename):
    """Load an SNV table from ``filename`` and return its header line, chromosomes and rows.

    Despite the name, the reader is chosen from the (case-insensitive) file
    extension: ``csv``, ``tsv``, ``xls``, ``xlsx`` or ``txt``.  For ``txt``
    files the four required column names are passed to the reader as its
    ``headers`` argument.

    Args:
        filename: Path of the SNV file; its extension selects the reader.

    Returns:
        A 3-tuple ``(header, chrom, snvdata)`` where ``header`` is a
        one-element list holding the tab-joined column names, ``chrom`` is
        the set of values seen in the first column, and ``snvdata`` is a list
        with one list per row, holding that row's values in column order
        (``None`` for any column the row does not provide).

    Raises:
        RuntimeError: If the extension is missing or not one of the supported
            ones, if a required column (CHROM, POS, REF, ALT) is absent, or
            if those four are not the first four columns in that order.
    """
    snvheaders = ["CHROM", "POS", "REF", "ALT"]

    # rpartition (unlike unpacking rsplit) cannot fail on a name without a
    # dot; such a name is reported below as an unexpected extension.
    _, dot, extn = filename.rpartition('.')
    extn = extn.lower() if dot else ''
    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'tsv':
        snvs = TSVFileTable(filename=filename)
    elif extn == 'xls':
        snvs = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        snvs = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        snvs = TXTFileTable(filename=filename, headers=snvheaders)
    else:
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)

    # Fetch the column names once instead of once per row.
    # NOTE(review): assumes headers() returns the same value on every call.
    headers = snvs.headers()

    for required in snvheaders:
        if required not in headers:
            raise RuntimeError("Required header: %s missing from SNV file %s" %
                               (required, filename))

    # Explicit check rather than ``assert`` so it still runs under ``-O``.
    if headers[:4] != snvheaders:
        raise RuntimeError(
            "SNV file %s must start with the columns: %s" %
            (filename, " ".join(snvheaders)))

    chrom = set()
    snvdata = []
    for row in snvs:
        values = [row.get(h) for h in headers]
        chrom.add(values[0])
        snvdata.append(values)

    return ["\t".join(headers)], chrom, snvdata
예제 #2
0
        snvs = CSVFileTable(filename=filename)
    elif extn == 'vcf':
        snvs = VCFFile(filename=filename)
    elif extn == 'tsv':
        snvs = TSVFileTable(filename=filename)
    elif extn == 'xls':
        snvs = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        snvs = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        snvs = TXTFileTable(filename=filename, headers=snvheaders)
    else:
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)

    for h in snvheaders:
        if h not in snvs.headers():
            raise RuntimeError("Required header: %s missing from SNV file %s" %
                               (h, filename))

    for h in snvs.headers():
        if h in snvheaders:
            continue
        # if h not in extrasnvheaders:
        #     extrasnvheaders.append(h)
    for r in snvs:
        chr = r[snvheaders[0]].strip()
        snvchroms[filename].add(chr)
        locus = int(r[snvheaders[1]].strip())
        ref = r[snvheaders[2]].strip()
        alt = r[snvheaders[3]].strip()
        if r.get('INFO:INDEL'):
예제 #3
0
        snvs = CSVFileTable(filename=filename)
    elif extn == 'vcf':
        snvs = VCFFile(filename=filename)
    elif extn == 'tsv':
        snvs = TSVFileTable(filename=filename)
    elif extn == 'xls':
        snvs = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        snvs = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        snvs = TXTFileTable(filename=filename, headers=snvheaders)
    else:
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)

    for h in snvheaders:
        if h not in snvs.headers():
            raise RuntimeError(
                "Required header: %s missing from SNV file %s" % (h, filename))

    for h in snvs.headers():
        if h in snvheaders:
            continue
        # if h not in extrasnvheaders:
        #     extrasnvheaders.append(h)

    for r in snvs:
        chr = r[snvheaders[0]].strip()
	snvchroms[filename].add(chr)
        locus = int(r[snvheaders[1]].strip())
        ref = r[snvheaders[2]].strip()
        alt = r[snvheaders[3]].strip()
예제 #4
0
    elif extn == 'vcf':
        counts = VCFFile(filename=filename)
    elif extn == 'tsv':
        counts = TSVFileTable(filename=filename)
    elif extn == 'xls':
        counts = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        counts = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        counts = TXTFileTable(filename=filename, headers=txtheaders)
    else:
        raise RuntimeError("Unexpected ReadCounts file extension: %s" %
                           filename)

    for h in headers:
        if h not in counts.headers():
            raise RuntimeError(
                "Required header: %s missing from ReadCounts file %s" %
                (h, filename))

    for r in counts:
        try:
            chr = str(int(float(r[headers[0]])))
        except ValueError:
            chr = r[headers[0]].strip()
        locus = int(float(r[headers[1]]))
        ref = r[headers[2]].strip()
        alt = r[headers[3]].strip()
        snvkey = (filename, chr, locus, ref, alt)
        rg = r[headers[4]].strip()
        nref = int(float(r[headers[5]]))
예제 #5
0
    base, extn = filename.rsplit('.', 1)
    path, base = os.path.split(base)
    extn = extn.lower()
    if extn == 'csv':
        counts = CSVFileTable(filename=filename)
    elif extn == 'tsv':
        counts = TSVFileTable(filename=filename)
    elif extn == 'xls':
        counts = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        counts = XLSXFileTable(filename=filename)
    else:
        raise RuntimeError("Unexpected count file extension: %s" % filename)

    if countheaders == None:
        countheaders = counts.headers()
    else:
        assert countheaders == counts.headers()
    assert 'CHROM' in countheaders
    assert 'POS' in countheaders
    assert 'REF' in countheaders
    assert 'ALT' in countheaders
    assert 'Junctions' in countheaders

    for r in counts:
        for k in list(r.keys()):
            if r.get(k) in ("", None):
                del r[k]
        chr = r['CHROM']
        pos = r['POS']
        ref = r['REF']
예제 #6
0
    base, extn = filename.rsplit('.', 1)
    path, base = os.path.split(base)
    extn = extn.lower()
    if extn == 'csv':
        counts = CSVFileTable(filename=filename)
    elif extn == 'tsv':
        counts = TSVFileTable(filename=filename)
    elif extn == 'xls':
        counts = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        counts = XLSXFileTable(filename=filename)
    else:
        raise RuntimeError("Unexpected count file extension: %s" % filename)

    if countheaders == None:
        countheaders = counts.headers()
    else:
        assert countheaders == counts.headers()
    assert 'CHROM' in countheaders
    assert 'POS' in countheaders
    assert 'REF' in countheaders
    assert 'ALT' in countheaders
    assert 'Junctions' in countheaders

    for r in counts:
        for k in r.keys():
            if r.get(k) in ("", None):
                del r[k]
        chr = r['CHROM']
        pos = r['POS']
        ref = r['REF']