Beispiel #1
0
# Progress reporter used for the stage messages below.
progress = ProgressText()

# Directory containing the read-counts file.
base = os.path.dirname(os.path.abspath(opt.counts))

# One table per sample-type tag (presumably filled by the parsing loop
# that follows; the loop tests membership of (CHROM, POS) keys in them).
TRNA, NRNA, GDNA, SDNA = {}, {}, {}, {}

from chromreg import ChromLabelRegistry

# Register the chromosome labels expected in the counts file:
# "1" .. "99" plus "X", "Y" and "MT".
chrreg = ChromLabelRegistry()
labels = [str(number) for number in range(1, 100)] + ["X", "Y", "MT"]
chrreg.add_labels(opt.counts, labels)
chrreg.default_chrom_order()


def chrorder(l):
    """Return the registry's ordering value for counts-file label `l`."""
    return chrreg.chrom_order(chrreg.label2chrom(opt.counts, l))


progress.stage("Parsing read-counts")
f = open(opt.counts, 'rt', encoding='utf8')
reader = csv.DictReader(f, delimiter='\t')
# Two sets-by-key mappings, created empty here.
types2files, files2types = defaultdict(set), defaultdict(set)
# Visit every data row of the tab-separated counts file; csv.DictReader
# yields one dict per row, keyed by the header names.
for row in reader:
    # (CHROM, POS) pair; tested for membership in the per-type tables below.
    key = (row['CHROM'], row['POS'])
    # Value of the AlignedReads column; matched against the sample-type
    # regular expressions below.
    filename = row['AlignedReads']
    # Convert the non-empty numeric columns in place: column names ending in
    # 'Count' become int, column names ending in 'Sc' become float.
    # NOTE(review): csv.DictReader stores surplus fields of an over-long row
    # under the key None, on which .endswith would fail -- confirm that the
    # input rows never have more fields than the header.
    for k in row:
        if k.endswith('Count') and row[k] != "":
            row[k] = int(row[k])
        if k.endswith('Sc') and row[k] != "":
            row[k] = float(row[k])
    # Taken when the file name matches the GDNA pattern and this key is not
    # yet in GDNA; the body of this branch lies outside this excerpt.
    if re.search(regex["GDNA"], filename) and key not in GDNA:
Beispiel #2
0
    #     if r.get(h):
    #         usedsnvheaders.add(h)
        snvkey = (filename, chr, locus, ref, alt)
        if snvkey not in snvdata:
            snvdata[snvkey] = r
    progress.update()
progress.done()

# Registry used to translate each SNV file's chromosome labels via
# label2chrom(file, label).
chrreg = ChromLabelRegistry()

for snvfile in snvchroms:
    chrreg.add_labels(snvfile, snvchroms[snvfile])

# Re-key the SNVs by registry chromosome instead of (file, label). When more
# than one entry maps to the same (chrom, locus, ref, alt), the first one
# encountered is kept.
snvdata1 = {}
# .items() instead of the Python 2-only .iteritems(), so the loop runs on
# both Python 2 and Python 3.
for (sf, chr, locus, ref, alt), r in snvdata.items():
    chrom = chrreg.label2chrom(sf, chr)
    # Every label was registered above, so a falsy result is unexpected.
    assert chrom, "no chromosome for label %r of %s" % (chr, sf)
    snvkey = (chrom, locus, ref, alt)
    if snvkey not in snvdata1:
        snvdata1[snvkey] = (chrom, locus, ref, alt, r)

# The alignment files contribute their labels before the chromosome order
# is determined.
for bamfile in opt.alignments:
    chrreg.add_bamlabels(bamfile)

chrreg.determine_chrom_order()

# Sort by the registry's chromosome order, then by locus, ref and alt.
snvdata = sorted(snvdata1.values(),
                 key=lambda s: (chrreg.chrom_order(s[0]), s[1], s[2], s[3]))
# extrasnvheaders = filter(lambda h: h in usedsnvheaders, extrasnvheaders)
progress.message("SNVs: %d\n" % len(snvdata))
Beispiel #3
0
# Announce the input file. end=' ' keeps the cursor on the same line, and the
# explicit flush pushes the message out before the file is read.
print("Reading", filename, "...", end=' ')
sys.stdout.flush()

# A name whose text after the last '.' is "vcf" (any case) is read with
# ReadVCF; every other name is read with ReadTSV.
fileheader, chrlab, snvdata = (
    ReadVCF if filename.rpartition('.')[2].lower() == 'vcf' else ReadTSV
)(filename)

from chromreg import ChromLabelRegistry

# Register the chromosome labels the reader returned for this file.
chrreg = ChromLabelRegistry()
chrreg.add_labels(filename, chrlab)

# Walk the SNV records from last to first, so that deleting a record never
# shifts the index of a record that is still to be visited.
for i in reversed(range(len(snvdata))):
    chrlab = snvdata[i][0]
    chrom = chrreg.label2chrom(filename, chrlab)
    if chrreg.isnumberedchrom(chrom) or chrreg.issexchrom(chrom):
        # Keep the record: store the resolved chromosome in field 0 and
        # convert field 1 to an integer.
        snvdata[i][0] = chrom
        snvdata[i][1] = int(snvdata[i][1])
    else:
        # Neither a numbered nor a sex chromosome: drop the record.
        del snvdata[i]

# Chromosome labels expected in the UCSC exon-coordinate file.
# Numbers run up to 99 so that organisms with more chromosomes than human
# are covered too; the surplus labels do no harm in the human case.
# Open question: should mitochondria be added here as well, and which
# notation does UCSC use for them?
exonlabels = [str(number) for number in range(1, 100)] + ["X", "Y", "MT"]
chrreg.add_labels(exoncoords, exonlabels)

chrreg.default_chrom_order()
Beispiel #4
0
        #         usedsnvheaders.add(h)
        snvkey = (filename, chr, locus, ref, alt)
        if snvkey not in snvdata:
            snvdata[snvkey] = r

    progress.update()
progress.done()

# Registry used to translate each SNV file's chromosome labels via
# label2chrom(file, label).
chrreg = ChromLabelRegistry()

for snvfile in snvchroms:
    chrreg.add_labels(snvfile, snvchroms[snvfile])

# Re-key the SNVs by registry chromosome instead of (file, label). When more
# than one entry maps to the same (chrom, locus, ref, alt), the first one
# encountered is kept.
snvdata1 = {}
# .items() instead of the Python 2-only .iteritems(), so the loop runs on
# both Python 2 and Python 3.
for (sf, chr, locus, ref, alt), r in snvdata.items():
    chrom = chrreg.label2chrom(sf, chr)
    # Every label was registered above, so a falsy result is unexpected.
    assert chrom, "no chromosome for label %r of %s" % (chr, sf)
    snvkey = (chrom, locus, ref, alt)
    if snvkey not in snvdata1:
        snvdata1[snvkey] = (chrom, locus, ref, alt, r)

# The alignment files contribute their labels before the chromosome order
# is determined.
for bamfile in opt.alignments:
    chrreg.add_bamlabels(bamfile)

chrreg.determine_chrom_order()

# Sort by the registry's chromosome order, then by locus, ref and alt.
snvdata = sorted(
    snvdata1.values(),
    key=lambda s: (chrreg.chrom_order(s[0]), s[1], s[2], s[3]),
)
# extrasnvheaders = filter(lambda h: h in usedsnvheaders, extrasnvheaders)
progress.message("SNVs: %d\n" % len(snvdata))

outheaders = snvheaders + filter(None, """
Beispiel #5
0
# Patterns matched against the AlignedReads file name of each counts row,
# one per sample-type tag.
regex["NRNA"] = opt.normaltransre
regex["SDNA"] = opt.tumordnare
regex["TRNA"] = opt.tumortransre

# Progress reporter used for the stage messages below.
progress = ProgressText()

# Directory containing the read-counts file.
base = os.path.split(os.path.abspath(opt.counts))[0]

# One table per sample-type tag (presumably filled by the parsing loop
# that follows; the loop tests membership of (CHROM, POS) keys in them).
TRNA = {}
NRNA = {}
GDNA = {}
SDNA = {}

from chromreg import ChromLabelRegistry
chrreg = ChromLabelRegistry()
# Labels expected in the counts file: "1" .. "99" plus "X", "Y" and "MT".
# list(...) is required because on Python 3 map() returns an iterator, which
# cannot be concatenated with a list; on Python 2 it is a harmless copy.
labels = list(map(str, range(1, 100))) + ["X", "Y", "MT"]
chrreg.add_labels(opt.counts, labels)
chrreg.default_chrom_order()
# Ordering value of a counts-file chromosome label.
chrorder = lambda l: chrreg.chrom_order(chrreg.label2chrom(opt.counts, l))

progress.stage("Parsing read-counts")
f = open(opt.counts, 'r')
reader = csv.DictReader(f, delimiter='\t')
# Two sets-by-key mappings, created empty here.
types2files = defaultdict(set)
files2types = defaultdict(set)
# Visit every data row of the tab-separated counts file; csv.DictReader
# yields one dict per row, keyed by the header names.
for row in reader:
    # (CHROM, POS) pair; tested for membership in the per-type tables below.
    key = (row['CHROM'],row['POS'])
    # Value of the AlignedReads column; matched against the sample-type
    # regular expressions below.
    filename = row['AlignedReads']
    # Convert the non-empty numeric columns in place: column names ending in
    # 'Count' become int, column names ending in 'Sc' become float.
    # NOTE(review): csv.DictReader stores surplus fields of an over-long row
    # under the key None, on which .endswith would fail -- confirm that the
    # input rows never have more fields than the header.
    for k in row:
        if k.endswith('Count') and row[k] != "":
            row[k] = int(row[k])
        if k.endswith('Sc') and row[k] != "":
            row[k] = float(row[k])
    # Taken when the file name matches the GDNA pattern and this key is not
    # yet in GDNA; the body of this branch lies outside this excerpt.
    if re.search(regex["GDNA"],filename) and key not in GDNA: