Example #1
for (sf, chr, locus, ref, alt), r in snvdata.items():
    chrom = chrreg.label2chrom(sf, chr)
    assert (chrom)
    snvkey = (chrom, locus, ref, alt)
    if snvkey not in snvdata1:
        snvdata1[snvkey] = (chrom, locus, ref, alt, r)

for bamfile in opt.alignments:
    chrreg.add_bamlabels(bamfile)

chrreg.determine_chrom_order()

snvdata = sorted(snvdata1.values(),
                 key=lambda s: (chrreg.chrom_order(s[0]), s[1], s[2], s[3]))
# extrasnvheaders = filter(lambda h: h in usedsnvheaders, extrasnvheaders)
progress.message("SNVs: %d\n" % len(snvdata))

outheaders = snvheaders + [_f for _f in """
SNVCountForward
SNVCountReverse
RefCountForward
RefCountReverse
SNVCount
RefCount
GoodReads
%BadRead
R
HomoVarSc
HetSc
HomoRefSc
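
A minimal, self-contained sketch of the pattern in this example, with the project's chromosome registry replaced by a hypothetical label2chrom helper and a fixed chromosome order: de-duplicate per-file SNV records onto canonical chromosome names, then sort them genome-wide.

# Sketch only: stand-ins for the project's chromosome registry.
CHROM_ORDER = {c: i for i, c in enumerate(
    [str(n) for n in range(1, 23)] + ["X", "Y", "MT"])}

def label2chrom(label):
    # Hypothetical canonicalizer: drop a leading "chr" prefix.
    return label[3:] if label.startswith("chr") else label

snvdata = {("snvs.vcf", "chr7", 140453136, "A", "T"): {"GENE": "BRAF"}}

snvdata1 = {}
for (sf, chrlabel, locus, ref, alt), r in snvdata.items():
    chrom = label2chrom(chrlabel)
    assert chrom
    snvdata1.setdefault((chrom, locus, ref, alt), (chrom, locus, ref, alt, r))

snvs = sorted(snvdata1.values(),
              key=lambda s: (CHROM_ORDER.get(s[0], len(CHROM_ORDER)), s[1], s[2], s[3]))
print(snvs)
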
Example #2
sampsig = "".join(
    map(str,
        [1 * (len(types2files[t]) > 0)
         for t in "GDNA SDNA NRNA TRNA".split()]))
if sampsig == "1111":
    events = AllSamplesEvent
elif sampsig == "1100":
    events = DNAOnlyEvent
elif sampsig == "1010":
    events = NormalOnlyEvent
elif sampsig == "0111":
    events = NoGDNAEvent
else:
    raise RuntimeError("Bad combination of sample files")
progress.message("Testing for events: %s." % (", ".join(events.listall()), ))

events.setCounts(GDNA, SDNA, NRNA, TRNA)

cosmic_headers = []
if opt.cosmic:
    progress.stage("Parsing COSMIC annotation file")
    if opt.cosmic.endswith('.gz'):
        f = gzip.open(opt.cosmic, mode='rt', encoding='utf8')
    else:
        f = open(opt.cosmic, mode='rt', encoding='utf8')
    reader = csv.DictReader(f, delimiter='\t')
    for cos in reader:
        if cos['Mutation genome position']:
            chr, locus = cos['Mutation genome position'].split(':', 1)
            pos_st, pos_ed = locus.split('-', 1)
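
The sample-signature dispatch at the top of this example can be exercised in isolation; a minimal sketch with the event classes replaced by placeholder strings:

# Sketch only: the event classes are stand-in strings here.
types2files = {"GDNA": ["g.bam"], "SDNA": ["s.bam"], "NRNA": [], "TRNA": []}
sampsig = "".join(str(1 * (len(types2files[t]) > 0))
                  for t in "GDNA SDNA NRNA TRNA".split())
dispatch = {"1111": "AllSamplesEvent", "1100": "DNAOnlyEvent",
            "1010": "NormalOnlyEvent", "0111": "NoGDNAEvent"}
if sampsig not in dispatch:
    raise RuntimeError("Bad combination of sample files")
events = dispatch[sampsig]
print(sampsig, events)          # 1100 DNAOnlyEvent
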
Example #3
            continue
        if not re.search(r'^[ACGT](,[ACGT])*$', alt):
            continue
        for h in r:
            if r.get(h):
                usedsnpheaders.add(h)
        cannonr = (",".join(map(lambda t: "%s:%s" % t, sorted(r.items()))))
        snpkey = (chr, locus, ref, alt, cannonr)
        if snpkey not in snpdata:
            snpdata[snpkey] = (chr, locus, ref, alt, r)

    progress.update()
progress.done()
snpdata = sorted(snpdata.values())
extrasnpheaders = [h for h in extrasnpheaders if h in usedsnpheaders]
progress.message("SNPs: %d" % len(snpdata))

progress.stage("Read splice junction data", len(opt.junctions))
juncdata = set()
for juncfile in opt.junctions:
    junc = BEDFile(filename=juncfile)
    for r in junc:
        chr = r['chrom']
        st = int(r['chromStart'])
        ed = int(r['chromEnd'])
        bs = list(map(int, r['blockSizes'].split(',')))
        assert len(bs) == 2
        gap = (st + bs[0], ed - bs[1])
        key = (chr, gap)
        juncdata.add(key)
    progress.update()
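
The junction loop reduces each two-block BED record to the intron ("gap") it spans; a minimal sketch on a hypothetical record:

# Sketch only: a hypothetical two-block BED junction record.
rec = {"chrom": "chr1", "chromStart": "1000", "chromEnd": "2000", "blockSizes": "50,60"}
st, ed = int(rec["chromStart"]), int(rec["chromEnd"])
bs = list(map(int, rec["blockSizes"].split(",")))
assert len(bs) == 2
gap = (st + bs[0], ed - bs[1])        # the intron between the two exon blocks
print((rec["chrom"], gap))            # ('chr1', (1050, 1940))
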
Example #4
snvdata1 = {}
for (sf, chr, locus, ref, alt), r in snvdata.items():
    chrom = chrreg.label2chrom(sf,chr)
    assert(chrom)
    snvkey = (chrom,locus,ref,alt)
    if snvkey not in snvdata1:
        snvdata1[snvkey] = (chrom,locus,ref,alt,r)

for bamfile in opt.alignments:
    chrreg.add_bamlabels(bamfile)

chrreg.determine_chrom_order()

snvdata = sorted(snvdata1.values(),key=lambda s: (chrreg.chrom_order(s[0]),s[1],s[2],s[3]))
# extrasnvheaders = filter(lambda h: h in usedsnvheaders, extrasnvheaders)
progress.message("SNVs: %d\n" % len(snvdata))

outheaders = snvheaders + [_f for _f in """
SNVCountForward
SNVCountReverse
RefCountForward
RefCountReverse
SNVCount
RefCount
GoodReads
%BadRead
R
HomoVarSc
HetSc
HomoRefSc
VarDomSc
Example #5
cmdargs = " ".join(args)

execution_log = """
readCounts Options:
  ReadCounts Files (-c): %s
  Matrix Output (-M):    %s
  Min. Reads (-m):       %s%s
  Quiet (-q):            %s
  Outfile File (-o):     %s

Command-Line: readCountsMatrix %s
""" % (", ".join(opt.counts), None if not matrix else opt.matrix, opt.minreads,
       "" if opt.matrix not in ("Ref:Var", "Ref;Var") or opt.minreads == 0 else
       " (ignored)", opt.quiet, opt.output, cmdargs)

progress.message(execution_log)

from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable

progress.stage("Read ReadCounts input files", len(opt.counts))
headers = "CHROM POS REF ALT ReadGroup RefCount SNVCount GoodReads".split()
# NOTE: This *MUST* correspond to the columns in the readCounts .txt file output
txtheaders = "CHROM   POS     REF     ALT     ReadGroup       SNVCountForward SNVCountReverse RefCountForward RefCountReverse SNVCount   RefCount GoodReads".split(
)

allrg = set()
vafmatrix = defaultdict(dict)
for filename in opt.counts:
    base, extn = filename.rsplit('.', 1)
    extn = extn.lower()
    if extn == 'csv':
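
The branch that begins at the end of this example selects a table reader by file extension; a minimal sketch of that dispatch, with placeholder class names standing in for the dataset module's tables (the mapping shown is illustrative, not the project's actual one):

# Sketch only: extension-to-reader mapping is illustrative.
READERS = {"csv": "CSVFileTable", "tsv": "TSVFileTable", "txt": "TXTFileTable",
           "xls": "XLSFileTable", "xlsx": "XLSXFileTable"}

def reader_for(filename):
    base, extn = filename.rsplit('.', 1)
    return READERS.get(extn.lower(), "TXTFileTable")

print(reader_for("sample1.readCounts.tsv"))   # TSVFileTable
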
Example #6
if fatal:
    sys.exit(1)

from event import *
sampsig = "".join(map(str,map(lambda t: 1*(len(types2files[t])>0),"GDNA SDNA NRNA TRNA".split())))
if sampsig == "1111":
    events = AllSamplesEvent
elif sampsig == "1100":
    events = DNAOnlyEvent
elif sampsig == "1010":
    events = NormalOnlyEvent
elif sampsig == "0111":
    events = NoGDNAEvent
else:
    raise RuntimeError("Bad combination of sample files")
progress.message("Testing for events: %s."%(", ".join(events.listall()),))

events.setCounts(GDNA,SDNA,NRNA,TRNA)

cosmic_headers = []
if opt.cosmic:
    progress.stage("Parsing COSMIC annotation file")
    if opt.cosmic.endswith('.gz'):
        f = gzip.open(opt.cosmic, mode='rt', encoding='utf8')
    else:
        f = open(opt.cosmic, mode='rt', encoding='utf8')
    reader = csv.DictReader(f, delimiter='\t')
    for cos in reader:
        if cos['Mutation genome position']:
            chr,locus = cos['Mutation genome position'].split(':',1)
            pos_st,pos_ed = locus.split('-',1)
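
A reusable version of the gzip-or-plain-text open used here, as a sketch; text mode is what lets csv.DictReader receive str rows under Python 3:

# Sketch only.
import csv
import gzip

def open_text(path):
    # Open .gz files in text mode ('rt') so DictReader sees decoded strings.
    if path.endswith('.gz'):
        return gzip.open(path, mode='rt', encoding='utf8')
    return open(path, mode='rt', encoding='utf8')

def read_tsv(path):
    with open_text(path) as f:
        yield from csv.DictReader(f, delimiter='\t')
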
Example #7
for juncfile in juncchroms:
    chrreg.add_labels(juncfile, juncchroms[juncfile])

juncdata1 = set()
for jf, chr, gap in juncdata:
    chrom = chrreg.label2chrom(jf, chr)
    juncdata1.add((chrom, gap))

chrreg.determine_chrom_order()

snpdata = sorted(list(snvdata1.values()),
                 key=lambda s: (chrreg.chrom_order(s[0]), s[1], s[2], s[3]))

extrasnpheaders = [h for h in extrasnpheaders if h in usedsnpheaders]
progress.message("SNPs: %d" % len(snpdata))

juncdata = sorted(
    ((chr, gap[i], gap) for i in (0, 1) for chr, gap in juncdata1),
    key=lambda j: (chrreg.chrom_order(j[0]), j[1], j[2]))
progress.message("Exon/Intron junctions: %d" % len(juncdata))

outheaders = snpheaders + [
    _f for _f in """
NumofJuncs
Distance
Junctions
SNPJuncIntronCount
SNPJuncNoIntronCount
NoSNPJuncIntronCount
NoSNPJuncNoIntronCount
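
The sorted() call over juncdata1 earlier in this example lists each junction once per endpoint so both ends can be visited in genome order; a minimal sketch with hypothetical junctions and a fixed chromosome order:

# Sketch only: hypothetical junctions and chromosome order.
juncdata1 = {("1", (1050, 1940)), ("2", (500, 700))}
chrom_order = {"1": 0, "2": 1}
juncdata = sorted(((chrom, gap[i], gap) for i in (0, 1) for chrom, gap in juncdata1),
                  key=lambda j: (chrom_order[j[0]], j[1], j[2]))
print(juncdata)   # [('1', 1050, ...), ('1', 1940, ...), ('2', 500, ...), ('2', 700, ...)]
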
Example #8
  Advanced:
    Min. Reads (-m)           %s (applied only to VAF matrix)
    Max. Reads (-M):          %s
    Read Groups (-G):         %s%s
    Threads per BAM (-t):     %s
    Quiet (-q):               %s

Command-Line: scReadCounts %s
""" % (", ".join(opt.snvs), ", ".join(opt.alignments), opt.filter,
       "" if readfilter == None else "\n" + indent(readfilter.tostr(), 10),
       opt.output, opt.minreads, opt.maxreads,
       None if readgroup == None else opt.readgroup, "" if readgroup == None
       else "\n" + indent(readgroup.tostr(), 12), opt.tpb, opt.quiet, cmdargs)

progress.message(execution_log)

args = []
args.extend(["-s", " ".join(opt.snvs)])
args.extend(["-r", " ".join(opt.alignments)])
args.extend(["-f", opt.filter])
args.extend(["-o", opt.output])
args.extend(["-m", 0])
if opt.maxreads != maxreads_default:
    args.extend(["-M", opt.maxreads])
if readgroup != None:
    args.extend(["-G", opt.readgroup])
args.extend(["-t", opt.tpb])
if opt.quiet:
    args.extend(["-q"])
args = [str(x) for x in args]
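
The tail of this example rebuilds an argument vector with optional flags and then stringifies every element; a minimal sketch of that pattern (the wrapper function, its parameters, and its defaults are hypothetical):

# Sketch only: option values are hypothetical.
def build_args(snvs, alignments, output, maxreads=None, quiet=False):
    args = ["-s", " ".join(snvs), "-r", " ".join(alignments), "-o", output, "-m", 0]
    if maxreads is not None:
        args.extend(["-M", maxreads])
    if quiet:
        args.append("-q")
    return [str(x) for x in args]          # everything stringified, as above

print(build_args(["snvs.vcf"], ["sample.bam"], "out.tsv", quiet=True))
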
Example #9
    Unique Reads (-U):        %s
    Read Groups (-G):         %s%s
    Threads per BAM (-t):     %s
    Full Headers (-F):        %s
    Quiet (-q):               %s
    Debug (-d):               %s

Command-Line: readCounts %s
""" % (", ".join(opt.snvs), ", ".join(
    opt.alignments), opt.filter, "" if readfilter == None else "\n" +
       indent(readfilter.tostr(), 10), opt.output, opt.minreads, opt.maxreads,
       opt.unique, None if readgroup == None else opt.readgroup,
       "" if readgroup == None else "\n" + indent(readgroup.tostr(), 12),
       opt.tpb, opt.full, opt.quiet, opt.debug, cmdargs)

progress.message(execution_log)

if opt.maxreads == None:
    opt.maxreads = 1e+20

from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile

progress.stage("Read SNV data", len(opt.snvs))
snvheaders = [_f for _f in """
CHROM POS REF ALT
""".split() if _f]

snvdata = {}
# extrasnvheaders = []
# usedsnvheaders = set()
snvchroms = defaultdict(set)
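
The example ends as the per-file chromosome-label collection is set up; a minimal sketch of how a defaultdict(set) like snvchroms gathers the labels seen in each SNV file (the input pairs are hypothetical):

# Sketch only: hypothetical (file, chromosome label) pairs.
from collections import defaultdict

snvchroms = defaultdict(set)
for filename, chrlabel in [("a.vcf", "chr1"), ("a.vcf", "chrX"), ("b.vcf", "1")]:
    snvchroms[filename].add(chrlabel)
print(dict(snvchroms))   # e.g. {'a.vcf': {'chr1', 'chrX'}, 'b.vcf': {'1'}}
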