def readFile(inFile, afCutoff, aoCutoff):
    """Print every variant record in a VCF file that meets both cutoffs.

    A header row is printed first, then one tab-separated row per passing
    record: "<chrom>-<loc>", AO, DP and AF (the last three formatted as
    floats with the %f conversion).

    Args:
        inFile: path of the VCF file to read.
        afCutoff: a record is reported only if lineObj.af() >= afCutoff.
        aoCutoff: a record is reported only if lineObj.ao() >= aoCutoff.

    Returns:
        None. All results go to standard output.

    Note:
        seqRead is not imported inside this function; it has to be
        available in the enclosing module scope.
    """
    # The context manager closes the handle even if parsing a record raises;
    # the previous version never closed the file at all.
    with open(inFile, 'r') as target:
        print('Chrom-Loc\tAO\t\tDP\t\tAF')
        for line in target:
            # skip vcf info: only lines that contain 'chr' and no '#'
            # anywhere are treated as variant records.
            if '#' in line or 'chr' not in line:
                continue
            lineObj = seqRead(line)
            if lineObj.af() >= afCutoff and lineObj.ao() >= aoCutoff:
                print('%s-%s\t%f\t%f\t%f' % (lineObj.chrom(),
                                             lineObj.loc(),
                                             lineObj.ao(),
                                             lineObj.dp(),
                                             lineObj.af()))
def elimBadAligns(inFile, outFile):
    """Copy a VCF file, dropping records that fall outside the probed regions.

    Lines containing '#' are copied unchanged. Any other line containing
    'chr' is parsed with seqRead and is copied only when its chromosome is a
    key of targetLocs and at least one of that chromosome's entries occurs
    inside the string returned by lineObj.loc(). All remaining lines are
    dropped.

    Args:
        inFile: path of the VCF file to read.
        outFile: path of the filtered VCF file to write (overwritten).

    Returns:
        None.
    """
    from parseLine import seqRead

    # Location fragments accepted for each chromosome.
    # NOTE(review): these look like truncated coordinate prefixes, but they
    # are matched below with a substring test, which can also hit in the
    # middle or at the end of an unrelated coordinate -- confirm whether a
    # prefix match was intended.
    targetLocs = {
        'chr1': ['1152278', '1152279', '1152564', '1152565', '1152566',
                 '1152587', '1152588'],
        'chr2': ['254572', '254573', '2091130', '2091131', '2091132',
                 '1982668', '1982669', '2231906', '2231907', '2231908',
                 '2290411', '2290412'],
        'chr4': ['1061972', '1061973', '1061974', '1061551', '1061552',
                 '1105411', '1105412', '1105413', '1129972', '1129973',
                 '1211677', '1211678', '1235477', '1235478', '1235479',
                 '1244286', '1244287'],
        'chr9': ['50737', '50738'],
        'chr11': ['21262', '21263', '21264', '23899', '2390', '2593',
                  '2594', '114865', '114866', '114867', '5342', '5343'],
        'chr12': ['253982', '253983', '253984', '253802', '253803'],
        'chr15': ['925270', '925271', '906318', '906319'],
        'chr16': ['733796', '733797', '733798', '824550', '824551',
                  '859491', '859492'],
        'chr17': ['75775', '75776', '75783', '75784', '75785', '75770',
                  '75771', '75772'],
        'chrX': ['486496', '486497', '486498'],
    }

    # Context managers close both handles even if parsing or writing raises;
    # the explicit close() calls used before only ran on success.
    with open(inFile, 'r') as inTarget:
        with open(outFile, 'w') as outTarget:
            for line in inTarget:
                if '#' in line:
                    # Header / info line: pass through untouched.
                    outTarget.write(line)
                elif 'chr' in line:
                    lineObj = seqRead(line)
                    # Chromosomes that are not probed have nothing to match.
                    probed = targetLocs.get(lineObj.chrom(), ())
                    loc = lineObj.loc()
                    if any(fragment in loc for fragment in probed):
                        outTarget.write(line)
def mutationsPerProbe(inFile, outputDir):
    """Tally variants per capture probe and plot the results to a PDF.

    Every line of inFile that contains 'chr' and no '#' is parsed with
    seqRead and assigned to a probe by identifyProbe(lineObj.loc()). Records
    for which identifyProbe returns a falsy value are ignored. Three tallies
    are kept per probe:

      * uniqVars: number of variant records seen;
      * totalVars: sum of lineObj.ao() over records with af() < 0.4;
      * totalCoverage: list of lineObj.dp() values.

    totalCoverage is reduced with getMeanCoverage, the two counts are passed
    through normalizeCounts together with that result, and each of the three
    tallies is drawn with plotTally into outputDir + '/probeBias.pdf'.

    Args:
        inFile: path of the VCF file to read.
        outputDir: directory that receives probeBias.pdf (also forwarded to
            plotTally).

    Returns:
        None.

    Note:
        identifyProbe, getMeanCoverage, normalizeCounts and plotTally are
        not defined in this function; they have to be available in the
        enclosing module scope.
    """
    from parseLine import seqRead

    # Capture regions, in the order they were originally listed. One list of
    # names replaces three hand-maintained copies of the same 32-key table.
    probeNames = (
        'TIIIa', 'NRAS-1', 'NRAS-2', 'DNMT3a', 'IDH1', 'SF3B1', 'TIIIb',
        'TIIIc', 'TET2-1', 'TET2-2', 'TIIId', 'TIIIe', 'TIIIf', 'TIIIg',
        'TIIIh', 'JAK2', 'TIIIj', 'TIIIk', 'TIIIl', 'TIIIm', 'HRAS',
        'KRAS-1', 'KRAS-2', 'TIIIn', 'IDH2', 'TIIIo', 'TIIIp', 'TIIIq',
        'p53-1', 'p53-2', 'p53-3', 'GATA1',
    )
    # Number of unique variants found within a particular capture region
    uniqVars = dict((name, 0) for name in probeNames)
    # Number of total variants found within a particular capture region
    totalVars = dict((name, 0) for name in probeNames)
    # Number of total probes capturing a particular region
    # (each probe needs its own list, hence no dict.fromkeys here)
    totalCoverage = dict((name, []) for name in probeNames)

    # The context manager closes the input even if a record fails to parse;
    # the previous version never closed the file.
    with open(inFile, 'r') as target:
        for line in target:
            if '#' in line or 'chr' not in line:
                continue
            lineObj = seqRead(line)
            probeNum = identifyProbe(lineObj.loc())
            if not probeNum:
                continue
            uniqVars[probeNum] += 1
            if lineObj.af() < 0.4:  # Don't want germline bias
                totalVars[probeNum] += lineObj.ao()
            # NOTE(review): the original source had lost its indentation, so
            # it is unclear whether this append belongs inside the af() < 0.4
            # branch above -- confirm against the upstream file.
            totalCoverage[probeNum].append(lineObj.dp())

    # Debug dump of the raw per-probe depth lists.
    print(totalCoverage)

    from matplotlib.backends.backend_pdf import PdfPages
    pdf = PdfPages(outputDir + '/probeBias.pdf')
    try:
        totalCoverage = getMeanCoverage(totalCoverage)
        # plotTally hands back the PdfPages object to use for the next page.
        pdf = plotTally(outputDir, totalCoverage, pdf,
                        'Probe Bias', 'Avg Num of Captures')

        uniqVars = normalizeCounts(uniqVars, totalCoverage)
        #displayTally(uniqVars)
        pdf = plotTally(outputDir, uniqVars, pdf,
                        'Normalized Unique Variants Per Probe',
                        'Number of Variants')

        totalVars = normalizeCounts(totalVars, totalCoverage)
        #displayTally(totalVars)
        pdf = plotTally(outputDir, totalVars, pdf,
                        'Normalized Total Variants Per Probe',
                        'Number of Variants')
    finally:
        # Close the PDF even when a helper raises so the handle is released.
        pdf.close()
#!/usr/bin/python
# Smoke test for parseLine.seqRead: parse one sample VCF record and print
# each field exposed by the resulting object, one per output line.
from parseLine import seqRead

# The sample record, split by VCF column for readability; adjacent string
# literals are concatenated into the same single line of text.
line = (
    'chr18\t22343095\t.\tG\tA\t13777\t.\t'
    'AB=0;ABP=0;AC=2;AF=1;AN=2;AO=636;CIGAR=1X;DP=637;DPB=637;DPRA=0;'
    'EPP=1384.07;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;'
    'NUMALT=1;ODDS=875.012;PAIRED=0;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;'
    'QA=21725;QR=36;RO=1;RPL=636;RPP=1384.07;RPPR=5.18177;RPR=0;RUN=1;'
    'SAF=636;SAP=1384.07;SAR=0;SRF=1;SRP=5.18177;SRR=0;TYPE=snp\t'
    'GT:DP:DPR:RO:QR:AO:QA:GL\t'
    '1/1:637:637,636:1:36:636:21725:-1951.16,-188.158,0\n'
)

x = seqRead(line)

# Accessors are looked up and called one at a time, in the original order.
for accessor in ('chrom', 'loc', 'wt', 'var', 'ao', 'dp', 'af'):
    print(getattr(x, accessor)())