def main(inFileName, outFileName, pileupDirL):
    """Fill in zero read counts of single-position mutations from pileup data.

    The first line of ``inFileName`` is copied to ``outFileName`` as-is.
    Every following line is a tab-separated record in which

    * column 1 holds two sample ids joined by ``-``,
    * column 2 holds a locus of the form ``chrN:start~end``,
    * columns 3 and 4 hold the reference and alternative allele,
    * the last six columns are read as three pairs of counts; only the
      pairs at ``[-4], [-3]`` (tagged "Recur" below) and ``[-6], [-5]``
      (tagged "Prim" below) are inspected here.

    A record is rewritten only when one of those two pairs is ``0``/``0``
    and the locus spans a single position.  In that case
    ``mygenome.lookupPileup`` is asked for the counts of the corresponding
    sample and, if it returns something truthy, ``result[0]`` and
    ``result[1]`` replace the zero pair.  All other records are copied
    through unchanged.

    Parameters:
        inFileName: path of the tab-separated input table (with header).
        outFileName: path of the table to write.
        pileupDirL: passed through to ``mygenome.lookupPileup``.
    """
    locusRe = re.compile('(chr[^:]*):([0-9]*)~([0-9]*)')

    # ``with`` guarantees both handles are closed, also on errors.
    with open(inFileName) as inFile, open(outFileName, 'w') as outFile:
        outFile.write(inFile.readline())

        for line in inFile:
            # Strip only the newline so that a final line without one does
            # not lose the last character of its last column.
            tokL = line.rstrip('\n').split('\t')

            if tokL[-3] == tokL[-4] == '0':
                flag = 0  # Recur
            elif tokL[-5] == tokL[-6] == '0':
                flag = 1  # Prim
            else:
                outFile.write(line)
                continue

            (chrom, chrSta, chrEnd) = locusRe.match(tokL[2]).groups()

            # Only single-position loci are looked up.  (The previous
            # expression subtracted chrEnd from itself and never triggered.)
            if int(chrEnd) - int(chrSta) != 0:
                outFile.write(line)
                continue

            refAllele = tokL[3]
            altAllele = tokL[4]

            # flag 0 -> second id of the pair, flag 1 -> first id.
            sId = tokL[1].split('-')[1 - flag]

            result = mygenome.lookupPileup(pileupDirL, sId, chrom, chrSta, refAllele, altAllele)

            if result:
                # Overwrite exactly the pair that was found to be 0/0 above.
                tokL[-3 - flag * 2] = str(result[1])
                tokL[-4 - flag * 2] = str(result[0])
                outFile.write('\t'.join(tokL) + '\n')
            else:
                outFile.write(line)
def _lookupMutect(mutectDirL, sId, chrom, chrSta, refAllele, altAllele):
    """Return the pair of counts recorded for one variant in a ``*.union_pos.mutect`` file.

    The file is searched for below every directory in ``mutectDirL`` with
    ``find``; paths containing ``backup`` are ignored.  The process exits
    with status 1 unless exactly one file is found.  Lines of that file
    matching both ``chrom`` and ``chrSta`` (``grep -w``) are reduced to
    fields 1,2,4,5,21,22; the last line whose fields 4/5 equal
    ``refAllele``/``altAllele`` yields ``[field 22, field 21]``.
    Returns ``None`` when no line matches.
    """
    fileNL = []
    for mutDir in mutectDirL:
        # The glob is quoted so the shell hands it to ``find`` verbatim
        # instead of expanding it against the current directory.
        pipe = os.popen("find %s -name '*%sT.union_pos.mutect'" % (mutDir, sId[1:]))
        try:
            fileNL += [path for path in pipe.readlines() if 'backup' not in path]
        finally:
            pipe.close()

    # Diagnostics go to stderr because stdout may be the data stream.
    if len(fileNL) > 1:
        sys.stderr.write('Mutiple files: %s\n' % ','.join(fileNL))
        sys.exit(1)
    if not fileNL:
        sys.stderr.write('No mutect file found for %s\n' % sId)
        sys.exit(1)

    fileN = fileNL[0].rstrip()
    pipe = os.popen('grep -w %s %s | grep -w %s | cut -f 1,2,4,5,21,22' % (chrom, fileN, chrSta))
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()

    result = None
    for ln in lines:
        colL = ln.rstrip().split('\t')
        if len(colL) < 6:
            # grep may also hit lines that are not full data records
            continue
        if colL[2] == refAllele and colL[3] == altAllele:
            result = [colL[5], colL[4]]
    return result


def main(inFileName, outFileName, pileupDirL=mysetting.wxsPileupProcDirL, mutectDirL=['/EQL3/pipeline/somatic_mutect']):
    """Fill in zero read counts of single-position mutations and emit the table.

    The header line of ``inFileName`` is written to the output unchanged;
    its column names are used to locate the ``sId_pair`` column.  Every
    following tab-separated record is handed to ``parse_line`` (either
    unchanged or with counts filled in), and the accumulated records are
    flushed with ``print_dat`` whenever a record's ``sId_pair`` value is
    not a key of the accumulator, and once more at the end.

    A record is a candidate for filling when the pair of columns at
    ``[-4], [-3]`` ("Recur") or at ``[-6], [-5]`` ("Prim") is ``0``/``0``
    and its locus (column 2, ``chrN:start~end``) spans a single position.
    For candidates where either of the last two columns is not ``'0'``
    (marked "has matched normal" in the original code) the counts are
    taken from a MuTect result file; for the remaining candidates they
    come from ``mygenome.lookupPileup``.

    Parameters:
        inFileName: path of the tab-separated input table (with header).
        outFileName: path of the output table; ``''`` writes to stdout.
        pileupDirL: passed through to ``mygenome.lookupPileup``.
        mutectDirL: directories searched for ``*.union_pos.mutect`` files.
    """
    inFile = open(inFileName)
    if outFileName == '':
        outFile = sys.stdout
    else:
        outFile = open(outFileName, 'w')

    try:
        header = inFile.readline()
        idxH = {}
        for i, colName in enumerate(header.rstrip().split('\t')):
            idxH[colName] = i
        outFile.write(header)

        outH = {}

        for line in inFile:
            # Strip only the newline so that a final line without one keeps
            # the last character of its last column.
            tokL = line.rstrip('\n').split('\t')
            pair = tokL[idxH['sId_pair']]

            rm = re.match('(chr[^:]*):([0-9]*)~([0-9]*)', tokL[2])
            (chrom, chrSta, chrEnd) = rm.groups()

            refAllele = tokL[3]
            altAllele = tokL[4]

            # presumably parse_line keys outH by sId_pair, so this flushes
            # once per group of consecutive records -- confirm against parse_line
            if pair not in outH:
                print_dat(outH, outFile)
                outH = {}

            if tokL[-3] == tokL[-4] == '0':
                flag = 0  # Recur
            elif tokL[-5] == tokL[-6] == '0':
                flag = 1  # Prim
            else:
                parse_line(outH, idxH, line)
                continue

            if int(chrEnd) - int(chrSta) != 0:
                parse_line(outH, idxH, line)
                continue

            # flag 0 -> second id of the pair, flag 1 -> first id.
            sId = tokL[1].split('-')[1 - flag]

            # NOTE(review): the else branch is taken to pair with the
            # matched-normal check below -- confirm against the intended layout.
            # The lookup result is computed afresh for every record, so a
            # record without a MuTect hit can no longer inherit the counts of
            # the record before it.
            if tokL[-1] != '0' or tokL[-2] != '0':  ## has matched normal
                result = _lookupMutect(mutectDirL, sId, chrom, chrSta, refAllele, altAllele)
            else:
                result = mygenome.lookupPileup(pileupDirL, sId, chrom, chrSta, refAllele, altAllele)

            if result:
                # Overwrite exactly the pair that was found to be 0/0 above.
                tokL[-3 - flag * 2] = str(result[1])
                tokL[-4 - flag * 2] = str(result[0])
                parse_line(outH, idxH, '\t'.join(tokL) + '\n')
            else:
                parse_line(outH, idxH, line)

        print_dat(outH, outFile)
    finally:
        inFile.close()
        # stdout belongs to the caller; only close files opened here
        if outFile is not sys.stdout:
            outFile.close()
def _lookupMutect(mutectDirL, sId, chrom, chrSta, refAllele, altAllele):
    """Return the pair of counts recorded for one variant in a ``*.union_pos.mutect`` file.

    The file is searched for below every directory in ``mutectDirL`` with
    ``find``; paths containing ``backup`` are ignored.  The process exits
    with status 1 unless exactly one file is found.  Lines of that file
    matching both ``chrom`` and ``chrSta`` (``grep -w``) are reduced to
    fields 1,2,4,5,21,22; the last line whose fields 4/5 equal
    ``refAllele``/``altAllele`` yields ``[field 22, field 21]``.
    Returns ``None`` when no line matches.
    """
    fileNL = []
    for mutDir in mutectDirL:
        # The glob is quoted so the shell hands it to ``find`` verbatim
        # instead of expanding it against the current directory.
        pipe = os.popen("find %s -name '*%sT.union_pos.mutect'" % (mutDir, sId[1:]))
        try:
            fileNL += [path for path in pipe.readlines() if 'backup' not in path]
        finally:
            pipe.close()

    # Diagnostics go to stderr because stdout may be the data stream.
    if len(fileNL) > 1:
        sys.stderr.write('Mutiple files: %s\n' % ','.join(fileNL))
        sys.exit(1)
    if not fileNL:
        sys.stderr.write('No mutect file found for %s\n' % sId)
        sys.exit(1)

    fileN = fileNL[0].rstrip()
    pipe = os.popen('grep -w %s %s | grep -w %s | cut -f 1,2,4,5,21,22' % (chrom, fileN, chrSta))
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()

    result = None
    for ln in lines:
        colL = ln.rstrip().split('\t')
        if len(colL) < 6:
            # grep may also hit lines that are not full data records
            continue
        if colL[2] == refAllele and colL[3] == altAllele:
            result = [colL[5], colL[4]]
    return result


def main(inFileName,outFileName,pileupDirL=mysetting.wxsPileupProcDirL,mutectDirL=['/EQL3/pipeline/somatic_mutect']):
    """Fill in zero read counts of single-position mutations and emit the table.

    The header line of ``inFileName`` is written to the output unchanged;
    its column names are used to locate the ``sId_pair`` column.  Every
    following tab-separated record is handed to ``parse_line`` (either
    unchanged or with counts filled in), and the accumulated records are
    flushed with ``print_dat`` whenever a record's ``sId_pair`` value is
    not a key of the accumulator, and once more at the end.

    A record is a candidate for filling when the pair of columns at
    ``[-4], [-3]`` ("Recur") or at ``[-6], [-5]`` ("Prim") is ``0``/``0``
    and its locus (column 2, ``chrN:start~end``) spans a single position.
    For candidates where either of the last two columns is not ``'0'``
    (marked "has matched normal" in the original code) the counts are
    taken from a MuTect result file; for the remaining candidates they
    come from ``mygenome.lookupPileup``.

    Parameters:
        inFileName: path of the tab-separated input table (with header).
        outFileName: path of the output table; ``''`` writes to stdout.
        pileupDirL: passed through to ``mygenome.lookupPileup``.
        mutectDirL: directories searched for ``*.union_pos.mutect`` files.
    """
    inFile = open(inFileName)
    if outFileName == '':
        outFile = sys.stdout
    else:
        outFile = open(outFileName,'w')

    try:
        header = inFile.readline()
        idxH = {}
        for i, colName in enumerate(header.rstrip().split('\t')):
            idxH[colName] = i
        outFile.write(header)

        outH = {}

        for line in inFile:
            # Strip only the newline so that a final line without one keeps
            # the last character of its last column.
            tokL = line.rstrip('\n').split('\t')
            pair = tokL[idxH['sId_pair']]

            rm = re.match('(chr[^:]*):([0-9]*)~([0-9]*)',tokL[2])
            (chrom,chrSta,chrEnd) = rm.groups()

            refAllele = tokL[3]
            altAllele = tokL[4]

            # presumably parse_line keys outH by sId_pair, so this flushes
            # once per group of consecutive records -- confirm against parse_line
            if pair not in outH:
                print_dat(outH, outFile)
                outH = {}

            if tokL[-3]==tokL[-4]=='0':
                flag = 0 # Recur
            elif tokL[-5]==tokL[-6]=='0':
                flag = 1 # Prim
            else:
                parse_line(outH, idxH, line)
                continue

            if int(chrEnd)-int(chrSta)!=0:
                parse_line(outH, idxH, line)
                continue

            # flag 0 -> second id of the pair, flag 1 -> first id.
            sId = tokL[1].split('-')[1-flag]

            # NOTE(review): the else branch is taken to pair with the
            # matched-normal check below -- confirm against the intended layout.
            # The lookup result is computed afresh for every record, so a
            # record without a MuTect hit can no longer inherit the counts of
            # the record before it.
            if tokL[-1] != '0' or tokL[-2] != '0': ##has matched normal
                result = _lookupMutect(mutectDirL, sId, chrom, chrSta, refAllele, altAllele)
            else:
                result = mygenome.lookupPileup(pileupDirL,sId,chrom,chrSta,refAllele,altAllele)

            if result:
                # Overwrite exactly the pair that was found to be 0/0 above.
                tokL[-3-flag*2] = str(result[1])
                tokL[-4-flag*2] = str(result[0])
                parse_line(outH, idxH, '\t'.join(tokL) + '\n')
            else:
                parse_line(outH, idxH, line)

        print_dat(outH, outFile)
    finally:
        inFile.close()
        # stdout belongs to the caller; only close files opened here
        if outFile is not sys.stdout:
            outFile.close()