Example #1
0
def main(inFileName,outFileName,pileupDirL):
	"""Copy a tab-separated table, back-filling one zeroed column pair per row.

	The first line (header) is copied unchanged.  Every other row is
	processed as follows (column indices are zero-based):

	* if columns -4 and -3 are both '0' the row is treated as "Recur"
	  (flag 0); otherwise, if columns -6 and -5 are both '0', as "Prim"
	  (flag 1); any other row is copied unchanged;
	* column 2 must hold a locus of the form ``chr<name>:<start>~<end>``;
	  rows whose start and end differ are copied unchanged;
	* mygenome.lookupPileup() is called for the sample id taken from the
	  '-'-separated column 1 (second part for flag 0, first part for
	  flag 1).  A truthy result overwrites the zeroed pair with result[0]
	  and result[1]; a falsy result leaves the row unchanged.

	NOTE(review): the two values are presumably ref/alt read counts --
	confirm against mygenome.lookupPileup.

	Args:
		inFileName: path of the table to read.
		outFileName: path of the table to write (truncated if it exists).
		pileupDirL: passed through to mygenome.lookupPileup.

	Raises:
		AttributeError: if column 2 of an examined row is not a valid locus.
		IndexError: if a row has fewer columns than the checks require.
	"""

	# Compiled once; the same pattern is applied to every examined row.
	locusPat = re.compile('(chr[^:]*):([0-9]*)~([0-9]*)')

	# Context managers close both files even when a row raises.
	with open(inFileName) as inFile, open(outFileName,'w') as outFile:

		outFile.write(inFile.readline())

		for line in inFile:

			# rstrip instead of line[:-1]: a final line without a trailing
			# newline must not lose the last character of its last column.
			tokL = line.rstrip('\n').split('\t')

			if tokL[-3]==tokL[-4]=='0':
				flag = 0 # Recur
			elif tokL[-5]==tokL[-6]=='0':
				flag = 1 # Prim
			else:
				outFile.write(line)
				continue

			(chrom,chrSta,chrEnd) = locusPat.match(tokL[2]).groups()

			# Only single-position loci are looked up.  (The original compared
			# chrEnd with itself, so this guard could never trigger.)
			if int(chrEnd)-int(chrSta)!=0:
				outFile.write(line)
				continue

			refAllele = tokL[3]
			altAllele = tokL[4]

			sId = tokL[1].split('-')[1-flag]

			result = mygenome.lookupPileup(pileupDirL,sId,chrom,chrSta,refAllele,altAllele)

			if result:
				# flag 0 -> columns -4/-3, flag 1 -> columns -6/-5.
				last = -3-flag*2
				tokL[last] = str(result[1])
				tokL[last-1] = str(result[0])
				outFile.write('\t'.join(tokL)+'\n')
			else:
				outFile.write(line)
Example #2
0
def main(inFileName,
         outFileName,
         pileupDirL=mysetting.wxsPileupProcDirL,
         mutectDirL=['/EQL3/pipeline/somatic_mutect']):
    """Back-fill one zeroed column pair per row and emit rows via print_dat.

    The header line is written to the output unchanged and is also used to
    map column names to indices (a 'sId_pair' column is required).  Rows
    are collected with parse_line() into a dict that is flushed with
    print_dat() whenever a row's 'sId_pair' value is not yet a key of that
    dict, and once more at the end of the input.

    For each row (column indices are zero-based):

    * if columns -4 and -3 are both '0' the row is treated as "Recur"
      (flag 0); otherwise, if columns -6 and -5 are both '0', as "Prim"
      (flag 1); any other row is collected unchanged;
    * column 2 must hold a locus ``chr<name>:<start>~<end>``; rows whose
      start and end differ are collected unchanged;
    * if either of the last two columns is not '0' (marked "has matched
      normal" in the code) the values come from the single
      ``*<sample>T.union_pos.mutect`` file found under mutectDirL,
      otherwise from mygenome.lookupPileup();
    * a truthy result overwrites the zeroed pair, a falsy one leaves the
      row unchanged.

    NOTE(review): the two looked-up values are presumably ref/alt read
    counts -- confirm against the mutect file layout and lookupPileup.

    Args:
        inFileName: path of the tab-separated table to read.
        outFileName: output path, or '' to write to sys.stdout.
        pileupDirL: passed through to mygenome.lookupPileup.
        mutectDirL: directories searched with ``find`` for mutect files.
            The default list is shared between calls and is never mutated
            here.

    Raises:
        SystemExit: if zero or more than one mutect file matches a sample.
        KeyError: if the header has no 'sId_pair' column.
        AttributeError: if column 2 of a row is not a valid locus.
    """
    # Compiled once; the same pattern is applied to every row.
    locusPat = re.compile('(chr[^:]*):([0-9]*)~([0-9]*)')

    with open(inFileName) as inFile:
        if outFileName == '':
            outFile = sys.stdout
        else:
            outFile = open(outFileName, 'w')

        try:
            header = inFile.readline()
            idxH = {}
            for i, colName in enumerate(header.rstrip().split('\t')):
                idxH[colName] = i
            outFile.write(header)

            outH = {}
            for line in inFile:

                # rstrip instead of line[:-1]: a final line without a
                # newline must not lose the last character of its last column.
                tokL = line.rstrip('\n').split('\t')
                pair = tokL[idxH['sId_pair']]
                (chrom, chrSta, chrEnd) = locusPat.match(tokL[2]).groups()
                refAllele = tokL[3]
                altAllele = tokL[4]

                # A pair not yet collected: flush what has been gathered.
                if pair not in outH:
                    print_dat(outH, outFile)
                    outH = {}

                if tokL[-3] == tokL[-4] == '0':
                    flag = 0  # Recur
                elif tokL[-5] == tokL[-6] == '0':
                    flag = 1  # Prim
                else:
                    parse_line(outH, idxH, line)
                    continue

                # Only single-position loci are looked up.
                if int(chrEnd) - int(chrSta) != 0:
                    parse_line(outH, idxH, line)
                    continue

                sId = tokL[1].split('-')[1 - flag]

                # Reset per row: without this a row with no matching mutect
                # record would reuse the previous row's result (or raise
                # NameError on the first such row).
                result = None

                if tokL[-1] != '0' or tokL[-2] != '0':  ##has matched normal
                    fileNL = []
                    for mutDir in mutectDirL:
                        # The pattern is quoted so the shell hands it to
                        # find instead of expanding it in the current dir.
                        found = os.popen(
                            "find %s -name '*%sT.union_pos.mutect'" %
                            (mutDir, sId[1:])).readlines()
                        fileNL += [x for x in found if 'backup' not in x]
                    if len(fileNL) > 1:
                        print('Mutiple files: %s' % ','.join(fileNL))
                        sys.exit(1)
                    if not fileNL:
                        # Previously an opaque IndexError on fileNL[0].
                        print('No mutect file found for %s' % sId)
                        sys.exit(1)
                    fileN = fileNL[0].rstrip()
                    lines = os.popen(
                        'grep -w %s %s | grep -w %s | cut -f 1,2,4,5,21,22' %
                        (chrom, fileN, chrSta)).readlines()
                    for ln in lines:
                        colL = ln.rstrip().split('\t')
                        # colL holds the cut fields 1,2,4,5,21,22; the
                        # last record with matching alleles wins.
                        if colL[2] == refAllele and colL[3] == altAllele:
                            result = [colL[5], colL[4]]
                else:
                    result = mygenome.lookupPileup(pileupDirL, sId, chrom,
                                                   chrSta, refAllele,
                                                   altAllele)

                if result:
                    # flag 0 -> columns -4/-3, flag 1 -> columns -6/-5.
                    last = -3 - flag * 2
                    tokL[last] = str(result[1])
                    tokL[last - 1] = str(result[0])
                    parse_line(outH, idxH, '\t'.join(tokL) + '\n')
                else:
                    parse_line(outH, idxH, line)
            print_dat(outH, outFile)
        finally:
            # Never close the interpreter's stdout.
            if outFile is not sys.stdout:
                outFile.close()
Example #3
0
def main(inFileName,outFileName,pileupDirL=mysetting.wxsPileupProcDirL,mutectDirL=['/EQL3/pipeline/somatic_mutect']):
	"""Back-fill one zeroed column pair per row and emit rows via print_dat.

	The header line is written to the output unchanged and is also used to
	map column names to indices (a 'sId_pair' column is required).  Rows are
	collected with parse_line() into a dict that is flushed with print_dat()
	whenever a row's 'sId_pair' value is not yet a key of that dict, and once
	more at the end of the input.

	For each row (column indices are zero-based):

	* if columns -4 and -3 are both '0' the row is treated as "Recur"
	  (flag 0); otherwise, if columns -6 and -5 are both '0', as "Prim"
	  (flag 1); any other row is collected unchanged;
	* column 2 must hold a locus ``chr<name>:<start>~<end>``; rows whose
	  start and end differ are collected unchanged;
	* if either of the last two columns is not '0' (marked "has matched
	  normal" in the code) the values come from the single
	  ``*<sample>T.union_pos.mutect`` file found under mutectDirL, otherwise
	  from mygenome.lookupPileup();
	* a truthy result overwrites the zeroed pair, a falsy one leaves the row
	  unchanged.

	NOTE(review): the two looked-up values are presumably ref/alt read
	counts -- confirm against the mutect file layout and lookupPileup.

	Args:
		inFileName: path of the tab-separated table to read.
		outFileName: output path, or '' to write to sys.stdout.
		pileupDirL: passed through to mygenome.lookupPileup.
		mutectDirL: directories searched with ``find`` for mutect files.
			The default list is shared between calls and is never mutated here.

	Raises:
		SystemExit: if zero or more than one mutect file matches a sample.
		KeyError: if the header has no 'sId_pair' column.
		AttributeError: if column 2 of a row is not a valid locus.
	"""

	# Compiled once; the same pattern is applied to every row.
	locusPat = re.compile('(chr[^:]*):([0-9]*)~([0-9]*)')

	with open(inFileName) as inFile:
		if outFileName == '':
			outFile = sys.stdout
		else:
			outFile = open(outFileName,'w')

		try:
			header = inFile.readline()
			idxH = {}
			for i, colName in enumerate(header.rstrip().split('\t')):
				idxH[colName] = i
			outFile.write(header)

			outH = {}
			for line in inFile:

				# rstrip instead of line[:-1]: a final line without a newline
				# must not lose the last character of its last column.
				tokL = line.rstrip('\n').split('\t')
				pair = tokL[idxH['sId_pair']]
				(chrom,chrSta,chrEnd) = locusPat.match(tokL[2]).groups()
				refAllele = tokL[3]
				altAllele = tokL[4]

				# A pair not yet collected: flush what has been gathered.
				if pair not in outH:
					print_dat(outH, outFile)
					outH = {}

				if tokL[-3]==tokL[-4]=='0':
					flag = 0 # Recur
				elif tokL[-5]==tokL[-6]=='0':
					flag = 1 # Prim
				else:
					parse_line(outH, idxH, line)
					continue

				# Only single-position loci are looked up.
				if int(chrEnd)-int(chrSta)!=0:
					parse_line(outH, idxH, line)
					continue

				sId = tokL[1].split('-')[1-flag]

				# Reset per row: without this a row with no matching mutect
				# record would reuse the previous row's result (or raise
				# NameError on the first such row).
				result = None

				if tokL[-1] != '0' or tokL[-2] != '0': ##has matched normal
					fileNL = []
					for mutDir in mutectDirL:
						# The pattern is quoted so the shell hands it to find
						# instead of expanding it in the current directory.
						found = os.popen("find %s -name '*%sT.union_pos.mutect'" % (mutDir, sId[1:])).readlines()
						fileNL += [x for x in found if 'backup' not in x]
					if len(fileNL) > 1:
						print('Mutiple files: %s' % ','.join(fileNL))
						sys.exit(1)
					if not fileNL:
						# Previously an opaque IndexError on fileNL[0].
						print('No mutect file found for %s' % sId)
						sys.exit(1)
					fileN = fileNL[0].rstrip()
					lines = os.popen('grep -w %s %s | grep -w %s | cut -f 1,2,4,5,21,22' % (chrom, fileN, chrSta)).readlines()
					for ln in lines:
						colL = ln.rstrip().split('\t')
						# colL holds the cut fields 1,2,4,5,21,22; the last
						# record with matching alleles wins.
						if colL[2] == refAllele and colL[3] == altAllele:
							result = [colL[5], colL[4]]
				else:
					result = mygenome.lookupPileup(pileupDirL,sId,chrom,chrSta,refAllele,altAllele)

				if result:
					# flag 0 -> columns -4/-3, flag 1 -> columns -6/-5.
					last = -3-flag*2
					tokL[last] = str(result[1])
					tokL[last-1] = str(result[0])
					parse_line(outH, idxH, '\t'.join(tokL) + '\n')
				else:
					parse_line(outH, idxH, line)
			print_dat(outH, outFile)
		finally:
			# Never close the interpreter's stdout.
			if outFile is not sys.stdout:
				outFile.close()