Exemple #1
0
def filter_known(dbsnpH, inFileN):
	"""Write the lines of a variant file whose position is not in dbSNP.

	Reads the tab-separated file ``inFileN`` (column 0: chromosome,
	column 1: integer position) and copies every line whose position is
	not found in ``dbsnpH`` to a new file named ``<inFileN>.dbsnp_flt``.

	Args:
		dbsnpH: mapping from chromosome name to a collection of positions
			that ``mybasic.index`` can search (presumably a sorted list --
			verify against the caller that builds it).
		inFileN: path of the input file.

	Returns:
		None. The result is the side effect of writing the output file.

	Raises:
		ValueError: if column 1 of a line is not an integer.
		IndexError: if a line has fewer than two tab-separated columns.
	"""
	# Context managers close both files even when a malformed line raises;
	# the original never closed the input handle at all.
	with open(inFileN, 'r') as inFile:
		with open('%s.dbsnp_flt' % inFileN, 'w') as outFile:
			for line in inFile:
				colL = line.rstrip().split('\t')
				chrom = colL[0]
				pos = int(colL[1])
				# A chromosome with no dbSNP entries cannot contain a known
				# variant, so keep the line instead of failing with KeyError.
				# mybasic.index returns a negative value when pos is absent.
				if chrom not in dbsnpH or mybasic.index(dbsnpH[chrom], pos) < 0:
					outFile.write(line)
Exemple #2
0
def get_alt_frac_mutscan(sampN, snpH, fileL):
    """Report the observed fraction of every listed SNP allele in a sample.

    Each file in ``fileL`` is tab-separated; the columns read are
    0 (chromosome), 1 (integer position), 3 (alt allele), 4 and 5 (two
    integer counts) and 6 (float fraction). A line is ignored when its
    chromosome is ``chrM``, when the two counts sum to less than 15, or
    when its (chromosome, position, alt) is not listed in ``snpH``.

    Args:
        sampN: sample name, echoed as the first output column.
        snpH: mapping ``chrom -> {pos -> iterable of alt alleles}``
            describing the alleles to report.
        fileL: iterable of input file paths. If the same allele appears
            more than once, the last occurrence wins.

    Returns:
        None. One line per allele in ``snpH`` is written to ``sys.stdout``
        as ``sampN<TAB>chrom:pos:alt<TAB>value``, ordered by chromosome and
        then position; ``value`` is the fraction, or ``NA`` if the allele
        was not seen in any file.

    Raises:
        ValueError: if a numeric column cannot be parsed.
        IndexError: if a line has fewer than seven columns.
    """
    # Every requested allele starts as 'NA' so unseen ones are still printed.
    outH = {}
    for chrom in snpH:
        for pos in snpH[chrom]:
            outH[(chrom, pos)] = dict.fromkeys(snpH[chrom][pos], 'NA')

    for path in fileL:
        # 'with' closes each input file; the original leaked every handle.
        with open(path, 'r') as inFile:
            for line in inFile:
                colL = line.rstrip().split('\t')
                chrom = colL[0]
                pos = int(colL[1])
                alt = colL[3]
                depth = int(colL[4]) + int(colL[5])
                frac = float(colL[6])
                if chrom == 'chrM' or depth < 15:
                    continue
                # Chromosomes without any requested SNP are skipped rather
                # than raising KeyError.
                altsByPos = snpH.get(chrom)
                if altsByPos is None:
                    continue
                # Direct dict membership replaces the search over a sorted
                # copy of the keys: same answer, no per-chromosome sort.
                if pos in altsByPos and alt in altsByPos[pos]:
                    outH[(chrom, pos)][alt] = frac

    # sorted() works on dict views, unlike keys() followed by .sort(),
    # which only exists on Python 2 lists.
    for chrom in sorted(snpH):
        for pos in sorted(snpH[chrom]):
            for alt in snpH[chrom][pos]:
                sys.stdout.write(
                    '%s\t%s:%s:%s\t%s\n' %
                    (sampN, chrom, pos, alt, outH[(chrom, pos)][alt]))
Exemple #3
0
def get_alt_frac_mutscan(sampN, snpH, fileL):
	"""Report the observed fraction of every listed SNP allele in a sample.

	Each file in ``fileL`` is tab-separated; the columns read are
	0 (chromosome), 1 (integer position), 3 (alt allele), 4 and 5 (two
	integer counts) and 6 (float fraction). A line is ignored when its
	chromosome is ``chrM``, when the two counts sum to less than 15, or
	when its (chromosome, position, alt) is not listed in ``snpH``.

	Args:
		sampN: sample name, echoed as the first output column.
		snpH: mapping ``chrom -> {pos -> iterable of alt alleles}``
			describing the alleles to report.
		fileL: iterable of input file paths. If the same allele appears
			more than once, the last occurrence wins.

	Returns:
		None. One line per allele in ``snpH`` is written to ``sys.stdout``
		as ``sampN<TAB>chrom:pos:alt<TAB>value``, ordered by chromosome and
		then position; ``value`` is the fraction, or ``NA`` if the allele
		was not seen in any file.

	Raises:
		ValueError: if a numeric column cannot be parsed.
		IndexError: if a line has fewer than seven columns.
	"""
	# Every requested allele starts as 'NA' so unseen ones are still printed.
	outH = {}
	for chrom in snpH:
		for pos in snpH[chrom]:
			outH[(chrom, pos)] = dict.fromkeys(snpH[chrom][pos], 'NA')

	for path in fileL:
		# 'with' closes each input file; the original leaked every handle.
		with open(path, 'r') as inFile:
			for line in inFile:
				colL = line.rstrip().split('\t')
				chrom = colL[0]
				pos = int(colL[1])
				alt = colL[3]
				depth = int(colL[4]) + int(colL[5])
				frac = float(colL[6])
				if chrom == 'chrM' or depth < 15:
					continue
				# Chromosomes without any requested SNP are skipped rather
				# than raising KeyError.
				altsByPos = snpH.get(chrom)
				if altsByPos is None:
					continue
				# Direct dict membership replaces the search over a sorted
				# copy of the keys: same answer, no per-chromosome sort.
				if pos in altsByPos and alt in altsByPos[pos]:
					outH[(chrom, pos)][alt] = frac

	# sorted() works on dict views, unlike keys() followed by .sort(),
	# which only exists on Python 2 lists.
	for chrom in sorted(snpH):
		for pos in sorted(snpH[chrom]):
			for alt in snpH[chrom][pos]:
				sys.stdout.write('%s\t%s:%s:%s\t%s\n' % (sampN, chrom, pos, alt, outH[(chrom, pos)][alt]))