Exemple #1
0
def calculate_FETpvalues_readcounts(vcffile,poolstatustable,poolsize):
	maxMAF = 0.01; # maximum minor allele frequency, variants above this are not used
	poolDX = determine_poolphenotype(vcffile,poolstatustable);  # phenotype status for each pool
	variants =0;

	File = open(vcffile);
	for line in File:
		if line[0] == '#': continue;
		variants +=1; 
		variant = line.strip().split('\t');
		samples = len(variant)-9; 
		chrom = variant[0]; position = int(variant[1]); refallele = variant[3]; varalleles = variant[4].split(','); 

		if len(varalleles) ==2: triallelic=1;
		else: triallelic=0;
		if len(varalleles) > 2: 
			print >>sys.stderr, '##triallelic',chrom,position,refallele,varalleles;
			continue; # ignore quad or greater allelic variants for now

		#if triallelic ==0 or position != 64527465: continue;

		H0 = 0.0; H1 = 0.0; H2=0.0; D0 = 0.0; D1 = 0.0; D2=0.0;
		casepools=0; controlpools=0;
		for i in xrange(samples):
			if poolDX[i] == -1: continue;  # ignore pool for case control analysis 
			try: 
				counts = variant[i+9].split(':');
				readsf = counts[2].split(','); readsr = counts[3].split(','); 
				total = int(readsf[0]) + int(readsf[1]) + int(readsr[0]) + int(readsr[1]); 
				if triallelic==1: total += int(readsf[2])+int(readsr[2]); # tri-allelic

				alt = (float(readsf[1]) + float(readsr[1]))*poolsize; alt /= (total+0.0001); 
				if triallelic==1: alt2 = (float(readsf[2]) + float(readsr[2]))*poolsize; alt2 /= (total+0.0001); 
				else: alt2=0.0;

				sf = float(2*total)/(2*total + poolsize); 
				if poolDX[i][0] == 0: 
					#H0 += poolsize; H1 = alt; 
					H0 += sf*float(poolsize); H1 += sf*alt; H2 += sf*alt2; controlpools +=1;
					# 0/1 bit of allelecounts stores case-control status...
				elif poolDX[i][0] >= 1: 
					D0 += sf*float(poolsize); D1 += sf*alt; D2 += sf*alt2; casepools +=1; 
					#D0 += poolsize; D1 += alt; 
			except IndexError: print 'Exception',i,samples,variant; 

		#if H1 + D1 < 10 or (H0-H1 + D0-D1 <10): lowcountvariants +=1; continue; 
		pvalue = [0,0,0]; pvalue2=[0,0,0];
		if H1 + D1 >= 4: pvalue = fet(int(round(H1,0)),int(round(H0,0)),int(round(D1,0)),int(round(D0,0)));


		print '%0.2f %.2f:%.2f %.2f:%.2f'  %(pvalue[0],H0,H1,D0,D1),#'Control',float(H1)/(H0+0.001),'Case',float(D1)/(D0+0.001),
		if triallelic ==1 and H2+D2 >=4: 
			pvalue2 = fet(int(round(H2,0)),int(round(H0,0)),int(round(D2,0)),int(round(D0,0)));
			print '2nd-allele %0.2f %.2f:%.2f %.2f:%.2f'  %(pvalue2[0],H0,H2,D0,D2),

		print '%3s %9s %10s %10s %20s ' %(variant[0],variant[1],variant[3],variant[4],variant[5]+':'+variant[6]),
		print variant[7];
Exemple #2
0
def calculate_FETpvalues_allelecounts(vcffile, poolstatustable, poolsize):
    poolDX = determine_poolphenotype(vcffile, poolstatustable)
    # phenotype status for each pool
    poolDX_random = []
    missing = []
    for i in xrange(len(poolDX)):
        if poolDX[i][0] == -1: missing.append(i)
        poolDX_random.append(poolDX[i])
    """
	"""
    random.shuffle(poolDX_random)
    #print missing,len(poolDX_random);
    j = 0
    for i in xrange(len(poolDX_random)):
        if poolDX_random[i][0] == -1 and j < len(missing):
            #print j,poolDX_random[i],i;
            poolDX_random[i] = poolDX_random[missing[j]]
            poolDX_random[missing[j]] = [-1]
            j += 1
    for i in xrange(len(poolDX)):
        poolDX[i] = poolDX_random[i]
    print >> sys.stderr, poolDX_random

    variants = 0
    trivariants = 0

    File = open(vcffile)
    for line in File:
        if line[0] == '#': continue
        variant = line.strip().split('\t')
        samples = len(variant) - 9
        chrom = variant[0]
        position = int(variant[1])
        refallele = variant[3]
        varalleles = variant[4].split(',')

        if len(varalleles) == 2:
            triallelic = 1
            trivariants += 1
        else:
            triallelic = 0
            variants += 1

        if len(varalleles) >= 3 or len(varalleles) == 4:
            #print >>sys.stderr, '##triallelic',chrom,position,refallele,varalleles,variant[2];
            continue
            # ignore multi-allelic variants for now

        ## healthy (controls), D (disease), E (early onset), DC (diabetic complications)
        H0 = 0.00001
        H1 = 0.0
        H2 = 0.0
        D0 = 0.00001
        D1 = 0.0
        D2 = 0.0
        E0 = 0.00001
        E1 = 0.0
        E2 = 0
        DC0 = 0.00001
        DC1 = 0
        DC2 = 0
        casepools = 0
        controlpools = 0
        for i in xrange(samples):
            if poolDX_random[i][0] == -1:
                continue
                # ignore pool for case control analysis
            try:
                genotypes = variant[i + 9].split(':')
                if triallelic == 0:
                    MLAC = int(genotypes[0])
                    #meanAC = float(genotypes[2]);
                    QAC = float(genotypes[1])
                    MLAC2 = 0
                else:
                    MLAC = int(genotypes[0].split(',')[0])
                    #meanAC = float(genotypes[2]); QAC = float(genotypes[1]);
                    MLAC2 = int(genotypes[0].split(',')[1])

                #if genotypes[3] == '-inf': varAF = -0.1*QAC;
                #else: varAF = float(genotypes[3]);

                if poolDX_random[i][0] == 0:
                    H0 += poolsize
                    H1 += MLAC
                    H2 += MLAC2
                elif poolDX_random[i][0] >= 1:
                    D0 += poolsize
                    D1 += MLAC
                    D2 += MLAC2
                if poolDX_random[i][0] == 2:
                    E0 += poolsize
                    E1 += MLAC
                    E2 += MLAC2
                if len(poolDX_random[i]) >= 2 and poolDX_random[i][1] == 3:
                    DC0 += poolsize
                    DC1 += MLAC
                    DC2 += MLAC2

            except IndexError:
                print 'Exception', i, samples, variant

        nopvalue_calc = 0
        if (H1 + D1 >= 5):
            pvalue = fet(int(round(H1, 0)), int(round(H0, 0)),
                         int(round(D1, 0)), int(round(D0, 0)))
        else:
            pvalue = [0, 0, 0]
            nopvalue_calc = 1

        ## calculate p-values between early onset and controls
        if (H1 + E1 >= 5):
            pvalue1 = fet(int(round(H1, 0)), int(round(H0, 0)),
                          int(round(E1, 0)), int(round(E0, 0)))
        else:
            pvalue1 = [0, 0, 0]

        ## calculate p-value between early onset and late-onset
        if (D1 + E1 >= 5):
            pvalue2 = fet(int(round(D1, 0)), int(round(D0, 0)),
                          int(round(E1, 0)), int(round(E0, 0)))
        else:
            pvalue2 = [0, 0, 0]

        if nopvalue_calc == 1: continue

        print '%0.2f %.1f/%.1f %.1f/%.1f %.1f/%.1f %.1f/%.1f' % (
            pvalue[0], H1, H0, D1, D0, E1, E0, DC1,
            DC0),  #'Control',float(H1)/(H0+0.001),'Case',float(D1)/(D0+0.001),
        print '%0.2f %0.2f' % (pvalue1[0], pvalue2[0]),
        print '%0.4f %0.4f %0.4f %0.4f' % (H1 / H0, D1 / D0, E1 / E0,
                                           DC1 / DC0),

        if triallelic == 1:
            if H2 + D2 >= 4:
                pvalue_tri = fet(int(round(H2, 0)), int(round(H0, 0)),
                                 int(round(D2, 0)), int(round(D0, 0)))
            else:
                pvalue_tri = [0, 0, 0]
            print 'TRIALLELIC:%0.2f:%.1f:%.1f' % (pvalue_tri[0], H2, D2),
        else:
            print '-',

        print '%3s %9s %s %10s %10s %8s %10s' % (
            variant[0], variant[1], variant[2], variant[3], variant[4],
            variant[5], variant[6]),
        print variant[7],

        if PRINTGENOTYPES == 1:
            for i in xrange(samples):
                if poolDX_random[i][0] == -1:
                    continue
                    # ignore pool for case control analysis
                genotypes = variant[i + 9].split(':')
                print variant[i + 9],
                #MLAC = int(genotypes[0]); meanAC = float(genotypes[2]); QAC = float(genotypes[1]);
                #if genotypes[3] == '-inf': varAF = -0.1*QAC;
                #else: varAF = float(genotypes[3]);
                #print '%2d:%0.2f:%0.2f' %(MLAC,meanAC,math.sqrt(pow(10,varAF))),
        print
    print >> sys.stderr, "variants evaluated", variants, "triallelic or more", trivariants
Exemple #3
0
def calculate_FETpvalues_allelecounts(vcffile,poolstatustable):
	poolDX = determine_poolphenotype(vcffile,poolstatustable);  # phenotype status for each pool
	variants =0; trivariants =0;

	File = open(vcffile);
	for line in File:
		if line[0] == '#': continue;
		variant = line.strip().split('\t');
		samples = len(variant)-9;
		chrom = variant[0]; position = int(variant[1]); refallele = variant[3]; varalleles = variant[4].split(','); 

		if len(varalleles) ==2: triallelic=1; trivariants +=1; 
		else: triallelic=0; variants +=1; 

		if len(varalleles) >= 3 or len(varalleles) ==4: 
			#print >>sys.stderr, '##triallelic',chrom,position,refallele,varalleles,variant[2];
			continue; # ignore multi-allelic variants for now

		## healthy (controls), D (disease), E (early onset), DC (diabetic complications)  
		H0 = 0.0; H1 = 0.0; H2 = 0.0; D0 = 0.0; D1 = 0.0; D2 = 0.0; E0 = 0.0; E1= 0.0; E2 = 0; 
		casepools=0; controlpools=0;
		for i in xrange(samples):
			if poolDX[i][0] == -1 or variant[i+9].split(':')[0] == '.': continue;  # ignore pool for case control analysis 
			try: 
				poolsize = poolDX[i][1]
				genotypes = variant[i+9].split(':');
				if triallelic==0: 
					MLAC = int(genotypes[0].split(',')[1]); #meanAC = float(genotypes[2]); QAC = float(genotypes[1]); 
					QAC = float(genotypes[1]); 
					MLAC2 = 0;
				else: 
					MLAC = int(genotypes[0].split(',')[1]); #meanAC = float(genotypes[2]); QAC = float(genotypes[1]); 
					QAC = float(genotypes[1]); 
					MLAC2 = int(genotypes[0].split(',')[2]); #meanAC = float(genotypes[2]); QAC = float(genotypes[1]); 

				#else: varAF = float(genotypes[3]);
 
				if poolDX[i][0] == 0: H0 += poolsize; H1 += MLAC; H2 += MLAC2;
				elif poolDX[i][0] >= 1:  D0 +=poolsize; D1 += MLAC; D2 += MLAC2;
				if poolDX[i][0] == 2: E0 += poolsize; E1 += MLAC; E2 += MLAC2;

			except IndexError: print 'Exception',i,samples,genotypes; 


		if (H1 + D1 >= 4): pvalue = fet(int(round(H1,0)),int(round(H0,0)),int(round(D1,0)),int(round(D0,0)));
		else: pvalue = [0,0,0];

		## calculate p-values between early onset and controls
		if (H1+E1 >=4): pvalue1 = fet(int(round(H1,0)),int(round(H0,0)),int(round(E1,0)),int(round(E0,0)));
		else: pvalue1 = [0,0,0]; 

		## calculate p-value between early onset and late-onset
		if (D1+E1 >=4): pvalue2 = fet(int(round(D1,0)),int(round(D0,0)),int(round(E1,0)),int(round(E0,0)));
		else: pvalue2 = [0,0,0]; 
		

		print '%0.2f %.1f/%.1f %.1f/%.1f %.1f/%.1f %.1f/%.1f'  %(pvalue[0],H1,H0,D1,D0,E1,E0,DC1,DC0),#'Control',float(H1)/(H0+0.001),'Case',float(D1)/(D0+0.001),
		print '%0.2f %0.2f' %(pvalue1[0],pvalue2[0]),
                print '%0.4f %0.4f %0.4f %0.4f' %(H1/(H0+epsilon),D1/(D0+epsilon),E1/(E0+epsilon),DC1/(DC0+epsilon)),
		#sys.exit()

		if triallelic ==1: 
			if H2 + D2 >=4: pvalue_tri = fet(int(round(H2,0)),int(round(H0,0)),int(round(D2,0)),int(round(D0,0)));
			else: pvalue_tri = [0,0,0];
			print 'TRIALLELIC:%0.2f:%.1f:%.1f'  %(pvalue_tri[0],H2,D2),
		else: print '-',

		print '%3s %9s %s %10s %10s %8s %10s' %(variant[0],variant[1],variant[2],variant[3],variant[4],variant[5],variant[6]),
		print variant[7], 
		print;
	print >>sys.stderr, "variants evaluated",variants,"triallelic or more",trivariants;
Exemple #4
0
def calculate_FETpvalues_new(vcffile, poolstatustable, poolsize):
    maxMAF = 0.01
    # maximum minor allele frequency, variants above this are not used
    poolDX = []
    # phenotype status for each pool
    Variants = []

    File = open(vcffile)
    for line in File:
        if line[0] == '#' and line[1] == '#': continue
        variant = line.strip().split('\t')
        if variant[0] == '#CHROM':
            for i in xrange(9, len(variant)):
                sampleid = variant[i].split('/')[-1].split('.')[0]
                #print sampleid;
                try:
                    status = poolstatustable[sampleid]
                except KeyError:
                    status = -1
                poolDX.append(status)
                print >> sys.stderr, status,
            #samples = len(variant)-9; print 'samples',samples,len(variant);
            print >> sys.stderr
            continue

        samples = len(variant) - 9
        chrom = variant[0]
        position = int(variant[1])
        refallele = variant[3]
        varalleles = variant[4].split(',')

        if len(varalleles) == 2: triallelic = 1
        else: triallelic = 0
        if len(varalleles) > 2:
            continue
            # ignore multi-allelic variants for now

        #if triallelic ==0 or position != 64527465: continue;

        H0 = 0.0
        H1 = 0.0
        H2 = 0.0
        D0 = 0.0
        D1 = 0.0
        D2 = 0.0
        casepools = 0
        controlpools = 0
        for i in xrange(samples):
            if poolDX[i] == -1:
                continue
                # ignore pool for case control analysis
            try:
                counts = variant[i + 9].split(':')
                readsf = counts[2].split(',')
                readsr = counts[3].split(',')
                total = int(readsf[0]) + int(readsf[1]) + int(readsr[0]) + int(
                    readsr[1])
                if triallelic == 1:
                    total += int(readsf[2]) + int(readsr[2])
                    # tri-allelic

                alt = (float(readsf[1]) + float(readsr[1])) * poolsize
                alt /= (total + 0.0001)
                if triallelic == 1:
                    alt2 = (float(readsf[2]) + float(readsr[2])) * poolsize
                    alt2 /= (total + 0.0001)
                else:
                    alt2 = 0.0

                sf = float(2 * total) / (2 * total + poolsize)
                if poolDX[i] == 0:
                    #H0 += poolsize; H1 = alt;
                    H0 += sf * float(poolsize)
                    H1 += sf * alt
                    H2 += sf * alt2
                    controlpools += 1
                    # 0/1 bit of allelecounts stores case-control status...
                elif poolDX[i] >= 1:
                    D0 += sf * float(poolsize)
                    D1 += sf * alt
                    D2 += sf * alt2
                    casepools += 1
                    #D0 += poolsize; D1 += alt;
            except IndexError:
                print 'Exception', i, samples, variant

        #if H1 + D1 < 10 or (H0-H1 + D0-D1 <10): lowcountvariants +=1; continue;
        pvalue = [0, 0, 0]
        pvalue2 = [0, 0, 0]
        if H1 + D1 >= 4:
            pvalue = fet(int(round(H1, 0)), int(round(H0, 0)),
                         int(round(D1, 0)), int(round(D0, 0)))
        print '%0.2f %.2f:%.2f %.2f:%.2f' % (
            pvalue[0], H0, H1, D0,
            D1),  #'Control',float(H1)/(H0+0.001),'Case',float(D1)/(D0+0.001),
        if triallelic == 1 and H2 + D2 >= 4:
            pvalue2 = fet(int(round(H2, 0)), int(round(H0, 0)),
                          int(round(D2, 0)), int(round(D0, 0)))
            print '2nd-allele %0.2f %.2f:%.2f %.2f:%.2f' % (pvalue2[0], H0, H2,
                                                            D0, D2),

        print '%3s %9s %10s %10s %20s ' % (variant[0], variant[1], variant[3],
                                           variant[4],
                                           variant[5] + ':' + variant[6]),
        print variant[7]
Exemple #5
0
def calculate_FETpvalues(vcffile, poolDX, poolsize):
    File = open(vcffile)
    for line in File:
        if line[0] == '#' and line[1] == '#': continue
        variant = line.strip().split()
        if variant[0] == '#CHROM':
            samples = len(variant) - 9
            #samplelist = variant[9+offset:]; samples = len(samplelist);
            #print samplelist,samples;
            continue

        if 'SNP' in variant[2] or 'INDEL' in variant[2]:
            H0 = 0
            H1 = float(0)
            D0 = 0
            D1 = float(0)
            allelecounts = []
            allelecountsD = []
            poolsH = 0
            poolsD = 0
            for i in xrange(samples):
                if poolDX[i] == -1:
                    continue
                    # ignore pool for case control analysis

                counts = variant[i + 9].split(':')
                total = int(counts[0]) + int(counts[1])
                #if total < 240: continue;
                alt = float(counts[1]) * poolsize
                alt /= (total + 0.01)
                #if alt < 0.5: alt = 0;
                #if alt > 0.5 and alt < 1: alt = 1;

                if poolDX[i] == 0:
                    H0 += poolsize
                    H1 += alt
                    allelecounts.append([0, 0,
                                         int(counts[0]),
                                         int(counts[1])])
                    poolsH += 1
                    # 0/1 bit of allelecounts stores case-control status...
                elif poolDX[i] == 1:
                    D0 += poolsize
                    D1 += alt
                    allelecounts.append([1, 1,
                                         int(counts[0]),
                                         int(counts[1])])
                    poolsD += 1
            #	print counts,total,alt;
            if H1 + D1 >= 5:
                #print allelecounts;
                allelecounts.sort()
                HAF = estimate_AF(allelecounts, poolsH, poolsize / 2, 0.001, 0)
                DAF = estimate_AF(allelecounts, poolsD, poolsize / 2, 0.001,
                                  poolsH)
                """
				for p in xrange(HAF[2],HAF[3]): print '%3d %2.2f ' %(p,HAF[0][p]),
				print 'Binomial maxll',HAF[1],HAF[0][HAF[1]];
				for p in xrange(DAF[2],DAF[3]): print '%3d %2.2f ' %(p,DAF[0][p]),
				print 'Binomial maxll',DAF[1],DAF[0][DAF[1]];
				"""

                pvalue = fet(int(round(H1, 0)), int(H0), int(round(D1, 0)),
                             int(D0))
                if pvalue[0] < -2:
                    pvalueperm = probabilisticFET(allelecounts, poolsH, poolsD,
                                                  poolsize)
                    # output from allele frequency estimation
                    print 'PERM',
                else:
                    pvalueperm = 1
                    print 'FET',
                print math.log(
                    pvalueperm, 10
                ), pvalue[0], variant[0], variant[1], variant[2], variant[
                    3], variant[4], variant[5], variant[6], variant[7],
                print '%2.1f %2.1f %2.1f %2.1f' % (
                    H0, H1, D0, D1
                ),  #'Control',float(H1)/(H0+0.001),'Case',float(D1)/(D0+0.001),
                if pvalue < 0.001: print 'LOW'
                else: print
Exemple #6
0
def calculate_FETpvalues_allelecounts(vcffile,poolstatustable,poolsize,VCFfile2):


	[SAMPLES,VARLIST,SAMPLELIST] = read_phase3_VCF(VCFfile2);


	poolDX = determine_poolphenotype(vcffile,poolstatustable);  # phenotype status for each pool
	variants =0; trivariants =0;

	File = open(vcffile);
	for line in File:
		if line[0] == '#': continue;
		variant = line.strip().split('\t');
		samples = len(variant)-9;
		chrom = variant[0]; position = int(variant[1]); refallele = variant[3]; varalleles = variant[4].split(','); 

		if len(varalleles) ==2: triallelic=1; trivariants +=1; 
		else: triallelic=0; variants +=1; 

		if len(varalleles) >= 3 or len(varalleles) ==4: 
			#print >>sys.stderr, '##triallelic',chrom,position,refallele,varalleles,variant[2];
			continue; # ignore multi-allelic variants for now


		## healthy (controls), D (disease), E (early onset), DC (diabetic complications)  
		H0 = 0.0; H1 = 0.0; H2 = 0.0; D0 = 0.0; D1 = 0.0; D2 = 0.0; E0 = 0.0; E1= 0.0; E2 = 0; DC0 = 0; DC1 = 0;  DC2 = 0;
		casepools=0; controlpools=0;
		for i in xrange(samples):
			if poolDX[i][0] == -1: continue;  # ignore pool for case control analysis 
			try: 
				genotypes = variant[i+9].split(':');
				if triallelic==0: 
					MLAC = int(genotypes[0]); #meanAC = float(genotypes[2]); QAC = float(genotypes[1]); 
					MLAC2 = 0;
				else: 
					MLAC = int(genotypes[0].split(',')[0]); #meanAC = float(genotypes[2]); QAC = float(genotypes[1]); 
					MLAC2 = int(genotypes[0].split(',')[1]);

				#if genotypes[3] == '-inf': varAF = -0.1*QAC; 
				#else: varAF = float(genotypes[3]);
 
				if poolDX[i][0] == 0: H0 += poolsize; H1 += MLAC; H2 += MLAC2;
				elif poolDX[i][0] >= 1:  D0 +=poolsize; D1 += MLAC; D2 += MLAC2;
				if poolDX[i][0] == 2: E0 += poolsize; E1 += MLAC; E2 += MLAC2;
				if len(poolDX[i]) >=2 and poolDX[i][1] == 3: DC0 += poolsize; DC1 += MLAC; DC2 += MLAC2; 

			except IndexError: print 'Exception',i,samples,variant; 

		## calculate p-value between cases and controls 
		if (H1 + D1 >= 4): pvalue = fet(int(round(H1,0)),int(round(H0,0)),int(round(D1,0)),int(round(D0,0)));
		else: pvalue = [0,0,0];

		## calculate p-values between early onset and controls
		if (H1+E1 >=4): pvalue1 = fet(int(round(H1,0)),int(round(H0,0)),int(round(E1,0)),int(round(E0,0)));
		else: pvalue1 = [0,0,0]; 

		## calculate p-value between early onset and late-onset
		if (D1+E1 >=4): pvalue2 = fet(int(round(D1,0)),int(round(D0,0)),int(round(E1,0)),int(round(E0,0)));
		else: pvalue2 = [0,0,0]; 
	
		if (chrom,position,refallele,varalleles[0]) in VARLIST: 
			VAR2 = VARLIST[(chrom,position,refallele,varalleles[0])];
			print 'foundvar',VAR2,
			E11 = E1 - VAR2[1]; 
			E01 = E0 - VAR2[1] - VAR2[0]; 
			if E11 < 0: E11 = 0;
			D11 = D1 - VAR2[1]-VAR2[3]; 
			if D11 < 0: D11 = 0;
			D01 = D0 - VAR2[1] - VAR2[0] - VAR2[2] -VAR2[3]; 
			H01=H0; H11 = H1;
			if pvalue[0] < -0.5: ## low p-values
				new_pvalue = fet(int(round(H1,0)),int(round(H0,0)),int(round(D11,0)),int(round(D01,0)));
			else: new_pvalue = pvalue; 
			if pvalue[0] < -0.5: ## low p-values
				new_pvalue1 = fet(int(round(H1,0)),int(round(H0,0)),int(round(E11,0)),int(round(E01,0)));
			else: new_pvalue1 = pvalue1; 

			print ' %0.2f %0.2f %d/%d %d/%d %d/%d ' %(new_pvalue[0],new_pvalue1[0],int(H1),int(H0),int(D11),int(D01),int(E11),int(E01));
			print 'corr-new %0.2f %0.2f %0.4f %0.4f %0.4f ' %(new_pvalue[0],new_pvalue1[0],H11/H01,D11/D01,E11/E01),
		else: 
			#print 'missing'; ## 88 for early onset, 218 for late onset
			#E11 = E1; E01 = E0-88; D11 = D1; D01 = D0-218-88;
			#print 'corrected -\t-\t
			H11 = H1; H01 = H0; D11 = D1; D01 = D0; E11 = E1; E01 = E0;
			print 'corr-orig %0.2f %0.2f %0.4f %0.4f %0.4f ' %(pvalue[0],pvalue1[0],H11/H01,D11/D01,E11/E01),
			#print 'corrected -\t-\t-\t-',


		print '%0.2f %0.2f %.1f/%.1f %.1f/%.1f %.1f/%.1f'  %(pvalue[0],pvalue1[0],H1,H0,D1,D0,E1,E0),#'Control',float(H1)/(H0+0.001),'Case',float(D1)/(D0+0.001),
		#print '%0.2f %0.2f' %(pvalue1[0],pvalue2[0]),
		#print '%0.4f %0.4f %0.4f %0.4f' %(H1/H0,D1/D0,E1/E0,DC1/DC0),
		print '%0.4f %0.4f %0.4f' %(H1/H0,D1/D0,E1/E0),

		if triallelic ==1: 
			if H2 + D2 >=4: pvalue_tri = fet(int(round(H2,0)),int(round(H0,0)),int(round(D2,0)),int(round(D0,0)));
			else: pvalue_tri = [0,0,0];
			print 'TRIALLELIC:%0.2f:%.1f:%.1f'  %(pvalue_tri[0],H2,D2),
		else: print '-',

		print '%3s %9s %s %10s %10s %8s %10s' %(variant[0],variant[1],variant[2],variant[3],variant[4],variant[5],variant[6]),
		print variant[7], 

		if PRINTGENOTYPES ==1: 
			for i in xrange(samples): 
				if poolDX[i][0] == -1: continue;  # ignore pool for case control analysis 
				genotypes = variant[i+9].split(':');
				print variant[i+9],
				#MLAC = int(genotypes[0]); meanAC = float(genotypes[2]); QAC = float(genotypes[1]); 
				#if genotypes[3] == '-inf': varAF = -0.1*QAC; 
				#else: varAF = float(genotypes[3]); 
				#print '%2d:%0.2f:%0.2f' %(MLAC,meanAC,math.sqrt(pow(10,varAF))),
		print;
	print >>sys.stderr, "variants evaluated",variants,"triallelic or more",trivariants;