def _secondRun_(srOutput, srInput, srTopQuantile, srWindowSize, snpsds, phed, p_i, binary=False):
    """
    Sets up and runs a second stage analysis, using and/or on SNPs.

    The first stage results are loaded from srInput, negative-log
    transformed, and handed to SecondStageAnalysis.retrieveSecondRunSNPs
    together with srTopQuantile and srWindowSize.  The SNPs it returns are
    tested against the values of phenotype p_i and the p-values are written
    to srOutput as comma separated text.

    Parameters:
        srOutput      -- path of the result file to (over)write.
        srInput       -- first stage result file, passed to
                         gwaResults.SNPResult.
        srTopQuantile -- forwarded to retrieveSecondRunSNPs.
        srWindowSize  -- forwarded to retrieveSecondRunSNPs.
        snpsds        -- SNPs dataset(s), passed to gwaResults.SNPResult.
        phed          -- phenotype data; must provide getPhenVals(p_i).
        p_i           -- phenotype ID.
        binary        -- if true the p-values come from run_fet (Fisher's
                         exact test), otherwise from run_kw (Kruskal-Wallis).

    Output columns:
        Chromosome,position,p-value,marf,maf,snpType,second_pos
    where position and second_pos are elements 0 and 1 of each entry in the
    retrieved 'positions' list.
    """
    def report(message):
        # Progress goes straight to stdout and is flushed at once so it shows
        # up immediately even when the output is redirected.
        sys.stdout.write(message)
        sys.stdout.flush()

    report("Preparing a second stage analyzis.\n")

    # Collect top n% SNPs.
    report("Loading first stage results.\n")
    result = gwaResults.SNPResult(resultFile=srInput, snpsds=snpsds, phenotypeID=p_i)

    # - filter results
    result.negLogTransform()
    res = SecondStageAnalysis.retrieveSecondRunSNPs(result, srTopQuantile, srWindowSize)
    snps = res['snps']
    snpTypes = res['snpTypes']
    chromosomes = res['chromosomes']
    positions = res['positions']
    marfs = res['marfs']
    mafs = res['mafs']

    # Pick the association test: Fisher's exact test when `binary` is set,
    # Kruskal-Wallis otherwise.
    if binary:
        message = "Running Fisher's exact test (in R).\n"
        run_test = run_fet
    else:
        message = "Running Kruskal-Wallis (in R).\n"
        run_test = run_kw
    report(message)
    phenVals = phed.getPhenVals(p_i)
    pvals = run_test(snps, phenVals)

    report("Writing results to file.\n")

    # Write results to file.  The with-block guarantees the file is closed
    # even if formatting or writing one of the rows fails.
    with open(srOutput, "w") as out:
        out.write("Chromosome,position,p-value,marf,maf,snpType,second_pos\n")
        for chromosome, pos, pval, marf, maf, snpType in zip(chromosomes, positions, pvals, marfs, mafs, snpTypes):
            fields = (chromosome, pos[0], pval, marf, maf, snpType, pos[1])
            out.write(",".join([str(field) for field in fields]) + "\n")
def _secondRun_(srOutput, srInput, srTopQuantile, srWindowSize, snpsds, phed, p_i, kinshipSnpsDataset):
    """
    Sets up and runs a second stage Emma analysis, using and/or on SNPs.

    The first stage results are loaded from srInput, negative-log
    transformed, and handed to SecondStageAnalysis.retrieveSecondRunSNPs
    together with srTopQuantile and srWindowSize.  A kinship matrix is
    calculated from the SNPs of kinshipSnpsDataset, runEmma is run on the
    retrieved SNPs, and the results are written to srOutput as comma
    separated text.

    Parameters:
        srOutput           -- path of the result file to (over)write.
        srInput            -- first stage result file, passed to
                              gwaResults.SNPResult.
        srTopQuantile      -- forwarded to retrieveSecondRunSNPs.
        srWindowSize       -- forwarded to retrieveSecondRunSNPs.
        snpsds             -- SNPs dataset(s), passed to gwaResults.SNPResult.
        phed               -- phenotype data, passed to runEmma.
        p_i                -- phenotype ID.
        kinshipSnpsDataset -- object whose getSnps() supplies the SNPs that
                              calcKinship builds the kinship matrix from.

    Output columns:
        Chromosome,position,p-value,marf,maf,genotype_var_perc,snpType,second_pos
    where position and second_pos are elements 0 and 1 of each entry in the
    retrieved 'positions' list, and the p-value and genotype_var_perc are
    element 0 of the corresponding runEmma result entries.
    """
    def report(message):
        # Progress goes straight to stdout and is flushed at once so it shows
        # up immediately even when the output is redirected.
        sys.stdout.write(message)
        sys.stdout.flush()

    report("Preparing a second stage analyzis.\n")

    # Collect top n% SNPs.
    report("Loading first stage results.\n")
    result = gwaResults.SNPResult(resultFile=srInput, snpsds=snpsds, phenotypeID=p_i)

    # - filter results
    result.negLogTransform()
    selected = SecondStageAnalysis.retrieveSecondRunSNPs(result, srTopQuantile, srWindowSize)
    snps = selected['snps']
    snpTypes = selected['snpTypes']
    chromosomes = selected['chromosomes']
    positions = selected['positions']
    marfs = selected['marfs']
    mafs = selected['mafs']

    # Calc kinship matrix (from all SNPs in the kinship dataset).
    report("Calculating kinship matrix.\n")
    k_snps = kinshipSnpsDataset.getSnps()
    k = calcKinship(k_snps)

    # Run Emma.
    report("Running Emma.\n")
    emma_res = runEmma(phed, p_i, k, snps)
    pvals = list(emma_res["ps"])
    genotype_var_perc = list(emma_res["genotype_var_perc"])

    report("Writing results to file.\n")

    # Write results to file.  The with-block guarantees the file is closed
    # even if formatting or writing one of the rows fails.
    with open(srOutput, "w") as out:
        out.write("Chromosome,position,p-value,marf,maf,genotype_var_perc,snpType,second_pos\n")
        for chromosome, pos, pval, marf, maf, gvp, snpType in zip(chromosomes, positions, pvals, marfs, mafs, genotype_var_perc, snpTypes):
            fields = (chromosome, pos[0], pval[0], marf, maf, gvp[0], snpType, pos[1])
            out.write(",".join([str(field) for field in fields]) + "\n")
def _secondRun_(srOutput, srInput, srTopQuantile, srWindowSize, snpsds, phed, p_i, kinshipSnpsDataset):
    """
    Sets up and runs a second stage Emma analysis, using and/or on SNPs.

    The first stage results are loaded from srInput, negative-log
    transformed, and handed to SecondStageAnalysis.retrieveSecondRunSNPs
    together with srTopQuantile and srWindowSize.  A kinship matrix is
    calculated from the SNPs of kinshipSnpsDataset, runEmma is run on the
    retrieved SNPs, and the results are written to srOutput as comma
    separated text.

    Parameters:
        srOutput           -- path of the result file to (over)write.
        srInput            -- first stage result file, passed to
                              gwaResults.SNPResult.
        srTopQuantile      -- forwarded to retrieveSecondRunSNPs.
        srWindowSize       -- forwarded to retrieveSecondRunSNPs.
        snpsds             -- SNPs dataset(s), passed to gwaResults.SNPResult.
        phed               -- phenotype data, passed to runEmma.
        p_i                -- phenotype ID.
        kinshipSnpsDataset -- object whose getSnps() supplies the SNPs that
                              calcKinship builds the kinship matrix from.

    Output columns:
        Chromosome,position,p-value,marf,maf,genotype_var_perc,snpType,second_pos
    where position and second_pos are elements 0 and 1 of each entry in the
    retrieved 'positions' list, and the p-value and genotype_var_perc are
    element 0 of the corresponding runEmma result entries.
    """
    def report(message):
        # Progress goes straight to stdout and is flushed at once so it shows
        # up immediately even when the output is redirected.
        sys.stdout.write(message)
        sys.stdout.flush()

    report("Preparing a second stage analyzis.\n")

    # Collect top n% SNPs.
    report("Loading first stage results.\n")
    result = gwaResults.SNPResult(resultFile=srInput, snpsds=snpsds, phenotypeID=p_i)

    # - filter results
    result.negLogTransform()
    selected = SecondStageAnalysis.retrieveSecondRunSNPs(result, srTopQuantile, srWindowSize)
    snps = selected['snps']
    snpTypes = selected['snpTypes']
    chromosomes = selected['chromosomes']
    positions = selected['positions']
    marfs = selected['marfs']
    mafs = selected['mafs']

    # Calc kinship matrix (from all SNPs in the kinship dataset).
    report("Calculating kinship matrix.\n")
    k_snps = kinshipSnpsDataset.getSnps()
    k = calcKinship(k_snps)

    # Run Emma.
    report("Running Emma.\n")
    emma_res = runEmma(phed, p_i, k, snps)
    pvals = list(emma_res["ps"])
    genotype_var_perc = list(emma_res["genotype_var_perc"])

    report("Writing results to file.\n")

    # Write results to file.  The with-block guarantees the file is closed
    # even if formatting or writing one of the rows fails.
    with open(srOutput, "w") as out:
        out.write("Chromosome,position,p-value,marf,maf,genotype_var_perc,snpType,second_pos\n")
        for chromosome, pos, pval, marf, maf, gvp, snpType in zip(chromosomes, positions, pvals, marfs, mafs, genotype_var_perc, snpTypes):
            fields = (chromosome, pos[0], pval[0], marf, maf, gvp[0], snpType, pos[1])
            out.write(",".join([str(field) for field in fields]) + "\n")
def _secondRun_(srOutput, srInput, srTopQuantile, srWindowSize, snpsds, phed, p_i, binary=False):
    """
    Sets up and runs a second stage analysis, using and/or on SNPs.

    The first stage results are loaded from srInput, negative-log
    transformed, and handed to SecondStageAnalysis.retrieveSecondRunSNPs
    together with srTopQuantile and srWindowSize.  The SNPs it returns are
    tested against the values of phenotype p_i and the p-values are written
    to srOutput as comma separated text.

    Parameters:
        srOutput      -- path of the result file to (over)write.
        srInput       -- first stage result file, passed to
                         gwaResults.SNPResult.
        srTopQuantile -- forwarded to retrieveSecondRunSNPs.
        srWindowSize  -- forwarded to retrieveSecondRunSNPs.
        snpsds        -- SNPs dataset(s), passed to gwaResults.SNPResult.
        phed          -- phenotype data; must provide getPhenVals(p_i).
        p_i           -- phenotype ID.
        binary        -- if true the p-values come from run_fet (Fisher's
                         exact test), otherwise from run_kw (Kruskal-Wallis).

    Output columns:
        Chromosome,position,p-value,marf,maf,snpType,second_pos
    where position and second_pos are elements 0 and 1 of each entry in the
    retrieved 'positions' list.
    """
    def report(message):
        # Progress goes straight to stdout and is flushed at once so it shows
        # up immediately even when the output is redirected.
        sys.stdout.write(message)
        sys.stdout.flush()

    report("Preparing a second stage analyzis.\n")

    # Collect top n% SNPs.
    report("Loading first stage results.\n")
    result = gwaResults.SNPResult(resultFile=srInput, snpsds=snpsds, phenotypeID=p_i)

    # - filter results
    result.negLogTransform()
    res = SecondStageAnalysis.retrieveSecondRunSNPs(result, srTopQuantile, srWindowSize)
    snps = res['snps']
    snpTypes = res['snpTypes']
    chromosomes = res['chromosomes']
    positions = res['positions']
    marfs = res['marfs']
    mafs = res['mafs']

    # Pick the association test: Fisher's exact test when `binary` is set,
    # Kruskal-Wallis otherwise.
    if binary:
        message = "Running Fisher's exact test (in R).\n"
        run_test = run_fet
    else:
        message = "Running Kruskal-Wallis (in R).\n"
        run_test = run_kw
    report(message)
    phenVals = phed.getPhenVals(p_i)
    pvals = run_test(snps, phenVals)

    report("Writing results to file.\n")

    # Write results to file.  The with-block guarantees the file is closed
    # even if formatting or writing one of the rows fails.
    with open(srOutput, "w") as out:
        out.write("Chromosome,position,p-value,marf,maf,snpType,second_pos\n")
        for chromosome, pos, pval, marf, maf, snpType in zip(chromosomes, positions, pvals, marfs, mafs, snpTypes):
            fields = (chromosome, pos[0], pval, marf, maf, snpType, pos[1])
            out.write(",".join([str(field) for field in fields]) + "\n")