def run_paml(infile, treefile):
    """Fit the alternative and null codeml models and compare them with an LRT.

    codeml is run twice on the same alignment and tree: once configured from
    ``alt_codeml.ctl`` and once from ``null_codeml.ctl``.  Output is written
    to ``results.out`` inside the ``./temp`` working directory.

    Args:
        infile: alignment passed to ``codeml.Codeml`` as ``alignment``.
        treefile: tree passed to ``codeml.Codeml`` as ``tree``.

    Returns:
        A tuple ``(fore_omega, chisq, pval)``: the 'foreground' branch-type
        value of site class 2 from the alternative run, the likelihood-ratio
        statistic ``2 * (lnL_alt - lnL_null)``, and its p-value for one
        degree of freedom.
    """
    cml = codeml.Codeml(alignment=infile, tree=treefile,
                        out_file="results.out", working_dir="./temp")

    # Alternative model: keep its lnL and the foreground omega of site class 2.
    cml.read_ctl_file("alt_codeml.ctl")
    fore = cml.run()
    fore_dat = fore.get('NSsites').get(2)
    fore_lnL = fore_dat.get('lnL')
    fore_omegas = fore_dat.get('parameters').get('site classes')
    fore_omega = fore_omegas.get(2).get('branch types').get('foreground')

    # Null model: only its lnL is needed.
    cml.read_ctl_file("null_codeml.ctl")
    null = cml.run()
    null_lnL = null.get('NSsites').get(2).get('lnL')

    df = 1
    chisq = 2 * (fore_lnL - null_lnL)
    try:
        pval = cdf_chi2(df, chisq)
    except ValueError:
        # cdf_chi2 rejects an invalid (e.g. negative) statistic with
        # ValueError; report that case as "no support" for the alternative
        # model.  Any other exception is a real error and is left to propagate.
        pval = 1.000
    return fore_omega, chisq, pval
def testCdfChi2(self):
    """Check cdf_chi2 argument validation and one reference value."""
    # df = 0 must be rejected.
    with self.assertRaises(ValueError):
        chi2.cdf_chi2(df=0, stat=3.84)

    # A negative test statistic must be rejected.
    with self.assertRaises(ValueError):
        chi2.cdf_chi2(df=1, stat=-3.84)

    # Non-numeric arguments must raise TypeError.
    with self.assertRaises(TypeError):
        chi2.cdf_chi2(df="d", stat="stat")

    # Reference value for df = 2, stat = 3.84.
    expected = 0.1466070
    observed = chi2.cdf_chi2(2, 3.84)
    self.assertAlmostEqual(observed, expected, places=5)
# Prints the sum of `posterior`, then reports two likelihood-ratio tests:
# M1a vs. M2a (floatlike[1], floatlike[2]) and M7 vs. M8 (floatlike[7],
# floatlike[8]).  Each uses df = 2, a statistic of abs(2 * difference) of the
# likelihoods rounded to two decimals, and a p-value from cdf_chi2.
# NOTE(review): `sum(posterior) == 1` is an exact comparison, although the
# printed message itself says 0.99999... is acceptable.
# NOTE(review): cdf_chi2 is evaluated twice per comparison - once stored in
# m12_p / m78_p and once more for the printed p-value.
# NOTE(review): the statements were collapsed onto one line; the original
# line breaks/indentation and the rest of the `if args.M8a_test:` branch are
# not visible here.
print "\nThe sum of all posterior probabilities should equal unity. The actual sum is:", (sum(posterior)) if sum(posterior) == 1: print "\nSummation checks out!\n" else: print "Sum does not equal 1. Something went wrong...or maybe not. Remember that these are high-precision libraries, and 0.99999... is acceptable.\n" print "Traditional likelihood ratio tests:\n" #teststat = -2 * log ( likelihood1 / likelihood 0) print "M1a vs. M2a\n" df = 2 print "The two likelihoods are:", round(floatlike[1], 2), round(floatlike[2], 2) teststat = abs(2 * (-1 * round(floatlike[2], 2) - -1 * round(floatlike[1], 2))) print "The test statistic is:", teststat m12_p = cdf_chi2(df, float(teststat)) print "p-value:", cdf_chi2(df, float(teststat)) print "\nM7 vs. M8\n" df = 2 print "The two likelihoods are:", round(floatlike[7], 2), round(floatlike[8], 2) teststat = abs(2 * (-1 * round(floatlike[8], 2) - -1 * round(floatlike[7], 2))) print "The test statistic is:", teststat m78_p = cdf_chi2(df, float(teststat)) print "p-value:", cdf_chi2(df, float(teststat)) if args.M8a_test: print "\nM8a vs. M8\n" df = 1
#!/usr/bin/env python
import sys

from Bio.Phylo.PAML.chi2 import cdf_chi2

# This script calculates the p-value for the likelihood ratio test given two
# log-likelihoods and the degrees of freedom.


def usage():
    """Print the command-line synopsis."""
    # Single-argument print(...) behaves identically under Python 2 and 3.
    print("LRT.py <Likelihood 1> <Likelihood 2> <degrees of freedom>")


if len(sys.argv) != 4:
    usage()
    # A wrong argument count is an error: exit with a non-zero status so
    # calling shells and pipelines can detect it.
    sys.exit(1)

lnL1 = float(sys.argv[1])
lnL2 = float(sys.argv[2])
degrees = int(sys.argv[3])

# Likelihood-ratio statistic, followed by its p-value for `degrees` df.
LRTstat = 2 * (lnL1 - lnL2)
print(LRTstat)
p = cdf_chi2(degrees, LRTstat)
print(p)
tree, loglk = wrappers.phyml(a) return (a, tree) def run_paml(a, tree, alignName, outfile, nonsignificantFile, neutralFile): try: codemlInstance = wrappers.Codeml(a, tree) neutral = codemlInstance.fit("M1a") positive = codemlInstance.fit("M2a") except ValueError, e: print (alignName, e) return fitDict = {} n = True LRTstat = 2*(positive["lnL"] - neutral["lnL"]) if LRTstat > 0: p = cdf_chi2(2, LRTstat) if p < 0.05: n = False if not n: outfile.write("%s\t%f\n" % (alignName, p)) else: nonsignificantFile.write("%s\t%f\n" % (alignName, p)) else: p = cdf_chi2(2, LRTstat*-1) neutralFile.write("%s\t%f\n" % (alignName, p)) alignment, directory = get_arguments(sys.argv[1:]) alignDict = {} # Check if alignment or directory was given and calculate stats accordingly
def testCdfChi2(self):
    """Exercise cdf_chi2 with invalid arguments and one known value."""
    # (expected exception, keyword arguments) for each invalid call.
    invalid_calls = (
        (ValueError, {"df": 0, "stat": 3.84}),
        (ValueError, {"df": 1, "stat": -3.84}),
        (TypeError, {"df": "d", "stat": "stat"}),
    )
    for expected_error, kwargs in invalid_calls:
        self.assertRaises(expected_error, chi2.cdf_chi2, **kwargs)

    # Known result for df = 2 and a statistic of 3.84.
    result = chi2.cdf_chi2(2, 3.84)
    self.assertAlmostEqual(result, 0.1466070, places=5)
# Report the p-value of a likelihood ratio test, taken from the
# chi-squared distribution.

# Imports
from Bio.Phylo.PAML.chi2 import cdf_chi2

# Degrees of freedom and the observed test statistic.
df, statistic = 1, 24.26

# Despite its name, `chi2` holds the value returned by cdf_chi2.
chi2 = cdf_chi2(df, statistic)
print("Chi2:", chi2)
# Finishes writing the expansion-model SFS (the enclosing loop header is not
# visible here), then writes entries 1 .. len-2 of the optimally scaled
# growth-model SFS to growthModelSFS.txt, one value per line.
# Afterwards the better-fitting model (by optimised log-likelihood) is tested
# with likelihood-ratio tests via cdf_chi2, and likelihood_grid is called
# depending on the resulting p-values.
# NOTE(review): the statements were collapsed onto one line, so whether
# `if p_growth > 0.05:` is nested inside `if p_neutral < 0.05:` cannot be
# determined from this text - check the original indentation.
# NOTE(review): the same `degrees = len(expansion_params)` is used for both
# the neutral and the exponential-growth comparison - confirm that is intended.
# NOTE(review): the final `else:` branch continues beyond the visible code.
expansion_sfs_file.write(str(expansion_sfs[i]) + '\n') expansion_sfs_file.close() # Output SFS for growth model growth_sfs = dadi.Inference.optimally_scaled_sfs(growth_model, data) growth_sfs_file = open("growthModelSFS.txt", 'w') for i in range(1,len(growth_sfs)-1): growth_sfs_file.write(str(growth_sfs[i]) + '\n') growth_sfs_file.close() if expansion_ll_opt > growth_ll_opt: print "Testing significance of expansion..." LRTstat_neutral = 2*(expansion_ll_opt - neutral_ll) degrees = len(expansion_params) print "LRT Statistic- Neutral:", LRTstat_neutral p_neutral = cdf_chi2(degrees, LRTstat_neutral) print "p-value neutral =",p_neutral LRTstat_growth = 2*(expansion_ll_opt - growth_ll_opt) print "LRT Statistic- Exponential Growth:", LRTstat_growth p_growth = cdf_chi2(degrees, LRTstat_growth) print "p-value growth =",p_growth if p_neutral < 0.05: print "Working on likelihood surface..." likelihood_grid(expansion_func_ex, data, ns, pts_l, "expansion") if p_growth > 0.05: likelihood_grid(growth_func_ex, data, ns, pts_l, "growth") else: print "Testing significance of exponential growth..." LRTstat_neutral = 2*(growth_ll_opt - neutral_ll) degrees = len(growth_params)
#!/usr/bin/env python
import sys

from Bio.Phylo.PAML.chi2 import cdf_chi2

# This script calculates the p-value for the likelihood ratio test given two
# log-likelihoods and the degrees of freedom.


def usage():
    """Print the command-line synopsis."""
    # Single-argument print(...) behaves identically under Python 2 and 3.
    print("LRT.py <Likelihood 1> <Likelihood 2> <degrees of freedom>")


if len(sys.argv) != 4:
    usage()
    # A wrong argument count is an error: exit with a non-zero status so
    # calling shells and pipelines can detect it.
    sys.exit(1)

lnL1 = float(sys.argv[1])
lnL2 = float(sys.argv[2])
degrees = int(sys.argv[3])

# Likelihood-ratio statistic, followed by its p-value for `degrees` df.
LRTstat = 2 * (lnL1 - lnL2)
print(LRTstat)
p = cdf_chi2(degrees, LRTstat)
print(p)
# 1.4 Results results = .read() dict.get(key) #returns a specific dictionary result, based on desired key # 2.0 CodeML cml = codeml.Codeml() cml.alignment = "align.phylip" #tells codeML the alignment name cml.tree = "species.tree" #tells codeML the tree name cml.out_file = "results.out" #sets output file name cml.working_dir = "./scratch" #sets working directory to store output files cml.run() # 3.0 BaseML bml = baseml.Baseml() # >Note: use same methods/attributes as CodeML, just replace with 'bml' # 4.0 yn00 yn = yn00.Yn00() # >Note: use same methods/attributes as CodeML, replace with 'yn' # >Note: you do not need a tree file # 5.0 Chi-Squared df = 2 #sets degrees of freedom statistic = 7.21 #sets chi2 test statistic cdf_chi2(df, statistic) #returns chi2 value