Esempio n. 1
0
 def testParseNgene2Mgene34(self):
     """Check the parsed structure of every ngene2_mgene34 result file.

     Each file found in ``<results_dir>/codeml/ngene2_mgene34`` is read
     with ``codeml.read`` and the sizes and keys of the returned nested
     dictionaries are verified.
     """
     data_dir = os.path.join(self.results_dir, "codeml", "ngene2_mgene34")
     for fname in os.listdir(data_dir):
         # Version tag: second '-'-separated field, cut at its first '.'
         tag = fname.split("-")[1].split(".")[0]
         msg = "Improper parsing for version %s" % tag.replace("_", ".")
         parsed = codeml.read(os.path.join(data_dir, fname))
         self.assertEqual(len(parsed), 4, msg)
         self.assertIn("NSsites", parsed, msg)
         ns_models = parsed["NSsites"]
         self.assertEqual(len(ns_models), 1, msg)
         self.assertIn(0, ns_models, msg)
         model0 = ns_models[0]
         self.assertEqual(len(model0), 5, msg)
         self.assertIn("parameters", model0, msg)
         parameters = model0["parameters"]
         # This type of model has fewer parameters for model 0
         self.assertEqual(len(parameters), 3, msg)
         # "rates" and "genes" must both be present with two entries each
         for key in ("rates", "genes"):
             self.assertIn(key, parameters, msg)
             self.assertEqual(len(parameters[key]), 2, msg)
Esempio n. 2
0
 def testParseFreeRatio(self):
     """Check the parsed structure of every free-ratio result file.

     Each file in ``<results_dir>/codeml/freeratio`` is read with
     ``codeml.read``; the test then verifies the number of top-level
     items, the single NSsites model 0 and its branch/omega parameters.
     """
     data_dir = os.path.join(self.results_dir, "codeml", "freeratio")
     for fname in os.listdir(data_dir):
         # Version tag: second '-'-separated field, cut at its first '.'
         tag = fname.split("-")[1].split(".")[0]
         msg = "Improper parsing for version %s" % tag.replace("_", ".")
         parsed = codeml.read(os.path.join(data_dir, fname))
         self.assertEqual(len(parsed), 4, msg)
         self.assertIn("NSsites", parsed, msg)
         ns_models = parsed["NSsites"]
         self.assertEqual(len(ns_models), 1, msg)
         self.assertIn(0, ns_models, msg)
         model0 = ns_models[0]
         # With the free ratio model, you get 3 extra trees: dN tree,
         # dS tree and omega tree
         self.assertEqual(len(model0), 8, msg)
         self.assertIn("parameters", model0, msg)
         parameters = model0["parameters"]
         self.assertEqual(len(parameters), SITECLASS_PARAMS[0], msg)
         # There should be 7 branches, and as many omega values
         for key in ("branches", "omega"):
             self.assertIn(key, parameters, msg)
             self.assertEqual(len(parameters[key]), 7, msg)
Esempio n. 3
0
 def testParseAA(self):
     """Parse the amino-acid model 0 output and check the result sizes."""
     path = os.path.join("PAML", "Results", "codeml", "codeml_aa_model0.out")
     parsed = codeml.read(path)
     # Five top-level items are expected; "distances" holds exactly one
     self.assertEqual(len(parsed), 5)
     self.assertEqual(len(parsed["distances"]), 1)
Esempio n. 4
0
 def testParseCladeModelC(self):
     """Parse the clade model C output and check the site-class count."""
     path = os.path.join("PAML", "Results", "codeml",
                         "codeml_clademodelC.out")
     parsed = codeml.read(path)
     self.assertEqual(len(parsed), 5)
     # Site classes live under NSsites model 2's parameters
     parameters = parsed["NSsites"][2]["parameters"]
     self.assertEqual(len(parameters["site classes"]), 3)
Esempio n. 5
0
 def testParseNSsite3(self):
     """Check the parsed structure of every NSsite3 result file.

     Each file in ``<results_dir>/codeml/NSsite3`` is read with
     ``codeml.read``. Every assertion carries ``version_msg`` so that a
     failure identifies the result-file version being parsed.
     """
     res_dir = os.path.join(self.results_dir, "codeml", "NSsite3")
     for results_file in os.listdir(res_dir):
         version = results_file.split('-')[1].split('.')[0]
         version_msg = ("Improper parsing for version %s"
                        % version.replace('_', '.'))
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         # There should be 5 top-level items: 'codon model', 'model',
         # 'version', 'NSsites' & site-class model, the last of which
         # is only there when only one NSsites class is used
         self.assertEqual(len(results), 5, version_msg)
         self.assertIn('site-class model', results, version_msg)
         self.assertEqual(results['site-class model'], 'discrete',
                          version_msg)
         self.assertIn("NSsites", results, version_msg)
         # There should be 1 NSsites class: 3
         self.assertEqual(len(results["NSsites"]), 1, version_msg)
         # Each site class model should have 5 sub-items: 'lnL', 'tree',
         # 'description', 'parameters', & 'tree length'. It should
         # have the correct number of parameters also.
         model = results["NSsites"][3]
         self.assertEqual(len(model), 5, version_msg)
         self.assertIn("parameters", model, version_msg)
         params = model["parameters"]
         # Use the full message here (the raw version string was passed
         # before, giving an uninformative failure message)
         self.assertEqual(len(params), SITECLASS_PARAMS[3],
                          version_msg)
         self.assertIn("site classes", params, version_msg)
         site_classes = params["site classes"]
         self.assertEqual(len(site_classes), 4, version_msg)
Esempio n. 6
0
 def testParseBranchSiteA(self):
     """Check the parsed structure of every branch-site A result file.

     Each file in ``<results_dir>/codeml/branchsiteA`` is read with
     ``codeml.read``. Every assertion carries ``version_msg`` so that a
     failure identifies the result-file version being parsed.
     """
     res_dir = os.path.join(self.results_dir, "codeml", "branchsiteA")
     for results_file in os.listdir(res_dir):
         version = results_file.split('-')[1].split('.')[0]
         version_msg = ("Improper parsing for version %s"
                        % version.replace('_', '.'))
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         # There are 5 top-level items in this case:
         # 'codon model', 'model', 'version', 'NSsites' & 'site-class model'
         self.assertEqual(len(results), 5, version_msg)
         self.assertIn("NSsites", results, version_msg)
         models = results["NSsites"]
         # Only site class model 2 is simulated for Branch Site A
         self.assertEqual(len(models), 1, version_msg)
         self.assertIn(2, models, version_msg)
         model = models[2]
         self.assertEqual(len(model), 5, version_msg)
         self.assertIn("parameters", model, version_msg)
         params = model["parameters"]
         # Branch Site A results lack a "branches" parameter
         self.assertEqual(len(params), SITECLASS_PARAMS[2] - 1, version_msg)
         self.assertIn("site classes", params, version_msg)
         site_classes = params["site classes"]
         # Branch Site A adds another site class. Use the full message
         # (the raw version string was passed before).
         self.assertEqual(len(site_classes), SITECLASSES[2] + 1,
                          version_msg)
         for class_num in [0, 1, 2, 3]:
             self.assertIn(class_num, site_classes, version_msg)
             site_class = site_classes[class_num]
             self.assertEqual(len(site_class), 2, version_msg)
             self.assertIn("branch types", site_class, version_msg)
             branches = site_class["branch types"]
             self.assertEqual(len(branches), 2, version_msg)
Esempio n. 7
0
 def testParseAllNSsites(self):
     """Check the parsed structure of every all_NSsites result file.

     Each file in ``<results_dir>/codeml/all_NSsites`` is read with
     ``codeml.read``; the six NSsites models are then checked for their
     item count, parameter count, branches and (if present) site classes.
     """
     data_dir = os.path.join(self.results_dir, "codeml", "all_NSsites")
     for fname in os.listdir(data_dir):
         # Version tag: second '-'-separated field, cut at its first '.'
         tag = fname.split("-")[1].split(".")[0]
         msg = "Improper parsing for version %s" % tag.replace("_", ".")
         parsed = codeml.read(os.path.join(data_dir, fname))
         # There should be 4 top-level items: 'codon model', 'model',
         # 'version', & 'NSsites'
         self.assertEqual(len(parsed), 4, msg)
         self.assertIn("NSsites", parsed, msg)
         # There should be 6 NSsites classes: 0, 1, 2, 3, 7 & 8
         self.assertEqual(len(parsed["NSsites"]), 6, msg)
         # Each site class model should have 5 sub-items: 'lnL', 'tree',
         # 'description', 'parameters', & 'tree length'. It should
         # have the correct number of parameters also.
         for num in (0, 1, 2, 3, 7, 8):
             ns_model = parsed["NSsites"][num]
             self.assertEqual(len(ns_model), 5, msg)
             self.assertIn("parameters", ns_model, msg)
             parameters = ns_model["parameters"]
             self.assertEqual(len(parameters), SITECLASS_PARAMS[num], msg)
             self.assertIn("branches", parameters, msg)
             # There are 7 branches in the test case (specific to these
             # test cases)
             self.assertEqual(len(parameters["branches"]), 7, msg)
             if "site classes" not in parameters:
                 continue
             self.assertEqual(len(parameters["site classes"]),
                              SITECLASSES[num], msg)
Esempio n. 8
0
 def testParseCladeModelC(self):
     """Check the parsed structure of every clade model C result file.

     Each file in ``<results_dir>/codeml/clademodelC`` is read with
     ``codeml.read``. Every assertion carries ``version_msg`` so that a
     failure identifies the result-file version being parsed.
     """
     cladeC_res_dir = os.path.join(self.results_dir, "codeml",
                                   "clademodelC")
     for results_file in os.listdir(cladeC_res_dir):
         version = results_file.split('-')[1].split('.')[0]
         version_msg = ("Improper parsing for version %s"
                        % version.replace('_', '.'))
         results_path = os.path.join(cladeC_res_dir, results_file)
         results = codeml.read(results_path)
         # 5 top-level items again in this case
         self.assertEqual(len(results), 5, version_msg)
         self.assertIn("NSsites", results, version_msg)
         models = results["NSsites"]
         # Only site class model 2 is simulated for Clade Model C
         self.assertEqual(len(models), 1, version_msg)
         self.assertIn(2, models, version_msg)
         model = models[2]
         self.assertEqual(len(model), 5, version_msg)
         self.assertIn("parameters", model, version_msg)
         params = model["parameters"]
         # Clade Model C results lack a "branches" parameter
         self.assertEqual(len(params), SITECLASS_PARAMS[2] - 1, version_msg)
         self.assertIn("site classes", params, version_msg)
         site_classes = params["site classes"]
         # Use the full message here (the raw version string was passed
         # before, giving an uninformative failure message)
         self.assertEqual(len(site_classes), SITECLASSES[2],
                          version_msg)
         for class_num in [0, 1, 2]:
             self.assertIn(class_num, site_classes, version_msg)
             site_class = site_classes[class_num]
             self.assertEqual(len(site_class), 2, version_msg)
             self.assertIn("branch types", site_class, version_msg)
             branches = site_class["branch types"]
             self.assertEqual(len(branches), 2, version_msg)
Esempio n. 9
0
 def testParseNgene2Mgene34(self):
     """Parse the ngene2/mgene34 output and check the number of genes."""
     path = os.path.join("PAML", "Results", "codeml",
                         "codeml_ngene2_mgene34.out")
     parsed = codeml.read(path)
     self.assertEqual(len(parsed), 4)
     # Gene entries live under NSsites model 0's parameters
     parameters = parsed["NSsites"][0]["parameters"]
     self.assertEqual(len(parameters["genes"]), 2)
Esempio n. 10
0
 def testParsePairwise(self):
     """Parse the pairwise output and check the result sizes."""
     path = os.path.join("PAML", "Results", "codeml", "codeml_pairwise.out")
     parsed = codeml.read(path)
     # Five top-level items, and five entries under "pairwise"
     self.assertEqual(len(parsed), 5)
     self.assertEqual(len(parsed["pairwise"]), 5)
 def testParseAllNSsites(self):
     """Check the parsed structure of every all_NSsites result file.

     Each file in ``<results_dir>/codeml/all_NSsites`` is read with
     ``codeml.read``. Membership checks use ``assertIn`` so that a failure
     reports the missing key and the container, not just "False is not
     true".
     """
     res_dir = os.path.join(self.results_dir, "codeml", "all_NSsites")
     for results_file in os.listdir(res_dir):
         version = results_file.split('-')[1].split('.')[0]
         version_msg = ("Improper parsing for version %s"
                        % version.replace('_', '.'))
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         # There should be 4 top-level items: 'codon model', 'model',
         # 'version', & 'NSsites'
         self.assertEqual(len(results), 4, version_msg)
         self.assertIn("NSsites", results, version_msg)
         # There should be 6 NSsites classes: 0, 1, 2, 3, 7 & 8
         self.assertEqual(len(results["NSsites"]), 6, version_msg)
         # Each site class model should have 5 sub-items: 'lnL', 'tree',
         # 'description', 'parameters', & 'tree length'. It should
         # have the correct number of parameters also.
         for model_num in [0, 1, 2, 3, 7, 8]:
             model = results["NSsites"][model_num]
             self.assertEqual(len(model), 5, version_msg)
             self.assertIn("parameters", model, version_msg)
             params = model["parameters"]
             self.assertEqual(len(params), SITECLASS_PARAMS[model_num],
                              version_msg)
             self.assertIn("branches", params, version_msg)
             branches = params["branches"]
             # There are 7 branches in the test case (specific to these
             # test cases)
             self.assertEqual(len(branches), 7, version_msg)
             if "site classes" in params:
                 self.assertEqual(len(params["site classes"]),
                                  SITECLASSES[model_num], version_msg)
 def testParseNSsite3(self):
     """Check the parsed structure of every NSsite3 result file.

     Each file in ``<results_dir>/codeml/NSsite3`` is read with
     ``codeml.read``. Every assertion carries ``version_msg`` so that a
     failure identifies the result-file version, and membership checks
     use ``assertIn`` for informative failure output.
     """
     res_dir = os.path.join(self.results_dir, "codeml", "NSsite3")
     for results_file in os.listdir(res_dir):
         version = results_file.split('-')[1].split('.')[0]
         version_msg = ("Improper parsing for version %s"
                        % version.replace('_', '.'))
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         # There should be 5 top-level items: 'codon model', 'model',
         # 'version', 'NSsites' & site-class model, the last of which
         # is only there when only one NSsites class is used
         self.assertEqual(len(results), 5, version_msg)
         self.assertIn('site-class model', results, version_msg)
         self.assertEqual(results['site-class model'], 'discrete',
                          version_msg)
         self.assertIn("NSsites", results, version_msg)
         # There should be 1 NSsites class: 3
         self.assertEqual(len(results["NSsites"]), 1, version_msg)
         # Each site class model should have 5 sub-items: 'lnL', 'tree',
         # 'description', 'parameters', & 'tree length'. It should
         # have the correct number of parameters also.
         model = results["NSsites"][3]
         self.assertEqual(len(model), 5, version_msg)
         self.assertIn("parameters", model, version_msg)
         params = model["parameters"]
         # Use the full message here (the raw version string was passed
         # before, giving an uninformative failure message)
         self.assertEqual(len(params), SITECLASS_PARAMS[3], version_msg)
         self.assertIn("site classes", params, version_msg)
         site_classes = params["site classes"]
         self.assertEqual(len(site_classes), 4, version_msg)
Esempio n. 13
0
 def testParseFreeRatio(self):
     """Verify parsing of the free-ratio codeml result files.

     Reads every file under ``<results_dir>/codeml/freeratio`` with
     ``codeml.read`` and checks the top-level item count, the lone
     NSsites model 0, and the lengths of its branch and omega parameters.
     """
     folder = os.path.join(self.results_dir, "codeml", "freeratio")
     for name in os.listdir(folder):
         # Version tag: second '-'-separated field, cut at its first '.'
         raw_version = name.split("-")[1].split(".")[0]
         failure = ("Improper parsing for version %s"
                    % raw_version.replace("_", "."))
         output = codeml.read(os.path.join(folder, name))
         self.assertEqual(len(output), 4, failure)
         self.assertIn("NSsites", output, failure)
         site_models = output["NSsites"]
         self.assertEqual(len(site_models), 1, failure)
         self.assertIn(0, site_models, failure)
         m0 = site_models[0]
         # With the free ratio model, you get 3 extra trees: dN tree,
         # dS tree and omega tree
         self.assertEqual(len(m0), 8, failure)
         self.assertIn("parameters", m0, failure)
         m0_params = m0["parameters"]
         self.assertEqual(len(m0_params), SITECLASS_PARAMS[0], failure)
         # There should be 7 branches
         self.assertIn("branches", m0_params, failure)
         self.assertEqual(len(m0_params["branches"]), 7, failure)
         # ... and 7 omega values
         self.assertIn("omega", m0_params, failure)
         self.assertEqual(len(m0_params["omega"]), 7, failure)
Esempio n. 14
0
 def testParseSitesParamsForPairwise(self):
     """Verify that pairwise site estimates are indeed parsed. Fixes #483.

     Each file in ``<results_dir>/codeml/pairwise`` is read with
     ``codeml.read``. For every pair of sequences the seven expected
     parameters must be present as floats, and all except ``lnL`` must be
     non-negative. Every assertion carries the version message so that a
     failure identifies the result file being parsed.
     """
     res_dir = os.path.join(self.results_dir, "codeml", "pairwise")
     for results_file in os.listdir(res_dir):
         version = results_file.split('-')[1].split('.')[0]
         version_msg = ("Improper parsing for version %s" %
                        version.replace('_', '.'))
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         self.assertIn("pairwise", results, version_msg)
         seqs = list(results["pairwise"].keys())
         self.assertGreaterEqual(
             len(seqs), 2,
             version_msg + ": should have at least two sequences")
         for seq1, seq2 in itertools.combinations(seqs, 2):
             params = results["pairwise"][seq1][seq2]
             self.assertEqual(
                 len(params), 7,
                 version_msg + ": wrong number of parsed parameters" +
                 " for %s-%s" % (seq1, seq2))
             for param in ("t", "S", "N", "omega", "dN", "dS", "lnL"):
                 self.assertIn(
                     param, params, version_msg +
                     ": '%s' not in parsed parameters" % (param))
                 self.assertIsInstance(params[param], float, version_msg)
                 # lnL is exempt from the non-negativity check
                 if param != "lnL":
                     self.assertGreaterEqual(params[param], 0, version_msg)
Esempio n. 15
0
    def get_pairwise_dn_ds(self):
        """Run (or reuse) a codeml pairwise analysis and record dN/dS values.

        If the expected codeml output file does not exist, codeml is run on
        the alignment returned by ``self.nucl_align_file()`` using the
        'pairwise' control file taken from the module-level ``filehash``;
        otherwise the existing output file is parsed with ``codeml.read``.
        The output file is then post-processed by a shell pipeline into a
        ``.table`` file, whose rows are stored as ``"<f0>..<f1>:dN/dS"``,
        ``":dN"`` and ``":dS"`` entries and handed to ``printhash``.

        NOTE: this is Python 2 code (``print >>`` statements).
        """
        aligned_phylip = self.nucl_align_file()
        dr = collections.OrderedDict()
        dr['pairwise'] = {}
        nullctl = filehash['ALL']['pamlctl']['pairwise']
        # The concatenation yields "<aligned_phylip>.pairwise..phylip"
        # (note the double dot).
        outfile = aligned_phylip + "." + 'pairwise' + "." + ".phylip"
        if not os.path.isfile(outfile):
            print >> sys.stderr, 'working_dir = ', filehash['ALL']['workdir']['NA'],'aligned_phylip=', \
            aligned_phylip
            # out_file is rebuilt with the same expression as `outfile` above.
            cml = codeml.Codeml(alignment = aligned_phylip, out_file = aligned_phylip + "." +'pairwise' + "." +".phylip", \
            working_dir = filehash['ALL']['workdir']['NA'])
            cml.read_ctl_file(nullctl)
            # Return value is discarded.
            cml.get_option("NSsites")
            print 'cml=', cml
            results = cml.run()
        else:
            results = codeml.read(outfile)
            print >> sys.stderr, results

        # `results` is not used below; the values are re-extracted from the
        # output file by the shell pipeline (cat | perl | grep -oP | cut).
        # The string is not raw, so Python expands "\n" and "\t" to real
        # newline/tab characters before the shell sees the command.
        # NOTE(review): `outfile` is interpolated unquoted into the shell
        # command -- paths with spaces or shell metacharacters will break it.
        paircapture = "cat " + outfile + " | perl -p -e \'s/\n/\t/g\' | grep -oP \"(?<=\t)([0-9]+)\s+\([a-zA-Z0-9]+\)\s+\.\.\.\s+([0-9]+)\s+\([a-zA-Z0-9]+\)\tlnL\s*=\s*[0-9\-\.]+\t\s+[0-9\.\-]+\s+[0-9\.\-]+\t\t([0-9a-zA-Z=\S \.\-]+)\t\" | perl -p -e \'s/\s*=\s*/\t=\t/g\' | perl -p -e \'s/[ \t]+/\t/g\' | cut -f 1,4,22,25,28 > " + outfile + ".table"
        # The exit status of the pipeline is not checked.
        os.system(paircapture)
        with open(outfile + ".table") as f:
            for line in f:
                print >> sys.stderr, line
                line = line.rstrip('\n')
                # Five whitespace-separated columns are expected (the ones
                # selected by `cut`); fewer raises IndexError below.
                fields = line.split()
                dr['pairwise'][fields[0] + ".." + fields[1] +
                               ":dN/dS"] = fields[2]
                dr['pairwise'][fields[0] + ".." + fields[1] +
                               ":dN"] = fields[3]
                dr['pairwise'][fields[0] + ".." + fields[1] +
                               ":dS"] = fields[4]
        printhash(dr, self.name, 'paml')
 def testParseCladeModelC(self):
     """Check the parsed structure of every clade model C result file.

     Each file in ``<results_dir>/codeml/clademodelC`` is read with
     ``codeml.read``. Every assertion carries ``version_msg`` so that a
     failure identifies the result-file version, and membership checks
     use ``assertIn`` for informative failure output.
     """
     cladeC_res_dir = os.path.join(self.results_dir, "codeml",
                                   "clademodelC")
     for results_file in os.listdir(cladeC_res_dir):
         version = results_file.split('-')[1].split('.')[0]
         version_msg = ("Improper parsing for version %s"
                        % version.replace('_', '.'))
         results_path = os.path.join(cladeC_res_dir, results_file)
         results = codeml.read(results_path)
         # 5 top-level items again in this case
         self.assertEqual(len(results), 5, version_msg)
         self.assertIn("NSsites", results, version_msg)
         models = results["NSsites"]
         # Only site class model 2 is simulated for Clade Model C
         self.assertEqual(len(models), 1, version_msg)
         self.assertIn(2, models, version_msg)
         model = models[2]
         self.assertEqual(len(model), 5, version_msg)
         self.assertIn("parameters", model, version_msg)
         params = model["parameters"]
         # Clade Model C results lack a "branches" parameter
         self.assertEqual(len(params), SITECLASS_PARAMS[2] - 1, version_msg)
         self.assertIn("site classes", params, version_msg)
         site_classes = params["site classes"]
         # Use the full message here (the raw version string was passed
         # before, giving an uninformative failure message)
         self.assertEqual(len(site_classes), SITECLASSES[2], version_msg)
         for class_num in [0, 1, 2]:
             self.assertIn(class_num, site_classes, version_msg)
             site_class = site_classes[class_num]
             self.assertEqual(len(site_class), 2, version_msg)
             self.assertIn("branch types", site_class, version_msg)
             branches = site_class["branch types"]
             self.assertEqual(len(branches), 2, version_msg)
Esempio n. 17
0
 def testParseSitesParamsForPairwise(self):
     """Verify that pairwise site estimates are indeed parsed. Fixes #483.

     Each file in ``<results_dir>/codeml/pairwise`` is read with
     ``codeml.read``. For every pair of sequences the seven expected
     parameters must be present as floats, and all except ``lnL`` must be
     non-negative. Every assertion carries the version message so that a
     failure identifies the result file being parsed.
     """
     res_dir = os.path.join(self.results_dir, "codeml", "pairwise")
     for results_file in os.listdir(res_dir):
         version = results_file.split('-')[1].split('.')[0]
         version_msg = ("Improper parsing for version %s"
                        % version.replace('_', '.'))
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         self.assertIn("pairwise", results, version_msg)
         seqs = list(results["pairwise"].keys())
         self.assertGreaterEqual(len(seqs), 2, version_msg +
                                 ": should have at least two sequences")
         for seq1, seq2 in itertools.combinations(seqs, 2):
             params = results["pairwise"][seq1][seq2]
             self.assertEqual(len(params), 7,
                              version_msg + ": wrong number of parsed parameters" +
                              " for %s-%s" % (seq1, seq2))
             for param in ("t", "S", "N", "omega", "dN", "dS", "lnL"):
                 self.assertIn(param, params, version_msg +
                               ": '%s' not in parsed parameters"
                               % (param))
                 self.assertIsInstance(params[param], float, version_msg)
                 # lnL is exempt from the non-negativity check
                 if param != "lnL":
                     self.assertGreaterEqual(params[param], 0, version_msg)
Esempio n. 18
0
 def testParsePairwise(self):
     """Check the parsed structure of every pairwise result file.

     Each file in ``<results_dir>/codeml/pairwise`` is read with
     ``codeml.read``; every pair of sequences must have seven parsed
     parameters in both lookup orders.
     """
     data_dir = os.path.join(self.results_dir, "codeml", "pairwise")
     for fname in os.listdir(data_dir):
         # Version tag: second '-'-separated field, cut at its first '.'
         tag = fname.split("-")[1].split(".")[0]
         msg = "Improper parsing for version %s" % tag.replace("_", ".")
         parsed = codeml.read(os.path.join(data_dir, fname))
         # Pairwise models have an extra top-level item: pairwise
         self.assertEqual(len(parsed), 5, msg)
         self.assertIn("pairwise", parsed, msg)
         by_seq = parsed["pairwise"]
         self.assertGreaterEqual(
             len(by_seq), 2, msg + ": should have at least two sequences")
         for first, second in itertools.combinations(by_seq.keys(), 2):
             # The table is checked in both directions: [a][b] and [b][a]
             for row, col in ((first, second), (second, first)):
                 self.assertEqual(
                     len(by_seq[row][col]),
                     7,
                     msg + ": wrong number of parameters parsed",
                 )
Esempio n. 19
0
 def testParseAAPairwise(self):
     """Parse the amino-acid pairwise output and check the result sizes."""
     path = os.path.join("PAML", "Results", "codeml",
                         "codeml_aa_pairwise.out")
     parsed = codeml.read(path)
     # Four top-level items, and two entries under "distances"
     self.assertEqual(len(parsed), 4)
     self.assertEqual(len(parsed["distances"]), 2)
Esempio n. 20
0
 def testParseFreeBranch(self):
     """Parse the free-branch output and check the number of branches."""
     path = os.path.join("PAML", "Results", "codeml",
                         "codeml_freebranch.out")
     parsed = codeml.read(path)
     self.assertEqual(len(parsed), 4)
     # Branches live under NSsites model 0's parameters
     parameters = parsed["NSsites"][0]["parameters"]
     self.assertEqual(len(parameters["branches"]), 7)
Esempio n. 21
0
 def testParseBranchSiteA(self):
     """Parse the branch-site A output and check the site-class count."""
     path = os.path.join("PAML", "Results", "codeml",
                         "codeml_branchsiteA.out")
     parsed = codeml.read(path)
     self.assertEqual(len(parsed), 5)
     # Site classes live under NSsites model 2's parameters
     parameters = parsed["NSsites"][2]["parameters"]
     self.assertEqual(len(parameters["site classes"]), 4)
 def testParseBranchSiteA(self):
     """Check the parsed structure of every branch-site A result file.

     Each file in ``<results_dir>/codeml/branchsiteA`` is read with
     ``codeml.read``. Every assertion carries ``version_msg`` so that a
     failure identifies the result-file version, and membership checks
     use ``assertIn`` for informative failure output.
     """
     res_dir = os.path.join(self.results_dir, "codeml", "branchsiteA")
     for results_file in os.listdir(res_dir):
         version = results_file.split('-')[1].split('.')[0]
         version_msg = ("Improper parsing for version %s"
                        % version.replace('_', '.'))
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         # There are 5 top-level items in this case:
         # 'codon model', 'model', 'version', 'NSsites' & 'site-class model'
         self.assertEqual(len(results), 5, version_msg)
         self.assertIn("NSsites", results, version_msg)
         models = results["NSsites"]
         # Only site class model 2 is simulated for Branch Site A
         self.assertEqual(len(models), 1, version_msg)
         self.assertIn(2, models, version_msg)
         model = models[2]
         self.assertEqual(len(model), 5, version_msg)
         self.assertIn("parameters", model, version_msg)
         params = model["parameters"]
         # Branch Site A results lack a "branches" parameter
         self.assertEqual(len(params), SITECLASS_PARAMS[2] - 1, version_msg)
         self.assertIn("site classes", params, version_msg)
         site_classes = params["site classes"]
         # Branch Site A adds another site class. Use the full message
         # (the raw version string was passed before).
         self.assertEqual(len(site_classes), SITECLASSES[2] + 1,
                          version_msg)
         for class_num in [0, 1, 2, 3]:
             self.assertIn(class_num, site_classes, version_msg)
             site_class = site_classes[class_num]
             self.assertEqual(len(site_class), 2, version_msg)
             self.assertIn("branch types", site_class, version_msg)
             branches = site_class["branch types"]
             self.assertEqual(len(branches), 2, version_msg)
Esempio n. 23
0
 def testParseNgene2Mgene34(self):
     """Verify parsing of the ngene2_mgene34 codeml result files.

     Reads every file under ``<results_dir>/codeml/ngene2_mgene34`` with
     ``codeml.read`` and checks the top-level item count, the lone NSsites
     model 0, and the lengths of its "rates" and "genes" parameters.
     """
     folder = os.path.join(self.results_dir, "codeml", "ngene2_mgene34")
     for name in os.listdir(folder):
         # Version tag: second '-'-separated field, cut at its first '.'
         raw_version = name.split("-")[1].split(".")[0]
         failure = ("Improper parsing for version %s"
                    % raw_version.replace("_", "."))
         output = codeml.read(os.path.join(folder, name))
         self.assertEqual(len(output), 4, failure)
         self.assertIn("NSsites", output, failure)
         site_models = output["NSsites"]
         self.assertEqual(len(site_models), 1, failure)
         self.assertIn(0, site_models, failure)
         m0 = site_models[0]
         self.assertEqual(len(m0), 5, failure)
         self.assertIn("parameters", m0, failure)
         m0_params = m0["parameters"]
         # This type of model has fewer parameters for model 0
         self.assertEqual(len(m0_params), 3, failure)
         self.assertIn("rates", m0_params, failure)
         self.assertEqual(len(m0_params["rates"]), 2, failure)
         self.assertIn("genes", m0_params, failure)
         self.assertEqual(len(m0_params["genes"]), 2, failure)
Esempio n. 24
0
 def testParseAllNSsites(self):
     """Parse the all-NSsites output and check the size of every model."""
     path = os.path.join("PAML", "Results", "codeml",
                         "codeml_NSsites_all.out")
     ns_models = codeml.read(path).get("NSsites")
     # Six models are expected, each with five entries
     self.assertEqual(len(ns_models), 6)
     for key in ns_models:
         entry = ns_models.get(key)
         self.assertEqual(len(entry), 5)
Esempio n. 25
0
 def testParseSEs(self):
     """Check that every NSsites model's parameters include an "SEs" entry."""
     path = os.path.join("PAML", "Results", "codeml", "codeml_SE.out")
     ns_models = codeml.read(path).get("NSsites")
     for key in ns_models:
         parameters = ns_models.get(key).get("parameters")
         # .get() yields None when "SEs" is absent, which fails the check
         self.assertNotEqual(parameters.get("SEs"), None)
Esempio n. 26
0
 def testParsePairwise(self):
     """Check the parsed structure of every pairwise result file.

     Each file in ``<results_dir>/codeml/pairwise`` is read with
     ``codeml.read``. The membership check uses ``assertIn`` so that a
     failure reports the missing key and the container.
     """
     res_dir = os.path.join(self.results_dir, "codeml", "pairwise")
     for results_file in os.listdir(res_dir):
         version = results_file.split("-")[1].split(".")[0]
         version_msg = "Improper parsing for version %s" % version.replace("_", ".")
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         # Pairwise models have an extra top-level item: pairwise
         self.assertEqual(len(results), 5, version_msg)
         self.assertIn("pairwise", results, version_msg)
         pairwise = results["pairwise"]
         self.assertEqual(len(pairwise), 5, version_msg)
Esempio n. 27
0
 def testParseAllVersions(self):
     """Check the NSsites model sizes for every codeml versions file.

     Every regular file in ``PAML/Results/codeml/versions`` whose name
     starts with "codeml" is parsed. The path used for the file check and
     for reading is built from the listed directory; previously the bare
     file name was tested with ``os.path.isfile`` and a different
     directory was read, so the assertions were effectively never run.
     """
     versions_dir = os.path.join("PAML", "Results", "codeml", "versions")
     # Expected number of entries for each NSsites model number
     expected_sizes = ((0, 7), (1, 5), (2, 5), (3, 5), (7, 6), (8, 6))
     for results_file in os.listdir(versions_dir):
         results_path = os.path.join(versions_dir, results_file)
         if not (results_file.startswith("codeml")
                 and os.path.isfile(results_path)):
             continue
         results = codeml.read(results_path)
         nssites = results["NSsites"]
         # The file name is passed as message to identify failures
         self.assertEqual(len(nssites), 6, results_file)
         for model_num, size in expected_sizes:
             self.assertEqual(len(nssites[model_num]), size, results_file)
Esempio n. 28
0
 def testTreeParseVersatility(self):
     """Test finding trees in the results.

     Written in response to bug #453, where trees like (A, (B, C));
     weren't being caught. Uses the dedicated unittest assertions so that
     a failure reports the offending key or value.
     """
     res_file = os.path.join(self.results_dir, "codeml",
                             "tree_regexp_versatility.out")
     results = codeml.read(res_file)
     self.assertIn("NSsites", results)
     nssites = results["NSsites"]
     self.assertIn(0, nssites)
     m0 = nssites[0]
     self.assertIn("tree", m0)
     self.assertIsNotNone(m0["tree"])
     self.assertNotEqual(len(m0["tree"]), 0)
Esempio n. 29
0
 def testTreeParseVersatility(self):
     """Test finding trees in the results.

     Written in response to bug #453, where trees like (A, (B, C));
     weren't being caught. The None check uses ``assertIsNotNone`` so that
     a failure message names the check instead of "False is not true".
     """
     res_file = os.path.join(self.results_dir, "codeml",
                             "tree_regexp_versatility.out")
     results = codeml.read(res_file)
     self.assertIn("NSsites", results)
     nssites = results["NSsites"]
     self.assertIn(0, nssites)
     m0 = nssites[0]
     self.assertIn("tree", m0)
     self.assertIsNotNone(m0["tree"])
     self.assertNotEqual(len(m0["tree"]), 0)
Esempio n. 30
0
 def testParseM2arel(self):
     """Check the parsed structure of every m2a_rel result file.

     Each file in ``<results_dir>/codeml/m2a_rel`` is read with
     ``codeml.read`` and NSsites model 22 is checked. Every assertion
     carries ``version_msg`` so that a failure identifies the result-file
     version being parsed.
     """
     res_dir = os.path.join(self.results_dir, "codeml", "m2a_rel")
     for results_file in os.listdir(res_dir):
         version = results_file.split("-")[1].split(".")[0]
         version_msg = f"Improper parsing for version {version.replace('_', '.')}"
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         self.assertIn("NSsites", results, version_msg)
         self.assertIn(22, results["NSsites"], version_msg)
         model = results["NSsites"][22]
         self.assertEqual(len(model), 5, version_msg)
         params = model["parameters"]
         self.assertEqual(len(params), SITECLASS_PARAMS[22], version_msg)
 def testParsePairwise(self):
     """Check the parsed structure of every pairwise result file.

     Each file in ``<results_dir>/codeml/pairwise`` is read with
     ``codeml.read``. The membership check uses ``assertIn`` so that a
     failure reports the missing key and the container.
     """
     res_dir = os.path.join(self.results_dir, "codeml", "pairwise")
     for results_file in os.listdir(res_dir):
         version = results_file.split('-')[1].split('.')[0]
         version_msg = ("Improper parsing for version %s"
                        % version.replace('_', '.'))
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         # Pairwise models have an extra top-level item: pairwise
         self.assertEqual(len(results), 5, version_msg)
         self.assertIn("pairwise", results, version_msg)
         pairwise = results["pairwise"]
         self.assertEqual(len(pairwise), 5, version_msg)
Esempio n. 32
0
 def testParseAAPairwise(self):
     """Check the parsed structure of every aa_pairwise result file.

     Each file in ``<results_dir>/codeml/aa_pairwise`` is read with
     ``codeml.read``. Membership checks use ``assertIn`` so that a failure
     reports the missing key and the container.
     """
     res_dir = os.path.join(self.results_dir, "codeml", "aa_pairwise")
     for results_file in os.listdir(res_dir):
         version = results_file.split("-")[1].split(".")[0]
         version_msg = "Improper parsing for version %s" % version.replace("_", ".")
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         # Pairwise AA analysis has one top-level fewer than non-pairwise
         self.assertEqual(len(results), 4, version_msg)
         self.assertIn("lnL max", results, version_msg)
         self.assertIn("distances", results, version_msg)
         distances = results["distances"]
         # Pairwise AA analysis has ML & raw distances
         self.assertEqual(len(distances), 2, version_msg)
Esempio n. 33
0
def get_dn_ds(aligned_phylip, control_file, marker, dr):
    """Obtain codeml results for one marker and pass them to ``rprint``.

    The codeml output path is ``aligned_phylip + "." + marker + "." +
    ".phylip"``.  When that file does not exist, codeml is configured from
    ``control_file`` and run; otherwise the existing file is parsed with
    ``codeml.read``.  Progress is reported on stderr.

    Args:
        aligned_phylip: Alignment path; also the stem of the output path.
        control_file: Control file handed to ``Codeml.read_ctl_file``.
        marker: Label embedded in the output path and passed to ``rprint``.
        dr: Passed through to ``rprint`` unchanged.

    Returns:
        The value returned by ``rprint(results, 'start', marker, dr)``.
    """
    # sys.stderr.write behaves the same on Python 2 and 3, unlike the
    # ``print >> sys.stderr, ...`` statement form.
    sys.stderr.write('marker = in getdnds %s\n' % (marker,))
    # NOTE(review): the name ends in "..phylip" (two dots) because "." is
    # appended right before ".phylip"; kept so existing output files are
    # still found.
    out_path = aligned_phylip + "." + marker + "." + ".phylip"
    if not os.path.isfile(out_path):
        cml = codeml.Codeml(alignment=aligned_phylip, out_file=out_path,
                            working_dir=filehash['ALL']['workdir']['NA'])
        sys.stderr.write('cml= %s\n' % (cml,))
        cml.read_ctl_file(control_file)
        # Return value is unused; presumably a check that the control file
        # defined NSsites -- TODO confirm.
        cml.get_option("NSsites")
        results = cml.run()
    else:
        results = codeml.read(out_path)
        sys.stderr.write('%s\n' % (results,))
    return rprint(results, 'start', marker, dr)
Esempio n. 34
0
 def testParseAAPairwise(self):
     """Pairwise amino-acid results expose "lnL max" and two distance sets."""
     aa_dir = os.path.join(self.results_dir, "codeml", "aa_pairwise")
     for fname in os.listdir(aa_dir):
         # Version tag: text between the first "-" and the next "."
         tag = fname.split("-")[1].split(".")[0]
         msg = f"Improper parsing for version {tag.replace('_', '.')}"
         parsed = codeml.read(os.path.join(aa_dir, fname))
         # One top-level item fewer than in a non-pairwise analysis
         self.assertEqual(len(parsed), 4, msg)
         for key in ("lnL max", "distances"):
             self.assertIn(key, parsed, msg)
         # Both ML and raw distances are expected
         self.assertEqual(len(parsed["distances"]), 2, msg)
Esempio n. 35
0
 def testParseM2arel(self):
     """Check site-class model 22 in every "codeml/m2a_rel" results file.

     The parsed results must contain "NSsites" with an entry 22 that has
     five items and SITECLASS_PARAMS[22] parameters.
     """
     res_dir = os.path.join(self.results_dir, "codeml", "m2a_rel")
     for results_file in os.listdir(res_dir):
         # The version tag is the text between the first '-' and the
         # following '.' of the file name, with '_' standing in for '.'
         version = results_file.split('-')[1].split('.')[0]
         version_msg = ("Improper parsing for version %s"
                        % version.replace('_', '.'))
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         # Pass version_msg here as well so a failure names the version
         self.assertIn("NSsites", results, version_msg)
         self.assertIn(22, results["NSsites"], version_msg)
         model = results["NSsites"][22]
         self.assertEqual(len(model), 5, version_msg)
         params = model["parameters"]
         self.assertEqual(len(params), SITECLASS_PARAMS[22],
                          version_msg)
Esempio n. 36
0
 def testParseNgene2Mgene1(self):
     """Results in "ngene2_mgene1" hold two "genes" entries shaped like models."""
     gene_dir = os.path.join(self.results_dir, "codeml", "ngene2_mgene1")
     for fname in os.listdir(gene_dir):
         # Version tag: text between the first "-" and the next "."
         tag = fname.split("-")[1].split(".")[0]
         msg = f"Improper parsing for version {tag.replace('_', '.')}"
         parsed = codeml.read(os.path.join(gene_dir, fname))
         self.assertEqual(len(parsed), 4, msg)
         self.assertIn("genes", parsed, msg)
         per_gene = parsed["genes"]
         self.assertEqual(len(per_gene), 2, msg)
         # Only entry 0 is inspected in detail
         first = per_gene[0]
         self.assertEqual(len(first), 5, msg)
         self.assertIn("parameters", first, msg)
         self.assertEqual(len(first["parameters"]), SITECLASS_PARAMS[0], msg)
Esempio n. 37
0
 def testParseNgene2Mgene1(self):
     """Check the per-gene layout of "codeml/ngene2_mgene1" results.

     Each file must parse to four top-level items including "genes";
     "genes" must have two entries, and entry 0 must have five items with
     SITECLASS_PARAMS[0] parameters.
     """
     res_dir = os.path.join(self.results_dir, "codeml", "ngene2_mgene1")
     for results_file in os.listdir(res_dir):
         # The version tag is the text between the first "-" and the
         # following "." of the file name, with "_" standing in for "."
         version = results_file.split("-")[1].split(".")[0]
         version_msg = "Improper parsing for version %s" % version.replace("_", ".")
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         self.assertEqual(len(results), 4, version_msg)
         # assertIn reports the missing key and the container on failure
         self.assertIn("genes", results, version_msg)
         genes = results["genes"]
         self.assertEqual(len(genes), 2, version_msg)
         model = genes[0]
         self.assertEqual(len(model), 5, version_msg)
         self.assertIn("parameters", model, version_msg)
         params = model["parameters"]
         self.assertEqual(len(params), SITECLASS_PARAMS[0], version_msg)
Esempio n. 38
0
 def testParsePairwise(self):
     """Every ordered pair of sequences must carry seven pairwise parameters."""
     pw_dir = os.path.join(self.results_dir, "codeml", "pairwise")
     for fname in os.listdir(pw_dir):
         # Version tag: text between the first '-' and the next '.'
         tag = fname.split('-')[1].split('.')[0]
         msg = "Improper parsing for version %s" % tag.replace('_', '.')
         parsed = codeml.read(os.path.join(pw_dir, fname))
         # One more top-level item than usual: the "pairwise" table
         self.assertEqual(len(parsed), 5, msg)
         self.assertIn("pairwise", parsed, msg)
         table = parsed["pairwise"]
         self.assertGreaterEqual(
             len(table), 2, msg + ": should have at least two sequences")
         for first, second in itertools.combinations(table.keys(), 2):
             # The table is checked in both directions for each pair
             for row, col in ((first, second), (second, first)):
                 self.assertEqual(
                     len(table[row][col]), 7,
                     msg + ": wrong number of parameters parsed")
Esempio n. 39
0
 def testParseAA(self):
     """Amino-acid model 0 results: "lnL max" always, "distances" except for 4.1."""
     aa_dir = os.path.join(self.results_dir, "codeml", "aa_model0")
     for fname in os.listdir(aa_dir):
         # Version tag: text between the first "-" and the next "."
         tag = fname.split("-")[1].split(".")[0]
         msg = f"Improper parsing for version {tag.replace('_', '.')}"
         parsed = codeml.read(os.path.join(aa_dir, fname))
         # Top-level items of an amino-acid analysis:
         # 'NSsites', 'model', 'version', 'lnL max', 'distances'.
         # Version 4.1 output does not seem to include the distances.
         lacks_distances = tag == "4_1"
         self.assertEqual(len(parsed), 4 if lacks_distances else 5, msg)
         self.assertIn("lnL max", parsed, msg)
         if lacks_distances:
             continue
         self.assertIn("distances", parsed, msg)
         # A non-pairwise AA analysis only reports raw distances
         self.assertEqual(len(parsed["distances"]), 1, msg)
Esempio n. 40
0
 def testParseSEs(self):
     """Results in "codeml/SE" carry an extra "SEs" entry among model 0's parameters."""
     se_dir = os.path.join(self.results_dir, "codeml", "SE")
     for fname in os.listdir(se_dir):
         # Version tag: text between the first "-" and the next "."
         tag = fname.split("-")[1].split(".")[0]
         msg = f"Improper parsing for version {tag.replace('_', '.')}"
         parsed = codeml.read(os.path.join(se_dir, fname))
         self.assertEqual(len(parsed), 4, msg)
         self.assertIn("NSsites", parsed, msg)
         site_models = parsed["NSsites"]
         # Site class model 0 is the only one that was simulated
         self.assertEqual(len(site_models), 1, msg)
         self.assertIn(0, site_models, msg)
         model0 = site_models[0]
         self.assertEqual(len(model0), 5, msg)
         self.assertIn("parameters", model0, msg)
         model0_params = model0["parameters"]
         # "SEs" is the one additional parameter entry
         self.assertEqual(len(model0_params), SITECLASS_PARAMS[0] + 1, msg)
         self.assertIn("SEs", model0_params, msg)
Esempio n. 41
0
 def testParseSEs(self):
     """Check that "codeml/SE" results include standard errors.

     Each file must parse to four top-level items; "NSsites" must hold
     only model 0, whose parameters have one entry more than
     SITECLASS_PARAMS[0], namely "SEs".
     """
     res_dir = os.path.join(self.results_dir, "codeml", "SE")
     for results_file in os.listdir(res_dir):
         # The version tag is the text between the first "-" and the
         # following "." of the file name, with "_" standing in for "."
         version = results_file.split("-")[1].split(".")[0]
         version_msg = "Improper parsing for version %s" % version.replace("_", ".")
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         self.assertEqual(len(results), 4, version_msg)
         # assertIn reports the missing key and the container on failure
         self.assertIn("NSsites", results, version_msg)
         models = results["NSsites"]
         # Only site class model 0 was simulated
         self.assertEqual(len(models), 1, version_msg)
         self.assertIn(0, models, version_msg)
         model = models[0]
         self.assertEqual(len(model), 5, version_msg)
         self.assertIn("parameters", model, version_msg)
         params = model["parameters"]
         # There should be one new item in the parameters, "SEs"
         self.assertEqual(len(params), SITECLASS_PARAMS[0] + 1, version_msg)
         self.assertIn("SEs", params, version_msg)
Esempio n. 42
0
 def testParseAA(self):
     """Check the top-level layout of "codeml/aa_model0" results.

     Version 4.1 files must parse to four top-level items; all other
     versions to five, including a single-entry "distances".  "lnL max"
     is required in both cases.
     """
     res_dir = os.path.join(self.results_dir, "codeml", "aa_model0")
     for results_file in os.listdir(res_dir):
         # The version tag is the text between the first "-" and the
         # following "." of the file name, with "_" standing in for "."
         version = results_file.split("-")[1].split(".")[0]
         version_msg = "Improper parsing for version %s" % version.replace("_", ".")
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         # Amino Acid analysis has different top-levels:
         # 'NSsites', 'model', 'version', 'lnL max', 'distances'
         # Version 4.1 doesn't seem to produce distances in the results
         # (assertIn reports the missing key and the container on failure)
         if version == "4_1":
             self.assertEqual(len(results), 4, version_msg)
             self.assertIn("lnL max", results, version_msg)
         else:
             self.assertEqual(len(results), 5, version_msg)
             self.assertIn("lnL max", results, version_msg)
             self.assertIn("distances", results, version_msg)
             distances = results["distances"]
             # non-pairwise AA analysis only gives raw distances
             self.assertEqual(len(distances), 1, version_msg)
Esempio n. 43
0
 def testParseNgene2Mgene02(self):
     """Check the layout of "codeml/ngene2_mgene02" results.

     Each file must parse to four top-level items; "NSsites" must hold
     only model 0, whose four parameters include a two-entry "rates".
     """
     res_dir = os.path.join(self.results_dir, "codeml", "ngene2_mgene02")
     for results_file in os.listdir(res_dir):
         # The version tag is the text between the first "-" and the
         # following "." of the file name, with "_" standing in for "."
         version = results_file.split("-")[1].split(".")[0]
         version_msg = "Improper parsing for version %s" % version.replace("_", ".")
         results_path = os.path.join(res_dir, results_file)
         results = codeml.read(results_path)
         self.assertEqual(len(results), 4, version_msg)
         # assertIn reports the missing key and the container on failure
         self.assertIn("NSsites", results, version_msg)
         models = results["NSsites"]
         self.assertEqual(len(models), 1, version_msg)
         self.assertIn(0, models, version_msg)
         model = models[0]
         self.assertEqual(len(model), 5, version_msg)
         self.assertIn("parameters", model, version_msg)
         params = model["parameters"]
         # This type of model has fewer parameters for model 0
         self.assertEqual(len(params), 4, version_msg)
         self.assertIn("rates", params, version_msg)
         rates = params["rates"]
         self.assertEqual(len(rates), 2, version_msg)
Esempio n. 44
0
                IGC_geo) + '_Sim_' + str(sim_num) + '.fasta'
            # Per-simulation control-file and codeml output paths under wk_dir
            ctl_loc = wk_dir + 'geo_' + str(IGC_geo) + '_Sim_' + str(
                sim_num) + '_codeml.ctl'
            out_file = wk_dir + 'unrooted_MG94_geo_' + str(
                IGC_geo) + '_Sim_' + str(sim_num) + '_codeml_output.txt'
            ##            prepare_ctl(tree_loc, seq_loc, out_file, ctl_loc)
            ##            run_paml(wk_dir, ctl_loc)#, "/Users/Xiang/Software/paml4.8/bin/codeml")
            # Names of the two per-tree files, derived from out_file
            out_tree1_file = out_file.replace('_output.txt',
                                              '_tree1_output.txt')
            out_tree2_file = out_file.replace('_output.txt',
                                              '_tree2_output.txt')
            out_tree_files = [out_tree1_file, out_tree2_file]
            # Seperate_codeml_result is defined outside this view; presumably
            # it splits out_file into the two per-tree files -- TODO confirm
            Seperate_codeml_result(out_file, out_tree_files)

            # Only the first tree's results are summarised here
            if os.path.isfile(out_tree1_file):
                codeml_result = codeml.read(out_tree1_file)
                tree1_file = out_file.replace('codeml_output.txt',
                                              'codeml_tree1_est.newick')
                # Save the model-0 tree string on its own line
                with open(tree1_file, 'w+') as f:
                    f.write(codeml_result['NSsites'][0]['tree'] + '\n')

                edge_to_blen, edge_list_1 = get_tree(tree1_file, name_tree_1st)
                # NOTE(review): edge_list_1_fix is bound only when sim_num == 0
                # reaches this branch; later iterations reuse that binding, so
                # a missing tree-1 output for simulation 0 would raise
                # NameError below unless the name is set elsewhere.
                if sim_num == 0:
                    edge_list_1_fix = deepcopy(edge_list_1)
                # Row layout: lnL, kappa, omega, then one value per fixed edge
                summary = [
                    codeml_result['NSsites'][0]['lnL'],
                    codeml_result['NSsites'][0]['parameters']['kappa'],
                    codeml_result['NSsites'][0]['parameters']['omega']
                ]
                summary.extend(
                    [edge_to_blen[edge] for edge in edge_list_1_fix])
Esempio n. 45
0
import time
import glob
from math import sqrt
from rpy2 import robjects

r = robjects.r
# Count the "*.out" files in the current directory.  glob.glob1 is an
# undocumented helper that newer Python versions deprecate; glob.glob with a
# bare pattern matches the same names in the current directory.
# NOTE(review): this rebinds the builtin name ``len`` to an int, so len(...)
# cannot be called later in this module.  The name is kept because the
# script reads it further down.
len = len(glob.glob("*.out"))


def compare_models(m1_lnl, m2_lnl, df):
    """Return the chi-square p-value for two log-likelihoods.

    The statistic is twice the absolute difference of ``m1_lnl`` and
    ``m2_lnl``; the result is one minus the first element of R's
    ``pchisq`` evaluated at that statistic with ``df`` degrees of freedom.
    """
    lrt_stat = 2 * abs(m2_lnl - m1_lnl)
    cdf = robjects.r.pchisq(lrt_stat, df)
    return 1 - cdf[0]


# Parse the codeml output named by the first command-line argument
results = codeml.read(sys.argv[1])
nssites = results.get("NSsites")

# Entries 1, 2, 7 and 8 of "NSsites" and their log-likelihoods
m1, m2, m7, m8 = (nssites.get(model_id) for model_id in (1, 2, 7, 8))
m1_lnl, m2_lnl, m7_lnl, m8_lnl = (
    model.get("lnL") for model in (m1, m2, m7, m8)
)

# Compare 1 with 2 and 7 with 8, each with two degrees of freedom
m2_p_pos = compare_models(m1_lnl, m2_lnl, 2)
m8_p_pos = compare_models(m7_lnl, m8_lnl, 2)

# Publish the p-values and the file count to the R session
r.assign('m2_p_pos', m2_p_pos)
r.assign('m8_p_pos', m8_p_pos)
r.assign('len', len)
import shutil
import time
import glob
from math import sqrt
from rpy2 import robjects

r = robjects.r
# Count the "*.out" files in the current directory.  glob.glob1 is an
# undocumented helper that newer Python versions deprecate; glob.glob with a
# bare pattern matches the same names in the current directory.
# NOTE(review): this rebinds the builtin name ``len`` to an int, so len(...)
# cannot be called later in this module.  The name is kept in case code
# outside this view reads it.
len = len(glob.glob("*.out"))

def compare_models(null_lnl, alt_lnl, df):
    """Return the chi-square p-value for a null/alternative log-likelihood pair.

    The statistic is twice the absolute difference of the two
    log-likelihoods; the result is one minus the first element of R's
    ``pchisq`` evaluated at that statistic with ``df`` degrees of freedom.
    """
    lrt_stat = 2 * abs(null_lnl - alt_lnl)
    cdf = robjects.r.pchisq(lrt_stat, df)
    return 1 - cdf[0]


# First argument: null-model output; second argument: alternative-model output
null_results, alt_results = codeml.read(sys.argv[1]), codeml.read(sys.argv[2])

null_nssites, alt_nssites = (
    null_results.get("NSsites"),
    alt_results.get("NSsites"),
)

#null_model = null_results.get("model")
#alt_model = alt_results.get("model")

# Both runs are read from entry 2 of "NSsites"
null_value, alt_value = null_nssites.get(2), alt_nssites.get(2)
null_lnl, alt_lnl = null_value.get("lnL"), alt_value.get("lnL")

# One degree of freedom separates the two fits
bs_p_pos = compare_models(null_lnl, alt_lnl, 1)
Esempio n. 47
0
def read_results(out_file, hyp):
    """Use the Biopython codeml output parser to read the output file and
    extract all the relevant information.

    Args:
        out_file: Path of the codeml output file to parse.
        hyp: Name of the fitted hypothesis: 'Null', 'Ha1', 'Ha2' or 'Ha3'.

    Returns:
        A dict whose keys start with the lower-cased hypothesis name:
        ``<hyp>_lnl``, ``<hyp>_kappa``, one ``<hyp>_omega...dnds`` entry per
        site class (and per branch label for the Ha models), and
        ``<hyp>_omega<N>prop`` for site classes 0-2.  The lnL value is
        stored as parsed; every other value is a string, and 'NA' stands in
        for anything the parser did not provide.

    Raises:
        ValueError: If ``hyp`` is not one of the four known hypotheses.
    """
    # hyp -> (key prefix, NSsites model number, branch labels).  Branch
    # labels are listed in branch-type order (#0, #1, ...); None means the
    # model reports a single omega per site class.
    layouts = {
        'Null': ('null', 22, None),
        # For this model, #1 is maize/tandem, #0 is grass
        'Ha1': ('ha1', 2, ('grass', 'maize')),
        # For this model, #1 is tandem, #0 is grass/maize
        'Ha2': ('ha2', 2, ('grass', 'tandem')),
        # Recall that for this model, #1 is tandem and #2 is maize
        'Ha3': ('ha3', 2, ('grass', 'tandem', 'maize')),
    }
    if hyp not in layouts:
        raise ValueError(
            "Unknown hypothesis %r; expected one of 'Null', 'Ha1', 'Ha2', "
            "'Ha3'" % (hyp,))
    prefix, model_num, branch_labels = layouts[hyp]

    res = codeml.read(out_file)
    model = res['NSsites'][model_num]
    # A missing 'parameters' entry behaves like an empty one: every derived
    # field falls back to 'NA'.
    params = model['parameters'] if 'parameters' in model else {}
    has_classes = 'site classes' in params
    site_classes = params['site classes'] if has_classes else None

    # extract the values that we are interested in, depending on which model
    # we have fit.
    dat = {
        prefix + '_lnl': model.get('lnL', 'NA'),
        prefix + '_kappa': str(params.get('kappa', 'NA')),
    }
    class_ids = (0, 1, 2)
    # dN/dS values first, grouped by site class ...
    for idx in class_ids:
        if branch_labels is None:
            omega = site_classes[idx].get('omega', 'NA') if has_classes else 'NA'
            dat['%s_omega%ddnds' % (prefix, idx)] = str(omega)
            continue
        for branch, label in enumerate(branch_labels):
            if has_classes:
                omega = site_classes[idx]['branch types'].get(branch, 'NA')
            else:
                omega = 'NA'
            dat['%s_omega%s%ddnds' % (prefix, label, idx)] = str(omega)
    # ... then the proportion of sites in each class.
    for idx in class_ids:
        prop = site_classes[idx].get('proportion', 'NA') if has_classes else 'NA'
        dat['%s_omega%dprop' % (prefix, idx)] = str(prop)
    return dat
import csv

from Bio.Phylo.PAML import codeml

# runparse.sh supplies the following command-line arguments:
#   sys.argv[1] - described as a directory in the original note.
#                 NOTE(review): it is passed unchanged to codeml.read()
#                 further down, so it is presumably the codeml results
#                 file -- confirm.
#   sys.argv[2] - gene ID
#   sys.argv[3] - stored in `filename`

# os.path.join with a single argument returns that argument unchanged
fname = os.path.join(sys.argv[1])
gene = sys.argv[2]
filename = sys.argv[3]

#with open(filename, 'a') as outfile:
#	outfile.write(str(gene) + '-null' +'\t' + 'lnL' + '\t' +  'branch length' + '\t' +  'prop0' + '\t ' + 'background0' + '\t' + 'foreground0'+ '\t' + '\t '+ 'prop1' + 'background1' + '\t' + 'foreground1'+  '\t' + 'prop2' + '\t '+ 'background2' + '\t' + 'foreground2'+  '\t' + 'prop3' + '\t ' 'background3' + '\t' + 'foreground3' + '\n')

results = codeml.read(fname)

# Everything below is read from entry 2 of "NSsites"
_model = results['NSsites'][2]
lnl = _model['lnL']
treel = _model['tree length']

# Per site class: its proportion and the background/foreground values
_classes = _model['parameters']['site classes']

p0 = _classes[0]['proportion']
b0 = _classes[0]['branch types']['background']
f0 = _classes[0]['branch types']['foreground']

p1 = _classes[1]['proportion']
b1 = _classes[1]['branch types']['background']
f1 = _classes[1]['branch types']['foreground']
Esempio n. 49
0
    os.chdir('/Users/xji3/GitFolders/Genconv/PAMLCheck')

if __name__ == '__main__':
    # Summarise codeml tree length and lnL for every pair listed in
    # Filtered_pairs.txt and write them to paml_summary.txt.

    # Each line of the pairs file is split on '_' into its component names
    all_pairs = './Filtered_pairs.txt'
    with open(all_pairs, 'r') as f:
        pairs = [line.replace('\n', '').split('_') for line in f]

##    for paralog in pairs:
##        initialize(paralog)
##        ctl_file = '/Users/xji3/GitFolders/Genconv/PAMLCheck/output/' + '_'.join(paralog) + '/' + '_'.join(paralog) + '_codeml.ctl'
##        run_paml(paralog, ctl_file)

    summary_mat = []
    finished_list = []
    label = ['MG94_codeml_tree_length', 'MG94_codeml_lnL']
    footer = ' '.join(label)

    #pairs = pairs[0:2]
    for pair in pairs:
        pair_name = '_'.join(pair)
        codeml_result = codeml.read('/Users/xji3/GitFolders/Genconv/PAMLCheck/output/' + pair_name + '/' + pair_name + '_codeml_result.txt')
        model = codeml_result['NSsites'][0]
        summary_mat.append([model['tree length'], model['lnL']])
        finished_list.append(pair)

    header = ' '.join(['_'.join(pair) for pair in finished_list])  # column labels
    # Transposed so that each pair is a column and each label a row.  The
    # with-block guarantees the summary file is flushed and closed.
    with open('/Users/xji3/GitFolders/Genconv/PAMLCheck/paml_summary.txt', 'w+') as summary_file:
        np.savetxt(summary_file, np.matrix(summary_mat).T, delimiter=' ', footer=footer, header=header)
                      tree=tree_file,
                      out_file=m0_out)
# Configure the M0 run in a single call
cmlM0.set_options(
    seqtype=1,
    model=0,
    NSsites=[0],
    omega=0.5,
    CodonFreq=2,
    ndata=1,
    fix_alpha=1,
    Small_Diff=5e-7,
)

# Run the M0 model with an explicit path to the codeml binary
cmlM0.run(command="/Users/kmoney/Documents/paml4.9e/bin/codeml")

# Get tree from m0 results
m0result = codeml.read(m0_out)
NSsites_dict = m0result.get("NSsites")
NSsites0_dict = NSsites_dict.get(0)
estimated_tree = NSsites0_dict.get("tree")

# Write tree to output tree file; the with-block closes the file even if
# the write fails (e.g. when no tree was parsed)
with open(estimated_tree_name, "w") as f:
    f.write(estimated_tree)

# Set up the site-models run on the tree estimated above
cml = codeml.Codeml(
    alignment=alignment_file,
    tree=estimated_tree_name,
    out_file=final_out,
)
cml.set_options(seqtype=1, model=0)
Esempio n. 51
0
                
        # Build one summary table per IGC_geo value from 100 simulations.
        # local_tree_num, name_tree and get_tree come from outside this view.
        for IGC_geo in IGC_geo_list:
            label = ['ll', 'kappa', 'omega']
            header = []
            summary_mat = []
            for sim_num in range(100):
                #wk_dir = '/Users/xji3/GitFolders/IGCCodonSimulation/YDR418W_YEL054C/IGCgeo_' + str(IGC_geo) + '/sim_' + str(sim_num) + '/'
                wk_dir = '/Users/xji3/GitFolders/IGCCodonSimulation/YDR418W_YEL054C_estimatedTau/IGCgeo_' + str(IGC_geo) + '/sim_' + str(sim_num) + '/'
                seq_loc = wk_dir + 'YDR418W_YEL054C_MG94_geo_' + str(IGC_geo) + '_Sim_' + str(sim_num) + '.fasta'
                ctl_loc = wk_dir + 'geo_' + str(IGC_geo) + '_Sim_' + str(sim_num) + '_localTree_' + str(local_tree_num) + '_codeml.ctl'
                out_file = wk_dir + 'unrooted_MG94_geo_' + str(IGC_geo) + '_Sim_' + str(sim_num) + '_localTree_' + str(local_tree_num) + '_codeml_output.txt'
##                prepare_ctl(tree_loc, seq_loc, out_file, ctl_loc)
##                run_paml(wk_dir, ctl_loc)#, "/Users/xji3/Software/paml4.8/bin/codeml")
##
                # Simulations without a codeml output file are skipped silently
                if os.path.isfile(out_file):
                    codeml_result = codeml.read(out_file)
                    tree_file = out_file.replace('codeml_output.txt', 'codeml_est.newick')
                    # Save the model-0 tree string on its own line
                    with open(tree_file, 'w+') as f:
                        f.write(codeml_result['NSsites'][0]['tree'] + '\n')

                    edge_to_blen, edge_list = get_tree(tree_file, name_tree)
                    # NOTE(review): edge_list_fix is bound only when sim_num == 0
                    # reaches this branch; later iterations reuse that binding,
                    # so a missing output for simulation 0 would raise
                    # NameError below unless the name is set elsewhere.
                    if sim_num == 0:
                        edge_list_fix = deepcopy(edge_list)
                    # Row layout: lnL, kappa, omega, then one value per fixed edge
                    summary = [codeml_result['NSsites'][0]['lnL'],
                               codeml_result['NSsites'][0]['parameters']['kappa'],
                               codeml_result['NSsites'][0]['parameters']['omega']]
                    summary.extend([edge_to_blen[edge] for edge in edge_list_fix])
                    summary_mat.append(summary)
                    header.append('geo_' + str(IGC_geo) + '_sim_' + str(sim_num))

            
Esempio n. 52
0
        sim_list.extend(range(1, 100))
        # Collect one summary row per simulation from the first tree's codeml
        # output.  IGC_geo, name_tree_1st, get_tree, summary_mat and header
        # come from outside this view.
        for sim_num in sim_list:
            #wk_dir = '/Users/xji3/GitFolders/IGCSimulation/YDR418W_YEL054C/IGCgeo_' + str(IGC_geo) + '/sim_' + str(sim_num) + '/'
            wk_dir = '/Users/xji3/GitFolders/IGCSimulation/YDR418W_YEL054C_estimatedTau/IGCgeo_' + str(IGC_geo) + '/sim_' + str(sim_num) + '/'
            seq_loc = wk_dir + 'YDR418W_YEL054C_MG94_geo_' + str(IGC_geo) + '_Sim_' + str(sim_num) + '.fasta'
            ctl_loc = wk_dir + 'geo_' + str(IGC_geo) + '_Sim_' + str(sim_num) + '_codeml.ctl'
            out_file = wk_dir + 'unrooted_MG94_geo_' + str(IGC_geo) + '_Sim_' + str(sim_num) + '_codeml_output.txt'
            #prepare_ctl(tree_loc, seq_loc, out_file, ctl_loc)
            #run_paml(wk_dir, ctl_loc)#, "/Users/Xiang/Software/paml4.8/bin/codeml")
            # Names of the two per-tree files, derived from out_file
            out_tree1_file = out_file.replace('_output.txt', '_tree1_output.txt')
            out_tree2_file = out_file.replace('_output.txt', '_tree2_output.txt')
            out_tree_files = [out_tree1_file, out_tree2_file]
            # Seperate_codeml_result is defined outside this view; presumably
            # it splits out_file into the two per-tree files -- TODO confirm
            Seperate_codeml_result(out_file, out_tree_files)

            # Only the first tree's results are summarised here
            if os.path.isfile(out_tree1_file):
                codeml_result = codeml.read(out_tree1_file)
                tree1_file = out_file.replace('codeml_output.txt', 'codeml_tree1_est.newick')
                # Save the model-0 tree string on its own line
                with open(tree1_file, 'w+') as f:
                    f.write(codeml_result['NSsites'][0]['tree'] + '\n')

                edge_to_blen, edge_list_1 = get_tree(tree1_file, name_tree_1st)
                # NOTE(review): edge_list_1_fix is bound only when sim_num == 0
                # reaches this branch; later iterations reuse that binding, so
                # a missing tree-1 output for simulation 0 would raise
                # NameError below unless the name is set elsewhere.
                if sim_num == 0:
                    edge_list_1_fix = deepcopy(edge_list_1)
                # Row layout: lnL, kappa, omega, then one value per fixed edge
                summary = [codeml_result['NSsites'][0]['lnL'],
                           codeml_result['NSsites'][0]['parameters']['kappa'],
                           codeml_result['NSsites'][0]['parameters']['omega']]
                summary.extend([edge_to_blen[edge] for edge in edge_list_1_fix])
                summary_mat.append(summary)
                header.append('geo_' + str(IGC_geo) + '_sim_' + str(sim_num))

            edge_to_blen = None                
# Likelihood-ratio comparison of a null and an alternative codeml fit for each
# protein directory prot_1 .. prot_1672.
num_list = range(1, 1673)
for num in num_list:
    # NOTE(review): the except/finally clause that closes this try is not
    # visible in this excerpt.
    try:
        # Per-protein working directory and the file names derived from num.
        wdir = "prot_" + str(num)
        acc_file = "all_prot.part-" + str(num) + "_filtered.acc"
        # First line of the accession file with trailing whitespace removed.
        # NOTE(review): the file object is never closed explicitly; a `with`
        # block would release the handle deterministically.
        acc = str(open(acc_file).readline().rstrip())
        infile_null = wdir + "/paml_results_null.out"
        infile_alt = wdir + "/paml_results_alt.out"
        # outfile2 is only built here; it is not used in the visible lines.
        outfile2 = wdir + "/CodemlCompare_prot" + str(num) + ".txt"

        print(wdir)
        print(acc)

        # read in data and parse it for relevant values
        results_null = codeml.read(infile_null)
        results_alt = codeml.read(infile_alt)

        # Log-likelihood of NSsites model 0 in each fit.
        lnL_null = results_null.get("NSsites").get(0).get("lnL")
        lnL_alt = results_alt.get("NSsites").get(0).get("lnL")

        # Test statistic 2 * (lnL_alt - lnL_null); the p-value is the upper
        # tail of a chi-square distribution with 1 degree of freedom.
        likelihood_ratio = -2 * (lnL_null - lnL_alt)
        p_value = chi2.sf(likelihood_ratio, 1)

        # "omega" of the alternative fit is indexed, so it is expected to be a
        # sequence here: element 1 is stored as arth_omega and element 0 as
        # background_omega -- presumably foreground vs. background branches;
        # confirm the ordering against the tree labelling.
        arth_omega = results_alt.get("NSsites").get(0).get("parameters").get(
            "omega")[1]
        background_omega = results_alt.get("NSsites").get(0).get(
            "parameters").get("omega")[0]
        # dS/dN parameters: the *_0 names come from the null fit, *_2 from the
        # alternative fit.
        dS_0 = results_null.get("NSsites").get(0).get("parameters").get("dS")
        dN_0 = results_null.get("NSsites").get(0).get("parameters").get("dN")
        dS_2 = results_alt.get("NSsites").get(0).get("parameters").get("dS")
Esempio n. 54
0
                # Fragment of a nested loop body (the enclosing loops and
                # function are outside this excerpt). For one combination of
                # IGC_geo, sim_num and local_tree_num it parses the codeml
                # output and assembles one summary row.
                #wk_dir = '/Users/xji3/GitFolders/IGCCodonSimulation/YDR418W_YEL054C/IGCgeo_' + str(IGC_geo) + '/sim_' + str(sim_num) + '/'
                wk_dir = '/Users/xji3/GitFolders/IGCCodonSimulation/YDR418W_YEL054C_estimatedTau/IGCgeo_' + str(
                    IGC_geo) + '/sim_' + str(sim_num) + '/'
                seq_loc = wk_dir + 'YDR418W_YEL054C_MG94_geo_' + str(
                    IGC_geo) + '_Sim_' + str(sim_num) + '.fasta'
                ctl_loc = wk_dir + 'geo_' + str(IGC_geo) + '_Sim_' + str(
                    sim_num) + '_localTree_' + str(
                        local_tree_num) + '_codeml.ctl'
                out_file = wk_dir + 'unrooted_MG94_geo_' + str(
                    IGC_geo) + '_Sim_' + str(sim_num) + '_localTree_' + str(
                        local_tree_num) + '_codeml_output.txt'
                # The control-file preparation and the codeml run are disabled,
                # so seq_loc and ctl_loc are unused in the visible lines and
                # only output that already exists on disk gets summarised.
                ##                prepare_ctl(tree_loc, seq_loc, out_file, ctl_loc)
                ##                run_paml(wk_dir, ctl_loc)#, "/Users/xji3/Software/paml4.8/bin/codeml")
                ##
                if os.path.isfile(out_file):
                    codeml_result = codeml.read(out_file)
                    # Save the estimated tree string (NSsites model 0) to a
                    # newick file so that get_tree can load it from disk.
                    tree_file = out_file.replace('codeml_output.txt',
                                                 'codeml_est.newick')
                    with open(tree_file, 'w+') as f:
                        f.write(codeml_result['NSsites'][0]['tree'] + '\n')

                    edge_to_blen, edge_list = get_tree(tree_file, name_tree)
                    # The edge order of replicate 0 is frozen and reused so
                    # that every row has the same column order.
                    # NOTE(review): edge_list_fix is bound only in this branch;
                    # unless it is also set outside this excerpt, a missing
                    # output file for sim 0 leads to a NameError below.
                    if sim_num == 0:
                        edge_list_fix = deepcopy(edge_list)
                    # Row layout: lnL, kappa, omega, then branch lengths.
                    summary = [
                        codeml_result['NSsites'][0]['lnL'],
                        codeml_result['NSsites'][0]['parameters']['kappa'],
                        codeml_result['NSsites'][0]['parameters']['omega']
                    ]
                    summary.extend(
                        [edge_to_blen[edge] for edge in edge_list_fix])
# Module 10
#
# Exercise script: start external programs from Python and read codeml
# results with Biopython. Comments are kept ASCII-only because the script
# targets Python 2.7, which rejects non-ASCII source bytes (even inside
# comments) unless the file carries an encoding declaration.
#
# 1. Run another python script from within python
import os
os.system("python /home/arina/other_script.py")
# 2. Create a new file with the ">" shell redirection operator
os.system("python /home/arina/other_script.py > logfile")
# 3. Run the program "codeml" (in the subfolder paml). It requires an inputfile,
# which you can copy or explicitly define as argument
from Bio.Phylo.PAML import codeml
cml = codeml.Codeml(alignment = "/home/arina/codeml/alignment.phylip", tree = "/home/arina/codeml/species.tree", out_file = "results.out", working_dir = "/home/arina/codeml/")
# NOTE(review): run() starts the external PAML "codeml" executable. The
# errors quoted below are what one would expect when that binary is not on
# PATH (Errno 2) and when `command` points at Biopython's codeml.py wrapper
# module instead of the compiled PAML program (Errno 13) -- confirm where
# PAML is installed and pass that path as `command`.
results = cml.run()
''' >> Now here I stuck! :( I am getting an error - OSError: [Errno 2] No such file or directory
 and when trying to indicate path to codeml explicitly 
 results = cml.run(command="/usr/local/lib/python2.7/dist-packages/Bio/Phylo/PAML/codeml.py")
 getting error - OSError: [Errno 13] Permission denied '''

# 4. Obtain the likelihood of the output file of codeml (file codeml_output,
# it's indicated on the line with lnL = ) using a script that prints the likelihood
results = codeml.read("results.out")
# The value from the "lnL(...)" line is stored per site model under
# results["NSsites"][<model number>]["lnL"]. The top-level "lnL max" entry is
# the unconstrained "ln Lmax" figure, which is a different number.
# Key 0 is the one-ratio site model, which is what codeml fits when no
# NSsites option is set -- adjust the key if another site model is configured.
# Chained .get() calls with empty-dict defaults keep the original behaviour
# of printing None instead of raising when the entry is missing.
model_results = results.get("NSsites", {}).get(0, {})
print(model_results.get("lnL"))
Esempio n. 56
0
# codeml control-file options, applied in one call instead of one call each.
cml.set_options(
    seqtype=1,
    omega=0.4,
    getSE=0,
    noisy=3,
    Mgene=0,
    kappa=2,
    model=0,
    ndata=1,
)

##### PROGRAM #####

# Entries in the current working directory whose names end in 'PAML'.
paml_folders = glob.glob('*PAML')
# 1-based folder numbers, one per matched entry: 1 .. len(paml_folders).
paml_folder = list(range(1, len(paml_folders) + 1))

# Run codeml once per numbered PAML folder and parse the output it writes.
for folder in paml_folder:
	# Folder names are the folder number followed by 'PAML', e.g. 3 -> '3PAML'.
	folder_name = ('%s' + 'PAML') % folder
	# Absolute path of the folder; this only builds a string, it does not
	# change the process working directory.
	curr_dir = "C:\\analysis\\" + folder_name
	# Strip the 4-character 'PAML' suffix, leaving the number as a string.
	curr_test = folder_name[:-4]
	# Alignment and tree file names are derived from that number.
	aln_file = curr_test + 'fix.afa'
	tree_file = curr_test + '.ph'
	
	# Point the shared Codeml object at this folder's inputs; the output file
	# and the working directory both live in its 'analysis' subfolder.
	cml.alignment = curr_dir + "\\" + aln_file
	cml.tree = curr_dir + "\\" + tree_file 
	cml.out_file = curr_dir + "\\analysis\\output.txt"
	cml.working_dir = curr_dir + "\\analysis"
	
	# Run the codeml executable at an explicit path, with verbose=True.
	cml.run(verbose = True, command = "C:\\analysis\\Phylogenetic\\paml4.8\\bin\\codeml.exe")
															
	# Parse the output file that was just written.
	results = codeml.read(cml.out_file)