def testParseAllVersions(self):
    """Check the parsed structure of every ``yn00-*`` results file.

    Each file matching ``<self.results_dir>/yn00/yn00-*`` is read with
    ``yn00.read``. The parsed result must contain 5 top-level entries, its
    "Homo_sapie" entry must contain 4 entries, and the nested "Pan_troglo"
    entry must contain 5 entries.
    """
    version_glob = os.path.join(self.results_dir, "yn00", "yn00-*")
    for path in glob.glob(version_glob):
        parsed = yn00.read(path)
        self.assertEqual(len(parsed), 5)
        human = parsed["Homo_sapie"]
        self.assertEqual(len(human), 4)
        self.assertEqual(len(human["Pan_troglo"]), 5)
def testParseAllVersions(self):
    """Read all ``yn00-*`` outputs and verify the size of each level.

    Files are looked up under ``<self.results_dir>/yn00``. For every file
    the result of ``yn00.read`` must have length 5, the "Homo_sapie" entry
    length 4, and the "Homo_sapie" -> "Pan_troglo" entry length 5.
    """
    matches = glob.glob(os.path.join(self.results_dir, "yn00", "yn00-*"))
    # The generator parses one file at a time, right before it is checked.
    for table in (yn00.read(filename) for filename in matches):
        self.assertEqual(len(table), 5)
        self.assertEqual(len(table["Homo_sapie"]), 4)
        self.assertEqual(len(table["Homo_sapie"]["Pan_troglo"]), 5)
def testParseAllVersions(self):
    """Parse every entry of the ``yn00`` results folder and check its shape.

    Every name returned by ``os.listdir`` for ``<self.results_dir>/yn00`` is
    passed (joined to the folder) to ``yn00.read``. The parsed result must
    contain 5 entries, "Homo_sapie" must contain 4, and the nested
    "Pan_troglo" entry must contain 5.
    """
    yn00_dir = os.path.join(self.results_dir, "yn00")
    for entry in os.listdir(yn00_dir):
        parsed = yn00.read(os.path.join(yn00_dir, entry))
        self.assertEqual(len(parsed), 5)
        human = parsed["Homo_sapie"]
        self.assertEqual(len(human), 4)
        self.assertEqual(len(human["Pan_troglo"]), 5)
def testParseAllVersions(self):
    """Check every ``yn00*`` file in ``PAML/Results/yn00/versions``.

    Directory entries that are not regular files, or whose name does not
    begin with "yn00", are ignored. Each remaining file is read with
    ``yn00.read``; the result must contain 5 entries, "Homo_sapie" must
    contain 4, and the nested "Pan_troglo" entry must contain 5.
    """
    versions_dir = os.path.join("PAML", "Results", "yn00", "versions")
    for name in os.listdir(versions_dir):
        candidate = os.path.join(versions_dir, name)
        # Skip sub-directories and anything not named like a yn00 output.
        if not (os.path.isfile(candidate) and name.startswith("yn00")):
            continue
        parsed = yn00.read(candidate)
        self.assertEqual(len(parsed), 5)
        self.assertEqual(len(parsed["Homo_sapie"]), 4)
        self.assertEqual(len(parsed["Homo_sapie"]["Pan_troglo"]), 5)
def testParseLongNames(self):
    """Check the parsed structure of every ``yn00_long-*`` results file.

    Each file matching ``<self.results_dir>/yn00/yn00_long-*`` is read with
    ``yn00.read`` and the sizes of all three nesting levels are verified.
    """
    pattern = os.path.join(self.results_dir, "yn00", "yn00_long-*")
    for results_file in glob.glob(pattern):
        results = yn00.read(results_file)
        # Expect seven taxa...
        self.assertEqual(len(results), 7)
        # ...each of which is compared to the other six...
        self.assertEqual({len(v) for v in results.values()}, {6})
        # ...and each comparison has five measures.
        # Set comprehensions collapse the per-entry sizes into the set of
        # distinct sizes, so a single unexpected size fails the check.
        self.assertEqual(
            {len(v) for taxa in results.values() for v in taxa.values()},
            {5},
        )
def testParseAllVersions(self):
    """Verify all ``yn00``-prefixed files under ``PAML/Results/yn00/versions``.

    Only regular files whose first four characters are "yn00" are parsed
    with ``yn00.read``. For each of them the result must hold 5 entries,
    the "Homo_sapie" entry 4, and "Homo_sapie" -> "Pan_troglo" 5.
    """
    root = os.path.join("PAML", "Results", "yn00", "versions")
    for entry in os.listdir(root):
        full_path = os.path.join(root, entry)
        # Ignore anything that is not a regular yn00* file.
        if not os.path.isfile(full_path) or entry[:4] != "yn00":
            continue
        table = yn00.read(full_path)
        self.assertEqual(len(table), 5)
        homo = table["Homo_sapie"]
        self.assertEqual(len(homo), 4)
        self.assertEqual(len(homo["Pan_troglo"]), 5)
def testParseDottedNumNames(self):
    """Check the parsed structure of every ``yn00_dottednum-*`` file.

    Each file matching ``<self.results_dir>/yn00/yn00_dottednum-*`` is read
    with ``yn00.read`` and the sizes of all three nesting levels are
    verified.
    """
    dotted_glob = os.path.join(self.results_dir, "yn00", "yn00_dottednum-*")
    for path in glob.glob(dotted_glob):
        by_taxon = yn00.read(path)

        # Seven taxa are expected at the top level.
        self.assertEqual(len(by_taxon), 7)

        # Every taxon is compared to the six others.
        partner_counts = {len(partners) for partners in by_taxon.values()}
        self.assertEqual(partner_counts, {6})

        # Every comparison holds five measures.
        measure_counts = {
            len(measures)
            for partners in by_taxon.values()
            for measures in partners.values()
        }
        self.assertEqual(measure_counts, {5})
def SummarizeYn00(refine=False):
    """
    Summarize yn00 results for core and accessory gene clusters and write to file.

    The ``Core*.fna.aln.yn00`` and ``Acc*.fna.aln.yn00`` files below
    ``./panoct/clusters`` (below ``./panoct/clusters/refined`` when *refine*
    is true) are parsed with ``yn00.read``. Per cluster the following are
    collected: the component name taken from the file name, the number of
    parsed entries, the first truthy kappa value, and half the number of
    pairs with omega > 1 (pairs with dS equal to 0 and omega equal to 99.0
    are not counted). Clusters with exactly one parsed entry keep ``None``
    for kappa and the omega count.

    The table is written tab-separated to
    ``./panoct/clusters/yn00_summary.txt``; afterwards the listed yn00
    scratch files are deleted from the current directory if they exist.
    """
    if refine:
        core_hits = glob(
            "./panoct/clusters/refined/core/fna/Core*.fna.aln.yn00")
        acc_hits = glob(
            "./panoct/clusters/refined/acc/fna/Acc*.fna.aln.yn00")
    else:
        core_hits = glob("./panoct/clusters/core/fna/Core*.fna.aln.yn00")
        acc_hits = glob("./panoct/clusters/acc/fna/Acc*.fna.aln.yn00")

    results = {}
    for cluster in core_hits + acc_hits:
        # The text before the first "_" ends with the component name; the
        # text after it starts with the cluster number.
        pieces = cluster.split("_")
        cl_number = pieces[1].split(".")[0]
        cl_comp = pieces[0].split("/")[-1]

        summary = {
            "Component": cl_comp,
            "Size": None,
            "Kappa": None,
            "Omega > 1": None,
        }
        results[cl_number] = summary

        try:
            yn = yn00.read(cluster)
            summary["Size"] = len(yn)
            if len(yn) != 1:
                hits = 0
                for partners in yn.values():
                    for comparison in partners.values():
                        pair = comparison["YN00"]
                        if not summary["Kappa"]:
                            summary["Kappa"] = pair["kappa"]
                        excluded = (pair["dS"] == -0.0
                                    and pair["omega"] == 99.0)
                        if not excluded and pair["omega"] > 1.0:
                            hits += 1
                # NOTE(review): presumably halved because each pair shows up
                # in both directions in the parsed result -- confirm.
                summary["Omega > 1"] = hits / 2
        except (IndexError, ValueError):
            # Best effort: leave whatever was collected so far for this
            # cluster and carry on with the next one.
            pass

    with open("./panoct/clusters/yn00_summary.txt", "w") as output:
        output.write("Cluster\tComponent\tSize\tKappa\tOmega > 1\n")
        for number, row in results.items():
            output.write("{0}\t{1}\t{2}\t{3}\t{4}\n".format(
                str(number), row["Component"], row["Size"], row["Kappa"],
                row["Omega > 1"]))

    # Remove the scratch files left in the current directory.
    for leftover in ("rub", "rst1", "rst", "yn00.ctl", "2YN.dS", "2YN.dN",
                     "2YN.t"):
        if os.path.isfile(leftover):
            os.remove(leftover)
def testParseDottedNames(self):
    """Check the parsed structure of every ``yn00_dotted-*`` results file.

    Each file matching ``<self.results_dir>/yn00/yn00_dotted-*`` is read
    with ``yn00.read``. The sizes of all three nesting levels are verified,
    followed by a spot check of the "Homo.sapie" taxon and its "Pan.troglo"
    comparison.
    """
    pattern = os.path.join(self.results_dir, "yn00", "yn00_dotted-*")
    for results_file in glob.glob(pattern):
        results = yn00.read(results_file)
        # Expect five taxa...
        self.assertEqual(len(results), 5)
        # ...each of which is compared to the other four...
        self.assertEqual({len(v) for v in results.values()}, {4})
        # ...and each comparison has five measures.
        self.assertEqual(
            {len(v) for taxa in results.values() for v in taxa.values()},
            {5},
        )
        # Spot-check one taxon and one of its pairwise comparisons.
        self.assertEqual(len(results["Homo.sapie"]), 4)
        self.assertEqual(len(results["Homo.sapie"]["Pan.troglo"]), 5)
def testParseDottedNames(self):
    """Verify yn00 parsing of results whose taxon names contain dots.

    All files matching ``<self.results_dir>/yn00/yn00_dotted-*`` are read
    with ``yn00.read``. For each one the number of taxa, the number of
    comparisons per taxon and the number of measures per comparison are
    checked, then the "Homo.sapie" / "Pan.troglo" entries are looked up
    directly.
    """
    pattern = os.path.join(self.results_dir, "yn00", "yn00_dotted-*")
    for results_file in glob.glob(pattern):
        results = yn00.read(results_file)
        # Expect five taxa...
        self.assertEqual(len(results), 5)
        # ...each of which is compared to the other four...
        self.assertEqual({len(v) for v in results.values()}, {4})
        # ...and each comparison has five measures.
        self.assertEqual(
            {len(v) for taxa in results.values() for v in taxa.values()},
            {5},
        )
        # Direct lookup by dotted taxon name must work as well.
        self.assertEqual(len(results["Homo.sapie"]), 4)
        self.assertEqual(len(results["Homo.sapie"]["Pan.troglo"]), 5)
import os from sys import argv from Bio.Phylo.PAML import yn00 script, folder = argv folder_in = os.listdir(folder) oup = open("pairwise_dNdS.txt", "w") oup.write("file\tpair\tdNdS\tdN\tdS\n") for inl in folder_in: print inl result = yn00.read("%s/%s" % (folder, inl)) for k in result.keys(): if "BSIN" in k: dNx = dSx = Wx = 0 for x in result[k].keys(): if "WS68" in x: dNx = result[k][x]["YN00"]["dN"] dSx = result[k][x]["YN00"]["dS"] Wx = result[k][x]["YN00"]["omega"] oup.write("%s\tBSIN_WS68\t%r\t%r\t%r\n" % (inl, Wx, dNx, dSx)) else: pass elif "AQ15" in k: dNy = dSy = Wy = 0 for x in result[k].keys():
if taxons[organism] in i: copy1 = i.replace(taxons[organism], "") copy2 = inparalogs_dic[i].replace(taxons[organism], "") if copy1 in organism_fasta and copy2 in organism_fasta: seq1 = organism_fasta[copy1] seq2 = organism_fasta[copy2] namealnphy = copy1 + "_" + copy2 + ".aln.phy" print("Runing yn00 for", namealnphy) #run PAML yn00 software yn = yn00.Yn00() yn.alignment = namealnphy yn.out_file = "yn_out.txt" yn.working_dir = "./" yn.commonf3x4 = 1 yn.weighting = None yn.icode = None yn.ndata = None yn.verbose = None yn.run() result = yn00.read("yn_out.txt")[copy1] for uu in result.values(): ds = str(uu["YN00"]["dS"]) dsse = str(uu["YN00"]["dS SE"]) dn = str(uu["YN00"]["dN"]) dnse = str(uu["YN00"]["dN SE"]) output_yn00.write(copy1 + "\t" + copy2 + "\t" + ds + "\t" + dsse + "\t" + dn + "\t" + dnse + "\n") os.remove(namealnphy)