def run_yn00(input_phy, yn00_binary):
    """Run yn00 on one alignment file and hand back the outcome of the run.

    Args:
        input_phy: Path of the alignment file; the yn00 output file is
            ``input_phy + ".out"``.
        yn00_binary: Command (or path) used to invoke the yn00 executable.

    Returns:
        Whatever ``Yn00.run`` returns for this configuration.
    """
    runner = yn00.Yn00()
    # Attributes are assigned after construction on purpose: this mirrors the
    # original flow and does not route the path through the constructor.
    runner.alignment = input_phy
    runner.out_file = input_phy + ".out"
    runner.working_dir = "./"
    runner.set_options(commonf3x4=1)

    print("Analyzing " + input_phy)
    return runner.run(command=yn00_binary, verbose=False)
def run_yn00(self):
    """Run yn00 on ``self.mrtrans`` and return the result, or None on failure.

    Reads ``self.mrtrans`` (alignment path), ``self.pair_yn`` (output file)
    and ``self.yn00_path`` (command used to invoke yn00).

    Returns:
        Whatever ``Yn00.run`` returns, or ``None`` if the run raised an error.
    """
    yn = yn00.Yn00()
    yn.alignment = self.mrtrans
    yn.out_file = self.pair_yn
    yn.set_options(icode=0, commonf3x4=0, weighting=0, verbose=1)
    try:
        return yn.run(command=self.yn00_path)
    except Exception:
        # Best-effort: a failed run is reported to the caller as None.
        # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate instead of being silently swallowed.
        return None
def RunYn00(yn_path, alignment):
    """Run yn00 on an untranslated alignment and write the output to a file.

    The output file is named ``<alignment>.yn00``. The result is not parsed
    (``parse=False``), so nothing is returned.

    Args:
        yn_path: Command (or path) used to invoke the yn00 executable.
        alignment: Path of the alignment file to analyse.
    """
    yn = yn00.Yn00(alignment=alignment, out_file="{0}.yn00".format(alignment))
    yn.set_options(verbose=0, icode=0, weighting=0, commonf3x4=0)
    try:
        yn.run(ctl_file=None, command=yn_path, parse=False)
    except PamlError as e:
        # A PAML failure is reported and otherwise ignored so that a caller
        # processing many alignments can carry on with the next one.
        # The parenthesised single-argument form prints identically on
        # Python 2 and is valid syntax on Python 3.
        print("{0}, {1} may have internal stop codons.".format(e, alignment))
def run_yn00(input_phy, yn00_binary="/Users/longtian/Desktop/paml4.8/bin/yn00"):
    """Run yn00 on one alignment file and return the result of the run.

    Args:
        input_phy: Path of the alignment file; the yn00 output file is
            ``input_phy + ".out"``.
        yn00_binary: Command (or path) used to invoke the yn00 executable.
            NOTE(review): the default is a machine-specific absolute path;
            callers on other machines must pass this explicitly.

    Returns:
        Whatever ``Yn00.run`` returns for this configuration.
    """
    yn = yn00.Yn00()
    yn.alignment = input_phy
    yn.out_file = input_phy + ".out"
    yn.working_dir = "./"
    yn.set_options(commonf3x4=1)
    # Call form of print: same output on Python 2, valid syntax on Python 3.
    print("Analyzing " + input_phy)
    return yn.run(command=yn00_binary, verbose=True)
def runYn00(ogFastaPath2, allCDSpath, wd):
    """Run yn00 for each FASTA file and collect pairwise omega, dN and dS.

    For every path in ``ogFastaPath2`` this writes a protein file with
    shortened sequence ids, extracts the matching CDS records from
    ``allCDSpath``, aligns them with ``cdsAlign``, builds the yn00 input with
    ``runPAL2NAL`` and runs yn00 on it.

    Args:
        ogFastaPath2: Iterable of FASTA file paths, one per group to analyse.
        allCDSpath: FASTA file holding the CDS records to select from.
        wd: Directory prefix for all generated files. It is concatenated
            directly with file names, so it presumably needs a trailing
            path separator (confirm against callers).

    Returns:
        A tuple ``(Omega, Dn, Ds)`` of dicts keyed by ``"gene1-gene2"``,
        holding the ``"YN00"`` method's ``omega``, ``dN`` and ``dS`` values.
    """
    Omega = {}
    Dn = {}
    Ds = {}
    for ogFastaPath in ogFastaPath2:
        prefix = os.path.basename(ogFastaPath).split(".")[0]

        # Write the protein sequences under shortened ids (3rd and 4th
        # underscore-separated fields of the original id).
        PEP_aln = wd + prefix + ".pep.aln"
        protein_ids = set()  # only used for membership tests below
        with open(PEP_aln, "w+") as pep_handle:
            for record in SeqIO.parse(ogFastaPath, "fasta"):
                fields = record.id.split("_")
                short_id = fields[2] + "_" + fields[3]
                protein_ids.add(short_id)
                pep_handle.write(">" + short_id + "\n" + str(record.seq) + "\n")

        # Generate the CDS .fasta file restricted to the ids written above.
        CDSfile = wd + prefix + ".cds"
        records = (r for r in SeqIO.parse(allCDSpath, "fasta") if r.id in protein_ids)
        SeqIO.write(records, CDSfile, "fasta")

        # Align coding regions; output goes to the .aln file.
        CDS_aln = CDSfile + ".aln"
        cdsAlign(CDSfile=CDSfile, outfile=CDS_aln)

        # Generate the .nuc file that yn00 takes as input.
        nuc_file = wd + prefix + ".nuc"
        runPAL2NAL(PEP_aln=PEP_aln, CDS_aln=CDS_aln, outfile=nuc_file)

        # Run yn00 in PAML.
        yn00_res = wd + prefix + "_yn00.txt"
        yn = yn00.Yn00(alignment=nuc_file, out_file=yn00_res, working_dir=wd)
        yn.set_options(verbose=True)
        Yn00_results = yn.run(verbose=True)

        # Record each pair once: a gene is marked as seen before its partners
        # are visited, which skips self-pairs and partners handled earlier.
        seen = set()
        for gene1 in Yn00_results:
            seen.add(gene1)
            for gene2 in Yn00_results[gene1]:
                if gene2 in seen:
                    continue
                pair = "{0}-{1}".format(gene1, gene2)
                stats = Yn00_results[gene1][gene2]["YN00"]
                Omega[pair] = stats["omega"]
                Dn[pair] = stats["dN"]
                Ds[pair] = stats["dS"]
    return Omega, Dn, Ds
def yn00(): from Bio.Phylo.PAML import yn00 tests = ["yn00"] alignment = os.path.join("Alignments", "alignment.phylip") for test in tests: print test[0] yn = yn00.Yn00() for version in VERSIONS: print "\t{0}".format(version.replace('_', '.')) ctl_file = os.path.join("Control_files", "yn00", "{0}.ctl".format(test)) yn.read_ctl_file(ctl_file) yn.alignment = alignment out_file = "{0}-{1}.out".format(test, version) yn.out_file = os.path.join("Results", "yn00", out_file) bin = "yn00{0}".format(version) yn.run(command=bin, verbose=VERBOSE)
def yn00(vers=None, verbose=False): from Bio.Phylo.PAML import yn00 if vers is not None: versions = [vers] else: versions = VERSIONS tests = ["yn00", "yn00_long", "yn00_dotted", "yn00_dottednum"] for test in tests: print(test) yn = yn00.Yn00() for version in versions: print(f"\t{version.replace('_', '.')}") ctl_file = os.path.join("Control_files", "yn00", f"{test}.ctl") yn.read_ctl_file(ctl_file) out_file = f"{test}-{version}.out" yn.out_file = os.path.join("Results", "yn00", out_file) bin = f"yn00{version}" yn.run(command=bin, verbose=verbose, parse=False)
def yn00(vers=None, verbose=False): from Bio.Phylo.PAML import yn00 if vers is not None: versions = [vers] else: versions = VERSIONS tests = ["yn00", "yn00_long", "yn00_dotted", "yn00_dottednum"] for test in tests: print(test) yn = yn00.Yn00() for version in versions: print("\t{0}".format(version.replace('_', '.'))) ctl_file = (os.path.join("Control_files", "yn00", "{0}.ctl".format(test))) yn.read_ctl_file(ctl_file) out_file = "{0}-{1}.out".format(test, version) yn.out_file = os.path.join("Results", 'yn00', out_file) bin = "yn00{0}".format(version) yn.run(command=bin, verbose=verbose, parse=False)
def yn00(vers=None, verbose=False): from Bio.Phylo.PAML import yn00 if vers is not None: versions = [vers] else: versions = VERSIONS tests = ["yn00"] alignment = os.path.join("Alignments", "alignment.phylip") for test in tests: print(test[0]) yn = yn00.Yn00() for version in versions: print("\t{0}".format(version.replace('_', '.'))) ctl_file = os.path.join("Control_files", "yn00", "{0}.ctl".format(test)) yn.read_ctl_file(ctl_file) yn.alignment = alignment out_file = "{0}-{1}.out".format(test, version) yn.out_file = os.path.join("Results", "yn00", out_file) bin = "yn00{0}".format(version) yn.run(command=bin, verbose=verbose)
def setUp(self):
    """Create a new ``Yn00`` object and store it on ``self.yn00``."""
    self.yn00 = yn00.Yn00()
] if len(has_stop): #sys.stderr.write("\nfound stop codon at {} in sequence '{}'\n".format(has_stop[0], s.id)) first_stop = has_stop[0] fa = fa[:, 0:first_stop] trimmed_len = len(fa[0]) with open(seq_converted, "w") as ph: AlignIO.write(fa, ph, "phylip-sequential") ## print some diagnostics to stderr sys.stderr.write("\nWorking directory: {}\n".format(wd)) sys.stderr.write("Input file: {} (length {})\n".format(args.sequences, untrimmed_len)) sys.stderr.write("Converted to phylip: {} (length {})\n\n".format( seq_converted, trimmed_len)) ## run yn00 yn = yn00.Yn00(alignment=seq_converted, working_dir=wd, out_file="results.out") rez = yn.run(verbose=False) ## get results for all sequence pairs ph = AlignIO.read(seq_converted, "phylip-relaxed") ids = [s.id for s in ph] for s1, s2 in itertools.combinations(ids, 2): omega = rez[s1][s2][args.method]["omega"] if omega < 0 or omega > 10: omega = "NA" print s1, s2, omega, abs(rez[s1][s2][args.method]["dN"]), abs( rez[s1][s2][args.method]["dS"])
#Run PAML yn00 for each PHYLIP alignment and create the final output table output_yn00 = open(organism + "_yn00_finalresult.csv", "w") output_yn00.write("Seq1\tSeq2\tdS\tdS SE\tdN\tdN SE\n") for i in inparalogs_dic.keys(): if taxons[organism] in i: copy1 = i.replace(taxons[organism], "") copy2 = inparalogs_dic[i].replace(taxons[organism], "") if copy1 in organism_fasta and copy2 in organism_fasta: seq1 = organism_fasta[copy1] seq2 = organism_fasta[copy2] namealnphy = copy1 + "_" + copy2 + ".aln.phy" print("Runing yn00 for", namealnphy) #run PAML yn00 software yn = yn00.Yn00() yn.alignment = namealnphy yn.out_file = "yn_out.txt" yn.working_dir = "./" yn.commonf3x4 = 1 yn.weighting = None yn.icode = None yn.ndata = None yn.verbose = None yn.run() result = yn00.read("yn_out.txt")[copy1] for uu in result.values(): ds = str(uu["YN00"]["dS"]) dsse = str(uu["YN00"]["dS SE"]) dn = str(uu["YN00"]["dN"]) dnse = str(uu["YN00"]["dN SE"])