コード例 #1
0
ファイル: preprocess.py プロジェクト: jwayne/mol455
def run_pal2nal(fname_aln, fname_nuc, fname_prot):
    """
    Generate a codon alignment via PAL2NAL.

    @param fname_aln:
        MSA of protein sequences in CLUSTAL format (.aln)
    @param fname_nuc:
        Nucleotide sequences in FASTA format (.fasta)
    @param fname_prot:
        Protein sequences in FASTA format (.fasta).  Records are paired with
        those of fname_nuc by position, so both files must list the
        sequences in the same order.
    @return:
        Name of the codon alignment file ("homologs.codon.aln"), produced by
        pal2nal.pl with "-output paml", suitable for codeml
    """
    # Local import: keeps this function self-contained regardless of the
    # module's import block.
    import subprocess

    sys.stderr.write("\nSTEP: run_pal2nal(%s, %s)\n" % (fname_aln, fname_nuc))

    # Reorder fname_nuc according to the order of the proteins in fname_aln, which
    # was reordered due to CLUSTALW2.  Note that the first protein in each of
    # these files remains the same as at the start, however; this first protein
    # is our original query protein.
    nuc_records = list(SeqIO.parse(fname_nuc, "fasta"))
    prot_records = list(SeqIO.parse(fname_prot, "fasta"))
    if len(nuc_records) != len(prot_records):
        # zip() below would silently drop the unmatched tail; make that visible.
        sys.stderr.write(
            "\tWARNING: %d nucleotide records but %d protein records; "
            "unpaired records are ignored\n" % (len(nuc_records), len(prot_records))
        )
    # Map each protein id to the nucleotide record at the same position.
    records_map = dict((pr.id, nr) for pr, nr in zip(prot_records, nuc_records))

    fname_nuc2 = "homologs_ordered.dna.fasta"
    with open(fname_nuc2, "w") as f:
        for record in SeqIO.parse(fname_aln, "clustal"):
            SeqIO.write(records_map[record.id], f, "fasta")

    fname_codon = "homologs.codon.aln"
    # Pass an argument list (no shell) so file names containing spaces or
    # shell metacharacters reach pal2nal.pl intact; its stdout is captured
    # directly into the output file instead of via a shell redirect.
    cmd = ["%s/pal2nal.pl" % bin_dir(), fname_aln, fname_nuc2, "-output", "paml"]
    with open(fname_codon, "w") as f_codon:
        returncode = subprocess.call(cmd, stdout=f_codon)
    if returncode != 0:
        sys.stderr.write(
            "\tWARNING: pal2nal.pl exited with status %d; %s may be incomplete\n"
            % (returncode, fname_codon)
        )
    return fname_codon
コード例 #2
0
ファイル: preprocess.py プロジェクト: jwayne/mol455
def run_clustalw2(fname_prot):
    """
    Generate a MSA of the amino acids (in fasta format) via clustalw.

    Side effects: writes the alignment to "homologs.aa.aln" and clustalw2's
    stderr to "clustalw2.log" in the current directory, then deletes the
    ".dnd" file named after fname_prot.

    @param fname_prot:
        Protein sequences in FASTA format (.fasta)
    @return:
        MSA of protein sequences in CLUSTAL format (.aln)
    """
    sys.stderr.write("\nSTEP: run_clustalw2(%s)\n" % fname_prot)

    fname_aln = "homologs.aa.aln"
    fname_log = "clustalw2.log"
    sys.stderr.write("\tRunning clustalw2, please be patient (may take minutes)...\n")
    # Argument list without a shell: paths containing spaces or shell
    # metacharacters are passed through verbatim.
    cmd = [
        "%s/clustalw2" % bin_dir(),
        "-INFILE=%s" % os.path.abspath(fname_prot),
        "-OUTFILE=%s" % os.path.abspath(fname_aln),
    ]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stdout is drained (so the child cannot block on a full pipe) but discarded.
    _, stderr = proc.communicate()
    # Binary mode: communicate() returns bytes on Python 3, which a text-mode
    # file would reject.
    with open(fname_log, "wb") as f_log:
        f_log.write(stderr)
    log_path = os.path.abspath(fname_log)
    if proc.returncode == 0:
        sys.stderr.write("\tclustalw2 run successful, log available at %s\n" % log_path)
    else:
        sys.stderr.write(
            "\tWARNING: clustalw2 exited with status %d, log available at %s\n"
            % (proc.returncode, log_path)
        )

    # Remove the extra file...
    # (presumably a by-product clustalw2 leaves next to the input; its name is
    # the input name with the last extension, if any, replaced by ".dnd")
    if "." in os.path.split(fname_prot)[-1]:
        fname_dnd = fname_prot.rsplit(".", 1)[0] + ".dnd"
    else:
        fname_dnd = fname_prot + ".dnd"
    os.remove(fname_dnd)

    return fname_aln
コード例 #3
0
ファイル: preprocess.py プロジェクト: jwayne/mol455
def run_codeml(fname_ctl):
    """
    Run codeml on a control file.

    codeml inherits this process's stdout/stderr, so its console output
    appears directly on the terminal; nothing is captured or returned.

    @param fname_ctl:
        Path of the control file, passed to codeml as its only argument
    @return:
        None
    """
    # Local imports: keep this function self-contained regardless of the
    # module's import block.
    import subprocess
    import sys

    # Argument list without a shell: a control-file path containing spaces or
    # shell metacharacters is passed through verbatim.
    returncode = subprocess.call(["%s/codeml" % bin_dir(), fname_ctl])
    if returncode != 0:
        sys.stderr.write("\tWARNING: codeml exited with status %d\n" % returncode)
コード例 #4
0
ファイル: preprocess.py プロジェクト: jwayne/mol455
def run_phyml(fname_aln, n_bootstrap):
    """
    Generate a phylogenetic tree via PHYML.

    Side effects: converts the alignment to "homologs.aa.phy" (relaxed
    PHYLIP) and writes phyml's stderr to "phyml.log", both in the current
    directory.

    @param fname_aln:
        MSA of protein sequences in CLUSTAL format (.aln)
    @param n_bootstrap:
        Number of bootstrap replicates; bootstrapping ("-b N") is requested
        only when this is greater than 1
    @return:
        (tree_file, bootstrap_file) = File of phylo tree with clade confidences (_tree.txt),
        file of bootstrapped phylo trees (_boot_trees.txt), or None for the
        latter when bootstrapping was not requested
    """
    sys.stderr.write("\nSTEP: run_phyml(%s, %s)\n" % (fname_aln, n_bootstrap))
    fname_phy = "homologs.aa.phy"
    # Hand SeqIO the file names and let it open them itself; the "rU" open
    # mode used previously no longer exists on Python 3.11+.
    SeqIO.convert(fname_aln, "clustal", fname_phy, "phylip-relaxed")

    current_dir = os.getcwd()
    fname_tree = fname_phy + "_phyml_tree.txt"
    if n_bootstrap > 1:
        bootstrap_args = ["-b", "%d" % n_bootstrap]
        fname_boot_trees = fname_phy + "_phyml_boot_trees.txt"
    else:
        bootstrap_args = []
        fname_boot_trees = None

    fname_log = "phyml.log"
    sys.stderr.write("\tRunning phyml, please be patient (may take minutes)...\n")
    # Argument list without a shell: paths containing spaces or shell
    # metacharacters are passed through verbatim.
    # NOTE(review): the trailing current_dir argument is carried over from the
    # original command line; its purpose is not evident here -- confirm phyml
    # needs it.
    cmd = ["%s/phyml" % bin_dir(), "-i", os.path.abspath(fname_phy), "-d", "aa"]
    cmd.extend(bootstrap_args)
    cmd.append(current_dir)
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stdout is drained (so the child cannot block on a full pipe) but discarded.
    _, stderr = proc.communicate()
    # Binary mode: communicate() returns bytes on Python 3, which a text-mode
    # file would reject.
    with open(fname_log, "wb") as f_log:
        f_log.write(stderr)
    log_path = os.path.abspath(fname_log)
    if proc.returncode == 0:
        sys.stderr.write("\tphyml run successful, log available at %s\n" % log_path)
    else:
        sys.stderr.write(
            "\tWARNING: phyml exited with status %d, log available at %s\n"
            % (proc.returncode, log_path)
        )

    return fname_tree, fname_boot_trees