コード例 #1
0
 def test_TCoffee_clustalw(self):
     """Align with T-Coffee into ClustalW format and verify the parsed result."""
     cmdline = TCoffeeCommandline(t_coffee_exe, gapopen=-2)
     cmdline.infile = self.infile1
     cmdline.outfile = self.outfile4
     cmdline.set_parameter("output", "clustalw_aln")
     cmdline.outorder = "input"
     cmdline.set_parameter("gapext", -5)
     cmdline.type = "protein"

     # The rendered command line must match this string exactly.
     expected_command = (
         t_coffee_exe
         + " -output clustalw_aln -infile Fasta/fa01 -outfile Fasta/tc_out.aln "
         "-type protein -outorder input -gapopen -2 -gapext -5"
     )
     self.assertEqual(str(cmdline), expected_command)

     _stdout, stderr = cmdline()
     banner_seen = stderr.strip().startswith("PROGRAM: T-COFFEE")
     self.assertTrue(banner_seen)

     alignment = AlignIO.read(self.outfile4, "clustal")
     originals = list(SeqIO.parse(self.infile1, "fasta"))
     self.assertEqual(len(originals), len(alignment))

     # Removing gap characters must give back each input sequence unchanged.
     for source_rec, aligned_rec in zip(originals, alignment):
         self.assertEqual(source_rec.id, aligned_rec.id)
         aligned_residues = str(aligned_rec.seq).replace("-", "")
         source_residues = str(source_rec.seq).replace("-", "")
         self.assertEqual(aligned_residues, source_residues)
コード例 #2
0
ファイル: test_TCoffee_tool.py プロジェクト: Mat-D/biopython
 def test_TCoffee_3(self):
     """Round-trip through app and read clustalw alignment from file.

     Builds the T-Coffee command line, checks its string form, runs it as
     a child process, and compares the ClustalW alignment it wrote against
     the input FASTA records (same ids, same residues once gaps are
     removed).
     """
     cmdline = TCoffeeCommandline(t_coffee_exe, gapopen=-2)
     cmdline.infile = self.infile1
     cmdline.outfile = self.outfile4
     cmdline.set_parameter("output", "clustalw_aln")
     cmdline.outorder = "input"
     cmdline.set_parameter("gapext", -5)
     cmdline.type = "protein"
     self.assertEqual(str(cmdline), t_coffee_exe + " -output clustalw_aln "
                      "-infile Fasta/fa01 -outfile Fasta/tc_out.phy "
                      "-type protein -outorder input -gapopen -2 -gapext -5")
     # universal_newlines=True makes the pipes return text on Python 3 too,
     # so the str comparison below does not fail on bytes.
     child = subprocess.Popen(str(cmdline),
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              universal_newlines=True,
                              shell=(sys.platform != "win32"))
     # communicate() drains both pipes while waiting. Calling wait() with
     # unread PIPEs can block forever once the child fills a pipe buffer.
     _stdout_text, stderr_text = child.communicate()
     self.assertEqual(child.returncode, 0)
     self.assertTrue(stderr_text.strip().startswith("PROGRAM: T-COFFEE"))
     # Context managers close the file handles deterministically.
     with open(self.outfile4) as handle:
         align = AlignIO.read(handle, "clustal")
     with open(self.infile1) as handle:
         records = list(SeqIO.parse(handle, "fasta"))
     self.assertEqual(len(records), len(align))
     for old, new in zip(records, align):
         self.assertEqual(old.id, new.id)
         self.assertEqual(str(new.seq).replace("-", ""),
                          str(old.seq).replace("-", ""))
コード例 #3
0
def t_coffee(inf, outdir="default"):
    """Align the sequences in ``inf`` with T-Coffee, writing ClustalW output.

    The output name is ``inf`` with ".fasta" replaced by "_tcoffee.aln".
    When ``outdir`` is anything other than "default" it is prepended to
    that name by plain string concatenation (no path separator is added).

    NOTE(review): the returned value is always the name *without* the
    ``outdir`` prefix, even when the file was written under ``outdir``.
    Confirm that callers expect this.
    """
    outfile = inf.replace(".fasta", "_tcoffee.aln")
    from Bio.Align.Applications import TCoffeeCommandline

    destination = outfile if outdir == "default" else outdir + outfile
    run_alignment = TCoffeeCommandline(infile=inf,
                                       output="clustalw",
                                       outfile=destination)
    run_alignment()
    return outfile
コード例 #4
0
ファイル: msa_caller.py プロジェクト: llrs/PYT-SBI
def call_msa_method(method, in_file, out_file, output_format=None):
    """Call the appropriate program to generate a MSA.

    Per the original author: MUSCLE is the only program here that can
    only produce FASTA alignments; for the others the format can be
    changed through ``output_format``.

    Args:
        method: Program to run; one of "clustalw", "muscle" or "t_coffee".
        in_file: Path of the input sequence file passed to the program.
        out_file: Path the program is told to write the alignment to.
        output_format: Output format passed to ClustalW / T-Coffee
            (ignored for MUSCLE).

    Returns:
        The stdout text captured from the alignment program.

    Raises:
        UnboundLocalError: If ``find_executable(method)`` finds nothing,
            or if ``method`` is not one of the supported programs.
        IOError: If running the program raises ``ApplicationError``.
    """
    logging.info("Creating a MSA  with {} from {} to {}".format(
        method, in_file, out_file))

    if not find_executable(method):
        msg = "Program {} not found it is installed?"
        raise UnboundLocalError(msg.format(method))

    if method == "clustalw":
        cline = ClustalwCommandline(method,
                                    infile=in_file,
                                    output=output_format,
                                    OUTFILE=out_file,
                                    type="PROTEIN")
    elif method == "muscle":
        cline = MuscleCommandline(method, input=in_file, out=out_file)
    elif method == "t_coffee":
        cline = TCoffeeCommandline("t_coffee",
                                   infile=in_file,
                                   output=output_format,
                                   outfile=out_file)
    else:
        # Previously an unsupported method fell through to ``cline()`` with
        # ``cline`` unassigned, giving a cryptic UnboundLocalError. The
        # exception type is kept so existing handlers still work, but the
        # message now states the actual problem.
        raise UnboundLocalError(
            "Unsupported MSA method {!r}; expected 'clustalw', 'muscle' "
            "or 't_coffee'".format(method))
    try:
        stdout, stderr = cline()
    except ApplicationError:
        raise IOError("The input file doesn't contain an alignment!")
    logging.debug(stderr)
    return stdout
コード例 #5
0
 def test_TCoffee_msf(self):
     """Align with T-Coffee into GCG MSF format and verify the parsed result."""
     cmdline = TCoffeeCommandline(t_coffee_exe, infile=self.infile1,
                                  outfile=self.outfile6, quiet=True,
                                  output="msf_aln")
     expected_command = (
         t_coffee_exe
         + " -output msf_aln -infile Fasta/fa01 -outfile Fasta/tc_out.msf -quiet"
     )
     self.assertEqual(str(cmdline), expected_command)

     _stdout, stderr = cmdline()
     # Warnings on stderr are tolerated; only an "error" mention fails.
     self.assertNotIn("error", stderr.lower(), stderr)

     alignment = AlignIO.read(self.outfile6, "msf")
     originals = list(SeqIO.parse(self.infile1, "fasta"))
     self.assertEqual(len(originals), len(alignment))

     # Each aligned row, with gaps removed, must equal its input sequence.
     for source_rec, aligned_rec in zip(originals, alignment):
         self.assertEqual(source_rec.id, aligned_rec.id)
         aligned_residues = str(aligned_rec.seq).replace("-", "")
         source_residues = str(source_rec.seq).replace("-", "")
         self.assertEqual(aligned_residues, source_residues)
コード例 #6
0
 def test_TCoffee_phylip(self):
     """Align with T-Coffee into PHYLIP format and verify the parsed result."""
     cmdline = TCoffeeCommandline(t_coffee_exe, infile=self.infile1,
                                  outfile=self.outfile5, quiet=True,
                                  output="phylip_aln")
     expected_command = (
         t_coffee_exe
         + " -output phylip_aln "
         "-infile Fasta/fa01 -outfile Fasta/tc_out.phy -quiet"
     )
     self.assertEqual(str(cmdline), expected_command)

     _stdout, stderr = cmdline()
     # Warnings on stderr are tolerated; only an "error" mention fails.
     self.assertNotIn("error", stderr.lower(), stderr)

     alignment = AlignIO.read(self.outfile5, "phylip")
     originals = list(SeqIO.parse(self.infile1, "fasta"))
     self.assertEqual(len(originals), len(alignment))

     for source_rec, aligned_rec in zip(originals, alignment):
         # Only the first 10 characters of the ids are compared, because
         # TCoffee truncates names strictly, as in original PHYLIP.
         self.assertEqual(source_rec.id[:10], aligned_rec.id[:10])
         aligned_residues = str(aligned_rec.seq).replace("-", "")
         source_residues = str(source_rec.seq).replace("-", "")
         self.assertEqual(aligned_residues, source_residues)
コード例 #7
0
 def test_TCoffee_2(self):
     """Align with T-Coffee into PIR format and verify the parsed result."""
     cmdline = TCoffeeCommandline(t_coffee_exe, quiet=True)
     cmdline.infile = self.infile1
     cmdline.outfile = self.outfile3
     cmdline.output = "pir_aln"

     expected_command = (
         t_coffee_exe
         + " -output pir_aln "
         "-infile Fasta/fa01 -outfile Fasta/tc_out.pir -quiet"
     )
     self.assertEqual(str(cmdline), expected_command)

     _stdout, stderr = cmdline()
     # Warnings on stderr are tolerated; only an "error" mention fails.
     self.assertNotIn("error", stderr.lower(), stderr)

     alignment = AlignIO.read(self.outfile3, "pir")
     originals = list(SeqIO.parse(self.infile1, "fasta"))
     self.assertEqual(len(originals), len(alignment))

     # Each aligned row, with gaps removed, must equal its input sequence.
     for source_rec, aligned_rec in zip(originals, alignment):
         self.assertEqual(source_rec.id, aligned_rec.id)
         aligned_residues = str(aligned_rec.seq).replace("-", "")
         source_residues = str(source_rec.seq).replace("-", "")
         self.assertEqual(aligned_residues, source_residues)
コード例 #8
0
 def test_TCoffee_2(self):
     """Round-trip through app and read pir alignment from file.

     Runs T-Coffee in quiet mode with PIR output, then checks that the
     alignment holds the same records as the input FASTA file (same ids,
     same residues once gap characters are removed).
     """
     cmdline = TCoffeeCommandline(t_coffee_exe, quiet=True)
     cmdline.infile = self.infile1
     cmdline.outfile = self.outfile3
     cmdline.output = "pir_aln"
     self.assertEqual(str(cmdline), t_coffee_exe + " -output pir_aln "
                      "-infile Fasta/fa01 -outfile Fasta/tc_out.pir -quiet")
     stdout, stderr = cmdline()
     # Can get warnings in stderr output.
     # assertNotIn reports both operands on failure, unlike a bare
     # assertTrue over a boolean expression.
     self.assertNotIn("error", stderr.lower(), stderr)
     align = AlignIO.read(self.outfile3, "pir")
     records = list(SeqIO.parse(self.infile1, "fasta"))
     self.assertEqual(len(records), len(align))
     for old, new in zip(records, align):
         self.assertEqual(old.id, new.id)
         self.assertEqual(str(new.seq).replace("-", ""), str(old.seq).replace("-", ""))
コード例 #9
0
 def test_TCoffee_2(self):
     """Round-trip through app and read pir alignment from file.

     Runs T-Coffee in quiet mode through ``Application.generic_run``,
     expects a zero return code and empty stderr, then checks that the PIR
     alignment matches the input FASTA records (same ids, same residues
     once gap characters are removed).
     """
     cmdline = TCoffeeCommandline(t_coffee_exe, quiet=True)
     cmdline.infile = self.infile1
     cmdline.outfile = self.outfile3
     cmdline.output = "pir_aln"
     self.assertEqual(str(cmdline), t_coffee_exe + " -output pir_aln "
                      "-infile Fasta/fa01 -outfile Fasta/tc_out.pir -quiet")
     result, stdout, stderr = Application.generic_run(cmdline)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(result.return_code, 0)
     self.assertEqual(stderr.read(), "")
     # Context managers close the file handles deterministically.
     with open(self.outfile3) as handle:
         align = AlignIO.read(handle, "pir")
     with open(self.infile1) as handle:
         records = list(SeqIO.parse(handle, "fasta"))
     self.assertEqual(len(records), len(align))
     for old, new in zip(records, align):
         self.assertEqual(old.id, new.id)
         self.assertEqual(str(new.seq).replace("-", ""),
                          str(old.seq).replace("-", ""))
コード例 #10
0
 def test_TCoffee_2(self):
     """Round-trip through app and read pir alignment from file.

     The command is executed with ``Application.generic_run``; the test
     requires return code 0 and nothing on stderr, and then compares the
     PIR alignment with the input FASTA records by id and by ungapped
     sequence.
     """
     cmdline = TCoffeeCommandline(t_coffee_exe, quiet=True)
     cmdline.infile = self.infile1
     cmdline.outfile = self.outfile3
     cmdline.output = "pir_aln"
     self.assertEqual(str(cmdline), t_coffee_exe + " -output pir_aln "
                      "-infile Fasta/fa01 -outfile Fasta/tc_out.pir -quiet")
     result, stdout, stderr = Application.generic_run(cmdline)
     # The assertEquals alias is deprecated; use assertEqual.
     self.assertEqual(result.return_code, 0)
     self.assertEqual(stderr.read(), "")
     # Open the files in with-blocks so the handles are not leaked.
     with open(self.outfile3) as handle:
         align = AlignIO.read(handle, "pir")
     with open(self.infile1) as handle:
         records = list(SeqIO.parse(handle, "fasta"))
     self.assertEqual(len(records), len(align))
     for old, new in zip(records, align):
         self.assertEqual(old.id, new.id)
         self.assertEqual(str(new.seq).replace("-", ""),
                          str(old.seq).replace("-", ""))
コード例 #11
0
 def test_TCoffee_fasta(self):
     """Run T-Coffee with only an input file and verify its Clustal output."""
     cmdline = TCoffeeCommandline(t_coffee_exe, infile=self.infile1)
     expected_command = t_coffee_exe + " -infile Fasta/fa01"
     self.assertEqual(str(cmdline), expected_command)

     _stdout, stderr = cmdline()
     banner_seen = stderr.strip().startswith("PROGRAM: T-COFFEE")
     self.assertTrue(banner_seen)

     alignment = AlignIO.read(self.outfile1, "clustal")
     originals = list(SeqIO.parse(self.infile1, "fasta"))
     self.assertEqual(len(originals), len(alignment))

     # Each aligned row, with gaps removed, must equal its input sequence.
     for source_rec, aligned_rec in zip(originals, alignment):
         self.assertEqual(source_rec.id, aligned_rec.id)
         aligned_residues = str(aligned_rec.seq).replace("-", "")
         source_residues = str(source_rec.seq).replace("-", "")
         self.assertEqual(aligned_residues, source_residues)
コード例 #12
0
 def test_TCoffee_3(self):
     """Round-trip through app and read clustalw alignment from file.

     Configures gap penalties, output order and ClustalW output, runs the
     command through ``Application.generic_run``, and compares the
     resulting alignment with the input FASTA records by id and by
     ungapped sequence.
     """
     cmdline = TCoffeeCommandline(t_coffee_exe, gapopen=-2)
     cmdline.infile = self.infile1
     cmdline.outfile = self.outfile4
     cmdline.set_parameter("output", "clustalw_aln")
     cmdline.outorder = "input"
     cmdline.set_parameter("gapext", -5)
     cmdline.type = "protein"
     self.assertEqual(str(cmdline), t_coffee_exe + " -output clustalw_aln "
                      "-infile Fasta/fa01 -outfile Fasta/tc_out.phy "
                      "-outorder input -gapopen -2 -gapext -5")
     result, stdout, stderr = Application.generic_run(cmdline)
     # assertEqual / assertTrue replace the deprecated assertEquals /
     # assert_ aliases.
     self.assertEqual(result.return_code, 0)
     self.assertTrue(stderr.read().strip().startswith("PROGRAM: T-COFFEE"))
     # Context managers close the file handles deterministically.
     with open(self.outfile4) as handle:
         align = AlignIO.read(handle, "clustal")
     with open(self.infile1) as handle:
         records = list(SeqIO.parse(handle, "fasta"))
     self.assertEqual(len(records), len(align))
     for old, new in zip(records, align):
         self.assertEqual(old.id, new.id)
         self.assertEqual(str(new.seq).replace("-", ""),
                          str(old.seq).replace("-", ""))
コード例 #13
0
ファイル: test_TCoffee_tool.py プロジェクト: Mat-D/biopython
 def test_TCoffee_2(self):
     """Round-trip through app and read pir alignment from file.

     Runs T-Coffee in quiet mode as a child process, expects return code 0
     and empty stderr, then compares the PIR alignment with the input
     FASTA records by id and by ungapped sequence.
     """
     cmdline = TCoffeeCommandline(t_coffee_exe, quiet=True)
     cmdline.infile = self.infile1
     cmdline.outfile = self.outfile3
     cmdline.output = "pir_aln"
     self.assertEqual(str(cmdline), t_coffee_exe + " -output pir_aln "
                      "-infile Fasta/fa01 -outfile Fasta/tc_out.pir -quiet")
     # universal_newlines=True makes the pipes return text on Python 3 too,
     # so stderr can be compared with "" below.
     child = subprocess.Popen(str(cmdline),
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              universal_newlines=True,
                              shell=(sys.platform != "win32"))
     # communicate() drains both pipes while waiting. Calling wait() with
     # unread PIPEs can block forever once the child fills a pipe buffer.
     _stdout_text, stderr_text = child.communicate()
     self.assertEqual(child.returncode, 0)
     self.assertEqual(stderr_text, "")
     # Context managers close the file handles deterministically.
     with open(self.outfile3) as handle:
         align = AlignIO.read(handle, "pir")
     with open(self.infile1) as handle:
         records = list(SeqIO.parse(handle, "fasta"))
     self.assertEqual(len(records), len(align))
     for old, new in zip(records, align):
         self.assertEqual(old.id, new.id)
         self.assertEqual(str(new.seq).replace("-", ""),
                          str(old.seq).replace("-", ""))
コード例 #14
0
 def test_TCoffee_3(self):
     """Align with T-Coffee into ClustalW format and verify the parsed result."""
     cmdline = TCoffeeCommandline(t_coffee_exe, gapopen=-2)
     cmdline.infile = self.infile1
     cmdline.outfile = self.outfile4
     cmdline.set_parameter("output", "clustalw_aln")
     cmdline.outorder = "input"
     cmdline.set_parameter("gapext", -5)
     cmdline.type = "protein"

     # The rendered command line must match this string exactly.
     expected_command = (
         t_coffee_exe
         + " -output clustalw_aln "
         "-infile Fasta/fa01 -outfile Fasta/tc_out.phy "
         "-type protein -outorder input -gapopen -2 -gapext -5"
     )
     self.assertEqual(str(cmdline), expected_command)

     _stdout, stderr = cmdline()
     banner_seen = stderr.strip().startswith("PROGRAM: T-COFFEE")
     self.assertTrue(banner_seen)

     alignment = AlignIO.read(self.outfile4, "clustal")
     originals = list(SeqIO.parse(self.infile1, "fasta"))
     self.assertEqual(len(originals), len(alignment))

     # Each aligned row, with gaps removed, must equal its input sequence.
     for source_rec, aligned_rec in zip(originals, alignment):
         self.assertEqual(source_rec.id, aligned_rec.id)
         aligned_residues = str(aligned_rec.seq).replace("-", "")
         source_residues = str(source_rec.seq).replace("-", "")
         self.assertEqual(aligned_residues, source_residues)
コード例 #15
0
 def test_TCoffee_1(self):
     """Round-trip through app and read clustal alignment from file.

     Runs T-Coffee with only an input file through
     ``Application.generic_run``, checks the return code and the command
     line recorded on the result, and compares the Clustal alignment with
     the input FASTA records by id and by ungapped sequence.
     """
     cmdline = TCoffeeCommandline(t_coffee_exe, infile=self.infile1)
     self.assertEqual(str(cmdline), t_coffee_exe + " -infile Fasta/fa01")
     result, stdout, stderr = Application.generic_run(cmdline)
     # assertEqual / assertTrue replace the deprecated assertEquals /
     # assert_ aliases.
     self.assertEqual(result.return_code, 0)
     self.assertEqual(str(result._cl), t_coffee_exe + " -infile Fasta/fa01")
     self.assertTrue(stderr.read().strip().startswith("PROGRAM: T-COFFEE"))
     # Context managers close the file handles deterministically.
     with open(self.outfile1) as handle:
         align = AlignIO.read(handle, "clustal")
     with open(self.infile1) as handle:
         records = list(SeqIO.parse(handle, "fasta"))
     self.assertEqual(len(records), len(align))
     for old, new in zip(records, align):
         self.assertEqual(old.id, new.id)
         self.assertEqual(str(new.seq).replace("-", ""),
                          str(old.seq).replace("-", ""))
コード例 #16
0
def run_multiple_sequence_alignment(records, workdir, msa):
    """
    Write ``records`` to ``<workdir>/msa.fasta`` and align them with ``msa``.

    The alignment is directed to ``<workdir>/msa.aln``; tools that accept a
    guide-tree output option are pointed at ``<workdir>/msa.dnd``. For
    MAFFT and ProbCons, which are not given an output file here, the
    captured stdout is written to ``msa.aln`` instead.

    Original author's notes: the user can choose between emma, clustalw
    (old), clustal omega (recommended for proteins, also uses HMM), MUSCLE
    or MAFFT (recommended for nucleotide data), and T-Coffee (good for
    distantly related sequences).
    FUTURE: Add more iterative methods to improve runtime? Add HMMER?

    Args:
        records: Sequence records, passed to ``SeqIO.write`` as FASTA.
        workdir: Directory in which the input and output files are created.
        msa: Case-insensitive tool name: "emma", "clustalo" /
            "clustal_omega" / "clustal-omega", "t-coffee" / "t_coffee",
            "muscle", "mafft", "clustalw" / "clustalw2", "prank",
            "msaprobs", "probcons" or "dialign".

    Returns:
        None.

    Raises:
        ValueError: If ``msa`` names none of the supported tools. The
            original raised ``BaseException``; ``ValueError`` is a subclass
            of it, so existing handlers still catch it.
    """
    # get filename for fasta file
    sequence_list_file = os.path.join(workdir, "msa.fasta")
    # write sequences
    SeqIO.write(records, sequence_list_file, "fasta")
    # prepare filenames for MSA output
    outfile = os.path.join(workdir, "msa.aln")
    treefile = os.path.join(workdir, "msa.dnd")
    # Normalise the tool name once instead of lower-casing per comparison.
    algorithm = msa.lower()
    # Prepare command line according to chosen algorithm.
    # print(...) with one argument prints the same on Python 2 and 3.
    if algorithm == "emma":  # output is fasta
        print("Aligning by emma")
        cmd = EmmaCommandline(sequence=sequence_list_file,
                              outseq=outfile,
                              dendoutfile=treefile)
    elif algorithm in ("clustalo", "clustal_omega", "clustal-omega"):
        print("Aligning by Clustal Omega")
        cmd = ClustalOmegaCommandline(infile=sequence_list_file,
                                      outfile=outfile,
                                      verbose=True,
                                      auto=True,
                                      guidetree_out=treefile,
                                      outfmt="clu",
                                      force=True)
    elif algorithm in ("t-coffee", "t_coffee"):
        # Original note: should output tree file automatically.
        print("Aligning by T-Coffeee")
        cmd = TCoffeeCommandline(infile=sequence_list_file,
                                 output="clustalw",
                                 outfile=outfile)
    elif algorithm == "muscle":
        print("Aligning by MUSCLE")
        cmd = MuscleCommandline(input=sequence_list_file,
                                out=outfile,
                                tree2=treefile)
    elif algorithm == "mafft":
        # Original note: probably going to save the tree as input.tree.
        print("Aligning by MAFFT")
        cmd = MafftCommandline(input=sequence_list_file,
                               clustalout=True,
                               treeout=True)
    elif algorithm in ("clustalw", "clustalw2"):
        print("Aligning by ClustalW2")
        cmd = ClustalwCommandline("clustalw",
                                  infile=sequence_list_file,
                                  outfile=outfile,
                                  tree=True,
                                  newtree=treefile)
    elif algorithm == "prank":
        # Original note: output is fasta; tree may be written to a .dnd file.
        print("Aligning by PRANK")
        cmd = PrankCommandline(d=sequence_list_file,
                               o=outfile,
                               f=8,
                               showtree=True,
                               noxml=True)
    elif algorithm == "msaprobs":  # doesn't use a guide tree
        print("Aligning by MSAprobs")
        cmd = MSAProbsCommandline(infile=sequence_list_file,
                                  outfile=outfile,
                                  clustalw=True)
    elif algorithm == "probcons":
        print("Aligning by ProbCons")
        cmd = ProbconsCommandline(input=sequence_list_file, clustalw=True)
    elif algorithm == "dialign":
        # Original note: a phylip tree should be created automatically;
        # its file name is unknown.
        print("Aligning by Dialign")
        cmd = DialignCommandline(input=sequence_list_file, cw=True, fn=outfile)
    else:
        # The message now lists every tool handled above.
        raise ValueError(
            "Only Multiple Sequence Alignment algorithms currently supported "
            "are emma, clustalo, t_coffee, muscle, mafft, clustalw, prank, "
            "msaprobs, probcons and dialign"
        )
    # Execute the command
    stdout, stderr = cmd()
    # For algorithms that are not given an output file, save the captured stdout
    if algorithm in ("mafft", "probcons"):
        with open(outfile, "w") as handle:
            handle.write(stdout)
コード例 #17
0
#!/usr/bin/env python3

import os, Bio

from Bio.Align.Applications import TCoffeeCommandline

# Configure the Biopython T-Coffee wrapper: input sequences file, ClustalW
# output format, and the alignment file to write.
tcoffee_options = {
    "infile": "MSA_sequences_file.fasta",
    "output": "clustalw",
    "outfile": "tcoffee_MSA_output.aln",
}
tcoffee_cline = TCoffeeCommandline(**tcoffee_options)

# Show the command line that is about to be executed, then run it.
print(tcoffee_cline)
stdout, stderr = tcoffee_cline()
コード例 #18
0
import os, subprocess
import re
from Bio.Align.Applications import TCoffeeCommandline

# Location of the T-Coffee executable, the directory that receives the
# alignments, and the directory tree that is scanned for input files.
tcoffee_exe = "/Users/qingye/tcoffee/Version_11.00.8cbe486/bin/t_coffee"
tcoffee_out = "/Users/qingye/Desktop/experiment_test/5_tcoffee/"
t = os.walk(r"/Users/qingye/Desktop/experiment_test/1_blast/3/3/")

# Only files whose name starts with a digit are aligned. Compiled once,
# as a raw string so "\d" is not treated as an invalid string escape.
prog = re.compile(r'^\d')

for path, dir_list, file_list in t:
    # Rebuilt for every directory: a single shared list would re-run the
    # files of earlier directories against the current ``path``.
    tcoffee_valid_file_list = []
    for file_name in file_list:
        if prog.match(file_name):
            tcoffee_valid_file_list.append(file_name)
        else:
            print('Invalid File Name :' + file_name)
    for file_name in tcoffee_valid_file_list:
        # os.walk yields subdirectory paths without a trailing separator,
        # so join instead of concatenating.
        tcoffee_input_path = os.path.join(path, file_name)
        # Drop the last six characters of the name
        # (presumably the ".fasta" extension - verify against the inputs).
        tcoffee_fasta_file_name = file_name[0:(len(file_name) - 6)]
        tcoffee_out_put_path = tcoffee_out + tcoffee_fasta_file_name + '.fasta'
        tcoffee_cline = TCoffeeCommandline(tcoffee_exe,
                                           infile=tcoffee_input_path,
                                           outfile=tcoffee_out_put_path,
                                           output="fasta")
        # stdout was never read, so discard it via os.devnull. The
        # subprocess docs warn against stdout=PIPE with call(), because a
        # full pipe blocks the child.
        with open(os.devnull, "w") as devnull:
            child = subprocess.call(str(tcoffee_cline),
                                    stdout=devnull,
                                    shell=True)
コード例 #19
0
ファイル: EZProt.py プロジェクト: cjcoope2/EZProt
# Collect the regular files (not directories) in the current working directory.
files = [f for f in os.listdir('.') if os.path.isfile(f)]
i = 0
# Align every file whose name ends with ".fasta" using the selected program.
for f in files:
    if f.endswith(".fasta"):
        in_file = f
        # Output names are the input name with a suffix appended.
        out_file = f+"_aligned"
        out_file2 = f+"_optimized"

        print("initializing multiple sequence alignment")

        # Run the MSA program selected by `program1`.
        # NOTE(review): `program1` and `platform` are defined outside this
        # excerpt - confirm their values; `shell` is True whenever
        # sys.platform differs from `platform`.
        if program1 == "clustalo" :
            clustalo_cline = ClustalOmegaCommandline(infile=in_file, outfile=out_file, verbose=True, auto=True)
            child = subprocess.call(str(clustalo_cline), shell=(sys.platform!=platform))
        elif program1 == "tcoffee" :
            tcoffee_cline = TCoffeeCommandline(infile=in_file, output="fasta", outfile=out_file)
            child = subprocess.call(str(tcoffee_cline), shell=(sys.platform!=platform))
        elif program1 == 'muscle' :
            muscle_cline = MuscleCommandline(input = in_file, out = out_file)
            child = subprocess.call(str(muscle_cline), shell=(sys.platform!=platform))
        else:
            # Unknown program name: report it, pause for 3 seconds, then exit.
            print("error: invalid input, terminating program")
            time.sleep(3)
            sys.exit()

        print("Multiple sequence alignment complete")

        # Run trimal on the alignment when `trimal` equals 'yes'; otherwise skip it.
        # NOTE(review): `trimal` is defined outside this excerpt.
        if trimal == 'yes':
            print("Optimization in progress")
            subprocess.call(['trimal', '-in', out_file, '-out', out_file2])