예제 #1
0
def reads2clones_align():
    """
    
    """
    from Bio.Application import generic_run
    from cmline import NeedleCommandline
    go = 6.0
    ge = 3.0

    ref_genome = 'tmp.fas'

    needle_run = NeedleCommandline()
    needle_run.set_parameter('-asequence', ref_genome)
    needle_run.set_parameter('-gapopen', go)
    needle_run.set_parameter('-gapextend', ge)
    needle_run.set_parameter('-aformat', 'markx10')

    # forward
    needle_run.set_parameter('-bsequence', 'tmp_reads_f.fas')
    outfile = 'tmp_align_f.needle'
    needle_run.set_parameter('-outfile', outfile)

    result_1, messages_1, errors_1 = generic_run(needle_run)

    # reverse
    needle_run.set_parameter('-bsequence', 'tmp_reads_r.fas')
    outfile = 'tmp_align_r.needle'
    needle_run.set_parameter('-outfile', outfile)

    result_2, messages_2, errors_2 = generic_run(needle_run)

    for ar in result_1.available_results():
        print ar, result_1.get_result(ar)

    if Verbose:
        for m in messages_1.readlines():
            print >> sys.stderr, m

        for e in errors_1.readlines():
            print >> sys.stderr, e

    for ar in result_2.available_results():
        print ar, result_2.get_result(ar)

    if Verbose:
        for m in messages_2.readlines():
            print >> sys.stderr, m

        for e in errors_2.readlines():
            print >> sys.stderr, e
    return
예제 #2
0
def reads2clones_align(ref_genome):
    """
    Align the forward and reverse read files against one reference genome.

    needle is run twice through ``generic_run`` with gap open/extend
    penalties 6.0/3.0, markx10 output and the '-usa' and '-auto' switches:
    'tmp_reads_f.fas' is aligned into ``ref_genome + '_f.needle'`` and
    'tmp_reads_r.fas' into ``ref_genome + '_r.needle'``.

    The available results of each run are logged with ``logfun.info``; the
    captured messages and errors are logged with ``logfun.debug``.

    Args:
        ref_genome: value passed as '-asequence'; also the prefix of both
            output file names.

    Returns:
        Tuple ``(forward_outfile, reverse_outfile)`` of the two output
        file names.
    """
    from Bio.Application import generic_run
    from pythonlib.cmline import NeedleCommandline
    go = 6.0
    ge = 3.0

    needle_run = NeedleCommandline()
    needle_run.set_parameter('-asequence', ref_genome)
    needle_run.set_parameter('-gapopen', go)
    needle_run.set_parameter('-gapextend', ge)
    needle_run.set_parameter('-aformat', 'markx10')
    needle_run.set_parameter('-usa', True)
    needle_run.set_parameter('-auto', True)

    # forward
    needle_run.set_parameter('-bsequence', 'tmp_reads_f.fas')
    outfile_1 = ref_genome + '_f.needle'
    needle_run.set_parameter('-outfile', outfile_1)

    result_1, messages_1, errors_1 = generic_run(needle_run)

    # reverse (same command object, only reads and output file change)
    needle_run.set_parameter('-bsequence', 'tmp_reads_r.fas')
    outfile_2 = ref_genome + '_r.needle'
    needle_run.set_parameter('-outfile', outfile_2)

    result_2, messages_2, errors_2 = generic_run(needle_run)

    # Log both runs in the same way, forward run first.
    for result, messages, errors in ((result_1, messages_1, errors_1),
                                     (result_2, messages_2, errors_2)):
        for ar in result.available_results():
            logfun.info(ar + result.get_result(ar))

        for m in messages.readlines():
            logfun.debug(m)

        for e in errors.readlines():
            logfun.debug(e)

    return outfile_1, outfile_2
예제 #3
0
 def test_water_file(self):
     """water with the asis trick, output to a file."""
     #Setup, try a mixture of keyword arguments and later additions:
     cline = WaterCommandline(cmd=exes["water"],
                              gapopen="10",
                              gapextend="0.5")
     #Try using both human readable names, and the literal ones:
     cline.set_parameter("asequence", "asis:ACCCGGGCGCGGT")
     cline.set_parameter("-bsequence", "asis:ACCCGAGCGCGGT")
     #Try using a property set here:
     cline.outfile = "Emboss/temp with space.water"
     #The repr must evaluate back to an equivalent command line:
     self.assertEqual(str(eval(repr(cline))), str(cline))
     #Run the tool,
     result, out, err = generic_run(cline)
     #Check it worked (banner expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         #Show the failing command line to help debugging
         print >> sys.stderr, "\n%s" % cline
     self.assertEqual(result.return_code, 0)
     filename = result.get_result("outfile")
     self.assertEqual(filename, "Emboss/temp with space.water")
     #Use a unittest assertion (a bare assert is stripped under -O):
     self.assertTrue(os.path.isfile(filename))
     #Check we can parse the output...  The handle is closed explicitly so
     #the file is no longer open when it is removed below.
     handle = open(filename)
     try:
         align = AlignIO.read(handle, "emboss")
     finally:
         handle.close()
     self.assertEqual(len(align), 2)
     self.assertEqual(str(align[0].seq), "ACCCGGGCGCGGT")
     self.assertEqual(str(align[1].seq), "ACCCGAGCGCGGT")
     #Clean up,
     os.remove(filename)
예제 #4
0
def haps2clones_align(est_hap):
    """
    Align the estimated haplotypes against 'tmp_ref.fas' with needle.

    needle is run through ``generic_run`` with gap open/extend penalties
    6.0/3.0, markx10 output and the '-auto' switch.  The available results
    are logged with ``logfun.info``; captured messages and errors are
    logged with ``logfun.debug``.

    Args:
        est_hap: value passed as '-asequence'; also the prefix of the
            output file name.

    Returns:
        The output file name, ``est_hap + '_hap.needle'``.
    """
    from Bio.Application import generic_run
    from pythonlib.cmline import NeedleCommandline
    go = 6.0
    ge = 3.0

    needle_run = NeedleCommandline()
    needle_run.set_parameter('-asequence', est_hap)
    needle_run.set_parameter('-gapopen', go)
    needle_run.set_parameter('-gapextend', ge)
    needle_run.set_parameter('-aformat', 'markx10')
    needle_run.set_parameter('-auto', True)
    needle_run.set_parameter('-bsequence', 'tmp_ref.fas')
    outfile_1 = est_hap + '_hap.needle'
    needle_run.set_parameter('-outfile', outfile_1)

    result_1, messages_1, errors_1 = generic_run(needle_run)

    for ar in result_1.available_results():
        logfun.info(ar + result_1.get_result(ar))

    for m in messages_1.readlines():
        logfun.debug(m)

    for e in errors_1.readlines():
        logfun.debug(e)

    return outfile_1
예제 #5
0
 def test_water_file2(self):
     """water with the asis trick and nucleotide FASTA file, output to a file."""
     #Setup,
     query = "ACACACTCACACACACTTGGTCAGAGATGCTGTGCTTCTTGGAAGCAAGGNCTCAAAGGCAAGGTGCACGCAGAGGGACGTTTGAGTCTGGGATGAAGCATGTNCGTATTATTTATATGATGGAATTTCACGTTTTTATG"
     out_file = "Emboss/temp_test2.water"
     in_file = "Fasta/f002"
     self.assertTrue(os.path.isfile(in_file))
     #Remove any stale output left behind by an earlier run:
     if os.path.isfile(out_file):
         os.remove(out_file)
     cline = WaterCommandline(cmd=exes["water"])
     cline.set_parameter("-asequence", "asis:%s" % query)
     cline.set_parameter("-bsequence", in_file)
     cline.set_parameter("-gapopen", "10")
     cline.set_parameter("-gapextend", "0.5")
     cline.set_parameter("-outfile", out_file)
     #The repr must evaluate back to an equivalent command line:
     self.assertEqual(str(eval(repr(cline))), str(cline))
     #Run the tool,
     result, out, err = generic_run(cline)
     #Check it worked (banner expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         #Show the failing command line to help debugging
         print >> sys.stderr, "\n%s" % cline
     self.assertEqual(result.return_code, 0)
     self.assertEqual(result.get_result("outfile"), out_file)
     #Use a unittest assertion (a bare assert is stripped under -O):
     self.assertTrue(os.path.isfile(out_file))
     #Check we can parse the output and it is sensible...
     #Both parsers read from their handle while being iterated, so the
     #handles stay open during the check and are closed right after it,
     #before the output file is removed.
     targets_handle = open(in_file)
     try:
         alignments_handle = open(out_file)
         try:
             self.pairwise_alignment_check(
                 query,
                 SeqIO.parse(targets_handle, "fasta"),
                 AlignIO.parse(alignments_handle, "emboss"),
                 local=True)
         finally:
             alignments_handle.close()
     finally:
         targets_handle.close()
     #Clean up,
     os.remove(out_file)
예제 #6
0
 def test_needle_file(self):
     """needle with the asis trick, output to a file."""
     #Setup,
     cline = NeedleCommandline(cmd=exes["needle"])
     cline.set_parameter("-asequence", "asis:ACCCGGGCGCGGT")
     cline.set_parameter("-bsequence", "asis:ACCCGAGCGCGGT")
     cline.set_parameter("-gapopen", "10")
     cline.set_parameter("-gapextend", "0.5")
     #EMBOSS would guess this, but let's be explicit:
     cline.set_parameter("-snucleotide", "True")
     cline.set_parameter("-outfile", "Emboss/temp with space.needle")
     #The repr must evaluate back to an equivalent command line:
     self.assertEqual(str(eval(repr(cline))), str(cline))
     #Run the tool,
     result, out, err = generic_run(cline)
     #Check it worked (banner expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Needleman-Wunsch global alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         #Show the failing command line to help debugging
         print >> sys.stderr, "\n%s" % cline
     self.assertEqual(result.return_code, 0)
     filename = result.get_result("outfile")
     self.assertEqual(filename, "Emboss/temp with space.needle")
     #Use a unittest assertion (a bare assert is stripped under -O):
     self.assertTrue(os.path.isfile(filename))
     #Check we can parse the output...  The handle is closed explicitly so
     #the file is no longer open when it is removed below.
     handle = open(filename)
     try:
         align = AlignIO.read(handle, "emboss")
     finally:
         handle.close()
     self.assertEqual(len(align), 2)
     self.assertEqual(str(align[0].seq), "ACCCGGGCGCGGT")
     self.assertEqual(str(align[1].seq), "ACCCGAGCGCGGT")
     #Clean up,
     os.remove(filename)
예제 #7
0
 def test_water_file(self):
     """water with the asis trick, output to a file."""
     #Setup, try a mixture of keyword arguments and later additions:
     cline = WaterCommandline(cmd=exes["water"],
                              gapopen="10", gapextend="0.5")
     #Try using both human readable names, and the literal ones:
     cline.set_parameter("asequence", "asis:ACCCGGGCGCGGT")
     cline.set_parameter("-bsequence", "asis:ACCCGAGCGCGGT")
     #Try using a property set here:
     cline.outfile = "Emboss/temp with space.water"
     #The repr must evaluate back to an equivalent command line:
     self.assertEqual(str(eval(repr(cline))), str(cline))
     #Run the tool,
     result, out, err = generic_run(cline)
     #Check it worked (banner expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         #Show the failing command line to help debugging
         print >> sys.stderr, "\n%s" % cline
     self.assertEqual(result.return_code, 0)
     filename = result.get_result("outfile")
     self.assertEqual(filename, "Emboss/temp with space.water")
     #Use a unittest assertion (a bare assert is stripped under -O):
     self.assertTrue(os.path.isfile(filename))
     #Check we can parse the output...  Close the handle before the file
     #is deleted instead of relying on garbage collection.
     handle = open(filename)
     try:
         align = AlignIO.read(handle, "emboss")
     finally:
         handle.close()
     self.assertEqual(len(align), 2)
     self.assertEqual(str(align[0].seq), "ACCCGGGCGCGGT")
     self.assertEqual(str(align[1].seq), "ACCCGAGCGCGGT")
     #Clean up,
     os.remove(filename)
예제 #8
0
 def test_needle_file(self):
     """needle with the asis trick, output to a file."""
     #Setup,
     cline = NeedleCommandline(cmd=exes["needle"])
     cline.set_parameter("-asequence", "asis:ACCCGGGCGCGGT")
     cline.set_parameter("-bsequence", "asis:ACCCGAGCGCGGT")
     cline.set_parameter("-gapopen", "10")
     cline.set_parameter("-gapextend", "0.5")
     #EMBOSS would guess this, but let's be explicit:
     cline.set_parameter("-snucleotide", "True")
     cline.set_parameter("-outfile", "Emboss/temp with space.needle")
     #The repr must evaluate back to an equivalent command line:
     self.assertEqual(str(eval(repr(cline))), str(cline))
     #Run the tool,
     result, out, err = generic_run(cline)
     #Check it worked (banner expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Needleman-Wunsch global alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         #Show the failing command line to help debugging
         print >> sys.stderr, "\n%s" % cline
     self.assertEqual(result.return_code, 0)
     filename = result.get_result("outfile")
     self.assertEqual(filename, "Emboss/temp with space.needle")
     #Use a unittest assertion (a bare assert is stripped under -O):
     self.assertTrue(os.path.isfile(filename))
     #Check we can parse the output...  Close the handle before the file
     #is deleted instead of relying on garbage collection.
     handle = open(filename)
     try:
         align = AlignIO.read(handle, "emboss")
     finally:
         handle.close()
     self.assertEqual(len(align), 2)
     self.assertEqual(str(align[0].seq), "ACCCGGGCGCGGT")
     self.assertEqual(str(align[1].seq), "ACCCGAGCGCGGT")
     #Clean up,
     os.remove(filename)
예제 #9
0
 def test_water_file2(self):
     """water with the asis trick and nucleotide FASTA file, output to a file."""
     #Setup,
     query = "ACACACTCACACACACTTGGTCAGAGATGCTGTGCTTCTTGGAAGCAAGGNCTCAAAGGCAAGGTGCACGCAGAGGGACGTTTGAGTCTGGGATGAAGCATGTNCGTATTATTTATATGATGGAATTTCACGTTTTTATG"
     out_file = "Emboss/temp_test2.water"
     in_file = "Fasta/f002"
     self.assertTrue(os.path.isfile(in_file))
     #Remove any stale output left behind by an earlier run:
     if os.path.isfile(out_file):
         os.remove(out_file)
     cline = WaterCommandline(cmd=exes["water"])
     cline.set_parameter("-asequence", "asis:%s" % query)
     cline.set_parameter("-bsequence", in_file)
     cline.set_parameter("-gapopen", "10")
     cline.set_parameter("-gapextend", "0.5")
     cline.set_parameter("-outfile", out_file)
     #The repr must evaluate back to an equivalent command line:
     self.assertEqual(str(eval(repr(cline))), str(cline))
     #Run the tool,
     result, out, err = generic_run(cline)
     #Check it worked (banner expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         #Show the failing command line to help debugging
         print >> sys.stderr, "\n%s" % cline
     self.assertEqual(result.return_code, 0)
     self.assertEqual(result.get_result("outfile"), out_file)
     #Use a unittest assertion (a bare assert is stripped under -O):
     self.assertTrue(os.path.isfile(out_file))
     #Check we can parse the output and it is sensible...
     #The parsers read from their handle while being iterated, so both
     #handles stay open for the check and are closed before the clean up.
     targets_handle = open(in_file)
     try:
         alignments_handle = open(out_file)
         try:
             self.pairwise_alignment_check(
                 query,
                 SeqIO.parse(targets_handle, "fasta"),
                 AlignIO.parse(alignments_handle, "emboss"),
                 local=True)
         finally:
             alignments_handle.close()
     finally:
         targets_handle.close()
     #Clean up,
     os.remove(out_file)
예제 #10
0
 def test_water_file3(self):
     """water with the asis trick and GenBank file, output to a file."""
     #Setup,
     query = "TGTTGTAATGTTTTAATGTTTCTTCTCCCTTTAGATGTACTACGTTTGGA"
     out_file = "Emboss/temp_test3.water"
     in_file = "GenBank/cor6_6.gb"
     self.assertTrue(os.path.isfile(in_file))
     #Remove any stale output left behind by an earlier run:
     if os.path.isfile(out_file):
         os.remove(out_file)
     cline = WaterCommandline(cmd=exes["water"])
     cline.set_parameter("asequence", "asis:%s" % query)
     cline.set_parameter("bsequence", in_file)
     #TODO - Tell water this is a GenBank file!
     cline.set_parameter("gapopen", "1")
     cline.set_parameter("gapextend", "0.5")
     cline.set_parameter("outfile", out_file)
     #The repr must evaluate back to an equivalent command line:
     self.assertEqual(str(eval(repr(cline))), str(cline))
     #Run the tool,
     result, out, err = generic_run(cline)
     #Check it worked (banner expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         #Show the failing command line to help debugging
         print >> sys.stderr, "\n%s" % cline
     self.assertEqual(result.return_code, 0)
     self.assertEqual(result.get_result("outfile"), out_file)
     #Use a unittest assertion (a bare assert is stripped under -O):
     self.assertTrue(os.path.isfile(out_file))
     #Check we can parse the output and it is sensible...
     #The parsers read from their handle while being iterated, so both
     #handles stay open for the check and are closed before the clean up.
     targets_handle = open(in_file)
     try:
         alignments_handle = open(out_file)
         try:
             self.pairwise_alignment_check(
                 query,
                 SeqIO.parse(targets_handle, "genbank"),
                 AlignIO.parse(alignments_handle, "emboss"),
                 local=True)
         finally:
             alignments_handle.close()
     finally:
         targets_handle.close()
     #Clean up,
     os.remove(out_file)
예제 #11
0
def needle_align(a_file, b_file, out_file):
    """

    """
    from Bio.Application import generic_run
    from cmline import NeedleCommandline
    go = 6.0
    ge = 3.0

    needle_run = NeedleCommandline()
    needle_run.set_parameter('-asequence', a_file)
    needle_run.set_parameter('-bsequence', b_file)
    needle_run.set_parameter('-gapopen', go)
    needle_run.set_parameter('-gapextend', ge)
    needle_run.set_parameter('-outfile', out_file)
    needle_run.set_parameter('-aformat', 'markx10')
    result, messages, errors = generic_run(needle_run)

    if Pedantic:
        for ar in result.available_results():
            print ar, result.get_result(ar)

        for m in messages.readlines():
            print >> sys.stderr, m

        for e in errors.readlines():
            print >> sys.stderr, e

    return
예제 #12
0
 def test_long(self):
     """Simple muscle call using long file."""
     #Create a large input file by converting some of another example file
     temp_large_fasta_file = "temp_cw_prot.fasta"
     #Parse the source first, so that a parse failure does not leave an
     #empty temporary file behind, and close both handles explicitly.
     source = open("NBRF/Cw_prot.pir", "rU")
     try:
         records = list(SeqIO.parse(source, "pir"))[:40]
     finally:
         source.close()
     handle = open(temp_large_fasta_file, "w")
     try:
         SeqIO.write(records, handle, "fasta")
     finally:
         handle.close()
     try:
         #Prepare the command...
         cmdline = MuscleCommandline(muscle_exe)
         cmdline.set_parameter("in", temp_large_fasta_file)
         #Preserve input record order
         cmdline.set_parameter("stable", True) #Default None treated as False!
         #Use fast options
         cmdline.set_parameter("maxiters", 1)
         cmdline.set_parameter("diags", True) #Default None treated as False!
         #Use clustal output
         cmdline.set_parameter("clwstrict", True) #Default None treated as False!
         #Shouldn't need this, but just to make sure it is accepted
         cmdline.set_parameter("maxhours", 0.1)
         #No progress reports to stderr
         cmdline.set_parameter("quiet", True) #Default None treated as False!
         self.assertEqual(str(cmdline).rstrip(), muscle_exe + \
                          " -in temp_cw_prot.fasta -diags -maxhours 0.1" + \
                          " -maxiters 1 -clwstrict -stable -quiet")
         #The repr must evaluate back to an equivalent command line:
         self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
         result, out_handle, err_handle = generic_run(cmdline)
         align = AlignIO.read(out_handle, "clustal")
         self.assertEqual(len(records), len(align))
         for old, new in zip(records, align):
             self.assertEqual(old.id, new.id)
             self.assertEqual(str(new.seq).replace("-",""), str(old.seq))
     finally:
         #Remove the temporary input even when a check above fails
         os.remove(temp_large_fasta_file)
     #See if quiet worked:
     self.assertEqual("", err_handle.read().strip())
예제 #13
0
 def test_water_file3(self):
     """water with the asis trick and GenBank file, output to a file."""
     #Setup,
     query = "TGTTGTAATGTTTTAATGTTTCTTCTCCCTTTAGATGTACTACGTTTGGA"
     out_file = "Emboss/temp_test3.water"
     in_file = "GenBank/cor6_6.gb"
     self.assertTrue(os.path.isfile(in_file))
     #Remove any stale output left behind by an earlier run:
     if os.path.isfile(out_file):
         os.remove(out_file)
     cline = WaterCommandline(cmd=exes["water"])
     cline.set_parameter("asequence", "asis:%s" % query)
     cline.set_parameter("bsequence", in_file)
     #TODO - Tell water this is a GenBank file!
     cline.set_parameter("gapopen", "1")
     cline.set_parameter("gapextend", "0.5")
     cline.set_parameter("outfile", out_file)
     #The repr must evaluate back to an equivalent command line:
     self.assertEqual(str(eval(repr(cline))), str(cline))
     #Run the tool,
     result, out, err = generic_run(cline)
     #Check it worked (banner expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         #Show the failing command line to help debugging
         print >> sys.stderr, "\n%s" % cline
     self.assertEqual(result.return_code, 0)
     self.assertEqual(result.get_result("outfile"), out_file)
     #Use a unittest assertion (a bare assert is stripped under -O):
     self.assertTrue(os.path.isfile(out_file))
     #Check we can parse the output and it is sensible...
     #Keep both handles open while the check iterates over the parsers,
     #then close them so the output file is not open when it is removed.
     targets_handle = open(in_file)
     try:
         alignments_handle = open(out_file)
         try:
             self.pairwise_alignment_check(
                 query,
                 SeqIO.parse(targets_handle, "genbank"),
                 AlignIO.parse(alignments_handle, "emboss"),
                 local=True)
         finally:
             alignments_handle.close()
     finally:
         targets_handle.close()
     #Clean up,
     os.remove(out_file)
예제 #14
0
 def _run_genepop(self, extensions, option, fname, opts=None):
     """Run the controller on fname with the given menu option.

     Before the run, ``self._remove_garbage`` is called for ``fname`` plus
     each entry of ``extensions``; after the run it is called once more
     with ``None``.

     Arguments:
      - extensions - suffixes appended to fname for the pre-run clean up
      - option     - value handed to ``self.controller.set_menu``
      - fname      - input file name handed to ``self.controller.set_input``
      - opts       - optional mapping of extra parameters; each one is set
                     as "<name>=<value>" (defaults to no extra parameters)

     Returns the (result, stdout, stderr) triple from ``generic_run``.
     Raises IOError when the return code is not zero.
     """
     #None instead of a shared mutable {} as the default value
     if opts is None:
         opts = {}
     for extension in extensions:
         self._remove_garbage(fname + extension)
     self.controller.set_menu(option)
     self.controller.set_input(fname)
     for opt, value in opts.items():
         self.controller.set_parameter(opt, opt + "=" + str(value))
     ret, out, err = generic_run(self.controller)
     self._remove_garbage(None)
     if ret.return_code != 0:
         raise IOError("GenePop not found")
     return ret, out, err
예제 #15
0
 def test_Muscle_simple(self):
     """Simple round-trip through app just infile and outfile."""
     cmdline = MuscleCommandline(muscle_exe,
                                 input=self.infile1,
                                 out=self.outfile1)
     #The wrapper must render exactly this command line
     expected = muscle_exe + " -in Fasta/f002 -out Fasta/temp_align_out1.fa"
     self.assertEqual(str(cmdline), expected)
     #...and its repr must evaluate back to an equivalent object
     rebuilt = eval(repr(cmdline))
     self.assertEqual(str(rebuilt), str(cmdline))
     #First item is the result object, then the stdout and stderr handles
     result, out_handle, err_handle = generic_run(cmdline)
     self.assertEqual(result.return_code, 0)
     self.assertEqual(out_handle.read(), "")
     self.assertTrue("ERROR" not in err_handle.read())
     #The result object must remember the command line it was run with
     self.assertEqual(str(result._cl), str(cmdline))
예제 #16
0
 def test_Muscle_profile_simple(self):
     """Simple round-trip through app doing a profile alignment."""
     cmdline = MuscleCommandline(muscle_exe)
     cmdline.set_parameter("out", self.outfile3)
     cmdline.set_parameter("profile", True)
     cmdline.set_parameter("in1", self.infile2)
     cmdline.set_parameter("in2", self.infile3)
     #The wrapper must render exactly this command line
     expected = (muscle_exe +
                 " -out Fasta/temp_align_out3.fa" +
                 " -profile -in1 Fasta/fa01 -in2 Fasta/f001")
     self.assertEqual(str(cmdline), expected)
     #...and its repr must evaluate back to an equivalent object
     rebuilt = eval(repr(cmdline))
     self.assertEqual(str(rebuilt), str(cmdline))
     #First item is the result object, then the stdout and stderr handles
     result, out_handle, err_handle = generic_run(cmdline)
     self.assertEqual(result.return_code, 0)
     self.assertEqual(out_handle.read(), "")
     self.assertTrue("ERROR" not in err_handle.read())
     #The result object must remember the command line it was run with
     self.assertEqual(str(result._cl), str(cmdline))
예제 #17
0
def mutations(hap, ref):
    '''
    List the substitutions between a haplotype and a reference sequence.

    Both sequences are handed to needle as literal "asis:" sequences (gap
    open 10, gap extend 1, markx10 format, '-auto'); the alignment is
    written to the file 'tmpout' in the current directory and read back
    with Markx10Iterator.  The run's results are logged with
    ``logfun.info``, its messages and errors with ``logfun.debug``.

    Args:
        hap: haplotype sequence, substituted into 'asis:%s'.
        ref: reference sequence, substituted into 'asis:%s'.

    Returns:
        A list of strings '<hap residue><column><ref residue>', one for
        each alignment column where the two residues differ.  Columns
        are counted from 1; columns holding a gap ('-') or an 'N' in
        either sequence are skipped.
    '''
    from Bio.Application import generic_run
    from pythonlib.cmline import NeedleCommandline
    from pythonlib.MarkxIO import Markx10Iterator

    needle_run = NeedleCommandline()
    needle_run.set_parameter('-asequence', 'asis:%s' % hap)
    needle_run.set_parameter('-bsequence', 'asis:%s' % ref)
    needle_run.set_parameter('-aformat', 'markx10')
    needle_run.set_parameter('-gapopen', 10)
    needle_run.set_parameter('-gapextend', 1)
    needle_run.set_parameter('-auto', True)
    needle_run.set_parameter('-outfile', 'tmpout')

    result_1, messages_1, errors_1 = generic_run(needle_run)
    for ar in result_1.available_results():
        logfun.info(ar + result_1.get_result(ar))

    for m in messages_1.readlines():
        logfun.debug(m)

    for e in errors_1.readlines():
        logfun.debug(e)

    mt = []
    oh = open('tmpout')
    try:
        aligniter = Markx10Iterator(oh)
        while True:
            try:
                al = aligniter.next()
            except Exception:
                # Still best effort: any parsing error ends the scan, but
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                break
            if al is None:
                break

            seqs = al.get_all_seqs()
            for i, sp in enumerate(zip(seqs[0].seq, seqs[1].seq)):
                if '-' not in sp and 'N' not in sp and sp[0] != sp[1]:
                    # i is 0-based, reported columns are 1-based
                    mt.append('%s%d%s' % (sp[0], i + 1, sp[1]))
    finally:
        # Release the alignment file even if reading it fails.
        oh.close()

    return mt
예제 #18
0
 def test_Muscle_with_options(self):
     """Round-trip through app with a switch and valued option."""
     cmdline = MuscleCommandline(muscle_exe)
     cmdline.set_parameter("input", self.infile1) #"input" is alias for "in"
     cmdline.set_parameter("out", self.outfile2)
     #Use property:
     cmdline.objscore = "sp"
     cmdline.noanchors = True
     #The wrapper must render exactly this command line
     expected = (muscle_exe +
                 " -in Fasta/f002" +
                 " -out Fasta/temp_align_out2.fa" +
                 " -objscore sp -noanchors")
     self.assertEqual(str(cmdline), expected)
     #...and its repr must evaluate back to an equivalent object
     rebuilt = eval(repr(cmdline))
     self.assertEqual(str(rebuilt), str(cmdline))
     #First item is the result object, then the stdout and stderr handles
     result, out_handle, err_handle = generic_run(cmdline)
     self.assertEqual(result.return_code, 0)
     self.assertEqual(out_handle.read(), "")
     self.assertTrue("ERROR" not in err_handle.read())
     #The result object must remember the command line it was run with
     self.assertEqual(str(result._cl), str(cmdline))
예제 #19
0
 def test_simple_clustal(self):
     """Simple muscle call using Clustal output with a MUSCLE header."""
     input_file = "Fasta/f002"
     self.assertTrue(os.path.isfile(input_file))
     #Load the expected records, closing the input handle explicitly
     input_handle = open(input_file)
     try:
         records = list(SeqIO.parse(input_handle, "fasta"))
     finally:
         input_handle.close()
     #Prepare the command... use Clustal output (with a MUSCLE header)
     cmdline = MuscleCommandline(muscle_exe, input=input_file,
                                 stable=True, clw = True)
     self.assertEqual(str(cmdline).rstrip(), muscle_exe + \
                      " -in Fasta/f002 -clw -stable")
     #The repr must evaluate back to an equivalent command line:
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     result, out_handle, err_handle = generic_run(cmdline)
     align = AlignIO.read(out_handle, "clustal")
     self.assertEqual(len(records),len(align))
     #-stable keeps the input order, so records can be compared pairwise
     for old, new in zip(records, align):
         self.assertEqual(old.id, new.id)
         self.assertEqual(str(new.seq).replace("-",""), str(old.seq))
     #Didn't use -quiet so there should be progress reports on stderr,
     self.assertTrue(err_handle.read().strip().startswith("MUSCLE"))
예제 #20
0
 def test_water_file4(self):
     """water with the asis trick and SwissProt file, output to a file."""
     #Setup,
     query = "DVCTGKALCDPVTQNIKTYPVKIENLRVMI"
     out_file = "Emboss/temp_test4.water"
     in_file = "SwissProt/sp004"
     self.assertTrue(os.path.isfile(in_file))
     #Remove any stale output left behind by an earlier run:
     if os.path.isfile(out_file):
         os.remove(out_file)
     cline = WaterCommandline(cmd=exes["water"])
     cline.set_parameter("-asequence", "asis:%s" % query)
     cline.set_parameter("-bsequence", in_file)
     #EMBOSS should work this out, but let's be explicit:
     cline.set_parameter("-sprotein", True)
     #TODO - Tell water this is a SwissProt file!
     cline.set_parameter("-gapopen", "20")
     cline.set_parameter("-gapextend", "5")
     cline.set_parameter("-outfile", out_file)
     #The repr must evaluate back to an equivalent command line:
     self.assertEqual(str(eval(repr(cline))), str(cline))
     #Run the tool,
     result, out, err = generic_run(cline)
     #Check it worked (banner expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         #Show the failing command line to help debugging
         print >> sys.stderr, "\n%s" % cline
     self.assertEqual(result.return_code, 0)
     #Should be able to access this via any alias:
     self.assertEqual(result.get_result("-outfile"), out_file)
     #Use a unittest assertion (a bare assert is stripped under -O):
     self.assertTrue(os.path.isfile(out_file))
     #Check we can parse the output and it is sensible...
     #The parsers read from their handle while being iterated, so both
     #handles stay open for the check and are closed before the clean up.
     targets_handle = open(in_file)
     try:
         alignments_handle = open(out_file)
         try:
             self.pairwise_alignment_check(
                 query,
                 SeqIO.parse(targets_handle, "swiss"),
                 AlignIO.parse(alignments_handle, "emboss"),
                 local=True)
         finally:
             alignments_handle.close()
     finally:
         targets_handle.close()
     #Clean up,
     os.remove(out_file)
예제 #21
0
 def test_simple_clustal_strict(self):
     """Simple muscle call using strict Clustal output."""
     input_file = "Fasta/f002"
     self.assertTrue(os.path.isfile(input_file))
     #Load the expected records, closing the input handle explicitly
     input_handle = open(input_file)
     try:
         records = list(SeqIO.parse(input_handle, "fasta"))
     finally:
         input_handle.close()
     #Prepare the command...
     cmdline = MuscleCommandline(muscle_exe)
     cmdline.set_parameter("in", input_file)
     #Preserve input record order (makes checking output easier)
     cmdline.set_parameter("stable", True) #Default None treated as False!
     #Use clustal output (with a CLUSTAL header)
     cmdline.set_parameter("clwstrict", True) #Default None treated as False!
     self.assertEqual(str(cmdline).rstrip(), muscle_exe + \
                      " -in Fasta/f002 -clwstrict -stable")
     #The repr must evaluate back to an equivalent command line:
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     result, out_handle, err_handle = generic_run(cmdline)
     align = AlignIO.read(out_handle, "clustal")
     self.assertEqual(len(records),len(align))
     for old, new in zip(records, align):
         self.assertEqual(old.id, new.id)
         self.assertEqual(str(new.seq).replace("-",""), str(old.seq))
     #Didn't use -quiet so there should be progress reports on stderr,
     self.assertTrue(err_handle.read().strip().startswith("MUSCLE"))
예제 #22
0
 def test_water_file4(self):
     """water with the asis trick and SwissProt file, output to a file."""
     #Setup,
     query = "DVCTGKALCDPVTQNIKTYPVKIENLRVMI"
     out_file = "Emboss/temp_test4.water"
     in_file = "SwissProt/sp004"
     self.assertTrue(os.path.isfile(in_file))
     #Remove any stale output left behind by an earlier run:
     if os.path.isfile(out_file):
         os.remove(out_file)
     cline = WaterCommandline(cmd=exes["water"])
     cline.set_parameter("-asequence", "asis:%s" % query)
     cline.set_parameter("-bsequence", in_file)
     #EMBOSS should work this out, but let's be explicit:
     cline.set_parameter("-sprotein", True)
     #TODO - Tell water this is a SwissProt file!
     cline.set_parameter("-gapopen", "20")
     cline.set_parameter("-gapextend", "5")
     cline.set_parameter("-outfile", out_file)
     #The repr must evaluate back to an equivalent command line:
     self.assertEqual(str(eval(repr(cline))), str(cline))
     #Run the tool,
     result, out, err = generic_run(cline)
     #Check it worked (banner expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         #Show the failing command line to help debugging
         print >> sys.stderr, "\n%s" % cline
     self.assertEqual(result.return_code, 0)
     #Should be able to access this via any alias:
     self.assertEqual(result.get_result("-outfile"), out_file)
     #Use a unittest assertion (a bare assert is stripped under -O):
     self.assertTrue(os.path.isfile(out_file))
     #Check we can parse the output and it is sensible...
     #Keep both handles open while the check iterates over the parsers,
     #then close them so the output file is not open when it is removed.
     targets_handle = open(in_file)
     try:
         alignments_handle = open(out_file)
         try:
             self.pairwise_alignment_check(
                 query,
                 SeqIO.parse(targets_handle, "swiss"),
                 AlignIO.parse(alignments_handle, "emboss"),
                 local=True)
         finally:
             alignments_handle.close()
     finally:
         targets_handle.close()
     #Clean up,
     os.remove(out_file)
예제 #23
0
    #And again, but this time using Bio.Align.Applications wrapper
    #Any filesnames with spaces should get escaped with quotes automatically.
    #Using keyword arguments here.
    cline = ClustalwCommandline(clustalw_exe,
                                infile=input_file,
                                outfile=output_file)
    assert str(eval(repr(cline))) == str(cline)
    if newtree_file is not None:
        #Test using a property:
        cline.newtree = newtree_file
        #I don't just want the tree, also want the alignment:
        cline.align = True
        assert str(eval(repr(cline))) == str(cline)
    #print cline
    return_code, out_handle, err_handle = generic_run(cline)
    assert out_handle.read().strip().startswith("CLUSTAL")
    assert err_handle.read().strip() == ""
    align = AlignIO.read(open(output_file), "clustal")
    assert set(input_records.keys()) == set(output_records.keys())
    for record in align:
        assert str(record.seq) == str(output_records[record.id].seq)
        assert str(record.seq).replace("-","") == \
               str(input_records[record.id].seq)

    #Clean up...
    os.remove(output_file)

    #Check the DND file was created.
    #TODO - Try and parse this with Bio.Nexus?
    if newtree_file is not None:
예제 #24
0
def reads2clones_align(sample_dir, max_freq=2):
    """
    
    """
    from Bio.Application import generic_run
    from cmline import NeedleCommandline
    go = 6.0
    ge = 3.0

    if max_freq > 10:
        sys.exit('10 reference genomes maximum')

    # align uncorrected reads

    for f in range(max_freq):
        ref_genome = './raw/Fastas/clone' + ref_genomes[f] + '.fsta'
        outfile = '%s/reads-%s.needle' % (sample_dir, ref_genomes[f])
        needle_run = NeedleCommandline()
        needle_run.set_parameter('-asequence', ref_genome)
        needle_run.set_parameter('-bsequence', '%s/reads.fas' % sample_dir)
        needle_run.set_parameter('-gapopen', go)
        needle_run.set_parameter('-gapextend', ge)
        needle_run.set_parameter('-outfile', outfile)
        needle_run.set_parameter('-aformat', 'markx10')
        result, messages, errors = generic_run(needle_run)

        for ar in result.available_results():
            print ar, result.get_result(ar)

        if Verbose:
            for m in messages.readlines():
                print >> sys.stderr, m

            for e in errors.readlines():
                print >> sys.stderr, e

# align corrected reads

    for f in range(max_freq):
        ref_genome = './raw/Fastas/clone' + ref_genomes[f] + '.fsta'
        outfile = '%s/reads-cor-%s.needle' % (sample_dir, ref_genomes[f])
        needle_run = NeedleCommandline()
        needle_run.set_parameter('-asequence', ref_genome)
        needle_run.set_parameter('-bsequence', '%s/reads.cor.fas' % sample_dir)
        needle_run.set_parameter('-gapopen', go)
        needle_run.set_parameter('-gapextend', ge)
        needle_run.set_parameter('-outfile', outfile)
        needle_run.set_parameter('-aformat', 'markx10')
        result, messages, errors = generic_run(needle_run)

        for ar in result.available_results():
            print ar, result.get_result(ar)

        if Verbose:
            for m in messages.readlines():
                print >> sys.stderr, m

            for e in errors.readlines():
                print >> sys.stderr, e

    return
예제 #25
0
 def run(self):
     """Pass this object to ``generic_run`` and return its result unchanged.

     NOTE(review): the return value is presumably a (result, stdout handle,
     stderr handle) triple -- generic_run is defined outside this snippet,
     confirm against its definition.
     """
     return generic_run(self)
예제 #26
0
    SeqIO.write(rec, 'in.fas', 'fasta')
    fw2.close()
    primer_cl = Primer3Commandline(sequence="in.fas", auto=True)
    primer_cl.outfile = "out.pr3"
    primer_cl.numreturn = 3
    #    primer_cl.target = str(overgo1s)+","+str(overgo1e/3+overgo2s*2/3) # can specify here the region that requires inclusion in the product
    primer_cl.osize = 20
    primer_cl.maxsize = 26
    primer_cl.otm = 58
    primer_cl.mintm = 52
    primer_cl.mingc = 35
    primer_cl.maxgc = 75
    primer_cl.psizeopt = 200
    primer_cl.prange = "100-400"

    result, messages, errors = generic_run(primer_cl)
    print result

    try:
        open_outfile = file("out.pr3", "r")
    except:
        pass
    else:
        primer_record = read(open_outfile)
        for primer in primer_record.primers:
            product_len = -primer.forward_start + primer.reverse_start + primer.reverse_length
            fw.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % \
                     (rec.id,overgos[rec.id][0][0],primer.forward_seq,primer.forward_start,primer.forward_length,primer.forward_tm,primer.forward_gc,primer.reverse_seq,primer.reverse_start,primer.reverse_length,primer.reverse_tm,primer.reverse_gc,len(rec.seq),product_len))
        open_outfile.close()
    try:
        os.system("rm out.pr3")
예제 #27
0
 def run(self):
     """Pass this object to ``generic_run`` and return its result unchanged.

     NOTE(review): the return value is presumably a (result, stdout handle,
     stderr handle) triple -- generic_run is defined outside this snippet,
     confirm against its definition.
     """
     return generic_run(self)
예제 #28
0
    SeqIO.write(rec,'in.fas','fasta')
    fw2.close()
    primer_cl = Primer3Commandline(sequence="in.fas",auto=True)
    primer_cl.outfile = "out.pr3"
    primer_cl.numreturn = 3
#    primer_cl.target = str(overgo1s)+","+str(overgo1e/3+overgo2s*2/3) # can specify here the region that requires inclusion in the product
    primer_cl.osize = 20
    primer_cl.maxsize = 26
    primer_cl.otm = 58
    primer_cl.mintm = 52
    primer_cl.mingc = 35
    primer_cl.maxgc = 75
    primer_cl.psizeopt = 200
    primer_cl.prange = "100-400"

    result, messages, errors = generic_run(primer_cl)
    print result

    try: 
        open_outfile = file("out.pr3", "r")
    except: pass
    else:    
        primer_record = read(open_outfile)
        for primer in primer_record.primers:
            product_len = -primer.forward_start+primer.reverse_start+primer.reverse_length
            fw.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % \
                     (rec.id,overgos[rec.id][0][0],primer.forward_seq,primer.forward_start,primer.forward_length,primer.forward_tm,primer.forward_gc,primer.reverse_seq,primer.reverse_start,primer.reverse_length,primer.reverse_tm,primer.reverse_gc,len(rec.seq),product_len))
        open_outfile.close()
    try: os.system("rm out.pr3")
    except: pass
예제 #29
0
    #And again, but this time using Bio.Align.Applications wrapper
    #Any filesnames with spaces should get escaped with quotes automatically.
    #Using keyword arguments here.
    cline = ClustalwCommandline(clustalw_exe,
                                infile=input_file,
                                outfile=output_file)
    assert str(eval(repr(cline)))==str(cline)
    if newtree_file is not None :
        #Test using a property:
        cline.newtree = newtree_file
        #I don't just want the tree, also want the alignment:
        cline.align = True
        assert str(eval(repr(cline)))==str(cline)
    #print cline
    return_code, out_handle, err_handle = generic_run(cline)
    assert out_handle.read().strip().startswith("CLUSTAL")
    assert err_handle.read().strip() == ""
    align = AlignIO.read(open(output_file), "clustal")
    assert set(input_records.keys()) == set(output_records.keys())
    for record in align :
        assert str(record.seq) == str(output_records[record.id].seq)
        assert str(record.seq).replace("-","") == \
               str(input_records[record.id].seq)

    #Clean up...
    os.remove(output_file)

    #Check the DND file was created.
    #TODO - Try and parse this with Bio.Nexus?
    if newtree_file is not None :