예제 #1
0
 def test_water_file3(self):
     """Align an asis query against a GenBank file with water, writing to a file."""
     # Query sequence and the fixture/report paths used by this run.
     query = "TGTTGTAATGTTTTAATGTTTCTTCTCCCTTTAGATGTACTACGTTTGGA"
     in_file = "GenBank/cor6_6.gb"
     out_file = "Emboss/temp_test3.water"
     self.assertTrue(os.path.isfile(in_file))
     # Start from a clean slate: drop any report left by an earlier run.
     if os.path.isfile(out_file):
         os.remove(out_file)
     # Assemble the command using the human readable parameter names.
     cline = WaterCommandline(cmd=exes["water"])
     settings = (
         ("asequence", "asis:" + query),
         ("bsequence", in_file),
         # TODO - Tell water this is a GenBank file!
         ("gapopen", "1"),
         ("gapextend", "0.5"),
         ("outfile", out_file),
     )
     for name, value in settings:
         cline.set_parameter(name, value)
     # repr() has to evaluate back to an equivalent command line.
     rebuilt = eval(repr(cline))
     self.assertEqual(str(rebuilt), str(cline))
     # Execute water.
     self.run_water(cline)
     # The report must parse and be consistent with the inputs.
     records = SeqIO.parse(in_file, "genbank")
     alignments = AlignIO.parse(out_file, "emboss")
     self.pairwise_alignment_check(query, records, alignments, local=True)
     # Remove the temporary report.
     os.remove(out_file)
예제 #2
0
 def test_water_piped(self):
     """Run water on two asis sequences and parse the alignment from its stdout."""
     cline = WaterCommandline(
         cmd=exes["water"],
         asequence="asis:ACCCGGGCGCGGT",
         bsequence="asis:ACCCGAGCGCGGT",
         gapopen=10,
         gapextend=0.5,
         auto=True,
         filter=True,
     )
     # The rendered command is expected to list its arguments in this order.
     expected = " ".join(
         [
             exes["water"],
             "-auto",
             "-filter",
             "-asequence=asis:ACCCGGGCGCGGT",
             "-bsequence=asis:ACCCGAGCGCGGT",
             "-gapopen=10",
             "-gapextend=0.5",
         ]
     )
     self.assertEqual(str(cline), expected)
     # Launch the tool with all three standard streams piped.
     use_shell = sys.platform != "win32"
     child = subprocess.Popen(
         str(cline),
         stdin=subprocess.PIPE,
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         universal_newlines=True,
         shell=use_shell,
     )
     child.stdin.close()
     # The alignment is read straight from the child's stdout.
     align = AlignIO.read(child.stdout, "emboss")
     self.assertEqual(len(align), 2)
     first_row, second_row = align[0], align[1]
     self.assertEqual(str(first_row.seq), "ACCCGGGCGCGGT")
     self.assertEqual(str(second_row.seq), "ACCCGAGCGCGGT")
     # Nothing may be written to stderr and the exit status must be zero.
     self.assertEqual(child.stderr.read(), "")
     self.assertEqual(0, child.wait())
     child.stdout.close()
     child.stderr.close()
예제 #3
0
 def test_water_file2(self):
     """Align an asis query against a nucleotide FASTA file with water, writing to a file."""
     # Query sequence and the fixture/report paths used by this run.
     query = "ACACACTCACACACACTTGGTCAGAGATGCTGTGCTTCTTGGAAGCAAGGNCTCAAAGGCAAGGTGCACGCAGAGGGACGTTTGAGTCTGGGATGAAGCATGTNCGTATTATTTATATGATGGAATTTCACGTTTTTATG"
     in_file = "Fasta/f002"
     out_file = "Emboss/temp_test2.water"
     self.assertTrue(os.path.isfile(in_file))
     # Drop any report left behind by an earlier run.
     if os.path.isfile(out_file):
         os.remove(out_file)
     # Assemble the command using the literal (dash-prefixed) option names.
     cline = WaterCommandline(cmd=exes["water"])
     settings = (
         ("-asequence", "asis:" + query),
         ("-bsequence", in_file),
         ("-gapopen", "10"),
         ("-gapextend", "0.5"),
         ("-outfile", out_file),
     )
     for option, value in settings:
         cline.set_parameter(option, value)
     # repr() has to evaluate back to an equivalent command line.
     rebuilt = eval(repr(cline))
     self.assertEqual(str(rebuilt), str(cline))
     # Execute water.
     self.run_water(cline)
     # The report must parse and be consistent with the inputs.
     records = SeqIO.parse(in_file, "fasta")
     alignments = AlignIO.parse(out_file, "emboss")
     self.pairwise_alignment_check(query, records, alignments, local=True)
     # Remove the temporary report.
     os.remove(out_file)
예제 #4
0
def generate_water_cmd(macierz, pliki_fasta_rodzina):
    """Build EMBOSS ``water`` command lines for every pair of sequences.

    All records are read from the given FASTA files, and one command line is
    produced for each unordered pair of records (first record paired with
    every later one, then the second, and so on). Nothing is executed here.

    :param macierz: path/name of the substitution matrix file (PAM/BLOSUM),
        passed to water as ``datafile``
    :param pliki_fasta_rodzina: iterable of paths/names of FASTA files with
        the protein sequences belonging to one family
    :return: list of command-line strings, one per sequence pair
    """
    from Bio.Emboss.Applications import WaterCommandline

    records = []
    for fasta_path in pliki_fasta_rodzina:
        # Plain text mode instead of the legacy "rU": the "U" flag was removed
        # in Python 3.11, and text mode already normalises line endings on
        # Python 3. The context manager closes the file even if parsing fails.
        with open(fasta_path, "r") as handle:
            records.extend(SeqIO.parse(handle, "fasta"))

    all_water_cmd = []
    for index, first in enumerate(records):
        for second in records[index + 1:]:
            # Maximal gap penalties so that the alignments come out gapless.
            water_cmd = WaterCommandline(gapopen=100, gapextend=10)
            water_cmd.asequence = "asis:" + str(first.seq)
            water_cmd.bsequence = "asis:" + str(second.seq)
            water_cmd.stdout = True
            water_cmd.sprotein = True
            water_cmd.datafile = macierz
            all_water_cmd.append(str(water_cmd))

    return all_water_cmd
예제 #5
0
파일: EMBOSSW23.py 프로젝트: cwt1/BioGUI
    def GetExec(self, optList, frame):
        """Build the EMBOSS water command line from the current GUI settings.

        Stores ``frame`` and the output file/type on the instance, reads the
        two sequences from ``frame.paramBoxes`` and the gap/similarity
        settings from ``self.param``, and returns the command as a string.
        ``optList`` is accepted but not used.
        """
        # Respond to the "embossn" type command.
        self.frame = frame
        plugin_exe = r"C:/mEMBOSS/water.exe"
        self.outfile = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\water.txt"
        self.outtype = "fasta"
        cline = WaterCommandline(
            plugin_exe,
            asequence=str(self.frame.paramBoxes[1].GetValue()),
            bsequence=str(self.frame.paramBoxes[3].GetValue()))
        cline.outfile = self.outfile
        cline.gapopen = self.param[7].GetValue()
        cline.gapextend = self.param[9].GetValue()
        cline.similarity = bool(self.param[10].GetValue())

        if self.frame.abet == "AA":
            # Amino-acid input must be flagged as protein; the previous code
            # set the nucleotide flags here, duplicating the DNA/RNA branch.
            cline.snucleotide = False
            cline.sprotein = True
        elif self.frame.abet in ("DNA", "RNA"):
            cline.snucleotide = True
            cline.sprotein = False
        if self.frame.options:
            # Optional scoring-matrix file; only applied when a value is given.
            t = self.boxList[3].GetValue()
            if t != '':
                cline.datafile = str(t)
        return str(cline)
예제 #6
0
 def test_water_file4(self):
     """Align an asis protein query against a SwissProt file with water, writing to a file."""
     # Query sequence and the fixture/report paths used by this run.
     query = "DVCTGKALCDPVTQNIKTYPVKIENLRVMI"
     in_file = "SwissProt/P0A186.txt"
     out_file = "Emboss/temp_test4.water"
     self.assertTrue(os.path.isfile(in_file))
     # Drop any report left behind by an earlier run.
     if os.path.isfile(out_file):
         os.remove(out_file)
     # Assemble the command using the literal (dash-prefixed) option names.
     cline = WaterCommandline(cmd=exes["water"])
     settings = (
         ("-asequence", "asis:" + query),
         ("-bsequence", in_file),
         # EMBOSS should work this out, but let's be explicit:
         ("-sprotein", True),
         # TODO - Tell water this is a SwissProt file!
         ("-gapopen", "20"),
         ("-gapextend", "5"),
         ("-outfile", out_file),
     )
     for option, value in settings:
         cline.set_parameter(option, value)
     # repr() has to evaluate back to an equivalent command line.
     rebuilt = eval(repr(cline))
     self.assertEqual(str(rebuilt), str(cline))
     # Execute water.
     self.run_water(cline)
     # The report must parse and be consistent with the inputs.
     records = SeqIO.parse(in_file, "swiss")
     alignments = AlignIO.parse(out_file, "emboss")
     self.pairwise_alignment_check(query, records, alignments, local=True)
     # Remove the temporary report.
     os.remove(out_file)
def water_align(query_seq, target_seq):
    """Run EMBOSS water on two literal sequences and return its captured stdout.

    Both sequences are passed with the ``asis:`` prefix, and the report is
    requested in ``simple`` format on stdout.
    """
    settings = {
        "asequence": "asis:" + query_seq,
        "bsequence": "asis:" + target_seq,
        "aformat": "simple",
        "gapopen": 10,
        "gapextend": 0.5,
        "outfile": 'stdout',
    }
    runner = WaterCommandline(**settings)
    # Only the standard output is returned; stderr is discarded.
    report_text, _stderr_text = runner()
    return report_text
예제 #8
0
파일: check.py 프로젝트: dani2s/ReverSa
    def localAlign(self):
        """Score every sequence against the original and the reversed FASTA.

        For each record in ``self.file`` two water runs are made: one against
        ``self.file`` itself and one against ``rever_seq.fasta`` (presumably
        written by ``self.reverseFile()`` -- confirm). The ``# Score:`` values
        of each report are collected per query id and stored as the float
        data frames ``self.dfQuery`` and ``self.dfRever``, whose columns are
        relabelled with the row ids.
        """
        self.reverseFile()
        patt = re.compile(r'\# Score: (.*)')

        def water_scores(query, bsequence, outfile):
            """Run water for *query* against *bsequence*; return the scores found in *outfile*."""
            cline = WaterCommandline(asequence='asis:%s' % query,
                                     bsequence=bsequence,
                                     gapopen=10,
                                     gapextend=0.5,
                                     outfile=outfile)
            cline()
            # Read the report inside a context manager so the handle is
            # closed instead of being left to the garbage collector.
            with open(outfile, 'r') as report:
                hits = (patt.match(line) for line in report)
                return [str(hit.group(1)) for hit in hits if hit is not None]

        queryDict = OrderedDict()
        reverDict = OrderedDict()
        for seq in SeqIO.parse(self.file, "fasta"):
            query = str(seq.seq)
            seq_id = str(seq.id)
            # Local alignment against the original FASTA.
            queryDict[seq_id] = water_scores(query, self.file, 'water.txt')
            # Local alignment against the reversed sequences of that FASTA.
            reverDict[seq_id] = water_scores(query, 'rever_seq.fasta',
                                             'waterRever.txt')

        # Save the alignment results as data frames (rows = query ids).
        self.dfQuery = pd.DataFrame.from_dict(queryDict,
                                              orient='index',
                                              dtype=float)
        self.dfQuery.columns = self.dfQuery.index.tolist()
        self.dfRever = pd.DataFrame.from_dict(reverDict,
                                              orient='index',
                                              dtype=float)
        self.dfRever.columns = self.dfRever.index.tolist()
예제 #9
0
def waterAlign(seq1, seq2, gapopen, gapextend):
    """Run EMBOSS water on the two inputs and print whatever it wrote to stdout.

    The alignment report itself is written to the file ``needle.txt``.
    """
    runner = WaterCommandline(
        asequence=seq1,
        bsequence=seq2,
        gapopen=gapopen,
        gapextend=gapextend,
        outfile="needle.txt",
    )
    captured_out, _captured_err = runner()
    print(captured_out)
    print(stdout)
예제 #10
0
 def water_align_code(query_seq, target_seq):
     """Return the percentage found on a fixed line of water's ``simple`` report.

     Both sequences are aligned as ``asis:`` literals with the report sent to
     stdout. The text between the outermost parentheses on report line index
     25 is returned with any ``%`` characters removed.

     Raises IndexError if the report has fewer than 26 lines and
     AttributeError if that line contains no parenthesised text.
     """
     water_cline = WaterCommandline(asequence="asis:" + query_seq,
                                    bsequence="asis:" + target_seq,
                                    aformat="simple",
                                    gapopen=10,
                                    gapextend=0.5,
                                    outfile='stdout'
                                    )
     out_data, _err = water_cline()
     out_split = out_data.split("\n")
     # Raw string: in a normal literal the backslash-parenthesis pairs are
     # invalid escape sequences, which newer Python versions warn about.
     p = re.compile(r"\((.*)\)")
     # NOTE(review): index 25 depends on the exact header layout water emits;
     # confirm which statistic (identity/similarity) sits on that line.
     return p.search(out_split[25]).group(1).replace("%", "")
예제 #11
0
def water_alignment(sequence1, sequence2):
    """Align two records with EMBOSS water and return the two aligned rows.

    The records are written as FASTA into a temporary directory, water is run
    on those files, and the first two rows of the parsed report are returned
    as a tuple.
    """
    with TemporaryDirectory() as workdir:
        first_path = os.path.join(workdir, "seq1.fas")
        second_path = os.path.join(workdir, "seq2.fas")
        report_path = os.path.join(workdir, "alignment.fas")
        SeqIO.write(sequence1, first_path, "fasta")
        SeqIO.write(sequence2, second_path, "fasta")
        runner = WaterCommandline(
            asequence=first_path,
            bsequence=second_path,
            gapopen=10,
            gapextend=0.5,
            outfile=report_path,
        )
        runner()
        # Parse before the temporary directory (and the report) disappears.
        pair = AlignIO.read(report_path, "emboss")
    first_row = pair[0]
    second_row = pair[1]
    return (first_row, second_row)
예제 #12
0
 def test_water_needs_output(self):
     """Rendering a water command with no outfile, stdout or filter must raise ValueError."""
     cline = WaterCommandline(
         cmd=exes["water"],
         asequence="asis:ACCCGGGCGCGGT",
         bsequence="asis:ACCCGAGCGCGGT",
         gapopen=10,
         gapextend=0.5,
         auto=True,
     )
     # Only -auto is switched on; no output destination has been configured.
     self.assertTrue(cline.auto)
     self.assertFalse(cline.stdout)
     self.assertFalse(cline.filter)
     self.assertEqual(cline.outfile, None)
     # Converting to a string is what triggers the validation.
     with self.assertRaises(ValueError):
         str(cline)
예제 #13
0
파일: swat.py 프로젝트: danshea/python
def main():
    """Run EMBOSS water on two UniProt entries named on the command line.

    Expects exactly two arguments (the UniProt ids). Each id is resolved via
    ``getFasta`` and the result is handed to water as a sequence input; the
    report goes to ``<id1>_<id2>_water.txt``. Exits with status 1 on a usage
    error and 0 on success.
    """
    if len(sys.argv) != 3:
        # print() as a call with one argument behaves the same on Python 2
        # and 3; the former print statement is a syntax error on Python 3.
        print('usage {0:s} uniprot_id1 uniprot_id2'.format(sys.argv[0]))
        sys.exit(1)
    uniprot_a = sys.argv[1]
    uniprot_b = sys.argv[2]
    # NOTE(review): assumes getFasta returns something water accepts as a
    # sequence reference (e.g. a file path) -- confirm.
    sequence_a = getFasta(uniprot_a)
    sequence_b = getFasta(uniprot_b)
    water_cline = WaterCommandline('/usr/local/bin/water',
                                   asequence=sequence_a,
                                   bsequence=sequence_b,
                                   gapopen=10,
                                   gapextend=1,
                                   outfile='{0:s}_{1:s}_water.txt'.format(
                                       uniprot_a, uniprot_b))
    # The captured stdout/stderr are not needed; the report is in the outfile.
    water_cline()
    sys.exit(0)
예제 #14
0
 def water_c(RMfile, aname, bname):
     """Align two named entries of ``RMfile`` with water and return their coordinates.

     The report is written in ``pair`` format to a temporary file, which is
     then handed to ``Align2_pos``; its four results are returned as the
     tuple ``(astart, aend, bstart, bend)``.
     """
     executable = r"/Users/Xiaoyu/EMBOSS/EMBOSS-6.6.0/emboss/water"
     with tempfile.NamedTemporaryFile('w+') as report:
         # Each input is addressed as "<file>:<entry name>".
         first_ref = "%s:%s" % (RMfile, aname)
         second_ref = "%s:%s" % (RMfile, bname)
         runner = WaterCommandline(
             executable,
             asequence=first_ref,
             bsequence=second_ref,
             gapopen=16,
             gapextend=4,
             aformat="pair",
             outfile=report.name,
         )
         _captured_out, _captured_err = runner()
         # Read the positions while the temporary report still exists.
         astart, aend, bstart, bend = Align2_pos(report, aname, bname)
         positions = (astart, aend, bstart, bend)
         return positions
예제 #15
0
def emboss_local_pairwise_alignment(query_dir, seq_type):
    """Run EMBOSS water on every pair of ``*.fa`` files in a directory.

    The files are taken in sorted order; each file is aligned against itself
    and every later file, and the report for files number ``a`` and ``b``
    (1-based) is written to ``<query_dir>/pairwise_<a>_<b>.aln``.

    query_dir: directory holding the ``*.fa`` inputs and receiving the reports
    seq_type: 'fg' or 'ssu'; only selects the banner that is printed
    Returns the glob pattern ``<query_dir>/*.aln`` matching the reports.
    """
    # print() with a single argument works on both Python 2 and 3, unlike
    # the print statement used previously.
    if seq_type == 'fg':
        print('\n   ...pairwise comparison of functional gene sequences...\n')
    elif seq_type == 'ssu':
        print('\n   ...pairwise comparison of SSU rRNA sequences...\n')
    water_cline = WaterCommandline()
    water_cline.gapopen = 10
    water_cline.gapextend = 0.5
    query_list = sorted(glob.glob(query_dir + "/*.fa"))
    for i, a_seq in enumerate(query_list):
        water_cline.asequence = str(a_seq)
        # Start at i so that each unordered pair is aligned exactly once.
        for b_number, b_seq in enumerate(query_list[i:], i + 1):
            water_cline.bsequence = str(b_seq)
            align_out = (query_dir + "/pairwise_" + str(i + 1) + "_"
                         + str(b_number) + ".aln")
            water_cline.outfile = str(align_out)
            water_cline()
    print('Done\n')
    return query_dir + "/*.aln"
예제 #16
0
 def test_water_file(self):
     """Run water on two asis sequences, writing to a file whose name contains a space."""
     # Mix the three ways of supplying options: constructor keywords ...
     cline = WaterCommandline(cmd=exes["water"], gapopen="10", gapextend="0.5")
     # ... set_parameter with a human readable and with a literal name ...
     cline.set_parameter("asequence", "asis:ACCCGGGCGCGGT")
     cline.set_parameter("-bsequence", "asis:ACCCGAGCGCGGT")
     # ... and plain property assignment.
     cline.outfile = "Emboss/temp with space.water"
     # repr() has to evaluate back to an equivalent command line.
     rebuilt = eval(repr(cline))
     self.assertEqual(str(rebuilt), str(cline))
     # Execute water.
     self.run_water(cline)
     # The report must parse into the two input sequences.
     align = AlignIO.read(cline.outfile, "emboss")
     self.assertEqual(len(align), 2)
     first_row, second_row = align[0], align[1]
     self.assertEqual(first_row.seq, "ACCCGGGCGCGGT")
     self.assertEqual(second_row.seq, "ACCCGAGCGCGGT")
     # Remove the temporary report.
     os.remove(cline.outfile)
예제 #17
0
def call_emboss(emboss_tool, aseq, bseq, outfile):
    """Run EMBOSS needle or water on two sequence inputs.

    emboss_tool: executable name/path; must contain 'needle' (global
        alignment) or 'water' (local alignment). 'needle' takes precedence
        if both substrings occur.
    aseq, bseq: the two sequence inputs passed as asequence/bsequence
    outfile: path of the report file to write

    Returns None. Raises ValueError when ``emboss_tool`` names neither tool
    (previously this surfaced as an UnboundLocalError on ``tool``).
    """
    if 'needle' in emboss_tool:  # global alignment
        commandline_cls = NeedleCommandline
    elif 'water' in emboss_tool:  # local alignment
        commandline_cls = WaterCommandline
    else:
        raise ValueError(
            "Unsupported EMBOSS tool %r: expected 'needle' or 'water'"
            % (emboss_tool,))

    # Both tools are driven with the same gap penalties.
    tool = commandline_cls(emboss_tool,
                           asequence=aseq,
                           bsequence=bseq,
                           gapopen=10,
                           gapextend=0.5,
                           outfile=outfile)
    tool()

    return None
예제 #18
0
 def test_water_file4(self):
     """water with the asis trick and SwissProt file, output to a file."""
     # Setup,
     query = "DVCTGKALCDPVTQNIKTYPVKIENLRVMI"
     out_file = "Emboss/temp_test4.water"
     in_file = "SwissProt/sp004"
     self.assertTrue(os.path.isfile(in_file))
     if os.path.isfile(out_file):
         os.remove(out_file)
     cline = WaterCommandline(cmd=exes["water"])
     cline.set_parameter("-asequence", "asis:%s" % query)
     cline.set_parameter("-bsequence", in_file)
     # EMBOSS should work this out, but let's be explicit:
     cline.set_parameter("-sprotein", True)
     # TODO - Tell water this is a SwissProt file!
     cline.set_parameter("-gapopen", "20")
     cline.set_parameter("-gapextend", "5")
     cline.set_parameter("-outfile", out_file)
     self.assertEqual(str(eval(repr(cline))), str(cline))
     # Run the tool,
     result, out, err = generic_run(cline)
     # Check it worked; the tool's banner is expected on stderr, nothing on stdout.
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         # Show the failing command; written this way so the line is valid
         # on both Python 2 and Python 3.
         sys.stderr.write("\n%s\n" % cline)
     self.assertEqual(result.return_code, 0)
     # Should be able to access this via any alias:
     self.assertEqual(result.get_result("-outfile"), out_file)
     # A unittest assertion is used because bare asserts vanish under -O.
     self.assertTrue(os.path.isfile(out_file))
     # Check we can parse the output and it is sensible; the handles are
     # closed before the report is deleted.
     with open(in_file) as in_handle, open(out_file) as out_handle:
         self.pairwise_alignment_check(query,
                                       SeqIO.parse(in_handle, "swiss"),
                                       AlignIO.parse(out_handle, "emboss"),
                                       local=True)
     # Clean up,
     os.remove(out_file)
예제 #19
0
def getCellBarcodeAlignment(read, fil):
    """
    Align one read against a barcode FASTA with water over stdin/stdout.
        read: one SMRT read (sequence string), sent to water as FASTA on stdin
        fil: {index}_CBC-list.fasta, used as bsequence
        return: best matched CBC for this SMRT read and the corresponding score

    Raises AssertionError if the number of parsed names and scores differ,
    and ValueError if the report contains no score at all.
    """
    water_cline = WaterCommandline(asequence='stdin',
                                   filter=True,
                                   bsequence=fil,
                                   gapopen=10.0,
                                   gapextend=.5,
                                   stdout=True)
    child = subprocess.Popen(str(water_cline),
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    rec = SeqRecord(Seq(read), id="temp")
    SeqIO.write(rec, child.stdin, "fasta")
    # communicate() flushes and closes stdin, drains stdout AND stderr, closes
    # the pipes and waits for the child. Reading stdout alone could block
    # forever if the child filled the stderr pipe, and left the process
    # unreaped.
    report, _errors = child.communicate()
    seqs, scores = [], []
    for line in report.split("\n"):
        if '2:' in line[:8]:
            # "# 2: <name>" header line naming the matched bsequence entry.
            seqs.append(line.strip().split(':')[1])
        elif 'Score' in line:
            scores.append(float(line.split(':')[1]))
    assert len(seqs) == len(
        scores), "ERROR: incorrect alignment file line counting."
    best_score = max(scores)
    return seqs[scores.index(best_score)], best_score
예제 #20
0
def _ensure_dir(dir_path):
    """Create *dir_path* unless it already exists, tolerating concurrent creation."""
    try:
        os.mkdir(dir_path)
    except OSError:
        # Another MPI rank may have created the directory first; only
        # re-raise when it really is missing.
        if not os.path.isdir(dir_path):
            raise


def main():
    """Align this MPI rank's data chunk against the four reference fragments.

    Rank ``r`` processes ``00-data/chunk-<r+1, zero padded to 3>.fasta``. For
    each fragment the matching ``00-external/<fragment>.txt`` is aligned with
    water against the chunk, and the report is written to
    ``01-fix_orientation/<fragment>/chunk-XXX.txt``. The output directories
    are created even when the rank has no data file.
    """
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    loc = '01-fix_orientation'
    chunk = 'chunk-' + '{:03}'.format(rank + 1)
    # A separate exists()/mkdir() pair can fail when two ranks both see the
    # directory missing and then both try to create it.
    _ensure_dir(loc)
    for fragment in ('inside', 'inside_rc', 'outside', 'outside_rc'):
        subloc = loc + '/' + fragment
        _ensure_dir(subloc)
        infile = os.path.join('00-external', fragment + '.txt')
        data = os.path.join('00-data', chunk + '.fasta')
        if not os.path.isfile(data):
            # Nothing to do for this rank.
            continue
        outfile = os.path.join(subloc, chunk + '.txt')
        water_cline = WaterCommandline(asequence=infile,
                                       bsequence=data,
                                       gapopen=10.0,
                                       gapextend=.5,
                                       outfile=outfile)
        water_cline()
예제 #21
0
def water(*id, gop=10, gex=0.5, out='emb.aln'):
    """Local (Smith-Waterman) alignment with EMBOSS water.

    The first id is written to ``seqa.fas`` and all remaining ids to
    ``seqb.fas`` via ``mkfasx``; water then aligns the former against the
    latter and writes its report to ``out``.

    id: sequence identifiers; the first is the query, the rest the targets
    gop: gap opening penalty
    gex: gap extension penalty
    out: path of the report file

    Returns the parsed alignment when fewer than three ids were given (a
    single pairwise alignment); otherwise returns None and the report is
    only available in ``out``.
    """
    temp_inputs = ('seqa.fas', 'seqb.fas')
    try:
        mkfasx('seqa.fas', id[0])
        mkfasx('seqb.fas', *id[1:])

        water_cline = WaterCommandline(asequence='seqa.fas',
                                       bsequence='seqb.fas',
                                       gapopen=gop,
                                       gapextend=gex,
                                       outfile=out)
        water_cline()
    finally:
        # Remove the temporary inputs even when water (or mkfasx) fails.
        for temp_path in temp_inputs:
            if os.path.exists(temp_path):
                os.remove(temp_path)

    if len(id) < 3:
        return AlignIO.read(out, "emboss")
    return None
예제 #22
0
def emboss_water(seq_a_file: str, seq_b_file: str, out_file: str):
    """Do a local (Smith-Waterman) pairwise alignment using EMBOSS water.

    The report is requested in MSF format.

    Args:
        seq_a_file: First sequence
        seq_b_file: second sequence
        out_file: Output file

    Returns:
        The ``subprocess.CompletedProcess`` of the finished water run.

    Raises:
        subprocess.CalledProcessError: if water exits with a non-zero status.
    """
    import shlex
    import sys

    water_cline = WaterCommandline(asequence=seq_a_file,
                                   bsequence=seq_b_file,
                                   outfile=out_file,
                                   verbose=True,
                                   gapextend=1,
                                   gapopen=10,
                                   aformat="msf")

    cmd = str(water_cline)
    if sys.platform != "win32":
        # The rendered command quotes arguments containing spaces, so it has
        # to be tokenised shell-style; a plain split(" ") would break such
        # paths apart. On Windows the string is passed through unchanged.
        cmd = shlex.split(cmd)

    return subprocess.run(cmd, check=True)
예제 #23
0
import Bio.Seq
import os
from Bio.Emboss.Applications import WaterCommandline
from Bio.Align.Applications import ClustalwCommandline

# Each input line is expected to hold tab-separated fields; the text between
# the first and the last tab is the first sequence, and the text after the
# last tab is the second one.
with open('/home/nastia/fasta_end.txt', 'r') as fasta, \
        open('/home/nastia/Desktop/output.txt', 'w') as outfileput:
    for string in fasta:
        m = string.find('\t')
        n = string.rfind('\t')
        my_seq_1 = Bio.Seq.Seq(string[m + 1:n])
        # Strip only the line terminator: slicing off the last character
        # unconditionally lost a residue when the final line had no newline.
        my_seg_2 = Bio.Seq.Seq(string[n + 1:].rstrip('\n'))
        # NOTE(review): the command is only constructed, never executed or
        # written to outfileput; raw sequences normally need the "asis:"
        # prefix seen in other water examples -- confirm the intent.
        cline = WaterCommandline(gapopen=10,
                                 gapextend=0.5,
                                 asequence=my_seq_1,
                                 bsequence=my_seg_2,
                                 outfile='/home/nastia/Desktop/Water.txt')
예제 #24
0
"""
This is the first example of Python script.
"""
from Bio.Emboss.Applications import WaterCommandline
from Bio.Seq import Seq

a = 10  # variable a
b = 33  # variable b
c = a / b  # variable c holds the ratio

# Let's print the result to screen.
print("a:", a, " b:", b, " a/b=", c)

# A Seq object wraps a sequence string; use str(a) to get the text back.
# The former `a.alphabet` and `a.sequence()` lines were dropped: Seq has no
# sequence() method, so the script stopped there with an AttributeError, and
# the alphabet attribute is absent from current Biopython releases.
a = Seq("ATATATACG")

# Build (but do not run) a water command line and show it.
cline = WaterCommandline(gapopen=10, gapextend=0.5)
cline.asequence = "asis:ACCCGGGCGCGGT"
cline.bsequence = "asis:ACCCGAGCGCGGT"
cline.outfile = "temp_water.txt"
print(cline)
예제 #25
0
파일: EMBOSSW23.py 프로젝트: cwt1/BioGUI
def GetExec(inF, outF):
    """Run the water executable for the given input/output files and wait for it.

    inF: passed to the command line as ``infile``
    outF: passed to the command line as ``outfile``
    Returns None once the child process has finished.
    """
    plugin_exe = r"C:/mEMBOSS/water.exe"
    # NOTE(review): the other water examples supply asequence/bsequence;
    # confirm that WaterCommandline accepts an ``infile`` option.
    cline = WaterCommandline(plugin_exe, infile=inF, outfile=outF)
    # This is a plain function, so there is no ``self``; use the local
    # command line (the old ``self.cline`` raised NameError).
    p = subprocess.Popen(str(cline))
    p.wait()
예제 #26
0
def water_aligner(TR_frame, seqrec_array, m, go, ge, args):
    """ Performs TR alignment using the provided EMBOSS-water aligner executable.

    For every TR, three water runs are launched against the query FASTA
    returned by ``tr.get_homologs_fasta``: the full TR region, flank 1 and
    flank 2. Reports go to ``<args.o>/<tr.id>.water``, ``<tr.id>_F1.water``
    and ``<tr.id>_F2.water``; the exit status of each run is appended to
    ``./TR_aln.log``.

    TR_frame: A sized iterable of TR instances
    seqrec_array: An array containing indexed seqrecord instances from the query feature array
    m: match score (only shown in the banner; it is not passed to water)
    go: gap open penalty
    ge: gap extension penalty
    args: namespace providing ``w`` (water executable), ``o`` (output
        directory) and ``m`` (forwarded to ``get_homologs_fasta``)
    """
    tr_count = len(TR_frame)

    missing_features = 0  ## counter for instances of missing features in the query lib

    def run_alignment(water_cline, a_prefix, tr_id, log):
        """ Runs one water command line and records its exit status for a_prefix:tr_id in log
        """
        p = Popen(str(water_cline),
                  stdin=PIPE,
                  stdout=PIPE,
                  stderr=PIPE,
                  shell=True)
        output, err = p.communicate()
        rc = p.returncode

        if rc == 0:
            print(f"\nAlignment of {a_prefix}:{tr_id} exited with 0",
                  file=log)
        else:
            print(
                f"\nAlignment of {a_prefix}:{tr_id} exited with {rc} and warning:\n{str(err)}",
                file=log)

    ## file to dump water subprocess output; closed even if an alignment raises
    with open("./TR_aln.log", "w") as water_log:
        vprint(subprocessID, "Starting alignments...", "prYellow")
        print(
            f"\n\t\t\tEMBOSS-water Smith-Waterman Aligner.\n\t\t\tmatch={m}\n\t\t\tgap_open={go}\n\t\t\tgap_extend={ge}\n",
            flush=True)

        for i, tr in enumerate(TR_frame):
            stamp = strftime("%H:%M:%S", localtime())
            print("\r{time} {subprocess} :: Aligning TR {i}/{tr_count}".format(
                time=stamp,
                subprocess=prYellow(subprocessID),
                i=i + 1,
                tr_count=tr_count),
                  end="... ",
                  file=sys.stdout,
                  flush=True)

            ## generate a homolog dict from the TR and track the number of missing features from the query library
            rfa_out, qfa_out, missing = tr.get_homologs_fasta(seqrec_array, args.m)
            missing_features += missing

            ## (log label, asequence, report path) for the full region and both flanks
            jobs = (
                ("FULL", rfa_out, path.join(args.o, f"{tr.id}.water")),
                ("flank1", f"asis:{tr.flank1}",
                 path.join(args.o, f"{tr.id}_F1.water")),
                ("flank2", f"asis:{tr.flank2}",
                 path.join(args.o, f"{tr.id}_F2.water")),
            )
            for a_prefix, asequence, report_out in jobs:
                water_cline = WaterCommandline(args.w,
                                               asequence=asequence,
                                               bsequence=qfa_out,
                                               gapopen=go,
                                               gapextend=ge,
                                               outfile=report_out)
                run_alignment(water_cline, a_prefix, tr.id, water_log)

            ## NOTE(review): stops after the 12th TR -- looks like a debugging
            ## limit left in place; confirm before relying on full output.
            if i > 10:
                break

    print(f"Done with {missing_features} missing seqeuences.\n", flush=True)
예제 #27
0
            # Get telomere ref sequence
            ref_length = int(math.ceil(float(size / float(6))))
            if strand == "+":
                telo_ref = "TTAGGC" * ref_length
            elif strand == "-":
                telo_ref = "GCCTAA" * ref_length
            else:
                print("ERROR: strand must be + or -")
                sys.exit(1)

            # Perform alignment with water
            with open("its_seq.temp", "w") as fi:
                fi.write(str(seq))
            with open("telo.temp", "w") as ft:
                ft.write(telo_ref)

            water_cmd = WaterCommandline(gapopen=10,
                                         gapextend=0.5,
                                         asequence="its_seq.temp",
                                         bsequence="telo.temp",
                                         stdout=True,
                                         auto=True)
            stdout, stderr = water_cmd()
            identity = re.findall("# Identity:.*\((.+)\%\)", stdout)[0]

            outfile.write(line.strip() + "\t" + str(identity) + "\n")

outfile.close()
os.remove("its_seq.temp")
os.remove("telo.temp")
예제 #28
0
# http://rosalind.info/problems/swat/

from Bio.Emboss.Applications import WaterCommandline
from Bio import ExPASy, SeqIO

if __name__ == "__main__":
    # split() without an argument splits on any whitespace, so the trailing
    # newline of the input file no longer ends up inside the last id.
    with open('rosalind_swat.txt') as id_file:
        ids = id_file.read().split()

    # Download each entry and store it as FASTA in a file named after its id.
    for i in ids:
        handle = ExPASy.get_sprot_raw(i)
        try:
            r = SeqIO.read(handle, "swiss")
        finally:
            handle.close()

        with open(i, 'w') as f:
            SeqIO.write(r, f, 'fasta')

    # Local alignment of the first two entries.
    water_cline = WaterCommandline()
    water_cline.asequence = ids[0]
    water_cline.bsequence = ids[1]
    water_cline.outfile = "rosalind_swat_output.txt"
    water_cline.gapopen = 10
    water_cline.gapextend = 1
    water_cline()

    # Print the alignment score as an integer.
    with open('rosalind_swat_output.txt') as report:
        for line in report:
            if 'Score:' in line:
                # strip() removes the newline; slicing off the last character
                # could cut a digit on a final line without a terminator.
                print(int(float(line.split(':')[-1].strip())))