Example #1
0
def align(seqs):
  '''
  Multiple sequence alignment using ClustalW

  The sequences are written to a temporary FASTA file in the current
  working directory (records are named 1, 2, 3, ...), ClustalW is run on
  that file and the temporary files are removed again before returning.

  @param seqs: list of biological sequences
  @type  seqs: list of strs
  @return    : aligned sequences
  @rtype     : list of strs
  '''
  # Include the process id so that two processes started within the same
  # second do not end up sharing temporary file names.
  name = 'tmpFoo%d_%d' % (time.time(), os.getpid())
  fasta_name = name + '.fasta'
  try:
    tfile = open(fasta_name, 'w')
    try:
      for i, seq in enumerate(seqs):
        tfile.write('>%d\n%s\n' % (i + 1, seq))
    finally:
      # Closing flushes the data and releases the handle before ClustalW
      # reads the file.
      tfile.close()

    cline = MultipleAlignCL(fasta_name)
    cline.set_output(name + '.aln')

    alignment = do_alignment(cline)

    return [rec.seq.tostring() for rec in alignment.get_all_seqs()]
  finally:
    # Best-effort cleanup: any of these files may be missing if an earlier
    # step failed, and a cleanup error must not hide the original exception
    # or stop the remaining files from being removed.
    for suffix in ('.fasta', '.dnd', '.aln'):
      try:
        os.unlink(name + suffix)
      except OSError:
        pass
  def align(self):
    seqs = True
    while seqs:
      seqs = self.phamServer.request_seqs(self.server, self.numSeqs, self.client)
      try:
        import pynotify
        if pynotify.init("Phamerator"):
          n = pynotify.Notification("Phamerator Update", "doing clustalw alignments", "file:///home/steve/Applications/git/PhamDB/phageManager_logo.png")
          n.show()
        else:
          pass
          #print "there was a problem initializing the pynotify module"
      except:
        pass
      if len(seqs) == 0:
        self._logger.log('server returned no sequences to align.')
        return
      self._logger.log('server returned ' + str(len(seqs)) + ' to align')
      self._logger.log('aligning sequences')
      results = []
###########################################################################
#			                BEGIN MATT'S ALTERATIONS			                      #
#									                                                        #
###########################################################################
      # tuple of all parallel python servers to connect with
      ppservers = ()

      # Creates jobserver with automatically detected number of workers
      job_server = pp.Server(ppservers=ppservers)

      #grab number of processors
      numcpus = job_server.get_ncpus()
      print "numcpus =",numcpus
      
      #for n, person in enumerate(people):
      #for seq in seqs:
      for i,currentseq in enumerate(seqs):
        jobs = []
        id, querySeq, subjectSeq = currentseq
        f = open(os.path.join(self.rootdir, 'temp' + str(i) + '.fasta'), 'w')
        f.write('>' + 'a' + '\n' + querySeq + '\n>' + 'b' + '\n' + subjectSeq + '\n')
        f.close()
        cline = MultipleAlignCL(os.path.join(self.rootdir, 'temp' + str(i) + '.fasta'))
        #cline.is_quick = True
        cline.set_output(os.path.join(self.rootdir, 'temp' + str(i) + '.aln'))
        #alignment = Clustalw.do_alignment(cline)
        #jobs = [(input, job_server.submit(sum_primes,(input,), (isprime,), ("math",))) for input in inputs]
        jobs.append(job_server.submit(clustalwAligner.run_clustalw, (id,cline), (), ("clustalwAligner.run_clustalw","Bio.Clustalw",)))
      
      for job in jobs:
        id,alignment = job()
        length = alignment.get_alignment_length()
        star = alignment._star_info.count('*')
        score = float(star)/length
        #print 'length:', length, 'identical:', star, 'score:', score
        #_logger.log queryName, subjectName, score
        results.append((id, score))
      self._logger.log('reporting scores back to server')
      self.phamServer.report_scores(results, self.server, self.client)
Example #3
0
    def Align(self, s1, s2, result):
        """Align two sequences with ClustalW and record the matched positions.

        The sequences are written as FASTA records ``s1`` and ``s2`` to a
        temporary file and aligned via ``MultipleAlignCL``/``do_alignment``.
        For every alignment column in which neither row holds ``N``, ``n``
        or a gap (``-``), ``result.addPair(x1, x2, 0)`` is called, where
        ``x1`` and ``x2`` count the non-gap characters already consumed
        from the first and second row.

        ``result`` is cleared first and is also the return value.  If the
        alignment does not contain exactly two sequences, it is returned
        without any pairs added.  The temporary files are removed on every
        exit path.
        """
        result.clear()

        handle_tmpfile1, filename_tmpfile1 = tempfile.mkstemp()
        handle_tmpfile2, filename_tmpfile2 = tempfile.mkstemp()
        try:
            try:
                # Both records go into the first file (the ClustalW input);
                # the second file only reserves the output path.
                os.write(handle_tmpfile1, ">s1\n%s\n" % (s1))
                os.write(handle_tmpfile1, ">s2\n%s\n" % (s2))
            finally:
                os.close(handle_tmpfile1)
                os.close(handle_tmpfile2)

            cline = MultipleAlignCL(filename_tmpfile1)
            cline.set_output(filename_tmpfile2)

            align = do_alignment(cline)

            seqs = align.get_all_seqs()
            if len(seqs) != 2:
                return result

            a1 = seqs[0].seq.tostring()
            a2 = seqs[1].seq.tostring()

            x1 = 0
            x2 = 0
            for c1, c2 in zip(a1, a2):
                if c1 not in "Nn-" and c2 not in "Nn-":
                    result.addPair(x1, x2, 0)
                    x1 += 1
                    x2 += 1
                    continue

                # Columns with an N still advance the position in that row;
                # only gaps do not.
                if c1 != "-":
                    x1 += 1
                if c2 != "-":
                    x2 += 1

            return result
        finally:
            # Runs for the early return and for exceptions as well, so the
            # temporary files never leak.
            for filename in (filename_tmpfile1, filename_tmpfile2):
                try:
                    os.remove(filename)
                except OSError:
                    pass
Example #4
0
    def Align(self, s1, s2, result):
        """Align two sequences with ClustalW and record the matched positions.

        The sequences are written as FASTA records ``s1`` and ``s2`` to a
        temporary file and aligned via ``MultipleAlignCL``/``do_alignment``.
        For every alignment column in which neither row holds ``N``, ``n``
        or a gap (``-``), ``result.addPair(x1, x2, 0)`` is called, where
        ``x1`` and ``x2`` count the non-gap characters already consumed
        from the first and second row.

        ``result`` is cleared first and is also the return value.  If the
        alignment does not contain exactly two sequences, it is returned
        without any pairs added.  The temporary files are removed on every
        exit path.
        """
        result.clear()

        handle_tmpfile1, filename_tmpfile1 = tempfile.mkstemp()
        handle_tmpfile2, filename_tmpfile2 = tempfile.mkstemp()
        try:
            try:
                # Both records go into the first file (the ClustalW input);
                # the second file only reserves the output path.
                os.write(handle_tmpfile1, ">s1\n%s\n" % (s1))
                os.write(handle_tmpfile1, ">s2\n%s\n" % (s2))
            finally:
                os.close(handle_tmpfile1)
                os.close(handle_tmpfile2)

            cline = MultipleAlignCL(filename_tmpfile1)
            cline.set_output(filename_tmpfile2)

            align = do_alignment(cline)

            seqs = align.get_all_seqs()
            if len(seqs) != 2:
                return result

            a1 = seqs[0].seq.tostring()
            a2 = seqs[1].seq.tostring()

            x1 = 0
            x2 = 0
            for c1, c2 in zip(a1, a2):
                if c1 not in "Nn-" and c2 not in "Nn-":
                    result.addPair(x1, x2, 0)
                    x1 += 1
                    x2 += 1
                    continue

                # Columns with an N still advance the position in that row;
                # only gaps do not.
                if c1 != "-":
                    x1 += 1
                if c2 != "-":
                    x2 += 1

            return result
        finally:
            # Runs for the early return and for exceptions as well, so the
            # temporary files never leak.
            for filename in (filename_tmpfile1, filename_tmpfile2):
                try:
                    os.remove(filename)
                except OSError:
                    pass
Example #5
0
def align_seq_clustalw(sequences,mat='BLOSUM',output_order='INPUT'):
    """Return a clustal alignment of an arbitrary number of sequences.

    Requires clustalw on the path.

    sequences    -- SeqRecord objects (at least two).  Records whose id is
                    "<unknown id>" get their position in the list assigned
                    as id; this modifies the caller's records.
    mat          -- protein matrix name; must be one of
                    MultipleAlignCL.PROTEIN_MATRIX (ValueError otherwise).
    output_order -- must be one of MultipleAlignCL.OUTPUT_ORDER
                    (ValueError otherwise).

    Raises AssertionError if fewer than two sequences are given or the
    output does not hold exactly one alignment, and
    subprocess.CalledProcessError if clustalw exits with a non-zero status.
    """
    assert(len(sequences) > 1)
    # Imported locally, like the Biopython names below, so the function does
    # not depend on what the surrounding module happens to import.
    import os
    import subprocess
    import tempfile

    from Bio.AlignIO.ClustalIO import ClustalIterator
    from Bio.Clustalw import MultipleAlignCL
    from Bio.SeqIO.FastaIO import FastaWriter

    def write(f):
        for i, seqrecord in enumerate(sequences):
            if seqrecord.id == "<unknown id>":
                seqrecord.id = str(i)
        writer = FastaWriter(f)
        writer.write_file(sequences)
        f.flush() #IMPORTANT: clustalw reads the file by name while it is still open

    infile = tempfile.NamedTemporaryFile("w")
    try:
        write(infile)
        cline = MultipleAlignCL(infile.name)
        # The index() lookups reject unknown names with ValueError.
        matrices = MultipleAlignCL.PROTEIN_MATRIX
        cline.set_protein_matrix(matrices[matrices.index(mat)])
        cline.is_quick = False
        outfile = tempfile.NamedTemporaryFile("w")
        try:
            orders = MultipleAlignCL.OUTPUT_ORDER
            cline.set_output(outfile.name, output_order=orders[orders.index(output_order)])
            # Discard clustalw's console output; close the null device handle
            # afterwards instead of leaking it.
            devnull = open(os.devnull,"w")
            try:
                subprocess.check_call(str(cline),shell=True,stdout=devnull)
            finally:
                devnull.close()
            f = open(outfile.name)
            try:
                # There should only be one alignment in the file
                alignments = list(ClustalIterator(f))
                assert(len(alignments)==1)
                alignment = alignments[0]
            finally:
                f.close()
            return alignment
        finally:
            outfile.close()
    finally:
        infile.close()
def Align_Results(OutputFileName):
    """Run ClustalW on the consensus FASTA file named OutputFileName.

    Reads <Consensus_Results>/<OutputFileName>.FASTA, writes the alignment
    to <Consensus_Results>/<OutputFileName>.ALN, prints both paths and
    returns the alignment object produced by Clustalw.do_alignment.
    """
    import os

    from Bio.Clustalw import MultipleAlignCL
    from Bio import Clustalw

    FileIN_Name = """/users/rwbarrettemac/bioinformatics/pythonfolders/FMDanalysisScript/FMDserotypingARRAY/Consensus_Results/%s.FASTA""" % (OutputFileName)
    FileOUT_ALN = """/users/rwbarrettemac/bioinformatics/pythonfolders/FMDanalysisScript/FMDserotypingARRAY/Consensus_Results/%s.ALN""" % (OutputFileName)
    print(FileIN_Name)
    print(FileOUT_ALN)

    # os.path.join discards os.curdir here because FileIN_Name is absolute;
    # kept so the argument passed on is exactly what it was before.
    cline = MultipleAlignCL(os.path.join(os.curdir, FileIN_Name))
    cline.set_output(FileOUT_ALN)

    alignment = Clustalw.do_alignment(cline)

    # NOTE(review): MultipleAlignCL looks like a plain command-line builder
    # with no close() method - confirm.  Only call it when it exists so the
    # function does not fail after the alignment has already been produced.
    close = getattr(cline, "close", None)
    if close is not None:
        close()

    return alignment
Example #7
0
and parse the results into an object that can be dealt with easily."""
# standard library
import os

# biopython
from Bio.Alphabet import IUPAC
from Bio import Clustalw
from Bio.Clustalw import MultipleAlignCL
from Bio.Align import AlignInfo
from Bio.SubsMat import FreqTable

# create the command line to run clustalw
# this assumes you've got clustalw somewhere on your path, otherwise
# you need to pass a second argument to MultipleAlignCL with the complete
# path to clustalw
cline = MultipleAlignCL(os.path.join(os.curdir, 'opuntia.fasta'))
cline.set_output('test.aln')

# actually perform the alignment and get back an alignment object
alignment = Clustalw.do_alignment(cline)

# get the records in the alignment
all_records = alignment.get_all_seqs()

print 'description:', all_records[0].description
print 'sequence:', all_records[0].seq

# get the length of the alignment
print 'length', alignment.get_alignment_length()

print alignment
        if "not found" not in output and "CLUSTAL" in output.upper() :
            clustalw_exe = "clustalw"

# Nothing below can run without a clustalw executable.
if not clustalw_exe:
    raise MissingExternalDependencyError(
        "Install clustalw or clustalw2 if you want to use Bio.Clustalw.")

#################################################################

print "Checking error conditions"
print "========================="

# A missing input file has to make do_alignment raise IOError.
print "Empty file"
input_file = "does_not_exist.fasta"
assert not os.path.isfile(input_file)
cline = MultipleAlignCL(input_file, command=clustalw_exe)
try:
    align = Clustalw.do_alignment(cline)
except IOError, err:
    print "Failed (good)"
    message = str(err)
    #Python 2.3 on Windows gave (0, 'Error')
    #Python 2.5 on Windows gives [Errno 0] Error
    assert "Cannot open sequence file" in message \
           or "not produced" in message \
           or message == "[Errno 0] Error" \
           or message == "(0, 'Error')", message
else:
    assert False, "Should have failed, returned %s" % repr(align)

print
print "Single sequence"
input_file = "Fasta/f001"
  def align(self):
    id = True
    ppservers = ()
    job_server = pp.Server(ppservers=ppservers,secret="secret")
    while id: # this should test to make sure there are still alignments to do
      print 'getting work unit'
      try:
        clustalw_work_unit = self.phamServer.request_seqs(self.client)
        if not clustalw_work_unit.query_id:
          print 'no work units available...sleeping'
          logo = os.path.join(os.path.dirname(__file__),"pixmaps/phamerator.png")
          #print "logo: %s" % logo
          try:
            import pynotify
            if pynotify.init("Phamerator"):
              n = pynotify.Notification("Phamerator Update", "No Clustalw alignments left to do...sleeping", "file:///%s" % logo)
              n.show()
            else:
              pass
              #print "there was a problem initializing the pynotify module"
          except:
            pass
          time.sleep(30)


          continue
      except Exception, x:
        print ''.join(Pyro.util.getPyroTraceback(x))
      server, db = self.phamServer.request_db_info()
      #c = db_conf(username=self.username, password=self.password, server=server, db=db)
      #clustalw_work_unit.set_cursor(c)
      print 'got it'

      try:
        import pynotify
        if pynotify.init("Phamerator"):
          logo = os.path.join(os.path.dirname(__file__),"pixmaps/phamerator.png")
          #print "logo: %s" % logo
          n = pynotify.Notification("Phamerator Update", "Clustalw alignments in progress for id %s" % clustalw_work_unit.query_id, "file:///%s" % logo)
          n.show()
        else:
          pass
          #print "there was a problem initializing the pynotify module"
      except:
        pass

      self._logger.log('aligning sequences')
###########################################################################
#			                BEGIN MATT'S ALTERATIONS          #
#                                                                         #
###########################################################################
      results = []
      open_files = []
      # tuple of all parallel python servers to connect with

      # Creates jobserver with automatically detected number of workers

      #grab number of processors
      numcpus = job_server.get_ncpus()
      print "numcpus =",numcpus
      
      #for n, person in enumerate(people):
      #for seq in seqs:
      jobs = []
      #for i,currentseq in enumerate(seqs):
      query_id = clustalw_work_unit.query_id
      query_translation = clustalw_work_unit.query_translation
      counter = 0
      for record in clustalw_work_unit.database:
        subject_id, subject_translation = record.id, record.translation
        fname = os.path.join(self.rootdir, 'temp' + query_id + '_' + subject_id + '.fasta')
        f = open(fname, 'w')
        open_files.append(fname)
        open_files.append(fname.replace('.fasta','.dnd'))
        open_files.append(fname.replace('.fasta','.aln'))
        f.write('>%s\n%s\n>%s\n%s\n' % (query_id, query_translation, subject_id, subject_translation))
        f.close()

        clustalw_infile = os.path.join(self.rootdir, 'temp' + str(query_id) + '_' + str(subject_id) + '.fasta')

        if float(Bio.__version__) >= 1.56:
          # pass the query id (qid) and the subject id (sid) to run_clustalw
          jobs.append(job_server.submit(clustalwAligner.run_clustalw, (clustalw_infile, query_id, subject_id), (), ()))
        else:
          cline = MultipleAlignCL(clustalw_infile)
          cline.set_output(os.path.join(self.rootdir, 'temp' + str(query_id) + '_' + str(subject_id) + '.aln'))
          # pass the query id (qid) and the subject id (sid) to run_clustalw
          jobs.append(job_server.submit(clustalwAligner.run_clustalw_old, (query_id, subject_id,cline), (), ("Bio.Clustalw",)))

        counter = counter + 3
        if counter > 50:
          results = self.process_jobs(jobs, results, open_files)
          jobs = []
          open_files = []
          counter = 0

      results = self.process_jobs(jobs, results, open_files)
      jobs = []
      open_files = []
      counter = 0
      # must report everything back in atomic transaction
      print 'reporting scores back to server'
      try:
        self.phamServer.report_scores(clustalw_work_unit, results, self.client)
      except Exception, x:
        print ''.join(Pyro.util.getPyroTraceback(x))
        print 'exiting on pyro traceback'
        sys.exit()
        and "Multiple Sequence Alignments" in output:
            clustalw_exe = "clustalw"

# Nothing below can run without a clustalw executable.
if not clustalw_exe:
    raise MissingExternalDependencyError(
        "Install clustalw or clustalw2 if you want to use Bio.Clustalw.")

#################################################################

print "Checking error conditions"
print "========================="

# A missing input file has to make do_alignment raise IOError.
print "Empty file"
input_file = "does_not_exist.fasta"
assert not os.path.isfile(input_file)
cline = MultipleAlignCL(input_file, command=clustalw_exe)
try:
    align = Clustalw.do_alignment(cline)
except IOError, err:
    print "Failed (good)"
    message = str(err)
    #Python 2.3 on Windows gave (0, 'Error')
    #Python 2.5 on Windows gives [Errno 0] Error
    assert "Cannot open sequence file" in message \
           or "not produced" in message \
           or message == "[Errno 0] Error" \
           or message == "(0, 'Error')", message
else:
    assert False, "Should have failed, returned %s" % repr(align)

print
print "Single sequence"
input_file = "Fasta/f001"
Example #11
0
    def align(self):
        id = True
        ppservers = ()
        job_server = pp.Server(ppservers=ppservers, secret="secret")
        while id:  # this should test to make sure there are still alignments to do
            print 'getting work unit'
            try:
                clustalw_work_unit = self.phamServer.request_seqs(self.client)
                if not clustalw_work_unit.query_id:
                    print 'no work units available...sleeping'
                    logo = os.path.join(os.path.dirname(__file__),
                                        "pixmaps/phamerator.png")
                    #print "logo: %s" % logo
                    try:
                        import pynotify
                        if pynotify.init("Phamerator"):
                            n = pynotify.Notification(
                                "Phamerator Update",
                                "No Clustalw alignments left to do...sleeping",
                                "file:///%s" % logo)
                            n.show()
                        else:
                            pass
                            #print "there was a problem initializing the pynotify module"
                    except:
                        pass
                    time.sleep(30)

                    continue
            except Exception, x:
                print ''.join(Pyro.util.getPyroTraceback(x))
            server, db = self.phamServer.request_db_info()
            #c = db_conf(username=self.username, password=self.password, server=server, db=db)
            #clustalw_work_unit.set_cursor(c)
            print 'got it'

            try:
                import pynotify
                if pynotify.init("Phamerator"):
                    logo = os.path.join(os.path.dirname(__file__),
                                        "pixmaps/phamerator.png")
                    #print "logo: %s" % logo
                    n = pynotify.Notification(
                        "Phamerator Update",
                        "Clustalw alignments in progress for id %s" %
                        clustalw_work_unit.query_id, "file:///%s" % logo)
                    n.show()
                else:
                    pass
                    #print "there was a problem initializing the pynotify module"
            except:
                pass

            self._logger.log('aligning sequences')
            ###########################################################################
            #			                BEGIN MATT'S ALTERATIONS          #
            #                                                                         #
            ###########################################################################
            results = []
            open_files = []
            # tuple of all parallel python servers to connect with

            # Creates jobserver with automatically detected number of workers

            #grab number of processors
            numcpus = job_server.get_ncpus()
            print "numcpus =", numcpus

            #for n, person in enumerate(people):
            #for seq in seqs:
            jobs = []
            #for i,currentseq in enumerate(seqs):
            query_id = clustalw_work_unit.query_id
            query_translation = clustalw_work_unit.query_translation
            counter = 0
            for record in clustalw_work_unit.database:
                subject_id, subject_translation = record.id, record.translation
                fname = os.path.join(
                    self.rootdir,
                    'temp' + query_id + '_' + subject_id + '.fasta')
                f = open(fname, 'w')
                open_files.append(fname)
                open_files.append(fname.replace('.fasta', '.dnd'))
                open_files.append(fname.replace('.fasta', '.aln'))
                f.write('>%s\n%s\n>%s\n%s\n' %
                        (query_id, query_translation, subject_id,
                         subject_translation))
                f.close()

                clustalw_infile = os.path.join(
                    self.rootdir,
                    'temp' + str(query_id) + '_' + str(subject_id) + '.fasta')

                if float(Bio.__version__) >= 1.56:
                    # pass the query id (qid) and the subject id (sid) to run_clustalw
                    jobs.append(
                        job_server.submit(
                            clustalwAligner.run_clustalw,
                            (clustalw_infile, query_id, subject_id), (), ()))
                else:
                    cline = MultipleAlignCL(clustalw_infile)
                    cline.set_output(
                        os.path.join(
                            self.rootdir, 'temp' + str(query_id) + '_' +
                            str(subject_id) + '.aln'))
                    # pass the query id (qid) and the subject id (sid) to run_clustalw
                    jobs.append(
                        job_server.submit(clustalwAligner.run_clustalw_old,
                                          (query_id, subject_id, cline), (),
                                          ("Bio.Clustalw", )))

                counter = counter + 3
                if counter > 50:
                    results = self.process_jobs(jobs, results, open_files)
                    jobs = []
                    open_files = []
                    counter = 0

            results = self.process_jobs(jobs, results, open_files)
            jobs = []
            open_files = []
            counter = 0
            # must report everything back in atomic transaction
            print 'reporting scores back to server'
            try:
                self.phamServer.report_scores(clustalw_work_unit, results,
                                              self.client)
            except Exception, x:
                print ''.join(Pyro.util.getPyroTraceback(x))
                print 'exiting on pyro traceback'
                sys.exit()
Example #12
0
and parse the results into an object that can be dealt with easily."""
# standard library
import os

# biopython
from Bio.Alphabet import IUPAC
from Bio import Clustalw
from Bio.Clustalw import MultipleAlignCL
from Bio.Align import AlignInfo
from Bio.SubsMat import FreqTable

# create the command line to run clustalw
# this assumes you've got clustalw somewhere on your path, otherwise
# you need to pass a second argument to MultipleAlignCL with the complete
# path to clustalw
cline = MultipleAlignCL(os.path.join(os.curdir, "opuntia.fasta"))
cline.set_output("test.aln")

# actually perform the alignment and get back an alignment object
alignment = Clustalw.do_alignment(cline)

print alignment

print "first description:", alignment[0].description
print "first sequence:", alignment[0].seq

# get the length of the alignment
print "length", alignment.get_alignment_length()

print alignment