Ejemplo n.º 1
0
 def _MakeAln(query_id, hit_id, query_string, templ_string,
              q_offset, t_offset):
     """
     Build a pairwise alignment from a query and a template (hit) string.

     Each sequence gets its offset set to the given value minus one
     (presumably a 1-based to 0-based conversion -- confirm with the caller).

     :param query_id: name of the query sequence
     :param hit_id: name of the template sequence
     :param query_string: aligned query string
     :param templ_string: aligned template string
     :param q_offset: start of the query string, decremented before use
     :param t_offset: start of the template string, decremented before use
     :returns: result of ``seq.CreateAlignment``, query first, template second
     """
     handles = []
     for name, string, start in ((query_id, query_string, q_offset),
                                 (hit_id, templ_string, t_offset)):
         handle = seq.CreateSequence(name, string)
         handle.offset = start - 1
         handles.append(handle)
     return seq.CreateAlignment(handles[0], handles[1])
Ejemplo n.º 2
0
  def _ParseHsp(query_id, hit_id, hsp, tot_query_len, seqid_thres=0, evalue_thres=float("infinity")):
    """
    Turn one ``Hsp`` element of a BLAST XML result into an ``AlignedPatch``.

    :param query_id: name used for the query sequence (passed through ``str``)
    :param hit_id: name used for the hit sequence (passed through ``str``)
    :param hsp: element the ``Hsp_*`` values are read from
    :param tot_query_len: not used inside this function
    :param seqid_thres: the sequence identity (``Hsp_identity`` divided by
                        ``Hsp_align-len``) must be strictly greater than this
    :param evalue_thres: the e-value must be strictly smaller than this
    :returns: an ``AlignedPatch`` if both thresholds are passed. ``None`` if
              they are not, or if building the patch raised (the error is
              printed in that case).
    """
    def _FloatOf(tag):
      return float(_GetValue(hsp, tag))

    bit_score = _FloatOf('Hsp_bit-score')
    score = _FloatOf('Hsp_score')
    evalue = _FloatOf('Hsp_evalue')
    try:
      identity = _FloatOf('Hsp_identity')
    except AssertionError:
      # Hsp_identity (number of matching characters) is optional in the
      # BLAST XML format; a missing tag is treated as zero matches.
      identity = 0
    seqid = identity / _FloatOf('Hsp_align-len')

    # Both start positions are read before any sequence is created.
    query_offset = _GetInt(hsp, 'Hsp_query-from') - 1
    hit_offset = _GetInt(hsp, 'Hsp_hit-from') - 1

    query_seq = seq.CreateSequence(str(query_id),
                                   str(_GetValue(hsp, 'Hsp_qseq')))
    query_seq.offset = query_offset
    hit_seq = seq.CreateSequence(str(hit_id),
                                 str(_GetValue(hsp, 'Hsp_hseq')))
    hit_seq.offset = hit_offset

    try:
      if not (seqid > float(seqid_thres) and evalue < evalue_thres):
        return None
      aln = seq.CreateAlignment(query_seq, hit_seq)
      return AlignedPatch(aln, bit_score, score, evalue, seqid)
    except Exception as e:
      print(str(e), query_seq, hit_seq)
    return None
Ejemplo n.º 3
0
 def testNormalise(self):
     """Normalise() is expected to drop '-' and '.' and upper-case the rest."""
     for raw in ("B-D-FGH", "b.d-fgh"):
         seq_a = seq.CreateSequence("A", raw)
         # the string is stored untouched until Normalise() is called
         self.assertEqual(raw, seq_a.GetString())
         seq_a.Normalise()
         self.assertEqual("BDFGH", seq_a.GetString())
Ejemplo n.º 4
0
 def testViewsFromSequences_03(self):
     """A residue facing a gap in the other sequence is expected to be
     missing from both returned views."""
     seq_a = seq.CreateSequence("A", "ABCD--GH")
     seq_a.AttachView(self.ent.Select('rname=A,B,C,D,G,H'))
     seq_b = seq.CreateSequence("B", "ABCD-FGH")
     seq_b.AttachView(self.ent.Select('rname=A,B,C,D,F,G,H'))
     view_a, view_b = seq.ViewsFromSequences(seq_a, seq_b)
     letters = [''.join(res.one_letter_code for res in view.residues)
                for view in (view_a, view_b)]
     self.assertEqual(letters[0], 'ABCDGH')
     self.assertEqual(letters[1], 'ABCDGH')
Ejemplo n.º 5
0
 def testViewsFromSequences_08(self):
     """With a gap in one of the two sequences at every column, both
     returned views are expected to be empty."""
     seq_a = seq.CreateSequence("A", "A-C-E-G")
     seq_a.AttachView(self.ent.Select('rname=A,C,E,G'))
     seq_b = seq.CreateSequence("B", "-B-D-H-")
     seq_b.AttachView(self.ent.Select('rname=B,D,H'))
     view_a, view_b = seq.ViewsFromSequences(seq_a, seq_b)
     letters = [''.join(res.one_letter_code for res in view.residues)
                for view in (view_a, view_b)]
     self.assertEqual(letters[0], '')
     self.assertEqual(letters[1], '')
Ejemplo n.º 6
0
 def testDeletionInSeqB(self):
   """GlobalAlign is expected to gap sequence B where it lacks residues of A."""
   seq_a = seq.CreateSequence('A', 'aacdefghiklmn')
   seq_b = seq.CreateSequence('B', 'acdhiklmn')
   alns = seq.alg.GlobalAlign(seq_a, seq_b, seq.alg.BLOSUM62)
   self.assertEqual(len(alns), 1)
   aln = alns[0]
   # names first, then the aligned strings, then the offsets
   for idx, name in enumerate(('A', 'B')):
     self.assertEqual(aln.sequences[idx].name, name)
   for idx, aligned in enumerate(('aacdefghiklmn', '-acd---hiklmn')):
     self.assertEqual(str(aln.sequences[idx]), aligned)
   for idx in (0, 1):
     self.assertEqual(aln.sequences[idx].offset, 0)
 def testSemiGlobalAlignment(self):
   """SemiGlobalAlign is expected to pad the shorter sequence B with gaps
   at both ends and keep both offsets at zero."""
   seq_a = seq.CreateSequence('A', 'abcdefghijklmnok')
   seq_b = seq.CreateSequence('B', 'cdehijk')
   alns = seq.alg.SemiGlobalAlign(seq_a, seq_b, seq.alg.BLOSUM62)
   self.assertEqual(len(alns), 1)
   aln = alns[0]
   # names first, then the aligned strings, then the offsets
   for idx, name in enumerate(('A', 'B')):
     self.assertEqual(aln.sequences[idx].name, name)
   for idx, aligned in enumerate(('abcdefghijklmnok', '--cde--hijk-----')):
     self.assertEqual(str(aln.sequences[idx]), aligned)
   for idx in (0, 1):
     self.assertEqual(aln.sequences[idx].offset, 0)
Ejemplo n.º 8
0
  def testOffset(self):
    """GlobalAlign on sequences with differing starts: both offsets are
    expected to stay zero, with gaps inside sequence B."""
    seq_a = seq.CreateSequence('A', 'acdhiklmn')
    seq_b = seq.CreateSequence('B', 'ggiklmn')
    alns = seq.alg.GlobalAlign(seq_a, seq_b, seq.alg.BLOSUM62)
    self.assertEqual(len(alns), 1)
    aln = alns[0]
    # names first, then the aligned strings, then the offsets
    for idx, name in enumerate(('A', 'B')):
      self.assertEqual(aln.sequences[idx].name, name)

    for idx, aligned in enumerate(('acdhiklmn', 'g--giklmn')):
      self.assertEqual(str(aln.sequences[idx]), aligned)
    for idx in (0, 1):
      self.assertEqual(aln.sequences[idx].offset, 0)
Ejemplo n.º 9
0
 def testViewsFromSequences_09(self):
     """ViewsFromSequences with an offset of 1 on both sequences; both
     views are expected to spell 'BDFH'."""
     seq_a = seq.CreateSequence("A", "B-D-FGH")
     seq_a.AttachView(self.ent.Select('rname=A,B,D,F,G,H'))
     seq_a.offset = 1
     seq_b = seq.CreateSequence("B", "B-DEF-H")
     # for the second sequence the offset is set before the view is attached
     seq_b.offset = 1
     seq_b.AttachView(self.ent.Select('rname=A,B,D,E,F,H'))
     view_a, view_b = seq.ViewsFromSequences(seq_a, seq_b)
     letters = [''.join(res.one_letter_code for res in view.residues)
                for view in (view_a, view_b)]
     self.assertEqual(letters[0], 'BDFH')
     self.assertEqual(letters[1], 'BDFH')
Ejemplo n.º 10
0
 def testSeqSlice(self):
     """Slicing a sequence is expected to compare equal to slicing the
     plain string it was created from."""
     seq_string = 'abcdefg'
     s = seq.CreateSequence('A', seq_string)
     windows = (slice(1, 5), slice(None, -1), slice(-3, -2),
                slice(-3, None), slice(3, 4))
     for window in windows:
         self.assertEqual(s[window], seq_string[window])
Ejemplo n.º 11
0
def _ParseTmAlign(lines,lines_matrix):
  """
  Extract the result of a TM-align run from its output.

  Fixed positions are read: ``lines[12]`` holds the comma-separated
  ``key=value`` pairs for aligned length and RMSD, ``lines[14]`` the TM-score
  (text before the first ``(``), ``lines[18]`` and ``lines[20]`` the two
  aligned sequences. Rows 2 to 4 of *lines_matrix* hold the transformation:
  column 1 is the translation component, columns 2 to 4 the rotation row.

  :param lines: lines of the program output
  :param lines_matrix: lines of the transformation matrix output
  :returns: ``ost.bindings.TMAlignResult`` built from RMSD, TM-score, aligned
            length, 4x4 transformation and alignment. The sequence read
            from ``lines[20]`` is added to the alignment first.
  """
  summary = lines[12].split(',')
  aln_length = int(summary[0].split('=')[1].strip())
  rmsd = float(summary[1].split('=')[1].strip())
  tm_score = float(lines[14].split('=')[1].split('(')[0].strip())

  rows = [[float(token) for token in lines_matrix[idx].split()]
          for idx in (2, 3, 4)]
  rot = geom.Mat3(*[row[col] for row in rows for col in (2, 3, 4)])
  tf = geom.Mat4(rot)
  tf.PasteTranslation(geom.Vec3(*[row[1] for row in rows]))

  seq1 = seq.CreateSequence("1", lines[18].strip())
  seq2 = seq.CreateSequence("2", lines[20].strip())
  alignment = seq.CreateAlignment()
  for handle in (seq2, seq1):
    alignment.AddSequence(handle)
  return ost.bindings.TMAlignResult(rmsd, tm_score, aln_length, tf, alignment)
Ejemplo n.º 12
0
 def testProfile(self):
     """BuildQueryMSA() output is expected to equal the reference a3m file."""
     sequence = ('VLSPADKTNVKAAWGKVGAHAGEYGAEA'
                 'LERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVAD'
                 'ALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL'
                 'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'
                 'TSKYR')
     query_seq = seq.CreateSequence('Test', sequence)
     self.hh = hhblits.HHblits(query_seq, self.hhroot)
     a3m = self.hh.BuildQueryMSA('testfiles/hhblitsdb/hhblitsdb')
     same = filecmp.cmp(a3m, "testfiles/testali.a3m")
     self.assertTrue(same)
Ejemplo n.º 13
0
 def testSeqListSlice(self):
     """Slices of a sequence list are expected to contain the same
     sequences, in order, as the equivalent slice of a Python list."""
     a = seq.CreateSequence('A', 'aaaa')
     b = seq.CreateSequence('B', 'bbbb')
     c = seq.CreateSequence('C', 'cccc')
     d = seq.CreateSequence('D', 'dddd')
     sl = seq.CreateSequenceList(a, b, c, d)
     cases = (
         (slice(1, None), (b, c, d)),
         (slice(None, -1), (a, b, c)),
         (slice(-1, None), (d,)),
     )
     for window, members in cases:
         sliced = sl[window]
         self.assertEqual(len(sliced), len(members))
         for idx, member in enumerate(members):
             self.assertEqual(str(sliced[idx]), str(member))
Ejemplo n.º 14
0
 def testAlnSlice(self):
     """Slicing an alignment is expected to select columns: each item of
     the slice stringifies to one column across the four sequences."""
     a = seq.CreateSequence('A', 'abcd')
     b = seq.CreateSequence('B', 'efgh')
     c = seq.CreateSequence('C', 'ijkl')
     d = seq.CreateSequence('D', 'mnop')
     aln = seq.CreateAlignment(a, b, c, d)
     cases = (
         (slice(1, None), ('bfjn', 'cgko', 'dhlp')),
         (slice(None, -1), ('aeim', 'bfjn', 'cgko')),
         (slice(-1, None), ('dhlp',)),
     )
     for window, columns in cases:
         sliced = aln[window]
         self.assertEqual(len(sliced), len(columns))
         for idx, column in enumerate(columns):
             self.assertEqual(str(sliced[idx]), column)
Ejemplo n.º 15
0
 def testSearchNotWorking(self):
     """Search() on an a3m file that does not exist is expected to
     return None."""
     sequence = ('VLSPADKTNVKAAWGKVGAHAGEYGAEA'
                 'LERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVAD'
                 'ALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL'
                 'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'
                 'TSKYR')
     query_seq = seq.CreateSequence('Test', sequence)
     self.hh = hhblits.HHblits(query_seq, self.hhroot)
     database = 'testfiles/hhblitsdb/hhblitsdb'
     search_file = self.hh.Search("doesnotexist.a3m", database)
     self.assertEqual(search_file, None)
Ejemplo n.º 16
0
 def testOSTSequenceNoWDir(self):
     """HHblits accepts an OST sequence when no working dir is given."""
     sequence = ('MRIILLGAPGAGKGTQAQFIMEKYGIPQISTGDMLRA'
                 'AVKSGSELGKQAKDIMDAGKLVTDELVIALVKERIAQ'
                 'EDCRNGFLLDGFPRTIPQADAMKEAGINVDYVLEFDV'
                 'PDELIVDRIVGRRVHAPSGRVYHVKFNPPKVEGKDDV'
                 'TGEELTTRKDDQEETVRKRLVEYHQMTAPLIGYYYYS'
                 'KEAEAGNTKYAKVDGTKPVAEVRADLEKILG')
     query_seq = seq.CreateSequence('1AKE.B', sequence)
     self.hh = hhblits.HHblits(query_seq, self.hhroot)
     # Only holds while utils.TempDirWithFiles() names the first fasta
     # file seq01.fasta.
     tail = self.hh.filename[-11:]
     self.assertEqual(tail, 'seq01.fasta')
Ejemplo n.º 17
0
 def testA3mToProfileWithExistingFile(self):
     """A3MToProfile() with an hhm file that already exists."""
     sequence = ('VLSPADKTNVKAAWGKVGAHAGEYGAEA'
                 'LERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVAD'
                 'ALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL'
                 'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'
                 'TSKYR')
     query_seq = seq.CreateSequence('Test', sequence)
     self.hh = hhblits.HHblits(query_seq, self.hhroot)
     hhfile = self.hh.A3MToProfile("testfiles/testali.a3m",
                                   hhm_file="testfiles/test.hmm")
     # An existing hmm file is expected to be left untouched, so the
     # returned file and the reference must match completely.
     unchanged = filecmp.cmp(hhfile, "testfiles/test.hmm")
     self.assertTrue(unchanged)
Ejemplo n.º 18
0
 def testA3mToProfileWithoutA3m(self):
     """A3MToProfile() is expected to raise IOError, without an errno,
     when the a3m file is missing."""
     sequence = ('VLSPADKTNVKAAWGKVGAHAGEYGAEA'
                 'LERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVAD'
                 'ALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL'
                 'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'
                 'TSKYR')
     query_seq = seq.CreateSequence('Test', sequence)
     self.hh = hhblits.HHblits(query_seq, self.hhroot)
     with self.assertRaises(IOError) as ioe:
         self.hh.A3MToProfile("doesnotexist.a3m")
     raised = ioe.exception
     self.assertEqual(raised.errno, None)
     self.assertEqual(raised.args[0], "could not convert a3m to hhm file")
Ejemplo n.º 19
0
def _ParseiAlign(lines):
    """
    Extract the result of an iAlign run from the lines of its output.

    Fixed positions are read: the IS-score from ``lines[18]``, the numbers of
    aligned residues and contacts from ``lines[19]`` and ``lines[20]``, the
    RMSD from ``lines[21]``, the transformation rows from ``lines[25]`` to
    ``lines[27]`` (first character skipped; column 1 is the translation
    component, columns 2 to 4 the rotation row) and the aligned sequences
    from ``lines[32]`` and ``lines[34]``.

    :param lines: lines of the program output
    :returns: ``iAlignResult`` built from RMSD, 4x4 transformation,
              alignment, IS-score, aligned residues and aligned contacts.
              The sequence read from ``lines[34]`` is added to the
              alignment first.
    """
    def _AfterEquals(text):
        return text.split('=')[1].strip()

    is_score = float(_AfterEquals(lines[18].split(',')[0]))
    aln_residues = int(_AfterEquals(lines[19]))
    aln_contacts = int(_AfterEquals(lines[20]))
    rmsd = float(_AfterEquals(lines[21].split(',')[0]))

    rows = [[float(token) for token in lines[idx][1:].split()]
            for idx in (25, 26, 27)]
    rot = geom.Mat3(*[row[col] for row in rows for col in (2, 3, 4)])
    tf = geom.Mat4(rot)
    tf.PasteTranslation(geom.Vec3(*[row[1] for row in rows]))

    seq1 = seq.CreateSequence("1", lines[32].strip())
    seq2 = seq.CreateSequence("2", lines[34].strip())
    alignment = seq.CreateAlignment()
    for handle in (seq2, seq1):
        alignment.AddSequence(handle)
    return iAlignResult(rmsd, tf, alignment, is_score, aln_residues,
                        aln_contacts)
Ejemplo n.º 20
0
def AlignmentFromChainView(chain,
                           handle_seq_name='handle',
                           view_seq_name='view'):
    """
    Creates and returns the sequence alignment of the given chain view to the
    chain handle. The alignment contains two sequences: the first lists all
    non-ligand peptide-linking residues of the handle, the second lists those
    of them that are part of the view and has a gap everywhere else.

    :param chain: A valid chain
    :type chain: :class:`~ost.mol.ChainView`

    :param handle_seq_name: Name of the handle sequence in the output alignment
    :param view_seq_name: Name of the view sequence in the output alignment
    :returns: The alignment
    :rtype: :class:`~ost.seq.AlignmentHandle`
    """
    from ost import seq
    selection = 'ligand=false and peptide=true'
    handle_view = chain.handle.Select(selection)
    chain_view = chain.Select(selection)
    handle_seq = seq.CreateSequence(handle_seq_name, '')
    view_seq = seq.CreateSequence(view_seq_name, '')
    handle_seq.AttachView(handle_view)
    view_seq.AttachView(chain_view)
    handle_residues = handle_view.residues
    view_residues = chain_view.residues
    # Index of the next residue of the view that has not been matched yet.
    cursor = 0
    for handle_res in handle_residues:
        handle_seq.Append(handle_res.one_letter_code)
        in_view = (cursor < len(view_residues)
                   and view_residues[cursor].handle == handle_res.handle)
        if in_view:
            view_seq.Append(view_residues[cursor].one_letter_code)
            cursor += 1
        else:
            view_seq.Append('-')
    return seq.CreateAlignment(handle_seq, view_seq)
Ejemplo n.º 21
0
 def testA3mToCSFileExists(self):
     """A3MToCS() with an explicit cs_file name; the returned file is
     expected to equal the reference file."""
     sequence = ('VLSPADKTNVKAAWGKVGAHAGEYGAEA'
                 'LERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVAD'
                 'ALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL'
                 'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'
                 'TSKYR')
     query_seq = seq.CreateSequence('Test', sequence)
     self.hh = hhblits.HHblits(query_seq, self.hhroot)
     alphabet = os.path.join(self.hh.hhlib_dir, 'data', 'cs219.lib')
     csfile = self.hh.A3MToCS("testfiles/testali.a3m",
                              cs_file='testfiles/test.seq219',
                              options={'-alphabet' : alphabet})
     same = filecmp.cmp(csfile, 'testfiles/test.seq219')
     self.assertTrue(same)
Ejemplo n.º 22
0
 def testOSTSequenceWDir(self):
     """HHblits accepts an OST sequence together with a working dir and
     is expected to place the query fasta file inside it."""
     sequence = ('MRIILLGAPGAGKGTQAQFIMEKYGIPQISTGDMLRA'
                 'AVKSGSELGKQAKDIMDAGKLVTDELVIALVKERIAQ'
                 'EDCRNGFLLDGFPRTIPQADAMKEAGINVDYVLEFDV'
                 'PDELIVDRIVGRRVHAPSGRVYHVKFNPPKVEGKDDV'
                 'TGEELTTRKDDQEETVRKRLVEYHQMTAPLIGYYYYS'
                 'KEAEAGNTKYAKVDGTKPVAEVRADLEKILG')
     query_seq = seq.CreateSequence('1AKE.B', sequence)
     tmpdir = tempfile.mkdtemp()
     self.hh = hhblits.HHblits(query_seq, self.hhroot, working_dir=tmpdir)
     expected_fasta = os.path.join(tmpdir, 'query_hhblits.fasta')
     self.assertEqual(self.hh.filename, expected_fasta)
     self.assertEqual(self.hh.working_dir, tmpdir)
     # Flag the instance for cleanup (presumably so the temporary
     # directory created above gets removed -- confirm in HHblits).
     self.hh.needs_cleanup = True
Ejemplo n.º 23
0
 def testA3mToProfileWithoutFileName(self):
     """A3MToProfile() without an explicit hhm file name.

     The generated profile is compared line by line with the reference,
     skipping lines that start with FILE, COM or DATE. The generated file
     is removed even when an assertion fails, so no file is left behind
     for a later run to pick up.
     """
     query_seq = seq.CreateSequence('Test', 'VLSPADKTNVKAAWGKVGAHAGEYGAEA'+
                                    'LERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVAD'+
                                    'ALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL'+
                                    'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'+
                                    'TSKYR')
     self.hh = hhblits.HHblits(query_seq, self.hhroot)
     hhfile = self.hh.A3MToProfile("testfiles/testali.a3m")
     try:
         with open(hhfile) as tfh:
             tlst = tfh.readlines()
         with open("testfiles/test.hmm") as efh:
             elst = efh.readlines()
         self.assertEqual(len(elst), len(tlst))
         for expected, produced in zip(elst, tlst):
             if not expected.startswith(('FILE', 'COM', 'DATE')):
                 self.assertEqual(expected, produced)
     finally:
         # Guarded so a missing file does not hide the original failure.
         if os.path.exists(hhfile):
             os.remove(hhfile)
Ejemplo n.º 24
0
    def testSearchWorking(self):
        """Search() on an existing a3m file: the result file is expected
        to match the reference, apart from 'Date' and 'Command' lines."""
        sequence = ('VLSPADKTNVKAAWGKVGAHAGEYGAEA'
                    'LERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVAD'
                    'ALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL'
                    'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'
                    'TSKYR')
        query_seq = seq.CreateSequence('Test', sequence)
        self.hh = hhblits.HHblits(query_seq, self.hhroot)
        search_file = self.hh.Search("testfiles/testali.a3m",
                                     'testfiles/hhblitsdb/hhblitsdb')

        with open(search_file) as tfh:
            produced_lines = tfh.readlines()
        with open("testfiles/test.hhr") as efh:
            expected_lines = efh.readlines()

        self.assertEqual(len(expected_lines), len(produced_lines))
        for expected, produced in zip(expected_lines, produced_lines):
            if expected.startswith(('Date', 'Command')):
                continue
            self.assertEqual(expected, produced)
Ejemplo n.º 25
0
 def __DisplayAlignment(self, ent_list, res_list):
   """
   Merge the pairwise alignments of *res_list* and show them in a sequence
   viewer.

   The reference sequence is the gapless second sequence of the first
   result, named after ``ent_list[0]``. Nothing happens if *res_list* is
   empty or the reference sequence is not valid.

   :param ent_list: entities; entry ``i+1`` names the non-reference sequence
                    of result ``i``
   :param res_list: results exposing an ``alignment`` attribute
   """
   if len(res_list) <= 0:
     return
   ref_name = "%s (ref)"%ent_list[0].GetName()
   ref_string = res_list[0].alignment.GetSequence(1).GetGaplessString()
   ref_seq = seq.CreateSequence(ref_name, ref_string)
   aln_list = seq.AlignmentList()
   if not ref_seq.IsValid():
     return
   for idx in range(len(res_list)):
     # WrappedTMAlign returns an alignment with second sequence
     # being reference... let's swap...
     swapped = seq.CreateAlignment()
     pairwise = res_list[idx].alignment
     swapped.AddSequence(pairwise.GetSequence(1))
     swapped.AddSequence(res_list[idx].alignment.GetSequence(0))
     swapped.SetSequenceName(1, ent_list[idx+1].GetName())
     aln_list.append(swapped)
   alignment = alg.MergePairwiseAlignments(aln_list, ref_seq)
   gosty = gui.GostyApp.Instance()
   # The result is not used below; the call is kept as in the original.
   main_area = gosty.perspective.GetMainArea()
   # Close a viewer left over from a previous call before opening a new one.
   if self.seq_viewer:
     self.seq_viewer.qobject.close()
   self.seq_viewer = gui.SequenceViewer(True)
   self.seq_viewer.AddAlignment(alignment)
   self.seq_viewer.ChangeDisplayMode("Highlight conservation 1")
   self.seq_viewer.Show()
Ejemplo n.º 26
0
def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
    """
    Aligns the residues of chain to the SEQRES sequence, inserting gaps where
    needed. The function uses the connectivity of the protein backbone to find
    consecutive peptide fragments. These fragments are then aligned to the
    SEQRES sequence.

    All the non-ligand, peptide-linking residues of the chain must be listed in
    SEQRES. If there are any additional residues in the chain, the function
    raises a ValueError.

    :param chain: Source of the sequence
    :type chain: :class:`~ost.mol.ChainHandle`
    :param seqres: SEQRES sequence
    :type seqres: :class:`str`
    :param try_resnum_first: If set to True, this first builds an alignment
                             using residue numbers and checks if the
                             one-letter-codes match. If they all match, this
                             alignment is used (and possibly validated).
                             If a one-letter-code does not match, or a
                             residue number lies outside of the range
                             covered by SEQRES, it displays a warning and
                             falls back to the connectivity-based alignment.
    :type try_resnum_first: :class:`bool`
    :param validate: If set to True, the alignment is additionally checked by
                     :func:`~ost.seq.alg.ValidateSEQRESAlignment` and raises
                     a ValueError if the validation failed.
    :type validate: :class:`bool`

    :returns: The alignment of the residues in the chain and the SEQRES
              entries. An empty alignment if the chain has no residues.
    :rtype: :class:`~ost.seq.AlignmentHandle`
    :raises ValueError: if a fragment of the chain is not a substring of
                        SEQRES, or if validation is requested and fails.
    """
    def IsEqual(olc1, olc2):
        # 'X' and '?' are treated as wildcards matching any one-letter-code.
        return olc1 in ('X', '?') or olc2 in ('X', '?') or olc1 == olc2

    from ost import seq
    from ost import mol
    from ost import LogWarning
    view = chain
    residues = view.residues
    if len(residues) == 0:
        return seq.CreateAlignment()
    if try_resnum_first:
        aln_seq = seq.CreateSequence('atoms', '-' * len(seqres))
        for r1 in residues:
            num = r1.number.num
            if 0 < num <= len(seqres):
                if IsEqual(seqres[num - 1], r1.one_letter_code):
                    aln_seq[num - 1] = r1.one_letter_code
                else:
                    LogWarning('Sequence mismatch: chain has "' +
                               r1.one_letter_code + '", while SEQRES is "' +
                               seqres[num - 1] +
                               '" at the corresponding position.')
                    try_resnum_first = False
                    break
            else:
                # A residue numbered outside of SEQRES cannot be placed by
                # number. Skipping it silently would return an alignment
                # that misses a residue, so fall back instead.
                LogWarning('Residue with number "%i" is outside of the '
                           'range covered by SEQRES [1, %i]' %
                           (num, len(seqres)))
                try_resnum_first = False
                break
    if not try_resnum_first:
        # Split the chain into fragments of residues that are connected
        # according to mol.InSequence.
        fragments = [residues[0].one_letter_code]
        for r1, r2 in zip(residues[:-1], residues[1:]):
            if not mol.InSequence(r1.handle, r2.handle):
                fragments.append('')
            fragments[-1] += r2.one_letter_code
        ss = str(seqres)
        pos = 0
        pieces = []
        for frag in fragments:
            # Each fragment is searched after the end of the previous one,
            # which keeps the fragments in order.
            new_pos = ss.find(frag, pos)
            if new_pos == -1:
                raise ValueError('"%s" is not a substring of "%s"' %
                                 (frag, ss))
            pieces.append('-' * (new_pos - pos) + frag)
            pos = new_pos + len(frag)
        aligned = ''.join(pieces)
        # Pad with gaps up to the length of SEQRES.
        aln_seq = seq.CreateSequence(
            'atoms', aligned + ('-' * (len(seqres) - len(aligned))))
    alignment = seq.CreateAlignment(seq.CreateSequence('SEQRES', str(seqres)),
                                    aln_seq)
    if validate and not ValidateSEQRESAlignment(alignment, view):
        raise ValueError(
            "SEQRES cannot be aligned with its corresponding chain.")
    return alignment
Ejemplo n.º 27
0
def ParseA3M(a3m_file):
    '''
    Parse secondary structure information and the multiple sequence alignment
    out of an A3M file as produced by :meth:`HHblits.BuildQueryMSA`.

    Lines following ``>ss_pred`` and ``>ss_conf`` are collected as predicted
    secondary structure and confidence values. Every other ``>`` line starts
    a new MSA entry and is only accepted after the ``ss_conf`` section. The
    first entry serves as query: in the other entries, lower case characters
    are turned into upper case and paired with a gap in the query.

    :param a3m_file: Iterable containing the lines of the A3M file
    :type a3m_file: iterable (e.g. an open file handle)

    :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
             (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
             "msa" is None if the file holds fewer than two sequences.
    :raises IOError: if a sequence header shows up before the ``ss_conf``
                     section.
    '''
    state = 'NONE'
    pred_chunks = []
    conf_chunks = []
    msa_seq = []
    msa_head = []
    for line in a3m_file:
        content = line.rstrip()
        if len(content) == 0:
            continue
        if line.startswith('>ss_pred'):
            state = 'sspred'
        elif line.startswith('>ss_conf'):
            state = 'ssconf'
        elif line[0] == '>':
            if state not in ('ssconf', 'msa'):
                raise IOError('The A3M file is missing the "ss_conf" section')
            msa_seq.append('')
            msa_head.append(line[1:].rstrip())
            state = 'msa'
        elif state == 'sspred':
            pred_chunks.append(content)
        elif state == 'ssconf':
            conf_chunks.append(content)
        elif state == 'msa':
            msa_seq[-1] += content

    pred_text = ''.join(pred_chunks)
    conf_text = ''.join(conf_chunks)
    profile_dict = {
        'ss_pred': list(pred_text),
        # one confidence digit is read per predicted state
        'ss_conf': [int(conf_text[pos]) for pos in range(len(pred_text))],
        'msa': None,
    }

    # post processing
    # MSA
    if len(msa_seq) > 1:
        query = msa_seq[0]
        pairwise = seq.AlignmentList()
        for head, row in zip(msa_head[1:], msa_seq[1:]):
            query_cols = []
            target_cols = []
            consumed = 0
            for char in row:
                if char.islower():
                    query_cols.append('-')
                    target_cols.append(char.upper())
                else:
                    query_cols.append(query[consumed])
                    target_cols.append(char)
                    consumed += 1
            pairwise.append(seq.CreateAlignment(
                seq.CreateSequence(msa_head[0], ''.join(query_cols)),
                seq.CreateSequence(head, ''.join(target_cols))))
        profile_dict['msa'] = seq.alg.MergePairwiseAlignments(
            pairwise, seq.CreateSequence(msa_head[0], query))
    return profile_dict
Ejemplo n.º 28
0
def ParseHHM(profile):
    '''
    Parse secondary structure information and the MSA out of an HHM profile as
    produced by :meth:`HHblits.A3MToProfile`.

    Sections are recognised by their header lines (``>ss_pred ...``,
    ``>ss_conf ...``, ``>Consensus``). Every other ``>`` line starts a new MSA
    entry and is only accepted after the consensus section. A line starting
    with ``#`` ends the current section. The first MSA entry serves as query:
    in the other entries, lower case characters are turned into upper case
    and paired with a gap in the query.

    :param profile: Iterable over the lines of the profile, e.g. an opened
                    file handle. If it has a ``name`` attribute, that name is
                    used in the error message.
    :type profile: :class:`file`

    :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
             (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
             "consensus" (:class:`~ost.seq.SequenceHandle`). "msa" is None
             if the profile holds no sequence.
    :raises IOError: if a sequence header shows up before the "Consensus"
                     section.
    '''
    state = 'NONE'
    pred_chunks = []
    conf_chunks = []
    consensus_chunks = []
    msa_seq = list()
    msa_head = list()
    for line in profile:
        content = line.rstrip()
        if len(content) == 0:
            continue
        if content == '>ss_pred PSIPRED predicted secondary structure':
            state = 'sspred'
        elif content == '>ss_conf PSIPRED confidence values':
            state = 'ssconf'
        elif content == '>Consensus':
            state = 'consensus'
        elif line[0] == '>':
            if state not in ('consensus', 'msa'):
                # Not every line iterable has a name (e.g. a list of lines
                # or a StringIO); an AttributeError must not replace the
                # IOError here.
                source = getattr(profile, 'name', '<unknown>')
                raise IOError('Profile file "%s" is missing ' % source +
                              'the "Consensus" section')
            msa_seq.append('')
            msa_head.append(line[1:].rstrip())
            state = 'msa'
        elif line[0] == '#':
            state = 'NONE'
        elif state == 'sspred':
            pred_chunks.append(content)
        elif state == 'ssconf':
            conf_chunks.append(content)
        elif state == 'msa':
            msa_seq[-1] += content
        elif state == 'consensus':
            consensus_chunks.append(content)

    pred_seq_txt = ''.join(pred_chunks)
    conf_seq_txt = ''.join(conf_chunks)
    profile_dict = dict()
    profile_dict['ss_pred'] = list(pred_seq_txt)
    # one confidence digit is read per predicted state
    profile_dict['ss_conf'] = [int(conf_seq_txt[i])
                               for i in range(len(pred_seq_txt))]

    # post processing
    # MSA
    profile_dict['msa'] = None
    if len(msa_seq):
        t = msa_seq[0]
        al = seq.AlignmentList()
        for head, row in zip(msa_head[1:], msa_seq[1:]):
            qs = []
            ts = []
            k = 0
            for c in row:
                if c.islower():
                    qs.append('-')
                    ts.append(c.upper())
                else:
                    qs.append(t[k])
                    ts.append(c)
                    k += 1
            nl = seq.CreateAlignment(
                seq.CreateSequence(msa_head[0], ''.join(qs)),
                seq.CreateSequence(head, ''.join(ts)))
            al.append(nl)
        profile_dict['msa'] = seq.alg.MergePairwiseAlignments(
            al, seq.CreateSequence(msa_head[0], t))
    # Consensus
    profile_dict['consensus'] = seq.CreateSequence(
        'Consensus', ''.join(consensus_chunks))

    return profile_dict
Ejemplo n.º 29
0
 def testSeqIterBZDNG148(self):
     """Iterating over a sequence must run through without raising
     (the test name refers to BZDNG148, presumably a ticket id)."""
     handle = seq.CreateSequence('A', 'abcdef')
     for _ in handle:
         continue
Ejemplo n.º 30
0
 def testValidateEmptySequenceWorking(self):
     """ValidateSEQRESAlignment() is expected to accept an alignment of two
     empty sequences together with a default-constructed chain."""
     seqres_seq = seq.CreateSequence('SEQRES', '')
     atoms_seq = seq.CreateSequence('atoms', '')
     alignment = seq.CreateAlignment(seqres_seq, atoms_seq)
     chain = mol.ChainHandle()
     outcome = seq.alg.ValidateSEQRESAlignment(alignment, chain)
     self.assertEqual(outcome, True)