Esempio n. 1
0
 def _MakeAln(query_id, hit_id, query_string, templ_string,
              q_offset, t_offset):
     """Build a two-sequence alignment from a query and a template string.

     :param query_id: Name given to the query sequence.
     :param hit_id: Name given to the template (hit) sequence.
     :param query_string: Sequence string of the query.
     :param templ_string: Sequence string of the template.
     :param q_offset: Start position of the query; ``q_offset-1`` is stored
                      as the sequence offset (presumably a 1-based position
                      turned into a 0-based offset -- confirm with callers).
     :param t_offset: Start position of the template, treated like
                      *q_offset*.
     :returns: Alignment with the query as first and the template as second
               sequence.
     """
     query = seq.CreateSequence(query_id, query_string)
     template = seq.CreateSequence(hit_id, templ_string)
     query.offset = q_offset - 1
     template.offset = t_offset - 1
     return seq.CreateAlignment(query, template)
Esempio n. 2
0
  def _ParseHsp(query_id, hit_id, hsp, tot_query_len, seqid_thres=0, evalue_thres=float("infinity")):
    """
    Convert one BLAST XML ``Hsp`` element into an aligned patch.

    :param query_id: Name for the query sequence (converted with ``str``).
    :param hit_id: Name for the hit sequence (converted with ``str``).
    :param hsp: Element from which the ``Hsp_*`` tags are read.
    :param tot_query_len: Not used by this function.
    :param seqid_thres: The patch is only returned if the sequence identity
                        (identities divided by alignment length) is strictly
                        greater than this value.
    :param evalue_thres: The patch is only returned if the E-value is
                         strictly smaller than this value.
    :returns: An ``AlignedPatch``, or ``None`` if a threshold is not met or
              if building the patch raised an exception (which is printed).
    """
    bit_score = float(_GetValue(hsp, 'Hsp_bit-score'))
    raw_score = float(_GetValue(hsp, 'Hsp_score'))
    evalue = float(_GetValue(hsp, 'Hsp_evalue'))
    try:
      n_identical = float(_GetValue(hsp, 'Hsp_identity'))
    except AssertionError:
      # Hsp_identity (number of matching characters) is optional in the
      # BLAST XML format; a missing tag is treated as zero matches.
      n_identical = 0
    seqid = n_identical / float(_GetValue(hsp, 'Hsp_align-len'))

    # The "-from" positions are reduced by one before being used as offsets.
    query_start = _GetInt(hsp, 'Hsp_query-from') - 1
    hit_start = _GetInt(hsp, 'Hsp_hit-from') - 1

    query_seq = seq.CreateSequence(str(query_id),
                                   str(_GetValue(hsp, 'Hsp_qseq')))
    query_seq.offset = query_start
    hit_seq = seq.CreateSequence(str(hit_id),
                                 str(_GetValue(hsp, 'Hsp_hseq')))
    hit_seq.offset = hit_start

    try:
      passes = seqid > float(seqid_thres) and evalue < evalue_thres
      if not passes:
        return None
      return AlignedPatch(seq.CreateAlignment(query_seq, hit_seq),
                          bit_score, raw_score, evalue, seqid)
    except Exception as err:
      # Best effort: report the problem and fall through to return None.
      print(str(err), query_seq, hit_seq)
Esempio n. 3
0
def _RunkClust(tmp_dir_name, clustering_thresh, create_alignments):
    """Run kClust on ``fastadb.fasta`` in *tmp_dir_name* and parse the result.

    :param tmp_dir_name: Directory containing ``fastadb.fasta``; it is also
                         passed to kClust via ``-d`` and to ``_ParseOutput``.
    :param clustering_thresh: Threshold that is converted linearly into the
                              value handed to kClust via ``-s``.
    :param create_alignments: If true, an alignment is attached to every
                              cluster: ClustalW is used for clusters with
                              more than one sequence, otherwise a
                              single-sequence alignment is created.
    :returns: Whatever ``_ParseOutput(tmp_dir_name)`` returns, with the
              ``alignment`` attribute set on each cluster if requested.
    """
    bitscore = clustering_thresh * 0.060269 - 0.68498

    executable = settings.Locate('kClust')

    # Hand the arguments over as a list and do not go through a shell, so
    # paths containing spaces or shell metacharacters reach kClust unchanged.
    cmd = [
        executable,
        '-i', os.path.join(tmp_dir_name, 'fastadb.fasta'),
        '-d', tmp_dir_name,
        '-s', str(bitscore),
    ]
    ps = subprocess.Popen(cmd,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    # Drain both pipes and wait for the process to finish. The captured
    # output is not used; results are obtained through _ParseOutput.
    ps.communicate()

    result = _ParseOutput(tmp_dir_name)

    if create_alignments:
        from ost.bindings import clustalw
        for c in result:
            if len(c.sequences) > 1:
                c.alignment = clustalw.ClustalW(c.sequences)
            else:
                aln = seq.CreateAlignment()
                aln.AddSequence(c.sequences[0])
                c.alignment = aln

    return result
Esempio n. 4
0
def _ParseTmAlign(lines,lines_matrix):
  """
  Build a TMAlignResult from the text output of TMalign.

  The positions read are fixed: ``lines[12]`` (comma separated ``key=value``
  fields, aligned length first, RMSD second), ``lines[14]`` (TM-score, taken
  between ``=`` and ``(``), ``lines[18]``/``lines[20]`` (the two aligned
  sequences) and ``lines_matrix[2:5]`` (one transformation row each).

  :param lines: Lines of the TMalign output.
  :param lines_matrix: Lines of the TMalign matrix output.
  :returns: ``ost.bindings.TMAlignResult`` with RMSD, TM-score, aligned
            length, transformation and alignment.
  """
  def _after_equals(text):
    # Value part of a "key = value" field.
    return text.split('=')[1].strip()

  summary = lines[12].split(',')
  aln_length = int(_after_equals(summary[0]))
  rmsd = float(_after_equals(summary[1]))
  tm_score = float(lines[14].split('=')[1].split('(')[0].strip())

  # In every matrix row, column 1 is used as translation component and
  # columns 2-4 as rotation components; column 0 is ignored.
  row1, row2, row3 = [[float(field) for field in lines_matrix[idx].split()]
                      for idx in (2, 3, 4)]
  rot = geom.Mat3(row1[2], row1[3], row1[4],
                  row2[2], row2[3], row2[4],
                  row3[2], row3[3], row3[4])
  tf = geom.Mat4(rot)
  tf.PasteTranslation(geom.Vec3(row1[1], row2[1], row3[1]))

  seq1 = seq.CreateSequence("1", lines[18].strip())
  seq2 = seq.CreateSequence("2", lines[20].strip())
  alignment = seq.CreateAlignment()
  # Sequence "2" is added first, sequence "1" second.
  for entry in (seq2, seq1):
    alignment.AddSequence(entry)
  return ost.bindings.TMAlignResult(rmsd, tm_score, aln_length, tf, alignment)
Esempio n. 5
0
 def testAlnSlice(self):
     """Slicing an alignment returns the expected number of columns and
     the expected column strings."""
     rows = [seq.CreateSequence(name, letters)
             for name, letters in (('A', 'abcd'), ('B', 'efgh'),
                                   ('C', 'ijkl'), ('D', 'mnop'))]
     aln = seq.CreateAlignment(*rows)
     # (slice applied to the alignment, expected column strings)
     cases = (
         (slice(1, None), ('bfjn', 'cgko', 'dhlp')),
         (slice(None, -1), ('aeim', 'bfjn', 'cgko')),
         (slice(-1, None), ('dhlp',)),
     )
     for window, columns in cases:
         sliced = aln[window]
         self.assertEqual(len(sliced), len(columns))
         for idx, column in enumerate(columns):
             self.assertEqual(str(sliced[idx]), column)
Esempio n. 6
0
def _ParseiAlign(lines):
    """
    Build an iAlignResult from the text output of iAlign.

    The positions read are fixed: ``lines[18]`` (first comma separated field
    holds the IS-score), ``lines[19]`` (aligned residues), ``lines[20]``
    (aligned contacts), ``lines[21]`` (first comma separated field holds the
    RMSD), ``lines[25:28]`` (one transformation row each, first character
    skipped) and ``lines[32]``/``lines[34]`` (the two aligned sequences).

    :param lines: Lines of the iAlign output.
    :returns: ``iAlignResult`` with RMSD, transformation, alignment,
              IS-score, number of aligned residues and aligned contacts.
    """
    def _after_equals(text):
        # Value part of a "key = value" field.
        return text.split('=')[1].strip()

    is_score = float(_after_equals(lines[18].split(',')[0]))
    aln_residues = int(_after_equals(lines[19]))
    aln_contacts = int(_after_equals(lines[20]))
    rmsd = float(_after_equals(lines[21].split(',')[0]))

    # In every matrix row, column 1 is used as translation component and
    # columns 2-4 as rotation components; column 0 is ignored.
    row1, row2, row3 = [[float(field) for field in lines[idx][1:].split()]
                        for idx in (25, 26, 27)]
    rot = geom.Mat3(row1[2], row1[3], row1[4],
                    row2[2], row2[3], row2[4],
                    row3[2], row3[3], row3[4])
    tf = geom.Mat4(rot)
    tf.PasteTranslation(geom.Vec3(row1[1], row2[1], row3[1]))

    seq1 = seq.CreateSequence("1", lines[32].strip())
    seq2 = seq.CreateSequence("2", lines[34].strip())
    alignment = seq.CreateAlignment()
    # Sequence "2" is added first, sequence "1" second.
    for entry in (seq2, seq1):
        alignment.AddSequence(entry)
    return iAlignResult(rmsd, tf, alignment, is_score, aln_residues,
                        aln_contacts)
 def __DisplayAlignment(self, ent_list, res_list):
   """
   Merge the pairwise alignments in *res_list* and show them in a sequence
   viewer.

   Nothing happens if *res_list* is empty or if the reference sequence is
   not valid. A sequence viewer already stored in ``self.seq_viewer`` is
   closed and replaced by a new one.

   :param ent_list: Entities; the first one names the reference sequence,
                    entry ``i+1`` names the sequence of result ``i``.
   :param res_list: Results providing an ``alignment`` attribute.
   """
   if len(res_list) == 0:
     return
   ref_name = "%s (ref)"%ent_list[0].GetName()
   ref_seq = seq.CreateSequence(
       ref_name, res_list[0].alignment.GetSequence(1).GetGaplessString())
   aln_list = seq.AlignmentList()
   if not ref_seq.IsValid():
     return
   for idx, res in enumerate(res_list):
     # WrappedTMAlign returns an alignment with the second sequence being
     # the reference, so the two sequences are swapped here.
     swapped = seq.CreateAlignment()
     swapped.AddSequence(res.alignment.GetSequence(1))
     swapped.AddSequence(res.alignment.GetSequence(0))
     swapped.SetSequenceName(1, ent_list[idx+1].GetName())
     aln_list.append(swapped)
   alignment = alg.MergePairwiseAlignments(aln_list, ref_seq)
   # The main area is looked up but not used afterwards; the calls are kept
   # as they were.
   gosty = gui.GostyApp.Instance()
   main_area = gosty.perspective.GetMainArea()
   if self.seq_viewer:
     self.seq_viewer.qobject.close()
   viewer = gui.SequenceViewer(True)
   self.seq_viewer = viewer
   viewer.AddAlignment(alignment)
   viewer.ChangeDisplayMode("Highlight conservation 1")
   viewer.Show()
Esempio n. 8
0
def AlignmentFromChainView(chain,
                           handle_seq_name='handle',
                           view_seq_name='view'):
    """
    Align the residues of a chain view to those of its chain handle.

    The returned alignment has two sequences. The first lists all non-ligand
    peptide-linking residues of the chain handle, the second lists those of
    them that are part of the view and has a gap (``-``) everywhere else.
    Both sequences get the corresponding selection attached as view.

    :param chain: A valid chain
    :type chain: :class:`~ost.mol.ChainView`

    :param handle_seq_name: Name of the handle sequence in the output alignment
    :param view_seq_name: Name of the view sequence in the output alignment
    :returns: The alignment
    :rtype: :class:`~ost.seq.AlignmentHandle`
    """
    from ost import seq
    selection = 'ligand=false and peptide=true'
    handle_view = chain.handle.Select(selection)
    chain_view = chain.Select(selection)
    handle_seq = seq.CreateSequence(handle_seq_name, '')
    view_seq = seq.CreateSequence(view_seq_name, '')
    handle_seq.AttachView(handle_view)
    view_seq.AttachView(chain_view)
    handle_residues = handle_view.residues
    view_residues = chain_view.residues
    # Position of the next not yet matched residue of the view.
    cursor = 0
    for handle_res in handle_residues:
        handle_seq.Append(handle_res.one_letter_code)
        in_view = (cursor < len(view_residues)
                   and view_residues[cursor].handle == handle_res.handle)
        if in_view:
            view_seq.Append(view_residues[cursor].one_letter_code)
            cursor += 1
        else:
            view_seq.Append('-')
    return seq.CreateAlignment(handle_seq, view_seq)
Esempio n. 9
0
def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
    """
    Align the residues of *chain* to the SEQRES sequence, inserting gaps
    where needed.

    By default the connectivity of the protein backbone is used to split the
    chain into consecutive peptide fragments, which are then located one
    after the other in the SEQRES sequence. Every fragment must occur in
    SEQRES (after the previous one); otherwise a ValueError is raised.

    An empty alignment is returned if the chain has no residues.

    :param chain: Source of the sequence
    :type chain: :class:`~ost.mol.ChainHandle`
    :param seqres: SEQRES sequence
    :type seqres: :class:`str`
    :param try_resnum_first: If set to True, an alignment is first built from
                             the residue numbers and the one-letter-codes are
                             compared ('X' and '?' match anything). Residues
                             whose number is outside 1..len(seqres) are
                             skipped. On the first mismatch a warning is
                             logged and the connectivity-based alignment is
                             used instead.
    :type try_resnum_first: :class:`bool`
    :param validate: If set to True, the alignment is additionally checked by
                     :func:`~ost.seq.alg.ValidateSEQRESAlignment` and a
                     ValueError is raised if the validation failed.
    :type validate: :class:`bool`

    :returns: The alignment of the SEQRES entries (first sequence, named
              'SEQRES') and the residues in the chain (second sequence,
              named 'atoms').
    :rtype: :class:`~ost.seq.AlignmentHandle`
    """
    from ost import seq
    from ost import mol
    from ost import LogWarning

    def IsEqual(olc1, olc2):
        # 'X' and '?' act as wildcards on either side.
        wildcards = ('X', '?')
        if olc1 in wildcards or olc2 in wildcards:
            return True
        return olc1 == olc2

    view = chain
    residues = view.residues
    if len(residues) == 0:
        return seq.CreateAlignment()

    # Becomes True when the residue-number based attempt is not requested
    # or had to be abandoned.
    use_connectivity = not try_resnum_first
    if try_resnum_first:
        aln_seq = seq.CreateSequence('atoms', '-' * len(seqres))
        for res in residues:
            num = res.number.num
            if not (num <= len(seqres) and num > 0):
                continue
            olc = res.one_letter_code
            if IsEqual(seqres[num - 1], olc):
                aln_seq[num - 1] = olc
                continue
            LogWarning('Sequence mismatch: chain has "' +
                       olc + '", while SEQRES is "' +
                       seqres[num - 1] +
                       '" at the corresponding position.')
            use_connectivity = True
            break

    if use_connectivity:
        # Split the chain into fragments of residues connected in sequence.
        fragments = [residues[0].one_letter_code]
        for prev_res, next_res in zip(residues[:-1], residues[1:]):
            if not mol.InSequence(prev_res.handle, next_res.handle):
                fragments.append('')
            fragments[-1] += next_res.one_letter_code

        # Locate every fragment in SEQRES, searching from the end of the
        # previous one, and pad the skipped positions with gaps.
        ss = str(seqres)
        cursor = 0
        pieces = []
        for frag in fragments:
            found_at = ss.find(frag, cursor)
            if found_at == -1:
                raise ValueError('"%s" is not a substring of "%s"' %
                                 (frag, ss))
            pieces.append('-' * (found_at - cursor))
            pieces.append(frag)
            cursor = found_at + len(frag)
        atoms_txt = ''.join(pieces)
        atoms_txt += '-' * (len(seqres) - len(atoms_txt))
        aln_seq = seq.CreateSequence('atoms', atoms_txt)

    alignment = seq.CreateAlignment(seq.CreateSequence('SEQRES', str(seqres)),
                                    aln_seq)
    if validate and not ValidateSEQRESAlignment(alignment, view):
        raise ValueError(
            "SEQRES cannot be aligned with its corresponding chain.")
    return alignment
Esempio n. 10
0
def ParseHHM(profile):
    '''
    Extract secondary structure information, the MSA and the consensus
    sequence from an HHM profile as produced by
    :meth:`HHblits.A3MToProfile`.

    Empty lines are skipped and a line starting with ``#`` ends the current
    section. Any ``>`` header other than the ss_pred, ss_conf and Consensus
    headers starts a new MSA entry and is only accepted after the Consensus
    section. In MSA entries after the first one, lower case characters are
    written in upper case and paired with a gap in the first sequence.

    :param profile: Opened file handle holding the profile.
    :type profile: :class:`file`

    :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
             (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`, or
             None if there is no MSA entry) and "consensus"
             (:class:`~ost.seq.SequenceHandle`).
    :raises IOError: If an MSA header shows up before the Consensus section.
    '''
    section_of_header = {
        '>ss_pred PSIPRED predicted secondary structure': 'sspred',
        '>ss_conf PSIPRED confidence values': 'ssconf',
        '>Consensus': 'consensus',
    }
    state = 'NONE'
    pred_chunks = []
    conf_chunks = []
    consensus_chunks = []
    msa_seq = []
    msa_head = []
    for line in profile:
        content = line.rstrip()
        if not content:
            continue
        if content in section_of_header:
            state = section_of_header[content]
            continue
        if line[0] == '>':
            if state not in ('consensus', 'msa'):
                raise IOError(('Profile file "%s" is missing ' % profile.name)
                              + 'the "Consensus" section')
            msa_seq.append('')
            msa_head.append(content[1:])
            state = 'msa'
            continue
        if line[0] == '#':
            state = 'NONE'
            continue

        # Plain data line: append it to whatever section is active.
        if state == 'sspred':
            pred_chunks.append(content)
        elif state == 'ssconf':
            conf_chunks.append(content)
        elif state == 'msa':
            msa_seq[-1] += content
        elif state == 'consensus':
            consensus_chunks.append(content)

    pred_seq_txt = ''.join(pred_chunks)
    conf_seq_txt = ''.join(conf_chunks)
    consensus_txt = ''.join(consensus_chunks)

    profile_dict = dict()
    profile_dict['ss_pred'] = list(pred_seq_txt)
    # One confidence value is required for every predicted state.
    profile_dict['ss_conf'] = [int(conf_seq_txt[pos])
                               for pos in range(len(pred_seq_txt))]

    # post processing
    # MSA
    profile_dict['msa'] = None
    if len(msa_seq):
        t = msa_seq[0]
        al = seq.AlignmentList()
        for head, row in zip(msa_head[1:], msa_seq[1:]):
            query_chars = []
            target_chars = []
            k = 0
            for c in row:
                if c.islower():
                    query_chars.append('-')
                    target_chars.append(c.upper())
                else:
                    query_chars.append(t[k])
                    target_chars.append(c)
                    k += 1
            pairwise = seq.CreateAlignment(
                seq.CreateSequence(msa_head[0], ''.join(query_chars)),
                seq.CreateSequence(head, ''.join(target_chars)))
            al.append(pairwise)
        profile_dict['msa'] = seq.alg.MergePairwiseAlignments(
            al, seq.CreateSequence(msa_head[0], t))
    # Consensus
    profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt)

    return profile_dict
Esempio n. 11
0
def ParseA3M(a3m_file):
    '''
    Extract secondary structure information and the multiple sequence
    alignment from an A3M file as produced by
    :meth:`HHblits.BuildQueryMSA`.

    Empty lines are skipped. Any ``>`` header other than the ss_pred and
    ss_conf headers starts a new MSA entry and is only accepted after the
    ss_conf section. In MSA entries after the first one, lower case
    characters are written in upper case and paired with a gap in the first
    sequence.

    :param a3m_file: Iterable containing the lines of the A3M file
    :type a3m_file: iterable (e.g. an open file handle)

    :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
             (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`,
             or None if there are fewer than two MSA entries).
    :raises IOError: If an MSA header shows up before the ss_conf section.
    '''
    state = 'NONE'
    pred_chunks = []
    conf_chunks = []
    msa_seq = []
    msa_head = []
    for line in a3m_file:
        content = line.rstrip()
        if not content:
            continue
        if line.startswith('>ss_pred'):
            state = 'sspred'
            continue
        if line.startswith('>ss_conf'):
            state = 'ssconf'
            continue
        if line[0] == '>':
            if state not in ('ssconf', 'msa'):
                raise IOError('The A3M file is missing the "ss_conf" section')
            msa_seq.append('')
            msa_head.append(content[1:])
            state = 'msa'
            continue

        # Plain data line: append it to whatever section is active.
        if state == 'sspred':
            pred_chunks.append(content)
        elif state == 'ssconf':
            conf_chunks.append(content)
        elif state == 'msa':
            msa_seq[-1] += content

    pred_seq_txt = ''.join(pred_chunks)
    conf_seq_txt = ''.join(conf_chunks)

    profile_dict = dict()
    profile_dict['ss_pred'] = list(pred_seq_txt)
    # One confidence value is required for every predicted state.
    profile_dict['ss_conf'] = [int(conf_seq_txt[pos])
                               for pos in range(len(pred_seq_txt))]

    # post processing
    # MSA
    profile_dict['msa'] = None
    if len(msa_seq) > 1:
        t = msa_seq[0]
        al = seq.AlignmentList()
        for head, row in zip(msa_head[1:], msa_seq[1:]):
            query_chars = []
            target_chars = []
            k = 0
            for c in row:
                if c.islower():
                    query_chars.append('-')
                    target_chars.append(c.upper())
                else:
                    query_chars.append(t[k])
                    target_chars.append(c)
                    k += 1
            pairwise = seq.CreateAlignment(
                seq.CreateSequence(msa_head[0], ''.join(query_chars)),
                seq.CreateSequence(head, ''.join(target_chars)))
            al.append(pairwise)
        profile_dict['msa'] = seq.alg.MergePairwiseAlignments(
            al, seq.CreateSequence(msa_head[0], t))
    return profile_dict
Esempio n. 12
0
 def testValidateEmptySequenceWorking(self):
     """An alignment of two empty sequences validates against a default
     constructed chain handle."""
     seqres = seq.CreateSequence('SEQRES', '')
     atoms = seq.CreateSequence('atoms', '')
     alignment = seq.CreateAlignment(seqres, atoms)
     chain = mol.ChainHandle()
     is_valid = seq.alg.ValidateSEQRESAlignment(alignment, chain)
     self.assertEqual(is_valid, True)