コード例 #1
0
ファイル: ali.py プロジェクト: umbriel47/saab_plus
 def make_gaps_consistent(self):
     """Rewrite the gaps of every entry from index 1 onwards to follow the master sequence.

     For each column of ``self.master.seq``:
       * if the master has a gap there, the entry gets '-' in that column;
       * otherwise, if the entry has a gap there, it is replaced by '?'.

     The entry at index 0 is not touched (presumably it is the master itself;
     confirm against the class definition). Entries are modified in place
     through their ``seq`` attribute.
     """
     reference = self.master.seq
     for entry_index in xrange(1, len(self)):
         member = self[entry_index]
         # Mutable character buffer so single columns can be overwritten.
         chars = array('c', member.seq)
         for column, ref_char in enumerate(reference):
             if isGap(ref_char):
                 chars[column] = '-'
                 continue
             if isGap(chars[column]):
                 chars[column] = '?'
         member.seq = chars.tostring()
コード例 #2
0
ファイル: ali.py プロジェクト: umbriel47/saab_plus
 def remove_gaps(self, template_seq=None):
     """remove_gaps([template_seq]) : Removes all alignment columns that are gapped in the template.

     Only applicable if sequences are aligned (at least within each EntryGroup).
     If template_seq is given, all entries must be aligned to template_seq and
     the columns where template_seq has a gap are removed from every entry.
     Otherwise the sequence of the first entry of each group (eg[0]) is used as
     the template, and groups for which hasMasterEntry() is false are skipped.

     Columns where only a non-template entry has a gap are kept.
     Entries are modified in place through their ``seq`` attribute.

     Raises AssertionError if an entry's length differs from the template's.
     """
     # Note that using this function will unalign entries with different codes!

     if template_seq:
         assert isinstance(template_seq[0], str)

     for eg in self:
         if template_seq:
             template = template_seq
         else:
             if not eg.hasMasterEntry():
                 continue
             template = eg[0].seq

         # Check the whole group before changing anything, so that a
         # misaligned group is left untouched.
         for entry in eg:
             assert len(entry.seq) == len(template)

         # Indices of the columns to keep: those where the template has a residue.
         kept_columns = [i for i, c in enumerate(template) if not isGap(c)]

         for entry in eg:
             seq = entry.seq
             entry.seq = ''.join([seq[i] for i in kept_columns])
コード例 #3
0
ファイル: seq.py プロジェクト: umbriel47/saab_plus
def remove_gapped_columns(seqs, master=0):
  """Keep only those columns where the master sequence has no gap.

  seqs   -- mutable sequence of aligned sequences; it is modified in place
            (each element is replaced) and nothing is returned.
  master -- index into seqs of the sequence whose gaps select the columns
            to drop (default: the first sequence).

  Raises AssertionError ("Sequences not aligned") if the sequences do not
  all have the same length. The check runs before any element is replaced,
  so seqs is left unchanged when it fails.
  """
  residuesToDelete = [i for i, c in enumerate(seqs[master]) if isGap(c)]

  # Validate everything up front; checking inside the rewrite loop would
  # leave seqs half-modified on a misaligned input.
  expected_length = len(seqs[-1])
  for seq in seqs:
    assert len(seq) == expected_length, "Sequences not aligned"

  # __stringRemoveIndeces is defined elsewhere in this module; presumably it
  # returns its first argument without the characters at the given indices.
  for i, seq in enumerate(seqs):
    seqs[i] = __stringRemoveIndeces(seq, residuesToDelete)
コード例 #4
0
ファイル: seq.py プロジェクト: umbriel47/saab_plus
def find_aligned_residues(seq1, seq2, equiv=None, eqchars=":."):
    """Return the residue indices of the aligned positions of two aligned sequences.

    seq1, seq2 -- aligned sequences of equal length (gaps recognised by isGap).
    equiv      -- optional per-column annotation of the same length as the
                  sequences. When given, a column counts as aligned if its
                  equiv character is contained in eqchars.
                  When omitted, a column counts as aligned if neither
                  sequence has a gap there.
    eqchars    -- characters of equiv that mark an aligned column.

    Returns (numbering_list1, numbering_list2): two lists of equal length
    where the k-th items are the 0-based indices, counted over non-gap
    characters only, of the k-th aligned pair in seq1 and seq2 respectively.

    Raises AssertionError if the input lengths differ.
    """
    assert len(seq1) == len(seq2)
    # Identity test: "== None" would invoke a custom __eq__ on equiv.
    assert equiv is None or len(seq1) == len(equiv)

    numbering_list1 = []
    numbering_list2 = []

    n1 = 0  # residues of seq1 seen so far
    n2 = 0  # residues of seq2 seen so far
    if equiv is None:
        for c1, c2 in zip(seq1, seq2):
            has1 = not isGap(c1)
            has2 = not isGap(c2)
            if has1 and has2:
                numbering_list1.append(n1)
                numbering_list2.append(n2)
            if has1:
                n1 += 1
            if has2:
                n2 += 1
    else:
        for c1, c2, e in zip(seq1, seq2, equiv):
            if e in eqchars:
                numbering_list1.append(n1)
                numbering_list2.append(n2)
            if not isGap(c1):
                n1 += 1
            if not isGap(c2):
                n2 += 1

    return numbering_list1, numbering_list2
コード例 #5
0
ファイル: seq.py プロジェクト: umbriel47/saab_plus
def __get_pid_counts(seq1, seq2):
  """Count the raw quantities needed for identity/coverage of two aligned sequences.

  Returns the tuple (l1, l2, cov, idn):
    l1  -- number of non-gap characters in seq1
    l2  -- number of non-gap characters in seq2
    cov -- number of columns where neither sequence has a gap
    idn -- number of those columns where both characters are equal

  Raises AssertionError (message shows both sequences) if the lengths differ.
  """
  assert len(seq1) == len(seq2), "\n>seq1\n%s\n>seq2\n%s\n"%(seq1, seq2)

  len_a = len_b = 0
  paired = 0
  identical = 0

  for a, b in zip(seq1, seq2):
    a_present = not isGap(a)
    b_present = not isGap(b)
    if a_present:
      len_a += 1
    if b_present:
      len_b += 1
    if a_present and b_present:
      paired += 1
      if a == b:
        identical += 1

  return (len_a, len_b, paired, identical)
コード例 #6
0
ファイル: seq.py プロジェクト: umbriel47/saab_plus
def consensus(seqs):
  """Tabulate the residue counts of every column of an alignment.

  seqs -- non-empty sequence of aligned sequences (all of the same length).

  Returns (cons, totals), both with one item per column:
    cons[i]   -- list of (residue, count) tuples for column i, sorted by
                 residue; gap characters (per isGap) are not counted
    totals[i] -- number of non-gap characters in column i

  Raises AssertionError if the sequences differ in length, and IndexError
  if seqs is empty.
  """
  length = len(seqs[0])
  for s in seqs:
    assert len(s) == length, "Input sequences must be aligned (of the same length)"

  cons = []
  totals = []
  for column in xrange(length):
    counts = {}
    for s in seqs:
      residue = s[column]
      if not isGap(residue):
        counts[residue] = counts.get(residue, 0) + 1
    cons.append([(residue, counts[residue]) for residue in sorted(counts)])
    totals.append(sum(counts.values()))
  return cons, totals