Example #1
0
def refineSingle(al, gapPenalty = defaultMatchScores.gap) :
  """ Re-align every sequence of C{al} against the profile of the others.

  For each sequence, its own single-sequence profile is subtracted
  entry-by-entry from the profile of the whole alignment, and the sequence
  is then aligned to the resulting "everything else" profile.

  @param al: the alignment, an iterable of sequences (copied into a list
             when it is not already one).
  @param gapPenalty: forwarded unchanged to C{calign.profileAlign}.
  @return: a list with one C{calign.profileAlign} result per input sequence,
           in input order. The results are returned as produced (they are
           not passed through C{iton} here).
  """
  if not isinstance(al, list) :
    al = list(al)

  fullProfile = calign.createProfile(al)

  def profileWithout(seq) :
    # Profile of the alignment with the counts contributed by `seq` removed.
    own = calign.createProfile([seq])
    return tuple([[total - mine for total,mine in zip(allCol,ownCol)]
                  for allCol,ownCol in zip(fullProfile, own)])

  return [calign.profileAlign(seq, profileWithout(seq), gapPenalty = gapPenalty)
          for seq in al]
Example #2
0
def seqMultiAlign(seqs, scores = defaultMatchScores, report=False) :
  if len(seqs) < 2:
    return seqs
  
  a = calign.globalAlign(seqs[0], seqs[1], scores=scores)
  ns = 2
  p = calign.createProfile(a)

  a = tuple(iton(x) for x in a)
  
  for kk,s2 in enumerate(seqs[2:]) :
    #print ns
    # assert p == calign.createProfile(a)
    assert len(a[0]) == len(p) and \
           p[0][calign.GAP] != ns and p[-1][calign.GAP] != ns
    
    pad = 20
    if len(p)+2*pad < len(s2) :
      # enough for sequences start to align
      pad = (len(s2) - len(p))  ;                assert len(p)+2*pad >= len(s2)

    pa,extendLeft,extendRight = calign.profileAlign(s2, p, pad=pad, chop=True,
                                                    gapPenalty=defaultMatchScores.gap)

    if extendLeft > 0 or extendRight > 0 :
      gapProfile = [0,0,0,0,0,ns]
      p1 = tuple((list(gapProfile) for k in range(extendLeft))) + p \
           + tuple((list(gapProfile) for k in range(extendRight)))
    else :
      p1 = p
    
    for k,n in enumerate(pa) :
      p1[k][n] += 1
    p = p1
      
    if extendLeft > 0 or extendRight > 0 :
      fr = '-'*extendLeft
      bk = '-'*extendRight
      a = tuple((fr + x + bk for x in a))
      
    a = a + (iton(pa),)
    ns += 1

    if report and (kk+1) % 1000 == 0 :
      import sys
      print kk+1, len(a[0]),
      sys.stdout.flush()
  if report: print
  
  return a
Example #3
0
def refineAlignment(al, ci = None, drop = False, mx = -1, rev = False, verbose = False) :
  """ Re-align a subset of the sequences in C{al} against the rest.

  The subset ("candidates") is chosen by looking at the columns C{ci}. The
  profile of the candidates is subtracted from the profile of the whole
  alignment, and each candidate is re-aligned to that remainder with
  C{calign.profileAlign}. If anything changed, columns that became all-gap
  are removed from every row.

  @param al: the alignment, an iterable of gapped strings. A list argument
             is modified in place; anything else is copied into a new list.
  @param ci: column indices used to pick candidates. Defaults to C{[0]}
             (a fresh list per call).
  @param drop: when true, the profile is passed through C{removeColumns}
               with C{ci} before aligning, and each result is passed through
               C{restoreColumns} afterwards.
  @param mx: when positive and there are more than C{mx} candidates, nothing
             is done and C{None} is returned.
  @param rev: when false, candidates are rows with a non-gap in any of the
              C{ci} columns; when true, rows with a gap in all of them.
  @param verbose: print the alignment width and the number of candidates.
  @return: C{None} (see C{mx}), otherwise the tuple
           C{(al, width of al, number of re-aligned rows that changed)}.
  """
  # A mutable default would be shared between calls (and with removeColumns).
  if ci is None :
    ci = [0]

  if not isinstance(al, list) :
    al = list(al)

  p = calign.createProfile(al)
  if rev:
    can = [(k,s) for k,s in enumerate(al) if all(s[x] == '-' for x in ci)]
  else :
    can = [(k,s) for k,s in enumerate(al) if any(s[x] != '-' for x in ci)]

  if verbose:
    import sys
    # Same bytes as the print statement, valid on Python 2 and 3.
    sys.stdout.write("%d %d\n" % (len(al[0]), len(can)))
    sys.stdout.flush()

  if mx > 0 and len(can) > mx:
    return

  changed = 0
  if can :
    # Profile of everything except the candidates.
    pr = calign.createProfile([s for _,s in can])
    p2 = tuple([[t - c for t,c in zip(x,y)] for x,y in zip(p,pr)])
    if drop:
      p2,rx = removeColumns(p2, ci)

    for n,s in can:
      # Only sequences whose ungapped length fits in the profile are re-aligned.
      if len(s.replace('-','')) <= len(p2) :
        ra = calign.profileAlign(s, p2)
        if drop :
          ra = restoreColumns(list(ra), rx, calign.GAP)
        al[n] = iton(ra)
        if tuple(ra) != sasn(s) :
          changed += 1

  if changed :
    # Columns where every row has a gap are now redundant.
    p = calign.createProfile(al)
    allGaps = [k for k,col in enumerate(p) if col[calign.GAP] == len(al)]

    # Cut ranges right-to-left so earlier indices stay valid.
    for a,b in toRanges(allGaps)[::-1] :
      for k,x in enumerate(al) :
        al[k] = x[:a] + x[b:]

  return al,len(al[0]), changed
Example #4
0
def alignToProfile(seq, profile) :
  """ Align C{seq} to C{profile} with C{calign.profileAlign}.

  When the first element of C{seq} is a C{str}, the result is converted
  with C{iton} before being returned; otherwise the C{calign.profileAlign}
  result is returned as is.
  """
  # Inspect the input before aligning, so an empty seq fails here first.
  textInput = isinstance(seq[0], str)

  aligned = calign.profileAlign(seq, profile)
  if textInput :
    return iton(aligned)
  return aligned