def refineSingle(al, gapPenalty = defaultMatchScores.gap) :
  """Re-align every sequence of an alignment against the rest of it.

  For each sequence a profile is formed as the element-wise difference
  between the profile of the whole alignment and the profile of that one
  sequence alone (presumably: the alignment with the sequence's own counts
  taken out - confirm against ``calign.createProfile``). The sequence is then
  aligned to that profile with ``calign.profileAlign``.

  :param al: the alignment; any iterable of sequences (copied into a list
             when it is not one already).
  :param gapPenalty: forwarded unchanged to ``calign.profileAlign``.
  :return: list with one ``calign.profileAlign`` result per sequence, in the
           order of ``al``. ``al`` itself is not modified.
  """
  sequences = al if isinstance(al, list) else list(al)
  wholeProfile = calign.createProfile(sequences)

  def realign(seq) :
    # Profile of this sequence alone, subtracted entry by entry from the
    # profile of the whole alignment.
    ownProfile = calign.createProfile([seq])
    restProfile = tuple([[total - own for total, own in zip(colAll, colOwn)]
                         for colAll, colOwn in zip(wholeProfile, ownProfile)])
    return calign.profileAlign(seq, restProfile, gapPenalty = gapPenalty)

  return [realign(seq) for seq in sequences]
def seqMultiAlign(seqs, scores = defaultMatchScores, report=False) :
  """Build a multiple alignment by adding the sequences one at a time.

  The first two sequences are aligned with ``calign.globalAlign``. Every
  further sequence is aligned to the running column profile with
  ``calign.profileAlign`` and appended as a new row. When that call reports
  that the alignment has to grow on the left and/or right, all-gap columns are
  added to the profile and '-' padding to the rows aligned so far.

  :param seqs: sequences to align, processed in the given order.
  :param scores: scoring object passed to ``calign.globalAlign``. Its ``gap``
                 attribute, when it has one, is also used as the gap penalty
                 of the profile alignments; otherwise
                 ``defaultMatchScores.gap`` is used.
  :param report: when true, write "<sequences added after the first two>
                 <alignment length>" to stdout after every 1000 additions,
                 separated by single spaces, and a newline at the very end.
  :return: ``seqs`` itself when it holds fewer than two sequences, otherwise a
           tuple of aligned rows (strings), one per input sequence, in input
           order.
  """
  # Function-scope import: usable on both Python 2 and 3 and does not depend
  # on the module importing sys at file level.
  import sys

  if len(seqs) < 2:
    return seqs

  # The original always used defaultMatchScores.gap here, silently ignoring a
  # caller supplied `scores` for everything but the first pair. Fall back to
  # the default for score objects without a `gap` attribute.
  gapPenalty = getattr(scores, 'gap', defaultMatchScores.gap)

  a = calign.globalAlign(seqs[0], seqs[1], scores=scores)
  ns = 2                         # number of rows currently in the alignment
  p = calign.createProfile(a)    # per-column counts, updated incrementally
  a = tuple(iton(x) for x in a)

  sep = ''                       # separator between progress reports
  for kk,s2 in enumerate(seqs[2:]) :
    # Invariants: the profile has one entry per alignment column, and neither
    # the first nor the last column consists of gaps only.
    # (p is expected to equal calign.createProfile(a) at this point.)
    assert len(a[0]) == len(p) and \
           p[0][calign.GAP] != ns and p[-1][calign.GAP] != ns

    pad = 20
    if len(p)+2*pad < len(s2) :
      # Sequence is much longer than the profile: widen the padding enough
      # for the sequence start to align.
      pad = len(s2) - len(p)
      assert len(p)+2*pad >= len(s2)

    pa,extendLeft,extendRight = calign.profileAlign(s2, p, pad=pad, chop=True,
                                                    gapPenalty=gapPenalty)

    if extendLeft > 0 or extendRight > 0 :
      # New columns hold a gap for each of the ns rows aligned so far. The
      # literal assumes six counts per column with the gap count last
      # (NOTE(review): i.e. calign.GAP == 5 - confirm).
      gapProfile = [0,0,0,0,0,ns]
      p = tuple(list(gapProfile) for _ in range(extendLeft)) + p \
          + tuple(list(gapProfile) for _ in range(extendRight))
      fr = '-'*extendLeft
      bk = '-'*extendRight
      a = tuple(fr + x + bk for x in a)

    # Count the new row's symbol in every column (the column lists are
    # updated in place).
    for k,n in enumerate(pa) :
      p[k][n] += 1

    a = a + (iton(pa),)
    ns += 1

    if report and (kk+1) % 1000 == 0 :
      sys.stdout.write('%s%d %d' % (sep, kk+1, len(a[0])))
      sep = ' '
      sys.stdout.flush()

  if report:
    sys.stdout.write('\n')
  return a
def refineAlignment(al, ci = None, drop = False, mx = -1, rev = False, verbose = False) :
  """Re-align selected rows of an alignment against the remaining rows.

  Rows are selected ("candidates") by looking at the columns listed in ``ci``.
  The candidates' profile is subtracted, entry by entry, from the profile of
  the whole alignment and each candidate is aligned to the result with
  ``calign.profileAlign``. If any row changed, columns in which every row has
  a gap are removed afterwards.

  :param al: the alignment, an iterable of equal-length strings. When it is a
             list it is modified in place, otherwise a list copy is used.
  :param ci: column indices used to pick candidates; defaults to ``[0]``.
  :param drop: when true, the ``ci`` columns are taken out of the profile with
               ``removeColumns`` before aligning and put back into each
               re-aligned row with ``restoreColumns``.
  :param mx: when positive and there are more than ``mx`` candidates, nothing
             is done and ``None`` is returned.
  :param rev: when false (default) a row is a candidate if it has a non-gap
              in at least one ``ci`` column; when true, if it has a gap in
              every ``ci`` column.
  :param verbose: when true, write "<alignment length> <number of
                  candidates>" to stdout.
  :return: ``None`` (see ``mx``), otherwise the tuple
           ``(al, len(al[0]), changed)`` where ``changed`` counts the rows
           whose re-alignment differs from ``sasn`` of the original row.
  """
  # A fresh list per call: the default must not be a shared mutable object,
  # it is handed on to removeColumns().
  if ci is None :
    ci = [0]

  if not isinstance(al, list) :
    al = list(al)

  p = calign.createProfile(al)

  if rev:
    can = [(k,s) for k,s in enumerate(al) if all(s[x] == '-' for x in ci)]
  else :
    can = [(k,s) for k,s in enumerate(al) if any(s[x] != '-' for x in ci)]

  if verbose:
    import sys
    sys.stdout.write('%d %d\n' % (len(al[0]), len(can)))
    sys.stdout.flush()

  if mx > 0 and len(can) > mx:
    # Too many candidates: give up. Callers get None, not the usual tuple.
    return

  changed = 0
  if can :
    # Profile of the alignment with the candidates' counts subtracted.
    pr = calign.createProfile([s for n,s in can])
    p2 = tuple([[a-b for a,b in zip(x,y)] for x,y in zip(p,pr)])
    if drop:
      p2,rx = removeColumns(p2, ci)

    for n,s in can:
      # Only rows whose ungapped length fits into the profile are re-aligned.
      if len(s.replace('-','')) <= len(p2) :
        ra = calign.profileAlign(s, p2)
        if drop :
          ra = restoreColumns(list(ra),rx, calign.GAP)
        al[n] = iton(ra)
        if tuple(ra) != sasn(s) :
          changed += 1

  if changed :
    # Re-alignment may leave columns where all rows have a gap; cut them out.
    p = calign.createProfile(al)
    r = [k for k,col in enumerate(p) if col[calign.GAP] == len(al)]
    r1 = toRanges(r)
    if r1 :
      # Right-most range first, so the indices of the ranges still to be
      # removed stay valid. Each (a,b) is used as the slice a:b.
      for a,b in r1[::-1] :
        for k,x in enumerate(al) :
          al[k] = x[:a] + x[b:]

  return al,len(al[0]), changed
def alignToProfile(seq, profile) :
  """Align one sequence to a profile with ``calign.profileAlign``.

  :param seq: the sequence; must be non-empty, since its first element is
              inspected to decide the form of the result.
  :param profile: profile passed unchanged to ``calign.profileAlign``.
  :return: the alignment converted with ``iton`` when the first element of
           ``seq`` is a ``str``, otherwise the raw ``calign.profileAlign``
           result.
  """
  # Decide on the output form before aligning, from the input's first element.
  givenAsText = isinstance(seq[0], str)
  aligned = calign.profileAlign(seq, profile)
  if givenAsText :
    return iton(aligned)
  return aligned