def main():
    """Run the global-alignment pipeline on the two parsed FASTA records.

    Prints the bottom-right cell of the table returned by ``global_score``,
    then the two strings returned by ``solution``, one per line.
    """
    first, second = (seq for _, seq in rosalib.parse_fasta())
    table, back = global_score(first, second)
    print(table[-1][-1])
    aligned_first, aligned_second = solution(first, second, back)
    print(aligned_first, aligned_second, sep='\n')
def main():
    """Score and print a global alignment of the two parsed sequences.

    Output: the last cell of the table from ``global_score``, followed by
    each of the two strings produced by ``solution`` on its own line.
    """
    seq_a, seq_b = (record_seq for _, record_seq in rosalib.parse_fasta())
    scores, predecessors = global_score(seq_a, seq_b)
    final_row = scores[-1]
    print(final_row[-1])
    row_a, row_b = solution(seq_a, seq_b, predecessors)
    print(row_a)
    print(row_b)
def main():
    """Classify parsed reads by signature and print a correction for the rest.

    A signature seen once is stored in ``DBI`` with its read; when seen a
    second time it is moved to ``DBC``; later occurrences are ignored.
    Every read still in ``DBI`` is printed as ``read->correct(read)``.
    """
    for _, read in rosalib.parse_fasta():
        key = sign(read)
        if key in DBC:
            # Already confirmed: nothing more to record.
            continue
        if key in DBI:
            # Second sighting: promote from "seen once" to "confirmed".
            del DBI[key]
            DBC.add(key)
        else:
            DBI[key] = read

    for read in DBI.values():
        print('%s->%s' % (read, correct(read)))
def main():
    """Split parsed reads into confirmed (``DBC``) and unconfirmed (``DBI``).

    Reads whose signature occurs only once remain in ``DBI``; each of them
    is then printed together with the result of ``correct`` on it.
    """
    for _, sequence in rosalib.parse_fasta():
        signature = sign(sequence)
        if signature in DBC:
            continue
        seen_once = signature in DBI
        if seen_once:
            # A repeat sighting confirms the signature.
            del DBI[signature]
            DBC.add(signature)
        else:
            DBI[signature] = sequence

    for signature in DBI:
        unconfirmed = DBI[signature]
        print('%s->%s' % (unconfirmed, correct(unconfirmed)))
def main():
    """Chain the parsed reads along their pairwise edges and print the result.

    For every ordered pair of distinct reads ``edge`` is consulted; a
    non-``None`` result ``e`` makes the second read the successor of the
    first.  The single read that is nobody's successor starts the chain, and
    each following read contributes ``read[e:]`` to the printed string
    (so ``e`` is presumably the overlap length -- confirm against ``edge``).

    Raises:
        AssertionError: if a read gets two successors, a read is the
            successor of two reads, or there is not exactly one start read.
    """
    DNAS = [DNA for _, DNA in rosalib.parse_fasta()]
    Succ = [None] * len(DNAS)       # Succ[i] = (j, e): read j follows read i
    Start = set(range(len(DNAS)))   # reads not yet used as a successor

    for i, left in enumerate(DNAS):
        for j, right in enumerate(DNAS):
            if i == j:
                continue
            e = edge(left, right)
            if e is None:
                continue
            assert Succ[i] is None, 'read %d has more than one successor' % i
            Succ[i] = (j, e)
            assert j in Start, 'read %d has more than one predecessor' % j
            Start.remove(j)

    assert len(Start) == 1, 'expected exactly one starting read'
    s = Start.pop()
    res = [DNAS[s]]
    while Succ[s] is not None:
        t, e = Succ[s]
        # Skip the first e characters of the successor before appending.
        res.append(DNAS[t][e:])
        s = t
    print(''.join(res))
def main():
    """Print the matrix of ``pdist`` values between all parsed sequences.

    One output line per sequence; entries are space-separated and follow
    the order of the records returned by ``rosalib.parse_fasta()``.
    """
    records = rosalib.parse_fasta()
    sequences = [record[1] for record in records]
    for row_seq in sequences:
        cells = [str(pdist(row_seq, col_seq)) for col_seq in sequences]
        print(' '.join(cells))
def main():
    """Print the result of ``local_score`` on the two parsed sequences.

    Output: the score, then the slice of the first sequence and the slice
    of the second sequence delimited by the bounds ``local_score`` returns.
    """
    first, second = (seq for _, seq in rosalib.parse_fasta())
    score, lo_a, hi_a, lo_b, hi_b = local_score(first, second)
    print(score)
    print(first[lo_a:hi_a])
    print(second[lo_b:hi_b])
#!/usr/bin/env python3

import rosalib

# NB: the statement explains "Why Are We Counting Modulo 134,217,727?
# (...) however, if we count modulo a large prime number"
# yet 2^27-1 = 134217727 is not prime... (-_-)


def Levenstein_align(u, v):
    """Return ``(distance, count)`` for the strings ``u`` and ``v``.

    ``distance`` is the edit distance (unit cost for insertion, deletion and
    substitution) and ``count`` is the number of ways to reach that optimum
    through the DP table, i.e. the number of optimal alignments.
    """
    rows, cols = len(u), len(v)
    # Borders: aligning against an empty prefix costs the other prefix length
    # and can be done in exactly one way.
    dist = [[max(i, j) for j in range(cols + 1)] for i in range(rows + 1)]
    ways = [[int(i == 0 or j == 0) for j in range(cols + 1)]
            for i in range(rows + 1)]

    for i, cu in enumerate(u, start=1):
        for j, cv in enumerate(v, start=1):
            from_up = dist[i - 1][j] + 1
            from_left = dist[i][j - 1] + 1
            from_diag = dist[i - 1][j - 1] + int(cu != cv)
            best = min(from_up, from_left, from_diag)
            dist[i][j] = best

            # Every predecessor that attains the optimum contributes its count.
            total = 0
            if from_up == best:
                total += ways[i - 1][j]
            if from_left == best:
                total += ways[i][j - 1]
            if from_diag == best:
                total += ways[i - 1][j - 1]
            ways[i][j] = total

    return (dist[rows][cols], ways[rows][cols])


L = rosalib.parse_fasta()
_, cpt = Levenstein_align(L[0][1], L[1][1])
print(cpt % (2**27 - 1))
#!/usr/bin/env python3 from itertools import * import rosalib DNAS = [DNA for _,DNA in rosalib.parse_fasta()] n = len(DNAS) # dp((i1,i2,...,in)) = score max d'un alignement des DNAS[k][:ik] pour tout k memo,pred = {(0,)*n:0},{} def dp(I): if I in memo: return memo[I] res = float('-inf') for D in product(*(range(2 if I[k]>0 else 1) for k in range(n))): if max(D)==0: continue J = tuple(I[k]-D[k] for k in range(n)) C = [DNAS[k][I[k]-1] if D[k]==1 else '-' for k in range(n)] s = dp(J) + sum(-int(C[k]!=C[l]) for k,l in combinations(range(n),2)) if s>res: res = s # on retient le predecesseur pour faciliter la reconstruction # de la solution pred[I] = J memo[I] = res return res def solution(I): Sol = [[] for _ in range(n)] while I in pred: J = pred[I]
#!/usr/bin/env python3

import rosalib

_, RNA = rosalib.parse_fasta()[0]
pair = {'A': 'U', 'U': 'A', 'C': 'G', 'G': 'C'}
memo = {}


def planar_match(i, j):
    """Return the number of planar matchings of ``RNA[i:j+1]``.

    Results are cached in the module-level ``memo`` dict keyed by ``(i, j)``.
    An empty range (``i > j``) has exactly one matching.
    """
    if i > j:
        return 1
    try:
        return memo[i, j]
    except KeyError:
        pass

    # Case 1: position i is left unmatched.
    total = planar_match(i + 1, j)
    # Case 2: position i is matched with a complementary position k, which
    # splits the range into an inside part and an outside part.
    total += sum(
        planar_match(i + 1, k - 1) * planar_match(k + 1, j)
        for k in range(i + 1, j + 1)
        if pair[RNA[i]] == RNA[k]
    )

    memo[i, j] = total
    return total


print(planar_match(0, len(RNA) - 1) % 10**6)
def main():
    """Print the three values ``fitting_score`` returns for the parsed pair.

    The two sequences come from ``rosalib.parse_fasta()``; the three results
    are printed one per line, in the order returned.
    """
    first, second = (seq for _, seq in rosalib.parse_fasta())
    score, aligned_first, aligned_second = fitting_score(first, second)
    print(score, aligned_first, aligned_second, sep='\n')
def main():
    """Print the score from ``align_matrix`` and the sum of its matrix.

    The first line is the first value returned by ``align_matrix`` for the
    two parsed sequences; the second line is the total of every entry in
    the returned matrix.
    """
    first, second = (seq for _, seq in rosalib.parse_fasta())
    score, matrix = align_matrix(first, second)
    print(score)
    row_totals = (sum(row) for row in matrix)
    print(sum(row_totals))
def main():
    """Print every pair yielded by ``rev_pal`` for the first parsed sequence.

    Each pair is written on its own line with its two components separated
    by a space.
    """
    _, dna = rosalib.parse_fasta()[0]
    for position, size in rev_pal(dna):
        print(position, size)
#!/usr/bin/env python3

import rosalib

# pretty naive and lazy approach here, yet good enough
# (for sure there are more efficient and still relatively simple
# approaches using string hashing)
# see also https://en.wikipedia.org/wiki/Longest_common_substring_problem
# for a really good approach using suffix trees


def LCSub(DNAS):
    """Return a longest substring of ``DNAS[0]`` present in every other string.

    Candidates are tried from longest to shortest and, for equal length,
    from left to right; the first one found in all strings is returned.
    The empty string is returned when nothing longer is shared.
    """
    reference, others = DNAS[0], DNAS[1:]
    ref_len = len(reference)
    for size in range(ref_len, -1, -1):
        for start in range(ref_len - size + 1):
            candidate = reference[start:start + size]
            if all(candidate in other for other in others):
                return candidate


# Shortest string first, so the candidate pool is as small as possible.
DNAS = sorted((DNA for _, DNA in rosalib.parse_fasta()), key=len)
print(LCSub(DNAS))
def main():
    """Report ``align_matrix`` results for the two parsed sequences.

    Prints the returned score, then the grand total of all entries of the
    returned matrix.
    """
    seq_a, seq_b = (seq for _, seq in rosalib.parse_fasta())
    best, grid = align_matrix(seq_a, seq_b)
    print(best)
    grand_total = sum(map(sum, grid))
    print(grand_total)
#!/usr/bin/env python3

import math

import rosalib


def fact(n):
    """Return ``n!`` for an integer ``n``, treating every ``n <= 1`` as 1.

    Delegates to ``math.factorial`` so large arguments no longer hit the
    interpreter's recursion limit.
    """
    return 1 if n <= 1 else math.factorial(n)


_, RNA = rosalib.parse_fasta()[0]
# Product of the factorials of the 'A' count and the 'C' count.
print(fact(RNA.count('A')) * fact(RNA.count('C')))
def main():
    """Run ``rev_pal`` on the first parsed record and print its results.

    Every yielded pair is printed as its two components separated by a
    single space, one pair per line.
    """
    records = rosalib.parse_fasta()
    first_record = records[0]
    _, sequence = first_record
    for start, length in rev_pal(sequence):
        print(start, length)
#!/usr/bin/env python3

import rosalib


def subseq(S, T):
    """Return 1-based positions in ``S`` spelling ``T`` as a subsequence.

    The match is greedy (each symbol of ``T`` takes the earliest available
    position in ``S``).  Returns ``None`` when ``T`` is not a subsequence.
    """
    positions = []
    matched = 0
    for index, symbol in enumerate(S, start=1):
        if matched == len(T):
            # All of T has been placed; no need to scan further.
            break
        if symbol == T[matched]:
            positions.append(index)
            matched += 1
    if matched == len(T):
        return positions
    return None


L = rosalib.parse_fasta()
S, T = L[0][1], L[1][1]
print(' '.join(map(str, subseq(S, T))))
def main():
    """Print the three values ``semi_global_score`` returns for the parsed pair.

    The two sequences are taken from ``rosalib.parse_fasta()``; the results
    are printed one per line in the order returned.
    """
    first, second = (seq for _, seq in rosalib.parse_fasta())
    outcome = semi_global_score(first, second)
    score, aligned_first, aligned_second = outcome
    print(score)
    print(aligned_first)
    print(aligned_second)