Esempio n. 1
0
# O(N^2) DP approach
# runs in ~11s with pypy

# The approach used is fairly simple and natural but could naively appear to be O(N^3);
# the O(N^2) complexity relies on dynamic programming to compute the intersections efficiently.
# The details are not given here; see the clear and concise explanation in section 3.1 of:
# Sand et al., Algorithms for Computing the Triplet and Quartet Distances for Binary and General Trees
# in Biology, 2013, http://www.cs.au.dk/~gerth/papers/biology13.pdf

## Input
# Line 1 of stdin: whitespace-separated names. N is their count and Sall the
# same names as a set.
S = raw_input().split()
N = len(S)
Sall = set(S)
# Lines 2 and 3 of stdin: one tree each, handed to rosalib.parse_newick together
# with an initially empty dict (A for the first tree, B for the second).
# NOTE(review): parse_newick presumably fills that dict in place, and the
# trailing True presumably asks for the tree to be unrooted -- confirm in rosalib.
A = {}
rosalib.parse_newick(raw_input(), A, True)
B = {}
rosalib.parse_newick(raw_input(), B, True)


## Fonctions de construction
def children(T, u, u0):
    """Return a generator over the entries of ``T[u]`` that differ from ``u0``.

    ``T[u]`` is looked up when the function is called; the filtering itself
    happens lazily as the returned generator is consumed.
    """
    adjacent = T[u]
    return (node for node in adjacent if node != u0)


# For each directed edge e = (u0,u) of the tree T,
# S(e) = set of the leaves of the subtree rooted at u
#        that the direction of e points into
# Computes the S(e) (and numbers the edges along the way)
def dfs(T, u, u0, E, Enum, S):
    if u[0] != '@':  # leaf
Esempio n. 2
0
def main():
    """Read one Newick line from stdin and print the dfs_prob values reversed.

    The line is parsed with rosalib.parse_newick into a fresh dict, the parsed
    tree and the value parse_newick returns are passed to dfs_prob, and the
    result is printed space-separated in reverse order.
    """
    tree = {}
    root = rosalib.parse_newick(input(), tree, False)
    probs = dfs_prob(tree, root)
    # The problem statement wants them in the order AA, Aa, aa.
    print(' '.join(str(p) for p in reversed(probs)))
Esempio n. 3
0
#!/usr/bin/env python3

import rosalib

# NB: "Given: An unrooted *binary* tree"

# T is handed to parse_newick as an empty dict; dfs() below reads it as a
# mapping from a node label to an iterable of child labels.
# NOTE(review): parse_newick presumably fills T in place and returns the root
# label -- confirm in rosalib.
T = {}
R = rosalib.parse_newick(input(),T,False)  # we do not unroot it
# Sorted labels of T that do not start with '@'.
S = sorted(X for X in T if X[0]!='@')  # named species (necessarily leaves?)
# One list of booleans (aligned with S) per recorded split; filled by dfs().
CharTbl = []

def dfs(u):
    """Return the set of labels not starting with '@' found at or below ``u``.

    Children are taken from the module-level ``T``.  Whenever the collected set
    has more than one element and fewer than ``len(S) - 1``, a membership row
    (one boolean per entry of ``S``) is appended to ``CharTbl``.
    """
    below = {u} if u[0] != '@' else set()
    for child in T[u]:
        below.update(dfs(child))
    size = len(below)
    # Only nontrivial characters are recorded.
    if size > 1 and size < len(S) - 1:
        CharTbl.append([name in below for name in S])
    return below

# Walk the tree from R so that CharTbl gets filled, then print every row as a
# string of '1' (true entry) and '0' (false entry).
dfs(R)
for row in CharTbl:
    bits = ['1' if flag else '0' for flag in row]
    print(''.join(bits))
Esempio n. 4
0
#!/usr/bin/env python3

import rosalib

# T is handed to parse_newick as an empty dict; the code below indexes it by
# node label and iterates the result as that node's children.
# NOTE(review): parse_newick presumably fills T in place -- confirm in rosalib.
T = {}
R = rosalib.parse_newick(input(), T, False)
# F is consumed as a sequence of (label, sequence) pairs.
# NOTE(review): parse_fasta is called without arguments; it presumably reads
# the rest of stdin -- confirm in rosalib.
F = rosalib.parse_fasta()
# N is taken from the first record only and then used as the position range
# for every node.
N = len(F[0][1])  # size of DNAs
# label -> sequence lookup built from the pairs in F
DNA = {x: y for x, y in F}


def dfs(u, u0, i, si, ti):
    """Yield the nodes at or below ``u`` where position ``i`` is ``si`` again.

    The walk follows the children listed in the module-level ``T`` as long as
    the node's symbol ``DNA[node][i]`` equals ``ti``.  A node whose symbol is
    ``si`` is yielded and not explored further; any other symbol ends that
    branch silently.  ``u0`` is accepted but not read.
    """
    symbol = DNA[u][i]
    if symbol == si:
        yield u
        return
    if symbol != ti:
        return
    for child in T[u]:
        yield from dfs(child, u, i, si, ti)


# For every parent/child pair (s, t) in T and every position where their
# sequences differ, print one line per node w produced by dfs:
# "t w position old->new->old" with a 1-based position.
for s in T:
    for t in T[s]:
        for i in range(N):
            before, after = DNA[s][i], DNA[t][i]
            if before == after:
                continue
            for w in dfs(t, s, i, before, after):
                print('%s %s %d %s->%s->%s' %
                      (t, w, i + 1, before, after, before))
Esempio n. 5
0
#!/usr/bin/env python3

import rosalib

# Candidate letters for a node's sequence, including the gap symbol '-'.
A = ['A', 'C', 'G', 'T', '-']

# First stdin line: the tree in Newick format.
tree = input()
# T is handed to parse_newick as an empty dict and later indexed by node label.
# NOTE(review): parse_newick presumably fills T in place and returns the root
# label -- confirm in rosalib.
T = {}
R = rosalib.parse_newick(tree, T, False)
# F is consumed as a sequence of (label, sequence) pairs.
# NOTE(review): parse_fasta is called without arguments; it presumably reads
# the rest of stdin -- confirm in rosalib.
F = rosalib.parse_fasta()
# N is taken from the first record only.
N = len(F[0][1])  # size of DNAs
# label -> sequence lookup; dp() treats membership in DNA as "is a leaf".
DNA = {x: y for x, y in F}

# dp(i,u,a) = min sum of hamming distances of i-th letters on edges of
#             the subtree rooted at node u using a in A as i-th letter
#             of node u's DNA strand
# O(len(T) * N * len(A))
# memo caches dp results under the key (i, u, a).
# NOTE(review): pred presumably records the choices behind each optimum so the
# sequences can be rebuilt afterwards -- its use is not visible here.
memo, pred = {}, {}


def dp(i, u, a):
    assert (u not in DNA)  # u is not a leaf
    if (i, u, a) in memo:
        return memo[i, u, a]
    smin, pmin = 0, []
    for v in T[u]:
        if v in DNA:  # v leaf
            b = DNA[v][i]
            sv, pv = int(a != b), b
        else:
            sv, pv = float('inf'), None
Esempio n. 6
0
def main():
    """Parse a Newick line from stdin and print dfs_prob's result in reverse.

    rosalib.parse_newick receives the input line, a fresh dict and False; the
    dict and the returned value are forwarded to dfs_prob, whose result is
    written to stdout reversed and separated by single spaces.
    """
    adjacency = {}
    root = rosalib.parse_newick(input(), adjacency, False)
    ordered = reversed(dfs_prob(adjacency, root))
    # The problem statement expects the order AA, Aa, aa.
    print(*ordered)
Esempio n. 7
0
#!/usr/bin/env python3

import rosalib

# First stdin line: whitespace-separated taxon names.  S maps every name to its
# position on that line, so a split can be stored as a frozenset of integers.
# Built in a single pass with enumerate rather than indexing a temporary list.
S = {name: index for index, name in enumerate(input().split())}
# All indices in use; dfs() subtracts a split from it to get the complement.
Sall = frozenset(range(len(S)))
# Second and third stdin lines: the two trees in Newick format.  Each is parsed
# into its own initially empty dict.
# NOTE(review): parse_newick presumably fills the dict in place and returns
# the root label -- confirm in rosalib.
T1 = {}
R1 = rosalib.parse_newick(input(), T1, False)  # we do not unroot it
T2 = {}
R2 = rosalib.parse_newick(input(), T2, False)  # we do not unroot it

# similar to character table building from CTBL
def dfs(T, u, Splits):
    """Return the frozenset of taxon indices found at or below ``u`` in ``T``.

    A label that does not start with '@' is a taxon and yields the singleton
    ``{S[u]}``.  For any other node the children's sets are united; when the
    union has more than one element and fewer than ``len(S) - 1``, it is
    added to ``Splits`` in normalised form: the side containing index 0.
    """
    if u[0] != '@':
        return frozenset((S[u],))
    below = frozenset().union(*(dfs(T, child, Splits) for child in T[u]))
    # Only nontrivial characters are recorded.
    if 1 < len(below) < len(S) - 1:
        # Store the complement when this side does not contain index 0.
        Splits.add(below if 0 in below else Sall - below)
    return below

# Collect the normalised nontrivial splits of each tree.
S1, S2 = set(), set()
dfs(T1, R1, S1)
dfs(T2, R2, S2)
# d is 2*(len(S) - 3) minus twice the number of splits present in both sets.
# NOTE(review): len(S) - 3 matches the nontrivial split count of an unrooted
# binary tree -- this assumes both inputs are binary.
shared = S1.intersection(S2)
d = 2 * (len(S) - 3) - 2 * len(shared)
Esempio n. 8
0
#!/usr/bin/env python3

import rosalib

# NB: "Given: An unrooted *binary* tree"

# T is handed to parse_newick as an empty dict; dfs() below reads it as a
# mapping from a node label to an iterable of child labels.
# NOTE(review): parse_newick presumably fills T in place and returns the root
# label -- confirm in rosalib.
T = {}
R = rosalib.parse_newick(input(), T, False)  # we do not unroot it
# Sorted labels of T that do not start with '@'.
S = sorted(X for X in T if X[0] != '@')  # named species (necessarily leaves?)
# One list of booleans (aligned with S) per recorded split; filled by dfs().
CharTbl = []


def dfs(u):
    """Gather the labels without a leading '@' in the subtree of ``u``.

    The subtree is read from the module-level ``T``.  If the gathered set is
    a nontrivial character (more than one element, fewer than ``len(S) - 1``),
    its membership vector over ``S`` is appended to ``CharTbl``.  The set is
    returned to the caller.
    """
    named = set() if u[0] == '@' else {u}
    for sub in map(dfs, T[u]):
        named |= sub
    if 1 < len(named) < len(S) - 1:
        CharTbl.append([taxon in named for taxon in S])
    return named


# Populate CharTbl by traversing from R, then write each recorded row as a
# string with '1' for a true entry and '0' for a false one.
dfs(R)
for row in CharTbl:
    print(''.join(map(lambda flag: '1' if flag else '0', row)))
Esempio n. 9
0
#!/usr/bin/env python3

import rosalib

# T is handed to parse_newick as an empty dict; the code below indexes it by
# node label and iterates the result as that node's children.
# NOTE(review): parse_newick presumably fills T in place -- confirm in rosalib.
T = {}
R = rosalib.parse_newick(input(),T,False)
# F is consumed as a sequence of (label, sequence) pairs.
# NOTE(review): parse_fasta is called without arguments; it presumably reads
# the rest of stdin -- confirm in rosalib.
F = rosalib.parse_fasta()
# N is taken from the first record only and then used as the position range
# for every node.
N = len(F[0][1])  # size of DNAs
# label -> sequence lookup built from the pairs in F
DNA = {x:y for x,y in F}

def dfs(u, u0, i, si, ti):
    """Generate the nodes under ``u`` at which position ``i`` returns to ``si``.

    A node whose symbol ``DNA[node][i]`` equals ``si`` is yielded and ends its
    branch.  A node whose symbol equals ``ti`` passes the search on to its
    children in ``T``.  Any other symbol ends the branch without output.
    ``u0`` is part of the signature but is not read.
    """
    here = DNA[u][i]
    if here == si:
        yield u
    elif here == ti:
        for child in T[u]:
            for hit in dfs(child, u, i, si, ti):
                yield hit

# Scan every parent/child pair (s, t) of T.  Wherever their sequences differ at
# a position, print one line for each node w that dfs reports:
# "t w position old->new->old" with a 1-based position.
for s, kids in T.items():
    for t in kids:
        for i in range(N):
            old, new = DNA[s][i], DNA[t][i]
            if old != new:
                for w in dfs(t, s, i, old, new):
                    fields = (t, w, i + 1, old, new, old)
                    print('%s %s %d %s->%s->%s' % fields)
Esempio n. 10
0
# O(N^2) DP approach
# runs in ~11s with pypy

# The approach used is fairly simple and natural but could naively appear to be O(N^3);
# the O(N^2) complexity relies on dynamic programming to compute the intersections efficiently.
# The details are not given here; see the clear and concise explanation in section 3.1 of:
# Sand et al., Algorithms for Computing the Triplet and Quartet Distances for Binary and General Trees
# in Biology, 2013, http://www.cs.au.dk/~gerth/papers/biology13.pdf


## Input
# Line 1 of stdin: whitespace-separated names. N is their count and Sall the
# same names as a set.
S = raw_input().split()
N = len(S)
Sall = set(S)
# Lines 2 and 3 of stdin: one tree each, handed to rosalib.parse_newick together
# with an initially empty dict (A for the first tree, B for the second).
# NOTE(review): parse_newick presumably fills that dict in place, and the
# trailing True presumably asks for the tree to be unrooted -- confirm in rosalib.
A = {}
rosalib.parse_newick(raw_input(),A,True)
B = {}
rosalib.parse_newick(raw_input(),B,True)


## Fonctions de construction
def children(T, u, u0):
    """Give a generator over the entries of ``T[u]`` other than ``u0``.

    The lookup of ``T[u]`` happens at call time; entries equal to ``u0`` are
    filtered out lazily while the generator is iterated.
    """
    listed = T[u]
    return (w for w in listed if w != u0)

# For each directed edge e = (u0,u) of the tree T,
# S(e) = set of the leaves of the subtree rooted at u
#        that the direction of e points into
# Computes the S(e) (and numbers the edges along the way)
def dfs(T,u,u0,E,Enum,S):
    if u[0]!='@':  # leaf
        l = {u}