コード例 #1
0
def test_to_igraph():
    """Check that the igraph export has the same structure as the link graph.

    Builds a SuchLinkedTrees object from the gopher/louse trees and their
    link table, exports it with ``to_igraph()``, and compares igraph's
    adjacency matrix against ``SLT.adjacency()`` element by element.
    """
    T1 = SuchTree(gopher_tree)
    T2 = SuchTree(lice_tree)
    links = pd.read_csv(gl_links, index_col=0)

    SLT = SuchLinkedTrees(T1, T2, links)

    g = SLT.to_igraph()

    # igraph returns an unweighted adjacency matrix, so the weighted
    # SuchLinkedTrees adjacency matrix is rounded up to an unweighted form
    # before the comparison.
    saj = numpy.ceil(SLT.adjacency())

    # igraph's Matrix class does not implement the standard numpy
    # interface, so it is converted row by row.
    iaj = numpy.array(list(map(list, g.get_adjacency())))

    # The matrices must have the same shape ...
    assert saj.shape == iaj.shape

    # ... and every element must be equal. ``.all()`` replaces a Python-level
    # ``reduce`` over the flattened comparison.
    assert (saj == iaj).all()
コード例 #2
0
def test_link_identities():
    """Links specified by leaf name must come back unchanged from linklist.

    Two small Newick trees are written to temporary files, a 4x4 link table
    is filled from a fixed list of (host, guest) name pairs, and the ids in
    ``SLT.linklist`` are mapped back to names and compared with that list.
    """
    # The underlying file object is closed right after writing so the
    # contents are on disk when SuchTree reads the file by name.
    with tempfile.NamedTemporaryFile() as host_file:
        host_file.file.write(b'(A:1,(B:1,(C:1,D:1)E:1)F:1)G:1;')
        host_file.file.close()
        T1 = SuchTree(host_file.name)
    with tempfile.NamedTemporaryFile() as guest_file:
        guest_file.file.write(b'((a:1,b:1)e:1,(c:1,d:1)f:1)g:1;')
        guest_file.file.close()
        T2 = SuchTree(guest_file.name)

    expected_pairs = (('A', 'a'), ('B', 'c'), ('B', 'd'), ('C', 'd'),
                      ('D', 'd'))

    # Rows are T1 leaf names, columns are T2 leaf names.
    links = pd.DataFrame(numpy.zeros((4, 4), dtype=int),
                         index=list(T1.leafs.keys()),
                         columns=list(T2.leafs.keys()))
    for row_name, col_name in expected_pairs:
        links.at[row_name, col_name] = 1

    SLT = SuchLinkedTrees(T1, T2, links)

    # Reverse lookups: leaf id -> leaf name for each tree.
    t1_name_of = dict(zip(T1.leafs.values(), T1.leafs.keys()))
    t2_name_of = dict(zip(T2.leafs.values(), T2.leafs.keys()))

    # Each linklist row is unpacked as (T2 id, T1 id).
    observed_pairs = set()
    for t2_id, t1_id in SLT.linklist.tolist():
        observed_pairs.add((t1_name_of[t1_id], t2_name_of[t2_id]))

    assert set(expected_pairs) == observed_pairs
コード例 #3
0
def test_distance():
    """Compare ``T.distance`` with the reference distances in test.matrix.

    Each line of the reference file is split into two leaf names and a
    distance; the tree's distance for that pair must match within the
    0.001 tolerance passed to ``approx``.
    """
    T = SuchTree(test_tree)
    # Context manager so the reference file is closed even if an assertion
    # fails part-way through.
    with open('SuchTree/tests/test.matrix') as matrix_file:
        for line in matrix_file:
            a, b, d1 = line.split()
            d1 = float(d1)
            d2 = T.distance(a, b)
            assert d1 == approx(d2, 0.001)
コード例 #4
0
def test_get_children():
    """``T.get_children`` must agree with the reference tree ``dpt``.

    For nodes carrying a taxon the expected children are (-1, -1); for all
    other nodes they are the labels of the node's two child nodes.
    """
    T = SuchTree(test_tree)
    for ref_node in dpt.inorder_node_iter():
        if node_has_taxon := bool(ref_node.taxon):
            want_left, want_right = -1, -1
        else:
            # Unpacking requires exactly two children per unlabelled node.
            want_left, want_right = [
                child.label for child in ref_node.child_nodes()
            ]
        got_left, got_right = T.get_children(ref_node.label)
        assert got_left == want_left
        assert got_right == want_right
コード例 #5
0
def test_distances_by_name():
    """Check the batch lookup ``T.distances_by_name`` against test.matrix.

    All (name, name) pairs from the reference file are passed in a single
    call; each returned distance must match the reference value within the
    0.001 tolerance passed to ``approx``.
    """
    T = SuchTree(test_tree)
    ids = []
    d1 = []
    # Context manager so the reference file is always closed.
    with open('SuchTree/tests/test.matrix') as matrix_file:
        for line in matrix_file:
            a, b, d = line.split()
            d1.append(float(d))
            ids.append((a, b))
    result = T.distances_by_name(ids)
    for D1, D2 in zip(d1, result):
        assert D1 == approx(D2, 0.001)
コード例 #6
0
def test_distances():
    """Check the batch lookup ``T.distances`` (by leaf id) against test.matrix.

    Leaf names from the reference file are translated to ids through
    ``T.leafs`` and passed as an int64 array of id pairs; each returned
    distance must match the reference value within the 0.001 tolerance
    passed to ``approx``.
    """
    T = SuchTree(test_tree)
    ids = []
    d1 = []
    # Context manager so the reference file is always closed.
    with open('SuchTree/tests/test.matrix') as matrix_file:
        for line in matrix_file:
            a, b, d = line.split()
            d1.append(float(d))
            ids.append((T.leafs[a], T.leafs[b]))
    result = T.distances(numpy.array(ids, dtype=numpy.int64))
    for D1, D2 in zip(d1, result):
        assert D1 == approx(D2, 0.001)
コード例 #7
0
def test_init_both_trees_by_file():
    """SuchLinkedTrees must accept tree file paths for both trees.

    A random N x N link table (values 0..3) labelled with the leaf names of
    ``test_tree`` is used for the links.
    """
    T = SuchTree(test_tree)
    leaf_names = list(T.leafs.keys())
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=leaf_names,
                         index=leaf_names)
    SLT = SuchLinkedTrees(test_tree, test_tree, links)
    assert isinstance(SLT, SuchLinkedTrees)
コード例 #8
0
def test_row_names():
    """``SLT.row_names`` must equal the leaf names used as the link index."""
    T = SuchTree(test_tree)
    leaf_names = list(T.leafs.keys())
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=leaf_names,
                         index=leaf_names)

    SLT = SuchLinkedTrees(T, T, links)
    assert SLT.row_names == leaf_names
コード例 #9
0
def test_get_column_leafs():
    """``SLT.get_column_leafs(n)`` must return the leaf ids linked in column n.

    The expected set for each column is built from the rows of the link
    table whose entry in that column is non-zero.
    """
    T = SuchTree(test_tree)
    leaf_names = list(T.leafs.keys())
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=leaf_names,
                         index=leaf_names)
    SLT = SuchLinkedTrees(T, T, links)

    # Boolean view of the link table, computed once rather than per column.
    linked = links.astype(bool)
    for n, colname in enumerate(links.columns):
        column = linked[colname]
        leafs1 = {T.leafs[name] for name in column[column].index}
        leafs2 = set(SLT.get_column_leafs(n))
        assert leafs1 == leafs2
コード例 #10
0
def test_col_ids():
    """``SLT.col_ids`` must list the tree's leaf ids in ``T.leafs`` order."""
    T = SuchTree(test_tree)
    leaf_names = list(T.leafs.keys())
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=leaf_names,
                         index=leaf_names)

    SLT = SuchLinkedTrees(T, T, links)
    col_ids = SLT.col_ids
    leaf_ids = T.leafs.values()
    # Length is checked first because zip() would silently truncate.
    assert len(col_ids) == len(leaf_ids)
    for i, j in zip(col_ids, leaf_ids):
        assert i == j
コード例 #11
0
def test_linkmatrix_property():
    """``SLT.linkmatrix`` must be the boolean form of the link table.

    The link table's rows are given in shuffled order; every
    (row name, column name) cell is compared with the linkmatrix entry at
    the positions of those names in ``SLT.row_names`` / ``SLT.col_names``.
    """
    T = SuchTree(test_tree)
    row_names = list(T.leafs.keys())
    numpy.random.shuffle(row_names)
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=list(T.leafs.keys()),
                         index=row_names)
    SLT = SuchLinkedTrees(T, T, links)
    # enumerate() supplies the positions directly instead of list.index()
    # lookups, and .at[] avoids transposing the frame for every cell.
    for col_id, col in enumerate(SLT.col_names):
        for row_id, row in enumerate(SLT.row_names):
            assert bool(links.at[row, col]) == SLT.linkmatrix[row_id][col_id]
コード例 #12
0
def test_get_column_links():
    """``SLT.get_column_links(n)`` must match column n of the link table.

    The link table's rows are given in shuffled order; entries are compared
    by row name, using the order of ``SLT.row_names`` to index the result.
    """
    T = SuchTree(test_tree)
    row_names = list(T.leafs.keys())
    numpy.random.shuffle(row_names)
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=list(T.leafs.keys()),
                         index=row_names)
    SLT = SuchLinkedTrees(T, T, links)

    # Boolean view of the link table, computed once rather than per column.
    linked = links.astype(bool)
    for n, colname in enumerate(links.columns):
        expected = linked[colname]
        observed = SLT.get_column_links(n)
        for m, rowname in enumerate(SLT.row_names):
            assert expected[rowname] == observed[m]
コード例 #13
0
def test_is_ancestor():
    """Sum ``is_ancestor`` over the root's descendants in both directions.

    With the root as first argument the results must sum to
    ``T.length - 1``; with the root as second argument they must sum to
    ``1 - T.length``.
    """
    T = SuchTree(test_tree)

    root_first = (T.is_ancestor(T.root, node)
                  for node in T.get_descendant_nodes(T.root))
    assert T.length - 1 == sum(root_first)

    root_second = (T.is_ancestor(node, T.root)
                   for node in T.get_descendant_nodes(T.root))
    assert 1 - T.length == sum(root_second)
コード例 #14
0
def test_linklist_property():
    """``SLT.linklist`` must contain exactly the non-zero cells of the links.

    The expected set is built from the unstacked link table: every
    (column name, row name) pair with a value above zero is translated to
    (TreeB leaf id, TreeA leaf id).
    """
    T = SuchTree(test_tree)
    row_names = list(T.leafs.keys())
    numpy.random.shuffle(row_names)
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=list(T.leafs.keys()),
                         index=row_names)
    SLT = SuchLinkedTrees(T, T, links)

    # unstack() yields a Series indexed by (column name, row name).
    stacked = links.unstack()
    A = {(SLT.TreeB.leafs[col], SLT.TreeA.leafs[row])
         for col, row in stacked[stacked > 0].index}
    B = {(pair[0], pair[1]) for pair in SLT.linklist}
    assert A == B
コード例 #15
0
def test_get_column_leafs_by_name_as_row_ids():
    """``get_column_leafs(name, as_row_ids=True)`` must return positions.

    For each column name, the linked leaf ids are translated to their
    positions in ``SLT.col_ids`` and compared with the method's result.
    """
    T = SuchTree(test_tree)
    leaf_names = list(T.leafs.keys())
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=leaf_names,
                         index=leaf_names)
    SLT = SuchLinkedTrees(T, T, links)

    # Both are loop-invariant, so they are built once.
    linked = links.astype(bool)
    col_ids = list(SLT.col_ids)
    for colname in links.columns:
        column = linked[colname]
        leafs1 = {col_ids.index(T.leafs[name])
                  for name in column[column].index}
        leafs2 = set(SLT.get_column_leafs(colname, as_row_ids=True))
        assert leafs1 == leafs2
コード例 #16
0
def test_adjacency():
    """Each node/parent entry of the adjacency matrix must match the distance.

    ``T.adjacency(T.root)`` is unpacked into the matrix and the node ids
    labelling its rows and columns. For every non-root node the entry at
    (node, parent) is compared with ``T.distance(node, parent)``.
    """
    T = SuchTree(test_tree)
    # NOTE(review): relies on the value order of the mapping returned by
    # adjacency() being (matrix, node ids) - confirm against its definition.
    aj, leaf_ids = T.adjacency(T.root).values()
    leaf_ids = list(leaf_ids)
    for node in chain(T.leafs.values(), list(T.get_internal_nodes())):
        if node == T.root:
            continue  # the root has no parent to compare against
        parent = T.get_parent(node)
        distance = T.distance(node, parent)
        i, j = leaf_ids.index(node), leaf_ids.index(parent)
        print(node, parent, ':', i, j, ' :: ', aj[i, j], distance)
        # Previously the two values were only printed, so the test could
        # never fail; assert the comparison the printout implies.
        assert aj[i, j] == approx(distance, 0.001)
コード例 #17
0
def test_subset_b():
    """After ``SLT.subset_b(1)`` the linklist must cover only that subtree.

    The expected links are taken from the columns of the link table that
    belong to the leafs below node 1 of TreeB, computed before the subset
    is applied.
    """
    T = SuchTree(test_tree)
    row_names = list(T.leafs.keys())
    numpy.random.shuffle(row_names)
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=list(T.leafs.keys()),
                         index=row_names)
    SLT = SuchLinkedTrees(T, T, links)

    # Reverse lookup: TreeB leaf id -> leaf name.
    sfeal = dict(zip(SLT.TreeB.leafs.values(), SLT.TreeB.leafs.keys()))
    subset_links = links[[sfeal[leaf] for leaf in SLT.TreeB.get_leafs(1)]]
    # unstack() yields a Series indexed by (column name, row name).
    stacked = subset_links.unstack()

    SLT.subset_b(1)

    A = {(SLT.TreeB.leafs[col], SLT.TreeA.leafs[row])
         for col, row in stacked[stacked > 0].index}
    B = {(pair[0], pair[1]) for pair in SLT.linklist}
    assert A == B
コード例 #18
0
def test_hierarchy():
    """Leafs under any internal node must be a subset of the root's leafs."""
    T = SuchTree(test_tree)
    leafs_under_root = set(T.get_leafs(T.root))
    for internal_node in T.get_internal_nodes():
        leafs_under_node = set(T.get_leafs(internal_node))
        assert leafs_under_node.issubset(leafs_under_root)
コード例 #19
0
def test_get_leafs():
    """``get_leafs`` from the root must return every id in ``T.leafs``."""
    T = SuchTree(test_tree)
    leafs_from_root = set(T.get_leafs(T.root))
    known_leaf_ids = set(T.leafs.values())
    assert leafs_from_root == known_leaf_ids
コード例 #20
0
def test_get_distance_to_root():
    """Leaf-to-root distances must match those of the reference tree ``dpt``.

    Values are compared with the 0.001 tolerance passed to ``approx``.
    """
    T = SuchTree(test_tree)
    for ref_leaf in dpt.leaf_node_iter():
        observed = T.get_distance_to_root(ref_leaf.label)
        expected = approx(ref_leaf.distance_from_root(), 0.001)
        assert observed == expected
コード例 #21
0
def test_init():
    """Constructing a SuchTree from ``test_tree`` must yield a SuchTree."""
    T = SuchTree(test_tree)
    # isinstance() is the idiomatic type check.
    assert isinstance(T, SuchTree)
コード例 #22
0
ファイル: treescan.py プロジェクト: ryneches/ManyTrees
def _run_jprime(arguments, failure_message):
    '''
    Run one JPrIME tool as ``java <java_ops> -jar jprime.jar <arguments>``
    and raise JPrIMEError(failure_message) if it exits with a non-zero
    status.
    '''
    status = subprocess.call(['java'] + java_ops +
                             ['-jar', 'jprime.jar'] + arguments)
    if status != 0:
        raise JPrIMEError(failure_message)


def simtree(prefix,
            birth_rate=0.3,
            death_rate=0.1,
            min_host_leafs=8,
            max_host_leafs=64,
            min_guest_leafs=4,
            max_guest_leafs=128,
            duplication_rate=0.2,
            loss_rate=0.1,
            switch_rate=0.05,
            k=2.0,
            theta=0.5):
    '''
    Simulate a host tree and a guest tree with JPrIME, link them, and write
    plots and summary statistics into the directory `prefix`.

    Time interval is always 1.0 units, and GuestTreeGen stops after 1000
    attempts.

    Parameters
    ----------
    prefix : output directory; created if it does not exist.
    birth_rate, death_rate : passed to HostTreeGen.
    min_host_leafs, max_host_leafs : passed to HostTreeGen as -min / -max.
    min_guest_leafs, max_guest_leafs : passed to GuestTreeGen as -min / -max.
    duplication_rate, loss_rate, switch_rate : passed to GuestTreeGen.
    k, theta : passed to BranchRelaxer after the 'IIDGamma' model name, for
        both the host and the guest tree.

    Files written by this function under `prefix` (in addition to whatever
    the JPrIME tools write for the 'host' and 'guest' output prefixes):
    links.csv, adjacency.png, cophylo.pdf, eigenvalues.csv, densities.txt,
    distances.txt, moments.csv, data.csv.

    Raises
    ------
    JPrIMEError : if any of the four JPrIME invocations exits non-zero.
    '''

    max_guest_attempts = 1000

    def path(name):
        # Every output lives directly under the prefix directory.
        return prefix + '/' + name

    # make output directory
    if not exists(prefix):
        mkdir(prefix)

    # build the host tree
    _run_jprime([
        'HostTreeGen', '-bi', '-min',
        str(min_host_leafs), '-max',
        str(max_host_leafs), '1.0',
        str(birth_rate),
        str(death_rate),
        path('host')
    ], 'HostTreeGen failed.')

    _run_jprime([
        'BranchRelaxer', '-o',
        path('host.relaxed.tree'),
        path('host.pruned.tree'), 'IIDGamma',
        str(k),
        str(theta)
    ], 'BranchRelaxer failed on host tree.')

    # build the guest tree
    _run_jprime([
        'GuestTreeGen', '--max-attempts',
        str(max_guest_attempts), '-min',
        str(min_guest_leafs), '-max',
        str(max_guest_leafs),
        path('host.pruned.tree'),
        str(duplication_rate),
        str(loss_rate),
        str(switch_rate),
        path('guest')
    ], 'GuestTreeGen failed.')

    _run_jprime([
        'BranchRelaxer', '-o',
        path('guest.relaxed.tree'),
        path('guest.pruned.tree'), 'IIDGamma',
        str(k),
        str(theta)
    ], 'BranchRelaxer failed on guest tree.')

    # load the trees
    T1 = SuchTree(path('host.relaxed.tree'))
    T2 = SuchTree(path('guest.relaxed.tree'))

    # populate the link matrix using the leaf names
    link_matrix = zeros((T1.n_leafs, T2.n_leafs), dtype=int)

    # Materialise the names as lists: on Python 3, dict.keys() is a view
    # with no .index() method.
    hostnames = list(T1.leafs.keys())
    guestnames = list(T2.leafs.keys())

    for j, guest_leaf in enumerate(guestnames):
        # Guest leaf names are split as '<guest>_<host>'; the host part
        # selects the row of the link matrix.
        _, host = guest_leaf.split('_')
        i = hostnames.index(host)
        link_matrix[i, j] = 1

    links = pandas.DataFrame(link_matrix,
                             index=hostnames,
                             columns=guestnames)
    links.to_csv(path('links.csv'))

    # initialize the SuchLinkedTrees object
    SLT = SuchLinkedTrees(T1, T2, links)

    # plot the adjacency matrix
    aj = SLT.adjacency()
    lp_plot = seaborn.heatmap(aj.T,
                              cmap='viridis',
                              vmin=0,
                              vmax=1,
                              cbar=False,
                              square=True,
                              xticklabels=False,
                              yticklabels=False)
    lp_plot.invert_yaxis()
    fig = lp_plot.get_figure()
    # NOTE(review): `size` is passed through unchanged from the original
    # code; confirm the installed matplotlib accepts it in savefig().
    fig.savefig(path('adjacency.png'), size=6)
    fig.clf()

    # plot cophylogeny using R
    # NOTE(review): the R snippet presumably relies on R packages having
    # been loaded elsewhere in this module - confirm.
    r_code = '''
    tr1 <- read.tree( "HOST_TREE" )
    tr2 <- read.tree( "GUEST_TREE" )
    links <- read.csv( "LINKS", row.names=1, stringsAsFactors = F )
    im <- graph_from_incidence_matrix( as.matrix( links ) )
    assoc <- as_edgelist( im )
    obj <- cophylo( tr1, tr2, assoc=assoc )
    pdf( "OUTFILE", width = 10, height = 12 )
    plot( obj )
    dev.off()
    '''
    r_code = r_code.replace('HOST_TREE', path('host.relaxed.tree'))
    r_code = r_code.replace('GUEST_TREE', path('guest.relaxed.tree'))
    r_code = r_code.replace('LINKS', path('links.csv'))
    r_code = r_code.replace('OUTFILE', path('cophylo.pdf'))
    robjects.r(r_code)

    # calculate spectral densities
    lambdas = SLT.spectrum()

    a_lambd = eigvalsh(SLT.TreeA.laplacian()['laplacian'])
    b_lambd = eigvalsh(SLT.TreeB.laplacian()['laplacian'])

    with open(path('eigenvalues.csv'), 'w') as f:
        f.write('graph ' + ','.join(map(str, lambdas)) + '\n')
        f.write('TreeA ' + ','.join(map(str, a_lambd)) + '\n')
        f.write('TreeB ' + ','.join(map(str, b_lambd)) + '\n')

    # Each spectrum is scaled by its largest eigenvalue before the kernel
    # density estimate is evaluated on a common grid.
    bandwidth = 0.4
    X = linspace(-0.5, 1.5, 200)
    density = gaussian_kde(lambdas / max(lambdas), bw_method=bandwidth).pdf(X)
    a_dnsty = gaussian_kde(a_lambd / max(a_lambd), bw_method=bandwidth).pdf(X)
    b_dnsty = gaussian_kde(b_lambd / max(b_lambd), bw_method=bandwidth).pdf(X)

    with open(path('densities.txt'), 'w') as f:
        f.write('graph ' + ','.join(map(str, density)) + '\n')
        f.write('TreeA ' + ','.join(map(str, a_dnsty)) + '\n')
        f.write('TreeB ' + ','.join(map(str, b_dnsty)) + '\n')

    # calculate Hommola correlation
    d = SLT.linked_distances()
    r, p = pearsonr(d['TreeA'], d['TreeB'])

    with open(path('distances.txt'), 'w') as f:
        f.write('TreeA ' + ','.join(map(str, d['TreeA'])) + '\n')
        f.write('TreeB ' + ','.join(map(str, d['TreeB'])) + '\n')

    # save jointplot of patristic distances
    # NOTE(review): positional x/y and `size` are kept from the original
    # code; confirm they match the installed seaborn's jointplot signature.
    jp = seaborn.jointplot(d['TreeA'], d['TreeB'], size=6)
    jp.savefig(path('correlation.png'))
    jp.fig.clf()

    # output moment data
    moments = {}
    moments['eigengap'] = lambdas[-1] - lambdas[-2]
    moments['skew'] = skew(density)
    moments['kurtosis'] = kurtosis(density)
    moments['treedist'] = pdd(a_dnsty, b_dnsty)
    moments['occupancy'] = ((2.0 * SLT.n_links) /
                            (SLT.TreeA.n_leafs + SLT.TreeB.n_leafs))
    moments['squareness'] = float(SLT.TreeA.n_leafs) / SLT.TreeB.n_leafs

    moments['r'] = r
    moments['p'] = p

    # One header line of keys, one line of values.
    with open(path('moments.csv'), 'w') as f:
        f.write(','.join(moments.keys()) + '\n')
        f.write(','.join(map(str, moments.values())))

    # output simulation parameters
    data = {}
    data['prefix'] = prefix
    data['host_leafs'] = T1.n_leafs
    data['guest_leafs'] = T2.n_leafs
    data['links'] = SLT.n_links
    data['birth_rate'] = birth_rate
    data['death_rate'] = death_rate
    data['min_host_leafs'] = min_host_leafs
    data['max_host_leafs'] = max_host_leafs
    data['min_guest_leafs'] = min_guest_leafs
    data['max_guest_leafs'] = max_guest_leafs
    data['duplication_rate'] = duplication_rate
    data['loss_rate'] = loss_rate
    data['switch_rate'] = switch_rate
    data['k'] = k
    data['theta'] = theta

    with open(path('data.csv'), 'w') as f:
        f.write(','.join(data.keys()) + '\n')
        f.write(','.join(map(str, data.values())))
コード例 #23
0
def test_get_descendant_nodes():
    """Descendants of the root must be its leafs plus the internal nodes."""
    T = SuchTree(test_tree)
    descendants = set(T.get_descendant_nodes(T.root))
    leafs = set(T.get_leafs(T.root))
    internal_nodes = set(T.get_internal_nodes())
    assert descendants == leafs.union(internal_nodes)