Ejemplo n.º 1
0
    def test_replacing_hgt(self):
        """Check leaf and color counts of the observable tree under replacing HGT.

        A species tree is simulated with ``num_species`` as its size argument,
        followed by a gene tree with duplication and loss rates of 0.0, an
        HGT rate of 1.0 and ``replace_prob=1.0``.  The test passes when the
        observable gene tree has exactly ``num_species`` leaves and those
        leaves carry exactly ``num_species`` distinct ``color`` values.
        """
        num_species = 20
        species_tree = te.simulate_species_tree(num_species,
                                                model='innovation')

        # True gene tree; with loss_rate=0.0 only HGT events are enabled.
        true_gene_tree = te.simulate_dated_gene_tree(
            species_tree,
            dupl_rate=0.0,
            loss_rate=0.0,
            hgt_rate=1.0,
            prohibit_extinction='per_species',
            replace_prob=1.0,
        )

        # Observable gene tree derived from the true gene tree.
        observable_tree = te.observable_tree(true_gene_tree)

        observed_leaves = list(observable_tree.leaves())
        observed_colors = set()
        for leaf in observed_leaves:
            observed_colors.add(leaf.color)

        counts_match = (len(observed_colors) == num_species
                        and len(observed_leaves) == num_species)
        self.assertTrue(counts_match)
Ejemplo n.º 2
0
    def test_no_extinction(self):
        """Check both ``prohibit_extinction`` modes over repeated simulations.

        For each of ``num_rounds`` rounds a species tree is simulated and two
        gene trees are generated on it:

        * with ``prohibit_extinction='per_species'`` the test asserts that
          every non-loss leaf label of the species tree has at least one
          non-loss gene-tree leaf whose ``color`` equals that label;
        * with ``prohibit_extinction='per_family'`` the test asserts that
          ``leaves()`` of the gene tree is non-empty.
        """
        num_species = 10
        num_rounds = 20

        for _ in range(num_rounds):

            # NOTE(review): another snippet in this file passes this option
            # as `non_binary_prob`; confirm which keyword the installed
            # library version expects.
            species_tree = te.simulate_species_tree(
                num_species,
                model='innovation',
                non_binary=0.2)

            per_species_tree = te.simulate_dated_gene_tree(
                species_tree,
                dupl_rate=1.0,
                loss_rate=1.0,
                hgt_rate=0.5,
                prohibit_extinction='per_species')

            # One (initially empty) bucket per surviving species leaf.
            genes_by_species = {}
            for node in species_tree.preorder():
                if node.children or node.event == 'L':
                    continue
                genes_by_species[node.label] = []

            # Sort the surviving gene leaves into the bucket of their color.
            for node in per_species_tree.preorder():
                if node.children or node.event == 'L':
                    continue
                genes_by_species[node.color].append(node.label)

            # No species may be left without a surviving gene.
            for surviving_genes in genes_by_species.values():
                self.assertTrue(surviving_genes)

            per_family_tree = te.simulate_dated_gene_tree(
                species_tree,
                dupl_rate=1.0,
                loss_rate=1.0,
                hgt_rate=0.5,
                prohibit_extinction='per_family')

            # The family as a whole must not be extinct.
            self.assertTrue(list(per_family_tree.leaves()))
Ejemplo n.º 3
0
    def test_rs_edges(self):
        """Check that the rs transfer edges are a subset of the true ones.

        Simulates a species tree and a gene tree, derives the observable
        gene tree and asserts that the set returned by
        ``analysis.true_transfer_edges`` is a superset of the result of
        ``analysis.rs_transfer_edges``.
        """
        species_tree = te.simulate_species_tree(10)
        true_gene_tree = te.simulate_dated_gene_tree(
            species_tree,
            dupl_rate=1.0,
            loss_rate=0.5,
            hgt_rate=0.5,
        )
        observable_tree = te.observable_tree(true_gene_tree)

        true_edges = analysis.true_transfer_edges(observable_tree)
        rs_edges = analysis.rs_transfer_edges(observable_tree, species_tree)

        rs_within_true = true_edges.issuperset(rs_edges)
        self.assertTrue(rs_within_true)
Ejemplo n.º 4
0
def generate_solutions_unique_species(n, i_p=0.5, d_p=0.5):
    """Solve 100 perturbed LDT-graph instances that have exactly ``n`` nodes.

    Species and gene trees are simulated repeatedly; the LDT graph of each
    observable gene tree is discarded unless it has exactly ``n`` nodes.
    Every accepted graph is perturbed, passed to ``LDTEditor`` and solved.
    If the solution passes the proper-coloring, cograph and compatibility
    checks it is saved through the solver, otherwise a message is printed.
    Accepted graphs count towards the total of 100 either way.

    Parameters
    ----------
    n
        Required number of nodes of the LDT graph.
    i_p, d_p
        Forwarded unchanged to ``InvestigateGraph.perturb_graph``
        (presumably insertion / deletion probabilities -- TODO confirm).
    """
    wanted_instances = 100
    handled = 0

    while handled < wanted_instances:
        species_tree = te.simulate_species_tree(10, model='innovation')
        true_gene_tree = te.simulate_dated_gene_tree(
            species_tree,
            dupl_rate=0.5,
            loss_rate=0.5,
            hgt_rate=0.5,
            prohibit_extinction="per_family",
            replace_prob=0.0)
        observable_tree = te.observable_tree(true_gene_tree)
        ldt = ldt_graph(observable_tree, species_tree)

        # Only graphs of the requested size are processed and counted.
        if len(ldt.nodes()) != n:
            continue

        investigator = InvestigateGraph(ldt)
        investigator.perturb_graph(i_p, d_p)
        perturbed = investigator._G_perturbed

        solver = LDTEditor(perturbed)
        solver.build_model()
        solver.optimize(time_limit=None)

        sol_graph, sol_distance = solver.get_solution()

        # All three checks are always evaluated, as in a plain sequence.
        checks = (
            is_properly_colored(sol_graph),
            is_cograph(sol_graph),
            is_compatible(sol_graph),
        )

        edit_dist = gt.symmetric_diff(investigator._G_perturbed, sol_graph)
        print("Runtime: {}".format(solver.get_solve_time()))

        if all(checks):
            print("Saving data...")
            solver._save_ILP_data(
                investigator._G_perturbed,
                sol_graph,
                solver.get_solve_time(),
                edit_dist,
                only_add=False,
                only_delete=False,
                filename="{}nodes/LDTEdit_exact_solution".format(n))
        else:
            print("No solution found!")

        handled += 1
Ejemplo n.º 5
0
def generate_trees(n=100,
                   m=10,
                   model='innovation',
                   dupl_rate=0.5,
                   loss_rate=0.5,
                   hgt_rate=0.5,
                   prohibit_extinction="per_family",
                   replace_prob=0.0,
                   size=10):
    """Simulate and serialize ``n`` tree pairs whose LDT graph has ``size`` nodes.

    The output folder ``exact_solutions/trees/<size>trees`` is created when
    missing, in which case numbering starts at 0.  Otherwise the first free
    ID is obtained from ``find_next_ID``.  Species and gene trees are
    simulated until ``n`` pairs have been accepted.  A pair is accepted when
    the LDT graph of its observable gene tree has exactly ``size`` nodes;
    both the species tree and the true gene tree are then serialized.

    Parameters
    ----------
    n
        Number of tree pairs to save.
    m
        Size argument passed to ``te.simulate_species_tree``.
    model
        Species-tree model passed to ``te.simulate_species_tree``.
    dupl_rate, loss_rate, hgt_rate, prohibit_extinction, replace_prob
        Forwarded unchanged to ``te.simulate_dated_gene_tree``.
    size
        Required number of nodes of the LDT graph.
    """
    out_dir = 'exact_solutions/trees/{}trees'.format(size)

    # Continue the numbering of an existing folder, or start a fresh one.
    if os.path.exists(out_dir):
        next_id = find_next_ID('exact_solutions/trees/{}trees/'.format(size))
    else:
        os.makedirs(out_dir)
        next_id = 0

    saved = 0
    while saved < n:

        species_tree = te.simulate_species_tree(m, model=model)
        true_gene_tree = te.simulate_dated_gene_tree(
            species_tree,
            dupl_rate=dupl_rate,
            loss_rate=loss_rate,
            hgt_rate=hgt_rate,
            prohibit_extinction=prohibit_extinction,
            replace_prob=replace_prob)

        observable_tree = te.observable_tree(true_gene_tree)
        ldt = ldt_graph(observable_tree, species_tree)

        # Reject simulations whose LDT graph has the wrong number of nodes.
        if len(ldt.nodes()) != size:
            continue

        # File names encode the simulation parameters and the running ID.
        species_path = 'exact_solutions/trees/{}trees/species_{}_{}_{}.json'.format(
            size, m, model, next_id)
        gene_path = 'exact_solutions/trees/{}trees/gene_{}_{}_{}_{}_{}_{}.json'.format(
            size, dupl_rate, loss_rate, hgt_rate, prohibit_extinction,
            replace_prob, next_id)

        species_tree.serialize(species_path)
        true_gene_tree.serialize(gene_path)

        next_id += 1
        saved += 1
Ejemplo n.º 6
0
    def test_ldt_fitch(self):
        """Check that the LDT graph is a subgraph of the Fitch graph and has a cotree.

        Simulates a species tree and a gene tree, derives the observable gene
        tree, and builds the LDT graph and the undirected Fitch graph (the
        latter from the rs transfer edges).  The test passes when
        ``gt.is_subgraph(ldt, fitch)`` holds and ``to_cotree(ldt)`` returns a
        truthy value.
        """
        species_tree = te.simulate_species_tree(20, model='innovation')

        # True gene tree (with losses).
        true_gene_tree = te.simulate_dated_gene_tree(
            species_tree,
            dupl_rate=1.0,
            loss_rate=0.5,
            hgt_rate=0.2,
        )

        # Observable gene tree.
        observable_tree = te.observable_tree(true_gene_tree)

        # Extract the LDT graph and the Fitch graph.
        ldt = analysis.ldt_graph(observable_tree, species_tree)
        rs_edges = analysis.rs_transfer_edges(observable_tree, species_tree)
        fitch = analysis.undirected_fitch(observable_tree, rs_edges)

        ldt_cotree = to_cotree(ldt)

        ldt_within_fitch = gt.is_subgraph(ldt, fitch)
        self.assertTrue(ldt_within_fitch and ldt_cotree)
Ejemplo n.º 7
0
# -*- coding: utf-8 -*-
    
import tralda.tools.GraphTools as gt

import asymmetree.treeevolve as te
from asymmetree.analysis import (undirected_fitch,
                                 rs_transfer_edges,
                                 below_equal_above,
                                 ldt_graph,
                                 RsScenarioConstructor,)
from asymmetree.tools.PhyloTreeTools import (to_newick,)

# Simulate a species tree, a dated gene tree on top of it, and the
# observable gene tree derived from the latter.
S = te.simulate_species_tree(10)
TGT = te.simulate_dated_gene_tree(S, dupl_rate=1.0, loss_rate=0.5,
                                  hgt_rate=0.5)
OGT = te.observable_tree(TGT)

# Print both trees in Newick format; S is printed a second time with
# distance=False and label_inner=False.
print('--- S ---\n', to_newick(S))
print(to_newick(S, distance=False, label_inner=False))
print('--- OGT ---\n', to_newick(OGT))

# NOTE(review): the result is unpacked as (ldt, above, equal) although the
# function is named below_equal_above -- confirm the return order against
# the library documentation, since the labels printed below depend on it.
ldt, above, equal = below_equal_above(OGT, S)
fitch = undirected_fitch(OGT, rs_transfer_edges(OGT, S))
n = ldt.order()
# n * (n - 1) / 2 is the number of unordered pairs among n items.
print('Genes:', n, 'Total relations:', int(n * (n-1) / 2))
# Sizes of the three relation graphs, labelled '<', '=' and '>'.
print('< {}\n= {}\n> {}'.format(ldt.size(), equal.size(), above.size()))

# Run the scenario constructor on the LDT graph; the truthiness of `result`
# is tested right after this block.
rs_scen_constr = RsScenarioConstructor(ldt)
result = rs_scen_constr.run()

if result:
Ejemplo n.º 8
0
# build loop

# Simulate one species tree and one gene tree per row of the parameter table
# `parameter_Df` and serialize both below `wk_dir / '01_Data'`.
#
# NOTE(review): the range stops at len(parameter_Df.index) - 1, so the last
# row of the table is never simulated -- confirm that this is intended.
# The positional counter is used as a .loc label, which assumes the table
# has a default 0..n-1 index -- TODO confirm.
for ind in range(len(parameter_Df.index) - 1):
    # species tree of type 'PhyloTree'
    s = te.simulate_species_tree(
        int(parameter_Df.loc[ind, 'num_of_leaves']),
        model=parameter_Df.loc[ind, 'model'],
        non_binary_prob=parameter_Df.loc[ind, 'non_binary_prob'],
        planted=parameter_Df.loc[ind, 'planted'],
        remove_extinct=parameter_Df.loc[ind, 'remove_extinct'],
        rescale_to_height=parameter_Df.loc[ind, 'rescale_to_height'],
    )

    # true gene tree (contains losses) of type 'PhyloTree'
    tgt = te.simulate_dated_gene_tree(
        s,
        dupl_rate=parameter_Df.loc[ind, 'dupl_rate'],
        loss_rate=parameter_Df.loc[ind, 'loss_rate'],
        hgt_rate=parameter_Df.loc[ind, 'hgt_rate'],
        dupl_polytomy=0.0,
        prohibit_extinction=parameter_Df.loc[ind, 'prohibit_extinction'],
        replace_prob=parameter_Df.loc[ind, 'replace_prob'],
    )

    # serialization
    # Convert the ID itself to str *before* appending the suffix, so that a
    # non-string ID (e.g. an integer) does not raise a TypeError during the
    # concatenation.  For string IDs the resulting file names are unchanged.
    # `wk_dir` is assumed to support the `/` operator (e.g. pathlib.Path).
    tree_id = str(parameter_Df.loc[ind, 'ID'])
    s.serialize(wk_dir / '01_Data' / (tree_id + '_species_tree.pickle'))
    tgt.serialize(wk_dir / '01_Data' / (tree_id + '_gene_tree.pickle'))
    print('Simulating Tree :', ind)

# NOTE(review): this runs once after the loop, so only the gene tree of the
# last simulated row is converted, and `tgt` is undefined if the loop body
# never ran -- confirm whether this belongs inside the loop.
ogt = te.observable_tree(tgt)
Ejemplo n.º 9
0
from tools.GraphTools import *
from tools.plotTools import *
import networkx as nx
import asymmetree.treeevolve as te
from asymmetree.datastructures import PhyloTree
from asymmetree.hgt import ldt_graph
from tools.LDT_ILP import LDTEditor
import asymmetree.tools.GraphTools as gt
import os

# Simulate a species tree and a gene tree (HGT without replacement, since
# replace_prob is 0.0), then build the LDT graph of the observable gene tree.
S = te.simulate_species_tree(20, model='innovation')
TGT = te.simulate_dated_gene_tree(S,
                                  dupl_rate=0.5,
                                  loss_rate=0.5,
                                  hgt_rate=0.5,
                                  prohibit_extinction="per_family",
                                  replace_prob=0.0)
OGT = te.observable_tree(TGT)
ldt = ldt_graph(OGT, S)

# Group the LDT graph by color; only the number of groups is reported below.
colors = gt.sort_by_colors(ldt)

# Disabled debug output; it refers to an object `G` and a helper
# `get_P3_data` that are not defined in this script.
#print("edges of G: \n{}".format(G._G.edges()))
#a, b, c = get_P3_data(G._G)
#print("\nThe regions of P3s: {}".format(a))
#print("\nThe amounts in the regions: {}".format(b))
#print("\nThe distance between regions: {}\n".format(c))

# Summary of the simulated LDT graph: node, color and edge counts.
print("Amount of nodes: {}".format(len(ldt.nodes())))
print("Amount of colors: {}".format(len(colors)))
print("Amount of edges: {}".format(len(ldt.edges())))