Beispiel #1
0
    def test_replacing_hgt(self):
        """Check leaf and color counts of a transfer-only simulation.

        A gene tree is simulated with duplication and loss rate 0.0,
        HGT rate 1.0, ``replace_prob=1.0`` and
        ``prohibit_extinction='per_species'``.  The observable tree derived
        from it must then have exactly as many leaves, and as many distinct
        leaf colors, as the species tree has requested leaves.
        """
        num_species = 20

        species_tree = te.simulate_species_tree(num_species,
                                                model='innovation')

        # true gene tree: only transfer events have a non-zero rate
        true_gene_tree = te.simulate_dated_gene_tree(
            species_tree,
            dupl_rate=0.0,
            loss_rate=0.0,
            hgt_rate=1.0,
            prohibit_extinction='per_species',
            replace_prob=1.0,
        )

        # observable gene tree
        observable = te.observable_tree(true_gene_tree)

        observed_leaves = list(observable.leaves())
        observed_colors = {leaf.color for leaf in observed_leaves}

        # both counts have to match the number of simulated species
        self.assertTrue(len(observed_colors) == num_species)
        self.assertTrue(len(observed_leaves) == num_species)
Beispiel #2
0
    def test_rs_edges(self):
        """Check that the RS transfer edges are among the true transfer edges.

        Simulates a species tree with 10 leaves and a gene tree along it,
        then compares ``analysis.rs_transfer_edges`` against
        ``analysis.true_transfer_edges`` on the observable gene tree.
        """
        species_tree = te.simulate_species_tree(10)
        true_gene_tree = te.simulate_dated_gene_tree(
            species_tree,
            dupl_rate=1.0,
            loss_rate=0.5,
            hgt_rate=0.5,
        )
        observable = te.observable_tree(true_gene_tree)

        true_edges = analysis.true_transfer_edges(observable)
        rs_edges = analysis.rs_transfer_edges(observable, species_tree)

        # every edge reported by the RS method must be a true transfer edge
        all_contained = true_edges.issuperset(rs_edges)
        self.assertTrue(all_contained)
    def test_species_tree(self):
        """Check integrity and leaf count of species trees for every model.

        For each of the models 'innovation', 'yule', 'BDP' and 'EBDP' a tree
        with 30 requested leaves is simulated; its ``_assert_integrity()``
        must be truthy and the number of leaves whose event is not 'L' must
        equal the requested number.
        """
        expected_leaves = 30
        models = ('innovation', 'yule', 'BDP', 'EBDP')

        for model_name in models:
            tree = te.simulate_species_tree(expected_leaves,
                                            model=model_name,
                                            non_binary=0.2)

            self.assertTrue(tree._assert_integrity())

            # leaves with event 'L' are excluded from the count
            surviving = sum(1 for leaf in tree.leaves() if leaf.event != 'L')
            self.assertEqual(surviving, expected_leaves)
def simulate(directory, number_of_trees, species_per_tree):
    """Simulate gene-tree scenarios and pickle each of them into *directory*.

    For every scenario a fresh species tree is simulated, a gene tree is
    simulated along it with ``GeneTreeSimulator``, evolution rates are
    assigned with ``te.assign_rates`` and the tree's ``to_nx()`` result is
    pickled to ``<directory>/scenario<i>.pickle``.

    Args:
        directory: Output directory; created (including missing parents)
            if it does not exist yet.
        number_of_trees: Number of scenarios to simulate and write.
        species_per_tree: Number of leaves requested for each simulated
            species tree.  (Previously this argument was ignored and 50
            was always used.)
    """
    # makedirs handles nested paths and an already existing directory,
    # which the former exists()/mkdir() pair did not
    os.makedirs(directory, exist_ok=True)

    for i in range(number_of_trees):

        species_tree = te.simulate_species_tree(species_per_tree)

        # simulate() is called without rate arguments; according to the
        # original author's note duplication, loss and HGT are disabled
        gene_tree = te.GeneTreeSimulator(species_tree).simulate()

        # return value intentionally unused, as in the original code
        te.assign_rates(gene_tree, species_tree, autocorr_variance=0.2)

        scenario_path = os.path.join(directory,
                                     'scenario{}.pickle'.format(i))
        with open(scenario_path, 'wb') as f:
            pickle.dump(gene_tree.to_nx(), f)
Beispiel #5
0
def generate_solutions_unique_species(n, i_p=0.5, d_p=0.5):
    """Solve and store LDT editing instances for graphs with ``n`` nodes.

    Scenarios are simulated repeatedly (species tree with 10 leaves under
    the 'innovation' model, gene tree with all event rates 0.5).  Only
    scenarios whose LDT graph has exactly ``n`` nodes are used; the loop
    ends once 100 of them were processed.  Each such graph is perturbed,
    edited with ``LDTEditor`` and, if the solution passes all three checks
    (properly colored, cograph, compatible), saved via
    ``solver._save_ILP_data``.  Instances without a valid solution still
    count towards the 100.

    Args:
        n: Required number of nodes of the LDT graph.
        i_p: First argument forwarded to ``perturb_graph`` (presumably the
            insertion probability -- confirm against ``InvestigateGraph``).
        d_p: Second argument forwarded to ``perturb_graph`` (presumably the
            deletion probability -- confirm against ``InvestigateGraph``).
    """
    processed = 0
    while processed < 100:
        species_tree = te.simulate_species_tree(10, model='innovation')
        true_gene_tree = te.simulate_dated_gene_tree(
            species_tree,
            dupl_rate=0.5,
            loss_rate=0.5,
            hgt_rate=0.5,
            prohibit_extinction="per_family",
            replace_prob=0.0,
        )
        observable = te.observable_tree(true_gene_tree)
        ldt = ldt_graph(observable, species_tree)

        # graphs of the wrong size are discarded and do not count
        if len(ldt.nodes()) != n:
            continue

        investigator = InvestigateGraph(ldt)
        investigator.perturb_graph(i_p, d_p)
        perturbed = investigator._G_perturbed

        solver = LDTEditor(perturbed)
        solver.build_model()
        solver.optimize(time_limit=None)

        sol_graph, sol_distance = solver.get_solution()

        # validate the edited graph before anything is written
        properly_colored = is_properly_colored(sol_graph)
        cograph = is_cograph(sol_graph)
        compatible = is_compatible(sol_graph)

        edit_dist = gt.symmetric_diff(perturbed, sol_graph)
        print("Runtime: {}".format(solver.get_solve_time()))

        if not (properly_colored and cograph and compatible):
            print("No solution found!")
        else:
            print("Saving data...")
            solver._save_ILP_data(
                perturbed,
                sol_graph,
                solver.get_solve_time(),
                edit_dist,
                only_add=False,
                only_delete=False,
                filename="{}nodes/LDTEdit_exact_solution".format(n))

        processed += 1
Beispiel #6
0
def generate_trees(n=100,
                   m=10,
                   model='innovation',
                   dupl_rate=0.5,
                   loss_rate=0.5,
                   hgt_rate=0.5,
                   prohibit_extinction="per_family",
                   replace_prob=0.0,
                   size=10):
    """Simulate and serialize tree pairs whose LDT graph has ``size`` nodes.

    Species/gene tree pairs are simulated until ``n`` pairs were found whose
    LDT graph has exactly ``size`` nodes.  Each accepted pair is serialized
    to JSON files below ``exact_solutions/trees/<size>trees``; the file
    names encode the simulation parameters and a running ID.  If the
    directory already exists, numbering continues at the value returned by
    ``find_next_ID``.

    Args:
        n: Number of tree pairs to save.
        m: Number of leaves requested for the species tree.
        model: Species tree model passed to ``simulate_species_tree``.
        dupl_rate: Passed through to ``simulate_dated_gene_tree``.
        loss_rate: Passed through to ``simulate_dated_gene_tree``.
        hgt_rate: Passed through to ``simulate_dated_gene_tree``.
        prohibit_extinction: Passed through to ``simulate_dated_gene_tree``.
        replace_prob: Passed through to ``simulate_dated_gene_tree``.
        size: Required number of nodes of the LDT graph.
    """
    out_dir = 'exact_solutions/trees/{}trees'.format(size)

    # continue the numbering of an existing folder, otherwise start fresh
    if os.path.exists(out_dir):
        next_id = find_next_ID('exact_solutions/trees/{}trees/'.format(size))
    else:
        os.makedirs(out_dir)
        next_id = 0

    saved = 0
    while saved < n:

        species_tree = te.simulate_species_tree(m, model=model)
        true_gene_tree = te.simulate_dated_gene_tree(
            species_tree,
            dupl_rate=dupl_rate,
            loss_rate=loss_rate,
            hgt_rate=hgt_rate,
            prohibit_extinction=prohibit_extinction,
            replace_prob=replace_prob)

        observable = te.observable_tree(true_gene_tree)
        ldt = ldt_graph(observable, species_tree)

        # only scenarios with the requested graph size are kept
        if len(ldt.nodes()) != size:
            continue

        species_path = 'exact_solutions/trees/{}trees/species_{}_{}_{}.json'.format(
            size, m, model, next_id)
        gene_path = 'exact_solutions/trees/{}trees/gene_{}_{}_{}_{}_{}_{}.json'.format(
            size, dupl_rate, loss_rate, hgt_rate, prohibit_extinction,
            replace_prob, next_id)

        species_tree.serialize(species_path)
        true_gene_tree.serialize(gene_path)

        next_id += 1
        saved += 1
    def test_no_extinction(self):
        """Check both ``prohibit_extinction`` modes of the gene tree simulation.

        Repeated 20 times on a fresh species tree with 10 requested leaves:

        * 'per_species': every species leaf whose event is not 'L' must
          have at least one gene leaf of its color whose event is not 'L'.
        * 'per_family': the gene tree must have at least one leaf whose
          event is not 'L'.
        """
        N = 10
        repeats = 20

        for _ in range(repeats):

            species_tree = te.simulate_species_tree(N,
                                                    model='innovation',
                                                    non_binary=0.2)

            gene_tree = te.simulate_dated_gene_tree(
                species_tree,
                dupl_rate=1.0,
                loss_rate=1.0,
                hgt_rate=0.5,
                prohibit_extinction='per_species')

            # one (initially empty) list of gene labels per species leaf
            # that is not a loss; keyed by the species label, which is
            # what the gene leaves reference through their color
            color_dict = {
                leaf.label: []
                for leaf in species_tree.preorder()
                if not leaf.children and leaf.event != 'L'
            }

            for v in gene_tree.preorder():
                if not v.children and v.event != 'L':
                    color_dict[v.color].append(v.label)

            # check that there is no extinction in any species
            for leaf_list in color_dict.values():
                self.assertTrue(leaf_list)

            gene_tree2 = te.simulate_dated_gene_tree(
                species_tree,
                dupl_rate=1.0,
                loss_rate=1.0,
                hgt_rate=0.5,
                prohibit_extinction='per_family')

            # check that the family as a whole did not go extinct; loss
            # leaves must be ignored here as well, otherwise the assertion
            # would also hold for a tree consisting of losses only
            surviving = [leaf for leaf in gene_tree2.leaves()
                         if leaf.event != 'L']
            self.assertTrue(surviving)
Beispiel #8
0
    def test_ldt_fitch(self):
        """Check the LDT graph against the Fitch graph and cograph property.

        From a simulated scenario the LDT graph and the undirected Fitch
        graph (built from the RS transfer edges) are extracted.  The test
        passes if ``gt.is_subgraph(ldt, fitch)`` holds and ``to_cotree``
        returns a truthy value for the LDT graph.
        """
        species_tree = te.simulate_species_tree(20, model='innovation')

        # true gene tree (with losses)
        true_gene_tree = te.simulate_dated_gene_tree(
            species_tree,
            dupl_rate=1.0,
            loss_rate=0.5,
            hgt_rate=0.2,
        )

        # observable gene tree
        observable = te.observable_tree(true_gene_tree)

        # extract the LDT graph and the Fitch graph from the scenario
        ldt = analysis.ldt_graph(observable, species_tree)
        fitch = analysis.undirected_fitch(
            observable,
            analysis.rs_transfer_edges(observable, species_tree),
        )

        cotree = to_cotree(ldt)

        self.assertTrue(gt.is_subgraph(ldt, fitch))
        self.assertTrue(cotree)
Beispiel #9
0
# -*- coding: utf-8 -*-
    
import tralda.tools.GraphTools as gt

import asymmetree.treeevolve as te
from asymmetree.analysis import (undirected_fitch,
                                 rs_transfer_edges,
                                 below_equal_above,
                                 ldt_graph,
                                 RsScenarioConstructor,)
from asymmetree.tools.PhyloTreeTools import (to_newick,)

# Simulate a scenario: species tree, true gene tree and observable gene tree.
S = te.simulate_species_tree(10)
TGT = te.simulate_dated_gene_tree(
    S,
    dupl_rate=1.0,
    loss_rate=0.5,
    hgt_rate=0.5,
)
OGT = te.observable_tree(TGT)

# Newick output of the species tree (two variants) and the observable tree.
species_newick = to_newick(S)
print('--- S ---\n', species_newick)
print(to_newick(S, distance=False, label_inner=False))
observable_newick = to_newick(OGT)
print('--- OGT ---\n', observable_newick)

# Relation graphs of the gene pairs and the undirected Fitch graph.
ldt, above, equal = below_equal_above(OGT, S)
transfer_edges = rs_transfer_edges(OGT, S)
fitch = undirected_fitch(OGT, transfer_edges)

# Number of genes and number of unordered gene pairs.
n = ldt.order()
total_relations = int(n * (n-1) / 2)
print('Genes:', n, 'Total relations:', total_relations)

relation_sizes = (ldt.size(), equal.size(), above.size())
print('< {}\n= {}\n> {}'.format(*relation_sizes))

# Run the scenario construction on the LDT graph.
rs_scen_constr = RsScenarioConstructor(ldt)
result = rs_scen_constr.run()

if result:
    indices = [i for i, x in enumerate(self) if bool(x) == True]
    return(indices)

# %% Simulation
# For each row of the parameter table, simulate a species tree and a true
# gene tree (both of type 'PhyloTree').
ind = 0

num_parameter_rows = len(parameter_Df.index)

# NOTE(review): the range stops one short of the row count, so the last row
# of parameter_Df is never simulated -- confirm that this is intended.
for ind in range(num_parameter_rows - 1):
    # species tree of type 'PhyloTree'
    num_leaves = int(parameter_Df.loc[ind, 'num_of_leaves'])
    species_kwargs = {
        column: parameter_Df.loc[ind, column]
        for column in ('model',
                       'non_binary_prob',
                       'planted',
                       'remove_extinct',
                       'rescale_to_height')
    }
    s = te.simulate_species_tree(num_leaves, **species_kwargs)

    # true gene tree (contains losses) of type 'PhyloTree'
    gene_kwargs = {
        column: parameter_Df.loc[ind, column]
        for column in ('dupl_rate',
                       'loss_rate',
                       'hgt_rate',
                       'prohibit_extinction',
                       'replace_prob')
    }
    tgt = te.simulate_dated_gene_tree(s, dupl_polytomy=0.0, **gene_kwargs)
    
    
Beispiel #11
0
from tools.GraphTools import *
from tools.plotTools import *
import networkx as nx
import asymmetree.treeevolve as te
from asymmetree.datastructures import PhyloTree
from asymmetree.hgt import ldt_graph
from tools.LDT_ILP import LDTEditor
import asymmetree.tools.GraphTools as gt
import os

# Simulate a scenario and derive its LDT graph.
S = te.simulate_species_tree(20, model='innovation')
gene_tree_settings = dict(
    dupl_rate=0.5,
    loss_rate=0.5,
    hgt_rate=0.5,
    prohibit_extinction="per_family",
    replace_prob=0.0,
)
TGT = te.simulate_dated_gene_tree(S, **gene_tree_settings)
OGT = te.observable_tree(TGT)
ldt = ldt_graph(OGT, S)

# Grouping of the LDT graph by color, as returned by gt.sort_by_colors.
colors = gt.sort_by_colors(ldt)

# Report the basic size figures of the graph.
node_count = len(ldt.nodes())
color_count = len(colors)
edge_count = len(ldt.edges())

print("Amount of nodes: {}".format(node_count))
print("Amount of colors: {}".format(color_count))
print("Amount of edges: {}".format(edge_count))
Beispiel #12
0
# -*- coding: utf-8 -*-

import asymmetree.seqevolve as se
import asymmetree.treeevolve as te

__author__ = 'David Schaller'

# Models: substitution model read from a PAML file, indel model with a
# Zipf length distribution.
subst_model = se.SubstModel(
    'a',
    'CUSTOM',
    filename='../resources/subst_matrices/WAG.paml',
)
indel_length_distr = ('zipf', 1.821)
# alternative: indel_length_distr = ('negative_binomial', 1, 0.5)
indel_model = se.IndelModel(0.01, 0.01, length_distr=indel_length_distr)

# Set up the evolver and show the matrix Q of its substitution model.
evolver = se.Evolver(subst_model,
                     indel_model=indel_model,
                     jump_chain=False)
print(evolver.subst_model.Q)

# Evolve sequences along a simulated tree with 5 leaves.
T = te.simulate_species_tree(5)
evolver.evolve_along_tree(T, start_length=150)

# Print every node label together with its converted sequence.
for node, raw_sequence in evolver.sequences.items():
    readable_sequence = subst_model.to_sequence(raw_sequence)
    print(node.label, readable_sequence)

# Build the true alignment (also written to 'testfile.alignment') and
# print its rows.
alg_seq = evolver.true_alignment(write_to='testfile.alignment')
for node, aligned_sequence in alg_seq.items():
    print(node.label, aligned_sequence)
# -*- coding: utf-8 -*-

from asymmetree.treeevolve import simulate_species_tree
from asymmetree.genome import GenomeSimulator
from asymmetree.seqevolve import SubstModel, IndelModel, HetModel

__author__ = 'David Schaller'

# Species tree along which the gene families are simulated.
species_tree = simulate_species_tree(10, model='innovation')

# Sequence evolution models; no heterogeneity model is used.
subst_model = SubstModel('a', 'JTT')
indel_model = IndelModel(0.01, 0.01, length_distr=('zipf', 1.821))
het_model = None

# All output of the simulator goes to the directory 'testfile_genome'.
genome_sim = GenomeSimulator(species_tree, outdir='testfile_genome')

# Step 1: simulate 50 gene trees.
gene_tree_options = dict(
    dupl_rate=1.0,
    loss_rate=0.5,
    base_rate=('gamma', 1.0, 1.0),
    prohibit_extinction='per_species',
)
genome_sim.simulate_gene_trees(50, **gene_tree_options)

# Step 2: simulate sequences with a constant length distribution of 200.
sequence_options = dict(
    indel_model=indel_model,
    het_model=het_model,
    length_distr=('constant', 200),
)
genome_sim.simulate_sequences(subst_model, **sequence_options)
Beispiel #14
0
# -*- coding: utf-8 -*-

import asymmetree.treeevolve as te
from asymmetree.analysis.BestMatches import lrt_from_observable_tree
from asymmetree.tools.PhyloTreeTools import (
    to_newick, )

# Event rates used for the gene tree below: duplication (D), loss (L)
# and HGT (H).
D, L, H = 1.0, 1.0, 0.0

# ==========================================================================
#  Species tree
# ==========================================================================

S = te.simulate_species_tree(10, planted=True, non_binary_prob=0.2)
print('------------- S -------------')
species_newick = to_newick(S)
print(species_newick)

# ==========================================================================
#  Gene tree
# ==========================================================================

TGT_simulator = te.GeneTreeSimulator(S)
TGT = TGT_simulator.simulate(
    dupl_rate=D,
    loss_rate=L,
    hgt_rate=H,
    prohibit_extinction='per_species',
)

TGT = te.assign_rates(TGT,
                      S,
Beispiel #15
0
# -*- coding: utf-8 -*-

import asymmetree.treeevolve as te
from asymmetree.tools.PhyloTreeTools import (
    to_newick, )

__author__ = 'David Schaller'

# Demonstrates species tree simulation for a fixed number of leaves
# (simulate_species_tree) and for a fixed age (simulate_species_tree_age),
# each with the 'yule' and the 'EBDP' model.

# Episodes used for both EBDP examples; the tuples are passed on unchanged
# to the simulation functions.
ebdp_episodes = [(1.0, 0.3, 0.8, 0.0),
                 (0.9, 0.4, 0.6, 0.3)]

print('Yule ------------------------')
tree = te.simulate_species_tree(10, model='yule', birth_rate=1.0)
print(to_newick(tree))

print('EBDP ------------------------')
# The model has to be requested explicitly (as in the age-based call
# below); without it the default model is used and the tree printed under
# the 'EBDP' heading is not an EBDP tree.
tree2 = te.simulate_species_tree(10,
                                 model='EBDP',
                                 episodes=ebdp_episodes)
print(to_newick(tree2))

print('Yule age ------------------------')
tree3 = te.simulate_species_tree_age(2.0, model='yule', birth_rate=1.0)
print(to_newick(tree3))

print('EBDP age ------------------------')
tree4 = te.simulate_species_tree_age(2.0,
                                     model='EBDP',
                                     birth_rate=1.0,
                                     episodes=ebdp_episodes)
print(to_newick(tree4))