Ejemplo n.º 1
0
 def setUp(self):
     # Fixture: two independent taxon sets, each paired with a DNA character
     # matrix that stores one constant ten-symbol sequence per taxon.
     def build(taxa_label, matrix_label, taxon_count, symbols):
         # Create `taxon_count` taxa labelled T01, T02, ... and a matrix
         # over them in which every taxon carries the sequence `symbols`.
         taxa = dendropy.TaxonSet(label=taxa_label)
         for number in range(1, taxon_count + 1):
             taxa.new_taxon(label="T%02d" % number)
         matrix = dendropy.DnaCharacterMatrix(taxon_set=taxa,
                                              label=matrix_label)
         for taxon in taxa:
             matrix.append_taxon_sequence(taxon, state_symbols=symbols)
         return taxa, matrix

     self.tb1, self.cb1 = build("TI1", "TI1, CA1", 10, "AAAAAAAAAA")
     self.tb2, self.cb2 = build("TI2", "TI2, CA2", 20, "CCCCCCCCCC")
Ejemplo n.º 2
0
 def testIncompatibleRead(self):
     # Reading with matrix_offset=1 from the data file must raise ValueError.
     matrix = dendropy.DnaCharacterMatrix()
     reader = matrix.read_from_path
     self.assertRaises(ValueError, reader, self.data_path, "nexus",
                       matrix_offset=1)
 def testCopyConstruction(self):
     # Copy-construct a matrix from the first matrix of the dataset; the
     # copy is compared with shared taxa and shared state alphabets.
     source = self.dataset.char_matrices[0]
     clone = dendropy.DnaCharacterMatrix(source)
     self.assertDistinctButEqualDiscreteCharMatrix(
         source,
         clone,
         distinct_taxa=False,
         distinct_state_alphabets=False)
 def testFromDnaCharMatrix(self):
     # Build a DNA matrix from an existing DNA matrix and compare the two,
     # expecting shared taxa and shared state alphabets.
     original = self.char_matrix1
     duplicate = dendropy.DnaCharacterMatrix(original)
     expectations = dict(char_type=dendropy.DnaCharacterMatrix,
                         distinct_state_alphabets=False,
                         distinct_taxa=False)
     self.assertDistinctButEqual(original, duplicate, **expectations)
 def testInitRead(self):
     # Construct a matrix straight from an open NEXUS stream and compare it
     # with the reference matrix, expecting distinct taxa.
     #
     # The stream is opened in a ``with`` block so the handle is closed
     # deterministically instead of being leaked.
     # NOTE(review): assumes the constructor parses the stream eagerly, so
     # the file can be closed before the comparison below -- confirm.
     with open(self.data_path, "rU") as src:
         c = dendropy.DnaCharacterMatrix(stream=src, schema="nexus")
     self.assertDistinctButEqual(self.reference_dataset.char_matrices[0],
                                 c,
                                 char_type=dendropy.DnaCharacterMatrix,
                                 distinct_state_alphabets=False,
                                 distinct_taxa=True)
 def testNonIndexedRead(self):
     # Read the NEXUS file into an empty matrix without giving an index, then
     # compare against the reference matrix, expecting distinct taxa.
     loaded = dendropy.DnaCharacterMatrix()
     loaded.read_from_path(self.data_path, "nexus")
     reference = self.reference_dataset.char_matrices[0]
     self.assertDistinctButEqual(
         reference,
         loaded,
         char_type=dendropy.DnaCharacterMatrix,
         distinct_state_alphabets=False,
         distinct_taxa=True)
Ejemplo n.º 7
0
 def testSameTaxaInit(self):
     # Construct a matrix from an open NEXUS stream while supplying the
     # reference matrix's taxon set, then compare, expecting shared taxa.
     #
     # The stream is opened in a ``with`` block so the handle is closed
     # deterministically instead of being leaked.
     # NOTE(review): assumes the constructor parses the stream eagerly, so
     # the file can be closed before the comparison below -- confirm.
     reference = self.reference_dataset.char_matrices[0]
     with open(self.data_path, "rU") as src:
         c = dendropy.DnaCharacterMatrix(
             stream=src,
             schema="nexus",
             taxon_set=reference.taxon_set)
     self.assertDistinctButEqual(reference,
                                 c,
                                 char_type=dendropy.DnaCharacterMatrix,
                                 distinct_state_alphabets=False,
                                 distinct_taxa=False,
                                 ignore_chartypes=True)
 def testSameTaxaRead(self):
     # Read the NEXUS file into an empty matrix while supplying the reference
     # matrix's taxon set, then compare, expecting shared taxa.
     reference = self.reference_dataset.char_matrices[0]
     loaded = dendropy.DnaCharacterMatrix()
     loaded.read_from_path(self.data_path,
                           schema="nexus",
                           taxon_set=reference.taxon_set)
     self.assertDistinctButEqual(
         reference,
         loaded,
         char_type=dendropy.DnaCharacterMatrix,
         distinct_state_alphabets=False,
         distinct_taxa=False)
Ejemplo n.º 9
0
def generate_char_matrix(seq_len,
                         tree_model,
                         seq_model,
                         mutation_rate=1.0,
                         root_states=None,
                         char_matrix=None,
                         rng=None):
    """
    Wrapper to conveniently generate characters simulated under
    the given tree and character model.
    `seq_len`       : length of sequence (number of characters)
    `tree_model`    : dendropy.trees.Tree object
    `seq_model`     : dendropy.seqmodel.SeqModel object
    `mutation_rate` : mutation *modifier* rate (should be 1.0 if branch lengths
                      on tree reflect true expected number of changes)
    `root_states`   : vector of root states (length must equal `seq_len`);
                      forwarded to `SeqEvolver.evolve_states`
    `char_matrix`   : dendropy.CharacterMatrix object.
                      if given, new sequences for taxa on `tree_model` leaf_nodes
                      will be appended to existing sequences of corresponding
                      taxa in char_matrix; if not, a new
                      dendropy.DnaCharacterMatrix object will be created
    `rng`           : random number generator; if not given, `GLOBAL_RNG` will be
                      used

    Returns: a dendropy.CharacterMatrix object.

    Since characters will be appended to existing sequences, you can simulate
    sequences under a mixed model by calling this method multiple times with
    different character models and/or different mutation rates, passing
    in the same `char_matrix` object each time.
    """
    seq_evolver = SeqEvolver(seq_model=seq_model, mutation_rate=mutation_rate)
    # Pass the caller's root states through; previously `None` was hard-coded
    # here, so the `root_states` argument was silently ignored.
    tree = seq_evolver.evolve_states(tree=tree_model,
                                     seq_len=seq_len,
                                     root_states=root_states,
                                     rng=rng)
    char_map = seq_evolver.compose_char_map(tree, tree.taxon_set)
    if char_matrix is None:
        char_matrix = dendropy.DnaCharacterMatrix()
    if char_matrix.taxon_set is None:
        # Covers a supplied matrix that has no taxon set yet, and a freshly
        # created matrix if its taxon set starts out as None.
        char_matrix.taxon_set = tree_model.taxon_set
    elif char_matrix.taxon_set is not tree_model.taxon_set:
        # A freshly created matrix may already carry its own taxon set; the
        # original code replaced it unconditionally, so do the same here.
        # A caller-supplied matrix must share the tree's taxon set.
        assert False if False else True
        char_matrix.taxon_set = tree_model.taxon_set
    char_matrix.extend_map(other_map=char_map,
                           overwrite_existing=False,
                           extend_existing=True)
    return char_matrix
Ejemplo n.º 10
0
 def setUpClass(cls):
     # Class-level fixture: a DNA matrix with one "AACGT" sequence for each of
     # nine labelled taxa; the label list is kept on the class as well.
     labels = [
         "a0_123456789_1",
         "a0_123456789_2",
         "a0_123456789_3",
         "a0_123456789_4",
         "a0_123456789_5",
         "b0_123456789_1",
         "b0_123456789_2",
         "b0_123456789_3",
         "b0_123456789_4",
     ]
     matrix = dendropy.DnaCharacterMatrix()
     cls.original_labels = labels
     for name in labels:
         taxon = matrix.taxon_namespace.require_taxon(label=name)
         states = matrix.default_state_alphabet.get_states_for_symbols("AACGT")
         matrix[taxon] = states
     cls.data = matrix
Ejemplo n.º 11
0
def run_method(method, tree, seqs, threshold=None):
    """
    Reconstruct a tree from `seqs` with the named method, time it, and score
    the result against `tree`.

    Parameters
    ----------
    method : str
        One of "RaXML", "SNJ", "NJ", "STR + NJ", "STR + SNJ", "STR + RaXML".
    tree : reference tree
        Its ``taxon_namespace`` supplies the taxa (and their order); it is
        also the tree the reconstruction is compared against.
    seqs : mapping
        Indexed by taxon label; each value is iterated and the ``symbol``
        attribute of every element is collected.
    threshold : optional
        Forwarded as ``threshhold`` to the spectral ("STR + ...") methods.

    Returns
    -------
    list
        ``[method, str(threshold), runtime, RF, F1]`` where ``RF`` and ``F1``
        come from ``reconstruct_tree.compare_trees``.

    Raises
    ------
    ValueError
        If `method` is not one of the supported names.
    """
    known_methods = ("RaXML", "SNJ", "NJ",
                     "STR + NJ", "STR + SNJ", "STR + RaXML")
    # Fail fast: previously an unknown name fell through every branch and
    # crashed later on the unbound `start_time` / `tree_rec`.
    if method not in known_methods:
        raise ValueError("unknown method %r; expected one of: %s"
                         % (method, ", ".join(known_methods)))

    namespace = tree.taxon_namespace
    labels = [taxon.label for taxon in namespace]
    ch_arr = np.array([[state.symbol for state in seqs[label]]
                       for label in labels])
    # Uracil is rewritten as thymine so all sequences use the DNA alphabet.
    ch_arr[ch_arr == "U"] = "T"

    ch_dendro = dendropy.DnaCharacterMatrix()
    ch_dendro.taxon_namespace = namespace
    for row in range(len(labels)):
        ch_dendro.new_sequence(namespace[row], ch_arr[row, :].tolist())

    similarity = reconstruct_tree.HKY_similarity_matrix_missing_data
    raxml_args = "-T 2 --HKY85 -c 1"

    # In every branch the timer starts after the method object is built, so
    # construction cost is excluded from the reported runtime.
    if method == "RaXML":
        raxml_HKY = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = raxml_HKY(ch_dendro, raxml_args=raxml_args)
    elif method == "SNJ":
        snj = reconstruct_tree.SpectralNeighborJoining(similarity)
        start_time = time.time()
        tree_rec = snj(ch_arr, namespace)
    elif method == "NJ":
        nj = reconstruct_tree.NeighborJoining(similarity)
        start_time = time.time()
        tree_rec = nj(ch_arr, namespace)
    else:
        # The three spectral variants differ only in the inner reconstruction
        # method, plus the RAxML arguments for "STR + RaXML".
        if method == "STR + NJ":
            inner_method = reconstruct_tree.NeighborJoining
        elif method == "STR + SNJ":
            inner_method = reconstruct_tree.SpectralNeighborJoining
        else:
            inner_method = reconstruct_tree.RAxML
        extra_kwargs = {}
        if method == "STR + RaXML":
            extra_kwargs["raxml_args"] = raxml_args
        spectral_method = reconstruct_tree.SpectralTreeReconstruction(
            inner_method, similarity)
        start_time = time.time()
        tree_rec = spectral_method.deep_spectral_tree_reonstruction(
            ch_arr,
            similarity,
            taxon_namespace=namespace,
            threshhold=threshold,
            min_split=5,
            **extra_kwargs)

    runtime = time.time() - start_time
    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]
Ejemplo n.º 12
0
def hky85_chars(seq_len,
                tree_model,
                mutation_rate=1.0,
                kappa=1.0,
                base_freqs=[0.25, 0.25, 0.25, 0.25],
                root_states=None,
                char_matrix=None,
                retain_sequences_on_tree=False,
                rng=None):
    """
    Convenience function to wrap generation of characters (as a
    |DnaCharacterMatrix| object) based on the HKY model.

    Parameters
    ----------

    seq_len       : int
        Length of sequence (number of characters).
    tree_model    : |Tree|
        Tree on which to simulate.
    mutation_rate : float
        Mutation *modifier* rate (should be 1.0 if branch lengths on tree
        reflect true expected number of changes).
    kappa         : float
        Passed to the ``Hky85`` model as ``kappa``.
    base_freqs    : list
        Passed to the ``Hky85`` model as ``base_freqs``.
    root_states   : list
        Vector of root states (length must equal ``seq_len``).
    char_matrix   : |DnaCharacterMatrix|
        If given, new sequences for taxa on ``tree_model`` leaf_nodes will be
        appended to existing sequences of corresponding taxa in char_matrix; if
        not, a new |DnaCharacterMatrix| object will be created.
    retain_sequences_on_tree : bool
        If |False|, sequence annotations will be cleared from tree after
        simulation. Set to |True| if you want to, e.g., evolve and accumulate
        different sequences on tree, or retain information for other purposes.
    rng           : random number generator
        If not given, 'GLOBAL_RNG' will be used.

    Returns
    -------
    d : |DnaCharacterMatrix|
        The simulated alignment.

    Since characters will be appended to existing sequences, you can simulate
    sequences under a mixed model by calling this method multiple times with
    different character model parameter values and/or different mutation
    rates, passing in the same ``char_matrix`` object each time.
    """
    if char_matrix is None:
        char_matrix = dendropy.DnaCharacterMatrix(
            taxon_namespace=tree_model.taxon_namespace)
    else:
        assert char_matrix.taxon_namespace is tree_model.taxon_namespace
    # The model uses the matrix's own alphabet so simulated states match it.
    state_alphabet = char_matrix.default_state_alphabet
    # NOTE(review): the `base_freqs` default is a shared list object; this
    # function does not mutate it -- confirm Hky85 does not either.
    seq_model = Hky85(kappa=kappa,
                      base_freqs=base_freqs,
                      state_alphabet=state_alphabet)
    # Forward `retain_sequences_on_tree`; it used to be dropped here, so the
    # tree was always cleaned regardless of what the caller asked for.
    return simulate_discrete_chars(
        seq_len=seq_len,
        tree_model=tree_model,
        seq_model=seq_model,
        mutation_rate=mutation_rate,
        root_states=root_states,
        char_matrix=char_matrix,
        retain_sequences_on_tree=retain_sequences_on_tree,
        rng=rng)
Ejemplo n.º 13
0
def simulate_discrete_chars(seq_len,
                            tree_model,
                            seq_model,
                            mutation_rate=1.0,
                            root_states=None,
                            char_matrix=None,
                            retain_sequences_on_tree=False,
                            rng=None):
    """
    Wrapper to conveniently generate characters simulated under
    the given tree and character model.

    Since characters will be appended to existing sequences, you can simulate
    sequences under a mixed model by calling this method multiple times with
    different character models and/or different mutation rates, passing
    in the same ``char_matrix`` object each time.

    Parameters
    ----------

    seq_len       : int
        Length of sequence (number of characters).
    tree_model    : |Tree|
        Tree on which to simulate.
    seq_model     : dendropy.model.discrete.DiscreteCharacterEvolutionModel
        The character substitution model under which to evolve the
        characters.
    mutation_rate : float
        Mutation *modifier* rate (should be 1.0 if branch lengths on tree
        reflect true expected number of changes).
    root_states   : list
        Vector of root states (length must equal ``seq_len``). Forwarded to
        ``DiscreteCharacterEvolver.evolve_states``.
    char_matrix   : |DnaCharacterMatrix|
        If given, new sequences for taxa on ``tree_model`` leaf_nodes will be
        appended to existing sequences of corresponding taxa in char_matrix; if
        not, a new |DnaCharacterMatrix| object will be created.
    retain_sequences_on_tree : bool
        If |False|, sequence annotations will be cleared from tree after
        simulation. Set to |True| if you want to, e.g., evolve and accumulate
        different sequences on tree, or retain information for other purposes.
    rng           : random number generator
        If not given, 'GLOBAL_RNG' will be used.

    Returns
    -------
    d : a dendropy.datamodel.CharacterMatrix object.

    """
    seq_evolver = DiscreteCharacterEvolver(seq_model=seq_model,
                                           mutation_rate=mutation_rate)
    # Pass the caller's root states through; previously `None` was hard-coded
    # here, so the `root_states` argument was silently ignored.
    tree = seq_evolver.evolve_states(tree=tree_model,
                                     seq_len=seq_len,
                                     root_states=root_states,
                                     rng=rng)
    if char_matrix is None:
        # The namespace is set by the constructor; the former re-assignment
        # of the same object right afterwards was redundant.
        char_matrix = dendropy.DnaCharacterMatrix(
            taxon_namespace=tree_model.taxon_namespace)
    else:
        assert char_matrix.taxon_namespace is tree_model.taxon_namespace, "conflicting taxon sets"
    seq_evolver.extend_char_matrix_with_characters_on_tree(
        char_matrix=char_matrix, tree=tree)
    if not retain_sequences_on_tree:
        seq_evolver.clean_tree(tree)
    return char_matrix