Ejemplo n.º 1
0
 def get_char_matrix(self, labels=None):
     """
     Build a ``CharacterMatrix`` holding one random sequence per label.

     ``labels`` defaults to the strings "0" through "999". The list is
     shuffled in place with ``self.rng``, so a caller-supplied list is
     reordered as a side effect. Every sequence is four integers drawn
     with ``self.rng.randint(0, 100)``.

     The returned matrix carries the bookkeeping attributes
     ``expected_labels``, ``original_taxa``, ``original_seqs`` and
     ``nseqs``; each stored sequence is tagged with ``original_taxon``,
     ``original_seq`` and ``label``.
     """
     if labels is None:
         labels = [str(i) for i in range(1000)]
     matrix = charmatrixmodel.CharacterMatrix()
     matrix.expected_labels = []
     matrix.original_taxa = []
     matrix.original_seqs = []
     self.rng.shuffle(labels)
     for label in labels:
         taxon = dendropy.Taxon(label=label)
         matrix.taxon_namespace.add_taxon(taxon)
         matrix.original_taxa.append(taxon)
         # Touch the entry for this taxon before the real sequence is
         # assigned below (kept from the original fixture).
         matrix[taxon].original_taxon = taxon
         matrix.expected_labels.append(label)
         matrix[taxon] = [self.rng.randint(0, 100) for _ in range(4)]
         # NOTE(review): assumes repeated ``matrix[taxon]`` lookups return
         # the same stored object -- confirm against CharacterMatrix.
         stored = matrix[taxon]
         stored.original_seq = stored
         matrix.original_seqs.append(stored)
         stored.original_taxon = taxon
         stored.label = label
     taxon_count = len(matrix.original_taxa)
     assert len(matrix.taxon_namespace) == taxon_count
     assert len(matrix) == taxon_count
     assert len(matrix) == len(labels)
     matrix.nseqs = len(matrix)
     return matrix
 def test_mixed_token_lookup(self):
     """
     Resolve taxa by number when only some of them have translate tokens.

     One hundred taxa labelled "t1".."t100" are registered with the
     mapper; every odd-numbered one additionally gets its number (as a
     string) registered as a translate token. Looking up each number must
     return the taxon added at that position without growing the
     namespace.
     """
     labels = ["t{}".format(i) for i in range(1, 101)]
     tns = dendropy.TaxonNamespace()
     tsm = nexusprocessing.NexusTaxonSymbolMapper(taxon_namespace=tns)
     translate = {}
     labels_t = {}
     for number, label in enumerate(labels, start=1):
         t = dendropy.Taxon(label)
         labels_t[t.label] = t
         tsm.add_taxon(t)
         # Only odd-numbered taxa receive an explicit translate token.
         if number % 2 == 1:
             token = str(number)
             translate[token] = t
             tsm.add_translate_token(token, t)
     self.assertEqual(len(tns), len(labels))
     for number, label in enumerate(labels, start=1):
         # The lookup itself is still done with the integer symbol.
         t1 = tsm.require_taxon_for_symbol(number)
         self.assertEqual(len(tns), len(labels))
         self.assertEqual(t1.label, label)
         self.assertIs(t1, labels_t[label])
         # ``translate`` is keyed by string tokens; testing the bare int
         # against it never matched, so this check used to be skipped.
         token = str(number)
         if token in translate:
             self.assertIs(t1, translate[token])
     self.assertEqual(len(tns), len(labels))
Ejemplo n.º 3
0
 def get_char_matrix(self):
     """
     Build a ``CharacterMatrix`` with fifteen taxa, then swap in a fresh
     taxon namespace.

     Each taxon gets the sequence ``[1, 1, 1]``. The matrix is returned
     with the extra attributes ``expected_labels``, ``expected_taxa`` and
     ``nseqs``. Its ``taxon_namespace`` is replaced by a new, empty
     ``TaxonNamespace`` after the sequences have been added.
     """
     labels = [
         "z01", "<NONE>", "z03", "z04", "z05",
         "z06", None, None, "z09", "z10",
         "z11", "<NONE>", None, "z14", "z15",
     ]
     matrix = charmatrixmodel.CharacterMatrix()
     matrix.expected_labels = []
     matrix.expected_taxa = set()
     random.shuffle(labels)
     for _ in labels:
         # NOTE(review): every taxon is created with label=None, so the
         # values in ``labels`` only determine how many taxa are made.
         # Confirm whether ``label=label`` was intended.
         taxon = dendropy.Taxon(label=None)
         matrix.taxon_namespace.add_taxon(taxon)
         matrix[taxon] = [1, 1, 1]
         matrix.expected_taxa.add(taxon)
         matrix.expected_labels.append(taxon.label)
     matrix.taxon_namespace = dendropy.TaxonNamespace()
     sequence_count = len(matrix)
     assert sequence_count == len(labels)
     assert sequence_count == len(matrix._taxon_sequence_map)
     matrix.nseqs = len(matrix)
     return matrix
 def setUp(self):
     """
     Build a taxon-free tree and give every node a taxon whose label is
     looked up from ``node_label_to_taxon_label_map``.

     Several node labels deliberately map to the same taxon label (and
     two map to ``None``); a distinct ``Taxon`` object is still created
     per node and recorded in ``self.original_taxa``.
     """
     fixture = self.get_tree(
             suppress_internal_node_taxa=True,
             suppress_leaf_node_taxa=True)
     self.tree, self.anodes, self.lnodes, self.inodes = fixture
     self.node_label_to_taxon_label_map = {
         "a": "a", "b": "a",
         "c": "2", "e": "2",
         "f": "b",
         "g": "B", "h": "B",
         "i": "h", "j": "H", "k": "h",
         "l": None, "m": None,
         "n": "H",
         "o": "J", "p": "j",
     }
     self.original_taxa = []
     namespace = self.tree.taxon_namespace
     for nd in self.tree:
         taxon = dendropy.Taxon(
             label=self.node_label_to_taxon_label_map[nd.label])
         namespace.add_taxon(taxon)
         nd.taxon = taxon
         nd.original_taxon = taxon
         self.original_taxa.append(taxon)
     assert len(self.tree.taxon_namespace) == len(self.node_label_to_taxon_label_map)
     assert len(self.tree.taxon_namespace) == len(self.original_taxa)
Ejemplo n.º 5
0
    def getDiploid(self):
        """
        Let the user pick the diploid species.

        Shows a warning box and returns if no input files are loaded.
        Otherwise builds a taxon namespace from ``self.taxa_names``, opens
        the diploid-list dialog with it, and stores the dialog's selection
        in ``self.diploidList`` when the dialog is accepted.
        """
        if len(self.inputFiles) == 0:
            QMessageBox.warning(self, "Warning",
                                "Please select a file type and upload data!",
                                QMessageBox.Ok)
            return

        # Namespace mirroring the current set of taxa names.
        namespace = dendropy.TaxonNamespace()
        for name in list(self.taxa_names):
            namespace.add_taxon(dendropy.Taxon(name))

        chooser = diploidList.DiploidListDlg(namespace, self.diploidList, self)
        if chooser.exec_():
            # Accepted: take over the user's selection.
            self.diploidList = chooser.getDiploidSpeciesList()
Ejemplo n.º 6
0
def MaybeMergeChildren(parent_node):
    """Collapse the two children of ``parent_node`` into the parent.

    The parent must have exactly two children. If ``AreLeaves(children)``
    is false nothing is changed and ``False`` is returned. Otherwise both
    children are removed and the parent:

    * has its edge lengthened by the longer of the two child edges,
    * receives the union of the children's ``pathways`` sets and the sum
      of their ``count`` values (both annotated),
    * gets a fresh taxon labelled like the child with the longer edge.

    Returns:
        True if the children were merged, False otherwise.
    """
    children = parent_node.child_nodes()
    assert len(children) == 2
    if not AreLeaves(children):
        logging.debug('Not both children are leaves. Bailing.')
        return False

    # Make the new dictionaries and edge lengths
    child_pathways = [c.pathways for c in children]
    child_lengths = [c.edge.length for c in children]
    virtual_count = sum(c.count for c in children)
    max_length_idx = pylab.argmax(child_lengths)
    label = children[max_length_idx].taxon.label
    merged_pathways = set.union(*child_pathways)

    logging.debug('Merging 2 children with edge lengths %s', child_lengths)

    # Remove children and update the parent. An explicit loop is required:
    # on Python 3 ``map`` is lazy, so ``map(remove_child, children)`` never
    # actually removed anything.
    for child in list(children):
        parent_node.remove_child(child)
    parent_node.edge.length += child_lengths[max_length_idx]
    parent_node.pathways = merged_pathways
    parent_node.count = virtual_count
    parent_node.annotate('count')
    for pname in parent_node.pathways:
        setattr(parent_node, pname, True)
        parent_node.annotate(pname)

    # Set up a taxon for the parent according to the
    # most distinct child.
    # TODO(flamholz): indicate somehow that this was merged.
    taxon = dendropy.Taxon()
    taxon.label = label
    parent_node.taxon = taxon

    return True
Ejemplo n.º 7
0
def json_to_dendropy_sub(json, node, taxon_set):
    '''
    Recursively build the subtree below ``node`` from a nested dict.

    ``json`` is a dict describing one node. Its ``'children'`` entry, if
    present, is a list of dicts of the same form; every other entry is
    copied onto ``node`` as an attribute (converted to ``float`` when
    possible, ``'undefined'`` becomes ``None``, anything else is stored
    unchanged).

    Child edge lengths are taken from the difference in ``xvalue`` between
    child and parent, else from the child's ``branch_length`` attribute,
    else default to 1.0.

    Nodes that end up without children are treated as tips: they get a
    taxon labelled with the upper-cased ``json['strain']``, which is also
    added to ``taxon_set``.
    '''
    if 'xvalue' in json:
        node.xvalue = float(json['xvalue'])
    # ``items()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
    for attr, val in json.items():
        if attr == 'children':
            for sub_json in val:
                child_node = dendropy.Node()
                json_to_dendropy_sub(sub_json, child_node, taxon_set)
                if hasattr(child_node, 'xvalue'):
                    edge_length = child_node.xvalue - node.xvalue
                elif hasattr(child_node, 'branch_length'):
                    edge_length = child_node.branch_length
                else:
                    edge_length = 1.0
                node.add_child(child_node, edge_length=edge_length)
        else:
            # Only a failed numeric conversion triggers the fallback; other
            # errors (and KeyboardInterrupt) are no longer swallowed.
            try:
                value = float(val)
            except (TypeError, ValueError, OverflowError):
                value = None if val == 'undefined' else val
            setattr(node, attr, value)
    if len(node.child_nodes()) == 0:
        node.taxon = dendropy.Taxon(label=json['strain'].upper())
        node.strain = json['strain']
        taxon_set.add_taxon(node.taxon)
Ejemplo n.º 8
0
 def setUp(self):
     """
     Build a small hand-wired node fixture with annotations.

     Structure: ``p1`` is set as the parent of ``n0``; ``n0`` has children
     ``c1`` and ``c2``; ``c2`` has the single child ``c3``. Nodes at even
     positions in ``self.nodes`` get an edge label and length. Every node
     and every node's edge receives a new annotation plus a bound
     ``label`` annotation, and each new annotation itself gets a bound
     ``name`` annotation. ``self.e0`` is the edge of ``n0``.
     """
     self.taxa = [
         dendropy.Taxon(label=label) for label in ["a", "b", "c", "d"]
     ]
     self.n0 = dendropy.Node(label="0", taxon=self.taxa[0])
     self.c1 = dendropy.Node(label="1", taxon=None)
     self.c2 = dendropy.Node(label=None, taxon=self.taxa[1])
     # The fixture previously built a taxon-less node here and immediately
     # overwrote it; only this assignment ever took effect.
     self.c3 = dendropy.Node(label=None, taxon=self.taxa[2])
     self.p1 = dendropy.Node(label="-1", taxon=self.taxa[3])
     self.n0.parent_node = self.p1
     self.n0.set_child_nodes([self.c1, self.c2])
     self.c2.set_child_nodes([self.c3])
     self.nodes = [self.n0, self.c1, self.c2, self.c3, self.p1]
     for idx, nd in enumerate(self.nodes):
         if idx % 2 == 0:
             nd.edge.label = "E{}".format(idx)
             nd.edge.length = idx
         # Node annotations: one fresh value, one bound to the node's
         # label, and a nested one bound to the fresh annotation's name.
         node_annotation = nd.annotations.add_new(
             "a{}".format(idx), "{}{}{}".format(nd.label, nd.taxon, idx))
         nd.annotations.add_bound_attribute("label")
         node_annotation.annotations.add_bound_attribute("name")
         # Same pattern for the node's edge.
         edge_annotation = nd.edge.annotations.add_new(
             "a{}".format(idx), "{}{}".format(nd.edge.label, idx))
         nd.edge.annotations.add_bound_attribute("label")
         edge_annotation.annotations.add_bound_attribute("name")
     self.e0 = self.n0._edge
Ejemplo n.º 9
0
 def testTaxaQuerying(self):
     """
     Exercise ``has_taxa`` / ``has_taxon`` on a ``TaxonSet`` while taxa
     are added via ``new_taxon`` and ``add_taxon``.
     """
     known = self.labels
     ts = dendropy.TaxonSet(known)

     # Freshly built set: knows its own labels and taxa, but not "k".
     self.assertTrue(ts.has_taxa(labels=known))
     self.assertTrue(ts.has_taxa(taxa=ts))
     self.assertFalse(ts.has_taxa(labels=known + ["k"]))

     # Taxon created through the set itself.
     created = ts.new_taxon(label="k")
     self.assertTrue(ts.has_taxa(taxa=[created]))
     self.assertTrue(ts.has_taxon(label="k"))
     self.assertTrue(ts.has_taxa(labels=known + ["k"]))

     # Taxon created externally and then added.
     added = dendropy.Taxon(label="j")
     ts.add_taxon(added)
     self.assertTrue(ts.has_taxa(taxa=[added]))
     self.assertTrue(ts.has_taxon(label="j"))
     self.assertTrue(ts.has_taxa(labels=known + ["j"]))

     # A taxon that was never added must not be reported.
     self.assertFalse(ts.has_taxon(taxon=dendropy.Taxon()))
     for name in known:
         self.assertTrue(ts.has_taxon(label=name))
Ejemplo n.º 10
0
def makeTaxTree(splits, contigTax, outname):
    """Write a newick taxonomy tree containing contigs and their splits.

    Args:
        splits: iterable of split names. The owning contig name is taken
            to be everything before the last ``'_split'`` in each name.
        contigTax: dict mapping contig name to its list of taxon names,
            one per rank in ``RANK_PREFIXES`` order. The dict and its
            lists are left unmodified.
        outname: path of the newick file to write. Single quotes are
            stripped from the serialized tree.

    Returns:
        The set of splits that were skipped because of the two hard-coded
        problematic taxonomy strings.
    """
    RANK_PREFIXES = ['k', 'p', 'c', 'o', 'f', 'g', 's']
    # Create namespace and node collection
    contigTax2 = {}
    for contig, ranks in contigTax.items():
        # Work on a copy: prefixing the caller's lists in place would
        # double-prefix them on a second call with the same dict.
        prefixed = list(ranks)
        for i, prefix in enumerate(RANK_PREFIXES):  # Add rank prefixes
            name = '{}_{}'.format(prefix, prefixed[i])
            # Parentheses will break newick
            prefixed[i] = name.replace('(', '[').replace(')', ']')
        contigTax2[contig] = prefixed

    names = set()
    for contig, ranks in contigTax2.items():
        names.add(contig)
        names.update(ranks)
    # We want to have the contigs AND the splits in our tree
    names.update(splits)

    nodes = {name: dendropy.Node() for name in names}
    taxa = []
    for name, node in nodes.items():
        taxon = dendropy.Taxon(name)
        node.taxon = taxon
        taxa.append(taxon)
    namespace = dendropy.TaxonNamespace()
    namespace.add_taxa(taxa)

    # Create and populate tree
    tree = dendropy.Tree(taxon_namespace=namespace)
    parents = {}

    # This shouldn't be needed but since we have taxonomy problems do it for now.
    removedSplits = set()

    for split in splits:
        contig = split.rsplit('_split', 1)[0]
        tax = contigTax2[contig]
        # Weird taxonomy, find solution, avoid for now!
        if (tax[-1] == 's_Firmicutes bacterium'
                or tax[4] == 'f_Clostridia bacterium [no family in NCBI]'):
            print(contig)
            removedSplits.add(split)
            continue
        tree.seed_node.add_child(nodes[tax[0]])
        for i in range(1, len(tax)):
            nodes[tax[i - 1]].add_child(nodes[tax[i]])
            # Record every parent seen for this taxon for the check below.
            parents.setdefault(tax[i], set()).add(tax[i - 1])

        nodes[tax[-1]].add_child(nodes[contig])
        nodes[contig].add_child(nodes[split])

    # All nodes should have only one parent!
    for p in parents:
        if len(parents[p]) > 1:
            print(p, parents[p])

    with open(outname, 'w') as outfile:
        outfile.write(tree.as_string('newick').replace('\'', ''))

    return removedSplits
Ejemplo n.º 11
0
    def reportNewickTree(self, outDir, outFile, leafLabels=None):
        """
        Write the placement tree stored under ``outDir`` in newick format.

        The tree is read from ``<outDir>/storage/tree/<PPLACER_TREE_OUT>``.
        Internal node labels are rebuilt from their first three
        ``|``-separated fields, dropping empty second/third fields.
        Leaves that have duplicate sequences are replaced by a new
        internal node whose zero-length children are the original leaf
        and one new leaf per duplicate. When ``leafLabels`` equals
        ``'taxonomy'``, the taxonomy string from the genome-tree taxonomy
        file is appended to matching leaf labels.

        Output goes through ``reassignStdOut(outFile)`` /
        ``restoreStdOut``; stdout is restored even if printing fails.
        """
        # read duplicate nodes
        duplicateSeqs = self.__readDuplicateSeqs()

        # read tree
        treeFile = os.path.join(outDir, 'storage', 'tree',
                                DefaultValues.PPLACER_TREE_OUT)
        tree = dendropy.Tree.get_from_path(treeFile,
                                           schema='newick',
                                           rooting="force-rooted",
                                           preserve_underscores=True)

        # clean up internal node labels
        for node in tree.internal_nodes():
            if node.label:
                labelSplit = node.label.split('|')

                label = labelSplit[0]
                if labelSplit[1] != '':
                    label += '|' + labelSplit[1]
                if labelSplit[2] != '':
                    label += '|' + labelSplit[2]

                node.label = label

        # insert duplicate nodes into tree
        for leaf in tree.leaf_nodes():
            duplicates = duplicateSeqs.get(leaf.taxon.label, None)
            if duplicates is not None:
                # The new internal node takes over the leaf's edge length;
                # the leaf and its duplicates hang below it at distance 0.
                newParent = leaf.parent_node.new_child(
                    edge_length=leaf.edge_length)
                curLeaf = leaf.parent_node.remove_child(leaf)
                newParent.new_child(taxon=curLeaf.taxon, edge_length=0)
                for d in duplicates:
                    newParent.new_child(taxon=dendropy.Taxon(label=d),
                                        edge_length=0)

        # append taxonomy to leaf nodes
        if leafLabels == 'taxonomy':
            # read taxonomy string for each IMG genome
            taxonomy = {}
            taxonomyFile = os.path.join(DefaultValues.GENOME_TREE_DIR,
                                        DefaultValues.GENOME_TREE_TAXONOMY)
            # Context manager so the file handle is closed deterministically.
            with open(taxonomyFile) as taxonomyIn:
                for line in taxonomyIn:
                    lineSplit = line.split('\t')
                    taxonomy[lineSplit[0]] = lineSplit[1].rstrip()

            # append taxonomy to leaf labels
            for leaf in tree.leaf_nodes():
                taxaStr = taxonomy.get(leaf.taxon.label, None)
                if taxaStr:
                    leaf.taxon.label += '|' + taxaStr

        # write out tree
        oldStdOut = reassignStdOut(outFile)
        try:
            print((tree.as_string(schema='newick', suppress_rooting=True)))
        finally:
            # Never leave stdout redirected if serialization raises.
            restoreStdOut(outFile, oldStdOut)
Ejemplo n.º 12
0
 def test_basic_construction(self):
     """
     A node built with taxon, label and edge length exposes all three,
     and its edge points back at it with no tail node.
     """
     expected_taxon = dendropy.Taxon("z")
     node = dendropy.Node(taxon=expected_taxon, label="x", edge_length=1)
     self.assertIs(node.taxon, expected_taxon)
     self.assertEqual(node.label, "x")
     subtending_edge = node.edge
     self.assertEqual(subtending_edge.length, 1)
     self.assertIs(subtending_edge.head_node, node)
     self.assertIs(subtending_edge.tail_node, None)
    def getTaxamap(self):
        """
        Open the taxa-map dialog and store the mapping the user chooses.

        Shows a warning box and returns if no data has been loaded. When
        the data type is "unphased data" each taxon is offered twice
        (``<label>_0`` and ``<label>_1``); otherwise the data's own taxon
        namespace is used. An empty ``self.taxamap`` is pre-filled so that
        every offered taxon maps to itself. ``self.taxamap`` is replaced
        by the dialog's result only if the dialog is accepted.
        """
        if self.data is None:
            QMessageBox.warning(self, "Warning", "Please upload data first!",
                                QMessageBox.Ok)
            return

        if str(self.dataTypeEdit.currentText()) == "unphased data":
            # Phasing doubles the taxa: turn each taxon into two.
            offered = dendropy.TaxonNamespace()
            for original in self.data.taxon_namespace:
                for suffix in ("_0", "_1"):
                    offered.add_taxon(dendropy.Taxon(original.label + suffix))
        else:
            offered = self.data.taxon_namespace

        # Default is only one individual for each species.
        if len(self.taxamap) == 0:
            for entry in offered:
                self.taxamap[entry.label] = entry.label

        mapDialog = TaxamapDlg.TaxamapDlg(offered, self.taxamap, self)
        if mapDialog.exec_():
            self.taxamap = mapDialog.getTaxamap()
    def getTaxaList(self):
        """
        Open the taxa-list dialog and store the taxa the user selects.

        Shows a warning box and returns if no data has been loaded. When
        the data type is "unphased data" each taxon is offered twice
        (``<label>_0`` and ``<label>_1``); otherwise the data's own taxon
        namespace is used. An empty ``self.taxaList`` is pre-filled with
        every offered label. ``self.taxaList`` is replaced by the dialog's
        result only if the dialog is accepted.
        """
        if self.data is None:
            QMessageBox.warning(self, "Warning", "Please upload data first!",
                                QMessageBox.Ok)
            return

        if str(self.dataTypeEdit.currentText()) == "unphased data":
            # Phasing doubles the taxa: turn each taxon into two.
            offered = dendropy.TaxonNamespace()
            for original in self.data.taxon_namespace:
                for suffix in ("_0", "_1"):
                    offered.add_taxon(dendropy.Taxon(original.label + suffix))
        else:
            offered = self.data.taxon_namespace

        # Default is all taxa are used for inference.
        if len(self.taxaList) == 0:
            for entry in offered:
                self.taxaList.append(entry.label)

        listDialog = taxaList.TaxaListDlg(offered, self.taxaList, self)
        if listDialog.exec_():
            self.taxaList = listDialog.getTaxaList()
 def setUp(self):
     """
     Build a taxon-free tree and give every labelled node a taxon that
     carries the node's label; record the expected labels and taxa.
     """
     fixture = self.get_tree(
             suppress_internal_node_taxa=True,
             suppress_leaf_node_taxa=True)
     self.tree1, self.anodes1, self.lnodes1, self.inodes1 = fixture
     self.expected_labels = {
         nd.label for nd in self.anodes1 if nd.label is not None}
     self.expected_taxa = set()
     for nd in self.tree1:
         if nd.label is None:
             continue
         nd.taxon = dendropy.Taxon(label=nd.label)
         self.expected_taxa.add(nd.taxon)
     # Sanity checks on the fixture: one distinct label per node, one
     # taxon per label.
     assert len(self.expected_labels) == len(self.anodes1)
     assert len(self.expected_taxa) == len(self.expected_labels)
Ejemplo n.º 16
0
 def setUp(self):
     """
     Create ``self.char_matrix`` with six taxa ("a".."f"), each holding
     the sequence ``[0, 1, 2, 3]``, and record the taxa in
     ``self.expected_taxa``.
     """
     self.char_matrix = charmatrixmodel.CharacterMatrix()
     self.expected_taxa = set()
     for name in ("a", "b", "c", "d", "e", "f"):
         taxon = dendropy.Taxon(label=name)
         self.char_matrix.taxon_namespace.add_taxon(taxon)
         self.expected_taxa.add(taxon)
         # Fresh list per taxon so sequences never share storage.
         self.char_matrix[taxon] = list(range(4))
Ejemplo n.º 17
0
def decorate_tree(tree, replace_dict):
    """Return a deep copy of ``tree`` with selected taxa relabelled.

    For every node yielded by ``tree_traverse`` that has a parent, no node
    label, and a taxon whose label is a key of ``replace_dict``, the taxon
    is replaced by a new one labelled ``replace_dict[old_label]``. The new
    taxon is also added to the copy's taxon namespace. ``tree`` itself is
    not modified.
    """
    t = deepcopy(tree)
    for n in tree_traverse(t.seed_node):
        # Guard clauses instead of a five-level ``if`` pyramid.
        if n.parent_node is None or n.label is not None:
            continue
        if n.taxon is None:
            continue
        # NOTE(review): reads the private ``_label`` attribute, as the
        # original did; presumably equivalent to ``taxon.label`` -- confirm.
        old_label = n.taxon._label
        if old_label not in replace_dict:
            continue
        tax = dendropy.Taxon(label=replace_dict[old_label])
        n.taxon = tax
        t.taxon_namespace.add_taxon(tax)
    return t
Ejemplo n.º 18
0
 def test_truncate_ultrametric(self):
     """
     Truncate a copy of each tree at 25%, 50%, 75% and 90% of its maximum
     leaf-to-root distance and check the result with
     ``check_ultrametric_tree``.
     """
     fractions = (0.25, 0.5, 0.75, 0.90)
     for tree in self.trees:
         # Return value is not used; the call is kept as in the original.
         tree.calc_node_root_distances()
         _, max_dist = tree.minmax_leaf_distance_from_root()
         cutoffs = [(max_dist * f) for f in fractions]
         for cutoff in cutoffs:
             working = dendropy.Tree(tree)
             working.truncate_from_root(cutoff)
             # Leaves with neither label nor taxon get a numbered taxon.
             for number, leaf in enumerate(working.leaf_iter(), start=1):
                 if leaf.label is None and leaf.taxon is None:
                     leaf.taxon = dendropy.Taxon(
                         label="t{}".format(number))
             self.check_ultrametric_tree(working, cutoff)
Ejemplo n.º 19
0
def add_trifurication(tree):
    """Attach three new leaves (X1, X2, X3) to the parent of the first leaf.

    The new taxa are registered in ``tree.taxon_namespace``. The leaves
    get edge lengths 1.234, 1.234 and 4.123 respectively and are added to
    the parent in that order.
    """
    parent_node = list(tree.leaf_node_iter())[0].parent_node

    new_taxa = [dendropy.Taxon(name) for name in ('X1', 'X2', 'X3')]
    for taxon in new_taxa:
        tree.taxon_namespace.add_taxon(taxon)

    new_children = [
        dendropy.Node(edge_length=length)
        for length in (1.234, 1.234, 4.123)
    ]
    for child, taxon in zip(new_children, new_taxa):
        child.taxon = taxon

    for child in new_children:
        parent_node.add_child(child)
def _pdm_taxon_for_label(pdm, label):
    """Return the taxon for ``label`` in ``pdm``, registering it if absent."""
    taxon = pdm.taxon_namespace.get_taxon(label)
    if not taxon:
        taxon = dendropy.Taxon(label)
        pdm.taxon_namespace.add_taxon(taxon)
        pdm._mapped_taxa.add(taxon)
        pdm._taxon_phylogenetic_distances[taxon] = {}
    return taxon


def matrix_to_dendropy_pdm(dmat, taxa):
    """Build a dendropy PDM object from an in-memory distance matrix

    Parameters
    ----------
    dmat : two-dimensional array-like
        Pairwise distances, read as ``dmat[i, j]`` and converted with
        ``float``. Row/column ``i`` corresponds to ``taxa[i]``.
    taxa : sequence of str
        Taxon labels, in the same order as the rows/columns of ``dmat``.

    Returns
    -------
    pdm : dendropy phylogenetic distance matrix object
        Uses a fresh taxon namespace holding one taxon per label.

    Notes
    -----
    The matrix is filled by writing to the PDM's private ``_mapped_taxa``
    and ``_taxon_phylogenetic_distances`` attributes directly.
    """
    pdm = dendropy.PhylogeneticDistanceMatrix()
    pdm.taxon_namespace = dendropy.TaxonNamespace()
    pdm._mapped_taxa = set()

    for i, si in enumerate(taxa):
        # The row taxon does not change inside the inner loop, so resolve
        # it once per row instead of once per cell.
        xi = _pdm_taxon_for_label(pdm, si)
        row = pdm._taxon_phylogenetic_distances[xi]
        for j, sj in enumerate(taxa):
            xj = _pdm_taxon_for_label(pdm, sj)
            row[xj] = float(dmat[i, j])
    return pdm
Ejemplo n.º 21
0
 def get_char_matrix(self, taxon_namespace=None):
     """
     Build a matrix of the class's ``matrix_type`` with ``nseqs`` taxa.

     Labels are the strings "0".."nseqs-1", shuffled with the class-level
     ``rng``. Each sequence is twice as long as ``sequence_source`` and is
     filled by cycling through it; the cycle continues from one taxon to
     the next. Every stored sequence is checked to be exactly of the
     class's ``sequence_type``.
     """
     cls = self.__class__
     matrix = cls.matrix_type(taxon_namespace=taxon_namespace)
     labels = [str(i) for i in range(cls.nseqs)]
     cls.rng.shuffle(labels)
     source_cycle = itertools.cycle(cls.sequence_source)
     nchar = len(cls.sequence_source) * 2
     for label in labels:
         taxon = dendropy.Taxon(label=label)
         matrix.taxon_namespace.add_taxon(taxon)
         matrix[taxon] = list(itertools.islice(source_cycle, nchar))
         self.assertTrue(isinstance(matrix[taxon], cls.sequence_type))
         self.assertIs(type(matrix[taxon]), cls.sequence_type)
     return matrix
 def test_add_taxon(self):
     """
     Each taxon added to the mapper must be retrievable by its label and
     by its 1-based position (as a string), and neither lookup may add
     taxa to the namespace.
     """
     labels = ["t{}".format(i) for i in range(1, 101)]
     tns = dendropy.TaxonNamespace()
     tsm = nexusprocessing.NexusTaxonSymbolMapper(taxon_namespace=tns)
     for position, label in enumerate(labels, start=1):
         taxon = dendropy.Taxon(label)
         tsm.add_taxon(taxon)
         self.assertEqual(len(tns), position)
         self.assertEqual(taxon.label, label)
         # Lookup by label.
         self.assertIs(tsm.require_taxon_for_symbol(label), taxon)
         self.assertEqual(len(tns), position)
         # Lookup by taxon number given as a string.
         self.assertIs(tsm.require_taxon_for_symbol(str(position)), taxon)
         self.assertEqual(len(tns), position)
     self.assertEqual(len(tns), len(labels))
 def test_no_number_lookup_and_create2(self):
     """
     With ``enable_lookup_by_taxon_number=False``, requiring a taxon for
     an integer symbol must create a new taxon labelled with that number
     rather than return the taxon at that position.
     """
     labels = ["t{}".format(i) for i in range(1, 101)]
     tns = dendropy.TaxonNamespace()
     tsm = nexusprocessing.NexusTaxonSymbolMapper(
         taxon_namespace=tns, enable_lookup_by_taxon_number=False)
     taxa = []
     for label in labels:
         registered = dendropy.Taxon(label)
         tsm.add_taxon(registered)
         taxa.append(registered)
     initial_count = len(labels)
     self.assertEqual(len(tns), initial_count)
     for number in range(1, initial_count + 1):
         created = tsm.require_taxon_for_symbol(number)
         self.assertNotIn(created, taxa)
         self.assertEqual(created.label, str(number))
         # One extra taxon per lookup performed so far.
         self.assertEqual(len(tns), len(labels) + number)
Ejemplo n.º 24
0
def insert(placed_edge, query_name, x_1, x_2):
    """Insert a query leaf on ``placed_edge`` by splitting the edge.

    A new internal node replaces the edge's head node under the tail
    node; the old head node and a new leaf for ``query_name`` (edge
    length ``x_1``) become its children. ``placed_edge.length`` is divided
    between the new node and the old head node at ``max(x_2, 0)``; which
    side receives the ``max(x_2, 0)`` part depends on where the edge lies
    relative to the module-level ``master_edge``.
    """
    upper = placed_edge.tail_node
    lower = placed_edge.head_node
    upper.remove_child(lower)

    joint = dy.Node()
    joint.add_child(lower)
    query_leaf = dy.Node(taxon=dy.Taxon(query_name))
    joint.add_child(query_leaf)
    query_leaf.edge_length = x_1
    upper.add_child(joint)

    offset = max(x_2, 0)
    on_master_path = (
        placed_edge.head_node in list(master_edge.head_node.ancestor_iter())
        or master_edge == placed_edge)
    # Assignment order matters: ``placed_edge.length`` is read before the
    # old head node's edge length is overwritten in both branches.
    if on_master_path:
        joint.edge_length = placed_edge.length - offset
        lower.edge_length = offset
    else:
        joint.edge_length = offset
        lower.edge_length = placed_edge.length - offset
Ejemplo n.º 25
0
def generateSpeciesTree(nspecies, mean_edge_length):
    """Grow a random species tree by repeatedly splitting a random leaf.

    Starting from a single node, ``nspecies - 1`` splits are performed;
    each pair of new children gets edge lengths from
    ``sample_edge_length(mean_edge_length)``. The final leaves are
    labelled with the names returned by ``generateNames(nspecies)``.

    Returns a ``dendropy.Tree`` seeded at the starting node.
    """
    root = dendropy.Node()
    tips = [root]
    for _ in range(nspecies - 1):
        pick = random.choice(range(len(tips)))
        left, right = splitLeaf(tips[pick])
        left.edge_length = sample_edge_length(mean_edge_length)
        right.edge_length = sample_edge_length(mean_edge_length)
        # Replace the split tip by its two children.
        tips.pop(pick)
        tips.extend((left, right))

    names = generateNames(nspecies)
    for position, tip in enumerate(tips):
        tip.taxon = dendropy.Taxon(label=names[position])
    return dendropy.Tree(seed_node=root)
Ejemplo n.º 26
0
def generateBalancedSpeciesTree(nspecies, elength):
    """Build a species tree by always splitting the oldest remaining leaf.

    ``nspecies`` must be a positive power of two. Leaves are kept in a
    first-in, first-out list: the front leaf is split, its two children
    (each with edge length ``elength``) are appended at the back. The
    final leaves are labelled with ``generateNames(nspecies)``.

    Returns a ``dendropy.Tree`` seeded at the starting node.
    """
    assert nspecies > 0 and (nspecies & (nspecies - 1)) == 0  # power of 2
    root = dendropy.Node()
    queue = [root]
    for _ in range(nspecies - 1):
        left, right = splitLeaf(queue[0])
        left.edge_length = elength
        right.edge_length = elength
        queue.pop(0)
        queue.extend((left, right))

    names = generateNames(nspecies)
    for position, tip in enumerate(queue):
        tip.taxon = dendropy.Taxon(label=names[position])
    return dendropy.Tree(seed_node=root)
Ejemplo n.º 27
0
def transfer_internal_node_labels_to_tree(source_tree_filename, destination_tree_filename, output_tree_filename,
                                          sequence_reconstructor):
    """Copy internal-node labels from one newick tree onto another.

    Internal nodes of the two trees are paired by their position in
    ``internal_nodes()``. Each source label (empty string when missing)
    is passed through
    ``sequence_reconstructor.replace_internal_node_label`` and installed
    on the matching destination node as a taxon; the destination node's
    own label is cleared. The result is written to
    ``output_tree_filename`` with single quotes removed.
    """
    source_tree = dendropy.Tree.get_from_path(
        source_tree_filename, 'newick', preserve_underscores=True)
    source_labels = [
        nd.label if nd.label else ''
        for nd in source_tree.internal_nodes()
    ]

    destination_tree = dendropy.Tree.get_from_path(
        destination_tree_filename, 'newick', preserve_underscores=True)
    for position, target in enumerate(destination_tree.internal_nodes()):
        renamed = sequence_reconstructor.replace_internal_node_label(
            str(source_labels[position]))
        target.label = None
        target.taxon = dendropy.Taxon(renamed)

    serialized = tree_as_string(destination_tree, suppress_internal=False, suppress_rooting=False)
    with open(output_tree_filename, 'w+') as output_file:
        output_file.write(serialized.replace('\'', ''))
def simulate_fossils_on_tree(tree, q):
    """Simulate fossils along every edge of ``tree`` and prune the rest.

    For each edge (reached through the tree's bipartition encoding) a
    fossil count is drawn with ``poisson(q * edge.length)``; each fossil
    gets a divergence age drawn along that edge and is attached with
    ``add_fossil_tip`` as a tip labelled ``f1``, ``f2``, ... with a very
    short branch. Afterwards only extant leaves and fossil tips are kept.

    The tree is modified in place and also returned.
    """
    # refresh the bipartitions, which are used to index the edges
    tree.update_bipartitions()
    root_age = tree.max_distance_from_root()
    # labels of extant leaves and fossil tips that survive the pruning
    kept_labels = []
    fossil_id = 1
    for enc_idx in range(len(tree.bipartition_encoding)):
        edge = tree.bipartition_edge_map[tree.bipartition_encoding[enc_idx]]
        # explicit comparison kept: the return type of is_extant_leaf is
        # not visible from here
        if is_extant_leaf(tree, edge.head_node) == True:
            kept_labels.append(edge.head_node.taxon.label)
        # number of fossils on this edge
        fossil_count = poisson(q * edge.length)
        # Divergence ages must be sorted so that 'add_fossil_tip' always
        # works on the same split node.
        divergence_ages = sorted([
            uniform(0, edge.length) +
            (root_age - edge.head_node.distance_from_root())
            for _ in range(fossil_count)
        ])
        for divergence_age in divergence_ages:
            fossil_label = 'f%s' % (fossil_id)
            # new fossil tip with a very small branch length
            fossil_tip = dp.Node(edge_length=0.00001,
                                 taxon=dp.Taxon(label=fossil_label))
            add_fossil_tip(root_age, edge.head_node, fossil_tip,
                           divergence_age)
            kept_labels.append(fossil_label)
            fossil_id += 1
    # make the namespace aware of the newly added fossil taxa
    tree.update_taxon_namespace()
    # prune everything that is neither extant nor a fossil
    tree.retain_taxa_with_labels(kept_labels)
    return tree
Ejemplo n.º 29
0
def generateMonoConcordantTree(sptree, genesPerSp):
    """Expand every species leaf into a random clade of gene copies.

    Works on a copy of ``sptree`` with all edge lengths cleared. Each
    species leaf is split ``genesPerSp - 1`` times at a randomly chosen
    tip, and the resulting tips are labelled ``<species><index>``.
    Taxa are then unassigned via ``unassign_taxa(exclude_leaves=True)``.

    Returns the new gene tree; ``sptree`` is not modified here.
    """
    gtree = dendropy.Tree(sptree)
    for node in gtree.postorder_node_iter():
        node.edge_length = None

    for species_leaf in gtree.leaf_nodes():
        species_name = species_leaf.taxon.label
        tips = [species_leaf]
        for _ in range(genesPerSp - 1):
            pick = random.choice(range(len(tips)))
            left, right = splitLeaf(tips[pick])
            # Replace the split tip by its two children.
            tips.pop(pick)
            tips.extend((left, right))
        for copy_index, tip in enumerate(tips):
            tip.taxon = dendropy.Taxon(label=species_name + str(copy_index))

    gtree.unassign_taxa(exclude_leaves=True)
    return gtree
Ejemplo n.º 30
0
    def transfer_internal_names_to_tree(self, source_tree, destination_tree,
                                        output_tree):
        """
        Copy internal-node labels from one newick file onto another tree.

        Internal nodes of the two trees are paired by their position in
        ``internal_nodes()``. Each destination node has its label cleared
        and receives a taxon named ``self.internal_node_prefix`` followed
        by the source label (empty string when the source node has none).
        The result is written with ``self.write_tree``.
        """
        source = dendropy.Tree.get_from_path(
            source_tree, 'newick', preserve_underscores=True)
        source_labels = [
            nd.label if nd.label else ''
            for nd in source.internal_nodes()
        ]

        destination = dendropy.Tree.get_from_path(
            destination_tree, 'newick', preserve_underscores=True)
        for position, target in enumerate(destination.internal_nodes()):
            target.label = None
            prefixed_name = (self.internal_node_prefix +
                             str(source_labels[position]))
            target.taxon = dendropy.Taxon(prefixed_name)
        self.write_tree(destination, output_tree)