def runTest(self):
    """Check the bit-index and bit-count helpers on a rooted six-taxon tree.

    The tree is parsed from a rooted (``[&R]``) Newick string, its
    bipartitions are encoded, and the split bitmasks of the root edge and of
    the root's first child edge are inspected through
    ``bitprocessing.indexes_of_set_bits`` and ``bitprocessing.num_set_bits``.
    """
    # rooted tree: so clade bitmasks
    newick = """[&R]((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247);"""
    trees = dendropy.TreeList.get_from_stream(StringIO(newick), "newick")
    for tree in trees:
        _LOG.debug(tree._get_indented_form())
        tree.encode_bipartitions()
        _LOG.debug(tree._get_indented_form(splits=True))
        tree._debug_check_tree(splits=True, logger_obj=_LOG)

    set_bit_indexes = bitprocessing.indexes_of_set_bits
    root = trees[0].seed_node

    # Each entry: (keyword arguments for indexes_of_set_bits, expected indexes).
    root_mask = root.edge.split_bitmask
    root_expectations = (
        ({}, list(range(6))),
        ({"one_based": True}, list(range(1, 7))),
        ({"fill_bitmask": 21, "one_based": True}, [1, 3, 5]),
        ({"fill_bitmask": 21}, [0, 2, 4]),
    )
    for options, expected in root_expectations:
        self.assertEqual(set_bit_indexes(root_mask, **options), expected)
    self.assertEqual(bitprocessing.num_set_bits(root_mask), 6)

    first_child_mask = root.child_nodes()[0].edge.split_bitmask
    first_child_expectations = (
        ({}, [0, 1]),
        ({"one_based": True}, [1, 2]),
        ({"fill_bitmask": 0x15, "one_based": True}, [1]),
        ({"fill_bitmask": 0x15}, [0]),
    )
    for options, expected in first_child_expectations:
        self.assertEqual(set_bit_indexes(first_child_mask, **options), expected)
    self.assertEqual(bitprocessing.num_set_bits(first_child_mask), 2)
Esempio n. 2
0
    def runTest(self):
        """Verify set-bit indexes and counts for clade bitmasks of a rooted tree.

        Covers the root edge (all six taxa) and the edge of the root's first
        child (two taxa), with and without a ``fill_bitmask`` and with both
        zero-based and one-based indexing.
        """
        # rooted tree: so clade bitmasks
        source = StringIO("""[&R]((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247);""")
        parsed_trees = dendropy.TreeList.get_from_stream(source, "newick")
        for parsed in parsed_trees:
            _LOG.debug(parsed._get_indented_form())
            parsed.encode_bipartitions()
            _LOG.debug(parsed._get_indented_form(splits=True))
            parsed._debug_check_tree(splits=True, logger_obj=_LOG)

        indexes = bitprocessing.indexes_of_set_bits
        count = bitprocessing.num_set_bits
        check = self.assertEqual

        # Root edge: every taxon is below the root, so all six bits are set.
        seed = parsed_trees[0].seed_node
        seed_edge = seed.edge
        check(indexes(seed_edge.split_bitmask), list(range(6)))
        check(indexes(seed_edge.split_bitmask, one_based=True), list(range(1, 7)))
        check(indexes(seed_edge.split_bitmask, fill_bitmask=21, one_based=True), [1, 3, 5])
        check(indexes(seed_edge.split_bitmask, fill_bitmask=21), [0, 2, 4])
        check(count(seed_edge.split_bitmask), 6)

        # First child of the root: only the two lowest bits are set.
        child_edge = seed.child_nodes()[0].edge
        check(indexes(child_edge.split_bitmask), [0, 1])
        check(indexes(child_edge.split_bitmask, one_based=True), [1, 2])
        check(indexes(child_edge.split_bitmask, fill_bitmask=0x15, one_based=True), [1])
        check(indexes(child_edge.split_bitmask, fill_bitmask=0x15), [0])
        check(count(child_edge.split_bitmask), 2)
Esempio n. 3
0
def getCentroidEdgeRandom(tree, minBound=5):
    """Pick, uniformly at random, an internal edge whose two sides are both large enough.

    An internal edge qualifies when it has a tail node and both the number of
    leaves in its bipartition's leafset bitmask and the number of remaining
    leaves are at least ``minBound``.

    Args:
        tree: tree providing ``seed_node.tree_leafset_bitmask`` and
            ``postorder_internal_edge_iter()``; its edges must expose
            ``bipartition.leafset_bitmask``.
        minBound: minimum number of leaves required on each side of the edge.

    Returns:
        One qualifying edge chosen with ``np.random.choice``.

    Raises:
        ValueError: raised by NumPy when no edge qualifies, since it refuses
            to sample from an empty sequence.
    """
    totalLeaves = bitprocessing.num_set_bits(
        tree.seed_node.tree_leafset_bitmask)

    def smallerSide(edge):
        # Leaf count of the smaller of the two sides induced by ``edge``.
        below = bitprocessing.num_set_bits(edge.bipartition.leafset_bitmask)
        return min(below, totalLeaves - below)

    eligible = [
        edge for edge in tree.postorder_internal_edge_iter()
        if edge.tail_node is not None and smallerSide(edge) >= minBound
    ]
    return np.random.choice(eligible)
Esempio n. 4
0
def count_bits(a):
    """Deprecated alias for ``bitprocessing.num_set_bits``.

    Emits a DendroPy deprecation warning and then returns
    ``bitprocessing.num_set_bits(a)`` unchanged.
    """
    warning_fields = {
        "preamble":
        "Deprecated since DendroPy 4: 'dendropy.treesplit.count_bits()'.",
        "old_construct":
        "from dendropy import treesplit\nd = treesplit.count_bits(...)",
        "new_construct":
        "from dendropy.utility import bitprocessing\nd = bitprocessing.num_set_bits(...)",
    }
    deprecate.dendropy_deprecation_warning(**warning_fields)
    return bitprocessing.num_set_bits(a)
Esempio n. 5
0
def getCentroidEdge(tree):
    """Return the edge that splits the tree's leaves most evenly.

    For every edge that has a tail node, the number of leaves in the edge's
    bipartition leafset bitmask is compared with half the total leaf count;
    the edge with the smallest absolute difference wins. On ties the first
    edge met in post-order is kept.

    Args:
        tree: tree providing ``seed_node.tree_leafset_bitmask`` and
            ``postorder_edge_iter()``; its edges must expose
            ``bipartition.leafset_bitmask``.

    Returns:
        The best-balanced edge, or ``None`` when the tree has no edge with a
        tail node (previously this case raised ``UnboundLocalError``).
    """
    numLeaves = bitprocessing.num_set_bits(tree.seed_node.tree_leafset_bitmask)
    # Loop-invariant target size for one side of the split.
    halfLeaves = numLeaves / 2
    bestBalance = float('inf')
    bestEdge = None
    for edge in tree.postorder_edge_iter():
        # Edges without a tail node have no parent side to split off.
        if edge.tail_node is None:
            continue
        balance = abs(
            halfLeaves -
            bitprocessing.num_set_bits(edge.bipartition.leafset_bitmask))
        if balance < bestBalance:
            bestBalance = balance
            bestEdge = edge
    return bestEdge
Esempio n. 6
0
def getBestHeuristicEdge(tree, max_subset_size, num_taxa):
    """Return the edge whose two induced subsets have the highest combined score.

    Every edge that has a tail node and whose head node carries a label is
    scored. The label is parsed as a float and passed, together with the
    size of one side of the split, ``max_subset_size`` and ``num_taxa``, to
    ``heuristic``; the edge score is the sum of the scores of both sides.

    Args:
        tree: tree providing ``seed_node.tree_leafset_bitmask`` and
            ``postorder_edge_iter()``; its edges must expose
            ``bipartition.leafset_bitmask`` and ``head_node.label``.
        max_subset_size: forwarded unchanged to ``heuristic``.
        num_taxa: forwarded unchanged to ``heuristic``.

    Returns:
        The first edge (in post-order) with the highest combined score, or
        ``None`` if no labelled edge scores above the initial threshold of -1.

    Raises:
        ValueError: if a head-node label cannot be converted to ``float``.
    """
    num_leaves = bitprocessing.num_set_bits(
        tree.seed_node.tree_leafset_bitmask)
    current_best_score = -1
    current_best_edge = None
    for edge in tree.postorder_edge_iter():
        if edge.tail_node is None:
            continue
        label = edge.head_node.label
        if label is None:
            continue
        # Count the bits once; the other side is simply the remainder.
        subset_L_size = bitprocessing.num_set_bits(
            edge.bipartition.leafset_bitmask)
        subset_R_size = num_leaves - subset_L_size
        label_value = float(label)
        current_L_score = heuristic(label_value, subset_L_size,
                                    max_subset_size, num_taxa)
        current_R_score = heuristic(label_value, subset_R_size,
                                    max_subset_size, num_taxa)
        combined_score = sum([current_L_score, current_R_score])
        # Strict comparison keeps the earliest edge on ties.
        if current_best_score < combined_score:
            current_best_score = combined_score
            current_best_edge = edge
    return current_best_edge
Esempio n. 7
0
def getLongestEdge(tree):
    """Return the longest edge of ``tree`` that has a tail node.

    Only edges with a strictly positive length can be selected, because the
    running maximum starts at 0. Edges whose ``length`` is ``None`` are
    skipped instead of being compared against a number. On ties the first
    edge met in post-order is kept.

    Args:
        tree: tree providing ``postorder_edge_iter()``; its edges must expose
            ``tail_node`` and ``length``.

    Returns:
        The longest qualifying edge, or ``None`` if there is none
        (previously this case raised ``UnboundLocalError`` because the
        fallback variable name was misspelled).
    """
    longest_edge = None
    longest_edge_length = 0
    for edge in tree.postorder_edge_iter():
        if edge.tail_node is None:
            continue
        current_length = edge.length
        # An unset length cannot be ordered against a number on Python 3.
        if current_length is None:
            continue
        if longest_edge_length < current_length:
            longest_edge_length = current_length
            longest_edge = edge
    return longest_edge
def frequency_of_bipartition_inclusive(tree_list, taxon_labels,
                                       return_locus_list):
    """Return the fraction of trees that contain the bipartition of ``taxon_labels``.

    For each tree, the requested labels are first restricted to the labels
    actually present among that tree's leaves, so trees missing some of the
    taxa can still count as hits. Unrooted trees are matched against the
    normalized bitmask, rooted trees against the bitmask as-is.

    Args:
        tree_list: collection of trees sharing ``tree_list.taxon_namespace``.
        taxon_labels: labels of the taxa on one side of the bipartition.
        return_locus_list: when true, also return the ``_label`` of every
            tree in which the bipartition was found.

    Returns:
        ``found / total`` as a float, or ``(found / total, locus_list)`` when
        ``return_locus_list`` is true. For an empty ``tree_list`` the legacy
        values ``0`` and ``(0, 0)`` respectively are returned.

    Raises:
        IndexError: if the number of bits set for ``taxon_labels`` in the
            namespace bitmask differs from ``len(taxon_labels)``.
    """
    namespace = tree_list.taxon_namespace
    split = namespace.taxa_bitmask(labels=taxon_labels)
    k = len(taxon_labels)
    if bitprocessing.num_set_bits(split) != k:
        raise IndexError('Not all taxa could be mapped to bipartition (%s): %s' \
            % (namespace.bitmask_as_bitstring(split), k))
    found = 0
    total = 0
    locus_list = []
    for tree in tree_list:
        # A set makes the per-label membership test constant time.
        tree_labels = set(leaf.taxon.label for leaf in tree.leaf_nodes())
        labels_present = [name for name in taxon_labels if name in tree_labels]
        target_split = namespace.taxa_bitmask(labels=labels_present)
        # Bipartitions are re-encoded for every tree, as before.
        tree.encode_bipartitions()
        bipartition_encoding = set(b.split_bitmask
                                   for b in tree.bipartition_encoding)
        total += 1
        if tree.is_unrooted:
            target_split = treemodel.Bipartition.normalize_bitmask(
                bitmask=target_split,
                fill_bitmask=namespace.all_taxa_bitmask(),
                lowest_relevant_bit=1)
        if target_split in bipartition_encoding:
            found += 1
            if return_locus_list:
                locus_list.append(tree._label)
    if total == 0:
        # NOTE(review): the list variant returns 0 rather than an empty list
        # here; kept for backward compatibility with existing callers.
        return (0, 0) if return_locus_list else 0
    frequency = float(found) / total
    if return_locus_list:
        return frequency, locus_list
    return frequency
Esempio n. 9
0
    def masked_frequency_of_bipartition(self, **kwargs):
        """Adaptation of dendropy.TreeList.frequency_of_bipartition that takes a taxon mask.

        This allows identifying splits on a subset of taxa within a larger tree without
        pruning any tree structures, which is much slower.

        Given a split or bipartition specified as:

            - a split bitmask given the keyword 'split_bitmask'
            - a list of `Taxon` objects given with the keyword `taxa`
            - a list of taxon labels given with the keyword `labels`
            - a list of oids given with the keyword `oids`

        this function returns the proportion of trees in self in which the
        split is found.

        The optional keyword `mask` is a bitmask restricting the comparison
        to a subset of taxa; it defaults to the namespace's all-taxa bitmask.

        Returns 0.0 when self contains no trees.

        Raises TypeError if neither 'split_bitmask' nor a taxon specification
        is given, and IndexError if the taxon specification does not map to
        as many bits as it has entries.
        """
        if "mask" in kwargs:
            partialMask = kwargs["mask"]
        else:
            partialMask = self.taxon_namespace.all_taxa_bitmask()

        if "split_bitmask" in kwargs:
            targetSplit = kwargs["split_bitmask"]
        else:
            # 'mask' is consumed here; it is not a taxon specification, so it
            # must be neither forwarded nor mistaken for the taxon list.
            taxaKwargs = dict(
                (key, value) for key, value in kwargs.items() if key != "mask")
            if not taxaKwargs:
                raise TypeError(
                    "Expected 'split_bitmask' or a taxon specification "
                    "('taxa', 'labels' or 'oids')")
            targetSplit = self.taxon_namespace.get_taxa_bitmask(**taxaKwargs)
            # dict views are not indexable on Python 3, so take the first
            # value through an iterator instead of values()[0].
            k = next(iter(taxaKwargs.values()))
            if bitprocessing.num_set_bits(targetSplit) != len(k):
                raise IndexError('Not all taxa could be mapped to split (%s): %s' 
                    % (self.taxon_namespace.split_bitmask_string(targetSplit), k))
        # Complement of the target within the mask; the same for every tree.
        compSplit = (~targetSplit & partialMask)
        found = 0
        total = 0
        for tree in self:
            tree.compat_encode_bipartitions()
            total += 1
            for test_split in tree.reference_tree.bipartition_encoding:
                # One incompatible split rules the tree out.
                if not treesplit.is_compatible(test_split, targetSplit, partialMask):
                    break
                masked_test = (test_split & partialMask)
                if targetSplit == masked_test or compSplit == masked_test:
                    found += 1
                    break

        if total == 0:
            return 0.0
        return float(found) / total
Esempio n. 10
0
def count_bits(a):
    """Deprecated: forward to ``bitprocessing.num_set_bits`` after warning.

    A DendroPy deprecation warning naming the replacement call is issued on
    every invocation; the result of ``bitprocessing.num_set_bits(a)`` is
    returned as-is.
    """
    preamble = "Deprecated since DendroPy 4: 'dendropy.treesplit.count_bits()'."
    old_usage = "from dendropy import treesplit\nd = treesplit.count_bits(...)"
    new_usage = "from dendropy.utility import bitprocessing\nd = bitprocessing.num_set_bits(...)"
    deprecate.dendropy_deprecation_warning(
            preamble=preamble,
            old_construct=old_usage,
            new_construct=new_usage)
    result = bitprocessing.num_set_bits(a)
    return result
 def runTest(self):
     """Check that 21 (binary 10101) is reported as having three set bits."""
     set_bit_count = bitprocessing.num_set_bits(21)
     self.assertEqual(set_bit_count, 3)
Esempio n. 12
0
 def runTest(self):
     """Assert that ``bitprocessing.num_set_bits`` counts three bits in 21."""
     expected_bits = 3
     observed_bits = bitprocessing.num_set_bits(21)
     self.assertEqual(observed_bits, expected_bits)