def runTest(self):
    """Verify bit-index and bit-count queries on split bitmasks of a rooted tree.

    A six-taxon rooted Newick tree is parsed and its bipartitions encoded.
    The split bitmasks of the root edge and of the root's first child edge
    are then checked with ``bitprocessing.indexes_of_set_bits`` (plain,
    one-based, and restricted by ``fill_bitmask``) and with
    ``bitprocessing.num_set_bits``.
    """
    # rooted tree: so clade bitmasks
    newick_stream = StringIO("""[&R]((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247);""")
    tree_list = dendropy.TreeList.get_from_stream(newick_stream, "newick")
    for tree in tree_list:
        _LOG.debug(tree._get_indented_form())
        tree.encode_bipartitions()
        _LOG.debug(tree._get_indented_form(splits=True))
        tree._debug_check_tree(splits=True, logger_obj=_LOG)

    def check_edge(edge, index_expectations, expected_bit_count):
        # Each entry pairs the keyword options for indexes_of_set_bits
        # with the list of indexes that call must return.
        for options, expected_indexes in index_expectations:
            self.assertEqual(
                bitprocessing.indexes_of_set_bits(edge.split_bitmask, **options),
                expected_indexes)
        self.assertEqual(
            bitprocessing.num_set_bits(edge.split_bitmask),
            expected_bit_count)

    root = tree_list[0].seed_node
    check_edge(
        root.edge,
        [
            ({}, list(range(6))),
            ({"one_based": True}, list(range(1, 7))),
            ({"fill_bitmask": 21, "one_based": True}, [1, 3, 5]),
            ({"fill_bitmask": 21}, [0, 2, 4]),
        ],
        6)

    first_child = root.child_nodes()[0]
    check_edge(
        first_child.edge,
        [
            ({}, [0, 1]),
            ({"one_based": True}, [1, 2]),
            ({"fill_bitmask": 0x15, "one_based": True}, [1]),
            ({"fill_bitmask": 0x15}, [0]),
        ],
        2)
def getCentroidEdgeRandom(tree, minBound=5):
    """Randomly pick an edge that leaves at least ``minBound`` leaves on each side.

    Edges come from ``tree.postorder_internal_edge_iter()``; edges without a
    tail node are ignored. For every remaining edge the number of set bits
    in ``edge.bipartition.leafset_bitmask`` is one side's leaf count, and
    the other side is the tree's total leaf count minus that number.

    Args:
        tree: tree whose ``seed_node.tree_leafset_bitmask`` and edge
            bipartitions are already populated.
        minBound: minimum number of leaves required on each side.

    Returns:
        One qualifying edge chosen with ``np.random.choice``.

    Note:
        When no edge qualifies, ``np.random.choice`` is called on an empty
        list (NumPy raises ``ValueError`` for an empty population).
    """
    count_bits = bitprocessing.num_set_bits
    total_leaves = count_bits(tree.seed_node.tree_leafset_bitmask)

    # Pair every non-root edge with the leaf count on its head side.
    sized_edges = (
        (edge, count_bits(edge.bipartition.leafset_bitmask))
        for edge in tree.postorder_internal_edge_iter()
        if edge.tail_node is not None
    )
    eligible = [
        edge
        for edge, side_a in sized_edges
        if side_a >= minBound and total_leaves - side_a >= minBound
    ]
    return np.random.choice(eligible)
def count_bits(a):
    """Deprecated alias for ``bitprocessing.num_set_bits``.

    Emits a DendroPy deprecation warning pointing callers at
    ``dendropy.utility.bitprocessing.num_set_bits`` and then returns that
    function's result for ``a``.
    """
    warning_fields = {
        "preamble": "Deprecated since DendroPy 4: 'dendropy.treesplit.count_bits()'.",
        "old_construct": "from dendropy import treesplit\nd = treesplit.count_bits(...)",
        "new_construct": "from dendropy.utility import bitprocessing\nd = bitprocessing.num_set_bits(...)",
    }
    deprecate.dendropy_deprecation_warning(**warning_fields)
    return bitprocessing.num_set_bits(a)
def getCentroidEdge(tree):
    """Return the edge that divides the tree's leaves most evenly.

    Every edge from ``tree.postorder_edge_iter()`` that has a tail node is
    scored by ``abs(numLeaves / 2 - leaves_under_edge)``, where both counts
    are the number of set bits in the corresponding leafset bitmask. The
    edge with the smallest score wins; on ties the first edge visited is
    kept because the comparison is strict.

    Args:
        tree: tree whose ``seed_node.tree_leafset_bitmask`` and edge
            bipartitions are already populated.

    Returns:
        The best-balanced edge, or ``None`` when the tree has no edge with
        a tail node (previously this case raised ``UnboundLocalError``).
    """
    numLeaves = bitprocessing.num_set_bits(tree.seed_node.tree_leafset_bitmask)
    # Loop-invariant target; uses the same ``/`` expression as before so the
    # division semantics of the surrounding module are unchanged.
    halfLeaves = numLeaves / 2

    bestEdge = None
    bestBalance = float('inf')
    for edge in tree.postorder_edge_iter():
        if edge.tail_node is None:
            continue
        balance = abs(
            halfLeaves
            - bitprocessing.num_set_bits(edge.bipartition.leafset_bitmask))
        if balance < bestBalance:
            bestBalance = balance
            bestEdge = edge
    return bestEdge
def getBestHeuristicEdge(tree, max_subset_size, num_taxa):
    """Return the labelled edge with the highest combined heuristic score.

    For each edge from ``tree.postorder_edge_iter()`` that has a tail node
    and whose head node carries a label, the label is parsed with
    ``float`` and ``heuristic`` is evaluated once for each side of the
    edge (leaf count under the edge, and the remaining leaves). The two
    results are added and the edge with the largest sum is kept.

    Args:
        tree: tree whose ``seed_node.tree_leafset_bitmask`` and edge
            bipartitions are already populated.
        max_subset_size: forwarded unchanged to ``heuristic``.
        num_taxa: forwarded unchanged to ``heuristic``.

    Returns:
        The best-scoring edge, or ``None`` if no edge qualifies or no
        combined score exceeds the initial threshold of ``-1``. Ties keep
        the first edge visited because the comparison is strict.

    Raises:
        ValueError: if a head-node label cannot be parsed by ``float``.
    """
    num_leaves = bitprocessing.num_set_bits(
        tree.seed_node.tree_leafset_bitmask)
    current_best_score = -1
    current_best_edge = None

    for edge in tree.postorder_edge_iter():
        if edge.tail_node is None:
            continue
        label = edge.head_node.label
        if label is None:
            continue

        label_value = float(label)
        # One popcount gives both sides: the rest of the leaves are on the
        # other side of the edge.
        subset_L_size = bitprocessing.num_set_bits(
            edge.bipartition.leafset_bitmask)
        subset_R_size = num_leaves - subset_L_size

        current_L_score = heuristic(
            label_value, subset_L_size, max_subset_size, num_taxa)
        current_R_score = heuristic(
            label_value, subset_R_size, max_subset_size, num_taxa)
        combined_score = current_L_score + current_R_score

        if current_best_score < combined_score:
            current_best_score = combined_score
            current_best_edge = edge

    return current_best_edge
def getLongestEdge(tree):
    """Return the edge of ``tree`` with the greatest positive length.

    Edges are visited through ``tree.postorder_edge_iter()``. Edges with no
    tail node are skipped, as are edges whose ``length`` is ``None`` (an
    unset length cannot be compared with a number on Python 3). On ties the
    first edge visited is kept because the comparison is strict.

    Args:
        tree: object providing ``postorder_edge_iter()``, whose edges
            expose ``tail_node`` and ``length``.

    Returns:
        The longest edge, or ``None`` when no visited edge has a length
        greater than zero (previously this case raised
        ``UnboundLocalError`` because of a misspelled initialiser).
    """
    longest_edge = None
    longest_edge_length = 0
    for edge in tree.postorder_edge_iter():
        if edge.tail_node is None:
            continue
        current_length = edge.length
        if current_length is None:
            continue
        if longest_edge_length < current_length:
            longest_edge_length = current_length
            longest_edge = edge
    return longest_edge
def frequency_of_bipartition_inclusive(tree_list, taxon_labels, return_locus_list):
    """Return the fraction of trees that contain the split defined by ``taxon_labels``.

    For every tree, only those labels from ``taxon_labels`` that actually
    occur among the tree's leaves are used to build the split bitmask, so
    a tree that lacks some of the taxa can still be counted. Each tree's
    bipartitions are (re-)encoded on every call. Unrooted trees are matched
    against the normalized bitmask, rooted trees against the raw one.

    Args:
        tree_list: iterable of trees sharing ``tree_list.taxon_namespace``.
        taxon_labels: labels of the taxa on one side of the split.
        return_locus_list: when true, also return the ``_label`` of every
            tree in which the split was found.

    Returns:
        ``found / total`` as a float, or the tuple
        ``(found / total, locus_list)`` when ``return_locus_list`` is true.
        For an empty ``tree_list`` the result is ``0`` or ``(0, 0)``
        respectively (the second element is ``0``, not an empty list; kept
        for backward compatibility).

    Raises:
        IndexError: if the number of set bits in the bitmask built from
            ``taxon_labels`` differs from ``len(taxon_labels)``.
    """
    namespace = tree_list.taxon_namespace
    split = namespace.taxa_bitmask(labels=taxon_labels)
    k = len(taxon_labels)
    if bitprocessing.num_set_bits(split) != k:
        raise IndexError('Not all taxa could be mapped to bipartition (%s): %s'
                         % (namespace.bitmask_as_bitstring(split), k))

    found = 0
    total = 0
    locus_list = []
    for tree in tree_list:
        # A set makes each membership test O(1) instead of scanning a list.
        tree_labels = set(leaf.taxon.label for leaf in tree.leaf_nodes())
        labels_present = [name for name in taxon_labels if name in tree_labels]
        unnormalized_split = namespace.taxa_bitmask(labels=labels_present)

        tree.encode_bipartitions()
        encoded_splits = set(b.split_bitmask for b in tree.bipartition_encoding)
        total += 1

        if tree.is_unrooted:
            target_split = treemodel.Bipartition.normalize_bitmask(
                bitmask=unnormalized_split,
                fill_bitmask=namespace.all_taxa_bitmask(),
                lowest_relevant_bit=1)
        else:
            target_split = unnormalized_split

        if target_split in encoded_splits:
            found += 1
            if return_locus_list:
                locus_list.append(tree._label)

    if total == 0:
        # Same values the original returned on ZeroDivisionError.
        return (0, 0) if return_locus_list else 0

    frequency = float(found) / total
    if return_locus_list:
        return frequency, locus_list
    return frequency
def masked_frequency_of_bipartition(self, **kwargs):
    """Adaptation of dendropy.TreeList.frequency_of_bipartition that takes a taxon mask.

    This allows identifying splits on a subset of taxa within a larger tree
    without pruning any tree structures, which is much slower.

    Given a split or bipartition specified as:

        - a split bitmask given the keyword 'split_bitmask'
        - a list of `Taxon` objects given with the keyword `taxa`
        - a list of taxon labels given with the keyword `labels`
        - a list of oids given with the keyword `oids`

    this function returns the proportion of trees in self in which the
    split is found.

    The optional keyword `mask` is a taxon bitmask restricting the
    comparison; it defaults to ``self.taxon_namespace.all_taxa_bitmask()``.
    `mask` is not forwarded to ``get_taxa_bitmask`` and is not mistaken for
    the taxon selection when checking that every taxon was mapped.

    Raises:
        IndexError: if the number of set bits in the derived split differs
            from the number of selected taxa, or if no selection keyword
            was supplied.
        ZeroDivisionError: if ``self`` contains no trees.
    """
    if "mask" in kwargs:
        partialMask = kwargs["mask"]
    else:
        partialMask = self.taxon_namespace.all_taxa_bitmask()

    if "split_bitmask" in kwargs:
        targetSplit = kwargs["split_bitmask"]
    else:
        # Only the taxon-selection keyword(s) describe the split; 'mask'
        # must not leak into the lookup or into the length check below.
        selection = dict(
            (key, value) for key, value in kwargs.items() if key != "mask")
        targetSplit = self.taxon_namespace.get_taxa_bitmask(**selection)
        # list(...) keeps this working on Python 3, where dict views cannot
        # be indexed; an empty selection still raises IndexError.
        k = list(selection.values())[0]
        if bitprocessing.num_set_bits(targetSplit) != len(k):
            raise IndexError('Not all taxa could be mapped to split (%s): %s'
                             % (self.taxon_namespace.split_bitmask_string(targetSplit), k))

    # Complement of the target within the mask; invariant across trees.
    compSplit = (~targetSplit & partialMask)

    found = 0
    total = 0
    for tree in self:
        tree.compat_encode_bipartitions()
        total += 1
        # NOTE(review): entries of bipartition_encoding are combined with
        # ``&`` below, so they are assumed to behave like integer bitmasks;
        # confirm against the tree class in use.
        for test_split in tree.reference_tree.bipartition_encoding:
            if not treesplit.is_compatible(test_split, targetSplit, partialMask):
                # An incompatible split ends the search for this tree.
                break
            masked_test = (test_split & partialMask)
            if targetSplit == masked_test or compSplit == masked_test:
                found += 1
                break
    return float(found) / total
def count_bits(a):
    """Return the number of set bits in ``a`` (deprecated entry point).

    A DendroPy deprecation warning is issued first; the count itself is
    delegated to ``bitprocessing.num_set_bits``.
    """
    preamble = "Deprecated since DendroPy 4: 'dendropy.treesplit.count_bits()'."
    old_construct = "from dendropy import treesplit\nd = treesplit.count_bits(...)"
    new_construct = "from dendropy.utility import bitprocessing\nd = bitprocessing.num_set_bits(...)"
    deprecate.dendropy_deprecation_warning(
        preamble=preamble,
        old_construct=old_construct,
        new_construct=new_construct,
    )
    return bitprocessing.num_set_bits(a)
def runTest(self):
    """Check that ``num_set_bits`` counts the three set bits of 21 (0b10101)."""
    value = 21
    expected_set_bits = 3
    self.assertEqual(bitprocessing.num_set_bits(value), expected_set_bits)