def test_binary_partition_tree_average_linkage2(self):
    """Average linkage on a hand-built 10-vertex graph with unit edge weighting.

    Checks both the parent array of the resulting tree and the node
    altitudes (compared with a floating point tolerance).
    """
    sources = (0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 7, 7)
    targets = (3, 6, 4, 2, 5, 3, 6, 9, 7, 3, 8, 5, 9, 4, 6, 9, 7, 8, 6, 9, 8)

    graph = hg.UndirectedGraph(10)
    graph.add_edges(sources, targets)

    # One value per edge, in the same order as the (sources, targets) pairs.
    edge_values = np.array(
        [0.87580029, 0.60123697, 0.79924759, 0.74221387, 0.75418382,
         0.66159356, 1.31856839, 0.76080612, 1.08881471, 0.98557615,
         0.61454158, 0.50913424, 0.63556478, 0.64684775, 1.14865302,
         0.81741018, 2.1591071, 0.60563004, 2.06636665, 1.35617725,
         0.83085949],
        dtype=np.float64)

    tree, altitudes = hg.binary_partition_tree_average_linkage(graph, edge_values)

    expected_parents = np.array(
        [11, 14, 10, 13, 15, 10, 11, 18, 12, 13,
         12, 17, 16, 14, 15, 16, 17, 18, 18],
        dtype=np.int64)
    # The ten leaves sit at altitude 0; the nine inner nodes follow.
    expected_altitudes = np.array(
        [0.] * 10
        + [0.509134, 0.601237, 0.610086, 0.635565, 0.661594,
           0.732129, 0.810695, 1.241727, 1.35874],
        dtype=np.float64)

    parents_match = np.all(expected_parents == tree.parents())
    self.assertTrue(parents_match)

    altitudes_match = np.allclose(expected_altitudes, altitudes)
    self.assertTrue(altitudes_match)
def test_binary_partition_tree_exponential_linkage_equiv(self):
    """Exponential linkage must coincide with its limit cases.

    On a random 10x10 4-adjacency graph:

    * ``alpha = 0`` is compared with average linkage (weighted),
    * ``alpha = +inf`` is compared with complete linkage,
    * ``alpha = -inf`` is compared with single linkage.
    """
    np.random.seed(10)
    g = hg.get_4_adjacency_graph((10, 10))
    edge_weights = np.random.rand(g.num_edges())
    edge_weight_weights = np.random.randint(1, 10, g.num_edges())

    def check_against(alpha, reference_linkage, *reference_args):
        # Exponential linkage is computed first, then the reference hierarchy.
        tree, altitudes = hg.binary_partition_tree_exponential_linkage(
            g, edge_weights, alpha, edge_weight_weights)
        t_ref, alt_ref = reference_linkage(g, edge_weights, *reference_args)

        same_parents = np.all(tree.parents() == t_ref.parents())
        self.assertTrue(same_parents)
        close_altitudes = np.allclose(altitudes, alt_ref)
        self.assertTrue(close_altitudes)

    check_against(0, hg.binary_partition_tree_average_linkage, edge_weight_weights)
    check_against(float('inf'), hg.binary_partition_tree_complete_linkage)
    check_against(float('-inf'), hg.binary_partition_tree_single_linkage)
def binary_partition_tree_exponential_linkage(graph, edge_weights, alpha, edge_weight_weights=None):
    r"""
    Binary partition tree with exponential linkage distance.

    Given a graph :math:`G=(V, E)`, with initial edge weights :math:`w` with associated weights :math:`w_2`,
    the distance :math:`d(X,Y)` between any two clusters :math:`X` and :math:`Y` is

    .. math::

        d(X,Y) = \frac{1}{Z} \sum_{x \in X, y \in Y, \{x,y\} \in E} w_2(\{x,y\}) \times \exp(\alpha * w(\{x,y\})) \times w(\{x,y\})

    with :math:`Z = \sum_{x \in X, y \in Y, \{x,y\} \in E} w_2(\{x,y\}) \times \exp(\alpha * w(\{x,y\}))`.

    Three values of :attr:`alpha` are dispatched to dedicated algorithms instead of the generic one:

    - ``alpha == 0``: average linkage (using :attr:`edge_weight_weights`);
    - ``alpha == -inf``: single linkage (:attr:`edge_weight_weights` is not used);
    - ``alpha == +inf``: complete linkage (:attr:`edge_weight_weights` is not used).

    :See:

         Nishant Yadav, Ari Kobren, Nicholas Monath, Andrew Mccallum.
         `Supervised Hierarchical Clustering with Exponential Linkage <http://proceedings.mlr.press/v97/yadav19a.html>`_
         Proceedings of the 36th International Conference on Machine Learning, PMLR 97:6973-6983, 2019.

    :param graph: input graph
    :param edge_weights: edge weights of the input graph
    :param alpha: exponential parameter (anything accepted by :func:`float`)
    :param edge_weight_weights: weighting of edge weights of the input graph (default to an array of ones)
    :return: a tree (Concept :class:`~higra.CptHierarchy`) and its node altitudes
    """
    # Normalise alpha once so the comparisons below work for ints, numpy scalars, strings like "inf", ...
    alpha = float(alpha)

    if edge_weight_weights is None:
        edge_weight_weights = np.ones_like(edge_weights)
    else:
        # Both arrays are handed to the same backend call: bring them to a common dtype first.
        edge_weights, edge_weight_weights = hg.cast_to_common_type(edge_weights, edge_weight_weights)

    # special cases: improve efficiency and avoid numerical issues
    if alpha == 0:
        tree, altitudes = hg.binary_partition_tree_average_linkage(graph, edge_weights, edge_weight_weights)
    elif alpha == float('-inf'):
        tree, altitudes = hg.binary_partition_tree_single_linkage(graph, edge_weights)
    elif alpha == float('inf'):
        tree, altitudes = hg.binary_partition_tree_complete_linkage(graph, edge_weights)
    else:
        res = hg.cpp._binary_partition_tree_exponential_linkage(graph, edge_weights, alpha, edge_weight_weights)
        tree = res.tree()
        altitudes = res.altitudes()

    # Attach the input graph to the resulting tree (hierarchy concept) whichever branch produced it.
    hg.CptHierarchy.link(tree, graph)

    return tree, altitudes
def sparse_avg_hac(coo_pw_sim_mat):
    """Average-linkage HAC over the edges stored in a sparse COO matrix.

    The matrix is converted to a higra graph with ``coo_2_hg``, clustered with
    ``hg.binary_partition_tree_average_linkage`` and the resulting hierarchy
    is exported as a scipy-style linkage matrix.

    :param coo_pw_sim_mat: N by N coo matrix w/ pairwise sim matrix
    :return: tuple ``(Z, tree, altitudes, graph, edge_weights)`` where ``Z``
        is the linkage matrix (as in scipy linkage) and the remaining items
        are the higra objects produced along the way
    """
    graph_and_weights = coo_2_hg(coo_pw_sim_mat)
    ugraph, edge_weights = graph_and_weights

    hierarchy = hg.binary_partition_tree_average_linkage(ugraph, edge_weights)
    tree, altitudes = hierarchy

    linkage_matrix = hg.binary_hierarchy_to_scipy_linkage_matrix(
        tree, altitudes=altitudes)

    return linkage_matrix, tree, altitudes, ugraph, edge_weights
def test_binary_partition_tree_average_linkage(self):
    """Weighted average linkage on a 3x3 4-adjacency grid.

    Each edge carries a value and a weight; the test compares the parent
    array and the node levels of the result with reference values.
    """
    graph = hg.get_4_adjacency_graph((3, 3))

    # Per-edge values and their weighting, both stored as float32.
    edge_values = np.array(
        [1, 7, 2, 10, 16, 3, 11, 4, 12, 14, 5, 6], dtype=np.float32)
    edge_weights = np.array(
        [7, 1, 7, 3, 2, 8, 2, 2, 2, 1, 5, 9], dtype=np.float32)

    tree, levels = hg.binary_partition_tree_average_linkage(
        graph, edge_values, edge_weights)

    expected_parents = np.array(
        [9, 9, 10, 11, 11, 12, 13, 13, 14,
         10, 15, 12, 15, 14, 16, 16, 16],
        dtype=np.uint32)
    # Nine leaves at level 0 followed by the eight inner nodes.
    expected_levels = np.array(
        [0] * 9 + [1, 2, 3, 4, 5, 6, 11.5, 12], dtype=np.float32)

    parents_match = np.all(expected_parents == tree.parents())
    self.assertTrue(parents_match)

    levels_match = np.all(expected_levels == levels)
    self.assertTrue(levels_match)