def run_orange3(Z, D):
    """Time Orange3's ``optimal_leaf_ordering`` on a precomputed linkage.

    Parameters
    ----------
    Z
        Linkage passed to ``tree_from_linkage`` to build the cluster tree.
    D
        Distance data; it is run through ``squareform`` before being handed
        to ``optimal_leaf_ordering``.

    Returns
    -------
    tuple
        ``(elapsed_seconds, None)``. The ordering result itself is discarded
        and the second element is always ``None``.
    """
    # Function-scope import: Orange is only loaded when this runner is called.
    import Orange.clustering.hierarchical as orange_hier

    # Tree construction happens before the clock starts.
    tree = orange_hier.tree_from_linkage(Z)

    # perf_counter() is monotonic and high resolution; time.time() follows the
    # system clock and can jump, which corrupts interval measurements.
    start_time = time.perf_counter()
    # The squareform() conversion stays inside the timed region.
    orange_hier.optimal_leaf_ordering(tree, squareform(D))
    end_time = time.perf_counter()

    return end_time - start_time, None
def cluster_columns(self, data, parts, ordered=False):
    """Attach a column clustering (and optionally its ordering) to `parts`.

    Results already stored on the single column group are reused; whatever
    is missing is computed from ``Orange.distance.PearsonR(data, axis=0)``
    with Ward linkage. The optimal leaf ordering is only computed when
    `ordered` is true.

    Returns a copy of `parts` whose column groups carry the ``cluster`` and
    ``cluster_ordered`` values.
    """
    assert len(parts.columns) == 1, "columns split is no longer supported"
    assert all(var.is_continuous for var in data.domain.attributes)

    group = parts.columns[0]
    tree = group.cluster
    ordered_tree = group.cluster_ordered

    # Distances are needed only if something still has to be computed.
    matrix = None
    if tree is None or (ordered and ordered_tree is None):
        data = Orange.distance._preprocess(data)
        # nan values break clustering below
        matrix = np.nan_to_num(
            np.asarray(Orange.distance.PearsonR(data, axis=0)))

    if tree is None:
        assert matrix is not None
        assert len(matrix) < self.MaxClustering
        tree = hierarchical.dist_matrix_clustering(
            matrix, linkage=hierarchical.WARD)

    if ordered and ordered_tree is None:
        assert len(matrix) < self.MaxOrderedClustering
        ordered_tree = hierarchical.optimal_leaf_ordering(tree, matrix)

    return parts._replace(columns=[
        col._replace(cluster=tree, cluster_ordered=ordered_tree)
        for col in parts.columns
    ])
def cluster_rows(self, data: Table, parts: 'Parts', ordered=False) -> 'Parts':
    """Attach a row clustering (and optionally its ordering) to each group.

    For every row group in `parts`, previously stored results are reused.
    Groups with ``can_cluster`` set get any missing clustering computed from
    Euclidean distances over ``data[row.indices]`` with Ward linkage; the
    optimal leaf ordering is only computed when `ordered` is true.

    Returns a copy of `parts` with the updated row groups.
    """
    updated = []
    for group in parts.rows:
        tree = group.cluster
        ordered_tree = group.cluster_ordered

        if not group.can_cluster:
            # Nothing to compute; keep whatever the group already carries.
            updated.append(
                group._replace(cluster=tree, cluster_ordered=ordered_tree))
            continue

        # Distances are needed only if something still has to be computed.
        matrix = None
        if tree is None or (ordered and ordered_tree is None):
            matrix = Orange.distance.Euclidean(data[group.indices])

        if tree is None:
            assert len(matrix) < self.MaxClustering
            tree = hierarchical.dist_matrix_clustering(
                matrix, linkage=hierarchical.WARD)

        if ordered and ordered_tree is None:
            assert len(matrix) < self.MaxOrderedClustering
            ordered_tree = hierarchical.optimal_leaf_ordering(tree, matrix)

        updated.append(
            group._replace(cluster=tree, cluster_ordered=ordered_tree))

    return parts._replace(rows=updated)
def cluster_data(self, matrix):
    """Return optimally ordered row and column indices for `matrix`.

    Rows and then columns are clustered on Euclidean distances and reordered
    with optimal leaf ordering, reporting progress via ``progressBarSet``.
    An axis that has at most one item is not clustered; its order is
    ``np.array([0])``.

    Returns a ``(row_order, columns_order)`` pair of index arrays.
    """
    def leaf_order(distances):
        # Cluster, reorder the leaves, and read back their original indices.
        tree = hierarchical.dist_matrix_clustering(distances)
        reordered = hierarchical.optimal_leaf_ordering(
            tree, distances, progress_callback=self.progressBarSet)
        return np.array([leaf.value.index for leaf in leaves(reordered)])

    with self.progressBar():
        # rows first ...
        if len(matrix) > 1:
            row_order = leaf_order(Euclidean(matrix))
        else:
            row_order = np.array([0])

        # ... then columns
        if matrix.X.shape[1] > 1:
            columns_order = leaf_order(Euclidean(matrix, axis=0))
        else:
            columns_order = np.array([0])

    return row_order, columns_order
def test_optimal_ordering(self):
    # Optimal leaf ordering must keep the root range and the set of leaves,
    # while lowering the summed distance between neighbouring leaves.

    def leaf_indices(tree):
        return [node.value.index for node in hierarchical.leaves(tree)]

    def adjacent(items):
        # Yield (next, current) for every pair of neighbouring items.
        ahead, behind = tee(items)
        next(ahead)
        yield from zip(ahead, behind)

    def path_cost(tree):
        return sum(
            self.matrix[a, b] for a, b in adjacent(leaf_indices(tree)))

    reordered = hierarchical.optimal_leaf_ordering(self.cluster, self.matrix)

    self.assertEqual(reordered.value.range, self.cluster.value.range)
    self.assertSetEqual(
        set(leaf_indices(self.cluster)), set(leaf_indices(reordered)))

    cost_before = path_cost(self.cluster)
    cost_after = path_cost(reordered)
    self.assertGreater(cost_before, cost_after)
    self.assertEqual(cost_after, 21.0)
def test_optimal_ordering(self):
    # Checks that the reordered tree spans the same range and leaves as the
    # input tree, and that its neighbour-to-neighbour distance sum is lower.

    def neighbours(sequence):
        # Pairs each element with the one preceding it: (next, current).
        leading, trailing = tee(sequence)
        next(leading)
        yield from zip(leading, trailing)

    def leaf_ids(root):
        return [leaf.value.index for leaf in hierarchical.leaves(root)]

    def total_distance(root):
        ids = leaf_ids(root)
        return sum(self.matrix[i, j] for i, j in neighbours(ids))

    result = hierarchical.optimal_leaf_ordering(self.cluster, self.matrix)

    self.assertEqual(result.value.range, self.cluster.value.range)
    self.assertSetEqual(set(leaf_ids(self.cluster)), set(leaf_ids(result)))

    unordered_total = total_distance(self.cluster)
    ordered_total = total_distance(result)
    self.assertGreater(unordered_total, ordered_total)
    self.assertEqual(ordered_total, 21.0)
def _ordered_cluster_tree(self):
    """Return the optimally ordered cluster tree, computing it on first use.

    The result is stored in ``self._ordered_tree`` and reused on later calls.
    """
    if self._ordered_tree is not None:
        return self._ordered_tree

    # Not cached yet: order the leaves of the plain cluster tree.
    tree = self._cluster_tree()
    ordered = hierarchical.optimal_leaf_ordering(tree, self.matrix)
    self._ordered_tree = ordered
    return self._ordered_tree