def cluster_columns(self, data, parts, ordered=False):
    """Fill in the column clustering (and optional leaf ordering) of `parts`.

    Only a single column group is supported and every attribute of
    `data.domain` must be continuous; both are checked with assertions.
    A clustering / ordered clustering already stored on the first column
    group is reused. A Pearson distance matrix over the columns
    (``axis=0``) is computed only when at least one of them is missing.

    Parameters
    ----------
    data
        Data whose columns are clustered.
    parts
        Object exposing `columns` and `_replace`
        (presumably a namedtuple -- confirm against its definition).
    ordered : bool
        When true, also compute the optimal leaf ordering if it is absent.

    Returns
    -------
    A copy of `parts` (via ``_replace``) whose column groups carry the
    resulting `cluster` and `cluster_ordered` values.
    """
    assert len(parts.columns) == 1, "columns split is no longer supported"
    assert all(var.is_continuous for var in data.domain.attributes)

    first_group = parts.columns[0]
    cluster = first_group.cluster
    cluster_ord = first_group.cluster_ordered

    missing_cluster = cluster is None
    missing_order = ordered and cluster_ord is None

    matrix = None
    if missing_cluster or missing_order:
        data = Orange.distance._preprocess(data)
        # nan values break clustering below
        matrix = np.nan_to_num(
            np.asarray(Orange.distance.PearsonR(data, axis=0))
        )

    if missing_cluster:
        assert matrix is not None
        assert len(matrix) < self.MaxClustering
        cluster = hierarchical.dist_matrix_clustering(
            matrix, linkage=hierarchical.WARD
        )

    if missing_order:
        assert len(matrix) < self.MaxOrderedClustering
        cluster_ord = hierarchical.optimal_leaf_ordering(cluster, matrix)

    return parts._replace(
        columns=[
            col._replace(cluster=cluster, cluster_ordered=cluster_ord)
            for col in parts.columns
        ]
    )
def setUpClass(cls):
    """Build the shared fixture: ten named items, their distances, a cluster.

    The distances are listed row by row as a lower triangle (row ``i``
    holds ``i`` values), flattened, and expanded with
    ``hierarchical.squareform(..., mode="lower")``. The result is wrapped
    in a ``DistMatrix`` labelled with the item names and clustered once
    for use by the tests.
    """
    lower_rows = [
        [],
        [3],
        [2, 4],
        [17, 5, 4],
        [2, 8, 3, 8],
        [7, 5, 10, 11, 2],
        [8, 4, 1, 5, 11, 13],
        [4, 7, 12, 8, 10, 1, 5],
        [13, 9, 14, 15, 7, 8, 4, 6],
        [12, 10, 11, 15, 2, 5, 7, 3, 1],
    ]
    cls.items = [
        "Ann",
        "Bob",
        "Curt",
        "Danny",
        "Eve",
        "Fred",
        "Greg",
        "Hue",
        "Ivy",
        "Jon",
    ]

    flat_values = list(flatten(lower_rows))
    condensed = numpy.array(flat_values, dtype=float)
    square = hierarchical.squareform(condensed, mode="lower")

    cls.m = lower_rows
    cls.matrix = Orange.misc.DistMatrix(square)
    cls.matrix.items = cls.items
    cls.cluster = hierarchical.dist_matrix_clustering(cls.matrix)
def cluster_rows(self, data: Table, parts: 'Parts', ordered=False) -> 'Parts':
    """Fill in the clustering (and optional leaf ordering) of each row group.

    For every group in `parts.rows` the stored `cluster` and
    `cluster_ordered` values are reused when present. Groups whose
    `can_cluster` flag is false are passed through with whatever they
    already hold. Otherwise a Euclidean distance matrix over
    ``data[row.indices]`` is computed only if something is missing.

    Parameters
    ----------
    data
        Table the row groups index into.
    parts
        Object exposing `rows` and `_replace`.
    ordered
        When true, also compute the optimal leaf ordering if it is absent.

    Returns
    -------
    A copy of `parts` (via ``_replace``) with the updated row groups.
    """
    updated_rows = []
    for row in parts.rows:
        cluster = row.cluster
        cluster_ord = row.cluster_ordered

        if row.can_cluster:
            missing_cluster = cluster is None
            missing_order = ordered and cluster_ord is None

            matrix = None
            if missing_cluster or missing_order:
                matrix = Orange.distance.Euclidean(data[row.indices])

            if missing_cluster:
                assert len(matrix) < self.MaxClustering
                cluster = hierarchical.dist_matrix_clustering(
                    matrix, linkage=hierarchical.WARD
                )

            if missing_order:
                assert len(matrix) < self.MaxOrderedClustering
                cluster_ord = hierarchical.optimal_leaf_ordering(
                    cluster, matrix
                )

        updated_rows.append(
            row._replace(cluster=cluster, cluster_ordered=cluster_ord)
        )

    return parts._replace(rows=updated_rows)
def cluster_data(self, matrix):
    """Return index orders for the rows and columns of `matrix`.

    Each axis with more than one entry gets Euclidean distances, a
    hierarchical clustering and an optimal leaf ordering; the order is the
    sequence of ``value.index`` over the ordered tree's leaves. An axis
    that does not have more than one entry gets ``np.array([0])``.
    Progress is reported through ``self.progressBarSet`` inside the
    ``self.progressBar()`` context.

    Returns
    -------
    tuple
        ``(row_order, columns_order)`` as numpy arrays.
    """

    def leaf_order(distances):
        # Cluster, reorder the leaves optimally, then read off leaf indices.
        tree = hierarchical.dist_matrix_clustering(distances)
        ordered_tree = hierarchical.optimal_leaf_ordering(
            tree, distances, progress_callback=self.progressBarSet
        )
        return np.array([leaf.value.index for leaf in leaves(ordered_tree)])

    with self.progressBar():
        # cluster rows
        if len(matrix) > 1:
            row_order = leaf_order(Euclidean(matrix))
        else:
            row_order = np.array([0])

        # cluster columns
        if matrix.X.shape[1] > 1:
            columns_order = leaf_order(Euclidean(matrix, axis=0))
        else:
            columns_order = np.array([0])

    return row_order, columns_order
def setUpClass(cls):
    """Prepare the class-level distance matrix and its clustering.

    ``m`` lists pairwise distances as a lower triangle (row ``i`` has ``i``
    entries). It is flattened, converted to floats and expanded with
    ``hierarchical.squareform(..., mode="lower")``, then wrapped in a
    ``DistMatrix`` labelled with ``cls.items`` and clustered.
    """
    m = [
        [],
        [3],
        [2, 4],
        [17, 5, 4],
        [2, 8, 3, 8],
        [7, 5, 10, 11, 2],
        [8, 4, 1, 5, 11, 13],
        [4, 7, 12, 8, 10, 1, 5],
        [13, 9, 14, 15, 7, 8, 4, 6],
        [12, 10, 11, 15, 2, 5, 7, 3, 1],
    ]
    cls.items = [
        "Ann", "Bob", "Curt", "Danny", "Eve",
        "Fred", "Greg", "Hue", "Ivy", "Jon",
    ]

    triangle = numpy.array(list(flatten(m)), dtype=float)
    cls.m = m
    cls.matrix = Orange.misc.DistMatrix(
        hierarchical.squareform(triangle, mode="lower")
    )
    cls.matrix.items = cls.items
    cls.cluster = hierarchical.dist_matrix_clustering(cls.matrix)
def _cluster_tree(self):
    """Return the clustering of ``self.matrix``, computing it on first use.

    The result is stored in ``self._tree``; later calls return the stored
    value as long as it is not ``None``.
    """
    if self._tree is not None:
        return self._tree
    self._tree = hierarchical.dist_matrix_clustering(self.matrix)
    return self._tree