Exemple #1
0
def run_orange3(Z, D):
    """Time Orange3's optimal leaf ordering for one linkage/distance pair.

    Only the ``optimal_leaf_ordering`` call is timed; the import and the
    conversion of the linkage into an Orange tree happen before the clock
    starts.

    Parameters
    ----------
    Z
        Linkage handed to ``tree_from_linkage``
        (presumably a SciPy-style linkage matrix -- confirm with the caller).
    D
        Distances handed to ``squareform`` before ordering
        (presumably in condensed form -- confirm with the caller).

    Returns
    -------
    tuple
        ``(elapsed_seconds, None)``; the second slot is always ``None``.
    """
    import Orange.clustering.hierarchical as orange_hier

    tree = orange_hier.tree_from_linkage(Z)
    # perf_counter is monotonic and high resolution, unlike the wall clock
    # returned by time.time(), so the measured interval cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()
    orange_hier.optimal_leaf_ordering(tree, squareform(D))
    end_time = time.perf_counter()
    return end_time - start_time, None
Exemple #2
0
    def cluster_columns(self, data, parts, ordered=False):
        """Attach a column clustering (and optionally its ordered form) to `parts`.

        Trees already stored on the single column group are reused. A
        distance matrix is computed only when a tree is missing, using
        ``Orange.distance.PearsonR`` along ``axis=0`` with NaNs replaced via
        ``np.nan_to_num``.

        :param data: table whose domain attributes must all be continuous
        :param parts: object with a one-element ``columns`` sequence
        :param ordered: when true, also make sure the optimally ordered tree
            is available
        :return: ``parts`` with its ``columns`` replaced by updated groups
        """
        assert len(parts.columns) == 1, "columns split is no longer supported"
        assert all(var.is_continuous for var in data.domain.attributes)

        first_group = parts.columns[0]
        tree = first_group.cluster
        ordered_tree = first_group.cluster_ordered
        want_ordering = ordered and ordered_tree is None

        distances = None
        if tree is None or want_ordering:
            data = Orange.distance._preprocess(data)
            pearson = np.asarray(Orange.distance.PearsonR(data, axis=0))
            # nan values break clustering below
            distances = np.nan_to_num(pearson)

        if tree is None:
            assert distances is not None
            assert len(distances) < self.MaxClustering
            tree = hierarchical.dist_matrix_clustering(
                distances, linkage=hierarchical.WARD)
        if want_ordering:
            assert len(distances) < self.MaxOrderedClustering
            ordered_tree = hierarchical.optimal_leaf_ordering(tree, distances)

        updated_groups = [
            group._replace(cluster=tree, cluster_ordered=ordered_tree)
            for group in parts.columns
        ]
        return parts._replace(columns=updated_groups)
Exemple #3
0
    def cluster_rows(self, data: Table, parts: 'Parts', ordered=False) -> 'Parts':
        """Attach row clusterings (and optionally ordered ones) to every row group.

        Groups whose ``can_cluster`` flag is false keep whatever trees they
        already carry. For the others, a Euclidean distance matrix over the
        group's rows is computed only when a tree is missing.

        :param data: table indexed with each group's ``indices``
        :param parts: object with a ``rows`` sequence of row groups
        :param ordered: when true, also make sure each clusterable group has
            an optimally ordered tree
        :return: ``parts`` with its ``rows`` replaced by the updated groups
        """
        def clustered(group):
            # Start from whatever the group already has cached.
            tree = group.cluster
            ordered_tree = group.cluster_ordered

            if group.can_cluster:
                want_ordering = ordered and ordered_tree is None
                distances = None
                if tree is None or want_ordering:
                    distances = Orange.distance.Euclidean(data[group.indices])

                if tree is None:
                    assert len(distances) < self.MaxClustering
                    tree = hierarchical.dist_matrix_clustering(
                        distances, linkage=hierarchical.WARD
                    )
                if want_ordering:
                    assert len(distances) < self.MaxOrderedClustering
                    ordered_tree = hierarchical.optimal_leaf_ordering(
                        tree, distances,
                    )
            return group._replace(cluster=tree, cluster_ordered=ordered_tree)

        return parts._replace(rows=[clustered(group) for group in parts.rows])
Exemple #4
0
    def cluster_data(self, matrix):
        """Return leaf orderings for the rows and the columns of `matrix`.

        Each axis with more than one element is clustered on Euclidean
        distances and then optimally leaf-ordered, reporting progress
        through ``self.progressBarSet``. An axis with a single element gets
        the trivial order ``[0]``.

        :param matrix: object supporting ``len()`` and exposing ``X.shape``
        :return: ``(row_order, columns_order)`` as numpy index arrays
        """
        def ordered_indices(distances):
            # Cluster, reorder the leaves, then read the indices off the tree.
            tree = hierarchical.dist_matrix_clustering(distances)
            reordered = hierarchical.optimal_leaf_ordering(
                tree, distances, progress_callback=self.progressBarSet)
            return np.array([leaf.value.index for leaf in leaves(reordered)])

        with self.progressBar():
            # cluster rows
            row_order = (
                ordered_indices(Euclidean(matrix))
                if len(matrix) > 1 else np.array([0])
            )
            # cluster columns
            columns_order = (
                ordered_indices(Euclidean(matrix, axis=0))
                if matrix.X.shape[1] > 1 else np.array([0])
            )
        return row_order, columns_order
    def test_optimal_ordering(self):
        """Optimal leaf ordering keeps the leaf set and lowers the path cost."""
        def leaf_indices(tree):
            return [node.value.index for node in hierarchical.leaves(tree)]

        def adjacent(items):
            # Pair every element with its predecessor: (items[k + 1], items[k]).
            ahead, behind = tee(items)
            next(ahead)
            yield from zip(ahead, behind)

        def path_cost(tree):
            # Sum of distances between neighbouring leaves in tree order.
            return sum(
                [self.matrix[a, b] for a, b in adjacent(leaf_indices(tree))]
            )

        reordered = hierarchical.optimal_leaf_ordering(self.cluster, self.matrix)

        # The root must still span the same range and hold the same leaves.
        self.assertEqual(reordered.value.range, self.cluster.value.range)
        self.assertSetEqual(
            set(leaf_indices(self.cluster)), set(leaf_indices(reordered))
        )

        cost_before = path_cost(self.cluster)
        cost_after = path_cost(reordered)
        self.assertGreater(cost_before, cost_after)
        self.assertEqual(cost_after, 21.0)
    def test_optimal_ordering(self):
        """Reordering leaves must preserve the tree's leaves and reduce its score."""
        original = self.cluster
        optimized = hierarchical.optimal_leaf_ordering(original, self.matrix)

        def indices(root):
            found = []
            for leaf in hierarchical.leaves(root):
                found.append(leaf.value.index)
            return found

        def pairs(iterable):
            # Yields (current, previous) for each consecutive pair.
            leading, trailing = tee(iterable)
            next(leading)
            yield from zip(leading, trailing)

        def score(root):
            # Total distance between neighbouring leaves in `root`'s order.
            terms = [self.matrix[i, j] for i, j in pairs(indices(root))]
            return sum(terms)

        self.assertEqual(optimized.value.range, original.value.range)
        self.assertSetEqual(set(indices(original)), set(indices(optimized)))

        unordered_score = score(original)
        ordered_score = score(optimized)
        self.assertGreater(unordered_score, ordered_score)
        self.assertEqual(ordered_score, 21.0)
Exemple #7
0
 def _ordered_cluster_tree(self):
     """Return the optimally leaf-ordered cluster tree, building it on first use.

     The result is stored in ``self._ordered_tree`` so that later calls
     skip the ordering step.
     """
     if self._ordered_tree is not None:
         return self._ordered_tree

     base_tree = self._cluster_tree()
     self._ordered_tree = hierarchical.optimal_leaf_ordering(
         base_tree, self.matrix)
     return self._ordered_tree