Ejemplo n.º 1
0
    def cluster_columns(self, data, parts, ordered=False):
        """Fill in the column clustering (and optional leaf ordering) of `parts`.

        Values already stored on the single column group are reused; only
        what is missing is computed, from the ``Orange.distance.PearsonR``
        matrix taken along ``axis=0`` of `data`.

        Parameters
        ----------
        data
            Object exposing ``domain.attributes``; every attribute must
            report ``is_continuous`` (asserted).
        parts
            Object with a one-element ``columns`` sequence and a
            ``_replace`` method; each column exposes ``cluster``,
            ``cluster_ordered`` and ``_replace``.
        ordered
            When truthy, an optimal leaf ordering is also produced if the
            column group does not already carry one.

        Returns
        -------
        ``parts._replace(columns=[...])`` where every column carries the
        resulting ``cluster`` and ``cluster_ordered`` values.

        Raises
        ------
        AssertionError
            If there is more than one column group, an attribute is not
            continuous, or the distance matrix reaches the
            ``self.MaxClustering`` / ``self.MaxOrderedClustering`` limits.
        """
        assert len(parts.columns) == 1, "columns split is no longer supported"
        assert all(var.is_continuous for var in data.domain.attributes)

        group = parts.columns[0]
        tree, ordered_tree = group.cluster, group.cluster_ordered
        tree_missing = tree is None
        order_missing = ordered and ordered_tree is None

        # Distances are only computed when something actually has to be built.
        distances = None
        if tree_missing or order_missing:
            prepared = Orange.distance._preprocess(data)
            # nan values break clustering below
            distances = np.nan_to_num(
                np.asarray(Orange.distance.PearsonR(prepared, axis=0))
            )

        if tree_missing:
            assert distances is not None
            assert len(distances) < self.MaxClustering
            tree = hierarchical.dist_matrix_clustering(
                distances, linkage=hierarchical.WARD)
        if order_missing:
            assert len(distances) < self.MaxOrderedClustering
            ordered_tree = hierarchical.optimal_leaf_ordering(tree, distances)

        return parts._replace(columns=[
            column._replace(cluster=tree, cluster_ordered=ordered_tree)
            for column in parts.columns
        ])
    def setUpClass(cls):
        """Build the shared ten-item distance matrix fixture and cluster it.

        Sets ``cls.items``, ``cls.m``, ``cls.matrix`` (an
        ``Orange.misc.DistMatrix`` labelled with the item names) and
        ``cls.cluster`` (result of ``hierarchical.dist_matrix_clustering``).
        """
        # Ragged rows of length 0..9; flattened below and expanded with
        # squareform(mode="lower").
        triangle = [
            [],
            [3],
            [2, 4],
            [17, 5, 4],
            [2, 8, 3, 8],
            [7, 5, 10, 11, 2],
            [8, 4, 1, 5, 11, 13],
            [4, 7, 12, 8, 10, 1, 5],
            [13, 9, 14, 15, 7, 8, 4, 6],
            [12, 10, 11, 15, 2, 5, 7, 3, 1],
        ]
        cls.items = ["Ann", "Bob", "Curt", "Danny", "Eve",
                     "Fred", "Greg", "Hue", "Ivy", "Jon"]

        condensed = numpy.array(list(flatten(triangle)), dtype=float)
        square = hierarchical.squareform(condensed, mode="lower")
        cls.m = triangle
        cls.matrix = Orange.misc.DistMatrix(square)
        cls.matrix.items = cls.items

        cls.cluster = hierarchical.dist_matrix_clustering(cls.matrix)
Ejemplo n.º 3
0
    def cluster_rows(self, data: Table, parts: 'Parts', ordered=False) -> 'Parts':
        """Fill in the clustering (and optional leaf ordering) of each row group.

        For every group in ``parts.rows`` the stored ``cluster`` and
        ``cluster_ordered`` values are reused. Missing ones are computed
        only when the group reports ``can_cluster``, from the
        ``Orange.distance.Euclidean`` matrix of ``data[group.indices]``.
        Groups that cannot be clustered keep whatever they already hold.

        Parameters
        ----------
        data
            Table indexed with each group's ``indices``.
        parts
            Object with a ``rows`` iterable and a ``_replace`` method.
        ordered
            When truthy, an optimal leaf ordering is also produced for
            clusterable groups that do not already carry one.

        Returns
        -------
        ``parts._replace(rows=[...])`` with one updated group per input
        group, in the same order.

        Raises
        ------
        AssertionError
            If a distance matrix reaches the ``self.MaxClustering`` /
            ``self.MaxOrderedClustering`` limits.
        """
        updated = []
        for group in parts.rows:
            tree, ordered_tree = group.cluster, group.cluster_ordered

            if group.can_cluster:
                tree_missing = tree is None
                order_missing = ordered and ordered_tree is None

                # Distances are only computed when something has to be built.
                distances = None
                if tree_missing or order_missing:
                    distances = Orange.distance.Euclidean(data[group.indices])

                if tree_missing:
                    assert len(distances) < self.MaxClustering
                    tree = hierarchical.dist_matrix_clustering(
                        distances, linkage=hierarchical.WARD
                    )
                if order_missing:
                    assert len(distances) < self.MaxOrderedClustering
                    ordered_tree = hierarchical.optimal_leaf_ordering(
                        tree, distances,
                    )

            updated.append(
                group._replace(cluster=tree, cluster_ordered=ordered_tree)
            )

        return parts._replace(rows=updated)
Ejemplo n.º 4
0
    def cluster_data(self, matrix):
        """Return leaf-ordered row and column index arrays for `matrix`.

        Rows and columns are each clustered from their ``Euclidean``
        distances (``axis=0`` for columns) and reordered with
        ``hierarchical.optimal_leaf_ordering``, reporting progress through
        ``self.progressBarSet``. The whole computation runs inside
        ``self.progressBar()``.

        Parameters
        ----------
        matrix
            Object supporting ``len()`` (row count) and exposing ``X.shape``.

        Returns
        -------
        tuple
            ``(row_order, columns_order)`` as numpy arrays. An axis with at
            most one entry gets ``np.array([0])`` without any clustering.
        """
        def leaf_order(distances):
            # Cluster, optimally order the leaves, then read back their indices.
            tree = hierarchical.dist_matrix_clustering(distances)
            ordered_tree = hierarchical.optimal_leaf_ordering(
                tree, distances, progress_callback=self.progressBarSet)
            return np.array([leaf.value.index for leaf in leaves(ordered_tree)])

        with self.progressBar():
            # cluster rows
            row_order = (
                leaf_order(Euclidean(matrix))
                if len(matrix) > 1 else np.array([0])
            )

            # cluster columns
            columns_order = (
                leaf_order(Euclidean(matrix, axis=0))
                if matrix.X.shape[1] > 1 else np.array([0])
            )
        return row_order, columns_order
    def setUpClass(cls):
        """Prepare the class-level distance matrix and its clustering.

        Populates ``cls.items`` (ten names), ``cls.m`` (the ragged input
        rows), ``cls.matrix`` (``Orange.misc.DistMatrix`` with ``items``
        set to the names) and ``cls.cluster`` (the output of
        ``hierarchical.dist_matrix_clustering`` on that matrix).
        """
        # Row i holds i values; the rows are flattened and handed to
        # squareform with mode="lower".
        rows = [
            [],
            [3],
            [2, 4],
            [17, 5, 4],
            [2, 8, 3, 8],
            [7, 5, 10, 11, 2],
            [8, 4, 1, 5, 11, 13],
            [4, 7, 12, 8, 10, 1, 5],
            [13, 9, 14, 15, 7, 8, 4, 6],
            [12, 10, 11, 15, 2, 5, 7, 3, 1],
        ]
        cls.items = [
            "Ann",
            "Bob",
            "Curt",
            "Danny",
            "Eve",
            "Fred",
            "Greg",
            "Hue",
            "Ivy",
            "Jon",
        ]

        flat = numpy.array(list(flatten(rows)), dtype=float)
        full = hierarchical.squareform(flat, mode="lower")
        cls.m = rows
        cls.matrix = Orange.misc.DistMatrix(full)
        cls.matrix.items = cls.items

        cls.cluster = hierarchical.dist_matrix_clustering(cls.matrix)
Ejemplo n.º 6
0
 def _cluster_tree(self):
     """Return the clustering of ``self.matrix``, computing it on first use.

     The result of ``hierarchical.dist_matrix_clustering(self.matrix)`` is
     stored in ``self._tree`` and reused while that attribute is not None.
     """
     cached = self._tree
     if cached is not None:
         return cached
     self._tree = hierarchical.dist_matrix_clustering(self.matrix)
     return self._tree