def setUpClass(cls):
    """Build the class-level fixtures used by the widget output tests."""
    super().setUpClass()
    WidgetOutputsTestMixin.init(cls)

    # One distance matrix serves both as a fixture and as the input signal.
    cls.distances = cls.signal_data = Euclidean(cls.data)
    cls.signal_name = "距离(Distances)"
    cls.same_input_output_domain = False

    # Distances computed along axis 0, kept as a separate fixture.
    cls.distances_cols = Euclidean(cls.data, axis=0)
Example #2
0
 def test_report_widgets_unsupervised_dist(self):
     """Create a report for the distance widgets from zoo distances."""
     report = OWReport.get_instance()
     distances = Euclidean(Table("zoo"))
     widgets = self.dist_widgets
     # The widget group is expected to hold exactly two entries.
     self.assertEqual(len(widgets), 2)
     self._create_report(widgets, report, distances)
Example #3
0
    def __call__(self, data):
        """Fit the wrapped MDS estimator on ``data`` and return it.

        The input is interpreted according to ``self._metric``:

        * a ``DistanceModel`` instance or a ``Distance`` subclass: ``data``
          is preprocessed and the metric is applied to ``data.X``; the
          estimator is then fitted on that matrix as ``'precomputed'``;
        * the string ``'precomputed'``: ``data`` itself is used as the
          distance matrix (no targets, no domain);
        * anything else: ``data`` is preprocessed and ``data.X`` is fitted
          with the dissimilarity taken from ``self.params``.

        Parameters
        ----------
        data
            Object exposing ``X``, ``Y`` and ``domain``, or a distance matrix
            when the metric is ``'precomputed'``.

        Returns
        -------
        The fitted estimator built by ``self.__wraps__``, with its ``domain``
        attribute set (``None`` for precomputed input).
        """
        # Work on a copy so that repeated calls never alter self.params.
        params = self.params.copy()
        dissimilarity = params['dissimilarity']
        if isinstance(self._metric, DistanceModel) or (isinstance(
                self._metric, type) and issubclass(self._metric, Distance)):
            data = self.preprocess(data)
            _X, Y, domain = data.X, data.Y, data.domain
            X = dist_matrix = self._metric(_X)
            dissimilarity = 'precomputed'
        # Compare strings by value: `is` only succeeds when the two string
        # objects happen to be interned, which is an implementation detail.
        elif isinstance(self._metric, str) and self._metric == 'precomputed':
            dist_matrix, Y, domain = data, None, None
            X = dist_matrix
            dissimilarity = 'precomputed'
        else:
            data = self.preprocess(data)
            X, Y, domain = data.X, data.Y, data.domain
            # A distance matrix is only needed here for the PCA-based
            # (Torgerson) initialization below.
            if self.init_type == "PCA":
                dist_matrix = Euclidean(X)

        if self.init_type == "PCA" and self.init_data is None:
            init_data = torgerson(dist_matrix, params['n_components'])
        elif self.init_data is not None:
            init_data = self.init_data
        else:
            init_data = None

        params["dissimilarity"] = dissimilarity
        mds = self.__wraps__(**params)
        mds.fit(X, y=Y, init=init_data)
        mds.domain = domain
        return mds
Example #4
0
    def setUpClass(cls):
        """Set up the signal fixtures required by the output test mixin."""
        super().setUpClass()
        WidgetOutputsTestMixin.init(cls)

        # Name and payload of the input signal, plus the domain expectation.
        fixtures = {
            "signal_name": "Distances",
            "signal_data": Euclidean(cls.data),
            "same_input_output_domain": False,
        }
        for attribute, value in fixtures.items():
            setattr(cls, attribute, value)
Example #5
0
    def test_mds_pca_init(self):
        """PCA-initialized MDS gives the same first point for every metric."""
        expected = np.array([-2.6928912, 0.32603512])

        # Each case: the dissimilarity setting and whether the projector is
        # fed a precomputed distance matrix instead of the raw table.
        cases = (
            (Euclidean, False),
            ('precomputed', True),
            ('euclidean', False),
        )
        for dissimilarity, use_distances in cases:
            projector = MDS(n_components=2,
                            dissimilarity=dissimilarity,
                            init_type='PCA',
                            n_init=1)
            source = Euclidean(self.iris) if use_distances else self.iris
            embedding = projector(source).embedding_
            np.testing.assert_array_almost_equal(embedding[0], expected)

        # Six components on five rows; only magnitudes are compared.
        expected = np.array(
            [-0.31871, -0.064644, 0.015653, -1.5e-08, -4.3e-11, 0])
        projector = MDS(n_components=6,
                        dissimilarity='euclidean',
                        init_type='PCA',
                        n_init=1)
        embedding = projector(self.iris[:5]).embedding_
        np.testing.assert_array_almost_equal(
            np.abs(embedding[0]), np.abs(expected))
Example #6
0
    def test_attr_label_matrix_and_data(self):
        """Follow the label combo contents as data and distances change."""
        w = self.widget
        # Stub out the optimization: it is slow and would leave the widget
        # in a blocking state when the next signal is sent.
        w.start = Mock()

        data = Table("zoo")
        dist = Euclidean(data)

        def assert_data_vars_offered():
            # All variables and metas of `data` must be a proper subset of
            # the entries in the label combo's model.
            data_vars = set(chain(data.domain.variables, data.domain.metas))
            self.assertTrue(data_vars < set(w.controls.attr_label.model()))

        # Distances and data are both connected.
        self.send_signal(w.Inputs.distances, dist)
        self.send_signal(w.Inputs.data, data)
        assert_data_vars_offered()

        # Distances are removed while data stays: labels must be kept.
        self.send_signal(w.Inputs.distances, None)
        assert_data_vars_offered()

        # Data is removed too: only the empty choice remains.
        self.send_signal(w.Inputs.data, None)
        self.assertEqual(list(w.controls.attr_label.model()), [None])

        # Data comes back: its variables are offered again.
        self.send_signal(w.Inputs.data, data)
        assert_data_vars_offered()
Example #7
0
 def test_distances_without_data_1(self):
     """
     Send a distance matrix whose row items have been removed.
     GH-2335
     """
     distances = Euclidean(self.data, axis=1)
     # Detach the matrix from the table it was computed from.
     distances.row_items = None
     self.send_signal("Distances", distances)
    def test_no_crash_on_single_instance(self):
        """A distance matrix of one instance must yield a one-node network.

        Such a matrix has no valid distances, which must not crash the widget.
        """
        single_instance = self.data[:1]
        dist = Euclidean(single_instance, axis=1)
        self.send_signal(self.widget.Inputs.distances, dist)

        network = self.get_output(self.widget.Outputs.network)
        self.assertTrue(network)
        self.assertEqual(network.number_of_nodes(), 1)
 def test_labels(self):
     """Annotating by the first meta variable leaves label colors unset."""
     grades = Table.from_url("https://datasets.biolab.si/core/grades-two.tab")
     self.widget.set_distances(Euclidean(grades))

     # Pick the first meta variable in the annotation combo and simulate
     # the user activating that entry.
     combo = self.widget.annot_combo
     first_meta = grades.domain.metas[0]
     position = combo.model().indexOf(first_meta)
     combo.setCurrentIndex(position)
     combo.activated.emit(position)

     self.assertIsNone(self.widget.tablemodel.label_colors)
Example #10
0
 def __mds_test_helper(self, data, n_com):
     """Compare MDS on a table with MDS on its precomputed distances.

     Both embeddings must be numerically close and have the shape
     ``(number of rows, n_com)``.
     """
     from_table = MDS(
         n_components=n_com, dissimilarity=Euclidean, random_state=0)(data)
     from_distances = MDS(
         n_components=n_com, dissimilarity='precomputed', random_state=0)(
             Euclidean(data))

     expected_shape = (data.X.shape[0], n_com)
     self.assertTrue(
         np.allclose(from_table.embedding_, from_distances.embedding_))
     self.assertEqual(expected_shape, from_table.embedding_.shape)
     self.assertEqual(expected_shape, from_distances.embedding_.shape)
Example #11
0
 def __init__(self, distance=Euclidean(), k=10, average=False,
              variance=False):
     """Store the neighbour search settings and normalization switches.

     ``distance`` and ``k`` are forwarded to the parent initializer;
     ``average`` and ``variance`` select whether to normalize by average
     and by variance.
     """
     super().__init__(distance, k)
     self.average, self.variance = average, variance
    def test_no_crash_on_zero_distance(self):
        """A minimum distance of 0 must not push the threshold below 0.

        Otherwise no node would satisfy the condition; here every instance
        is expected to appear as a node.
        """
        widget = self.widget
        dist = Euclidean(self.data, axis=1)

        widget.percentil = 100.0
        self.send_signal(widget.Inputs.distances, dist)

        network = self.get_output(widget.Outputs.network)
        self.assertTrue(network)
        self.assertEqual(network.number_of_nodes(), len(self.data))
Example #13
0
    def test_attr_label_from_data(self):
        """Distances alone must populate the label combo with data variables."""
        w = self.widget
        # Stub out the optimization: it is slow and would leave the widget
        # in a blocking state when the next signal is sent.
        w.start = Mock()

        data = Table("zoo")
        self.send_signal(w.Inputs.distances, Euclidean(data))

        # Every variable and meta must be offered, as a proper subset of
        # the combo model's entries.
        data_vars = set(chain(data.domain.variables, data.domain.metas))
        offered = set(w.controls.attr_label.model())
        self.assertTrue(data_vars < offered)
Example #14
0
    def setUpClass(cls):
        """Set up the signal fixtures and locate the datasets directory."""
        super().setUpClass()
        WidgetOutputsTestMixin.init(cls)

        cls.signal_name = "Distances"
        cls.signal_data = Euclidean(cls.data)
        cls.same_input_output_domain = False

        # The datasets folder sits three levels above this file's directory.
        here = os.path.dirname(__file__)
        cls.datasets_dir = os.path.realpath(os.path.join(
            here, os.pardir, os.pardir, os.pardir, 'datasets'))
Example #15
0
 def fit(self, X, Y=None):
     """Fit k-means on ``X`` and attach quality scores to the estimator.

     The fitted estimator gains ``silhouette`` (0 unless the number of
     clusters is at least 2 and below the number of rows), ``inertia``
     (inertia divided by the number of rows) and ``inter_cluster`` (mean
     of the upper-triangle distances between cluster centers).
     """
     proj = skl_cluster.KMeans(**self.params).fit(X, Y)
     n_rows = len(X)

     score_is_defined = 2 <= proj.n_clusters < n_rows
     proj.silhouette = (
         silhouette_score(X, proj.labels_) if score_is_defined else 0)
     proj.inertia = proj.inertia_ / n_rows

     center_distances = Euclidean(proj.cluster_centers_)
     # Offset 1 skips the diagonal, so each pair of centers counts once.
     upper_triangle = np.triu_indices_from(center_distances, 1)
     proj.inter_cluster = np.mean(center_distances[upper_triangle])
     return KMeansModel(proj, self.preprocessors)
Example #16
0
 def test_num_meta_labels(self):
     """A numeric meta used as annotation shows "?" for a missing value."""
     x = ContinuousVariable("x")
     y = ContinuousVariable("y")
     s = StringVariable("s")
     domain = Domain([x], [], [y, s])
     rows = [[0, 1, "a"], [1, np.nan, "b"]]
     data = Table.from_list(domain, rows)
     self.widget.set_distances(Euclidean(data))

     # Choose the numeric meta `y` in the annotation combo and simulate
     # the user activating that entry.
     combo = self.widget.annot_combo
     position = combo.model().indexOf(y)
     combo.setCurrentIndex(position)
     combo.activated.emit(position)

     self.assertEqual(self.widget.tablemodel.labels, ["1", "?"])
Example #17
0
 def fit(self, X, Y=None):
     """Fit k-means on ``X`` and attach quality scores to the estimator.

     ``silhouette`` stays NaN unless scoring is enabled and the number of
     clusters is at least 2 and below the number of rows. ``inertia`` is
     the inertia divided by the number of rows and ``inter_cluster`` is the
     mean of the upper-triangle distances between cluster centers.
     """
     proj = skl_cluster.KMeans(**self.params).fit(X, Y)
     n_rows = X.shape[0]

     proj.silhouette = np.nan
     try:
         if self._compute_silhouette and 2 <= proj.n_clusters < n_rows:
             proj.silhouette = silhouette_score(
                 X, proj.labels_, sample_size=5000)
     except MemoryError:  # Pairwise dist in silhouette fails for large data
         pass

     proj.inertia = proj.inertia_ / n_rows
     center_distances = Euclidean(proj.cluster_centers_)
     # Offset 1 skips the diagonal, so each pair of centers counts once.
     upper_triangle = np.triu_indices_from(center_distances, 1)
     proj.inter_cluster = np.mean(center_distances[upper_triangle])
     return KMeansModel(proj, self.preprocessors)
Example #18
0
    def test_torgerson(self):
        """All eigen solvers agree up to sign; unknown solvers are rejected."""
        distances = Euclidean(self.ionosphere[::5])

        auto, lapack, arpack = (
            torgerson(distances, eigen_solver=solver)
            for solver in ("auto", "lapack", "arpack"))

        # Only magnitudes are compared, so sign differences are ignored.
        np.testing.assert_almost_equal(np.abs(auto), np.abs(lapack))
        np.testing.assert_almost_equal(np.abs(lapack), np.abs(arpack))

        with self.assertRaises(ValueError):
            torgerson(distances, eigen_solver="madness")
Example #19
0
    def __tsne_test_helper(self, data, n_com):
        """Run t-SNE with three metric settings and check embedding shapes.

        Each embedding must have the shape ``(number of rows, n_com)``.
        """
        by_name = TSNE(n_components=n_com, metric="euclidean")(data)
        by_class = TSNE(n_components=n_com, metric=Euclidean)(data)
        precomputed = TSNE(n_components=n_com, metric="precomputed")(
            Euclidean(data))

        for fitted in (by_name, by_class, precomputed):
            self.assertEqual(
                (data.X.shape[0], n_com), fitted.embedding_.shape)
Example #20
0
    def _initialize(self):
        """Rebuild ``self.data`` and ``self.effective_matrix`` from the inputs.

        Reads ``self.signal_data``, ``self.matrix`` and ``self.matrix_data``.
        On the early-exit paths (no input, mismatching lengths, data without
        attributes) the widget is cleared and the attribute values are
        re-initialized. Otherwise the context is reopened for the new data
        and the effective matrix is handed to the graph.
        """
        # Remember the previous matrix so it can be compared with the new one
        # at the end to decide whether the current state must be discarded.
        matrix_existed = self.effective_matrix is not None
        effective_matrix = self.effective_matrix
        self._invalidated = True
        self.data = None
        self.effective_matrix = None
        self.closeContext()
        self.clear_messages()

        # if no data nor matrix is present reset plot
        if self.signal_data is None and self.matrix is None:
            self.clear()
            self.init_attr_values()
            return

        # Both inputs are present but disagree on the number of items.
        if self.signal_data is not None and self.matrix is not None and \
                len(self.signal_data) != len(self.matrix):
            self.Error.mismatching_dimensions()
            self.clear()
            self.init_attr_values()
            return

        # Data from the data signal takes precedence over the data that
        # accompanies the matrix.
        if self.signal_data is not None:
            self.data = self.signal_data
        elif self.matrix_data is not None:
            self.data = self.matrix_data

        if self.matrix is not None:
            self.effective_matrix = self.matrix
            # For an axis-0 matrix whose only data is the matrix's own data,
            # replace the data with a table holding one row per attribute,
            # with the attribute name in a string meta called "labels".
            if self.matrix.axis == 0 and self.data is not None \
                    and self.data is self.matrix_data:
                names = [[attr.name] for attr in self.data.domain.attributes]
                domain = Domain([], metas=[StringVariable("labels")])
                self.data = Table.from_list(domain, names)
        elif self.data.domain.attributes:
            # No matrix given: compute Euclidean distances from the data
            # after it has gone through MDS preprocessing.
            preprocessed_data = MDS().preprocess(self.data)
            self.effective_matrix = Euclidean(preprocessed_data)
        else:
            self.Error.no_attributes()
            self.clear()
            self.init_attr_values()
            return

        self.init_attr_values()
        self.openContext(self.data)
        # The state stays valid only if a matrix existed before and the new
        # effective matrix equals it.
        self._invalidated = not (
            matrix_existed and self.effective_matrix is not None
            and array_equal(effective_matrix, self.effective_matrix))
        if self._invalidated:
            self.clear()
        self.graph.set_effective_matrix(self.effective_matrix)
Example #21
0
    def cluster_data(self, matrix):
        """Return the clustered ordering of the rows and columns of ``matrix``.

        Each axis is ordered by hierarchical clustering on Euclidean
        distances followed by optimal leaf ordering. An axis that fails its
        size check (``len(matrix) > 1`` for rows, ``matrix.X.shape[1] > 1``
        for columns) gets the trivial order ``[0]``.

        Returns a ``(row_order, columns_order)`` pair of index arrays.
        """
        def ordered_leaf_indices(distances):
            # Cluster, reorder the leaves optimally, and read their indices.
            tree = hierarchical.dist_matrix_clustering(distances)
            ordered_tree = hierarchical.optimal_leaf_ordering(
                tree, distances, progress_callback=self.progressBarSet)
            return np.array(
                [leaf.value.index for leaf in leaves(ordered_tree)])

        with self.progressBar():
            # rows
            if len(matrix) > 1:
                row_order = ordered_leaf_indices(Euclidean(matrix))
            else:
                row_order = np.array([0])

            # columns
            if matrix.X.shape[1] > 1:
                columns_order = ordered_leaf_indices(
                    Euclidean(matrix, axis=0))
            else:
                columns_order = np.array([0])
        return row_order, columns_order
Example #22
0
    def test_set_distances(self):
        """The annotation combo lists data variables only with row items."""
        assert isinstance(self.widget, OWDistanceMatrix)

        iris = Table("iris")[:5]
        distances = Euclidean(iris)
        first_variable = iris.domain[0]

        # With row data attached, the first variable is offered.
        self.widget.set_distances(distances)
        self.assertIn(first_variable, self.widget.annot_combo.model())

        # Once the row data is removed, it is no longer offered.
        distances.row_items = None
        self.widget.set_distances(distances)
        self.assertNotIn(first_variable, self.widget.annot_combo.model())
Example #23
0
 def __init__(self, classifier, distance=Euclidean(), k=10, relative=True,
              include=False, neighbourhood='fixed'):
     """Store the classifier and the neighbourhood settings.

     ``distance`` and ``k`` are forwarded to the parent initializer.
     ``neighbourhood`` must be either ``'fixed'`` or ``'variable'``.
     """
     super().__init__(distance, k)
     self.classifier = classifier
     self.relative, self.include = relative, include
     assert neighbourhood in ('fixed', 'variable')
     self.neighbourhood = neighbourhood
Example #24
0
    def _initialize(self):
        """Rebuild ``self.data`` and ``self.effective_matrix`` from the inputs.

        Reads ``self.signal_data``, ``self.matrix`` and ``self.matrix_data``.
        On the early-exit paths (no input, mismatching lengths, data without
        attributes) the widget is cleared and the attribute values are
        re-initialized. Otherwise the context is reopened for the new data
        and the effective matrix is handed to the graph.
        """
        # Remember the previous matrix so it can be compared with the new one
        # at the end to decide whether the current state must be discarded.
        matrix_existed = self.effective_matrix is not None
        effective_matrix = self.effective_matrix
        self.__invalidated = True
        self.data = None
        self.effective_matrix = None
        self.closeContext()
        self.clear_messages()

        # if no data nor matrix is present reset plot
        if self.signal_data is None and self.matrix is None:
            self.clear()
            self.init_attr_values()
            return

        # Both inputs are present but disagree on the number of items.
        if self.signal_data is not None and self.matrix is not None and \
                len(self.signal_data) != len(self.matrix):
            self.Error.mismatching_dimensions()
            self.clear()
            self.init_attr_values()
            return

        # Data from the data signal takes precedence over the data that
        # accompanies the matrix.
        if self.signal_data is not None:
            self.data = self.signal_data
        elif self.matrix_data is not None:
            self.data = self.matrix_data

        if self.matrix is not None:
            self.effective_matrix = self.matrix
            # For an axis-0 matrix whose only data is the matrix's own data,
            # the data is dropped.
            if self.matrix.axis == 0 and self.data is self.matrix_data:
                self.data = None
        elif self.data.domain.attributes:
            # No matrix given: compute Euclidean distances from the data
            # after it has gone through MDS preprocessing.
            preprocessed_data = MDS().preprocess(self.data)
            self.effective_matrix = Euclidean(preprocessed_data)
        else:
            self.Error.no_attributes()
            self.clear()
            self.init_attr_values()
            return

        self.init_attr_values()
        self.openContext(self.data)
        # The state stays valid only if a matrix existed before and the new
        # effective matrix equals it.
        self.__invalidated = not (matrix_existed and
                                  self.effective_matrix is not None and
                                  np.array_equal(effective_matrix,
                                                 self.effective_matrix))
        if self.__invalidated:
            self.clear()
        self.graph.set_effective_matrix(self.effective_matrix)
Example #25
0
    def test_labels(self):
        """Annotating by the string meta leaves label colors unset."""
        x = ContinuousVariable("x")
        y = ContinuousVariable("y")
        s = StringVariable("s")
        rows = [
            [91.0, 89.0, "Bill"],
            [51.0, 100.0, "Cynthia"],
            [9.0, 61.0, "Demi"],
            [49.0, 92.0, "Fred"],
            [91.0, 49.0, "George"],
        ]
        grades = Table.from_list(Domain([x, y], [], [s]), rows)
        self.widget.set_distances(Euclidean(grades))

        # Pick the first meta variable in the annotation combo and simulate
        # the user activating that entry.
        combo = self.widget.annot_combo
        position = combo.model().indexOf(grades.domain.metas[0])
        combo.setCurrentIndex(position)
        combo.activated.emit(position)

        self.assertIsNone(self.widget.tablemodel.label_colors)
Example #26
0
 def test_infinite_distances(self):
     """
     Scipy rejects infinite distances and so does this widget: the error
     is shown for such input and cleared again for regular distances.
     GH-2380
     """
     def error_shown():
         return self.widget.Error.not_finite_distances.is_shown()

     domain = Domain([ContinuousVariable("a")],
                     [DiscreteVariable("b", values=["y"])])
     # Two extreme values of opposite sign.
     rows = list(zip([1.79e308, -1e120], "yy"))
     distances = Euclidean(Table(domain, rows))

     self.assertFalse(error_shown())
     self.send_signal(self.widget.Inputs.distances, distances)
     self.assertTrue(error_shown())
     self.send_signal(self.widget.Inputs.distances, self.distances)
     self.assertFalse(error_shown())
 def test_infinite_distances(self):
     """
     Scipy rejects infinite distances and so does this widget: the error
     is shown for such input and cleared again for regular distances.
     GH-2380
     """
     def error_shown():
         return self.widget.Error.not_finite_distances.is_shown()

     domain = Domain([ContinuousVariable("a")],
                     [DiscreteVariable("b", values=("y", ))])
     # Two extreme values of opposite sign.
     rows = list(zip([1.79e308, -1e120], "yy"))
     table = Table.from_list(domain, rows)
     # Runtime warnings raised while computing the distances are ignored.
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", ".*", RuntimeWarning)
         distances = Euclidean(table)

     self.assertFalse(error_shown())
     self.send_signal(self.widget.Inputs.distances, distances)
     self.assertTrue(error_shown())
     self.send_signal(self.widget.Inputs.distances, self.distances)
     self.assertFalse(error_shown())
Example #28
0
 def __init__(self, classifier, distance=Euclidean(), k=10, gamma=0.5,
              rho=0.5, exp=True, rf=None):
     """Initialize both parent classes and store the normalization settings.

     ``classifier`` goes to ``RegrModelNC``; ``distance`` and ``k`` go to
     ``NearestNeighbours``. A truthy ``rf`` must be an instance of
     ``RandomForestRegressor``.
     """
     RegrModelNC.__init__(self, classifier)
     NearestNeighbours.__init__(self, distance, k)
     self._gamma = gamma  # distance sensitivity
     self._rho = rho  # variance sensitivity
     self.exp = exp  # type of normalization
     self.rf = rf  # random forest for normalization
     if self.rf:
         is_forest = isinstance(rf, RandomForestRegressor)
         assert is_forest, \
             "Rf must be an instance of sklearn's RandomForestRegressor."
    def setUp(self):
        """Create the widget, iris distances and a small hand-made matrix."""
        self.widget = self.create_widget(
            OWNxFromDistances)  # type: OWNxFromDistances
        self.data = Table("iris")
        self.distances = Euclidean(self.data)

        # When converted to a graph, this has the following components:
        # At threshold 0.5:  {1, 6} and disconnected {0}, {2}, {3}, {4}, {5}
        # At threshold 1 {0, 1, 2, 6}, {3, 5}, {4}
        # At threshold 2 {0, 1, 2, 3, 5, 6}, {4}
        m = np.full((7, 7), 10.0)
        # Distances per unordered pair; each is written symmetrically.
        pair_distances = {
            (1, 6): 0.5,
            (0, 1): 1, (1, 2): 1, (2, 6): 1, (0, 6): 1,
            (3, 5): 1,
            (2, 3): 2,
        }
        for (i, j), value in pair_distances.items():
            m[i, j] = m[j, i] = value
        self.distances1 = DistMatrix(m)
Example #30
0
 def __call__(self, data):
     """Fit the projection on ``data`` and return the fitted object.

     The input is interpreted according to ``self._metric``:

     * an instance of ``SklDistance``, ``SpearmanDistance`` or
       ``PearsonDistance``: ``data`` is preprocessed, the metric is applied
       to ``data.X`` and the result is fitted as ``'precomputed'``;
     * the string ``'precomputed'``: ``data`` itself is used as the
       distance matrix (no targets, no domain);
     * anything else: ``data`` is preprocessed and ``data.X`` is fitted.

     Returns the object produced by ``self.fit``, with its ``domain``
     attribute set (``None`` for precomputed input).

     NOTE(review): this method writes to ``self.params`` and stores the
     computed PCA initialization in ``self.init_data``, so both persist
     across calls on the same instance. Confirm whether that is intended.
     """
     distances = SklDistance, SpearmanDistance, PearsonDistance
     if isinstance(self._metric, distances):
         data = self.preprocess(data)
         _X, Y, domain = data.X, data.Y, data.domain
         X = dist_matrix = self._metric(_X)
         self.params['dissimilarity'] = 'precomputed'
     # Compare strings by value: `is` only succeeds when the two string
     # objects happen to be interned, which is an implementation detail.
     elif isinstance(self._metric, str) and self._metric == 'precomputed':
         dist_matrix, Y, domain = data, None, None
         X = dist_matrix
     else:
         data = self.preprocess(data)
         X, Y, domain = data.X, data.Y, data.domain
         # A distance matrix is only needed here for the PCA-based
         # (Torgerson) initialization below.
         if self.init_type == "PCA":
             dist_matrix = Euclidean(X)
     if self.init_type == "PCA" and self.init_data is None:
         self.init_data = torgerson(dist_matrix,
                                    self.params['n_components'])
     clf = self.fit(X, Y=Y)
     clf.domain = domain
     return clf