def setUpClass(cls):
    """Set up the class-level fixtures shared by the output tests.

    Two Euclidean distance results are computed from ``cls.data``: one with
    the default arguments, which also serves as the signal data, and one
    with ``axis=0``, stored as ``distances_cols``.
    """
    super().setUpClass()
    WidgetOutputsTestMixin.init(cls)

    default_distances = Euclidean(cls.data)
    axis0_distances = Euclidean(cls.data, axis=0)

    cls.signal_name = "距离(Distances)"
    cls.same_input_output_domain = False
    # The same object is exposed both as the fixture and as the signal data
    cls.distances = cls.signal_data = default_distances
    cls.distances_cols = axis0_distances
def test_report_widgets_unsupervised_dist(self):
    """Create a report for the two distance widgets from zoo distances."""
    report = OWReport.get_instance()
    zoo_distances = Euclidean(Table("zoo"))
    dist_widgets = self.dist_widgets
    # Guard against the widget list silently growing or shrinking
    self.assertEqual(len(dist_widgets), 2)
    self._create_report(dist_widgets, report, zoo_distances)
def __call__(self, data):
    """Fit the wrapped MDS estimator on ``data`` and return the fitted object.

    How ``data`` is used depends on ``self._metric``:

    * a ``DistanceModel`` instance or a ``Distance`` subclass: ``data`` is
      preprocessed, the metric is applied to ``data.X`` and the result is
      passed to the estimator as a precomputed dissimilarity matrix;
    * the string ``'precomputed'``: ``data`` itself is used as the distance
      matrix; no target values or domain are available in this case;
    * anything else: ``data`` is preprocessed and ``data.X`` is passed to
      the estimator with the ``dissimilarity`` value from ``self.params``.

    When ``self.init_type`` is ``"PCA"`` and no ``self.init_data`` is set,
    the initial configuration is computed with ``torgerson`` from the
    distance matrix; otherwise ``self.init_data`` (possibly ``None``) is
    used.

    Returns the fitted estimator with its ``domain`` attribute set to the
    domain of the input (``None`` for precomputed distances).
    """
    # Work on a copy so that repeated calls do not alter self.params
    params = self.params.copy()
    dissimilarity = params['dissimilarity']

    metric_is_distance = isinstance(self._metric, DistanceModel) or (
        isinstance(self._metric, type)
        and issubclass(self._metric, Distance))

    if metric_is_distance:
        data = self.preprocess(data)
        _X, Y, domain = data.X, data.Y, data.domain
        X = dist_matrix = self._metric(_X)
        dissimilarity = 'precomputed'
    # Compare by value: identity comparison with a string literal depends
    # on interning and is flagged with a SyntaxWarning by the interpreter
    elif self._metric == 'precomputed':
        dist_matrix, Y, domain = data, None, None
        X = dist_matrix
        dissimilarity = 'precomputed'
    else:
        data = self.preprocess(data)
        X, Y, domain = data.X, data.Y, data.domain
        # Distances are needed only to compute the initial configuration
        if self.init_type == "PCA":
            dist_matrix = Euclidean(X)

    if self.init_type == "PCA" and self.init_data is None:
        init_data = torgerson(dist_matrix, params['n_components'])
    elif self.init_data is not None:
        init_data = self.init_data
    else:
        init_data = None

    params["dissimilarity"] = dissimilarity
    mds = self.__wraps__(**params)
    mds.fit(X, y=Y, init=init_data)
    mds.domain = domain
    return mds
def setUpClass(cls):
    """Set up the class-level signal fixtures for the output tests.

    The signal sent to the widget is the Euclidean distance result computed
    from ``cls.data`` with default arguments.
    """
    super().setUpClass()
    WidgetOutputsTestMixin.init(cls)

    cls.same_input_output_domain = False
    cls.signal_name = "Distances"
    cls.signal_data = Euclidean(cls.data)
def test_mds_pca_init(self):
    """Check the first embedded point of PCA-initialized MDS.

    The same reference coordinates are expected whether the dissimilarity
    is given as the ``Euclidean`` class, as precomputed Euclidean distances
    or as the string ``'euclidean'``. A final case requests six components
    from five rows and compares absolute values only.
    """
    expected = np.array([-2.6928912, 0.32603512])
    cases = (
        (Euclidean, self.iris),
        ('precomputed', Euclidean(self.iris)),
        ('euclidean', self.iris),
    )
    for dissimilarity, source in cases:
        projector = MDS(n_components=2, dissimilarity=dissimilarity,
                        init_type='PCA', n_init=1)
        embedding = projector(source).embedding_
        np.testing.assert_array_almost_equal(embedding[0], expected)

    # More components than rows; signs are not compared
    projector = MDS(n_components=6, dissimilarity='euclidean',
                    init_type='PCA', n_init=1)
    embedding = projector(self.iris[:5]).embedding_
    expected = np.array(
        [-0.31871, -0.064644, 0.015653, -1.5e-08, -4.3e-11, 0])
    np.testing.assert_array_almost_equal(
        np.abs(embedding[0]), np.abs(expected))
def test_attr_label_matrix_and_data(self):
    """Follow the label combo model while distances and data come and go."""
    widget = self.widget
    # Replace the optimization entry point with a mock: it saves time and
    # keeps the widget from being in a blocking state when the next signal
    # is sent
    widget.start = Mock()

    zoo = Table("zoo")
    zoo_distances = Euclidean(zoo)

    def assert_model_offers_zoo_variables():
        # Strict subset: the model holds every variable and meta of the
        # data plus at least one further entry
        zoo_vars = set(chain(zoo.domain.variables, zoo.domain.metas))
        self.assertTrue(zoo_vars < set(widget.controls.attr_label.model()))

    # Both the distances and the data are connected
    self.send_signal(widget.Inputs.distances, zoo_distances)
    self.send_signal(widget.Inputs.data, zoo)
    assert_model_offers_zoo_variables()

    # The distances are withdrawn while the data stays connected
    self.send_signal(widget.Inputs.distances, None)
    assert_model_offers_zoo_variables()

    # The data is withdrawn as well: only the None entry is left
    self.send_signal(widget.Inputs.data, None)
    self.assertEqual(list(widget.controls.attr_label.model()), [None])

    # The data is connected again
    self.send_signal(widget.Inputs.data, zoo)
    assert_model_offers_zoo_variables()
def test_distances_without_data_1(self):
    """
    Send distances whose row items have been removed, i.e. distances
    that are not accompanied by data.
    GH-2335
    """
    bare_distances = Euclidean(self.data, axis=1)
    bare_distances.row_items = None
    # The test passes when sending the signal does not raise
    self.send_signal("Distances", bare_distances)
def test_no_crash_on_single_instance(self):
    """Test that a single instance does not crash the widget.

    The distance matrix of one instance contains no valid distances; the
    output network is still expected to exist and to hold one node.
    """
    widget = self.widget
    single_row = self.data[:1]
    self.send_signal(widget.Inputs.distances, Euclidean(single_row, axis=1))

    network = self.get_output(widget.Outputs.network)
    self.assertTrue(network)
    self.assertEqual(network.number_of_nodes(), 1)
def test_labels(self):
    """Annotating with the first meta attribute leaves label colors unset."""
    # NOTE: the data set is fetched from a URL, so this test needs network
    grades = Table.from_url("https://datasets.biolab.si/core/grades-two.tab")
    self.widget.set_distances(Euclidean(grades))

    combo = self.widget.annot_combo
    meta_index = combo.model().indexOf(grades.domain.metas[0])
    combo.setCurrentIndex(meta_index)
    # Emit the signal explicitly, in addition to setting the index
    combo.activated.emit(meta_index)

    self.assertIsNone(self.widget.tablemodel.label_colors)
def __mds_test_helper(self, data, n_com):
    """Compare MDS fitted on ``data`` with MDS fitted on its distances.

    One projection receives the table and the ``Euclidean`` class as the
    dissimilarity, the other receives precomputed Euclidean distances. Both
    use ``random_state=0``; their embeddings must be close and must have
    the shape ``(number of rows, n_com)``.
    """
    from_table = MDS(
        n_components=n_com, dissimilarity=Euclidean, random_state=0)(data)
    from_distances = MDS(
        n_components=n_com, dissimilarity='precomputed',
        random_state=0)(Euclidean(data))

    expected_shape = (data.X.shape[0], n_com)
    self.assertTrue(
        np.allclose(from_table.embedding_, from_distances.embedding_))
    self.assertEqual(expected_shape, from_table.embedding_.shape)
    self.assertEqual(expected_shape, from_distances.embedding_.shape)
def __init__(self, distance=Euclidean(), k=10, average=False, variance=False):
    """Initialize the distance measure, the number of nearest neighbours to
    consider and whether to normalize by average and by variance.

    NOTE: the default ``Euclidean()`` object is created once, when the
    function is defined, and is shared by every instance relying on it.
    """
    # The base initializer receives the distance and the neighbour count
    super().__init__(distance, k)
    self.average, self.variance = average, variance
def test_no_crash_on_zero_distance(self):
    """Test that a minimum distance of 0 does not make the widget set the
    distance threshold under 0, which would leave no nodes satisfying the
    condition.
    """
    widget = self.widget
    distances = Euclidean(self.data, axis=1)
    widget.percentil = 100.0
    self.send_signal(widget.Inputs.distances, distances)

    network = self.get_output(widget.Outputs.network)
    self.assertTrue(network)
    # Every data instance must be present as a node
    self.assertEqual(network.number_of_nodes(), len(self.data))
def test_attr_label_from_data(self):
    """Distances computed from a table bring its variables to the label combo."""
    widget = self.widget
    # Replace the optimization entry point with a mock: it saves time and
    # keeps the widget from being in a blocking state when the next signal
    # is sent
    widget.start = Mock()

    zoo = Table("zoo")
    self.send_signal(widget.Inputs.distances, Euclidean(zoo))

    zoo_vars = set(chain(zoo.domain.variables, zoo.domain.metas))
    label_model = set(widget.controls.attr_label.model())
    # Strict subset: the model also holds at least one further entry
    self.assertTrue(zoo_vars < label_model)
def setUpClass(cls):
    """Set up the signal fixtures and locate the ``datasets`` directory.

    The directory is resolved three levels above the directory of this
    file and stored as a real path in ``cls.datasets_dir``.
    """
    super().setUpClass()
    WidgetOutputsTestMixin.init(cls)

    cls.signal_name = "Distances"
    cls.signal_data = Euclidean(cls.data)
    cls.same_input_output_domain = False

    here = os.path.dirname(__file__)
    cls.datasets_dir = os.path.realpath(
        os.path.join(here, '..', '..', '..', 'datasets'))
def fit(self, X, Y=None):
    """Fit k-means on ``X`` and return it wrapped in a ``KMeansModel``.

    Besides the attributes set by the scikit-learn estimator, the fitted
    object receives:

    * ``silhouette`` -- the silhouette score, or 0 when the number of
      clusters is below 2 or not smaller than the number of samples;
    * ``inertia`` -- ``inertia_`` divided by the number of samples;
    * ``inter_cluster`` -- the mean of the upper-triangle Euclidean
      distances between the cluster centers.

    ``Y`` is passed through to the estimator's ``fit``.
    """
    proj = skl_cluster.KMeans(**self.params)
    proj = proj.fit(X, Y)

    # Take the sample count from the shape when there is one: len() raises
    # TypeError for scipy sparse matrices. Plain sequences fall back to len().
    n_samples = X.shape[0] if hasattr(X, "shape") else len(X)

    if 2 <= proj.n_clusters < n_samples:
        proj.silhouette = silhouette_score(X, proj.labels_)
    else:
        proj.silhouette = 0
    proj.inertia = proj.inertia_ / n_samples

    cluster_dist = Euclidean(proj.cluster_centers_)
    # Offset 1 skips the diagonal of the center-to-center matrix
    proj.inter_cluster = np.mean(
        cluster_dist[np.triu_indices_from(cluster_dist, 1)])
    return KMeansModel(proj, self.preprocessors)
def test_num_meta_labels(self):
    """A numeric meta chosen for annotation gives "1" and "?" as labels.

    The second row has a missing (NaN) value in that meta.
    """
    x, y = map(ContinuousVariable, "xy")
    s = StringVariable("s")
    domain = Domain([x], [], [y, s])
    data = Table.from_list(domain, [[0, 1, "a"], [1, np.nan, "b"]])
    self.widget.set_distances(Euclidean(data))

    combo = self.widget.annot_combo
    y_index = combo.model().indexOf(y)
    combo.setCurrentIndex(y_index)
    # Emit the signal explicitly, in addition to setting the index
    combo.activated.emit(y_index)

    self.assertEqual(self.widget.tablemodel.labels, ["1", "?"])
def fit(self, X, Y=None):
    """Fit k-means on ``X`` and return it wrapped in a ``KMeansModel``.

    The fitted object additionally receives ``silhouette`` (NaN unless it
    is requested and computable), ``inertia`` (``inertia_`` divided by the
    number of rows) and ``inter_cluster`` (mean of the upper-triangle
    Euclidean distances between the cluster centers).
    """
    proj = skl_cluster.KMeans(**self.params).fit(X, Y)
    n_rows = X.shape[0]

    silhouette = np.nan
    if self._compute_silhouette and 2 <= proj.n_clusters < n_rows:
        try:
            silhouette = silhouette_score(X, proj.labels_, sample_size=5000)
        except MemoryError:
            # Pairwise distances in the silhouette computation fail for
            # large data; the score deliberately stays NaN
            pass
    proj.silhouette = silhouette

    proj.inertia = proj.inertia_ / n_rows
    center_distances = Euclidean(proj.cluster_centers_)
    upper_triangle = np.triu_indices_from(center_distances, 1)
    proj.inter_cluster = np.mean(center_distances[upper_triangle])
    return KMeansModel(proj, self.preprocessors)
def test_torgerson(self):
    """All supported eigen solvers agree up to sign; unknown ones raise."""
    distances = Euclidean(self.ionosphere[::5])

    # Signs are not compared, so work with absolute values
    by_auto, by_lapack, by_arpack = (
        np.abs(torgerson(distances, eigen_solver=solver))
        for solver in ("auto", "lapack", "arpack"))
    np.testing.assert_almost_equal(by_auto, by_lapack)
    np.testing.assert_almost_equal(by_lapack, by_arpack)

    with self.assertRaises(ValueError):
        torgerson(distances, eigen_solver="madness")
def __tsne_test_helper(self, data, n_com):
    """Check the embedding shape of t-SNE for three ways of giving the metric.

    The metric is passed as the string ``"euclidean"``, as the
    ``Euclidean`` class and as ``"precomputed"`` with Euclidean distances
    as input. Each embedding must have the shape
    ``(number of rows, n_com)``.
    """
    fitted = [
        TSNE(n_components=n_com, metric="euclidean")(data),
        TSNE(n_components=n_com, metric=Euclidean)(data),
        TSNE(n_components=n_com, metric="precomputed")(Euclidean(data)),
    ]
    for model in fitted:
        self.assertEqual((data.X.shape[0], n_com), model.embedding_.shape)
def _initialize(self):
    """Reset the widget state and rebuild it from the current inputs.

    Reads ``signal_data``, ``matrix`` and ``matrix_data`` and sets ``data``
    and ``effective_matrix`` from them. The widget is cleared and the method
    returns early when there is no input at all, when data and matrix
    differ in length, or when the data has no attributes and no matrix is
    given. Otherwise the context is opened for the data and the effective
    matrix is passed on to the graph.
    """
    # Remember the previous matrix: it is compared with the new one at the
    # end to decide whether the current state has to be invalidated
    matrix_existed = self.effective_matrix is not None
    effective_matrix = self.effective_matrix
    self._invalidated = True
    self.data = None
    self.effective_matrix = None
    self.closeContext()
    self.clear_messages()

    # With neither data nor a matrix there is nothing to show: reset the plot
    if self.signal_data is None and self.matrix is None:
        self.clear()
        self.init_attr_values()
        return

    # Both inputs are present but their lengths disagree
    if self.signal_data is not None and self.matrix is not None and \
            len(self.signal_data) != len(self.matrix):
        self.Error.mismatching_dimensions()
        self.clear()
        self.init_attr_values()
        return

    # The data signal takes precedence over the data attached to the matrix
    if self.signal_data is not None:
        self.data = self.signal_data
    elif self.matrix_data is not None:
        self.data = self.matrix_data

    if self.matrix is not None:
        self.effective_matrix = self.matrix
        # For an axis-0 matrix whose only data is the one attached to it,
        # replace the data with a table that has one row per attribute and
        # holds the attribute name in a single string meta
        if self.matrix.axis == 0 and self.data is not None \
                and self.data is self.matrix_data:
            names = [[attr.name] for attr in self.data.domain.attributes]
            domain = Domain([], metas=[StringVariable("labels")])
            self.data = Table.from_list(domain, names)
    elif self.data.domain.attributes:
        # No matrix given: compute Euclidean distances from preprocessed data
        preprocessed_data = MDS().preprocess(self.data)
        self.effective_matrix = Euclidean(preprocessed_data)
    else:
        # No matrix and no attributes to compute distances from
        self.Error.no_attributes()
        self.clear()
        self.init_attr_values()
        return

    self.init_attr_values()
    self.openContext(self.data)
    # Keep the current state only if a matrix existed before and the new
    # matrix equals it
    self._invalidated = not (
        matrix_existed and self.effective_matrix is not None
        and array_equal(effective_matrix, self.effective_matrix))
    if self._invalidated:
        self.clear()
    self.graph.set_effective_matrix(self.effective_matrix)
def cluster_data(self, matrix):
    """Return the row order and the column order obtained by clustering.

    Rows and columns are clustered separately from their Euclidean
    distances, and the leaves of the optimally ordered clustering give the
    order. An axis with at most one element gets the order ``[0]``.
    Progress is reported through the widget's progress bar.
    """
    def leaf_order(distances):
        # Cluster, reorder the leaves and read their indices off in order
        root = hierarchical.dist_matrix_clustering(distances)
        ordered = hierarchical.optimal_leaf_ordering(
            root, distances, progress_callback=self.progressBarSet)
        return np.array([leaf.value.index for leaf in leaves(ordered)])

    with self.progressBar():
        # Rows
        if len(matrix) > 1:
            row_order = leaf_order(Euclidean(matrix))
        else:
            row_order = np.array([0])

        # Columns
        if matrix.X.shape[1] > 1:
            columns_order = leaf_order(Euclidean(matrix, axis=0))
        else:
            columns_order = np.array([0])
    return row_order, columns_order
def test_set_distances(self):
    """The annotation combo offers data variables only when row data exists."""
    assert isinstance(self.widget, OWDistanceMatrix)
    widget = self.widget
    iris = Table("iris")[:5]
    distances = Euclidean(iris)
    first_variable = iris.domain[0]

    # With row items attached, the variable is offered for annotation
    widget.set_distances(distances)
    self.assertIn(first_variable, widget.annot_combo.model())

    # Once the row items are removed, it is no longer offered
    distances.row_items = None
    widget.set_distances(distances)
    self.assertNotIn(first_variable, widget.annot_combo.model())
def __init__(self, classifier, distance=Euclidean(), k=10, relative=True,
             include=False, neighbourhood='fixed'):
    """Initialize the parameters.

    ``distance`` and ``k`` are handed to the base initializer; the other
    arguments are stored on the instance. ``neighbourhood`` must be
    ``'fixed'`` or ``'variable'``.

    NOTE: the default ``Euclidean()`` object is created once, when the
    function is defined, and is shared by every instance relying on it.
    """
    super().__init__(distance, k)
    self.classifier = classifier
    self.relative, self.include = relative, include
    assert neighbourhood in ('fixed', 'variable')
    self.neighbourhood = neighbourhood
def _initialize(self):
    """Reset the widget state and rebuild it from the current inputs.

    Reads ``signal_data``, ``matrix`` and ``matrix_data`` and sets ``data``
    and ``effective_matrix`` from them. The widget is cleared and the method
    returns early when there is no input at all, when data and matrix
    differ in length, or when the data has no attributes and no matrix is
    given. Otherwise the context is opened for the data and the effective
    matrix is passed on to the graph.
    """
    # Remember the previous matrix: it is compared with the new one at the
    # end to decide whether the current state has to be invalidated
    matrix_existed = self.effective_matrix is not None
    effective_matrix = self.effective_matrix
    self.__invalidated = True
    self.data = None
    self.effective_matrix = None
    self.closeContext()
    self.clear_messages()

    # With neither data nor a matrix there is nothing to show: reset the plot
    if self.signal_data is None and self.matrix is None:
        self.clear()
        self.init_attr_values()
        return

    # Both inputs are present but their lengths disagree
    if self.signal_data is not None and self.matrix is not None and \
            len(self.signal_data) != len(self.matrix):
        self.Error.mismatching_dimensions()
        self.clear()
        self.init_attr_values()
        return

    # The data signal takes precedence over the data attached to the matrix
    if self.signal_data is not None:
        self.data = self.signal_data
    elif self.matrix_data is not None:
        self.data = self.matrix_data

    if self.matrix is not None:
        self.effective_matrix = self.matrix
        # An axis-0 matrix whose only data is the one attached to it is
        # shown without data
        if self.matrix.axis == 0 and self.data is self.matrix_data:
            self.data = None
    elif self.data.domain.attributes:
        # No matrix given: compute Euclidean distances from preprocessed data
        preprocessed_data = MDS().preprocess(self.data)
        self.effective_matrix = Euclidean(preprocessed_data)
    else:
        # No matrix and no attributes to compute distances from
        self.Error.no_attributes()
        self.clear()
        self.init_attr_values()
        return

    self.init_attr_values()
    self.openContext(self.data)
    # Keep the current state only if a matrix existed before and the new
    # matrix equals it
    self.__invalidated = not (matrix_existed and
                              self.effective_matrix is not None and
                              np.array_equal(effective_matrix,
                                             self.effective_matrix))
    if self.__invalidated:
        self.clear()
    self.graph.set_effective_matrix(self.effective_matrix)
def test_labels(self):
    """Annotating with the string meta leaves label colors unset."""
    x, y = map(ContinuousVariable, "xy")
    s = StringVariable("s")
    rows = [
        [91.0, 89.0, "Bill"],
        [51.0, 100.0, "Cynthia"],
        [9.0, 61.0, "Demi"],
        [49.0, 92.0, "Fred"],
        [91.0, 49.0, "George"],
    ]
    grades = Table.from_list(Domain([x, y], [], [s]), rows)
    self.widget.set_distances(Euclidean(grades))

    combo = self.widget.annot_combo
    meta_index = combo.model().indexOf(grades.domain.metas[0])
    combo.setCurrentIndex(meta_index)
    # Emit the signal explicitly, in addition to setting the index
    combo.activated.emit(meta_index)

    self.assertIsNone(self.widget.tablemodel.label_colors)
def test_infinite_distances(self):
    """
    Scipy does not accept infinite distances and neither does this widget.
    Error is shown.
    GH-2380
    """
    # Build the table with the explicit Table.from_list constructor rather
    # than by calling Table(domain, rows) directly, which is the deprecated
    # way of constructing a table from rows
    table = Table.from_list(
        Domain([ContinuousVariable("a")],
               [DiscreteVariable("b", values=["y"])]),
        list(zip([1.79e308, -1e120], "yy")))
    # The two values are far enough apart for their distance not to be finite
    distances = Euclidean(table)

    self.assertFalse(self.widget.Error.not_finite_distances.is_shown())
    self.send_signal(self.widget.Inputs.distances, distances)
    self.assertTrue(self.widget.Error.not_finite_distances.is_shown())
    # Sending the regular fixture distances must clear the error again
    self.send_signal(self.widget.Inputs.distances, self.distances)
    self.assertFalse(self.widget.Error.not_finite_distances.is_shown())
def test_infinite_distances(self):
    """
    Scipy does not accept infinite distances and neither does this widget.
    Error is shown.
    GH-2380
    """
    widget = self.widget
    error = widget.Error.not_finite_distances

    domain = Domain([ContinuousVariable("a")],
                    [DiscreteVariable("b", values=("y", ))])
    rows = list(zip([1.79e308, -1e120], "yy"))
    table = Table.from_list(domain, rows)
    # Runtime warnings raised while computing the distances are ignored
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", ".*", RuntimeWarning)
        distances = Euclidean(table)

    self.assertFalse(error.is_shown())
    self.send_signal(widget.Inputs.distances, distances)
    self.assertTrue(error.is_shown())
    # Sending the regular fixture distances must clear the error again
    self.send_signal(widget.Inputs.distances, self.distances)
    self.assertFalse(error.is_shown())
def __init__(self, classifier, distance=Euclidean(), k=10, gamma=0.5, rho=0.5,
             exp=True, rf=None):
    """Initialize the parameters.

    ``classifier`` is handed to ``RegrModelNC`` and ``distance`` and ``k``
    to ``NearestNeighbours``. ``gamma`` and ``rho`` are the distance and
    variance sensitivities, ``exp`` selects the type of normalization and
    ``rf`` is an optional random forest used for normalization.

    An ``rf`` other than ``None`` that is not an instance of sklearn's
    ``RandomForestRegressor`` fails the assertion below.

    NOTE: the default ``Euclidean()`` object is created once, when the
    function is defined, and is shared by every instance relying on it.
    """
    RegrModelNC.__init__(self, classifier)
    NearestNeighbours.__init__(self, distance, k)
    self._gamma = gamma  # distance sensitivity
    self._rho = rho  # variance sensitivity
    self.exp = exp  # type of normalization
    self.rf = rf  # random forest for normalization
    # Compare with None explicitly instead of truth-testing the estimator:
    # ensemble estimators define __len__ over their fitted estimators, so
    # their truth value depends on whether they have been fitted
    if rf is not None:
        assert isinstance(rf, RandomForestRegressor), \
            "Rf must be an instance of sklearn's RandomForestRegressor."
def setUp(self):
    """Create the widget, iris distances and a small hand-made matrix."""
    self.widget = self.create_widget(
        OWNxFromDistances)  # type: OWNxFromDistances
    self.data = Table("iris")
    self.distances = Euclidean(self.data)

    # Symmetric 7x7 matrix: the listed pairs are close, every other entry
    # is 10. Connecting pairs whose distance does not exceed a threshold
    # gives the following components:
    #   threshold 0.5: {1, 6} and the singletons {0}, {2}, {3}, {4}, {5}
    #   threshold 1:   {0, 1, 2, 6}, {3, 5}, {4}
    #   threshold 2:   {0, 1, 2, 3, 5, 6}, {4}
    close_pairs = {
        (1, 6): 0.5,
        (0, 1): 1, (1, 2): 1, (2, 6): 1, (0, 6): 1,
        (3, 5): 1,
        (2, 3): 2,
    }
    m = np.full((7, 7), 10.0)
    for (i, j), distance in close_pairs.items():
        m[i, j] = m[j, i] = distance
    self.distances1 = DistMatrix(m)
def __call__(self, data):
    """Fit the projection on ``data`` and return the fitted object.

    How ``data`` is used depends on ``self._metric``:

    * an instance of ``SklDistance``, ``SpearmanDistance`` or
      ``PearsonDistance``: ``data`` is preprocessed, the metric is applied
      to ``data.X`` and ``self.params['dissimilarity']`` is set to
      ``'precomputed'``;
    * the string ``'precomputed'``: ``data`` itself is used as the distance
      matrix; no target values or domain are available in this case;
    * anything else: ``data`` is preprocessed and ``data.X`` is fitted.

    When ``self.init_type`` is ``"PCA"`` and ``self.init_data`` is not set,
    ``self.init_data`` is computed with ``torgerson`` from the distance
    matrix.

    Returns the result of ``self.fit`` with its ``domain`` attribute set to
    the domain of the input (``None`` for precomputed distances).
    """
    distances = SklDistance, SpearmanDistance, PearsonDistance
    if isinstance(self._metric, distances):
        data = self.preprocess(data)
        _X, Y, domain = data.X, data.Y, data.domain
        X = dist_matrix = self._metric(_X)
        # NOTE(review): this changes self.params in place, so the setting
        # persists for later calls on the same object
        self.params['dissimilarity'] = 'precomputed'
    # Compare by value: identity comparison with a string literal depends
    # on interning and is flagged with a SyntaxWarning by the interpreter
    elif self._metric == 'precomputed':
        dist_matrix, Y, domain = data, None, None
        X = dist_matrix
    else:
        data = self.preprocess(data)
        X, Y, domain = data.X, data.Y, data.domain
        # Distances are needed only to compute the initial configuration
        if self.init_type == "PCA":
            dist_matrix = Euclidean(X)

    if self.init_type == "PCA" and self.init_data is None:
        # NOTE(review): the result is stored on the instance, so it is
        # reused by later calls even when they receive different data
        self.init_data = torgerson(dist_matrix, self.params['n_components'])
    clf = self.fit(X, Y=Y)
    clf.domain = domain
    return clf