def test_attr_label_from_data(self):
    """Check that the label combo offers the variables of the input data.

    After a Euclidean distance matrix computed from the "zoo" table is sent
    to the widget, the set of all domain variables and metas must be a
    strict subset of the items in the ``attr_label`` model.
    """
    widget = self.widget
    # Skip the MDS optimization: it saves time and keeps the widget from
    # being in a blocking state when the next signal is sent.
    widget.start = Mock()

    zoo = Table("zoo")
    self.send_signal(widget.Inputs.distances, Euclidean(zoo))

    domain = zoo.domain
    expected = set(chain(domain.variables, domain.metas))
    offered = set(widget.controls.attr_label.model())
    self.assertTrue(expected < offered)
def setUpClass(cls):
    """Prepare the class-level fixtures shared by the tests.

    Initializes the outputs mixin, registers a Euclidean distance matrix of
    ``cls.data`` as the "Distances" signal and stores the resolved path of
    the ``datasets`` directory located three levels above this file.
    """
    super().setUpClass()
    WidgetOutputsTestMixin.init(cls)

    cls.signal_name = "Distances"
    cls.signal_data = Euclidean(cls.data)
    cls.same_input_output_domain = False

    here = os.path.dirname(__file__)
    cls.datasets_dir = os.path.realpath(
        os.path.join(here, '..', '..', '..', 'datasets'))
def test_num_meta_labels(self):
    """Labels taken from a numeric meta are strings; a missing one is "?"."""
    x, y = (ContinuousVariable(name) for name in "xy")
    s = StringVariable("s")
    domain = Domain([x], [], [y, s])
    data = Table.from_list(domain, [[0, 1, "a"], [1, np.nan, "b"]])

    widget = self.widget
    widget.set_distances(Euclidean(data))

    # Select the numeric meta `y` as the annotation and notify the widget
    # as if the user had picked it in the combo box.
    combo = widget.annot_combo
    position = combo.model().indexOf(y)
    combo.setCurrentIndex(position)
    combo.activated.emit(position)

    self.assertEqual(widget.tablemodel.labels, ["1", "?"])
def fit(self, X, Y=None):
    """Fit k-means on ``X`` and wrap the scored result in a ``KMeansModel``.

    Besides what the scikit-learn estimator stores itself, the fitted
    object receives three extra attributes:

    - ``silhouette``: the silhouette score when the number of clusters is
      at least 2 and smaller than ``len(X)``, otherwise 0;
    - ``inertia``: ``inertia_`` divided by ``len(X)``;
    - ``inter_cluster``: mean of the Euclidean distances between distinct
      cluster centres (strict upper triangle of the distance matrix).
    """
    kmeans = skl_cluster.KMeans(**self.params).fit(X, Y)
    n_rows = len(X)

    if kmeans.n_clusters < 2 or kmeans.n_clusters >= n_rows:
        kmeans.silhouette = 0
    else:
        kmeans.silhouette = silhouette_score(X, kmeans.labels_)

    kmeans.inertia = kmeans.inertia_ / n_rows

    centre_distances = Euclidean(kmeans.cluster_centers_)
    upper_triangle = np.triu_indices_from(centre_distances, 1)
    kmeans.inter_cluster = np.mean(centre_distances[upper_triangle])

    return KMeansModel(kmeans, self.preprocessors)
def fit(self, X, Y=None):
    """Fit k-means on ``X`` and wrap the scored result in a ``KMeansModel``.

    The fitted estimator is extended with:

    - ``silhouette``: NaN by default; replaced by the silhouette score
      (computed with ``sample_size=5000``) when ``self._compute_silhouette``
      is set and ``2 <= n_clusters < X.shape[0]``;
    - ``inertia``: ``inertia_`` divided by the number of rows of ``X``;
    - ``inter_cluster``: mean Euclidean distance between distinct cluster
      centres (strict upper triangle of the distance matrix).
    """
    kmeans = skl_cluster.KMeans(**self.params).fit(X, Y)
    n_rows = X.shape[0]

    kmeans.silhouette = np.nan
    try:
        if self._compute_silhouette and 2 <= kmeans.n_clusters < n_rows:
            kmeans.silhouette = silhouette_score(
                X, kmeans.labels_, sample_size=5000)
    except MemoryError:
        # Pairwise distances inside the silhouette computation can exhaust
        # memory on large data; the score then simply stays NaN.
        pass

    kmeans.inertia = kmeans.inertia_ / n_rows

    centre_distances = Euclidean(kmeans.cluster_centers_)
    upper_triangle = np.triu_indices_from(centre_distances, 1)
    kmeans.inter_cluster = np.mean(centre_distances[upper_triangle])

    return KMeansModel(kmeans, self.preprocessors)
def test_torgerson(self):
    """All supported eigen solvers agree up to sign; unknown ones raise."""
    dis = Euclidean(self.ionosphere[::5])

    solvers = ("auto", "lapack", "arpack")
    auto, lapack, arpack = (
        np.abs(torgerson(dis, eigen_solver=solver)) for solver in solvers)

    # Only absolute values are compared, so a sign difference between
    # solvers does not fail the test.
    np.testing.assert_almost_equal(auto, lapack)
    np.testing.assert_almost_equal(lapack, arpack)

    with self.assertRaises(ValueError):
        torgerson(dis, eigen_solver="madness")
def __tsne_test_helper(self, data, n_com):
    """Fit t-SNE three ways and check the shape of every embedding.

    The metric is given once as the string "euclidean", once as the
    ``Euclidean`` object and once as "precomputed" (fed with a Euclidean
    distance matrix of ``data``). Each embedding must have one row per
    data row and ``n_com`` columns.
    """
    tsne_def = TSNE(n_components=n_com, metric="euclidean")(data)
    tsne_euc = TSNE(n_components=n_com, metric=Euclidean)(data)
    tsne_pre = TSNE(n_components=n_com, metric="precomputed")(Euclidean(data))

    expected_shape = (data.X.shape[0], n_com)
    for fitted in (tsne_def, tsne_euc, tsne_pre):
        self.assertEqual(expected_shape, fitted.embedding_.shape)
def _initialize(self):
    """Rebuild ``self.data`` and ``self.effective_matrix`` from the inputs.

    Reads ``self.signal_data``, ``self.matrix`` and ``self.matrix_data``,
    resets the widget state, reports input errors and finally hands the
    effective distance matrix to the graph. The plot is cleared unless a
    matrix existed before and the new effective matrix equals it.
    """
    # Remember the previous matrix so it can be compared with the new one.
    matrix_existed = self.effective_matrix is not None
    effective_matrix = self.effective_matrix
    self._invalidated = True
    self.data = None
    self.effective_matrix = None
    self.closeContext()
    self.clear_messages()

    # if no data nor matrix is present reset plot
    if self.signal_data is None and self.matrix is None:
        self.clear()
        self.init_attr_values()
        return

    # Both inputs are present but their lengths disagree.
    if self.signal_data is not None and self.matrix is not None and \
            len(self.signal_data) != len(self.matrix):
        self.Error.mismatching_dimensions()
        self.clear()
        self.init_attr_values()
        return

    # Data from the data signal takes precedence over the matrix' own data.
    if self.signal_data is not None:
        self.data = self.signal_data
    elif self.matrix_data is not None:
        self.data = self.matrix_data

    if self.matrix is not None:
        self.effective_matrix = self.matrix
        # For an axis-0 matrix whose data came from the matrix itself,
        # replace the data with a one-column table of attribute names
        # stored in a string meta called "labels".
        if self.matrix.axis == 0 and self.data is not None \
                and self.data is self.matrix_data:
            names = [[attr.name] for attr in self.data.domain.attributes]
            domain = Domain([], metas=[StringVariable("labels")])
            self.data = Table.from_list(domain, names)
    # No matrix here implies signal_data is not None (see the first guard),
    # so self.data is set.
    elif self.data.domain.attributes:
        preprocessed_data = MDS().preprocess(self.data)
        self.effective_matrix = Euclidean(preprocessed_data)
    else:
        self.Error.no_attributes()
        self.clear()
        self.init_attr_values()
        return

    self.init_attr_values()
    self.openContext(self.data)
    # Keep the current plot only when the effective matrix did not change.
    self._invalidated = not (
        matrix_existed and self.effective_matrix is not None and
        array_equal(effective_matrix, self.effective_matrix))
    if self._invalidated:
        self.clear()
    self.graph.set_effective_matrix(self.effective_matrix)
def cluster_data(self, matrix):
    """Return optimal-leaf-ordering indices for rows and columns.

    Rows are clustered on Euclidean distances between rows of ``matrix``
    and columns on distances computed with ``axis=0``. When there is only
    one row (or one column) the corresponding order is ``array([0])``.
    The widget's progress bar is active while clustering runs.

    Returns a ``(row_order, columns_order)`` tuple of numpy arrays.
    """
    def leaf_order(distances):
        # Cluster, reorder the leaves optimally and read off their indices.
        tree = hierarchical.dist_matrix_clustering(distances)
        ordered = hierarchical.optimal_leaf_ordering(
            tree, distances, progress_callback=self.progressBarSet)
        return np.array([leaf.value.index for leaf in leaves(ordered)])

    single = np.array([0])
    with self.progressBar():
        # cluster rows
        row_order = (leaf_order(Euclidean(matrix))
                     if len(matrix) > 1 else single)

        # cluster columns
        if matrix.X.shape[1] > 1:
            columns_order = leaf_order(Euclidean(matrix, axis=0))
        else:
            columns_order = np.array([0])

        return row_order, columns_order
def __init__(self, classifier, distance=Euclidean(), k=10, relative=True,
             include=False, neighbourhood='fixed'):
    """Initialize the parameters.

    ``distance`` and ``k`` are forwarded to the parent initializer; the
    remaining arguments are stored on the instance under the same names.
    ``neighbourhood`` must be either 'fixed' or 'variable'.
    """
    super().__init__(distance, k)
    self.classifier = classifier
    self.relative = relative
    self.include = include

    allowed_neighbourhoods = ('fixed', 'variable')
    assert neighbourhood in allowed_neighbourhoods
    self.neighbourhood = neighbourhood
def _initialize(self):
    """Rebuild ``self.data`` and ``self.effective_matrix`` from the inputs.

    Reads ``self.signal_data``, ``self.matrix`` and ``self.matrix_data``,
    resets the widget state, reports input errors and finally hands the
    effective distance matrix to the graph. The plot is cleared unless a
    matrix existed before and the new effective matrix equals it.
    """
    # Remember the previous matrix so it can be compared with the new one.
    matrix_existed = self.effective_matrix is not None
    effective_matrix = self.effective_matrix
    self.__invalidated = True
    self.data = None
    self.effective_matrix = None
    self.closeContext()
    self.clear_messages()

    # if no data nor matrix is present reset plot
    if self.signal_data is None and self.matrix is None:
        self.clear()
        self.init_attr_values()
        return

    # Both inputs are present but their lengths disagree.
    if self.signal_data is not None and self.matrix is not None and \
            len(self.signal_data) != len(self.matrix):
        self.Error.mismatching_dimensions()
        self.clear()
        self.init_attr_values()
        return

    # Data from the data signal takes precedence over the matrix' own data.
    if self.signal_data is not None:
        self.data = self.signal_data
    elif self.matrix_data is not None:
        self.data = self.matrix_data

    if self.matrix is not None:
        self.effective_matrix = self.matrix
        # An axis-0 matrix whose data came from the matrix itself is shown
        # without an accompanying data table.
        if self.matrix.axis == 0 and self.data is self.matrix_data:
            self.data = None
    # No matrix here implies signal_data is not None (see the first guard),
    # so self.data is set.
    elif self.data.domain.attributes:
        preprocessed_data = MDS().preprocess(self.data)
        self.effective_matrix = Euclidean(preprocessed_data)
    else:
        self.Error.no_attributes()
        self.clear()
        self.init_attr_values()
        return

    self.init_attr_values()
    self.openContext(self.data)
    # Keep the current plot only when the effective matrix did not change.
    self.__invalidated = not (matrix_existed and
                              self.effective_matrix is not None and
                              np.array_equal(effective_matrix,
                                             self.effective_matrix))
    if self.__invalidated:
        self.clear()
    self.graph.set_effective_matrix(self.effective_matrix)
def test_labels(self):
    """Annotating with a string meta leaves ``label_colors`` unset (None)."""
    x, y = (ContinuousVariable(name) for name in "xy")
    s = StringVariable("s")
    rows = [
        [91.0, 89.0, "Bill"],
        [51.0, 100.0, "Cynthia"],
        [9.0, 61.0, "Demi"],
        [49.0, 92.0, "Fred"],
        [91.0, 49.0, "George"],
    ]
    grades = Table.from_list(Domain([x, y], [], [s]), rows)

    widget = self.widget
    widget.set_distances(Euclidean(grades))

    # Pick the string meta as annotation, as if chosen in the combo box.
    combo = widget.annot_combo
    position = combo.model().indexOf(grades.domain.metas[0])
    combo.setCurrentIndex(position)
    combo.activated.emit(position)

    self.assertIsNone(widget.tablemodel.label_colors)
def test_infinite_distances(self):
    """
    Infinite distances are rejected by scipy and by this widget alike:
    the widget must show an error for them and clear it again once a
    regular distance matrix arrives.
    GH-2380
    """
    def error_shown():
        return self.widget.Error.not_finite_distances.is_shown()

    domain = Domain(
        [ContinuousVariable("a")],
        [DiscreteVariable("b", values=["y"])])
    rows = list(zip([1.79e308, -1e120], "yy"))
    distances = Euclidean(Table(domain, rows))

    self.assertFalse(error_shown())
    self.send_signal(self.widget.Inputs.distances, distances)
    self.assertTrue(error_shown())
    self.send_signal(self.widget.Inputs.distances, self.distances)
    self.assertFalse(error_shown())
def __init__(self, classifier, distance=Euclidean(), k=10, gamma=0.5,
             rho=0.5, exp=True, rf=None):
    """Initialize the parameters.

    ``classifier`` is passed to ``RegrModelNC.__init__`` and ``distance``
    and ``k`` to ``NearestNeighbours.__init__``. The remaining arguments
    are stored on the instance.

    Args:
        gamma: distance sensitivity (stored as ``_gamma``).
        rho: variance sensitivity (stored as ``_rho``).
        exp: type of normalization.
        rf: optional random forest for normalization; when given it must
            be an instance of sklearn's ``RandomForestRegressor``.
    """
    RegrModelNC.__init__(self, classifier)
    NearestNeighbours.__init__(self, distance, k)
    self._gamma = gamma  # distance sensitivity
    self._rho = rho  # variance sensitivity
    self.exp = exp  # type of normalization
    self.rf = rf  # random forest for normalization
    # Test against None rather than truthiness: sklearn ensembles define
    # __len__ in terms of their fitted estimators, so truth-testing an
    # unfitted forest raises AttributeError instead of reaching the check.
    if self.rf is not None:
        assert isinstance(rf, RandomForestRegressor), \
            "Rf must be an instance of sklearn's RandomForestRegressor."
def test_infinite_distances(self):
    """
    Infinite distances are rejected by scipy and by this widget alike:
    the widget must show an error for them and clear it again once a
    regular distance matrix arrives.
    GH-2380
    """
    def error_shown():
        return self.widget.Error.not_finite_distances.is_shown()

    domain = Domain(
        [ContinuousVariable("a")],
        [DiscreteVariable("b", values=("y", ))])
    table = Table.from_list(domain, list(zip([1.79e308, -1e120], "yy")))

    # Runtime warnings raised while computing the distances are silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", ".*", RuntimeWarning)
        distances = Euclidean(table)

    self.assertFalse(error_shown())
    self.send_signal(self.widget.Inputs.distances, distances)
    self.assertTrue(error_shown())
    self.send_signal(self.widget.Inputs.distances, self.distances)
    self.assertFalse(error_shown())
class TestDistMatrix(TestCase):
    """Tests of a Euclidean distance matrix computed on the iris table."""

    def setUp(self):
        """Load iris and compute its Euclidean distance matrix."""
        self.iris = Table('iris')
        self.dist = Euclidean(self.iris)

    def test_submatrix(self):
        """``submatrix`` for indices 2..4 equals slicing both axes 2:5."""
        window = slice(2, 5)
        sub = self.dist.submatrix([2, 3, 4])
        np.testing.assert_equal(sub, self.dist[window, window])
        expected_rows = self.dist.row_items[window]
        self.assertTrue(tables_equal(sub.row_items, expected_rows))

    def test_pickling(self):
        """A pickle round trip keeps values, row/column items and axis."""
        restored = pickle.loads(pickle.dumps(self.dist))
        np.testing.assert_equal(restored, self.dist)
        for items in ("row_items", "col_items"):
            self.assertTrue(tables_equal(getattr(restored, items),
                                         getattr(self.dist, items)))
        self.assertEqual(restored.axis, self.dist.axis)
def __mds_test_helper(self, data, n_com):
    """Fit MDS three ways and check that the embeddings agree.

    The dissimilarity is given once as the ``Euclidean`` object, once as
    'precomputed' (fed with a Euclidean distance matrix of ``data``) and
    once as the string 'euclidean'. With ``random_state=0`` the first
    embedding must be close to the other two, and every embedding must
    have one row per data row and ``n_com`` columns.
    """
    def embed(dissimilarity, source):
        projector = MDS(n_components=n_com, dissimilarity=dissimilarity,
                        random_state=0)
        return projector(source).embedding_

    from_metric = embed(Euclidean, data)
    from_matrix = embed('precomputed', Euclidean(data))
    from_name = embed('euclidean', data)

    self.assertTrue(np.allclose(from_metric, from_matrix))
    self.assertTrue(np.allclose(from_metric, from_name))

    eshape = data.X.shape[0], n_com
    for embedding in (from_metric, from_matrix, from_name):
        self.assertEqual(eshape, embedding.shape)
def setUp(self):
    """Create the widget, iris distances and a small hand-made matrix.

    ``self.distances1`` is a symmetric 7x7 matrix filled with 10.0 except
    for a handful of short edges listed below.
    """
    self.widget = self.create_widget(
        OWNxFromDistances)  # type: OWNxFromDistances
    self.data = Table("iris")
    self.distances = Euclidean(self.data)

    # When converted to a graph, this has the following components:
    # At threshold 0.5: {1, 6} and disconnected {0}, {2}, {3}, {4}, {5}
    # At threshold 1 {0, 1, 2, 6}, {3, 5}, {4}
    # At threshold 2 {0, 1, 2, 3, 5, 6}, {4}
    short_edges = (
        (0.5, [(1, 6)]),
        (1, [(0, 1), (1, 2), (2, 6), (0, 6), (3, 5)]),
        (2, [(2, 3)]),
    )
    m = np.full((7, 7), 10.0)
    for length, pairs in short_edges:
        for i, j in pairs:
            # write both triangles to keep the matrix symmetric
            m[i, j] = m[j, i] = length
    self.distances1 = DistMatrix(m)
def __call__(self, data):
    """Fit the projection on ``data`` and return the fitted object.

    How ``data`` is interpreted depends on ``self._metric``:

    - an ``SklDistance``, ``SpearmanDistance`` or ``PearsonDistance``
      instance: ``data`` is preprocessed, the metric is applied to its
      ``X`` and the resulting distance matrix is fitted (the
      'dissimilarity' parameter is switched to 'precomputed');
    - the string 'precomputed': ``data`` itself is used as the distance
      matrix; no targets or domain are available;
    - anything else: ``data`` is preprocessed and its ``X`` is fitted.

    With ``self.init_type == "PCA"`` and no ``self.init_data`` yet, the
    initial configuration is computed with ``torgerson`` from the
    distance matrix.

    Returns:
        The object returned by ``self.fit``, with ``domain`` set to the
        data domain (``None`` for precomputed distances).
    """
    distances = SklDistance, SpearmanDistance, PearsonDistance
    if isinstance(self._metric, distances):
        data = self.preprocess(data)
        _X, Y, domain = data.X, data.Y, data.domain
        X = dist_matrix = self._metric(_X)
        # X now holds distances, so the estimator must not recompute them.
        self.params['dissimilarity'] = 'precomputed'
    # Compare by value: identity comparison with a string literal depends
    # on interning and misses equal strings that are distinct objects.
    elif self._metric == 'precomputed':
        dist_matrix, Y, domain = data, None, None
        X = dist_matrix
    else:
        data = self.preprocess(data)
        X, Y, domain = data.X, data.Y, data.domain
        if self.init_type == "PCA":
            # Distances are only needed here for the Torgerson start.
            dist_matrix = Euclidean(X)

    if self.init_type == "PCA" and self.init_data is None:
        self.init_data = torgerson(dist_matrix, self.params['n_components'])

    clf = self.fit(X, Y=Y)
    clf.domain = domain
    return clf
def _initialize(self):
    """Rebuild ``self.data`` and ``self.effective_matrix`` from the inputs.

    Reads ``self.signal_data``, ``self.matrix`` and ``self.matrix_data``,
    resets the widget state, reports input errors and finally hands the
    effective distance matrix to the graph.
    """
    # clear everything
    self.closeContext()
    self._clear()
    self.Error.clear()
    self.effective_matrix = None
    self.embedding = None

    # if no data nor matrix is present reset plot
    # NOTE(review): this guard tests matrix_data rather than matrix; if a
    # matrix can arrive without matrix_data it is ignored here - confirm
    # that this is intended.
    if self.signal_data is None and self.matrix_data is None:
        self.data = None
        self.init_attr_values()
        return

    # Both inputs are present but their lengths disagree.
    if self.signal_data is not None and self.matrix is not None and \
            len(self.signal_data) != len(self.matrix):
        self.Error.mismatching_dimensions()
        self._update_plot()
        return

    # Data from the data signal takes precedence over the matrix' own data.
    if self.signal_data is not None:
        self.data = self.signal_data
    elif self.matrix_data is not None:
        self.data = self.matrix_data

    if self.matrix is not None:
        self.effective_matrix = self.matrix
        # An axis-0 matrix whose data came from the matrix itself is shown
        # without an accompanying data table.
        if self.matrix.axis == 0 and self.data is self.matrix_data:
            self.data = None
    elif self.data.domain.attributes:
        # No matrix given: compute Euclidean distances on preprocessed data.
        preprocessed_data = MDS().preprocess(self.data)
        self.effective_matrix = Euclidean(preprocessed_data)
    else:
        self.Error.no_attributes()
        return

    self.init_attr_values()
    self.openContext(self.data)
    self.graph.set_effective_matrix(self.effective_matrix)
def setUpClass(cls):
    """Compute the shared fixtures once: iris data, its Euclidean
    distances, a Torgerson initial configuration and the argument tuple
    ``(distances, 300, 25, 0, init)`` used by the tests."""
    iris = Table("iris")
    distances = Euclidean(iris)
    init = torgerson(distances)

    cls.data = iris
    cls.distances = distances
    cls.init = init
    cls.args = (distances, 300, 25, 0, init)
def test_matrix_columns_labels(self):
    """Activate a label on a column-distance matrix.

    Smoke test: nothing is asserted beyond the calls completing.
    """
    widget = self.widget
    column_distances = Euclidean(self.data, axis=0)
    self.send_signal(widget.Inputs.distances, column_distances)
    simulate.combobox_activate_index(widget.controls.attr_label, 2)
def test_matrix_columns_default_label(self):
    """With column distances the label combo shows "labels" by default."""
    widget = self.widget
    column_distances = Euclidean(self.data, axis=0)
    self.send_signal(widget.Inputs.distances, column_distances)
    self.assertEqual(widget.controls.attr_label.currentText(), "labels")
def setRegion(self, low, high):
    """Set the selected region after clipping both ends.

    ``low`` and ``high`` are clipped with ``np.clip`` to the limits
    unpacked from ``self.boundary()``.
    """
    low, high = np.clip([low, high], *self.boundary())
    self.region.setRegion((low, high))

def getRegion(self):
    """Return the result of ``self.region.getRegion()``."""
    return self.region.getRegion()

def setValues(self, values):
    """Show a histogram of ``values`` and fit the boundary to its range.

    For empty ``values`` the curve is reset to a flat placeholder and the
    boundary is set to ``(0, 0)``.
    """
    self.fillCurve.setData([0,1], [0])
    # len() rather than truthiness, so array-like input works as well.
    if not len(values):
        self.curve.setData([0, 1], [0])
        self.setBoundary(0, 0)
        return
    # The number of bins is sqrt(len(values)), capped at 100.
    nbins = int(min(np.sqrt(len(values)), 100))
    freq, edges = np.histogram(values, bins=nbins)
    self.curve.setData(edges, freq)
    self.setBoundary(edges[0], edges[-1])
    self.autoRange()

@property
def xData(self):
    """The ``xData`` of ``self.curve``."""
    return self.curve.xData

@property
def yData(self):
    """The ``yData`` of ``self.curve``."""
    return self.curve.yData

if __name__ == "__main__":
    # Manual preview of the widget on Euclidean distances of iris.
    WidgetPreview(OWNxFromDistances).run(set_matrix=(Euclidean(Table("iris"))))
def test_matrix_columns_tooltip(self):
    """With column distances the tooltip of point 0 names "sepal length"."""
    widget = self.widget
    column_distances = Euclidean(self.data, axis=0)
    self.send_signal(widget.Inputs.distances, column_distances)
    tooltip = widget.get_tooltip([0])
    self.assertIn("sepal length", tooltip)
def __init__(self, distance=Euclidean(), k=1, weighted=False):
    """Initialize the parameters.

    ``distance`` and ``k`` are forwarded to the parent initializer;
    ``weighted`` is stored on the instance.
    """
    super().__init__(distance, k)
    self.weighted = weighted
# NOTE(review): the statements up to the first decorator continue a method
# whose ``def`` line lies above this chunk; they are reproduced unchanged.
self.fillCurve.setData([0, 1], [0])
if not len(values):
    self.curve.setData([0, 1], [0])
    self.setBoundary(0, 0)
    self.autoRange()
    return
nbins = min(len(values), 100)
freq, edges = np.histogram(values, bins=nbins)
self.curve.setData(edges, freq)
self.setBoundary(edges[0], edges[-1])
self.autoRange()

@property
def xData(self):
    """The ``xData`` of ``self.curve``."""
    return self.curve.xData

@property
def yData(self):
    """The ``yData`` of ``self.curve``."""
    return self.curve.yData

if __name__ == "__main__":
    # Manual preview: show the widget on Euclidean distances of iris.
    from Orange.distance import Euclidean
    appl = QApplication([])
    data = Table('iris')
    dm = Euclidean(data)
    ow = OWDuplicates()
    ow.set_distances(dm)
    ow.show()
    appl.exec_()
# NOTE(review): the statements up to ``def save_file`` continue a method
# whose ``def`` line lies above this chunk; they are reproduced unchanged.
if not filename:
    return
self.filename = filename
self.unconditional_save_file()
self.last_dir = os.path.split(self.filename)[0]
self.adjust_label()

def save_file(self):
    """Save ``self.distances`` to ``self.filename``.

    Does nothing without distances and delegates to ``save_file_as`` when
    no file name is set. After saving, a warning is shown if row and/or
    column items exist but the matrix has no labels for them; otherwise
    the warning is cleared.
    """
    dist = self.distances
    if dist is None:
        return
    if not self.filename:
        self.save_file_as()
    else:
        dist.save(self.filename)
        # Items are "skipped" when they exist but have no labels.
        skip_row = not dist.has_row_labels() and dist.row_items is not None
        skip_col = not dist.has_col_labels() and dist.col_items is not None
        if skip_row and skip_col:
            self.warning("Associated data table was not saved")
        elif skip_row or skip_col:
            # skip_col (a bool) indexes the list: False -> "rows",
            # True -> "columns".
            self.warning("Data associated with {} was not saved".format(
                ["rows", "columns"][skip_col]))
        else:
            self.warning()

if __name__ == "__main__":
    # Manual preview of the widget on Euclidean distances of iris.
    from Orange.data import Table
    from Orange.distance import Euclidean
    WidgetPreview(OWSaveDistances).run(Euclidean(Table("iris")))
def setUp(self):
    """Create the kNN learner under test, passing a ``Euclidean()``
    instance as its ``distance_constructor``."""
    self.learner = knn.kNNLearner(distance_constructor=Euclidean())
def setUpClass(cls):
    """Prepare the class-level fixtures shared by the tests.

    Initializes the outputs mixin and registers a Euclidean distance
    matrix of ``cls.data`` as the "Distances" signal.
    """
    super().setUpClass()
    WidgetOutputsTestMixin.init(cls)
    cls.signal_name = "Distances"
    cls.signal_data = Euclidean(cls.data)