def test_euclidean_cols(self):
    """Check un-normalized Euclidean distances computed with ``axis=0``.

    The distances are verified on the pristine ``self.cont_data`` and then
    again after each of two cells is replaced by NaN.
    """
    check = np.testing.assert_almost_equal
    table = self.cont_data

    def column_distances():
        # Recomputed from scratch so every check sees the current table.
        return distance.Euclidean(table, axis=0, normalize=False)

    # No missing values.
    check(
        column_distances(),
        [[0, 8.062257748, 4.242640687],
         [8.062257748, 0, 5.196152423],
         [4.242640687, 5.196152423, 0]])

    # One missing value, in the second column.
    with table.unlocked():
        table.X[1, 1] = np.nan
    check(
        column_distances(),
        [[0, 6.218252702, 4.242640687],
         [6.218252702, 0, 2.581988897],
         [4.242640687, 2.581988897, 0]])

    # A second missing value in the same row, first column.
    with table.unlocked():
        table.X[1, 0] = np.nan
    check(
        column_distances(),
        [[0, 6.218252702, 5.830951895],
         [6.218252702, 0, 2.581988897],
         [5.830951895, 2.581988897, 0]])
def test_euclidean_cols_normalized(self):
    """Check normalized Euclidean distances computed with ``axis=0``.

    The distances are verified on the pristine ``self.cont_data`` and then
    again after each of two cells is replaced by NaN.
    """
    check = np.testing.assert_almost_equal
    table = self.cont_data

    def column_distances():
        # Recomputed from scratch so every check sees the current table.
        return distance.Euclidean(table, axis=0, normalize=True)

    # No missing values.
    check(
        column_distances(),
        [[0, 2.455273959, 0.649839392],
         [2.455273959, 0, 2.473176308],
         [0.649839392, 2.473176308, 0]])

    # One missing value, in the second column.
    with table.unlocked():
        table.X[1, 1] = np.nan
    check(
        column_distances(),
        [[0, 2, 0.649839392],
         [2, 0, 1.704275472],
         [0.649839392, 1.704275472, 0]])

    # A second missing value in the same row, first column.
    with table.unlocked():
        table.X[1, 0] = np.nan
    check(
        column_distances(),
        [[0, 2, 1.450046001],
         [2, 0, 1.704275472],
         [1.450046001, 1.704275472, 0]])
def test_euclidean_cont(self):
    """Check un-normalized Euclidean distances computed with ``axis=1``.

    The distances are verified on the pristine ``self.cont_data`` and then
    again after each of two cells in the first column is replaced by NaN.
    """
    check = np.testing.assert_almost_equal
    table = self.cont_data

    def row_distances():
        # Recomputed from scratch so every check sees the current table.
        return distance.Euclidean(table, axis=1, normalize=False)

    # No missing values: expected values are given as squared distances.
    squared = np.array([[0, 12, 5, 38],
                        [12, 0, 21, 82],
                        [5, 21, 0, 41],
                        [38, 82, 41, 0]])
    check(row_distances(), np.sqrt(squared))

    # One missing value.
    with table.unlocked():
        table.X[1, 0] = np.nan
    check(
        row_distances(),
        [[0, 4.472135955, 2.236067977, 6.164414003],
         [4.472135955, 0, 5.385164807, 6.480740698],
         [2.236067977, 5.385164807, 0, 6.403124237],
         [6.164414003, 6.480740698, 6.403124237, 0]])

    # Two missing values in the same column.
    with table.unlocked():
        table.X[0, 0] = np.nan
    check(
        row_distances(),
        [[0, 5.099019514, 4.795831523, 4.472135955],
         [5.099019514, 0, 5.916079783, 6],
         [4.795831523, 5.916079783, 0, 6.403124237],
         [4.472135955, 6, 6.403124237, 0]])
def test_two_tables(self):
    """Check normalized Euclidean distances between two different tables.

    The cross-table result must be the same whether it is computed in one
    call or through a model fitted on ``self.cont_data``; the fitted model
    is then also applied to ``self.cont_data2`` alone.
    """
    check = np.testing.assert_almost_equal
    first, second = self.cont_data, self.cont_data2

    # Expected distances between each row of `first` and each row of `second`.
    cross_expected = [
        [1.17040218, 0.47809144],
        [2.78516478, 1.96961039],
        [1.28668394, 0.79282497],
        [1.27179413, 1.54919334],
    ]

    # One-shot computation.
    check(distance.Euclidean(first, second, normalize=True), cross_expected)

    # Same computation through a model fitted on the first table only.
    fitted = distance.Euclidean(normalize=True).fit(first)
    check(fitted(first, second), cross_expected)

    # The fitted model applied to the second table on its own.
    check(fitted(second), [[0, 0.827119692], [0.827119692, 0]])
def test_euclidean_cont_normalized(self):
    """Check normalized Euclidean distances computed with ``axis=1``.

    Besides the distance matrix, the fitted model's ``means`` and ``vars``
    are verified, first on the pristine ``self.cont_data`` and then after
    each of two cells in the first column is replaced by NaN.
    """
    check = np.testing.assert_almost_equal
    table = self.cont_data

    def fitted():
        # A fresh model each time, so statistics reflect the current table.
        return distance.Euclidean(axis=1, normalize=True).fit(table)

    # --- no missing values -------------------------------------------------
    model = fitted()
    check(model.means, [2, 2.75, 1.5])
    check(model.vars, [9, 2.1875, 1.25])
    check(model.dist_missing2_cont, [1, 1, 1])

    complete_expected = [
        [0, 1.654239383, 1.146423008, 1.621286967],
        [1.654239383, 0, 2.068662631, 3.035242727],
        [1.146423008, 2.068662631, 0, 1.956673562],
        [1.621286967, 3.035242727, 1.956673562, 0],
    ]
    check(model(table), complete_expected)
    # The one-shot call must agree with the fitted model.
    check(distance.Euclidean(table, axis=1, normalize=True), complete_expected)

    # --- one missing value -------------------------------------------------
    # NOTE(review): X is modified in place without an unlock guard; Orange
    # versions that lock table arrays would need `with table.unlocked():`
    # here - confirm against the targeted Orange version.
    table.X[1, 0] = np.nan
    model = fitted()
    check(model.means, [3, 2.75, 1.5])
    check(model.vars, [8, 2.1875, 1.25])
    check(
        model(table),
        [
            [0, 1.806733438, 1.146423008, 1.696635326],
            [1.806733438, 0, 2.192519751, 2.675283697],
            [1.146423008, 2.192519751, 0, 2.019547333],
            [1.696635326, 2.675283697, 2.019547333, 0],
        ],
    )

    # --- two missing values in the same column -----------------------------
    table.X[0, 0] = np.nan
    model = fitted()
    check(model.means, [4, 2.75, 1.5])
    check(model.vars, [9, 2.1875, 1.25])
    check(
        model(table),
        [
            [0, 1.874642823, 1.521277659, 1.276154939],
            [1.874642823, 0, 2.248809209, 2.580143961],
            [1.521277659, 2.248809209, 0, 1.956673562],
            [1.276154939, 2.580143961, 1.956673562, 0],
        ],
    )
def test_euclidean_disc(self):
    """Check Euclidean distances and missing-value tables on discrete data.

    ``dist_missing_disc``, ``dist_missing2_disc`` and the distance matrix
    are verified on ``self.disc_data`` (pristine, then with one and two
    NaNs in the first column) and finally on ``self.disc_data4`` with two
    NaNs in the first column.
    """
    check = np.testing.assert_almost_equal

    # --- self.disc_data, no missing values ---------------------------------
    table = self.disc_data
    fitted = distance.Euclidean().fit(table)
    check(
        fitted.dist_missing_disc,
        [[1 / 3, 2 / 3, 1, 1],
         [2 / 3, 2 / 3, 1, 2 / 3],
         [2 / 3, 1 / 3, 1, 1]])
    check(fitted.dist_missing2_disc, [1 - 5 / 9, 1 - 3 / 9, 1 - 5 / 9])
    squared = np.array([[0, 2, 3], [2, 0, 2], [3, 2, 0]])
    check(fitted(table), np.sqrt(squared))

    # --- one missing value -------------------------------------------------
    with table.unlocked():
        table.X[1, 0] = np.nan
    fitted = distance.Euclidean().fit(table)
    check(
        fitted.dist_missing_disc,
        [[1 / 2, 1 / 2, 1, 1],
         [2 / 3, 2 / 3, 1, 2 / 3],
         [2 / 3, 1 / 3, 1, 1]])
    check(fitted.dist_missing2_disc, [1 - 2 / 4, 1 - 3 / 9, 1 - 5 / 9])
    # The distance computation itself runs with the table unlocked here.
    with table.unlocked():
        result = fitted(table)
    squared = np.array([[0, 2.5, 3], [2.5, 0, 1.5], [3, 1.5, 0]])
    check(result, np.sqrt(squared))

    # --- two missing values in the same column -----------------------------
    with table.unlocked():
        table.X[0, 0] = np.nan
    fitted = distance.Euclidean().fit(table)
    check(
        fitted.dist_missing_disc,
        [[1, 0, 1, 1],
         [2 / 3, 2 / 3, 1, 2 / 3],
         [2 / 3, 1 / 3, 1, 1]])
    check(fitted.dist_missing2_disc, [1 - 1, 1 - 3 / 9, 1 - 5 / 9])
    squared = np.array([[0, 2, 2], [2, 0, 1], [2, 1, 0]])
    check(fitted(table), np.sqrt(squared))

    # --- self.disc_data4, first two rows missing in the first column -------
    table = self.disc_data4
    with table.unlocked():
        table.X[:2, 0] = np.nan
    fitted = distance.Euclidean().fit(table)
    check(
        fitted.dist_missing_disc,
        [[1 / 2, 1 / 2, 1, 1],
         [3 / 4, 2 / 4, 1, 3 / 4],
         [3 / 4, 1 / 4, 1, 1]])
    check(fitted.dist_missing2_disc, [1 - 2 / 4, 1 - 6 / 16, 1 - 10 / 16])
    squared = np.array([[0, 2.5, 2.5, 2.5],
                        [2.5, 0, 0.5, 1.5],
                        [2.5, 0.5, 0, 2],
                        [2.5, 1.5, 2, 0]])
    check(fitted(table), np.sqrt(squared))
def test_euclidean_mixed(self):
    """Check the normalized ``axis=1`` Euclidean model on ``self.mixed_data``.

    The fitted model's statistics and missing-value tables are verified
    first, then the distance matrix it produces for the same table.
    """
    check = np.testing.assert_almost_equal
    table = self.mixed_data
    fitted = distance.Euclidean(axis=1, normalize=True).fit(table)

    # (model attribute, expected value), checked in this order.
    expected_attributes = (
        ("means", [1 / 3, 3, 1]),
        ("vars", [8 / 9, 8 / 3, 2 / 3]),
        ("dist_missing_disc", [[1 / 3, 2 / 3, 1, 1],
                               [2 / 3, 2 / 3, 1, 2 / 3],
                               [2 / 3, 1 / 3, 1, 1]]),
        ("dist_missing2_cont", [1, 1, 1]),
        ("dist_missing2_disc", [1 - 5 / 9, 1 - 3 / 9, 1 - 5 / 9]),
    )
    for attribute, expected in expected_attributes:
        check(getattr(fitted, attribute), expected)

    check(
        fitted(table),
        [
            [0, 2.828427125, 2.121320344],
            [2.828427125, 0, 2.828427125],
            [2.121320344, 2.828427125, 0],
        ],
    )
def build_linkage_matrix(topic_table):
    """Build an average-linkage hierarchical clustering of `topic_table`.

    The table is wrapped in ``data.Table``, pairwise Euclidean distances
    are computed on its ``X`` array, and the result is clustered with
    ``scipy.cluster.hierarchy.linkage``.

    Parameters
    ----------
    topic_table
        Anything accepted by ``data.Table(...)``.

    Returns
    -------
    The linkage matrix returned by ``scipy.cluster.hierarchy.linkage``.
    """
    # Local import: keeps this function self-contained regardless of what
    # the file's import block provides.
    from scipy.spatial.distance import squareform

    table = data.Table(topic_table)
    dist_matrix = distance.Euclidean(table.X)

    # `linkage` interprets a 2-D argument as raw observation vectors, so a
    # square distance matrix must be condensed first; otherwise the
    # clustering is computed on distances between rows of the distance
    # matrix. `checks=False` skips the symmetry/zero-diagonal validation,
    # which could otherwise trip on floating-point noise.
    condensed = squareform(dist_matrix, checks=False)
    return scipy.cluster.hierarchy.linkage(
        condensed, method=hierarchical.AVERAGE)
def test_main():
    """Show the hierarchical clustering widget on iris distances.

    Creates a Qt application, feeds Euclidean distances computed from
    ``iris.tab`` to an ``OWHierarchicalClustering`` widget, runs the event
    loop and then tears the widget down.

    Returns the value returned by ``app.exec_()``.
    """
    from PyQt4.QtGui import QApplication
    import sip
    import Orange.distance as distance

    app = QApplication([])
    clustering_widget = OWHierarchicalClustering()

    iris = Orange.data.Table("iris.tab")
    iris_distances = distance.Euclidean(iris)

    clustering_widget.set_distances(iris_distances)
    clustering_widget.handleNewSignals()
    clustering_widget.show()
    clustering_widget.raise_()
    exit_code = app.exec_()

    # Tear down explicitly: notify the widget, destroy the wrapped Qt
    # object, drop the Python reference, then process pending events.
    clustering_widget.onDeleteWidget()
    sip.delete(clustering_widget)
    del clustering_widget
    app.processEvents()
    return exit_code
# Remove result files left over from a previous run.
f1 = 'Hierarchical.csv'
f2 = 'KMeans.csv'
for stale_output in (f1, f2):
    if os.path.exists(stale_output):
        os.remove(stale_output)

data = Orange.data.Table('output.tab')
numDocs = len(data)

# Single-argument, parenthesised print produces the same output under the
# Python 2 print statement and the Python 3 print function.
print("Count of documents in Reuters dataset: " + str(numDocs) + "\n")
print("1. Constructing Distance Matrices\n")

# Euclidean: distance matrix plus average-linkage hierarchical clustering.
# `timer` holds the wall-clock seconds spent on both steps.
starter = time.time()
constructorEuclidean = distance.Euclidean()
EuclideanDistanceMat = distance.distance_matrix(
    data, distance_constructor=constructorEuclidean)
euclidean_hierarchical_clustering = (
    clustering.hierarchical.HierarchicalClustering())
euclidean_hierarchical_clustering.linkage = clustering.hierarchical.AVERAGE
euclideanRoot = euclidean_hierarchical_clustering(EuclideanDistanceMat)
ender = time.time()
timer = ender - starter

# Manhattan: same pipeline, timed separately from `starter1`.
starter1 = time.time()
constructorManhattan = distance.Manhattan()
ManhattanDistanceMat = distance.distance_matrix(
    data, distance_constructor=constructorManhattan)
manhattan_hierarchical_clustering = (
    clustering.hierarchical.HierarchicalClustering())
# NOTE(review): `paint` takes `self`, so it presumably belongs to a class
# whose header is not visible in this chunk - confirm its indentation level.
def paint(self, painter, *args):
    """Draw ``self._line`` on `painter` using this item's pen.

    The painter state is saved before and restored after drawing. Extra
    positional arguments are accepted and ignored.
    """
    if self._line is None:
        # Called only for its side effect; presumably it populates
        # `self._line` - confirm in the enclosing class.
        self.boundingRect()

    painter.save()
    painter.setPen(self.pen())
    painter.drawLine(self._line)
    painter.restore()


def clusters_at_height(root, height):
    """Return the clusters obtained by cutting the clustering at `height`.

    Nodes are visited in the order produced by ``preorder(root)``. The
    first node on each branch whose ``value.height`` is below `height` is
    collected, and every node yielded by ``preorder`` for that node is
    skipped afterwards.
    """
    covered = set()
    selected = []
    for node in preorder(root):
        if node not in covered and node.value.height < height:
            selected.append(node)
            covered.update(preorder(node))
    return selected


if __name__ == "__main__":  # pragma: no cover
    from Orange import distance

    iris = Orange.data.Table("iris")
    iris_distances = distance.Euclidean(distance._preprocess(iris))
    WidgetPreview(OWHierarchicalClustering).run(iris_distances)
import Orange.data
import Orange.misc
from Orange.widgets import widget, gui, settings
from Orange import distance

# (display label, distance object) pairs, one per supported metric.
# NOTE(review): `metric_idx` below presumably indexes into this list -
# confirm against the part of the widget that is not visible here.
_METRICS = [
    ("Euclidean", distance.Euclidean()),
    ("Manhattan", distance.Manhattan()),
    ("Cosine", distance.Cosine()),
    ("Jaccard", distance.Jaccard()),
    ("Mahalanobis", distance.Mahalanobis()),
    ("Spearman", distance.SpearmanR()),
    ("Spearman absolute", distance.SpearmanRAbsolute()),
    ("Pearson", distance.PearsonR()),
    ("Pearson absolute", distance.PearsonRAbsolute()),
]


class OWDistances(widget.OWWidget):
    # Widget metadata.
    name = "Distances"
    description = "Compute a matrix of pairwise distances."
    icon = "icons/Distance.svg"

    # Signal declarations: a "Data" input of type Orange.data.Table
    # (third element presumably names the handler method - TODO confirm)
    # and a "Distances" output of type Orange.misc.DistMatrix.
    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Distances", Orange.misc.DistMatrix)]

    # Values declared as widget settings, with their defaults.
    axis = settings.Setting(0)
    metric_idx = settings.Setting(0)
    autocommit = settings.Setting(False)

    want_main_area = False