def test_manhattan_cols_normalized(self):
    # Normalized Manhattan distances computed with axis=0 on the continuous
    # fixture: first on the data as given, then after X[1, 1] is set to
    # NaN, and finally after X[1, 0] is set to NaN as well.
    table = self.cont_data

    def blank(row, col):
        # The fixture is modified in place, inside an unlocked() block.
        with table.unlocked():
            table.X[row, col] = np.nan

    def expect(matrix):
        np.testing.assert_almost_equal(
            distance.Manhattan(table, axis=0, normalize=True), matrix)

    expect([[0, 4.5833333, 2],
            [4.5833333, 0, 4.25],
            [2, 4.25, 0]])

    blank(1, 1)
    expect([[0, 4.6666667, 2],
            [4.6666667, 0, 4],
            [2, 4, 0]])

    blank(1, 0)
    expect([[0, 5.5, 4],
            [5.5, 0, 4],
            [4, 4, 0]])
def test_manhattan_cols(self):
    # Non-normalized Manhattan distances computed with axis=0 on the
    # continuous fixture: on the data as given, after X[1, 1] becomes NaN,
    # and after X[1, 0] becomes NaN too.
    table = self.cont_data

    def blank(row, col):
        # The fixture is modified in place, inside an unlocked() block.
        with table.unlocked():
            table.X[row, col] = np.nan

    def expect(matrix):
        np.testing.assert_almost_equal(
            distance.Manhattan(table, axis=0, normalize=False), matrix)

    expect([[0, 20, 7],
            [20, 0, 15],
            [7, 15, 0]])

    blank(1, 1)
    expect([[0, 19, 7],
            [19, 0, 14],
            [7, 14, 0]])

    blank(1, 0)
    expect([[0, 17, 9],
            [17, 0, 14],
            [9, 14, 0]])
def test_manhattan_cont(self):
    # Non-normalized Manhattan distances computed with axis=1 on the
    # continuous fixture. Each scenario optionally sets one more cell of X
    # to NaN (cumulatively) and then lists the expected distance matrix.
    table = self.cont_data
    scenarios = (
        (None, [[0, 7, 6, 9],
                [7, 0, 5, 16],
                [6, 5, 0, 13],
                [9, 16, 13, 0]]),
        ((1, 0), [[0, 7, 6, 9],
                  [7, 0, 3, 14],
                  [6, 3, 0, 13],
                  [9, 14, 13, 0]]),
        ((0, 0), [[0, 10, 10, 8],
                  [10, 0, 7, 13],
                  [10, 7, 0, 13],
                  [8, 13, 13, 0]]),
    )
    for cell, expected in scenarios:
        if cell is not None:
            with table.unlocked():
                table.X[cell] = np.nan
        np.testing.assert_almost_equal(
            distance.Manhattan(table, axis=1, normalize=False), expected)
def test_manhattan_cont_normalized(self):
    # Normalized Manhattan distances computed with axis=1 on the continuous
    # fixture. Checks the fitted model's medians/mads and the resulting
    # distance matrix, first on the data as given and then after X[1, 0]
    # and X[0, 0] are set to NaN in turn.
    assert_almost_equal = np.testing.assert_almost_equal
    data = self.cont_data

    model = distance.Manhattan(axis=1, normalize=True).fit(data)
    assert_almost_equal(model.medians, [1.5, 4.5, 1.5])
    assert_almost_equal(model.mads, [1.5, 2, 1])
    assert_almost_equal(model.dist_missing2_cont, np.ones(3))

    # The fitted model and the one-shot call must give the same matrix.
    expected = [
        [0, 2.416666667, 1.833333333, 3],
        [2.416666667, 0, 1.75, 5.416666667],
        [1.833333333, 1.75, 0, 4.166666667],
        [3, 5.416666667, 4.166666667, 0],
    ]
    dist = model(data)
    assert_almost_equal(dist, expected)
    dist = distance.Manhattan(data, axis=1, normalize=True)
    assert_almost_equal(dist, expected)

    # Write to X only inside unlocked(), as the other tests here that
    # modify this fixture do; a direct write fails when the table is locked.
    with data.unlocked():
        data.X[1, 0] = np.nan
    model = distance.Manhattan(axis=1, normalize=True).fit(data)
    assert_almost_equal(model.medians, [2, 4.5, 1.5])
    assert_almost_equal(model.mads, [1, 2, 1])
    dist = model(data)
    assert_almost_equal(
        dist,
        [[0, 2.75, 2, 4],
         [2.75, 0, 1.25, 5.75],
         [2, 1.25, 0, 5],
         [4, 5.75, 5, 0]],
    )

    with data.unlocked():
        data.X[0, 0] = np.nan
    model = distance.Manhattan(axis=1, normalize=True).fit(data)
    assert_almost_equal(model.medians, [4.5, 4.5, 1.5])
    assert_almost_equal(model.mads, [2.5, 2, 1])
    dist = model(data)
    assert_almost_equal(
        dist,
        [
            [0, 2.75, 2.5, 2],
            [2.75, 0, 1.75, 3.75],
            [2.5, 1.75, 0, 3.5],
            [2, 3.75, 3.5, 0],
        ],
    )
def test_manhattan_no_data(self):
    # Manhattan distances involving a table built from a domain alone:
    # the result is expected to have a zero-length dimension on each side
    # where such a table is passed, and the normalized axis=0 call on such
    # a table is expected to raise ValueError.
    check = np.testing.assert_almost_equal

    alone = distance.Manhattan(Table(self.domain))
    check(alone, np.zeros((0, 0)))

    as_second = distance.Manhattan(self.mixed_data, Table(self.domain))
    check(as_second, np.zeros((3, 0)))

    as_first = distance.Manhattan(Table(self.domain), self.mixed_data)
    check(as_first, np.zeros((0, 3)))

    # Build the argument outside the `with` so that only the distance call
    # itself is allowed to raise.
    empty_cont = Table(self.cont_domain)
    with self.assertRaises(ValueError):
        distance.Manhattan(empty_cont, axis=0, normalize=True)
def test_manhattan_disc(self):
    # Manhattan distance with default arguments on the discrete fixtures.
    # After each change to X the model is refitted and three things are
    # checked, in this order: dist_missing_disc, dist_missing2_disc and the
    # distance matrix the model returns for the same table.
    def refit_and_check(table, missing, missing2, matrix):
        fitted = distance.Manhattan().fit(table)
        np.testing.assert_almost_equal(fitted.dist_missing_disc, missing)
        np.testing.assert_almost_equal(fitted.dist_missing2_disc, missing2)
        np.testing.assert_almost_equal(fitted(table), matrix)

    table = self.disc_data
    refit_and_check(
        table,
        [[1 / 3, 2 / 3, 1, 1],
         [2 / 3, 2 / 3, 1, 2 / 3],
         [2 / 3, 1 / 3, 1, 1]],
        [1 - 5 / 9, 1 - 3 / 9, 1 - 5 / 9],
        [[0, 2, 3],
         [2, 0, 2],
         [3, 2, 0]])

    with table.unlocked():
        table.X[1, 0] = np.nan
    refit_and_check(
        table,
        [[1 / 2, 1 / 2, 1, 1],
         [2 / 3, 2 / 3, 1, 2 / 3],
         [2 / 3, 1 / 3, 1, 1]],
        [1 - 2 / 4, 1 - 3 / 9, 1 - 5 / 9],
        [[0, 2.5, 3],
         [2.5, 0, 1.5],
         [3, 1.5, 0]])

    with table.unlocked():
        table.X[0, 0] = np.nan
    refit_and_check(
        table,
        [[1, 0, 1, 1],
         [2 / 3, 2 / 3, 1, 2 / 3],
         [2 / 3, 1 / 3, 1, 1]],
        [1 - 1, 1 - 3 / 9, 1 - 5 / 9],
        [[0, 2, 2],
         [2, 0, 1],
         [2, 1, 0]])

    # Second fixture: the first two entries of column 0 are set to NaN.
    table = self.disc_data4
    with table.unlocked():
        table.X[:2, 0] = np.nan
    refit_and_check(
        table,
        [[1 / 2, 1 / 2, 1, 1],
         [3 / 4, 2 / 4, 1, 3 / 4],
         [3 / 4, 1 / 4, 1, 1]],
        [1 - 2 / 4, 1 - 6 / 16, 1 - 10 / 16],
        [[0, 2.5, 2.5, 2.5],
         [2.5, 0, 0.5, 1.5],
         [2.5, 0.5, 0, 2],
         [2.5, 1.5, 2, 0]])
def test_manhattan_mixed_cols(self):
    # With axis=0 on the mixed fixture, both the one-shot call and fit()
    # on a pre-built Manhattan object are expected to raise ValueError.
    mixed = self.mixed_data

    with self.assertRaises(ValueError):
        distance.Manhattan(mixed, axis=0)

    # Construct the distance object outside the `with`, so that only
    # fit() is allowed to raise.
    unfitted = distance.Manhattan(axis=0)
    with self.assertRaises(ValueError):
        unfitted.fit(mixed)
def test_two_tables(self):
    # Normalized Manhattan distances between two tables. The one-shot call
    # and a model fitted on the first table must return the same cross
    # matrix; the fitted model is then also applied to the second table
    # on its own.
    check = np.testing.assert_almost_equal
    first, second = self.cont_data, self.cont_data2
    cross = [[1.3333333, 0.25],
             [3.75, 2.6666667],
             [2.5, 2.0833333],
             [1.6666667, 2.75]]

    check(distance.Manhattan(first, second, normalize=True), cross)

    fitted = distance.Manhattan(normalize=True).fit(first)
    check(fitted(first, second), cross)

    check(fitted(second),
          [[0, 1.083333333],
           [1.083333333, 0]])
def test_manhattan_mixed(self):
    # Normalized Manhattan distances computed with axis=1 on the mixed
    # fixture. Checks the fitted medians/mads, the two missing-value
    # tables for discrete attributes, and the resulting distance matrix.
    assert_almost_equal = np.testing.assert_almost_equal
    data = self.mixed_data

    # Write to X only inside unlocked(), as the other tests here that
    # modify their fixtures do; a direct write fails when the table is
    # locked.
    with data.unlocked():
        data.X[2, 0] = 2  # prevent mads[0] = 0

    model = distance.Manhattan(axis=1, normalize=True).fit(data)
    assert_almost_equal(model.medians, [1, 3, 1])
    assert_almost_equal(model.mads, [1, 2, 1])
    assert_almost_equal(model.dist_missing_disc,
                        [[1 / 3, 2 / 3, 1, 1],
                         [2 / 3, 2 / 3, 1, 2 / 3],
                         [2 / 3, 1 / 3, 1, 1]])
    assert_almost_equal(model.dist_missing2_disc,
                        [1 - 5 / 9, 1 - 3 / 9, 1 - 5 / 9])

    dist = model(data)
    assert_almost_equal(dist,
                        [[0, 4.5, 4.5],
                         [4.5, 0, 5],
                         [4.5, 5, 0]])
# Report the dataset size, then time hierarchical clustering (average
# linkage) once over a Euclidean and once over a Manhattan distance matrix.
#
# print is called with one parenthesised argument throughout: that prints the
# same text under Python 2 and, unlike the bare print statement, also parses
# under Python 3.
print("Count of documents in Reuters dataset: " + str(numDocs) + "\n")
print("1. Constructing Distance Matrices\n")

# Euclidean run. The timed span covers building the distance matrix as well
# as the clustering itself.
starter = time.time()
constructorEuclidean = distance.Euclidean()
EuclideanDistanceMat = distance.distance_matrix(
    data, distance_constructor=constructorEuclidean)
euclidean_hierarchical_clustering = (
    clustering.hierarchical.HierarchicalClustering())
euclidean_hierarchical_clustering.linkage = clustering.hierarchical.AVERAGE
euclideanRoot = euclidean_hierarchical_clustering(EuclideanDistanceMat)
ender = time.time()
timer = ender - starter

# Manhattan run, timed over the same two steps.
starter1 = time.time()
constructorManhattan = distance.Manhattan()
ManhattanDistanceMat = distance.distance_matrix(
    data, distance_constructor=constructorManhattan)
manhattan_hierarchical_clustering = (
    clustering.hierarchical.HierarchicalClustering())
manhattan_hierarchical_clustering.linkage = clustering.hierarchical.AVERAGE
manhattanRoot = manhattan_hierarchical_clustering(ManhattanDistanceMat)
ender1 = time.time()
timer1 = ender1 - starter1

print("2. Time: Hierarchical clustering: ")
print("With Euclidean distance = " + str(timer) + "sec")
print("With Manhattan distance = " + str(timer1) + "sec\n")

# Attach the original data to the mapping of each clustering root.
euclideanRoot.mapping.objects = data
manhattanRoot.mapping.objects = data
import Orange.data import Orange.misc from Orange.widgets import widget, gui, settings from Orange import distance _METRICS = [ ("Euclidean", distance.Euclidean()), ("Manhattan", distance.Manhattan()), ("Cosine", distance.Cosine()), ("Jaccard", distance.Jaccard()), ("Mahalanobis", distance.Mahalanobis()), ("Spearman", distance.SpearmanR()), ("Spearman absolute", distance.SpearmanRAbsolute()), ("Pearson", distance.PearsonR()), ("Pearson absolute", distance.PearsonRAbsolute()), ] class OWDistances(widget.OWWidget): name = "Distances" description = "Compute a matrix of pairwise distances." icon = "icons/Distance.svg" inputs = [("Data", Orange.data.Table, "set_data")] outputs = [("Distances", Orange.misc.DistMatrix)] axis = settings.Setting(0) metric_idx = settings.Setting(0) autocommit = settings.Setting(False) want_main_area = False