def test_cosine_disc(self):
    """Check fitted means and row-wise Cosine distances on discrete data.

    Three scenarios are exercised on ``self.disc_data``:

    1. a complete 3x3 matrix,
    2. the same matrix with one value replaced by NaN,
    3. a 4x3 matrix in which one column has two NaNs.

    Every write to ``data.X`` happens inside ``data.unlocked()``, the same
    way the other tests in this file modify their tables; without it a
    locked table rejects the modification.
    """
    assert_almost_equal = np.testing.assert_almost_equal
    data = self.disc_data

    # Scenario 1: no missing values.
    with data.unlocked():
        data.X = np.array([[1, 0, 0],
                           [0, 1, 1],
                           [1, 3, 0]], dtype=float)
    model = distance.Cosine().fit(data)
    assert_almost_equal(model.means, [2 / 3, 2 / 3, 1 / 3])
    dist = model(data)
    # Expected values are written as 1 - similarity.
    assert_almost_equal(
        dist,
        1 - np.array([[1, 0, 1 / sqrt(2)],
                      [0, 1, 0.5],
                      [1 / sqrt(2), 0.5, 1]]))

    # Scenario 2: a single missing value; the model is refitted so that
    # `means` reflects the changed column.
    with data.unlocked():
        data.X[1, 1] = np.nan
    model = distance.Cosine().fit(data)
    assert_almost_equal(model.means, [2 / 3, 1 / 2, 1 / 3])
    dist = model(data)
    assert_almost_equal(
        dist,
        1 - np.array([[1, 0, 1 / sqrt(2)],
                      [0, 1, 0.5 / sqrt(1.25) / sqrt(2)],
                      [1 / sqrt(2), 0.5 / sqrt(1.25) / sqrt(2), 1]]))

    # Scenario 3: four rows, two missing values in the middle column.
    with data.unlocked():
        data.X = np.array([[1, 0, 0],
                           [0, np.nan, 1],
                           [1, np.nan, 1],
                           [1, 3, 1]])
    model = distance.Cosine().fit(data)
    dist = model(data)
    assert_almost_equal(model.means, [0.75, 0.5, 0.75])
    assert_almost_equal(dist, [[0, 1, 0.333333333, 0.422649731],
                               [1, 0, 0.254644008, 0.225403331],
                               [0.333333333, 0.254644008, 0, 0.037749551],
                               [0.422649731, 0.225403331, 0.037749551, 0]])
def test_cosine_cont(self):
    """Row-wise Cosine distances on continuous data as NaNs are added.

    The distance matrix is checked three times: on the untouched table,
    after cell (1, 0) is set to NaN, and after cell (0, 0) is additionally
    set to NaN.
    """
    check = np.testing.assert_almost_equal
    table = self.cont_data

    # Each step: (cell to blank out before computing, expected matrix).
    # `None` means "compute on the table as it currently is".
    steps = [
        (None,
         [[0, 0.266200614, 0.0741799, 0.355097978],
          [0.266200614, 0, 0.547089186, 0.925279678],
          [0.0741799, 0.547089186, 0, 0.12011731],
          [0.355097978, 0.925279678, 0.12011731, 0]]),
        ((1, 0),
         [[0, 0.174971353, 0.0741799, 0.355097978],
          [0.174971353, 0, 0.207881966, 0.324809395],
          [0.0741799, 0.207881966, 0, 0.12011731],
          [0.355097978, 0.324809395, 0.12011731, 0]]),
        ((0, 0),
         [[0, 0.100977075, 0.035098719, 0.056666739],
          [0.100977075, 0, 0.188497329, 0.246304671],
          [0.035098719, 0.188497329, 0, 0.12011731],
          [0.056666739, 0.246304671, 0.12011731, 0]]),
    ]

    for cell, expected in steps:
        if cell is not None:
            # In-place writes to the table are done while it is unlocked.
            with table.unlocked():
                table.X[cell] = np.nan
        check(distance.Cosine(table, axis=1), expected)
def test_cosine_cols(self):
    """Column-wise (``axis=0``) Cosine distances without normalization.

    Checked on the untouched continuous table, after one NaN is inserted,
    and after a second NaN plus a changed value in the same row.
    """
    check = np.testing.assert_almost_equal
    table = self.cont_data

    def column_distances():
        # Same call for every step of the test.
        return distance.Cosine(table, axis=0, normalize=False)

    expected_full = [[0, 0.711324865, 0.11050082],
                     [0.711324865, 0, 0.44365136],
                     [0.11050082, 0.44365136, 0]]
    check(column_distances(), expected_full)

    with table.unlocked():
        table.X[1, 1] = np.nan
    expected_one_nan = [[0, 0.47702364, 0.11050082],
                        [0.47702364, 0, 0.181076975],
                        [0.11050082, 0.181076975, 0]]
    check(column_distances(), expected_one_nan)

    with table.unlocked():
        table.X[1, 0] = np.nan
        table.X[1, 2] = 2
    expected_two_nans = [[0, 0.269703257, 0.087129071],
                         [0.269703257, 0, 0.055555556],
                         [0.087129071, 0.055555556, 0]]
    check(column_distances(), expected_two_nans)
def test_two_tables(self):
    """Cosine distances between rows of two different tables.

    Both tables get a NaN at cell (1, 0). The cross-table distances are
    then computed directly and through a model fitted on the first table;
    both must give the same matrix. Finally the fitted model is applied to
    the second table alone.
    """
    check = np.testing.assert_almost_equal
    first, second = self.cont_data, self.cont_data2

    with first.unlocked(), second.unlocked():
        first.X[1, 0] = np.nan
        second.X[1, 0] = np.nan

    # One expected matrix shared by the direct call and the fitted model.
    expected_cross = [[0.2142857, 0.1573352],
                      [0.4958158, 0.2097042],
                      [0.0741799, 0.0198039],
                      [0.1514447, 0.0451363]]

    check(distance.Cosine(first, second), expected_cross)

    fitted = distance.Cosine().fit(first)
    check(fitted(first, second), expected_cross)

    # Model fitted on the first table, applied to the second one only.
    check(fitted(second), [[0, 0.092514787], [0.092514787, 0]])
def test_cosine_mixed(self):
    """Check fitted means and row-wise Cosine distances on mixed data.

    ``self.mixed_data`` receives a fixed 3x6 matrix, a model is fitted with
    ``axis=1``, and both ``model.means`` and the resulting distance matrix
    are compared with precomputed values.

    The assignment to ``data.X`` is done inside ``data.unlocked()``, the
    same way the other tests in this file modify their tables; without it
    a locked table rejects the modification.
    """
    assert_almost_equal = np.testing.assert_almost_equal
    data = self.mixed_data
    with data.unlocked():
        data.X = np.array([[1, 3, 2, 1, 0, 0],
                           [-1, 5, 0, 0, 1, 1],
                           [1, 1, 1, 1, 3, 0]], dtype=float)

    model = distance.Cosine(axis=1).fit(data)
    assert_almost_equal(model.means, [1 / 3, 3, 1, 2 / 3, 2 / 3, 1 / 3])

    dist = model(data)
    assert_almost_equal(
        dist,
        [[0, 0.316869949, 0.191709623],
         [0.316869949, 0, 0.577422873],
         [0.191709623, 0.577422873, 0]])
import Orange.data
import Orange.misc
from Orange.widgets import widget, gui, settings
from Orange import distance

# Available metrics as (display label, distance object) pairs. The objects
# are created once, when this module is imported.
# NOTE(review): presumably indexed by `OWDistances.metric_idx` — the code
# that consumes this list is not visible here; confirm.
_METRICS = [
    ("Euclidean", distance.Euclidean()),
    ("Manhattan", distance.Manhattan()),
    ("Cosine", distance.Cosine()),
    ("Jaccard", distance.Jaccard()),
    ("Mahalanobis", distance.Mahalanobis()),
    ("Spearman", distance.SpearmanR()),
    ("Spearman absolute", distance.SpearmanRAbsolute()),
    ("Pearson", distance.PearsonR()),
    ("Pearson absolute", distance.PearsonRAbsolute()),
]


class OWDistances(widget.OWWidget):
    """Widget that computes a matrix of pairwise distances.

    Declares one input, "Data" (``Orange.data.Table``), and one output,
    "Distances" (``Orange.misc.DistMatrix``).
    """
    # Widget metadata.
    name = "Distances"
    description = "Compute a matrix of pairwise distances."
    icon = "icons/Distance.svg"

    # Input declared as (name, type, handler name); the "set_data" handler
    # is not visible in this part of the file.
    inputs = [("Data", Orange.data.Table, "set_data")]
    # Output declared as (name, type).
    outputs = [("Distances", Orange.misc.DistMatrix)]

    # Settings with their default values.
    # NOTE(review): `axis` presumably selects rows vs. columns and
    # `metric_idx` a position in `_METRICS` — confirm against the code
    # that reads them.
    axis = settings.Setting(0)
    metric_idx = settings.Setting(0)
    autocommit = settings.Setting(False)

    # NOTE(review): presumably tells the base widget not to create a main
    # area — confirm against OWWidget.
    want_main_area = False