Beispiel #1
0
    def test_manhattan_cols_normalized(self):
        """Check normalized Manhattan distances between columns (axis=0).

        The distance matrix is verified on the untouched ``cont_data`` and
        again after each of two cells is overwritten with NaN. The
        overwrites are cumulative: the second step keeps the first NaN.
        """
        check = np.testing.assert_almost_equal
        table = self.cont_data

        # Each step: (cell to set to NaN first, or None; expected matrix).
        steps = (
            (None, [[0, 4.5833333, 2],
                    [4.5833333, 0, 4.25],
                    [2, 4.25, 0]]),
            ((1, 1), [[0, 4.6666667, 2],
                      [4.6666667, 0, 4],
                      [2, 4, 0]]),
            ((1, 0), [[0, 5.5, 4],
                      [5.5, 0, 4],
                      [4, 4, 0]]),
        )
        for cell, expected in steps:
            if cell is not None:
                with table.unlocked():
                    table.X[cell] = np.nan
            check(distance.Manhattan(table, axis=0, normalize=True), expected)
Beispiel #2
0
    def test_manhattan_cols(self):
        """Check non-normalized Manhattan distances between columns (axis=0).

        The matrix is verified on the untouched ``cont_data`` and after two
        successive cells have been overwritten with NaN.
        """
        check = np.testing.assert_almost_equal
        table = self.cont_data

        def column_distances():
            # Recomputed from scratch so each check sees the current table.
            return distance.Manhattan(table, axis=0, normalize=False)

        def set_missing(row, col):
            with table.unlocked():
                table.X[row, col] = np.nan

        check(column_distances(),
              [[0, 20, 7],
               [20, 0, 15],
               [7, 15, 0]])

        set_missing(1, 1)
        check(column_distances(),
              [[0, 19, 7],
               [19, 0, 14],
               [7, 14, 0]])

        set_missing(1, 0)
        check(column_distances(),
              [[0, 17, 9],
               [17, 0, 14],
               [9, 14, 0]])
Beispiel #3
0
    def test_manhattan_cont(self):
        """Check non-normalized Manhattan distances between rows (axis=1).

        The matrix is verified on the untouched ``cont_data`` and again
        after each of two cells in the first column is overwritten with
        NaN (the overwrites accumulate).
        """
        check = np.testing.assert_almost_equal
        table = self.cont_data

        # Each step: (cell to set to NaN first, or None; expected matrix).
        steps = (
            (None, [[0, 7, 6, 9],
                    [7, 0, 5, 16],
                    [6, 5, 0, 13],
                    [9, 16, 13, 0]]),
            ((1, 0), [[0, 7, 6, 9],
                      [7, 0, 3, 14],
                      [6, 3, 0, 13],
                      [9, 14, 13, 0]]),
            ((0, 0), [[0, 10, 10, 8],
                      [10, 0, 7, 13],
                      [10, 7, 0, 13],
                      [8, 13, 13, 0]]),
        )
        for cell, expected in steps:
            if cell is not None:
                with table.unlocked():
                    table.X[cell] = np.nan
            check(distance.Manhattan(table, axis=1, normalize=False), expected)
    def test_manhattan_cont_normalized(self):
        """Check normalized Manhattan distances between rows (axis=1).

        Verifies the fitted model's ``medians`` and ``mads`` and the
        resulting distance matrix, first on the untouched ``cont_data`` and
        then after two cells of the first column are overwritten with NaN
        (the overwrites accumulate).
        """
        assert_almost_equal = np.testing.assert_almost_equal
        data = self.cont_data

        model = distance.Manhattan(axis=1, normalize=True).fit(data)
        assert_almost_equal(model.medians, [1.5, 4.5, 1.5])
        assert_almost_equal(model.mads, [1.5, 2, 1])
        assert_almost_equal(model.dist_missing2_cont, np.ones(3))

        # The fitted model and the one-shot call must give the same matrix.
        expected = [
            [0, 2.416666667, 1.833333333, 3],
            [2.416666667, 0, 1.75, 5.416666667],
            [1.833333333, 1.75, 0, 4.166666667],
            [3, 5.416666667, 4.166666667, 0],
        ]
        dist = model(data)
        assert_almost_equal(dist, expected)

        dist = distance.Manhattan(data, axis=1, normalize=True)
        assert_almost_equal(dist, expected)

        # In-place edits go through unlocked(), like the other tests in
        # this file that modify table.X.
        with data.unlocked():
            data.X[1, 0] = np.nan
        model = distance.Manhattan(axis=1, normalize=True).fit(data)
        assert_almost_equal(model.medians, [2, 4.5, 1.5])
        assert_almost_equal(model.mads, [1, 2, 1])

        dist = model(data)
        assert_almost_equal(
            dist,
            [[0, 2.75, 2, 4], [2.75, 0, 1.25, 5.75], [2, 1.25, 0, 5],
             [4, 5.75, 5, 0]],
        )

        with data.unlocked():
            data.X[0, 0] = np.nan
        model = distance.Manhattan(axis=1, normalize=True).fit(data)
        assert_almost_equal(model.medians, [4.5, 4.5, 1.5])
        assert_almost_equal(model.mads, [2.5, 2, 1])

        dist = model(data)
        assert_almost_equal(
            dist,
            [
                [0, 2.75, 2.5, 2],
                [2.75, 0, 1.75, 3.75],
                [2.5, 1.75, 0, 3.5],
                [2, 3.75, 3.5, 0],
            ],
        )
Beispiel #5
0
 def test_manhattan_no_data(self):
     """Check Manhattan distances when one or both tables have no rows.

     The result is expected to be an all-zero array of the matching
     (possibly empty) shape; asking for normalized column distances on an
     empty table is expected to raise ValueError.
     """
     check = np.testing.assert_almost_equal

     # (positional tables passed to Manhattan, shape of the expected zeros)
     cases = (
         ((Table(self.domain),), (0, 0)),
         ((self.mixed_data, Table(self.domain)), (3, 0)),
         ((Table(self.domain), self.mixed_data), (0, 3)),
     )
     for tables, shape in cases:
         check(distance.Manhattan(*tables), np.zeros(shape))

     # Build the table outside the guarded block so that only the distance
     # computation itself may satisfy the ValueError expectation.
     empty_cont = Table(self.cont_domain)
     with self.assertRaises(ValueError):
         distance.Manhattan(empty_cont, axis=0, normalize=True)
Beispiel #6
0
    def test_manhattan_disc(self):
        """Check Manhattan distances and fitted statistics on discrete data.

        For ``disc_data`` (untouched, then with two cells of the first
        column successively set to NaN) and for ``disc_data4`` (first two
        rows of the first column set to NaN), the fitted model's
        ``dist_missing_disc`` and ``dist_missing2_disc`` and the resulting
        distance matrix are compared with expected values.
        """
        check = np.testing.assert_almost_equal

        def fit_and_check(table, missing_disc, missing2_disc, distances):
            # Fit a default Manhattan model and verify, in this order, its
            # two fitted attributes and the distances it produces.
            fitted = distance.Manhattan().fit(table)
            check(fitted.dist_missing_disc, missing_disc)
            check(fitted.dist_missing2_disc, missing2_disc)
            check(fitted(table), distances)

        table = self.disc_data
        fit_and_check(
            table,
            [[1 / 3, 2 / 3, 1, 1], [2 / 3, 2 / 3, 1, 2 / 3],
             [2 / 3, 1 / 3, 1, 1]],
            [1 - 5 / 9, 1 - 3 / 9, 1 - 5 / 9],
            [[0, 2, 3], [2, 0, 2], [3, 2, 0]])

        with table.unlocked():
            table.X[1, 0] = np.nan
        fit_and_check(
            table,
            [[1 / 2, 1 / 2, 1, 1], [2 / 3, 2 / 3, 1, 2 / 3],
             [2 / 3, 1 / 3, 1, 1]],
            [1 - 2 / 4, 1 - 3 / 9, 1 - 5 / 9],
            [[0, 2.5, 3], [2.5, 0, 1.5], [3, 1.5, 0]])

        with table.unlocked():
            table.X[0, 0] = np.nan
        fit_and_check(
            table,
            [[1, 0, 1, 1], [2 / 3, 2 / 3, 1, 2 / 3], [2 / 3, 1 / 3, 1, 1]],
            [1 - 1, 1 - 3 / 9, 1 - 5 / 9],
            [[0, 2, 2], [2, 0, 1], [2, 1, 0]])

        table = self.disc_data4
        with table.unlocked():
            table.X[:2, 0] = np.nan
        fit_and_check(
            table,
            [[1 / 2, 1 / 2, 1, 1], [3 / 4, 2 / 4, 1, 3 / 4],
             [3 / 4, 1 / 4, 1, 1]],
            [1 - 2 / 4, 1 - 6 / 16, 1 - 10 / 16],
            [[0, 2.5, 2.5, 2.5], [2.5, 0, 0.5, 1.5],
             [2.5, 0.5, 0, 2], [2.5, 1.5, 2, 0]])
Beispiel #7
0
 def test_manhattan_mixed_cols(self):
     """Check that column distances (axis=0) on mixed data raise ValueError.

     Both the one-shot call and fitting a pre-built model are covered.
     """
     with self.assertRaises(ValueError):
         distance.Manhattan(self.mixed_data, axis=0)

     # The model is constructed outside the guarded block: only fit() is
     # expected to raise.
     fit_columns = distance.Manhattan(axis=0).fit
     with self.assertRaises(ValueError):
         fit_columns(self.mixed_data)
Beispiel #8
0
    def test_two_tables(self):
        """Check normalized Manhattan distances between two tables.

        The one-shot call and a model fitted on ``cont_data`` must produce
        the same cross-table matrix; the fitted model is then also applied
        to ``cont_data2`` alone.
        """
        check = np.testing.assert_almost_equal

        # Expected distances from rows of cont_data to rows of cont_data2.
        cross_expected = [[1.3333333, 0.25], [3.75, 2.6666667],
                          [2.5, 2.0833333], [1.6666667, 2.75]]

        check(
            distance.Manhattan(self.cont_data, self.cont_data2,
                               normalize=True),
            cross_expected)

        fitted = distance.Manhattan(normalize=True).fit(self.cont_data)
        check(fitted(self.cont_data, self.cont_data2), cross_expected)

        check(fitted(self.cont_data2), [[0, 1.083333333], [1.083333333, 0]])
Beispiel #9
0
    def test_manhattan_mixed(self):
        """Check normalized row-wise Manhattan distances on mixed data.

        After adjusting one cell of ``mixed_data``, the fitted model's
        ``medians``, ``mads``, ``dist_missing_disc`` and
        ``dist_missing2_disc`` are verified, followed by the distance
        matrix the model produces for the same table.
        """
        assert_almost_equal = np.testing.assert_almost_equal
        data = self.mixed_data

        # In-place edits go through unlocked(), like the other tests in
        # this file that modify table.X.
        with data.unlocked():
            data.X[2, 0] = 2  # prevent mads[0] = 0
        model = distance.Manhattan(axis=1, normalize=True).fit(data)
        assert_almost_equal(model.medians, [1, 3, 1])
        assert_almost_equal(model.mads, [1, 2, 1])
        assert_almost_equal(model.dist_missing_disc,
                            [[1 / 3, 2 / 3, 1, 1], [2 / 3, 2 / 3, 1, 2 / 3],
                             [2 / 3, 1 / 3, 1, 1]])
        assert_almost_equal(model.dist_missing2_disc,
                            [1 - 5 / 9, 1 - 3 / 9, 1 - 5 / 9])

        dist = model(data)
        assert_almost_equal(dist, [[0, 4.5, 4.5], [4.5, 0, 5], [4.5, 5, 0]])
Beispiel #10
0
# NOTE: every print below uses the parenthesised single-argument form, which
# produces the same output on Python 2 and is also valid Python 3 syntax.
print("Count of documents in Reuters dataset: " + str(numDocs) + "\n")
print("1. Constructing Distance Matrices\n")

# Timed run 1: Euclidean distance matrix followed by average-linkage
# hierarchical clustering.
starter = time.time()
constructorEuclidean = distance.Euclidean()
EuclideanDistanceMat = distance.distance_matrix(
    data, distance_constructor=constructorEuclidean)
euclidean_hierarchical_clustering = (
    clustering.hierarchical.HierarchicalClustering())
euclidean_hierarchical_clustering.linkage = clustering.hierarchical.AVERAGE
euclideanRoot = euclidean_hierarchical_clustering(EuclideanDistanceMat)
ender = time.time()
timer = ender - starter

# Timed run 2: the same pipeline with the Manhattan distance.
starter1 = time.time()
constructorManhattan = distance.Manhattan()
ManhattanDistanceMat = distance.distance_matrix(
    data, distance_constructor=constructorManhattan)
manhattan_hierarchical_clustering = (
    clustering.hierarchical.HierarchicalClustering())
manhattan_hierarchical_clustering.linkage = clustering.hierarchical.AVERAGE
manhattanRoot = manhattan_hierarchical_clustering(ManhattanDistanceMat)
ender1 = time.time()
timer1 = ender1 - starter1

# Each timing covers both the matrix construction and the clustering call.
print("2. Time: Hierarchical clustering: ")
print("With Euclidean distance = " + str(timer) + "sec")
print("With Manhattan distance = " + str(timer1) + "sec\n")

# Attach the original data to both cluster trees.
euclideanRoot.mapping.objects = data
manhattanRoot.mapping.objects = data
Beispiel #11
0
import Orange.data
import Orange.misc
from Orange.widgets import widget, gui, settings
from Orange import distance

# Available distance metrics as (name, distance object) pairs.
# The distance objects are instantiated once, when this module is imported.
# NOTE(review): the names are presumably the labels shown in the widget and
# the list position is presumably what `metric_idx` refers to -- confirm
# against the widget code.
_METRICS = [
    ("Euclidean", distance.Euclidean()),
    ("Manhattan", distance.Manhattan()),
    ("Cosine", distance.Cosine()),
    ("Jaccard", distance.Jaccard()),
    ("Mahalanobis", distance.Mahalanobis()),
    ("Spearman", distance.SpearmanR()),
    ("Spearman absolute", distance.SpearmanRAbsolute()),
    ("Pearson", distance.PearsonR()),
    ("Pearson absolute", distance.PearsonRAbsolute()),
]


class OWDistances(widget.OWWidget):
    name = "Distances"
    description = "Compute a matrix of pairwise distances."
    icon = "icons/Distance.svg"

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Distances", Orange.misc.DistMatrix)]

    axis = settings.Setting(0)
    metric_idx = settings.Setting(0)
    autocommit = settings.Setting(False)

    want_main_area = False