예제 #1
0
    def test5_filter_finite(self):
        # Feed hac a mix of finite points and points containing inf, -inf or
        # nan, then compare the result with linkage run on the finite points.
        raw_points = [
            (0, 0), (1, 1),
            (math.inf, 9), (9, math.inf),
            (-math.inf, 2), (2, -math.inf),
            (3.2, -20),
            (float("nan"), 0), (0, float("nan")),
            (7, 7),
            (float("inf"), 100), (100, float("inf")),
            (4, 0),
            (float("-inf"), 1), (1, float("inf")),
            (4, 0), (0.0, 0.1), (-3.9, 27),
        ]
        # The eight entries of raw_points whose coordinates are both finite,
        # kept in their original order (the duplicate (4, 0) is intentional).
        finite_points = [
            (0, 0), (1, 1), (3.2, -20), (7, 7),
            (4, 0), (4, 0), (0.0, 0.1), (-3.9, 27),
        ]

        computed = hac(raw_points)

        # hac should return a numpy array or matrix of the right shape:
        # 8 finite points -> 7 merge rows of 4 columns each.
        is_numpy_container = isinstance(computed, np.ndarray) \
            or isinstance(computed, np.matrix)
        self.assertTrue(is_numpy_container)
        self.assertEqual(np.shape(computed), (7, 4))
        merges = np.array(computed)

        # The third column must never decrease from one row to the next.
        third_column = merges[:, 2]
        for earlier, later in zip(third_column[:-1], third_column[1:]):
            self.assertGreaterEqual(later, earlier)

        # hac must agree with linkage on the finite subset.
        expected = linkage(finite_points)
        self.assertTrue(np.allclose(merges, expected))
예제 #2
0
 def test_tiebreak(self):
     # Runs hac on the tie-break fixture and checks every merge row:
     # row i must join clusters 2*i and 2*i + 1 at distance 0 and produce
     # a cluster of the listed size.
     x_y_pairs = get_x_y_pairs(tiebreak_csv_file)
     computed = hac(x_y_pairs)
     expected_cluster_sizes \
       = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 8, 8, 12, 20]

     # Pin the shape first. Bounding the loop by the number of rows hac
     # returned would let a truncated or empty result pass without any
     # row being checked, and an over-long result would surface as an
     # IndexError instead of an assertion failure.
     self.assertEqual(np.shape(computed), (len(expected_cluster_sizes), 4))

     for i, expected_size in enumerate(expected_cluster_sizes):
         # Flatten so the row is 1-D whether hac returns an array or a matrix.
         row = np.array(computed[i, :]).flatten()
         self.assertEqual(row[0], 2 * i)
         self.assertEqual(row[1], 2 * i + 1)
         self.assertEqual(row[2], 0)
         self.assertEqual(row[3], expected_size)
예제 #3
0
    def test5_tiebreak(self):
        # Runs hac on the tie-break fixture and checks every merge row:
        # row i must join clusters 2*i and 2*i + 1 at distance 0 and produce
        # a cluster of the listed size.
        x_y_pairs = get_x_y_pairs(tiebreak_csv_file)
        computed = hac(x_y_pairs)
        expected_cluster_sizes \
                = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 8, 8, 12, 20]

        # Pin the shape first. Bounding the loop by the number of rows hac
        # returned would let a truncated or empty result pass without any
        # row being checked, and an over-long result would surface as an
        # IndexError instead of an assertion failure.
        self.assertEqual(np.shape(computed), (len(expected_cluster_sizes), 4))

        # Choose the lowest cluster index for the first position;
        # if still tied, choose the lowest cluster index for the second
        # position.
        for i, expected_size in enumerate(expected_cluster_sizes):
            # Flatten so the row is 1-D whether hac returns an array or a
            # matrix.
            row = np.array(computed[i, :]).flatten()
            self.assertEqual(row[0], 2 * i)
            self.assertEqual(row[1], 2 * i + 1)
            self.assertEqual(row[2], 0)
            self.assertEqual(row[3], expected_size)
예제 #4
0
    def test_randomized(self):
        # Runs hac on the random fixture and compares it with scipy's linkage.
        points = get_x_y_pairs(random_csv_file)
        computed = hac(points)

        # hac should return a numpy array of the right shape.
        self.assertIsInstance(computed, np.ndarray)
        self.assertEqual(np.shape(computed), (19, 4))

        # The third column must never decrease from one row to the next.
        third_column = computed[:, 2]
        for earlier, later in zip(third_column[:-1], third_column[1:]):
            self.assertGreaterEqual(later, earlier)

        # hac must agree with linkage on the same input.
        reference = scipy.cluster.hierarchy.linkage(points)
        self.assertTrue(np.allclose(computed, reference))
예제 #5
0
    def test7_more_than_20(self):
        # Runs hac on 80 points of the form (x**2, x**2) and checks every
        # row of the result against closed-form expectations.
        n_points = 80
        squares_on_diagonal = [(x**2, x**2) for x in range(0, n_points)]
        merges = np.array(hac(squares_on_diagonal))

        # The third column must never decrease from one row to the next.
        for later in range(1, n_points - 1):
            self.assertGreaterEqual(merges[later, 2], merges[later - 1, 2])

        # First row: points 0 and 1 merge at distance sqrt(2), size 2.
        self.assertTrue(np.allclose(merges[0], [0, 1, 2**.5, 2]))

        # Every following row, indexed by its position in the matrix.
        for row_idx, merge in enumerate(merges[1:], start=1):
            self.assertEqual(merge[0], row_idx + 1)
            self.assertEqual(merge[1], n_points + row_idx - 1)
            expected_distance = (
                (2 * row_idx)**2 + (2 * row_idx + 2)**2 - 2)**0.5
            self.assertTrue(np.isclose(merge[2], expected_distance))
            self.assertEqual(merge[3], row_idx + 2)
예제 #6
0
    def test3_pokemon_csv(self):
        # Runs hac on the pokemon fixture and compares it with linkage,
        # tolerating differences in row order.
        points = get_x_y_pairs(pokemon_csv_file)
        computed = hac(points)

        # hac should return a numpy array of the right shape.
        self.assertIsInstance(computed, np.ndarray)
        self.assertEqual(np.shape(computed), (19, 4))

        # The third column must never decrease from one row to the next.
        third_column = computed[:, 2]
        for earlier, later in zip(third_column[:-1], third_column[1:]):
            self.assertGreaterEqual(later, earlier)

        # Verify hac agrees with linkage, giving leeway for the tiebreaker:
        # rows are compared after sorting both matrices by the first column,
        # and then again after sorting both by the second column.
        expected = linkage(points)
        for sort_column in (0, 1):
            computed_sorted = computed[computed[:, sort_column].argsort()]
            expected_sorted = expected[expected[:, sort_column].argsort()]
            self.assertTrue(np.allclose(computed_sorted, expected_sorted))