Beispiel #1
0
def verify_algorithm(x, y, split_number, split_size, k, seed=None):
    """
    Repeatedly build a fold, fit a KNN classifier on it and score the result.

    Parameters
    ----------
    x : ht.DNDarray
        Array containing the data vectors.
    y : ht.DNDarray
        Array containing the labels for ``x`` (must be in the same order).
    split_number : int
        Number of test iterations to run.
    split_size : int
        Number of vectors used by the KNN algorithm; forwarded to
        ``create_fold``. Must be smaller than ``len(x)``.
    k : int
        Number of neighbours for the KNN algorithm. Must be smaller than
        ``len(x)``.
    seed : int, optional
        Seed for the random generator used in creating folds. Used for
        deterministic testing purposes. The same value is forwarded to
        ``create_fold`` on every iteration.

    Returns
    -------
    accuracies : list
        Python list with ``split_number`` entries, one accuracy per run
        (each obtained via ``.item()`` on the result of
        ``calculate_accuracy``).

    Raises
    ------
    AssertionError
        If ``x`` and ``y`` differ in length, or if ``split_size`` or ``k``
        is not smaller than ``len(x)``.
    """
    sample_count = len(x)
    assert sample_count == len(y)
    assert split_size < sample_count
    assert k < sample_count

    def run_once():
        # One iteration: split the data, fit on the fold, score on the rest.
        train_x, train_y, check_x, check_y = create_fold(x, y, split_size, seed)
        predicted_y = KNN(train_x, train_y, k).predict(check_x)
        return calculate_accuracy(predicted_y, check_y).item()

    return [run_once() for _ in range(split_number)]
Beispiel #2
0
    def test_fit_one_hot(self):
        """Fit KNN with one-hot labels and compare the prediction's shape to a plain label array."""
        X = ht.load_hdf5("heat/datasets/iris.h5", dataset="data")

        # Plain integer labels: 50 entries for each of the classes 0, 1 and 2.
        class_ids = [cls for cls in range(3) for _ in range(50)]
        labels = ht.array(class_ids, split=0)

        # The same labels as one-hot rows (a 1 at the class index, 0 elsewhere).
        one_hot_rows = [[int(pos == cls) for pos in range(3)] for cls in class_ids]
        Y = ht.array(one_hot_rows)

        knn = KNN(X, Y, 5)
        knn.fit(X, Y)
        result = knn.predict(X)

        self.assertTrue(ht.is_estimator(knn))
        self.assertTrue(ht.is_classifier(knn))
        self.assertIsInstance(result, ht.DNDarray)
        # The prediction is expected in the shape of the plain label array,
        # not in the shape of the one-hot input.
        self.assertEqual(result.shape, labels.shape)
Beispiel #3
0
    def test_utility(self):
        """Check ``KNN.label_to_one_hot`` against a hand-written one-hot matrix."""
        labels = ht.array([1, 2, 3, 4])
        # Row i carries its single 1 in column labels[i].
        expected = ht.array(
            [
                [0, 1, 0, 0, 0],
                [0, 0, 1, 0, 0],
                [0, 0, 0, 1, 0],
                [0, 0, 0, 0, 1],
            ]
        )

        encoded = KNN.label_to_one_hot(labels)

        self.assertTrue((encoded == expected).all())
Beispiel #4
0
    def test_split_zero(self):
        """Predict on data and labels created with ``split=0`` and check result type and shape."""
        X = ht.load_hdf5("heat/datasets/iris.h5", dataset="data", split=0)

        # Labels for the iris.h5 dataset: 50 entries each of 0, 1 and 2.
        class_ids = [0] * 50 + [1] * 50 + [2] * 50
        Y = ht.array(class_ids, split=0)

        classifier = KNN(X, Y, 5)
        predicted = classifier.predict(X)

        self.assertIsInstance(predicted, ht.DNDarray)
        self.assertEqual(predicted.shape, Y.shape)
Beispiel #5
0
    def test_split_none(self):
        """Predict on data and labels created without a ``split`` argument and check the result."""
        X = ht.load_hdf5("heat/datasets/iris.h5", dataset="data")

        # Labels for the iris.h5 dataset: 50 entries each of 0, 1 and 2.
        class_ids = [cls for cls in (0, 1, 2) for _ in range(50)]
        Y = ht.array(class_ids)

        classifier = KNN(X, Y, 5)
        predicted = classifier.predict(X)

        self.assertTrue(ht.is_estimator(classifier))
        self.assertTrue(ht.is_classifier(classifier))
        self.assertIsInstance(predicted, ht.DNDarray)
        self.assertEqual(predicted.shape, Y.shape)
Beispiel #6
0
    def test_exception(self):
        """Expect ``ValueError`` from the KNN constructor and from ``fit`` for the label arrays below."""
        data = ht.zeros((3,))
        one_label = ht.zeros((1,))
        three_labels = ht.zeros((3,))
        cube_labels = ht.zeros((2, 2, 2))

        # Constructor: data of shape (3,) with labels of shape (1,).
        with self.assertRaises(ValueError):
            KNN(data, one_label, 1)

        # NOTE(review): this repeats the previous check with identical
        # arguments -- confirm whether a different input was intended.
        with self.assertRaises(ValueError):
            KNN(data, one_label, 1)

        # fit(): a successfully constructed classifier given shape-(1,) labels.
        model = KNN(data, three_labels, 1)
        with self.assertRaises(ValueError):
            model.fit(data, one_label)

        # fit(): a fresh classifier given shape-(2, 2, 2) labels.
        model = KNN(data, three_labels, 1)
        with self.assertRaises(ValueError):
            model.fit(data, cube_labels)

        # Constructor: shape-(2, 2, 2) labels.
        with self.assertRaises(ValueError):
            KNN(data, cube_labels, 1)