def test_lasso(self):
    """Exercise ``ht.regression.lasso.Lasso`` on the diabetes dataset.

    Checks the estimator's attributes before and after ``fit``, the type and
    shape of the ``predict`` output, and that ``fit`` raises ``ValueError``
    for the 3-D inputs passed at the end of the test.
    """
    # ToDo: add additional tests
    # get some test data (both datasets live in the same file)
    diabetes_path = os.path.join(os.getcwd(), "heat/datasets/data/diabetes.h5")
    X = ht.load_hdf5(diabetes_path, dataset="x", device=ht_device, split=0)
    y = ht.load_hdf5(diabetes_path, dataset="y", device=ht_device, split=0)

    # normalize dataset
    X = X / ht.sqrt((ht.mean(X ** 2, axis=0)))
    m, n = X.shape

    # HeAT lasso instance
    estimator = ht.regression.lasso.Lasso(max_iter=100, tol=None)

    # before fitting: hyperparameters are set, nothing has been learned yet
    self.assertEqual(estimator.lam, 0.1)
    self.assertIsNone(estimator.theta)
    self.assertIsNone(estimator.n_iter)
    self.assertEqual(estimator.max_iter, 100)
    self.assertIsNone(estimator.coef_)
    self.assertIsNone(estimator.intercept_)

    estimator.fit(X, y)

    # after fitting: the test expects exactly max_iter iterations
    # (presumably because tol=None disables early stopping -- confirm)
    self.assertEqual(estimator.lam, 0.1)
    self.assertIsInstance(estimator.theta, ht.DNDarray)
    self.assertEqual(estimator.n_iter, 100)
    self.assertEqual(estimator.max_iter, 100)
    self.assertEqual(estimator.coef_.shape, (n - 1, 1))
    self.assertEqual(estimator.intercept_.shape, (1,))

    yest = estimator.predict(X)

    # check whether the results are correct
    self.assertIsInstance(yest, ht.DNDarray)
    self.assertEqual(yest.shape, (m, 1))

    # inputs with three dimensions must be rejected
    with self.assertRaises(ValueError):
        estimator.fit(X, ht.zeros((3, 3, 3)))
    with self.assertRaises(ValueError):
        estimator.fit(ht.zeros((3, 3, 3)), ht.zeros((3, 3)))
def test_fit_one_hot(self):
    """Fit ``KNeighborsClassifier`` on one-hot iris labels and predict.

    The prediction for the training samples must be a ``ht.DNDarray`` whose
    shape equals that of the plain (non one-hot) label array.
    """
    x = ht.load_hdf5("heat/datasets/iris.h5", dataset="data")

    # class index per sample: three consecutive groups of 50 samples
    class_ids = [0] * 50 + [1] * 50 + [2] * 50

    # keys as label array
    labels = ht.array(class_ids, split=0)

    # keys as one_hot: a 1 in the column of the sample's class, 0 elsewhere
    one_hot_rows = [
        [int(column == class_id) for column in range(3)] for class_id in class_ids
    ]
    y = ht.array(one_hot_rows)

    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(x, y)
    result = knn.predict(x)

    self.assertTrue(ht.is_estimator(knn))
    self.assertTrue(ht.is_classifier(knn))
    self.assertIsInstance(result, ht.DNDarray)
    self.assertEqual(result.shape, labels.shape)
def test_load_hdf5(self):
    """Check ``ht.load_hdf5`` for default, split and dtype-converting loads.

    The test is reported as skipped when ``ht.io.supports_hdf5()`` is false.
    """
    # HDF5 support is optional; report a skip rather than a silent pass
    if not ht.io.supports_hdf5():
        self.skipTest("Requires HDF5")

    # default parameters
    iris = ht.load_hdf5(self.HDF5_PATH, self.HDF5_DATASET, device=ht_device)
    self.assertIsInstance(iris, ht.DNDarray)
    self.assertEqual(iris.shape, self.IRIS.shape)
    self.assertEqual(iris.dtype, ht.float32)
    self.assertEqual(iris._DNDarray__array.dtype, torch.float32)
    self.assertTrue((self.IRIS == iris._DNDarray__array).all())

    # positive split axis: only the first local dimension may shrink
    iris = ht.load_hdf5(self.HDF5_PATH, self.HDF5_DATASET, split=0, device=ht_device)
    self.assertIsInstance(iris, ht.DNDarray)
    self.assertEqual(iris.shape, self.IRIS.shape)
    self.assertEqual(iris.dtype, ht.float32)
    lshape = iris.lshape
    self.assertLessEqual(lshape[0], self.IRIS.shape[0])
    self.assertEqual(lshape[1], self.IRIS.shape[1])

    # negative split axis: only the last local dimension may shrink
    # (device passed explicitly, consistent with the other loads in this test)
    iris = ht.load_hdf5(self.HDF5_PATH, self.HDF5_DATASET, split=-1, device=ht_device)
    self.assertIsInstance(iris, ht.DNDarray)
    self.assertEqual(iris.shape, self.IRIS.shape)
    self.assertEqual(iris.dtype, ht.float32)
    lshape = iris.lshape
    self.assertEqual(lshape[0], self.IRIS.shape[0])
    self.assertLessEqual(lshape[1], self.IRIS.shape[1])

    # different data type
    iris = ht.load_hdf5(self.HDF5_PATH, self.HDF5_DATASET, dtype=ht.int8, device=ht_device)
    self.assertIsInstance(iris, ht.DNDarray)
    self.assertEqual(iris.shape, self.IRIS.shape)
    self.assertEqual(iris.dtype, ht.int8)
    self.assertEqual(iris._DNDarray__array.dtype, torch.int8)
def test_load_hdf5_exception(self):
    """Check that ``ht.load_hdf5`` rejects bad argument types and missing data.

    The test is reported as skipped when ``ht.io.supports_hdf5()`` is false.
    """
    # HDF5 support is optional; report a skip rather than a silent pass
    if not ht.io.supports_hdf5():
        self.skipTest("Requires HDF5")

    # improper argument types
    with self.assertRaises(TypeError):
        ht.load_hdf5(1, "data")
    with self.assertRaises(TypeError):
        ht.load_hdf5("iris.h5", 1)
    with self.assertRaises(TypeError):
        ht.load_hdf5("iris.h5", dataset="data", split=1.0)

    # file or dataset does not exist
    with self.assertRaises(IOError):
        ht.load_hdf5("foo.h5", dataset="data")
    # NOTE(review): "iris.h5" is a bare relative path; if it does not resolve
    # from the working directory, this case fails on the missing file rather
    # than on the missing dataset -- confirm which one is intended.
    with self.assertRaises(IOError):
        ht.load_hdf5("iris.h5", dataset="foo")
def test_fit_iris(self):
    """Fit ``ht.ml.cluster.KMeans`` with three clusters on the iris data.

    ``fit`` must return an ``ht.tensor`` of shape
    ``(1, <number of iris columns>, 3)``.
    """
    # get some test data
    iris_path = os.path.join(os.getcwd(), 'heat/datasets/data/iris.h5')
    iris = ht.load_hdf5(iris_path, 'data')

    # fit the clusters
    n_clusters = 3
    model = ht.ml.cluster.KMeans(n_clusters=n_clusters)
    centroids = model.fit(iris)

    # check whether the results are correct
    expected_shape = (1, iris.shape[1], n_clusters)
    self.assertIsInstance(centroids, ht.tensor)
    self.assertEqual(centroids.shape, expected_shape)
def test_split_zero(self):
    """Predict iris classes with ``KNN`` when data and labels are split on axis 0.

    The prediction for the training samples must be a ``ht.DNDarray`` with
    the same shape as the label array.
    """
    samples = ht.load_hdf5("heat/datasets/iris.h5", dataset="data", split=0)

    # Generate keys for the iris.h5 dataset: three consecutive groups of 50
    class_ids = [0] * 50 + [1] * 50 + [2] * 50
    targets = ht.array(class_ids, split=0)

    # third argument is presumably the number of neighbours -- confirm
    classifier = KNN(samples, targets, 5)
    predicted = classifier.predict(samples)

    self.assertIsInstance(predicted, ht.DNDarray)
    self.assertEqual(predicted.shape, targets.shape)
def test_split_none(self):
    """Fit and predict with ``KNeighborsClassifier`` on unsplit iris data.

    Also checks that the classifier is recognised by ``ht.is_estimator`` and
    ``ht.is_classifier``, and that the prediction is a ``ht.DNDarray`` with
    the same shape as the label array.
    """
    samples = ht.load_hdf5("heat/datasets/iris.h5", dataset="data")

    # generate keys for the iris.h5 dataset: three consecutive groups of 50
    class_ids = [0] * 50 + [1] * 50 + [2] * 50
    targets = ht.array(class_ids)

    classifier = KNeighborsClassifier(n_neighbors=5)
    classifier.fit(samples, targets)
    predicted = classifier.predict(samples)

    self.assertTrue(ht.is_estimator(classifier))
    self.assertTrue(ht.is_classifier(classifier))
    self.assertIsInstance(predicted, ht.DNDarray)
    self.assertEqual(predicted.shape, targets.shape)
def test_split_none(self):
    """Predict iris classes with ``KNN`` on unsplit data and labels.

    Also checks that the classifier is recognised by ``ht.is_estimator`` and
    ``ht.is_classifier``, and that the prediction is a ``ht.DNDarray`` with
    the same shape as the label array.
    """
    samples = ht.load_hdf5("heat/datasets/iris.h5", dataset="data")

    # Generate keys for the iris.h5 dataset: three consecutive groups of 50
    class_ids = [0] * 50 + [1] * 50 + [2] * 50
    targets = ht.array(class_ids)

    # third argument is presumably the number of neighbours -- confirm
    classifier = KNN(samples, targets, 5)
    predicted = classifier.predict(samples)

    self.assertTrue(ht.is_estimator(classifier))
    self.assertTrue(ht.is_classifier(classifier))
    self.assertIsInstance(predicted, ht.DNDarray)
    self.assertEqual(predicted.shape, targets.shape)
def test_lasso(self):
    """Run identical lasso checks on the HeAT, PyTorch and NumPy estimators.

    For each backend the diabetes dataset is loaded from HDF5, converted to
    the backend's array type, normalized, and handed to the matching
    estimator. The attributes before and after ``fit`` and the type and
    shape of the ``predict`` output are then verified.
    """
    # ToDo: add additional tests
    diabetes_path = os.path.join(os.getcwd(), "heat/datasets/data/diabetes.h5")

    def load_diabetes():
        # get some test data; loaded afresh for every backend so that no
        # backend works on arrays another backend has already handled
        features = ht.load_hdf5(diabetes_path, dataset="x")
        targets = ht.load_hdf5(diabetes_path, dataset="y")
        return features, targets

    def check_estimator(estimator, X, y, array_type):
        m, n = X.shape

        # before fitting: hyperparameters are set, nothing has been learned
        self.assertEqual(estimator.lam, 0.1)
        self.assertIsNone(estimator.theta)
        self.assertIsNone(estimator.n_iter)
        self.assertEqual(estimator.max_iter, 100)
        self.assertIsNone(estimator.coef_)
        self.assertIsNone(estimator.intercept_)

        estimator.fit(X, y)

        # after fitting: the test expects exactly max_iter iterations
        self.assertEqual(estimator.lam, 0.1)
        self.assertIsInstance(estimator.theta, array_type)
        self.assertEqual(estimator.n_iter, 100)
        self.assertEqual(estimator.max_iter, 100)
        self.assertEqual(estimator.coef_.shape, (n - 1, 1))
        self.assertEqual(estimator.intercept_.shape, (1,))

        yest = estimator.predict(X)

        # check whether the results are correct
        self.assertIsInstance(yest, array_type)
        self.assertEqual(yest.shape, (m,))

    # HeAT
    X, y = load_diabetes()
    # normalize dataset
    X = X / ht.sqrt((ht.mean(X**2, axis=0)))
    check_estimator(
        ht.core.regression.lasso.HeatLasso(max_iter=100, tol=None), X, y, ht.DNDarray
    )

    # Now the same stuff again in PyTorch
    X, y = load_diabetes()
    # clone().detach() is the recommended way to copy an existing tensor;
    # torch.tensor(<tensor>) yields the same copy but emits a UserWarning
    X = X._DNDarray__array.clone().detach()
    y = y._DNDarray__array.clone().detach()
    # normalize dataset
    X = X / torch.sqrt((torch.mean(X**2, 0)))
    check_estimator(
        ht.core.regression.lasso.PytorchLasso(max_iter=100, tol=None), X, y, torch.Tensor
    )

    # Now the same stuff again in NumPy
    X, y = load_diabetes()
    X = X._DNDarray__array.numpy()
    y = y._DNDarray__array.numpy()
    # normalize dataset
    X = X / np.sqrt((np.mean(X**2, axis=0, keepdims=True)))
    check_estimator(
        ht.core.regression.lasso.NumpyLasso(max_iter=100, tol=None), X, y, np.ndarray
    )
import sys import os import random # Fix python path if run from terminal curdir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.abspath(os.path.join(curdir, "../../"))) import heat as ht from heat.classification.knn import KNN # Load dataset from hdf5 file X = ht.load_hdf5("../../heat/datasets/data/iris.h5", dataset="data", split=0) # Generate keys for the iris.h5 dataset keys = [] for i in range(50): keys.append(0) for i in range(50, 100): keys.append(1) for i in range(100, 150): keys.append(2) Y = ht.array(keys, split=0) def calculate_accuracy(new_y, verification_y): """ Calculates the accuracy of classification/clustering-algorithms. Note this only works with integer/discrete classes. For algorithms that give approximations an error function is required.
import torch
import sys

# extend the import path by the directory two levels above the working
# directory (presumably the repository root, so `heat` can be imported when
# the demo is started from its own folder)
sys.path.append("../../")
import heat as ht
from matplotlib import pyplot as plt
from sklearn import datasets
import heat.ml.regression.lasso as lasso
import plotfkt

# read scikit diabetes data set
diabetes = datasets.load_diabetes()

# load diabetes dataset from hdf5 file
X = ht.load_hdf5("../../heat/datasets/data/diabetes.h5", dataset="x", split=0)
y = ht.load_hdf5("../../heat/datasets/data/diabetes.h5", dataset="y", split=0)

# normalize dataset: divide by the square root of the mean of squares along axis 0
# TODO: this goes into the lasso fit routine as soon as issue #106 is solved
X = X / ht.sqrt((ht.mean(X**2, axis=0)))

# HeAT lasso instance
estimator = lasso.HeatLasso(max_iter=100)

# List lasso model parameters
theta_list = list()

# Range of lambda values
# NOTE(review): `np` is not imported in the visible part of this script --
# presumably `import numpy as np` precedes it; confirm.
lamda = np.logspace(0, 4, 10) / 10

# compute the lasso path
from matplotlib import pyplot as plt
from sklearn import datasets
import heat.regression.lasso as lasso
import plotfkt
import pkg_resources

# read scikit diabetes data set
diabetes = datasets.load_diabetes()

# load diabetes dataset from hdf5 file; the path is looked up through
# pkg_resources for the "heat" requirement instead of being hard-coded
# relative to the working directory
diabetes_path = pkg_resources.resource_filename(
    pkg_resources.Requirement.parse("heat"), "heat/datasets/diabetes.h5")
# NOTE(review): `ht` is not imported in the visible part of this script --
# presumably `import heat as ht` precedes it; confirm.
X = ht.load_hdf5(diabetes_path, dataset="x", split=0)
y = ht.load_hdf5(diabetes_path, dataset="y", split=0)

# normalize dataset: divide by the square root of the mean of squares along axis 0
# TODO: this goes into the lasso fit routine as soon as issue #106 is solved
X = X / ht.sqrt((ht.mean(X**2, axis=0)))

# HeAT lasso instance
estimator = lasso.Lasso(max_iter=100)

# List lasso model parameters
theta_list = list()

# Range of lambda values
# NOTE(review): `np` is not imported in the visible part of this script --
# presumably `import numpy as np` precedes it; confirm.
lamda = np.logspace(0, 4, 10) / 10

# compute the lasso path
import sys import os import random import heat as ht from heat.classification.kneighborsclassifier import KNeighborsClassifier import pkg_resources # Load dataset from hdf5 file iris_path = pkg_resources.resource_filename( pkg_resources.Requirement.parse("heat"), "heat/datasets/iris.h5") X = ht.load_hdf5(iris_path, dataset="data", split=0) # Generate keys for the iris.h5 dataset keys = [] for i in range(50): keys.append(0) for i in range(50, 100): keys.append(1) for i in range(100, 150): keys.append(2) Y = ht.array(keys, split=0) def calculate_accuracy(new_y, verification_y): """ Calculates the accuracy of classification/clustering-algorithms. Note this only works with integer/discrete classes. For algorithms that give approximations an error function is required.