コード例 #1
0
        def test_lasso(self):
            """Fit the HeAT Lasso estimator on the diabetes data and check its state.

            The test checks the estimator's attributes before and after ``fit``,
            the type and shape of the ``predict`` output, and that ``fit`` raises
            ``ValueError`` for the three-dimensional inputs used at the end.
            """
            # ToDo: add additional tests
            # get some test data; both datasets are read from the same HDF5 file
            diabetes_path = os.path.join(os.getcwd(), "heat/datasets/data/diabetes.h5")
            X = ht.load_hdf5(diabetes_path, dataset="x", device=ht_device, split=0)
            y = ht.load_hdf5(diabetes_path, dataset="y", device=ht_device, split=0)

            # normalize dataset: divide by the root mean square taken along axis 0
            X = X / ht.sqrt((ht.mean(X ** 2, axis=0)))
            m, n = X.shape
            # HeAT lasso instance
            estimator = ht.regression.lasso.Lasso(max_iter=100, tol=None)
            # before fitting only the constructor arguments are set
            self.assertEqual(estimator.lam, 0.1)
            self.assertIsNone(estimator.theta)
            self.assertIsNone(estimator.n_iter)
            self.assertEqual(estimator.max_iter, 100)
            self.assertIsNone(estimator.coef_)
            self.assertIsNone(estimator.intercept_)

            estimator.fit(X, y)

            # after fitting the model parameters must be populated
            self.assertEqual(estimator.lam, 0.1)
            self.assertIsInstance(estimator.theta, ht.DNDarray)
            self.assertEqual(estimator.n_iter, 100)
            self.assertEqual(estimator.max_iter, 100)
            self.assertEqual(estimator.coef_.shape, (n - 1, 1))
            self.assertEqual(estimator.intercept_.shape, (1,))

            yest = estimator.predict(X)

            # check whether the prediction has the expected type and shape
            self.assertIsInstance(yest, ht.DNDarray)
            self.assertEqual(yest.shape, (m, 1))

            # three-dimensional targets or features must be rejected
            with self.assertRaises(ValueError):
                estimator.fit(X, ht.zeros((3, 3, 3)))
            with self.assertRaises(ValueError):
                estimator.fit(ht.zeros((3, 3, 3)), ht.zeros((3, 3)))
コード例 #2
0
    def test_fit_one_hot(self,):
        """Fit KNeighborsClassifier on one-hot labels and check the prediction shape."""
        x = ht.load_hdf5("heat/datasets/iris.h5", dataset="data")

        # keys as label array: fifty 0s, fifty 1s, fifty 2s, in that order
        class_ids = [cls for cls in range(3) for _ in range(50)]
        labels = ht.array(class_ids, split=0)

        # keys as one_hot: a 1 in the column of the class, 0 elsewhere
        one_hot_rows = [[int(col == cls) for col in range(3)] for cls in class_ids]
        y = ht.array(one_hot_rows)

        knn = KNeighborsClassifier(n_neighbors=5)
        knn.fit(x, y)
        result = knn.predict(x)

        self.assertTrue(ht.is_estimator(knn))
        self.assertTrue(ht.is_classifier(knn))
        self.assertIsInstance(result, ht.DNDarray)
        # the prediction is compared against the shape of the integer label array
        self.assertEqual(result.shape, labels.shape)
コード例 #3
0
ファイル: test_io.py プロジェクト: suleisl2000/heat
    def test_load_hdf5(self):
        """Load the iris dataset with ``ht.load_hdf5`` under several argument sets.

        Covers the default parameters, a positive and a negative split axis, and
        an explicit ``dtype``. The test is reported as skipped when
        ``ht.io.supports_hdf5()`` is false.
        """
        # HDF5 support is optional; report a skip rather than a silent pass
        if not ht.io.supports_hdf5():
            self.skipTest("HDF5 support is not available")

        # default parameters
        iris = ht.load_hdf5(self.HDF5_PATH, self.HDF5_DATASET, device=ht_device)
        self.assertIsInstance(iris, ht.DNDarray)
        self.assertEqual(iris.shape, self.IRIS.shape)
        self.assertEqual(iris.dtype, ht.float32)
        self.assertEqual(iris._DNDarray__array.dtype, torch.float32)
        self.assertTrue((self.IRIS == iris._DNDarray__array).all())

        # positive split axis
        iris = ht.load_hdf5(
            self.HDF5_PATH, self.HDF5_DATASET, split=0, device=ht_device
        )
        self.assertIsInstance(iris, ht.DNDarray)
        self.assertEqual(iris.shape, self.IRIS.shape)
        self.assertEqual(iris.dtype, ht.float32)
        lshape = iris.lshape
        # the local chunk may be smaller than the global array along the split axis
        self.assertLessEqual(lshape[0], self.IRIS.shape[0])
        self.assertEqual(lshape[1], self.IRIS.shape[1])

        # negative split axis (device passed like in every other call of this test)
        iris = ht.load_hdf5(
            self.HDF5_PATH, self.HDF5_DATASET, split=-1, device=ht_device
        )
        self.assertIsInstance(iris, ht.DNDarray)
        self.assertEqual(iris.shape, self.IRIS.shape)
        self.assertEqual(iris.dtype, ht.float32)
        lshape = iris.lshape
        self.assertEqual(lshape[0], self.IRIS.shape[0])
        self.assertLessEqual(lshape[1], self.IRIS.shape[1])

        # different data type
        iris = ht.load_hdf5(
            self.HDF5_PATH, self.HDF5_DATASET, dtype=ht.int8, device=ht_device
        )
        self.assertIsInstance(iris, ht.DNDarray)
        self.assertEqual(iris.shape, self.IRIS.shape)
        self.assertEqual(iris.dtype, ht.int8)
        self.assertEqual(iris._DNDarray__array.dtype, torch.int8)
コード例 #4
0
ファイル: test_io.py プロジェクト: tkurze/heat
    def test_load_hdf5_exception(self):
        """Check that ``ht.load_hdf5`` raises for invalid arguments and missing data.

        The test is reported as skipped when ``ht.io.supports_hdf5()`` is false.
        """
        # HDF5 support is optional; report a skip rather than a silent pass
        if not ht.io.supports_hdf5():
            self.skipTest("HDF5 support is not available")

        # improper argument types
        with self.assertRaises(TypeError):
            ht.load_hdf5(1, "data")
        with self.assertRaises(TypeError):
            ht.load_hdf5("iris.h5", 1)
        with self.assertRaises(TypeError):
            ht.load_hdf5("iris.h5", dataset="data", split=1.0)

        # file or dataset does not exist
        with self.assertRaises(IOError):
            ht.load_hdf5("foo.h5", dataset="data")
        with self.assertRaises(IOError):
            ht.load_hdf5("iris.h5", dataset="foo")
コード例 #5
0
    def test_fit_iris(self):
        """Cluster the iris data with KMeans and check the returned centroids."""
        # get some test data
        iris_file = os.path.join(os.getcwd(), 'heat/datasets/data/iris.h5')
        iris = ht.load_hdf5(iris_file, 'data')

        # fit the clusters
        n_clusters = 3
        model = ht.ml.cluster.KMeans(n_clusters=n_clusters)
        centroids = model.fit(iris)

        # check whether the results are correct
        self.assertIsInstance(centroids, ht.tensor)
        expected_shape = (1, iris.shape[1], n_clusters)
        self.assertEqual(centroids.shape, expected_shape)
コード例 #6
0
    def test_split_zero(self):
        """Run KNN on iris data and labels that are both split along axis 0."""
        samples = ht.load_hdf5("heat/datasets/iris.h5", dataset="data", split=0)

        # Generate keys for the iris.h5 dataset: fifty 0s, then fifty 1s, then fifty 2s
        class_ids = [0] * 50 + [1] * 50 + [2] * 50
        labels = ht.array(class_ids, split=0)

        classifier = KNN(samples, labels, 5)
        result = classifier.predict(samples)

        self.assertIsInstance(result, ht.DNDarray)
        self.assertEqual(result.shape, labels.shape)
コード例 #7
0
    def test_split_none(self):
        """Fit KNeighborsClassifier on iris data loaded without a split argument."""
        x = ht.load_hdf5("heat/datasets/iris.h5", dataset="data")

        # generate keys for the iris.h5 dataset: fifty entries for each of 0, 1, 2
        class_ids = []
        for cls in (0, 1, 2):
            class_ids.extend([cls] * 50)
        y = ht.array(class_ids)

        classifier = KNeighborsClassifier(n_neighbors=5)
        classifier.fit(x, y)
        result = classifier.predict(x)

        self.assertTrue(ht.is_estimator(classifier))
        self.assertTrue(ht.is_classifier(classifier))
        self.assertIsInstance(result, ht.DNDarray)
        self.assertEqual(result.shape, y.shape)
コード例 #8
0
    def test_split_none(self):
        """Run KNN on iris data and labels created without a split argument."""
        samples = ht.load_hdf5("heat/datasets/iris.h5", dataset="data")

        # Generate keys for the iris.h5 dataset: the index divided by 50 gives
        # fifty 0s, fifty 1s and fifty 2s, in that order
        class_ids = [index // 50 for index in range(150)]
        labels = ht.array(class_ids)

        classifier = KNN(samples, labels, 5)
        result = classifier.predict(samples)

        self.assertTrue(ht.is_estimator(classifier))
        self.assertTrue(ht.is_classifier(classifier))
        self.assertIsInstance(result, ht.DNDarray)
        self.assertEqual(result.shape, labels.shape)
コード例 #9
0
        def test_lasso(self):
            """Exercise the HeAT, PyTorch and NumPy Lasso estimators on the diabetes data.

            For each backend the test checks the estimator's attributes before and
            after ``fit`` and the type and shape of the ``predict`` output.
            """
            # ToDo: add additional tests
            diabetes_path = os.path.join(os.getcwd(), "heat/datasets/data/diabetes.h5")

            def load_raw():
                # The file is re-read for every backend so that each one starts
                # from freshly loaded arrays.
                features = ht.load_hdf5(diabetes_path, dataset="x")
                targets = ht.load_hdf5(diabetes_path, dataset="y")
                return features, targets

            def check_unfitted(estimator):
                # Before fitting only the constructor arguments are set.
                self.assertEqual(estimator.lam, 0.1)
                self.assertIsNone(estimator.theta)
                self.assertIsNone(estimator.n_iter)
                self.assertEqual(estimator.max_iter, 100)
                self.assertIsNone(estimator.coef_)
                self.assertIsNone(estimator.intercept_)

            def check_fitted(estimator, theta_type, n_columns):
                # After fitting the model parameters must be populated.
                self.assertEqual(estimator.lam, 0.1)
                self.assertIsInstance(estimator.theta, theta_type)
                self.assertEqual(estimator.n_iter, 100)
                self.assertEqual(estimator.max_iter, 100)
                self.assertEqual(estimator.coef_.shape, (n_columns - 1, 1))
                self.assertEqual(estimator.intercept_.shape, (1, ))

            def check_prediction(yest, result_type, n_rows):
                # The prediction must be a 1-D result of the backend's array type.
                self.assertIsInstance(yest, result_type)
                self.assertEqual(yest.shape, (n_rows, ))

            # --- HeAT ---
            X, y = load_raw()

            # normalize dataset
            X = X / ht.sqrt((ht.mean(X**2, axis=0)))
            m, n = X.shape

            estimator = ht.core.regression.lasso.HeatLasso(max_iter=100, tol=None)
            check_unfitted(estimator)
            estimator.fit(X, y)
            check_fitted(estimator, ht.DNDarray, n)
            check_prediction(estimator.predict(X), ht.DNDarray, m)

            # --- Now the same stuff again in PyTorch ---
            X, y = load_raw()
            X = torch.tensor(X._DNDarray__array)
            y = torch.tensor(y._DNDarray__array)

            # normalize dataset
            X = X / torch.sqrt((torch.mean(X**2, 0)))
            m, n = X.shape

            estimator = ht.core.regression.lasso.PytorchLasso(max_iter=100, tol=None)
            check_unfitted(estimator)
            estimator.fit(X, y)
            check_fitted(estimator, torch.Tensor, n)
            check_prediction(estimator.predict(X), torch.Tensor, m)

            # --- Now the same stuff again in NumPy ---
            X, y = load_raw()
            X = X._DNDarray__array.numpy()
            y = y._DNDarray__array.numpy()

            # normalize dataset
            X = X / np.sqrt((np.mean(X**2, axis=0, keepdims=True)))
            m, n = X.shape

            estimator = ht.core.regression.lasso.NumpyLasso(max_iter=100, tol=None)
            check_unfitted(estimator)
            estimator.fit(X, y)
            check_fitted(estimator, np.ndarray, n)
            check_prediction(estimator.predict(X), np.ndarray, m)
コード例 #10
0
import sys
import os
import random

# Fix python path if run from terminal: make the directory two levels above this
# file importable so that `heat` can be found
curdir = os.path.dirname(os.path.abspath(__file__))
base_dir = os.path.abspath(os.path.join(curdir, "../../"))
sys.path.insert(0, base_dir)

import heat as ht
from heat.classification.knn import KNN

# Load dataset from hdf5 file. The path is anchored at base_dir (derived from this
# file's location) so the script no longer depends on the current working directory.
X = ht.load_hdf5(
    os.path.join(base_dir, "heat/datasets/data/iris.h5"), dataset="data", split=0
)

# Generate keys for the iris.h5 dataset: fifty 0s, fifty 1s, fifty 2s, in that order
keys = [label for label in range(3) for _ in range(50)]
Y = ht.array(keys, split=0)


def calculate_accuracy(new_y, verification_y):
    """
    Calculates the accuracy of classification/clustering-algorithms.
    Note this only works with integer/discrete classes. For algorithms that give approximations an error function is
    required.
コード例 #11
0
# NOTE(review): `np` is used below (np.logspace) but was not imported in the
# visible part of this script; the import is a harmless duplicate if it already
# exists elsewhere in the file.
import numpy as np
import torch
import sys

# The relative entries below are resolved against the current working directory,
# so the script is expected to be started from its own directory.
sys.path.append("../../")

import heat as ht
from matplotlib import pyplot as plt
from sklearn import datasets
import heat.ml.regression.lasso as lasso
import plotfkt

# read scikit diabetes data set
diabetes = datasets.load_diabetes()

# load diabetes dataset from hdf5 file
X = ht.load_hdf5("../../heat/datasets/data/diabetes.h5", dataset="x", split=0)
y = ht.load_hdf5("../../heat/datasets/data/diabetes.h5", dataset="y", split=0)

# normalize dataset
# TODO: this goes into the lasso fit routine as soon as issue #106 is solved
X = X / ht.sqrt((ht.mean(X**2, axis=0)))

# HeAT lasso instance
estimator = lasso.HeatLasso(max_iter=100)

# List of lasso model parameters
theta_list = []

# Range of lambda values: ten logarithmically spaced values from 0.1 to 1000
lamda = np.logspace(0, 4, 10) / 10

# compute the lasso path
コード例 #12
0
# NOTE(review): `ht` and `np` are used below but were not imported in the visible
# part of this script; these imports are harmless duplicates if they already
# exist above this point in the file.
import numpy as np

import heat as ht
from matplotlib import pyplot as plt
from sklearn import datasets
import heat.regression.lasso as lasso

import plotfkt

import pkg_resources

# read scikit diabetes data set
diabetes = datasets.load_diabetes()

# load diabetes dataset from hdf5 file; the path is resolved through the
# resources of the "heat" distribution
diabetes_path = pkg_resources.resource_filename(
    pkg_resources.Requirement.parse("heat"), "heat/datasets/diabetes.h5")

X = ht.load_hdf5(diabetes_path, dataset="x", split=0)
y = ht.load_hdf5(diabetes_path, dataset="y", split=0)

# normalize dataset
# TODO: this goes into the lasso fit routine as soon as issue #106 is solved
X = X / ht.sqrt((ht.mean(X**2, axis=0)))

# HeAT lasso instance
estimator = lasso.Lasso(max_iter=100)

# List of lasso model parameters
theta_list = []

# Range of lambda values: ten logarithmically spaced values from 0.1 to 1000
lamda = np.logspace(0, 4, 10) / 10

# compute the lasso path
コード例 #13
0
import sys
import os
import random

import heat as ht
from heat.classification.kneighborsclassifier import KNeighborsClassifier

import pkg_resources

# Load dataset from hdf5 file; the path is resolved through the resources of the
# "heat" distribution
iris_path = pkg_resources.resource_filename(
    pkg_resources.Requirement.parse("heat"),
    "heat/datasets/iris.h5",
)
X = ht.load_hdf5(iris_path, dataset="data", split=0)

# Generate keys for the iris.h5 dataset: fifty 0s, fifty 1s, fifty 2s, in that order
keys = [0] * 50 + [1] * 50 + [2] * 50
Y = ht.array(keys, split=0)


def calculate_accuracy(new_y, verification_y):
    """
    Calculates the accuracy of classification/clustering-algorithms.
    Note this only works with integer/discrete classes. For algorithms that give approximations an error function is
    required.