コード例 #1
0
def calculate_accuracy(new_y, verification_y):
    """
    Compute the share of generated labels that agree with the known labels.

    Only meaningful for integer/discrete class labels. Algorithms that
    produce approximations need an error function instead.

    Parameters
    ----------
    new_y : ht.tensor of shape (n_samples, n_features), required
        The labels that were generated.
    verification_y : ht.tensor of shape (n_samples, n_features), required
        The known labels.

    Returns
    -------
    float
        Number of element-wise matches divided by the length of the first
        global dimension of ``new_y``.
        NOTE(review): the value is whatever ``ht.sum(...) / int`` yields,
        presumably a heat array rather than a builtin float -- confirm.

    Raises
    ------
    ValueError
        If the global shapes of the two inputs differ.
    """
    generated_shape = new_y.gshape
    known_shape = verification_y.gshape
    if generated_shape != known_shape:
        message = "Expecting results of same length, got {}, {}".format(
            generated_shape, known_shape)
        raise ValueError(message)

    # 1 where the labels agree, 0 elsewhere; the sum is the number of matches
    agreement = ht.where(new_y == verification_y, 1, 0)
    n_matches = ht.sum(agreement)

    return n_matches / generated_shape[0]
コード例 #2
0
    def test_fit_iris_unsplit(self):
        """Fit KMedoids on the iris dataset and check the resulting centers."""
        split = 0
        # load the test data
        iris = ht.load("heat/datasets/iris.csv", sep=";", split=split)
        ht.random.seed(1)
        k = 3

        # first pass: seeded estimator without an explicit init,
        # second pass: init="kmedoids++"
        for options in ({"random_state": 1}, {"init": "kmedoids++"}):
            kmedoid = ht.cluster.KMedoids(n_clusters=k, **options)
            kmedoid.fit(iris)

            # one center per cluster, each with as many columns as the data
            self.assertIsInstance(kmedoid.cluster_centers_, ht.DNDarray)
            self.assertEqual(kmedoid.cluster_centers_.shape,
                             (k, iris.shape[1]))

        # every center of the last fit must coincide with an actual datapoint,
        # i.e. have a zero L1 distance to at least one row of the dataset
        centers = kmedoid.cluster_centers_
        for row in range(centers.shape[0]):
            l1_to_samples = ht.sum(ht.abs(centers[row, :] - iris), axis=1)
            self.assertTrue(ht.any(l1_to_samples == 0))
コード例 #3
0
    def __joint_log_likelihood(self, X):
        """
        Adapted to HeAT from scikit-learn.

        Evaluate, for every row of ``X`` and every class, the log of the class
        prior plus a Gaussian log-density built from ``self.theta_`` (used as
        the mean) and ``self.sigma_`` (used as the variance) of that class.

        Returns an ht.DNDarray of shape (n_samples, n_classes) that shares
        dtype, split and device with ``X``.
        """
        n_classes = self.classes_._DNDarray__array.numel()
        log_likelihood = ht.empty(
            (X.shape[0], n_classes), dtype=X.dtype, split=X.split, device=X.device
        )

        for cls in range(n_classes):
            log_prior = ht.log(self.class_prior_[cls])
            variance = self.sigma_[cls, :]
            # normalisation term: -0.5 * sum(log(2 * pi * variance))
            log_density = -0.5 * ht.sum(ht.log(2.0 * ht.pi * variance))
            # per-sample term: -0.5 * sum((x - mean)^2 / variance) over features
            squared_deviation = (X - self.theta_[cls, :]) ** 2
            log_density -= 0.5 * ht.sum(squared_deviation / variance, 1)
            log_likelihood[:, cls] = log_prior + log_density

        return log_likelihood
コード例 #4
0
ファイル: laplacian.py プロジェクト: sebimarkgraf/heat
 def _normalized_symmetric_L(self, A):
     """
     Build the symmetrically normalized matrix derived from ``A``.

     Every entry ``A[i, j]`` is divided by ``sqrt(d[i])`` and ``sqrt(d[j])``,
     where ``d`` is the sum of ``A`` along axis 1, then negated; finally the
     diagonal is set to 1.
     """
     row_degree = ht.sum(A, axis=1)
     row_degree.resplit_(axis=None)

     # Stand-alone vertices have degree 0; use 1 instead so that the
     # divisions below do not divide by zero.
     local_degree = row_degree.larray
     unit = torch.ones(row_degree.shape,
                       dtype=local_degree.dtype,
                       device=row_degree.device.torch_device)
     row_degree.larray = torch.where(local_degree == 0, unit, local_degree)

     column_scale = ht.sqrt(ht.expand_dims(row_degree, axis=1))
     row_scale = ht.sqrt(ht.expand_dims(row_degree, axis=0))
     laplacian = (A / column_scale) / row_scale
     laplacian = laplacian * (-1.0)
     laplacian.fill_diagonal(1.0)
     return laplacian
コード例 #5
0
ファイル: knn.py プロジェクト: sebimarkgraf/heat
    def predict(self, X) -> ht.DNDarray:
        """
        Predict a class index for every sample in ``X`` from its nearest
        stored samples.

        Parameters
        ----------
        X : ht.DNDarray
            Input data to be predicted

        Returns
        -------
        ht.DNDarray
            For every sample, the position (along axis 1) of the largest entry
            in the summed label rows of its ``self.num_neighbours`` nearest
            stored samples.
        """

        # pairwise distances between the query samples and the stored samples
        distances = ht.spatial.cdist(X, self.x)
        # per query sample, the indices of the smallest distances
        _, indices = ht.topk(distances, self.num_neighbours, largest=False)

        # gather the label rows of all selected neighbours
        # NOTE(review): presumably self.y holds one-hot encoded labels of
        # shape (n_samples, n_classes) -- confirm against fit()
        labels = self.y[indices.flatten()]
        labels.balance_()
        # restore the per-query grouping with the label columns as last axis
        labels = ht.reshape(labels, (indices.gshape + (self.y.gshape[1], )))
        # add up the label rows of each query's neighbours and pick the
        # column with the largest total
        labels = ht.sum(labels, axis=1)
        maximums = ht.argmax(labels, axis=1)

        return maximums
コード例 #6
0
    def predict(self, x: DNDarray) -> DNDarray:
        """
        Predict the class labels for the provided data.

        The result is also stored on the instance as ``self.classes_``.

        Parameters
        ----------
        x : DNDarray
            The test samples.
        """
        # distances between the test samples and the stored samples
        dist_to_stored = self.effective_metric_(x, self.x)
        # per test sample, the indices of the smallest distances
        _, nearest = ht.topk(dist_to_stored, self.n_neighbors, largest=False)

        # label rows of all selected neighbours, regrouped per test sample
        neighbour_labels = self.y[nearest.flatten()]
        neighbour_labels.balance_()
        grouped_shape = nearest.gshape + (self.y.gshape[1], )
        votes = ht.sum(ht.reshape(neighbour_labels, grouped_shape), axis=1)

        # the column with the largest summed value wins
        self.classes_ = ht.argmax(votes, axis=1)
        return self.classes_
コード例 #7
0
    def test_spherical_clusters(self):
        """Fit KMedoids on spherical datasets of several sizes and dtypes."""
        seed = 1
        # (samples-per-cluster factor, radius, offset, dtype,
        #  compare the centers against the data cast to float32?)
        scenarios = (
            (20, 1.0, 4.0, ht.float32, False),
            # more samples
            (100, 1.0, 4.0, ht.float32, False),
            # different datatype
            (20, 1.0, 4.0, ht.float64, True),
            # on ints (different radius, offset and datatype)
            (20, 10.0, 40.0, ht.int32, False),
        )

        for factor, radius, offset, dtype, cast_to_float32 in scenarios:
            n = factor * ht.MPI_WORLD.size
            data = self.create_spherical_dataset(num_samples_cluster=n,
                                                 radius=radius,
                                                 offset=offset,
                                                 dtype=dtype,
                                                 random_state=seed)
            kmedoid = ht.cluster.KMedoids(n_clusters=4, init="kmedoids++")
            kmedoid.fit(data)
            self.assertIsInstance(kmedoid.cluster_centers_, ht.DNDarray)
            self.assertEqual(kmedoid.cluster_centers_.shape, (4, 3))

            # check whether each center is actually a datapoint: its L1
            # distance to at least one sample has to be zero
            reference = data.astype(ht.float32) if cast_to_float32 else data
            for row in range(kmedoid.cluster_centers_.shape[0]):
                l1_to_samples = ht.sum(
                    ht.abs(kmedoid.cluster_centers_[row, :] - reference),
                    axis=1)
                self.assertTrue(ht.any(l1_to_samples == 0))
コード例 #8
0
    def test_sum(self):
        """Check ``sum`` results, metadata, ``out`` handling and error cases."""
        array_len = 11

        def check_meta(result, shape, ht_dtype, torch_dtype, split,
                       lshape=None):
            # metadata assertions shared by all cases; the local shape is
            # only compared when an expectation is passed
            self.assertIsInstance(result, ht.DNDarray)
            self.assertEqual(result.shape, shape)
            if lshape is not None:
                self.assertEqual(result.lshape, lshape)
            self.assertEqual(result.dtype, ht_dtype)
            self.assertEqual(result.larray.dtype, torch_dtype)
            self.assertEqual(result.split, split)

        # sum over all float elements of a local 1d tensor
        ones_1d = ht.ones(array_len)
        total = ones_1d.sum()
        check_meta(total, (1, ), ht.float32, torch.float32, None,
                   lshape=(1, ))
        self.assertEqual(total.larray, array_len)

        out_noaxis = ht.zeros((1, ))
        ht.sum(ones_1d, out=out_noaxis)
        self.assertTrue(out_noaxis.larray == ones_1d.larray.sum())

        # sum over all elements of a split 1d tensor
        arange_split = ht.arange(array_len, split=0)
        arange_total = arange_split.sum()
        check_meta(arange_total, (1, ), ht.int64, torch.int64, None,
                   lshape=(1, ))
        self.assertEqual(arange_total, 55)

        out_noaxis = ht.zeros((1, ))
        ht.sum(arange_split, out=out_noaxis)
        self.assertEqual(out_noaxis.larray, 55)

        # sum over all float elements of a local 3d tensor
        ones_3d = ht.ones((3, 3, 3))
        total = ones_3d.sum()
        check_meta(total, (1, ), ht.float32, torch.float32, None,
                   lshape=(1, ))
        self.assertEqual(total.larray, 27)

        out_noaxis = ht.zeros((1, ))
        ht.sum(ones_3d, out=out_noaxis)
        self.assertEqual(out_noaxis.larray, 27)

        # split 3d tensor reduced along its split axis
        reduced = ht.ones((3, 3, 3), split=0).sum(axis=0)
        check_meta(reduced, (3, 3), ht.float32, torch.float32, None)

        # split semantics: reducing an axis in front of the split axis
        reduced = ht.ones((3, 3, 3), split=2).sum(axis=1)
        check_meta(reduced, (3, 3), ht.float32, torch.float32, 1)

        out_noaxis = ht.zeros((3, 3))
        ht.sum(ones_3d, axis=0, out=out_noaxis)
        expected_local = torch.full((3, 3), 3, dtype=torch.float,
                                    device=self.device.torch_device)
        self.assertTrue((out_noaxis.larray == expected_local).all())

        # split 5d tensor reduced along a negative axis
        ones_5d = ht.ones((1, 2, 3, 4, 5), split=1)
        reduced = ones_5d.sum(axis=-2)
        check_meta(reduced, (1, 2, 3, 5), ht.float32, torch.float32, 1)

        out_noaxis = ht.zeros((1, 2, 3, 5), split=1)
        ht.sum(ones_5d, axis=-2, out=out_noaxis)

        # split 3d tensor reduced along a tuple of axes
        reduced = ht.ones((3, 4, 5), split=1).sum(axis=(-2, -3))
        expected_result = ht.ones((5, )) * 12.0
        check_meta(reduced, (5, ), ht.float32, torch.float32, None)
        self.assertTrue((reduced == expected_result).all())

        # exceptions (out_noaxis still has the (1, 2, 3, 5) shape from above)
        with self.assertRaises(ValueError):
            ht.ones(array_len).sum(axis=1)
        with self.assertRaises(ValueError):
            ht.ones(array_len).sum(axis=-2)
        with self.assertRaises(ValueError):
            ht.ones((4, 4)).sum(axis=0, out=out_noaxis)
        with self.assertRaises(TypeError):
            ht.ones(array_len).sum(axis="bad_axis_type")
コード例 #9
0
    def logsumexp(self,
                  a,
                  axis=None,
                  b=None,
                  keepdim=False,
                  return_sign=False):
        """
        Adapted to HeAT from scikit-learn.

        Compute the log of the sum of exponentials of input elements.

        Parameters
        ----------
        a : ht.tensor
            Input array.
        axis : None or int or tuple of ints, optional
            Axis or axes over which the sum is taken. By default `axis` is
            None, and all elements are summed.
        b : ht.tensor, optional
            Scaling factor for exp(`a`). Not supported yet: any value other
            than None raises NotImplementedError.
        keepdim : bool, optional
            If this is set to True, the axes which are reduced are left in the
            result as dimensions with size one. With this option, the result
            will broadcast correctly against the original array.
        return_sign : bool, optional
            Not supported yet: a truthy value raises NotImplementedError.
            Default is False (no sign information).

        Returns
        -------
        res : ht.tensor
            The result, ``log(sum(exp(a)))``, calculated in a numerically
            more stable way by shifting `a` by its maximum first.

        Raises
        ------
        NotImplementedError
            If `b` is not None or if `return_sign` is truthy.
        """

        if b is not None:
            raise NotImplementedError("Not implemented for weighted logsumexp")

        # shift by the maximum so that the largest exponent is exp(0)
        peak = ht.max(a, axis=axis, keepdim=True)

        # TODO: sanitize non-finite maxima once isfinite() is available
        #       (sanitation module, cf. #468)
        # TODO: multiply the exponentials by b once weights are supported
        exp_sum = ht.sum(ht.exp(a - peak), axis=axis, keepdim=keepdim)
        if return_sign:
            # TODO: needs a sign() implementation to split off the sign
            raise NotImplementedError("Not implemented for return_sign")
        result = ht.log(exp_sum)

        # undo the shift; drop the kept dimensions of the maximum when the
        # caller asked for a reduced result
        result += peak if keepdim else ht.squeeze(peak, axis=axis)

        return result
コード例 #10
0
ファイル: laplacian.py プロジェクト: suleisl2000/heat
 def _simple_L(self, A):
     """Return ``diag(d) - A`` where ``d`` is the sum of ``A`` along axis 1."""
     return ht.diag(ht.sum(A, axis=1)) - A
コード例 #11
0
    def test_sum(self):
        """Check ``sum`` results, metadata, ``out`` handling and error cases."""
        array_len = 11

        def check_meta(result, shape, ht_dtype, torch_dtype, split,
                       lshape=None):
            # metadata assertions shared by all cases; the local shape is
            # only compared when an expectation is passed
            self.assertIsInstance(result, ht.tensor)
            self.assertEqual(result.shape, shape)
            if lshape is not None:
                self.assertEqual(result.lshape, lshape)
            self.assertEqual(result.dtype, ht_dtype)
            self.assertEqual(result._tensor__array.dtype, torch_dtype)
            self.assertEqual(result.split, split)

        # sum over all float elements of a local 1d tensor
        ones_1d = ht.ones(array_len)
        total = ones_1d.sum()
        check_meta(total, (1, ), ht.float32, torch.float32, None,
                   lshape=(1, ))
        self.assertEqual(total._tensor__array, array_len)

        out_noaxis = ht.zeros((1, ))
        ht.sum(ones_1d, out=out_noaxis)
        self.assertTrue(
            out_noaxis._tensor__array == ones_1d._tensor__array.sum())

        # sum over all elements of a split 1d tensor
        arange_split = ht.arange(array_len, split=0)
        arange_total = arange_split.sum()
        check_meta(arange_total, (1, ), ht.int64, torch.int64, None,
                   lshape=(1, ))
        self.assertEqual(arange_total, 55)

        out_noaxis = ht.zeros((1, ))
        ht.sum(arange_split, out=out_noaxis)
        self.assertEqual(out_noaxis._tensor__array, 55)

        # sum over all float elements of a local 3d tensor
        ones_3d = ht.ones((3, 3, 3))
        total = ones_3d.sum()
        check_meta(total, (1, ), ht.float32, torch.float32, None,
                   lshape=(1, ))
        self.assertEqual(total._tensor__array, 27)

        out_noaxis = ht.zeros((1, ))
        ht.sum(ones_3d, out=out_noaxis)
        self.assertEqual(out_noaxis._tensor__array, 27)

        # split 3d tensor reduced along its split axis; the reduced axis is
        # expected to remain with size one
        reduced = ht.ones((3, 3, 3), split=0).sum(axis=0)
        check_meta(reduced, (1, 3, 3), ht.float32, torch.float32, None)

        out_shape = (1, 3, 3)
        out_noaxis = ht.zeros(out_shape)
        ht.sum(ones_3d, axis=0, out=out_noaxis)
        self.assertTrue(
            (out_noaxis._tensor__array == torch.full(out_shape, 3)).all())

        # split 5d tensor reduced along a negative axis
        ones_5d = ht.ones((1, 2, 3, 4, 5), split=1)
        reduced = ones_5d.sum(axis=-2)
        check_meta(reduced, (1, 2, 3, 1, 5), ht.float32, torch.float32, 1)

        out_noaxis = ht.zeros((1, 2, 3, 1, 5))
        ht.sum(ones_5d, axis=-2, out=out_noaxis)

        # exceptions (out_noaxis still has the (1, 2, 3, 1, 5) shape from above)
        with self.assertRaises(ValueError):
            ht.ones(array_len).sum(axis=1)
        with self.assertRaises(ValueError):
            ht.ones(array_len).sum(axis=-2)
        with self.assertRaises(ValueError):
            ht.ones((4, 4)).sum(axis=0, out=out_noaxis)
        with self.assertRaises(TypeError):
            ht.ones(array_len).sum(axis='bad_axis_type')