Exemple #1
0
    def predict(self, x: DNDarray) -> DNDarray:
        """
        Classify the test samples in ``x`` (adapted to HeAT from scikit-learn).

        For every row of ``x`` the entry of ``self.classes_`` is selected whose
        index is the arg-max (along axis 1) of the values returned by the
        private joint-log-likelihood helper.

        Parameters
        ----------
        x : DNDarray
            Input data with shape (n_samples, n_features)

        Returns
        -------
        DNDarray
            ``self.classes_`` indexed by the per-row arg-max.

        Raises
        ------
        ValueError
            If ``x`` is not a ``ht.DNDarray``.
        """
        # TODO: sanitation/validation module, cf. #468
        if not isinstance(x, ht.DNDarray):
            message = "input needs to be a ht.DNDarray, but was {}".format(type(x))
            raise ValueError(message)

        best_class_idx = ht.argmax(self.__joint_log_likelihood(x), axis=1)
        return self.classes_[best_class_idx]
Exemple #2
0
    def predict(self, X) -> ht.dndarray:
        """
        Predict a label index for every sample in ``X`` from its nearest stored samples.

        The pairwise distances between ``X`` and the stored samples ``self.x`` are
        computed, the ``self.num_neighbours`` smallest ones are selected per row,
        and the corresponding rows of ``self.y`` are summed. The index of the
        largest summed entry is returned for each sample.

        Parameters
        ----------
        X : ht.DNDarray
            Input data to be predicted

        Returns
        -------
        Arg-max (axis 1) of the summed neighbour label rows.
        NOTE(review): presumably ``self.y`` is one-hot encoded, so this is a
        majority vote - confirm against the fitting code.
        """
        pairwise = ht.spatial.cdist(X, self.x)
        # largest=False -> the k smallest distances, i.e. the closest neighbours
        _nearest_dist, neighbour_idx = ht.topk(
            pairwise, self.num_neighbours, largest=False
        )

        # gather the label rows of all neighbours as one flat batch
        votes = self.y[neighbour_idx.flatten()]
        votes.balance_()

        # restore the (samples, neighbours, label-width) layout before summing
        vote_shape = neighbour_idx.gshape + (self.y.gshape[1],)
        votes = ht.reshape(votes, vote_shape)
        vote_totals = ht.sum(votes, axis=1)

        return ht.argmax(vote_totals, axis=1)
Exemple #3
0
    def fit(self, x: DNDarray):
        """
        Cluster the dataset ``x`` via spectral embedding.

        Computes the low-dimensional representation by calculating the eigenspectrum (eigenvalues and eigenvectors) of
        the graph Laplacian from the similarity matrix, and fits the eigenvectors that correspond to the k lowest
        eigenvalues with a separate clustering algorithm (currently only kmeans is supported). Similarity metrics for
        adjacency calculations are supported via spatial.distance. The eigenvalues and eigenvectors are computed by
        reducing the Laplacian via Lanczos iterations and using the torch eigenvalue solver on this smaller matrix.
        If other eigenvalue decomposition methods are supported, this will be expanded.

        If ``self.n_clusters`` is ``None`` it is set from the position of the largest gap between consecutive
        eigenvalues. After fitting, the labels and cluster centers of the wrapped estimator are stored in
        ``self._labels`` and ``self._cluster_centers``.

        Parameters
        ----------
        x : DNDarray
            Training instances to cluster. Shape = (n_samples, n_features)

        Returns
        -------
        The estimator itself (``self``).

        Raises
        ------
        ValueError
            If ``x`` is not a ``DNDarray``.
        NotImplementedError
            If ``x`` is split along an axis other than 0.
        """
        # 1. input sanitation
        if not isinstance(x, DNDarray):
            raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(x)))
        if not (x.split is None or x.split == 0):
            raise NotImplementedError("Not implemented for other splitting-axes")

        # 2. embed the dataset into the lower-dimensional eigenvector space
        eigenvalues, eigenvectors = self._spectral_embedding(x)

        # 3. derive the number of clusters from the spectral gap unless it was given from the outside
        if self.n_clusters is None:
            gaps = eigenvalues[1:] - eigenvalues[:-1]
            self.n_clusters = ht.argmax(gaps).item() + 1

        embedded = eigenvectors[:, : self.n_clusters].copy()

        # 4. hand the embedding to the wrapped clustering estimator
        cluster_params = self._cluster.get_params()
        cluster_params["n_clusters"] = self.n_clusters
        self._cluster.set_params(**cluster_params)
        self._cluster.fit(embedded)

        self._labels = self._cluster.labels_
        self._cluster_centers = self._cluster.cluster_centers_
        return self
Exemple #4
0
    def predict(self, x: DNDarray) -> DNDarray:
        """
        Predict the class labels for the provided data.

        Distances between ``x`` and the stored samples ``self.x`` are evaluated with
        ``self.effective_metric_``; the label rows (``self.y``) of the
        ``self.n_neighbors`` closest stored samples are summed per test sample and
        the index of the largest sum is taken as the prediction.

        Note that the result is also stored in ``self.classes_``.

        Parameters
        ----------
        x : DNDarray
            The test samples.

        Returns
        -------
        DNDarray
            Arg-max (axis 1) of the summed neighbour label rows.
        """
        dist = self.effective_metric_(x, self.x)
        # largest=False -> pick the smallest distances, i.e. the nearest neighbours
        _nearest_dist, neighbor_idx = ht.topk(dist, self.n_neighbors, largest=False)

        # collect the label rows of all neighbours in one flat batch
        votes = self.y[neighbor_idx.flatten()]
        votes.balance_()

        # regroup the rows per test sample, then accumulate over the neighbours
        vote_shape = neighbor_idx.gshape + (self.y.gshape[1],)
        vote_totals = ht.sum(ht.reshape(votes, vote_shape), axis=1)

        self.classes_ = ht.argmax(vote_totals, axis=1)
        return self.classes_
Exemple #5
0
    def predict(self, X):
        """
        Perform classification on test data ``X`` (adapted to HeAT from scikit-learn).

        The per-row arg-max (axis 1) of the private joint-log-likelihood helper's
        output is converted to a NumPy array and used to index ``self.classes_``.

        Parameters
        ----------
        X : ht.DNDarray of shape (n_samples, n_features)

        Returns
        -------
        C : entries of ``self.classes_`` selected for each sample
            Predicted labels for X

        Raises
        ------
        ValueError
            If ``X`` is not a ``ht.DNDarray``.
        """
        # TODO: sanitation/validation module, cf. #468
        if not isinstance(X, ht.DNDarray):
            message = "input needs to be a ht.DNDarray, but was {}".format(type(X))
            raise ValueError(message)

        likelihoods = self.__joint_log_likelihood(X)
        winner_idx = ht.argmax(likelihoods, axis=1).numpy()
        return self.classes_[winner_idx]
Exemple #6
0
    def test_argmax(self):
        """
        Exercise ``ht.argmax`` on local and split tensors, with and without an
        explicit axis or ``out`` buffer, and check its rejection of invalid axes.
        """

        def check_result(returned, shape, lshape, split, target=None):
            # Structural checks shared by every scenario: ``returned`` must be a
            # DNDarray; dtype/shape/split are verified on ``target`` (defaults to
            # ``returned``; differs only when an ``out`` buffer is inspected).
            if target is None:
                target = returned
            self.assertIsInstance(returned, ht.DNDarray)
            self.assertEqual(target.dtype, ht.int64)
            self.assertEqual(target._DNDarray__array.dtype, torch.int64)
            self.assertEqual(target.shape, shape)
            self.assertEqual(target.lshape, lshape)
            self.assertEqual(target.split, split)

        torch.manual_seed(1)
        data = ht.random.randn(3, 4, 5)

        # 3D local tensor, major axis
        result = ht.argmax(data, axis=0)
        check_result(result, (4, 5), (4, 5), None)
        reference = data._DNDarray__array.argmax(0)
        self.assertTrue((result._DNDarray__array == reference).all())

        # 3D local tensor, minor axis
        result = ht.argmax(data, axis=-1, keepdim=True)
        check_result(result, (3, 4, 1), (3, 4, 1), None)
        reference = data._DNDarray__array.argmax(-1, keepdim=True)
        self.assertTrue((result._DNDarray__array == reference).all())

        # 1D split tensor, no axis
        data = ht.arange(-10, 10, split=0)
        result = ht.argmax(data)
        check_result(result, (1,), (1,), None)
        reference = torch.tensor([19], device=self.device.torch_device)
        self.assertTrue((result._DNDarray__array == reference))

        # 2D split tensor, along the axis
        data = ht.array(ht.random.randn(4, 5), is_split=0)
        result = ht.argmax(data, axis=1)
        expected = torch.argmax(data._DNDarray__array, dim=1)
        check_result(result, (ht.MPI_WORLD.size * 4,), (4,), 0)
        self.assertTrue((result._DNDarray__array == expected).all())

        # 2D split tensor, across the axis
        size = ht.MPI_WORLD.size * 2
        data = ht.tril(ht.ones((size, size), split=0), k=-1)

        result = ht.argmax(data, axis=0)
        check_result(result, (size,), (size,), None)
        # skip test on gpu; argmax works different
        on_gpu = torch.cuda.is_available() and result.device == ht.gpu
        if not on_gpu:
            self.assertTrue((result._DNDarray__array != 0).all())

        # 2D split tensor, across the axis, output tensor
        size = ht.MPI_WORLD.size * 2
        data = ht.tril(ht.ones((size, size), split=0), k=-1)

        output = ht.empty((size,))
        result = ht.argmax(data, axis=0, out=output)

        # the return value must be a DNDarray, the properties are checked on the buffer
        check_result(result, (size,), (size,), None, target=output)
        # skip test on gpu; argmax works different
        on_gpu = torch.cuda.is_available() and output.device == ht.gpu
        if not on_gpu:
            self.assertTrue((output._DNDarray__array != 0).all())

        # check exceptions
        with self.assertRaises(TypeError):
            data.argmax(axis=(0, 1))
        with self.assertRaises(TypeError):
            data.argmax(axis=1.1)
        with self.assertRaises(TypeError):
            data.argmax(axis="y")
        with self.assertRaises(ValueError):
            ht.argmax(data, axis=-4)