def predict(self, x: DNDarray) -> DNDarray:
    """
    Classify the test data ``x``. Adapted to HeAT from scikit-learn.

    The joint log-likelihood of ``x`` is computed, its arg-max along axis 1
    is taken, and that result is used to index ``self.classes_``.

    Parameters
    ----------
    x : DNDarray
        Input data with shape (n_samples, n_features)

    Returns
    -------
    DNDarray
        The entries of ``self.classes_`` selected by the arg-max.

    Raises
    ------
    ValueError
        If ``x`` is not a ``ht.DNDarray``.
    """
    # sanitize input
    # TODO: sanitation/validation module, cf. #468
    if not isinstance(x, ht.DNDarray):
        message = "input needs to be a ht.DNDarray, but was {}".format(type(x))
        raise ValueError(message)

    joint_ll = self.__joint_log_likelihood(x)
    # position of the largest joint log-likelihood along axis 1
    best = ht.argmax(joint_ll, axis=1)
    return self.classes_[best]
def predict(self, X) -> ht.dndarray:
    """
    Predict a label index for ``X`` from the closest stored samples.

    The rows of ``self.y`` belonging to the ``self.num_neighbours`` stored
    samples with the smallest distance are summed, and the arg-max of that
    sum along axis 1 is returned.

    NOTE(review): the return annotation names ``ht.dndarray`` (lowercase)
    while the parameter documentation uses ``ht.DNDarray`` -- confirm which
    one is intended.

    Parameters
    ----------
    X : ht.DNDarray
        Input data to be predicted

    Returns
    -------
    The arg-max along axis 1 of the summed neighbour rows of ``self.y``.
    """
    # distances between the query samples and the stored samples self.x
    pairwise = ht.spatial.cdist(X, self.x)
    # largest=False: ask topk for the smallest distances; only indices are used
    _, nearest = ht.topk(pairwise, self.num_neighbours, largest=False)

    # one flat lookup into self.y for all neighbours, then rebalance in place
    votes = self.y[nearest.flatten()]
    votes.balance_()

    # undo the flattening: shape of `nearest` plus the second axis of self.y
    # (presumably self.y holds one-hot encoded labels -- TODO confirm)
    vote_shape = nearest.gshape + (self.y.gshape[1],)
    votes = ht.reshape(votes, vote_shape)

    # sum over axis 1, then pick the strongest remaining column
    tally = ht.sum(votes, axis=1)
    return ht.argmax(tally, axis=1)
def fit(self, x: DNDarray):
    """
    Clusters dataset ``x`` via spectral embedding.

    Computes the low-dimensional representation by calculating the
    eigenspectrum (eigenvalues and eigenvectors) of the graph Laplacian from
    the similarity matrix, and fits the eigenvectors that correspond to the k
    lowest eigenvalues with a separate clustering algorithm (currently only
    kmeans is supported). Similarity metrics for adjacency calculations are
    supported via spatial.distance. The eigenvalues and eigenvectors are
    computed by reducing the Laplacian via Lanczos iterations and using the
    torch eigenvalue solver on this smaller matrix. If other eigenvalue
    decomposition methods are supported, this will be expanded.

    Parameters
    ----------
    x : DNDarray
        Training instances to cluster. Shape = (n_samples, n_features)

    Returns
    -------
    self
        The estimator itself, with ``_labels`` and ``_cluster_centers`` set
        (and ``n_clusters`` set if it was ``None`` before).

    Raises
    ------
    ValueError
        If ``x`` is not a ``DNDarray``.
    NotImplementedError
        If ``x`` is split along an axis other than 0.
    """
    # 1. input sanitation
    if not isinstance(x, DNDarray):
        raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(x)))
    if x.split not in (None, 0):
        raise NotImplementedError("Not implemented for other splitting-axes")

    # 2. Embed dataset into lower-dimensional eigenvector space
    eigenvalues, eigenvectors = self._spectral_embedding(x)

    # 3. Find the spectral gap, if number of clusters is not defined from the outside:
    #    the position of the largest jump between consecutive eigenvalues, plus one
    if self.n_clusters is None:
        gaps = eigenvalues[1:] - eigenvalues[:-1]
        self.n_clusters = ht.argmax(gaps).item() + 1

    # keep only the first n_clusters eigenvector columns
    embedding = eigenvectors[:, : self.n_clusters].copy()

    # hand the cluster count to the wrapped clustering algorithm, then fit it
    cluster_params = self._cluster.get_params()
    cluster_params["n_clusters"] = self.n_clusters
    self._cluster.set_params(**cluster_params)
    self._cluster.fit(embedding)

    self._labels = self._cluster.labels_
    self._cluster_centers = self._cluster.cluster_centers_
    return self
def predict(self, x: DNDarray) -> DNDarray:
    """
    Predict the class labels for the provided data.

    The rows of ``self.y`` belonging to the ``self.n_neighbors`` stored
    samples with the smallest distance are summed, and the arg-max of that
    sum along axis 1 is returned. The result is also stored on
    ``self.classes_``.

    Parameters
    ----------
    x : DNDarray
        The test samples.

    Returns
    -------
    DNDarray
        The arg-max along axis 1 of the summed neighbour rows of ``self.y``.
    """
    # distances between the test samples and the stored samples self.x
    pairwise = self.effective_metric_(x, self.x)
    # largest=False: ask topk for the smallest distances; only indices are used
    _, nearest = ht.topk(pairwise, self.n_neighbors, largest=False)

    # one flat lookup into self.y for all neighbours, then rebalance in place
    votes = self.y[nearest.flatten()]
    votes.balance_()

    # undo the flattening: shape of `nearest` plus the second axis of self.y
    # (presumably self.y holds one-hot encoded labels -- TODO confirm)
    vote_shape = nearest.gshape + (self.y.gshape[1],)
    votes = ht.reshape(votes, vote_shape)

    # sum over axis 1, then pick the strongest remaining column
    tally = ht.sum(votes, axis=1)

    # side effect: the prediction overwrites self.classes_
    self.classes_ = ht.argmax(tally, axis=1)
    return self.classes_
def predict(self, X):
    """
    Classify the test data ``X``. Adapted to HeAT from scikit-learn.

    Parameters
    ----------
    X : ht.DNDarray of shape (n_samples, n_features)

    Returns
    -------
    C : ht.tensor of shape (n_samples,)
        Predicted labels for X

    Raises
    ------
    ValueError
        If ``X`` is not a ``ht.DNDarray``.
    """
    # sanitize input
    # TODO: sanitation/validation module, cf. #468
    if not isinstance(X, ht.DNDarray):
        message = "input needs to be a ht.DNDarray, but was {}".format(type(X))
        raise ValueError(message)

    joint_ll = self.__joint_log_likelihood(X)
    # arg-max along axis 1, converted via .numpy() before indexing classes_
    best = ht.argmax(joint_ll, axis=1).numpy()
    return self.classes_[best]
def test_argmax(self):
    """
    Exercise ``ht.argmax`` on local and split tensors.

    Covers reduction with and without an axis, ``keepdim``, the ``out``
    argument, and the exceptions raised for invalid ``axis`` values.
    """

    def check_index_layout(array, shape, lshape, split):
        # metadata checks shared by every argmax result below
        self.assertEqual(array.dtype, ht.int64)
        self.assertEqual(array._DNDarray__array.dtype, torch.int64)
        self.assertEqual(array.shape, shape)
        self.assertEqual(array.lshape, lshape)
        self.assertEqual(array.split, split)

    torch.manual_seed(1)
    data = ht.random.randn(3, 4, 5)

    # 3D local tensor, major axis
    result = ht.argmax(data, axis=0)
    self.assertIsInstance(result, ht.DNDarray)
    check_index_layout(result, (4, 5), (4, 5), None)
    local_expected = data._DNDarray__array.argmax(0)
    self.assertTrue((result._DNDarray__array == local_expected).all())

    # 3D local tensor, minor axis
    result = ht.argmax(data, axis=-1, keepdim=True)
    self.assertIsInstance(result, ht.DNDarray)
    check_index_layout(result, (3, 4, 1), (3, 4, 1), None)
    local_expected = data._DNDarray__array.argmax(-1, keepdim=True)
    self.assertTrue((result._DNDarray__array == local_expected).all())

    # 1D split tensor, no axis: arange(-10, 10) peaks at its last position, 19
    data = ht.arange(-10, 10, split=0)
    result = ht.argmax(data)
    self.assertIsInstance(result, ht.DNDarray)
    check_index_layout(result, (1,), (1,), None)
    last_index = torch.tensor([19], device=self.device.torch_device)
    self.assertTrue((result._DNDarray__array == last_index))

    # 2D split tensor, along the axis
    data = ht.array(ht.random.randn(4, 5), is_split=0)
    result = ht.argmax(data, axis=1)
    expected = torch.argmax(data._DNDarray__array, dim=1)
    self.assertIsInstance(result, ht.DNDarray)
    check_index_layout(result, (ht.MPI_WORLD.size * 4,), (4,), 0)
    self.assertTrue((result._DNDarray__array == expected).all())

    # 2D split tensor, across the axis
    size = ht.MPI_WORLD.size * 2
    data = ht.tril(ht.ones((size, size), split=0), k=-1)
    result = ht.argmax(data, axis=0)
    self.assertIsInstance(result, ht.DNDarray)
    check_index_layout(result, (size,), (size,), None)
    # skip test on gpu; argmax works different
    runs_on_gpu = torch.cuda.is_available() and result.device == ht.gpu
    if not runs_on_gpu:
        self.assertTrue((result._DNDarray__array != 0).all())

    # 2D split tensor, across the axis, output tensor
    size = ht.MPI_WORLD.size * 2
    data = ht.tril(ht.ones((size, size), split=0), k=-1)
    output = ht.empty((size,))
    result = ht.argmax(data, axis=0, out=output)
    self.assertIsInstance(result, ht.DNDarray)
    # from here on the checks inspect the buffer passed as `out`
    check_index_layout(output, (size,), (size,), None)
    # skip test on gpu; argmax works different
    runs_on_gpu = torch.cuda.is_available() and output.device == ht.gpu
    if not runs_on_gpu:
        self.assertTrue((output._DNDarray__array != 0).all())

    # check exceptions
    for bad_axis in ((0, 1), 1.1, "y"):
        with self.assertRaises(TypeError):
            data.argmax(axis=bad_axis)
    with self.assertRaises(ValueError):
        ht.argmax(data, axis=-4)