def test_500x2(self):
    """Check that 4-means on zero-centred 2-D data puts one centroid per quadrant.

    The test is statistical: for a normally distributed dataset centred
    around 0, running kmeans with 4 clusters is expected to place one
    centroid in each corner (quadrant) of the plane.
    """
    # NOTE(review): a later method in this file is also named ``test_500x2``;
    # if both live in the same class, this one is shadowed and never runs.
    features = self.generate_matrices_for_k_means((500, 2), seed=1304)
    # compute() is unpacked into two outputs by the prediction test in this
    # file; only the first one, the centroid matrix, is inspected here.
    # Iterating the raw result would walk the outputs instead of the rows.
    [centroids, _] = kmeans(features, k=4).compute()

    corners = set()
    for centroid in centroids:
        x, y = centroid[0], centroid[1]
        if x > 0 and y > 0:
            corners.add("pp")
        elif x > 0 and y < 0:
            corners.add("pn")
        elif x < 0 and y > 0:
            corners.add("np")
        else:
            corners.add("nn")

    # Only these four labels can be produced, so comparing against the full
    # set is the same check as ``len(corners) == 4`` but reports which
    # quadrant is missing when it fails.
    self.assertEqual(corners, {"pp", "pn", "np", "nn"})
def test_500x2(self):
    """Check that kmeansPredict separates one probe point per quadrant.

    The test is statistical: for a normally distributed dataset centred
    around 0, running kmeans with 4 clusters is expected to place one
    centroid in each corner of the plane. This test uses the prediction
    builtin: one probe point per quadrant is classified against the
    learned centroids, and the four probes must land in four different
    clusters.
    """
    # NOTE(review): an earlier method in this file is also named
    # ``test_500x2``; if both live in the same class, this definition
    # replaces it. Consider giving this test a distinct name.
    features = self.generate_matrices_for_k_means((500, 2), seed=1304)
    [centroids, _] = kmeans(features, k=4).compute()
    centroid_matrix = Matrix(self.sds, centroids)
    # One probe in each quadrant of the plane.
    probes = Matrix(self.sds, np.array([[1, 1], [-1, 1], [-1, -1], [1, -1]]))
    predictions = kmeansPredict(probes, centroid_matrix).compute()

    # The corner names are only tags for the predicted cluster ids; they do
    # not claim which quadrant a given id corresponds to.
    corners = set()
    for cluster_id in predictions:
        if cluster_id == 1:
            corners.add("pp")
        elif cluster_id == 2:
            corners.add("pn")
        elif cluster_id == 3:
            corners.add("np")
        else:
            corners.add("nn")

    # Equivalent to ``len(corners) == 4`` (only these four tags exist), but a
    # failure shows which tag was never produced.
    self.assertEqual(corners, {"pp", "pn", "np", "nn"})
def test_invalid_input_2(self):
    """Check that kmeans rejects a negative cluster count with ValueError.

    The error is expected from the ``kmeans(...)`` call itself; the test
    never calls ``compute()`` on the result.
    """
    features = Matrix(self.sds, np.array([1]))
    with self.assertRaises(ValueError):
        kmeans(features, k=-1)