def test_kmeans(self):
    """
    With k=1 there is always exactly one cluster, centered on the data mean
    """
    points = [
        array([1.0, 2.0, 6.0]),
        array([1.0, 3.0, 0.0]),
        array([1.0, 4.0, 6.0]),
    ]
    data = self.sc.parallelize(array(points))
    labels, centers = kmeans(data, k=1, maxiter=20, tol=0.001)

    # Column-wise mean of the three points above.
    expected_center = array([1.0, 3.0, 4.0])
    assert array_equal(centers[0], expected_center)

    # With a single cluster every point must receive label 0.
    expected_labels = array([0, 0, 0])
    assert array_equal(labels.collect(), expected_labels)
def test_kmeans(self):
    """
    With k=1 always get one cluster centered on the mean
    """
    data_local = [
        array([1.0, 2.0, 6.0]),
        array([1.0, 3.0, 0.0]),
        array([1.0, 4.0, 6.0]),
    ]
    # Key each vector with 1, 2, 3. list() materializes the pairs because
    # zip() is lazy on Python 3.
    data = self.sc.parallelize(list(zip(range(1, 4), data_local)))
    labels, centers = kmeans(data, k=1, maxiter=20, tol=0.001)

    # Column-wise mean of the three vectors above.
    assert array_equal(centers[0], array([1.0, 3.0, 4.0]))

    # Drop the key from each (key, label) record by indexing. A
    # tuple-unpacking lambda such as `lambda (_, v): v` is a SyntaxError on
    # Python 3 (PEP 3113); indexing works on both Python 2 and 3.
    assigned = labels.map(lambda kv: kv[1]).collect()
    assert array_equal(assigned, array([0, 0, 0]))
def test_kmeans(self):
    """
    With k=1 always get one cluster centered on the mean
    """
    data_local = [
        array([1.0, 2.0, 6.0]),
        array([1.0, 3.0, 0.0]),
        array([1.0, 4.0, 6.0]),
    ]
    # Pair every vector with a key in 1..3; wrap in list() since zip()
    # returns a lazy iterator on Python 3.
    keyed = list(zip(range(1, 4), data_local))
    data = self.sc.parallelize(keyed)
    labels, centers = kmeans(data, k=1, maxiter=20, tol=0.001)

    # The single center must equal the column-wise mean of the input.
    assert array_equal(centers[0], array([1.0, 3.0, 4.0]))

    # Keep only the label from each (key, label) record. Indexing replaces
    # the tuple-parameter lambda `lambda (_, v): v`, which Python 3 rejects
    # at parse time (PEP 3113) and which is unnecessary on Python 2.
    assert array_equal(
        labels.map(lambda record: record[1]).collect(),
        array([0, 0, 0]))
def test_kmeans(self):
    """
    Run kmeans on the shared test data and collect its distributed outputs

    No values are asserted; the test only checks that the call returns four
    results and that collect() succeeds on the labels and both distance
    outputs.
    """
    data = get_data_kmeans(self)
    # NOTE(review): 5 is presumably the number of clusters and "euclidean"
    # the distance metric -- confirm against the kmeans signature.
    labels, _centers, dists, norm_dists = kmeans(data, 5, "euclidean")
    # Same order as before: labels, then dists, then normalized dists.
    for result in (labels, dists, norm_dists):
        result.collect()