def test_parse_vector(self):
    """Check the string form of dense and sparse vectors and that it parses back.

    Each vector is converted with ``str()``, compared against its expected
    text, and then parsed again and compared against the original vector.
    """
    # NOTE: these checks use assertEqual, not assertTrue. assertTrue(x, y)
    # treats y as the failure *message* and passes for any truthy x, so it
    # would never compare the two values.
    a = DenseVector([3, 4, 6, 7])
    self.assertEqual(str(a), '[3.0,4.0,6.0,7.0]')
    self.assertEqual(Vectors.parse(str(a)), a)

    a = SparseVector(4, [0, 2], [3, 4])
    self.assertEqual(str(a), '(4,[0,2],[3.0,4.0])')
    self.assertEqual(Vectors.parse(str(a)), a)

    # Input with extra whitespace around every token must parse to the same vector.
    a = SparseVector(10, [0, 1], [4, 5])
    self.assertEqual(SparseVector.parse(' (10, [0,1 ],[ 4.0,5.0] )'), a)
def test_parse_vector(self):
    """Check the string form of dense and sparse vectors and that it parses back.

    Each vector is converted with ``str()``, compared against its expected
    text, and then parsed again and compared against the original vector.
    """
    # NOTE: these checks use assertEqual, not assertTrue. assertTrue(x, y)
    # treats y as the failure *message* and passes for any truthy x, so it
    # would never compare the two values.
    a = DenseVector([3, 4, 6, 7])
    self.assertEqual(str(a), '[3.0,4.0,6.0,7.0]')
    self.assertEqual(Vectors.parse(str(a)), a)

    a = SparseVector(4, [0, 2], [3, 4])
    self.assertEqual(str(a), '(4,[0,2],[3.0,4.0])')
    self.assertEqual(Vectors.parse(str(a)), a)

    # Input with extra whitespace around every token must parse to the same vector.
    a = SparseVector(10, [0, 1], [4, 5])
    self.assertEqual(SparseVector.parse(' (10, [0,1 ],[ 4.0,5.0] )'), a)
def kmeansInitialClusters(dataset):
    """Train KMeans seeded with CENTER_VECTORS, label every row, and plot the result.

    The function builds an initial ``KMeansModel`` from the module-level
    ``CENTER_VECTORS``, trains on the ``features`` column of ``dataset``,
    predicts a cluster for every collected row, and hands the resulting list
    of dicts to ``plotDiversitySizeClustering`` and ``centroidArtistSongCount``.
    It returns nothing.

    :param dataset: object exposing ``.rdd`` and ``.collect()`` whose rows
        have ``features`` and ``label`` fields.
    """

    def as_parsed_vector(features):
        # Round-trip through the string form, exactly as the original code did.
        # NOTE(review): presumably this converts the vector into the type
        # KMeans.train / predict expect; confirm before simplifying.
        return Vectors.parse(Vectors.stringify(features))

    seed_model = KMeansModel(CENTER_VECTORS)
    training_vectors = dataset.rdd.map(
        lambda row: as_parsed_vector(row['features'])
    )
    # k is fixed at 4; presumably this matches len(CENTER_VECTORS). TODO confirm.
    fitted_model = KMeans.train(
        training_vectors,
        4,
        maxIterations=1000,
        initialModel=seed_model,
    )

    # One dict per collected row, with keys in the order features/prediction/label.
    labelled_rows = [
        {
            "features": row["features"],
            "prediction": fitted_model.predict(as_parsed_vector(row['features'])),
            "label": row['label'],
        }
        for row in dataset.collect()
    ]

    plotDiversitySizeClustering(
        labelled_rows,
        CENTERS,
        "Size",
        "Diversity",
        "Song Analysis by Size and Diversity with Initial Clusters",
    )
    centroidArtistSongCount(labelled_rows, CENTERS)
def test_parse_vector(self):
    """Verify vector string formatting and parsing, including empty vectors.

    For each sample vector the test asserts the exact ``str()`` output and
    that parsing that output gives back an equal vector. A final case parses
    a sparse vector literal padded with extra whitespace.
    """

    def check_round_trip(vector, expected_text, parse):
        # Formatting first, then parse the formatted text back.
        self.assertEqual(str(vector), expected_text)
        self.assertEqual(parse(str(vector)), vector)

    check_round_trip(DenseVector([]), "[]", Vectors.parse)
    check_round_trip(DenseVector([3, 4, 6, 7]), "[3.0,4.0,6.0,7.0]", Vectors.parse)
    # The empty sparse vector goes through SparseVector.parse directly.
    check_round_trip(SparseVector(4, [], []), "(4,[],[])", SparseVector.parse)
    check_round_trip(
        SparseVector(4, [0, 2], [3, 4]), "(4,[0,2],[3.0,4.0])", Vectors.parse
    )

    # Whitespace-padded input must parse to the same sparse vector.
    padded_expected = SparseVector(10, [0, 1], [4, 5])
    self.assertEqual(
        SparseVector.parse(" (10, [0,1 ],[ 4.0,5.0] )"), padded_expected
    )