def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode="k-means||"):
    """Train a k-means clustering model.

    Every element of ``rdd`` is converted with ``_convert_to_vector``, the
    resulting RDD is handed to the ``trainKMeansModel`` MLlib entry point
    together with ``k``, ``maxIterations``, ``runs`` and
    ``initializationMode``, and the cluster centers of the returned model are
    wrapped in a ``KMeansModel``.
    """
    # Cache the serialized data so the JVM avoids per-object overhead.
    vectors = rdd.map(_convert_to_vector)
    java_rdd = _to_java_object_rdd(vectors, cache=True)
    java_model = callMLlibFunc(
        "trainKMeansModel", java_rdd, k, maxIterations, runs, initializationMode)
    java_centers = callJavaFunc(rdd.context, java_model.clusterCenters)
    return KMeansModel([center.toArray() for center in java_centers])
def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
    """Run ``train_func`` on ``data`` and wrap the result in ``modelClass``.

    :param train_func: callable invoked as
        ``train_func(java_rdd, weights_vector)``; it must return a
        ``(weights, intercept)`` pair.
    :param modelClass: callable invoked as ``modelClass(weights, intercept)``.
    :param data: dataset whose first element must be a ``LabeledPoint``.
    :param initial_weights: starting weights. When falsy (``None``, an empty
        sequence, ...) a list of ``0.0`` with one entry per feature of the
        first data point is used instead.
    :return: ``modelClass(weights, intercept)``.
    :raises ValueError: if the first element of ``data`` is not a
        ``LabeledPoint``.
    """
    first = data.first()
    if not isinstance(first, LabeledPoint):
        # Wrap in a 1-tuple: if ``first`` is itself a tuple, ``"%s" % first``
        # would raise TypeError and hide the intended ValueError.
        raise ValueError("data should be an RDD of LabeledPoint, but got %s" % (first,))

    try:
        use_default = not initial_weights
    except ValueError:
        # A numpy array with more than one element refuses truth-testing;
        # such an array is a genuine caller-supplied starting point.
        use_default = False
    if use_default:
        # Reuse the element already fetched instead of calling first() again.
        initial_weights = [0.0] * len(first.features)

    weights, intercept = train_func(_to_java_object_rdd(data, cache=True),
                                    _convert_to_vector(initial_weights))
    return modelClass(weights, intercept)
def _prepare(cls, ratings):
    """Return ``ratings`` converted with ``_to_java_object_rdd``.

    The first element decides how the input is treated: a ``Rating`` means
    the RDD is passed through unchanged, a tuple or list means every element
    is mapped to ``Rating(*element)`` first, and anything else raises
    ``ValueError``.
    """
    assert isinstance(ratings, RDD), "ratings should be RDD"
    sample = ratings.first()

    # Check for Rating before tuple/list so Rating elements are never re-wrapped.
    if isinstance(sample, Rating):
        return _to_java_object_rdd(ratings, True)
    if not isinstance(sample, (tuple, list)):
        raise ValueError("rating should be RDD of Rating or tuple/list")

    as_ratings = ratings.map(lambda fields: Rating(*fields))
    return _to_java_object_rdd(as_ratings, True)
def _prepare(cls, ratings):
    """Normalize ``ratings`` to ``Rating`` elements and convert for the JVM.

    Inspects only the first element: ``Rating`` input is used as is,
    tuple/list input is mapped through ``Rating(*row)``, and any other
    element type is rejected with ``ValueError``. The result of
    ``_to_java_object_rdd`` is returned.
    """
    assert isinstance(ratings, RDD), "ratings should be RDD"
    head = ratings.first()
    already_rating = isinstance(head, Rating)
    if not already_rating and not isinstance(head, (tuple, list)):
        raise ValueError(
            "rating should be RDD of Rating or tuple/list")
    # Only plain tuples/lists need to be wrapped into Rating objects.
    prepared = ratings if already_rating else ratings.map(lambda row: Rating(*row))
    return _to_java_object_rdd(prepared, True)
def test_to_java_object_rdd(self):
    """Regression test for SPARK-6660.

    Converts a ``RandomRDDs.uniformRDD`` result with ``_to_java_object_rdd``
    and checks that the converted RDD still reports 10 elements.
    """
    # SPARK-6660
    # Plain ``0`` rather than ``0L``: the ``L`` suffix is a syntax error on
    # Python 3, and a plain int literal is accepted on Python 2 as well.
    data = RandomRDDs.uniformRDD(self.sc, 10, 5, seed=0)
    self.assertEqual(_to_java_object_rdd(data).count(), 10)
def test_to_java_object_rdd(self):
    """Check that ``_to_java_object_rdd`` keeps all 10 generated elements."""
    # SPARK-6660
    uniform = RandomRDDs.uniformRDD(self.sc, 10, 5, seed=0)
    converted = _to_java_object_rdd(uniform)
    self.assertEqual(converted.count(), 10)
def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
    """Run ``train_func`` on ``data`` and wrap the result in ``modelClass``.

    :param train_func: callable invoked as
        ``train_func(java_rdd, weights_vector)``; it must return a
        ``(weights, intercept)`` pair.
    :param modelClass: callable invoked as ``modelClass(weights, intercept)``.
    :param data: dataset whose first element must be a ``LabeledPoint``.
    :param initial_weights: starting weights. When falsy (``None``, an empty
        sequence, ...) a list of ``0.0`` with one entry per feature of the
        first data point is used instead.
    :return: ``modelClass(weights, intercept)``.
    :raises ValueError: if the first element of ``data`` is not a
        ``LabeledPoint``.
    """
    # Fail fast with a clear message instead of an AttributeError on
    # ``.features`` or an opaque failure further down the call chain.
    first = data.first()
    if not isinstance(first, LabeledPoint):
        # 1-tuple so a tuple-valued ``first`` cannot break the formatting.
        raise ValueError("data should be an RDD of LabeledPoint, but got %s" % (first,))

    try:
        weights_missing = not initial_weights
    except ValueError:
        # A numpy array with more than one element refuses truth-testing;
        # such an array is a genuine caller-supplied starting point.
        weights_missing = False
    if weights_missing:
        initial_weights = [0.0] * len(first.features)

    weights, intercept = train_func(_to_java_object_rdd(data, cache=True),
                                    _convert_to_vector(initial_weights))
    return modelClass(weights, intercept)