def train(
    cls,
    rdd,
    k,
    maxIterations=100,
    runs=1,
    initializationMode="k-means||",
    seed=None,
    initializationSteps=5,
    epsilon=1e-4,
    initialModel=None,
):
    """
    Train a k-means clustering model.

    :param rdd: Training points; every element is passed through
        ``_convert_to_vector`` before training.
    :param k: Number of clusters to create.
    :param maxIterations: Maximum number of iterations allowed. (default: 100)
    :param runs: Forwarded unchanged to the training routine. (default: 1)
    :param initializationMode: The initialization algorithm.
        (default: "k-means||")
    :param seed: Random seed value for cluster initialization. (default: None)
    :param initializationSteps: Number of steps for the k-means||
        initialization mode. (default: 5)
    :param epsilon: Convergence threshold forwarded to the training routine.
        (default: 1e-4)
    :param initialModel: Optional ``KMeansModel`` whose ``clusterCenters`` are
        used as the initial centers. (default: None)
    :return: A ``KMeansModel`` built from the trained centers.
    :raises TypeError: If ``initialModel`` is given but is not a
        ``KMeansModel``.
    """
    clusterInitialModel = []
    if initialModel is not None:
        if not isinstance(initialModel, KMeansModel):
            # TypeError is a subclass of Exception, so callers that catch
            # Exception keep working while the failure is reported precisely.
            raise TypeError(
                "initialModel is of " + str(type(initialModel)) + ". It needs "
                "to be of <type 'KMeansModel'>"
            )
        clusterInitialModel = [
            _convert_to_vector(c) for c in initialModel.clusterCenters
        ]
    model = callMLlibFunc(
        "trainKMeansModel",
        rdd.map(_convert_to_vector),
        k,
        maxIterations,
        runs,
        initializationMode,
        seed,
        initializationSteps,
        epsilon,
        clusterInitialModel,
    )
    # The centers are fetched through callJavaFunc and each is converted with
    # toArray() before being wrapped in the Python-side model.
    centers = callJavaFunc(rdd.context, model.clusterCenters)
    return KMeansModel([c.toArray() for c in centers])
def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode="k-means||",
          seed=None, initializationSteps=5, epsilon=1e-4, initialModel=None):
    """
    Train a k-means clustering model.

    :param rdd: Training points; every element is passed through
        ``_convert_to_vector`` before training.
    :param k: Number of clusters to create.
    :param maxIterations: Maximum number of iterations allowed. (default: 100)
    :param runs: Deprecated; any value other than 1 triggers a warning. The
        value is still forwarded to the training routine. (default: 1)
    :param initializationMode: The initialization algorithm.
        (default: "k-means||")
    :param seed: Random seed value for cluster initialization. (default: None)
    :param initializationSteps: Number of steps for the k-means||
        initialization mode. (default: 5)
    :param epsilon: Convergence threshold forwarded to the training routine.
        (default: 1e-4)
    :param initialModel: Optional ``KMeansModel`` whose ``clusterCenters`` are
        used as the initial centers. (default: None)
    :return: A ``KMeansModel`` built from the trained centers.
    :raises TypeError: If ``initialModel`` is given but is not a
        ``KMeansModel``.
    """
    if runs != 1:
        # stacklevel=2 attributes the warning to the caller's line instead of
        # this library line, so users can find the call that passes `runs`.
        warnings.warn(
            "Support for runs is deprecated in 1.6.0. This param will have no effect in 1.7.0.",
            stacklevel=2,
        )
    clusterInitialModel = []
    if initialModel is not None:
        if not isinstance(initialModel, KMeansModel):
            # TypeError is a subclass of Exception, so existing handlers that
            # catch Exception are unaffected.
            raise TypeError("initialModel is of " + str(type(initialModel)) + ". It needs "
                            "to be of <type 'KMeansModel'>")
        clusterInitialModel = [
            _convert_to_vector(c) for c in initialModel.clusterCenters
        ]
    model = callMLlibFunc("trainKMeansModel", rdd.map(_convert_to_vector), k, maxIterations,
                          runs, initializationMode, seed, initializationSteps, epsilon,
                          clusterInitialModel)
    # Fetch the trained centers and convert each one with toArray().
    centers = callJavaFunc(rdd.context, model.clusterCenters)
    return KMeansModel([c.toArray() for c in centers])
def train(
    cls,
    rdd,
    k,
    maxIterations=100,
    runs=1,
    initializationMode="k-means||",
    seed=None,
    initializationSteps=5,
    epsilon=1e-4,
):
    """
    Train a k-means clustering model.

    The points in ``rdd`` are mapped through ``_convert_to_vector`` and handed,
    together with the remaining arguments in their declared order, to the
    ``"trainKMeansModel"`` routine via ``callMLlibFunc``. The resulting
    centers are converted with ``toArray()`` and wrapped in a ``KMeansModel``.
    """
    vectors = rdd.map(_convert_to_vector)
    java_model = callMLlibFunc(
        "trainKMeansModel",
        vectors,
        k,
        maxIterations,
        runs,
        initializationMode,
        seed,
        initializationSteps,
        epsilon,
    )
    java_centers = callJavaFunc(rdd.context, java_model.clusterCenters)

    # Collect the centers one by one as arrays for the Python-side model.
    center_arrays = []
    for center in java_centers:
        center_arrays.append(center.toArray())
    return KMeansModel(center_arrays)
def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode="k-means||",
          seed=None, initializationSteps=5, epsilon=1e-4):
    """
    Train a k-means clustering model.

    Returns a ``KMeansModel`` whose centers are the ``toArray()`` form of the
    centers reported by the ``"trainKMeansModel"`` routine.
    """
    # Positional arguments for the training routine, in the order it is
    # called with: data, k, iterations, runs, mode, seed, steps, epsilon.
    training_args = (
        rdd.map(_convert_to_vector),
        k,
        maxIterations,
        runs,
        initializationMode,
        seed,
        initializationSteps,
        epsilon,
    )
    trained = callMLlibFunc("trainKMeansModel", *training_args)
    trained_centers = callJavaFunc(rdd.context, trained.clusterCenters)
    return KMeansModel([center.toArray() for center in trained_centers])
def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode="k-means||"):
    """
    Train a k-means clustering model.

    The input points are converted with ``_convert_to_vector`` and turned into
    a cached Java object RDD before being passed to the ``"trainKMeansModel"``
    routine. The trained centers are returned as a ``KMeansModel``.
    """
    vectors = rdd.map(_convert_to_vector)
    # Cache the serialized data to avoid object overhead in the JVM.
    cached_input = _to_java_object_rdd(vectors, cache=True)
    trained = callMLlibFunc(
        "trainKMeansModel",
        cached_input,
        k,
        maxIterations,
        runs,
        initializationMode,
    )
    center_arrays = []
    for center in callJavaFunc(rdd.context, trained.clusterCenters):
        center_arrays.append(center.toArray())
    return KMeansModel(center_arrays)
def train(cls, rdd, k, maxIterations=100, initializationMode="k-means||",
          seed=None, initializationSteps=2, epsilon=1e-4, initialModel=None):
    """
    Train a k-means clustering model.

    :param rdd:
      Training points as an `RDD` of `Vector` or convertible
      sequence types.
    :param k:
      Number of clusters to create.
    :param maxIterations:
      Maximum number of iterations allowed.
      (default: 100)
    :param initializationMode:
      The initialization algorithm. This can be either "random" or
      "k-means||".
      (default: "k-means||")
    :param seed:
      Random seed value for cluster initialization. Set as None to
      generate seed based on system time.
      (default: None)
    :param initializationSteps:
      Number of steps for the k-means|| initialization mode.
      This is an advanced setting -- the default of 2 is almost
      always enough.
      (default: 2)
    :param epsilon:
      Distance threshold within which a center will be considered to
      have converged. If all centers move less than this Euclidean
      distance, iterations are stopped.
      (default: 1e-4)
    :param initialModel:
      Initial cluster centers can be provided as a KMeansModel object
      rather than using the random or k-means|| initializationModel.
      (default: None)
    :return:
      A KMeansModel built from the trained cluster centers.
    :raises TypeError:
      If `initialModel` is given but is not a KMeansModel.
    """
    clusterInitialModel = []
    if initialModel is not None:
        if not isinstance(initialModel, KMeansModel):
            # TypeError is a subclass of Exception, so callers that catch
            # Exception keep working while the failure is reported precisely.
            raise TypeError("initialModel is of " + str(type(initialModel)) + ". It needs "
                            "to be of <type 'KMeansModel'>")
        clusterInitialModel = [
            _convert_to_vector(c) for c in initialModel.clusterCenters
        ]
    model = callMLlibFunc("trainKMeansModel", rdd.map(_convert_to_vector), k, maxIterations,
                          initializationMode, seed, initializationSteps, epsilon,
                          clusterInitialModel)
    # Fetch the trained centers and convert each one with toArray().
    centers = callJavaFunc(rdd.context, model.clusterCenters)
    return KMeansModel([c.toArray() for c in centers])
def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode="k-means||",
          seed=None, initializationSteps=5, epsilon=1e-4, initialModel=None):
    """
    Train a k-means clustering model.

    :param rdd:
      Training points as an `RDD` of `Vector` or convertible
      sequence types.
    :param k:
      Number of clusters to create.
    :param maxIterations:
      Maximum number of iterations allowed.
      (default: 100)
    :param runs:
      Number of runs to execute in parallel. The best model according
      to the cost function will be returned (deprecated in 1.6.0).
      Any value other than 1 triggers a warning.
      (default: 1)
    :param initializationMode:
      The initialization algorithm. This can be either "random" or
      "k-means||".
      (default: "k-means||")
    :param seed:
      Random seed value for cluster initialization. Set as None to
      generate seed based on system time.
      (default: None)
    :param initializationSteps:
      Number of steps for the k-means|| initialization mode.
      This is an advanced setting -- the default of 5 is almost
      always enough.
      (default: 5)
    :param epsilon:
      Distance threshold within which a center will be considered to
      have converged. If all centers move less than this Euclidean
      distance, iterations are stopped.
      (default: 1e-4)
    :param initialModel:
      Initial cluster centers can be provided as a KMeansModel object
      rather than using the random or k-means|| initializationModel.
      (default: None)
    :return:
      A KMeansModel built from the trained cluster centers.
    :raises TypeError:
      If `initialModel` is given but is not a KMeansModel.
    """
    if runs != 1:
        # stacklevel=2 attributes the warning to the caller's line instead of
        # this library line, so users can find the call that passes `runs`.
        warnings.warn(
            "Support for runs is deprecated in 1.6.0. This param will have no effect in 2.0.0.",
            stacklevel=2)
    clusterInitialModel = []
    if initialModel is not None:
        if not isinstance(initialModel, KMeansModel):
            # TypeError is a subclass of Exception, so existing handlers that
            # catch Exception are unaffected.
            raise TypeError("initialModel is of "+str(type(initialModel))+". It needs "
                            "to be of <type 'KMeansModel'>")
        clusterInitialModel = [_convert_to_vector(c) for c in initialModel.clusterCenters]
    model = callMLlibFunc("trainKMeansModel", rdd.map(_convert_to_vector), k, maxIterations,
                          runs, initializationMode, seed, initializationSteps, epsilon,
                          clusterInitialModel)
    # Fetch the trained centers and convert each one with toArray().
    centers = callJavaFunc(rdd.context, model.clusterCenters)
    return KMeansModel([c.toArray() for c in centers])
def callBigDlFunc(bigdl_type, name, *args):
    """
    Call API in PythonBigDL.

    :param bigdl_type: Either "float" or "double"; selects
        ``PythonBigDL.ofFloat()`` or ``PythonBigDL.ofDouble()``.
    :param name: Name of the attribute to look up on the selected
        PythonBigDL object.
    :param args: Positional arguments forwarded to ``callJavaFunc``.
    :return: Whatever ``callJavaFunc`` returns for the call.
    :raises ValueError: If ``bigdl_type`` is neither "float" nor "double".
    """
    # Validate before touching Spark so that a bad argument fails immediately
    # instead of first creating (or fetching) a SparkContext. ValueError is a
    # subclass of Exception, so callers catching Exception still work.
    if bigdl_type not in ("float", "double"):
        raise ValueError("Not supported bigdl_type: %s" % bigdl_type)

    sc = SparkContext.getOrCreate()
    python_bigdl = sc._jvm.com.intel.analytics.bigdl.python.api.PythonBigDL
    if bigdl_type == "float":
        api = getattr(python_bigdl.ofFloat(), name)
    else:
        api = getattr(python_bigdl.ofDouble(), name)
    return callJavaFunc(sc, api, *args)
def callBigDlFunc(bigdl_type, name, *args):
    """
    Call API in PythonBigDL.

    :param bigdl_type: Either "float" or "double"; selects
        ``PythonBigDL.ofFloat()`` or ``PythonBigDL.ofDouble()``.
    :param name: Name of the attribute to look up on the selected
        PythonBigDL object.
    :param args: Positional arguments forwarded to ``callJavaFunc``.
    :return: Whatever ``callJavaFunc`` returns for the call.
    :raises ValueError: If ``bigdl_type`` is neither "float" nor "double".
    """
    is_float = bigdl_type == "float"
    # Reject unsupported types up front: there is no reason to obtain a
    # SparkContext for a call that cannot succeed. ValueError subclasses
    # Exception, so handlers that catch Exception are unaffected.
    if not is_float and not bigdl_type == "double":
        raise ValueError("Not supported bigdl_type: %s" % bigdl_type)

    sc = SparkContext.getOrCreate()
    python_bigdl = sc._jvm.com.intel.analytics.bigdl.python.api.PythonBigDL
    jinstance = python_bigdl.ofFloat() if is_float else python_bigdl.ofDouble()
    api = getattr(jinstance, name)
    return callJavaFunc(sc, api, *args)
def callBigDlFunc(bigdl_type, name, *args):
    """
    Call API in PythonBigDL.

    Each invoker exposed by ``JavaCreator.instance(bigdl_type, gateway).value``
    is tried in order. The first call that succeeds provides the return value.
    A failure whose message contains "does not exist" moves on to the next
    invoker; any other failure is re-raised immediately. If no invoker
    succeeds, the last recorded error is raised (initially a
    "Cannot find function" exception).
    """
    gateway = _get_gateway()
    last_error = Exception("Cannot find function: %s" % name)
    invokers = JavaCreator.instance(bigdl_type, gateway).value
    for invoker in invokers:
        # Per the original author's note: hasattr(invoker, name) always
        # returns true here, so the method has to be invoked to find out
        # whether it exists.
        try:
            return callJavaFunc(getattr(invoker, name), *args)
        except Exception as exc:
            if "does not exist" not in str(exc):
                raise exc
            last_error = exc
    raise last_error
def callBigDlFunc(bigdl_type, name, *args):
    """
    Call API in PythonBigDL.

    Looks up ``name`` on the value held by
    ``JavaCreator.instance(bigdl_type=bigdl_type)`` and invokes it through
    ``callJavaFunc`` with the current Spark context and ``args``.
    """
    creator = JavaCreator.instance(bigdl_type=bigdl_type)
    target = creator.value
    spark_context = get_spark_context()
    java_method = getattr(target, name)
    return callJavaFunc(spark_context, java_method, *args)
def train(
    cls,
    rdd,
    k,
    maxIterations=100,
    initializationMode="k-means||",
    seed=None,
    initializationSteps=2,
    epsilon=1e-4,
    initialModel=None,
    distanceMeasure="euclidean",
):
    """
    Train a k-means clustering model.

    .. versionadded:: 0.9.0

    Parameters
    ----------
    rdd : :py:class:`pyspark.RDD`
        Training points as an `RDD` of :py:class:`pyspark.mllib.linalg.Vector`
        or convertible sequence types.
    k : int
        Number of clusters to create.
    maxIterations : int, optional
        Maximum number of iterations allowed.
        (default: 100)
    initializationMode : str, optional
        The initialization algorithm. This can be either "random" or
        "k-means||".
        (default: "k-means||")
    seed : int, optional
        Random seed value for cluster initialization. Set as None to
        generate seed based on system time.
        (default: None)
    initializationSteps : int, optional
        Number of steps for the k-means|| initialization mode.
        This is an advanced setting -- the default of 2 is almost
        always enough.
        (default: 2)
    epsilon : float, optional
        Distance threshold within which a center will be considered to
        have converged. If all centers move less than this Euclidean
        distance, iterations are stopped.
        (default: 1e-4)
    initialModel : :py:class:`KMeansModel`, optional
        Initial cluster centers can be provided as a KMeansModel object
        rather than using the random or k-means|| initializationModel.
        (default: None)
    distanceMeasure : str, optional
        The distance measure used by the k-means algorithm.
        (default: "euclidean")

    Returns
    -------
    :py:class:`KMeansModel`
        Model built from the trained cluster centers.

    Raises
    ------
    TypeError
        If `initialModel` is given but is not a :py:class:`KMeansModel`.
    """
    clusterInitialModel = []
    if initialModel is not None:
        if not isinstance(initialModel, KMeansModel):
            # The message is assembled from adjacent literals kept on
            # complete lines; a string literal may not span a line break.
            raise TypeError(
                "initialModel is of " + str(type(initialModel)) + ". It needs "
                "to be of <type 'KMeansModel'>"
            )
        clusterInitialModel = [
            _convert_to_vector(c) for c in initialModel.clusterCenters
        ]
    model = callMLlibFunc(
        "trainKMeansModel",
        rdd.map(_convert_to_vector),
        k,
        maxIterations,
        initializationMode,
        seed,
        initializationSteps,
        epsilon,
        clusterInitialModel,
        distanceMeasure,
    )
    # Fetch the trained centers and convert each one with toArray().
    centers = callJavaFunc(rdd.context, model.clusterCenters)
    return KMeansModel([c.toArray() for c in centers])