def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None, initialModel=None): """Train a Gaussian Mixture clustering model.""" initialModelWeights = None initialModelMu = None initialModelSigma = None if initialModel is not None: if initialModel.k != k: raise Exception( "Mismatched cluster count, initialModel.k = %s, however k = %s" % (initialModel.k, k)) initialModelWeights = initialModel.weights initialModelMu = [ initialModel.gaussians[i].mu for i in range(initialModel.k) ] initialModelSigma = [ initialModel.gaussians[i].sigma for i in range(initialModel.k) ] weight, mu, sigma = callMLlibFunc("trainGaussianMixtureModel", rdd.map(_convert_to_vector), k, convergenceTol, maxIterations, seed, initialModelWeights, initialModelMu, initialModelSigma) mvg_obj = [MultivariateGaussian(mu[i], sigma[i]) for i in range(k)] return GaussianMixtureModel(weight, mvg_obj)
def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None): """Train a Gaussian Mixture clustering model.""" weight, mu, sigma = callMLlibFunc("trainGaussianMixture", rdd.map(_convert_to_vector), k, convergenceTol, maxIterations, seed) mvg_obj = [MultivariateGaussian(mu[i], sigma[i]) for i in range(k)] return GaussianMixtureModel(weight, mvg_obj)
def gaussians(self): """ Array of MultivariateGaussian where gaussians[i] represents the Multivariate Gaussian (Normal) Distribution for Gaussian i. """ return [ MultivariateGaussian(gaussian[0], gaussian[1]) for gaussian in self.call("gaussians")]