def finalizeMergeOnMasterNode(partsRDD):
    """Merge covariance partial results on the master node and finalize them.

    Args:
        partsRDD: Object whose ``collect()`` yields 2-item pairs; the second
            item of each pair is passed to ``deserializePartialResult``.
            Presumably a Spark RDD of (key, serialized partial result) --
            confirm against the caller.

    Returns:
        dict with the keys ``'covariance'`` and ``'mean'``, read from the
        finalized result via ``covariance.covariance`` and
        ``covariance.mean`` respectively.
    """
    # Master-step algorithm for the sparse (fastCSR) variance-covariance matrix.
    # ``step2Master`` comes from the enclosing module scope.
    master_algorithm = covariance.Distributed(step=step2Master, method=covariance.fastCSR)

    # Feed every collected partial result to the master algorithm; the first
    # item of each pair is not used.
    for _, serialized in partsRDD.collect():
        partial = deserializePartialResult(serialized, covariance)
        master_algorithm.input.add(covariance.partialResults, partial)

    # Run the master-step computation, then finalize to obtain the result.
    master_algorithm.compute()
    final = master_algorithm.finalizeCompute()

    return {
        'covariance': final.get(covariance.covariance),
        'mean': final.get(covariance.mean),
    }
def trainMaster(partsRDD):
    """Build the final regression model on the master node (QR-based method).

    Args:
        partsRDD: Object whose ``collect()`` yields 2-item pairs; the second
            item of each pair is passed to ``deserializePartialResult``.
            Presumably a Spark RDD of (key, serialized partial model) --
            confirm against the caller.

    Returns:
        The value stored under ``training.model`` in the finalized training
        result.
    """
    # Master-step trainer using the dense QR decomposition method.
    # ``step2Master`` comes from the enclosing module scope.
    trainer = training.Distributed(step2Master, method=training.qrDense)

    # Lazily deserialize each collected partial model so that deserialization
    # and registration stay interleaved one item at a time.
    partial_models = (
        deserializePartialResult(serialized, training)
        for _key, serialized in partsRDD.collect()
    )
    for partial_model in partial_models:
        trainer.input.add(training.partialModels, partial_model)

    # Run the master-step computation, then finalize and extract the model.
    trainer.compute()
    final = trainer.finalizeCompute()
    return final.get(training.model)
def trainMaster(partsRDD):
    """Train the Naive Bayes model on the master node from partial results.

    Args:
        partsRDD: Object whose ``collect()`` yields 2-item pairs; the second
            item of each pair is passed to ``deserializePartialResult``.
            Presumably a Spark RDD of (key, serialized partial model) --
            confirm against the caller.

    Returns:
        The value stored under ``classifier.training.model`` in the finalized
        training result.
    """
    # Master-step trainer; ``step2Master`` and ``nClasses`` come from the
    # enclosing module scope.
    master_trainer = training.Distributed(step2Master, nClasses)

    collected_parts = partsRDD.collect()

    # Register each locally computed partial model with the master trainer;
    # the first item of each pair is not used.
    for _, serialized in collected_parts:
        partial_model = deserializePartialResult(serialized, training)
        master_trainer.input.add(training.partialModels, partial_model)

    # Run the master-step training.
    master_trainer.compute()

    # Finalize and pull the trained model out of the result.
    outcome = master_trainer.finalizeCompute()
    return outcome.get(classifier.training.model)
def finalizeMergeOnMasterNode(partsRDD):
    """Merge low order moments partial results on the master node.

    Args:
        partsRDD: Object whose ``collect()`` yields 2-item pairs; the second
            item of each pair is passed to ``deserializePartialResult``.
            Presumably a Spark RDD of (key, serialized partial result) --
            confirm against the caller.

    Returns:
        The object returned by the master algorithm's ``finalizeCompute()``.
    """
    # Master-step low order moments algorithm (default dense method).
    # ``step2Master`` comes from the enclosing module scope.
    master_algorithm = low_order_moments.Distributed(
        step2Master,
        method=low_order_moments.defaultDense,
    )

    # Feed every collected partial result to the master algorithm; the first
    # item of each pair is not used.
    for _, serialized in partsRDD.collect():
        partial = deserializePartialResult(serialized, low_order_moments)
        master_algorithm.input.add(low_order_moments.partialResults, partial)

    # Run the master-step computation, then finalize to obtain the result.
    master_algorithm.compute()
    final_moments = master_algorithm.finalizeCompute()
    return final_moments
def trainMaster(partsRDD):
    """Build the final regression model on the master node (default method).

    NOTE(review): the original comment described this as multiple linear
    regression with the normal equations method, while the original local
    variable was named for ridge regression. Which one applies depends on
    what ``training`` is bound to at module level -- confirm against the
    imports.

    Args:
        partsRDD: Object whose ``collect()`` yields 2-item pairs; the second
            item of each pair is passed to ``deserializePartialResult``.
            Presumably a Spark RDD of (key, serialized partial model) --
            confirm against the caller.

    Returns:
        The value stored under ``training.model`` in the finalized training
        result.
    """
    # Master-step trainer with no explicit method argument.
    # ``step2Master`` comes from the enclosing module scope.
    master_trainer = training.Distributed(step2Master)

    # Register each locally computed partial model with the master trainer;
    # the first item of each pair is not used.
    for _key, serialized in partsRDD.collect():
        partial_model = deserializePartialResult(serialized, training)
        master_trainer.input.add(training.partialModels, partial_model)

    # Run the master-step computation, then finalize and extract the model.
    master_trainer.compute()
    outcome = master_trainer.finalizeCompute()
    return outcome.get(training.model)
def computeInitMaster(partsRDD):
    """Run the master step of the initialization algorithm and return centroids.

    Args:
        partsRDD: Object whose ``collect()`` yields 2-item pairs; the second
            item of each pair is passed to ``deserializePartialResult``.
            Presumably a Spark RDD of (key, serialized partial result) --
            confirm against the caller.

    Returns:
        The value stored under ``init.centroids`` in the finalized result.
    """
    # Master-step initialization algorithm using the ``randomDense`` method;
    # ``step2Master`` and ``nClusters`` come from the enclosing module scope.
    init_master = init.Distributed(step2Master, nClusters, method=init.randomDense)

    collected_parts = partsRDD.collect()

    # Feed every collected partial result to the master algorithm; the first
    # item of each pair is not used.
    for _, serialized in collected_parts:
        partial = deserializePartialResult(serialized, init)
        init_master.input.add(init.partialResults, partial)

    # Run the master-step computation.
    init_master.compute()

    # Finalize and extract the centroids from the result.
    final = init_master.finalizeCompute()
    return final.get(init.centroids)
def computeMaster(partsRDDcompute):
    """Run the master step of k-means and return the resulting centroids.

    Args:
        partsRDDcompute: Object whose ``collect()`` yields 2-item pairs; the
            second item of each pair is passed to
            ``deserializePartialResult``. Presumably a Spark RDD of
            (key, serialized partial result) -- confirm against the caller.

    Returns:
        The value stored under ``kmeans.centroids`` in the finalized result.
    """
    # Master-step k-means algorithm (default dense method); ``step2Master``
    # and ``nClusters`` come from the enclosing module scope.
    master_algorithm = kmeans.Distributed(step2Master, nClusters, method=kmeans.defaultDense)

    # Feed every collected partial result to the master algorithm; the first
    # item of each pair is not used.
    for _, serialized in partsRDDcompute.collect():
        partial = deserializePartialResult(serialized, kmeans)
        master_algorithm.input.add(kmeans.partialResults, partial)

    # Run the master-step computation, then finalize and extract centroids.
    master_algorithm.compute()
    final = master_algorithm.finalizeCompute()
    return final.get(kmeans.centroids)