コード例 #1
0
def finalizeMergeOnMasterNode(partsRDD):
    """Merge per-node covariance partial results on the master node.

    Args:
        partsRDD: Object whose ``collect()`` yields two-item pairs; the
            second item of each pair is handed to
            ``deserializePartialResult`` (presumably a Spark RDD of
            serialized partial results -- confirm against the caller).

    Returns:
        dict: ``{'covariance': ..., 'mean': ...}`` taken from the finalized
        result of the master-step algorithm.
    """
    # Master-step (step 2) distributed covariance algorithm, CSR method.
    master = covariance.Distributed(step=step2Master,
                                    method=covariance.fastCSR)

    collected = partsRDD.collect()

    # Register every deserialized partial result as input of the master step.
    for _, serialized in collected:
        partial = deserializePartialResult(serialized, covariance)
        master.input.add(covariance.partialResults, partial)

    # Run the master step, then finalize to obtain the merged result.
    master.compute()
    final = master.finalizeCompute()

    return {
        'covariance': final.get(covariance.covariance),
        'mean': final.get(covariance.mean),
    }
コード例 #2
0
def trainMaster(partsRDD):
    """Build the final regression model on the master node (QR method).

    Args:
        partsRDD: Object whose ``collect()`` yields two-item pairs; the
            second item of each pair is handed to
            ``deserializePartialResult`` (presumably a Spark RDD of
            serialized partial models -- confirm against the caller).

    Returns:
        The value stored under ``training.model`` in the finalized
        training result.
    """
    # Master-step (step 2) distributed training using the dense QR method.
    master = training.Distributed(step2Master, method=training.qrDense)

    collected = partsRDD.collect()

    # Feed each deserialized partial model to the master step; the first
    # element of every pair is not used.
    for _, serialized in collected:
        partial_model = deserializePartialResult(serialized, training)
        master.input.add(training.partialModels, partial_model)

    # Run the master step, then finalize to obtain the trained model.
    master.compute()
    final = master.finalizeCompute()

    return final.get(training.model)
コード例 #3
0
def trainMaster(partsRDD):
    """Train the Naive Bayes model on the master node from partial models.

    Args:
        partsRDD: Object whose ``collect()`` yields two-item pairs; the
            second item of each pair is handed to
            ``deserializePartialResult`` (presumably a Spark RDD of
            serialized partial models -- confirm against the caller).

    Returns:
        The value stored under ``classifier.training.model`` in the
        finalized training result.
    """
    # Master-step (step 2) distributed training for ``nClasses`` classes.
    master = training.Distributed(step2Master, nClasses)

    collected = partsRDD.collect()

    # Feed each deserialized partial model to the master step.
    for _, serialized in collected:
        partial_model = deserializePartialResult(serialized, training)
        master.input.add(training.partialModels, partial_model)

    # Run the master step, then finalize to obtain the training result.
    master.compute()
    final = master.finalizeCompute()

    # The model identifier is looked up via the generic classifier module.
    return final.get(classifier.training.model)
コード例 #4
0
def finalizeMergeOnMasterNode(partsRDD):
    """Merge per-node low order moments partial results on the master node.

    Args:
        partsRDD: Object whose ``collect()`` yields two-item pairs; the
            second item of each pair is handed to
            ``deserializePartialResult`` (presumably a Spark RDD of
            serialized partial results -- confirm against the caller).

    Returns:
        The object returned by ``finalizeCompute()`` of the master-step
        algorithm.
    """
    # Master-step (step 2) distributed low order moments, default dense method.
    master = low_order_moments.Distributed(
        step2Master, method=low_order_moments.defaultDense)

    collected = partsRDD.collect()

    # Register every deserialized partial result as input of the master step.
    for _, serialized in collected:
        partial = deserializePartialResult(serialized, low_order_moments)
        master.input.add(low_order_moments.partialResults, partial)

    # Run the master step; finalizing yields the merged result.
    master.compute()
    return master.finalizeCompute()
コード例 #5
0
def trainMaster(partsRDD):
    """Build the final regression model on the master node.

    NOTE(review): the original local name suggested ridge regression while
    its comments mentioned multiple linear regression; which ``training``
    module is in scope is not visible here -- confirm.

    Args:
        partsRDD: Object whose ``collect()`` yields two-item pairs; the
            second item of each pair is handed to
            ``deserializePartialResult`` (presumably a Spark RDD of
            serialized partial models -- confirm against the caller).

    Returns:
        The value stored under ``training.model`` in the finalized
        training result.
    """
    # Master-step (step 2) distributed training with the default method.
    master = training.Distributed(step2Master)

    collected = partsRDD.collect()

    # Feed each deserialized partial model to the master step; the first
    # element of every pair is not used.
    for _, serialized in collected:
        partial_model = deserializePartialResult(serialized, training)
        master.input.add(training.partialModels, partial_model)

    # Run the master step, then finalize to obtain the trained model.
    master.compute()
    final = master.finalizeCompute()

    return final.get(training.model)
コード例 #6
0
ファイル: spark_KmeansDense.py プロジェクト: anjgola/samples
def computeInitMaster(partsRDD):
    """Merge per-node initialization partial results on the master node.

    Args:
        partsRDD: Object whose ``collect()`` yields two-item pairs; the
            second item of each pair is handed to
            ``deserializePartialResult`` (presumably a Spark RDD of
            serialized partial results -- confirm against the caller).

    Returns:
        The value stored under ``init.centroids`` in the finalized result.
    """
    # Master-step (step 2) distributed initialization for ``nClusters``
    # clusters using the random dense method.
    master = init.Distributed(step2Master, nClusters, method=init.randomDense)

    collected = partsRDD.collect()

    # Register every deserialized partial result as input of the master step.
    for _, serialized in collected:
        partial = deserializePartialResult(serialized, init)
        master.input.add(init.partialResults, partial)

    # Run the master step, then finalize to obtain the centroids.
    master.compute()
    final = master.finalizeCompute()

    return final.get(init.centroids)
コード例 #7
0
ファイル: spark_KmeansDense.py プロジェクト: anjgola/samples
def computeMaster(partsRDDcompute):
    """Merge per-node k-means partial results on the master node.

    Args:
        partsRDDcompute: Object whose ``collect()`` yields two-item pairs;
            the second item of each pair is handed to
            ``deserializePartialResult`` (presumably a Spark RDD of
            serialized partial results -- confirm against the caller).

    Returns:
        The value stored under ``kmeans.centroids`` in the finalized result.
    """
    # Master-step (step 2) distributed k-means for ``nClusters`` clusters,
    # default dense method.
    master = kmeans.Distributed(step2Master, nClusters, method=kmeans.defaultDense)

    collected = partsRDDcompute.collect()

    # Register every deserialized partial result as input of the master step.
    for _, serialized in collected:
        partial = deserializePartialResult(serialized, kmeans)
        master.input.add(kmeans.partialResults, partial)

    # Run the master step, then finalize to obtain the centroids.
    master.compute()
    final = master.finalizeCompute()

    return final.get(kmeans.centroids)