# Example no. 1
# 0
def trainModel():
    """Train the Naive Bayes model in two distributed steps over MPI.

    Step 1 runs on every rank: the rank loads its own sparse training table
    (``trainDatasetFileNames[rankId]``) and labels
    (``trainGroundTruthFileNames[rankId]``), computes a local partial result
    and serializes it.  The serialized partial results are then gathered on
    the ``MPI_ROOT`` rank, which deserializes ``nBlocks`` of them, merges
    them in step 2 and finalizes the model.

    Reads the module-level names ``comm``, ``rankId``, ``nClasses``,
    ``nBlocks`` and ``MPI_ROOT``.

    Side effects:
        On the ``MPI_ROOT`` rank only, the global ``trainingResult`` is set
        to the finalized result of the master algorithm.  Other ranks do not
        assign it.
    """
    global trainingResult

    # Retrieve the input data from a .csv file
    trainDataTable = createSparseTable(trainDatasetFileNames[rankId])

    # Initialize FileDataSource to retrieve the labels from a .csv file
    trainLabelsSource = FileDataSource(trainGroundTruthFileNames[rankId],
                                       DataSourceIface.doAllocateNumericTable,
                                       DataSourceIface.doDictionaryFromContext)

    # Retrieve the data from input files
    trainLabelsSource.loadDataBlock()

    # Create an algorithm object to train the Naive Bayes model based on the local-node data
    localAlgorithm = training.Distributed(step1Local,
                                          nClasses,
                                          method=training.fastCSR)

    # Pass a training data set and dependent values to the algorithm
    localAlgorithm.input.set(classifier.training.data, trainDataTable)
    localAlgorithm.input.set(classifier.training.labels,
                             trainLabelsSource.getNumericTable())

    # Train the Naive Bayes model on local nodes
    pres = localAlgorithm.compute()

    # Serialize partial results required by step 2
    dataArch = InputDataArchive()
    pres.serialize(dataArch)

    nodeResults = dataArch.getArchiveAsArray()

    # Transfer partial results to step 2 on the root node.  The root is
    # passed explicitly so the rank that receives the gathered list is the
    # same rank that enters the MPI_ROOT branch below (gather defaults to
    # rank 0 and returns None on every other rank).
    serializedData = comm.gather(nodeResults, root=MPI_ROOT)

    if rankId == MPI_ROOT:
        # Create an algorithm object to build the final Naive Bayes model on the master node
        masterAlgorithm = training.Distributed(step2Master,
                                               nClasses,
                                               method=training.fastCSR)

        for i in range(nBlocks):
            # Deserialize partial results from step 1
            dataArch = OutputDataArchive(serializedData[i])

            dataForStep2FromStep1 = training.PartialResult()
            dataForStep2FromStep1.deserialize(dataArch)

            # Set the local Naive Bayes model as input for the master-node algorithm
            masterAlgorithm.input.add(training.partialModels,
                                      dataForStep2FromStep1)

        # Merge the partial models, then finalize the Naive Bayes model on the master node
        masterAlgorithm.compute()
        trainingResult = masterAlgorithm.finalizeCompute()
# Example no. 2
# 0
def testModel():
    """Run Naive Bayes prediction on the test set with the trained model.

    Loads the sparse test table from ``testDatasetFileName``, feeds it and
    the model taken from the global ``trainingResult`` into a batch
    prediction algorithm, and stores the outcome in the global
    ``predictionResult``.
    """
    global predictionResult

    # Load the sparse test data set from its .csv file
    sparseTestData = createSparseTable(testDatasetFileName)

    # Batch predictor for the Naive Bayes model (CSR method)
    predictor = prediction.Batch(nClasses, method=prediction.fastCSR)

    # Hand over the test data first, then the previously trained model
    predictor.input.setTable(classifier.prediction.data, sparseTestData)
    trainedModel = trainingResult.get(classifier.training.model)
    predictor.input.setModel(classifier.prediction.model, trainedModel)

    # The result object comes from classifier.prediction
    predictionResult = predictor.compute()
# Rank of the MPI process that acts as the root node
MPI_ROOT = 0

# Input file names, joined onto DATA_PREFIX below
_csrBlockFiles = (
    'covcormoments_csr_1.csv',
    'covcormoments_csr_2.csv',
    'covcormoments_csr_3.csv',
    'covcormoments_csr_4.csv',
)
datasetFileNames = [jp(DATA_PREFIX, _csrFile) for _csrFile in _csrBlockFiles]

# Script entry point: each MPI rank computes step-1 (local) low order moments
# for its own sparse input file and serializes the partial result.
# NOTE(review): the visible code stops after serialization; the gather /
# master step presumably follows in the original script - confirm.
if __name__ == "__main__":

    # MPI communicator spanning all processes, and this process's rank in it
    comm = MPI.COMM_WORLD
    rankId = comm.Get_rank()

    # Retrieve the input data from a file (one file per rank, indexed by rankId)
    dataTable = createSparseTable(datasetFileNames[rankId])

    # Create an algorithm to compute low order moments on local nodes
    localAlgorithm = low_order_moments.Distributed(
        step1Local, method=low_order_moments.fastCSR)

    # Set the input data set to the algorithm
    localAlgorithm.input.set(low_order_moments.data, dataTable)

    # Compute low order moments (partial result for this rank)
    pres = localAlgorithm.compute()

    # Serialize partial results required by step 2
    dataArch = InputDataArchive()
    pres.serialize(dataArch)
def deserializeDAALObject(buffer, object):
    """Fill ``object`` from a serialized buffer and return it.

    Args:
        buffer: Serialized data; it is wrapped in an ``OutputDataArchive``.
        object: Instance exposing ``deserialize(archive)``; it is populated
            in place.

    Returns:
        The same ``object`` instance that was passed in.
    """
    # Wrap the raw buffer in an archive and let the object restore itself
    object.deserialize(OutputDataArchive(buffer))
    return object


# Script entry point: initialize, train and test the model on every MPI rank,
# then print the predicted ratings on the root rank.
if __name__ == "__main__":

    # MPI communicator spanning all processes, and this process's rank in it
    comm = MPI.COMM_WORLD
    rankId = comm.Get_rank()

    # Load this rank's transposed training data as a sparse table
    transposedDataTable = createSparseTable(
        transposedTrainDatasetFileNames[rankId])

    # Empty key-value collections created before training/testing
    # NOTE(review): presumably filled and read as globals by trainModel() /
    # testModel(), which are defined outside this view - confirm.
    step4LocalInput = KeyValueDataCollection()
    itemsPartialResultPrediction = KeyValueDataCollection()

    # Run the pipeline: initialization, training, then prediction
    dataTable = initializeModel()
    trainModel(dataTable, transposedDataTable)
    testModel()

    # Only the root rank prints the nBlocks x nBlocks grid of predictions
    # NOTE(review): predictedRatingsMaster is not assigned in the visible
    # code; presumably set by testModel() - confirm.
    if rankId == MPI_ROOT:
        for i in range(nBlocks):
            for j in range(nBlocks):
                print("prediction {}, {}".format(i, j))
                printNumericTable(predictedRatingsMaster[i][j].get(
                    ratings.prediction))