def trainModel():
    global trainingResult

    # Retrieve the input data from a .csv file
    trainDataTable = createSparseTable(trainDatasetFileNames[rankId])

    # Initialize FileDataSource to retrieve the input data from a .csv file
    trainLabelsSource = FileDataSource(trainGroundTruthFileNames[rankId],
                                       DataSourceIface.doAllocateNumericTable,
                                       DataSourceIface.doDictionaryFromContext)

    # Retrieve the data from the input files
    trainLabelsSource.loadDataBlock()

    # Create an algorithm object to train the Naive Bayes model based on the local-node data
    localAlgorithm = training.Distributed(step1Local, nClasses, method=training.fastCSR)

    # Pass a training data set and dependent values to the algorithm
    localAlgorithm.input.set(classifier.training.data, trainDataTable)
    localAlgorithm.input.set(classifier.training.labels, trainLabelsSource.getNumericTable())

    # Train the Naive Bayes model on local nodes
    pres = localAlgorithm.compute()

    # Serialize partial results required by step 2
    dataArch = InputDataArchive()
    pres.serialize(dataArch)
    nodeResults = dataArch.getArchiveAsArray()

    # Transfer partial results to step 2 on the root node
    serializedData = comm.gather(nodeResults)

    if rankId == MPI_ROOT:
        # Create an algorithm object to build the final Naive Bayes model on the master node
        masterAlgorithm = training.Distributed(step2Master, nClasses, method=training.fastCSR)

        for i in range(nBlocks):
            # Deserialize partial results from step 1
            dataArch = OutputDataArchive(serializedData[i])
            dataForStep2FromStep1 = training.PartialResult()
            dataForStep2FromStep1.deserialize(dataArch)

            # Set the local Naive Bayes model as input for the master-node algorithm
            masterAlgorithm.input.add(training.partialModels, dataForStep2FromStep1)

        # Merge the partial models and finalize the Naive Bayes model on the master node
        masterAlgorithm.compute()
        trainingResult = masterAlgorithm.finalizeCompute()
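# Note on the collective used above: comm.gather is mpi4py's gather, which collects
# one Python object from every rank onto the root (rank 0 by default). serializedData
# is therefore a list holding one serialized partial result per rank on the root and
# None on every other rank, which is why only the MPI_ROOT branch deserializes it.
# A minimal, DAAL-free sketch of that pattern (gatherExample is illustrative only):
def gatherExample():
    from mpi4py import MPI  # the samples import MPI at module level

    comm = MPI.COMM_WORLD
    payload = ("partial result from rank %d" % comm.Get_rank()).encode()

    gathered = comm.gather(payload)  # list of byte strings on rank 0, None elsewhere
    if comm.Get_rank() == 0:
        for i, blob in enumerate(gathered):
            print("rank {} sent {} bytes".format(i, len(blob)))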
def testModel():
    global predictionResult

    # Retrieve the input data from a .csv file
    testDataTable = createSparseTable(testDatasetFileName)

    # Create an algorithm object to predict values of the Naive Bayes model
    algorithm = prediction.Batch(nClasses, method=prediction.fastCSR)

    # Pass a testing data set and the trained model to the algorithm
    algorithm.input.setTable(classifier.prediction.data, testDataTable)
    algorithm.input.setModel(classifier.prediction.model,
                             trainingResult.get(classifier.training.model))

    # Predict values of the Naive Bayes model
    # (Result class from classifier.prediction)
    predictionResult = algorithm.compute()
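# The driver for the two Naive Bayes functions above is not shown. The block below is
# a sketch of how they are typically wired together in these MPI samples: nClasses,
# nBlocks and the file-name lists are assumed to be defined at module level as in the
# other listings, and classifier.prediction.prediction is assumed to be the identifier
# of the predicted-labels table in the result. A typical launch is
# `mpirun -n <number of blocks> python <sample>.py`, one rank per data block.
if __name__ == "__main__":
    comm = MPI.COMM_WORLD
    rankId = comm.Get_rank()

    trainModel()
    testModel()

    if rankId == MPI_ROOT:
        # Print the predicted class labels on the root node only
        print("Naive Bayes prediction results:")
        printNumericTable(predictionResult.get(classifier.prediction.prediction))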
MPI_ROOT = 0

datasetFileNames = [
    jp(DATA_PREFIX, 'covcormoments_csr_1.csv'),
    jp(DATA_PREFIX, 'covcormoments_csr_2.csv'),
    jp(DATA_PREFIX, 'covcormoments_csr_3.csv'),
    jp(DATA_PREFIX, 'covcormoments_csr_4.csv')
]

if __name__ == "__main__":
    comm = MPI.COMM_WORLD
    rankId = comm.Get_rank()

    # Retrieve the input data from a file
    dataTable = createSparseTable(datasetFileNames[rankId])

    # Create an algorithm to compute low order moments on local nodes
    localAlgorithm = low_order_moments.Distributed(
        step1Local, method=low_order_moments.fastCSR)

    # Set the input data set to the algorithm
    localAlgorithm.input.set(low_order_moments.data, dataTable)

    # Compute low order moments
    pres = localAlgorithm.compute()

    # Serialize partial results required by step 2
    dataArch = InputDataArchive()
    pres.serialize(dataArch)
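    # The listing stops after serializing the local partial result. The continuation
    # below is a sketch that follows the same gather/deserialize/finalize pattern as
    # the Naive Bayes training function earlier in this section; treat the identifiers
    # low_order_moments.partialResults and low_order_moments.mean as assumptions based
    # on that pattern rather than lines taken from this sample.
    nodeResults = dataArch.getArchiveAsArray()

    # Transfer partial results to step 2 on the root node
    serializedData = comm.gather(nodeResults)

    if rankId == MPI_ROOT:
        # Create an algorithm to merge the partial moments on the master node
        masterAlgorithm = low_order_moments.Distributed(
            step2Master, method=low_order_moments.fastCSR)

        for i in range(comm.Get_size()):
            # Deserialize the partial result computed in step 1 on rank i
            dataArch = OutputDataArchive(serializedData[i])
            dataForStep2FromStep1 = low_order_moments.PartialResult()
            dataForStep2FromStep1.deserialize(dataArch)

            # Add the local partial result to the master-node algorithm
            masterAlgorithm.input.add(low_order_moments.partialResults,
                                      dataForStep2FromStep1)

        # Merge partial results and compute the final moments on the master node
        masterAlgorithm.compute()
        res = masterAlgorithm.finalizeCompute()

        printNumericTable(res.get(low_order_moments.mean))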
def deserializeDAALObject(buffer, object):
    # Create a data archive to deserialize the numeric table
    dataArch = OutputDataArchive(buffer)

    # Deserialize the numeric table from the data archive
    object.deserialize(dataArch)

    return object


if __name__ == "__main__":
    comm = MPI.COMM_WORLD
    rankId = comm.Get_rank()

    transposedDataTable = createSparseTable(
        transposedTrainDatasetFileNames[rankId])

    step4LocalInput = KeyValueDataCollection()
    itemsPartialResultPrediction = KeyValueDataCollection()

    dataTable = initializeModel()

    trainModel(dataTable, transposedDataTable)

    testModel()

    if rankId == MPI_ROOT:
        for i in range(nBlocks):
            for j in range(nBlocks):
                print("prediction {}, {}".format(i, j))
                printNumericTable(predictedRatingsMaster[i][j].get(
                    ratings.prediction))
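# deserializeDAALObject has a natural counterpart on the sending side. The helper
# below is a sketch of that counterpart (the name serializeDAALObject is an
# assumption); it uses only the InputDataArchive and getArchiveAsArray calls that
# already appear in the training functions above, and produces the byte array that
# deserializeDAALObject expects after an MPI exchange such as comm.gather.
def serializeDAALObject(object):
    # Create a data archive to serialize the object
    dataArch = InputDataArchive()

    # Serialize the object into the data archive
    object.serialize(dataArch)

    # Return the archive contents as a byte array suitable for MPI transfer
    return dataArch.getArchiveAsArray()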