Exemple #1
0
def main(readcsv=read_csv, method='defaultDense'):
    """Train a multinomial naive Bayes model in batch mode and predict with it.

    Training and test data are loaded through ``readcsv``, which is called as
    ``readcsv(path, columns)``: ``range(20)`` selects the feature columns and
    ``range(20, 21)`` the label column. Both algorithms are configured for
    20 classes.

    Args:
        readcsv: Reader callable used for every file access.
        method: Computation method forwarded to the training and the
            prediction algorithm.

    Returns:
        A tuple ``(prediction_result, test_labels)``.
    """
    train_path = "./data/batch/naivebayes_train_dense.csv"
    test_path = "./data/batch/naivebayes_test_dense.csv"
    n_classes = 20

    # Training: configure the algorithm, load the data, fit the model.
    trainer = d4p.multinomial_naive_bayes_training(n_classes, method=method)
    train_features = readcsv(train_path, range(20))
    train_labels = readcsv(train_path, range(20, 21))
    fitted = trainer.compute(train_features, train_labels)

    # Prediction: same number of classes and the same feature columns.
    predictor = d4p.multinomial_naive_bayes_prediction(n_classes,
                                                       method=method)
    test_features = readcsv(testfile := test_path, range(20)) \
        if False else readcsv(test_path, range(20))
    test_labels = readcsv(test_path, range(20, 21))
    outcome = predictor.compute(test_features, fitted.model)

    # One predicted label per test observation is expected.
    assert outcome.prediction.shape == (test_features.shape[0], 1)

    return (outcome, test_labels)
Exemple #2
0
    def naiveBayes(self, X_train, X_test, y_train, y_test, target):
        '''
        Train and apply multinomial naive Bayes in serial (batch) mode.

        The number of classes is the count of distinct values in ``y_train``.
        Two wall-clock durations are written to ``self.latency``: the training
        time alone, and the time from the start of training until prediction
        has finished. ``y_test`` and ``target`` are accepted but not used
        here. Nothing is returned.
        '''
        n_classes = len(y_train.unique())

        # Training algorithm sized to the observed number of classes
        trainer = d4p.multinomial_naive_bayes_training(
            n_classes, method='defaultDense')
        self.logger.info(
            'Training the Naive Bayes in pydaal Batch/Serial Mode')

        t0 = time.time()
        fitted = trainer.compute(X_train, y_train)
        training_seconds = time.time() - t0
        self.latency["Serial Naive Bayes Batch Time"] = training_seconds

        # Predict on the test set with the model that was just trained
        predictor = d4p.multinomial_naive_bayes_prediction(n_classes)
        outcome = predictor.compute(X_test, fitted.model)

        # One predicted label per test observation is expected
        assert outcome.prediction.shape == (X_test.shape[0], 1)

        # Measured from the same start point, so training time is included
        overall_seconds = time.time() - t0
        self.latency[
            'Overall Serial Naive bayes Prediction Batch Time'] = overall_seconds

        self.logger.info('Completed Naive Bayes in pydaal Batch/Serial Mode')

        return None
    def naiveBayes(self, Data_Path, test_data_path, target, n):
        '''
        Train and apply multinomial naive Bayes in daal4py SPMD mode.

        Every process reads its own training file, named
        ``Data_Path + <process id + 1> + ".csv"``, and takes part in the
        distributed training. Prediction on the file at ``test_data_path``
        is carried out by process 0 only. ``target`` names the label column
        in both files and ``n`` is passed to ``daalinit`` as ``nthreads``.

        The training time is written to ``self.latency`` on every process;
        the overall time (training plus prediction) only on process 0.
        Nothing is returned.
        '''
        d4p.daalinit(nthreads=n)

        # Per-process training partition (file numbering starts at 1)
        partition_path = Data_Path + str(d4p.my_procid() + 1) + ".csv"
        partition = pd.read_csv(partition_path)
        X = partition.drop(columns=target)
        y = partition[target]

        # The test set is loaded by every process
        test_frame = pd.read_csv(test_data_path)
        y_test = test_frame[target]  # not used further in this method
        X_test = test_frame.drop(columns=target)

        # Number of classes as seen in this process's partition
        n_classes = len(y.unique())

        trainer = d4p.multinomial_naive_bayes_training(
            n_classes, method='defaultDense', distributed=True)
        self.logger.info('Training the Naive Bayes in pydaal SPMD Mode')

        t0 = time.time()
        fitted = trainer.compute(X, y)
        self.latency['Parallel_NaiveBayes_Pydaal_Time'] = time.time() - t0

        # Prediction runs on a single process only
        is_root = d4p.my_procid() == 0
        if is_root:
            predictor = d4p.multinomial_naive_bayes_prediction(n_classes)
            presult = predictor.compute(X_test, fitted.model)

            # Measured from the same start point, so training is included
            overall_seconds = time.time() - t0
            self.latency[
                "Overall Parallel Naive Bayes Prediction SPMD Time"] = overall_seconds

        d4p.daalfini()

        self.logger.info('Completed Naive Bayes in pydaal SPMD Mode')

        return None
Exemple #4
0
def main(readcsv=read_csv, method='defaultDense'):
    """Train multinomial naive Bayes in streaming mode, then predict.

    The training file is fed to the algorithm in chunks of 250 rows. Each
    chunk is fetched with ``readcsv(path, columns, rows_to_skip, max_rows)``;
    ``range(20)`` selects the feature columns and ``range(20, 21)`` the label
    column. Once the input is exhausted the partial results are finalized and
    the resulting model is applied to the test file.

    Args:
        readcsv: Reader callable. It is expected to signal the end of the
            input either by raising an exception or by returning an empty
            chunk.
        method: Computation method forwarded to the training and the
            prediction algorithm.

    Returns:
        A tuple ``(prediction_result, test_labels)``.

    Raises:
        Exception: Whatever ``readcsv`` raises when the very first chunk
            cannot be read. A failure at that point is a real input problem
            (missing or empty file), not the end of the data.
    """
    # input data file
    infile = "./data/batch/naivebayes_train_dense.csv"
    testfile = "./data/batch/naivebayes_test_dense.csv"

    # Configure a training object (20 classes)
    train_algo = d4p.multinomial_naive_bayes_training(20,
                                                      streaming=True,
                                                      method=method)

    chunk_size = 250
    lines_read = 0
    # read and feed chunk by chunk
    while True:
        # Read the next chunk; 20 features per observation
        try:
            data = readcsv(infile, range(20), lines_read, chunk_size)
            labels = readcsv(infile, range(20, 21), lines_read, chunk_size)
        except Exception:
            # The exception type used for "no more rows" depends on the
            # reader, so it cannot be narrowed further here. Unlike a bare
            # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
            if lines_read == 0:
                # Nothing was read at all: surface the underlying error
                # instead of finalizing a model trained on no data.
                raise
            break
        if data.shape[0] == 0:
            # A reader that returns an empty chunk instead of raising would
            # otherwise never advance ``lines_read`` and loop forever.
            break
        # Now feed chunk
        train_algo.compute(data, labels)
        lines_read += data.shape[0]

    # All chunks are done, now finalize the computation
    train_result = train_algo.finalize()

    # Now let's do some prediction
    pred_algo = d4p.multinomial_naive_bayes_prediction(20, method=method)
    # read test data (with same #features)
    pred_data = readcsv(testfile, range(20))
    pred_labels = readcsv(testfile, range(20, 21))
    # now predict using the model from the training above
    pred_result = pred_algo.compute(pred_data, train_result.model)

    # Prediction result provides one label per test observation
    assert pred_result.prediction.shape == (pred_data.shape[0], 1)

    return (pred_result, pred_labels)
Exemple #5
0
    # Each process gets its own data
    infile = "./data/batch/naivebayes_train_dense.csv"

    # Configure a training object (20 classes)
    talgo = d4p.multinomial_naive_bayes_training(20, distributed=True)

    # Read data. Let's use 20 features per observation
    data = loadtxt(infile, delimiter=',', usecols=range(20))
    labels = loadtxt(infile, delimiter=',', usecols=range(20, 21))
    labels.shape = (labels.size, 1)  # must be a 2d array
    tresult = talgo.compute(data, labels)

    # Now let's do some prediction
    # It runs only on a single node
    if d4p.my_procid() == 0:
        palgo = d4p.multinomial_naive_bayes_prediction(20)
        # read test data (with same #features)
        pdata = loadtxt("./data/batch/naivebayes_test_dense.csv",
                        delimiter=',',
                        usecols=range(20))
        # now predict using the model from the training above
        presult = palgo.compute(pdata, tresult.model)

        # Prediction result provides prediction
        assert (presult.prediction.shape == (pdata.shape[0], 1))

        print('All looks good!')

    d4p.daalfini()