Ejemplo n.º 1
0
 def test_non_contig(self):
     """Compare linear regression on strided views against contiguous copies.

     Random matrices are allocated one row and one column larger than
     needed and then sliced, so the slices are not C-contiguous. The
     same data is copied into C-contiguous arrays, both variants are
     trained and used for prediction, and the predictions must agree
     within ``np.allclose`` tolerance.
     """
     from numpy.random import rand

     n_rows, n_features, n_responses = 10007, 1017, 77

     # Over-allocate by one in each dimension; slicing then yields strided views.
     X = rand(n_rows + 1, n_features + 1)
     Xp = rand(n_rows + 1, n_features + 1)
     y = rand(n_rows + 1, n_responses + 1)

     views = (X[:n_rows, :n_features],
              Xp[:n_rows, :n_features],
              y[:n_rows, :n_responses])
     copies = tuple(np.ascontiguousarray(view) for view in views)

     # Sanity-check the fixtures before involving daal4py.
     self.assertTrue(not any(v.flags['C_CONTIGUOUS'] for v in views))
     self.assertTrue(all(c.flags['C_CONTIGUOUS'] for c in copies))
     self.assertTrue(all(np.allclose(c, v) for c, v in zip(copies, views)))

     def fit_and_predict(train_x, test_x, train_y):
         # Train a fresh model and return its predictions for test_x.
         trained = d4p.linear_regression_training().compute(train_x, train_y)
         predictor = d4p.linear_regression_prediction()
         return predictor.compute(test_x, trained.model).prediction

     contiguous_prediction = fit_and_predict(*copies)
     strided_prediction = fit_and_predict(*views)
     self.assertTrue(np.allclose(strided_prediction, contiguous_prediction))
Ejemplo n.º 2
0
def _daal4py_fit(self, X, y_):
    """Fit a linear model with daal4py and store the result on ``self``.

    Training is attempted with the 'defaultDense' method first and, if
    that raises ``RuntimeError``, once more with 'qrDense'.

    Sets ``self.daal_model_``, ``self.intercept_`` and ``self.coef_``.

    Returns:
        ``self`` on success, or ``None`` when both methods raise
        ``RuntimeError``.
    """
    y = make2d(y_)
    fptype = getFPType(X)

    # Per the original comment, a RuntimeError from the default method means
    # the normal system is not invertible, so QR is tried next.
    for method in ('defaultDense', 'qrDense'):
        try:
            trainer = daal4py.linear_regression_training(
                fptype=fptype,
                interceptFlag=bool(self.fit_intercept),
                method=method)
            training_result = trainer.compute(X, y)
        except RuntimeError:
            continue
        break
    else:
        # Neither method succeeded.
        return None

    model = training_result.model
    self.daal_model_ = model

    # Column 0 of Beta is stored as the intercept, the remaining columns
    # as the coefficients.
    beta = model.Beta
    self.intercept_ = beta[:, 0].copy(order='C')
    self.coef_ = beta[:, 1:].copy(order='C')

    # Flatten only when the caller passed a 1-D target.
    if self.coef_.shape[0] == 1 and y_.ndim == 1:
        self.coef_ = np.ravel(self.coef_)
        self.intercept_ = self.intercept_[0]

    return self
Ejemplo n.º 3
0
 def verify_on_linear_regression(self, X, Y):
     """Assert that training gives the same Beta for raw and contiguous inputs.

     The model is trained once on ``X``/``Y`` as given and once on
     C-contiguous copies (with ``Y`` reshaped to a single column); the
     resulting ``Beta`` arrays must agree within ``np.allclose`` tolerance.
     """
     def train_beta(features, responses):
         # Train with an intercept in double precision and return Beta.
         trainer = d4p.linear_regression_training(interceptFlag=True,
                                                  fptype='double')
         return trainer.compute(features, responses).model.Beta

     beta_as_given = train_beta(X, Y)

     contiguous_x = np.ascontiguousarray(X)
     column_y = np.ascontiguousarray(Y).reshape((len(Y), 1))
     beta_contiguous = train_beta(contiguous_x, column_y)

     self.assertTrue(np.allclose(beta_as_given, beta_contiguous))
def run_inference(num_observations: int = 1000):
    """Time daal4py linear-regression training on a data sample.

    Despite the function name, the timed region covers creating a
    ``d4p.linear_regression_training`` object and calling ``compute`` on
    the sampled data; no prediction step is executed.

    Args:
        num_observations: Sample size passed to ``common.get_test_data_df``.

    Returns:
        Whatever ``common.calculate_stats`` returns for the list of
        per-row timings collected over ``NUM_LOOPS`` repetitions.
    """
    # Load data
    train_x_df = common.get_test_data_df(X=common.X_df, size=num_observations)
    train_y_df = common.get_test_data_df(X=common.y_df, size=num_observations)
    num_rows = len(train_x_df)

    print("_______________________________________")
    print("Total Number of Rows", num_rows)

    inference_times = []
    for _ in range(NUM_LOOPS):
        start_time = timer()
        model = d4p.linear_regression_training()
        model.compute(train_x_df, train_y_df)
        end_time = timer()

        total_time = end_time - start_time
        # NOTE(review): 10e6 equals 1e7, not 1e6. If microseconds per row
        # were intended this is off by a factor of ten; the factor is left
        # unchanged so that reported numbers stay comparable - confirm.
        inference_times.append(total_time * (10e6) / num_rows)

    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
Ejemplo n.º 5
0
def main(readcsv=read_csv, method='defaultDense'):
    """Train a linear regression model in batch mode and predict with it.

    Columns 0-9 of the CSV files are read as independent variables and
    columns 10-11 as dependent variables.

    Args:
        readcsv: Callable ``readcsv(path, columns)`` returning an array.
        method: Accepted for interface compatibility; not used by this
            function.

    Returns:
        Tuple ``(train_result, predict_result, ptdata)`` where ``ptdata``
        holds the dependent columns of the test file.
    """
    train_path = "./data/batch/linear_regression_train.csv"
    test_path = "./data/batch/linear_regression_test.csv"

    # Training object with an intercept term.
    trainer = d4p.linear_regression_training(interceptFlag=True)

    # 10 independent and 2 dependent variables per observation.
    indep_data = readcsv(train_path, range(10))
    dep_data = readcsv(train_path, range(10, 12))
    train_result = trainer.compute(indep_data, dep_data)

    # Prediction on the test set, which has the same number of features.
    predictor = d4p.linear_regression_prediction()
    pdata = readcsv(test_path, range(10))
    ptdata = readcsv(test_path, range(10, 12))
    predict_result = predictor.compute(pdata, train_result.model)

    # One predicted row per test row, one column per dependent variable.
    expected_shape = (pdata.shape[0], dep_data.shape[1])
    assert predict_result.prediction.shape == expected_shape

    return (train_result, predict_result, ptdata)
Ejemplo n.º 6
0
    def linearRegression(self, Data_Path, test_data_path, target, n):
        '''
        daal4py Linear Regression SPMD Mode

        Each process reads its own training file, named
        ``Data_Path + str(rank + 1) + ".csv"``, and trains with the
        'qrDense' method in distributed mode. Prediction on the test file
        and metric computation happen only on the process whose rank is 0;
        every other process returns after finalizing without touching
        ``self.metrics``.

        Data_Path: prefix of the per-process training CSV files
        test_data_path: path of the test CSV file
        target: name of the target column
        n: number of threads passed to ``d4p.daalinit``

        Results are stored in ``self.latency`` and ``self.metrics``;
        nothing is returned.
        '''

        # Initialize SPMD mode
        d4p.daalinit(nthreads=n)

        # training setup
        train_file = Data_Path + str(d4p.my_procid() + 1) + ".csv"
        data = pd.read_csv(train_file)
        X = data.drop(columns=target)
        y = data[target]

        train_algo = d4p.linear_regression_training(method='qrDense',
                                                    distributed=True)

        self.logger.info('Training the Linear Regression in pydaal SPMD Mode')

        start = time.time()

        train_result = train_algo.compute(X, y)

        self.latency['Parallel_LinearRegression_Pydaal_Time'] = time.time() - \
            start

        # test file setup
        test = pd.read_csv(test_data_path)

        y_test = test[target]
        X_test = test.drop(columns=target)

        # Stays None on every rank except 0, so the metric step below can
        # tell whether a prediction exists instead of failing on an
        # undefined name.
        prediction = None
        if d4p.my_procid() == 0:
            predict_algo = d4p.linear_regression_prediction()

            # now predict using the model from the training above
            prediction = predict_algo.compute(
                X_test, train_result.model).prediction
            self.latency[
                "Overall Parallel Linear Regression Prediction SPMD Time"] = time.time(
                ) - start

        d4p.daalfini()

        self.logger.info('Completed Linear Regression in pydaal SPMD Mode')

        if prediction is None:
            # Non-root ranks did not predict; there is nothing to score.
            return

        # Compute metrics
        mse = mean_squared_error(y_test, prediction)
        r2score = r2_score(y_test, prediction)

        # Store the model metrics
        self.metrics['MSE_Parallel_LinearRegression_Pydaal'] = mse
        self.metrics['r2score_Parallel_LinearRegression_Pydaal'] = r2score

        return
Ejemplo n.º 7
0
def compute(train_indep_data, train_dep_data, test_indep_data):
    """Train a linear regression model and predict for the test data.

    Returns:
        Tuple ``(prediction_result, train_result)``.
    """
    # Train with an intercept term; the result carries the model.
    train_result = d4p.linear_regression_training(interceptFlag=True).compute(
        train_indep_data, train_dep_data)

    # Predict with the model produced above.
    prediction_result = d4p.linear_regression_prediction().compute(
        test_indep_data, train_result.model)

    return prediction_result, train_result
Ejemplo n.º 8
0
def _daal4py_fit(self, X, y):
    """Fit a linear model with daal4py ('defaultDense') and store it on ``self``.

    Sets ``self.daal_model_``, ``self.intercept_`` and ``self.coef_``.
    ``coef_`` and ``intercept_`` are flattened only when there is a single
    target AND the caller passed ``y`` as a 1-D array; a 2-D single-column
    ``y`` keeps the 2-D ``coef_`` / 1-D ``intercept_`` layout.

    Returns:
        ``self``.
    """
    # Record the caller's target dimensionality before make2d replaces y;
    # it decides below whether the fitted attributes are flattened.
    y_is_1d = np.ndim(y) == 1
    y = make2d(y)
    X_fptype = getFPType(X)
    lr_algorithm = daal4py.linear_regression_training(
        fptype=X_fptype,
        interceptFlag=bool(self.fit_intercept),
        method='defaultDense')

    lr_res = lr_algorithm.compute(X, y)
    lr_model = lr_res.model
    self.daal_model_ = lr_model
    coefs = lr_model.Beta

    # Column 0 of Beta is stored as the intercept, the rest as coefficients.
    self.intercept_ = coefs[:, 0].copy(order='C')
    self.coef_ = coefs[:, 1:].copy(order='C')

    if self.coef_.shape[0] == 1 and y_is_1d:
        self.coef_ = np.ravel(self.coef_)
        self.intercept_ = self.intercept_[0]

    return self
def main(readcsv=read_csv, method='defaultDense'):
    """Train a linear regression model in streaming mode and predict with it.

    The training file is read in chunks of 250 rows; each chunk is fed to
    the streaming training object and the computation is finalized once
    no more data can be read.

    Args:
        readcsv: Callable ``readcsv(path, columns, skip_rows, n_rows)`` for
            chunked reads and ``readcsv(path, columns)`` for full reads.
        method: Accepted for interface compatibility; not used by this
            function.

    Returns:
        Tuple ``(train_result, predict_result, ptdata)`` where ``ptdata``
        holds the dependent columns of the test file.

    Raises:
        ValueError: If no training rows could be read at all.
        Exception: Whatever ``readcsv`` raises when the very first chunk
            cannot be read (for example a missing file).
    """
    infile = "./data/batch/linear_regression_train.csv"
    testfile = "./data/batch/linear_regression_test.csv"

    # Configure a Linear regression training object for streaming
    train_algo = d4p.linear_regression_training(interceptFlag=True,
                                                streaming=True)

    chunk_size = 250
    lines_read = 0
    dep_data = None
    # read and feed chunk by chunk
    while True:
        # Let's have 10 independent, and 2 dependent variables (for each observation)
        try:
            indep_chunk = readcsv(infile, range(10), lines_read, chunk_size)
            dep_chunk = readcsv(infile, range(10, 12), lines_read, chunk_size)
        except Exception:
            if lines_read == 0:
                # Failing before any row was read is a real error, not
                # end-of-input; do not hide it.
                raise
            # The reader signals end-of-input by raising.
            break
        if indep_chunk.shape[0] == 0:
            # A reader that returns an empty chunk at end-of-input would
            # otherwise make this loop spin forever.
            break
        dep_data = dep_chunk
        # Now feed chunk
        train_algo.compute(indep_chunk, dep_chunk)
        lines_read += indep_chunk.shape[0]

    if dep_data is None:
        raise ValueError("no training data could be read from " + infile)

    # All chunks are done, now finalize the computation
    train_result = train_algo.finalize()

    # Now let's do some prediction
    predict_algo = d4p.linear_regression_prediction()
    # read test data (with same #features)
    pdata = readcsv(testfile, range(10))
    ptdata = readcsv(testfile, range(10, 12))
    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # The prediction result provides prediction
    assert predict_result.prediction.shape == (pdata.shape[0],
                                               dep_data.shape[1])

    return (train_result, predict_result, ptdata)
Ejemplo n.º 10
0
    def linearRegression(self, X_train, X_test, y_train, y_test, target):
        '''
        Batch (serial) Linear Regression with daal4py

        Trains with the 'qrDense' method on the training split, predicts
        on the test split, and records timings in ``self.latency`` and
        MSE / R2 in ``self.metrics``. ``target`` is accepted but not used.
        Nothing is returned.
        '''
        trainer = d4p.linear_regression_training(method='qrDense')

        self.logger.info(
            'Training the Linear Regression in pydaal Batch/Serial Mode')

        t0 = time.time()
        trained = trainer.compute(X_train, y_train)
        self.latency["Serial Linear Regression Batch Time"] = time.time() - t0

        predictor = d4p.linear_regression_prediction()
        y_pred = predictor.compute(X_test, trained.model).prediction

        # The "overall" figure is measured from the same start as training.
        overall_key = 'Overall Serial Linear Regression Prediction Batch Time'
        self.latency[overall_key] = time.time() - t0

        self.logger.info(
            'Completed Linear Regression in pydaal Batch/Serial Mode')

        # Model quality on the test split.
        self.metrics['MSE_serial_linear_regression_pydaal'] = \
            mean_squared_error(y_test, y_pred)
        self.metrics['r2_score_serial_linear_regression_pydaal'] = \
            r2_score(y_test, y_pred)
Ejemplo n.º 11
0
def lr_train(N, D):
    """Train linear regression ('qrDense', with intercept) on random data.

    Generates an ``(N, D)`` feature matrix and an ``(N, 2)`` response
    matrix with ``np.random.ranf`` and returns the training result.
    """
    features = np.random.ranf((N, D))
    responses = np.random.ranf((N, 2))
    trainer = daal4py.linear_regression_training(interceptFlag=True,
                                                 method='qrDense')
    return trainer.compute(features, responses)
Ejemplo n.º 12
0
y = data.target[np.newaxis].T  # house price, as a single-column 2-D array

# splitting the data for training and testing, with a 25% test dataset size
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=1693)

# ## Training and Saving the Model

# Let's **train our model** and look at the model's features!

# In[4]:

# training the model for prediction
train_result = d4p.linear_regression_training().compute(X_train, y_train)

# To **get training model information** and **save it to a file**:

# In[5]:

# retrieving and printing training model
model = train_result.model
print("Here's our model:\n\n\n", model, "\n")

model_filename = './models/linear_regression_batch.sav'

# saving model to a file; the context manager closes the handle even if
# pickling fails, so the file is always flushed and released
with open(model_filename, "wb") as model_file:
    pickle.dump(model, model_file)

# Now let's **load up the model** and look at one of the model's features.
Ejemplo n.º 13
0
def test_fit(X, y):
    """Train linear regression on ``X``/``y`` and return the training result.

    The floating-point type is derived from ``X``; the method and the
    intercept flag come from the module-level ``params`` object.
    """
    trainer_options = dict(fptype=getFPType(X),
                           method=params.method,
                           interceptFlag=params.fit_intercept)
    trainer = linear_regression_training(**trainer_options)
    return trainer.compute(X, y)
Ejemplo n.º 14
0
import daal4py as d4p
from timeit import default_timer as timer

from sklearn.metrics import mean_squared_error

import numpy as np
import pandas as pd

import common

# Number of repetitions of the timing loop in run_inference.
NUM_LOOPS = 100
# Module-level side effect: daal4py is initialised as soon as this file is imported.
d4p.daalinit()

print("Computing for Linear Regression With Daal")
# Train one linear regression model on the full data set at import time.
MODEL = d4p.linear_regression_training()
train_result = MODEL.compute(common.X_df, common.y_df)


def run_inference(num_observations: int = 1000):
    """Run a timing loop on a sample of ``num_observations`` rows.

    NOTE(review): the previous summary said "Run xgboost", but this module
    only sets up a daal4py linear regression model - presumably a
    copy-paste leftover; confirm what is actually timed.
    """
    # Load data
    test_df = common.get_test_data_df(X=common.X_df, size=num_observations)
    num_rows = len(test_df)
    ######################
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    run_times = []
    inference_times = []
    # One timed repetition per iteration.
    for _ in range(NUM_LOOPS):

        start_time = timer()
Ejemplo n.º 15
0
# each process gets its own data
infile = "./data/linear_regression_train_" + str(d4p.my_procid() + 1) + ".csv"

# read data: parse the file once, then split it into features and target
train_df = pd.read_csv(infile)
indep_data = train_df.drop(["target"], axis=1)  # house characteristics
dep_data = train_df["target"]  # house price

# ## Training and Saving the Model

# Time to **train our model** and look at the model's features!

# In[16]:

# training the model for prediction
train_result = d4p.linear_regression_training(distributed=True).compute(
    indep_data, dep_data)

# To **get training model information** and **save it to a file**:

# In[17]:

# retrieving and printing training model
model = train_result.model
print("Here's our model:\n\n\n", model, "\n")

# one model file per process, numbered from 1
model_filename = './models/daal4py_Distributed_LinearRegression_' + str(
    d4p.my_procid() + 1) + '.sav'

# saving model to a file
joblib.dump(model, model_filename)
Ejemplo n.º 16
0
        for n in range(REP):
            t1 = timeit.default_timer()
            r = func(*args, **keyArgs)
            t2 = timeit.default_timer()
            times.append(t2-t1)
        print(min(times))
        return r
    return st_func

# Problem size from the command line: both matrices below are p-by-n.
p, n = args.size[0], args.size[1]

# Random training data (X), prediction data (Xp) and responses (y),
# generated in this order and all with the same p-by-n shape.
X, Xp, y = rand(p, n), rand(p, n), rand(p, n)

# Algorithm objects shared by the timed functions below.
regr_train = linear_regression_training()
regr_predict = linear_regression_prediction()

@st_time
def test_fit(X,y):
    """Run one training computation on ``X``/``y``; the result is discarded."""
    regr_train.compute(X, y)
    return None

@st_time
def test_predict(X, m):
    """Run one prediction of ``X`` with model ``m``; the result is discarded."""
    regr_predict.compute(X, m)
    return None

# CSV prefix for the training measurement; the timing follows on the same line.
print(','.join([args.batchID, args.arch, args.prefix, "Linear.fit",
                coreString(args.num_threads), "Double",
                "{}x{}".format(p, n)]), end=',')
test_fit(X, y)

# test_fit discards its result, so train once more to obtain a model.
res = regr_train.compute(X, y)

# CSV prefix for the prediction measurement.
print(','.join([args.batchID, args.arch, args.prefix, "Linear.prediction",
                coreString(args.num_threads), "Double",
                "{}x{}".format(p, n)]), end=',')
test_predict(Xp, res.model)
Ejemplo n.º 17
0
# run like this:
#    mpirun -n 4 python ./linreg_spmd.py

import daal4py as d4p
from numpy import loadtxt, allclose

if __name__ == "__main__":
    # Initialize SPMD mode
    d4p.daalinit()

    # Each process gets its own data
    infile = "./data/distributed/linear_regression_train_" + str(
        d4p.my_procid() + 1) + ".csv"

    # Configure a Linear regression training object
    train_algo = d4p.linear_regression_training(distributed=True)

    # Read data. Let's have 10 independent, and 2 dependent variables (for each observation)
    indep_data = loadtxt(infile, delimiter=',', usecols=range(10))
    dep_data = loadtxt(infile, delimiter=',', usecols=range(10, 12))
    # Now train/compute, the result provides the model for prediction
    train_result = train_algo.compute(indep_data, dep_data)

    # Now let's do some prediction
    # It run only on a single node
    if d4p.my_procid() == 0:
        predict_algo = d4p.linear_regression_prediction()
        # read test data (with same #features)
        pdata = loadtxt("./data/distributed/linear_regression_test.csv",
                        delimiter=',',
                        usecols=range(10))