Beispiel #1
0
def predict_affinities_and_report_results():
    """Run the SKEMPI affinity prediction and write both result tables to CSV.

    The representation is loaded from ``skempi_vectors_path``, passed through
    ``calculate_vector_multiplications`` and handed, together with a fresh
    ``BayesianRidge`` estimator, to ``predictAffinityWithModel``. The two
    objects it returns are saved under ``../results`` with file names derived
    from ``representation_name``.
    """
    vectors = load_representation(skempi_vectors_path)
    products = calculate_vector_multiplications(vectors)
    regressor = linear_model.BayesianRidge()
    summary, detail = predictAffinityWithModel(regressor, products)

    summary_path = r"../results/Affinity_prediction_skempiv1_{0}.csv".format(
        representation_name)
    summary.to_csv(summary_path, index=False)

    detail_path = r"../results/Affinity_prediction_skempiv1_{0}_detail.csv".format(
        representation_name)
    detail.to_csv(detail_path, index=False)
Beispiel #2
0
def infer_from_img(img_file):
    """Print a score for the image at ``img_file`` and the prediction time.

    The image is read with OpenCV, resized to 224x224 and turned into a
    feature vector by ``extract_conv_feature`` (layer ``conv5_1``).

    Parameters
    ----------
    img_file : str
        Path of the image to score.

    Raises
    ------
    IOError
        If OpenCV cannot read ``img_file``.
    """
    img = cv2.imread(img_file)
    if img is None:
        # cv2.imread reports an unreadable file by returning None.
        raise IOError('Cannot read image: {0}'.format(img_file))
    img = cv2.resize(img, (224, 224))
    feat = extract_conv_feature(img, layer_name='conv5_1').tolist()
    # scikit-learn expects a 2-D (n_samples, n_features) array.
    sample = np.asarray(feat, dtype=float).reshape(1, -1)

    # The feature vector is data, not a constructor argument, so the
    # estimator is created with its defaults.
    # NOTE(review): as before, the model is fitted on one random sample with
    # a constant target; presumably a placeholder for a trained model.
    reg = linear_model.BayesianRidge()
    reg.fit(np.random.rand(1, sample.shape[1]), np.array([1.0]))
    tik = time.time()
    score = reg.predict(sample)
    tok = time.time()

    # time.time() differences are already in seconds, matching the message.
    print('Beauty score is {0}, it takes {1} seconds!'.format(
        score, tok - tik))
# Zero-argument factories keyed by model name. Looking a name up and calling
# its factory builds only the requested estimator instead of all of them.
_MODEL_FACTORIES = {
    'LinearRegression': lambda: linear_model.LinearRegression(),
    'Ridge': lambda: linear_model.Ridge(alpha=.5),
    'RidgeCV': lambda: linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0]),
    'Lasso': lambda: linear_model.Lasso(alpha=0.1),
    'LassoLars': lambda: linear_model.LassoLars(alpha=.1),
    'BayesianRidge': lambda: linear_model.BayesianRidge(),
    'SGDRegressor': lambda: linear_model.SGDRegressor(),
    'SGDClassifier': lambda: linear_model.SGDClassifier(),
    'SVM': lambda: svm.SVC(),
    'SVR': lambda: svm.SVR(),
}


def get_model(x):
    """Return a new, unfitted estimator for the model name ``x``.

    Parameters
    ----------
    x : str
        One of the keys of ``_MODEL_FACTORIES`` (e.g. ``'Ridge'``, ``'SVR'``).

    Returns
    -------
    A freshly constructed estimator instance.

    Raises
    ------
    KeyError
        If ``x`` is not a known model name.
    """
    return _MODEL_FACTORIES[x]()
Beispiel #4
0
    def __init__(self, ml_algs=None):
        """Create one regressor per recognised algorithm code.

        Parameters
        ----------
        ml_algs : list of str, optional
            Algorithm codes, processed in order. Recognised codes are
            'BRR', 'RFR', 'DTR', 'GBR', 'LR', 'GPR', 'SVR' and 'MLP'.
            Any other code (including 'DL', which is part of the default
            list) is skipped without error. Defaults to
            ``['LR', 'GPR', 'MLP', 'DL', 'SVR', 'RFR', 'DTR', 'GBR']``.
        """
        super().__init__()
        if ml_algs is None:
            # Built per call so no list object is shared between instances.
            ml_algs = ['LR', 'GPR', 'MLP', 'DL', 'SVR', 'RFR', 'DTR', 'GBR']

        def build_mlp():
            return MLPRegressor(hidden_layer_sizes=(100, ),
                                activation='relu',
                                solver='adam',
                                alpha=0.001,
                                batch_size='auto',
                                learning_rate='constant',
                                learning_rate_init=0.01,
                                power_t=0.5,
                                max_iter=1000,
                                shuffle=True,
                                random_state=0,
                                tol=0.0001,
                                verbose=False,
                                warm_start=False,
                                momentum=0.9,
                                nesterovs_momentum=True,
                                early_stopping=False,
                                validation_fraction=0.1,
                                beta_1=0.9,
                                beta_2=0.999,
                                epsilon=1e-08)

        # Factories are lazy, so only the requested estimators are built.
        factories = {
            'BRR': lambda: linear_model.BayesianRidge(),
            'RFR': lambda: RandomForestRegressor(n_estimators=100),
            'DTR': lambda: DecisionTreeRegressor(),
            'GBR': lambda: GradientBoostingRegressor(),
            'LR': lambda: LinearRegression(),
            'GPR': lambda: GaussianProcessRegressor(
                kernel=DotProduct() + WhiteKernel(), random_state=0),
            'SVR': lambda: SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1),
            'MLP': build_mlp,
        }

        self.regressors = []
        for alg in ml_algs:
            factory = factories.get(alg)
            if factory is not None:
                self.regressors.append(factory())
Beispiel #5
0
    def __init__(self, method, params, i=0):
        """Store the configuration and build the model named by ``method[i]``.

        Parameters
        ----------
        method : sequence of str
            Method names; only entry ``i`` is inspected here.
        params : sequence of dict
            Keyword arguments per method; ``params[i]`` is passed to the
            selected model's constructor.
        i : int, optional
            Index into ``method`` and ``params`` (default 0).

        If ``method[i]`` matches none of the names checked below,
        ``self.model`` is not set by this code.
        """
        # NOTE(review): this list spells 'Lasso' and 'Bayesian Ridge', while
        # the checks below test for 'LASSO' and 'BRR' -- confirm which
        # spellings callers actually pass.
        self.algorithm_list = [
            'PLS', 'GP', 'OLS', 'OMP', 'Lasso', 'Elastic Net', 'Ridge',
            'Bayesian Ridge', 'ARD', 'LARS', 'LASSO LARS', 'SVR', 'KRR', 'GBR'
        ]
        self.method = method
        self.outliers = None
        self.ransac = False

        selected = self.method[i]
        if selected == 'PLS':
            self.model = PLSRegression(**params[i])
        elif selected == 'OLS':
            self.model = linear.LinearRegression(**params[i])
        elif selected == 'OMP':
            # these models receive a shallow copy of the parameter dict
            self.model = linear.OrthogonalMatchingPursuit(
                **copy.copy(params[i]))
        elif selected == 'LASSO':
            self.model = linear.Lasso(**copy.copy(params[i]))
        elif selected == 'Elastic Net':
            self.model = linear.ElasticNet(**copy.copy(params[i]))
        elif selected == 'Ridge':
            self.model = linear.Ridge(**copy.copy(params[i]))
        elif selected == 'BRR':
            self.model = linear.BayesianRidge(**params[i])
        elif selected == 'ARD':
            self.model = linear.ARDRegression(**params[i])
        elif selected == 'LARS':
            self.model = linear.Lars(**copy.copy(params[i]))
        elif selected == 'SVR':
            self.model = svm.SVR(**params[i])
        elif selected == 'KRR':
            self.model = kernel_ridge.KernelRidge(**params[i])
Beispiel #6
0
    def load_default(self, machine_list='basic'):
        """
        Fit a default set of scikit-learn regressors on ``X_k_`` / ``y_k_``.

        Parameters
        ----------
        machine_list: optional, 'basic', 'advanced' or list of strings
            Names of the machines to load. 'basic' (the default) selects
            tree, ridge, random_forest and svm; 'advanced' additionally
            selects lasso, bayesian_ridge and sgd. Unrecognised names are
            ignored.

        Returns
        -------
        self : returns an instance of self.

        Notes
        -----
        A machine whose construction or fit raises ``ValueError`` is skipped.
        """
        if machine_list == 'basic':
            machine_list = ['tree', 'ridge', 'random_forest', 'svm']
        if machine_list == 'advanced':
            machine_list = [
                'lasso', 'tree', 'ridge', 'random_forest', 'svm',
                'bayesian_ridge', 'sgd'
            ]

        self.estimators_ = {}
        for name in machine_list:
            try:
                if name == 'lasso':
                    estimator = linear_model.LassoCV(
                        random_state=self.random_state)
                elif name == 'tree':
                    estimator = DecisionTreeRegressor(
                        random_state=self.random_state)
                elif name == 'ridge':
                    estimator = linear_model.RidgeCV()
                elif name == 'random_forest':
                    estimator = RandomForestRegressor(
                        random_state=self.random_state)
                elif name == 'svm':
                    estimator = SVR()
                elif name == 'sgd':
                    estimator = linear_model.SGDRegressor(
                        random_state=self.random_state)
                elif name == 'bayesian_ridge':
                    estimator = linear_model.BayesianRidge()
                else:
                    # not one of the known machines: nothing to load
                    continue
                self.estimators_[name] = estimator.fit(self.X_k_, self.y_k_)
            except ValueError:
                continue
        return self
Beispiel #7
0
def set_reg_mdl(reg_mdl, alpha, normalize):
    """Return ``(estimator, code)`` for the regression model key ``reg_mdl``.

    Recognised keys are 'EN' (elastic net), 'BR' (Bayesian ridge), 'RG'
    (ridge), 'LS' (lasso) and 'LR' (ordinary least squares). ``alpha`` is
    passed to the elastic net and lasso only; ``normalize`` is passed to
    every model except 'LR'. The integer returned alongside the estimator is
    3, 2 or 1 depending on the model; its meaning is not defined here.

    For any other key an error is reported via ``s_ut.my_print`` and
    ``(None, None)`` is returned.
    """
    if reg_mdl == 'EN':
        estimator, code = l_mdl.ElasticNet(alpha=alpha, normalize=normalize), 3
    elif reg_mdl == 'BR':
        estimator, code = l_mdl.BayesianRidge(normalize=normalize), 2
    elif reg_mdl == 'RG':
        estimator, code = l_mdl.Ridge(normalize=normalize), 2
    elif reg_mdl == 'LS':
        estimator, code = l_mdl.Lasso(alpha=alpha, normalize=normalize), 2
    elif reg_mdl == 'LR':
        estimator, code = l_mdl.LinearRegression(), 1
    else:
        message = 'pid: ' + str(os.getpid()) + ' ERROR: invalid regression model: ' + str(reg_mdl)
        s_ut.my_print(message)
        estimator, code = None, None
    return estimator, code
Beispiel #8
0
def LinearRegression_to_BayesianRidge(x_df, y_df):
    """Fit OLS and Bayesian ridge on the same data and return both scores.

    Parameters
    ----------
    x_df : features passed to ``fit`` and ``score``.
    y_df : targets passed to ``fit`` and ``score``.

    Returns
    -------
    dict
        ``{"LinearRegression": score, "BayesianRidge": score}`` where each
        value is the estimator's ``score`` on the data it was fitted on.
    """
    reg = linear_model.LinearRegression()
    reg.fit(x_df, y_df)

    rid = linear_model.BayesianRidge()
    rid.fit(x_df, y_df)

    # The entries must be comma-separated; without the comma the dict
    # literal does not parse.
    scores = {
        "LinearRegression": reg.score(x_df, y_df),
        "BayesianRidge": rid.score(x_df, y_df),
    }

    return scores
    def test_model_bayesian_ridge_return_std(self):
        """ONNX conversion with ``return_std`` reproduces predict's outputs.

        Both the prediction and the standard deviation returned by the
        converted model are compared to scikit-learn's to 4 decimals.
        """
        model, X = fit_regression_model(linear_model.BayesianRidge(),
                                        n_features=2, n_samples=20)
        input_spec = [("input", FloatTensorType([None, X.shape[1]]))]
        converter_options = {linear_model.BayesianRidge: {'return_std': True}}
        model_onnx = convert_sklearn(model, "bayesian ridge", input_spec,
                                     options=converter_options)
        self.assertIsNotNone(model_onnx)

        session = InferenceSession(model_onnx.SerializeToString())
        onnx_outputs = session.run(None, {'input': X})
        expected_pred, expected_std = model.predict(X, return_std=True)
        assert_almost_equal(expected_pred, onnx_outputs[0].ravel(), decimal=4)
        assert_almost_equal(expected_std, onnx_outputs[1].ravel(), decimal=4)
Beispiel #10
0
    def greedy_select_features(self):
        """Greedy forward feature selection, resumable via saved state.

        Columns of ``self.train_`` / ``self.test_`` are renamed to their
        positional index as strings. Starting from the saved state (unless
        ``self.debug_`` is set), each round adds the single remaining column
        giving the lowest score from ``self.ccv`` with a ``BayesianRidge``
        estimator. The best feature set seen so far is saved after every
        round under the key ``'chosen_features'``. The search stops when all
        columns are used, when the current set is more than 5 features larger
        than the best one, or when no candidate scores below the sentinel
        value. Finally both frames are reduced to the best feature set.
        """
        print('initial shapes:', self.train_.shape, self.test_.shape)
        saved = None if self.debug_ else self.load('chosen_features')

        # Identity test: `==` on an array-like saved state would not give a
        # plain boolean.
        if saved is None:
            g_best_score = 1e9
            g_best_features = []
            current = set()
            finished = False
        else:
            g_best_features, g_best_score, finished = saved
            current = set(g_best_features)
            print('SFS REUSE:', g_best_score, g_best_features, self.now())

        num_columns = self.train_.shape[1]
        col_names = [str(c) for c in range(num_columns)]
        self.train_.columns = col_names
        self.test_.columns = col_names

        if not finished:
            y = self.y_.ravel()
            scorer = metrics.make_scorer(metrics.log_loss)
            loop_count = len(col_names) - len(g_best_features)
            for _ in range(loop_count):
                avail = set(col_names).difference(current)
                best_score = 1e9
                best_features = None
                for f in avail:
                    newf = list(current | {f})
                    score, _unused = self.ccv(linear_model.BayesianRidge(),
                                              self.train_[newf], y, scorer)
                    if best_score > score:
                        best_score = score
                        best_features = newf
                if best_features is None:
                    # No candidate scored below the sentinel (for example
                    # every score was NaN); stop rather than fail on
                    # set(None) below.
                    break
                current = set(best_features)
                if g_best_score > best_score:
                    g_best_score = best_score
                    g_best_features = best_features
                    print('new best:', g_best_score, g_best_features,
                          self.now())
                if len(best_features) - len(g_best_features) > 5:
                    break
                self.save('chosen_features',
                          (g_best_features, g_best_score, False))
            # mark the saved state as finished so a later run skips the search
            self.save('chosen_features', (g_best_features, g_best_score, True))

        print('feature selection complete.', self.now())
        self.train_ = self.train_[g_best_features]
        self.test_ = self.test_[g_best_features]
def main (alp1, alp2, lbd1, lbd2):
    """Fit a Bayesian ridge model and return its mean absolute test error.

    Parameters
    ----------
    alp1, alp2, lbd1, lbd2 :
        Passed to ``BayesianRidge`` as ``alpha_1``, ``alpha_2``,
        ``lambda_1`` and ``lambda_2``.

    Returns
    -------
    The sum of absolute differences between predictions and ``testy``,
    divided by ``TEST_SIZE``.
    """
    # Load each .mat file once instead of once per variable.
    train_mat = sio.loadmat(TRAIN_DIR)
    test_mat = sio.loadmat(TEST_DIR)
    train_x, train_y = train_mat['trainx'], train_mat['trainy'].ravel()
    test_x, test_y = test_mat['testx'], test_mat['testy'].ravel()

    clf = linear_model.BayesianRidge(alpha_1=alp1, alpha_2=alp2, lambda_1=lbd1, lambda_2=lbd2)
    clf.fit(train_x, train_y)
    years = clf.predict(test_x)

    diff = sum((abs(predicted - actual)
                for predicted, actual in zip(years, test_y)), 0.0)
    # NOTE(review): normalised by the TEST_SIZE constant, as before; this is
    # only the mean if TEST_SIZE equals len(test_y) -- confirm.
    diff /= TEST_SIZE
    # The value is a mean absolute error, so it is reported as MAE.
    print ("MAE is: " + str(diff) +" with alpha: "+str(alp1)+' '+str(alp2)+' '+str(lbd1)+' '+str(lbd2))
    return diff
Beispiel #12
0
def run_bayesian_ridge(x_df, y_df, analyspar):
    """
    run_bayesian_ridge(x_df, y_df, analyspar)

    Fits a pipeline made of a StandardScaler followed by a BayesianRidge
    model (with compute_score=True) on x_df / y_df, then passes the fitted
    pipeline, analyspar and the column names of x_df to
    log_sklearn_results under the name "bayes_ridge".
    """

    pipl = pipeline.Pipeline([
        ("scaler", preprocessing.StandardScaler()),
        ("model", linear_model.BayesianRidge(compute_score=True)),
    ])
    pipl.fit(x_df, y_df)

    log_sklearn_results(
        pipl, analyspar, name="bayes_ridge", var_names=x_df.columns)
Beispiel #13
0
def BayesianRidge(data, data2, data5, during):
    """Fit a Bayesian ridge model on ``data2`` and predict for ``data5``.

    The target is the ``'prmom' + during + '_f'`` column of ``data2``; the
    features are all remaining columns of ``data2`` except the four
    ``prmom*_f`` columns, ``uniqcode`` and ``date``, with missing values
    replaced by 0. Predictions are made on ``data5`` prepared the same way
    (additionally dropping ``pred``) and stored in ``data['pred_bay']``.

    Returns ``data``, which is modified in place.
    """
    excluded = ['prmom1d_f', 'prmom1w_f', 'prmom2w_f', 'prmom3w_f',
                'uniqcode', 'date']

    target = np.array(data2['prmom' + during + '_f'])
    train_features = np.array(data2.drop(excluded, axis=1).fillna(0))

    reg = linear_model.BayesianRidge()
    reg.fit(train_features, target)

    predict_features = np.array(
        data5.drop(excluded + ['pred'], axis=1).fillna(0))
    data['pred_bay'] = reg.predict(predict_features)
    return data
Beispiel #14
0
def _load_trip_files(paths):
    """Read every file in ``paths`` with ``read_data`` and stack the results."""
    # Local import: the surrounding code only shows ``DataFrame`` in scope.
    import pandas as pd

    # DataFrame.append was removed in pandas 2.0; concat is its replacement.
    frames = [read_data(path) for path in paths]
    return pd.concat(frames).reset_index(drop=True)


def main():
    """Train on the 2016 bikeshare data and evaluate on the 2017 data.

    Both years are loaded quarter by quarter, passed through ``map_data``,
    ``drop_time`` and ``date_as_obj``, and used without the ``Counts`` and
    ``Date`` columns as features. A Bayesian ridge and an ordinary linear
    regression are fitted on 2016 ``Counts``; their 2017 predictions are
    printed, scored with ``do_analysis`` and plotted with ``make_graph``.

    Returns
    -------
    The prepared 2016 data.
    """
    train_files = [
        "2016-capitalbikeshare-tripdata/2016Q1-capitalbikeshare-tripdata.csv",
        "2016-capitalbikeshare-tripdata/2016Q2-capitalbikeshare-tripdata.csv",
        "2016-capitalbikeshare-tripdata/2016Q3-capitalbikeshare-tripdata.csv",
        "2016-capitalbikeshare-tripdata/2016Q4-capitalbikeshare-tripdata.csv",
    ]
    test_files = [
        "2017-capitalbikeshare-tripdata/2017Q1-capitalbikeshare-tripdata.csv",
        "2017-capitalbikeshare-tripdata/2017Q2-capitalbikeshare-tripdata.csv",
        "2017-capitalbikeshare-tripdata/2017Q3-capitalbikeshare-tripdata.csv",
        "2017-capitalbikeshare-tripdata/2017Q4-capitalbikeshare-tripdata.csv",
    ]

    data = _load_trip_files(train_files)
    print(data)
    data = date_as_obj(drop_time(map_data(data)))
    fitData = data.drop(["Counts", "Date"], axis=1)

    # Select data to test with the regressions
    testdata = date_as_obj(drop_time(map_data(_load_trip_files(test_files))))
    testfit = testdata.drop(["Counts", "Date"], axis=1)

    # Bayesian Ridge Regression from sklearn
    brr = skLm.BayesianRidge()
    brr.fit(fitData, data["Counts"])
    resultsBRR = DataFrame(brr.predict(testfit))

    # Ordinary Linear Regression from sklearn
    olr = skLm.LinearRegression()
    olr.fit(fitData, data["Counts"])
    resultsOLR = DataFrame(olr.predict(testfit))

    print(data.head(10))
    print(testdata.head(10))
    print("Bayesian Ridge Regression")
    print(resultsBRR.head(10))
    print("RMSE=", do_analysis(resultsBRR, testdata))
    print("Ordinary Linear Regression")
    print(resultsOLR.head(10))
    print("RMSE=", do_analysis(resultsOLR, testdata))
    make_graph(testdata, resultsBRR, resultsOLR)
    return data
Beispiel #15
0
def plot_ml_model_regression(X_train, X_test, y_train, y_test):
    """Fit several regressors, plot their residuals and rank their scores.

    Each regressor is fitted on the training data and scored on the test
    data with its own ``score`` method. A residual scatter plot is shown per
    regressor, and a table of scores sorted in descending order is printed
    at the end.

    Parameters
    ----------
    X_train, X_test : training and test features.
    y_train, y_test : training and test targets.
    """
    pyplot.close('all')

    algos = ["LR", "Lasso", "Ridge", "Bayesian", "SVR", "DT", "RF", "KNNR"]

    rgrs = [
        linear_model.LinearRegression(),
        linear_model.Lasso(),
        linear_model.Ridge(),
        linear_model.BayesianRidge(),
        svm.SVR(),
        tree.DecisionTreeRegressor(),
        RandomForestRegressor(),
        KNeighborsRegressor()
    ]

    cv_results = []

    for reg in rgrs:
        reg.fit(X_train, y_train)
        # Keep the score numeric: storing it as text would make the final
        # sort lexicographic instead of numeric.
        var_score = reg.score(X_test, y_test)
        print('Variance score:', var_score)
        cv_results.append(var_score)

        train_pred = reg.predict(X_train)
        test_pred = reg.predict(X_test)
        pyplot.style.use('fivethirtyeight')
        pyplot.scatter(train_pred,
                       train_pred - y_train,
                       color="green",
                       s=10,
                       label='Train data')
        pyplot.scatter(test_pred,
                       test_pred - y_test,
                       color="blue",
                       s=10,
                       label='Test data')
        pyplot.hlines(y=0, xmin=0, xmax=50, linewidth=2)
        pyplot.legend(loc='upper right')
        pyplot.title("Residual errors")
        pyplot.show()

    # The column keeps its historical name "Accuracy"; the values are the
    # regressors' score() results.
    cv_mean = pd.DataFrame(cv_results, index=algos)
    cv_mean.columns = ["Accuracy"]
    print(cv_mean.sort_values(by="Accuracy", ascending=False))
    '''
def doBayesianRidge(trainInput, trainOutput, predictors):
    """Fit a Bayesian ridge model and print its 10-fold mean R^2 score.

    Parameters
    ----------
    trainInput : frame supporting ``.loc``; only ``predictors`` are used.
    trainOutput : training targets.
    predictors : column labels selecting the features.
    """
    features = trainInput.loc[:, predictors]

    alg = linear_model.BayesianRidge()
    alg.fit(features, trainOutput)

    fold_scores = model_selection.cross_val_score(
        alg, features, trainOutput, cv=10, scoring='r2', n_jobs=-1)
    cvMeanScore = fold_scores.mean()

    print("CV Average Score for BayesianRidge Regression:", cvMeanScore)
Beispiel #17
0
    def test_get_set(self):
        """``set_params`` values and nested estimator params are readable.

        Checks that ``get_params`` reflects ``lags`` and ``type_scaling``
        set through ``set_params``, and exposes the wrapped regressor's
        ``lambda_1`` as ``obj__lambda_1``.
        """
        np.random.seed(123)
        X = np.random.rand(25, 3)
        X[:, 0] = 100 * X[:, 0]
        X[:, 2] = 25 * X[:, 2]

        regr = linear_model.BayesianRidge()

        reconfigured = ns.MTS(
            regr,
            n_hidden_features=10,
            direct_link=False,
            bias=False,
            nodes_sim="sobol",
            type_scaling=("std", "minmax", "std"),
            activation_name="relu",
            n_clusters=0,
        )
        reconfigured.set_params(
            n_hidden_features=5,
            activation_name="relu",
            a=0.01,
            nodes_sim="sobol",
            bias=True,
            direct_link=True,
            n_clusters=None,
            type_clust="kmeans",
            type_scaling=("std", "std", "std"),
            seed=123,
            lags=1,
        )

        with_dropout = ns.MTS(
            regr,
            n_hidden_features=10,
            direct_link=False,
            bias=False,
            dropout=0.5,
            nodes_sim="sobol",
            type_scaling=("std", "minmax", "std"),
            activation_name="relu",
            n_clusters=0,
        )

        lags_ok = reconfigured.get_params()["lags"] == 1
        scaling_ok = (reconfigured.get_params()["type_scaling"] ==
                      ("std", "std", "std"))
        lambda_ok = with_dropout.get_params()["obj__lambda_1"] == 1e-06
        self.assertTrue(lags_ok & scaling_ok & lambda_ok)
Beispiel #18
0
    def init_parameters(self,
                        regression_model="BayesianRidge",
                        incremental=True):
        """Record the settings and create the matching regression model.

        Parameters
        ----------
        regression_model : str
            "BayesianRidge" (default) or "RandomForest".
        incremental : bool
            Stored as ``self.incremental``.

        Raises
        ------
        Exception
            If ``regression_model`` is neither of the supported names. The
            two settings are stored before the check is made.
        """
        self.incremental = incremental
        self.regression_model = regression_model

        if regression_model not in ("BayesianRidge", "RandomForest"):
            raise Exception(
                "Wrongly defined regression model. Available models are: 'RandomForest' and 'BayesianRidge'"
            )

        if regression_model == "BayesianRidge":
            self.lin_model = linear_model.BayesianRidge()
        else:
            self.lin_model = RandomForestRegressor(n_estimators=40)
Beispiel #19
0
 def fit(self, X, y=None, *args, **kwargs):
     """Grid-search the PCA size of a PCA + Bayesian ridge pipeline.

     ``X`` is scaled with ``preprocessing.scale`` and the number of PCA
     components is searched over 1, 2, 4, 6, 8 and 10 with R^2 scoring. The
     best parameters, the per-candidate results and the best score are
     printed. Extra positional and keyword arguments are accepted but not
     used.

     Returns
     -------
     self
     """
     # print(...) with a single argument behaves the same on Python 2 and 3.
     print("Bayesian Ridge")
     X = preprocessing.scale(X)
     # NOTE: the regression step is named 'lasso' although it holds a
     # BayesianRidge estimator; the name is kept unchanged.
     pipelineFit = Pipeline([('pca', decomposition.PCA()),
                             ('lasso', linear_model.BayesianRidge())])
     grid_search = GridSearchCV(pipelineFit,
                                dict(pca__n_components=[1, 2, 4, 6, 8, 10]),
                                scoring='r2')
     grid_search.fit(X, y)
     acc = grid_search.best_score_
     print(grid_search.best_params_)
     # grid_scores_ was removed from scikit-learn in favour of cv_results_;
     # fall back to it only on releases that predate cv_results_.
     if hasattr(grid_search, 'cv_results_'):
         print(grid_search.cv_results_)
     else:
         print(grid_search.grid_scores_)
     print("r2: " + str(acc))
     return self
Beispiel #20
0
def main_scut(filenames, X, y):
    """
    train and eval on SCUT-FBP benchmark with HMTNet descriptor and Bayesian Ridge Regression

    Prints MAE, RMSE and Pearson correlation on a 20% hold-out split and
    writes the per-file ground truth and prediction to
    ./scutfbp_output.xlsx.

    :param filenames: one name per row of X (assumed to be aligned with X)
    :param X: feature matrix
    :param y: target values
    :return: None
    """
    import math

    from sklearn.model_selection import train_test_split

    # Split the filenames together with X and y so that every test row keeps
    # its own filename; indexing the unsplit list by test position pairs the
    # wrong names with the predictions. The split of X and y is unchanged.
    (X_train, X_test, y_train, y_test,
     _filenames_train, filenames_test) = train_test_split(X,
                                                          y,
                                                          filenames,
                                                          test_size=0.2,
                                                          random_state=40)
    reg = linear_model.BayesianRidge()
    # reg = linear_model.Ridge(alpha=50.0)
    # reg = linear_model.Lasso(alpha=0.005)
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)

    y_true_arr = np.array(y_test)
    y_pred_arr = np.array(y_pred).ravel()

    mae_lr = round(mean_absolute_error(y_true_arr, y_pred_arr), 4)
    # math.sqrt replaces np.math.sqrt, which is not available in NumPy 2.
    rmse_lr = round(math.sqrt(mean_squared_error(y_true_arr, y_pred_arr)), 4)
    pc = round(np.corrcoef(y_true_arr, y_pred_arr)[0, 1], 4)

    print(
        '===============The Mean Absolute Error of Trans HMT-Net is {0}===================='
        .format(mae_lr))
    print(
        '===============The Root Mean Square Error of Trans HMT-Net is {0}===================='
        .format(rmse_lr))
    print(
        '===============The Pearson Correlation of Trans HMT-Net is {0}===================='
        .format(pc))

    col = ['filename', 'gt', 'pred']
    # Convert once instead of once per row.
    gt_values = y_true_arr.tolist()
    pred_values = np.array(y_pred).tolist()
    rows = [[name, gt, pred]
            for name, gt, pred in zip(filenames_test, gt_values, pred_values)]

    df = pd.DataFrame(rows, columns=col)
    df.to_excel("./scutfbp_output.xlsx", sheet_name='Output', index=False)
    print('Output Excel has been generated~')
def BayesianRidge_model(X_train, X_valid, y_train, y_test, y_name,
                        y_train_mean, y_train_std):
    """Fit one Bayesian ridge model per target column and save the results.

    For every entry of ``y_name`` the matching column of ``y_train`` is
    fitted, predictions are made for ``X_valid`` and ``X_train``, R2 values
    are computed with ``get_R2``, and the predictions (``.npz``), the model
    (``.pkl``) and a plot are written using the target name as file prefix.

    Parameters
    ----------
    X_train, X_valid : training and validation features.
    y_train, y_test : 2-D targets, indexed as ``[:, column]``.
    y_name : sequence of str, one name per target column.
    y_train_mean, y_train_std : per-column values stored next to the
        predictions.

    Returns
    -------
    The model fitted for the last target, or ``None`` if ``y_name`` is empty.
    """
    model_name = 'BayesianRidge'
    # Single-argument print(...) calls give the same output on Python 2 and 3.
    print('head items to fit are:  %s' % (y_name,))

    # Defined up front so an empty y_name returns None instead of failing
    # on an unbound name.
    model = None
    for head_item, head_name in enumerate(y_name):

        y_train_item = y_train[:, head_item]
        y_test_item = y_test[:, head_item]
        print('********************************** Fitting %s on %s Data **********************************' % (
            model_name, head_name))
        # Declare model
        model = linear_model.BayesianRidge(compute_score=True)

        # Fit model
        model.fit(X_train, y_train_item)

        # Get predictions
        y_valid_predicted = model.predict(X_valid)

        training_prediction = model.predict(X_train)

        R2s_training = get_R2(y_train_item, training_prediction)
        print('R2 on training set =  %s' % (R2s_training,))

        # Get metric of fit
        R2s = get_R2(y_test_item, y_valid_predicted)
        print('R2s:', R2s)
        print('saving prediction ...')
        np.savez(head_name + '_%s_ypredicted.npz' % model_name,
                 y_test=y_test_item,
                 y_prediction=y_valid_predicted,
                 y_train_=y_train_item,
                 training_prediction=training_prediction,
                 y_train_mean=y_train_mean[head_item],
                 y_train_std=y_train_std[head_item])
        joblib.dump(model, head_name + '_%s.pkl' % model_name)
        print('plotting results...')
        plot_results(y_test_item,
                     y_valid_predicted,
                     head_name,
                     R2s,
                     model_name=model_name)

    return model
Beispiel #22
0
    def fit(self, X, y, sample_weight, fitOpt={}):
        ''' Fit the regression tree, then one Bayesian ridge model per leaf.

        Parameters
        ----------
        X: array-like, shape = [n_samples, n_features]
            The training input samples. Indexed as ``X[mask, :]`` below.

        y: array-like, shape = [n_samples]
            The target values (real numbers).

        sample_weight: array-like, shape = [n_samples] or None
            Sample weights, passed on to the parent class's ``fit``.

        fitOpt: dictionary (optional, default: {})
            Extra keyword options passed on to the parent class's ``fit``.
            See
            http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html
            for possibilities.

        Returns
        -------
        Self

        Notes
        -----
        Training samples are grouped by the value the plain tree predicts
        for them. For every distinct value a ``BayesianRidge`` model is
        fitted on the group and stored in ``self.leafParameters`` together
        with the group's largest and smallest target.
        '''

        # Fit and query the ordinary regression tree of the parent class
        plain_tree = super(DecisionTreeRegressorWithLinearLeafRegression,
                           self)
        plain_tree.fit(X, y, sample_weight, **fitOpt)
        tree_output = plain_tree.predict(X)

        # One linear model for all input points sharing a predicted value
        for leaf_value in np.unique(tree_output):
            in_leaf = tree_output == leaf_value
            leaf_model = linear_model.BayesianRidge()
            leaf_model.fit(X[in_leaf, :], y[in_leaf])
            self.leafParameters[leaf_value] = {
                "linearRegression": leaf_model,
                "max": np.max(y[in_leaf]),
                "min": np.min(y[in_leaf])
            }

        return self
Beispiel #23
0
def main():
    """Plot R^2 and mean absolute error of a Bayesian ridge fit vs. sample size.

    Reads ``data.csv`` (first 7 columns are features, the 8th is the
    target), plots the raw data, then for growing prefixes of the data fits
    a ``BayesianRidge`` model on a shuffled 90/10 split and records the test
    R^2 and mean absolute error. Both series are plotted at the end.
    """
    #load the data
    data = np.loadtxt('data.csv', delimiter=',', skiprows=1, usecols=range(0, 8))
    #load the header; the context manager closes the file once it is read
    with open('data.csv') as f:
        headers = next(csv.reader(f), None)
    data2 = data.transpose()
    X = data[:, 0:7]
    y = data[:, 7]

    # plot each scatters of data
    plot(headers, data2[1:7, :], data2[7, :])

    #init
    R = []
    Mean = []

    m = len(X)
    c = np.linspace(20, m, num=2000, endpoint=True, dtype=int)
    for i in c:
        data1 = data[0:i]
        X1 = X[0:i]
        y1 = y[0:i]
        ss = ShuffleSplit(n_splits=5, test_size=0.1, random_state=0)
        # NOTE(review): every split overwrites the previous one, so only the
        # last of the five splits is used below -- confirm this is intended.
        for train, test in ss.split(data1):
            X_train, X_test, y_train, y_test = X1[train], X1[test], y1[train], y1[test]
        print('Test data/Train data: %s/%s' % (len(X_test), len(X_train)))

        #linear regression
        model = linear_model.BayesianRidge()
        model.fit(X_train, y_train)
        y_predictions = model.predict(X_test)

        #R^2 (coefficient of determination) regression score function
        r = r2_score(y_test, y_predictions)
        R.append(r)
        print('R-squared: %.4f' % r, end="        ")

        # Mean absolute error regression loss
        mean = mean_absolute_error(y_test, y_predictions)
        Mean.append(mean)
        print('Mean absolute error : %.4f' % mean)

    plot_final(c, R, 'R^2')
    plot_final(c, Mean, 'Mean absolute error')
Beispiel #24
0
def doBayesianRegression(df, features):
    """Fit a Bayesian ridge model and report its train and test error.

    The split comes from ``createtesttrain(df, features, 6)`` and both
    errors are computed with ``calcerror``.

    Returns
    -------
    tuple
        ``(train_error, test_error)``.
    """
    x_train, x_test, y_train, y_test = createtesttrain(df, features, 6)
    model = linear_model.BayesianRidge()
    model.fit(x_train, y_train)

    train_error = calcerror(model.predict(x_train), y_train)
    train_message = "Train error = {}".format(train_error)
    print(train_message + " percent in Bayesian Regression")

    test_error = calcerror(model.predict(x_test), y_test)
    test_message = "Test error = {}".format(test_error)
    print(test_message + " percent in Bayesian Regression\n")
    return train_error, test_error
Beispiel #25
0
def Bayes(path):
    """Fit a Bayesian ridge model on an Excel sheet and predict a hold-out set.

    Rows with missing values are dropped. The last column is the target and
    the columns between the first and the last are the features; the first
    column is not used. 20% of the rows are held out with
    ``random_state=0``.

    Returns
    -------
    tuple
        ``(X_test, y_pred)`` for the held-out rows.
    """
    data = pd.read_excel(path)
    data.dropna(inplace=True)
    values = data.values
    last = len(data.columns) - 1
    features = values[:, 1:last]
    target = values[:, last]
    X_train, X_test, y_train, _y_test = train_test_split(features,
                                                         target,
                                                         test_size=0.2,
                                                         random_state=0)

    reg = linear_model.BayesianRidge()
    reg.fit(X_train, y_train)
    return (X_test, reg.predict(X_test))
 def test_model_bayesian_ridge(self):
     """A fitted BayesianRidge converts to ONNX and is dumped for comparison."""
     model, X = fit_regression_model(linear_model.BayesianRidge())
     input_spec = [("input", FloatTensorType([None, X.shape[1]]))]
     model_onnx = convert_sklearn(model, "bayesian ridge", input_spec)
     self.assertIsNotNone(model_onnx)
     # condition string handed to dump_data_and_model as allow_failure
     failure_condition = ("StrictVersion("
                          "onnxruntime.__version__)"
                          "<= StrictVersion('0.2.1')")
     dump_data_and_model(
         X,
         model,
         model_onnx,
         basename="SklearnBayesianRidge-Dec4",
         allow_failure=failure_condition,
     )
 def __init__(self, fileName):
     """Train a comment-score regressor from a JSON-lines file.

     Every line of ``fileName`` is parsed as a JSON object whose ``body`` is
     the comment text and whose ``score`` is the target. The texts are
     vectorised (stemmed counts, then TF-IDF), reduced to the 1000 best
     features and used to fit a ``BayesianRidge`` model.

     Parameters
     ----------
     fileName : str
         Path of the JSON-lines training file.
     """
     comments = []
     scores = []
     # Open the file named by the argument; iterating over the bare name
     # ``file`` never read fileName at all.
     with open(fileName) as source:
         for line in source:
             data = json.loads(line)
             comments.append(data["body"])
             scores.append(data["score"])
     self.vect = StemmedCountVectorizer(stop_words='english')
     self.trainer = self.vect.fit_transform(comments)
     self.tfidf = TfidfTransformer()
     self.trainer2 = self.tfidf.fit_transform(self.trainer)
     self.reducer = sklearn.feature_selection.SelectKBest(k=1000)
     reduced_data = self.reducer.fit_transform(self.trainer2, scores)
     self.reg = linear_model.BayesianRidge()
     self.reg.fit(reduced_data.toarray(), scores)
Beispiel #28
0
def train_and_eval_scutfbp(train_set_vector, test_set_vector, trainset_label,
                           testset_label, testset_filenames):
    """
    train and eval on SCUT-FBP dataset

    Fits a Bayesian ridge model, prints MAE, RMSE and Pearson correlation on
    the test set, saves the model to ./model and the predictions and metrics
    to ./result.

    :param train_set_vector: training features
    :param test_set_vector: test features
    :param trainset_label: training targets
    :param testset_label: test targets
    :param testset_filenames: names passed on to out_result
    :return: None
    """
    for caption, vectors in (("The shape of training set is {0}",
                              train_set_vector),
                             ("The shape of test set is {0}",
                              test_set_vector)):
        print(caption.format(np.array(vectors).shape))

    reg = linear_model.BayesianRidge()
    reg.fit(train_set_vector, trainset_label)
    predicted_label = reg.predict(test_set_vector)

    mae_lr = round(mean_absolute_error(testset_label, predicted_label), 4)
    mse = mean_squared_error(testset_label, predicted_label)
    rmse_lr = round(math.sqrt(mse), 4)
    pc = round(np.corrcoef(testset_label, predicted_label)[0, 1], 4)

    banners = (
        ('===============The Mean Absolute Error of Model is {0}====================',
         mae_lr),
        ('===============The Root Mean Square Error of Model is {0}====================',
         rmse_lr),
        ('===============The Pearson Correlation of Model is {0}====================',
         pc),
    )
    for banner, value in banners:
        print(banner.format(value))

    # persist the fitted model
    mkdirs_if_not_exist('./model')
    joblib.dump(reg, './model/BayesRidge_SCUTFBP.pkl')
    print('The regression model has been persisted...')

    # persist predictions and the metric summary
    mkdirs_if_not_exist('./result')
    out_result(testset_filenames,
               predicted_label,
               testset_label,
               None,
               path='./result/Pred_GT_SCUTFBP.csv')

    metrics_frame = pd.DataFrame([mae_lr, rmse_lr, pc])
    metrics_frame.to_csv('./result/BayesRidge_SCUTFBP.csv', index=False)
    print('The result csv file has been generated...')
    def model_train_and_predict(self, data_l):
        """Predict the admission chance of one new sample with six models.

        Every model is fitted on ``self.data`` (features: all columns except
        the first and the last; target: the ``'Chance of Admit '`` column)
        and asked to predict ``data_l``.

        Parameters
        ----------
        data_l : sequence
            Feature values of the new sample; reshaped to a single row.

        Returns
        -------
        tuple
            Six predictions multiplied by 100 and rounded to 3 decimals, in
            the order: linear regression, decision tree, ridge, lasso,
            least-angle lasso, Bayesian ridge.
        """
        cols = self.data.columns
        features = cols[1:-1]
        target = cols[-1]  # name of the last column; not used below
        X = self.data[features]
        y = self.data['Chance of Admit ']
        # data_l is the new line of data to predict for
        Xnew = np.array(data_l).reshape(1, -1)

        estimator_classes = (
            linear_model.LinearRegression,  # Linear Regression
            DecisionTreeRegressor,  # Decision Tree Regression
            linear_model.Ridge,  # Ridge Regression
            linear_model.Lasso,  # Lasso Linear Model
            linear_model.LassoLars,  # Least Angle Lasso Regression
            linear_model.BayesianRidge,  # Bayesian Regression
        )

        percentages = []
        for estimator_class in estimator_classes:
            estimator = estimator_class()
            estimator.fit(X, y)
            prediction = estimator.predict(Xnew)
            percentages.append(round(prediction[0] * 100, 3))
        return tuple(percentages)
def build_end_value_prediction_model(X, y, type_="linearsvr"):
    """Fit and return the regression model selected by ``type_``.

    Parameters
    ----------
    X, y : training features and targets passed to ``fit``.
    type_ : str
        "svr" (SVR), "linear" (linear regression) or "bayes" (Bayesian
        ridge); any other value, including the default, selects LinearSVR.

    Returns
    -------
    The fitted model. The time taken is printed.
    """
    # Keep the full-resolution timestamp: truncating it with int() could
    # distort the reported duration by up to a second.
    start = time.time()

    if type_ == "svr":
        model = svm.SVR()  # SVR regression
    elif type_ == "linear":
        model = linear_model.LinearRegression()  # linear regression
    elif type_ == "bayes":
        model = linear_model.BayesianRidge()  # Bayes
    else:
        model = svm.LinearSVR()

    model.fit(X, y)
    # A single-argument print(...) gives the same output on Python 2 and 3.
    print("End Value Prediction Model Fit Time :  %s" % (time.time() - start,))

    return model