def predict_affinities_and_report_results():
    """Predict affinities with a Bayesian ridge model and write the results.

    Loads the representation stored at the module-level
    ``skempi_vectors_path``, derives the multiplied vectors from it and hands
    them, together with a fresh ``BayesianRidge`` estimator, to
    ``predictAffinityWithModel``. The two returned result objects are written
    as CSV files whose names embed the module-level ``representation_name``.
    """
    vectors = load_representation(skempi_vectors_path)
    products = calculate_vector_multiplications(vectors)
    regressor = linear_model.BayesianRidge()
    summary, detail = predictAffinityWithModel(regressor, products)

    summary_path = r"../results/Affinity_prediction_skempiv1_{0}.csv".format(
        representation_name)
    summary.to_csv(summary_path, index=False)

    detail_path = r"../results/Affinity_prediction_skempiv1_{0}_detail.csv".format(
        representation_name)
    detail.to_csv(detail_path, index=False)
def infer_from_img(img_file):
    """Print a regression score for one image and the prediction latency.

    The image is read with OpenCV, resized to 224x224 and passed to
    ``extract_conv_feature`` (layer ``conv5_1``); the result is flattened
    into a single feature row for the regressor.

    NOTE(review): as in the original code, the regressor is fitted on one
    random sample with target 1, so the printed score is a placeholder
    rather than a trained prediction -- confirm whether a persisted model
    should be loaded here instead.

    Parameters
    ----------
    img_file : str
        Path of the image to score.
    """
    img = cv2.imread(img_file)
    img = cv2.resize(img, (224, 224))

    # scikit-learn estimators expect a 2-D (n_samples, n_features) matrix,
    # so the extracted feature is reshaped into exactly one row.
    feat = np.asarray(extract_conv_feature(img, layer_name='conv5_1')).reshape(1, -1)

    # The estimator takes no data in its constructor, and the target passed
    # to fit() must be 1-D with one entry per sample.
    reg = linear_model.BayesianRidge()
    reg.fit(np.random.rand(1, feat.shape[1]), np.array([1]))

    tik = time.time()
    score = reg.predict(feat)
    tok = time.time()

    # The elapsed time is scaled by 1000, so the unit is milliseconds.
    print('Beauty score is {0}, it takes {1} milliseconds!'.format(
        score, (tok - tik) * 1000))
def get_model(x):
    """Return a newly constructed scikit-learn estimator for the key ``x``.

    Only the requested estimator is instantiated; previously every call
    built all ten estimators just to return one of them.

    Parameters
    ----------
    x : str
        One of 'LinearRegression', 'Ridge', 'RidgeCV', 'Lasso', 'LassoLars',
        'BayesianRidge', 'SGDRegressor', 'SGDClassifier', 'SVM' or 'SVR'.

    Returns
    -------
    A fresh, unfitted estimator instance.

    Raises
    ------
    KeyError
        If ``x`` is not one of the supported names.
    """
    # name -> (estimator class, constructor keyword arguments)
    registry = {
        'LinearRegression': (linear_model.LinearRegression, {}),
        'Ridge': (linear_model.Ridge, {'alpha': .5}),
        'RidgeCV': (linear_model.RidgeCV, {'alphas': [0.1, 1.0, 10.0]}),
        'Lasso': (linear_model.Lasso, {'alpha': 0.1}),
        'LassoLars': (linear_model.LassoLars, {'alpha': .1}),
        'BayesianRidge': (linear_model.BayesianRidge, {}),
        'SGDRegressor': (linear_model.SGDRegressor, {}),
        'SGDClassifier': (linear_model.SGDClassifier, {}),
        'SVM': (svm.SVC, {}),
        'SVR': (svm.SVR, {}),
    }
    estimator_cls, kwargs = registry[x]
    return estimator_cls(**kwargs)
def __init__(self,
             ml_algs=('LR', 'GPR', 'MLP', 'DL', 'SVR', 'RFR', 'DTR', 'GBR')):
    """Create one regressor per recognised algorithm code in ``ml_algs``.

    Parameters
    ----------
    ml_algs : iterable of str
        Algorithm codes. Recognised codes are 'BRR', 'RFR', 'DTR', 'GBR',
        'LR', 'GPR', 'SVR' and 'MLP'. Any other code (including 'DL', which
        is part of the default but has no builder) is skipped silently,
        exactly as before.

    The default is an immutable tuple so that it can never be shared and
    mutated across instances.
    """
    super().__init__()

    # Lazy builders: an estimator is only constructed when its code is asked for.
    builders = {
        'BRR': lambda: linear_model.BayesianRidge(),
        'RFR': lambda: RandomForestRegressor(n_estimators=100),
        'DTR': lambda: DecisionTreeRegressor(),
        'GBR': lambda: GradientBoostingRegressor(),
        'LR': lambda: LinearRegression(),
        'GPR': lambda: GaussianProcessRegressor(
            kernel=DotProduct() + WhiteKernel(), random_state=0),
        'SVR': lambda: SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1),
        'MLP': lambda: MLPRegressor(hidden_layer_sizes=(100, ),
                                    activation='relu',
                                    solver='adam',
                                    alpha=0.001,
                                    batch_size='auto',
                                    learning_rate='constant',
                                    learning_rate_init=0.01,
                                    power_t=0.5,
                                    max_iter=1000,
                                    shuffle=True,
                                    random_state=0,
                                    tol=0.0001,
                                    verbose=False,
                                    warm_start=False,
                                    momentum=0.9,
                                    nesterovs_momentum=True,
                                    early_stopping=False,
                                    validation_fraction=0.1,
                                    beta_1=0.9,
                                    beta_2=0.999,
                                    epsilon=1e-08),
    }

    self.regressors = []
    for alg in ml_algs:
        build = builders.get(alg)
        if build is not None:
            self.regressors.append(build())
def __init__(self, method, params, i=0):
    """Build the regression model selected by ``method[i]``.

    Parameters
    ----------
    method : sequence of str
        Algorithm codes; only entry ``i`` is inspected here.
    params : sequence of dict
        Keyword arguments for the estimators; entry ``i`` is unpacked into
        the constructor of the selected estimator.
    i : int, optional
        Index into ``method`` and ``params`` (default 0).

    ``self.model`` is only assigned when ``method[i]`` is one of 'PLS',
    'OLS', 'OMP', 'LASSO', 'Elastic Net', 'Ridge', 'BRR', 'ARD', 'LARS',
    'SVR' or 'KRR'.

    NOTE(review): ``algorithm_list`` advertises 'Lasso', 'Bayesian Ridge',
    'GP', 'LASSO LARS' and 'GBR', none of which is dispatched under that
    spelling here -- confirm against the callers.
    """
    self.algorithm_list = [
        'PLS', 'GP', 'OLS', 'OMP', 'Lasso', 'Elastic Net', 'Ridge',
        'Bayesian Ridge', 'ARD', 'LARS', 'LASSO LARS', 'SVR', 'KRR', 'GBR'
    ]
    self.method = method
    self.outliers = None
    self.ransac = False

    # Lazy constructors, so a name is only resolved for the selected method.
    constructors = {
        'PLS': lambda kw: PLSRegression(**kw),
        'OLS': lambda kw: linear.LinearRegression(**kw),
        'OMP': lambda kw: linear.OrthogonalMatchingPursuit(**kw),
        'LASSO': lambda kw: linear.Lasso(**kw),
        'Elastic Net': lambda kw: linear.ElasticNet(**kw),
        'Ridge': lambda kw: linear.Ridge(**kw),
        'BRR': lambda kw: linear.BayesianRidge(**kw),
        'ARD': lambda kw: linear.ARDRegression(**kw),
        'LARS': lambda kw: linear.Lars(**kw),
        'SVR': lambda kw: svm.SVR(**kw),
        'KRR': lambda kw: kernel_ridge.KernelRidge(**kw),
    }
    # These methods work on a shallow copy of their parameter set.
    uses_param_copy = ('OMP', 'LASSO', 'Elastic Net', 'Ridge', 'LARS')

    selected = self.method[i]
    construct = constructors.get(selected)
    if construct is not None:
        kwargs = params[i]
        if selected in uses_param_copy:
            kwargs = copy.copy(kwargs)
        self.model = construct(kwargs)
def load_default(self, machine_list='basic'):
    """
    Fit a default set of scikit-learn regressors on ``X_k_`` / ``y_k_``.

    Parameters
    ----------
    machine_list: optional, string or list of strings
        Either the preset name 'basic' (tree, ridge, random_forest, svm),
        the preset name 'advanced' (adds lasso, bayesian_ridge and sgd), or
        an explicit list of machine names. Names that are not recognised
        are ignored.

    Returns
    -------
    self : returns an instance of self.

    Notes
    -----
    ``self.estimators_`` is rebuilt from scratch. A machine whose
    construction or fit raises ``ValueError`` is skipped.
    """
    if machine_list == 'basic':
        machine_list = ['tree', 'ridge', 'random_forest', 'svm']
    elif machine_list == 'advanced':
        machine_list = [
            'lasso', 'tree', 'ridge', 'random_forest', 'svm',
            'bayesian_ridge', 'sgd'
        ]

    # Lazy factories: nothing is constructed until a machine is requested.
    factories = {
        'lasso': lambda: linear_model.LassoCV(random_state=self.random_state),
        'tree': lambda: DecisionTreeRegressor(random_state=self.random_state),
        'ridge': lambda: linear_model.RidgeCV(),
        'random_forest': lambda: RandomForestRegressor(
            random_state=self.random_state),
        'svm': lambda: SVR(),
        'sgd': lambda: linear_model.SGDRegressor(
            random_state=self.random_state),
        'bayesian_ridge': lambda: linear_model.BayesianRidge(),
    }

    self.estimators_ = {}
    for machine in machine_list:
        make = factories.get(machine)
        if make is None:
            continue
        try:
            self.estimators_[machine] = make().fit(self.X_k_, self.y_k_)
        except ValueError:
            continue
    return self
def set_reg_mdl(reg_mdl, alpha, normalize):
    """Return ``(estimator, code)`` for the regression model key ``reg_mdl``.

    Supported keys: 'EN' (elastic net), 'BR' (Bayesian ridge), 'RG' (ridge),
    'LS' (lasso) and 'LR' (ordinary least squares). ``alpha`` is only used
    by 'EN' and 'LS'; ``normalize`` is forwarded to every model except 'LR'.
    The integer returned next to the estimator is 3 for 'EN', 2 for 'BR',
    'RG' and 'LS', and 1 for 'LR'.
    NOTE(review): the meaning of that integer is not visible here -- confirm
    against the caller.

    For any other key an error line is printed through ``s_ut.my_print`` and
    ``(None, None)`` is returned.
    """
    if reg_mdl == 'LR':  # OLS regression
        return l_mdl.LinearRegression(), 1
    if reg_mdl == 'LS':  # Lasso regression
        return l_mdl.Lasso(alpha=alpha, normalize=normalize), 2
    if reg_mdl == 'RG':  # Ridge regression
        return l_mdl.Ridge(normalize=normalize), 2
    if reg_mdl == 'BR':  # Bayesian Ridge
        return l_mdl.BayesianRidge(normalize=normalize), 2
    if reg_mdl == 'EN':  # elastic net regression
        return l_mdl.ElasticNet(alpha=alpha, normalize=normalize), 3

    message = ('pid: ' + str(os.getpid()) +
               ' ERROR: invalid regression model: ' + str(reg_mdl))
    s_ut.my_print(message)
    return None, None
def LinearRegression_to_BayesianRidge(x_df, y_df):
    """Fit OLS and Bayesian ridge on the same data and return both scores.

    Parameters
    ----------
    x_df : features passed unchanged to ``fit`` and ``score``.
    y_df : targets passed unchanged to ``fit`` and ``score``.

    Returns
    -------
    dict
        ``{"LinearRegression": ..., "BayesianRidge": ...}`` holding the value
        of each estimator's ``score`` on the data it was fitted on.
    """
    reg = linear_model.LinearRegression()
    reg.fit(x_df, y_df)

    rid = linear_model.BayesianRidge()
    rid.fit(x_df, y_df)

    # The comma between the two entries was missing, which was a syntax error.
    scores = {
        "LinearRegression": reg.score(x_df, y_df),
        "BayesianRidge": rid.score(x_df, y_df),
    }
    return scores
def test_model_bayesian_ridge_return_std(self):
    """Converted BayesianRidge must reproduce predictions and their std.

    Fits a model on 20 samples with 2 features, converts it with the
    ``return_std`` option and compares the first two ONNX outputs with
    ``model.predict(X, return_std=True)`` to 4 decimals.
    """
    model, X = fit_regression_model(
        linear_model.BayesianRidge(), n_features=2, n_samples=20)

    input_types = [("input", FloatTensorType([None, X.shape[1]]))]
    conversion_options = {linear_model.BayesianRidge: {'return_std': True}}
    model_onnx = convert_sklearn(
        model, "bayesian ridge", input_types, options=conversion_options)
    self.assertIsNotNone(model_onnx)

    session = InferenceSession(model_onnx.SerializeToString())
    onnx_outputs = session.run(None, {'input': X})

    expected_pred, expected_std = model.predict(X, return_std=True)
    assert_almost_equal(expected_pred, onnx_outputs[0].ravel(), decimal=4)
    assert_almost_equal(expected_std, onnx_outputs[1].ravel(), decimal=4)
def greedy_select_features(self):
    """Greedy forward feature selection scored with a BayesianRidge model.

    Columns of ``self.train_`` / ``self.test_`` are renamed to '0', '1', ...
    Each round tries every unused column on top of the current set, keeps
    the candidate with the lowest ``self.ccv`` score and remembers the best
    set seen overall. The search stops early once the current set holds
    more than five features beyond the best set. Progress is checkpointed
    under the key 'chosen_features' as ``(features, score, finished)`` and a
    finished or partial checkpoint is reused unless ``self.debug_`` is set.
    Finally both frames are reduced to the best feature set.
    """
    print('initial shapes:', self.train_.shape, self.test_.shape)
    saved = None if self.debug_ else self.load('chosen_features')

    # Identity test: `== None` is unidiomatic and can misbehave on objects
    # that overload equality.
    if saved is None:
        g_best_score = 1e9
        g_best_features = []
        current = set()
        finished = False
    else:
        g_best_features, g_best_score, finished = saved
        current = set(g_best_features)
        print('SFS REUSE:', g_best_score, g_best_features, self.now())

    num_columns = self.train_.shape[1]
    col_names = [str(c) for c in range(num_columns)]
    self.train_.columns = col_names
    self.test_.columns = col_names

    if not finished:
        y = self.y_.ravel()
        scorer = metrics.make_scorer(metrics.log_loss)
        loop_count = len(col_names) - len(g_best_features)
        for _ in range(loop_count):
            avail = set(col_names).difference(current)
            best_score = 1e9
            best_features = None
            # Lower score is better: keep the best single-column extension.
            for f in avail:
                newf = list(current | {f})
                score, _ = self.ccv(linear_model.BayesianRidge(),
                                    self.train_[newf], y, scorer)
                if best_score > score:
                    best_score = score
                    best_features = newf
            current = set(best_features)
            if g_best_score > best_score:
                g_best_score = best_score
                g_best_features = best_features
                print('new best:', g_best_score, g_best_features, self.now())
            # Give up after more than five additions without a new overall best.
            if len(best_features) - len(g_best_features) > 5:
                break
            # Checkpoint so an interrupted search can resume.
            self.save('chosen_features',
                      (g_best_features, g_best_score, False))
        # Mark the search as complete.
        self.save('chosen_features', (g_best_features, g_best_score, True))

    print('feature selection complete.', self.now())
    self.train_ = self.train_[g_best_features]
    self.test_ = self.test_[g_best_features]
def main(alp1, alp2, lbd1, lbd2):
    """Fit BayesianRidge with the given hyper-priors and report the test error.

    Parameters
    ----------
    alp1, alp2, lbd1, lbd2
        Passed to ``BayesianRidge`` as ``alpha_1``, ``alpha_2``, ``lambda_1``
        and ``lambda_2``.

    Returns
    -------
    The summed absolute difference between predictions and test targets,
    divided by the module-level ``TEST_SIZE``.

    NOTE(review): the printed label says "MSE" although the value computed
    is a mean absolute error; the label is left untouched in case the output
    is parsed elsewhere.
    """
    # Each .mat file is parsed once (it used to be loaded once per array).
    train_mat = sio.loadmat(TRAIN_DIR)
    test_mat = sio.loadmat(TEST_DIR)
    train_x = train_mat['trainx']
    train_y = train_mat['trainy'].ravel()
    test_x = test_mat['testx']
    test_y = test_mat['testy'].ravel()

    clf = linear_model.BayesianRidge(alpha_1=alp1, alpha_2=alp2,
                                     lambda_1=lbd1, lambda_2=lbd2)
    clf.fit(train_x, train_y)
    years = clf.predict(test_x)

    diff = float(0.0)
    for (i, j) in zip(years, test_y):
        diff += abs(i - j)
    diff /= TEST_SIZE

    print("MSE is: " + str(diff) + " with alpha: " + str(alp1) + ' ' +
          str(alp2) + ' ' + str(lbd1) + ' ' + str(lbd2))
    return diff
def run_bayesian_ridge(x_df, y_df, analyspar):
    """
    run_bayesian_ridge(x_df, y_df, analyspar)

    Fits a pipeline made of a StandardScaler step ("scaler") followed by a
    BayesianRidge step ("model", with compute_score=True) to the data, then
    passes the fitted pipeline to log_sklearn_results() under the name
    "bayes_ridge", using the columns of x_df as variable names.
    """
    scaler = preprocessing.StandardScaler()
    regressor = linear_model.BayesianRidge(compute_score=True)
    pipl = pipeline.Pipeline([("scaler", scaler), ("model", regressor)])
    pipl.fit(x_df, y_df)

    log_sklearn_results(
        pipl, analyspar, name="bayes_ridge", var_names=x_df.columns)
def BayesianRidge(data, data2, data5, during):
    """Fit a Bayesian ridge model on ``data2`` and store predictions in ``data``.

    The target is the column ``'prmom' + during + '_f'`` of ``data2``; the
    features are all columns of ``data2`` except the four ``prmom*_f``
    columns, 'uniqcode' and 'date', with missing values replaced by 0.
    Predictions are made for ``data5`` (same columns dropped, plus 'pred')
    and written to ``data['pred_bay']``.

    Returns ``data``, which is modified in place.
    """
    excluded = ['prmom1d_f', 'prmom1w_f', 'prmom2w_f', 'prmom3w_f',
                'uniqcode', 'date']

    targets = np.array(data2['prmom' + during + '_f'])
    train_matrix = np.array(data2.drop(excluded, axis=1).fillna(0))

    reg = linear_model.BayesianRidge()
    reg.fit(train_matrix, targets)

    predict_matrix = np.array(
        data5.drop(excluded + ['pred'], axis=1).fillna(0))
    data['pred_bay'] = reg.predict(predict_matrix)
    return data
def _read_quarters(paths):
    """Read every trip-data file in ``paths`` and stack them with a fresh index."""
    # Function-scope import keeps this block self-contained. DataFrame.append
    # was removed in pandas 2.0, so the files are combined with concat.
    import pandas as pd

    return pd.concat([read_data(path) for path in paths], ignore_index=True)


def _prepare(frame):
    """Apply the shared map_data -> drop_time -> date_as_obj preprocessing."""
    return date_as_obj(drop_time(map_data(frame)))


def main():
    """Train on the 2016 bikeshare data, evaluate on 2017 and plot the results.

    Fits a Bayesian ridge and an ordinary linear regression on every column
    except "Counts" and "Date", predicts the 2017 counts, prints the first
    rows and the value of ``do_analysis`` for each model, and draws the
    comparison with ``make_graph``.

    Returns
    -------
    The preprocessed 2016 training frame.
    """
    train_files = [
        "2016-capitalbikeshare-tripdata/2016Q1-capitalbikeshare-tripdata.csv",
        "2016-capitalbikeshare-tripdata/2016Q2-capitalbikeshare-tripdata.csv",
        "2016-capitalbikeshare-tripdata/2016Q3-capitalbikeshare-tripdata.csv",
        "2016-capitalbikeshare-tripdata/2016Q4-capitalbikeshare-tripdata.csv",
    ]
    test_files = [
        "2017-capitalbikeshare-tripdata/2017Q1-capitalbikeshare-tripdata.csv",
        "2017-capitalbikeshare-tripdata/2017Q2-capitalbikeshare-tripdata.csv",
        "2017-capitalbikeshare-tripdata/2017Q3-capitalbikeshare-tripdata.csv",
        "2017-capitalbikeshare-tripdata/2017Q4-capitalbikeshare-tripdata.csv",
    ]

    data = _read_quarters(train_files)
    print(data)
    data = _prepare(data)
    fitData = data.drop(["Counts", "Date"], axis=1)

    # Select data to test with the regressions
    testdata = _prepare(_read_quarters(test_files))
    testfit = testdata.drop(["Counts", "Date"], axis=1)

    # Bayesian Ridge Regression from sklearn
    brr = skLm.BayesianRidge()
    brr.fit(fitData, data["Counts"])
    resultsBRR = DataFrame(brr.predict(testfit))

    # Ordinary Linear Regression from sklearn
    olr = skLm.LinearRegression()
    olr.fit(fitData, data["Counts"])
    resultsOLR = DataFrame(olr.predict(testfit))

    print(data.head(10))
    print(testdata.head(10))
    print("Bayesian Ridge Regression")
    print(resultsBRR.head(10))
    print("RMSE=", do_analysis(resultsBRR, testdata))
    print("Ordinary Linear Regression")
    print(resultsOLR.head(10))
    print("RMSE=", do_analysis(resultsOLR, testdata))
    make_graph(testdata, resultsBRR, resultsOLR)
    return data
def plot_ml_model_regression(X_train, X_test, y_train, y_test):
    """Fit eight regressors, plot their residuals and print a score ranking.

    For each model (linear, lasso, ridge, Bayesian ridge, SVR, decision
    tree, random forest, k-nearest neighbours) the estimator is fitted on
    the training split, its ``score`` on the test split is printed, and a
    residual scatter plot of both splits is shown. A table of the scores,
    sorted from best to worst, is printed at the end.

    The scores are kept as numbers. They used to be converted to strings,
    which made the final ``sort_values`` order them lexicographically.
    """
    pyplot.close('all')

    algos = ["LR", "Lasso", "Ridge", "Bayesian", "SVR", "DT", "RF", "KNNR"]
    rgrs = [
        linear_model.LinearRegression(),
        linear_model.Lasso(),
        linear_model.Ridge(),
        linear_model.BayesianRidge(),
        svm.SVR(),
        tree.DecisionTreeRegressor(),
        RandomForestRegressor(),
        KNeighborsRegressor()
    ]

    cv_results = []
    for reg in rgrs:
        reg.fit(X_train, y_train)
        var_score = reg.score(X_test, y_test)
        print('Variance score:', var_score)
        cv_results.append(var_score)

        # Predict once per split and reuse for both axes of the scatter.
        train_pred = reg.predict(X_train)
        test_pred = reg.predict(X_test)

        pyplot.style.use('fivethirtyeight')
        pyplot.scatter(train_pred, train_pred - y_train,
                       color="green", s=10, label='Train data')
        pyplot.scatter(test_pred, test_pred - y_test,
                       color="blue", s=10, label='Test data')
        pyplot.hlines(y=0, xmin=0, xmax=50, linewidth=2)
        pyplot.legend(loc='upper right')
        pyplot.title("Residual errors")
        pyplot.show()

    cv_mean = pd.DataFrame(cv_results, index=algos)
    cv_mean.columns = ["Accuracy"]
    print(cv_mean.sort_values(by="Accuracy", ascending=False))
def doBayesianRidge(trainInput, trainOutput, predictors):
    """Print and return the mean 10-fold CV R^2 of a BayesianRidge model.

    Parameters
    ----------
    trainInput : frame indexed with ``.loc[:, predictors]`` to get features.
    trainOutput : regression targets.
    predictors : column labels to use as features.

    Returns
    -------
    The mean of the ten cross-validated ``r2`` scores. Earlier versions only
    printed the score and returned ``None``.
    """
    alg = linear_model.BayesianRidge()
    features = trainInput.loc[:, predictors]

    # No explicit fit beforehand: cross_val_score fits its own clone of the
    # estimator on every fold, so a prior fit was wasted work.
    cvMeanScore = model_selection.cross_val_score(
        alg, features, trainOutput, cv=10, scoring='r2', n_jobs=-1).mean()
    print("CV Average Score for BayesianRidge Regression:", cvMeanScore)
    return cvMeanScore
def test_get_set(self):
    """``set_params`` values must be visible through ``get_params``.

    Builds two ``ns.MTS`` wrappers around the same BayesianRidge learner,
    overrides a set of parameters on the first one and checks the resulting
    ``lags`` and ``type_scaling`` values, plus the ``obj__lambda_1`` value
    exposed by the second wrapper.
    """
    np.random.seed(123)
    # The sample matrix is generated as before, although nothing is fitted on it.
    X = np.random.rand(25, 3)
    X[:, 0] = 100 * X[:, 0]
    X[:, 2] = 25 * X[:, 2]

    base_learner = linear_model.BayesianRidge()

    first = ns.MTS(
        base_learner,
        n_hidden_features=10,
        direct_link=False,
        bias=False,
        nodes_sim="sobol",
        type_scaling=("std", "minmax", "std"),
        activation_name="relu",
        n_clusters=0,
    )
    first.set_params(
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        direct_link=True,
        n_clusters=None,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        seed=123,
        lags=1,
    )

    second = ns.MTS(
        base_learner,
        n_hidden_features=10,
        direct_link=False,
        bias=False,
        dropout=0.5,
        nodes_sim="sobol",
        type_scaling=("std", "minmax", "std"),
        activation_name="relu",
        n_clusters=0,
    )

    lags_ok = first.get_params()["lags"] == 1
    scaling_ok = first.get_params()["type_scaling"] == ("std", "std", "std")
    lambda_ok = second.get_params()["obj__lambda_1"] == 1e-06
    self.assertTrue(lags_ok & scaling_ok & lambda_ok)
def init_parameters(self, regression_model="BayesianRidge", incremental=True):
    """Store the configuration and create the matching regressor.

    Parameters
    ----------
    regression_model : str
        "BayesianRidge" or "RandomForest" (40 trees).
    incremental : bool
        Stored unchanged on ``self.incremental``.

    Raises
    ------
    Exception
        If ``regression_model`` is neither of the two supported names. Both
        attributes have already been stored at that point.
    """
    self.incremental = incremental
    self.regression_model = regression_model

    # Guard clause first, then build the selected model.
    if regression_model not in ("BayesianRidge", "RandomForest"):
        raise Exception(
            "Wrongly defined regression model. Available models are: 'RandomForest' and 'BayesianRidge'"
        )

    if regression_model == "BayesianRidge":
        self.lin_model = linear_model.BayesianRidge()
    else:
        self.lin_model = RandomForestRegressor(n_estimators=40)
def fit(self, X, y=None, *args, **kwargs):
    """Grid-search the PCA size of a PCA + BayesianRidge pipeline and report it.

    ``X`` is standardised with ``preprocessing.scale``; the number of PCA
    components is searched over 1, 2, 4, 6, 8 and 10 with ``r2`` scoring.
    The best parameters, the per-candidate scores and the best score are
    printed.

    Extra positional and keyword arguments are accepted and ignored.

    Returns
    -------
    self
    """
    # Single-argument print calls behave the same on Python 2 and Python 3;
    # the former print statements were a syntax error on Python 3.
    print("Bayesian Ridge")
    X = preprocessing.scale(X)

    # NOTE(review): the regression step keeps its historical name 'lasso'
    # because parameter names derived from it may be used elsewhere.
    pipelineFit = Pipeline([('pca', decomposition.PCA()),
                            ('lasso', linear_model.BayesianRidge())])
    grid_search = GridSearchCV(pipelineFit,
                               dict(pca__n_components=[1, 2, 4, 6, 8, 10]),
                               scoring='r2')
    grid_search.fit(X, y)
    acc = grid_search.best_score_

    print(grid_search.best_params_)
    # grid_scores_ no longer exists in current scikit-learn; fall back to
    # cv_results_ when it is missing.
    candidate_scores = getattr(grid_search, 'grid_scores_', None)
    if candidate_scores is None:
        candidate_scores = grid_search.cv_results_
    print(candidate_scores)
    print("r2: " + str(acc))
    return self
def main_scut(filenames, X, y):
    """
    train and eval on SCUT-FBP benchmark with HMTNet descriptor and Bayesian
    Ridge Regression

    Splits the data 80/20 (random_state=40), fits the regressor on the
    training part, prints MAE, RMSE and the Pearson correlation on the test
    part and writes one row per test sample to ``./scutfbp_output.xlsx``.

    :param filenames: one file name per row of ``X``; split together with
        ``X`` and ``y`` so each output row names the sample it describes
        (previously the first ``len(y_test)`` names were used, which did not
        match the shuffled test rows)
    :param X: feature matrix
    :param y: ground-truth scores
    :return: None
    """
    from sklearn.model_selection import train_test_split

    # Splitting all three sequences in one call applies the same shuffle.
    (X_train, X_test, y_train, y_test,
     _, test_filenames) = train_test_split(X, y, filenames,
                                           test_size=0.2, random_state=40)

    reg = linear_model.BayesianRidge()
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    y_true = np.array(y_test)
    y_hat = np.array(y_pred).ravel()

    mae_lr = round(mean_absolute_error(y_true, y_hat), 4)
    # np.math is no longer available in recent NumPy releases.
    rmse_lr = round(float(np.sqrt(mean_squared_error(y_true, y_hat))), 4)
    pc = round(np.corrcoef(y_true, y_hat)[0, 1], 4)

    print(
        '===============The Mean Absolute Error of Trans HMT-Net is {0}===================='
        .format(mae_lr))
    print(
        '===============The Root Mean Square Error of Trans HMT-Net is {0}===================='
        .format(rmse_lr))
    print(
        '===============The Pearson Correlation of Trans HMT-Net is {0}===================='
        .format(pc))

    col = ['filename', 'gt', 'pred']
    # Convert once instead of calling tolist() for every row.
    rows = [
        [name, truth, pred]
        for name, truth, pred in zip(test_filenames, y_true.tolist(),
                                     y_pred.tolist())
    ]

    df = pd.DataFrame(rows, columns=col)
    df.to_excel("./scutfbp_output.xlsx", sheet_name='Output', index=False)

    print('Output Excel has been generated~')
def BayesianRidge_model(X_train, X_valid, y_train, y_test, y_name,
                        y_train_mean, y_train_std):
    """Fit one BayesianRidge model per target column and persist the results.

    For every entry of ``y_name`` the matching column of ``y_train`` is
    fitted, predictions are made for ``X_valid`` and ``X_train``, R2 is
    computed with ``get_R2`` on both, and the predictions (``.npz``), the
    model (``.pkl``) and a plot (``plot_results``) are written out.

    Returns
    -------
    The model fitted for the last target, or ``None`` when ``y_name`` is
    empty (this used to fail on an unbound name).
    """
    model_name = 'BayesianRidge'
    model = None

    # Single-string print calls produce the same text on Python 2 and 3;
    # the former print statements were a syntax error on Python 3.
    print('head items to fit are:  %s' % (y_name,))

    for head_item in range(len(y_name)):
        y_train_item = y_train[:, head_item]
        y_test_item = y_test[:, head_item]

        print(
            '********************************** Fitting %s on %s Data **********************************'
            % (model_name, y_name[head_item]))

        # Declare and fit the model
        model = linear_model.BayesianRidge(compute_score=True)
        model.fit(X_train, y_train_item)

        # Get predictions
        y_valid_predicted = model.predict(X_valid)
        training_prediction = model.predict(X_train)

        R2s_training = get_R2(y_train_item, training_prediction)
        print('R2 on training set =  %s' % (R2s_training,))

        # Get metric of fit
        R2s = get_R2(y_test_item, y_valid_predicted)
        print('R2s: %s' % (R2s,))

        print('saving prediction ...')
        np.savez(y_name[head_item] + '_%s_ypredicted.npz' % model_name,
                 y_test=y_test_item,
                 y_prediction=y_valid_predicted,
                 y_train_=y_train_item,
                 training_prediction=training_prediction,
                 y_train_mean=y_train_mean[head_item],
                 y_train_std=y_train_std[head_item])

        joblib.dump(model, y_name[head_item] + '_%s.pkl' % model_name)

        print('plotting results...')
        plot_results(y_test_item,
                     y_valid_predicted,
                     y_name[head_item],
                     R2s,
                     model_name=model_name)

    return model
def fit(self, X, y, sample_weight, fitOpt={}):
    '''
    Fit the regression tree, then one BayesianRidge model per leaf.

    Parameters
    ----------
    X: array-like, shape = [n_samples, n_features]
        The training input samples. Rows are selected with a boolean mask,
        so the object must support ``X[mask, :]``.

    y: array-like, shape = [n_samples]
        The target values; indexed with the same boolean mask.

    sample_weight: array-like, shape = [n_samples] or None
        Forwarded unchanged to the parent class fit.

    fitOpt: dictionary (optional, default: {})
        Extra keyword options forwarded to the parent class fit.

    Returns
    -------
    Self

    Notes
    -----
    Leaves are identified by the distinct values the fitted tree predicts
    for X. For each such value ``self.leafParameters[value]`` receives the
    fitted linear model together with the largest and smallest target seen
    in that leaf.
    '''
    parent = super(DecisionTreeRegressorWithLinearLeafRegression, self)

    # Fit a normal regression tree
    parent.fit(X, y, sample_weight, **fitOpt)

    # Group the training points by the leaf output they fall into
    tree_output = parent.predict(X)
    for leaf_value in np.unique(tree_output):
        in_leaf = tree_output == leaf_value
        leaf_targets = y[in_leaf]

        leaf_model = linear_model.BayesianRidge()
        leaf_model.fit(X[in_leaf, :], leaf_targets)

        self.leafParameters[leaf_value] = {
            "linearRegression": leaf_model,
            "max": np.max(leaf_targets),
            "min": np.min(leaf_targets)
        }
    return self
def main():
    """Plot BayesianRidge R^2 and MAE for growing prefixes of ``data.csv``.

    The first seven columns of ``data.csv`` are the features and the eighth
    is the target. For 2000 prefix lengths between 20 and the number of
    rows, five shuffled 90/10 splits are evaluated; every R^2 and mean
    absolute error is printed and collected, and both collections are handed
    to ``plot_final``.
    """
    # load the data
    data = np.loadtxt('data.csv', delimiter=',', skiprows=1,
                      usecols=range(0, 8))

    # load the header; the context manager closes the file, which was
    # previously left open
    with open('data.csv', newline='') as f:
        headers = next(csv.reader(f), None)

    data2 = data.transpose()
    X = data[:, 0:7]
    y = data[:, 7]

    # plot each scatter of the data
    a = headers
    plot(a, data2[1:7, :], data2[7, :])

    # init
    R = []
    Mean = []

    # repeated shuffle-split validation on growing prefixes of the data
    m = len(X)
    c = np.linspace(20, m, num=2000, endpoint=True, dtype=int)
    for i in c:
        data1 = data[0:i]
        X1 = X[0:i]
        y1 = y[0:i]
        ss = ShuffleSplit(n_splits=5, test_size=0.1, random_state=0)
        for train, test in ss.split(data1):
            X_train, X_test, y_train, y_test = (X1[train], X1[test],
                                                y1[train], y1[test])
            print('Test data/Train data: %s/%s' % (len(X_test), len(X_train)))

            # Bayesian ridge regression
            model = linear_model.BayesianRidge()
            model.fit(X_train, y_train)
            y_predictions = model.predict(X_test)

            # R^2 (coefficient of determination) regression score function
            r = r2_score(y_test, y_predictions)
            R.append(r)
            print('R-squared: %.4f' % r, end=" ")

            # Mean absolute error regression loss
            mean = mean_absolute_error(y_test, y_predictions)
            Mean.append(mean)
            print('Mean absolute error : %.4f' % mean)

    plot_final(c, R, 'R^2')
    plot_final(c, Mean, 'Mean absolute error')
def doBayesianRegression(df, features):
    """Fit BayesianRidge on a train/test split and report both errors.

    The split comes from ``createtesttrain(df, features, 6)`` and the errors
    from ``calcerror(prediction, truth)``; both are printed and returned.

    Returns
    -------
    tuple
        ``(train_error, test_error)``
    """
    x_train, x_test, y_train, y_test = createtesttrain(df, features, 6)

    regressor = linear_model.BayesianRidge()
    regressor.fit(x_train, y_train)

    train_error = calcerror(regressor.predict(x_train), y_train)
    train_message = ("Train error = " '{}'.format(train_error) +
                     " percent in Bayesian Regression")
    print(train_message)

    test_error = calcerror(regressor.predict(x_test), y_test)
    test_message = ("Test error = " '{}'.format(test_error) +
                    " percent in Bayesian Regression\n")
    print(test_message)

    return train_error, test_error
def Bayes(path):
    """Fit BayesianRidge on an Excel sheet and return test inputs and predictions.

    Rows with missing values are dropped. The first column is skipped, the
    last column is the target and everything in between is used as
    features. The data is split 80/20 with ``random_state=0``.

    Returns
    -------
    tuple
        ``(X_test, y_pred)`` for the held-out 20 %.
    """
    sheet = pd.read_excel(path)
    sheet.dropna(inplace=True)

    values = sheet.values
    features = values[:, 1:-1]
    target = values[:, -1]

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=0)

    reg = linear_model.BayesianRidge()
    reg.fit(X_train, y_train)
    return (X_test, reg.predict(X_test))
def test_model_bayesian_ridge(self):
    """A fitted BayesianRidge converts to ONNX and is dumped for comparison.

    The converted model must not be ``None``; data, model and ONNX graph are
    then passed to ``dump_data_and_model`` under the base name
    "SklearnBayesianRidge-Dec4" with the ``allow_failure`` expression shown
    below.
    """
    model, X = fit_regression_model(linear_model.BayesianRidge())

    input_types = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(model, "bayesian ridge", input_types)
    self.assertIsNotNone(model_onnx)

    failure_condition = ("StrictVersion("
                         "onnxruntime.__version__)"
                         "<= StrictVersion('0.2.1')")
    dump_data_and_model(
        X,
        model,
        model_onnx,
        basename="SklearnBayesianRidge-Dec4",
        allow_failure=failure_condition,
    )
def __init__(self, fileName):
    """Train a comment-score regressor from a JSON-lines file.

    Parameters
    ----------
    fileName : str
        Path of a file holding one JSON object per line, each with a
        "body" (comment text) and a "score" entry.

    The comments are vectorised with ``StemmedCountVectorizer``, weighted
    with TF-IDF, reduced to the 1000 best features and used to fit a
    Bayesian ridge regressor. The fitted transformers and the regressor are
    kept on the instance.
    """
    comments = []
    scores = []

    # The loop used to iterate over the name `file` and never opened
    # `fileName`; open the requested path and close it when done.
    with open(fileName, encoding="utf-8") as handle:
        for line in handle:
            data = json.loads(line)
            comments.append(data["body"])
            scores.append(data["score"])

    self.vect = StemmedCountVectorizer(stop_words='english')
    self.trainer = self.vect.fit_transform(comments)

    self.tfidf = TfidfTransformer()
    self.trainer2 = self.tfidf.fit_transform(self.trainer)

    self.reducer = sklearn.feature_selection.SelectKBest(k=1000)
    reduced_data = self.reducer.fit_transform(self.trainer2, scores)

    self.reg = linear_model.BayesianRidge()
    self.reg.fit(reduced_data.toarray(), scores)
def train_and_eval_scutfbp(train_set_vector, test_set_vector, trainset_label,
                           testset_label, testset_filenames):
    """
    train and eval on SCUT-FBP dataset

    Fits a Bayesian ridge regressor, prints MAE / RMSE / Pearson correlation
    on the test set, pickles the model under ./model and writes the
    per-sample predictions and the three metrics under ./result.

    :param train_set_vector: training features
    :param test_set_vector: test features
    :param trainset_label: training targets
    :param testset_label: test targets
    :param testset_filenames: names forwarded to out_result
    :return: None
    """
    train_shape = np.array(train_set_vector).shape
    print("The shape of training set is {0}".format(train_shape))
    test_shape = np.array(test_set_vector).shape
    print("The shape of test set is {0}".format(test_shape))

    reg = linear_model.BayesianRidge()
    reg.fit(train_set_vector, trainset_label)
    predicted_label = reg.predict(test_set_vector)

    mae_lr = round(mean_absolute_error(testset_label, predicted_label), 4)
    mse = mean_squared_error(testset_label, predicted_label)
    rmse_lr = round(math.sqrt(mse), 4)
    correlation = np.corrcoef(testset_label, predicted_label)
    pc = round(correlation[0, 1], 4)

    report = (
        ('===============The Mean Absolute Error of Model is {0}====================', mae_lr),
        ('===============The Root Mean Square Error of Model is {0}====================', rmse_lr),
        ('===============The Pearson Correlation of Model is {0}====================', pc),
    )
    for template, value in report:
        print(template.format(value))

    mkdirs_if_not_exist('./model')
    joblib.dump(reg, './model/BayesRidge_SCUTFBP.pkl')
    print('The regression model has been persisted...')

    mkdirs_if_not_exist('./result')
    out_result(testset_filenames,
               predicted_label,
               testset_label,
               None,
               path='./result/Pred_GT_SCUTFBP.csv')

    metrics_frame = pd.DataFrame([mae_lr, rmse_lr, pc])
    metrics_frame.to_csv('./result/BayesRidge_SCUTFBP.csv', index=False)
    print('The result csv file has been generated...')
def model_train_and_predict(self, data_l):
    """Predict the admission chance for one new record with six regressors.

    Parameters
    ----------
    data_l : sequence
        Feature values of the new record; reshaped to a single row.

    Returns
    -------
    tuple
        Six percentages (prediction * 100, rounded to 3 decimals), in this
        order: linear regression, decision tree, ridge, lasso, LassoLars,
        Bayesian ridge.

    The features are every column of ``self.data`` except the first and the
    last; the target is the column named 'Chance of Admit '.
    """
    cols = self.data.columns
    features = cols[1:-1]
    X = self.data[features]
    y = self.data['Chance of Admit ']

    Xnew = np.array(data_l).reshape(1, -1)

    # One loop replaces six copy-pasted fit/predict stanzas; the order here
    # defines the order of the returned values.
    estimator_classes = (
        linear_model.LinearRegression,
        DecisionTreeRegressor,
        linear_model.Ridge,
        linear_model.Lasso,
        linear_model.LassoLars,
        linear_model.BayesianRidge,
    )

    percentages = []
    for estimator_cls in estimator_classes:
        model = estimator_cls()
        model.fit(X, y)
        prediction = model.predict(Xnew)
        percentages.append(round(prediction[0] * 100, 3))

    return tuple(percentages)
def build_end_value_prediction_model(X, y, type_="linearsvr"):
    """Create, fit and return the end-value regression model.

    Parameters
    ----------
    X, y
        Training features and targets, passed unchanged to ``fit``.
    type_ : str
        "svr" (SVR), "linear" (ordinary least squares), "bayes" (Bayesian
        ridge); any other value, including the default, selects LinearSVR.

    Returns
    -------
    The fitted model. The elapsed time for construction and fitting is
    printed.
    """
    # Keep the fractional part: truncating the start time with int()
    # overstated the reported duration by up to one second.
    start = time.time()

    if type_ == "svr":
        model = svm.SVR()  # SVR regression
    elif type_ == "linear":
        model = linear_model.LinearRegression()  # linear regression
    elif type_ == "bayes":
        model = linear_model.BayesianRidge()  # Bayes
    else:
        model = svm.LinearSVR()

    model.fit(X, y)

    # Single-string print works on Python 2 and 3 and keeps the old layout.
    print("End Value Prediction Model Fit Time :  %s" % (time.time() - start))
    return model