def get_models(self, input_dl, type_dl, list_chosen, active, lost):
    """Generate a library of base learners."""
    linreg = LinearRegression(fit_intercept=True)  # `normalize` was removed in scikit-learn >= 1.2
    dtr = DecisionTreeRegressor(random_state=222, min_samples_split=0.018,
                                min_samples_leaf=0.007, max_depth=25)
    svrr = SVR(kernel='linear', epsilon=5)
    br = BaggingRegressor(n_estimators=350, max_samples=0.9, max_features=0.7,
                          bootstrap=False, random_state=self.SEED)
    ada = AdaBoostRegressor(n_estimators=7, loss='exponential',
                            learning_rate=0.01, random_state=self.SEED)
    rf = RandomForestRegressor(n_estimators=1000, max_depth=30,
                               max_leaf_nodes=1000, random_state=self.SEED)
    gbr = GradientBoostingRegressor(n_estimators=1000, learning_rate=0.01,
                                    random_state=self.SEED)
    xgbr1 = xgb.XGBRegressor(random_state=self.SEED)
    mdl = LGBMRegressor(n_estimators=1000, learning_rate=0.01)
    las = Lasso()
    rid = Ridge()
    en = ElasticNet()
    huber = HuberRegressor(max_iter=2000)
    lasl = LassoLars(max_iter=2000, eps=1, alpha=0.5)  # `normalize` was removed in scikit-learn >= 1.2
    pa = PassiveAggressiveRegressor(C=1, max_iter=4000, random_state=self.SEED)
    sgd = SGDRegressor(max_iter=2000, tol=1e-3)
    knn = KNeighborsRegressor(n_neighbors=20)
    ex = ExtraTreeRegressor()
    exs = ExtraTreesRegressor(n_estimators=1000)
    dl = self.deep_learning_model(input_dl, dropout_val=0.2, type=type_dl,
                                  active=active, lost=lost)

    models_temp = {
        'deep learning': dl,
        'BaggingRegressor': br,
        'RandomForestRegressor': rf,
        'GradientBoostingRegressor': gbr,
        'XGBRegressor': xgbr1,
        'LGBMRegressor': mdl,
        'ExtraTreesRegressor': exs,
        'LinearRegression': linreg,
        'SVR': svrr,
        'AdaBoostRegressor': ada,
        'LassoLars': lasl,
        'PassiveAggressiveRegressor': pa,
        'SGDRegressor': sgd,
        'DecisionTreeRegressor': dtr,
        'lasso': las,
        'ridge': rid,
        'ElasticNet': en,
        'HuberRegressor': huber,
        'KNeighborsRegressor': knn,
        'ExtraTreeRegressor': ex,
    }

    # Keep only the learners the caller asked for.
    models = {name: models_temp[name] for name in list_chosen if name in models_temp}
    st.write(models)
    return models
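# Hedged usage sketch (not part of the original class): fit and score every
# learner returned by get_models(). X_train/X_test/y_train/y_test are assumed
# to be prepared elsewhere, and the 'deep learning' entry may need its own
# Keras-style fit call instead of this generic loop.
def fit_model_library(models, X_train, y_train, X_test, y_test):
    """Fit each base learner and collect its held-out R^2 score."""
    scores = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        scores[name] = model.score(X_test, y_test)
    return scores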
def get_svr_rbf_model(temp_hum, sensor):
    svr_rbf = SVR(kernel='rbf', gamma='scale')
    X = temp_hum
    r_y = sensor.reshape(sensor.shape[0],)
    return svr_rbf.fit(X, r_y)
# Splitting the dataset into training set and test set
"""from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
"""

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_Y = StandardScaler()
X = sc_X.fit_transform(X)
y = np.reshape(a=y, newshape=(-1, 1))
y = sc_Y.fit_transform(y)

# Fitting the SVR model to the data set
from sklearn.svm import SVR
regressor = SVR(kernel='rbf')
regressor.fit(X, y.ravel())

# Predicting a new result
# The following line fails because sc_X.transform expects a 2-D array:
# y_pred = sc_Y.inverse_transform(regressor.predict(sc_X.transform(np.array([6.5]))))
# Recent scikit-learn also requires a 2-D array for inverse_transform, hence the reshape:
y_pred = sc_Y.inverse_transform(
    regressor.predict(sc_X.transform(np.reshape(a=6.5, newshape=(1, -1)))).reshape(-1, 1))
print(y_pred)

# Visualizing the regression results
plt.scatter(X, y, color='red')
plt.plot(X, regressor.predict(X), color='blue')
plt.title('Truth or Bluff (SVR Model)')
# exit()  # early exit left over from debugging; commented out so the models below run

# Ridge Regression model
ridReg = linear_model.Ridge(alpha=0.005)
t0_train = time.time()
ridReg.fit(X, y)
t1_train = time.time()

t0_pred = time.time()
y_pred = ridReg.predict(X)
t1_pred = time.time()

y_ridReg = ridReg.predict(X)
kFoldsCrossValidation('Ridge Regression', ridReg, X, y, y_ridReg)
print("training time: %.12f" % (t1_train - t0_train))
print("prediction latency: %.12f\n" % (t1_pred - t0_pred))

# Support Vector Regression model
svr_rbf = SVR(kernel='rbf', gamma=0.1, C=100.0)
t0_train = time.time()
svr_rbf.fit(X, y)
t1_train = time.time()

t0_pred = time.time()
y_svr = svr_rbf.predict(X)
t1_pred = time.time()

kFoldsCrossValidation('SVR-RBF', svr_rbf, X, y, y_svr)
print("training time: %.12f" % (t1_train - t0_train))
print("prediction latency: %.12f\n" % (t1_pred - t0_pred))

# Gaussian Process model
kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
t0_train = time.time()
gp.fit(X, y)
def svm(X_train, X_test, y_train, y_test, params):
    print("SVM Training Started, Please wait.........")

    # Plain lookup table used to map hyperopt's index-valued result back to values.
    svm_choose = {
        'C': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0],
        'gamma': ['scale', 'auto'],
        'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
    }
    svm_paramgrid = {
        'C': hp.choice('C', [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]),
        'gamma': hp.choice('gamma', ['scale', 'auto']),
        'kernel': hp.choice('kernel', ['linear', 'poly', 'rbf', 'sigmoid'])
    }

    def objective_svm(svm_paramgrid):
        model = SVR(C=svm_paramgrid['C'],
                    gamma=svm_paramgrid['gamma'],
                    kernel=svm_paramgrid['kernel'])
        score = cross_val_score(model, X_train, y_train, cv=4).mean()
        # cross_val_score returns R^2 for regressors; hyperopt minimizes the
        # loss, so the score to maximize is returned negated.
        return {'loss': -score, 'status': STATUS_OK}

    trials_svm = Trials()
    best_svm = fmin(fn=objective_svm,
                    space=svm_paramgrid,
                    algo=tpe.suggest,
                    max_evals=100,
                    trials=trials_svm)
    print("best_svm")
    print(best_svm)

    # fmin() returns indices into each hp.choice list; translate them back.
    svm_paramgrid_best = {}
    for i in best_svm.keys():
        svm_paramgrid_best[i] = svm_choose[i][best_svm[i]]
    print("svm_paramgrid_best")
    print(svm_paramgrid_best)

    model = SVR(C=svm_paramgrid_best['C'],
                gamma=svm_paramgrid_best['gamma'],
                kernel=svm_paramgrid_best['kernel'])
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print('RMSE -', np.sqrt(metrics.mean_squared_error(y_test, predictions)))
    print("---------------------")
    print("Score - ", metrics.r2_score(y_test, predictions))

    params['algorithms']["svm"] = metrics.r2_score(y_test, predictions) * 100
    params["algokeys"] = list(params["algorithms"].keys())
    params["algovalues"] = list(params["algorithms"].values())
    if params["best_acc"] < round(params['algorithms']["svm"], 2):
        params["best_acc"] = round(params['algorithms']["svm"], 2)
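# Hedged alternative (an assumption, not in the original code): hyperopt ships
# space_eval(), which maps the index dict returned by fmin() back to concrete
# parameter values, making the manual svm_choose lookup unnecessary.
from hyperopt import space_eval

def best_params_from_trials(space, best):
    """Translate fmin()'s index-valued result into actual parameter values."""
    return space_eval(space, best)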
# predict() expects a 2-D array, not a bare scalar
print(lin_reg.predict([[6.6]]))
print(lin_reg2.predict(poly_reg.fit_transform([[11]])))
print(lin_reg2.predict(poly_reg.fit_transform([[6.6]])))

# Scaling the data
from sklearn.preprocessing import StandardScaler
sc1 = StandardScaler()
x_olcekli = sc1.fit_transform(X)
sc2 = StandardScaler()
y_olcekli = sc2.fit_transform(Y.reshape(-1, 1)).ravel()

from sklearn.svm import SVR
svr_reg = SVR(kernel='rbf')
svr_reg.fit(x_olcekli, y_olcekli)

plt.scatter(x_olcekli, y_olcekli, color='red')
plt.plot(x_olcekli, svr_reg.predict(x_olcekli), color='blue')

# New inputs must be scaled with the same scaler the model was trained on.
print(svr_reg.predict(sc1.transform([[11]])))
print(svr_reg.predict(sc1.transform([[6.6]])))
)

# %% [markdown]
# Thus, we saw that `PolynomialFeatures` is actually doing the same
# operation that we did manually above.
#
# The last possibility to make a linear model more expressive is to use a
# "kernel". Instead of learning a weight per feature as we previously
# emphasized, a weight will be assigned per sample instead. However, not all
# samples will be used. This is the basis of the support vector machine
# algorithm.

# %%
from sklearn.svm import SVR

svr = SVR(kernel="linear")
svr.fit(X, y)
y_pred = svr.predict(X)

plt.plot(x[sorted_idx], y_pred[sorted_idx], color="tab:orange")
plt.scatter(x, y)
plt.xlabel("x")
plt.ylabel("y")
_ = plt.title(
    f"Mean squared error = "
    f"{mean_squared_error(y, y_pred):.2f}"
)

# %% [markdown]
# The algorithm can be modified such that it can use a non-linear kernel. Then,
# it will compute interactions between samples using this non-linear
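# %% [markdown]
# A hedged illustration (not part of the original notebook): swapping in an
# RBF kernel lets the same estimator fit non-linear structure; the `gamma`
# value below is an arbitrary assumption.

# %%
svr_rbf = SVR(kernel="rbf", gamma=10.0)
svr_rbf.fit(X, y)
y_pred_rbf = svr_rbf.predict(X)

plt.plot(x[sorted_idx], y_pred_rbf[sorted_idx], color="tab:green")
plt.scatter(x, y)
plt.xlabel("x")
plt.ylabel("y")
_ = plt.title(
    f"Mean squared error = "
    f"{mean_squared_error(y, y_pred_rbf):.2f}"
)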
def do_test(self, MainWindow):
    if self.train is not None and self.test is not None:
        steps = 6

        # Rebuild the training frame and turn it into a supervised problem.
        train = DataFrame()
        train['Data2'] = list(self.train['Data2'].values)
        train['Data'] = list(self.train['Data'].values)
        df = series_to_supervised(train.values, steps)

        xscaler = MinMaxScaler()
        yscaler = MinMaxScaler()
        x = df.iloc[:, [a for a in range(steps * 2 - 2)]].values
        xscaler.fit(x)
        y = df.iloc[:, [steps * 2 - 1]].values.ravel()
        yscaler.fit(df.iloc[:, [steps * 2 - 1]].values)

        test = DataFrame()
        test['Data2'] = list(self.test['Data2'].values)
        test['Data'] = list(self.test['Data'].values)
        df = series_to_supervised(test.values, steps)
        dft = series_to_supervised(list(self.test['Time'].values), steps)
        xtest = df.iloc[:, [a for a in range(steps * 2 - 2)]].values
        ytest = df.iloc[:, [steps * 2 - 1]].values.ravel()

        def fill_column(col, values):
            """Write score/MSE/MAE/RMSE into one column of the results table."""
            for row, value in enumerate(values):
                item = QtWidgets.QTableWidgetItem()
                self.tableWidget_2.setItem(row, col, item)
                self.tableWidget_2.item(row, col).setText(str(value))

        # SVR with a linear kernel, trained on scaled data.
        regressor = SVR(kernel='linear', epsilon=1.0, verbose=True)
        regressor.fit(xscaler.transform(x),
                      yscaler.transform(y.reshape(-1, 1)).ravel())
        ypred = regressor.predict(xscaler.transform(xtest))
        score = regressor.score(xscaler.transform(xtest),
                                yscaler.transform(ytest.reshape(-1, 1)).ravel())
        ypred_unscaled = yscaler.inverse_transform(ypred.reshape(-1, 1)).ravel()
        mse = self.mse(ytest, ypred_unscaled)
        mae = self.mae(ytest, ypred_unscaled)
        rmse = self.rmse(ytest, ypred_unscaled)
        print("SVR Kernel Linear")
        print(f"Score: {score}")
        print(f"MSE: {mse}")
        print(f"MAE: {mae}")
        print(f"RMSE: {rmse}\n")

        # MLP (backpropagation) on the raw, unscaled data.
        from sklearn.neural_network import MLPRegressor
        hidden = 5
        reg = MLPRegressor(hidden_layer_sizes=(hidden, ),
                           activation='logistic',
                           solver='lbfgs',
                           alpha=0.0001,
                           random_state=0,
                           verbose=True)
        reg.fit(x, y)
        ypredmlp = reg.predict(xtest)
        scoremlp = reg.score(xtest, ytest)
        msemlp = self.mse(ytest, ypredmlp)
        maemlp = self.mae(ytest, ypredmlp)
        rmsemlp = self.rmse(ytest, ypredmlp)
        print("Neural Network - Backpropagation")
        print(reg)
        print(f"Input: {x.shape[1]}")
        print(f"Hidden: {hidden}")
        print(f"Output: {reg.n_outputs_}")
        print(f"Score: {scoremlp}")
        print(f"MSE: {msemlp}")
        print(f"MAE: {maemlp}")
        print(f"RMSE: {rmsemlp}\n")
        fill_column(5, [scoremlp, msemlp, maemlp, rmsemlp])

        # Plot actual versus predicted values.
        f, ax = plt.subplots()
        ax.plot(self.data['Time'].values, self.data['Data'].values,
                color='blue', label='Actual')
        ttest = dft['var1(t)'].values
        ax.plot(ttest, ypred_unscaled, color='red', label='Predicted (SVR)')
        ax.plot(ttest, ypredmlp, color='green', label='Predicted (MLP)')
        ax.legend()
        plt.xlabel('Month')
        plt.ylabel('Number of License')
        ax.set_ylim([0, 200])
        plt.savefig('Plot.png')
        pic = QtGui.QPixmap('Plot.png')
        pic = pic.scaled(811, 441)
        self.graphicsView.setPixmap(pic)
        fill_column(0, [score, mse, mae, rmse])

        # SVR with an RBF kernel on the raw data.
        regressor = SVR(kernel='rbf', epsilon=1.0)
        regressor.fit(x, y)
        ypred = regressor.predict(xtest)
        score = regressor.score(xtest, ytest)
        mse = self.mse(ytest, ypred)
        mae = self.mae(ytest, ypred)
        rmse = self.rmse(ytest, ypred)
        print("SVR Kernel RBF")
        print(f"Score: {score}")
        print(f"MSE: {mse}")
        print(f"MAE: {mae}")
        print(f"RMSE: {rmse}\n")
        fill_column(1, [score, mse, mae, rmse])

        # Lasso
        from sklearn import linear_model
        reg = linear_model.Lasso(alpha=0.1)
        reg.fit(x, y)
        ypred = reg.predict(xtest)
        score = reg.score(xtest, ytest)
        mse = self.mse(ytest, ypred)
        mae = self.mae(ytest, ypred)
        rmse = self.rmse(ytest, ypred)
        print("Linear Model - Lasso")
        print(f"Score: {score}")
        print(f"MSE: {mse}")
        print(f"MAE: {mae}")
        print(f"RMSE: {rmse}\n")
        fill_column(2, [score, mse, mae, rmse])

        # Elastic Net
        reg = linear_model.ElasticNet(alpha=0.1)
        reg.fit(x, y)
        ypred = reg.predict(xtest)
        score = reg.score(xtest, ytest)
        mse = self.mse(ytest, ypred)
        mae = self.mae(ytest, ypred)
        rmse = self.rmse(ytest, ypred)
        print("Linear Model - Elastic Net")
        print(f"Score: {score}")
        print(f"MSE: {mse}")
        print(f"MAE: {mae}")
        print(f"RMSE: {rmse}\n")
        fill_column(3, [score, mse, mae, rmse])

        # Ridge
        reg = linear_model.Ridge(alpha=0.1)
        reg.fit(x, y)
        ypred = reg.predict(xtest)
        score = reg.score(xtest, ytest)
        mse = self.mse(ytest, ypred)
        mae = self.mae(ytest, ypred)
        rmse = self.rmse(ytest, ypred)
        print("Linear Model - Ridge")
        print(f"Score: {score}")
        print(f"MSE: {mse}")
        print(f"MAE: {mae}")
        print(f"RMSE: {rmse}\n")
        fill_column(4, [score, mse, mae, rmse])
    else:
        MainWindow.msg = QtWidgets.QMessageBox()
        MainWindow.msg.setIcon(QtWidgets.QMessageBox.Warning)
        MainWindow.msg.setWindowTitle("Warning")
        MainWindow.msg.setText("You must open a file first")
        MainWindow.msg.setStandardButtons(QtWidgets.QMessageBox.Ok)
        MainWindow.msg.show()
def make_prediction(self):
    if self.train is not None and self.test is not None:
        steps = 6
        train = DataFrame()
        train['Data2'] = list(self.train['Data2'].values)
        train['Data'] = list(self.train['Data'].values)
        df = series_to_supervised(train.values, steps)
        x = df.iloc[:, [a for a in range(steps * 2 - 2)]].values
        y = df.iloc[:, [steps * 2 - 1]].values.ravel()

        # Work out how many months to forecast from the selected end date.
        start_year = 2019
        start_month = 1
        start_date = QtCore.QDate(start_year, start_month, 1)
        end_date = self.dateEdit.date()
        diff_year = end_date.year() - start_date.year()
        end_month = end_date.month() + 1
        if diff_year == 0:
            diff_month = end_month - start_month
        else:
            diff_month = 12 - start_month + ((12 * diff_year) - (12 - end_month))

        sdm_data = np.array([])
        forecast_time = np.array([])
        forecast_data = np.empty((0, steps * 2 - 2), int)
        old_forecast = x
        x_len = len(old_forecast)
        total_records = len(self.train) + len(self.test)

        if diff_month > 0:
            for i in range(diff_month):
                sdm_data = np.append(sdm_data, old_forecast[i % x_len][-2])
                forecast_data = np.append(forecast_data,
                                          np.array([old_forecast[i % x_len]]),
                                          axis=0)
                forecast_time = np.append(forecast_time,
                                          f'{start_year}-{start_month}')
                if start_month == 12:
                    start_year = start_year + 1
                    start_month = 1
                else:
                    start_month = start_month + 1

        regressor = SVR(kernel='linear', epsilon=1.0, verbose=True)
        regressor.fit(x, y)
        ypred = regressor.predict(forecast_data)

        from sklearn.neural_network import MLPRegressor
        hidden = 5
        reg = MLPRegressor(hidden_layer_sizes=(hidden, ),
                           activation='logistic',
                           solver='lbfgs',
                           alpha=0.0001,
                           random_state=0,
                           verbose=True)
        reg.fit(x, y)
        ypredmlp = reg.predict(forecast_data)

        f, ax = plt.subplots()
        ax.plot(self.data['Time'].values, self.data['Data'].values,
                color='blue', label='Actual')
        ax.plot(forecast_time, ypred, color='red', label='Forecast (SVR)')
        ax.plot(forecast_time, ypredmlp, color='green', label='Forecast (MLP)')
        plt.xlabel('Month')
        plt.ylabel('Number of License')
        ax.legend()
        ax.set_ylim([0, 200])
        plt.savefig('Plot.png')
        pic = QtGui.QPixmap('Plot.png')
        pic = pic.scaled(811, 441)
        self.graphicsView.setPixmap(pic)

        self.tableWidget.setRowCount(
            len(self.train) + len(self.test) + len(ypred))
        for i, (time, predicted, sdm, predictedmlp) in enumerate(
                zip(forecast_time, ypred, sdm_data, ypredmlp)):
            item = QtWidgets.QTableWidgetItem()
            self.tableWidget.setItem(i + total_records, 0, item)
            self.tableWidget.item(i + total_records, 0).setText(str(time))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget.setItem(i + total_records, 1, item)
            self.tableWidget.item(i + total_records, 1).setText(
                f"{str(int(predicted))} (SVR), {str(int(predictedmlp))} (MLP)")

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget.setItem(i + total_records, 2, item)
            self.tableWidget.item(i + total_records, 2).setText(str(int(sdm)))
test_y_rf = model_rf.predict(test_x)
create_result(test_y_rf, 'rand_forest.csv')

# Gradient boosting model ('ls' was renamed 'squared_error' in scikit-learn 1.0)
model_grad_boost = GradientBoostingRegressor(
    random_state=0, loss='squared_error').fit(train_x, train_y)
test_y_grad_boost = model_grad_boost.predict(test_x)
create_result(test_y_grad_boost, 'grad_boost.csv')

# KNN model
model_knn = KNeighborsRegressor().fit(train_x, train_y)
test_y_knn = model_knn.predict(test_x)
create_result(test_y_knn, 'knn.csv')

# SVM model
model_svm = SVR(C=1, epsilon=0.2).fit(train_x, train_y)
test_y_svm = model_svm.predict(test_x)
create_result(test_y_svm, 'svm.csv')

# In[ ]:

get_ipython().magic(u'pylab inline')
rcParams['figure.figsize'] = (12.0, 6.0)

data = [
    test_y_lasso, test_y_ridge, test_y_rf, test_y_knn, test_y_grad_boost,
    test_y_xgb, test_y_svm
]
plt.figure()
plt.boxplot(data)
plt.xticks([1, 2, 3, 4, 5, 6, 7],
           ('lasso', 'ridge', 'random forest', 'knn', 'gradient boost',
            'xgboost', 'svm'))
y = dataset.iloc[:, 2].values

# Splitting the dataset into the Training set and Test set
"""from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"""

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X = sc_X.fit_transform(X)
y = sc_y.fit_transform(y)"""

# Fitting SVR to the dataset
from sklearn.svm import SVR
regressor = SVR(kernel='rbf')  # because our problem is non-linear
regressor.fit(X, y)

# Predicting a new result; SVR expects a 2-D array, not a bare scalar
y_pred = regressor.predict([[6.5]])
# y_pred = sc_y.inverse_transform(y_pred)

# Visualising the SVR results
plt.scatter(X, y, color='red')
plt.plot(X, regressor.predict(X), color='blue')
plt.title('Truth or Bluff (SVR)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Visualising the SVR results (for higher resolution and smoother curve)
# X_grid = np.arange(min(X), max(X), 0.01)  # 0.01 step instead of 0.1 because the data is feature scaled
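# A hedged sketch (assumed, following the tutorial's usual pattern) that
# completes the truncated high-resolution visualization commented out above.
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color='red')
plt.plot(X_grid, regressor.predict(X_grid), color='blue')
plt.title('Truth or Bluff (SVR, high resolution)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()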
def main():
    data_dir = './data/'
    feature_dir = './features'

    # load data
    print("Loading data...")
    hist_feature = np.load(data_dir + 'histogram_feature.npz')['arr_0']
    imgNet_feature = np.load(data_dir + 'imageNet_feature.npz')['arr_0']
    vSenti_feature = np.load(data_dir + 'visual_senti_feature.npz')['arr_0']
    sen2vec_feature = np.load(data_dir + 'text_sentence2vec_feature.npz')['arr_0']
    social_feature = load_social_features(data_dir + 'video_id.txt',
                                          data_dir + 'video_user.txt',
                                          data_dir + 'user_details.txt')
    senti_feature = []
    for line in open(os.path.join(feature_dir, 'senti_scores.txt')):
        senti_feature.append(line.strip().split('\t'))
    # Cast to float: an array built from split strings has a string dtype, and
    # concatenating it with the float features would coerce everything to strings.
    senti_feature = np.array(senti_feature, dtype=np.float32)
    print(senti_feature)

    # Feature dimension reduction: the sizes of the reduced dimensions are up
    # to you; the main purpose is to reduce the computational complexity.
    pca = PCA(n_components=20)
    hist_feature = pca.fit_transform(hist_feature)
    pca = PCA(n_components=10)
    imgNet_feature = pca.fit_transform(imgNet_feature)
    pca = PCA(n_components=20)
    vSenti_feature = pca.fit_transform(vSenti_feature)
    pca = PCA(n_components=10)
    sen2vec_feature = pca.fit_transform(sen2vec_feature)

    # Concatenate all the features (after dimension reduction).
    concat_feature = np.concatenate([
        hist_feature, imgNet_feature, vSenti_feature, sen2vec_feature,
        social_feature, senti_feature
    ], axis=1)
    print("The input data dimension is: (%d, %d)" % concat_feature.shape)

    # load ground truth
    ground_truth = []
    for line in open(os.path.join(data_dir, 'ground_truth.txt')):
        loop_count = float(line.strip().split('::::')[0])
        like_count = float(line.strip().split('::::')[1])
        repost_count = float(line.strip().split('::::')[2])
        comment_count = float(line.strip().split('::::')[3])
        ground_truth.append(
            (loop_count + like_count + repost_count + comment_count) / 4)
    ground_truth = np.array(ground_truth, dtype=np.float32)

    # print("Start tuning model parameters...")
    # print(svc_param_selection(concat_feature, ground_truth, 10))

    print("Start training and predict...")
    kf = KFold(n_splits=10)
    nMSEs = []
    pop_predicts = np.empty([0, 1])
    for train, test in kf.split(concat_feature):
        # Model initialization: the SVR parameters can be tuned
        # (http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html),
        # or another regression model can be substituted.
        model = SVR(kernel='rbf', C=75000, gamma=0.0001, epsilon=0.01)
        # model = GradientBoostingRegressor(max_depth=10, n_estimators=200, learning_rate=0.1, random_state=42)

        # train
        model.fit(concat_feature[train], ground_truth[train])
        # predict
        predicts = model.predict(concat_feature[test])

        # nMSE (normalized Mean Squared Error) metric calculation
        nMSE = mean_squared_error(ground_truth[test], predicts) / np.mean(
            np.square(ground_truth[test]))
        nMSEs.append(nMSE)
        pop_predicts = np.concatenate(
            (pop_predicts, [[predict] for predict in predicts]))
        print("This round of nMSE is: %f" % nMSE)

    print('Average nMSE is %f.' % np.mean(nMSEs))
    return pop_predicts
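# Hedged refactoring sketch (not in the original script): the nMSE computed
# inline above can live in a small helper for reuse.
def normalized_mse(y_true, y_pred):
    """MSE normalized by the mean squared ground truth."""
    return mean_squared_error(y_true, y_pred) / np.mean(np.square(y_true))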
                           memory='nilearn_cache')  # cache options

# Remove features with too low between-subject variance.
gm_maps_masked = nifti_masker.fit_transform(gray_matter_map_filenames)
gm_maps_masked[:, gm_maps_masked.var(0) < 0.01] = 0.

# final masking
new_images = nifti_masker.inverse_transform(gm_maps_masked)
gm_maps_masked = nifti_masker.fit_transform(new_images)
n_samples, n_features = gm_maps_masked.shape
print(n_samples, "subjects, ", n_features, "features")

### Prediction with SVR #######################################################
print("ANOVA + SVR")

### Define the prediction function to be used.
# Here we use Support Vector Regression with a linear kernel.
from sklearn.svm import SVR
svr = SVR(kernel='linear')

### Dimension reduction
from sklearn.feature_selection import SelectKBest, f_regression

# Here we use a classical univariate feature selection based on the F-test,
# namely ANOVA.
feature_selection = SelectKBest(f_regression, k=2000)

# We have our predictor (SVR) and our feature selection (SelectKBest); now we
# can plug them together in a *pipeline* that performs the two operations
# successively:
from sklearn.pipeline import Pipeline
anova_svr = Pipeline([('anova', feature_selection), ('svr', svr)])

### Fit and predict
# from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
temp_dataholder = df_features.index.values.reshape(-1, 1)
X_train = temp_dataholder[0:365]
y_train = df_features.iloc[:365, -1:]
X_test = temp_dataholder[365:370]
y_test = df_features.iloc[365:370, -1:]

"""
#Step 6 --> Training and predicting
"""
# Training the model. The original created two SVRs and kept only the second,
# silently dropping kernel='rbf'; both settings are combined into one estimator here.
from sklearn.svm import SVR
svregressor = SVR(kernel='rbf', gamma='auto')
svregressor.fit(X_train, y_train.values.ravel())

# making predictions
y_pred = svregressor.predict(X_test)
print(X_test)
print(y_test)
print(y_pred)

# ##saving results to file
#result_df = pd.DataFrame()
#
#
##result_df['ID'] =
#
print("R-squared:", metrics.r2_score(y_test_pred, test_minmax[:, 1200])) print("-----------------------------------------------------------") # SUPPORT VECTOR MACHINES # Hyperparameters: # - kernel (default=’rbf’)Specifies the kernel type to be used in the algorithm. # It must be one of ‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’ or a callable # - gamma (default=’scale’) Kernel coefficient for ‘rbf’, ‘poly’ and ‘sigmoid’. # - C (default=1.0) Regularization parameter. The strength of the regularization is inversely proportional to C. # Must be strictly positive. The penalty is a squared L2 penalty. # Defining the method svr = SVR() # Training the model with reproducibility np.random.seed(123) svr.fit(train_closest, train_minmax[:, 1200]) # Making predictions on the testing partition y_test_pred = svr.predict(test_closest) # And finally computing the test accuracy print("Mean squared error of SVM with default hyperparameters:", metrics.mean_squared_error(y_test_pred, test_minmax[:, 1200])) print("R-squared:", metrics.r2_score(y_test_pred, test_minmax[:, 1200])) print("-----------------------------------------------------------") print("-----------------------------------------------------------")
# Split the dataset into a training set and a test set
"""
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
"""

# Feature scaling (REQUIRED for SVR)
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X = sc_X.fit_transform(X)
y = sc_y.fit_transform(y.reshape(-1, 1))

# Fit the SVR regression to the dataset
from sklearn.svm import SVR
regression = SVR(kernel="rbf")
regression.fit(X, y.ravel())

# Predict with the SVR model; newer scikit-learn versions require a 2-D array
# for inverse_transform, hence the reshape.
y_pred = sc_y.inverse_transform(
    regression.predict(sc_X.transform([[6.5]])).reshape(-1, 1))

# Visualize the SVR model results
X_grid = np.arange(min(X), max(X), 0.1)
X_grid = X_grid.reshape(len(X_grid), 1)
plt.scatter(X, y, color="red")
plt.plot(X_grid, regression.predict(X_grid), color="blue")
plt.title("SVR Regression Model")
plt.xlabel("Employee position")
plt.ylabel("Salary (in $)")
plt.show()
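# Hedged alternative sketch (not in the original script): scikit-learn's
# TransformedTargetRegressor performs the target scaling and its inverse
# automatically; X_raw/y_raw stand for unscaled copies of the data and are
# assumptions.
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import make_pipeline

model = TransformedTargetRegressor(
    regressor=make_pipeline(StandardScaler(), SVR(kernel="rbf")),
    transformer=StandardScaler())
model.fit(X_raw, y_raw)
y_pred_direct = model.predict([[6.5]])  # returned directly in the original units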
def main():
    xtrain = np.load('data/x_train.npy')
    ytrain = np.load('data/y_train.npy')
    ytrainreg = np.load('data/loss.npy')

    # train-test split (modern ShuffleSplit API: n_splits plus .split())
    ss1 = ShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    for train_idx, test_idx in ss1.split(xtrain):
        xtest = xtrain[test_idx, :]
        ytest = ytrain[test_idx]
        ytestreg = ytrainreg[test_idx]
        xtrain = xtrain[train_idx, :]
        ytrain = ytrain[train_idx]
        ytrainreg = ytrainreg[train_idx]

    # regression data
    xtrain_reg = xtrain[ytrainreg > 0]
    loss_reg = ytrainreg[ytrainreg > 0]

    # split regression training data into a train set and a
    # cross-validation set (for ensembling)
    ss2 = ShuffleSplit(n_splits=1, test_size=0.3, random_state=42)
    for train_idx, test_idx in ss2.split(xtrain_reg):
        xcv = xtrain_reg[test_idx, :]
        loss_cv = loss_reg[test_idx]
        xtrain_reg = xtrain_reg[train_idx, :]
        loss_reg = loss_reg[train_idx]

    # classification features, generated by clf_selector.py
    sel_clf_feats = np.load('features/clf_sel.npy')

    # regression features
    sel_reg1 = np.load('features/reg_sel_sgd_eps.npy')  # from reg_selector_sgd_eps_log.py
    sel_reg2 = np.load('features/reg_sel_quant.npy')    # from reg_selector_quant_log.py
    sel_reg3 = np.load('features/reg_sel_lad.npy')      # from reg_selector_lad_log.py
    feats_mat = np.vstack((sel_reg1, sel_reg2, sel_reg3))
    regs_unique = 5
    # maps regressors to feature sets (cast to int so it can be used as an index)
    feat_indic = np.hstack((0 * np.ones(regs_unique), 1 * np.ones(regs_unique),
                            2 * np.ones(regs_unique))).astype(int)

    clf = GradientBoostingClassifier(init=None, learning_rate=0.1,
                                     loss='log_loss',  # 'deviance' before scikit-learn 1.1
                                     max_depth=5, max_features='sqrt',
                                     min_samples_leaf=1, min_samples_split=2,
                                     n_estimators=500, random_state=42,
                                     subsample=1.0, verbose=0)
    t0 = time.time()
    print("fitting classifier")
    clf.fit(xtrain[:, sel_clf_feats], ytrain)
    print("done with classifier")
    print("time taken", time.time() - t0)
    joblib.dump(clf, 'models/clf.pkl', compress=3)

    # base regressors (SVR is deterministic, so the old random_state argument is dropped;
    # SGDRegressor's n_iter was renamed max_iter)
    reg1 = linear_model.SGDRegressor(loss='epsilon_insensitive', random_state=0, max_iter=100)
    reg6 = linear_model.SGDRegressor(loss='epsilon_insensitive', random_state=0, max_iter=100)
    reg11 = linear_model.SGDRegressor(loss='epsilon_insensitive', random_state=0, max_iter=100)
    reg2 = SVR(C=0.01, kernel='linear')
    reg7 = SVR(C=0.01, kernel='linear')
    reg12 = SVR(C=0.01, kernel='linear')
    reg3 = GradientBoostingRegressor(loss='lad', min_samples_leaf=5,
                                     n_estimators=1000, random_state=42)
    reg8 = GradientBoostingRegressor(loss='lad', min_samples_leaf=5,
                                     n_estimators=1000, random_state=42)
    reg13 = GradientBoostingRegressor(loss='lad', min_samples_leaf=5,
                                      n_estimators=1000, random_state=42)
    reg4 = GradientBoostingRegressor(loss='huber', alpha=0.6, min_samples_leaf=5,
                                     n_estimators=1000, random_state=42)
    reg9 = GradientBoostingRegressor(loss='huber', alpha=0.6, min_samples_leaf=5,
                                     n_estimators=1000, random_state=42)
    reg14 = GradientBoostingRegressor(loss='huber', alpha=0.6, min_samples_leaf=5,
                                      n_estimators=500, random_state=42)
    reg5 = GradientBoostingRegressor(loss='quantile', alpha=0.45, min_samples_leaf=5,
                                     n_estimators=1000, random_state=42)
    reg10 = GradientBoostingRegressor(loss='quantile', alpha=0.45, min_samples_leaf=5,
                                      n_estimators=1000, random_state=42)
    reg15 = GradientBoostingRegressor(loss='quantile', alpha=0.45, min_samples_leaf=5,
                                      n_estimators=1000, random_state=42)

    # gather base regressors
    regs = [reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9, reg10,
            reg11, reg12, reg13, reg14, reg15]
    n_regs = len(regs)

    print("fitting regressors")
    j = 0
    i = 1
    for reg in regs:
        feats = feats_mat[feat_indic[j], :]
        t0 = time.time()
        print("fitting", i, "no of features", np.sum(feats))
        reg.fit(xtrain_reg[:, feats], np.log(loss_reg))  # training on the log of the loss
        print("done with", i)
        print("time taken", time.time() - t0)
        joblib.dump(reg, 'models/reg%s.pkl' % str(i), compress=3)
        i += 1
        j += 1

    # ensemblers
    reg_ens1 = linear_model.SGDRegressor(loss='huber', random_state=0, max_iter=100)
    reg_ens2 = linear_model.SGDRegressor(loss='epsilon_insensitive', random_state=0, max_iter=100)
    reg_ens3 = SVR(C=0.01, kernel='linear')
    reg_ens4 = GradientBoostingRegressor(loss='huber', alpha=0.6, min_samples_leaf=5,
                                         n_estimators=1000, random_state=42)
    reg_ens5 = GradientBoostingRegressor(loss='lad', n_estimators=1000,
                                         min_samples_leaf=5, random_state=42)
    reg_ens6 = GradientBoostingRegressor(loss='quantile', alpha=0.45, min_samples_leaf=5,
                                         n_estimators=1000, random_state=42)

    # gather ensemblers
    reg_ens = [reg_ens1, reg_ens2, reg_ens3, reg_ens4, reg_ens5, reg_ens6]
    n_reg_ens = len(reg_ens)

    rows_cv = np.shape(xcv)[0]
    cv_mat = np.zeros((rows_cv, n_regs))  # matrix of base predictions for ensemblers
    print("predicting regression values for CV")
    j = 0
    i = 1
    for reg in regs:
        feats = feats_mat[feat_indic[j], :]
        print("predicting for reg", i, "no of features", np.sum(feats))
        tmp_preds = reg.predict(xcv[:, feats])
        tmp_preds = np.exp(tmp_preds)  # training was done on log of loss, hence the exp
        tmp_preds = np.abs(tmp_preds)
        tmp_preds[tmp_preds > 100] = 100
        cv_mat[:, j] = tmp_preds
        j += 1
        i += 1

    print("fitting ensemble regressors")
    i = 1
    for reg in reg_ens:
        print("fitting", i)
        reg.fit(cv_mat, loss_cv)  # the ensemblers are trained on the regular loss
        joblib.dump(reg, 'models/reg_ens%s.pkl' % str(i), compress=3)
        i += 1

    rows_test = np.shape(xtest)[0]
    test_mat = np.zeros((rows_test, n_regs))  # matrix for base predictions on the test set
    print("test-set predicting")
    class_preds = clf.predict(xtest[:, sel_clf_feats])

    print("predicting regression values for test set")
    j = 0
    i = 1
    for reg in regs:
        feats = feats_mat[feat_indic[j], :]
        print("predicting for reg", i)
        tmp_preds = reg.predict(xtest[:, feats])
        tmp_preds = np.exp(tmp_preds)  # training was done on log of loss, hence the exp
        tmp_preds = np.abs(tmp_preds)
        tmp_preds[tmp_preds > 100] = 100
        test_mat[:, j] = tmp_preds
        j += 1
        i += 1

    ens_mat = np.zeros((rows_test, n_reg_ens))  # matrix for ensemble predictions
    j = 0
    i = 1
    print("predicting ensembles")
    for reg in reg_ens:
        print("predicting for reg_ens", i)
        tmp_preds = reg.predict(test_mat)
        tmp_preds = np.abs(tmp_preds)
        tmp_preds[tmp_preds > 100] = 100
        ens_mat[:, j] = tmp_preds
        j += 1
        i += 1

    # multiply regression predictions with class predictions
    loss_mat = np.multiply(test_mat, class_preds[:, np.newaxis])
    # multiply regression predictions with correct classes for mae benchmarks
    correct_loss = np.multiply(test_mat, ytest[:, np.newaxis])
    # multiply ensemble predictions with class predictions
    ens_losses = np.multiply(ens_mat, class_preds[:, np.newaxis])
    # multiply ensemble predictions with correct classes for mae benchmarks
    ens_losses_correct = np.multiply(ens_mat, ytest[:, np.newaxis])

    print("predictor performance")
    print("output format:")
    print("model", "\t", "mae", "\t", "mae for correct classes", "\t", "mae for defaults")

    print("individual learners")
    for k in range(n_regs):
        tmp_preds = loss_mat[:, k]
        mae1 = np.mean(np.abs(tmp_preds - ytestreg))
        tmp_preds2 = correct_loss[:, k]
        mae2 = np.mean(np.abs(tmp_preds2 - ytestreg))
        mae3 = np.mean(np.abs(tmp_preds2[tmp_preds2 > 0] - ytestreg[tmp_preds2 > 0]))
        print("reg", k + 1, "\t", mae1, "\t", mae2, "\t", mae3)

    print("ensemblers")
    for k in range(n_reg_ens):
        tmp_preds = ens_losses[:, k]
        mae1 = np.mean(np.abs(tmp_preds - ytestreg))
        tmp_preds2 = ens_losses_correct[:, k]
        mae2 = np.mean(np.abs(tmp_preds2 - ytestreg))
        mae3 = np.mean(np.abs(tmp_preds2[tmp_preds2 > 0] - ytestreg[tmp_preds2 > 0]))
        print("reg_ens", k + 1, "\t", mae1, "\t", mae2, "\t", mae3)

    # mean of all ensemblers
    mean_ens_losses = np.mean(ens_losses, 1)
    mean_ens_correct = np.mean(ens_losses_correct, 1)
    mae1 = np.mean(np.abs(mean_ens_losses - ytestreg))
    mae2 = np.mean(np.abs(mean_ens_correct - ytestreg))
    mae3 = np.mean(np.abs(mean_ens_correct[mean_ens_correct > 0] - ytestreg[mean_ens_correct > 0]))
    print("mean_ens", "\t", mae1, "\t", mae2, "\t", mae3)

    # mean of the two best ensemblers
    best_ens = np.mean(ens_losses[:, (0, 2)], 1)
    best_ens_correct = np.mean(ens_losses_correct[:, (0, 2)], 1)
    mae1 = np.mean(np.abs(best_ens - ytestreg))
    mae2 = np.mean(np.abs(best_ens_correct - ytestreg))
    mae3 = np.mean(np.abs(best_ens_correct[best_ens_correct > 0] - ytestreg[best_ens_correct > 0]))
    print("best_ens", "\t", mae1, "\t", mae2, "\t", mae3)

    # other benchmarks
    print("mae for class_preds:")
    print(np.mean(np.abs(class_preds - ytestreg)))
    print("mae for 3*class_preds:")
    print(np.mean(np.abs(3 * class_preds - ytestreg)))
    print("roc_auc for classes:")
    print(roc_auc_score(ytest, class_preds))
    print("f1-score for classes:")
    print(f1_score(ytest, class_preds))
    print("mae of all zeroes")
    print(np.mean(np.abs(0 - ytestreg)))
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold, GridSearchCV

abalone = pd.read_csv('C:/Users/wjdql/Desktop/A6/abalone.csv')
d_list = [abalone]

for d in d_list:
    X_train, X_test, y_train, y_test = train_test_split(d.iloc[:, :-1],
                                                        d.iloc[:, -1],
                                                        test_size=0.5,
                                                        random_state=42)
    pipe = Pipeline([('preprocessing', None), ('regressor', SVR())])
    hyperparam_grid = [{
        'regressor': [SVR()],
        'preprocessing': [StandardScaler(), MinMaxScaler(), None],
        'regressor__gamma': [0.1, 10, 1000],
        'regressor__C': [0.001, 0.01, 0.1],
        'regressor__epsilon': [0.001, 0.01, 0.1]
    }, {
        'regressor': [MLPRegressor(solver='adam', max_iter=1500)],
        'preprocessing': [StandardScaler(), MinMaxScaler(), None],
        'regressor__hidden_layer_sizes': [(100, ), (30, 30), (10, 10, 10)],
        'regressor__alpha': [0.0001, 0.01, 1],
        'regressor__activation': ['tanh', 'relu']
    }, {
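# Hedged continuation sketch (the original snippet is truncated above): once
# the grid list is complete, the imported KFold/GridSearchCV can drive the
# search; the cv settings here are assumptions.
grid = GridSearchCV(pipe, hyperparam_grid,
                    cv=KFold(n_splits=5, shuffle=True, random_state=42),
                    n_jobs=-1)
grid.fit(X_train, y_train)
print("Best params:", grid.best_params_)
print("Test R^2:", grid.score(X_test, y_test))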
r2 = 1 - SS_res.sum() / SS_tot.sum()
# print(r2)
print("*" * 80)
print("Linear Regression Constant Mean Prediction")
print("RMSE: %f" % np.sqrt(SS_tot.mean()))
print("MAE: %f" % abs_.mean())
print("*" * 80)

print("Support Vector Regression")
x_train, x_test, y_train, y_test = train_test_split(X_, Y,
                                                    test_size=0.1,
                                                    random_state=1725)
clf = SVR(kernel='linear', C=100000, gamma=1e-7, cache_size=2000, epsilon=0.6)
# clf = SVR(kernel='poly', degree=3, C=1000, gamma=0.1, cache_size=2000)
clf.fit(x_train, y_train.values.ravel())
y_pred = clf.predict(x_test)
print("MSE: %f" % metrics.mean_squared_error(y_test, y_pred))
print("RMSE: %f" % np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
print("MAE: %f" % metrics.mean_absolute_error(y_test, y_pred))
print("R^2: %f" % clf.score(x_test, y_test))

SS_tot = (y_test.mean() - y_test) * (y_test.mean() - y_test)
SS_res = (y_pred - y_test.values.ravel()) * (y_pred - y_test.values.ravel())
abs_ = abs(y_test.mean() - y_test)
# print("SS_res: %f" % SS_res.sum())
# print("SS_tot: %f" % SS_tot.sum())
e_alphas = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007]
e_l1ratio = [0.8, 0.85, 0.9, 0.95, 0.99, 1]

ridge = make_pipeline(RobustScaler(), RidgeCV(alphas=alphas_alt, cv=tscv))
lasso = make_pipeline(
    RobustScaler(),
    LassoCV(max_iter=10**7, alphas=alphas2, random_state=42, cv=tscv))
elasticnet = make_pipeline(
    RobustScaler(),
    ElasticNetCV(max_iter=10**7, alphas=e_alphas, cv=tscv, l1_ratio=e_l1ratio))
svr = make_pipeline(RobustScaler(), SVR(C=20, epsilon=0.008, gamma=0.0004))
gbr = GradientBoostingRegressor(n_estimators=3000,
                                learning_rate=0.03,
                                max_depth=4,
                                max_features='sqrt',
                                min_samples_leaf=20,
                                min_samples_split=10,
                                loss='huber',
                                random_state=42)
lightgbm = LGBMRegressor(
    objective='regression',
    num_leaves=4,
print('max is ', np.max(boston.target))
print('min is ', np.min(boston.target))
print('mean is ', np.mean(boston.target))

ss_x = StandardScaler()
ss_y = StandardScaler()
x_train = ss_x.fit_transform(x_train)
x_test = ss_x.transform(x_test)
# StandardScaler expects 2-D input, so the targets are reshaped, scaled,
# and flattened back to 1-D.
y_train = ss_y.fit_transform(y_train.reshape(-1, 1)).ravel()
y_test = ss_y.transform(y_test.reshape(-1, 1)).ravel()

# SVR with a linear kernel
linear_svr = SVR(kernel='linear')
linear_svr.fit(x_train, y_train)
linear_svr_y_predict = linear_svr.predict(x_test)

# SVR with a polynomial kernel
ploy_svr = SVR(kernel='poly')
ploy_svr.fit(x_train, y_train)
ploy_svr_y_predict = ploy_svr.predict(x_test)

# SVR with an RBF (radial basis function) kernel
rbf_svr = SVR(kernel='rbf')
rbf_svr.fit(x_train, y_train)
rbf_svr_y_predict = rbf_svr.predict(x_test)

print('The R2 ', r2_score(y_test, linear_svr_y_predict))
def svr_predict(x, y, kernel="rbf"): svr_regressor = SVR(kernel=kernel) svr_regressor.fit(x, y) #train predict = svr_regressor.predict(x) return predict
def SVM(x_test, y_test, x_train, y_train):
    from sklearn.svm import SVR
    y_pred = SVR(gamma=0.9, C=1.0, epsilon=0.2).fit(x_train, y_train).predict(x_test)
    return y_pred
# In[26]:

print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, predictionsdtr))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, predictionsdtr))
print('Root Mean Squared Error:',
      np.sqrt(metrics.mean_squared_error(y_test, predictionsdtr)))

# # Model 4: Support Vector Machine Regressor

# In[28]:

from sklearn.svm import SVR
model = SVR()

# In[29]:

model.fit(X_train, y_train)

# In[30]:

predictionssvr = model.predict(X_test)
df6 = pd.DataFrame({'Actual': y_test, 'Predicted': predictionssvr})
df7 = df6.head(25)
def get_svr_lin_model(temp_hum, sensor):
    # renamed from svr_rbf: this model uses a linear kernel
    svr_lin = SVR(kernel='linear')
    X = temp_hum
    r_y = sensor.reshape(sensor.shape[0],)
    return svr_lin.fit(X, r_y)
train = pd.read_csv(
    '/Users/ivan/Work_directory/Afr-Soil-Prediction-master/data/train_py.csv')
test = pd.read_csv(
    '/Users/ivan/Work_directory/Afr-Soil-Prediction-master/data/test_py.csv')

labels = train[['Ca', 'P', 'pH', 'SOC', 'Sand']].values
PIDN = test[['PIDN']].values
train.drop(['Ca', 'P', 'pH', 'SOC', 'Sand'], axis=1, inplace=True)
test.drop('PIDN', axis=1, inplace=True)

xtrain, xtest = np.array(train)[:, :3569], np.array(test)[:, :3569]
xtrain_scaled = preprocessing.scale(xtrain)
xtest_scaled = preprocessing.scale(xtest)

# One linear SVR fit per target column.
svr_lin = SVR(kernel='linear', C=1e4, verbose=2)
preds = np.zeros((xtest.shape[0], 5))
for i in range(5):
    svr_lin.fit(xtrain_scaled, labels[:, i])
    preds[:, i] = svr_lin.predict(xtest_scaled).astype(float)

sample = pd.read_csv(
    '/Users/ivan/Work_directory/Afr-Soil-Prediction-master/submission_new/2.csv')
sample['Ca'] = preds[:, 0]
sample['P'] = preds[:, 1]
sample['pH'] = preds[:, 2]
sample['SOC'] = preds[:, 3]
sample['Sand'] = preds[:, 4]
from sklearn.svm import SVR, SVC
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.pipeline import Pipeline
from ...utils.validation import (
    _normalize_param_grid,
    check_param_grids,
    check_datasets,
    check_random_states,
    check_oversamplers_classifiers
)

X, y = make_regression()
ESTIMATORS = [
    ('lr', LinearRegression()),
    ('svr', SVR()),
    ('pip', Pipeline([('scaler', MinMaxScaler()), ('lr', LinearRegression())]))
]
PARAM_GRIDS = [
    {'lr__normalize': [True, False], 'lr__fit_intercept': [True, False]},
    {'svr__C': [0.01, 0.1, 1.0], 'svr__kernel': ['rbf', 'linear']},
    {'pip__scaler__feature_range': [(0, 1), (0, 10)],
     'pip__lr__normalize': [True, False]}
]
UPDATED_PARAM_GRIDS = [
    {'lr__normalize': [True, False], 'lr__fit_intercept': [True, False],
     'est_name': ['lr']},
    {'svr__C': [0.01, 0.1, 1.0], 'svr__kernel': ['rbf', 'linear'],
     'est_name': ['svr']},
    {'pip__scaler__feature_range': [(0, 1), (0, 10)],
     'pip__lr__normalize': [True, False], 'est_name': ['pip']}
]
OVERSAMPLERS = [
    ('random', RandomOverSampler()),
    ('smote', SMOTE(), {'k_neighbors': [2, 3, 4],
                        'kind': ['regular', 'borderline1']})
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
sc_y = StandardScaler()
# The target must be 2-D for StandardScaler and 1-D for SVR.fit.
y_train = sc_y.fit_transform(y_train.reshape(-1, 1)).ravel()

# ## Training the SVR Model

# In[ ]:

from sklearn.svm import SVR
model = SVR(kernel='rbf')
model.fit(X_train, y_train)

# ## Predicting the Test Set Results

# In[ ]:

# inverse_transform expects a 2-D array in recent scikit-learn versions.
y_pred = sc_y.inverse_transform(
    model.predict(sc_X.transform(X_test)).reshape(-1, 1)).ravel()

# ## Comparing Predicted Y with Real Y (Test Set)

# In[ ]:

data = pd.DataFrame()
def setUp(self):
    """Unittest set up."""
    from sklearn.svm import SVR
    self.model = SVR()
    super(BostonSvrTest, self)._setup(self.model, self.model.fit, load_boston())
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)

# Different regression algorithms
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()

from sklearn.preprocessing import PolynomialFeatures
regressor = PolynomialFeatures(degree=4)
X_poly = regressor.fit_transform(X)

from sklearn.svm import SVR
regressor = SVR(kernel="rbf")

from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor()

from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=100)

# Fitting and predicting; predict() expects a 2-D array, not a bare scalar.
regressor.fit(X, y)
y_pred = regressor.predict([[10]])

# In the case where we scaled the variables (inverse_transform needs 2-D input):
y_pred = sc_y.inverse_transform(
    regressor.predict(sc_X.transform(np.array([[10]]))).reshape(-1, 1))