def get_models(self, input_dl, type_dl, list_chosen, active, lost):
        """Generate a library of base learners."""

        # linreg=LinearRegression()
        # svrr = SVR(kernel='rbf')
        # dtr=DecisionTreeRegressor(random_state=self.SEED)
        # rf = RandomForestRegressor(n_estimators=100, bootstrap=True, random_state=self.SEED)
        # br = BaggingRegressor(n_estimators=300,random_state=self.SEED)
        # ada = AdaBoostRegressor(n_estimators=300,random_state=self.SEED)
        # gbr = GradientBoostingRegressor(n_estimators=300,random_state=self.SEED)
        # xgbr1 = xgb.XGBRegressor(n_estimators=100,random_state=self.SEED)

        linreg = LinearRegression(fit_intercept=True)  # 'normalize' was removed in scikit-learn 1.2
        dtr = DecisionTreeRegressor(random_state=222, min_samples_split=0.018,
                                    min_samples_leaf=0.007, max_depth=25)
        svrr = SVR(kernel='linear', epsilon=5)
        br = BaggingRegressor(n_estimators=350, max_samples=0.9, max_features=0.7,
                              bootstrap=False, random_state=self.SEED)
        ada = AdaBoostRegressor(n_estimators=7, loss='exponential', learning_rate=0.01,
                                random_state=self.SEED)
        rf = RandomForestRegressor(n_estimators=1000, max_depth=30, max_leaf_nodes=1000,
                                   random_state=self.SEED)
        gbr = GradientBoostingRegressor(n_estimators=1000, learning_rate=0.01,
                                        random_state=self.SEED)
        xgbr1 = xgb.XGBRegressor(random_state=self.SEED)
        mdl = LGBMRegressor(n_estimators=1000, learning_rate=0.01)

        las = Lasso()
        rid = Ridge()
        en = ElasticNet()
        huber = HuberRegressor(max_iter=2000)
        lasl = LassoLars(max_iter=2000, eps=1, alpha=0.5)  # 'normalize' is no longer a LassoLars parameter
        pa = PassiveAggressiveRegressor(C=1, max_iter=4000, random_state=self.SEED)
        sgd = SGDRegressor(max_iter=2000, tol=1e-3)

        knn = KNeighborsRegressor(n_neighbors=20)
        ex = ExtraTreeRegressor()
        exs = ExtraTreesRegressor(n_estimators=1000)

        dl = self.deep_learning_model(input_dl, dropout_val=0.2, type=type_dl, active=active, lost=lost)

        models_temp = {
            'deep learning': dl,

            'BaggingRegressor': br,
            'RandomForestRegressor': rf,
            'GradientBoostingRegressor': gbr,
            'XGBRegressor': xgbr1,
            'LGBMRegressor': mdl,
            'ExtraTreesRegressor': exs,

            'LinearRegression': linreg,
            'SVR': svrr,
            'AdaBoostRegressor': ada,
            'LassoLars': lasl,
            'PassiveAggressiveRegressor': pa,
            'SGDRegressor': sgd,

            'DecisionTreeRegressor': dtr,

            'lasso': las,
            'ridge': rid,
            'ElasticNet': en,
            'HuberRegressor': huber,

            'KNeighborsRegressor': knn,
            'ExtraTreeRegressor': ex,
        }

        models = dict()
        for model in list_chosen:
            if model in models_temp:
                models[model] = models_temp[model]
        st.write(models)

        return models
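        # A hedged usage sketch (argument values are illustrative; input_dl,
        # type_dl, active and lost are forwarded to deep_learning_model):
        # chosen = ['SVR', 'ridge', 'XGBRegressor']
        # models = self.get_models(input_dl=X.shape[1], type_dl='lstm',
        #                          list_chosen=chosen, active='relu', lost='mse')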
def get_svr_rbf_model(temp_hum, sensor):
    # --> does not work on older scikit-learn versions (gamma='scale' was added in 0.20)
    svr_rbf = SVR(kernel='rbf', gamma='scale')
    X = temp_hum
    r_y = sensor.reshape(sensor.shape[0],)
    return svr_rbf.fit(X, r_y)
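
# A minimal usage sketch for the helper above (toy arrays standing in for
# temperature/humidity features and a sensor-reading target):
if __name__ == "__main__":
    import numpy as np
    demo_model = get_svr_rbf_model(np.random.rand(50, 2), np.random.rand(50, 1))
    print(demo_model.predict([[0.4, 0.6]]))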
Example #3
# Splitting the dataset into training set and test set
"""from sklearn.cross_validation import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
"""
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_Y = StandardScaler()
X = sc_X.fit_transform(X)
y = np.reshape(a=y, newshape=(-1, 1))
y = sc_Y.fit_transform(y)

# Fitting the SVR Model to the data set
from sklearn.svm import SVR
regressor = SVR(kernel='rbf')
regressor.fit(X, y.ravel())  # ravel() avoids the column-vector warning

#Predicting a new result

# Following line didn't work for me
# y_pred = sc_Y.inverse_transform(regressor.predict(sc_X.transform(np.array([6.5]))))

y_pred = sc_Y.inverse_transform(
    regressor.predict(sc_X.transform(np.reshape(a=6.5, newshape=(1, -1))))
    .reshape(-1, 1))  # inverse_transform expects a 2D array
print(y_pred)

# Visualizing the Regression results
plt.scatter(X, y, color='red')
plt.plot(X, regressor.predict(X), color='blue')
plt.title('Truth or Bluff (SVR Model)')

ridReg = linear_model.Ridge(alpha=0.005)
t0_train = time.time()
ridReg.fit(X, y)
t1_train = time.time()
t0_pred = time.time()
y_ridReg = ridReg.predict(X)
t1_pred = time.time()
kFoldsCrossValidation('Ridge Regression', ridReg, X, y, y_ridReg)
print("training time: %.12f" % (t1_train - t0_train))
print("prediction latency: %.12f\n" % (t1_pred - t0_pred))

# Support Vector Regression model
svr_rbf = SVR(kernel='rbf', gamma=0.1, C=100.0)
t0_train = time.time()
svr_rbf.fit(X, y)
t1_train = time.time()
t0_pred = time.time()
y_svr = svr_rbf.predict(X)
t1_pred = time.time()
kFoldsCrossValidation('SVR-RBF', svr_rbf, X, y, y_svr)
print("training time: %.12f" % (t1_train - t0_train))
print("prediction latency: %.12f\n" % (t1_pred - t0_pred))

# Gaussian Process model
kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
t0_train = time.time()
gp.fit(X, y)
Example #5
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVR
import numpy as np


def svm(X_train, X_test, y_train, y_test, params):
    print("SVM Training Started, Please wait.........")
    svm_choose = {
        'C': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0],
        'gamma': ['scale', 'auto'],
        'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
    }

    svm_paramgrid = {
        'C':
        hp.choice(
            'C',
            [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]),
        'gamma':
        hp.choice('gamma', ['scale', 'auto']),
        'kernel':
        hp.choice('kernel', ['linear', 'poly', 'rbf', 'sigmoid'])
    }

    def objective_svm(svm_paramgrid):
        model = SVR(C=svm_paramgrid['C'],
                    gamma=svm_paramgrid['gamma'],
                    kernel=svm_paramgrid['kernel'])

        # cross_val_score returns R^2 for regressors; hyperopt minimizes the
        # objective, so return the negated score
        score = cross_val_score(model, X_train, y_train, cv=4).mean()

        return {'loss': -score, 'status': STATUS_OK}

    trials_svm = Trials()
    best_svm = fmin(fn=objective_svm,
                    space=svm_paramgrid,
                    algo=tpe.suggest,
                    max_evals=100,
                    trials=trials_svm)
    print("best_svm")
    print(best_svm)

    svm_paramgrid_best = {}  # map hyperopt's chosen indices back to values
    for i in best_svm.keys():
        svm_paramgrid_best[i] = svm_choose[i][best_svm[i]]

    print("svm_paramgrid_best")
    print(svm_paramgrid_best)

    model = SVR(C=svm_paramgrid_best['C'],
                gamma=svm_paramgrid_best['gamma'],
                kernel=svm_paramgrid_best['kernel'])

    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print('RMSE -', np.sqrt(metrics.mean_squared_error(y_test, predictions)))
    print("---------------------")
    print("Score - ", metrics.r2_score(y_test, predictions))

    params['algorithms']["svm"] = (metrics.r2_score(y_test, predictions)) * 100

    params["algokeys"] = list(params["algorithms"].keys())
    params["algovalues"] = list(params["algorithms"].values())

    if params["best_acc"] < round(params['algorithms']["svm"], 2):
        params["best_acc"] = round(params['algorithms']["svm"], 2)
print(lin_reg.predict([[6.6]]))  # scikit-learn expects 2D input

print(lin_reg2.predict(poly_reg.fit_transform([[11]])))
print(lin_reg2.predict(poly_reg.fit_transform([[6.6]])))

# scaling the data
from sklearn.preprocessing import StandardScaler

sc1 = StandardScaler()
x_olcekli = sc1.fit_transform(X)
sc2 = StandardScaler()
y_olcekli = sc2.fit_transform(Y)

from sklearn.svm import SVR

svr_reg = SVR(kernel = 'rbf')
svr_reg.fit(x_olcekli,y_olcekli)

plt.scatter(x_olcekli,y_olcekli,color='red')
plt.plot(x_olcekli,svr_reg.predict(x_olcekli),color='blue')

print(svr_reg.predict([[11]]))  # note: predictions are in the scaled space
print(svr_reg.predict([[6.6]]))


# %% [markdown]
# Thus, we saw that `PolynomialFeatures` is actually doing the same
# operation that we did manually above.
#
# The last possibility to make a linear model more expressive is to use a
# "kernel". Instead of learning a weight per feature as we previously
# emphasized, a weight will be assigned per sample instead. However, not all
# samples will be used. This is the basis of the support vector machine
# algorithm.

# %%
from sklearn.svm import SVR

svr = SVR(kernel="linear")
svr.fit(X, y)
y_pred = svr.predict(X)

plt.plot(x[sorted_idx], y_pred[sorted_idx], color="tab:orange")
plt.scatter(x, y)
plt.xlabel("x")
plt.ylabel("y")
_ = plt.title(
    f"Mean squared error = "
    f"{mean_squared_error(y, y_pred):.2f}"
)

# %% [markdown]
# The algorithm can be modified such that it can use a non-linear kernel.
# Then, it will compute interactions between samples using this non-linear
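
# %% [markdown]
# (The cell above is truncated in this excerpt.) As a hedged illustration of
# that point, an RBF kernel lets the same model fit a non-linear relationship
# (reusing `X`, `y`, `x` and `sorted_idx` from above):

# %%
svr_rbf = SVR(kernel="rbf")
svr_rbf.fit(X, y)
y_pred_rbf = svr_rbf.predict(X)

plt.plot(x[sorted_idx], y_pred_rbf[sorted_idx], color="tab:green")
plt.scatter(x, y)
_ = plt.title(
    f"Mean squared error = {mean_squared_error(y, y_pred_rbf):.2f}"
)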
Example #8
    def do_test(self, MainWindow):
        if self.train is not None and self.test is not None:
            steps = 6
            train = DataFrame()
            train['Data2'] = list(self.train['Data2'].values)
            train['Data'] = list(self.train['Data'].values)
            df = series_to_supervised(train.values, steps)

            xscaler = MinMaxScaler()
            yscaler = MinMaxScaler()

            x = df.iloc[:, [a for a in range(steps * 2 - 2)]].values
            xscaler.fit(x)  # fit() returns the scaler itself, not scaled data

            y = df.iloc[:, [steps * 2 - 1]].values.ravel()
            y2d = df.iloc[:, [steps * 2 - 1]].values
            yscaler.fit(y2d)

            test = DataFrame()
            test['Data2'] = list(self.test['Data2'].values)
            test['Data'] = list(self.test['Data'].values)
            df = series_to_supervised(test.values, steps)
            dft = series_to_supervised(list(self.test['Time'].values), steps)
            xtest = df.iloc[:, [a for a in range(steps * 2 - 2)]].values
            ytest = df.iloc[:, [steps * 2 - 1]].values.ravel()

            regressor = SVR(kernel='linear', epsilon=1.0, verbose=True)
            regressor.fit(xscaler.transform(x),
                          yscaler.transform(y.reshape(-1, 1)).ravel())
            ypred = regressor.predict(xscaler.transform(xtest))
            score = regressor.score(
                xscaler.transform(xtest),
                yscaler.transform(ytest.reshape(-1, 1)).ravel())
            mse = self.mse(
                ytest,
                yscaler.inverse_transform(ypred.reshape(-1, 1)).ravel())
            # mse = mean_squared_error(ytest, yscaler.inverse_transform(ypred.reshape(-1, 1)).ravel())
            mae = self.mae(
                ytest,
                yscaler.inverse_transform(ypred.reshape(-1, 1)).ravel())
            # mae = mean_absolute_error(ytest, yscaler.inverse_transform(ypred.reshape(-1, 1)).ravel())
            rmse = self.rmse(
                ytest,
                yscaler.inverse_transform(ypred.reshape(-1, 1)).ravel())
            # rmse = sqrt(mse)
            print("SVR Kernel Linear")
            print(f"Score: {score}")
            print(f"MSE: {mse}")
            print(f"MAE: {mae}")
            print(f"RMSE: {rmse}\n")

            from sklearn.neural_network import MLPRegressor
            hidden = 5
            reg = MLPRegressor(hidden_layer_sizes=(5, ),
                               activation='logistic',
                               solver='lbfgs',
                               alpha=0.0001,
                               random_state=0,
                               verbose=True)
            reg.fit(x, y)
            ypredmlp = reg.predict(xtest)
            scoremlp = reg.score(xtest, ytest)
            msemlp = self.mse(ytest, ypredmlp)
            # msemlp = mean_squared_error(ytest, ypredmlp)
            maemlp = self.mae(ytest, ypredmlp)
            # maemlp = mean_absolute_error(ytest, ypredmlp)
            rmsemlp = self.rmse(ytest, ypredmlp)
            # rmsemlp = sqrt(msemlp)
            print("Neural Network - Backpropagation")
            print(reg)
            print(f"Input: {x.shape[1]}")
            print(f"Hidden: {hidden}")
            print(f"Output: {reg.n_outputs_}")
            print(f"Score: {scoremlp}")
            print(f"MSE: {msemlp}")
            print(f"MAE: {maemlp}")
            print(f"RMSE: {rmsemlp}\n")

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(0, 5, item)
            item = self.tableWidget_2.item(0, 5)
            item.setText(str(scoremlp))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(1, 5, item)
            item = self.tableWidget_2.item(1, 5)
            item.setText(str(msemlp))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(2, 5, item)
            item = self.tableWidget_2.item(2, 5)
            item.setText(str(maemlp))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(3, 5, item)
            item = self.tableWidget_2.item(3, 5)
            item.setText(str(rmsemlp))

            f, ax = plt.subplots()
            actual = ax.plot(self.data['Time'].values,
                             self.data['Data'].values,
                             color='blue',
                             label='Actual')
            ttest = dft['var1(t)'].values
            predictedsvr = ax.plot(ttest,
                                   yscaler.inverse_transform(
                                       ypred.reshape(-1, 1)).ravel(),
                                   color='red',
                                   label='Predicted (SVR)')
            predictedmlp = ax.plot(ttest,
                                   ypredmlp,
                                   color='green',
                                   label='Predicted (MLP)')
            ax.legend()
            plt.xlabel('Month')
            plt.ylabel('Number of License')

            ax.set_ylim([0, 200])

            plt.savefig('Plot.png')

            pic = QtGui.QPixmap('Plot.png')
            pic = pic.scaled(811, 441)
            self.graphicsView.setPixmap(pic)

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(0, 0, item)
            item = self.tableWidget_2.item(0, 0)
            item.setText(str(score))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(1, 0, item)
            item = self.tableWidget_2.item(1, 0)
            item.setText(str(mse))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(2, 0, item)
            item = self.tableWidget_2.item(2, 0)
            item.setText(str(mae))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(3, 0, item)
            item = self.tableWidget_2.item(3, 0)
            item.setText(str(rmse))

            regressor = SVR(kernel='rbf', epsilon=1.0)
            regressor.fit(x, y)
            ypred = regressor.predict(xtest)
            score = regressor.score(xtest, ytest)
            mse = self.mse(ytest, ypred)
            # mse = mean_squared_error(ytest, ypred)
            mae = self.mae(ytest, ypred)
            # mae = mean_absolute_error(ytest, ypred)
            rmse = self.rmse(ytest, ypred)
            # rmse = sqrt(mse)
            print("SVR Kernel RBF")
            print(f"Score: {score}")
            print(f"MSE: {mse}")
            print(f"MAE: {mae}")
            print(f"RMSE: {rmse}\n")

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(0, 1, item)
            item = self.tableWidget_2.item(0, 1)
            item.setText(str(score))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(1, 1, item)
            item = self.tableWidget_2.item(1, 1)
            item.setText(str(mse))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(2, 1, item)
            item = self.tableWidget_2.item(2, 1)
            item.setText(str(mae))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(3, 1, item)
            item = self.tableWidget_2.item(3, 1)
            item.setText(str(rmse))

            from sklearn import linear_model
            reg = linear_model.Lasso(alpha=0.1)
            reg.fit(x, y)
            ypred = reg.predict(xtest)
            score = reg.score(xtest, ytest)
            mse = self.mse(ytest, ypred)
            # mse = mean_squared_error(ytest, ypred)
            mae = self.mae(ytest, ypred)
            # mae = mean_absolute_error(ytest, ypred)
            rmse = self.rmse(ytest, ypred)
            # rmse = sqrt(mse)
            print("Linear Model - Lasso")
            print(f"Score: {score}")
            print(f"MSE: {mse}")
            print(f"MAE: {mae}")
            print(f"RMSE: {rmse}\n")

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(0, 2, item)
            item = self.tableWidget_2.item(0, 2)
            item.setText(str(score))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(1, 2, item)
            item = self.tableWidget_2.item(1, 2)
            item.setText(str(mse))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(2, 2, item)
            item = self.tableWidget_2.item(2, 2)
            item.setText(str(mae))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(3, 2, item)
            item = self.tableWidget_2.item(3, 2)
            item.setText(str(rmse))

            reg = linear_model.ElasticNet(alpha=0.1)
            reg.fit(x, y)
            ypred = reg.predict(xtest)
            score = reg.score(xtest, ytest)
            mse = mean_squared_error(ytest, ypred)
            mae = mean_absolute_error(ytest, ypred)
            rmse = sqrt(mse)
            print("Linear Model - Elastic Net")
            print(f"Score: {score}")
            print(f"MSE: {mse}")
            print(f"MAE: {mae}")
            print(f"RMSE: {rmse}\n")

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(0, 3, item)
            item = self.tableWidget_2.item(0, 3)
            item.setText(str(score))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(1, 3, item)
            item = self.tableWidget_2.item(1, 3)
            item.setText(str(mse))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(2, 3, item)
            item = self.tableWidget_2.item(2, 3)
            item.setText(str(mae))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(3, 3, item)
            item = self.tableWidget_2.item(3, 3)
            item.setText(str(rmse))

            reg = linear_model.Ridge(alpha=0.1)
            reg.fit(x, y)
            ypred = reg.predict(xtest)
            score = reg.score(xtest, ytest)
            mse = mean_squared_error(ytest, ypred)
            mae = mean_absolute_error(ytest, ypred)
            rmse = sqrt(mse)
            print("Linear Model - Ridge")
            print(f"Score: {score}")
            print(f"MSE: {mse}")
            print(f"MAE: {mae}")
            print(f"RMSE: {rmse}\n")

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(0, 4, item)
            item = self.tableWidget_2.item(0, 4)
            item.setText(str(score))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(1, 4, item)
            item = self.tableWidget_2.item(1, 4)
            item.setText(str(mse))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(2, 4, item)
            item = self.tableWidget_2.item(2, 4)
            item.setText(str(mae))

            item = QtWidgets.QTableWidgetItem()
            self.tableWidget_2.setItem(3, 4, item)
            item = self.tableWidget_2.item(3, 4)
            item.setText(str(rmse))
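
            # The repeated three-line table updates above could be collapsed
            # with a small helper; a refactoring sketch (the `_set_cell` name
            # is hypothetical, not part of the original code):
            #
            #     def _set_cell(self, row, col, value):
            #         item = QtWidgets.QTableWidgetItem()
            #         self.tableWidget_2.setItem(row, col, item)
            #         self.tableWidget_2.item(row, col).setText(str(value))
            #
            #     for row, value in enumerate((score, mse, mae, rmse)):
            #         self._set_cell(row, 4, value)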

        else:
            MainWindow.msg = QtWidgets.QMessageBox()
            MainWindow.msg.setIcon(QtWidgets.QMessageBox.Warning)
            MainWindow.msg.setWindowTitle("Warning")
            MainWindow.msg.setText("You must open a file first")
            MainWindow.msg.setStandardButtons(QtWidgets.QMessageBox.Ok)
            MainWindow.msg.show()
Example #9
    def make_prediction(self):
        if self.train is not None and self.test is not None:
            steps = 6
            train = DataFrame()
            train['Data2'] = list(self.train['Data2'].values)
            train['Data'] = list(self.train['Data'].values)
            df = series_to_supervised(train.values, steps)
            x = df.iloc[:, [a for a in range(steps * 2 - 2)]].values
            y = df.iloc[:, [steps * 2 - 1]].values.ravel()

            start_year = 2019
            start_month = 1
            start_date = QtCore.QDate(start_year, start_month, 1)
            end_date = self.dateEdit.date()
            diff_year = end_date.year() - start_date.year()
            end_month = end_date.month() + 1
            if diff_year == 0:
                diff_month = end_month - start_month
            else:
                diff_month = 12 - start_month + ((12 * diff_year) -
                                                 (12 - end_month))
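            # Equivalently, both branches reduce to:
            # diff_month = diff_year * 12 + (end_month - start_month)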

            # diff = start_date.daysTo(end_date)
            sdm_data = np.array([])
            forecast_time = np.array([])
            forecast_data = np.empty((0, steps * 2 - 2), int)
            old_forecast = x
            x_len = len(old_forecast)
            # diff_month = (diff // 30) + 1

            total_records = len(self.train) + len(self.test)

            if diff_month > 0:
                for i in range(diff_month):
                    sdm_data = np.append(sdm_data, old_forecast[i % x_len][-2])
                    forecast_data = np.append(forecast_data,
                                              np.array(
                                                  [old_forecast[i % x_len]]),
                                              axis=0)
                    forecast_time = np.append(forecast_time,
                                              f'{start_year}-{start_month}')
                    if start_month == 12:
                        start_year = start_year + 1
                        start_month = 1
                    else:
                        start_month = start_month + 1

                regressor = SVR(kernel='linear', epsilon=1.0, verbose=True)
                regressor.fit(x, y)
                ypred = regressor.predict(forecast_data)

                from sklearn.neural_network import MLPRegressor
                hidden = 5
                reg = MLPRegressor(hidden_layer_sizes=(5, ),
                                   activation='logistic',
                                   solver='lbfgs',
                                   alpha=0.0001,
                                   random_state=0,
                                   verbose=True)
                reg.fit(x, y)
                ypredmlp = reg.predict(forecast_data)

                f, ax = plt.subplots()
                actual = ax.plot(self.data['Time'].values,
                                 self.data['Data'].values,
                                 color='blue',
                                 label='Actual')
                predicted = ax.plot(forecast_time,
                                    ypred,
                                    color='red',
                                    label='Forecast (SVR)')
                predicted = ax.plot(forecast_time,
                                    ypredmlp,
                                    color='green',
                                    label='Forecast (MLP)')
                plt.xlabel('Month')
                plt.ylabel('Number of License')
                ax.legend()
                ax.set_ylim([0, 200])
                plt.savefig('Plot.png')

                pic = QtGui.QPixmap('Plot.png')
                pic = pic.scaled(811, 441)
                self.graphicsView.setPixmap(pic)

                self.tableWidget.setRowCount(
                    len(self.train) + len(self.test) + len(ypred))
                for i, (t, predicted, sdm, predictedmlp) in enumerate(
                        zip(forecast_time, ypred, sdm_data, ypredmlp)):
                    # 't' instead of 'time' to avoid shadowing the time module
                    item = QtWidgets.QTableWidgetItem()
                    self.tableWidget.setItem(i + total_records, 0, item)
                    item = self.tableWidget.item(i + total_records, 0)
                    item.setText(str(t))

                    item = QtWidgets.QTableWidgetItem()
                    self.tableWidget.setItem(i + total_records, 1, item)
                    item = self.tableWidget.item(i + total_records, 1)
                    item.setText(
                        f"{str(int(predicted))} (SVR), {str(int(predictedmlp))} (MLP)"
                    )

                    item = QtWidgets.QTableWidgetItem()
                    self.tableWidget.setItem(i + total_records, 2, item)
                    item = self.tableWidget.item(i + total_records, 2)
                    item.setText(str(int(sdm)))
test_y_rf = model_rf.predict(test_x)
create_result(test_y_rf, 'rand_forest.csv')

# Gradient boost model ('squared_error' replaces the old 'ls' loss name)
model_grad_boost = GradientBoostingRegressor(
    random_state=0, loss='squared_error').fit(train_x, train_y)
test_y_grad_boost = model_grad_boost.predict(test_x)
create_result(test_y_grad_boost, 'grad_boost.csv')

# KNN model
model_knn = KNeighborsRegressor().fit(train_x, train_y)
test_y_knn = model_knn.predict(test_x)
create_result(test_y_knn, 'knn.csv')

# SVM model
model_svm = SVR(C=1, epsilon=0.2).fit(train_x, train_y)
test_y_svm = model_svm.predict(test_x)
create_result(test_y_svm, 'svm.csv')

# In[ ]:

get_ipython().run_line_magic('pylab', 'inline')
rcParams['figure.figsize'] = (12.0, 6.0)
data = [
    test_y_lasso, test_y_ridge, test_y_rf, test_y_knn, test_y_grad_boost,
    test_y_xgb, test_y_svm
]
plt.figure()
plt.boxplot(data)
plt.xticks([1, 2, 3, 4, 5, 6, 7], ('lasso', 'ridge', 'random forest', 'knn',
                                   'gradient boost', 'xgboost', 'svm'))
Example #11
y = dataset.iloc[:, 2].values

# Splitting the dataset into the Training set and Test set
"""from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"""

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X = sc_X.fit_transform(X)
y = sc_y.fit_transform(y)"""

# Fitting SVR to the dataset
from sklearn.svm import SVR
regressor = SVR(kernel='rbf')  # because our prob is non-linear
regressor.fit(X, y)

# Predicting a new result (scikit-learn expects a 2D array)
y_pred = regressor.predict([[6.5]])
# y_pred = sc_y.inverse_transform(y_pred)

# Visualising the SVR results
plt.scatter(X, y, color='red')
plt.plot(X, regressor.predict(X), color='blue')
plt.title('Truth or Bluff (SVR)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()
"""# Visualising the SVR results (for higher resolution and smoother curve)
X_grid = np.arange(min(X), max(X), 0.01) # choice of 0.01 instead of 0.1 step because the data is feature scaled
Example #12
def main():
    data_dir = './data/'
    feature_dir = './features'

    # load data
    print("Loading data...")
    hist_feature = np.load(data_dir + 'histogram_feature.npz')['arr_0']
    imgNet_feature = np.load(data_dir + 'imageNet_feature.npz')['arr_0']
    vSenti_feature = np.load(data_dir + 'visual_senti_feature.npz')['arr_0']
    sen2vec_feature = np.load(data_dir +
                              'text_sentence2vec_feature.npz')['arr_0']
    social_feature = load_social_features(data_dir + 'video_id.txt',
                                          data_dir + 'video_user.txt',
                                          data_dir + 'user_details.txt')

    senti_feature = []
    for line in open(os.path.join(feature_dir, 'senti_scores.txt')):
        senti_feature.append(line.strip().split('\t'))
    senti_feature = np.array(senti_feature, dtype=np.float32)  # cast the split strings to floats
    print(senti_feature)

    # feature dimension reduction: it's up to you to decide the size of the
    # reduced dimensions; the main purpose is to reduce computation complexity
    pca = PCA(n_components=20)
    hist_feature = pca.fit_transform(hist_feature)
    # 20, 40, 10
    pca = PCA(n_components=10)
    imgNet_feature = pca.fit_transform(imgNet_feature)
    pca = PCA(n_components=20)
    vSenti_feature = pca.fit_transform(vSenti_feature)
    pca = PCA(n_components=10)
    sen2vec_feature = pca.fit_transform(sen2vec_feature)

    # concatenate all the features (after dimension reduction)
    concat_feature = np.concatenate([
        hist_feature, imgNet_feature, vSenti_feature, sen2vec_feature,
        social_feature, senti_feature
    ],
                                    axis=1)
    print("The input data dimension is: (%d, %d)" % (concat_feature.shape))

    # load ground-truth
    ground_truth = []
    for line in open(os.path.join(data_dir, 'ground_truth.txt')):
        loop_count = float(line.strip().split('::::')[0])
        like_count = float(line.strip().split('::::')[1])
        repost_count = float(line.strip().split('::::')[2])
        comment_count = float(line.strip().split('::::')[3])
        ground_truth.append(
            (loop_count + like_count + repost_count + comment_count) / 4)
    ground_truth = np.array(ground_truth, dtype=np.float32)

    # print("Start tuning model parameters...")

    # print(svc_param_selection(concat_feature, ground_truth, 10))

    print("Start training and predict...")
    kf = KFold(n_splits=10)
    nMSEs = []
    pop_predicts = np.empty([0, 1])

    for train, test in kf.split(concat_feature):
        # model initialize: you can tune the parameters within SVR
        # (http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html),
        # or select other regression models
        model = SVR(kernel='rbf', C=75000, gamma=0.0001, epsilon=0.01)
        # model = GradientBoostingRegressor(max_depth=10, n_estimators=200, learning_rate=0.1, random_state=42)
        # train
        model.fit(concat_feature[train], ground_truth[train])
        # predict
        predicts = model.predict(concat_feature[test])
        # nMSE(normalized Mean Squared Error) metric calculation
        nMSE = mean_squared_error(ground_truth[test], predicts) / np.mean(
            np.square(ground_truth[test]))
        nMSEs.append(nMSE)
        pop_predicts = np.concatenate(
            (pop_predicts, [[predict] for predict in predicts]))
        print("This round of nMSE is: %f" % (nMSE))

    print('Average nMSE is %f.' % (np.mean(nMSEs)))
    return pop_predicts
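
# Standard entry point for the script above (added for completeness):
if __name__ == '__main__':
    predictions = main()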
Example #13
                           memory='nilearn_cache')  # cache options
# remove features with too low between-subject variance
gm_maps_masked = nifti_masker.fit_transform(gray_matter_map_filenames)
gm_maps_masked[:, gm_maps_masked.var(0) < 0.01] = 0.
# final masking
new_images = nifti_masker.inverse_transform(gm_maps_masked)
gm_maps_masked = nifti_masker.fit_transform(new_images)
n_samples, n_features = gm_maps_masked.shape
print(n_samples, "subjects, ", n_features, "features")

### Prediction with SVR #######################################################
print("ANOVA + SVR")
### Define the prediction function to be used.
# Here we use a Support Vector Regression, with a linear kernel
from sklearn.svm import SVR
svr = SVR(kernel='linear')

### Dimension reduction
from sklearn.feature_selection import SelectKBest, f_regression

# Here we use a classical univariate feature selection based on F-test,
# namely Anova.
feature_selection = SelectKBest(f_regression, k=2000)

# We have our predictor (SVR), our feature selection (SelectKBest), and now,
# we can plug them together in a *pipeline* that performs the two operations
# successively:
from sklearn.pipeline import Pipeline
anova_svr = Pipeline([('anova', feature_selection), ('svr', svr)])

### Fit and predict
#from sklearn.model_selection import train_test_split
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)
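
# A hedged sketch of the elided fit/predict step for the ANOVA + SVR pipeline
# above (`age`, the subjects' ages, is the assumed target of the original
# nilearn example):
# anova_svr.fit(gm_maps_masked, age)
# age_pred = anova_svr.predict(gm_maps_masked)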

temp_dataholder = df_features.index.values.reshape(-1, 1)
X_train = temp_dataholder[0:365]
y_train = df_features.iloc[:365, -1:]

X_test = temp_dataholder[365:370]
y_test = df_features.iloc[365:370, -1:]
"""
#Step 6 --> Training and predicting 
"""

# Training model (the original created two SVRs, the second overwriting the
# first; combined here into one)
from sklearn.svm import SVR
svregressor = SVR(kernel='rbf', gamma='auto')
svregressor.fit(X_train, y_train.values.ravel())

#making predictions
y_pred = svregressor.predict(X_test)
print(X_test)
print(y_test)
print(y_pred)
#
##saving results to file
#result_df = pd.DataFrame()
#
#
##result_df['ID'] =
#
Example #15
print("R-squared:", metrics.r2_score(y_test_pred, test_minmax[:, 1200]))
print("-----------------------------------------------------------")

# SUPPORT VECTOR MACHINES

# Hyperparameters:
# - kernel (default='rbf') Specifies the kernel type to be used in the algorithm.
#   It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable.

# - gamma (default='scale') Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.

# - C (default=1.0) Regularization parameter. The strength of the regularization is
#   inversely proportional to C. Must be strictly positive. The penalty is a squared L2 penalty.

# Defining the method
svr = SVR()

# Training the model (note: the seed below does not affect SVR fitting,
# which is deterministic)
np.random.seed(123)
svr.fit(train_closest, train_minmax[:, 1200])

# Making predictions on the testing partition
y_test_pred = svr.predict(test_closest)

# And finally computing the test error
print("Mean squared error of SVM with default hyperparameters:",
      metrics.mean_squared_error(test_minmax[:, 1200], y_test_pred))
print("R-squared:", metrics.r2_score(test_minmax[:, 1200], y_test_pred))  # y_true comes first
print("-----------------------------------------------------------")
print("-----------------------------------------------------------")
Example #16
# Split the dataset into training set and test set
"""
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
"""

# Feature scaling (REQUIRED FOR SVR)
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X = sc_X.fit_transform(X)
y = sc_y.fit_transform(y.reshape(-1, 1))

# Fit the SVR regression to the dataset
from sklearn.svm import SVR
regression = SVR(kernel="rbf")
regression.fit(X, y.ravel())  # ravel() avoids the column-vector warning

# Predicting with our SVR model (inverse_transform expects a 2D array)
y_pred = sc_y.inverse_transform(
    regression.predict(sc_X.transform([[6.5]])).reshape(-1, 1))

# Visualizing the SVR model results
X_grid = np.arange(min(X), max(X), 0.1)
X_grid = X_grid.reshape(len(X_grid), 1)
plt.scatter(X, y, color="red")
plt.plot(X_grid, regression.predict(X_grid), color="blue")
plt.title("Modelo de Regresión SVR")
plt.xlabel("Posición del empleado")
plt.ylabel("Sueldo (en $)")
plt.show()
Example #17
import time

import joblib
import numpy as np
from sklearn import linear_model
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.model_selection import ShuffleSplit
from sklearn.svm import SVR


def main():
    xtrain = np.load('data/x_train.npy')
    ytrain = np.load('data/y_train.npy')
    ytrainreg = np.load('data/loss.npy')
    
    # train-test split (modern ShuffleSplit API: n_splits + .split())
    ss1 = ShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    for train_idx, test_idx in ss1.split(xtrain):
        xtest = xtrain[test_idx, :]
        ytest = ytrain[test_idx]
        ytestreg = ytrainreg[test_idx]
        xtrain = xtrain[train_idx, :]
        ytrain = ytrain[train_idx]
        ytrainreg = ytrainreg[train_idx]

    # regression data
    xtrain_reg = xtrain[ytrainreg > 0]
    loss_reg = ytrainreg[ytrainreg > 0]

    # split regression training data into train set and cross-validation set
    # (for ensembling)
    ss2 = ShuffleSplit(n_splits=1, test_size=0.3, random_state=42)
    for train_idx, test_idx in ss2.split(xtrain_reg):
        xcv = xtrain_reg[test_idx, :]
        loss_cv = loss_reg[test_idx]
        xtrain_reg = xtrain_reg[train_idx, :]
        loss_reg = loss_reg[train_idx]
        
        
    # classification features, generated by clf_selector.py
    sel_clf_feats = np.load('features/clf_sel.npy')

    # regression features
    # generated by reg_selector_sgd_eps_log.py
    sel_reg1 = np.load('features/reg_sel_sgd_eps.npy')
    # generated by reg_selector_quant_log.py
    sel_reg2 = np.load('features/reg_sel_quant.npy')
    # generated by reg_selector_lad_log.py
    sel_reg3 = np.load('features/reg_sel_lad.npy')

    feats_mat = np.vstack((sel_reg1, sel_reg2, sel_reg3))
    regs_unique = 5
    feat_indic = np.hstack((0 * np.ones(regs_unique), 1 * np.ones(regs_unique),
                            2 * np.ones(regs_unique)))  # maps regressors to features
    
    clf = GradientBoostingClassifier(init=None, learning_rate=0.1, loss='log_loss',
                                     max_depth=5, max_features='sqrt', min_samples_leaf=1,
                                     min_samples_split=2, n_estimators=500, random_state=42,
                                     subsample=1.0, verbose=0)  # 'log_loss'/'sqrt' replace the old 'deviance'/'auto'
    
    t0 = time.time()
    print("fitting classifier")
    clf.fit(xtrain[:, sel_clf_feats], ytrain)
    print("done with classifier")
    print("time taken", time.time() - t0)
    joblib.dump(clf, 'models/clf.pkl', compress=3)
    
    reg1 = linear_model.SGDRegressor(loss='epsilon_insensitive', random_state=0, max_iter=100)
    reg6 = linear_model.SGDRegressor(loss='epsilon_insensitive', random_state=0, max_iter=100)
    reg11 = linear_model.SGDRegressor(loss='epsilon_insensitive', random_state=0, max_iter=100)
    reg2 = SVR(C=0.01, kernel='linear')  # modern SVR takes no random_state
    reg7 = SVR(C=0.01, kernel='linear')
    reg12 = SVR(C=0.01, kernel='linear')
    reg3 = GradientBoostingRegressor(loss='absolute_error', min_samples_leaf=5,
                                     n_estimators=1000, random_state=42)  # 'absolute_error' replaces 'lad'
    reg8 = GradientBoostingRegressor(loss='absolute_error', min_samples_leaf=5,
                                     n_estimators=1000, random_state=42)
    reg13 = GradientBoostingRegressor(loss='absolute_error', min_samples_leaf=5,
                                      n_estimators=1000, random_state=42)
    reg4 = GradientBoostingRegressor(loss='huber', alpha=0.6, min_samples_leaf=5,
                                     n_estimators=1000, random_state=42)
    reg9 = GradientBoostingRegressor(loss='huber', alpha=0.6, min_samples_leaf=5,
                                     n_estimators=1000, random_state=42)
    reg14 = GradientBoostingRegressor(loss='huber', alpha=0.6, min_samples_leaf=5,
                                      n_estimators=500, random_state=42)
    reg5 = GradientBoostingRegressor(loss='quantile', alpha=0.45, min_samples_leaf=5,
                                     n_estimators=1000, random_state=42)
    reg10 = GradientBoostingRegressor(loss='quantile', alpha=0.45, min_samples_leaf=5,
                                      n_estimators=1000, random_state=42)
    reg15 = GradientBoostingRegressor(loss='quantile', alpha=0.45, min_samples_leaf=5,
                                      n_estimators=1000, random_state=42)
    
    # gather base regressors
    regs = [reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9, reg10,
            reg11, reg12, reg13, reg14, reg15]
    n_regs = len(regs)
    
    print "fitting regressors"
    j=0
    i=1
    for reg in regs:
        feats=feats_mat[(feat_indic[j]),:]
        t0=time.time()
        print "fitting",i, "no of features", np.sum(feats)
        reg.fit(xtrain_reg[:,feats],np.log(loss_reg)) #training on the log of the loss
        print "done with",i
        print "time taken", time.time()-t0
        joblib.dump(reg,'models/reg%s.pkl' % str(i),compress=3)
        i+=1
        j+=1
    
    reg_ens1 = linear_model.SGDRegressor(loss='huber', random_state=0, max_iter=100)
    reg_ens2 = linear_model.SGDRegressor(loss='epsilon_insensitive', random_state=0, max_iter=100)
    reg_ens3 = SVR(C=0.01, kernel='linear')
    reg_ens4 = GradientBoostingRegressor(loss='huber', alpha=0.6, min_samples_leaf=5,
                                         n_estimators=1000, random_state=42)
    reg_ens5 = GradientBoostingRegressor(loss='absolute_error', n_estimators=1000,
                                         min_samples_leaf=5, random_state=42)
    reg_ens6 = GradientBoostingRegressor(loss='quantile', alpha=0.45, min_samples_leaf=5,
                                         n_estimators=1000, random_state=42)
    
    # gather ensemblers
    reg_ens = [reg_ens1, reg_ens2, reg_ens3, reg_ens4, reg_ens5, reg_ens6]
    n_reg_ens = len(reg_ens)

    rows_cv = np.shape(xcv)[0]
    cv_mat = np.zeros((rows_cv, n_regs))  # matrix of base predictions for ensemblers
    
    
    print "predicting regression values for CV"
    j=0
    i=1
    for reg in regs:
        feats=feats_mat[(feat_indic[j]),:]
        print "predicting for reg",i, "no of features", np.sum(feats) 
        tmp_preds=reg.predict(xcv[:,feats])
        tmp_preds=np.exp(tmp_preds) #training was done on log of loss, hence the exp
        tmp_preds=np.abs(tmp_preds)
        tmp_preds[tmp_preds>100]=100
        cv_mat[:,j]=tmp_preds
        j+=1
        i+=1
    
    print "fitting ensemble regressors"
    
    i=1
    for reg in reg_ens:
        print "fitting",i
        reg.fit(cv_mat,loss_cv) #for the ensemblers, training was done on the regular loss
        joblib.dump(reg,'models/reg_ens%s.pkl' % str(i),compress=3)
        i+=1
    
    rows_test = np.shape(xtest)[0]
    test_mat = np.zeros((rows_test, n_regs))  # matrix for base predictions on test set
    
    print "test-set predicting"
    class_preds=clf.predict(xtest[:,sel_clf_feats])
    
    print "predicting regression values for test set"
    j=0
    i=1
    for reg in regs:
        feats=feats_mat[(feat_indic[j]),:]
        print "predicting for reg",i
        tmp_preds=reg.predict(xtest[:,feats])
        tmp_preds=np.exp(tmp_preds) #training was done on log of loss, hence the exp
        tmp_preds=np.abs(tmp_preds)
        tmp_preds[tmp_preds>100]=100    
        test_mat[:,j]=tmp_preds
        j+=1
        i+=1
    
    ens_mat = np.zeros((rows_test, n_reg_ens))  # matrix for ensemble predictions
    j = 0
    i = 1
    print("predicting ensembles")
    for reg in reg_ens:
        print("predicting for reg_ens", i)
        tmp_preds = reg.predict(test_mat)
        tmp_preds = np.abs(tmp_preds)
        tmp_preds[tmp_preds > 100] = 100
        ens_mat[:, j] = tmp_preds
        j += 1
        i += 1
    
    # multiply regression predictions with class predictions
    loss_mat = np.multiply(test_mat, class_preds[:, np.newaxis])
    # multiply regression predictions with correct classes for mae benchmarks
    correct_loss = np.multiply(test_mat, ytest[:, np.newaxis])

    # multiply ensemble predictions with class predictions
    ens_losses = np.multiply(ens_mat, class_preds[:, np.newaxis])
    # multiply ensemble predictions with correct classes for mae benchmarks
    ens_losses_correct = np.multiply(ens_mat, ytest[:, np.newaxis])
    
    print "predictor performance"
    print "output format:"
    print "model","\t", "mae","\t", "mae for correct classes","\t", "mae for defaults"
    print "individual learners"
    for k in range(n_regs):
        tmp_preds=loss_mat[:,k]
        mae1=np.mean(np.abs(tmp_preds-ytestreg))
        tmp_preds2=correct_loss[:,k]
        mae2=np.mean(np.abs(tmp_preds2-ytestreg))
        mae3=np.mean(np.abs(tmp_preds2[tmp_preds2>0]-ytestreg[tmp_preds2>0]))
        print "reg",k+1,"\t",mae1,"\t",mae2,"\t",mae3
    
    print "ensemblers"
    for k in range(n_reg_ens):
        tmp_preds=ens_losses[:,k]
        mae1=np.mean(np.abs(tmp_preds-ytestreg))
        tmp_preds2=ens_losses_correct[:,k]
        mae2=np.mean(np.abs(tmp_preds2-ytestreg))  
        mae3=np.mean(np.abs(tmp_preds2[tmp_preds2>0]-ytestreg[tmp_preds2>0]))
        print "reg_ens",k+1,"\t",mae1,"\t",mae2,"\t",mae3
    
    # mean of all ensemblers
    mean_ens_losses = np.mean(ens_losses, 1)
    mean_ens_correct = np.mean(ens_losses_correct, 1)
    mae1 = np.mean(np.abs(mean_ens_losses - ytestreg))
    mae2 = np.mean(np.abs(mean_ens_correct - ytestreg))
    mae3 = np.mean(np.abs(mean_ens_correct[mean_ens_correct > 0] - ytestreg[mean_ens_correct > 0]))
    print("mean_ens", "\t", mae1, "\t", mae2, "\t", mae3)
    
    # mean of two best ensemblers
    best_ens = np.mean(ens_losses[:, (0, 2)], 1)
    best_ens_correct = np.mean(ens_losses_correct[:, (0, 2)], 1)
    mae1 = np.mean(np.abs(best_ens - ytestreg))
    mae2 = np.mean(np.abs(best_ens_correct - ytestreg))
    mae3 = np.mean(np.abs(best_ens_correct[best_ens_correct > 0] - ytestreg[best_ens_correct > 0]))
    print("best_ens", "\t", mae1, "\t", mae2, "\t", mae3)
    
    # other benchmarks
    print("mae for class_preds:")
    print(np.mean(np.abs(class_preds - ytestreg)))
    print("mae for 3*class_preds:")
    print(np.mean(np.abs(3 * class_preds - ytestreg)))
    print("roc_auc for classes:")
    print(roc_auc_score(ytest, class_preds))
    print("f1-score for classes:")
    print(f1_score(ytest, class_preds))
    print("mae of all zeroes")
    print(np.mean(np.abs(0 - ytestreg)))
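
# Standard entry point for the script above (added for completeness):
if __name__ == '__main__':
    main()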
Example #18
import pandas as pd
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold, GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler

abalone = pd.read_csv('C:/Users/wjdql/Desktop/A6/abalone.csv')

d_list = [abalone]

for d in d_list:
    X_train, X_test, y_train, y_test = train_test_split(d.iloc[:, :-1],
                                                        d.iloc[:, -1],
                                                        test_size=0.5,
                                                        random_state=42)

    pipe = Pipeline([('preprocessing', None), ('regressor', SVR())])
    hyperparam_grid = [{
        'regressor': [SVR()],
        'preprocessing': [StandardScaler(),
                          MinMaxScaler(), None],
        'regressor__gamma': [0.1, 10, 1000],
        'regressor__C': [0.001, 0.01, 0.1],
        'regressor__epsilon': [0.001, 0.01, 0.1]
    }, {
        'regressor': [MLPRegressor(solver='adam', max_iter=1500)],
        'preprocessing': [StandardScaler(),
                          MinMaxScaler(), None],
        'regressor__hidden_layer_sizes': [(100, ), (30, 30), (10, 10, 10)],
        'regressor__alpha': [0.0001, 0.01, 1],
        'regressor__activation': ['tanh', 'relu']
    }, {
Example #19
    r2 = 1 - SS_res.sum() / SS_tot.sum()
    #print(r2)
    print("*" * 80)
    print("Linear Regression Constant Mean Prediction")
    print("RMSE: %f" % np.sqrt(SS_tot.mean()))
    print("MAE: %f" % abs_.mean())
    print("*" * 80)

    print("Support Vector Regression")
    x_train, x_test, y_train, y_test = train_test_split(X_,
                                                        Y,
                                                        test_size=0.1,
                                                        random_state=1725)
    clf = SVR(kernel='linear',
              C=100000,
              gamma=1e-7,
              cache_size=2000,
              epsilon=0.6)
    #clf = SVR(kernel='poly', degree=3, C=1000, gamma=0.1, cache_size=2000)
    clf.fit(x_train, y_train.values.ravel())
    y_pred = clf.predict(x_test)
    print("MSE: %f" % metrics.mean_squared_error(y_test, y_pred))
    print("RMSE: %f" % np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
    print("MAE: %f" % metrics.mean_absolute_error(y_test, y_pred))
    print("R^2: %f" % clf.score(x_test, y_test))
    SS_tot = (y_test.mean() - y_test) * (y_test.mean() - y_test)
    SS_res = (y_pred - y_test.values.ravel()) * (y_pred -
                                                 y_test.values.ravel())
    abs_ = abs(y_test.mean() - y_test)
    #print("SS_res: %f" % SS_res.sum())
    #print("SS_tot: %f" % SS_tot.sum())
e_alphas = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007]
e_l1ratio = [0.8, 0.85, 0.9, 0.95, 0.99, 1]

ridge = make_pipeline(RobustScaler(), RidgeCV(alphas=alphas_alt, cv=tscv))

lasso = make_pipeline(
    RobustScaler(),
    LassoCV(max_iter=int(1e7), alphas=alphas2, random_state=42, cv=tscv))

elasticnet = make_pipeline(
    RobustScaler(),
    ElasticNetCV(max_iter=int(1e7), alphas=e_alphas, cv=tscv, l1_ratio=e_l1ratio))

svr = make_pipeline(RobustScaler(), SVR(
    C=20,
    epsilon=0.008,
    gamma=0.0004,
))

gbr = GradientBoostingRegressor(n_estimators=3000,
                                learning_rate=0.03,
                                max_depth=4,
                                max_features='sqrt',
                                min_samples_leaf=20,
                                min_samples_split=10,
                                loss='huber',
                                random_state=42)

lightgbm = LGBMRegressor(
    objective='regression',
    num_leaves=4,
Example #21
print('max is ', np.max(boston.target))
print('min is ', np.min(boston.target))
print('mean is ', np.mean(boston.target))

ss_x = StandardScaler()
ss_y = StandardScaler()

x_train = ss_x.fit_transform(x_train)
x_test = ss_x.transform(x_test)

y_train = ss_y.fit_transform(y_train.reshape(-1, 1)).ravel()  # the scaler expects 2D input
y_test = ss_y.transform(y_test.reshape(-1, 1)).ravel()

# Configure an SVR with a linear kernel
linear_svr = SVR(kernel='linear')
linear_svr.fit(x_train, y_train)
linear_svr_y_predict = linear_svr.predict(x_test)

# Configure an SVR with a polynomial kernel
poly_svr = SVR(kernel='poly')
poly_svr.fit(x_train, y_train)
poly_svr_y_predict = poly_svr.predict(x_test)

# Configure an SVR with an RBF kernel
rbf_svr = SVR(kernel='rbf')
rbf_svr.fit(x_train, y_train)
rbf_svr_y_predict = rbf_svr.predict(x_test)


print('The R2 ', r2_score(y_test, linear_svr_y_predict))
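
# For completeness (added lines mirroring the linear-kernel score above):
print('The R2 ', r2_score(y_test, poly_svr_y_predict))
print('The R2 ', r2_score(y_test, rbf_svr_y_predict))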
Example #22
from sklearn.svm import SVR


def svr_predict(x, y, kernel="rbf"):
    svr_regressor = SVR(kernel=kernel)
    svr_regressor.fit(x, y)  # train
    predict = svr_regressor.predict(x)
    return predict
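
# A minimal usage sketch for svr_predict (toy data, illustrative only):
if __name__ == "__main__":
    import numpy as np
    x_demo = np.linspace(0, 6, 40).reshape(-1, 1)
    y_demo = np.sin(x_demo).ravel()
    print(svr_predict(x_demo, y_demo)[:5])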
def SVM(x_test, y_test, x_train, y_train):
    from sklearn.svm import SVR
    y_pred = SVR(gamma=0.9, C=1.0, epsilon=0.2).fit(x_train, y_train).predict(x_test)
    return y_pred
Example #24
# In[26]:


print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, predictionsdtr))  
print('Mean Squared Error:', metrics.mean_squared_error(y_test, predictionsdtr))  
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, predictionsdtr)))


# # Model 4: Support Vector Machine Regressor

# In[28]:


from sklearn.svm import SVR 
model = SVR()


# In[29]:


model.fit(X_train, y_train)


# In[30]:


predictionssvr = model.predict(X_test)

df6 = pd.DataFrame({'Actual': y_test, 'Predicted': predictionssvr})
df7 = df6.head(25)
Example #25
def get_svr_lin_model(temp_hum, sensor):
    svr_lin = SVR(kernel='linear')  # renamed from svr_rbf: this kernel is linear
    X = temp_hum
    r_y = sensor.reshape(sensor.shape[0],)
    return svr_lin.fit(X, r_y)
train = pd.read_csv(
    '/Users/ivan/Work_directory/Afr-Soil-Prediction-master/data/train_py.csv')
test = pd.read_csv(
    '/Users/ivan/Work_directory/Afr-Soil-Prediction-master/data/test_py.csv')
labels = train[['Ca', 'P', 'pH', 'SOC', 'Sand']].values
PIDN = test[['PIDN']].values

train.drop(['Ca', 'P', 'pH', 'SOC', 'Sand'], axis=1, inplace=True)
test.drop('PIDN', axis=1, inplace=True)

xtrain, xtest = np.array(train)[:, :3569], np.array(test)[:, :3569]
xtrain_scaled = preprocessing.scale(xtrain)
xtest_scaled = preprocessing.scale(xtest)

svr_lin = SVR(kernel='linear', C=1e4, verbose=2)

preds = np.zeros((xtest.shape[0], 5))
for i in range(5):
    svr_lin.fit(xtrain_scaled, labels[:, i])
    preds[:, i] = svr_lin.predict(xtest_scaled).astype(float)

sample = pd.read_csv(
    '/Users/ivan/Work_directory/Afr-Soil-Prediction-master/submission_new/2.csv'
)
sample['Ca'] = preds[:, 0]
sample['P'] = preds[:, 1]
sample['pH'] = preds[:, 2]
sample['SOC'] = preds[:, 3]
sample['Sand'] = preds[:, 4]
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR, SVC
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.pipeline import Pipeline

from ...utils.validation import (
    _normalize_param_grid,
    check_param_grids,
    check_datasets,
    check_random_states,
    check_oversamplers_classifiers
)

X, y = make_regression()
ESTIMATORS = [
    ('lr', LinearRegression()),
    ('svr', SVR()),
    ('pip', Pipeline([('scaler', MinMaxScaler()), ('lr', LinearRegression())]))
]
PARAM_GRIDS = [
    {'lr__normalize': [True, False], 'lr__fit_intercept': [True, False]},
    {'svr__C': [0.01, 0.1, 1.0], 'svr__kernel': ['rbf', 'linear']},
    {'pip__scaler__feature_range': [(0, 1), (0, 10)], 'pip__lr__normalize': [True, False]}
]
UPDATED_PARAM_GRIDS = [
    {'lr__normalize': [True, False], 'lr__fit_intercept': [True, False], 'est_name':['lr']},
    {'svr__C': [0.01, 0.1, 1.0], 'svr__kernel': ['rbf', 'linear'], 'est_name':['svr']},
    {'pip__scaler__feature_range': [(0, 1), (0, 10)], 'pip__lr__normalize': [True, False], 'est_name':['pip']}
]
OVERSAMPLERS = [
    ('random', RandomOverSampler()),
    ('smote', SMOTE(), {'k_neighbors': [2, 3, 4], 'kind': ['regular', 'borderline1']})
sc_X = StandardScaler()

X_train = sc_X.fit_transform(X_train)

sc_y = StandardScaler()

y_train = sc_y.fit_transform(y_train.reshape(-1, 1)).ravel()  # the scaler expects 2D input

# ## Training the SVR Model

# In[ ]:

from sklearn.svm import SVR

model = SVR(kernel='rbf')

model.fit(X_train, y_train)

# ## Predicting the Test Set Results

# In[ ]:

y_pred = sc_y.inverse_transform(
    model.predict(sc_X.transform(X_test)).reshape(-1, 1)).ravel()  # inverse_transform needs 2D

# ## Comparing Predicted Y with Real Y (Test Set)

# In[ ]:

data = pd.DataFrame()
Example #29
    def setUp(self):
        """Unittest set up."""
        from sklearn.svm import SVR
        self.model = SVR()
        super(BostonSvrTest, self)._setup(self.model, self.model.fit,
                                          load_boston())  # note: load_boston was removed in scikit-learn 1.2
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train.reshape(-1, 1)).ravel()  # the scaler expects 2D input

# Different regression algorithms
from sklearn.linear_model import LinearRegression

regressor = LinearRegression()

from sklearn.preprocessing import PolynomialFeatures

regressor = PolynomialFeatures(degree=4)
X_poly = regressor.fit_transform(X)

from sklearn.svm import SVR

regressor = SVR(kernel="rbf")

from sklearn.tree import DecisionTreeRegressor

regressor = DecisionTreeRegressor()

from sklearn.ensemble import RandomForestRegressor

regressor = RandomForestRegressor(n_estimators=100)

# Fitting and predicting
regressor.fit(X, y)
y_pred = regressor.predict([[10]])  # scikit-learn expects a 2D array
# In the case where we scaled the variables:
y_pred = sc_y.inverse_transform(
    regressor.predict(sc_X.transform(np.array([[10]]))).reshape(-1, 1))