Ejemplo n.º 1
0
    def trainsvr(self, train, trainlabel, seed, Cmin, Cmax, numC, rmin, rmax, numr,
                 degree=3, verbose=1, method='roc_auc', rad_stat=2):
        """Score an SVR over a C x gamma grid with 10-fold cross-validation.

        Parameters
        ----------
        train, trainlabel
            Passed to ``cross_val_score`` as the data and the targets.
        seed
            Used as the ``kernel`` argument of ``SVR``.
        Cmin, Cmax, numC
            Base-2 exponents of the smallest and largest C, and the number of
            C values (``np.logspace`` with ``base=2``, end point included).
        rmin, rmax, numr
            Same as above, for gamma.
        degree
            Assigned to the estimator's ``degree`` attribute.
        verbose
            When equal to 1, print the mean score and progress after each C.
        method
            Passed to ``cross_val_score`` as ``scoring``.
        rad_stat
            Assigned to the estimator's ``random_state`` attribute.

        Returns
        -------
        DataFrame
            A ``'gamma_range'`` column holding the gamma values, plus one
            column per C (keyed by the C value) holding the mean
            cross-validation score for each gamma.
        """
        c_values = np.logspace(Cmin, Cmax, num=numC, base=2, endpoint=True)
        gamma_values = np.logspace(rmin, rmax, num=numr, base=2, endpoint=True)

        estimator = SVR(kernel=seed)
        # These two settings are the same for every grid point.
        estimator.degree = degree
        estimator.random_state = rad_stat

        def mean_cv_score(gamma):
            # Mean of the fold scores for the current C and this gamma.
            estimator.gamma = gamma
            fold_scores = cross_val_score(estimator, train, trainlabel,
                                          scoring=method, cv=10, n_jobs=-1)
            return np.mean(fold_scores)

        grid = pd.DataFrame({'gamma_range': gamma_values})
        for done, c_value in enumerate(c_values, start=1):
            estimator.C = c_value
            column = [mean_cv_score(gamma) for gamma in gamma_values]
            if verbose == 1:
                print(np.mean(column))
                print("%r cycle finished, %r left" % (done, numC - done))
            grid[c_value] = column

        return grid
Ejemplo n.º 2
0
    def connectWidgets(self):
        """Load a fixed set of SVR settings into the option widgets.

        A throw-away ``SVR`` instance carries the values; each widget is then
        set from the matching attribute, in a fixed order.
        """
        defaults = SVR(
            kernel='rbf',
            degree=3,
            gamma='auto',
            coef0=0.0,
            tol=1e-3,
            C=1.0,
            epsilon=0.1,
            shrinking=True,
            cache_size=200,
            verbose=False,
            max_iter=-1,
        )

        # Numeric / boolean widgets take the value directly; the two combo
        # boxes go through self.defaultComboItem (defined outside this view).
        self.cDoubleSpinBox.setValue(defaults.C)
        self.epsilonDoubleSpinBox.setValue(defaults.epsilon)
        self.defaultComboItem(self.kernelComboBox, defaults.kernel)
        self.degreeSpinBox.setValue(defaults.degree)
        self.defaultComboItem(self.gammaComboBox, defaults.gamma)
        self.coeff0DoubleSpinBox.setValue(defaults.coef0)
        self.shrinkingCheckBox.setChecked(defaults.shrinking)
        self.toleranceDoubleSpinBox.setValue(defaults.tol)
        self.cacheSizeSpinBox.setValue(defaults.cache_size)
        self.verboseCheckBox.setChecked(defaults.verbose)
        self.maxIterationsSpinBox.setValue(defaults.max_iter)
    def train_regress(self, train, trainlabel, seed, Cmin, Cmax, numC, rmin, rmax, numr,
                      degree=3, method='rrmse', rad_stat=2, epsilon=0.00001, verbose=1):
        """Score an SVR over a C x gamma grid with 10-fold cross-validation.

        Parameters
        ----------
        train, trainlabel
            Passed to ``cross_val_score`` as the data and the targets.
        seed
            Used as the ``kernel`` argument of ``SVR``.
        Cmin, Cmax, numC
            Base-2 exponents of the smallest and largest C, and the number of
            C values (``np.logspace`` with ``base=2``, end point included).
        rmin, rmax, numr
            Same as above, for gamma.
        degree
            Assigned to the estimator's ``degree`` attribute.
        method
            Passed to ``cross_val_score`` as ``scoring``.
            NOTE(review): the default 'rrmse' does not look like one of
            scikit-learn's predefined scoring names; presumably callers pass
            their own scorer - confirm.
        rad_stat
            Assigned to the estimator's ``random_state`` attribute.
        epsilon
            Width of the SVR epsilon-tube. Defaults to the value that used to
            be hard-coded (1e-5), so existing callers are unaffected.
        verbose
            When truthy (the default), print the mean score and progress
            after each C, as before. Pass 0 to silence the output.

        Returns
        -------
        DataFrame
            A ``'gamma_range'`` column holding the gamma values, plus one
            column per C (keyed by the C value) holding the mean
            cross-validation score for each gamma.
        """
        C_range = np.logspace(Cmin, Cmax, num=numC, base=2, endpoint=True)
        gamma_range = np.logspace(rmin, rmax, num=numr, base=2, endpoint=True)

        svc = SVR(kernel=seed)
        # Settings that do not vary across the grid are applied once, not on
        # every inner-loop iteration.
        svc.epsilon = epsilon
        svc.degree = degree
        # NOTE(review): random_state is not among SVR's documented constructor
        # parameters, so this attribute is probably dropped when
        # cross_val_score clones the estimator - confirm before relying on it.
        svc.random_state = rad_stat

        df_C_gamma = DataFrame({'gamma_range': gamma_range})
        for count, C in enumerate(C_range, start=1):
            svc.C = C
            score_C = []
            for gamma in gamma_range:
                svc.gamma = gamma
                this_scores = cross_val_score(svc, train, trainlabel,
                                              scoring=method, cv=10, n_jobs=-1)
                score_C.append(np.mean(this_scores))

            if verbose:
                print(np.mean(score_C))
                print("%r cycle finished, %r left" % (count, numC - count))
            df_C_gamma[C] = score_C

        return df_C_gamma
Ejemplo n.º 4
0
def test_energy_model(X, y, epsilon=0.0841395, C=0.122, seed=None, silent=False):
    """Fit an SVR on a random train/test split and report error metrics.

    The data are split with ``train_test_split`` (33% held out, ``seed`` used
    as ``random_state``). An ``SVR`` with the given ``epsilon`` and ``C`` is
    fitted on the training part and both parts are scored.

    Returns a dict holding, for ``train`` and ``test`` alike: ``mse_*``,
    ``err_rel_*`` (mean of ``relative_err``), ``mean_abs_err_*``, ``score_*``
    (``r2_score``), the targets ``y_*`` and the predictions ``p_*``. The dict
    is also printed unless ``silent`` is true.
    """
    # Defaults mirror the values recorded as best:
    # eps = 0.08413951416, C = 0.122
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=seed)

    model = SVR(epsilon=epsilon, C=C)
    model.fit(X_train, y_train)

    def summarise(truth, predicted):
        # (mse, mean absolute error, mean relative error, r2) for one split.
        residual = predicted - truth
        return (
            np.mean(residual ** 2),
            np.mean(np.abs(residual)),
            np.mean(relative_err(predicted, truth)),
            r2_score(truth, predicted),
        )

    p_train = model.predict(X_train)
    p_test = model.predict(X_test)
    train_mse, train_mae, train_rel, train_r2 = summarise(y_train, p_train)
    test_mse, test_mae, test_rel, test_r2 = summarise(y_test, p_test)

    results = {
        'mse_train': train_mse,
        'mse_test': test_mse,
        'err_rel_train': train_rel,
        'err_rel_test': test_rel,
        'mean_abs_err_train': train_mae,
        'mean_abs_err_test': test_mae,
        'score_train': train_r2,
        'score_test': test_r2,
        'y_train': y_train,
        'p_train': p_train,
        'y_test': y_test,
        'p_test': p_test,
    }

    if silent:
        return results

    print(results)
    return results
Ejemplo n.º 5
0
def test_energy_model_cv(X, y, epsilon=0.0841395, C=0.122, cv=5, silent=False):
    """Cross-validate an SVR with the given ``epsilon`` and ``C`` on (X, y).

    Returns a dict with ``cv_score`` (the result of ``cross_val_score``),
    ``cv_r2`` and ``cv_mse`` (``r2_score`` / ``mean_squared_error`` of the
    ``cross_val_predict`` output against ``y``). The three entries are
    printed unless ``silent`` is true.
    """
    model = SVR(epsilon=epsilon, C=C)

    fold_scores = cross_val_score(model, X, y, cv=cv)
    predictions = cross_val_predict(model, X, y, cv=cv)

    report = {
        'cv_score': fold_scores,
        'cv_r2': r2_score(y, predictions),
        'cv_mse': mean_squared_error(y, predictions),
    }

    if not silent:
        for key in ('cv_score', 'cv_r2', 'cv_mse'):
            print(key, report[key])

    return report
    def train_regress(self,
                      train,
                      trainlabel,
                      seed,
                      Cmin,
                      Cmax,
                      numC,
                      rmin,
                      rmax,
                      numr,
                      degree=3,
                      method='rrmse',
                      rad_stat=2,
                      epsilon=0.00001,
                      verbose=1):
        """Score an SVR over a C x gamma grid with 10-fold cross-validation.

        Parameters
        ----------
        train, trainlabel
            Passed to ``cross_val_score`` as the data and the targets.
        seed
            Used as the ``kernel`` argument of ``SVR``.
        Cmin, Cmax, numC
            Base-2 exponents of the smallest and largest C, and the number of
            C values (``np.logspace`` with ``base=2``, end point included).
        rmin, rmax, numr
            Same as above, for gamma.
        degree
            Assigned to the estimator's ``degree`` attribute.
        method
            Passed to ``cross_val_score`` as ``scoring``.
            NOTE(review): the default 'rrmse' does not look like one of
            scikit-learn's predefined scoring names; presumably callers pass
            their own scorer - confirm.
        rad_stat
            Assigned to the estimator's ``random_state`` attribute.
        epsilon
            Width of the SVR epsilon-tube. Defaults to the value that used to
            be hard-coded (1e-5), so existing callers are unaffected.
        verbose
            When truthy (the default), print the mean score and progress
            after each C, as before. Pass 0 to silence the output.

        Returns
        -------
        DataFrame
            A ``'gamma_range'`` column holding the gamma values, plus one
            column per C (keyed by the C value) holding the mean
            cross-validation score for each gamma.
        """
        C_range = np.logspace(Cmin, Cmax, num=numC, base=2, endpoint=True)
        gamma_range = np.logspace(rmin, rmax, num=numr, base=2, endpoint=True)

        svc = SVR(kernel=seed)
        # Settings that do not vary across the grid are applied once, not on
        # every inner-loop iteration.
        svc.epsilon = epsilon
        svc.degree = degree
        # NOTE(review): random_state is not among SVR's documented constructor
        # parameters, so this attribute is probably dropped when
        # cross_val_score clones the estimator - confirm before relying on it.
        svc.random_state = rad_stat

        df_C_gamma = DataFrame({'gamma_range': gamma_range})
        for count, C in enumerate(C_range, start=1):
            svc.C = C
            score_C = []
            for gamma in gamma_range:
                svc.gamma = gamma
                this_scores = cross_val_score(svc,
                                              train,
                                              trainlabel,
                                              scoring=method,
                                              cv=10,
                                              n_jobs=-1)
                score_C.append(np.mean(this_scores))

            if verbose:
                print(np.mean(score_C))
                print("%r cycle finished, %r left" % (count, numC - count))
            df_C_gamma[C] = score_C

        return df_C_gamma
Ejemplo n.º 7
0
    def connectWidgets(self):
        """Load a fixed set of SVR settings into the line edits and lists.

        A throw-away ``SVR`` instance carries the values. Line edits receive
        ``str(value)``; the two list widgets select the first item whose text
        matches exactly.
        """
        defaults = SVR(
            kernel='rbf',
            degree=3,
            gamma='auto',
            coef0=0.0,
            tol=1e-3,
            C=1.0,
            epsilon=0.1,
            shrinking=True,
            cache_size=200,
            verbose=False,
            max_iter=-1,
        )

        def select_exact(list_widget, label):
            # Make the first exactly-matching item current; like the inline
            # form, this raises IndexError when nothing matches.
            matches = list_widget.findItems(label, QtCore.Qt.MatchExactly)
            list_widget.setCurrentItem(matches[0])

        self.cLineEdit.setText(str(defaults.C))
        self.epsilonLineEdit.setText(str(defaults.epsilon))
        # The kernel entry is chosen by its display label, not by defaults.kernel.
        select_exact(self.kernel_list, 'Radial Basis Function')
        self.degreeLineEdit.setText(str(defaults.degree))
        self.coeff0LineEdit.setText(str(defaults.coef0))
        select_exact(self.shrinking_list, str(defaults.shrinking))
        self.toleranceLineEdit.setText(str(defaults.tol))
        self.maxIterationsLineEdit.setText(str(defaults.max_iter))
Ejemplo n.º 8
0
def main():
    """Load the revenue data, show a histogram of column 'P28', and stop.

    As written, the function reads data/train.csv and data/test.csv, takes
    the log of the 'revenue' column, plots a histogram of 'P28' and returns.

    Everything after that first ``return`` is unreachable. It describes a
    pipeline that would: run ``processing`` on the concatenated frames, split
    the result at row 137, keep the features whose normalised -log10(p-value)
    from ``f_regression`` exceeds 0.1, fit a linear-kernel SVR with C = 0.01
    on log-revenue, and write exp(prediction) to
    data/submit_svr_logrevenue.csv.

    Uses Python 2 ``print`` statements.
    """
    df_train = pd.read_csv("data/train.csv")
    df_test = pd.read_csv("data/test.csv")
    # The target is log(revenue); predictions are exponentiated further down.
    y = np.log(np.array(df_train["revenue"]))
    plt.hist(df_train['P28'])
    plt.show()
    # NOTE(review): unconditional return - every statement below is
    # unreachable. Looks like a debugging leftover; confirm before removing.
    return
    #test_id=df_test["Id"]
    df = pd.concat([df_train, df_test])
    df = processing(df)
    # Tuple-style indexing: presumably processing() returns a NumPy array
    # rather than a DataFrame - TODO confirm.
    # 137 is presumably the number of rows in train.csv - verify.
    df_train = df[0:137, :]
    df_test = df[137:, :]
    X = df_train
    print X.shape, y.shape
    X_test = df_test
    model = SVR(kernel='linear')
    #samplesubmit.head()
    if (1):
        # Always-true guard. percentile=100 and the selector is only fitted,
        # never used to transform; the actual selection is the boolean mask
        # built from the normalised -log10(p-values) below.
        selector = SelectPercentile(f_regression, percentile=100)
        selector.fit(X, y)
        print selector.pvalues_
        scores = -np.log10(selector.pvalues_)
        scores /= scores.max()
        print scores
        feature_index_0 = scores > 0.1
    #model.C=1.e4
    #model.gamma=0.2
    X = X[:, feature_index_0]
    # Disabled experiment: residual-based outlier rejection followed by a
    # second feature-selection pass (feature_index_1).
    #print X.shape
    #model.fit(X,y)
    #y_pred=model.predict(X)
    #meanres=np.median(y-y_pred)
    #stdres=np.median(np.abs(y-y_pred-meanres))
    #print meanres,stdres
    #outlierindex=np.abs(y-y_pred-meanres)>5.*stdres
    #print len(y[outlierindex])
    #plt.plot(y-y_pred,'.')
    #plt.plot((y-y_pred)[outlierindex],'r.')
    #plt.show()
    #return
    #X=X[~outlierindex,:]
    #y=y[~outlierindex]
    #if(1):
    #    selector=SelectPercentile(f_regression,percentile=100)
    #    selector.fit(X,y)
    #    print selector.pvalues_
    #    scores=-np.log10(selector.pvalues_)
    #    scores/=scores.max()
    #    print scores
    #    feature_index_1=scores>0.1
    ##return
    if 0:
        # Disabled branch: sweep C over 50 log-spaced values with 10-fold
        # cross-validation and plot the resulting error against C.
        #gammas=np.logspace(-7,7,50)
        #gammas=[0.1]
        #C_s=np.array([1])

        #return
        #model=SVR()
        #X=X[:,feature_index_1]
        C_s = np.logspace(-7, 2, 50)
        #C_s=np.logspace(0,5,50)
        scores_mean = []
        scores_std = []
        # best_score / best_C / best_gamma are only read by the commented-out
        # bookkeeping below.
        best_score = 1.e20
        best_C = 1
        best_gamma = 1
        for C in C_s:
            if (1):
                #for gamma in gammas:
                model.C = C
                #model.gamma=gamma
                # NOTE(review): the sign flip below implies the scorer returns
                # negated MSE; newer scikit-learn releases name this scorer
                # 'neg_mean_squared_error' - confirm against the installed
                # version.
                scores = cross_val_score(model,
                                         X,
                                         y,
                                         cv=10,
                                         scoring='mean_squared_error')
                scores_mean.append(np.sqrt(-np.mean(scores)))
                print np.sqrt(-np.mean(scores))
                #if np.sqrt(-np.mean(scores))<best_score:
                #    best_C=C
                #    best_gamma=gamma
                #    best_score=np.sqrt(-np.mean(scores))
                scores_std.append(np.std(scores))
        #plt.semilogx(gammas,scores_mean,'.')
        #plt.plot(X[:,0],y-y_pred,'.')
        #plt.plot(X[:,1],y-y_pred,'.')
        #plt.plot(X[:,2],y-y_pred,'.')
        #plt.plot(X[:,3],y-y_pred,'.')
        # The plotted values are the RMSE estimates divided by 1e6.
        plt.semilogx(C_s, np.array(scores_mean) / 1.e6, '.')
        plt.show()
        #print best_C,best_gamma,best_score
        return
    #
    #model=SVR()
    #X=X[:,feature_index_1]
    #X=X[:,feature_index_0]
    #model.gamma=0.1
    #model.C=1.5e3
    #model=SVR()
    #model.C=1.38949549437
    # Final fit: linear SVR with a fixed C on the selected features.
    model.C = 0.01
    #model.gamma=0.193069772888
    model.fit(X, y)
    #y_pred=model.predict(X)
    #plt.plot(y-y_pred,'.')
    #plt.show()
    #return
    # Apply the same feature mask to the test rows before predicting.
    X_test = X_test[:, feature_index_0]
    #X_test=X_test[:,feature_index_1]
    y_pred = model.predict(X_test)
    samplesubmit = pd.read_csv("data/sampleSubmission.csv")
    # Undo the log transform applied to the target.
    samplesubmit["Prediction"] = np.exp(y_pred)
    #samplesubmit.to_csv
    #samplesubmit.to_csv("data/submit_fistsvr.csv",index=False)
    #samplesubmit.to_csv("data/submit_linearsvr_fregression_age.csv",index=False)
    samplesubmit.to_csv("data/submit_svr_logrevenue.csv", index=False)
    #samplesubmit.to_csv("data/submit_linearsvr_fregression_outlierreject.csv",index=False)

    return
Ejemplo n.º 9
0
# Axis label for a figure that was started before this excerpt.
plt.xlabel('Energies (after)')
#plt.show()

# Support Vector Regression: sweep the regularisation parameter C and compare
# training and validation error.
from sklearn.svm import SVR
# NOTE(review): mean_absolute_error is not used in the visible part of the
# script; it may be used further down.
from sklearn.metrics import mean_squared_error, mean_absolute_error

# One MSE per candidate C, appended in the same order as Cs.
train_mse = []
val_mse = []

# 20 candidate values, log-spaced (base 10) from 1e-3 to 1e5.
Cs = np.logspace(-3, 5, 20)
print(Cs)

for C in Cs:
    # Fresh estimator per candidate; only C differs from the SVR defaults.
    model = SVR()
    model.C = C
    model.fit(designmatrix_train_std, Energies_train_std)
    # Predictions are multiplied by Energies_std and shifted by Energies_mu
    # before being compared with Energies_train / Energies_val.
    # Presumably this undoes a standardisation of the targets done earlier in
    # the script - TODO confirm.
    Energies_train_pred = Energies_mu + model.predict(
        designmatrix_train_std) * Energies_std
    Energies_val_pred = Energies_mu + model.predict(
        designmatrix_val_std) * Energies_std

    train_mse.append(
        mean_squared_error(y_true=Energies_train, y_pred=Energies_train_pred))
    val_mse.append(
        mean_squared_error(y_true=Energies_val, y_pred=Energies_val_pred))

# Training vs. validation MSE as a function of C (linear axes as plotted here).
plt.figure()
plt.plot(Cs, train_mse, label='Training set MSE')
plt.plot(Cs, val_mse, label='Validation set MSE')
plt.ylabel('Mean Squared Error')
Ejemplo n.º 10
0
def main():
    """Load the revenue data, show a histogram of column 'P28', and stop.

    As written, the function reads data/train.csv and data/test.csv, takes
    the log of the 'revenue' column, plots a histogram of 'P28' and returns.

    Everything after that first ``return`` is unreachable. It describes a
    pipeline that would: run ``processing`` on the concatenated frames, split
    the result at row 137, keep the features whose normalised -log10(p-value)
    from ``f_regression`` exceeds 0.1, fit a linear-kernel SVR with C = 0.01
    on log-revenue, and write exp(prediction) to
    data/submit_svr_logrevenue.csv.

    Uses Python 2 ``print`` statements.
    """
    df_train = pd.read_csv("data/train.csv")
    df_test = pd.read_csv("data/test.csv")
    # The target is log(revenue); predictions are exponentiated further down.
    y = np.log(np.array(df_train["revenue"]))
    plt.hist(df_train["P28"])
    plt.show()
    # NOTE(review): unconditional return - every statement below is
    # unreachable. Looks like a debugging leftover; confirm before removing.
    return
    # test_id=df_test["Id"]
    df = pd.concat([df_train, df_test])
    df = processing(df)
    # Tuple-style indexing: presumably processing() returns a NumPy array
    # rather than a DataFrame - TODO confirm.
    # 137 is presumably the number of rows in train.csv - verify.
    df_train = df[0:137, :]
    df_test = df[137:, :]
    X = df_train
    print X.shape, y.shape
    X_test = df_test
    model = SVR(kernel="linear")
    # samplesubmit.head()
    if 1:
        # Always-true guard. percentile=100 and the selector is only fitted,
        # never used to transform; the actual selection is the boolean mask
        # built from the normalised -log10(p-values) below.
        selector = SelectPercentile(f_regression, percentile=100)
        selector.fit(X, y)
        print selector.pvalues_
        scores = -np.log10(selector.pvalues_)
        scores /= scores.max()
        print scores
        feature_index_0 = scores > 0.1
    # model.C=1.e4
    # model.gamma=0.2
    X = X[:, feature_index_0]
    # Disabled experiment: residual-based outlier rejection followed by a
    # second feature-selection pass (feature_index_1).
    # print X.shape
    # model.fit(X,y)
    # y_pred=model.predict(X)
    # meanres=np.median(y-y_pred)
    # stdres=np.median(np.abs(y-y_pred-meanres))
    # print meanres,stdres
    # outlierindex=np.abs(y-y_pred-meanres)>5.*stdres
    # print len(y[outlierindex])
    # plt.plot(y-y_pred,'.')
    # plt.plot((y-y_pred)[outlierindex],'r.')
    # plt.show()
    # return
    # X=X[~outlierindex,:]
    # y=y[~outlierindex]
    # if(1):
    #    selector=SelectPercentile(f_regression,percentile=100)
    #    selector.fit(X,y)
    #    print selector.pvalues_
    #    scores=-np.log10(selector.pvalues_)
    #    scores/=scores.max()
    #    print scores
    #    feature_index_1=scores>0.1
    ##return
    if 0:
        # Disabled branch: sweep C over 50 log-spaced values with 10-fold
        # cross-validation and plot the resulting error against C.
        # gammas=np.logspace(-7,7,50)
        # gammas=[0.1]
        # C_s=np.array([1])

        # return
        # model=SVR()
        # X=X[:,feature_index_1]
        C_s = np.logspace(-7, 2, 50)
        # C_s=np.logspace(0,5,50)
        scores_mean = []
        scores_std = []
        # best_score / best_C / best_gamma are only read by the commented-out
        # bookkeeping below.
        best_score = 1.0e20
        best_C = 1
        best_gamma = 1
        for C in C_s:
            if 1:
                # for gamma in gammas:
                model.C = C
                # model.gamma=gamma
                # NOTE(review): the sign flip below implies the scorer returns
                # negated MSE; newer scikit-learn releases name this scorer
                # 'neg_mean_squared_error' - confirm against the installed
                # version.
                scores = cross_val_score(model, X, y, cv=10, scoring="mean_squared_error")
                scores_mean.append(np.sqrt(-np.mean(scores)))
                print np.sqrt(-np.mean(scores))
                # if np.sqrt(-np.mean(scores))<best_score:
                #    best_C=C
                #    best_gamma=gamma
                #    best_score=np.sqrt(-np.mean(scores))
                scores_std.append(np.std(scores))
        # plt.semilogx(gammas,scores_mean,'.')
        # plt.plot(X[:,0],y-y_pred,'.')
        # plt.plot(X[:,1],y-y_pred,'.')
        # plt.plot(X[:,2],y-y_pred,'.')
        # plt.plot(X[:,3],y-y_pred,'.')
        # The plotted values are the RMSE estimates divided by 1e6.
        plt.semilogx(C_s, np.array(scores_mean) / 1.0e6, ".")
        plt.show()
        # print best_C,best_gamma,best_score
        return
    #
    # model=SVR()
    # X=X[:,feature_index_1]
    # X=X[:,feature_index_0]
    # model.gamma=0.1
    # model.C=1.5e3
    # model=SVR()
    # model.C=1.38949549437
    # Final fit: linear SVR with a fixed C on the selected features.
    model.C = 0.01
    # model.gamma=0.193069772888
    model.fit(X, y)
    # y_pred=model.predict(X)
    # plt.plot(y-y_pred,'.')
    # plt.show()
    # return
    # Apply the same feature mask to the test rows before predicting.
    X_test = X_test[:, feature_index_0]
    # X_test=X_test[:,feature_index_1]
    y_pred = model.predict(X_test)
    samplesubmit = pd.read_csv("data/sampleSubmission.csv")
    # Undo the log transform applied to the target.
    samplesubmit["Prediction"] = np.exp(y_pred)
    # samplesubmit.to_csv
    # samplesubmit.to_csv("data/submit_fistsvr.csv",index=False)
    # samplesubmit.to_csv("data/submit_linearsvr_fregression_age.csv",index=False)
    samplesubmit.to_csv("data/submit_svr_logrevenue.csv", index=False)
    # samplesubmit.to_csv("data/submit_linearsvr_fregression_outlierreject.csv",index=False)

    return