def trainsvr(self, train, trainlabel, seed, Cmin, Cmax, numC, rmin, rmax, numr, degree=3,
             verbose=1, method='roc_auc', rad_stat=2):
    """Grid-search C and gamma for an SVR using 10-fold cross-validation.

    Both grids are log-spaced in base 2 with the endpoints included:
    ``numC`` values of C between ``2**Cmin`` and ``2**Cmax`` and ``numr``
    values of gamma between ``2**rmin`` and ``2**rmax``.

    Args:
        train: Feature data handed to ``cross_val_score``.
        trainlabel: Targets handed to ``cross_val_score``.
        seed: Passed to ``SVR`` as its ``kernel`` argument (despite the name).
        Cmin, Cmax, numC: Base-2 exponent range and number of C values.
        rmin, rmax, numr: Base-2 exponent range and number of gamma values.
        degree: Assigned to the estimator's ``degree`` attribute.
        verbose: When equal to 1, progress is printed after each C value.
        method: ``scoring`` argument forwarded to ``cross_val_score``.
        rad_stat: Assigned to the estimator's ``random_state`` attribute.
            NOTE(review): SVR presumably does not use this value -- confirm.

    Returns:
        A DataFrame with a ``gamma_range`` column plus one column per C value
        (keyed by that C value) holding the mean CV score for every gamma.
    """
    C_range = np.logspace(Cmin, Cmax, num=numC, base=2, endpoint=True)
    gamma_range = np.logspace(rmin, rmax, num=numr, base=2, endpoint=True)
    scr = SVR(kernel=seed)
    df_C_gamma = pd.DataFrame({'gamma_range': gamma_range})

    def _mean_cv_score(C, gamma):
        # Configure the shared estimator for this grid point and score it.
        scr.C = C
        scr.gamma = gamma
        scr.degree = degree
        scr.random_state = rad_stat
        fold_scores = cross_val_score(scr, train, trainlabel,
                                      scoring=method, cv=10, n_jobs=-1)
        return np.mean(fold_scores)

    for cycle, C in enumerate(C_range, start=1):
        score_C = [_mean_cv_score(C, gamma) for gamma in gamma_range]
        if verbose == 1:
            print(np.mean(score_C))
            print("%r cycle finished, %r left" % (cycle, numC - cycle))
        df_C_gamma[C] = score_C
    return df_C_gamma
def connectWidgets(self):
    """Populate the SVR parameter widgets with a fixed set of starting values.

    A throw-away ``SVR`` instance is configured with the values below and
    each widget is then initialised from the matching attribute.
    """
    svr = SVR()
    starting_values = (
        ('kernel', 'rbf'),
        ('degree', 3),
        ('gamma', 'auto'),
        ('coef0', 0.0),
        ('tol', 1e-3),
        ('C', 1.0),
        ('epsilon', 0.1),
        ('shrinking', True),
        ('cache_size', 200),
        ('verbose', False),
        ('max_iter', -1),
    )
    for attribute, value in starting_values:
        setattr(svr, attribute, value)

    # Widgets are updated in the same order as before, in case any of the
    # setters fire signals.
    self.cDoubleSpinBox.setValue(svr.C)
    self.epsilonDoubleSpinBox.setValue(svr.epsilon)
    self.defaultComboItem(self.kernelComboBox, svr.kernel)
    self.degreeSpinBox.setValue(svr.degree)
    self.defaultComboItem(self.gammaComboBox, svr.gamma)
    self.coeff0DoubleSpinBox.setValue(svr.coef0)
    self.shrinkingCheckBox.setChecked(svr.shrinking)
    self.toleranceDoubleSpinBox.setValue(svr.tol)
    self.cacheSizeSpinBox.setValue(svr.cache_size)
    self.verboseCheckBox.setChecked(svr.verbose)
    self.maxIterationsSpinBox.setValue(svr.max_iter)
def train_regress(self, train, trainlabel, seed, Cmin, Cmax, numC, rmin, rmax, numr, degree=3,
                  method='rrmse', rad_stat=2):
    """Grid-search C and gamma for an SVR regressor with 10-fold CV.

    C and gamma are both sampled on base-2 log-spaced grids (endpoints
    included). Epsilon is fixed at 0.00001 for every fit.

    Args:
        train: Feature data handed to ``cross_val_score``.
        trainlabel: Targets handed to ``cross_val_score``.
        seed: Passed to ``SVR`` as its ``kernel`` argument (despite the name).
        Cmin, Cmax, numC: Base-2 exponent range and number of C values.
        rmin, rmax, numr: Base-2 exponent range and number of gamma values.
        degree: Assigned to the estimator's ``degree`` attribute.
        method: ``scoring`` argument forwarded to ``cross_val_score``.
            NOTE(review): 'rrmse' is presumably a scorer defined elsewhere in
            the project -- confirm it is accepted by ``cross_val_score``.
        rad_stat: Assigned to the estimator's ``random_state`` attribute.

    Returns:
        A DataFrame with a ``gamma_range`` column plus one column per C value
        (keyed by that C value) holding the mean CV score for every gamma.
    """
    C_range = np.logspace(Cmin, Cmax, num=numC, base=2, endpoint=True)
    gamma_range = np.logspace(rmin, rmax, num=numr, base=2, endpoint=True)
    svc = SVR(kernel=seed)
    df_C_gamma = DataFrame({'gamma_range': gamma_range})

    def _mean_cv_score(C, gamma):
        # Re-configure the shared estimator for this grid point and score it.
        svc.epsilon = 0.00001
        svc.C = C
        svc.gamma = gamma
        svc.degree = degree
        svc.random_state = rad_stat
        fold_scores = cross_val_score(svc, train, trainlabel,
                                      scoring=method, cv=10, n_jobs=-1)
        return np.mean(fold_scores)

    for cycle, C in enumerate(C_range, start=1):
        score_C = [_mean_cv_score(C, gamma) for gamma in gamma_range]
        # Progress report: mean score over all gammas for this C.
        print(np.mean(score_C))
        print("%r cycle finished, %r left" % (cycle, numC - cycle))
        df_C_gamma[C] = score_C
    return df_C_gamma
def test_energy_model(X, y, epsilon=0.0841395, C=0.122, seed=None, silent=False):
    """Fit an SVR on a train/test split and collect error metrics for both parts.

    Args:
        X: Feature data.
        y: Target values.
        epsilon: Assigned to the SVR's ``epsilon`` attribute
            (source note: best eps = 0.08413951416).
        C: Assigned to the SVR's ``C`` attribute (source note: best C = 0.122).
        seed: ``random_state`` for ``train_test_split``.
        silent: When false, the result dict is printed.

    Returns:
        dict with MSE, mean relative error, mean absolute error and R^2 for
        the train and test parts, plus the targets and predictions themselves.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=seed)

    svr = SVR()
    svr.epsilon = epsilon
    svr.C = C
    svr.fit(X_train, y_train)

    def _evaluate(features, target):
        # Predict once, then derive every metric from the same prediction.
        pred = svr.predict(features)
        residual = pred - target
        return {
            'pred': pred,
            'mse': np.mean(residual ** 2),
            'mae': np.mean(np.abs(residual)),
            'rel': np.mean(relative_err(pred, target)),
            'r2': r2_score(target, pred),
        }

    on_train = _evaluate(X_train, y_train)
    on_test = _evaluate(X_test, y_test)

    # Key order is kept stable because the dict is printed verbatim below.
    results = {
        'mse_train': on_train['mse'],
        'mse_test': on_test['mse'],
        'err_rel_train': on_train['rel'],
        'err_rel_test': on_test['rel'],
        'mean_abs_err_train': on_train['mae'],
        'mean_abs_err_test': on_test['mae'],
        'score_train': on_train['r2'],
        'score_test': on_test['r2'],
        'y_train': y_train,
        'p_train': on_train['pred'],
        'y_test': y_test,
        'p_test': on_test['pred'],
    }
    if not silent:
        print(results)
    return results
def test_energy_model_cv(X, y, epsilon=0.0841395, C=0.122, cv=5, silent=False):
    """Cross-validate an SVR and report per-fold scores plus pooled R^2 / MSE.

    Args:
        X: Feature data.
        y: Target values.
        epsilon: Assigned to the SVR's ``epsilon`` attribute.
        C: Assigned to the SVR's ``C`` attribute.
        cv: ``cv`` argument for ``cross_val_score`` and ``cross_val_predict``.
        silent: When false, each reported value is printed.

    Returns:
        dict with ``cv_score`` (result of ``cross_val_score``), and ``cv_r2`` /
        ``cv_mse`` computed from the ``cross_val_predict`` predictions.
    """
    svr = SVR()
    svr.epsilon = epsilon
    svr.C = C

    fold_scores = cross_val_score(svr, X, y, cv=cv)
    y_pred = cross_val_predict(svr, X, y, cv=cv)

    report = {
        'cv_score': fold_scores,
        'cv_r2': r2_score(y, y_pred),
        'cv_mse': mean_squared_error(y, y_pred),
    }
    if not silent:
        # Printed in the dict's insertion order: cv_score, cv_r2, cv_mse.
        for label, value in report.items():
            print(label, value)
    return report
def train_regress(self, train, trainlabel, seed, Cmin, Cmax, numC, rmin, rmax, numr,
                  degree=3, method='rrmse', rad_stat=2):
    """Evaluate an SVR over a base-2 log grid of C and gamma with 10-fold CV.

    Args:
        train: Feature data handed to ``cross_val_score``.
        trainlabel: Targets handed to ``cross_val_score``.
        seed: Passed to ``SVR`` as its ``kernel`` argument (despite the name).
        Cmin, Cmax, numC: Base-2 exponent range and number of C values.
        rmin, rmax, numr: Base-2 exponent range and number of gamma values.
        degree: Assigned to the estimator's ``degree`` attribute.
        method: ``scoring`` argument forwarded to ``cross_val_score``.
            NOTE(review): 'rrmse' is presumably a project-defined scorer --
            confirm it is accepted by ``cross_val_score``.
        rad_stat: Assigned to the estimator's ``random_state`` attribute.

    Returns:
        A DataFrame whose ``gamma_range`` column lists the gamma grid and
        whose remaining columns (one per C, keyed by the C value) hold the
        mean cross-validation score for each gamma.
    """
    c_grid = np.logspace(Cmin, Cmax, num=numC, base=2, endpoint=True)
    gamma_grid = np.logspace(rmin, rmax, num=numr, base=2, endpoint=True)
    df_C_gamma = DataFrame({'gamma_range': gamma_grid})

    # Settings that never change across the grid are applied once up front.
    svc = SVR(kernel=seed)
    svc.epsilon = 0.00001
    svc.degree = degree
    svc.random_state = rad_stat

    for done, c_value in enumerate(c_grid, start=1):
        svc.C = c_value
        column = []
        for gamma_value in gamma_grid:
            svc.gamma = gamma_value
            fold_scores = cross_val_score(svc, train, trainlabel,
                                          scoring=method, cv=10, n_jobs=-1)
            column.append(np.mean(fold_scores))
        print(np.mean(column))
        print("%r cycle finished, %r left" % (done, numC - done))
        df_C_gamma[c_value] = column
    return df_C_gamma
def connectWidgets(self):
    """Fill the SVR line edits and list widgets with a fixed set of starting values.

    A throw-away ``SVR`` instance is configured with the values below; the
    text fields show their string form, and the kernel / shrinking lists
    select the item whose label matches exactly.
    """
    svr = SVR()
    starting_values = {
        'kernel': 'rbf',
        'degree': 3,
        'gamma': 'auto',
        'coef0': 0.0,
        'tol': 1e-3,
        'C': 1.0,
        'epsilon': 0.1,
        'shrinking': True,
        'cache_size': 200,
        'verbose': False,
        'max_iter': -1,
    }
    for attribute, value in starting_values.items():
        setattr(svr, attribute, value)

    exact = QtCore.Qt.MatchExactly

    self.cLineEdit.setText(str(svr.C))
    self.epsilonLineEdit.setText(str(svr.epsilon))

    # The kernel list is matched by its display label, not by svr.kernel.
    kernel_matches = self.kernel_list.findItems('Radial Basis Function', exact)
    self.kernel_list.setCurrentItem(kernel_matches[0])

    self.degreeLineEdit.setText(str(svr.degree))
    self.coeff0LineEdit.setText(str(svr.coef0))

    shrinking_matches = self.shrinking_list.findItems(str(svr.shrinking), exact)
    self.shrinking_list.setCurrentItem(shrinking_matches[0])

    self.toleranceLineEdit.setText(str(svr.tol))
    self.maxIterationsLineEdit.setText(str(svr.max_iter))
def main():
    """Load the train/test CSVs and show a histogram of the ``P28`` column.

    The function returns immediately after the histogram is shown, so the
    feature-scoring, linear-SVR fitting and submission-writing code that
    follows is currently unreachable. That code is kept, with its Python 2
    ``print`` statements ported to ``print()`` calls so the module also
    parses on Python 3.
    """
    df_train = pd.read_csv("data/train.csv")
    df_test = pd.read_csv("data/test.csv")
    # The target is the log of revenue; predictions are mapped back with
    # np.exp before being written out further down.
    y = np.log(np.array(df_train["revenue"]))

    plt.hist(df_train['P28'])
    plt.show()
    # NOTE(review): unconditional early return -- looks like a leftover
    # exploration short-circuit. Nothing below this line executes; confirm
    # before removing it.
    return

    #test_id=df_test["Id"]
    df = pd.concat([df_train, df_test])
    df = processing(df)
    # presumably 137 is the number of rows in data/train.csv -- TODO confirm
    df_train = df[0:137, :]
    df_test = df[137:, :]
    X = df_train
    print(X.shape, y.shape)
    X_test = df_test
    model = SVR(kernel='linear')
    #samplesubmit.head()

    # Score every feature as -log10(p-value) from f_regression, normalise so
    # the best feature scores 1, and keep those scoring above 0.1. Only the
    # selector's pvalues_ are used here.
    selector = SelectPercentile(f_regression, percentile=100)
    selector.fit(X, y)
    print(selector.pvalues_)
    scores = -np.log10(selector.pvalues_)
    scores /= scores.max()
    print(scores)
    feature_index_0 = scores > 0.1
    #model.C=1.e4
    #model.gamma=0.2
    X = X[:, feature_index_0]
    #print X.shape

    # Disabled outlier-rejection experiment, kept for reference:
    #model.fit(X,y)
    #y_pred=model.predict(X)
    #meanres=np.median(y-y_pred)
    #stdres=np.median(np.abs(y-y_pred-meanres))
    #print meanres,stdres
    #outlierindex=np.abs(y-y_pred-meanres)>5.*stdres
    #print len(y[outlierindex])
    #plt.plot(y-y_pred,'.')
    #plt.plot((y-y_pred)[outlierindex],'r.')
    #plt.show()
    #return
    #X=X[~outlierindex,:]
    #y=y[~outlierindex]
    #if(1):
    #    selector=SelectPercentile(f_regression,percentile=100)
    #    selector.fit(X,y)
    #    print selector.pvalues_
    #    scores=-np.log10(selector.pvalues_)
    #    scores/=scores.max()
    #    print scores
    #    feature_index_1=scores>0.1
    ##return

    run_c_sweep = False  # was a literal ``if 0:`` toggle
    if run_c_sweep:
        #gammas=np.logspace(-7,7,50)
        #gammas=[0.1]
        #C_s=np.array([1])
        #return
        #model=SVR()
        #X=X[:,feature_index_1]
        C_s = np.logspace(-7, 2, 50)
        #C_s=np.logspace(0,5,50)
        scores_mean = []
        scores_std = []
        best_score = 1.e20
        best_C = 1
        best_gamma = 1
        for C in C_s:
            #for gamma in gammas:
            model.C = C
            #model.gamma=gamma
            # NOTE(review): newer scikit-learn releases presumably expect
            # 'neg_mean_squared_error' here -- confirm against the
            # installed version.
            scores = cross_val_score(model, X, y, cv=10, scoring='mean_squared_error')
            # Scores are negated MSE values, hence the sign flip before sqrt.
            scores_mean.append(np.sqrt(-np.mean(scores)))
            print(np.sqrt(-np.mean(scores)))
            #if np.sqrt(-np.mean(scores))<best_score:
            #    best_C=C
            #    best_gamma=gamma
            #    best_score=np.sqrt(-np.mean(scores))
            scores_std.append(np.std(scores))
        #plt.semilogx(gammas,scores_mean,'.')
        #plt.plot(X[:,0],y-y_pred,'.')
        #plt.plot(X[:,1],y-y_pred,'.')
        #plt.plot(X[:,2],y-y_pred,'.')
        #plt.plot(X[:,3],y-y_pred,'.')
        plt.semilogx(C_s, np.array(scores_mean) / 1.e6, '.')
        plt.show()
        #print best_C,best_gamma,best_score
        return

    #
    #model=SVR()
    #X=X[:,feature_index_1]
    #X=X[:,feature_index_0]
    #model.gamma=0.1
    #model.C=1.5e3
    #model=SVR()
    #model.C=1.38949549437
    model.C = 0.01
    #model.gamma=0.193069772888
    model.fit(X, y)
    #y_pred=model.predict(X)
    #plt.plot(y-y_pred,'.')
    #plt.show()
    #return

    # Apply the same feature mask to the test rows, predict in log space and
    # convert back to revenue for the submission file.
    X_test = X_test[:, feature_index_0]
    #X_test=X_test[:,feature_index_1]
    y_pred = model.predict(X_test)
    samplesubmit = pd.read_csv("data/sampleSubmission.csv")
    samplesubmit["Prediction"] = np.exp(y_pred)
    #samplesubmit.to_csv
    #samplesubmit.to_csv("data/submit_fistsvr.csv",index=False)
    #samplesubmit.to_csv("data/submit_linearsvr_fregression_age.csv",index=False)
    samplesubmit.to_csv("data/submit_svr_logrevenue.csv", index=False)
    #samplesubmit.to_csv("data/submit_linearsvr_fregression_outlierreject.csv",index=False)
    return
plt.xlabel('Energies (after)')
#plt.show()

# Support Vector Regression: fit one SVR per C value and record the mean
# squared error on the training and validation sets.
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error

Cs = np.logspace(-3, 5, 20)
print(Cs)

train_mse = []
val_mse = []
for C in Cs:
    model = SVR()
    model.C = C
    model.fit(designmatrix_train_std, Energies_train_std)

    # The model is fitted on the *_std inputs; predictions are rescaled with
    # Energies_mu / Energies_std before being compared with the raw energies
    # (presumably the mean and standard deviation used for standardisation
    # -- confirm where they are defined).
    Energies_train_pred = (
        Energies_mu + model.predict(designmatrix_train_std) * Energies_std)
    Energies_val_pred = (
        Energies_mu + model.predict(designmatrix_val_std) * Energies_std)

    train_mse.append(mean_squared_error(Energies_train, Energies_train_pred))
    val_mse.append(mean_squared_error(Energies_val, Energies_val_pred))

plt.figure()
plt.plot(Cs, train_mse, label='Training set MSE')
plt.plot(Cs, val_mse, label='Validation set MSE')
plt.ylabel('Mean Squared Error')
def main():
    """Load the train/test CSVs and show a histogram of the ``P28`` column.

    The function returns immediately after the histogram is shown, so the
    feature-scoring, linear-SVR fitting and submission-writing code that
    follows is currently unreachable. That code is kept, with its Python 2
    ``print`` statements ported to ``print()`` calls so the module also
    parses on Python 3.
    """
    df_train = pd.read_csv("data/train.csv")
    df_test = pd.read_csv("data/test.csv")
    # The target is the log of revenue; predictions are mapped back with
    # np.exp before being written out further down.
    y = np.log(np.array(df_train["revenue"]))

    plt.hist(df_train["P28"])
    plt.show()
    # NOTE(review): unconditional early return -- looks like a leftover
    # exploration short-circuit. Nothing below this line executes; confirm
    # before removing it.
    return

    # test_id=df_test["Id"]
    df = pd.concat([df_train, df_test])
    df = processing(df)
    # presumably 137 is the number of rows in data/train.csv -- TODO confirm
    df_train = df[0:137, :]
    df_test = df[137:, :]
    X = df_train
    print(X.shape, y.shape)
    X_test = df_test
    model = SVR(kernel="linear")
    # samplesubmit.head()

    # Score every feature as -log10(p-value) from f_regression, normalise so
    # the best feature scores 1, and keep those scoring above 0.1. Only the
    # selector's pvalues_ are used here.
    selector = SelectPercentile(f_regression, percentile=100)
    selector.fit(X, y)
    print(selector.pvalues_)
    scores = -np.log10(selector.pvalues_)
    scores /= scores.max()
    print(scores)
    feature_index_0 = scores > 0.1
    # model.C=1.e4
    # model.gamma=0.2
    X = X[:, feature_index_0]
    # print X.shape

    # Disabled outlier-rejection experiment, kept for reference:
    # model.fit(X,y)
    # y_pred=model.predict(X)
    # meanres=np.median(y-y_pred)
    # stdres=np.median(np.abs(y-y_pred-meanres))
    # print meanres,stdres
    # outlierindex=np.abs(y-y_pred-meanres)>5.*stdres
    # print len(y[outlierindex])
    # plt.plot(y-y_pred,'.')
    # plt.plot((y-y_pred)[outlierindex],'r.')
    # plt.show()
    # return
    # X=X[~outlierindex,:]
    # y=y[~outlierindex]
    # if(1):
    #    selector=SelectPercentile(f_regression,percentile=100)
    #    selector.fit(X,y)
    #    print selector.pvalues_
    #    scores=-np.log10(selector.pvalues_)
    #    scores/=scores.max()
    #    print scores
    #    feature_index_1=scores>0.1
    ##return

    run_c_sweep = False  # was a literal ``if 0:`` toggle
    if run_c_sweep:
        # gammas=np.logspace(-7,7,50)
        # gammas=[0.1]
        # C_s=np.array([1])
        # return
        # model=SVR()
        # X=X[:,feature_index_1]
        C_s = np.logspace(-7, 2, 50)
        # C_s=np.logspace(0,5,50)
        scores_mean = []
        scores_std = []
        best_score = 1.0e20
        best_C = 1
        best_gamma = 1
        for C in C_s:
            # for gamma in gammas:
            model.C = C
            # model.gamma=gamma
            # NOTE(review): newer scikit-learn releases presumably expect
            # "neg_mean_squared_error" here -- confirm against the
            # installed version.
            scores = cross_val_score(model, X, y, cv=10, scoring="mean_squared_error")
            # Scores are negated MSE values, hence the sign flip before sqrt.
            scores_mean.append(np.sqrt(-np.mean(scores)))
            print(np.sqrt(-np.mean(scores)))
            # if np.sqrt(-np.mean(scores))<best_score:
            #    best_C=C
            #    best_gamma=gamma
            #    best_score=np.sqrt(-np.mean(scores))
            scores_std.append(np.std(scores))
        # plt.semilogx(gammas,scores_mean,'.')
        # plt.plot(X[:,0],y-y_pred,'.')
        # plt.plot(X[:,1],y-y_pred,'.')
        # plt.plot(X[:,2],y-y_pred,'.')
        # plt.plot(X[:,3],y-y_pred,'.')
        plt.semilogx(C_s, np.array(scores_mean) / 1.0e6, ".")
        plt.show()
        # print best_C,best_gamma,best_score
        return

    #
    # model=SVR()
    # X=X[:,feature_index_1]
    # X=X[:,feature_index_0]
    # model.gamma=0.1
    # model.C=1.5e3
    # model=SVR()
    # model.C=1.38949549437
    model.C = 0.01
    # model.gamma=0.193069772888
    model.fit(X, y)
    # y_pred=model.predict(X)
    # plt.plot(y-y_pred,'.')
    # plt.show()
    # return

    # Apply the same feature mask to the test rows, predict in log space and
    # convert back to revenue for the submission file.
    X_test = X_test[:, feature_index_0]
    # X_test=X_test[:,feature_index_1]
    y_pred = model.predict(X_test)
    samplesubmit = pd.read_csv("data/sampleSubmission.csv")
    samplesubmit["Prediction"] = np.exp(y_pred)
    # samplesubmit.to_csv
    # samplesubmit.to_csv("data/submit_fistsvr.csv",index=False)
    # samplesubmit.to_csv("data/submit_linearsvr_fregression_age.csv",index=False)
    samplesubmit.to_csv("data/submit_svr_logrevenue.csv", index=False)
    # samplesubmit.to_csv("data/submit_linearsvr_fregression_outlierreject.csv",index=False)
    return