def predict_prices(dates, prices, x):
    """Fit linear, polynomial, and RBF SVR models on (date, price) pairs,
    plot all three fits, and return each model's prediction for `x`.

    Arguments:
        dates: 1-D sequence of numeric dates (reshaped to a column vector).
        prices: target prices, same length as `dates`.
        x: date value(s) to predict for; scalar or sequence.

    Returns:
        (linear_pred, poly_pred, rbf_pred): first predicted value from each model.
    """
    dates = np.reshape(dates, (len(dates), 1))
    # FIX: sklearn estimators require a 2-D feature array; a bare scalar or
    # 1-D `x` raises on predict(), so coerce it to a column vector here.
    x = np.asarray(x).reshape(-1, 1)
    svr_lin = svr(kernel="linear", C=1e3)
    svr_poly = svr(kernel="poly", C=1e3, degree=2)
    svr_rbf = svr(kernel="rbf", C=1e3, gamma=0.1)
    svr_lin.fit(dates, prices)
    svr_poly.fit(dates, prices)
    svr_rbf.fit(dates, prices)
    plt.scatter(dates, prices, color="black", label="Data")
    plt.plot(dates, svr_lin.predict(dates), color="red", label="Linear Model")
    plt.plot(dates, svr_poly.predict(dates), color="green", label="Polynomial Model")
    plt.plot(dates, svr_rbf.predict(dates), color="blue", label="RBF Model")
    plt.xlabel("Date")
    plt.ylabel("Price")
    plt.title("Support Vector Regression")
    plt.legend()
    plt.show()
    return svr_lin.predict(x)[0], svr_poly.predict(x)[0], svr_rbf.predict(x)[0]
def regression(self, metric="root_mean_squared_error", folds=10, alphas=None, graph=False):
    """Cross-validate a suite of regressors and display a score report.

    Arguments:
        metric (str): sklearn scoring name; assumed to be a negated error
            metric since scores are multiplied by -1 below.
            NOTE(review): sklearn's scorer is named
            'neg_root_mean_squared_error' — confirm the default string works.
        folds (int): number of K-fold splits.
        alphas (list | None): alpha grid for the Ridge CV regressor.
        graph (bool): when True, draw a boxplot comparison of the CV scores.

    Returns:
        None. The report is displayed; the model dict is kept on self.models.
    """
    # FIX: avoid the shared mutable-default-argument pitfall (alphas=[]).
    if alphas is None:
        alphas = []
    size = 1.3 * self.report_width // 10
    models = {}
    models["Linear regressor"] = lr()
    models["Lasso regressor"] = lassor()
    models["Lasso CV regressor"] = lassocvr()
    # NOTE(review): Ridge's `normalize` kwarg was removed in sklearn 1.2;
    # this call requires an older sklearn — confirm the pinned version.
    models["Ridge regressor"] = rr(alpha=0, normalize=True)
    models["Ridge CV regressor"] = rcvr(alphas = alphas)
    models["K nearest neighbors regressor K2u"] = knnr(n_neighbors=2, weights='uniform')
    models["K nearest neighbors regressor K2d"] = knnr(n_neighbors=2, weights='distance')
    models["K nearest neighbors regressor K5"] = knnr(n_neighbors=5)
    models["K nearest neighbors regressor K10"] = knnr(n_neighbors=10)
    models["SGD regressor"] = sgdr(max_iter=10000, warm_start=True)
    models["Decision tree regressor"] = dtr()
    models["Decision tree regressor D3"] = dtr(max_depth=3)
    models["Random forest regressor"] = rfr()
    models["Ada boost regressor"] = abr()
    models["Gradient boost regressor"] = gbr()
    models["Support vector regressor"] = svr()
    self.models = models
    print('\n')
    print(self.report_width * '*', '\n*')
    print('* REGRESSION RESULTS - BEFORE PARAMETERS BOOSTING \n*')
    kf = KFold(n_splits=folds)
    results = []
    names = []
    for model_name in models:
        # sklearn returns negated error scores; flip the sign to positive.
        cv_scores = -1 * cross_val_score(models[model_name], self.Xt_train, self.yt_train.values.ravel(), cv=kf, scoring=metric)
        results.append(cv_scores)
        names.append(model_name)
    print(self.report_width * '*', '')
    report = pd.DataFrame({'Regressor': names, 'Score': results})
    report['Score (avg)'] = report.Score.apply(lambda x: x.mean())
    report['Score (std)'] = report.Score.apply(lambda x: x.std())
    # Coefficient of variation (%) of the per-fold scores.
    report['Score (VC)'] = 100 * report['Score (std)'] / report['Score (avg)']
    report.sort_values(by='Score (avg)', inplace=True)
    report.drop('Score', axis=1, inplace=True)
    display(report)
    print('\n')
    if graph:
        fig, ax = plt.subplots(figsize=(size, 0.5 * size))
        plt.title('Regressor Comparison')
        plt.boxplot(results)
        ax.set_xticklabels(names)
        plt.xticks(rotation=45)
        plt.subplots_adjust(hspace=0.0)
        plt.show()
    return None
def fit(self, X, Y):
    """Fit the classifier to training data X and labels Y.

    One independent SVR is trained per label column and appended to
    self.predictors in column order.

    Arguments:
        X (np.array): training data matrix of shape (n_samples, n_features)
        Y (np.array): label matrix of shape (n_samples, n_labels)
    """
    for col in range(Y.shape[1]):
        model = svr()
        model.fit(X, Y[:, col])
        self.predictors.append(model)
def trainfsvm(X, y, M, sigma, ker, svrc, svrp):
    """Train a fuzzy SVM: build fuzzy-rule firing-strength features from X,
    fit an SVR on them, and recover per-rule consequent coefficients.

    Arguments:
        X: feature DataFrame — indexed below as X[column][row], i.e. columns
            are features and rows are samples (TODO confirm orientation).
        y: training labels.
        M: rule centers; indexed M[t][j] for feature t / rule j below,
            while m = M.shape[0] (TODO confirm layout consistency).
        sigma: rule widths, same indexing as M.
        ker: SVR kernel name.
        svrc: SVR C (regularization) parameter.
        svrp: SVR epsilon parameter.

    Returns:
        (clf, C, bias): the fitted SVR, an (m, n+1) DataFrame of per-rule
        consequent coefficients, and the SVR intercept.
    """
    # L is the number of data samples
    L = X.shape[0] #gives number of row count
    # m is the number of rules
    m = M.shape[0]
    # n is the number of features
    n = X.shape[1] #gives number of col count
    out = []
    # from copy
    trn_labels = y
    trn_features = np.zeros((L, m * (n + 1)))
    trnA = np.zeros((L, m * (n + 1)))
    weights = np.zeros((L, m))
    itermax = 1
    for iter in range(0, itermax):
        for i in range(0, L):
            U = []
            for j in range(0, m):
                # Firing strength of rule j for sample i: product of the
                # per-feature Gaussian memberships.
                u = 1
                for t in range(0, n):
                    u = u * (gaussmf(X[t][i], sigma[t][j], M[t][j]))
                U = U + [u]
            # Normalized firing strengths. NOTE(review): U is a plain list;
            # this division only works if sum(U) is a numpy scalar (i.e.
            # gaussmf returns numpy floats), which coerces U to an ndarray
            # — TODO confirm.
            fa = U / sum(U) # this is the weight
            # Augment sample i with a bias term, then form the outer product
            # fa x [x_i, 1] and flatten it into one feature row.
            row = np.append(X.iloc[[i]].values, 1)
            xtemp = np.zeros(shape=(fa.size, row.size))
            for ii in range(0, fa.size):
                for jj in range(0, row.size):
                    xtemp[ii][jj] = fa[ii] * row[jj]
            xtemp = np.reshape(xtemp, fa.size * row.size)
            # Writing through the transpose view fills row i of trnA in place.
            trnA.transpose()[:, i] = xtemp
            weights.transpose()[:, i] = fa
    trn_features = trnA # X
    #trn_labels = y
    clf = svr(kernel=ker, C=svrc, epsilon=svrp)
    clf.fit(trn_features, trn_labels)
    # Primal weight vector recovered from support vectors and dual coefficients.
    w = np.dot(clf.support_vectors_.transpose(), clf.dual_coef_.transpose())
    bias = clf.intercept_
    # Reshape the flat weight vector into per-rule consequents (m, n+1).
    C = np.reshape(w, (m, n + 1))
    C = pd.DataFrame(C)
    return clf, C, bias
def evalerror(pred, df):
    """Custom eval function: half mean squared error.

    Arguments:
        pred: predicted values.
        df: dataset-like object exposing get_label() with a .values attribute.

    Returns:
        (name, score, is_higher_better) tuple in the LightGBM eval format.
    """
    label = df.get_label().values.copy()
    score = mean_squared_error(label, pred) * 0.5
    return ('0.5mse', score, False)


print('开始训练...')
print('开始CV 5折训练...')
t0 = time.time()
# Out-of-fold predictions for the training set, and per-fold test predictions.
train_preds = np.zeros(train_feat.shape[0])
test_preds = np.zeros((test_feat.shape[0], 5))
# FIX: use the modern sklearn KFold API (n_splits + split()), consistent with
# the other KFold usages in this file; the old KFold(n, n_folds=...)
# constructor was removed in sklearn 0.20.
kf = KFold(n_splits=5, shuffle=True, random_state=520)
print(kf)
for i, (train_index, test_index) in enumerate(kf.split(train_feat)):
    print('第{}次训练...'.format(i), train_index, test_index)
    train_feat1 = train_feat.iloc[train_index]
    train_feat2 = train_feat.iloc[test_index]
    svr_rbf = svr(kernel='rbf', C=1e3, gamma=0.1)
    model_rbf = svr_rbf.fit(train_feat1[predictors], train_feat1['血糖'])
    print("debug")
    # Fill the out-of-fold slots and this fold's test-set column.
    train_preds[test_index] = model_rbf.predict(train_feat2[predictors])
    test_preds[:, i] = model_rbf.predict(test_feat[predictors])
print('线下得分: {}'.format(
    mean_squared_error(train_feat['血糖'], train_preds) * 0.5))
print('CV训练用时{}秒'.format(time.time() - t0))
def regression(self, metric, folds=10, alphas=None, printt=True, graph=False):
    """Cross-validate a suite of regressors, store a performance report on
    self.report_performance, and optionally print it and/or draw a boxplot.

    Arguments:
        metric (str): sklearn scoring name; assumed to be a negated error
            metric since scores are multiplied by -1 and square-rooted below.
        folds (int): number of shuffled K-fold splits.
        alphas (list | None): alpha grid for the Ridge CV regressor.
        printt (bool): when True, print the report table.
        graph (bool): when True, draw the boxplot and keep the figure on
            self.graphs_model.

    Returns:
        None. Models are kept on self.models, report on self.report_performance.
    """
    # FIX: avoid the shared mutable-default-argument pitfall (alphas=[]).
    if alphas is None:
        alphas = []
    size = self.graph_width
    # Significant model setup differences should be listed as different models.
    models = {}
    models["Linear regressor"] = lr()
    models["Lasso regressor"] = lassor()
    models["Lasso CV regressor"] = lassocvr()
    # NOTE(review): Ridge's `normalize` kwarg was removed in sklearn 1.2;
    # this call requires an older sklearn — confirm the pinned version.
    models["Ridge regressor"] = rr(alpha=0, normalize=True)
    models["Ridge CV regressor"] = rcvr(alphas = alphas)
    models["Elastic net regressor"] = enr()
    models["K nearest neighbors regressor K2u"] = knnr(n_neighbors=2, weights='uniform')
    models["K nearest neighbors regressor K2d"] = knnr(n_neighbors=2, weights='distance')
    models["K nearest neighbors regressor K5"] = knnr(n_neighbors=5)
    models["K nearest neighbors regressor K10"] = knnr(n_neighbors=10)
    models["SGD regressor"] = sgdr(max_iter=10000, warm_start=True)
    models["Decision tree regressor"] = dtr()
    models["Decision tree regressor D3"] = dtr(max_depth=3)
    models["Random forest regressor"] = rfr()
    models["Ada boost regressor"] = abr()
    models["Gradient boost regressor"] = gbr()
    models["Support vector regressor RBF"] = svr()
    # FIX: SVR's constructor parameters are keyword-only in current sklearn;
    # svr('linear') raised a TypeError and was inconsistent with the poly entry.
    models["Support vector regressor Linear"] = svr(kernel='linear')
    models["Support vector regressor Poly"] = svr(kernel='poly')
    self.models = models
    kf = KFold(n_splits=folds, shuffle=True)
    results = []
    names = []
    et = []
    for model_name in models:
        start = time.time()
        # sklearn returns negated error scores; flip the sign to positive.
        cv_scores = -1 * cross_val_score(models[model_name], self.Xt_train, self.yt_train, cv=kf, scoring=metric)
        results.append(cv_scores)
        names.append(model_name)
        et.append((time.time() - start))
    report = pd.DataFrame({'Model': names, 'Score': results, 'Elapsed Time': et})
    # sqrt turns the (assumed) squared-error folds into RMSE-like values.
    report['Score (avg)'] = report.Score.apply(lambda x: np.sqrt(x).mean())
    report['Score (std)'] = report.Score.apply(lambda x: np.sqrt(x).std())
    # Coefficient of variation (%) of the per-fold scores.
    report['Score (VC)'] = 100 * report['Score (std)'] / report['Score (avg)']
    report.sort_values(by='Score (avg)', inplace=True)
    report.drop('Score', axis=1, inplace=True)
    report.reset_index(inplace=True, drop=True)
    self.report_performance = report
    if printt:
        print('\n')
        print(self.report_width * '*', '\n*')
        print('* REGRESSION RESULTS - BEFORE PARAMETERS BOOSTING \n*')
        print(self.report_width * '*', '')
        print(report)
        print('\n')
    if graph:
        fig, ax = plt.subplots(figsize=(size, 0.5 * size))
        plt.title('Regressor Comparison')
        plt.boxplot(results)
        ax.set_xticklabels(names)
        plt.xticks(rotation=45)
        plt.subplots_adjust(hspace=0.0, bottom=0.25)
        self.graphs_model.append(fig)
        plt.show()
    return None
def combined_test(i):
    """Train KNN and SVR regressors on several feature-set variants for
    essay set `i`, score the held-out test essays with each variant,
    pickle every prediction list, and print MSE/MAE per model.

    Feature variants built below: raw combined (lda + tfidf + statistics),
    normalized combined, tfidf-only, statistics-only, lda-only,
    tfidf + statistics, lda + statistics.
    (Python 2 code: print-statement syntax.)
    """
    # Get feature vectors
    tfidf_vectors = get_tfidf(i, 'train')
    extra_features_vector = get_extra_features(i, 'train')
    num_topics_for_lda = 30
    lda,lda_vector = get_lda(i, 100, 10, num_topics_for_lda)
    # Combined matrix layout: [lda | tfidf | extra], concatenated column-wise.
    train_features = np.concatenate((tfidf_vectors,extra_features_vector),1)
    train_features = np.concatenate((lda_vector,train_features),1)
    normalized_tfidf_vectors = get_normalized_tfidf(i, 'train')
    normalized_extra_features_vector = get_normalized_extra_features(i, 'train')
    normalized_train_features = np.concatenate((normalized_tfidf_vectors,normalized_extra_features_vector),1)
    normalized_train_features = np.concatenate((lda_vector,normalized_train_features),1)
    tfidf_train_features = tfidf_vectors
    extra_features_train_features = extra_features_vector
    lda_train_features = lda_vector
    tfidf_extra_train_features = np.concatenate((tfidf_vectors,extra_features_vector),1)
    lda_extra_train_features = np.concatenate((lda_vector,extra_features_vector),1)
    print colored('feature vectors loaded', 'cyan')
    # Set up classifiers: one KNN + one SVR pair per feature variant.
    knnr_classifier = knnR(n_neighbors=5, weights = 'distance')
    svr_classifier = svr()
    normalized_knnr_classifier = knnR(n_neighbors=5, weights = 'distance')
    normalized_svr_classifier = svr()
    tfidf_knnr_classifier = knnR(n_neighbors=5, weights = 'distance')
    tfidf_svr_classifier = svr()
    extra_features_knnr_classifier = knnR(n_neighbors=5, weights = 'distance')
    extra_features_svr_classifier = svr()
    lda_knnr_classifier = knnR(n_neighbors=5, weights = 'distance')
    lda_svr_classifier = svr()
    tfidf_extra_knnr_classifier = knnR(n_neighbors=5, weights = 'distance')
    tfidf_extra_svr_classifier = svr()
    lda_extra_knnr_classifier = knnR(n_neighbors=5, weights = 'distance')
    lda_extra_svr_classifier = svr()
    print colored('classifiers setup', 'cyan')
    # Load training essay scores (one integer score per line).
    scores = []
    with open('data/set%d.scores' % i) as f:
        for score in f:
            scores.append(int(score.split('\n')[0]))
    # Load training essay dictionary and corpus
    myDict = gensim.corpora.Dictionary.load('data/set%d.dict' % i)
    corpus = gensim.corpora.MmCorpus('data/set%d.mm' % i)
    # train classifiers
    knnr_classifier.fit(train_features, scores)
    svr_classifier.fit(train_features, scores)
    normalized_knnr_classifier.fit(normalized_train_features, scores)
    normalized_svr_classifier.fit(normalized_train_features, scores)
    tfidf_knnr_classifier.fit(tfidf_train_features, scores)
    tfidf_svr_classifier.fit(tfidf_train_features, scores)
    extra_features_knnr_classifier.fit(extra_features_train_features, scores)
    extra_features_svr_classifier.fit(extra_features_train_features, scores)
    lda_knnr_classifier.fit(lda_train_features, scores)
    lda_svr_classifier.fit(lda_train_features, scores)
    tfidf_extra_knnr_classifier.fit(tfidf_extra_train_features, scores)
    tfidf_extra_svr_classifier.fit(tfidf_extra_train_features, scores)
    lda_extra_knnr_classifier.fit(lda_extra_train_features, scores)
    lda_extra_svr_classifier.fit(lda_extra_train_features, scores)
    test_essays,test_scores = get_test_examples(i)
    # NOTE(review): `index` mirrors the loop's `idx` (both advance once per
    # essay) — they appear redundant; confirm before consolidating.
    index = 0
    print colored('classifiers trained', 'cyan')
    # Load test essay feature vectors
    tfidf_test = get_tfidf(i, 'test')
    extra_features_test = get_extra_features(i, 'test')
    test_features = np.concatenate((tfidf_test,extra_features_test),1)
    normalized_tfidf_test = get_normalized_tfidf(i, 'test')
    normalized_extra_features_test = get_normalized_extra_features(i, 'test')
    normalized_test_features = np.concatenate((normalized_tfidf_test,normalized_extra_features_test),1)
    # One prediction list per (model, feature-variant) pair.
    knnr_predicted = []
    svr_predicted = []
    knnr_normalized_predicted = []
    svr_normalized_predicted = []
    knnr_tfidf_predicted = []
    svr_tfidf_predicted = []
    knnr_extra_features_predicted = []
    svr_extra_features_predicted = []
    knnr_lda_predicted = []
    svr_lda_predicted = []
    knnr_tfidf_extra_predicted = []
    svr_tfidf_extra_predicted = []
    knnr_lda_extra_predicted = []
    svr_lda_extra_predicted = []
    actual = []
    print colored('Testing...', 'cyan')
    for idx, test_essay in enumerate(test_essays):
        # Topic distribution for this essay via the trained LDA model.
        doc_bow = myDict.doc2bow(test_essay)
        doc_lda = lda[doc_bow]
        # Test feature vectors
        vectorized_lda = topic_distribution_to_vector(doc_lda, num_topics_for_lda)
        test_feature = np.concatenate((vectorized_lda, test_features[index]), 1)
        normalized_test_feature = np.concatenate((vectorized_lda, normalized_test_features[index]), 1)
        tfidf_test_feature = tfidf_test[index]
        extra_features_test_feature = extra_features_test[index]
        lda_test_feature = vectorized_lda
        tfidf_extra_feature = test_features[index]
        lda_extra_feature = np.concatenate((vectorized_lda,extra_features_test[index]), 1)
        # Score this essay with every (model, variant) pair.
        knnr_predicted_score = knnr_classifier.predict(test_feature)
        svr_predicted_score = svr_classifier.predict(test_feature)
        knnr_normalized_predicted_score = normalized_knnr_classifier.predict(normalized_test_feature)
        svr_normalized_predicted_score = normalized_svr_classifier.predict(normalized_test_feature)
        knnr_tfidf_predicted_score = tfidf_knnr_classifier.predict(tfidf_test_feature)
        svr_tfidf_predicted_score = tfidf_svr_classifier.predict(tfidf_test_feature)
        knnr_extra_features_predicted_score = extra_features_knnr_classifier.predict(extra_features_test_feature)
        svr_extra_features_predicted_score = extra_features_svr_classifier.predict(extra_features_test_feature)
        knnr_lda_predicted_score = lda_knnr_classifier.predict(lda_test_feature)
        svr_lda_predicted_score = lda_svr_classifier.predict(lda_test_feature)
        knnr_tfidf_extra_predicted_score = tfidf_extra_knnr_classifier.predict(tfidf_extra_feature)
        svr_tfidf_extra_predicted_score = tfidf_extra_svr_classifier.predict(tfidf_extra_feature)
        knnr_lda_extra_predicted_score = lda_extra_knnr_classifier.predict(lda_extra_feature)
        svr_lda_extra_predicted_score = lda_extra_svr_classifier.predict(lda_extra_feature)
        # Collect the true score and every prediction as plain floats.
        actual.append(float(test_scores[idx]))
        knnr_predicted.append(float(knnr_predicted_score))
        svr_predicted.append(float(svr_predicted_score))
        knnr_normalized_predicted.append(float(knnr_normalized_predicted_score))
        svr_normalized_predicted.append(float(svr_normalized_predicted_score))
        knnr_tfidf_predicted.append(float(knnr_tfidf_predicted_score))
        svr_tfidf_predicted.append(float(svr_tfidf_predicted_score))
        knnr_extra_features_predicted.append(float(knnr_extra_features_predicted_score))
        svr_extra_features_predicted.append(float(svr_extra_features_predicted_score))
        knnr_lda_predicted.append(float(knnr_lda_predicted_score))
        svr_lda_predicted.append(float(svr_lda_predicted_score))
        knnr_tfidf_extra_predicted.append(float(knnr_tfidf_extra_predicted_score))
        svr_tfidf_extra_predicted.append(float(svr_tfidf_extra_predicted_score))
        knnr_lda_extra_predicted.append(float(knnr_lda_extra_predicted_score))
        svr_lda_extra_predicted.append(float(svr_lda_extra_predicted_score))
        print colored('essay #%d tested' % idx, 'cyan')
        index += 1
    # pickle data
    pickle.dump(actual, open('data/set%d_actual_scores.pkl' % i, 'w+'))
    pickle.dump(knnr_predicted, open('data/set%d_knnr_predicted_scores.pkl' % i, 'w+'))
    pickle.dump(svr_predicted, open('data/set%d_svr_predicted_scores.pkl' % i, 'w+'))
    pickle.dump(knnr_normalized_predicted, open('data/set%d_knnr_normalized_predicted_scores.pkl' % i, 'w+'))
    pickle.dump(svr_normalized_predicted, open('data/set%d_svr_normalized_predicted_scores.pkl' % i, 'w+'))
    pickle.dump(knnr_tfidf_predicted, open('data/set%d_knnr_tfidf_predicted_scores.pkl' % i, 'w+'))
    pickle.dump(svr_tfidf_predicted, open('data/set%d_svr_tfidf_predicted_scores.pkl' % i, 'w+'))
    pickle.dump(knnr_extra_features_predicted, open('data/set%d_knnr_statistics_predicted_scores.pkl' % i, 'w+'))
    pickle.dump(svr_extra_features_predicted, open('data/set%d_svr_statistics_predicted_scores.pkl' % i, 'w+'))
    pickle.dump(knnr_lda_predicted, open('data/set%d_knnr_lda_predicted_scores.pkl' % i, 'w+'))
    pickle.dump(svr_lda_predicted, open('data/set%d_svr_lda_predicted_scores.pkl' % i, 'w+'))
    pickle.dump(knnr_tfidf_extra_predicted, open('data/set%d_knnr_tfidf_statistics_predicted_scores.pkl' % i, 'w+'))
    pickle.dump(svr_tfidf_extra_predicted, open('data/set%d_svr_tfidf_statistics_predicted_scores.pkl' % i, 'w+'))
    pickle.dump(knnr_lda_extra_predicted, open('data/set%d_knnr_lda_statistics_predicted_scores.pkl' % i, 'w+'))
    pickle.dump(svr_lda_extra_predicted, open('data/set%d_svr_lda_statistics_predicted_scores.pkl' % i, 'w+'))
    print colored('essay set%d data dumped' % i, 'grey')
    # Report MSE/MAE for every variant; filter_nan drops NaN prediction pairs.
    print colored('ESSAY SET %d' % i, 'green', attrs=['bold'])
    knnr_actual,knnr_predicted = filter_nan(actual, knnr_predicted)
    print colored('(RAW) KNN MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(knnr_actual, knnr_predicted), mean_absolute_error(knnr_actual, knnr_predicted)), 'green', attrs=['bold'])
    svr_actual,svr_predicted = filter_nan(actual, svr_predicted)
    print colored('(RAW) SVM MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(svr_actual, svr_predicted), mean_absolute_error(svr_actual, svr_predicted)), 'green', attrs=['bold'])
    knnr_normalized_actual,knnr_normalized_predicted = filter_nan(actual, knnr_normalized_predicted)
    print colored('(NORMALIZED) KNN MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(knnr_normalized_actual, knnr_normalized_predicted), mean_absolute_error(knnr_normalized_actual, knnr_normalized_predicted)), 'green', attrs=['bold'])
    svr_normalized_actual,svr_normalized_predicted = filter_nan(actual, svr_normalized_predicted)
    print colored('(NORMALIZED) SVM MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(svr_normalized_actual, svr_normalized_predicted), mean_absolute_error(svr_normalized_actual, svr_normalized_predicted)), 'green', attrs=['bold'])
    knnr_tfidf_extra_actual,knnr_tfidf_extra_predicted = filter_nan(actual, knnr_tfidf_extra_predicted)
    print colored('(TFIDF + STATISTICS) KNN MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(knnr_tfidf_extra_actual, knnr_tfidf_extra_predicted), mean_absolute_error(knnr_tfidf_extra_actual, knnr_tfidf_extra_predicted)), 'green', attrs=['bold'])
    svr_tfidf_extra_actual,svr_tfidf_extra_predicted = filter_nan(actual, svr_tfidf_extra_predicted)
    print colored('(TFIDF + STATISTICS) SVM MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(svr_tfidf_extra_actual, svr_tfidf_extra_predicted), mean_absolute_error(svr_tfidf_extra_actual, svr_tfidf_extra_predicted)), 'green', attrs=['bold'])
    knnr_lda_extra_actual,knnr_lda_extra_predicted = filter_nan(actual, knnr_lda_extra_predicted)
    print colored('(LDA + STATISTICS) KNN MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(knnr_lda_extra_actual, knnr_lda_extra_predicted), mean_absolute_error(knnr_lda_extra_actual, knnr_lda_extra_predicted)), 'green', attrs=['bold'])
    svr_lda_extra_actual,svr_lda_extra_predicted = filter_nan(actual, svr_lda_extra_predicted)
    print colored('(LDA + STATISTICS) SVM MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(svr_lda_extra_actual, svr_lda_extra_predicted), mean_absolute_error(svr_lda_extra_actual, svr_lda_extra_predicted)), 'green', attrs=['bold'])
    knnr_tfidf_actual,knnr_tfidf_predicted = filter_nan(actual, knnr_tfidf_predicted)
    print colored('(TFIDF) KNN MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(knnr_tfidf_actual, knnr_tfidf_predicted), mean_absolute_error(knnr_tfidf_actual, knnr_tfidf_predicted)), 'green', attrs=['bold'])
    svr_tfidf_actual,svr_tfidf_predicted = filter_nan(actual, svr_tfidf_predicted)
    print colored('(TFIDF) SVM MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(svr_tfidf_actual, svr_tfidf_predicted), mean_absolute_error(svr_tfidf_actual, svr_tfidf_predicted)), 'green', attrs=['bold'])
    knnr_extra_features_actual,knnr_extra_features_predicted = filter_nan(actual, knnr_extra_features_predicted)
    print colored('(STATISTICS) KNN MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(knnr_extra_features_actual, knnr_extra_features_predicted), mean_absolute_error(knnr_extra_features_actual, knnr_extra_features_predicted)), 'green', attrs=['bold'])
    svr_extra_features_actual,svr_extra_features_predicted = filter_nan(actual, svr_extra_features_predicted)
    print colored('(STATISTICS) SVM MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(svr_extra_features_actual, svr_extra_features_predicted), mean_absolute_error(svr_extra_features_actual, svr_extra_features_predicted)), 'green', attrs=['bold'])
    knnr_lda_actual,knnr_lda_predicted = filter_nan(actual, knnr_lda_predicted)
    print colored('(LDA) KNN MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(knnr_lda_actual, knnr_lda_predicted), mean_absolute_error(knnr_lda_actual, knnr_lda_predicted)), 'green', attrs=['bold'])
    svr_lda_actual,svr_lda_predicted = filter_nan(actual, svr_lda_predicted)
    print colored('(LDA) SVM MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(svr_lda_actual, svr_lda_predicted), mean_absolute_error(svr_lda_actual, svr_lda_predicted)), 'green', attrs=['bold'])
def __init__(self, kernelType, CValue=1):
    """Build an SVR wrapper whose kernel is selected by an integer code.

    Arguments:
        kernelType (int): 1 selects a linear kernel; any value > 1 selects a
            polynomial kernel of that degree.
        CValue (float): SVR regularization strength C (default 1).

    Raises:
        ValueError: if kernelType < 1. (Previously that case silently left
            self.regressor unset, causing a late AttributeError on use.)
    """
    if kernelType == 1:
        self.regressor = svr(kernel='linear', C=CValue)
    elif kernelType > 1:
        self.regressor = svr(kernel='poly', C=CValue, degree=kernelType)
    else:
        # FIX: fail fast instead of leaving self.regressor undefined.
        raise ValueError("kernelType must be a positive integer, got %r" % (kernelType,))
# Independent (x) and dependent (f(x)) columns from the dataset.
ind_param = dataset.iloc[:, 1:2].values
dep_param = dataset.iloc[:, -1].values
# ======================================================
# No missing values and no categorical data.
# The dataset is too small to split into train and test sets.
# ======================================================
# Feature scaling is mandatory for sklearn's SVR class (it does not scale
# its inputs internally).
from sklearn.preprocessing import StandardScaler as sklp_ss
ind_scaler = sklp_ss()
ind_param = ind_scaler.fit_transform(ind_param.astype(float).reshape(-1, 1))
dep_scaler = sklp_ss()
dep_param = dep_scaler.fit_transform(dep_param.astype(float).reshape(-1, 1))
# ======================================================
# Construct an initial kernel model
from sklearn.svm import SVR as svr
regressor = svr(kernel='rbf')
regressor.fit(ind_param, dep_param.ravel())
# ======================================================
# Make a prediction
import numpy as np
# First, create a numpy array of the wanted input values.
input_prediction = np.array([[6.5]])
# Then transform the input array to the scale of the model.
# BEWARE: use the independent (x) parameter scaler!
input_prediction = ind_scaler.transform(input_prediction)
# Make a prediction with the SVR model.
predictions = regressor.predict(input_prediction)
# Inverse-transform to interpret the result in original units.
# CAUTION: use the dependent (f(x)) parameter scaler!
# FIX: predict() returns a 1-D array, but StandardScaler.inverse_transform
# requires a 2-D array in current sklearn — reshape before, flatten after.
predictions = dep_scaler.inverse_transform(predictions.reshape(-1, 1)).ravel()
# Scatter both features against the target and save the figure to PDF.
f = plt.figure()
plt.plot(x1, y, 'r.', label='x1')
plt.plot(x2, y, 'g.', label='x2')
plt.xlabel('x values')
plt.ylabel('y values')
plt.legend(loc='lower right')
plt.show()
f.savefig('testdata.pdf', bbox_inches='tight')
#%% Models
# Stack the two feature vectors into an (n_samples, 2) design matrix.
x = np.column_stack((x1, x2))
# Fit both models and stash their in-sample predictions as ad-hoc
# attributes on the fitted estimators (NOTE(review): unconventional,
# but later cells read .predicted / .performance from these objects).
randomforest = rfr().fit(x, y)
randomforest.predicted = randomforest.predict(x)
svm = svr().fit(x, y)
svm.predicted = svm.predict(x)
#%% Performance
# Predicted-vs-actual plot; the identity line marks perfect prediction.
f = plt.figure()
plt.plot(randomforest.predicted, y, 'b.', label='g1(x)')
plt.plot(svm.predicted, y, 'y.', label='g2(x)')
plt.plot([0, 25], [0, 25], 'r-', label='identity')
plt.xlabel('predicted values')
plt.ylabel('y values')
plt.legend(loc='lower right')
plt.show()
f.savefig('performance.pdf', bbox_inches='tight')
# partpred presumably evaluates predictions partitioned into 10 groups —
# TODO confirm its contract; it is defined elsewhere in this project.
randomforest.performance = partpred(randomforest.predicted, y, x, 10)
svm.performance = partpred(svm.predicted, y, x, 10)
test_data_X = pca.transform(test_data_X) ###############################--------Model Setup--------############################### ann_regressor = KerasRegressor(build_fn=ann_model, epochs=30, batch_size=10, verbose=1) xgb_regressor = xgb(learning_rate = 0.0825, min_child_weight = 1, max_depth = 7, subsample = 0.8, verbose = 10, random_state = 2017, n_jobs = -1, eval_metric = "rmse") rfr_regressor = rfr(max_features = 0.9, min_samples_leaf = 50) gbr_regressor = gbr(n_estimators = 200, verbose = 5, learning_rate = 0.08, max_depth = 7, max_features = 0.5, min_samples_leaf = 50, subsample = 0.8, random_state = 2017) etr_regressor = etr(n_estimators = 200, verbose = 10, max_depth = 7, min_samples_leaf = 100, max_features = 0.9, min_impurity_split = 100, random_state = 2017) lr_regressor = lr() svr_regressor = svr(verbose = 10) ensemble = Ensemble(n_folds = 5,stacker = lr_regressor,base_models = [ann_regressor, xgb_regressor, rfr_regressor, gbr_regressor, etr_regressor]) ###############################--------Grid Search--------############################### if (Env_var.get('GridSearch') == 1): if (Env_var.get('Model') == 'ann'): dropout_rate = [0.0, 0.001, 0.01] ann_parameters = dict(dropout_rate=dropout_rate) score, best_parameters, best_model = AutoGridSearch(ann_parameters,ann_regressor, train_data_X, train_data_y)