import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR as svr

def predict_prices(dates, prices, x):
    # SVR expects a 2-D feature matrix: reshape the dates into a single column
    dates = np.reshape(dates, (len(dates), 1))

    svr_lin = svr(kernel="linear", C=1e3)
    svr_poly = svr(kernel="poly", C=1e3, degree=2)
    svr_rbf = svr(kernel="rbf", C=1e3, gamma=0.1)

    svr_lin.fit(dates, prices)
    svr_poly.fit(dates, prices)
    svr_rbf.fit(dates, prices)

    plt.scatter(dates, prices, color="black", label="Data")
    plt.plot(dates, svr_lin.predict(dates), color="red", label="Linear Model")
    plt.plot(dates,
             svr_poly.predict(dates),
             color="green",
             label="Polynomial Model")
    plt.plot(dates, svr_rbf.predict(dates), color="blue", label="RBF Model")
    plt.xlabel("Date")
    plt.ylabel("Price")
    plt.title("Support Vector Regression")
    plt.legend()
    plt.show()

    # x must itself be 2-D (e.g. [[11]]) because SVR.predict expects a matrix
    return svr_lin.predict(x)[0], svr_poly.predict(x)[0], svr_rbf.predict(x)[0]
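
A hedged usage sketch — the date indices and prices below are invented for illustration, and note that the query point is passed as a 2-D list:

dates = [1, 2, 3, 4, 5, 8, 9, 10]        # hypothetical day-of-month indices
prices = [221.0, 222.5, 219.8, 225.1,
          226.4, 224.9, 228.0, 229.3]    # hypothetical closing prices
lin_p, poly_p, rbf_p = predict_prices(dates, prices, [[11]])
print(lin_p, poly_p, rbf_p)
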
Example #2
    def regression(self, metric="neg_root_mean_squared_error", folds=10, alphas=(0.1, 1.0, 10.0), graph=False):
        # scikit-learn scorer names are negated ("neg_*"); the sign is flipped back below.
        # alphas defaults to RidgeCV's own grid; an empty list would make rcvr() fail.
        size = 1.3 * self.report_width // 10

        models = {}
        models["Linear regressor"]                  = lr()
        models["Lasso regressor"]                   = lassor()
        models["Lasso CV regressor"]                = lassocvr()
        models["Ridge regressor"]                   = rr(alpha=0, normalize=True)
        models["Ridge CV regressor"]                = rcvr(alphas = alphas)
        models["K nearest neighbors regressor K2u"] = knnr(n_neighbors=2, weights='uniform')
        models["K nearest neighbors regressor K2d"] = knnr(n_neighbors=2, weights='distance')
        models["K nearest neighbors regressor K5"]  = knnr(n_neighbors=5)
        models["K nearest neighbors regressor K10"] = knnr(n_neighbors=10)
        models["SGD regressor"]                     = sgdr(max_iter=10000, warm_start=True)
        models["Decision tree regressor"]           = dtr()
        models["Decision tree regressor D3"]        = dtr(max_depth=3)
        models["Random forest regressor"]           = rfr()
        models["Ada boost regressor"]               = abr()
        models["Gradient boost regressor"]          = gbr()
        models["Support vector regressor"]          = svr()
        self.models = models

        print('\n')
        print(self.report_width * '*', '\n*')
        print('* REGRESSION RESULTS - BEFORE PARAMETER TUNING \n*')
        #kf = StratifiedKFold(n_splits=folds, shuffle=True)
        kf = KFold(n_splits=folds)
        results = []
        names = []
        for model_name in models:
            cv_scores = -1 * cross_val_score(models[model_name], self.Xt_train, self.yt_train.values.ravel(), cv=kf, scoring=metric)  
            results.append(cv_scores)
            names.append(model_name)
        print(self.report_width * '*', '')
        report = pd.DataFrame({'Regressor': names, 'Score': results})
        report['Score (avg)'] = report.Score.apply(lambda x: x.mean())
        report['Score (std)'] = report.Score.apply(lambda x: x.std())
        report['Score (CV%)'] = 100 * report['Score (std)'] / report['Score (avg)']  # coefficient of variation
        report.sort_values(by='Score (avg)', inplace=True)
        report.drop('Score', axis=1, inplace=True)
        display(report)  # IPython's display(); use print(report) outside a notebook
        print('\n')
        if graph:
            fig, ax = plt.subplots(figsize=(size, 0.5 * size))
            plt.title('Regressor Comparison')
            #ax = fig.add_subplot(111)
            plt.boxplot(results)
            ax.set_xticklabels(names)
            plt.xticks(rotation=45)
            plt.subplots_adjust(hspace=0.0)
            plt.show()             
        return None
Example #3
    def fit(self, X, Y):
        """
        Fit the classifier to training data X and labels Y.

        Arguments:
            X (np.array): training data matrix of shape (n_samples, n_features)
            Y (np.array): label matrix of shape (n_samples, n_labels)
        """
        n_labels = Y.shape[1]
        for idx in range(n_labels):
            Y_col = Y[:, idx]
            predictor = svr()
            predictor.fit(X, Y_col)
            self.predictors.append(predictor)
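
Since the class fits one independent SVR per label column, a matching predict method is implied; a minimal sketch, assuming self.predictors was initialized to an empty list in an __init__ not shown here, and that numpy is imported as np:

    def predict(self, X):
        """Stack the per-label SVR predictions into an (n_samples, n_labels) matrix."""
        return np.column_stack([p.predict(X) for p in self.predictors])
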
Example #4
def trainfsvm(X, y, M, sigma, ker, svrc, svrp):
    # L is the number of data samples (rows)
    L = X.shape[0]
    # m is the number of rules
    m = M.shape[0]
    # n is the number of features (columns)
    n = X.shape[1]


    trn_labels = y
    trn_features = np.zeros((L, m * (n + 1)))

    trnA = np.zeros((L, m * (n + 1)))
    weights = np.zeros((L, m))

    itermax = 1

    for _ in range(itermax):
        for i in range(0, L):
            U = []
            for j in range(0, m):
                u = 1
                for t in range(0, n):
                    u = u * (gaussmf(X[t][i], sigma[t][j], M[t][j]))
                U = U + [u]
            fa = np.array(U) / sum(U)  # normalized firing strengths (U is a plain list)
            row = np.append(X.iloc[[i]].values, 1)
            xtemp = np.zeros(shape=(fa.size, row.size))
            for ii in range(0, fa.size):
                for jj in range(0, row.size):
                    xtemp[ii][jj] = fa[ii] * row[jj]
            xtemp = np.reshape(xtemp, fa.size * row.size)
            trnA.transpose()[:, i] = xtemp
            weights.transpose()[:, i] = fa
        trn_features = trnA
        clf = svr(kernel=ker, C=svrc, epsilon=svrp)
        clf.fit(trn_features, trn_labels)
        w = np.dot(clf.support_vectors_.transpose(),
                   clf.dual_coef_.transpose())
        bias = clf.intercept_
        C = np.reshape(w, (m, n + 1))
        C = pd.DataFrame(C)
    return clf, C, bias
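
trainfsvm relies on a Gaussian membership function gaussmf whose import is not shown; a minimal sketch consistent with the argument order used in the call above (value, sigma, center) — an assumption on our part — would be:

def gaussmf(x, sigma, center):
    # Gaussian membership value: exp(-(x - center)^2 / (2 * sigma^2))
    return np.exp(-((x - center) ** 2) / (2.0 * sigma ** 2))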

def evalerror(pred, df):
    # LightGBM-style custom metric: get_label() already returns a NumPy array
    label = df.get_label().copy()
    score = mean_squared_error(label, pred) * 0.5
    return ('0.5mse', score, False)
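
The (name, value, is_higher_better) return tuple matches LightGBM's custom-metric (feval) convention, so evalerror can be passed straight to lgb.train; a hedged sketch with illustrative data names:

import lightgbm as lgb

dtrain = lgb.Dataset(X_train, label=y_train)   # X_train / y_train are hypothetical
booster = lgb.train({'objective': 'regression'}, dtrain,
                    num_boost_round=100, feval=evalerror)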


print('Training started...')
print('Starting 5-fold CV training...')
t0 = time.time()
train_preds = np.zeros(train_feat.shape[0])
test_preds = np.zeros((test_feat.shape[0], 5))
# current scikit-learn API: KFold takes n_splits, and indices come from kf.split()
kf = KFold(n_splits=5, shuffle=True, random_state=520)
print(kf)
for i, (train_index, test_index) in enumerate(kf.split(train_feat)):
    print('Training fold {}...'.format(i), train_index, test_index)

    train_feat1 = train_feat.iloc[train_index]
    train_feat2 = train_feat.iloc[test_index]

    svr_rbf = svr(kernel='rbf', C=1e3, gamma=0.1)
    model_rbf = svr_rbf.fit(train_feat1[predictors], train_feat1['血糖'])
    print("debug")

    train_preds[test_index] = model_rbf.predict(train_feat2[predictors])
    test_preds[:, i] = model_rbf.predict(test_feat[predictors])

print('Offline score:    {}'.format(
    mean_squared_error(train_feat['血糖'], train_preds) * 0.5))
print('CV training took {} seconds'.format(time.time() - t0))
    def regression(self, metric, folds=10, alphas=(0.1, 1.0, 10.0), printt=True, graph=False):
        size = self.graph_width

        # significantly different model setups should be listed as separate models
        models = {}
        models["Linear regressor"]                  = lr()
        models["Lasso regressor"]                   = lassor()
        models["Lasso CV regressor"]                = lassocvr()
        models["Ridge regressor"]                   = rr(alpha=0, normalize=True)
        models["Ridge CV regressor"]                = rcvr(alphas = alphas)
        models["Elastic net regressor"]             = enr()
        models["K nearest neighbors regressor K2u"] = knnr(n_neighbors=2, weights='uniform')
        models["K nearest neighbors regressor K2d"] = knnr(n_neighbors=2, weights='distance')
        models["K nearest neighbors regressor K5"]  = knnr(n_neighbors=5)
        models["K nearest neighbors regressor K10"] = knnr(n_neighbors=10)
        models["SGD regressor"]                     = sgdr(max_iter=10000, warm_start=True)
        models["Decision tree regressor"]           = dtr()
        models["Decision tree regressor D3"]        = dtr(max_depth=3)
        models["Random forest regressor"]           = rfr()
        models["Ada boost regressor"]               = abr()
        models["Gradient boost regressor"]          = gbr()
        models["Support vector regressor RBF"]      = svr()
        models["Support vector regressor Linear"]   = svr('linear')
        models["Support vector regressor Poly"]     = svr(kernel='poly')
        self.models = models

        kf = KFold(n_splits=folds, shuffle=True)
        results = []
        names = []
        et = []
        for model_name in models:
            start = time.time()
            cv_scores = -1 * cross_val_score(models[model_name], self.Xt_train, self.yt_train, cv=kf, scoring=metric)  
            results.append(cv_scores)
            names.append(model_name)
            et.append((time.time() - start))
        report = pd.DataFrame({'Model': names, 'Score': results, 'Elapsed Time': et})
        report['Score (avg)'] = report.Score.apply(lambda x: np.sqrt(x).mean())  # sqrt assumes an MSE-type metric, yielding RMSE
        report['Score (std)'] = report.Score.apply(lambda x: np.sqrt(x).std())
        report['Score (CV%)'] = 100 * report['Score (std)'] / report['Score (avg)']  # coefficient of variation
        report.sort_values(by='Score (avg)', inplace=True)
        report.drop('Score', axis=1, inplace=True)
        report.reset_index(inplace=True, drop=True)
        self.report_performance = report
        
        if printt:
            print('\n')
            print(self.report_width * '*', '\n*')
            print('* REGRESSION RESULTS - BEFORE PARAMETER TUNING \n*')
            print(self.report_width * '*', '')
            print(report)
            print('\n')

        if graph:
            fig, ax = plt.subplots(figsize=(size, 0.5 * size))
            plt.title('Regressor Comparison')
            #ax = fig.add_subplot(111)
            plt.boxplot(results)
            ax.set_xticklabels(names)
            plt.xticks(rotation=45)
            plt.subplots_adjust(hspace=0.0, bottom=0.25)
            self.graphs_model.append(fig)
            plt.show()             
        return None
def combined_test(i):
    # Get feature vectors
    tfidf_vectors = get_tfidf(i, 'train')
    extra_features_vector = get_extra_features(i, 'train')
    num_topics_for_lda = 30
    lda,lda_vector = get_lda(i, 100, 10, num_topics_for_lda)
    train_features = np.concatenate((tfidf_vectors,extra_features_vector),1)
    train_features = np.concatenate((lda_vector,train_features),1)
    
    normalized_tfidf_vectors = get_normalized_tfidf(i, 'train')
    normalized_extra_features_vector = get_normalized_extra_features(i, 'train')
    normalized_train_features = np.concatenate((normalized_tfidf_vectors,normalized_extra_features_vector),1)
    normalized_train_features = np.concatenate((lda_vector,normalized_train_features),1)   

    tfidf_train_features = tfidf_vectors
    extra_features_train_features = extra_features_vector
    lda_train_features = lda_vector
    tfidf_extra_train_features = np.concatenate((tfidf_vectors,extra_features_vector),1)
    lda_extra_train_features = np.concatenate((lda_vector,extra_features_vector),1)

    print(colored('feature vectors loaded', 'cyan'))

    # Set up classifiers
    knnr_classifier = knnR(n_neighbors=5, weights = 'distance')
    svr_classifier = svr()

    normalized_knnr_classifier = knnR(n_neighbors=5, weights = 'distance')
    normalized_svr_classifier = svr()

    tfidf_knnr_classifier = knnR(n_neighbors=5, weights = 'distance')
    tfidf_svr_classifier = svr()

    extra_features_knnr_classifier = knnR(n_neighbors=5, weights = 'distance')
    extra_features_svr_classifier = svr()

    lda_knnr_classifier = knnR(n_neighbors=5, weights = 'distance')
    lda_svr_classifier = svr()

    tfidf_extra_knnr_classifier = knnR(n_neighbors=5, weights = 'distance')
    tfidf_extra_svr_classifier = svr()

    lda_extra_knnr_classifier = knnR(n_neighbors=5, weights = 'distance')
    lda_extra_svr_classifier = svr()


    print(colored('classifiers setup', 'cyan'))

    # Load training essay scores
    scores = []
    with open('data/set%d.scores' % i) as f:
        for score in f:
            scores.append(int(score.split('\n')[0]))

    # Load training essay dictionary and corpus
    myDict = gensim.corpora.Dictionary.load('data/set%d.dict' % i)
    corpus = gensim.corpora.MmCorpus('data/set%d.mm' % i)

    # train classifiers
    knnr_classifier.fit(train_features, scores)
    svr_classifier.fit(train_features, scores)

    normalized_knnr_classifier.fit(normalized_train_features, scores)
    normalized_svr_classifier.fit(normalized_train_features, scores)

    tfidf_knnr_classifier.fit(tfidf_train_features, scores)
    tfidf_svr_classifier.fit(tfidf_train_features, scores)

    extra_features_knnr_classifier.fit(extra_features_train_features, scores)
    extra_features_svr_classifier.fit(extra_features_train_features, scores)

    lda_knnr_classifier.fit(lda_train_features, scores)
    lda_svr_classifier.fit(lda_train_features, scores)

    tfidf_extra_knnr_classifier.fit(tfidf_extra_train_features, scores)
    tfidf_extra_svr_classifier.fit(tfidf_extra_train_features, scores)

    lda_extra_knnr_classifier.fit(lda_extra_train_features, scores)
    lda_extra_svr_classifier.fit(lda_extra_train_features, scores)

    test_essays,test_scores = get_test_examples(i)

    index = 0

    print(colored('classifiers trained', 'cyan'))

    # Load test essay feature vectors
    tfidf_test = get_tfidf(i, 'test')
    extra_features_test = get_extra_features(i, 'test')
    test_features = np.concatenate((tfidf_test,extra_features_test),1)

    normalized_tfidf_test = get_normalized_tfidf(i, 'test')
    normalized_extra_features_test = get_normalized_extra_features(i, 'test')
    normalized_test_features = np.concatenate((normalized_tfidf_test,normalized_extra_features_test),1)


    knnr_predicted = []
    svr_predicted = []
    knnr_normalized_predicted = []
    svr_normalized_predicted = []
    knnr_tfidf_predicted = []
    svr_tfidf_predicted = []
    knnr_extra_features_predicted = []
    svr_extra_features_predicted = []
    knnr_lda_predicted = []
    svr_lda_predicted = []
    knnr_tfidf_extra_predicted = []
    svr_tfidf_extra_predicted = []
    knnr_lda_extra_predicted = []
    svr_lda_extra_predicted = []

    actual = []

    print(colored('Testing...', 'cyan'))

    for idx, test_essay in enumerate(test_essays):

        doc_bow = myDict.doc2bow(test_essay)
        doc_lda = lda[doc_bow]
        
        # Test feature vectors
        vectorized_lda = topic_distribution_to_vector(doc_lda, num_topics_for_lda)
        test_feature = np.concatenate((vectorized_lda, test_features[index]), 1)        
        normalized_test_feature = np.concatenate((vectorized_lda, normalized_test_features[index]), 1)
        tfidf_test_feature = tfidf_test[index]
        extra_features_test_feature = extra_features_test[index]
        lda_test_feature = vectorized_lda
        tfidf_extra_feature = test_features[index]
        lda_extra_feature = np.concatenate((vectorized_lda,extra_features_test[index]), 1)

        knnr_predicted_score = knnr_classifier.predict(test_feature)
        svr_predicted_score = svr_classifier.predict(test_feature)
        knnr_normalized_predicted_score = normalized_knnr_classifier.predict(normalized_test_feature)
        svr_normalized_predicted_score = normalized_svr_classifier.predict(normalized_test_feature)
        knnr_tfidf_predicted_score = tfidf_knnr_classifier.predict(tfidf_test_feature)
        svr_tfidf_predicted_score = tfidf_svr_classifier.predict(tfidf_test_feature)
        knnr_extra_features_predicted_score = extra_features_knnr_classifier.predict(extra_features_test_feature)
        svr_extra_features_predicted_score = extra_features_svr_classifier.predict(extra_features_test_feature)
        knnr_lda_predicted_score = lda_knnr_classifier.predict(lda_test_feature)
        svr_lda_predicted_score = lda_svr_classifier.predict(lda_test_feature)        
        knnr_tfidf_extra_predicted_score = tfidf_extra_knnr_classifier.predict(tfidf_extra_feature)
        svr_tfidf_extra_predicted_score = tfidf_extra_svr_classifier.predict(tfidf_extra_feature)
        knnr_lda_extra_predicted_score = lda_extra_knnr_classifier.predict(lda_extra_feature)
        svr_lda_extra_predicted_score = lda_extra_svr_classifier.predict(lda_extra_feature)

        actual.append(float(test_scores[idx]))
        knnr_predicted.append(float(knnr_predicted_score))
        svr_predicted.append(float(svr_predicted_score))
        knnr_normalized_predicted.append(float(knnr_normalized_predicted_score))
        svr_normalized_predicted.append(float(svr_normalized_predicted_score))
        knnr_tfidf_predicted.append(float(knnr_tfidf_predicted_score))
        svr_tfidf_predicted.append(float(svr_tfidf_predicted_score))
        knnr_extra_features_predicted.append(float(knnr_extra_features_predicted_score))
        svr_extra_features_predicted.append(float(svr_extra_features_predicted_score))
        knnr_lda_predicted.append(float(knnr_lda_predicted_score))
        svr_lda_predicted.append(float(svr_lda_predicted_score))
        knnr_tfidf_extra_predicted.append(float(knnr_tfidf_extra_predicted_score))
        svr_tfidf_extra_predicted.append(float(svr_tfidf_extra_predicted_score))
        knnr_lda_extra_predicted.append(float(knnr_lda_extra_predicted_score))
        svr_lda_extra_predicted.append(float(svr_lda_extra_predicted_score))

        print(colored('essay #%d tested' % idx, 'cyan'))
        index += 1

    # pickle data (binary mode is required by pickle under Python 3)
    pickle.dump(actual, open('data/set%d_actual_scores.pkl' % i, 'wb'))
    pickle.dump(knnr_predicted, open('data/set%d_knnr_predicted_scores.pkl' % i, 'wb'))
    pickle.dump(svr_predicted, open('data/set%d_svr_predicted_scores.pkl' % i, 'wb'))
    pickle.dump(knnr_normalized_predicted, open('data/set%d_knnr_normalized_predicted_scores.pkl' % i, 'wb'))
    pickle.dump(svr_normalized_predicted, open('data/set%d_svr_normalized_predicted_scores.pkl' % i, 'wb'))
    pickle.dump(knnr_tfidf_predicted, open('data/set%d_knnr_tfidf_predicted_scores.pkl' % i, 'wb'))
    pickle.dump(svr_tfidf_predicted, open('data/set%d_svr_tfidf_predicted_scores.pkl' % i, 'wb'))
    pickle.dump(knnr_extra_features_predicted, open('data/set%d_knnr_statistics_predicted_scores.pkl' % i, 'wb'))
    pickle.dump(svr_extra_features_predicted, open('data/set%d_svr_statistics_predicted_scores.pkl' % i, 'wb'))
    pickle.dump(knnr_lda_predicted, open('data/set%d_knnr_lda_predicted_scores.pkl' % i, 'wb'))
    pickle.dump(svr_lda_predicted, open('data/set%d_svr_lda_predicted_scores.pkl' % i, 'wb'))
    pickle.dump(knnr_tfidf_extra_predicted, open('data/set%d_knnr_tfidf_statistics_predicted_scores.pkl' % i, 'wb'))
    pickle.dump(svr_tfidf_extra_predicted, open('data/set%d_svr_tfidf_statistics_predicted_scores.pkl' % i, 'wb'))
    pickle.dump(knnr_lda_extra_predicted, open('data/set%d_knnr_lda_statistics_predicted_scores.pkl' % i, 'wb'))
    pickle.dump(svr_lda_extra_predicted, open('data/set%d_svr_lda_statistics_predicted_scores.pkl' % i, 'wb'))
    print(colored('essay set%d data dumped' % i, 'grey'))

    print(colored('ESSAY SET %d' % i, 'green', attrs=['bold']))
    knnr_actual,knnr_predicted = filter_nan(actual, knnr_predicted)
    print(colored('(RAW) KNN MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(knnr_actual, knnr_predicted), mean_absolute_error(knnr_actual, knnr_predicted)), 'green', attrs=['bold']))
    svr_actual,svr_predicted = filter_nan(actual, svr_predicted)
    print(colored('(RAW) SVM MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(svr_actual, svr_predicted), mean_absolute_error(svr_actual, svr_predicted)), 'green', attrs=['bold']))

    knnr_normalized_actual,knnr_normalized_predicted = filter_nan(actual, knnr_normalized_predicted)
    print(colored('(NORMALIZED) KNN MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(knnr_normalized_actual, knnr_normalized_predicted), mean_absolute_error(knnr_normalized_actual, knnr_normalized_predicted)), 'green', attrs=['bold']))
    svr_normalized_actual,svr_normalized_predicted = filter_nan(actual, svr_normalized_predicted)
    print(colored('(NORMALIZED) SVM MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(svr_normalized_actual, svr_normalized_predicted), mean_absolute_error(svr_normalized_actual, svr_normalized_predicted)), 'green', attrs=['bold']))

    knnr_tfidf_extra_actual,knnr_tfidf_extra_predicted = filter_nan(actual, knnr_tfidf_extra_predicted)
    print(colored('(TFIDF + STATISTICS) KNN MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(knnr_tfidf_extra_actual, knnr_tfidf_extra_predicted), mean_absolute_error(knnr_tfidf_extra_actual, knnr_tfidf_extra_predicted)), 'green', attrs=['bold']))
    svr_tfidf_extra_actual,svr_tfidf_extra_predicted = filter_nan(actual, svr_tfidf_extra_predicted)
    print(colored('(TFIDF + STATISTICS) SVM MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(svr_tfidf_extra_actual, svr_tfidf_extra_predicted), mean_absolute_error(svr_tfidf_extra_actual, svr_tfidf_extra_predicted)), 'green', attrs=['bold']))

    knnr_lda_extra_actual,knnr_lda_extra_predicted = filter_nan(actual, knnr_lda_extra_predicted)
    print(colored('(LDA + STATISTICS) KNN MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(knnr_lda_extra_actual, knnr_lda_extra_predicted), mean_absolute_error(knnr_lda_extra_actual, knnr_lda_extra_predicted)), 'green', attrs=['bold']))
    svr_lda_extra_actual,svr_lda_extra_predicted = filter_nan(actual, svr_lda_extra_predicted)
    print(colored('(LDA + STATISTICS) SVM MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(svr_lda_extra_actual, svr_lda_extra_predicted), mean_absolute_error(svr_lda_extra_actual, svr_lda_extra_predicted)), 'green', attrs=['bold']))

    knnr_tfidf_actual,knnr_tfidf_predicted = filter_nan(actual, knnr_tfidf_predicted)
    print(colored('(TFIDF) KNN MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(knnr_tfidf_actual, knnr_tfidf_predicted), mean_absolute_error(knnr_tfidf_actual, knnr_tfidf_predicted)), 'green', attrs=['bold']))
    svr_tfidf_actual,svr_tfidf_predicted = filter_nan(actual, svr_tfidf_predicted)
    print(colored('(TFIDF) SVM MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(svr_tfidf_actual, svr_tfidf_predicted), mean_absolute_error(svr_tfidf_actual, svr_tfidf_predicted)), 'green', attrs=['bold']))

    knnr_extra_features_actual,knnr_extra_features_predicted = filter_nan(actual, knnr_extra_features_predicted)
    print(colored('(STATISTICS) KNN MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(knnr_extra_features_actual, knnr_extra_features_predicted), mean_absolute_error(knnr_extra_features_actual, knnr_extra_features_predicted)), 'green', attrs=['bold']))
    svr_extra_features_actual,svr_extra_features_predicted = filter_nan(actual, svr_extra_features_predicted)
    print(colored('(STATISTICS) SVM MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(svr_extra_features_actual, svr_extra_features_predicted), mean_absolute_error(svr_extra_features_actual, svr_extra_features_predicted)), 'green', attrs=['bold']))

    knnr_lda_actual,knnr_lda_predicted = filter_nan(actual, knnr_lda_predicted)
    print(colored('(LDA) KNN MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(knnr_lda_actual, knnr_lda_predicted), mean_absolute_error(knnr_lda_actual, knnr_lda_predicted)), 'green', attrs=['bold']))
    svr_lda_actual,svr_lda_predicted = filter_nan(actual, svr_lda_predicted)
    print(colored('(LDA) SVM MEAN SQUARE, ABSOLUTE: ', 'cyan'), colored('%f, %f' % (mean_squared_error(svr_lda_actual, svr_lda_predicted), mean_absolute_error(svr_lda_actual, svr_lda_predicted)), 'green', attrs=['bold']))
Example #8
    def __init__(self, kernelType, CValue=1):
        if kernelType == 1:
            self.regressor = svr(kernel='linear', C=CValue)
        elif kernelType > 1:
            self.regressor = svr(kernel='poly', C=CValue, degree=kernelType)
        else:
            # assumption: fall back to the default RBF kernel so the attribute is always set
            self.regressor = svr(kernel='rbf', C=CValue)
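
Only the __init__ of the wrapper class appears in this excerpt; a hedged usage sketch (the class name KernelSVR and the data names are invented here) might look like:

model = KernelSVR(kernelType=2, CValue=10)   # hypothetical class name; degree-2 poly kernel
model.regressor.fit(X_train, y_train)        # X_train / y_train are illustrative
y_pred = model.regressor.predict(X_test)
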
ind_param = dataset.iloc[:, 1:2].values
dep_param = dataset.iloc[:, -1].values
# ======================================================
# No missing values and no categorical data.
# Can't split the dataset into train and test sets, due to its small size.
# ======================================================
# Feature scaling is a must for scikit-learn's SVR class (it does no internal scaling)
from sklearn.preprocessing import StandardScaler as sklp_ss
ind_scaler = sklp_ss()
ind_param = ind_scaler.fit_transform(ind_param.astype(float).reshape(-1, 1))
dep_scaler = sklp_ss()
dep_param = dep_scaler.fit_transform(dep_param.astype(float).reshape(-1, 1))
# ======================================================
# Construct an initial kernel model
from sklearn.svm import SVR as svr
regressor = svr(kernel='rbf')
regressor.fit(ind_param, dep_param.ravel())
# ======================================================
# Make a prediction
import numpy as np
# First, create a numpy array of the wanted input values
input_prediction = np.array([[6.5]])
# Then, transform the input array to the scale of the model
# BEWARE: use the independent (x) parameter scaler!
input_prediction = ind_scaler.transform(input_prediction)
# Make a prediction with the SVR model
predictions = regressor.predict(input_prediction)
# Now do an inverse transformation in order to interpret the result
# CAUTION: use the dependent (f(x)) parameter scaler!
# (scikit-learn >= 1.0 expects a 2-D array here, hence the reshape)
predictions = dep_scaler.inverse_transform(predictions.reshape(-1, 1))
# One-liner: predictions = dep_scaler.inverse_transform(regressor.predict(ind_scaler.transform(np.array([[6.5]]))).reshape(-1, 1))
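
A more idiomatic way to get the same result — a sketch assuming scikit-learn >= 0.20, where TransformedTargetRegressor inverts the target scaling automatically and a Pipeline scales the features — starting again from the raw columns of dataset:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import TransformedTargetRegressor
from sklearn.svm import SVR

X_raw = dataset.iloc[:, 1:2].values.astype(float)
y_raw = dataset.iloc[:, -1].values.astype(float)
# feature scaling + SVR in one estimator; target scaling is applied and inverted for us
model = TransformedTargetRegressor(
    regressor=make_pipeline(StandardScaler(), SVR(kernel='rbf')),
    transformer=StandardScaler(),
)
model.fit(X_raw, y_raw)
prediction = model.predict(np.array([[6.5]]))  # already back on the original scale
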
Example #10
f = plt.figure()
plt.plot(x1, y, 'r.', label='x1')
plt.plot(x2, y, 'g.', label='x2')
plt.xlabel('x values')
plt.ylabel('y values')
plt.legend(loc='lower right')
plt.show()
f.savefig('testdata.pdf', bbox_inches='tight')

#%% Models
x = np.column_stack((x1, x2))
randomforest = rfr().fit(x, y)
randomforest.predicted = randomforest.predict(x)

svm = svr().fit(x, y)
svm.predicted = svm.predict(x)

#%% Performance
f = plt.figure()
plt.plot(randomforest.predicted, y, 'b.', label='g1(x)')
plt.plot(svm.predicted, y, 'y.', label='g2(x)')
plt.plot([0, 25], [0, 25], 'r-', label='identity')
plt.xlabel('predicted values')
plt.ylabel('y values')
plt.legend(loc='lower right')
plt.show()
f.savefig('performance.pdf', bbox_inches='tight')

randomforest.performance = partpred(randomforest.predicted, y, x, 10)
svm.performance = partpred(svm.predicted, y, x, 10)
Example #11
        test_data_X = pca.transform(test_data_X)
    
    ###############################--------Model Setup--------###############################
    ann_regressor = KerasRegressor(build_fn=ann_model, epochs=30, batch_size=10, verbose=1)
    
    xgb_regressor = xgb(learning_rate = 0.0825, min_child_weight = 1, max_depth = 7, subsample = 0.8, verbose = 10, random_state = 2017, n_jobs = -1, eval_metric = "rmse")
    
    rfr_regressor = rfr(max_features = 0.9, min_samples_leaf = 50)
    
    gbr_regressor = gbr(n_estimators = 200, verbose = 5, learning_rate = 0.08, max_depth = 7, max_features = 0.5, min_samples_leaf = 50, subsample = 0.8, random_state = 2017)
    
    etr_regressor = etr(n_estimators = 200, verbose = 10, max_depth = 7, min_samples_leaf = 100, max_features = 0.9, min_impurity_split = 100, random_state = 2017)  # note: min_impurity_split was removed in scikit-learn 1.0; use min_impurity_decrease instead
    
    lr_regressor = lr()
    
    svr_regressor = svr(verbose = 10)
    
    ensemble = Ensemble(n_folds = 5,stacker =  lr_regressor,base_models = [ann_regressor, xgb_regressor, rfr_regressor, gbr_regressor, etr_regressor])
    
    
    ###############################--------Grid Search--------###############################

  
    if (Env_var.get('GridSearch') == 1):
        
        if (Env_var.get('Model') == 'ann'):
            dropout_rate = [0.0, 0.001, 0.01]
            ann_parameters = dict(dropout_rate=dropout_rate)
            
            score, best_parameters, best_model = AutoGridSearch(ann_parameters,ann_regressor, train_data_X, train_data_y)