def test_univariate_normal_equation_std():
    """Fit with ``minibatches=None`` on ``X_rm_std``/``y_std`` and check the parameters.

    The fitted ``w_`` and ``b_`` must match 0.7 and 0.0 to one decimal place.
    """
    model = LinearRegression(minibatches=None)
    model.fit(X_rm_std, y_std)

    expected_weights = np.array([[0.7]])
    expected_bias = np.array([0.0])
    assert_almost_equal(model.w_, expected_weights, decimal=1)
    assert_almost_equal(model.b_, expected_bias, decimal=1)
def test_multivariate_normal_equation():
    """Fit with ``minibatches=None`` on ``X_rm_lstat``/``y`` and check ``w_`` then ``b_``.

    Both fitted attributes are compared to their reference values to one
    decimal place.
    """
    # Insertion order matters: the weights are checked before the bias.
    expected = {
        "w_": np.array([[5.1], [-0.6]]),
        "b_": np.array([-1.5]),
    }
    regressor = LinearRegression(minibatches=None)
    regressor.fit(X_rm_lstat, y)
    for attr_name, reference in expected.items():
        assert_almost_equal(getattr(regressor, attr_name), reference, decimal=1)
def test_univariate_stochastic_gradient_descent():
    """Fit with ``minibatches=len(y)`` for 100 epochs and compare ``w_``.

    The fitted weights must agree with ``expect_rm_std`` to two decimals.
    """
    settings = dict(minibatches=len(y), eta=0.0001, epochs=100, random_seed=0)
    regressor = LinearRegression(**settings)
    regressor.fit(X_rm_std, y_std)
    assert_almost_equal(regressor.w_, expect_rm_std, decimal=2)
def test_progress_3():
    """Smoke-test ``fit`` with ``print_progress=3``.

    Runs a single epoch on ``X_rm_std``/``y_std``; there are no assertions,
    so the test passes as long as fitting with progress reporting at level 3
    does not raise.
    """
    # The original passed print_progress=2 although the test is named for
    # level 3 -- presumably a copy-paste slip that left level 3 unexercised.
    gd_lr = LinearRegression(
        minibatches=1,
        eta=0.001,
        epochs=1,
        print_progress=3,
        random_seed=0,
    )
    gd_lr.fit(X_rm_std, y_std)
def test_multivariate_gradient_descent():
    """Fit with ``minibatches=1`` for 500 epochs on ``X_rm_lstat_std``/``y_std``.

    The fitted ``w_`` must agree with ``expect_rm_lstat_std`` to three decimals.
    """
    regressor = LinearRegression(
        eta=0.001,
        epochs=500,
        minibatches=1,
        random_seed=0,
    )
    regressor.fit(X_rm_lstat_std, y_std)
    learned_weights = regressor.w_
    assert_almost_equal(learned_weights, expect_rm_lstat_std, decimal=3)
def test_univariate_normal_equation():
    """Fit with ``minibatches=None`` on ``X_rm``/``y`` and check the parameters.

    The fitted ``w_`` and ``b_`` must match 9.1 and -34.7 to one decimal place.
    """
    model = LinearRegression(minibatches=None)
    model.fit(X_rm, y)

    # Weights are verified first, then the bias.
    checks = (
        (model.w_, np.array([[9.1]])),
        (model.b_, np.array([-34.7])),
    )
    for fitted, reference in checks:
        assert_almost_equal(fitted, reference, decimal=1)
def test_univariate_gradient_descent():
    """Fit with ``solver='gd'`` for 500 epochs on ``X_rm_std``/``y_std``.

    The fitted ``w_`` must agree with ``expect_rm_std`` to three decimals.
    """
    hyperparams = {"solver": "gd", "eta": 0.001, "epochs": 500, "random_seed": 0}
    regressor = LinearRegression(**hyperparams)
    regressor.fit(X_rm_std, y_std)
    assert_almost_equal(regressor.w_, expect_rm_std, decimal=3)
def test_multivariate_svd():
    """Fit with ``method='svd'`` on ``X_rm_lstat``/``y`` and check the parameters.

    ``w_`` and ``b_`` are compared to their reference values to one decimal.
    """
    regressor = LinearRegression(method='svd')
    regressor.fit(X_rm_lstat, y)

    reference_weights = np.array([[5.1], [-0.6]])
    reference_bias = np.array([-1.5])
    assert_almost_equal(regressor.w_, reference_weights, decimal=1)
    assert_almost_equal(regressor.b_, reference_bias, decimal=1)
def test_univariate_svd():
    """Fit with ``method='svd'`` on ``X_rm``/``y`` and check the parameters.

    The fitted ``w_`` and ``b_`` must match 9.1 and -34.7 to one decimal place.
    """
    # Ordered so that the weights are verified before the bias.
    targets = [
        ("w_", np.array([[9.1]])),
        ("b_", np.array([-34.7])),
    ]
    estimator = LinearRegression(method='svd')
    estimator.fit(X_rm, y)
    for attribute, reference in targets:
        assert_almost_equal(getattr(estimator, attribute), reference, decimal=1)
def test_multivariate_stochastic_gradient_descent():
    """Fit with ``solver='sgd'`` for 500 epochs on ``X_rm_lstat_std``/``y_std``.

    The fitted ``w_`` must agree with ``expect_rm_lstat_std`` to two decimals.
    """
    regressor = LinearRegression(
        eta=0.0001,
        epochs=500,
        solver='sgd',
        random_seed=0,
    )
    regressor.fit(X_rm_lstat_std, y_std)
    fitted_weights = regressor.w_
    assert_almost_equal(fitted_weights, expect_rm_lstat_std, decimal=2)
def test_ary_persistency_in_shuffling():
    """Check that ``fit`` leaves the input array ``X_rm_lstat_std`` unchanged.

    A copy taken before fitting is compared with the array afterwards,
    to six decimal places.
    """
    # The snapshot must be taken before fit() gets a chance to touch the input.
    snapshot = X_rm_lstat_std.copy()

    regressor = LinearRegression(
        eta=0.0001,
        epochs=500,
        minibatches=len(y),
        random_seed=0,
    )
    regressor.fit(X_rm_lstat_std, y_std)

    np.testing.assert_almost_equal(snapshot, X_rm_lstat_std, 6)
def test_univariate_stochastic_gradient_descent():
    """Fit with ``minibatches=len(y)`` for 150 epochs on ``X_rm_std``/``y_std``.

    The fitted ``w_`` and ``b_`` must match 0.7 and 0.0 to one decimal place.
    """
    regressor = LinearRegression(
        minibatches=len(y),
        eta=0.0001,
        epochs=150,
        random_seed=0,
    )
    regressor.fit(X_rm_std, y_std)

    reference_weights = np.array([[0.7]])
    reference_bias = np.array([0.0])
    assert_almost_equal(regressor.w_, reference_weights, decimal=1)
    assert_almost_equal(regressor.b_, reference_bias, decimal=1)
def test_multivariate_gradient_descent():
    """Fit with ``minibatches=1`` for 500 epochs on ``X_rm_lstat_std``/``y_std``.

    ``w_`` and ``b_`` are compared to their reference values to one decimal.
    """
    hyperparams = dict(eta=0.001, epochs=500, minibatches=1, random_seed=0)
    regressor = LinearRegression(**hyperparams)
    regressor.fit(X_rm_lstat_std, y_std)

    # Weights are verified first, then the bias.
    comparisons = (
        (regressor.w_, np.array([[0.4], [-0.5]])),
        (regressor.b_, np.array([0.0])),
    )
    for fitted, reference in comparisons:
        assert_almost_equal(fitted, reference, decimal=1)
def test_multivariate_stochastic_gradient_descent():
    """Fit with ``minibatches=len(y)`` for 500 epochs on ``X_rm_lstat_std``/``y_std``.

    ``w_`` and ``b_`` are compared to their reference values to three decimals.
    """
    estimator = LinearRegression(
        eta=0.0001,
        epochs=500,
        minibatches=len(y),
        random_seed=0,
    )
    estimator.fit(X_rm_lstat_std, y_std)

    weights_ref = np.array([[0.389], [-0.499]])
    bias_ref = np.array([0.000])
    assert_almost_equal(estimator.w_, weights_ref, decimal=3)
    assert_almost_equal(estimator.b_, bias_ref, decimal=3)
def test_univariate_gradient_descent():
    """Fit with ``minibatches=1`` for 500 epochs on ``X_rm_std``/``y_std``.

    The fitted ``w_`` and ``b_`` must match 0.695 and 0.00 to three decimals.
    """
    # Ordered so that the weights are verified before the bias.
    targets = [
        ("w_", np.array([[0.695]])),
        ("b_", np.array([0.00])),
    ]
    estimator = LinearRegression(minibatches=1, eta=0.001, epochs=500,
                                 random_seed=0)
    estimator.fit(X_rm_std, y_std)
    for attribute, reference in targets:
        assert_almost_equal(getattr(estimator, attribute), reference, decimal=3)
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.regressor import LinearRegression

# Five sample points; X is turned into a single-column 2-D array.
X = np.array([1.0, 2.1, 3.6, 4.2, 6])[:, np.newaxis]
y = np.array([1.0, 2.0, 3.0, 4.0, 5.0])

# print() calls replace the Python 2 ``print X`` statements, which are a
# SyntaxError on Python 3 and were inconsistent with the calls further down.
print(X)
print(y)

ne_lr = LinearRegression(minibatches=None)
ne_lr.fit(X, y)

print('Intercept: %.2f' % ne_lr.b_)
print('Slope: %.2f' % ne_lr.w_[0])


def lin_regplot(X, y, model):
    """Scatter the (X, y) points in blue and draw ``model.predict(X)`` in red.

    Draws onto the current matplotlib figure and returns ``None``; the caller
    is responsible for calling ``plt.show()``.
    """
    plt.scatter(X, y, c='blue')
    plt.plot(X, model.predict(X), color='red')


lin_regplot(X, y, ne_lr)
plt.show()
def test_multivariate_normal_equation():
    """Fit with ``solver='normal equation'`` on ``X_rm_lstat``/``y``.

    The fitted ``w_`` must agree with ``expect_rm_lstat`` to three decimals.
    """
    regressor = LinearRegression(solver='normal equation')
    regressor.fit(X_rm_lstat, y)
    fitted_weights = regressor.w_
    assert_almost_equal(fitted_weights, expect_rm_lstat, decimal=3)
and test and compare them on linear regression over your synthetic data-points. You need not to perform a cross-validation scheme here; only use the whole data set as your training set. """ X = np.asanyarray(x).reshape( -1, 1 ) #x need to be converted into matrix without changing the array values to fit the model eta1 = 0.0001 eta2 = 0.1 from mlxtend.regressor import LinearRegression from sklearn import metrics ada1_bgd = LinearRegression(method='sgd', eta=eta1, epochs=20, random_seed=0, minibatches=1) #for adalline bgd ada1_bgd.fit(X, y) y_pred = ada1_bgd.predict(X) mse1 = metrics.mean_squared_error(y_pred, y) ada2_bgd = LinearRegression(method='sgd', eta=eta2, epochs=20, random_seed=0, minibatches=1) #for adaline bgd ada2_bgd.fit(X, y) y_pred = ada2_bgd.predict(X) mse2 = metrics.mean_squared_error(y_pred, y) print("Adaline Batch Gradient Descent Regression Algorithm") print("-----------------------------------------------------")
def test_univariate_normal_equation_std():
    """Fit with ``solver='normal equation'`` on ``X_rm_std``/``y_std``.

    The fitted ``w_`` must agree with ``expect_rm_std`` to three decimals.
    """
    solver_options = {"solver": "normal equation"}
    model = LinearRegression(**solver_options)
    model.fit(X_rm_std, y_std)
    assert_almost_equal(model.w_, expect_rm_std, decimal=3)
# Wrap every monthly count in its own single-element list.
for j in month_count:
    mult_month_count.append([j])

# Single-column 2-D array built from mult_index; targets are the raw counts.
X = np.array(mult_index)[:, np.newaxis]
y = np.array(month_count)

reg = LinearRegression(minibatches=None)
reg.fit(X, y)

# NOTE(review): the fixed offset 70 presumably strips a directory prefix from
# `filename` -- confirm against the caller; it breaks if the path length changes.
file_name = filename[70:]
file_name = file_name.replace(".csv", "")

# Parenthesised single-argument print behaves the same as the old Python 2
# print statement and is also valid on Python 3.
print('Intercept: %.2f' % reg.b_)
print('Slope: %.2f\n' % reg.w_[0])

# Record the fitted parameters for this file alongside its name.
filenames.append(file_name)
intercepts.append(reg.b_)
slopes.append(reg.w_)
def test_clone():
    """Check that ``clone`` accepts a default-constructed ``LinearRegression``.

    There are no assertions; the test passes if the call does not raise.
    """
    clone(LinearRegression())
def test_univariate_normal_equation_std():
    """Fit with ``minibatches=None`` on ``X_rm_std``/``y_std`` and compare ``w_``.

    The fitted weights must agree with ``expect_rm_std`` to three decimals.
    """
    estimator = LinearRegression(minibatches=None)
    estimator.fit(X_rm_std, y_std)
    fitted_weights = estimator.w_
    assert_almost_equal(fitted_weights, expect_rm_std, decimal=3)
rowdata = pd.read_csv('data/normal/爱情字典-287637.csv')
year = rowdata.year
month = rowdata.month
day = rowdata.day

# Count how many rows share each "year-month-day" key.
date = {}
for i in range(len(year)):
    da = f'{year[i]}-{month[i]}-{day[i]}'
    date[da] = date.get(da, 0) + 1

# (date string, count) pairs, in reverse insertion order.
points = list(date.items())[::-1]

# Regress the per-date count on the date's timestamp from time.mktime.
x_ = [
    float(time.mktime(time.strptime(day_label, "%Y-%m-%d")))
    for day_label, _ in points
]
y_ = [float(count) for _, count in points]

gd_lr = LinearRegression()
gd_lr.fit(np.array(x_)[:, np.newaxis], np.array(y_))

# Convert the timestamps back to date strings for the x axis.
x_axis = [time.strftime("%Y-%m-%d", time.localtime(stamp)) for stamp in x_]
print(x_axis[::18])

plt.rcParams['font.sans-serif'] = ['simhei']  # set the font
plt.figure(figsize=[12, 8])
plt.title('回归模型')
plt.scatter(x_axis, y_, alpha=0.4, edgecolors='white')
# Disabled tick customisation kept from the original:
# plt.xticks(range(7), [2013,2014,2015,2016,2017,2018,2019])
# plt.yticks(y_, fontsize=9)
plt.plot(x_axis, gd_lr.predict(np.array(x_)[:, np.newaxis]), color='gray')

ax = plt.gca()
ax.spines['right'].set_color('none')
usecols=[0, 1, 2, 3], nrows=data_size) X = dataframe[["Feature 1", "Feature 2", "Feature 3"]] Y = dataframe["Target"] print(set_sizes[0]) print('here', set_sizes[nrows] * 0.7) X_train = X.head(int(set_sizes[nrows] * 0.7)) X_test = X.tail(int(set_sizes[nrows] * 0.3)) Y_train = Y.head(int(set_sizes[nrows] * 0.7)) Y_test = Y.tail(int(set_sizes[nrows] * 0.3)) ne_lr = LinearRegression(minibatches=None) Y2 = pd.to_numeric(Y, downcast='float') print("here", type((Y2))) print(type(Y_train)) ne_lr.fit(X_train, pd.to_numeric(Y_train, downcast='float')) print(ne_lr) y_pred = ne_lr.predict(X_test) res = mean_squared_error(Y_test, y_pred) #res = scoring(y_target=Y_test, y_predicted=y_pred, metric='rmse') print("results: ", res)
def test_multivariate_normal_equation():
    """Fit with ``minibatches=None`` on ``X_rm_lstat``/``y`` and compare ``w_``.

    The fitted weights must agree with ``expect_rm_lstat`` to three decimals.
    """
    model = LinearRegression(minibatches=None)
    model.fit(X_rm_lstat, y)
    learned_weights = model.w_
    assert_almost_equal(learned_weights, expect_rm_lstat, decimal=3)
from sklearn.model_selection import GridSearchCV from xgboost import XGBRegressor from timeseries.time_series_processing import * if __name__ == '__main__': ####生成数据集 scaler, train_X, train_y, test_X, test_y = generate_mul_dataset( 2, 1, 1, -90) xgbt = XGBRegressor() gbdt = GradientBoostingRegressor() ####加入随机森林模型 rf = RandomForestRegressor() ####加入线性模型 lr = LinearRegression() sclf = StackingRegressor(regressors=[rf, gbdt, xgbt], meta_regressor=rf) # 对预测结果进行展示 for clf, label in zip([rf, gbdt, sclf], ['model1', 'Random Forest', 'StackingClassifier']): clf.fit(train_X, train_y) lr_train = clf.predict(train_X) ####对模型进行打分 print("模型打分情况:", metrics.r2_score(train_y, lr_train)) yhat = clf.predict(test_X) # 对预测结果进行展示 plot_mul_results(scaler, test_X, yhat, test_y)