# Build a per-day event-frequency table from parallel year/month/day columns,
# fit a linear trend over time, and plot the points with the fitted line.
day = rowdata.day
date = {}
# Count occurrences of each 'YYYY-MM-DD' key. zip() replaces the original
# index loop, and dict.get() replaces the two-lookup `in date.keys()` test.
for y_val, m_val, d_val in zip(year, month, day):
    da = f'{y_val}-{m_val}-{d_val}'
    date[da] = date.get(da, 0) + 1

# Reverse insertion order so points run oldest-to-newest
# (dicts preserve insertion order in Python 3.7+).
points = list(date.items())[::-1]

gd_lr = LinearRegression()
# Features: date strings converted to Unix timestamps; targets: daily counts.
x_ = [float(time.mktime(time.strptime(p[0], "%Y-%m-%d"))) for p in points]
y_ = [float(p[1]) for p in points]
# sklearn expects a 2-D feature matrix, hence the newaxis column reshape.
gd_lr.fit(np.array(x_)[:, np.newaxis], np.array(y_))

# Round-trip timestamps back to readable labels for the x axis.
x_axis = [time.strftime("%Y-%m-%d", time.localtime(i)) for i in x_]
print(x_axis[::18])

plt.rcParams['font.sans-serif'] = ['simhei']  # set a CJK-capable font
plt.figure(figsize=[12, 8])
plt.title('回归模型')
plt.scatter(x_axis, y_, alpha=0.4, edgecolors='white')
#plt.xticks(range(7), [2013,2014,2015,2016,2017,2018,2019])
#plt.yticks(y_, fontsize=9)
plt.plot(x_axis, gd_lr.predict(np.array(x_)[:, np.newaxis]), color='gray')
ax = plt.gca()
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
# Cap the x axis at 10 evenly spaced major ticks so labels stay readable.
xmajorLocator = LinearLocator(10)
ax.xaxis.set_major_locator(xmajorLocator)
plt.show()
# Derive a short display name from the CSV path, report the fitted model's
# coefficients, record them for cross-file comparison, and plot the fit.
# BUG FIX: the two coefficient prints were Python 2 print statements
# (`print '...' % x`), a syntax error under Python 3; the rest of the file
# already uses print() calls.
# X_train, X_test, y_train, y_test
file_name = filename[70:]  # NOTE(review): hard-coded path-prefix length — confirm against caller
file_name = file_name.replace(".csv", "")
# print(file_name)
# print(reg.w_)
print('Intercept: %.2f' % reg.b_)
print('Slope: %.2f\n' % reg.w_[0])
filenames.append(file_name)
intercepts.append(reg.b_)
slopes.append(reg.w_)
# Scatter the raw data in blue with the fitted regression line in red.
plt.scatter(X, y, c='blue')
plt.plot(X, reg.predict(X), color='red')
plt.show()
# (earlier train/test experiment kept commented for reference)
# mult_index_X_train = mult_index[:-15]
# mult_index_X_test = mult_index[-20:]
# mult_month_count_y_train = mult_month_count[:-15]
# mult_month_count_y_test = mult_month_count[-20:]
# lm = LinearRegression()
# lm.fit(mult_index_X_train, mult_month_count_y_train)
# mult_month_count_y_pred = lm.predict(mult_index_X_test)
# plt.scatter(mult_index_X_test, mult_month_count_y_pred, color='blue', linewidth=3)
# plt.show()
# 70/30 head/tail split of X and Y, then two fits: mlxtend's closed-form
# LinearRegression scored on the held-out tail, and scikit-learn's
# LinearRegression scored with 10-fold cross-validated predictions.
print(set_sizes[0])
# Hoist the size lookups/conversions used four times below.
total_rows = set_sizes[nrows]
n_train = int(total_rows * 0.7)
n_test = int(total_rows * 0.3)
# NOTE(review): int(0.7*n) + int(0.3*n) can be n-1 due to truncation, so a
# middle row may be excluded from both sets — confirm this is intended.
print('here', total_rows * 0.7)
X_train = X.head(n_train)
X_test = X.tail(n_test)
Y_train = Y.head(n_train)
Y_test = Y.tail(n_test)

# minibatches=None selects the normal-equation (closed-form) solver in mlxtend.
ne_lr = LinearRegression(minibatches=None)
Y2 = pd.to_numeric(Y, downcast='float')
print("here", type(Y2))
print(type(Y_train))
ne_lr.fit(X_train, pd.to_numeric(Y_train, downcast='float'))
print(ne_lr)
y_pred = ne_lr.predict(X_test)
res = mean_squared_error(Y_test, y_pred)
#res = scoring(y_target=Y_test, y_predicted=y_pred, metric='rmse')
print("results: ", res)

lin = linear_model.LinearRegression()
lin.fit(X_train, Y_train)
predictedCV = cvp(lin, X, Y, cv=10)
# NOTE(review): despite the "rmse" label this prints MSE (no sqrt applied).
print("rmse cross val", mean_squared_error(Y, predictedCV))
# Fit two Adaline batch-gradient-descent linear regressions with different
# learning rates and print their coefficients side by side.
# The model expects a 2-D feature matrix, so reshape the 1-D x into a column.
X = np.asanyarray(x).reshape(-1, 1)
eta1 = 0.0001
eta2 = 0.1

from mlxtend.regressor import LinearRegression
from sklearn import metrics

# minibatches=1 makes mlxtend's 'sgd' run as full-batch gradient descent.
ada1_bgd = LinearRegression(method='sgd', eta=eta1, epochs=20, random_seed=0, minibatches=1)
ada1_bgd.fit(X, y)
y_pred = ada1_bgd.predict(X)
mse1 = metrics.mean_squared_error(y_pred, y)

ada2_bgd = LinearRegression(method='sgd', eta=eta2, epochs=20, random_seed=0, minibatches=1)
ada2_bgd.fit(X, y)
y_pred = ada2_bgd.predict(X)
mse2 = metrics.mean_squared_error(y_pred, y)

print("Adaline Batch Gradient Descent Regression Algorithm")
print("-----------------------------------------------------")
print("\tLearning Rate: ", eta1, "\t\t\tLearning Rate: ", eta2)
# BUG FIX: in mlxtend, b_ is the fitted bias (intercept) and w_ the weight
# vector (slope); the original printed w_ under "Intercept" and b_ under
# "Slope" — swapped, and inconsistent with the reg.b_/reg.w_[0] usage
# elsewhere in this file.
print('\tIntercept: %.2f' % ada1_bgd.b_, end='')
print('\t\t\t\tIntercept: %.2f' % ada2_bgd.b_)
print('\tSlope: %.2f' % ada1_bgd.w_[0], end='')