def train(x, y, model_path='f**k.m'):
    """Fit a degree-2 polynomial regression on ``(x, y)`` and save the model.

    The inputs are expanded with ``PolynomialFeatures(degree=2)``, a
    ``LinearRegression`` is fitted on the expanded matrix, the fitted
    regressor is written to disk with ``joblib.dump`` and its score on the
    training data is printed.

    Args:
        x: Feature matrix accepted by ``PolynomialFeatures.fit_transform``.
        y: Target values accepted by ``LinearRegression.fit``.
        model_path: Destination passed to ``joblib.dump``. Defaults to the
            path the function has always written to. NOTE(review): the
            default name contains ``*``, which some filesystems reject.

    Returns:
        None. The score is printed, not returned.
    """
    poly = PolynomialFeatures(degree=2)
    # Expand the features exactly once. The previous code overwrote ``x``
    # with the expanded matrix and then called ``transform`` on it again,
    # feeding the transformer a different number of columns than it was
    # fitted on.
    x_poly = poly.fit_transform(x)

    model = LinearRegression()
    model.fit(x_poly, y)

    # Only the linear model is persisted; whoever loads it has to apply the
    # same degree-2 expansion to new inputs before calling ``predict``.
    joblib.dump(model, model_path)

    # The label is Chinese for "polynomial regression accuracy"; the value is
    # ``model.score`` evaluated on the same data the model was trained on.
    print('多项式回归正确率', model.score(x_poly, y))
# Keep the incoming training matrix reachable under a second name
# (this is an alias of the same object, not a copy).
X_train_no_intercept = X_train
# Re-express X_train as (rows, X_train.shape[1]); requires a 2-D input.
X_train = X_train.reshape(-1, X_train.shape[1])
poly.fit(X_train, y_train)

# The intercept
print('Intercept: \n', poly.intercept_)
# The coefficients
print('Coefficients: \n', poly.coef_)

# Residuals are computed once per split and reused for the error summary
# and the histograms below, instead of calling predict() repeatedly.
train_error = poly.predict(X_train) - y_train
test_error = poly.predict(X_test) - y_test
train_mse = np.mean(train_error ** 2)
test_mse = np.mean(test_error ** 2)

# The mean squared error. These values are means of squared residuals, so
# they are labelled as such (the old label called them a residual sum of
# squares, which they are not).
print("Mean squared error, training data: %.2f" % train_mse)
print("Mean squared error, test data: %.2f" % test_mse)
var_to_graph['multReg_poly'] = test_mse

# Score reported by the estimator on the training data
# (original note: 1 is perfect prediction).
print('Variance score, training data: %.2f' % poly.score(X_train, y_train))

# Vector of prediction errors, shown as a histogram per split.
print('Distribution of prediction error on training data:')
predError = train_error
plt.hist(predError)
plt.show()

print('Distribution of prediction error on test data:')
predError = test_error
plt.hist(predError)
plt.show()

## RIDGE REGRESSION
# DOC: http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html
# create training and test sets
# --- Polynomial regression: predictions plotted against feature column 0 ---
# NOTE(review): assumes x_poly was built from x_test, so y_pred lines up
# row-for-row with x_test -- confirm against the code above this excerpt.
y_pred = reg2.predict(x_poly)
plt.scatter(x_train[:, 0], y_train, color='red')
# Reorder x and the predictions with ONE shared permutation. Sorting the two
# arrays independently (as before) breaks the pairing between each x value
# and its own prediction and always draws a monotone curve.
order_col0 = np.argsort(x_test[:, 0])
plt.plot(x_test[order_col0, 0], y_pred[order_col0], color='blue')
plt.title("railways with poly reg")
plt.show()

# --- Linear regression: predictions plotted against feature column 3 ---
reg = LinearRegression()
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test)
plt.scatter(x_train[:, 3], y_train, color='red')
order_col3 = np.argsort(x_test[:, 3])
plt.plot(x_test[order_col3, 3], y_pred[order_col3], color='blue')
plt.title("railways with linear reg")
plt.show()

print('The accuracy of the linear reg is {:.2f} out of 1 on training data'.format(
    reg.score(x_train, y_train)))
print('The accuracy of the linear reg is {:.2f} out of 1 on test data'.format(
    reg.score(x_test, y_test)))
# reg.coef_ and reg.intercept_ hold the fitted parameters if they are needed.

# Root mean squared error of the linear model on the test split.
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(rmse)

# --- KNN ---
# NOTE(review): the plot title says "KNN reg" but the estimator is a
# classifier; if y_train is continuous, KNeighborsRegressor is presumably
# what was intended -- confirm before changing.
from sklearn.neighbors import KNeighborsClassifier
reg = KNeighborsClassifier()
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test)
plt.scatter(x_train[:, 0], y_train, color='red')
# Draw the line in ascending-x order, consistent with the plots above;
# an unsorted line plot zig-zags between points.
plt.plot(x_test[order_col0, 0], y_pred[order_col0], color='blue')
plt.title("railways with KNN reg")
# Record the linear model's (score, mean squared error) pair under 'lr'.
# `lr` and `y_lr_pred` are defined before this excerpt.
model_rsq_dic['lr'] = (lr.score(lstat_x, y), mean_squared_error(y, y_lr_pred))
# Scatter the observations and overlay the linear model's predictions.
plt.scatter(lstat_x, y, s=10)
# NOTE(review): the rsq figure in the legend label is a hard-coded literal,
# not the value computed above; it will go stale if the data changes.
plt.plot(lstat_x, y_lr_pred, color='red', label='Linear Regression - rsq: 0.544')
# Expand lstat_x into degree-2 polynomial features.
poly = PolynomialFeatures(degree=2)
lstat_x_transformed = poly.fit_transform(lstat_x)
# `poly` is rebound here: from this point on it names the LinearRegression,
# and the PolynomialFeatures transformer is no longer reachable by name.
poly = LinearRegression()
poly.fit(lstat_x_transformed, y)
print("poly equation", poly.coef_, " intercept ", poly.intercept_)
# In-sample predictions and fit statistics for the polynomial model.
y_poly_pred = poly.predict(lstat_x_transformed)
print("r^2 of poly", poly.score(lstat_x_transformed, y))
print("mse of poly", mean_squared_error(y, y_poly_pred))
model_rsq_dic['poly'] = (poly.score(lstat_x_transformed, y), mean_squared_error(y, y_poly_pred))
# Sort the (x, prediction) pairs by x before drawing: a line plot connects
# points in the order they are given.
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(lstat_x, y_poly_pred), key=sort_axis)
# Unzip back into two tuples; note that y_poly_pred is rebound to the
# x-sorted tuple from here on.
x, y_poly_pred = zip(*sorted_zip)
# NOTE(review): as above, the rsq figure in this label is a hard-coded literal.
plt.plot(x, y_poly_pred, color='green', label='Poly Regression - rsq: 0.641')
# `Earth` is defined/imported outside this excerpt (presumably a MARS model).
mars = Earth()
# The argument list of this call continues past the end of this excerpt.
mars.fit( lstat_x, y,
# Expand the "horsepower" column with the transformer `poly` (created before
# this excerpt). The transformer is fitted on the training split only; the
# test split is pushed through the already-fitted transformer with
# transform() rather than being refitted via fit_transform().
x_train_fit = poly.fit_transform(x_train_2[["horsepower"]])
x_test_fit = poly.transform(x_test_2[["horsepower"]])

# Create a linear regression model and train it.
# Note: `poly` is rebound here and names the LinearRegression from now on.
poly = LinearRegression()
poly.fit(x_train_fit, y_train_2)

# Get predicted values for the test split.
Yhat = poly.predict(x_test_fit)
print("Predicted values from polynomial regression :\n", Yhat[0:5])

# Show the first five actual targets for comparison with the predictions.
print("Actual values :\n", y_test_2[0:5])

# Get R^2 of training and test data.
R_sq_train_poly = poly.score(x_train_fit, y_train_2)
R_sq_test_poly = poly.score(x_test_fit, y_test_2)
print("Polynomial Regression R^2 values ", R_sq_train_poly, " and ", R_sq_test_poly)

# How the R^2 changes on the test data for different order polynomials.
orders = [1, 2, 3, 4, 5, 6]
R_sq = []
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15, random_state=1)
# NOTE(review): only the feature-expansion part of the loop body is visible
# in this excerpt; R_sq is not appended to here, so the body presumably
# continues below.
for order in orders:
    polyF = PolynomialFeatures(degree=order)
    # Fit on the training split, then reuse the fitted transformer for test.
    X_train_fit = polyF.fit_transform(X_train[["horsepower"]])
    X_test_fit = polyF.transform(X_test[["horsepower"]])