def fit_poly_reg(degr, X_in, y_in):
    """
    Transform X feature to degr polynomial.
    Return fitted model object with y target variable.
    """
    X_poly = pf(degr).fit_transform(X_in)
    m = LinearRegression().fit(X_poly, y_in)
    return m
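# Minimal usage sketch for fit_poly_reg (assumes pf is sklearn's PolynomialFeatures
# and LinearRegression is imported as above; the data below is synthetic, for
# illustration only). New inputs must be expanded with the same degree before
# calling predict on the returned model.
import numpy as np
X_demo = np.linspace(0, 10, 20).reshape(-1, 1)   # single-feature design matrix
y_demo = X_demo.ravel() ** 2 + 1.0               # simple quadratic target
model_demo = fit_poly_reg(2, X_demo, y_demo)
y_hat_demo = model_demo.predict(pf(2).fit_transform(X_demo))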
def createPhi(self, inpt, outpt, resids, polDeg, incB, inMean, outMean,
              yDim, nU, nY, nE):
    # Creates the phi (regressor) matrix.
    # Basic phi matrix of degree 1: one column per lagged input, output and residual.
    PhiTempBasic = np.zeros(shape=(yDim, (nU + nY + nE)), dtype=np.float64)
    if (nE != 0):
        resMean = np.mean(resids)
    for i in range(yDim):
        for u in range(nU):
            if (i - u - 1) < 0:
                PhiTempBasic[i, u] = inMean
            else:
                PhiTempBasic[i, u] = inpt[i - u - 1]
        for y in range(nY):
            if (i - y - 1) < 0:
                PhiTempBasic[i, y + nU] = outMean
            else:
                PhiTempBasic[i, y + nU] = outpt[i - y - 1]
        for e in range(nE):
            if (i - e - 1) < 0:
                PhiTempBasic[i, e + nU + nY] = resMean
            else:
                PhiTempBasic[i, e + nU + nY] = resids[i - e - 1]
    if (not np.isfinite(PhiTempBasic).all()):
        return None
    # Expand to the requested polynomial degree.
    PhiTemp = pf(polDeg, include_bias=incB).fit_transform(PhiTempBasic)
    return PhiTemp.copy()
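# Minimal sketch of the expansion step used in createPhi (assumes pf is sklearn's
# PolynomialFeatures; the numbers are made up). A degree-1 regressor row such as
# [u(k-1), y(k-1)] is expanded into all monomials up to the requested degree.
import numpy as np
from sklearn.preprocessing import PolynomialFeatures as pf
row = np.array([[0.5, 2.0]])                           # [u(k-1), y(k-1)]
expanded = pf(2, include_bias=False).fit_transform(row)
print(expanded)                                        # columns: u, y, u^2, u*y, y^2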
def answer_four():
    # Convert train and test features to degree-12 polynomials
    X_train_poly = pf(12).fit_transform(X_train)
    X_test_poly = pf(12).fit_transform(X_test)
    # Train the models
    linreg = LinearRegression().fit(X_train_poly, y_train)
    lassoreg = Lasso(alpha=0.01, max_iter=10000).fit(X_train_poly, y_train)
    # Compute R^2 on the test sample
    linreg_test_pred = linreg.predict(X_test_poly)
    linreg_test_r2 = r2_score(y_test, linreg_test_pred)
    lassoreg_test_pred = lassoreg.predict(X_test_poly)
    lassoreg_test_r2 = r2_score(y_test, lassoreg_test_pred)
    return (linreg_test_r2, lassoreg_test_r2)
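# A related sketch (an assumption, not what answer_four itself does): wrapping the
# expansion and the estimator in a scikit-learn Pipeline fits PolynomialFeatures on
# the training data only and reuses that same transform on the test data.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Lasso
lasso_pipe = make_pipeline(PolynomialFeatures(12), Lasso(alpha=0.01, max_iter=10000))
# lasso_pipe.fit(X_train, y_train); lasso_pipe.score(X_test, y_test) would give the test R^2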
def gbdt_gen2(data):
    # Expand the node features with pairwise interaction terms before calling gbdt_gen.
    x = data.x
    poly = pf(2, interaction_only=True)
    x = poly.fit_transform(x)
    tmp = data.x
    data.x = torch.FloatTensor(x)
    res = gbdt_gen(data)
    data.x = tmp  # restore the original features
    return res
def fit_models(X_in, y_in):
    """
    Fit multiple polynomial regression models.
    Store the models with their polynomial degree as a list of tuples.
    """
    regs = []
    for i in range(0, 10):
        X_poly = pf(i).fit_transform(X_in)
        reg = LinearRegression().fit(X_poly, y_in)
        regs.append((i, reg))
    return regs
def calc_r2_scores(regs_in, X_in, y_in):
    """
    Calculate the R2 score for a given set of polynomial regressions (regs_in),
    features (X_in) and target (y_in).
    """
    scores = []
    for i, reg in regs_in:
        X_poly = pf(i).fit_transform(X_in)
        pred = reg.predict(X_poly)
        score = r2_score(y_in, pred)
        scores.append(score)
    return np.array(scores)
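# Minimal usage sketch for the two helpers above (synthetic data; assumes numpy,
# pf, LinearRegression and r2_score are imported as in the snippets above).
import numpy as np
rng = np.random.RandomState(0)
X_demo = np.sort(rng.uniform(0, 10, 30)).reshape(-1, 1)
y_demo = np.sin(X_demo).ravel() + rng.normal(scale=0.1, size=30)
regs_demo = fit_models(X_demo, y_demo)                   # one model per degree 0..9
scores_demo = calc_r2_scores(regs_demo, X_demo, y_demo)  # R^2 of each degree on the same data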
def answer_one():
    # Fit the models
    reg1 = LinearRegression().fit(X_train, y_train)
    reg3 = fit_poly_reg(3, X_train, y_train)
    reg6 = fit_poly_reg(6, X_train, y_train)
    reg9 = fit_poly_reg(9, X_train, y_train)
    # Generate polynomial features over a linspace
    ls = np.linspace(0, 10, 100).reshape(-1, 1)
    ls3 = pf(3).fit_transform(ls)
    ls6 = pf(6).fit_transform(ls)
    ls9 = pf(9).fit_transform(ls)
    # Predict over the linspace
    pred1 = reg1.predict(ls).reshape(1, -1)
    pred3 = reg3.predict(ls3).reshape(1, -1)
    pred6 = reg6.predict(ls6).reshape(1, -1)
    pred9 = reg9.predict(ls9).reshape(1, -1)
    return np.array([pred1, pred3, pred6, pred9]).reshape(4, 100)
def predict_prices(x):
    _dates = np.reshape(dates, (len(dates), 1))
    svr_rbf = SVR(C=1e3, gamma=0.1)
    svr_rbf.fit(_dates, prices)
    lin_reg = LinearRegression()
    lin_reg.fit(_dates, prices)
    poly_reg = pf(degree=2)
    X_poly = poly_reg.fit_transform(_dates)
    pol_reg = LinearRegression()
    pol_reg.fit(X_poly, prices)
    plt.scatter(_dates, prices, color='blue', label='Data')
    plt.plot(_dates, svr_rbf.predict(_dates), color='green', label='RBF Model')
    plt.plot(_dates, lin_reg.predict(_dates), color='red', label='Linear Model')
    plt.plot(_dates, pol_reg.predict(poly_reg.fit_transform(_dates)),
             color='orange', label='Polynomial Model')
    prediction_rbf = svr_rbf.predict(np.array(x).reshape(1, 1))[0]
    prediction_lin = lin_reg.predict(np.array(x).reshape(1, 1))[0]
    prediction_poly = pol_reg.predict(poly_reg.fit_transform([[x]]))
    prediction_combo = ((prediction_rbf * 0.28791198607186473) +
                        (prediction_lin * 0.5628614578028085) +
                        (prediction_poly * 0.14922655612532662))[0]
    plt.scatter([x], [prediction_rbf], color='green', label='RBF Prediction')
    plt.scatter([x], [prediction_lin], color='red', label='Linear Prediction')
    plt.scatter([x], [prediction_poly], color='orange', label='Polynomial Prediction')
    plt.scatter([x], [prediction_combo], color='purple', label='Combo Prediction')
    plt.xlabel('Date (Day)')
    plt.ylabel('Price (£)')
    plt.title('Price Prediction')
    plt.legend()
    filename = CURRENCY + '_' + shortdates[len(dates) - 1] + '.png'
    plt.savefig('../Documents/Generated_Graphs/' + filename)
    filesforblob.append(filename)
    plt.show()
    return prediction_rbf, prediction_lin, prediction_poly, prediction_combo
def generateYhat(self, type, u, y, e, theta, polDegree, inclBias, yDim, nU, nY, nE):
    # Generates Yhat
    if (type == 0 and nE == 0):  # identification or NARX
        Phi = self.createPhi(u, y, e, polDegree, inclBias, np.mean(u), 0.0,
                             yDim, nU, nY, nE)
        Yhat = Phi.dot(theta)
    else:
        if (nE != 0 and type != 0):  # simulation NARMAX -> residuals are drawn from a normal distribution
            resMean = np.mean(e)
            resStdDev = np.std(e)
        Yhat = np.zeros(shape=(yDim), dtype=np.float64)
        Phi = self.createPhi(u, np.zeros(shape=(yDim), dtype=np.float64),
                             np.zeros(shape=(yDim), dtype=np.float64), 1, False,
                             np.mean(u), 0.0, yDim, nU, nY, nE)  # basic Phi with only U
        for i in range(yDim):
            try:
                # Expand row i of Phi and calculate Yhat
                Yhat[i] = pf(polDegree, include_bias=inclBias).fit_transform(
                    Phi[i, :].reshape(1, -1)).dot(theta)
                if (nE != 0 and type != 0):
                    res = np.random.normal(resMean, resStdDev)
                for j in range(nY):  # fill the next rows of Phi with Y and E
                    if ((i + j + 1) > (yDim - 1)):
                        break
                    if (type != 0):
                        Phi[i + j + 1, nU + j] = Yhat[i]
                    else:
                        Phi[i + j + 1, nU + j] = y[i]
                for k in range(nE):
                    if ((i + k + 1) > (yDim - 1)):
                        break
                    if (type == 0):
                        Phi[i + k + 1, nU + nY + k] = y[i] - Yhat[i]
                    else:
                        Phi[i + k + 1, nU + nY + k] = res
            except ValueError:
                if (type == 0):
                    print("Model Divergent in prediction\n")
                    return None
                else:
                    print("Model Divergent in simulation\n")
                    return None
    return Yhat
def main(col=1, deg=1):
    """
    Main function which is called at the end of this file. Takes two optional
    int parameters. col is the column on which the regression is to be
    performed; for example, col = 1 corresponds to the bullish investors.
    deg determines the degree of the polynomial regression function.
    Check the data.col attribute for a list of all column names.
    """
    api_key = 'pqxsHzei5fGpxxCZ-yKH'
    aaii = quandl.dataset('AAII', 'AAII_SENTIMENT', api_key)
    regdata = createData(aaii)
    clf = lm.TheilSenRegressor()
    poly = pf(degree=deg)
    X = np.array([e for e in regdata[:, col]]).reshape(-1, 1)
    Y = np.array([e for e in regdata[:, 4]])
    clf.fit(poly.fit_transform(X), Y)
    print('Regression coefficients: {0}'.format(clf.coef_))
    linsp = np.arange(0., 1., 0.2).reshape(-1, 1)
    plotY = clf.predict(poly.fit_transform(linsp))
    pyplot.plot(X, Y, 'o', linsp, plotY)
    pyplot.show()
def national_covid_deathrate_pr(df):
    df['death_rate'] = (df['deaths'] * 100) / df['cases']
    print(df.head(10))
    df_day_X = df.iloc[:, 2].to_numpy()
    df_dr_Y = df.iloc[:, 3].to_numpy()
    # Model training
    pf_covid = pf(degree=2)
    X_poly = pf_covid.fit_transform(df_day_X.reshape(-1, 1))
    lr_model = LinearRegression()
    lr_model.fit(X_poly, df_dr_Y)
    y_pred = lr_model.predict(X_poly)
    print("r2 score {}".format(r2_score(df_dr_Y, y_pred)))
    print("mean squared error score {}".format(mean_squared_error(df_dr_Y, y_pred)))
    print("mean absolute error score {}".format(mean_absolute_error(df_dr_Y, y_pred)))
    # print("estimator values {}".format(lr_model.estimators_))
    sns.scatterplot(x=df_day_X, y=df_dr_Y, color='blue', label='deaths')
    sns.scatterplot(x=df_day_X, y=y_pred, color='red', label='death predictions')
    plt.title('national covid death chart', fontsize=18)
    plt.xlabel('days', fontsize=16)
    plt.ylabel('death rate', fontsize=16)
    plt.show()
    rem_days = np.arange(111, 365, 1).reshape(-1, 1)
    rem_poly = pf_covid.fit_transform(rem_days)
    rem_cases = lr_model.predict(rem_poly)
    sns.lineplot(x=df_day_X, y=df_dr_Y, color='blue', label='deaths')
    sns.lineplot(x=rem_days.ravel(), y=rem_cases, color='red', label='death predictions')
    plt.title('national covid death rate prediction for the year', fontsize=18)
    plt.ticklabel_format(style='plain', axis='y')
    plt.xlabel('days', fontsize=16)
    plt.ylabel('rate of death', fontsize=16)
    plt.show()
    return
def predict_prices(x):
    _dates = np.reshape(dates, (len(dates), 1))
    svr_rbf = SVR(C=1e3, gamma=0.1)
    svr_rbf.fit(_dates, prices)
    lin_reg = LinearRegression()
    lin_reg.fit(_dates, prices)
    poly_reg = pf(degree=2)
    X_poly = poly_reg.fit_transform(_dates)
    pol_reg = LinearRegression()
    pol_reg.fit(X_poly, prices)
    prediction_rbf = svr_rbf.predict(np.array(x).reshape(1, 1))[0]
    prediction_lin = lin_reg.predict(np.array(x).reshape(1, 1))[0]
    prediction_poly = pol_reg.predict(poly_reg.fit_transform([[x]]))
    prediction_combo = ((prediction_rbf * 0.28791198607186473) +
                        (prediction_lin * 0.5628614578028085) +
                        (prediction_poly * 0.14922655612532662))[0]
    return prediction_rbf, prediction_lin, prediction_poly, prediction_combo
"""
@author: kumar
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as pt
from sklearn.preprocessing import PolynomialFeatures as pf, StandardScaler
from sklearn.metrics import mean_squared_error

data = pd.read_csv("C:\\Users\\kumar\\Desktop\\train.csv")
tdata = pd.read_csv("C:\\Users\\kumar\\Desktop\\test.csv")
ivalue = data.values
sc = StandardScaler()
transformed = ivalue  # NOTE: the StandardScaler is created but the raw values are used unscaled
X_origin = transformed[:, 0:4]
Y_origin = transformed[:, 4]
p = pf(2)
X = p.fit_transform(X_origin)


def GD(B, X, Y, alfa):
    # One gradient-descent update of the coefficient matrix B
    m = len(Y)
    z = alfa / m
    B = B - (z * (((B.T.dot(X)) - Y).dot(X.T)).T)
    return (B)


def Polynomial_regression(X, Y):
    a = X.shape
    B = np.matrix([np.zeros(a[0])])
    alfa = 0.00000000000001
    NOI = 1000
    Thresh_error = 0.001
    J = []
    for i in range(NOI):
        yp = B.dot(X)
meth = []
mse_m = []
rmse_m = []
mae_m = []
mdae_m = []
evs_m = []
r2_m = []
# Parameter values
k = list(param['SVR Kernel'])[0]
md = list(param['DTR Max Depth'])[0]
deg = list(param['PR Degree'])[0]
# Creating the models
mlr = lm.LinearRegression()
svr = SVR(kernel=k, epsilon=0.1, C=1)
dt = dtr(max_depth=md)
poly = pf(degree=deg)
pr = lm.LinearRegression()
c = 0
# Repeated K-Fold Cross Validation
for tr_i, ts_i in rkf.split(data):
    print(i, c)
    train, test = data.iloc[tr_i], data.iloc[ts_i]
    train_x = train.drop(columns=['Index', 'District', 'Rainfall'])
    train_y = train['Rainfall']
    test_x = test.drop(columns=['Index', 'District', 'Rainfall'])
    test_y = test['Rainfall']
    poly_tr = poly.fit_transform(train_x)
    poly_ts = poly.fit_transform(test_x)
    # Fitting the data in the models
    mlr.fit(train_x, train_y)
    svr.fit(train_x, train_y)
nyse_lag = nyse_close.shift(1)
nyse_lag.fillna(0, inplace=True)  # fill the leading NaN with a numeric 0, not the string '0'
nyse_lag.tail(2)
nikkei_close.tail(2)

# In[18]:

from sklearn.model_selection import train_test_split
nyse_train, nyse_test, nikkei_train, nikkei_test = train_test_split(
    nyse_lag, nikkei_close, test_size=0.25, random_state=42)

# In[20]:

from sklearn.preprocessing import PolynomialFeatures as pf
from sklearn.linear_model import LinearRegression

poly = pf(degree=1, include_bias=False)
nyse_new = poly.fit_transform(nyse_train)
nyse_test_new = poly.fit_transform(nyse_test)
model = LinearRegression()
model.fit(nyse_new, nikkei_train)
nikkei_pred = model.predict(nyse_new)
nikkei_test_pred = model.predict(nyse_test_new)
plt.scatter(nyse_train, nikkei_train)
plt.plot(nyse_new[:, 0], nikkei_pred, 'r')
plt.plot(nyse_test_new[:, 0], nikkei_test_pred, 'g')
plt.legend(['Observed data', 'Predicted line (train)', 'Predicted line (test)'])
plt.show()

# In[41]:
dist = [
    'Dindigul', 'Erode', 'Karur', 'Thoothukkudi', 'Ariyalur', 'Chennai',
    'Cuddalore', 'Kancheepuram', 'Namakkal', 'Perambalur', 'Salem',
    'Thiruvallur', 'Viluppuram', 'Coimbatore', 'Madurai', 'Ramanathapuram',
    'Theni', 'The Nilgiris', 'Virudhunagar', 'Tirunelveli', 'Nagapattinam',
    'Pudukkottai', 'Sivaganga', 'Thanjavur', 'Thiruvarur', 'Tiruchirapalli',
    'Dharmapuri', 'Tiruvannamalai', 'Vellore'
]
gen = pd.read_csv(
    'C:\\Users\\Preetham G\\Documents\\Research Projects\\Forecast of Rainfall Quantity and its variation using Envrionmental Features\\Data\\Normalized & Combined Data\\All Districts.csv'
)
parameters = pd.read_csv(
    'C:\\Users\\Preetham G\\Documents\\Research Projects\\Forecast of Rainfall Quantity and its variation using Envrionmental Features\\Results\\Parameters\\Parameters.csv'
)
gen_poly = pf(degree=4)
ds_poly = pf(degree=2)
gen_pr = lm.LinearRegression()
clus_pr = lm.LinearRegression()
# MSE for mean
mse_ts_ds = []
mse_ts_clus = []
mse_ts_gen = []
# RMSE for mean
rmse_ts_ds = []
rmse_ts_clus = []
rmse_ts_gen = []
# MAE for mean
mae_ts_ds = []
mae_ts_clus = []
mae_ts_gen = []
X = data["x"] Y = data["y"] X = np.array(X).reshape(len(X), 1) Y = np.array(Y).reshape(len(Y), 1) lrg = lr() lrg.fit(X,Y) plt.scatter(X,Y) plt.show() predictX = lrg.predict(X) plt.plot(X, predictX, color="red") plt.scatter(X,Y, color="blue") pl = pf(degree=2) X_New = pl.fit_transform(X) lrg2 = lr() lrg2.fit(X_New, Y) predictX2 = lrg2.predict(X_New) plt.title("Linear and Polynomial Regression") plt.plot(X, predictX2, color="orange") plt.show()
from sklearn.preprocessing import PolynomialFeatures as pf
import numpy as np

X = np.arange(6).reshape(3, 2)
print(X)
print()
poly1 = pf(2)
Xnew1 = poly1.fit_transform(X)  # fit first, otherwise get_feature_names below raises NotFittedError
# print(Xnew1)
feat_names = poly1.get_feature_names(['y0', 'y1'])
print(feat_names)
feat_poly = poly1.get_params()
print(feat_poly)
# print()
# poly2 = pf(2, interaction_only=True)
# Xnew2 = poly2.fit_transform(X)
# print(Xnew2)
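# Note: in scikit-learn 1.0+ the call above is named get_feature_names_out
# (get_feature_names was removed in 1.2); an equivalent sketch for newer versions:
# feat_names = poly1.get_feature_names_out(['y0', 'y1'])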
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st
from sklearn import metrics
from sklearn import cluster
from sklearn import mixture
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression as lr
from sklearn.preprocessing import PolynomialFeatures as pf

df = pd.read_csv("/home/rahul/Downloads/winequality-red.csv")
x_train, x_test = train_test_split(df, test_size=0.3, random_state=42, shuffle=True)
y_train = x_train["quality"]
y_test = x_test["quality"]
x_train = x_train[["pH"]]
x_test = x_test[["pH"]]
model = lr()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(metrics.mean_squared_error(y_test, y_pred)**0.5)
model1 = lr()
model = pf(degree=2)  # NOTE: the name `model` is reused for the PolynomialFeatures transformer
x_train = model.fit_transform(x_train)
x_test = model.fit_transform(x_test)
model1.fit(x_train, y_train)
y_pred = model1.predict(x_test)
print(metrics.mean_squared_error(y_test, y_pred)**0.5)
plt.scatter(y_test, y_pred)
for j in m:
    comb_names.append(list(j))
models = [SVR(kernel='linear', epsilon=0.1, C=1)]
model_names = ['SVR(L)']
d = {}
for j, k in zip(models, model_names):
    mse_t = []
    evs_t = []
    for i in comb_names:
        print(k, i)
        train_x = train[i]
        train_y = train['Actual']
        test_x = test[i]
        test_y = test['Actual']
        if k == 'PR(4)':
            poly = pf(degree=4)
            train_x = poly.fit_transform(train_x)
            test_x = poly.fit_transform(test_x)
        model = j
        model.fit(train_x, train_y)
        ts_p = model.predict(test_x)
        mse_t.append(mse(test_y, ts_p))
        evs_t.append(evs(test_y, ts_p))
    l = 'MSE - ' + k
    m = 'EVS - ' + k
    d[l] = mse_t
    d[m] = evs_t
d['Combination'] = comb_names
df = pd.DataFrame(d, columns=['Combination', 'MSE - MLR', 'MSE - PR(4)',
                              'MSE - DTR(6)', 'MSE - SVR(L)', 'EVS - MLR',
                              'EVS - PR(4)', 'EVS - DTR(6)', 'EVS - SVR(L)'])
df.to_csv('C:\\Users\\Preetham G\\Documents\\Research Projects\\Ensemble Rainfall\\Results\\final prediction models.csv',
          index=False)
rmse_d = []
mae_d = []
mdae_d = []
evs_d = []
r2_d = []
c = 0
# Repeated K-Fold Cross Validation
for tr_i, ts_i in rkf.split(data):
    train, test = data.iloc[tr_i], data.iloc[ts_i]
    train_x = train.drop(columns=['District', 'Index', 'Rainfall'])
    train_y = train['Rainfall']
    test_x = test.drop(columns=['District', 'Index', 'Rainfall'])
    test_y = test['Rainfall']
    for j in deg:
        print(i, c, j)
        poly = pf(degree=j)
        poly_tr = poly.fit_transform(train_x)
        poly_ts = poly.fit_transform(test_x)
        pr.fit(poly_tr, train_y)
        pr_p = pr.predict(poly_ts)
        # Error values
        d.append(j)
        mse_d.append(mse(test_y, pr_p))
        rmse_d.append(rmse(test_y, pr_p))
        mae_d.append(mae(test_y, pr_p))
        mdae_d.append(mdae(test_y, pr_p))
        evs_d.append(evs(test_y, pr_p))
        r2_d.append(r2(test_y, pr_p))
    c += 1
t = {}
t['Degree'] = d
testing_data = pd.read_csv("project - part D - testing data set.csv")

# In[22]:

x_train = training_data[['Father']]
y_train = training_data[['Son']].values.reshape(-1, 1)
x_test = testing_data[['Father']]
y_test = testing_data['Son'].values.reshape(-1, 1)

# In[23]:

from sklearn.preprocessing import PolynomialFeatures as pf

model = lm.Lasso()  # the default alpha = 1.0 is used when no alpha value is explicitly given
poly = pf(degree=10)
modified_x_train = poly.fit_transform(x_train)
model.fit(modified_x_train, y_train)
y_pred = model.predict(modified_x_train)
y_pred = y_pred.reshape(-1, 1)
# Metrics on the accuracy of the model on the training data
MSE = (1 / len(y_train)) * np.sum((y_pred - y_train)**2)
RMSE = np.sqrt(MSE)
print("Training Root Mean Squared Error (RMSE): ", RMSE)
modified_x_test = poly.fit_transform(x_test)
y_pred = model.predict(modified_x_test)
y_pred = y_pred.reshape(-1, 1)
def poli_f(X, grado):
    poly = pf(grado)
    return poly, poly.fit_transform(X)
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import Imputer, OneHotEncoder
from sklearn.preprocessing import LabelEncoder as le
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import StandardScaler as ss
from sklearn.linear_model import LinearRegression as lr
from sklearn.preprocessing import PolynomialFeatures as pf

plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values
lin_reg = lr()
lin_reg.fit(X, y)
poly_reg = pf(degree=4)
X_poly = poly_reg.fit_transform(X)
poly_reg.fit(X_poly, y)
lin_reg2 = lr()
lin_reg2.fit(X_poly, y)
plt.scatter(X, y, color="green")
plt.plot(X, lin_reg.predict(X), color="blue")
plt.title("Linear Regression")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.show()
plt.scatter(X, y, color="green")
plt.plot(X, lin_reg2.predict(poly_reg.fit_transform(X)), color="blue")
plt.title("Polynomial Regression (degree 4)")
plt.xlabel("Level")
def polinomial_features(X, grado):
    poly = pf(grado)
    return (poly, poly.fit_transform(X))
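# Minimal usage sketch for the helper above (assumes pf is sklearn's
# PolynomialFeatures; the data is synthetic, for illustration only).
import numpy as np
X_demo = np.arange(6, dtype=float).reshape(-1, 1)
poly_demo, X_demo_poly = polinomial_features(X_demo, 3)  # columns: 1, x, x^2, x^3
# poly_demo.transform(...) can now expand new data with the same degree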
plt.scatter(x, y)

# Linear Regression
tahminlinear = LinearRegression()
tahminlinear.fit(x, y)  # Fit the data.
tahminLin = tahminlinear.predict(x)  # We predict by day, which is why we use x.
# With that the prediction is done; now let's plot it.
plt.plot(x, tahminLin, color="red")

# Polynomial Regression
tahminpolinom = pf(degree=6)  # A 6th-degree polynomial; by varying the degree we can see which one gives better results.
xYeni = tahminpolinom.fit_transform(x)  # Assign the transformed x values to a new variable.
# xYeni is an intermediate form created so that we can make predictions.
polinomModel = LinearRegression()
polinomModel.fit(xYeni, y)  # Again build and fit a linear model, now on the polynomial features.
tahminPol = polinomModel.predict(xYeni)
plt.plot(x, tahminPol, color="green")

# To see whether the linear or the polynomial model is better:
hataKaresiLinear = 0
hataKaresiPolinom = 0
for i in range(len(xYeni)):
    hataKaresiPolinom = hataKaresiPolinom + (float(y[i]) - float(tahminPol[i]))**2  # squared (actual - prediction)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures as pf
from sklearn.linear_model import LinearRegression as lr
from sklearn.metrics import mean_squared_error

data = pd.read_csv("SalaryPosition.csv")
X = data["Salary"]
Y = data["Level"]
X = np.array(X).reshape(len(X), 1)
Y = np.array(Y).reshape(len(Y), 1)
lregression = lr()
lregression.fit(X, Y)
lregression2 = pf()
X_Pol = lregression2.fit_transform(X)
predictX = lregression.predict(X)
m = lregression.coef_
b = lregression.intercept_
a = np.arange(12000)
plt.scatter(X, Y)
plt.plot(X, predictX, color="blue")
plt.scatter(a, a * m + b, color="red")
plt.title("Salary Position")
print(mean_squared_error(Y, predictX))  # MSE of the linear fit against the actual levels
from sklearn.preprocessing import PolynomialFeatures as pf

X_train = [[6], [8], [10], [14], [18]]
y_train = [[7], [9], [13], [17.5], [18]]
X_test = [[6], [8], [11], [16]]
y_test = [[8], [12], [15], [18]]
lr = LinearRegression()
lr.fit(X_train, y_train)
xx = np.linspace(0, 26, 100)
yy = lr.predict(xx.reshape(xx.shape[0], 1))
plt.plot(xx, yy)
print('Simple linear regression r-squared', lr.score(X_test, y_test))
quadratic_featurizer = pf(degree=3)  # NOTE: degree 3 despite the "quadratic" name
X_train_quadratic = quadratic_featurizer.fit_transform(X_train)
X_test_quadratic = quadratic_featurizer.transform(X_test)
lr_quadratic = LinearRegression()  # separate estimator, so the simple fit above is kept intact
lr_quadratic.fit(X_train_quadratic, y_train)
xx_quadratic = quadratic_featurizer.transform(xx.reshape(xx.shape[0], 1))
plt.plot(xx, lr_quadratic.predict(xx_quadratic), c='r', linestyle='--')
plt.title('Pizza price regressed on diameter')
plt.xlabel('Diameter in inches')
plt.ylabel('Price in dollars')
plt.axis([0, 25, 0, 25])
plt.grid(True)
plt.scatter(X_train, y_train)
plt.show()