# Level-II meta model: inspect the ridge validation curve, fit candidate
# models on the training split, then write clipped predictions for the
# validation, test and holdout splits into 'meta_lvl_II_preds'.

# Plot the ridge cross-validation results.
# NOTE(review): the axis labels suggest cv_ridge holds RMSE values indexed by
# alpha -- confirm against where cv_ridge is built.
cv_ridge.plot(title="Validation - Just Do It")
plt.xlabel("alpha")
plt.ylabel("rmse")
cv_ridge.min()
#cv_lasso.min()

# ridge model: alpha is the index label at which cv_ridge is smallest
model = Ridge(alpha=cv_ridge.idxmin())
model.fit(X_train[pred_cols], y_train['item_cnt_day'])

# poisson model
# NOTE: `model` is rebound here and again below, so only the last fitted
# estimator (the decision tree) produces the predictions.
pois_fam = sm.families.Poisson()
model = sm.GLM(
    endog=y_train['item_cnt_day'],
    exog=X_train[pred_cols],
    family=pois_fam,
)
model = model.fit()
model.params
model.summary()

# dtree model
# The criterion is deliberately left at its default: squared error has always
# been the default, while the explicit spelling 'mse' was deprecated in
# scikit-learn 1.0 and removed in 1.2 (renamed 'squared_error'). Omitting it
# keeps the same behaviour on every version.
model = DecisionTreeRegressor(splitter='best')
model.fit(X=X_train[pred_cols], y=y_train['item_cnt_day'])

# make predictions and clip them to [cons.lower_bound, cons.upper_bound].
# Every split gets the same treatment; previously the holdout predictions
# were stored unclipped, unlike the validation and test predictions.
for X_part, y_part in (
    (X_valid, y_valid),
    (X_test, y_test),
    (X_holdout, y_holdout),
):
    y_part['meta_lvl_II_preds'] = model.predict(X_part[pred_cols])
    y_part['meta_lvl_II_preds'] = y_part['meta_lvl_II_preds'].clip(
        cons.lower_bound, cons.upper_bound)
# Pick the strongest positive coefficients of final_model, export those
# columns (plus target and date) for charting, and fit an OLS on the selected
# predictors to obtain R2 and other summary statistics.

# (column, coefficient) pairs with a positive coefficient, largest first.
# NOTE(review): assumes X_train.columns is in the same order as
# final_model.coef_ -- confirm against where final_model is fitted.
coeffs = sorted(
    (
        (col, coef)
        for col, coef in zip(X_train.columns, final_model.coef_)
        if coef > 0
    ),
    key=itemgetter(1),
    reverse=True,
)

# Names of the (at most) 15 top-ranked predictors. Kept separate from
# useCoeffs so the regression below never sees the target or the date column.
top_features = [col for col, _ in coeffs[:15]]

# read in the complete dataset
df = pd.read_pickle('../Data/dfw_final.pkl')

# add Target and date to useCoeffs, so these are included in final dataset
useCoeffs = top_features + ['TARGET', 'new_date']

# create a final dataframe indexed by date
final = df[useCoeffs].set_index('new_date')

# export to csv to use for charting
final.to_csv('../Data/netflix_ridge.csv')

# create X of the selected predictors and y of Target to get R2 and other
# statistics. X must NOT be built from useCoeffs: that list also holds
# 'TARGET' (regressing the target on itself gives a meaningless perfect fit)
# and 'new_date' (not a predictor).
X = sm.add_constant(df[top_features].copy())
y = df['TARGET']
model = sm.OLS(y, X).fit()
predictions = model.predict(X)
print(model.summary())