예제 #1
0
# Plot the cross-validation results held in cv_ridge and label the axes
# (presumably RMSE indexed by candidate alpha -- confirm where cv_ridge
# is built).
cv_ridge.plot(title="Validation - Just Do It")
plt.xlabel("alpha")
plt.ylabel("rmse")

# Bare expression: its value is only echoed when it is the last statement
# of an interactive/notebook cell.
cv_ridge.min()

# Fit a ridge regression whose alpha is the index label at which cv_ridge
# is smallest.
model = Ridge(alpha=cv_ridge.idxmin())
model.fit(X=X_train[pred_cols], y=y_train['item_cnt_day'])

# Poisson GLM: regress item_cnt_day on the predictor columns.
# NOTE: this rebinds `model`, replacing whatever was fitted before it.
pois_fam = sm.families.Poisson()
model = sm.GLM(
    endog=y_train['item_cnt_day'],
    exog=X_train[pred_cols],
    family=pois_fam,
).fit()

# Bare expressions kept for interactive inspection of the fitted results.
model.params
model.summary()

# Decision-tree regressor on the same predictors and target.
# NOTE: this rebinds `model`, replacing whatever was fitted before it.
#
# The split criterion is deliberately left at its default. The original
# spelling criterion='mse' was deprecated in scikit-learn 1.0 and removed
# in 1.2 (where it raises), while its replacement 'squared_error' is
# rejected by releases older than 1.0. Squared error is the default
# criterion on every version, so omitting the argument keeps the same
# behaviour and runs everywhere.
model = DecisionTreeRegressor(splitter='best')
model.fit(X=X_train[pred_cols], y=y_train['item_cnt_day'])

# Score each split with the most recently fitted `model` and store the
# result in that split's 'meta_lvl_II_preds' column.
for _targets, _features in (
        (y_valid, X_valid),
        (y_test, X_test),
        (y_holdout, X_holdout)):
    _targets['meta_lvl_II_preds'] = model.predict(_features[pred_cols])

# Clamp the validation and test predictions to the configured bounds.
# NOTE(review): the holdout predictions are not clipped in the portion of
# the script visible here -- confirm whether that happens further down.
for _targets in (y_valid, y_test):
    _targets['meta_lvl_II_preds'] = _targets['meta_lvl_II_preds'].clip(
        cons.lower_bound, cons.upper_bound)
예제 #2
0
# Keep only the (column, coefficient) pairs with a strictly positive
# coefficient, ordered from largest to smallest coefficient.
coeffs = sorted(
    (
        (name, weight)
        for name, weight in zip(X_train.columns, final_model.coef_)
        if weight > 0
    ),
    key=itemgetter(1),
    reverse=True,
)

# Column names of the (up to) 15 largest positive coefficients.
useCoeffs = [name for name, _ in coeffs[:15]]

# Load the complete dataset.
# NOTE: unpickling executes arbitrary code; only load files you trust.
df = pd.read_pickle('../Data/dfw_final.pkl')

# Extend the selected columns (in place) with the target and the date so
# both end up in the exported frame.
useCoeffs.extend(['TARGET', 'new_date'])

# Independent copy of the selected columns, indexed by date.
final = df[useCoeffs].copy().set_index('new_date')

# Export to CSV for charting.
final.to_csv('../Data/netflix_ridge.csv')

# Fit an OLS model of TARGET on the selected feature columns to obtain R2
# and the other summary statistics.
#
# By this point useCoeffs may also contain 'TARGET' and 'new_date' (they
# are appended for the CSV export). Those must not enter the design
# matrix: including TARGET regresses the response on itself (a trivially
# perfect fit), and the date column is not a model feature. Filter both
# out before building X.
feature_cols = [c for c in useCoeffs if c not in ('TARGET', 'new_date')]
X = df[feature_cols].copy()
y = df['TARGET']
X = sm.add_constant(X)  # add the intercept column

model = sm.OLS(y, X).fit()
predictions = model.predict(X)  # in-sample fitted values
print(model.summary())