def stackerTraining(stacker, folds, level0_trainFeatures, trainData, target=None):
    """Train each level-1 (stacker) model on out-of-fold level-0 features.

    For every model in ``stacker`` an out-of-fold prediction vector over
    ``trainData`` is built fold by fold, then its MASE is printed next to
    the MASE of a plain column-average of the level-0 features (baseline).

    Parameters
    ----------
    stacker : dict
        Mapping of model name -> unfitted regressor instance.
    folds : cross-validator
        Object with a ``split`` method (e.g. ``sklearn.model_selection.KFold``).
    level0_trainFeatures : np.ndarray
        Matrix of level-0 model predictions, one column per base model,
        row-aligned with ``trainData``.
    trainData : pd.DataFrame
        Training frame; only the ``target`` column is read.
    target : str, optional
        Name of the target column in ``trainData``.
    """
    for name, stacker_model in stacker.items():
        print('Training stacker %s' % (name))
        y_pred = np.zeros_like(trainData[target].values)
        y_true = np.zeros_like(trainData[target].values)
        # FIX: the original split on undefined globals ``X, y``; split on
        # ``trainData`` instead, consistent with base_fit() below.
        for t, v in folds.split(trainData):
            train_features = level0_trainFeatures[t, :]
            validation_features = level0_trainFeatures[v, :]
            train_target = trainData.iloc[t][target]
            validation_target = trainData.iloc[v][target]
            if isinstance(stacker_model, (xgb.sklearn.XGBRegressor,
                                          lgb.sklearn.LGBMRegressor)):
                print('Fitting a boost model with limited tree rounds')
                evalset = [(validation_features, np.ravel(validation_target))]
                stacker_model.fit(train_features, np.ravel(train_target),
                                  eval_set=evalset, early_stopping_rounds=20,
                                  verbose=False)
                # NOTE(review): best_iteration_ is an LGBM attribute; an
                # XGBRegressor taking this branch would not have it — confirm.
                print(stacker_model.best_iteration_)
            else:
                stacker_model.fit(train_features, train_target)
            y_pred[v] = stacker_model.predict(level0_trainFeatures[v])
            y_true[v] = trainData.iloc[v][target].values
        stacker_mase = mase(y_pred, y_true)
        # Baseline: simply average the level-0 predictions column-wise.
        average_mase = mase(level0_trainFeatures.mean(axis=1), y_true)
        print('%s Stacker MASE: %s' % (name, stacker_mase))
        print('%s Averaging MASE: %s' % (name, average_mase))
def base_fit(model, folds, features, target, trainData, testData):
    """Cross-validated fit of one base (level-0) model.

    Builds out-of-fold predictions on ``trainData`` (to be used as a
    level-1 feature) and fold-averaged predictions on ``testData``.

    Parameters
    ----------
    model : regressor
        Unfitted estimator with a scikit-learn style fit/predict API.
    folds : cross-validator
        Object with a ``split`` method.
    features : list
        Feature column names.
    target : str
        Target column name.
    trainData, testData : pd.DataFrame
        Training and test frames containing ``features`` (and ``target``
        for ``trainData``).

    Returns
    -------
    model_mase : list
        Single-element list with the MASE of the out-of-fold predictions.
    model_val_predictions : np.ndarray, shape (n_train, 1)
        Out-of-fold predictions, row-aligned with ``trainData``.
    model_test_predictions : np.ndarray, shape (n_test,)
        Test predictions averaged over the folds.
    """
    model_mase = []
    model_val_predictions = np.empty((trainData.shape[0], 1))
    model_val_true = np.zeros((trainData.shape[0], 1))
    model_test_predictions = np.zeros((testData.shape[0],))
    n_folds = 0
    for train_index, val_index in folds.split(trainData):
        n_folds += 1
        train = trainData.iloc[train_index]
        validation = trainData.iloc[val_index]
        train_features, train_target = train[features], train[target]
        validation_features = validation[features]
        validation_target = validation[target]
        if isinstance(model, (xgb.sklearn.XGBRegressor,
                              lgb.sklearn.LGBMRegressor)):
            evalset = [(validation_features, np.ravel(validation_target))]
            # FIX: early_stopping_rounds was missing, so the
            # best_ntree_limit / best_iteration_ attributes used below were
            # not guaranteed to be set; 20 matches stackerTraining().
            model.fit(train_features, np.ravel(train_target),
                      eval_set=evalset, early_stopping_rounds=20,
                      verbose=False)
        else:
            model.fit(train_features, train_target.values)
        # Predict the held-out fold, capping trees at the best round found
        # by early stopping for the boosted models.
        if isinstance(model, xgb.sklearn.XGBRegressor):
            validation_predictions = model.predict(
                validation_features, ntree_limit=model.best_ntree_limit)
        elif isinstance(model, lgb.sklearn.LGBMRegressor):
            validation_predictions = model.predict(
                validation_features, num_iteration=model.best_iteration_)
        else:
            print('Using generic predict')
            validation_predictions = model.predict(validation_features)
        # Save the out-of-fold prediction for level-1 model training.
        model_val_predictions[val_index, 0] = validation_predictions.reshape(
            validation.shape[0])
        model_val_true[val_index, 0] = validation_target.values
        # NOTE(review): the test-time prediction does not cap the tree count
        # the way the validation prediction does — confirm this is intended.
        model_test_predictions += model.predict(testData[features])
    # Average test predictions over the folds.
    model_test_predictions = model_test_predictions / n_folds
    model_mase.append(mase(model_val_predictions, model_val_true))
    return model_mase, model_val_predictions, model_test_predictions
def cv_lgboost(model, X_base=None, y=None):
    """Score ``model`` on a fixed time-based split of ``X_base``.

    Rows whose index year is 2016 or 2017 form the training set; 2018 rows
    form the validation set. The model is fit on the training portion and
    its MASE on the validation portion is returned.

    Parameters
    ----------
    model : regressor
        Estimator with a scikit-learn style fit/predict API.
    X_base : pd.DataFrame
        Features indexed by a DatetimeIndex (its ``.year`` is inspected).
    y : pd.Series or pd.DataFrame
        Target, row-aligned with ``X_base``.

    Returns
    -------
    float
        MASE of the 2018 validation predictions.
    """
    in_train = X_base.index.year.isin([2016, 2017])
    in_valid = X_base.index.year.isin([2018])
    X_train = X_base.iloc[in_train]
    X_valid = X_base.iloc[in_valid]
    y_train = y.iloc[in_train]
    y_valid = y.iloc[in_valid]
    model.fit(X_train.values, y_train.values.reshape(-1, ))
    predictions = model.predict(X_valid.values)
    return mase(predictions, y_valid.values)
def mase_error(y_true, y_pred):
    """Scorer-style adapter: takes ``(y_true, y_pred)`` and delegates to
    ``mase``, which expects its arguments in the opposite order."""
    return mase(y_pred, y_true)
# Fold-wise training of the windowed Keras model: fills the pre-allocated
# out-of-fold buffers (preds_all_base / true_all), plots each fold's
# predictions on the prepared `ax` grid, and prints the overall OOF MASE.
history = [None] * n_folds
for fold_n, (tr_idx, va_idx) in enumerate(cv.split(yf)):
    print(tr_idx)
    X_train = Xw[tr_idx, :, :]
    X_valid = Xw[va_idx, :, :]
    mask_train = mask[tr_idx]
    mask_valid = mask[va_idx]
    y_train = yf[tr_idx]
    y_valid = yf[va_idx]
    fit_kwargs = {
        "validation_data": (X_valid, y_valid),
        "epochs": 30,
        "verbose": 1,
        "batch_size": 8,
        "callbacks": callbacks,
        'sample_weight': mask_train,
    }
    # Fresh model per fold so no weights leak between folds.
    model = KerasRegressor(build_fn=create_model, verbose=0)
    history[fold_n] = model.fit(X_train, y_train, **fit_kwargs)
    preds = model.predict(X_valid)
    preds_all_base[va_idx] = preds
    true_all[va_idx] = y_valid
    score_val = mase(preds, y_valid)
    df = pd.DataFrame({"preds": preds, "true": y_valid})
    df.plot(ax=ax[fold_n], style=['-o', '-o', '-o'],
            title=f'CV score base: {score_val:.4f}', markersize=1.5)
fig.savefig('window-keras.png')
oof_scores = mase(preds_all_base, true_all)
print(oof_scores)