Ejemplo n.º 1
0
def stackerTraining(stacker, folds, level0_trainFeatures, trainData, target=None):
    """Cross-validate every stacker model on the level-0 feature matrix.

    For each model in ``stacker`` the function fits on the training rows of
    every fold, predicts the held-out rows, and finally prints the MASE of
    the out-of-fold predictions next to the MASE obtained by simply averaging
    the level-0 features row-wise.

    Args:
        stacker: Mapping of a display name to an estimator exposing
            ``fit`` / ``predict``.
        folds: Splitter object exposing ``split(X, y)`` that yields
            ``(train_index, validation_index)`` pairs.
        level0_trainFeatures: 2-D array indexed as ``[rows, :]``; presumably
            one column per base model -- confirm against the caller.
        trainData: Object supporting ``trainData[target]`` and ``.iloc``
            (a pandas DataFrame in practice).
        target: Column selector for the target inside ``trainData``.

    Returns:
        None. Scores are only printed.
    """
    target_values = trainData[target].values

    for k, stacker_model in stacker.items():
        print('Training stacker %s' % (k))
        # Float buffer: ``zeros_like`` on an integer target would silently
        # truncate the model's predictions when they are stored.
        y_pred = np.zeros(target_values.shape, dtype=float)
        y_true = np.zeros_like(target_values)

        # Split on the data that is actually indexed below; the original
        # referenced names (``X``, ``y``) that are not defined in this scope.
        for t, v in folds.split(level0_trainFeatures, target_values):
            train_features = level0_trainFeatures[t, :]
            validation_features = level0_trainFeatures[v, :]
            train_target = trainData.iloc[t][target]
            validation_target = trainData.iloc[v][target]

            # Exact class match (not isinstance) is kept from the original.
            is_boost_model = (
                stacker_model.__class__ == xgb.sklearn.XGBRegressor
                or stacker_model.__class__ == lgb.sklearn.LGBMRegressor
            )
            if is_boost_model:
                print('Fitting a boost model with limited tree rounds')
                evalset = [(validation_features, np.ravel(validation_target))]
                stacker_model.fit(train_features, np.ravel(train_target), eval_set=evalset,
                                  early_stopping_rounds=20, verbose=False)
                # LightGBM exposes ``best_iteration_`` while XGBoost exposes
                # ``best_iteration``; try both instead of raising on XGBoost.
                best_iteration = getattr(stacker_model, 'best_iteration_', None)
                if best_iteration is None:
                    best_iteration = getattr(stacker_model, 'best_iteration', None)
                print(best_iteration)
            else:
                stacker_model.fit(train_features, train_target)

            y_pred[v] = stacker_model.predict(validation_features)
            y_true[v] = validation_target.values

        stacker_mase = mase(y_pred, y_true)
        # Baseline: plain row-wise mean of the level-0 features.
        average_mase = mase(level0_trainFeatures.mean(axis=1), y_true)
        print('%s Stacker MASE: %s' % (k, stacker_mase))
        print('%s Averaging MASE: %s' % (k, average_mase))
Ejemplo n.º 2
0
def base_fit(model, folds, features, target, trainData, testData):
    """Fit one base model fold by fold and collect its predictions.

    For every ``(train, validation)`` index pair yielded by
    ``folds.split(trainData)`` the model is refit on the training rows,
    predicts the held-out rows, and also predicts ``testData[features]``.
    The test predictions are summed over the folds and divided by the
    number of folds.

    Args:
        model: Estimator with ``fit`` / ``predict``. XGBRegressor and
            LGBMRegressor (exact class match) are fitted with the held-out
            rows as ``eval_set`` and predict with their best-iteration limit.
        folds: Splitter exposing ``split(trainData)``.
        features: Column selector for the model inputs.
        target: Column selector for the target.
        trainData: Training frame (supports ``.iloc``, ``.shape`` and
            column selection).
        testData: Test frame (supports ``.shape`` and column selection).

    Returns:
        Tuple ``(model_mase, model_val_predictions, model_test_predictions)``:
        a one-element list holding the MASE of the out-of-fold predictions,
        the ``(n_train, 1)`` array of out-of-fold predictions, and the
        fold-averaged test predictions of shape ``(n_test,)``.
    """
    n_train_rows = trainData.shape[0]
    # Rows never selected as validation keep whatever ``np.empty`` left there.
    oof_predictions = np.empty((n_train_rows, 1))
    test_prediction_sum = np.zeros((testData.shape[0],))
    oof_truth = np.zeros((n_train_rows, 1))

    fold_count = 0
    for fold_count, (fit_rows, holdout_rows) in enumerate(folds.split(trainData), start=1):
        fit_frame = trainData.iloc[fit_rows]
        holdout_frame = trainData.iloc[holdout_rows]
        fit_X, fit_y = fit_frame[features], fit_frame[target]
        holdout_X, holdout_y = holdout_frame[features], holdout_frame[target]

        is_xgb = model.__class__ == xgb.sklearn.XGBRegressor
        is_lgb = model.__class__ == lgb.sklearn.LGBMRegressor

        # Boosted models get the held-out rows as an evaluation set.
        if is_xgb | is_lgb:
            evalset = [(holdout_X, np.ravel(holdout_y))]
            model.fit(fit_X, np.ravel(fit_y), eval_set=evalset, verbose=False)
        else:
            model.fit(fit_X, fit_y.values)

        # Each library names its "best number of trees" argument differently.
        if is_xgb:
            holdout_pred = model.predict(holdout_X, ntree_limit=model.best_ntree_limit)
        elif is_lgb:
            holdout_pred = model.predict(holdout_X, num_iteration=model.best_iteration_)
        else:
            print('Using generic predict')
            holdout_pred = model.predict(holdout_X)

        # Store out-of-fold predictions and truths for the level-1 model.
        oof_predictions[holdout_rows, 0] = holdout_pred.reshape(holdout_frame.shape[0])
        oof_truth[holdout_rows, 0] = holdout_y.values
        test_prediction_sum += model.predict(testData[features])

    averaged_test_predictions = test_prediction_sum / fold_count
    fold_scores = [mase(oof_predictions, oof_truth)]

    return fold_scores, oof_predictions, averaged_test_predictions
Ejemplo n.º 3
0
def cv_lgboost(model, X_base=None, y=None, train_years=(2016, 2017), valid_years=(2018,)):
    """Score ``model`` on a single year-based train/validation split.

    Rows whose index year is in ``train_years`` are used for fitting and
    rows whose index year is in ``valid_years`` are used for scoring.

    Args:
        model: Estimator exposing ``fit`` / ``predict``.
        X_base: Feature frame whose index exposes ``.year`` (e.g. a
            DatetimeIndex). Must be provided despite the ``None`` default.
        y: Target frame/series positionally aligned with ``X_base``.
            Must be provided despite the ``None`` default.
        train_years: Years used for fitting. Defaults to the previously
            hard-coded ``(2016, 2017)``.
        valid_years: Years used for validation. Defaults to the previously
            hard-coded ``(2018,)``.

    Returns:
        The value of ``mase(predictions, y_valid)`` on the validation rows.
    """
    index_years = X_base.index.year
    train_index = index_years.isin(list(train_years))
    valid_index = index_years.isin(list(valid_years))
    X_train, X_valid = X_base.iloc[train_index], X_base.iloc[valid_index]
    y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]

    # Flatten the training target to 1-D before handing it to the estimator.
    model.fit(X_train.values, y_train.values.reshape(-1, ))
    preds = model.predict(X_valid.values)
    oof_scores = mase(preds, y_valid.values)
    return oof_scores
Ejemplo n.º 4
0
def mase_error(y_true, y_pred):
    """Return ``mase`` for the given truth/prediction pair.

    This wrapper accepts ``(y_true, y_pred)`` and forwards them to ``mase``
    in the swapped order ``(y_pred, y_true)``.
    """
    score = mase(y_pred, y_true)
    return score
Ejemplo n.º 5
0
# One slot per fold for the object returned by ``model.fit``.
history = [None] * n_folds

for fold_n, (train_index, valid_index) in enumerate(cv.split(yf)):
    print(train_index)

    # Slice the inputs, the sample-weight mask and the targets for this fold.
    X_train = Xw[train_index, :, :]
    X_valid = Xw[valid_index, :, :]
    mask_train = mask[train_index]
    mask_valid = mask[valid_index]
    y_train = yf[train_index]
    y_valid = yf[valid_index]

    # Keyword arguments forwarded to ``fit``; the held-out fold doubles as
    # the validation data and the training mask is used as sample weight.
    params = dict(
        validation_data=(X_valid, y_valid),
        epochs=30,
        verbose=1,
        batch_size=8,
        callbacks=callbacks,
        sample_weight=mask_train,
    )

    # A fresh regressor is built for every fold.
    model = KerasRegressor(build_fn=create_model, verbose=0)
    history[fold_n] = model.fit(X_train, y_train, **params)

    # Record out-of-fold predictions and the matching true values.
    preds = model.predict(X_valid)
    preds_all_base[valid_index] = preds
    true_all[valid_index] = y_valid
    score_val = mase(preds, y_valid)

    # Plot predictions against truth on this fold's axis.
    df = pd.DataFrame({"preds": preds, "true": y_valid})
    df.plot(
        ax=ax[fold_n],
        style=['-o', '-o', '-o'],
        title=f'CV score base: {score_val:.4f}',
        markersize=1.5,
    )

fig.savefig('window-keras.png')

# Overall score across all out-of-fold predictions.
oof_scores = mase(preds_all_base, true_all)
print(oof_scores)