def save_embeddings(embeddings, missing, save_dir="../embeddings/"):
    """Write the embeddings and the ``missing`` object into ``save_dir``.

    ``embeddings`` is stored with ``np.save`` as ``embeddings.npy`` and
    ``missing`` is pickled to ``missing.pkl``. A progress message is printed
    after each file has been written.

    Args:
        embeddings: Array-like handed to ``np.save``.
        missing: Picklable object; the progress message refers to it as the
            missing indices.
        save_dir: Target directory. It is passed to ``safe_open_dir`` before
            anything is written (presumably to make sure it exists -- confirm
            against that helper).
    """
    safe_open_dir(save_dir)
    embeddings_path = os.path.join(save_dir, "embeddings.npy")
    missing_path = os.path.join(save_dir, "missing.pkl")

    np.save(embeddings_path, embeddings)
    print("Saved the embeddings")

    with open(missing_path, 'wb') as handle:
        pkl.dump(missing, handle)
    print("Saved missing indicies")
def save_text(name, ids, text, labels=None, save_dest="../processed_input/"):
    """Store ids and texts (and labels, when supplied) in ``<name>.npz``.

    The archive is written with ``np.savez`` under the keys ``ids`` and
    ``texts``; a ``labels`` key is added only when ``labels`` is not None.

    Args:
        name: Base file name; ``".npz"`` is appended to it.
        ids: Value stored under the ``ids`` key.
        text: Value stored under the ``texts`` key.
        labels: Optional value stored under the ``labels`` key.
        save_dest: Target directory. It is passed to ``safe_open_dir`` before
            the archive is written.
    """
    safe_open_dir(save_dest)
    save_path = os.path.join(save_dest, name + ".npz")

    # Collect the arrays once so a single savez call covers both cases.
    arrays = {"ids": ids, "texts": text}
    if labels is not None:
        arrays["labels"] = labels

    np.savez(save_path, **arrays)
    logging.info("Saved %s" % name)
 def save_validation(self):
     """Stack the per-model validation predictions and save them.

     Reads ``val_preds`` from every model in ``self.best_model.models``,
     stacks them along a new last axis, logs the resulting shape, and passes
     the stack to ``ensemble_utils.save_predictions`` together with
     ``self.base_dataset.y``, ``self.ensemble_id`` and the value returned by
     ``safe_open_dir("../superlearner_preds")``.
     """
     per_model_preds = [member.val_preds for member in self.best_model.models]
     stacked = np.stack(per_model_preds, axis=-1)
     logging.info("Saving validation preds with shape {}".format(stacked.shape))

     # Gather the remaining arguments in the order they are passed below.
     targets = self.base_dataset.y
     ensemble_name = self.ensemble_id
     out_dir = safe_open_dir("../superlearner_preds")
     ensemble_utils.save_predictions(stacked, targets, ensemble_name, out_dir)
Exemple #4
0
        X = expit(test_X[:, :, i])
        logits = level2_models[i].predict_proba(X)[:, 1]
        test_preds[:, i] = logits
    logging.info("Stacked Test preds of shape: {}".format(test_preds.shape))

    return ids, test_preds


# Script entry point: parse the command-line arguments, seed NumPy, load the
# selected ensemble configuration, build the base-model predictions and, when
# requested, train a meta-learner on top of them.
if __name__ == "__main__":
    args = parser.parse_args()
    SEED = args.seed
    # Seed NumPy's global random state with the value given on the command line.
    np.random.seed(SEED)
    logging.info("Opening the ensemble configs")
    ensemble_config_dict = load_ensemble_configs()
    ensemble_config = get_ensemble_config(ensemble_config_dict, args.ensemble_id)
    # The value returned by safe_open_dir is used as the prediction save
    # directory (presumably the helper creates it if missing -- confirm).
    pred_savepath = safe_open_dir("../superlearner_preds/")
    # Build the predictions from the "files" and "data" entries of the selected
    # ensemble config; create_preds returns a pair bound to pred_x and pred_y.
    pred_x, pred_y = create_preds(ensemble_config["files"], ensemble_config["data"], batch_size=args.batch_size,
                                  k=args.kfold, seed=SEED, savedir=pred_savepath)

    # Train the meta-learner; the backend is chosen by the use_sklearn and
    # use_xgboost flags, falling back to train_superlearner.
    if args.superlearn:
        if args.use_sklearn:
            logging.info("Training superlearner with scikit-learn")
            weights, mus, sigmas = train_superlearner_sklearn(pred_x, pred_y, reg_type=args.penalty, C=args.C)
            # NOTE(review): the mus and sigmas returned above are discarded
            # immediately by this reassignment -- confirm this is intentional.
            mus, sigmas = None, None
        elif args.use_xgboost:
            logging.info("Training superlearner with xgboost")
            # NOTE(review): this branch binds gbm but not weights, mus or sigmas.
            gbm = train_superlearner_xgboost(pred_x, pred_y, ensemble_config['params'])
        else:
            # NOTE(review): only weights is bound here; mus and sigmas are not.
            weights = train_superlearner(pred_x, pred_y)
Exemple #5
0
def save_predictions(predictions, pred_labels, model_name, pred_savedir):
    """Save predictions and their labels to ``<model_name>.npz``.

    The archive is written with ``np.savez`` under the keys ``X``
    (predictions) and ``Y`` (labels), and the destination path is logged.

    Args:
        predictions: Value stored under the ``X`` key.
        pred_labels: Value stored under the ``Y`` key.
        model_name: Base file name; ``".npz"`` is appended to it.
        pred_savedir: Directory argument passed to ``safe_open_dir``; the
            value that helper returns is used as the directory part of the
            output path.
    """
    out_dir = safe_open_dir(pred_savedir)
    archive_name = model_name + ".npz"
    savepath = os.path.join(out_dir, archive_name)

    np.savez(savepath, X=predictions, Y=pred_labels)
    logging.info("Saved validation preds to {}".format(savepath))