def save_embeddings(embeddings, missing, save_dir="../embeddings/"):
    """Persist an embedding array and its companion ``missing`` object.

    Two files are written inside ``save_dir``:

    * ``embeddings.npy`` -- ``embeddings`` stored with ``np.save``.
    * ``missing.pkl``    -- ``missing`` stored with ``pkl.dump``.

    Args:
        embeddings: Array-like object handed to ``np.save``.
        missing: Picklable object; the progress message refers to it as the
            missing indices (exact structure is not visible from here).
        save_dir: Destination directory. It is passed to ``safe_open_dir``
            before anything is written.
    """
    # NOTE(review): safe_open_dir is defined elsewhere; presumably it creates
    # the directory when it does not exist -- confirm.
    safe_open_dir(save_dir)

    embeddings_path = os.path.join(save_dir, "embeddings.npy")
    np.save(embeddings_path, embeddings)
    print("Saved the embeddings")

    missing_path = os.path.join(save_dir, "missing.pkl")
    with open(missing_path, 'wb') as handle:
        pkl.dump(missing, handle)
    print("Saved missing indicies")
def save_text(name, ids, text, labels=None, save_dest="../processed_input/"):
    """Store ids, texts and (optionally) labels in ``<save_dest>/<name>.npz``.

    Args:
        name: Base name of the archive; ``".npz"`` is appended to it.
        ids: Stored in the archive under the key ``ids``.
        text: Stored in the archive under the key ``texts``.
        labels: Stored under the key ``labels`` unless it is ``None``, in
            which case the archive has no ``labels`` entry at all.
        save_dest: Destination directory. It is passed to ``safe_open_dir``
            before the archive is written.
    """
    # NOTE(review): safe_open_dir is defined elsewhere; presumably it creates
    # the directory when it does not exist -- confirm.
    safe_open_dir(save_dest)
    save_path = os.path.join(save_dest, name + ".npz")

    # Collect the arrays first so a single savez call covers both cases.
    arrays = {"ids": ids, "texts": text}
    if labels is not None:
        arrays["labels"] = labels
    np.savez(save_path, **arrays)

    logging.info("Saved %s" % name)
def save_validation(self):
    """Stack every sub-model's validation predictions and save them.

    The ``val_preds`` attribute of each model in ``self.best_model.models``
    is stacked along a new last axis, the resulting shape is logged, and the
    stack is handed to ``ensemble_utils.save_predictions`` together with
    ``self.base_dataset.y`` and ``self.ensemble_id``.
    """
    per_model_preds = [member.val_preds for member in self.best_model.models]
    val_preds = np.stack(per_model_preds, axis=-1)
    logging.info("Saving validation preds with shape {}".format(
        val_preds.shape))

    labels = self.base_dataset.y
    ensemble_id = self.ensemble_id
    # The return value of safe_open_dir is used as the output directory.
    out_dir = safe_open_dir("../superlearner_preds")
    ensemble_utils.save_predictions(val_preds, labels, ensemble_id, out_dir)
X = expit(test_X[:, :, i]) logits = level2_models[i].predict_proba(X)[:, 1] test_preds[:, i] = logits logging.info("Stacked Test preds of shape: {}".format(test_preds.shape)) return ids, test_preds if __name__ == "__main__": args = parser.parse_args() SEED = args.seed np.random.seed(SEED) logging.info("Opening the ensemble configs") ensemble_config_dict = load_ensemble_configs() ensemble_config = get_ensemble_config(ensemble_config_dict, args.ensemble_id) pred_savepath = safe_open_dir("../superlearner_preds/") # Get the predictions pred_x, pred_y = create_preds(ensemble_config["files"], ensemble_config["data"], batch_size=args.batch_size, k=args.kfold, seed=SEED, savedir=pred_savepath) # Train the meta-learner if args.superlearn: if args.use_sklearn: logging.info("Training superlearner with scikit-learn") weights, mus, sigmas = train_superlearner_sklearn(pred_x, pred_y, reg_type=args.penalty, C=args.C) mus, sigmas = None, None elif args.use_xgboost: logging.info("Training superlearner with xgboost") gbm = train_superlearner_xgboost(pred_x, pred_y, ensemble_config['params']) else: weights = train_superlearner(pred_x, pred_y)
def save_predictions(predictions, pred_labels, model_name, pred_savedir):
    """Write predictions and their labels to ``<pred_savedir>/<model_name>.npz``.

    Args:
        predictions: Stored in the archive under the key ``X``.
        pred_labels: Stored in the archive under the key ``Y``.
        model_name: Base name of the archive; ``".npz"`` is appended to it.
        pred_savedir: Output directory. It is passed through
            ``safe_open_dir`` and the value that call returns is the
            directory actually used to build the path.
    """
    # NOTE(review): safe_open_dir is defined elsewhere; presumably it creates
    # the directory when needed and returns its path -- confirm.
    target_dir = safe_open_dir(pred_savedir)
    archive_name = model_name + ".npz"
    savepath = os.path.join(target_dir, archive_name)

    np.savez(savepath, X=predictions, Y=pred_labels)
    logging.info("Saved validation preds to {}".format(savepath))