# requires torch, numpy (np), os, pickle, and QAModel in the enclosing module
def __init__(self, model_file, word_embeddings_cache_file, stopwords_file, word2dfs_file):
    # init torch random seeds
    torch.manual_seed(1234)
    np.random.seed(1234)

    # load model
    self.model = QAModel.load('', model_file)

    # load vectors
    self.vec_dim = self._preload_cached_embeddings(word_embeddings_cache_file)
    # random vector used for out-of-vocabulary terms
    self.unk_term_vec = np.random.uniform(-0.25, 0.25, self.vec_dim)

    # stopwords
    with open(stopwords_file) as f:
        self.stoplist = {line.strip() for line in f}

    # word document frequencies
    if os.path.isfile(word2dfs_file):
        with open(word2dfs_file, "rb") as w2dfin:
            self.word2dfs = pickle.load(w2dfin)
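# The `_preload_cached_embeddings` helper called above is not shown here. A
# minimal sketch of one plausible implementation follows, assuming the cache
# is a pickled {word: numpy vector} dict; the repo's actual on-disk format
# may differ.
import pickle


def _preload_cached_embeddings(self, word_embeddings_cache_file):
    # load the word -> vector mapping from the pickled cache
    with open(word_embeddings_cache_file, "rb") as cache:
        self.word_vectors = pickle.load(cache)
    # infer the embedding dimensionality from any one cached vector
    return len(next(iter(self.word_vectors.values())))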
        logger.info('Achieved better dev_map ... saved model')

    if args.test_on_each_epoch:
        test_scores = trainer.test(test_set, args.batch_size)
        map, mrr = compute_map_mrr(args.dataset_folder, test_set, test_scores)
        logger.info("------- MAP {}, MRR {}".format(map, mrr))

    # early stopping: no dev improvement for `patience` epochs
    if (i - best_model) >= args.patience:
        logger.warning('No improvement since the last {} epochs. Stopping training'
                       .format(i - best_model))
        break

logger.info(' ------------ Training epochs completed! ------------')
logger.info('Best dev MAP in training phase = {:.4f}'.format(best_map))

# reload the best checkpoint and evaluate it on the test and dev splits
trained_model = QAModel.load(args.model_outfile)
evaluator = Trainer(trained_model, args.eta, args.mom, args.no_loss_reg, vec_dim, args.cuda)

for split in [test_set, dev_set]:
    evaluator.load_input_data(args.dataset_folder, cache_file, None, None, split)
    if args.paper_ext_feats or args.paper_ext_feats_stem:
        # swap in the precomputed external (paper-style) features for this split
        evaluator.data_splits[split][-1] = ext_feats_for_splits[split]  # set_external_features_as_per_paper(evaluator)
    split_scores = evaluator.test(split, args.batch_size)
    map, mrr = compute_map_mrr(args.dataset_folder, split, split_scores, args.run_name_prefix)
    logger.info("-------{} MAP {}, MRR {}".format(split, map, mrr))
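# The paper-style external features toggled by args.paper_ext_feats are
# overlap-style features between the question and the candidate answer. The
# sketch below is illustrative only: the function name, exact feature set,
# and IDF formula are assumptions, not the repo's implementation; it reuses
# the stoplist / word2dfs structures loaded by the bridge __init__ above.
import numpy as np


def overlap_ext_feats(q_tokens, a_tokens, stoplist, word2dfs, num_docs):
    # keep only non-stopword terms
    q_terms = {t for t in q_tokens if t not in stoplist}
    a_terms = {t for t in a_tokens if t not in stoplist}
    common = q_terms & a_terms

    # plain word overlap: shared distinct terms, normalised by both lengths
    denom = len(q_terms) + len(a_terms)
    overlap = len(common) / denom if denom else 0.0

    # IDF-weighted overlap: shared terms weighted by inverse document frequency
    def idf(term):
        return np.log(num_docs / (word2dfs.get(term, 0) + 1.0))

    idf_denom = sum(idf(t) for t in q_terms | a_terms)
    idf_overlap = sum(idf(t) for t in common) / idf_denom if idf_denom else 0.0

    return np.array([overlap, idf_overlap], dtype=np.float32)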
    dev_map, dev_mrr = compute_map_mrr(args.dataset_folder, dev_set, dev_scores)
    logger.info("------- MAP {}, MRR {}".format(dev_map, dev_mrr))

    if dev_map - best_map > 1e-3:
        # new dev MAP is better than the best seen so far: save the model
        best_model = i
        best_map = dev_map
        QAModel.save(net, args.dataset_folder, args.model_fname)
        logger.info('Achieved better dev_map ... saved model')

    if args.test_on_each_epoch:
        test_scores = trainer.test(test_set, args.batch_size)
        map, mrr = compute_map_mrr(args.dataset_folder, test_set, test_scores)
        logger.info("------- MAP {}, MRR {}".format(map, mrr))

    # early stopping: no dev improvement for `patience` epochs
    if (i - best_model) >= args.patience:
        logger.warning('No improvement since the last {} epochs. Stopping training'
                       .format(i - best_model))
        break

logger.info(' ------------ Training epochs completed! ------------')
logger.info('Best MAP in training phase = {:.4f}'.format(best_map))

# reload the best checkpoint and evaluate it on the test split
trained_model = QAModel.load(args.dataset_folder, args.model_fname)
evaluator = Trainer(trained_model, args.eta, args.mom, args.no_loss_reg, vec_dim)
evaluator.load_input_data(args.dataset_folder, cache_file, None, None, test_set)
test_scores = evaluator.test(test_set, args.batch_size)
map, mrr = compute_map_mrr(args.dataset_folder, test_set, test_scores)
logger.info("------- MAP {}, MRR {}".format(map, mrr))
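# compute_map_mrr above is the repo's evaluation helper (and may delegate to
# trec_eval); the self-contained sketch below only illustrates what the two
# reported numbers mean. The input layout (question id -> list of
# (score, is_relevant) pairs) is an assumption for illustration.
def map_mrr(results):
    """Mean Average Precision and Mean Reciprocal Rank over ranked candidates."""
    average_precisions, reciprocal_ranks = [], []
    for pairs in results.values():
        ranked = sorted(pairs, key=lambda p: p[0], reverse=True)
        num_relevant, precisions, first_rank = 0, [], None
        for rank, (_, relevant) in enumerate(ranked, start=1):
            if relevant:
                num_relevant += 1
                precisions.append(num_relevant / rank)
                if first_rank is None:
                    first_rank = rank
        if num_relevant == 0:
            continue  # questions with no relevant candidates are commonly excluded
        average_precisions.append(sum(precisions) / num_relevant)
        reciprocal_ranks.append(1.0 / first_rank)
    return (sum(average_precisions) / len(average_precisions),
            sum(reciprocal_ranks) / len(reciprocal_ranks))


# e.g. map_mrr({'q1': [(0.9, False), (0.4, True)]}) -> (0.5, 0.5)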