def main(results):
    """Restore a trained model from ``results`` and score it on the test set.

    The function rebuilds the data/vocab/model configs, restores the model
    weights, writes the model predictions for the test set to
    ``<results>/formulas_test/``, scores the reference file against the
    hypothesis file and logs the resulting metrics (plus the perplexity
    returned by ``write_prediction``).

    Args:
        results: (string) directory containing ``data.json``,
            ``vocab.json``, ``model.json`` and ``model.weights/``. A
            trailing path separator is accepted but no longer required.
    """
    # Local import keeps this block self-contained.
    import os

    dir_output = results

    # restore config and model
    # os.path.join instead of "+" so a missing trailing separator on
    # ``results`` does not produce paths such as "results/fulldata.json".
    config_data = Config(os.path.join(dir_output, "data.json"))
    config_vocab = Config(os.path.join(dir_output, "vocab.json"))
    config_model = Config(os.path.join(dir_output, "model.json"))

    vocab = Vocab(config_vocab)
    model = Img2SeqModel(config_model, dir_output, vocab)
    model.build_pred()
    model.restore_session(os.path.join(dir_output, "model.weights/"))

    # load dataset
    test_set = DataGenerator(
        index_file=config_data.index_test,
        path_formulas=config_data.path_formulas_test,
        dir_images=config_data.dir_images_test,
        max_iter=config_data.max_iter,
        path_matching=config_data.path_matching_test,
        form_prepro=vocab.form_prepro)

    # use model to write predictions in files
    config_eval = Config({
        "dir_answers": os.path.join(dir_output, "formulas_test/"),
        "batch_size": 20,
    })
    files, perplexity = model.write_prediction(config_eval, test_set)
    formula_ref, formula_hyp = files[0], files[1]

    # score the ref and prediction files
    scores = score_files(formula_ref, formula_hyp)
    scores["perplexity"] = perplexity
    msg = " - ".join(["{} {:04.2f}".format(k, v) for k, v in scores.items()])
    model.logger.info("- Test Txt: {}".format(msg))
def _run_evaluate(self, config, test_set):
    """Run one evaluation pass and return the resulting scores.

    Predictions are written through ``self.write_prediction``; the first
    two files it returns are then handed to ``score_files``.

    Args:
        config: (Config) with batch_size and dir_answers
        test_set: Dataset instance

    Returns:
        (dict) the mapping produced by ``score_files`` with an extra
            "perplexity" entry holding the second value returned by
            ``write_prediction`` (e.g. scores["acc"] = 0.85)

    """
    answer_files, perplexity = self.write_prediction(config, test_set)

    # Only the first two files are scored, in the order they were returned.
    first_file, second_file = answer_files[0], answer_files[1]
    results = score_files(first_file, second_file)
    results["perplexity"] = perplexity

    return results
def _run_evaluate(self, config, test_set):
    """Evaluate the model on ``test_set`` for one epoch.

    Args:
        config: configuration object forwarded unchanged to
            ``self.write_prediction``
        test_set: Dataset instance

    Returns:
        (dict) whatever ``score_files`` returns for the first two written
            files, extended with a "perplexity" key
            (e.g. scores["acc"] = 0.85)

    """
    written, perp = self.write_prediction(config, test_set)

    metrics = score_files(written[0], written[1])
    # The perplexity comes from the prediction step, not from score_files.
    metrics["perplexity"] = perp
    return metrics
def _run_evaluate_epoch(self, config, test_set):
    """Performs an epoch of evaluation

    Puts the model, encoder and decoder in eval mode, runs the batches
    produced by ``minibatches`` through the encoder/decoder without
    gradients, writes references and hypotheses with ``write_answers`` and
    scores the two resulting files with ``score_files``.

    Args:
        config: object exposing ``dir_answers``, the directory handed to
            ``write_answers``
        test_set: Dataset instance; must support ``len()`` and be accepted
            by ``minibatches``

    Returns:
        scores: (dict) the mapping returned by ``score_files`` with an
            extra "perplexity" entry. That entry holds the loss of the
            last evaluated batch, which is the value this method has
            always returned under that key.

    Raises:
        ValueError: if ``self._config.decoding`` is neither "greedy" nor
            "beam_search", or if no batch was evaluated.

    """
    self.model.eval()
    self.encoder.eval()
    self.decoder.eval()

    # One hypothesis container per decoding candidate: a single one for
    # greedy decoding, ``beam_size`` of them for beam search.
    decoding = self._config.decoding
    if decoding == "greedy":
        n_candidates = 1
    elif decoding == "beam_search":
        n_candidates = self._config.beam_size
    else:
        # Fail early instead of hitting an unbound ``refs``/``hyps`` later.
        raise ValueError("Unsupported decoding mode: {!r}".format(decoding))
    refs = []
    hyps = [[] for _ in range(n_candidates)]

    # Weight of the doubly stochastic attention regularization term.
    alpha_c = 1.
    # Loss of the most recent batch; stays None if no batch is produced.
    loss_eval = None

    with torch.no_grad():
        # NOTE(review): the dataset length is used both as the progress bar
        # target and as the minibatch size, so the whole set appears to be
        # evaluated as a single batch - confirm this is intended.
        nbatches = len(test_set)
        prog = Progbar(nbatches)

        for i, (img, formula) in enumerate(minibatches(test_set, nbatches)):
            img = pad_batch_images_2(img)
            # (N, W, H, C) per the original author's note
            img = torch.FloatTensor(img)
            formula, _ = pad_batch_formulas(
                formula, self._vocab.id_pad, self._vocab.id_end)
            # (N, C, W, H) per the original author's note
            img = img.permute(0, 3, 1, 2)
            formula = torch.LongTensor(formula)

            # Move to GPU, if available
            img = img.to(self.device)
            formula = formula.to(self.device)

            # Forward prop.
            imgs = self.encoder(img)
            # NOTE(review): every row is given the padded row length here;
            # the lengths returned by pad_batch_formulas are not used -
            # confirm this is what the decoder expects.
            lengths = torch.LongTensor([[len(row)] for row in formula])
            batch_scores, caps_sorted, decode_lengths, alphas, _ = \
                self.decoder(imgs, formula, lengths)

            # The targets are all tokens after the first one of each
            # sorted caption.
            targets = caps_sorted[:, 1:]

            # Remove timesteps that we didn't decode at, or are pads;
            # pack_padded_sequence is an easy trick to do this.
            batch_scores, _ = pack_padded_sequence(
                batch_scores, decode_lengths, batch_first=True)
            targets, _ = pack_padded_sequence(
                targets, decode_lengths, batch_first=True)

            # Calculate loss, plus the attention regularization term.
            loss = self.criterion(batch_scores, targets)
            loss += alpha_c * ((1. - alphas.sum(dim=1)) ** 2).mean()

            loss_eval = loss.item()
            prog.update(i + 1, [("loss", loss_eval),
                                ("perplexity", np.exp(loss_eval))])

            # Store references and hypotheses.
            # NOTE(review): ``batch_scores`` was rebound above to the first
            # element of pack_padded_sequence's result, so each formula row
            # is paired with a row of that packed output - confirm these
            # are per-formula predictions and that a row never has more
            # entries than ``hyps`` has containers.
            for form, preds in zip(formula, batch_scores):
                refs.append(form)
                for candidate_idx, pred in enumerate(preds):
                    hyps[candidate_idx].append(pred)

    if loss_eval is None:
        # Previously this surfaced as an unbound ``loss`` at the return.
        raise ValueError("test_set produced no batches; nothing to evaluate")

    files = write_answers(refs, hyps, self._vocab.id_to_tok,
                          config.dir_answers, self._vocab.id_end)
    scores = score_files(files[0], files[1])
    scores["perplexity"] = loss_eval

    self.logger.info("- Evaluating: {}".format(prog.info))

    return scores