def test(self, vs, reporting_fns, phase, **kwargs):
    """Run an evaluation pass over a DataFeed and report metrics.

    :param vs: The evaluation `DataFeed`
    :param reporting_fns: Reporting hooks
    :param phase: `'Test'` delegates to `_evaluate` (beam search), anything
        else runs a greedy validation epoch with loss + BLEU
    :param kwargs: Forwarded to `_evaluate` in the `Test` phase
    :return: A metrics dict
    """
    if phase == 'Test':
        return self._evaluate(vs, reporting_fns, **kwargs)

    self.model.eval()
    self.valid_epochs += 1
    total_loss = 0
    total_toks = 0
    hyps = []
    refs = []
    start = time.time()
    progress = create_progress_bar(len(vs))
    for batch_dict in progress(vs):
        input_ = self._input(batch_dict)
        tgt = input_['tgt']
        tgt_lens = batch_dict['tgt_lengths']
        loss = self.crit(self.model(input_), tgt)
        # Weight the running loss by token count so batches average correctly
        toks = self._num_toks(tgt_lens)
        total_loss += loss.item() * toks
        total_toks += toks
        # Greedy decode (beam=1); keep the single best hypothesis per example
        best = [beam_hyps[0] for beam_hyps in self._predict(input_, beam=1, make_input=False)]
        hyps.extend(convert_seq2seq_preds(best, self.tgt_rlut))
        refs.extend(convert_seq2seq_golds(tgt.cpu().numpy(), tgt_lens, self.tgt_rlut))
    metrics = self.calc_metrics(total_loss, total_toks)
    metrics['bleu'] = bleu(hyps, refs, self.bleu_n_grams)[0]
    self.report(self.valid_epochs, metrics, start, phase, 'EPOCH', reporting_fns)
    return metrics
def test(self, vs, reporting_fns, phase='Valid', dataset=True):
    """Run an epoch of testing over the dataset

    If we are using a `tf.dataset`-based `fit_func`, we will just
    cycle the number of steps and let the `dataset` yield new batches.

    If we are using `feed_dict`s, we convert each batch from the `DataFeed`
    and pass that into TF as the `feed_dict`

    :param vs: A validation set
    :param reporting_fns: Reporting hooks
    :param phase: The phase of evaluation (`Test`, `Valid`)
    :param dataset: (`bool`) Are we using `tf.dataset`s
    :return: Metrics
    """
    # Full beam-search evaluation only applies to feed_dict-mode testing;
    # everything else runs the loss/greedy-BLEU epoch below
    if phase == 'Test' and not dataset:
        return self._evaluate(vs, reporting_fns)
    self.valid_epochs += 1
    total_loss = 0
    total_toks = 0
    preds = []
    golds = []
    start = time.time()
    pg = create_progress_bar(len(vs))
    for batch_dict in pg(vs):
        if dataset:
            # tf.dataset mode: the input pipeline feeds the graph itself
            lossv, top_preds = self.model.sess.run(
                [self.test_loss, self.model.decoder.best])
        else:
            # feed_dict mode: convert the DataFeed batch into placeholders
            feed_dict = self.model.make_input(batch_dict)
            lossv, top_preds = self.model.sess.run(
                [self.test_loss, self.model.decoder.best], feed_dict=feed_dict)
        # Token-weighted running loss so variable-length batches average correctly
        toks = self._num_toks(batch_dict['tgt_lengths'])
        total_loss += lossv * toks
        total_toks += toks
        # NOTE(review): `.T` suggests `decoder.best` comes back time-major and is
        # transposed to batch-major here — confirm against the decoder definition
        preds.extend(convert_seq2seq_preds(top_preds.T, self.tgt_rlut))
        golds.extend(
            convert_seq2seq_golds(batch_dict['tgt'], batch_dict['tgt_lengths'], self.tgt_rlut))
    metrics = self.calc_metrics(total_loss, total_toks)
    metrics['bleu'] = bleu(preds, golds, self.bleu_n_grams)[0]
    self.report(self.valid_epochs, metrics, start, phase, 'EPOCH', reporting_fns)
    return metrics
def test(self, vs, reporting_fns, phase='Valid', **kwargs):
    """Run an epoch of testing over the dataset in eager mode.

    Computes token-weighted loss and greedy (beam=1) BLEU over the
    validation set, or delegates to `_evaluate` for the `Test` phase.

    :param vs: A validation set (iterable yielding `(features, tgt)` pairs)
    :param reporting_fns: Reporting hooks
    :param phase: The phase of evaluation (`Test`, `Valid`)
    :param kwargs: Forwarded to `_evaluate` when `phase == 'Test'`
    :return: Metrics
    """
    SET_TRAIN_FLAG(False)
    if phase == 'Test':
        return self._evaluate(vs, reporting_fns, **kwargs)
    self.valid_epochs += 1
    total_loss = 0
    total_toks = 0
    preds = []
    golds = []
    start = time.perf_counter()
    for features, tgt in vs:
        # Teacher forcing: decoder input is the gold target minus its final token
        features['dst'] = tgt[:, :-1]
        top_preds = self.model.predict(features, beam=1, make_input=False)[0]
        loss_value = self.loss(self.model, features, tgt).numpy()
        # Token-weighted running loss so variable-length batches average correctly
        toks = tf.cast(self._num_toks(features['tgt_len']), tf.float32).numpy()
        total_loss += loss_value * toks
        total_toks += toks
        # Keep only the top beam hypothesis per example
        preds.extend(
            convert_seq2seq_preds(top_preds[:, 0, :], self.tgt_rlut))
        golds.extend(
            convert_seq2seq_golds(tgt, features['tgt_len'], self.tgt_rlut))
    metrics = self.calc_metrics(total_loss, total_toks)
    metrics['bleu'] = bleu(preds, golds, self.bleu_n_grams)[0]
    self.report(self.valid_epochs, metrics, start, phase, 'EPOCH', reporting_fns)
    return metrics
def _evaluate(self, es, reporting_fns, **kwargs):
    """Decode the test set with the model and report BLEU.

    :param es: An evaluation `DataFeed`
    :param reporting_fns: Reporting hooks
    :param kwargs: Extra arguments forwarded to `_predict`
    :return: A metrics dict containing `bleu`
    """
    self.model.eval()
    start = time.time()
    hyps = []
    refs = []
    progress = create_progress_bar(len(es))
    for batch_dict in progress(es):
        # Keep the single best hypothesis from each example's beam
        best = [beam_hyps[0] for beam_hyps in self._predict(batch_dict, numpy_to_tensor=False, **kwargs)]
        hyps.extend(convert_seq2seq_preds(best, self.tgt_rlut))
        refs.extend(convert_seq2seq_golds(batch_dict['tgt'], batch_dict['tgt_lengths'], self.tgt_rlut))
    metrics = {'bleu': bleu(hyps, refs, self.bleu_n_grams)[0]}
    self.report(0, metrics, start, 'Test', 'EPOCH', reporting_fns)
    return metrics
def _evaluate(self, es, reporting_fns):
    """Run the model with beam search and report Bleu.

    :param es: `DataFeed` of input
    :param reporting_fns: Input hooks
    """
    start = time.time()
    hyps = []
    refs = []
    progress = create_progress_bar(len(es))
    for batch_dict in progress(es):
        # Pop the gold targets so they are not handed to the model
        gold = batch_dict.pop('tgt')
        gold_lens = batch_dict.pop('tgt_lengths')
        # Keep the single best hypothesis from each example's beam
        best = [beam_hyps[0] for beam_hyps in self.model.predict(batch_dict)]
        hyps.extend(convert_seq2seq_preds(best, self.tgt_rlut))
        refs.extend(convert_seq2seq_golds(gold, gold_lens, self.tgt_rlut))
    metrics = {'bleu': bleu(hyps, refs, self.bleu_n_grams)[0]}
    self.report(0, metrics, start, 'Test', 'EPOCH', reporting_fns)
    return metrics
def _evaluate(self, es, reporting_fns, **kwargs):
    """Run the model with beam search and report Bleu.

    :param es: `tf.dataset` of input
    :param reporting_fns: Input hooks
    """
    start = time.time()
    hyps = []
    refs = []
    # Features arrive pre-vectorized from the dataset pipeline, so the
    # model must not re-run its input conversion
    kwargs['make_input'] = False
    for features, tgt in es:
        # Pull the gold lengths out so only true features reach predict
        gold_lens = features.pop('tgt_len')
        beams = self.model.predict(features, **kwargs)
        # Best hypothesis per example sits at beam index 0
        hyps.extend(convert_seq2seq_preds(beams[:, 0, :], self.tgt_rlut))
        refs.extend(convert_seq2seq_golds(tgt, gold_lens, self.tgt_rlut))
    metrics = {'bleu': bleu(hyps, refs, self.bleu_n_grams)[0]}
    self.report(0, metrics, start, 'Test', 'EPOCH', reporting_fns)
    return metrics