def rl_train_step(self, batch):
    maxlen = self.question_truncate or 30
    preds, text, nll = self.sample(batch, latest_turn_only=True)
    if self.rl_baseline_method == "self_critic":
        g_preds, g_text, g_scores = self.predict(
            batch, latest_turn_only=True, no_grad=True)
        retval = Output(
            text[:1],
            log_probs=nll[:1],
            episode_end=[batch['episode_end']],
            ques_len=[len(preds[0]) - 1],
            diverged_outputs=[
                [(t, nll[i], len(preds[i]) - 1) for i, t in enumerate(text[1:])]
            ],
            greedy_master_output=g_text[:1],
            greedy_output=[[t for t in g_text[1:]]],
        )
    else:
        retval = Output(
            text[:1],
            log_probs=nll[:1],
            episode_end=[batch['episode_end']],
            ques_len=[len(preds[0]) - 1],
            diverged_outputs=[
                [(t, nll[i], len(preds[i]) - 1) for i, t in enumerate(text[1:])]
            ],
        )
    return retval
def eval_step(self, batch): """ Evaluate a single batch of examples. """ if batch.text_vec is None: return self.model.eval() scores = self.score(batch) probs = F.softmax(scores, dim=1) _, prediction_id = torch.max(probs.float().cpu(), 1) preds = [self.class_list[idx] for idx in prediction_id] if batch.labels is None or self.opt['ignore_labels']: # interactive mode if self.opt.get('print_scores', False): preds = self._format_interactive_output(probs, prediction_id) else: labels = self._get_label_tensor(batch) loss = self.criterion(scores, labels) self.record_local_metric('loss', AverageMetric.many(loss)) preds = [self.class_list[idx] for idx in prediction_id] labels = batch.labels if preds is not None and labels is not None: self._update_confusion_matrix(preds, labels) if self.opt.get('print_scores', False): return Output(preds, probs=probs.cpu()) else: return Output(preds)
def train_step(self, batch): """Train on a single batch of examples.""" if batch.text_vec is None: return batchsize = batch.text_vec.size(0) self.model.train() self.zero_grad() cands, cand_vecs, label_inds = self._build_candidates( batch, source=self.opt['candidates'], mode='train') scores = self.score_candidates(batch, cand_vecs) loss = self.rank_loss(scores, label_inds) # Update loss self.metrics['loss'] += loss.item() self.metrics['examples'] += batchsize loss.backward() self.update_params() # Get train predictions if self.opt['candidates'] == 'batch': self.get_batch_train_metrics(scores) return Output() if not self.opt.get('train_predict', False): warn_once( "Some training metrics are omitted for speed. Set the flag " "`--train-predict` to calculate train metrics.") return Output() return self.get_train_preds(scores, label_inds, cands, cand_vecs)
def train_step(self, batch): """ Train on a single batch of examples. """ if batch.text_vec is None: return Output() self.model.train() self.zero_grad() # Calculate loss labels = self._get_label_tensor(batch) scores = self.score(batch) loss = self.criterion(scores, labels) self.record_local_metric('loss', AverageMetric.many(loss)) loss = loss.mean() self.backward(loss) self.update_params() # Get predictions _, prediction_id = torch.max(scores.float().cpu(), 1) preds = [self.class_list[idx] for idx in prediction_id] labels_field = self.get_labels_field(batch['observations']) labels_lst = self._get_labels(batch['observations'], labels_field) self._update_confusion_matrix(preds, labels_lst) return Output(preds)
def eval_step(self, batch):
    if batch.text_vec is None:
        return
    self.model.eval()
    batchsize = batch.text_vec.size(0)
    self.metrics['examples'] += batchsize

    if self.subtask == 'dialog':
        _, preds, cand_ranked = self.dialog_step(batch)
        if self.opt['interactive']:
            if self.opt['prev_response_filter']:
                preds = self.check_prev_response(preds, cand_ranked)
            return Output(preds)
        else:
            return Output(preds, cand_ranked)
    elif self.subtask == 'feedback':
        _, preds, cand_ranked = self.feedback_step(batch)
        return Output(preds, cand_ranked)
    elif self.subtask == 'satisfaction':
        if self.opt['uncertainty_predictor']:
            # Use uncertainty of dialog model to classify bot's previous utterance
            preds = self.predict_satisfaction_by_uncertainty(batch)
        else:
            # Use satisfaction of user response to classify bot's previous response
            _, preds = self.satisfaction_step(batch)
        preds = [str(p) for p in preds]
        return Output(preds)
def eval_step(self, batch): """ Train on a single batch of examples. """ if batch.text_vec is None: return self.model.eval() scores = self.score(batch) probs = F.softmax(scores, dim=1) if self.threshold is None: _, prediction_id = torch.max(probs.cpu(), 1) else: ref_prob = probs.cpu()[:, 0] # choose ref class if Prob(ref class) > threshold prediction_id = (ref_prob <= self.threshold).to(torch.int64) preds = [self.class_list[idx] for idx in prediction_id] if batch.labels is None or self.opt['ignore_labels']: # interactive mode if self.opt.get('print_scores', False): preds = self._format_interactive_output(probs, prediction_id) else: labels = self._get_labels(batch) loss = self.criterion(scores, labels) self.record_local_metric('loss', AverageMetric.many(loss)) loss = loss.mean() self._update_confusion_matrix(batch, preds) if self.opt.get('print_scores', False): return Output(preds, probs=probs.cpu()) else: return Output(preds)
def train_step(self, batch): """ Train on a single batch of examples. """ if batch.text_vec is None: return Output() self.model.train() self.optimizer.zero_grad() # calculate loss labels = self._get_labels(batch) scores = self.score(batch) loss = self.criterion(scores, labels) loss.backward() self.update_params() # update metrics self.metrics['loss'] += loss.item() self.metrics['examples'] += len(batch.text_vec) # get predictions _, prediction_id = torch.max(scores.cpu(), 1) preds = [self.class_list[idx] for idx in prediction_id] self._update_confusion_matrix(batch, preds) return Output(preds)
def eval_step(self, batch): """ Evaluate a single batch of examples. """ #print(self.model._encoder_input(batch)) if batch.text_vec is None and batch.image is None: return Output('N') if batch.text_vec is not None: bsz = batch.text_vec.size(0) else: bsz = len(batch.image) self.model.eval() cand_scores = None token_losses = None if batch.label_vec is not None: # calculate loss on targets with teacher forcing loss, model_output = self.compute_loss(batch, return_output=True) if self.output_token_losses: token_losses = self._construct_token_losses( batch.label_vec, model_output) preds = None if self.skip_generation: warn_once( "--skip-generation does not produce accurate metrics beyond ppl", RuntimeWarning, ) else: maxlen = self.label_truncate or 20 n_best_beam_preds_scores, _ = self._generate( batch, self.beam_size, maxlen) preds = [] scores = [] for n_best_list in n_best_beam_preds_scores: p, s = zip(*n_best_list) preds.append(p) scores.append(s) cand_choices = None self.rank_candidates = True if self.rank_candidates: # compute MMI to rank candidates bestpreds = [] for i in range(bsz): cands, _ = self._pad_tensor(preds[i]) cand_scores = self.computeMMI(batch.text_vec[i], cands, list(scores[i])) _, ordering = cand_scores.sort() bestpreds.append(preds[i][ordering[0]]) text = [self._v2t(p) for p in bestpreds] if bestpreds is not None else None if text and self.compute_tokenized_bleu: # compute additional bleu scores self._compute_fairseq_bleu(batch, preds) self._compute_nltk_bleu(batch, text) return Output(text, cand_choices, token_losses=token_losses)
def train_step(self, batch): """ Train on a single batch of examples. """ self._maybe_invalidate_fixed_encs_cache() if batch.text_vec is None and batch.image is None: return batchsize = ( batch.text_vec.size(0) if batch.text_vec is not None else batch.image.size(0) ) self.model.train() self.zero_grad() cands, cand_vecs, label_inds = self._build_candidates( batch, source=self.candidates, mode='train' ) try: scores = self.score_candidates(batch, cand_vecs) loss = self.criterion(scores, label_inds) self.backward(loss) self.update_params() except RuntimeError as e: # catch out of memory exceptions during fwd/bck (skip batch) if 'out of memory' in str(e): print( '| WARNING: ran out of memory, skipping batch. ' 'if this happens frequently, decrease batchsize or ' 'truncate the inputs to the model.' ) return Output() else: raise e # Update loss self.metrics['loss'] += loss.item() self.metrics['examples'] += batchsize # Get train predictions if self.candidates == 'batch': self._get_batch_train_metrics(scores) return Output() if not self.opt.get('train_predict', False): warn_once( "Some training metrics are omitted for speed. Set the flag " "`--train-predict` to calculate train metrics." ) return Output() return self._get_train_preds(scores, label_inds, cands, cand_vecs)
def eval_step(self, batch):
    questions = batch.text_vec
    contexts = padded_3d(batch.memory_vecs)
    if contexts.shape[0] != self.batch_size:
        return Output(
            self.dict.vec2txt(
                np.random.choice(self.dictionnary_size,
                                 size=contexts.shape[0])).split(" "))
    output = self.recurrent_entity_network(questions, contexts)
    pred = output.argmax(dim=1)
    return Output(self.dict.vec2txt(pred).split(" "))
def eval_step(self, batch): """ Evaluate a single batch of examples. """ if batch.text_vec is None: return self.model.eval() scores = self.score(batch) probs = F.softmax(scores, dim=1) if self.calc_auc: self._update_aucs(batch, probs) if self.threshold is None: _, prediction_id = torch.max(probs.cpu(), 1) else: ref_prob = probs.cpu()[:, 0] # choose ref class if Prob(ref class) > threshold prediction_id = (ref_prob <= self.threshold).to(torch.int64) preds = [self.class_list[idx] for idx in prediction_id] if batch.labels is None or self.opt['ignore_labels']: # interactive mode if self.opt.get('print_scores', False): preds = self._format_interactive_output(probs, prediction_id) else: labels = self._get_labels(batch) loss = self.criterion(scores, labels) self.record_local_metric('loss', AverageMetric.many(loss)) loss = loss.mean() self._update_confusion_matrix(batch, preds) if self.opt.get('print_scores', False): return Output(preds, class_list=[self.class_list], probs=probs.cpu()) if self.opt.get('return_cand_scores', False): sorted_scores, ranks = probs.sort(1, descending=True) sorted_scores = sorted_scores.cpu() text_cands = [] for i in range(0, ranks.size(0)): ordered_list = [self.class_list[i] for i in ranks[i]] text_cands.append(ordered_list) return Output(preds, text_candidates=text_cands, sorted_scores=sorted_scores) else: return Output(preds)
def eval_step(self, batch): """Evaluate a single batch of examples.""" self.model.eval() cand_params = self._build_cands(batch) out = self.model(batch.text_vec, ys=None, cand_params=cand_params) scores, cand_scores = out[0], out[1] _, preds = scores.max(2) if batch.label_vec is not None: # calculate loss on targets with teacher forcing out = self.model(batch.text_vec, batch.label_vec) f_scores = out[0] # forced scores _, f_preds = f_scores.max(2) # forced preds score_view = f_scores.view(-1, f_scores.size(-1)) loss = self.criterion(score_view, batch.label_vec.view(-1)) # save loss to metrics notnull = batch.label_vec.ne(self.NULL_IDX) target_tokens = notnull.long().sum().item() correct = ((batch.label_vec == f_preds) * notnull).sum().item() self.metrics['correct_tokens'] += correct self.metrics['loss'] += loss.item() self.metrics['num_tokens'] += target_tokens cand_choices = None if cand_scores is not None: cand_preds = cand_scores.sort(1, True)[1] # now select the text of the cands based on their scores cand_choices = self._pick_cands(cand_preds, cand_params[1], batch.candidates) text = [self._v2t(p) for p in preds.cpu()] return Output(text, cand_choices)
def rl_eval_step(self, batch):
    div_batch = batch.get('diverged_batch', None)
    if not div_batch:
        div_batch = batch
    token_losses = None
    if batch.label_vec is not None:
        # calculate loss on targets with teacher forcing
        loss, model_output = self.compute_loss(batch, return_output=True)
        if self.output_token_losses:
            token_losses = self._construct_token_losses(
                batch.label_vec, model_output)

    preds = None
    maxlen = self.question_truncate or 30
    if self.eva_sample:
        preds, text, scores = self.sample(div_batch, latest_turn_only=True)
    else:
        preds, text, scores = self.predict(div_batch, latest_turn_only=True)

    retval = Output(
        text[:1],
        log_probs=scores[:1],
        episode_end=[batch.episode_end],
        ques_len=[len(preds[0]) - 1],
        diverged_outputs=[
            [(t, scores[i], len(preds[i]) - 1) for i, t in enumerate(text[1:])]
        ],
    )
    return retval
def _eval_dnli_step(self, batch): """Evaluate a single batch of examples.""" assert self.alpha >= 0 self.model.eval() ranked_cands, ordering = self.rank(batch) bsz = len(ranked_cands) dnli_metrics = [] for batch_idx in range(bsz): dnli_score = {'contradict@1': 0, 'entail@1': 0, 'neutral@1': 0} top1_idx = ordering[batch_idx][0].item() if top1_idx == 0: pass # dnli_metrics['dnli_hit@1'] += 1 elif top1_idx > 0 and top1_idx < 11: dnli_score['contradict@1'] += 1 elif top1_idx >= 11 and top1_idx < 21: dnli_score['entail@1'] += 1 else: dnli_score['neutral@1'] += 1 dnli_metrics.append(dnli_score) return Output(text_candidates=ranked_cands, metrics=dnli_metrics)
def probe_step(self, batch): """Probe a single batch of examples.""" if batch.text_vec is None: return bsz = batch.text_vec.size(0) self.model.eval() cand_scores = None if self.opt['probe'] == 'word_embeddings': embeddings = self.probe_word_embeddings(batch) elif self.opt['probe'] == 'encoder_state': embeddings = self.probe_encoder_state(batch) elif self.opt['probe'] == 'combined': embeddings = self.probe_combined(batch) else: raise Exception(f"Input type {self.opt['probe']} not understood.") try: self.probing_outputs = np.vstack( (self.probing_outputs, embeddings)) except: # In case probing_outputs empty array self.probing_outputs = embeddings cand_choices = None text = None return Output(text, cand_choices)
def train_step(self, batch): """ Train on a single batch of examples. """ if batch.text_vec is None: return self.model.train() self.optimizer.zero_grad() batchsize = batch.text_vec.size(0) self.metrics['examples'] += batchsize if self.subtask == 'dialog': loss, preds, _ = self.dialog_step(batch) elif self.subtask == 'feedback': loss, preds, _ = self.feedback_step(batch) elif self.subtask == 'satisfaction': loss, preds = self.satisfaction_step(batch) preds = [str(p) for p in preds] # Weight loss by task-weight loss *= self.task_weight[self.subtask] loss.backward() self.update_params() return Output(preds)
def eval_step(self, batch): """Generate a response to the input tokens. :param batch: parlai.core.torch_agent.Batch, contains tensorized version of observations. Return predicted responses (list of strings of length batchsize). """ item1 = { "text": "Do you like playing, or watching sports?", "labels": ["I like watching sports."], "label_candidates": list(batch.observations[0]["label_candidates"]) } item1["label_candidates"].append(item1["labels"][0]) item2 = { "text": "Do you think chess counts as a sport?", "labels": ["Yes, I think it does."], "label_candidates": list(batch.observations[0]["label_candidates"]) } item2["label_candidates"].append(item2["labels"][0]) batch = Batch(observations=[item1, item2]) inputs, candidates = self._tokenize_observation(batch) # just predict self.model.eval() output = self.model(inputs.cuda()) pred_text = self._get_predictions(output, candidates, 2) print(pred_text) print("EVALUATING") return Output(pred_text)
def eval_step(self, batch): """Return confirmation of evaluation.""" return Output([ 'Evaluating {} (responding to {})!'.format( i, batch.observations[i]['text']) for i in range(len(batch.text_vec)) ])
def make_preds_for_inject(self, batch):
    assert self.inject == True
    assert len(batch.labels) == 1, 'only single batch is assumed here to keep order'
    # human
    human = batch.labels
    # greedy
    out = self.model(batch.text_vec, ys=None)
    scores, _ = out[0], out[1]
    _, preds = scores.max(2)
    greedy = [self._v2t(p) for p in preds]
    encoder_states = out[2]
    # beam
    out = SteroidSeq2seqAgent.beam_search(
        self.model,
        batch,
        encoder_states,
        self.beam_size,
        start=self.START_IDX,
        end=self.END_IDX,
        pad=self.NULL_IDX,
        min_length=self.beam_min_length,
        min_n_best=self.beam_min_n_best,
        beam_block_hypos=[],
        block_ngram=self.beam_block_ngram)
    beam10 = [self._v2t(out[0][0][0][1:])]
    cand_choices = None
    output = Output(beam10, cand_choices, human, greedy, beam10)
    return output
def train_step(self, batch): """Train on a single batch of examples.""" if batch.text_vec is None: return batchsize = batch.text_vec.size(0) self.model.train() self.optimizer.zero_grad() mems = self._build_mems(batch.memory_vecs) cands, label_inds = self._build_train_cands(batch.label_vec, batch.candidate_vecs) scores = self.model(batch.text_vec, mems, cands) loss = self.rank_loss(scores, label_inds) self.metrics['loss'] += loss.item() self.metrics['batches'] += batchsize _, ranks = scores.sort(1, descending=True) for b in range(batchsize): rank = (ranks[b] == label_inds[b]).nonzero().item() self.metrics['rank'] += 1 + rank loss.backward() self.update_params() # get predictions but not full rankings--too slow to get hits@1 score preds = [self._v2t(cands[row[0]]) for row in ranks] return Output(preds)
def train_step(self, batch): """Train on a single batch of examples.""" if batch.text_vec is None: return batchsize = batch.text_vec.size(0) self.model.train() self.optimizer.zero_grad() cands, cand_vecs, label_inds = self._build_candidates( batch, source=self.opt['candidates'], mode='train') scores = self.score_candidates(batch, cand_vecs) loss = self.rank_loss(scores, label_inds) # Update metrics self.metrics['loss'] += loss.item() self.metrics['examples'] += batchsize _, ranks = scores.sort(1, descending=True) for b in range(batchsize): rank = (ranks[b] == label_inds[b]).nonzero().item() self.metrics['rank'] += 1 + rank loss.backward() self.update_params() # Get predictions but not full rankings for the sake of speed if cand_vecs.dim() == 2: preds = [cands[ordering[0]] for ordering in ranks] elif cand_vecs.dim() == 3: preds = [cands[i][ordering[0]] for i, ordering in enumerate(ranks)] return Output(preds)
def eval_step(self, batch):
    images = torch.stack([self.transform(img) for img in batch.image])
    if self.use_cuda:
        images = images.cuda(non_blocking=True)
    # Need to collate then sort the captions by length
    cands = [
        self.candidate_helper(label_cands_vec, label_cands,
                              self.mode == 'test')
        for label_cands_vec, label_cands in zip(batch.candidate_vecs,
                                                batch.candidates)
    ]
    self.model.eval()

    # Obtain the image embeddings
    img_embs, _ = self.model(images, None, None)
    ranks = []
    top1 = []
    # Each image has its own caption candidates, so we need to
    # iteratively create the embeddings and rank
    for i, (cap, _, lens, truth_idx) in enumerate(cands):
        _, embs = self.model(None, cap, lens)
        # Hack to pass through the truth label's index to compute the
        # rank and top metrics
        offset = truth_idx if truth_idx is not None else 0
        _, rank, top = self.criterion(img_embs[i, :].unsqueeze(0), embs, offset)
        ranks += rank
        top1.append(top[0])
    self.metrics['r@'] += ranks

    predictions = []
    for i, score_idx in enumerate(top1):
        predictions.append(cands[i][1][score_idx])
    return Output(predictions, None)
def eval_step(self, batch): """Train on a single batch of examples.""" if batch.text_vec is None: return self.model.eval() scores = self.score(batch) probs = F.softmax(scores, dim=1) if self.threshold is None: _, prediction_id = torch.max(probs.cpu(), 1) else: ref_prob = probs.cpu()[:, 0] # choose ref class if Prob(ref class) > threshold prediction_id = ref_prob <= self.threshold preds = [self.class_list[idx] for idx in prediction_id] if batch.labels is None: # interactive mode if self.opt.get('print_scores', False): preds = self._format_interactive_output(probs, prediction_id) else: labels = self._get_labels(batch) loss = self.criterion(scores, labels) self.metrics['loss'] += loss.item() self.metrics['examples'] += len(batch.text_vec) self._update_confusion_matrix(batch, preds) return Output(preds)
def eval_step(self, batch): """Evaluate a single batch of examples.""" if batch.text_vec is None: return batchsize = batch.text_vec.size(0) self.model.eval() cands, cand_vecs, label_inds = self._build_candidates( batch, source=self.opt['eval_candidates'], mode='eval') scores = self.score_candidates(batch, cand_vecs) _, ranks = scores.sort(1, descending=True) # Update metrics if label_inds is not None: loss = self.rank_loss(scores, label_inds) self.metrics['loss'] += loss.item() self.metrics['examples'] += batchsize for b in range(batchsize): rank = (ranks[b] == label_inds[b]).nonzero().item() self.metrics['rank'] += 1 + rank cand_preds = [] for i, ordering in enumerate(ranks): if cand_vecs.dim() == 2: cand_list = cands elif cand_vecs.dim() == 3: cand_list = cands[i] cand_preds.append([cand_list[rank] for rank in ordering]) preds = [cand_preds[i][0] for i in range(batchsize)] return Output(preds, cand_preds)
def eval_step(self, batch): """Process batch of inputs. If the batch includes labels, calculate validation metrics as well. :param batch: parlai.core.torch_agent.Batch, contains tensorized version of observations. """ if batch.text_vec is None: return self.is_training = False self.model.eval() output = self.model(batch.text_vec, batch.text_mask) if batch.label_vec is not None: # Interactive mode won't have a gold label missed = self.criterion(batch.label_vec, output[0], batch.text_mask) self.metrics['error.sum'] += float(missed.sum()) self.metrics['eval_exs'] += int(batch.text_lengths.sum()) pred = predict(output[0], batch.text_lengths, batch.text_vec, batch.text_mask) text = self._v2t(batch.text_vec[0]) self.vars = (text, pred[0], batch.text_vec[0], int(batch.text_lengths[0]), *tuple(v[0] for v in output[2:])) return Output(text=pred)
def eval_step(self, batch): """ Evaluate a single batch of examples. """ if batch.text_vec is None: return bsz = batch.text_vec.size(0) self.model.eval() cand_scores = None token_losses = None if batch.label_vec is not None: # calculate loss on targets with teacher forcing loss, model_output = self.compute_loss(batch, return_output=True) self.metrics['loss'] += loss.item() if self.output_token_losses: token_losses = self._construct_token_losses( batch.label_vec, model_output) preds = None if self.skip_generation: warn_once( "--skip-generation does not produce accurate metrics beyond ppl", RuntimeWarning, ) else: maxlen = self.label_truncate or 256 beam_preds_scores, _ = self._generate(batch, self.beam_size, maxlen) preds, scores = zip(*beam_preds_scores) cand_choices = None # TODO: abstract out the scoring here if self.rank_candidates: # compute roughly ppl to rank candidates cand_choices = [] encoder_states = self.model.encoder(*self._model_input(batch)) for i in range(bsz): num_cands = len(batch.candidate_vecs[i]) enc = self.model.reorder_encoder_states( encoder_states, [i] * num_cands) cands, _ = padded_tensor(batch.candidate_vecs[i], self.NULL_IDX, self.use_cuda) scores, _ = self.model.decode_forced(enc, cands) cand_losses = F.cross_entropy( scores.view(num_cands * cands.size(1), -1), cands.view(-1), reduction='none', ).view(num_cands, cands.size(1)) # now cand_losses is cands x seqlen size, but we still need to # check padding and such mask = (cands != self.NULL_IDX).float() cand_scores = (cand_losses * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9) _, ordering = cand_scores.sort() cand_choices.append([batch.candidates[i][o] for o in ordering]) text = [self._v2t(p) for p in preds] if preds is not None else None return Output(text, cand_choices, token_losses=token_losses)
def eval_step(self, batch): """Evaluate a single batch of examples.""" if batch.text_vec is None and batch.image is None: return batchsize = (batch.text_vec.size(0) if batch.text_vec is not None else batch.image.size(0)) self.model.eval() cands, cand_vecs, label_inds = self._build_candidates( batch, source=self.eval_candidates, mode='eval') cand_encs = None if self.encode_candidate_vecs: # if we cached candidate encodings for a fixed list of candidates, # pass those into the score_candidates function if self.eval_candidates == 'fixed': cand_encs = self.fixed_candidate_encs elif self.eval_candidates == 'vocab': cand_encs = self.vocab_candidate_encs scores = self.score_candidates(batch, cand_vecs, cand_encs=cand_encs) if self.rank_top_k > 0: _, ranks = scores.topk(min(self.rank_top_k, scores.size(1)), 1, largest=True) else: _, ranks = scores.sort(1, descending=True) # Update metrics if label_inds is not None: loss = self.rank_loss(scores, label_inds) self.metrics['loss'] += loss.item() self.metrics['examples'] += batchsize for b in range(batchsize): rank = (ranks[b] == label_inds[b]).nonzero() rank = rank.item() if len(rank) == 1 else scores.size(1) self.metrics['rank'] += 1 + rank self.metrics['mrr'] += 1.0 / (1 + rank) ranks = ranks.cpu() max_preds = self.opt['cap_num_predictions'] cand_preds = [] for i, ordering in enumerate(ranks): if cand_vecs.dim() == 2: cand_list = cands elif cand_vecs.dim() == 3: cand_list = cands[i] # using a generator instead of a list comprehension allows # to cap the number of elements. cand_preds_generator = (cand_list[rank] for rank in ordering if rank < len(cand_list)) cand_preds.append(list(islice(cand_preds_generator, max_preds))) if (self.opt.get('repeat_blocking_heuristic', True) and self.eval_candidates == 'fixed'): cand_preds = self.block_repeats(cand_preds) preds = [cand_preds[i][0] for i in range(batchsize)] return Output(preds, cand_preds)
def eval_step(self, batch): """ Return confirmation of evaluation. """ return Output([ f'Evaluating {i} (responding to {batch.text_vec.tolist()})!' for i in range(batch.batchsize) ])
def eval_step(self, batch):
    if batch.text_vec is None:
        return
    self.model.eval()
    bs = (batch.label_vec == 1).sum().item()
    labels = torch.zeros(bs, dtype=torch.long)

    # create subgraph for propagation
    seed_sets = []
    turns = []
    for i, (b, movieIdx) in enumerate(batch.label_vec.nonzero().tolist()):
        # seed set (i.e. mentioned movies + entities)
        seed_set = batch.text_vec[b].nonzero().view(-1).tolist()
        labels[i] = movieIdx
        seed_sets.append(seed_set)
        turns.append(batch.turn[b])
    if self.use_cuda:
        labels = labels.cuda()

    return_dict = self.model(seed_sets, labels)

    loss = return_dict["loss"]
    self.metrics["base_loss"] += return_dict["base_loss"].item()
    self.metrics["loss"] += loss.item()
    self.counts["num_tokens"] += bs
    self.counts["num_batches"] += 1

    outputs = return_dict["scores"].cpu()
    outputs = outputs[:, torch.LongTensor(self.movie_ids)]
    _, pred_idx = torch.topk(outputs, k=100, dim=1)
    for b in range(bs):
        target_idx = self.movie_ids.index(labels[b].item())
        self.metrics["recall@1"] += int(target_idx in pred_idx[b][:1].tolist())
        self.metrics["recall@10"] += int(target_idx in pred_idx[b][:10].tolist())
        self.metrics["recall@50"] += int(target_idx in pred_idx[b][:50].tolist())
        self.metrics[f"recall@1@turn{turns[b]}"] += int(
            target_idx in pred_idx[b][:1].tolist())
        self.metrics[f"recall@10@turn{turns[b]}"] += int(
            target_idx in pred_idx[b][:10].tolist())
        self.metrics[f"recall@50@turn{turns[b]}"] += int(
            target_idx in pred_idx[b][:50].tolist())
        self.counts[f"recall@1@turn{turns[b]}"] += 1
        self.counts[f"recall@10@turn{turns[b]}"] += 1
        self.counts[f"recall@50@turn{turns[b]}"] += 1
        self.counts["recall@1"] += 1
        self.counts["recall@10"] += 1
        self.counts["recall@50"] += 1

    return Output(
        list(map(lambda x: str(self.movie_ids[x]),
                 outputs.argmax(dim=1).tolist())))
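# Small sketch (toy scores, hypothetical movie-id list) of the recall@k
# counting done above: a hit is scored when the target's column index appears
# among the top-k ranked indices.
import torch

_movie_ids = [101, 202, 303, 404]                  # hypothetical catalogue
_rec_scores = torch.tensor([[0.1, 0.7, 0.2, 0.9]])  # scores over catalogue columns
_target_idx = _movie_ids.index(404)                 # column 3

_, _pred_idx = torch.topk(_rec_scores, k=len(_movie_ids), dim=1)
_recall_at_1 = int(_target_idx in _pred_idx[0][:1].tolist())   # 1: ranked first
_recall_at_2 = int(_target_idx in _pred_idx[0][:2].tolist())   # 1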
def eval_step(self, batch):
    if batch.batchsize <= 0:
        return
    else:
        bsz = batch.batchsize
    self.model.eval()
    loss, outputs = self.compute_loss(batch, return_output=True)
    batch_best_preds = outputs['pred']['outputs']
    return Output(batch_best_preds)