def eval_conll(model, session, tokens, tags, short_report=True):
    """Computes NER quality measures using CONLL shared task script."""
    y_true, y_pred = [], []
    for x_batch, y_batch, lengths in batches_generator(1, tokens, tags):
        tags_batch, tokens_batch = predict_tags(model, session, x_batch, lengths)
        if len(x_batch[0]) != len(tags_batch[0]):
            raise Exception("Incorrect length of prediction for the input, "
                            "expected length: %i, got: %i"
                            % (len(x_batch[0]), len(tags_batch[0])))
        # Drop padding positions, keeping (ground-truth index, predicted tag)
        # pairs for every real token in the sentence.
        kept = [(truth_idx, guess)
                for truth_idx, guess, tok
                in zip(y_batch[0], tags_batch[0], tokens_batch[0])
                if tok != '<PAD>']
        # We extend every prediction and ground truth sequence with 'O' tag
        # to indicate a possible end of entity.
        y_true.extend([idx2tag[truth_idx] for truth_idx, _ in kept] + ['O'])
        y_pred.extend([guess for _, guess in kept] + ['O'])
    results = precision_recall_f1(y_true, y_pred,
                                  print_results=True,
                                  short_report=short_report)
    return results
def run_epoch(model, optimizer, criterion, dataloader, epoch, idx2target_vocab,
              mode='train', device=None, early_stop=False):
    """Run a single training or evaluation epoch.

    Args:
        model: the code2vec-style model; called as model(starts, contexts, ends)
            and expected to return (code_vector, predictions).
        optimizer: torch optimizer used when mode == 'train'; pass None to skip
            parameter updates.
        criterion: loss function applied to (predictions, labels).
        dataloader: yields (starts, contexts, ends, labels) batches.
        epoch: epoch index (unused here; kept for interface compatibility).
        idx2target_vocab: index-to-label mapping forwarded to precision_recall_f1.
        mode: 'train' enables training mode and gradient updates; anything else
            runs the model in eval mode.
        device: device the batch tensors are moved to.
        early_stop: if True, process only the first batch (debugging aid).

    Returns:
        (mean_loss, precision, recall, f1) aggregated over the epoch.

    Raises:
        ValueError: if the dataloader yields no batches (previously this
            surfaced as an opaque ZeroDivisionError).
    """
    if mode == 'train':
        model.train()
    else:
        model.eval()
    epoch_loss = 0.0
    epoch_tp, epoch_fp, epoch_fn = 0.0, 0.0, 0.0
    num_batches = 0
    for starts, contexts, ends, labels in dataloader:
        starts, contexts, ends = starts.to(device), contexts.to(
            device), ends.to(device)
        labels = labels.to(device)
        code_vector, y_pred = model(starts, contexts, ends)
        loss = criterion(y_pred, labels)
        tp, fp, fn = precision_recall_f1(y_pred, labels, idx2target_vocab)
        epoch_tp += tp
        epoch_fp += fp
        epoch_fn += fn
        # BUG FIX: gate on the mode as well as the optimizer. The original
        # checked only `optimizer is not None`, so passing an optimizer while
        # mode != 'train' silently updated weights during evaluation. The
        # sibling class-method implementation gates on mode == 'train'.
        if mode == 'train' and optimizer is not None:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        epoch_loss += loss.item()
        num_batches += 1
        if early_stop:
            break
    if num_batches == 0:
        raise ValueError("run_epoch: dataloader yielded no batches")
    num_batches = float(num_batches)
    epoch_tp, epoch_fp, epoch_fn = float(epoch_tp), float(epoch_fp), float(
        epoch_fn)
    # Epsilon keeps the ratios finite when a class is never predicted/present.
    epsilon = 1e-7
    precision = epoch_tp / (epoch_tp + epoch_fp + epsilon)
    recall = epoch_tp / (epoch_tp + epoch_fn + epsilon)
    f1 = 2 * precision * recall / (precision + recall + epsilon)
    return epoch_loss / num_batches, precision, recall, f1
def run_epoch(self, mode, dataloader):
    """Run a single training or evaluation epoch over `dataloader`.

    Args:
        mode: 'train' puts the model in training mode and performs gradient
            updates; any other value runs in eval mode with no updates.
        dataloader: yields (starts, contexts, ends, labels) batches.

    Returns:
        (mean_loss, precision, recall, f1) aggregated over the epoch.

    Raises:
        ValueError: if the dataloader yields no batches (previously this
            surfaced as an opaque ZeroDivisionError).
    """
    if mode == 'train':
        self.model.train()
    else:
        self.model.eval()
    epoch_loss = 0.0
    epoch_tp, epoch_fp, epoch_fn = 0.0, 0.0, 0.0
    num_batches = 0
    for starts, contexts, ends, labels in dataloader:
        starts, contexts, ends = starts.to(self.DEVICE), contexts.to(
            self.DEVICE), ends.to(self.DEVICE)
        labels = labels.to(self.DEVICE)
        _, y_pred = self.model(starts, contexts, ends)
        loss = self.criterion(y_pred, labels)
        tp, fp, fn = precision_recall_f1(y_pred, labels,
                                         self.idx2target_vocab)
        epoch_tp += tp
        epoch_fp += fp
        epoch_fn += fn
        if mode == 'train':
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        epoch_loss += loss.item()
        num_batches += 1
        # early_stop limits the epoch to one batch (debugging aid).
        if self.early_stop:
            break
    if num_batches == 0:
        raise ValueError("run_epoch: dataloader yielded no batches")
    num_batches = float(num_batches)
    epoch_tp, epoch_fp, epoch_fn = float(epoch_tp), float(epoch_fp), float(
        epoch_fn)
    # Epsilon keeps the ratios finite when a class is never predicted/present.
    epsilon = 1e-7
    precision = epoch_tp / (epoch_tp + epoch_fp + epsilon)
    recall = epoch_tp / (epoch_tp + epoch_fn + epsilon)
    f1 = 2 * precision * recall / (precision + recall + epsilon)
    return epoch_loss / num_batches, precision, recall, f1
# Top-level TF session training loop (Python 2 print syntax; truncated at the
# end of this chunk — the `step_num % 100` logging body continues elsewhere).
warm_start_init_step = 49000
# Non-zero warm-start step means: restore model weights from that checkpoint
# before resuming training.
if warm_start_init_step != 0:
    ckpt_file = 'checkpoints/{}-{}'.format(model_name, warm_start_init_step)
    saver.restore(session, ckpt_file)

for step_num in range(training_steps):
    # One optimization step; also fetches the batch provenance tensors
    # (presumably for debugging which files/lines fed this batch — confirm).
    _, batch_loss, filenames, line_nums = \
        session.run([step, loss, fnames, lines])

    # logging to stdout for sanity checks every 50 steps
    if step_num % 50 == 0:
        x, y, y_ = session.run([tokens, tags, preds])
        # NOTE(review): tag id 0 appears to be the padding/"no tag" label —
        # all masks below exclude it. TODO confirm against the tag vocabulary.
        if (y != 0).sum() > 0:
            # Token-level accuracy over non-padding positions. NOTE(review):
            # computed but never printed/used in the visible lines.
            accuracy = 1.0 * ((y == y_) & (y != 0)).sum() / (y != 0).sum()
            precision, recall, f1 = metrics.precision_recall_f1(reader, y, y_)
            # print some info about the batch
            print 'Loss: ', batch_loss
            print 'Precision: ', precision
            print 'Recall: ', recall
            print 'f1: ', f1
            # Show the first sentence of the batch (first 15 real tokens),
            # decoded back to text, with truth vs. predicted tag sequences.
            print 'Sentence: ', reader.decode_tokens(x[0][(y != 0)[0]][:15])
            print 'Truth: ', reader.decode_tags(y[0][(y != 0)[0]][:15])
            print 'Pred: ', reader.decode_tags(y_[0][(y != 0)[0]][:15])
            print

    # write train accuracy to log files every 100 steps
    if step_num % 100 == 0:
        train_loss = 0
        train_eval_size = 50