class ClassifyTrainerPyTorch(EpochReportingTrainer):

    def __init__(self, model, **kwargs):
        super().__init__()
        if type(model) is dict:
            checkpoint = kwargs.get('checkpoint')
            if checkpoint:
                model['checkpoint'] = checkpoint
            model = create_model_for('classify', **model)
        self.clip = float(kwargs.get('clip', 5))
        self.labels = model.labels
        self.gpus = int(kwargs.get('gpus', 1))
        if self.gpus == -1:
            self.gpus = len(os.getenv('CUDA_VISIBLE_DEVICES', os.getenv('NV_GPU', '0')).split(','))

        self.optimizer = OptimizerManager(model, **kwargs)
        self.model = model
        if self.gpus > 0 and self.model.gpu:
            self.crit = model.create_loss().cuda()
            if self.gpus > 1:
                self.model = torch.nn.DataParallel(model).cuda()
            else:
                self.model.cuda()
        else:
            logger.warning("Requested training on CPU. This will be slow.")
            self.crit = model.create_loss()
            self.model = model
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    def _get_pytorch_model(self):
        return self.model.module if self.gpus > 1 else self.model

    def save(self, model_file):
        self._get_pytorch_model().save(model_file)

    def _make_input(self, batch_dict, **kwargs):
        return self._get_pytorch_model().make_input(batch_dict, **kwargs)

    @staticmethod
    def _get_batchsz(batch_dict):
        return len(batch_dict['y'])

    def _test(self, loader, **kwargs):
        self.model.eval()
        total_loss = 0
        total_norm = 0
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        verbose = kwargs.get("verbose", None)
        output = kwargs.get('output')
        txts = kwargs.get('txts')
        handle = None
        line_number = 0
        if output is not None and txts is not None:
            handle = open(output, "w")

        with torch.no_grad():
            for batch_dict in pg(loader):
                example = self._make_input(batch_dict)
                ys = example.pop('y')
                pred = self.model(example)
                loss = self.crit(pred, ys)
                if handle is not None:
                    for p, y in zip(pred, ys):
                        handle.write('{}\t{}\t{}\n'.format(" ".join(txts[line_number]), self.model.labels[p], self.model.labels[y]))
                        line_number += 1
                batchsz = self._get_batchsz(batch_dict)
                total_loss += loss.item() * batchsz
                total_norm += batchsz
                _add_to_cm(cm, ys, pred)

        metrics = cm.get_all_metrics()
        metrics['avg_loss'] = total_loss / float(total_norm)
        verbose_output(verbose, cm)
        if handle is not None:
            handle.close()

        return metrics

    def _train(self, loader, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        epoch_loss = 0
        epoch_div = 0
        for batch_dict in pg(loader):
            self.optimizer.zero_grad()
            example = self._make_input(batch_dict)
            y = example.pop('y')
            pred = self.model(example)
            loss = self.crit(pred, y)
            batchsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * batchsz
            epoch_loss += report_loss
            epoch_div += batchsz
            self.nstep_agg += report_loss
            self.nstep_div += batchsz
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            _add_to_cm(cm, y, pred)
            self.optimizer.step()

            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                metrics['lr'] = self.optimizer.current_lr
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()

        metrics = cm.get_all_metrics()
        metrics['lr'] = self.optimizer.current_lr
        metrics['avg_loss'] = epoch_loss / float(epoch_div)
        return metrics
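
# --- Usage sketch (illustrative only, not part of the original module) ---
# A minimal way one might drive ClassifyTrainerPyTorch by hand, assuming `model` is a
# classify model exposing .labels/.gpu/.create_loss()/.make_input(), and that
# `train_loader`/`valid_loader` yield batch dicts containing a 'y' key, as _train()/_test()
# above expect. The public train()/test() entry points are assumed to be provided by the
# EpochReportingTrainer base class (not shown here).
#
#   trainer = ClassifyTrainerPyTorch(model, clip=5.0, nsteps=100)
#   for epoch in range(num_epochs):
#       train_metrics = trainer.train(train_loader, reporting_fns=[])
#       valid_metrics = trainer.test(valid_loader, reporting_fns=[])
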
class LanguageModelTrainerPyTorch(Trainer):

    def __init__(self, model, **kwargs):
        super().__init__()
        if type(model) is dict:
            model = create_model_for('lm', **model)
        self.model = model
        self.clip = float(kwargs.get('clip', 5))
        self.gpus = kwargs.get('gpus', 1)
        if self.gpus > 0:
            self.crit = model.create_loss().cuda()
            if self.gpus > 1:
                self.model = torch.nn.DataParallel(model).cuda()
            else:
                self.model.cuda()
        else:
            logger.warning("Requested training on CPU. This will be slow.")
            self.crit = model.create_loss()
        self.nsteps = kwargs.get('nsteps', 500)
        self.optimizer = OptimizerManager(self.model, **kwargs)

    def repackage_hidden(self, h):
        """Wrap hidden states in new tensors, detaching them from their history."""
        if isinstance(h, torch.Tensor):
            return h.detach()
        else:
            return tuple(self.repackage_hidden(v) for v in h)

    def save(self, model_file):
        self._get_pytorch_model().save(model_file)

    def _get_pytorch_model(self):
        return self.model.module if self.gpus > 1 else self.model

    @staticmethod
    def _get_dims(loader):
        batch_dict = loader.dataset[0]
        return batch_dict['y'].shape

    @staticmethod
    def _num_toks(batch_dict):
        return np.prod(batch_dict['y'].shape)

    def calc_metrics(self, agg, norm):
        metrics = super().calc_metrics(agg, norm)
        metrics['perplexity'] = np.exp(metrics['avg_loss'])
        return metrics

    def test(self, vs, reporting_fns, phase='Valid', **kwargs):
        epoch = 0
        if phase == 'Valid':
            self.valid_epochs += 1
            epoch = self.valid_epochs

        start = time.time()
        self.model.eval()
        total_loss = 0
        total_toks = 0
        batchsz, nctx = self._get_dims(vs)
        hidden = self._get_pytorch_model().zero_state(batchsz)

        for batch_dict in vs:
            inputs = self._get_pytorch_model().make_input(batch_dict)
            y = inputs.pop('y')
            output, hidden = self.model(inputs, hidden)
            toks = self._num_toks(batch_dict)
            total_loss += self.crit(output, y).item() * toks
            total_toks += toks
            if hidden is not None:
                hidden = self.repackage_hidden(hidden)

        metrics = self.calc_metrics(total_loss, total_toks)
        self.report(
            epoch, metrics, start,
            phase, 'EPOCH', reporting_fns
        )
        return metrics

    def train(self, ts, reporting_fns):
        start = time.time()
        self.nstep_start = start
        self.model.train()
        epoch_loss = 0
        epoch_toks = 0
        batchsz, nctx = self._get_dims(ts)
        hidden = self._get_pytorch_model().zero_state(batchsz)

        for batch_dict in ts:
            if hidden is not None:
                hidden = self.repackage_hidden(hidden)
            inputs = self._get_pytorch_model().make_input(batch_dict)
            y = inputs.pop('y')
            self.optimizer.zero_grad()
            output, hidden = self.model(inputs, hidden)
            loss = self.crit(output, y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            toks = self._num_toks(batch_dict)
            report_loss = loss.item() * toks
            epoch_loss += report_loss
            epoch_toks += toks
            self.nstep_agg += report_loss
            self.nstep_div += toks
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                metrics['lr'] = self.optimizer.current_lr
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_toks)
        metrics['lr'] = self.optimizer.current_lr
        self.train_epochs += 1
        self.report(
            self.train_epochs, metrics, start,
            'Train', 'EPOCH', reporting_fns
        )
        return metrics
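
# --- Illustration (not part of the original module) ---
# Why repackage_hidden() above detaches the recurrent state between batches: detaching
# truncates backprop-through-time, so gradients do not flow into (and graphs are not
# retained for) earlier batches, while the state values themselves still carry over.
# A standalone sketch of the same idea with plain tensors:
#
#   h = (torch.zeros(2, 8, 32, requires_grad=True),
#        torch.zeros(2, 8, 32, requires_grad=True))
#   detached = tuple(t.detach() for t in h)   # same values, no autograd history
#   assert all(not t.requires_grad for t in detached)
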
class TaggerTrainerPyTorch(EpochReportingTrainer):

    def __init__(self, model, **kwargs):
        super().__init__()
        if type(model) is dict:
            checkpoint = kwargs.get('checkpoint')
            if checkpoint:
                model['checkpoint'] = checkpoint
            model = create_model_for('tagger', **model)
        self.grad_accum = int(kwargs.get('grad_accum', 1))
        self.gpus = int(kwargs.get('gpus', 1))
        # By default support IOB1/IOB2
        self.span_type = kwargs.get('span_type', 'iob')
        self.verbose = kwargs.get('verbose', False)
        logger.info('Setting span type %s', self.span_type)
        self.model = model
        self.idx2label = revlut(self.model.labels)
        self.clip = float(kwargs.get('clip', 5))
        self.optimizer = OptimizerManager(self.model, **kwargs)
        if self.gpus > 1:
            logger.info("Trainer for PyTorch tagger currently doesn't support multiple GPUs. Setting to 1")
            self.gpus = 1
        if self.gpus > 0 and self.model.gpu:
            self.model = model.cuda()
        else:
            logger.warning("Requested training on CPU. This will be slow.")
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    def save(self, model_file):
        self.model.save(model_file)

    @staticmethod
    def _get_batchsz(batch_dict):
        return batch_dict['y'].shape[0]

    def process_output(self, guess, truth, sentence_lengths, ids, handle=None, txts=None):
        # For acc
        correct_labels = 0
        total_labels = 0
        truth_n = truth.cpu().numpy()
        # For f1
        gold_chunks = []
        pred_chunks = []

        # For each sentence
        for b in range(len(guess)):
            sentence = guess[b]
            if isinstance(sentence, torch.Tensor):
                sentence = sentence.cpu().numpy()
            sentence_length = sentence_lengths[b]
            gold = truth_n[b, :sentence_length]
            sentence = sentence[:sentence_length]

            valid_guess = sentence[gold != Offsets.PAD]
            valid_gold = gold[gold != Offsets.PAD]
            valid_sentence_length = np.sum(gold != Offsets.PAD)
            correct_labels += np.sum(np.equal(valid_guess, valid_gold))
            total_labels += valid_sentence_length

            gold_chunks.append(set(to_spans(valid_gold, self.idx2label, self.span_type, self.verbose)))
            pred_chunks.append(set(to_spans(valid_guess, self.idx2label, self.span_type, self.verbose)))

            # Should we write a file out? If so, we have to have txts
            if handle is not None and txts is not None:
                txt_id = ids[b]
                txt = txts[txt_id]
                write_sentence_conll(handle, valid_guess, valid_gold, txt, self.idx2label)

        return correct_labels, total_labels, gold_chunks, pred_chunks

    def _test(self, ts, **kwargs):
        self.model.eval()
        total_sum = 0
        total_correct = 0

        gold_spans = []
        pred_spans = []

        metrics = {}
        steps = len(ts)
        conll_output = kwargs.get('conll_output', None)
        txts = kwargs.get('txts', None)
        handle = None
        if conll_output is not None and txts is not None:
            handle = open(conll_output, "w")
        pg = create_progress_bar(steps)
        for batch_dict in pg(ts):
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            lengths = inputs['lengths']
            ids = inputs['ids']
            with torch.no_grad():
                pred = self.model(inputs)
            correct, count, golds, guesses = self.process_output(pred, y.data, lengths, ids, handle, txts)
            total_correct += correct
            total_sum += count
            gold_spans.extend(golds)
            pred_spans.extend(guesses)

        total_acc = total_correct / float(total_sum)
        metrics['acc'] = total_acc
        metrics['f1'] = span_f1(gold_spans, pred_spans)
        if self.verbose:
            # TODO: Add programmatic access to these metrics?
            conll_metrics = per_entity_f1(gold_spans, pred_spans)
            conll_metrics['acc'] = total_acc * 100
            conll_metrics['tokens'] = total_sum.item()
            logger.info(conlleval_output(conll_metrics))
        return metrics

    def _train(self, ts, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        epoch_loss = 0
        epoch_norm = 0
        steps = len(ts)
        pg = create_progress_bar(steps)
        self.optimizer.zero_grad()

        for i, batch_dict in enumerate(pg(ts)):
            inputs = self.model.make_input(batch_dict)
            loss = self.model.compute_loss(inputs)
            loss.backward()

            if (i + 1) % self.grad_accum == 0 or (i + 1) == steps:
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
                self.optimizer.step()
                self.optimizer.zero_grad()

            bsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * bsz
            epoch_loss += report_loss
            epoch_norm += bsz
            self.nstep_agg += report_loss
            self.nstep_div += bsz
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                metrics['lr'] = self.optimizer.current_lr
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_norm)
        metrics['lr'] = self.optimizer.current_lr
        return metrics
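
# --- Note on gradient accumulation (illustrative only, not part of the original module) ---
# In TaggerTrainerPyTorch._train() above, loss.backward() runs every batch, but clipping,
# optimizer.step(), and zero_grad() only run every `grad_accum` batches (or on the final
# batch), so gradients from consecutive batches sum and the effective batch size is roughly
# grad_accum * batch_size. A hedged construction sketch, assuming `model` and `train_ts`
# exist and that train() is provided by the EpochReportingTrainer base:
#
#   trainer = TaggerTrainerPyTorch(model, grad_accum=4, clip=5.0)
#   metrics = trainer.train(train_ts, reporting_fns=[])
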
class Seq2SeqTrainerPyTorch(Trainer):

    def __init__(self, model, **kwargs):
        super().__init__()
        if type(model) is dict:
            model = create_model_for('seq2seq', **model)
        self.clip = float(kwargs.get('clip', 5))
        self.model = model
        self.optimizer = OptimizerManager(self.model, **kwargs)
        self._input = model.make_input
        self._predict = model.predict
        self.tgt_rlut = kwargs['tgt_rlut']
        self.gpus = kwargs.get('gpus', 1)
        self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))
        self.label_smoothing = kwargs.get("label_smoothing")
        if self.gpus > 0:
            self.crit = model.create_loss(label_smooth=self.label_smoothing).cuda()
            if self.gpus > 1:
                self.model = torch.nn.DataParallel(model).cuda()
            else:
                self.model.cuda()
        else:
            logger.warning("Requested training on CPU. This will be slow.")
            self.crit = model.create_loss()
        self.nsteps = kwargs.get('nsteps', 500)

    @staticmethod
    def _num_toks(tgt_lens):
        return torch.sum(tgt_lens).item()

    def save(self, model_file):
        self._get_pytorch_model().save(model_file)

    def _get_pytorch_model(self):
        return self.model.module if self.gpus > 1 else self.model

    def calc_metrics(self, agg, norm):
        metrics = super().calc_metrics(agg, norm)
        metrics['perplexity'] = np.exp(metrics['avg_loss'])
        return metrics

    def test(self, vs, reporting_fns, phase, **kwargs):
        if phase == 'Test':
            return self._evaluate(vs, reporting_fns, **kwargs)

        self.model.eval()
        total_loss = total_toks = 0
        steps = len(vs)
        self.valid_epochs += 1
        preds = []
        golds = []

        start = time.time()
        pg = create_progress_bar(steps)
        for batch_dict in pg(vs):
            input_ = self._input(batch_dict)
            tgt = input_['tgt']
            tgt_lens = batch_dict['tgt_lengths']
            pred = self.model(input_)
            loss = self.crit(pred, tgt)
            toks = self._num_toks(tgt_lens)
            total_loss += loss.item() * toks
            total_toks += toks
            greedy_preds = [p[0] for p in self._predict(input_, beam=1, make_input=False)]
            preds.extend(convert_seq2seq_preds(greedy_preds, self.tgt_rlut))
            golds.extend(convert_seq2seq_golds(tgt.cpu().numpy(), tgt_lens, self.tgt_rlut))

        metrics = self.calc_metrics(total_loss, total_toks)
        metrics['bleu'] = bleu(preds, golds, self.bleu_n_grams)[0]
        self.report(
            self.valid_epochs, metrics, start,
            phase, 'EPOCH', reporting_fns
        )
        return metrics

    def _evaluate(self, es, reporting_fns, **kwargs):
        self.model.eval()
        pg = create_progress_bar(len(es))
        preds = []
        golds = []
        start = time.time()
        for batch_dict in pg(es):
            tgt = batch_dict['tgt']
            tgt_lens = batch_dict['tgt_lengths']
            pred = [p[0] for p in self._predict(batch_dict, numpy_to_tensor=False, **kwargs)]
            preds.extend(convert_seq2seq_preds(pred, self.tgt_rlut))
            golds.extend(convert_seq2seq_golds(tgt, tgt_lens, self.tgt_rlut))
        metrics = {'bleu': bleu(preds, golds, self.bleu_n_grams)[0]}
        self.report(
            0, metrics, start,
            'Test', 'EPOCH', reporting_fns
        )
        return metrics

    def train(self, ts, reporting_fns):
        self.model.train()
        epoch_loss = 0
        epoch_toks = 0
        start = time.time()
        self.nstep_start = start
        for batch_dict in ts:
            start_time = time.time()
            self.optimizer.zero_grad()
            input_ = self._input(batch_dict)
            tgt = input_['tgt']
            pred = self.model(input_)
            loss = self.crit(pred, tgt)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()

            tgt_lens = batch_dict['tgt_lengths']
            tok_count = self._num_toks(tgt_lens)
            reporting_loss = loss.item() * tok_count
            epoch_loss += reporting_loss
            epoch_toks += tok_count
            self.nstep_agg += reporting_loss
            self.nstep_div += tok_count
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                metrics['lr'] = self.optimizer.current_lr
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_toks)
        metrics['lr'] = self.optimizer.current_lr
        self.train_epochs += 1
        self.report(
            self.train_epochs, metrics, start,
            'Train', 'EPOCH', reporting_fns
        )
        return metrics
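
# --- Note on the perplexity metric (illustrative only, not part of the original module) ---
# calc_metrics() above reports perplexity as exp(avg_loss), where avg_loss is the summed
# per-token loss divided by the total target-token count (tok_count is derived from
# tgt_lengths). A tiny numeric sketch of that relationship, with made-up numbers:
#
#   total_loss, total_toks = 250.0, 100     # summed token loss, token count
#   avg_loss = total_loss / total_toks      # 2.5 per token
#   perplexity = np.exp(avg_loss)           # ~12.18
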
class DependencyParserTrainerPyTorch(EpochReportingTrainer):

    def __init__(self, model, **kwargs):
        super().__init__()
        if type(model) is dict:
            model = create_model_for('deps', **model)
        self.punct_eval = kwargs.get('punct_eval', False)
        self.clip = float(kwargs.get('clip', 5))
        self.labels = model.labels
        self.gpus = int(kwargs.get('gpus', 1))
        if self.gpus == -1:
            self.gpus = len(os.getenv('CUDA_VISIBLE_DEVICES', os.getenv('NV_GPU', '0')).split(','))

        self.optimizer = OptimizerManager(model, **kwargs)
        self.model = model
        if self.gpus > 0 and self.model.gpu:
            self.crit = model.create_loss().cuda()
            if self.gpus > 1:
                self.model = torch.nn.DataParallel(model).cuda()
            else:
                self.model.cuda()
        else:
            logger.warning("Requested training on CPU. This will be slow.")
            self.crit = model.create_loss()
            self.model = model
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    def _get_pytorch_model(self):
        return self.model.module if self.gpus > 1 else self.model

    def save(self, model_file):
        self._get_pytorch_model().save(model_file)

    def _make_input(self, batch_dict, **kwargs):
        return self._get_pytorch_model().make_input(batch_dict, **kwargs)

    @staticmethod
    def _get_batchsz(batch_dict):
        return len(batch_dict['labels'])

    def _test(self, loader, **kwargs):
        self.model.eval()
        steps = len(loader)
        pg = create_progress_bar(steps)
        metrics = [LAS(), UAS(), LCM(), UCM()]

        with torch.no_grad():
            for batch_dict in pg(loader):
                example = self._make_input(batch_dict)
                labels_gold = example.pop('labels')
                heads_gold = example.pop('heads')
                batchsz = self._get_batchsz(batch_dict)
                greedy_heads_pred, greedy_labels_pred = self.model.decode(example)
                T = greedy_labels_pred.shape[1]
                labels_gold_trimmed = labels_gold[:, :T]
                heads_gold_trimmed = heads_gold[:, :T]

                for i in range(batchsz):
                    for m in metrics:
                        if self.punct_eval is False:
                            labels_gold_trimmed[i].masked_fill_(labels_gold_trimmed[i] == self.model.punct, Offsets.PAD)
                        m.add(greedy_heads_pred[i], heads_gold_trimmed[i],
                              greedy_labels_pred[i], labels_gold_trimmed[i])

        metrics = {m.name: m.score for m in metrics}
        return metrics

    def _train(self, loader, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        steps = len(loader)
        pg = create_progress_bar(steps)
        epoch_loss = 0
        epoch_div = 0
        for batch_dict in pg(loader):
            self.optimizer.zero_grad()
            example = self._make_input(batch_dict)
            heads_gold = example.pop('heads')
            labels_gold = example.pop('labels')
            heads_pred, labels_pred = self.model(example)
            loss = self.crit(heads_pred, heads_gold, labels_pred, labels_gold)
            batchsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * batchsz
            epoch_loss += report_loss
            epoch_div += batchsz
            self.nstep_agg += report_loss
            self.nstep_div += batchsz
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()

            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                metrics['lr'] = self.optimizer.current_lr
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()

        metrics = {}
        metrics['lr'] = self.optimizer.current_lr
        metrics['avg_loss'] = epoch_loss / float(epoch_div)
        return metrics
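
# --- Usage sketch (illustrative only, not part of the original module) ---
# When punct_eval is False (the default), _test() above overwrites gold punctuation labels
# with Offsets.PAD before scoring, presumably so the LAS/UAS/LCM/UCM metric objects skip
# those positions, matching the common dependency-parsing convention of excluding
# punctuation from evaluation. Assuming `model` and `valid_loader` exist, and that test()
# is provided by the EpochReportingTrainer base:
#
#   trainer = DependencyParserTrainerPyTorch(model, punct_eval=False, clip=5.0)
#   metrics = trainer.test(valid_loader, reporting_fns=[])   # dict keyed by metric name (LAS/UAS/LCM/UCM)
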