Code Example #1
File: train.py Project: wenshuoliu/baseline
class ClassifyTrainerPyTorch(EpochReportingTrainer):
    def __init__(self, model, **kwargs):
        super().__init__()

        if type(model) is dict:
            checkpoint = kwargs.get('checkpoint')
            if checkpoint:
                model['checkpoint'] = checkpoint
            model = create_model_for('classify', **model)

        self.clip = float(kwargs.get('clip', 5))
        self.labels = model.labels
        self.gpus = int(kwargs.get('gpus', 1))
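        # gpus == -1 means infer the device count from CUDA_VISIBLE_DEVICES (or NV_GPU)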
        if self.gpus == -1:
            self.gpus = len(
                os.getenv('CUDA_VISIBLE_DEVICES', os.getenv('NV_GPU',
                                                            '0')).split(','))

        self.optimizer = OptimizerManager(model, **kwargs)
        self.model = model
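        # If GPU training is requested and the model supports it, build the loss on the GPU
        # and wrap the model in DataParallel when more than one GPU is requested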
        if self.gpus > 0 and self.model.gpu:
            self.crit = model.create_loss().cuda()
            if self.gpus > 1:
                self.model = torch.nn.DataParallel(model).cuda()
            else:
                self.model.cuda()
        else:
            logger.warning("Requested training on CPU.  This will be slow.")
            self.crit = model.create_loss()
            self.model = model
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    def _get_pytorch_model(self):
        return self.model.module if self.gpus > 1 else self.model

    def save(self, model_file):
        self._get_pytorch_model().save(model_file)

    def _make_input(self, batch_dict, **kwargs):
        return self._get_pytorch_model().make_input(batch_dict, **kwargs)

    @staticmethod
    def _get_batchsz(batch_dict):
        return len(batch_dict['y'])

    def _test(self, loader, **kwargs):
        self.model.eval()
        total_loss = 0
        total_norm = 0
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        verbose = kwargs.get("verbose", None)
        output = kwargs.get('output')
        txts = kwargs.get('txts')
        handle = None
        line_number = 0
        if output is not None and txts is not None:
            handle = open(output, "w")

        with torch.no_grad():
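            # Accumulate loss weighted by batch size and update the confusion matrix per batch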
            for batch_dict in pg(loader):
                example = self._make_input(batch_dict)
                ys = example.pop('y')
                pred = self.model(example)
                loss = self.crit(pred, ys)
                if handle is not None:
                    for p, y in zip(pred, ys):
                        handle.write('{}\t{}\t{}\n'.format(
                            " ".join(txts[line_number]), self.model.labels[p],
                            self.model.labels[y]))
                        line_number += 1
                batchsz = self._get_batchsz(batch_dict)
                total_loss += loss.item() * batchsz
                total_norm += batchsz
                _add_to_cm(cm, ys, pred)

        metrics = cm.get_all_metrics()
        metrics['avg_loss'] = total_loss / float(total_norm)
        verbose_output(verbose, cm)
        if handle is not None:
            handle.close()

        return metrics

    def _train(self, loader, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        epoch_loss = 0
        epoch_div = 0
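        # Each step: forward pass, loss, backward, gradient clipping, then an optimizer update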
        for batch_dict in pg(loader):
            self.optimizer.zero_grad()
            example = self._make_input(batch_dict)
            y = example.pop('y')
            pred = self.model(example)
            loss = self.crit(pred, y)
            batchsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * batchsz
            epoch_loss += report_loss
            epoch_div += batchsz
            self.nstep_agg += report_loss
            self.nstep_div += batchsz
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            _add_to_cm(cm, y, pred)
            self.optimizer.step()

            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                metrics['lr'] = self.optimizer.current_lr
                self.report(self.optimizer.global_step + 1, metrics,
                            self.nstep_start, 'Train', 'STEP', reporting_fns,
                            self.nsteps)
                self.reset_nstep()

        metrics = cm.get_all_metrics()
        metrics['lr'] = self.optimizer.current_lr

        metrics['avg_loss'] = epoch_loss / float(epoch_div)
        return metrics
Code Example #2
File: train.py Project: blester125/baseline
class LanguageModelTrainerPyTorch(Trainer):

    def __init__(self, model, **kwargs):
        super().__init__()
        if type(model) is dict:
            model = create_model_for('lm', **model)
        self.model = model
        self.clip = float(kwargs.get('clip', 5))
        self.gpus = kwargs.get('gpus', 1)
        if self.gpus > 0:
            self.crit = model.create_loss().cuda()
            if self.gpus > 1:
                self.model = torch.nn.DataParallel(model).cuda()
            else:
                self.model.cuda()
        else:
            logger.warning("Requested training on CPU.  This will be slow.")
            self.crit = model.create_loss()

        self.nsteps = kwargs.get('nsteps', 500)
        self.optimizer = OptimizerManager(self.model, **kwargs)

    def repackage_hidden(self, h):
        """Wraps hidden states in new Variables, to detach them from their history."""
        if isinstance(h, torch.Tensor):
            return h.detach()
        else:
            return tuple(self.repackage_hidden(v) for v in h)

    def save(self, model_file):
        self._get_pytorch_model().save(model_file)

    def _get_pytorch_model(self):
        return self.model.module if self.gpus > 1 else self.model

    @staticmethod
    def _get_dims(loader):
        batch_dict = loader.dataset[0]
        return batch_dict['y'].shape

    @staticmethod
    def _num_toks(batch_dict):
        return np.prod(batch_dict['y'].shape)

    def calc_metrics(self, agg, norm):
        metrics = super().calc_metrics(agg, norm)
        metrics['perplexity'] = np.exp(metrics['avg_loss'])
        return metrics

    def test(self, vs, reporting_fns, phase='Valid', **kwargs):
        epoch = 0
        if phase == 'Valid':
            self.valid_epochs += 1
            epoch = self.valid_epochs
        start = time.time()
        self.model.eval()
        total_loss = 0
        total_toks = 0
        batchsz, nctx = self._get_dims(vs)
        hidden = self._get_pytorch_model().zero_state(batchsz)

        for batch_dict in vs:
            inputs = self._get_pytorch_model().make_input(batch_dict)
            y = inputs.pop('y')
            output, hidden = self.model(inputs, hidden)
            toks = self._num_toks(batch_dict)
            total_loss += self.crit(output, y).item() * toks
            total_toks += toks
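            # Detach the carried hidden state so the evaluation graph does not grow across batches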
            if hidden is not None:
                hidden = self.repackage_hidden(hidden)
        metrics = self.calc_metrics(total_loss, total_toks)
        self.report(
            epoch, metrics, start,
            phase, 'EPOCH', reporting_fns
        )
        return metrics

    def train(self, ts, reporting_fns):
        start = time.time()
        self.nstep_start = start
        self.model.train()
        epoch_loss = 0
        epoch_toks = 0
        batchsz, nctx = self._get_dims(ts)
        hidden = self._get_pytorch_model().zero_state(batchsz)

        for batch_dict in ts:
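            # Detach the carried hidden state so backprop is truncated at the batch boundary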
            if hidden is not None:
                hidden = self.repackage_hidden(hidden)
            inputs = self._get_pytorch_model().make_input(batch_dict)
            y = inputs.pop('y')
            self.optimizer.zero_grad()
            output, hidden = self.model(inputs, hidden)
            loss = self.crit(output, y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            toks = self._num_toks(batch_dict)
            report_loss = loss.item() * toks
            epoch_loss += report_loss
            epoch_toks += toks
            self.nstep_agg += report_loss
            self.nstep_div += toks
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                metrics['lr'] = self.optimizer.current_lr

                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_toks)
        metrics['lr'] = self.optimizer.current_lr

        self.train_epochs += 1
        self.report(
            self.train_epochs, metrics, start,
            'Train', 'EPOCH', reporting_fns
        )
        return metrics
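
For orientation, a minimal driver sketch for the trainer above (hypothetical, not part of the listing): lm_model, train_loader, valid_loader, and num_epochs are placeholders for a baseline language model and its data loaders; the train/test signatures used are the ones defined in the class.

# Hypothetical usage sketch; the names below are placeholders, not part of the source.
trainer = LanguageModelTrainerPyTorch(lm_model, clip=5, gpus=1, nsteps=500)
for epoch in range(num_epochs):
    train_metrics = trainer.train(train_loader, reporting_fns=[])
    valid_metrics = trainer.test(valid_loader, reporting_fns=[], phase='Valid')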
Code Example #3
class TaggerTrainerPyTorch(EpochReportingTrainer):
    def __init__(self, model, **kwargs):
        super().__init__()
        if type(model) is dict:
            checkpoint = kwargs.get('checkpoint')
            if checkpoint:
                model['checkpoint'] = checkpoint
            model = create_model_for('tagger', **model)
        self.grad_accum = int(kwargs.get('grad_accum', 1))
        self.gpus = int(kwargs.get('gpus', 1))
        # By default support IOB1/IOB2
        self.span_type = kwargs.get('span_type', 'iob')
        self.verbose = kwargs.get('verbose', False)

        logger.info('Setting span type %s', self.span_type)
        self.model = model
        self.idx2label = revlut(self.model.labels)
        self.clip = float(kwargs.get('clip', 5))
        self.optimizer = OptimizerManager(self.model, **kwargs)
        if self.gpus > 1:
            logger.info(
                "Trainer for PyTorch tagger currently doesn't support multiple GPUs.  Setting to 1"
            )
            self.gpus = 1
        if self.gpus > 0 and self.model.gpu:
            self.model = model.cuda()
        else:
            logger.warning("Requested training on CPU.  This will be slow.")

        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    def save(self, model_file):
        self.model.save(model_file)

    @staticmethod
    def _get_batchsz(batch_dict):
        return batch_dict['y'].shape[0]

    def process_output(self,
                       guess,
                       truth,
                       sentence_lengths,
                       ids,
                       handle=None,
                       txts=None):

        # For acc
        correct_labels = 0
        total_labels = 0
        truth_n = truth.cpu().numpy()
        # For f1
        gold_chunks = []
        pred_chunks = []

        # For each sentence
        for b in range(len(guess)):
            sentence = guess[b]
            if isinstance(sentence, torch.Tensor):
                sentence = sentence.cpu().numpy()
            sentence_length = sentence_lengths[b]
            gold = truth_n[b, :sentence_length]
            sentence = sentence[:sentence_length]

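            # Keep only positions whose gold label is not PAD before scoring accuracy and extracting spans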
            valid_guess = sentence[gold != Offsets.PAD]
            valid_gold = gold[gold != Offsets.PAD]
            valid_sentence_length = np.sum(gold != Offsets.PAD)
            correct_labels += np.sum(np.equal(valid_guess, valid_gold))
            total_labels += valid_sentence_length
            gold_chunks.append(
                set(
                    to_spans(valid_gold, self.idx2label, self.span_type,
                             self.verbose)))
            pred_chunks.append(
                set(
                    to_spans(valid_guess, self.idx2label, self.span_type,
                             self.verbose)))

            # Should we write a file out?  If so, we have to have txts
            if handle is not None and txts is not None:
                txt_id = ids[b]
                txt = txts[txt_id]
                write_sentence_conll(handle, valid_guess, valid_gold, txt,
                                     self.idx2label)

        return correct_labels, total_labels, gold_chunks, pred_chunks

    def _test(self, ts, **kwargs):

        self.model.eval()
        total_sum = 0
        total_correct = 0

        gold_spans = []
        pred_spans = []

        metrics = {}
        steps = len(ts)
        conll_output = kwargs.get('conll_output', None)
        txts = kwargs.get('txts', None)
        handle = None
        if conll_output is not None and txts is not None:
            handle = open(conll_output, "w")
        pg = create_progress_bar(steps)
        for batch_dict in pg(ts):

            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            lengths = inputs['lengths']
            ids = inputs['ids']
            with torch.no_grad():
                pred = self.model(inputs)
            correct, count, golds, guesses = self.process_output(
                pred, y.data, lengths, ids, handle, txts)
            total_correct += correct
            total_sum += count
            gold_spans.extend(golds)
            pred_spans.extend(guesses)

        total_acc = total_correct / float(total_sum)
        metrics['acc'] = total_acc
        metrics['f1'] = span_f1(gold_spans, pred_spans)
        if self.verbose:
            # TODO: Add programmatic access to these metrics?
            conll_metrics = per_entity_f1(gold_spans, pred_spans)
            conll_metrics['acc'] = total_acc * 100
            conll_metrics['tokens'] = total_sum.item()
            logger.info(conlleval_output(conll_metrics))
        return metrics

    def _train(self, ts, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        epoch_loss = 0
        epoch_norm = 0
        steps = len(ts)
        pg = create_progress_bar(steps)
        self.optimizer.zero_grad()

        for i, batch_dict in enumerate(pg(ts)):
            inputs = self.model.make_input(batch_dict)
            loss = self.model.compute_loss(inputs)
            loss.backward()

            if (i + 1) % self.grad_accum == 0 or (i + 1) == steps:
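                # Apply the accumulated gradients: clip, step the optimizer, then reset them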
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.clip)
                self.optimizer.step()
                self.optimizer.zero_grad()

            bsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * bsz
            epoch_loss += report_loss
            epoch_norm += bsz
            self.nstep_agg += report_loss
            self.nstep_div += bsz
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                metrics['lr'] = self.optimizer.current_lr
                self.report(self.optimizer.global_step + 1, metrics,
                            self.nstep_start, 'Train', 'STEP', reporting_fns,
                            self.nsteps)
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_norm)
        metrics['lr'] = self.optimizer.current_lr

        return metrics
Code Example #4
class Seq2SeqTrainerPyTorch(Trainer):

    def __init__(self, model, **kwargs):
        super().__init__()
        if type(model) is dict:
            model = create_model_for('seq2seq', **model)

        self.clip = float(kwargs.get('clip', 5))
        self.model = model
        self.optimizer = OptimizerManager(self.model, **kwargs)
        self._input = model.make_input
        self._predict = model.predict
        self.tgt_rlut = kwargs['tgt_rlut']
        self.gpus = kwargs.get('gpus', 1)
        self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))
        self.label_smoothing = kwargs.get("label_smoothing")

        if self.gpus > 0:
            self.crit = model.create_loss(label_smooth=self.label_smoothing).cuda()
            if self.gpus > 1:
                self.model = torch.nn.DataParallel(model).cuda()
            else:
                self.model.cuda()
        else:
            logger.warning("Requested training on CPU.  This will be slow.")
            self.crit = model.create_loss()

        self.nsteps = kwargs.get('nsteps', 500)

    @staticmethod
    def _num_toks(tgt_lens):
        return torch.sum(tgt_lens).item()

    def save(self, model_file):
        self._get_pytorch_model().save(model_file)

    def _get_pytorch_model(self):
        return self.model.module if self.gpus > 1 else self.model

    def calc_metrics(self, agg, norm):
        metrics = super().calc_metrics(agg, norm)
        metrics['perplexity'] = np.exp(metrics['avg_loss'])
        return metrics

    def test(self, vs, reporting_fns, phase, **kwargs):
        if phase == 'Test':
            return self._evaluate(vs, reporting_fns, **kwargs)

        self.model.eval()
        total_loss = total_toks = 0
        steps = len(vs)
        self.valid_epochs += 1
        preds = []
        golds = []

        start = time.time()
        pg = create_progress_bar(steps)
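        # Track token-level loss (for perplexity) and collect greedy (beam=1) hypotheses for BLEU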
        for batch_dict in pg(vs):
            input_ = self._input(batch_dict)
            tgt = input_['tgt']
            tgt_lens = batch_dict['tgt_lengths']
            pred = self.model(input_)
            loss = self.crit(pred, tgt)
            toks = self._num_toks(tgt_lens)
            total_loss += loss.item() * toks
            total_toks += toks
            greedy_preds = [p[0] for p in self._predict(input_, beam=1, make_input=False)]
            preds.extend(convert_seq2seq_preds(greedy_preds, self.tgt_rlut))
            golds.extend(convert_seq2seq_golds(tgt.cpu().numpy(), tgt_lens, self.tgt_rlut))

        metrics = self.calc_metrics(total_loss, total_toks)
        metrics['bleu'] = bleu(preds, golds, self.bleu_n_grams)[0]
        self.report(
            self.valid_epochs, metrics, start,
            phase, 'EPOCH', reporting_fns
        )
        return metrics

    def _evaluate(self, es, reporting_fns, **kwargs):
        self.model.eval()
        pg = create_progress_bar(len(es))
        preds = []
        golds = []
        start = time.time()
        for batch_dict in pg(es):
            tgt = batch_dict['tgt']
            tgt_lens = batch_dict['tgt_lengths']
            pred = [p[0] for p in self._predict(batch_dict, numpy_to_tensor=False, **kwargs)]
            preds.extend(convert_seq2seq_preds(pred, self.tgt_rlut))
            golds.extend(convert_seq2seq_golds(tgt, tgt_lens, self.tgt_rlut))
        metrics = {'bleu': bleu(preds, golds, self.bleu_n_grams)[0]}
        self.report(
            0, metrics, start, 'Test', 'EPOCH', reporting_fns
        )
        return metrics

    def train(self, ts, reporting_fns):
        self.model.train()

        epoch_loss = 0
        epoch_toks = 0

        start = time.time()
        self.nstep_start = start
        for batch_dict in ts:

            start_time = time.time()
            self.optimizer.zero_grad()
            input_ = self._input(batch_dict)
            tgt = input_['tgt']
            pred = self.model(input_)
            loss = self.crit(pred, tgt)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            tgt_lens = batch_dict['tgt_lengths']
            tok_count = self._num_toks(tgt_lens)
            reporting_loss = loss.item() * tok_count
            epoch_loss += reporting_loss
            epoch_toks += tok_count
            self.nstep_agg += reporting_loss
            self.nstep_div += tok_count

            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                metrics['lr'] = self.optimizer.current_lr
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_toks)
        metrics['lr'] = self.optimizer.current_lr

        self.train_epochs += 1
        self.report(
            self.train_epochs, metrics, start,
            'Train', 'EPOCH', reporting_fns
        )
        return metrics
Code Example #5
File: train.py Project: wenshuoliu/baseline
class DependencyParserTrainerPyTorch(EpochReportingTrainer):

    def __init__(self, model, **kwargs):

        if type(model) is dict:
            model = create_model_for('deps', **model)
        super().__init__()
        self.punct_eval = kwargs.get('punct_eval', False)
        self.clip = float(kwargs.get('clip', 5))
        self.labels = model.labels
        self.gpus = int(kwargs.get('gpus', 1))
        if self.gpus == -1:
            self.gpus = len(os.getenv('CUDA_VISIBLE_DEVICES', os.getenv('NV_GPU', '0')).split(','))

        self.optimizer = OptimizerManager(model, **kwargs)
        self.model = model
        if self.gpus > 0 and self.model.gpu:
            self.crit = model.create_loss().cuda()
            if self.gpus > 1:
                self.model = torch.nn.DataParallel(model).cuda()
            else:
                self.model.cuda()
        else:
            logger.warning("Requested training on CPU.  This will be slow.")
            self.crit = model.create_loss()
            self.model = model
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    def _get_pytorch_model(self):
        return self.model.module if self.gpus > 1 else self.model

    def save(self, model_file):
        self._get_pytorch_model().save(model_file)

    def _make_input(self, batch_dict, **kwargs):
        return self._get_pytorch_model().make_input(batch_dict, **kwargs)

    @staticmethod
    def _get_batchsz(batch_dict):
        return len(batch_dict['labels'])

    def _test(self, loader, **kwargs):
        self.model.eval()
        steps = len(loader)
        pg = create_progress_bar(steps)
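        # LAS/UAS: labeled/unlabeled attachment score; LCM/UCM: labeled/unlabeled complete match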
        metrics = [LAS(), UAS(), LCM(), UCM()]

        with torch.no_grad():
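            # Decode greedily and trim the gold heads/labels to the predicted sequence length before scoring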
            for batch_dict in pg(loader):
                example = self._make_input(batch_dict)
                labels_gold = example.pop('labels')
                heads_gold = example.pop('heads')
                batchsz = self._get_batchsz(batch_dict)
                greedy_heads_pred, greedy_labels_pred = self.model.decode(example)
                T = greedy_labels_pred.shape[1]
                labels_gold_trimmed = labels_gold[:, :T]
                heads_gold_trimmed = heads_gold[:, :T]

                for i in range(batchsz):
                    for m in metrics:
                        if self.punct_eval is False:
                            labels_gold_trimmed[i].masked_fill_(labels_gold_trimmed[i] == self.model.punct, Offsets.PAD)
                        m.add(greedy_heads_pred[i], heads_gold_trimmed[i], greedy_labels_pred[i], labels_gold_trimmed[i])

        metrics = {m.name: m.score for m in metrics} 
        return metrics

    def _train(self, loader, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        steps = len(loader)
        pg = create_progress_bar(steps)
        epoch_loss = 0
        epoch_div = 0
        for batch_dict in pg(loader):
            self.optimizer.zero_grad()
            example = self._make_input(batch_dict)
            heads_gold = example.pop('heads')
            labels_gold = example.pop('labels')
            heads_pred, labels_pred = self.model(example)
            loss = self.crit(heads_pred, heads_gold, labels_pred, labels_gold)
            batchsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * batchsz
            epoch_loss += report_loss
            epoch_div += batchsz
            self.nstep_agg += report_loss
            self.nstep_div += batchsz
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()

            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                metrics['lr'] = self.optimizer.current_lr
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()

        metrics = {}
        metrics['lr'] = self.optimizer.current_lr
        metrics['avg_loss'] = epoch_loss / float(epoch_div)
        return metrics