Example #1
    def __init__(self, model, **kwargs):
        super(ClassifyTrainerPyTorch, self).__init__()
        self.clip = float(kwargs.get('clip', 5))
        self.labels = model.labels
        self.optimizer = OptimizerManager(model, **kwargs)
        self.crit = model.create_loss().cuda()
        self.model = torch.nn.DataParallel(model).cuda()
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
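
The constructor above moves both the loss and a DataParallel-wrapped model onto the GPU unconditionally. A minimal, self-contained sketch of that wrapping on a toy module (the toy model and shapes here are illustrative, not part of the trainer):

import torch
import torch.nn as nn

toy = nn.Linear(16, 3)            # stand-in for the classifier model
crit = nn.CrossEntropyLoss()      # stand-in for model.create_loss()

if torch.cuda.is_available():
    # DataParallel splits each input batch across all visible GPUs;
    # the original module stays reachable as model.module.
    model = nn.DataParallel(toy).cuda()
    crit = crit.cuda()
else:
    model = toy  # CPU fallback; the snippet above assumes CUDA is available

x = torch.randn(8, 16)
if torch.cuda.is_available():
    x = x.cuda()
logits = model(x)                 # shape (8, 3)
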
Example #2
    def __init__(self, model, **kwargs):
        super(LanguageModelTrainerPyTorch, self).__init__()
        self.model = model
        self.clip = float(kwargs.get('clip', 5))
        self.gpu = not bool(kwargs.get('nogpu', False))
        self.crit = model.create_loss()

        if self.gpu:
            self.model = self.model.cuda()
            self.crit.cuda()
        self.nsteps = kwargs.get('nsteps', 500)

        self.optimizer = OptimizerManager(self.model, **kwargs)
Example #3
    def __init__(self, model, **kwargs):
        super(Seq2SeqTrainerPyTorch, self).__init__()
        self.gpu = bool(kwargs.get('gpu', True))
        self.clip = float(kwargs.get('clip', 5))
        self.model = model
        self.optimizer = OptimizerManager(self.model, **kwargs)
        self._input = model.make_input
        self._predict = model.predict
        self.crit = model.create_loss()
        self.tgt_rlut = kwargs['tgt_rlut']
        if self.gpu:
            self.model = torch.nn.DataParallel(model).cuda()
            self.crit.cuda()
        self.nsteps = kwargs.get('nsteps', 500)
Example #4
    def __init__(self, model, **kwargs):
        super(TaggerTrainerPyTorch, self).__init__()
        self.gpu = not bool(kwargs.get('nogpu', False))
        # By default support IOB1/IOB2
        self.span_type = kwargs.get('span_type', 'iob')
        self.verbose = kwargs.get('verbose', False)

        logger.info('Setting span type %s', self.span_type)
        self.model = model
        self.idx2label = revlut(self.model.labels)
        self.clip = float(kwargs.get('clip', 5))
        self.optimizer = OptimizerManager(self.model, **kwargs)
        if self.gpu:
            self.model = model.to_gpu()
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
Example #5
    def __init__(self, model, **kwargs):
        super(TaggerTrainerPyTorch, self).__init__()
        self.gpus = int(kwargs.get('gpus', 1))
        # By default support IOB1/IOB2
        self.span_type = kwargs.get('span_type', 'iob')
        self.verbose = kwargs.get('verbose', False)

        logger.info('Setting span type %s', self.span_type)
        self.model = model
        self.idx2label = revlut(self.model.labels)
        self.clip = float(kwargs.get('clip', 5))
        self.optimizer = OptimizerManager(self.model, **kwargs)
        if self.gpus > 1:
            logger.info(
                "Trainer for PyTorch tagger currently doesn't support multiple GPUs. Setting to 1"
            )
            self.gpus = 1
        if self.gpus > 0:
            self.model = model.to_gpu()
        else:
            logger.warning("Requested training on CPU.  This will be slow.")

        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
Example #6
    def __init__(self, model, **kwargs):
        super(ClassifyTrainerPyTorch, self).__init__()
        self.clip = float(kwargs.get('clip', 5))
        self.labels = model.labels
        self.gpus = int(kwargs.get('gpus', 1))
        if self.gpus == -1:
            self.gpus = len(
                os.getenv('CUDA_VISIBLE_DEVICES',
                          os.getenv('NV_GPU', '0')).split(','))

        self.optimizer = OptimizerManager(model, **kwargs)
        self.model = model
        if self.gpus > 0:
            self.crit = model.create_loss().cuda()
            if self.gpus > 1:
                self.model = torch.nn.DataParallel(model).cuda()
            else:
                self.model.cuda()
        else:
            logger.warning("Requested training on CPU.  This will be slow.")
            self.crit = model.create_loss()
            self.model = model
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
Example #7
    def __init__(self, model, **kwargs):
        super(TaggerTrainerPyTorch, self).__init__()
        self.gpu = not bool(kwargs.get('nogpu', False))
        # By default support IOB1/IOB2
        self.span_type = kwargs.get('span_type', 'iob')
        self.verbose = kwargs.get('verbose', False)

        if self.verbose:
            logger.info('Setting span type %s', self.span_type)
        self.model = model
        self.idx2label = revlut(self.model.labels)
        self.clip = float(kwargs.get('clip', 5))
        self.optimizer = OptimizerManager(self.model, **kwargs)
        if self.gpu:
            self.model = model.to_gpu()
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
Example #8
class TaggerTrainerPyTorch(EpochReportingTrainer):
    def __init__(self, model, **kwargs):
        super(TaggerTrainerPyTorch, self).__init__()
        self.gpu = not bool(kwargs.get('nogpu', False))
        # By default support IOB1/IOB2
        self.span_type = kwargs.get('span_type', 'iob')
        self.verbose = kwargs.get('verbose', False)

        if self.verbose:
            logger.info('Setting span type %s', self.span_type)
        self.model = model
        self.idx2label = revlut(self.model.labels)
        self.clip = float(kwargs.get('clip', 5))
        self.optimizer = OptimizerManager(self.model, **kwargs)
        if self.gpu:
            self.model = model.to_gpu()
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    @staticmethod
    def _get_batchsz(batch_dict):
        return batch_dict['y'].shape[0]

    def process_output(self,
                       guess,
                       truth,
                       sentence_lengths,
                       ids,
                       handle=None,
                       txts=None):

        correct_labels = 0
        total_labels = 0
        truth_n = truth.cpu().numpy()
        # For fscore
        gold_count = 0
        guess_count = 0
        overlap_count = 0

        # For each sentence
        for b in range(len(guess)):

            sentence = guess[b].cpu().numpy()

            sentence_length = sentence_lengths[b]
            gold = truth_n[b, :sentence_length]
            correct_labels += np.sum(np.equal(sentence, gold))
            total_labels += sentence_length
            gold_chunks = to_spans(gold, self.idx2label, self.span_type,
                                   self.verbose)
            gold_count += len(gold_chunks)
            guess_chunks = to_spans(sentence, self.idx2label, self.span_type,
                                    self.verbose)
            guess_count += len(guess_chunks)

            overlap_chunks = gold_chunks & guess_chunks
            overlap_count += len(overlap_chunks)

            # Should we write a file out?  If so, we have to have txts
            if handle is not None and txts is not None:
                txt_id = ids[b]
                txt = txts[txt_id]
                write_sentence_conll(handle, sentence, gold, txt,
                                     self.idx2label)

        return correct_labels, total_labels, overlap_count, gold_count, guess_count

    def _test(self, ts, **kwargs):

        self.model.eval()
        total_correct = 0
        total_sum = 0
        total_gold_count = 0
        total_guess_count = 0
        total_overlap_count = 0
        metrics = {}
        steps = len(ts)
        conll_output = kwargs.get('conll_output', None)
        txts = kwargs.get('txts', None)
        handle = None
        if conll_output is not None and txts is not None:
            handle = open(conll_output, "w")
        pg = create_progress_bar(steps)
        for batch_dict in pg(ts):

            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            lengths = inputs['lengths']
            ids = inputs['ids']
            pred = self.model(inputs)
            correct, count, overlaps, golds, guesses = self.process_output(
                pred, y.data, lengths, ids, handle, txts)
            total_correct += correct
            total_sum += count
            total_gold_count += golds
            total_guess_count += guesses
            total_overlap_count += overlaps

        if handle is not None:
            handle.close()
        total_acc = total_correct / float(total_sum)
        metrics['f1'] = f_score(total_overlap_count, total_gold_count,
                                total_guess_count)
        metrics['acc'] = total_acc
        return metrics

    def _train(self, ts, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        epoch_loss = 0
        epoch_norm = 0
        steps = len(ts)
        pg = create_progress_bar(steps)
        for batch_dict in pg(ts):
            inputs = self.model.make_input(batch_dict)
            self.optimizer.zero_grad()
            loss = self.model.compute_loss(inputs)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            bsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * bsz
            epoch_loss += report_loss
            epoch_norm += bsz
            self.nstep_agg += report_loss
            self.nstep_div += bsz
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(self.optimizer.global_step + 1, metrics,
                            self.nstep_start, 'Train', 'STEP', reporting_fns,
                            self.nsteps)
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_norm)
        return metrics
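
Every _train loop in this section uses the same update step: zero the gradients, backpropagate, clip the global gradient norm, then step the optimizer. A minimal sketch of that pattern on a toy model, using the same default clip value of 5:

import torch
import torch.nn as nn

model = nn.Linear(10, 2)                          # toy stand-in for the tagger
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
crit = nn.CrossEntropyLoss()
clip = 5.0                                        # kwargs.get('clip', 5)

x = torch.randn(4, 10)
y = torch.randint(0, 2, (4,))

optimizer.zero_grad()
loss = crit(model(x), y)
loss.backward()
# Rescales all gradients in place so their combined L2 norm is at most clip.
torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
optimizer.step()
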
Example #9
class TaggerTrainerPyTorch(EpochReportingTrainer):
    def __init__(self, model, **kwargs):
        super(TaggerTrainerPyTorch, self).__init__()
        self.gpus = int(kwargs.get('gpus', 1))
        # By default support IOB1/IOB2
        self.span_type = kwargs.get('span_type', 'iob')
        self.verbose = kwargs.get('verbose', False)

        logger.info('Setting span type %s', self.span_type)
        self.model = model
        self.idx2label = revlut(self.model.labels)
        self.clip = float(kwargs.get('clip', 5))
        self.optimizer = OptimizerManager(self.model, **kwargs)
        if self.gpus > 1:
            logger.info(
                "Trainer for PyTorch tagger currently doesn't support multiple GPUs. Setting to 1"
            )
            self.gpus = 1
        if self.gpus > 0:
            self.model = model.to_gpu()
        else:
            logger.warning("Requested training on CPU.  This will be slow.")

        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    @staticmethod
    def _get_batchsz(batch_dict):
        return batch_dict['y'].shape[0]

    def process_output(self,
                       guess,
                       truth,
                       sentence_lengths,
                       ids,
                       handle=None,
                       txts=None):

        # For acc
        correct_labels = 0
        total_labels = 0
        truth_n = truth.cpu().numpy()
        # For f1
        gold_chunks = []
        pred_chunks = []

        # For each sentence
        for b in range(len(guess)):
            sentence = guess[b].cpu().numpy()
            sentence_length = sentence_lengths[b]

            gold = truth_n[b, :sentence_length]
            correct_labels += np.sum(np.equal(sentence, gold))
            total_labels += sentence_length
            gold_chunks.append(
                set(
                    to_spans(gold, self.idx2label, self.span_type,
                             self.verbose)))
            pred_chunks.append(
                set(
                    to_spans(sentence, self.idx2label, self.span_type,
                             self.verbose)))

            # Should we write a file out?  If so, we have to have txts
            if handle is not None and txts is not None:
                txt_id = ids[b]
                txt = txts[txt_id]
                write_sentence_conll(handle, sentence, gold, txt,
                                     self.idx2label)

        return correct_labels, total_labels, gold_chunks, pred_chunks

    def _test(self, ts, **kwargs):

        self.model.eval()
        total_sum = 0
        total_correct = 0

        gold_spans = []
        pred_spans = []

        metrics = {}
        steps = len(ts)
        conll_output = kwargs.get('conll_output', None)
        txts = kwargs.get('txts', None)
        handle = None
        if conll_output is not None and txts is not None:
            handle = open(conll_output, "w")
        pg = create_progress_bar(steps)
        for batch_dict in pg(ts):

            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            lengths = inputs['lengths']
            ids = inputs['ids']
            pred = self.model(inputs)
            correct, count, golds, guesses = self.process_output(
                pred, y.data, lengths, ids, handle, txts)
            total_correct += correct
            total_sum += count
            gold_spans.extend(golds)
            pred_spans.extend(guesses)

        if handle is not None:
            handle.close()
        total_acc = total_correct / float(total_sum)
        metrics['acc'] = total_acc
        metrics['f1'] = span_f1(gold_spans, pred_spans)
        if self.verbose:
            # TODO: Add programmatic access to these metrics?
            conll_metrics = per_entity_f1(gold_spans, pred_spans)
            conll_metrics['acc'] = total_acc * 100
            conll_metrics['tokens'] = total_sum.item()
            logger.info(conlleval_output(conll_metrics))
        return metrics

    def _train(self, ts, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        epoch_loss = 0
        epoch_norm = 0
        steps = len(ts)
        pg = create_progress_bar(steps)
        for batch_dict in pg(ts):
            inputs = self.model.make_input(batch_dict)
            self.optimizer.zero_grad()
            loss = self.model.compute_loss(inputs)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            bsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * bsz
            epoch_loss += report_loss
            epoch_norm += bsz
            self.nstep_agg += report_loss
            self.nstep_div += bsz
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(self.optimizer.global_step + 1, metrics,
                            self.nstep_start, 'Train', 'STEP', reporting_fns,
                            self.nsteps)
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_norm)
        return metrics
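
The tagger above scores entities with span_f1 over per-sentence sets of gold and predicted chunks. As a reference point, here is a sketch of the standard micro-averaged chunk F1 that such a helper computes; it is written from the usual definition, not copied from the library:

def chunk_f1(gold_spans, pred_spans):
    # gold_spans / pred_spans: one set of hashable span ids per sentence.
    overlap = sum(len(g & p) for g, p in zip(gold_spans, pred_spans))
    gold = sum(len(g) for g in gold_spans)
    guess = sum(len(p) for p in pred_spans)
    precision = overlap / guess if guess else 0.0
    recall = overlap / gold if gold else 0.0
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)

# chunk_f1([{('PER', 0, 2)}], [{('PER', 0, 2), ('ORG', 3, 4)}])
# -> precision 0.5, recall 1.0, F1 = 2/3
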
Example #10
class Seq2SeqTrainerPyTorch(Trainer):
    def __init__(self, model, **kwargs):
        super(Seq2SeqTrainerPyTorch, self).__init__()
        self.gpu = bool(kwargs.get('gpu', True))
        self.clip = float(kwargs.get('clip', 5))
        self.model = model
        self.optimizer = OptimizerManager(self.model, **kwargs)
        self._input = model.make_input
        self._predict = model.predict
        self.crit = model.create_loss()
        self.tgt_rlut = kwargs['tgt_rlut']
        if self.gpu:
            self.model = torch.nn.DataParallel(model).cuda()
            self.crit.cuda()
        self.nsteps = kwargs.get('nsteps', 500)

    @staticmethod
    def _num_toks(tgt_lens):
        return np.sum(tgt_lens)

    def calc_metrics(self, agg, norm):
        metrics = super(Seq2SeqTrainerPyTorch, self).calc_metrics(agg, norm)
        metrics['perplexity'] = np.exp(metrics['avg_loss'])
        return metrics

    def test(self, vs, reporting_fns, phase, **kwargs):
        if phase == 'Test':
            return self._evaluate(vs, reporting_fns, **kwargs)

        self.model.eval()
        total_loss = total_toks = 0
        steps = len(vs)
        self.valid_epochs += 1
        preds = []
        golds = []

        start = time.time()
        pg = create_progress_bar(steps)
        for batch_dict in pg(vs):
            input_ = self._input(batch_dict)
            tgt = input_['tgt']
            tgt_lens = batch_dict['tgt_lengths']
            pred = self.model(input_)
            loss = self.crit(pred, tgt)
            toks = self._num_toks(tgt_lens)
            total_loss += loss.item() * toks
            total_toks += toks
            greedy_preds = [
                p[0] for p in self._predict(input_, beam=1, make_input=False)
            ]
            preds.extend(convert_seq2seq_preds(greedy_preds, self.tgt_rlut))
            golds.extend(
                convert_seq2seq_golds(tgt.cpu().numpy(), tgt_lens,
                                      self.tgt_rlut))

        metrics = self.calc_metrics(total_loss, total_toks)
        metrics['bleu'] = bleu(preds, golds)[0]
        self.report(self.valid_epochs, metrics, start, phase, 'EPOCH',
                    reporting_fns)
        return metrics

    def _evaluate(self, es, reporting_fns, **kwargs):
        self.model.eval()
        pg = create_progress_bar(len(es))
        preds = []
        golds = []
        start = time.time()
        for batch_dict in pg(es):
            tgt = batch_dict['tgt']
            tgt_lens = batch_dict['tgt_lengths']
            pred = [p[0] for p in self._predict(batch_dict, **kwargs)]
            preds.extend(convert_seq2seq_preds(pred, self.tgt_rlut))
            golds.extend(convert_seq2seq_golds(tgt, tgt_lens, self.tgt_rlut))
        metrics = {'bleu': bleu(preds, golds)[0]}
        self.report(0, metrics, start, 'Test', 'EPOCH', reporting_fns)
        return metrics

    def train(self, ts, reporting_fns):
        self.model.train()

        epoch_loss = 0
        epoch_toks = 0

        start = time.time()
        self.nstep_start = start
        for batch_dict in ts:

            self.optimizer.zero_grad()
            input_ = self._input(batch_dict)
            tgt = input_['tgt']
            pred = self.model(input_)
            loss = self.crit(pred, tgt)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            tgt_lens = batch_dict['tgt_lengths']
            tok_count = self._num_toks(tgt_lens)
            reporting_loss = loss.item() * tok_count
            epoch_loss += reporting_loss
            epoch_toks += tok_count
            self.nstep_agg += reporting_loss
            self.nstep_div += tok_count

            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(self.optimizer.global_step + 1, metrics,
                            self.nstep_start, 'Train', 'STEP', reporting_fns,
                            self.nsteps)
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_toks)
        self.train_epochs += 1
        self.report(self.train_epochs, metrics, start, 'Train', 'EPOCH',
                    reporting_fns)
        return metrics
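
calc_metrics above turns the token-normalized cross-entropy into perplexity with np.exp. That works because the loop accumulates loss.item() * tok_count and divides by the total token count, giving an average loss in nats per token. A worked sketch with illustrative numbers:

import numpy as np

batch_losses = [2.31, 2.05, 1.98]   # per-batch mean loss (nats/token)
batch_toks = [512, 480, 500]        # tokens per batch

agg = sum(l * t for l, t in zip(batch_losses, batch_toks))  # like nstep_agg
norm = sum(batch_toks)                                      # like nstep_div
avg_loss = agg / norm               # ~2.12 nats/token
perplexity = np.exp(avg_loss)       # ~8.3
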
Example #11
class TaggerTrainerPyTorch(EpochReportingTrainer):

    def __init__(self, model, **kwargs):
        super(TaggerTrainerPyTorch, self).__init__()
        self.gpu = not bool(kwargs.get('nogpu', False))
        # By default support IOB1/IOB2
        self.span_type = kwargs.get('span_type', 'iob')
        self.verbose = kwargs.get('verbose', False)

        if self.verbose:
            logger.info('Setting span type %s', self.span_type)
        self.model = model
        self.idx2label = revlut(self.model.labels)
        self.clip = float(kwargs.get('clip', 5))
        self.optimizer = OptimizerManager(self.model, **kwargs)
        if self.gpu:
            self.model = model.to_gpu()
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    @staticmethod
    def _get_batchsz(batch_dict):
        return batch_dict['y'].shape[0]

    def process_output(self, guess, truth, sentence_lengths, ids, handle=None, txts=None):

        correct_labels = 0
        total_labels = 0
        truth_n = truth.cpu().numpy()
        # For fscore
        gold_count = 0
        guess_count = 0
        overlap_count = 0

        # For each sentence
        for b in range(len(guess)):

            sentence = guess[b].cpu().numpy()

            sentence_length = sentence_lengths[b]
            gold = truth_n[b, :sentence_length]
            correct_labels += np.sum(np.equal(sentence, gold))
            total_labels += sentence_length
            gold_chunks = to_spans(gold, self.idx2label, self.span_type, self.verbose)
            gold_count += len(gold_chunks)
            guess_chunks = to_spans(sentence, self.idx2label, self.span_type, self.verbose)
            guess_count += len(guess_chunks)

            overlap_chunks = gold_chunks & guess_chunks
            overlap_count += len(overlap_chunks)

            # Should we write a file out?  If so, we have to have txts
            if handle is not None and txts is not None:
                txt_id = ids[b]
                txt = txts[txt_id]
                write_sentence_conll(handle, sentence, gold, txt, self.idx2label)

        return correct_labels, total_labels, overlap_count, gold_count, guess_count

    def _test(self, ts, **kwargs):

        self.model.eval()
        total_correct = 0
        total_sum = 0
        total_gold_count = 0
        total_guess_count = 0
        total_overlap_count = 0
        metrics = {}
        steps = len(ts)
        conll_output = kwargs.get('conll_output', None)
        txts = kwargs.get('txts', None)
        handle = None
        if conll_output is not None and txts is not None:
            handle = open(conll_output, "w")
        pg = create_progress_bar(steps)
        for batch_dict in pg(ts):

            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            lengths = inputs['lengths']
            ids = inputs['ids']
            pred = self.model(inputs)
            correct, count, overlaps, golds, guesses = self.process_output(pred, y.data, lengths, ids, handle, txts)
            total_correct += correct
            total_sum += count
            total_gold_count += golds
            total_guess_count += guesses
            total_overlap_count += overlaps

        if handle is not None:
            handle.close()
        total_acc = total_correct / float(total_sum)
        metrics['f1'] = f_score(total_overlap_count, total_gold_count, total_guess_count)
        metrics['acc'] = total_acc
        return metrics

    def _train(self, ts, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        epoch_loss = 0
        epoch_norm = 0
        steps = len(ts)
        pg = create_progress_bar(steps)
        for batch_dict in pg(ts):
            inputs = self.model.make_input(batch_dict)
            self.optimizer.zero_grad()
            loss = self.model.compute_loss(inputs)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            bsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * bsz
            epoch_loss += report_loss
            epoch_norm += bsz
            self.nstep_agg += report_loss
            self.nstep_div += bsz
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_norm)
        return metrics
Example #12
class LanguageModelTrainerPyTorch(Trainer):
    def __init__(self, model, **kwargs):
        super(LanguageModelTrainerPyTorch, self).__init__()
        self.model = model
        self.clip = float(kwargs.get('clip', 5))
        self.gpu = not bool(kwargs.get('nogpu', False))
        self.crit = model.create_loss()

        if self.gpu:
            self.model = self.model.cuda()
            self.crit.cuda()
        self.nsteps = kwargs.get('nsteps', 500)

        self.optimizer = OptimizerManager(self.model, **kwargs)

    def repackage_hidden(self, h):
        """Wraps hidden states in new Variables, to detach them from their history."""
        if isinstance(h, torch.Tensor):
            return h.detach()
        else:
            return tuple(self.repackage_hidden(v) for v in h)

    @staticmethod
    def _get_dims(batch_dict):
        return batch_dict['y'].shape

    @staticmethod
    def _num_toks(batch_dict):
        return np.prod(LanguageModelTrainerPyTorch._get_dims(batch_dict))

    def calc_metrics(self, agg, norm):
        metrics = super(LanguageModelTrainerPyTorch,
                        self).calc_metrics(agg, norm)
        metrics['perplexity'] = np.exp(metrics['avg_loss'])
        return metrics

    def test(self, vs, reporting_fns, phase='Valid'):
        epoch = 0
        if phase == 'Valid':
            self.valid_epochs += 1
            epoch = self.valid_epochs
        start = time.time()
        self.model.eval()
        total_loss = 0
        total_toks = 0
        metrics = {}
        batchsz, nctx = self._get_dims(vs[0])

        hidden = self.model.init_hidden(batchsz)

        for batch_dict in vs:
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            output, hidden = self.model(inputs, hidden)
            toks = self._num_toks(batch_dict)
            total_loss += self.crit(output, y).item() * toks
            total_toks += toks
            if hidden is not None:
                hidden = self.repackage_hidden(hidden)
        metrics = self.calc_metrics(total_loss, total_toks)
        self.report(epoch, metrics, start, phase, 'EPOCH', reporting_fns)
        return metrics

    def train(self, ts, reporting_fns):
        start = time.time()
        self.nstep_start = start
        self.model.train()
        epoch_loss = 0
        epoch_toks = 0
        batchsz, nctx = self._get_dims(ts[0])
        hidden = self.model.init_hidden(batchsz)

        for batch_dict in ts:
            if hidden is not None:
                hidden = self.repackage_hidden(hidden)
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            self.optimizer.zero_grad()
            output, hidden = self.model(inputs, hidden)
            loss = self.crit(output, y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            toks = self._num_toks(batch_dict)
            report_loss = loss.item() * toks
            epoch_loss += report_loss
            epoch_toks += toks
            self.nstep_agg += report_loss
            self.nstep_div += toks
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(self.optimizer.global_step + 1, metrics,
                            self.nstep_start, 'Train', 'STEP', reporting_fns,
                            self.nsteps)
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_toks)
        self.train_epochs += 1
        self.report(self.train_epochs, metrics, start, 'Train', 'EPOCH',
                    reporting_fns)
        return metrics
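
repackage_hidden above is the truncated-backpropagation-through-time idiom: the hidden state's values carry over from batch to batch, but detaching them cuts the autograd graph so backward() never reaches into earlier batches. A minimal sketch of the same idea with a toy LSTM:

import torch
import torch.nn as nn

def repackage_hidden(h):
    # Same logic as the method above: detach tensors, recurse into tuples.
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)

lstm = nn.LSTM(input_size=8, hidden_size=16, batch_first=True)
hidden = None  # the LSTM defaults to zero states on the first batch
for _ in range(3):  # three consecutive chunks of the same token stream
    x = torch.randn(4, 10, 8)
    out, hidden = lstm(x, hidden)
    out.sum().backward()               # gradient stops at the detach point
    hidden = repackage_hidden(hidden)  # keep values, drop history
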
Example #13
class ClassifyTrainerPyTorch(EpochReportingTrainer):

    def __init__(self, model, **kwargs):
        super(ClassifyTrainerPyTorch, self).__init__()
        self.clip = float(kwargs.get('clip', 5))
        self.labels = model.labels
        self.optimizer = OptimizerManager(model, **kwargs)
        self.crit = model.create_loss().cuda()
        self.model = torch.nn.DataParallel(model).cuda()
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    def _make_input(self, batch_dict):
        return self.model.module.make_input(batch_dict)

    @staticmethod
    def _get_batchsz(batch_dict):
        return len(batch_dict['y'])

    def _test(self, loader, **kwargs):
        self.model.eval()
        total_loss = 0
        total_norm = 0
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        verbose = kwargs.get("verbose", None)

        for batch_dict in pg(loader):
            example = self._make_input(batch_dict)
            y = example.pop('y')
            pred = self.model(example)
            loss = self.crit(pred, y)
            batchsz = self._get_batchsz(batch_dict)
            total_loss += loss.item() * batchsz
            total_norm += batchsz
            _add_to_cm(cm, y, pred)

        metrics = cm.get_all_metrics()
        metrics['avg_loss'] = total_loss / float(total_norm)
        verbose_output(verbose, cm)

        return metrics

    def _train(self, loader, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        epoch_loss = 0
        epoch_div = 0
        for batch_dict in pg(loader):
            self.optimizer.zero_grad()
            example = self._make_input(batch_dict)
            y = example.pop('y')
            pred = self.model(example)
            loss = self.crit(pred, y)
            batchsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * batchsz
            epoch_loss += report_loss
            epoch_div += batchsz
            self.nstep_agg += report_loss
            self.nstep_div += batchsz
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            _add_to_cm(cm, y, pred)
            self.optimizer.step()

            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()

        metrics = cm.get_all_metrics()
        metrics['avg_loss'] = epoch_loss / float(epoch_div)
        return metrics
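
_make_input above reaches through self.model.module because DataParallel is itself an nn.Module wrapper: only forward() is delegated, so custom methods like make_input live on the wrapped object, which DataParallel exposes as .module. A sketch of that distinction with a toy module:

import torch.nn as nn

class Toy(nn.Module):
    def __init__(self):
        super(Toy, self).__init__()
        self.fc = nn.Linear(4, 2)

    def make_input(self, batch_dict):
        return batch_dict  # stand-in for real featurization

    def forward(self, x):
        return self.fc(x)

wrapped = nn.DataParallel(Toy())
# wrapped.make_input(...) raises AttributeError -- the wrapper does not
# forward custom methods; the original object is reachable as .module:
example = wrapped.module.make_input({'x': None})
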
Example #14
class ClassifyTrainerPyTorch(EpochReportingTrainer):
    def __init__(self, model, **kwargs):
        super(ClassifyTrainerPyTorch, self).__init__()
        self.clip = float(kwargs.get('clip', 5))
        self.labels = model.labels
        self.gpus = int(kwargs.get('gpus', 1))
        if self.gpus == -1:
            self.gpus = len(
                os.getenv('CUDA_VISIBLE_DEVICES',
                          os.getenv('NV_GPU', '0')).split(','))

        self.optimizer = OptimizerManager(model, **kwargs)
        self.model = model
        if self.gpus > 0:
            self.crit = model.create_loss().cuda()
            if self.gpus > 1:
                self.model = torch.nn.DataParallel(model).cuda()
            else:
                self.model.cuda()
        else:
            logger.warning("Requested training on CPU.  This will be slow.")
            self.crit = model.create_loss()
            self.model = model
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    def _make_input(self, batch_dict):
        if self.gpus > 1:
            return self.model.module.make_input(batch_dict)
        return self.model.make_input(batch_dict)

    @staticmethod
    def _get_batchsz(batch_dict):
        return len(batch_dict['y'])

    def _test(self, loader, **kwargs):
        self.model.eval()
        total_loss = 0
        total_norm = 0
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        verbose = kwargs.get("verbose", None)
        output = kwargs.get('output')
        txts = kwargs.get('txts')
        handle = None
        line_number = 0
        if output is not None and txts is not None:
            handle = open(output, "w")

        for batch_dict in pg(loader):
            example = self._make_input(batch_dict)
            ys = example.pop('y')
            pred = self.model(example)
            loss = self.crit(pred, ys)
            if handle is not None:
                for p, y in zip(pred, ys):
                    handle.write('{}\t{}\t{}\n'.format(
                        " ".join(txts[line_number]), self.model.labels[p],
                        self.model.labels[y]))
                    line_number += 1
            batchsz = self._get_batchsz(batch_dict)
            total_loss += loss.item() * batchsz
            total_norm += batchsz
            _add_to_cm(cm, ys, pred)

        metrics = cm.get_all_metrics()
        metrics['avg_loss'] = total_loss / float(total_norm)
        verbose_output(verbose, cm)
        if handle is not None:
            handle.close()

        return metrics

    def _train(self, loader, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        epoch_loss = 0
        epoch_div = 0
        for batch_dict in pg(loader):
            self.optimizer.zero_grad()
            example = self._make_input(batch_dict)
            y = example.pop('y')
            pred = self.model(example)
            loss = self.crit(pred, y)
            batchsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * batchsz
            epoch_loss += report_loss
            epoch_div += batchsz
            self.nstep_agg += report_loss
            self.nstep_div += batchsz
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            _add_to_cm(cm, y, pred)
            self.optimizer.step()

            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(self.optimizer.global_step + 1, metrics,
                            self.nstep_start, 'Train', 'STEP', reporting_fns,
                            self.nsteps)
                self.reset_nstep()

        metrics = cm.get_all_metrics()
        metrics['avg_loss'] = epoch_loss / float(epoch_div)
        return metrics
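
With gpus=-1, the trainer above infers the device count from the environment rather than from torch. A sketch of that lookup, mirroring the fallback chain in the constructor (CUDA_VISIBLE_DEVICES, then NV_GPU, then a single default device):

import os

def infer_gpu_count():
    # Count comma-separated entries in the visibility mask, defaulting to
    # one device. This matches the kwargs.get('gpus', 1) == -1 branch above.
    visible = os.getenv('CUDA_VISIBLE_DEVICES', os.getenv('NV_GPU', '0'))
    return len(visible.split(','))

# CUDA_VISIBLE_DEVICES="0,2,3" -> 3 ; unset -> 1
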
Example #15
class LanguageModelTrainerPyTorch(Trainer):

    def __init__(self, model, **kwargs):
        super(LanguageModelTrainerPyTorch, self).__init__()
        self.model = model
        self.clip = float(kwargs.get('clip', 5))
        self.gpu = not bool(kwargs.get('nogpu', False))
        self.crit = model.create_loss()

        if self.gpu:
            self.model = self.model.cuda()
            self.crit.cuda()
        self.nsteps = kwargs.get('nsteps', 500)

        self.optimizer = OptimizerManager(self.model, **kwargs)

    def repackage_hidden(self, h):
        """Wraps hidden states in new Variables, to detach them from their history."""
        if isinstance(h, torch.Tensor):
            return h.detach()
        else:
            return tuple(self.repackage_hidden(v) for v in h)

    @staticmethod
    def _get_dims(batch_dict):
        return batch_dict['y'].shape

    @staticmethod
    def _num_toks(batch_dict):
        return np.prod(LanguageModelTrainerPyTorch._get_dims(batch_dict))

    def calc_metrics(self, agg, norm):
        metrics = super(LanguageModelTrainerPyTorch, self).calc_metrics(agg, norm)
        metrics['perplexity'] = np.exp(metrics['avg_loss'])
        return metrics

    def test(self, vs, reporting_fns, phase='Valid'):
        epoch = 0
        if phase == 'Valid':
            self.valid_epochs += 1
            epoch = self.valid_epochs
        start = time.time()
        self.model.eval()
        total_loss = 0
        total_toks = 0
        metrics = {}
        batchsz, nctx = self._get_dims(vs[0])

        hidden = self.model.init_hidden(batchsz)

        for batch_dict in vs:
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            output, hidden = self.model(inputs, hidden)
            toks = self._num_toks(batch_dict)
            total_loss += self.crit(output, y).item() * toks
            total_toks += toks
            if hidden is not None:
                hidden = self.repackage_hidden(hidden)
        metrics = self.calc_metrics(total_loss, total_toks)
        self.report(
            epoch, metrics, start,
            phase, 'EPOCH', reporting_fns
        )
        return metrics

    def train(self, ts, reporting_fns):
        start = time.time()
        self.nstep_start = start
        self.model.train()
        epoch_loss = 0
        epoch_toks = 0
        batchsz, nctx = self._get_dims(ts[0])
        hidden = self.model.init_hidden(batchsz)

        for batch_dict in ts:
            if hidden is not None:
                hidden = self.repackage_hidden(hidden)
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            self.optimizer.zero_grad()
            output, hidden = self.model(inputs, hidden)
            loss = self.crit(output, y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            toks = self._num_toks(batch_dict)
            report_loss = loss.item() * toks
            epoch_loss += report_loss
            epoch_toks += toks
            self.nstep_agg += report_loss
            self.nstep_div += toks
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_toks)
        self.train_epochs += 1
        self.report(
            self.train_epochs, metrics, start,
            'Train', 'EPOCH', reporting_fns
        )
        return metrics
Example #16
class Seq2SeqTrainerPyTorch(Trainer):

    def __init__(self, model, **kwargs):
        super(Seq2SeqTrainerPyTorch, self).__init__()
        self.gpu = bool(kwargs.get('gpu', True))
        self.clip = float(kwargs.get('clip', 5))
        self.model = model
        self.optimizer = OptimizerManager(self.model, **kwargs)
        self._input = model.make_input
        self._predict = model.predict
        self.crit = model.create_loss()
        self.tgt_rlut = kwargs['tgt_rlut']
        if self.gpu:
            self.model = torch.nn.DataParallel(model).cuda()
            self.crit.cuda()
        self.nsteps = kwargs.get('nsteps', 500)

    @staticmethod
    def _num_toks(tgt_lens):
        return np.sum(tgt_lens)

    def calc_metrics(self, agg, norm):
        metrics = super(Seq2SeqTrainerPyTorch, self).calc_metrics(agg, norm)
        metrics['perplexity'] = np.exp(metrics['avg_loss'])
        return metrics

    def test(self, vs, reporting_fns, phase):
        if phase == 'Test':
            return self._evaluate(vs, reporting_fns)

        self.model.eval()
        total_loss = total_toks = 0
        steps = len(vs)
        self.valid_epochs += 1
        preds = []
        golds = []

        start = time.time()
        pg = create_progress_bar(steps)
        for batch_dict in pg(vs):
            input_ = self._input(batch_dict)
            tgt = input_['tgt']
            tgt_lens = batch_dict['tgt_lengths']
            pred = self.model(input_)
            loss = self.crit(pred, tgt)
            toks = self._num_toks(tgt_lens)
            total_loss += loss.item() * toks
            total_toks += toks
            greedy_preds = [p[0] for p in self._predict(input_, beam=1, make_input=False)]
            preds.extend(convert_seq2seq_preds(greedy_preds, self.tgt_rlut))
            golds.extend(convert_seq2seq_golds(tgt.cpu().numpy(), tgt_lens, self.tgt_rlut))

        metrics = self.calc_metrics(total_loss, total_toks)
        metrics['bleu'] = bleu(preds, golds)[0]
        self.report(
            self.valid_epochs, metrics, start,
            phase, 'EPOCH', reporting_fns
        )
        return metrics


    def _evaluate(self, es, reporting_fns):
        self.model.eval()
        pg = create_progress_bar(len(es))
        preds = []
        golds = []
        start = time.time()
        for batch_dict in pg(es):
            tgt = batch_dict['tgt']
            tgt_lens = batch_dict['tgt_lengths']
            pred = [p[0] for p in self._predict(batch_dict)]
            preds.extend(convert_seq2seq_preds(pred, self.tgt_rlut))
            golds.extend(convert_seq2seq_golds(tgt, tgt_lens, self.tgt_rlut))
        metrics = {'bleu': bleu(preds, golds)[0]}
        self.report(
            0, metrics, start, 'Test', 'EPOCH', reporting_fns
        )
        return metrics


    def train(self, ts, reporting_fns):
        self.model.train()

        epoch_loss = 0
        epoch_toks = 0

        start = time.time()
        self.nstep_start = start
        for batch_dict in ts:

            self.optimizer.zero_grad()
            input_ = self._input(batch_dict)
            tgt = input_['tgt']
            pred = self.model(input_)
            loss = self.crit(pred, tgt)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            tgt_lens = batch_dict['tgt_lengths']
            tok_count = self._num_toks(tgt_lens)
            reporting_loss = loss.item() * tok_count
            epoch_loss += reporting_loss
            epoch_toks += tok_count
            self.nstep_agg += reporting_loss
            self.nstep_div += tok_count

            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()

        metrics = self.calc_metrics(epoch_loss, epoch_toks)
        self.train_epochs += 1
        self.report(
            self.train_epochs, metrics, start,
            'Train', 'EPOCH', reporting_fns
        )
        return metrics