Example #1
0
    def train(self):
        """Train the model until ``config.optim.max_iters`` steps are reached.

        The training batches are reshuffled before every pass. For each batch
        one gradient step is taken, after which bookkeeping keyed on
        ``train_state.train_steps`` runs (skipped while the step count is 0):

        - every 10 steps: ``_update_metadata`` is called
        - every ``config.timing.eval_small`` steps: a small evaluation is run
          and the gradient norm is logged to ``tb_logger``
        - every ``config.timing.eval_big`` steps: a big evaluation is run and
          a checkpoint is saved
        - once ``config.optim.max_iters`` steps are reached: return

        When a gradient step reports non-finite gradients, the train state is
        saved to ``workspace.nan_checkpoints`` and the offending batch is
        pickled next to it as ``<train_steps>.examples``; training then
        carries on with the regular bookkeeping.

        NOTE: modifies TrainState in place.
        - parameters of the Editor and Optimizer are updated
        - train_steps is updated
        - random number generator states are updated at every checkpoint
        """
        # TODO(kelvin): do something to preserve random state upon reload?
        train_state = self.train_state
        examples = self._examples
        config = self.config
        workspace = self.workspace
        with random_state(train_state.random_state):
            editor = train_state.model
            train_batches = similar_size_batches(examples.train,
                                                 config.optim.batch_size)
            # Exercise the model on one batch before the loop starts
            # (what test_batch checks is not visible from this method).
            editor.test_batch(train_batches[0])
            while True:
                random.shuffle(train_batches)
                for batch in verboserate(train_batches,
                                         desc='Streaming training examples'):
                    loss, _, _ = editor.loss(batch)

                    finite_grads, grad_norm = self._take_grad_step(
                        train_state, loss)
                    if not finite_grads:
                        # Keep both the state and the batch that produced the
                        # bad gradient so the failure can be inspected later.
                        train_state.save(workspace.nan_checkpoints)

                        examples_path = join(
                            workspace.nan_checkpoints,
                            '{}.examples'.format(train_state.train_steps))
                        # Pickle data is binary; text mode can corrupt it on
                        # some platforms and fails outright on Python 3.
                        with open(examples_path, 'wb') as f:
                            pickle.dump(batch, f)

                        print('Gradient was NaN/inf on step {}.'.format(
                            train_state.train_steps))

                    step = train_state.train_steps
                    if step == 0:
                        # No periodic work (and no termination check) at step 0.
                        continue

                    # run periodic evaluation and saving
                    if step % 10 == 0:
                        self._update_metadata(train_state)
                    if step % config.timing.eval_small == 0:
                        self.evaluate(step, big_eval=False)
                        self.tb_logger.log_value('grad_norm', grad_norm,
                                                 step)
                    if step % config.timing.eval_big == 0:
                        self.evaluate(step, big_eval=True)
                        self.checkpoints.save(train_state)
                    if step >= config.optim.max_iters:
                        return
Example #2
0
    def train(self):
        """Train the VAE, then build the retrieval structure.

        Both steps run inside the ``random_state`` context created from
        ``self.train_state.random_state``:

        1. ``self.train_vae()`` is called.
        2. The value returned by ``self.setup_ret()`` is stored on
           ``self.lsh``.

        NOTE: per the original author's notes, TrainState is modified in
        place (model/optimizer parameters, train_steps and RNG state); those
        updates happen inside the helpers called here.
        """
        rng_state = self.train_state.random_state
        with random_state(rng_state):
            self.train_vae()
            self.lsh = self.setup_ret()
    def train(self):
        """Take gradient steps over shuffled batches until ``max_iters``.

        Training examples are grouped into batches by the length of their
        ``output_words``. The batch list is reshuffled before every pass, and
        after each gradient step the following runs, keyed on
        ``train_state.train_steps``:

        - a warning is printed if the gradient step reported non-finite grads
        - ``_update_metadata`` is called (every step)
        - every ``config.eval.eval_steps`` steps: small evaluation
        - every ``config.eval.big_eval_steps`` steps: big evaluation
        - every ``config.eval.save_steps`` steps: checkpoint save
        - once ``config.optim.max_iters`` steps are reached: return
        """
        state = self.train_state
        cfg = self.config
        model = state.model
        optimizer = state.optimizer  # attribute read kept; not used below

        def output_length(example):
            # Batches are grouped by the number of output words.
            return len(example.output_words)

        # group into training batches
        batches = similar_size_batches(self.examples.train,
                                       batch_size=cfg.optim.batch_size,
                                       size=output_length)

        with random_state(state.random_state):
            while True:
                # WARNING: random state of train state does not exactly restore state anymore, due to this shuffle
                random.shuffle(batches)
                for batch in verboserate(batches, desc='Streaming example batches'):
                    # take gradient step
                    batch_loss = model.loss(batch, cfg.optim.num_negatives)
                    grads_ok = self._take_grad_step(state, batch_loss)  # TODO: clip gradient?
                    step = state.train_steps

                    if not grads_ok:
                        print('WARNING: grads not finite at step {}'.format(step))

                    self._update_metadata(state)

                    # run periodic evaluation and saving
                    due_small_eval = step % cfg.eval.eval_steps == 0
                    if due_small_eval:
                        self._evaluate(self.examples, big_eval=False)

                    due_big_eval = step % cfg.eval.big_eval_steps == 0
                    if due_big_eval:
                        self._evaluate(self.examples, big_eval=True)

                    due_save = step % cfg.eval.save_steps == 0
                    if due_save:
                        self.checkpoints.save(state)

                    if step >= cfg.optim.max_iters:
                        return