Example #1
0
    def run_epoch(self, train, dev, train_eval, epoch):
        """Performs one complete pass over the train set and evaluates.

        Args:
            train: dataset that yields tuple of sentences, tags
            dev: dataset evaluated after the epoch ("dev acc")
            train_eval: dataset evaluated after the epoch ("train acc")
            epoch: (int) index of the current epoch

        Returns:
            (acc_train, acc_test, train_loss): accuracy on train_eval,
                accuracy on dev, and the loss of the last training batch
        """
        # progbar stuff for logging
        batch_size = self.config.batch_size
        nbatches = (len(train) + batch_size - 1) // batch_size
        prog = Progbar(target=nbatches)

        # Defaults so the post-loop logging/return below cannot raise
        # NameError when the train set yields no batches.
        i = -1
        train_loss = 0.0

        # iterate over dataset
        for i, (words, labels) in enumerate(minibatches(train, batch_size)):
            fd, _ = self.get_feed_dict(True, words, labels, lr=self.config.lr)
            _, train_loss = self.sess.run([self.train_op, self.loss],
                                          feed_dict=fd)
            prog.update(i + 1, values=[("train loss", train_loss)])

        acc_train = self.evaluate(train_eval)
        acc_test = self.evaluate(dev)

        prog.update(i + 1,
                    epoch, [("train loss", train_loss)],
                    exact=[("dev acc", acc_test), ("train acc", acc_train),
                           ("lr", self.config.lr)])

        return acc_train, acc_test, train_loss
Example #2
0
    def evaluate(self, test):
        """Evaluates token-level accuracy on a test set.

        Args:
            test: dataset that yields tuple of (sentences, tags)

        Returns:
            acc: (float) fraction of tokens whose predicted label matches
                the gold label, in [0, 1]; 0.0 for an empty dataset
        """
        accs = []
        for words, labels in minibatches(test, self.config.batch_size):
            labels_pred, sequence_lengths = self.predict_batch(words)
            for lbls_true, lbls_pred, length in zip(labels, labels_pred,
                                                    sequence_lengths):
                # compare sentence labels only up to the true (unpadded)
                # sentence length
                lbls_true = lbls_true[:length]
                lbls_pred = lbls_pred[:length]
                accs += [a == b for (a, b) in zip(lbls_true, lbls_pred)]
        # np.mean([]) returns nan and emits a RuntimeWarning; guard the
        # empty-dataset case explicitly.
        if not accs:
            return 0.0
        return np.mean(accs)
Example #3
0
 def prebuild_feed_dict_batches(self, queue, dataset, batch_size):
     """Pushes a prebuilt feed dict for every minibatch of `dataset` onto
     `queue`, then pushes the 'END' sentinel to signal completion."""
     for sentence_batch, label_batch in minibatches(dataset, batch_size):
         fd = self.prebuild_feed_dict_batch(sentence_batch, label_batch)
         queue.put(fd)
     queue.put('END')
Example #4
0
    def _train_epochwise(self, train, dev, train_eval):
        """Performs training with early stopping and lr decay.

        Args:
            train: dataset that yields tuple of (sentences, tags)
            dev: dataset used to select the best model (early stopping)
            train_eval: dataset used to track accuracy on training data

        Returns:
            best_score: (float) best dev accuracy reached during training
        """
        updates, epoch, best_score, nepoch_no_imprv = 0, 0, 0, 0
        # Last dev accuracy observed. Initialized to best_score so the
        # "on-no-improvement" decay branch neither raises NameError nor
        # fires before the first evaluation has happened.
        acc_test = best_score
        batch_size = self.config.batch_size
        max_epochs = self.config.max_epochs
        nbatches = (len(train) + batch_size - 1) // batch_size

        while epoch < max_epochs:
            # Run one epoch
            epoch_time = time()
            train_time = time()

            epoch_train_loss = 0
            train_loss = 0.0  # default so logging works on an empty epoch
            batch_idx = 0  # renamed from `iter`, which shadowed the builtin
            prog = Progbar(target=nbatches)
            for words, labels in minibatches(train, batch_size):
                fd, _ = self.get_feed_dict(True,
                                           words,
                                           labels,
                                           lr=self.config.lr)
                _, train_loss = self.sess.run([self.train_op, self.loss],
                                              feed_dict=fd)
                epoch_train_loss += train_loss
                updates += 1

                if updates % self.config.lr_decay_step == 0:
                    # apply decay
                    if self.config.lr_decay_strategy == "on-no-improvement":
                        if acc_test < best_score:
                            self.config.lr *= self.config.lr_decay
                    elif self.config.lr_decay_strategy == "exponential":
                        self.config.lr *= self.config.lr_decay
                    elif self.config.lr_decay_strategy == "step":
                        self.config.lr = self.config.step_decay_init_lr * \
                            math.pow(self.config.step_decay_drop,
                                     math.floor(
                                         epoch / self.config.step_decay_epochs_drop))
                    elif self.config.lr_decay_strategy is None:
                        pass
                    else:
                        raise ValueError("Invalid 'decay_strategy' setting: " +
                                         self.config.lr_decay_strategy)

                prog.update(batch_idx + 1, values=[("train loss", train_loss)])
                batch_idx += 1

            train_time = time() - train_time

            # evaluate epoch
            acc_train = self.evaluate(train_eval)

            eval_time = time()
            acc_test = self.evaluate(dev)
            eval_time = time() - eval_time

            epoch_time = time() - epoch_time

            # log epoch
            prog.update(batch_idx + 1,
                        epoch, [("train loss", train_loss)],
                        exact=[("dev acc", acc_test), ("train acc", acc_train),
                               ("lr", self.config.lr)])
            # max(batch_idx, 1) guards against ZeroDivisionError when the
            # train set produced no batches.
            self.write_epoch_results(epoch,
                                     acc_train,
                                     acc_test,
                                     epoch_train_loss / max(batch_idx, 1),
                                     nbatches,
                                     epoch_time=epoch_time,
                                     train_time=train_time,
                                     eval_time=eval_time)

            # early stopping and saving checkpoint
            if acc_test >= best_score:
                nepoch_no_imprv = 0
                self.save_session()
                best_score = acc_test
            else:
                nepoch_no_imprv += 1
                if nepoch_no_imprv >= self.config.nepoch_no_imprv:
                    self.logger.info(
                        "- early stopping {} epochs without improvement".
                        format(nepoch_no_imprv))
                    break
            epoch += 1
        return best_score
Example #5
0
    def train_stepwise(self, train, dev, train_eval):
        """Performs training with early stopping and lr decay, evaluating
        every `config.updates_per_epoch` parameter updates rather than once
        per pass over the data.

        Args:
            train: dataset that yields tuple of (sentences, tags)
            dev: dataset used to select the best model (early stopping)
            train_eval: dataset used to track accuracy on training data

        Returns:
            best_score: (float) best dev accuracy reached during training
        """
        best_score = 0
        nepoch_no_imprv = 0
        updates = 0
        epoch_train_loss = 0

        prog = Progbar(target=self.config.updates_per_epoch)

        while 1:
            for words, labels in minibatches(train, self.config.batch_size):
                fd, _ = self.get_feed_dict(True,
                                           words,
                                           labels,
                                           lr=self.config.lr)
                _, train_loss = self.sess.run([self.train_op, self.loss],
                                              feed_dict=fd)
                prog.update((updates % self.config.updates_per_epoch) + 1,
                            values=[("train loss", train_loss)])
                epoch_train_loss += train_loss
                updates += 1

                if updates % self.config.updates_per_epoch == 0:
                    acc_train = self.evaluate(train_eval)
                    acc_test = self.evaluate(dev)

                    # `updates` is an exact multiple here, so use floor
                    # division to keep `epoch` an int (true division `/`
                    # made it a float under Python 3).
                    epoch = updates // self.config.updates_per_epoch
                    prog.update(self.config.updates_per_epoch,
                                epoch, [("train loss", train_loss)],
                                exact=[("dev acc", acc_test),
                                       ("train acc", acc_train),
                                       ("lr", self.config.lr)])
                    self.write_epoch_results(
                        epoch, acc_train, acc_test,
                        epoch_train_loss / self.config.updates_per_epoch)

                    epoch_train_loss = 0

                    # early stopping and saving best parameters
                    if acc_test >= best_score:
                        nepoch_no_imprv = 0
                        self.save_session()
                        best_score = acc_test
                    else:
                        nepoch_no_imprv += 1
                        if nepoch_no_imprv >= self.config.nepoch_no_imprv:
                            return best_score

                    # apply decay
                    if self.config.lr_decay_strategy == "on-no-improvement":
                        if acc_test < best_score:
                            self.config.lr *= self.config.lr_decay
                    elif self.config.lr_decay_strategy == "exponential":
                        self.config.lr *= self.config.lr_decay
                    elif self.config.lr_decay_strategy == "step":
                        self.config.lr = self.config.step_decay_init_lr * math.pow(
                            self.config.step_decay_drop,
                            math.floor(
                                epoch / self.config.step_decay_epochs_drop))
                    elif self.config.lr_decay_strategy is None:
                        pass
                    else:
                        raise ValueError("Invalid 'decay_strategy' setting: " +
                                         self.config.lr_decay_strategy)

                    # reset the progress bar for the next pseudo-epoch
                    if updates < self.config.max_updates:
                        prog = Progbar(target=self.config.updates_per_epoch)

                if updates >= self.config.max_updates:
                    return best_score