Example #1
    def train(self, epoch, training_classes, indices=None):
        """Train one epoch of this model by iterating through mini batches.

        An epoch ends after one pass through the training set, or if the
        number of mini batches exceeds the parameter "batches_in_epoch".
        """
        self.logger.info("epoch: %s", epoch)

        t0 = time.time()

        self.logger.info(
            "Learning rate: %s",
            self.learning_rate
            if self.lr_scheduler is None
            else self.lr_scheduler.get_lr(),
        )
        f = self.combine_classes(training_classes)
        self.pre_epoch()

        fparams = []
        if self.freeze_params == "output":
            fparams.append([self.model.linear2.module.weight, indices])

        train_model(self.model, self.train_loader, self.optimizer, self.device,
                    freeze_params=fparams,
                    batches_in_epoch=self.batches_in_epoch)

        self.post_epoch()
        self.logger.info("training duration: %s", time.time() - t0)
        f.close()
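In the snippet above, freeze_params pairs the output layer's weight tensor with the row indices that should stay fixed for this epoch. How train_model consumes those pairs is not shown here; one standard way to get the same effect in plain PyTorch is to zero the matching gradient rows between loss.backward() and optimizer.step(). The helper below is only an illustration of that idea, not the library's implementation:

    import torch

    def zero_frozen_gradients(freeze_params):
        """Zero the gradients of (weight, indices) pairs so the selected
        rows keep their current values across the optimizer step."""
        with torch.no_grad():
            for weight, indices in freeze_params:
                if weight.grad is None:
                    continue
                if indices is None:
                    weight.grad.zero_()          # freeze the whole tensor
                else:
                    weight.grad[indices] = 0.0   # freeze only the listed rows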
Example #2
    def train(self, epoch):
        """Train one epoch of this model by iterating through mini batches.

        An epoch ends after one pass through the training set, or if the
        number of mini batches exceeds the parameter "batches_in_epoch".
        """
        if epoch == 0:
            loader = self.first_loader
            batches_in_epoch = self.batches_in_first_epoch
        else:
            loader = self.train_loader
            batches_in_epoch = self.batches_in_epoch

        self.logger.info("epoch: %s", epoch)
        t0 = time.time()
        self.pre_epoch()
        self.logger.info("learning rate: %s", self.lr_scheduler.get_lr())
        train_model(
            model=self.model,
            loader=loader,
            optimizer=self.optimizer,
            device=self.device,
            batches_in_epoch=batches_in_epoch,
        )
        self.post_epoch()
        self.logger.info("training duration: %s", time.time() - t0)
Example #3
    def train(self, epoch):
        """Train one epoch of this model by iterating through mini batches.

        An epoch ends after one pass through the training set, or if the
        number of mini batches exceeds the parameter "batches_in_epoch".
        """
        self.logger.info("epoch: %s", epoch)

        t0 = time.time()

        self.logger.info(
            "Learning rate: %s",
            self.learning_rate
            if self.lr_scheduler is None else self.lr_scheduler.get_lr(),
        )

        self.pre_epoch()
        train_model(self.model,
                    self.train_loader,
                    self.optimizer,
                    self.device,
                    batches_in_epoch=self.batches_in_epoch,
                    post_batch_callback=self.post_batch)
        self.post_epoch()

        self.logger.info("training duration: %s", time.time() - t0)
Example #4
    def find_best_lr(self, num_classes_learned):
        """
        This is a simple hyper-parameter search for a good lr:
            1) Sample num_classes_learned classes
            2) Train over the sampled classes, once for each lr
            3) Evaluate the model on a held-out set
            4) Repeat as many times as desired and pick the lr that performs
               best most often
        """

        lr_all = []

        # Grid search over lr
        for _ in range(0, self.num_lr_search_runs):

            # Choose num_classes_learned random classes to train and then test on.
            new_tasks = np.random.choice(self.num_classes_eval,
                                         num_classes_learned,
                                         replace=False)

            max_acc = -1000
            for lr in self.lr_sweep_range:

                # Reset output layer weights.
                if self.reset_output_params:
                    output_params = self.get_named_output_params()
                    self.reset_params(output_params.values())

                # Meta-test training.
                test_train_param = self.get_named_test_train_params()
                optim = Adam(test_train_param.values(), lr=lr)
                for task in new_tasks:
                    self.test_train_loader.sampler.set_active_tasks(task)
                    train_model(
                        model=self.get_model(),
                        loader=self.test_train_loader,
                        optimizer=optim,
                        device=self.device,
                        criterion=self._loss_function,
                    )

                # Meta-test testing.
                self.test_test_loader.sampler.set_active_tasks(new_tasks)
                results = evaluate_model(
                    model=self.get_model(),
                    loader=self.test_test_loader,
                    device=self.device,
                    criterion=self._loss_function,
                )
                correct = results["total_correct"]

                acc = correct / len(self.test_test_loader.sampler.indices)
                if (acc > max_acc):
                    max_acc = acc
                    max_lr = lr

            lr_all.append(max_lr)

        best_lr = float(stats.mode(lr_all)[0][0])
        return best_lr
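The final line picks the lr that won the most search runs via scipy.stats.mode, whose return layout has changed across SciPy releases. A version-independent way to express the same "most frequent winner" step (tie-breaking may differ from scipy.stats.mode) is:

    from collections import Counter

    def most_common_lr(lr_all):
        """Return the learning rate that won the most search runs."""
        (best_lr, _count), = Counter(lr_all).most_common(1)
        return float(best_lr)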
Example #5
    def train_epoch(self, epoch):
        train_model(
            model=self.model,
            loader=self.train_loader,
            optimizer=self.optimizer,
            device=self.device,
            criterion=self.loss_function,
            batches_in_epoch=self.batches_in_epoch,
            pre_batch_callback=functools.partial(self.pre_batch, epoch=epoch),
            post_batch_callback=functools.partial(self.post_batch, epoch=epoch),
        )
Example #6
    def train(self, epoch):
        self.logger.info("epoch: %s", epoch)
        t0 = time.time()
        self.pre_epoch()
        train_model(
            model=self.model,
            loader=self.train_loader,
            optimizer=self.optimizer,
            device=self.device,
            batches_in_epoch=self.batches_in_epoch,
            criterion=self.loss_function,
        )
        self.post_epoch()
        self.logger.info("training duration: %s", time.time() - t0)
Example #7
    def train_epoch(self, epoch):
        """This should be called to do one epoch of training and testing.

        Returns:
            A dict that describes progress of this epoch.
            The dict includes the key 'stop'. If it is set to one, training is
            not progressing well enough and this network should be stopped early.
        """
        t1 = time.time()
        if epoch == 0:
            train_loader = self.first_loader
            batches_in_epoch = self.batches_in_first_epoch
        else:
            train_loader = self.train_loader
            batches_in_epoch = self.batches_in_epoch

        train_model(
            model=self.model,
            loader=train_loader,
            optimizer=self.optimizer,
            device=self.device,
            batches_in_epoch=batches_in_epoch,
            criterion=self.loss_function,
            post_batch_callback=self._post_batch,
        )
        self._post_epoch(epoch)
        train_time = time.time() - t1

        ret = self.run_noise_tests(self.noise_values, self.test_loaders, epoch)

        # Hard coded early stopping criteria for quicker experimentation
        if ((epoch > 3 and abs(ret["mean_accuracy"] - 0.1) < 0.01)
                # or (ret['noise_accuracy'] > 0.66 and ret['test_accuracy'] > 0.91)
                or (ret["noise_accuracy"] > 0.69 and ret["test_accuracy"] > 0.91)
                or (ret["noise_accuracy"] > 0.62 and ret["test_accuracy"] > 0.92)
                # or (epoch > 10 and ret["noise_accuracy"] < 0.40)
                # or (epoch > 30 and ret["noise_accuracy"] < 0.44)
                # or (epoch > 40 and ret["noise_accuracy"] < 0.50)
                ):
            ret["stop"] = 1
        else:
            ret["stop"] = 0

        ret["epoch_time_train"] = train_time
        ret["epoch_time"] = time.time() - t1
        ret["learning_rate"] = self.learning_rate
        # print(epoch, ret)
        return ret
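The dict returned by train_epoch is intended for an outer loop that watches the 'stop' flag. A minimal driver sketch, assuming an experiment object exposing this train_epoch method (the names run_experiment and exp are illustrative, not from the source):

    def run_experiment(exp, max_epochs):
        """Train until max_epochs or until train_epoch reports stop=1."""
        results = []
        for epoch in range(max_epochs):
            ret = exp.train_epoch(epoch)
            results.append(ret)
            if ret["stop"]:
                break
        return results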
Example #8
    def _train(self):
        if self._iteration == 0:
            train_loader = self.first_loader
        else:
            train_loader = self.train_loader

        train_model(
            model=self.model,
            loader=train_loader,
            optimizer=self.optimizer,
            device=self.device,
        )
        self.model.apply(rezero_weights)
        self.model.apply(update_boost_strength)

        return evaluate_model(model=self.model,
                              loader=self.test_loader,
                              device=self.device)
Example #9
    def _train(self):
        if self._iteration == 0:
            train_loader = self.first_loader
        else:
            train_loader = self.train_loader

        train_model(
            model=self.model,
            loader=train_loader,
            optimizer=self.optimizer,
            device=self.device,
            post_batch_callback=self._post_batch,
        )
        self.model.apply(update_boost_strength)

        return evaluate_model(
            model=self.model, loader=self.test_loader, device=self.device
        )
Example #10
    def train_epoch(self, epoch):
        with torch.autograd.profiler.profile(
                use_cuda=torch.cuda.is_available(),
                enabled=self.profile) as prof:
            train_model(
                model=self.model,
                loader=self.train_loader,
                optimizer=self.optimizer,
                device=self.device,
                criterion=self.loss_function,
                batches_in_epoch=self.batches_in_epoch,
                pre_batch_callback=functools.partial(self.pre_batch, epoch=epoch),
                post_batch_callback=functools.partial(self.post_batch, epoch=epoch),
            )
        if self.profile and prof is not None:
            self.logger.info(
                prof.key_averages().table(sort_by="self_cpu_time_total"))
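The snippet above uses the legacy torch.autograd.profiler API and logs a table sorted by CPU self time. On a CUDA run it can be more informative to sort by GPU time instead; a small helper along those lines (a sketch that reuses the same profiler object):

    import torch

    def log_profile(prof, logger):
        """Log a profiler summary, sorted by GPU time when CUDA is available."""
        sort_key = ("cuda_time_total" if torch.cuda.is_available()
                    else "self_cpu_time_total")
        logger.info(prof.key_averages().table(sort_by=sort_key))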
Example #11
    def train_epoch(self, epoch):
        """
        This should be called to do one epoch of training and testing.

        Returns:
            A dict that describes progress of this epoch.
            The dict includes the key 'stop'. If it is set to one, training is
            not progressing well enough and this network should be stopped early.
        """
        t1 = time.time()
        if epoch == 0:
            train_loader = self.first_loader
            batches_in_epoch = self.batches_in_first_epoch
        else:
            train_loader = self.train_loader
            batches_in_epoch = self.batches_in_epoch

        train_model(
            model=self.model,
            loader=train_loader,
            optimizer=self.optimizer,
            device=self.device,
            batches_in_epoch=batches_in_epoch,
            criterion=self.loss_function,
            post_batch_callback=self._post_batch,
        )

        train_time = time.time() - t1

        ret = self.run_noise_tests(self.noise_values, self.test_loaders, epoch)
        self._post_epoch(epoch, ret["mean_loss"])

        if self.early_stopping:
            ret["stop"] = self._early_stopping(epoch, ret["mean_loss"])
        else:
            ret["stop"] = 0
        ret["epoch_time_train"] = train_time
        ret["epoch_time"] = time.time() - t1
        ret["learning_rate"] = self.learning_rate
        # print(epoch, ret)
        return ret
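Here the stopping rule is delegated to self._early_stopping(epoch, mean_loss), whose implementation is not part of this snippet. A plausible patience-based sketch of such a method is shown below; the repository's actual criterion may differ:

    def _early_stopping(self, epoch, mean_loss, patience=5, min_epochs=3):
        """Return 1 when mean_loss has not improved for `patience` epochs."""
        best = getattr(self, "_best_loss", float("inf"))
        if mean_loss < best:
            self._best_loss = mean_loss
            self._epochs_since_improvement = 0
        else:
            self._epochs_since_improvement = getattr(
                self, "_epochs_since_improvement", 0) + 1
        stop = epoch >= min_epochs and self._epochs_since_improvement >= patience
        return int(stop)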
Example #12
    def run_meta_testing_phase(self, num_classes_learned):
        """
        Run the meta-testing phase: train over num_classes_learned classes and then
        test on a held-out set composed of those same classes (the meta-test test
        set). This shows the model's ability to do continual learning in a way that
        generalizes. In addition, at the end of this phase, this function evaluates
        the model's performance on the meta-test training set to assess its ability
        to memorize without forgetting.
        """

        # Decide on the lr to use.
        if self.run_lr_sweep:
            lr = self.find_best_lr(num_classes_learned)
        else:
            lr = self.lr_sweep_range[-1]

        meta_test_test_accuracies = []
        meta_test_train_accuracies = []
        for _ in range(0, self.num_meta_testing_runs):

            # Choose num_classes_learned random classes to train and then test on.
            new_tasks = np.random.choice(self.num_classes_eval,
                                         num_classes_learned,
                                         replace=False)

            # Reset output layer weights.
            if self.reset_output_params:
                output_params = self.get_named_output_params()
                self.reset_params(output_params.values())

            # Meta-testing training.
            test_train_param = self.get_named_test_train_params()
            optim = Adam(test_train_param.values(), lr=lr)
            for task in new_tasks:
                self.test_train_loader.sampler.set_active_tasks(task)
                train_model(
                    model=self.get_model(),
                    loader=self.test_train_loader,
                    optimizer=optim,
                    device=self.device,
                    criterion=self._loss_function,
                )

            # Meta-testing testing (using the test-test set).
            self.test_test_loader.sampler.set_active_tasks(new_tasks)
            results = evaluate_model(
                model=self.model,
                loader=self.test_test_loader,
                device=self.device,
                criterion=self._loss_function,
            )
            correct = results["total_correct"]

            acc = correct / len(self.test_test_loader.sampler.indices)
            meta_test_test_accuracies.append(acc)

            # Meta-testing testing (using the test-train set).
            self.test_train_eval_loader.sampler.set_active_tasks(new_tasks)
            results = evaluate_model(
                model=self.get_model(),
                loader=self.test_train_eval_loader,
                device=self.device,
                criterion=self._loss_function,
            )
            correct = results["total_correct"]

            acc = correct / len(self.test_train_eval_loader.sampler.indices)
            meta_test_train_accuracies.append(acc)

        return meta_test_train_accuracies, meta_test_test_accuracies, lr
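run_meta_testing_phase returns the per-run accuracy lists plus the lr used. A small helper for summarizing those lists into mean and standard deviation (a reporting sketch, not part of the source):

    import numpy as np

    def summarize_meta_testing(train_accs, test_accs, lr):
        """Reduce per-run accuracies to mean/std for reporting."""
        return {
            "lr": lr,
            "meta_test_train_acc_mean": float(np.mean(train_accs)),
            "meta_test_train_acc_std": float(np.std(train_accs)),
            "meta_test_test_acc_mean": float(np.mean(test_accs)),
            "meta_test_test_acc_std": float(np.std(test_accs)),
        }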