Code Example #1
def test_model(trainer: ModelTrainer, x: torch.Tensor, y: torch.Tensor,
               opts: ExperimentOptions) -> float:
    """
    Evaluate the model's accuracy with a test solver and a test step size.
    :param trainer: model trainer
    :param x: batch data
    :param y: batch labels
    :param opts: experiment options used for training the Neural ODE
    :return: test solver accuracy
    """
    train_step_size = trainer.model.feature_ex_block.options['step_size']
    train_solver = trainer.model.feature_ex_block.solver
    test_solver, test_step_size = find_test_model(solver=train_solver,
                                                  step_size=train_step_size)

    trainer.model.feature_ex_block.options['step_size'] = test_step_size
    trainer.model.feature_ex_block.solver = test_solver
    with torch.no_grad():
        logits = trainer.forward_one_step(x)
    acc = calculate_accuracy(y=y,
                             logits=logits,
                             batch_size=opts.batch_size,
                             num_classes=opts.num_classes)
    trainer.model.feature_ex_block.options['step_size'] = train_step_size
    trainer.model.feature_ex_block.solver = train_solver
    return acc
Code Example #2
def test_model(trainer: ModelTrainer, x: torch.Tensor, y: torch.Tensor,
               opts: ExperimentOptions, train_solver_nfe: int) -> float:
    """
    Evaluate the model's accuracy with a test solver and a test tolerance.
    :param trainer: model trainer
    :param x: batch data
    :param y: batch labels
    :param opts: experiment options used for training the Neural ODE
    :param train_solver_nfe: number of function evaluations taken by the train solver
    :return: test solver accuracy
    """
    train_tol = trainer.model.feature_ex_block.tol
    train_solver = trainer.model.feature_ex_block.solver
    test_solver, test_tol = find_test_model(solver=train_solver, tol=train_tol)
    while True:
        trainer.model.feature_ex_block.tol = test_tol
        trainer.model.feature_ex_block.solver = test_solver
        trainer.model.feature_ex_block.nfe = 0
        with torch.no_grad():
            logits = trainer.forward_one_step(x)
        acc = calculate_accuracy(y=y,
                                 logits=logits,
                                 batch_size=opts.batch_size,
                                 num_classes=opts.num_classes)
        test_solver_nfe = trainer.model.feature_ex_block.nfe
        trainer.model.feature_ex_block.nfe = 0
        if test_solver_nfe > train_solver_nfe:
            break
        """If test solver takes same number of steps as train solver decrease tolerance further if same solver for 
        training and testing is used."""
        test_tol = test_tol / 10

    trainer.model.feature_ex_block.tol = train_tol
    trainer.model.feature_ex_block.solver = train_solver
    return acc
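
Both test_model variants above call find_test_model, which is not shown on this page. The sketch below is only an inferred stand-in that matches the call sites (keyword arguments solver, step_size, tol; a (solver, hyperparameter) return pair); the solver names and the pairing rule are assumptions, not the project's actual logic.

from typing import Optional, Tuple

def find_test_model(solver: str,
                    step_size: Optional[float] = None,
                    tol: Optional[float] = None) -> Tuple[str, float]:
    # Assumed pairing: test with a different (here: lower-order) solver, so a
    # train/test accuracy gap exposes dynamics the current step size or
    # tolerance does not resolve.
    lower_order = {'rk4': 'midpoint', 'midpoint': 'euler', 'euler': 'euler'}
    if step_size is not None:
        # Fixed-step case: keep the step size, swap in a lower-order solver.
        return lower_order.get(solver, 'euler'), step_size
    # Adaptive case: same solver, start from a 10x tighter tolerance (the
    # caller keeps tightening until the test solver takes more steps).
    return solver, tol / 10
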
Code Example #3
def load_model(path: str,
               model_iter: int,
               use_gpu: bool = True
               ) -> Tuple[ModelTrainer, DataLoader, ExperimentOptions]:
    opts_folder = 'options'
    file = 'opts.pkl'
    file_path = os.path.join(path, opts_folder, file)

    with open(file_path, "rb") as input_file:
        opts = pickle.load(input_file)
    opts.fixed_step_solver = True
    if not use_gpu:
        opts.use_gpu = False
    check_folder = 'checkpoints'

    test_opts = copy.deepcopy(opts)
    test_opts.split = 'test'
    test_dataloader = data.create_dataloader.create_dataloader(test_opts)

    file = f'model_iter_{model_iter}.pth'
    file_path = os.path.join(path, check_folder, file)

    trainer = ModelTrainer(opts)
    if use_gpu:
        state_dict = torch.load(file_path)
    else:
        state_dict = torch.load(file_path, map_location=torch.device('cpu'))
    trainer.model.load_state_dict(state_dict['model_state'])
    return trainer, test_dataloader, opts
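
A minimal usage sketch for load_model; the experiment directory and checkpoint iteration below are illustrative placeholders, not paths from the project.

# Restore a trained Neural ODE and its test dataloader on CPU.
trainer, test_dataloader, opts = load_model(path='experiments/my_run',
                                            model_iter=1000,
                                            use_gpu=False)
trainer.model.eval()
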
Code Example #4
    def auto_train(self):
        self.get_pretrain_info()
        self.load_model()
        os.makedirs(self.model_save_folder, exist_ok=True)
        self.data_loader = dataset.DataLoader_Auto(self.data_src, label_dict,
                                                   self.batch_size, self.size)
        # MT = trainer.ModelTrainer(train_type, self.silent_model, self.size)
        # MT.train_with_test(self.data_loader.dataloaders_dict, self.criterion, self.optimizer_ft, self.epoch, self.is_inception, self.model_save_path, self.log_save_path)
        ModelTrainer.train_sport_model(self.sport_model,
                                       self.data_loader.dataloaders_dict,
                                       self.criterion, self.optimizer_ft,
                                       self.epoch, self.is_inception,
                                       self.model_save_path,
                                       self.log_save_path)
        print("train model done, save model to %s" %
              os.path.join(self.model_save_path, self.model_str))
        self.record()
Code Example #5
    def __init__(self):
        parser = argparse.ArgumentParser(
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        parser = initialize(parser)
        opts, unknown = parser.parse_known_args()
        self.opts = ExperimentOptions(opts)
        self.data_generator = self._get_data_generator()
        self.test_dataloader = self._get_test_dataloader()

        self.train_acc = None
        self.test_acc = None
        self.nfe_f = None
        self.nfe_b = None
        self.loss = None

        self.acc_log = {
            "train": torch.empty(self.opts.niter),
            "test": torch.empty(self.opts.niter),
        }
        self.loss_log = torch.empty(self.opts.niter)
        self.nfe_log = {
            "nfe_f": torch.empty(self.opts.niter),
            "nfe_b": torch.empty(self.opts.niter),
        }

        self.trainer = ModelTrainer(self.opts)
        if self.opts.use_gpu:
            self.trainer.model.cuda()

        # By default the device is cpu
        self.device = torch.device("cpu")
        if self.opts.use_gpu:
            self.device = torch.device("cuda:" + str(self.opts.gpu_ids[0]))

        # Initialize the summary writer
        if self.opts.use_tensorboard:
            self.writer = SummaryWriter(log_dir=self.opts.tensorboard_dir)
Code Example #6
def adapt_tol(trainer: ModelTrainer, train_solver_acc: float,
              train_solver_nfe_dict: Dict[str, torch.Tensor],
              current_iter: int, x: torch.Tensor, y: torch.Tensor,
              opts: ExperimentOptions):
    """
    Adapt the tolerance used for training as described in Algorithm 3 in the paper.
    If the tolerance is too large to guarantee continuous dynamics, the tolerance
    used for training is decreased; otherwise it is increased to minimize training time.
    :param trainer: model trainer
    :param train_solver_acc: accuracy reached by the train solver
    :param train_solver_nfe_dict: per-iteration log of function evaluations ("nfe_f"/"nfe_b")
    :param current_iter: index of the current training iteration
    :param x: batch data
    :param y: batch labels
    :param opts: experiment options used for training the Neural ODE
    """
    train_solver_nfe = int(train_solver_nfe_dict["nfe_f"][-1].detach())
    threshold = opts.threshold
    test_solver_acc = test_model(trainer=trainer,
                                 x=x,
                                 y=y,
                                 opts=opts,
                                 train_solver_nfe=train_solver_nfe)
    dif = np.abs(test_solver_acc - train_solver_acc)
    if dif > threshold:
        tol = trainer.model.feature_ex_block.tol
        new_tol = 0.5 * tol
        trainer.model.feature_ex_block.tol = new_tol
    else:
        tol = trainer.model.feature_ex_block.tol
        new_tol = 1.1 * tol
        trainer.model.feature_ex_block.tol = new_tol
        logits = trainer.forward_one_step(x)
        acc = calculate_accuracy(y=y,
                                 logits=logits,
                                 batch_size=opts.batch_size,
                                 num_classes=opts.num_classes)

        dif = np.abs(test_solver_acc - acc)

        if dif > threshold:
            new_tol = tol
        elif current_iter > 4 and \
                (trainer.model.feature_ex_block.nfe ==
                 train_solver_nfe_dict['nfe_f'][current_iter - 5:current_iter]).all():
            # Do not change the tolerance if the number of solver steps has
            # stayed constant over the last five iterations.
            new_tol = tol
        trainer.model.feature_ex_block.tol = new_tol
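
Distilled to its two branches, adapt_tol halves the tolerance when the train/test accuracy gap exceeds the threshold, and otherwise tentatively multiplies it by 1.1 (rolling the increase back if the looser tolerance itself shifts accuracy by more than the threshold, or if the step count has been constant). A standalone restatement of the core rule, not project code:

def next_tol(tol: float, acc_gap: float, threshold: float) -> float:
    # Tighten aggressively on disagreement, loosen slowly otherwise.
    return 0.5 * tol if acc_gap > threshold else 1.1 * tol

next_tol(1e-3, acc_gap=1.2, threshold=0.5)   # -> 5e-4 (tighten)
next_tol(1e-3, acc_gap=0.1, threshold=0.5)   # -> ~1.1e-3 (loosen)
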
Code Example #7
def adapt_step_size(trainer: ModelTrainer, train_solver_acc: float,
                    x: torch.Tensor, y: torch.Tensor, opts: ExperimentOptions):
    """
    Adapt the step size used for training as described in Algorithm 2 in the paper.
    If the step size is too large to guarantee continuous dynamics, the step size
    used for training is decreased; otherwise it is increased to minimize training time.
    :param trainer: model trainer
    :param train_solver_acc: accuracy reached by the train solver
    :param x: batch data
    :param y: batch labels
    :param opts: experiment options used for training the Neural ODE
    """
    threshold = opts.threshold
    max_steps = opts.max_steps
    test_solver_acc = test_model(trainer=trainer, x=x, y=y, opts=opts)
    dif = np.abs(test_solver_acc - train_solver_acc)
    if dif > threshold:
        step_size = trainer.model.feature_ex_block.options['step_size']
        # Halving the step size would double the number of steps (1/step_size),
        # so check the resulting step count against the max_steps cap first.
        if 1 / step_size * 2 > max_steps:
            if int(1 / step_size) == max_steps:
                print("WARNING: Cannot decrease step size further!")
                new_step_size = step_size
            else:
                # Pick the step size whose step count is the average of the
                # current count and max_steps: 1/new_h = (1/h + max_steps) / 2.
                new_step_size = 2 * step_size / (max_steps * step_size + 1)
        else:
            new_step_size = 0.5 * step_size
        trainer.model.feature_ex_block.options['step_size'] = new_step_size
    else:
        step_size = trainer.model.feature_ex_block.options['step_size']
        new_step_size = 1.1 * step_size
        if new_step_size > 1.0:
            new_step_size = 1.0
        trainer.model.feature_ex_block.options['step_size'] = new_step_size
        logits = trainer.forward_one_step(x)
        acc = calculate_accuracy(y=y,
                                 logits=logits,
                                 batch_size=opts.batch_size,
                                 num_classes=opts.num_classes)
        dif = np.abs(test_solver_acc - acc)
        if dif > threshold:
            new_step_size = step_size
        trainer.model.feature_ex_block.options['step_size'] = new_step_size
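
The capped branch above reads most naturally in terms of step counts: with step size h the solver takes 1/h steps, and new_step_size = 2h/(max_steps*h + 1) is equivalent to 1/h_new = (1/h + max_steps)/2, i.e. the new step count is the average of the current count and the cap. A quick numeric check (standalone, not project code):

h, max_steps = 1 / 12, 16                       # currently 12 steps, cap at 16
new_h = 2 * h / (max_steps * h + 1)
assert abs(1 / new_h - (12 + 16) / 2) < 1e-9    # new step count is 14
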
Code Example #8
File: main.py Project: juvekaradheesh/AML-Project
def main():

    f = open(RESULTS_FILE, "a")
    f.write("Results from " + str(datetime.datetime.now()) + "\n")

    # Load config
    config = Config(config_default)

    # Load data
    load_data = CifarDataLoader(config)
    train_data = load_data.get_train_data()
    validation_data = load_data.get_test_data()

    optimizers = ['adam', 'adagrad', 'sgd']
    # optimizers = ['adam']

    # Loop over multiple optimizers
    # Without dropout
    for optimizer in optimizers:

        # Set optimizer
        config_default['optimizer'] = optimizer

        # Load config
        config = Config(config_default)

        # Create model
        temp = ConvNet(config)
        model = temp.get_model()  # without dropout

        # Train model
        trainer = ModelTrainer(model, train_data, validation_data,
                               config)  # without dropout
        trainer.train()

        # Save trained model
        model_name = 'cnn_' + optimizer + '.h5'  # without dropout
        save_model = os.path.join(SAVE_DIR, model_name)
        trainer.save(save_model)

        # Print the results
        print("optimizer: ", optimizer)
        print("Without dropout")
        print("loss: ", trainer.loss)
        print("validation loss: ", trainer.val_loss)

        f.write("optimizer: " + optimizer + "\n\n")
        f.write("Without dropout \n")
        f.write("loss: " + str(trainer.loss) + "\n")
        f.write("validation loss: " + str(trainer.val_loss) + "\n")
        f.write("\n")

    # Loop over multiple optimizers
    # With dropout
    for optimizer in optimizers:

        # Set optimizer
        config_default['optimizer'] = optimizer

        # Load config
        config = Config(config_default)

        # Create model
        temp = ConvNetDropout(config)
        model_do = temp.get_model()  # with dropout

        # Train model
        trainer_do = ModelTrainer(model_do, train_data, validation_data,
                                  config)  # with dropout
        trainer_do.train()

        # Save trained model
        model_do_name = 'cnn_dropout_' + optimizer + '.h5'  # with dropout
        save_model_do = os.path.join(SAVE_DIR, model_do_name)
        trainer_do.save(save_model_do)

        # Print the results
        print("optimizer: ", optimizer)
        print("With dropout")
        print("loss: ", trainer_do.loss)
        print("validation loss: ", trainer_do.val_loss)

        f.write("optimizer: " + optimizer + "\n\n")
        f.write("With dropout \n")
        f.write("loss: " + str(trainer_do.loss) + "\n")
        f.write("validation loss: " + str(trainer_do.val_loss) + "\n")
        f.write("\n")

    f.write("\n\n\n")
    f.close()
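
For reference, the two training loops in main differ only in the model class and the file-name prefix; a condensed equivalent (illustrative only, reusing the names defined above):

for net_cls, prefix in [(ConvNet, 'cnn_'), (ConvNetDropout, 'cnn_dropout_')]:
    for optimizer in optimizers:
        config_default['optimizer'] = optimizer
        config = Config(config_default)
        trainer = ModelTrainer(net_cls(config).get_model(),
                               train_data, validation_data, config)
        trainer.train()
        trainer.save(os.path.join(SAVE_DIR, prefix + optimizer + '.h5'))
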
Code Example #9
class TrainModel:
    def __init__(self):
        parser = argparse.ArgumentParser(
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        parser = initialize(parser)
        opts, unknown = parser.parse_known_args()
        self.opts = ExperimentOptions(opts)
        self.data_generator = self._get_data_generator()
        self.test_dataloader = self._get_test_dataloader()

        self.train_acc = None
        self.test_acc = None
        self.nfe_f = None
        self.nfe_b = None
        self.loss = None

        self.acc_log = {
            "train": torch.empty(self.opts.niter),
            "test": torch.empty(self.opts.niter),
        }
        self.loss_log = torch.empty(self.opts.niter)
        self.nfe_log = {
            "nfe_f": torch.empty(self.opts.niter),
            "nfe_b": torch.empty(self.opts.niter),
        }

        self.trainer = ModelTrainer(self.opts)
        if self.opts.use_gpu:
            self.trainer.model.cuda()

        # By default the device is cpu
        self.device = torch.device("cpu")
        if self.opts.use_gpu:
            self.device = torch.device("cuda:" + str(self.opts.gpu_ids[0]))

        # Initialize the summary writer
        if self.opts.use_tensorboard:
            self.writer = SummaryWriter(log_dir=self.opts.tensorboard_dir)

    def run(self):
        torch.cuda.empty_cache()
        # Set random seed
        torch.manual_seed(self.opts.random_seed)

        loss_function = torch.nn.CrossEntropyLoss().to(self.device)
        # Time input for the ODE
        t = torch.as_tensor([0.0, 1.0]).to(self.device)

        print("Starting training....")
        if self.opts.use_adaption_algo:
            self._initialize_adaption_algo()
        for current_iter in range(self.opts.niter):
            self._iterate_one_training_step(current_iter, loss_function, t)
        if self.opts.evaluate_with_dif_solver:
            results = evaluate_with_dif_solver(
                trainer=self.trainer,
                test_dataloader=self.test_dataloader,
                opts=self.opts,
                device=self.device,
            )

            torch.save(
                results,
                os.path.join(
                    self.opts.experiment_dir,
                    f"eval_with_dif_solver_iter_{self.opts.niter - 1}.pt",
                ),
            )
        plot_results(self.opts)

    def _get_data_generator(self) -> Generator:
        # load the dataset
        dataloader = data.create_dataloader.create_dataloader(self.opts)
        print("\n{} dataloader of size {} was created\n".format(
            self.opts.dataset.upper(), len(dataloader)))
        # Wrap pytorch's dataloader in a generator function
        return inf_generator(dataloader)
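    # Note: inf_generator is defined elsewhere in the project; presumably it
    # cycles the dataloader forever so batches can be drawn per iteration:
    #
    #     def inf_generator(iterable):
    #         while True:
    #             yield from iterable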

    def _get_test_dataloader(self) -> DataLoader:
        test_opts = copy.deepcopy(self.opts)
        test_opts.split = "test"
        return data.create_dataloader.create_dataloader(test_opts)

    def _initialize_adaption_algo(self):
        x, _ = next(self.data_generator)
        x = x.to(self.device)
        if self.opts.fixed_step_solver:
            step_size = find_initial_step_size(
                mymodel=self.trainer.model,
                batch_data=x,
                order=return_order(self.opts.solver),
            )
            self.trainer.model.feature_ex_block.options[
                "step_size"] = step_size
        else:
            tol = self.opts.initial_tol
            self.trainer.model.feature_ex_block.tol = tol

    def _iterate_one_training_step(self, current_iter: int,
                                   loss_function: _Loss, t: torch.Tensor):
        self.trainer.model.train()
        self.trainer.optimizer.zero_grad()

        self.trainer.model.feature_ex_block.nfe = 0
        x, y = next(self.data_generator)
        x = x.to(self.device)
        y = y.to(self.device)

        logits = self.trainer.forward_one_step(x, t)
        self.loss = loss_function(logits, y)
        self.nfe_f = self.trainer.model.feature_ex_block.nfe
        self.trainer.model.feature_ex_block.nfe = 0
        self.loss.backward()
        self.nfe_b = self.trainer.model.feature_ex_block.nfe
        self.trainer.model.feature_ex_block.nfe = 0

        self.train_acc = calculate_accuracy(logits, y, self.opts.num_classes,
                                            self.opts.batch_size)

        if self.opts.evaluate_test_acc:
            with torch.no_grad():
                self.trainer.model.eval()
                self.test_acc = evaluate_model(self.trainer.model,
                                               self.test_dataloader, self.opts,
                                               self.device)

        self._save_current_state(current_iter)

        if self.opts.use_adaption_algo:
            self._apply_step_adaption_algo(current_iter, self.train_acc, x, y)

        if self.opts.use_tensorboard:
            self._create_tensorboard_logs(current_iter)
        self._print_training_info(current_iter)
        self.trainer.optimizer.step()
        torch.cuda.empty_cache()

    def _save_current_state(self, current_iter: int):
        self.nfe_log["nfe_f"][current_iter] = self.nfe_f
        self.nfe_log["nfe_b"][current_iter] = self.nfe_b
        self.loss_log[current_iter] = self.loss.cpu().detach()
        self.acc_log["train"][current_iter] = self.train_acc
        if self.opts.evaluate_test_acc:
            self.acc_log["test"][current_iter] = self.test_acc

        torch.save(self.loss_log,
                   os.path.join(self.opts.experiment_dir, "loss_log.pt"))
        torch.save(self.acc_log,
                   os.path.join(self.opts.experiment_dir, "acc_log.pt"))
        torch.save(self.nfe_log,
                   os.path.join(self.opts.experiment_dir, "nfe_log.pt"))

        # Save the current model
        if (current_iter + 1) % self.opts.model_checkpoint_freq == 0 or (
                current_iter + 1) == self.opts.niter:
            self.trainer.checkpoint_model_state(current_iter,
                                                self.opts.checkpoints_dir)

    def _create_tensorboard_logs(self, current_iter: int):
        self.writer.add_scalar("ACC/train", self.train_acc, current_iter + 1)
        self.writer.add_scalar("NFE/forward", self.nfe_f, current_iter + 1)
        self.writer.add_scalar("NFE/backward", self.nfe_b, current_iter + 1)

    def _print_training_info(self, current_iter: int):
        print_str = "Iter {} \b\b\t NFE-F {:.2f} \t NFE-B {:.2f}" "\t Train Acc {:.3f}%"
        print_vars = (current_iter + 1, self.nfe_f, self.nfe_b, self.train_acc)
        if self.test_acc is not None:
            print_str = print_str + "\t Test Acc {:.3f}%"
            print_vars = print_vars + (self.test_acc, )

        with open(os.path.join(self.opts.experiment_dir, "output.txt"),
                  "a") as log_file:
            print(print_str.format(*print_vars), file=log_file)

    def _apply_step_adaption_algo(
        self,
        current_iter: int,
        train_acc: float,
        x: torch.Tensor,
        y: torch.Tensor,
    ):

        if (current_iter + 1) % self.opts.adaption_interval == 0:
            if self.opts.fixed_step_solver:
                adapt_step_size(
                    trainer=self.trainer,
                    train_solver_acc=train_acc,
                    x=x,
                    y=y,
                    opts=self.opts,
                )
            else:
                adapt_tol(
                    trainer=self.trainer,
                    train_solver_acc=train_acc,
                    x=x,
                    y=y,
                    opts=self.opts,
                    train_solver_nfe_dict=self.nfe_log,
                    current_iter=current_iter,
                )
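
A plausible entry point for this class (the actual script wiring is not shown on this page):

if __name__ == "__main__":
    TrainModel().run()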