Esempio n. 1
0
def load_experiment(filename):
    """
    Read a saved experiment from a JSON file and rebuild its fit results.

    :param filename: Path of the JSON file to read.
    :return: A FitResult constructed from the file's 'results' entry.
    """
    with open(filename, 'r') as fp:
        saved = json.load(fp)

    # The stored results are expanded as keyword arguments of FitResult.
    return FitResult(**saved['results'])
Esempio n. 2
0
def load_experiment(filename):
    """
    Read a saved experiment (configuration and results) from a JSON file.

    :param filename: Path of the JSON file to read.
    :return: A tuple ``(config, fit_res)``: the stored 'config' entry as-is,
        and a FitResult constructed from the stored 'results' entry.
    """
    with open(filename, "r") as fp:
        saved = json.load(fp)

    # 'config' is looked up first, then 'results' is expanded into FitResult.
    return saved["config"], FitResult(**saved["results"])
Esempio n. 3
0
    def fit(self,
            dl_train: DataLoader,
            dl_test: DataLoader,
            num_epochs,
            checkpoints: str = None,
            early_stopping: int = None,
            print_every=1,
            **kw) -> FitResult:
        """
        Skeleton of the multi-epoch training loop.

        The per-epoch work is not implemented yet: the loop body raises
        NotImplementedError on its first iteration, so a FitResult is only
        returned when the loop does not execute at all.
        :param dl_train: Dataloader for the training set.
        :param dl_test: Dataloader for the test set.
        :param num_epochs: Number of epochs to train for.
        :param checkpoints: Filename (without extension) intended for saving
            the model whenever the test set accuracy improves.
        :param early_stopping: Intended number of epochs without test loss
            improvement after which training stops early.
        :param print_every: Print progress every this number of epochs.
        :return: A FitResult object containing train and test losses per epoch.
        """
        train_loss, train_acc, test_loss, test_acc = [], [], [], []
        actual_num_epochs = 0

        epochs_without_improvement = 0
        best_acc = None

        for epoch in range(num_epochs):
            # Progress is shown on every print_every-th epoch and on the last
            # one; this flag is meant to be passed to train/test_epoch.
            verbose = epoch % print_every == 0 or epoch == num_epochs - 1
            self._print(f'--- EPOCH {epoch+1}/{num_epochs} ---', verbose)

            # TODO: Train & evaluate for one epoch
            #  - Use the train/test_epoch methods.
            #  - Save losses and accuracies in the lists above.
            #  - Implement early stopping. This is a very useful and
            #    simple regularization technique that is highly recommended.
            #  - Optional: Implement checkpoints. You can use torch.save() to
            #    save the model to the file specified by the checkpoints
            #    argument.
            # ====== YOUR CODE: ======
            raise NotImplementedError()
            # ========================

        return FitResult(actual_num_epochs, train_loss, train_acc, test_loss,
                         test_acc)
Esempio n. 4
0
def _to_python_scalar(value):
    """Return ``value.item()`` for tensor-like scalars, else ``value`` as-is."""
    return value.item() if hasattr(value, 'item') else value


def run_experiment(run_name, out_dir='./results', seed=None, device=None,
                   # Training params
                   bs_train=128, bs_test=None, batches=100, epochs=100,
                   early_stopping=3, checkpoints=None, lr=1e-3, reg=1e-3,
                   # Model params
                   filters_per_layer=None, layers_per_block=2, pool_every=2,
                   hidden_dims=None, model_type='cnn',
                   **kw):
    """
    Executes a single run of a Part3 experiment with a single configuration.

    These parameters are populated by the CLI parser below.
    See the help string of each parameter for its meaning.

    Notes on defaults resolved inside this function:
    - ``seed``: when None, a random seed is drawn; an explicit 0 is honoured.
    - ``bs_test``: when falsy, set to ``max(bs_train // 4, 1)``.
    - ``device``: when falsy, CUDA is used if available, otherwise the CPU.
    - ``filters_per_layer``: defaults to ``[64]``; each entry is repeated
      ``layers_per_block`` times to build the channel list of the model.
    - ``hidden_dims``: defaults to ``[1024]``.
    - ``checkpoints``: forwarded to the trainer's ``fit``; None disables it.
    - ``batches``: forwarded to the trainer's ``fit`` as ``max_batches``.

    :raises ValueError: if ``model_type`` is not a key of MODEL_TYPES.
    """
    # Fresh lists per call instead of shared mutable default arguments.
    if filters_per_layer is None:
        filters_per_layer = [64]
    if hidden_dims is None:
        hidden_dims = [1024]

    # `is None` (not truthiness) so that seed=0 is a usable, reproducible seed.
    if seed is None:
        seed = random.randint(0, 2 ** 31)
    torch.manual_seed(seed)
    if not bs_test:
        bs_test = max([bs_train // 4, 1])
    # Snapshot of the resolved parameters; must be taken before any other
    # local variable is created so that only the configuration is recorded.
    cfg = locals()

    tf = torchvision.transforms.ToTensor()
    ds_train = CIFAR10(root=DATA_DIR, download=True, train=True, transform=tf)
    ds_test = CIFAR10(root=DATA_DIR, download=True, train=False, transform=tf)

    if not device:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Select model class
    if model_type not in MODEL_TYPES:
        raise ValueError(f"Unknown model type: {model_type}")
    model_cls = MODEL_TYPES[model_type]

    # Input size is taken from the first training sample.
    x0, _ = ds_train[0]
    in_size = x0.shape
    num_classes = 10
    # Repeat every filter count layers_per_block times, preserving order.
    filters = [n_filters
               for n_filters in filters_per_layer
               for _ in range(layers_per_block)]
    model = model_cls(in_size=in_size, out_classes=num_classes,
                      channels=filters, pool_every=pool_every,
                      hidden_dims=hidden_dims).to(device)

    loss_fn = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=reg)

    trainer = training.TorchTrainer(model, loss_fn, optimizer, device)

    dl_train = torch.utils.data.DataLoader(ds_train, bs_train, shuffle=False)
    dl_test = torch.utils.data.DataLoader(ds_test, bs_test, shuffle=False)
    # Honour the caller's `checkpoints` argument instead of a hard-coded path,
    # so unrelated runs do not overwrite or reload each other's checkpoint.
    fit_tmp = trainer.fit(dl_train, dl_test, num_epochs=epochs,
                          checkpoints=checkpoints,
                          early_stopping=early_stopping, max_batches=batches)

    # Convert metrics to plain Python numbers before they are saved; works
    # whether the trainer reports tensors or ordinary floats.
    fit_res = FitResult(
        num_epochs=fit_tmp.num_epochs,
        train_loss=[_to_python_scalar(v) for v in fit_tmp.train_loss],
        train_acc=[_to_python_scalar(v) for v in fit_tmp.train_acc],
        test_loss=[_to_python_scalar(v) for v in fit_tmp.test_loss],
        test_acc=[_to_python_scalar(v) for v in fit_tmp.test_acc],
    )

    save_experiment(run_name, out_dir, cfg, fit_res)
    def fit(self,
            dl_train: DataLoader,
            dl_test: DataLoader,
            num_epochs,
            checkpoints: str = None,
            early_stopping: int = None,
            print_every=1,
            post_epoch_fn=None,
            **kw) -> FitResult:
        """
        Trains the model for multiple epochs with a given training set,
        and calculates validation loss over a given validation set.
        :param dl_train: Dataloader for the training set.
        :param dl_test: Dataloader for the test set.
        :param num_epochs: Number of epochs to train for.
        :param checkpoints: Whether to save model to file every time the
            test set accuracy improves. Should be a string containing a
            filename without extension. An existing checkpoint file is
            loaded before training starts.
        :param early_stopping: Whether to stop training early if the test
            loss got worse than in the previous epoch for this number of
            consecutive epochs.
        :param print_every: Print progress every this number of epochs.
        :param post_epoch_fn: A function to call after each epoch completes.
            It is not called for the epoch that triggers early stopping.
        :param kw: Extra keyword arguments forwarded to train_epoch and
            test_epoch.
        :return: A FitResult object containing train and test losses per epoch.
        """
        actual_num_epochs = 0
        train_loss, train_acc, test_loss, test_acc = [], [], [], []

        best_acc = None
        epochs_without_improvement = 0

        checkpoint_filename = None
        if checkpoints is not None:
            checkpoint_filename = f'{checkpoints}.pt'
            Path(os.path.dirname(checkpoint_filename)).mkdir(exist_ok=True)
            if os.path.isfile(checkpoint_filename):
                # Resume: restore weights and the early-stopping bookkeeping.
                print(f'*** Loading checkpoint file {checkpoint_filename}')
                saved_state = torch.load(checkpoint_filename,
                                         map_location=self.device)
                best_acc = saved_state.get('best_acc', best_acc)
                epochs_without_improvement =\
                    saved_state.get('ewi', epochs_without_improvement)
                self.model.load_state_dict(saved_state['model_state'])

        for epoch in range(num_epochs):
            save_checkpoint = False
            # Progress is shown every print_every epochs and on the last one.
            verbose = epoch % print_every == 0 or epoch == num_epochs - 1
            self._print(f'--- EPOCH {epoch+1}/{num_epochs} ---', verbose)

            # ====== YOUR CODE: ======
            # Forward the verbosity flag; an explicit 'verbose' passed by the
            # caller through **kw takes precedence.
            epoch_kw = {'verbose': verbose, **kw}

            # Train
            train_result = self.train_epoch(dl_train, **epoch_kw)
            train_batch_losses, curr_accuracy = train_result[0], train_result[1]
            train_acc.append(curr_accuracy)
            # Mean of the per-batch losses of this epoch.
            curr_loss = (torch.tensor(train_batch_losses).sum().item()
                         / len(train_batch_losses))
            train_loss.append(curr_loss)

            # Test
            test_result = self.test_epoch(dl_test, **epoch_kw)
            test_batch_losses, curr_test_accuracy = \
                test_result[0], test_result[1]
            test_acc.append(curr_test_accuracy)
            curr_test_loss = (torch.tensor(test_batch_losses).sum().item()
                              / len(test_batch_losses))
            test_loss.append(curr_test_loss)

            # This epoch was fully executed, so count it.
            actual_num_epochs += 1

            # Early stopping: compare against the PREVIOUS epoch's loss
            # (test_loss[-2]); test_loss[-1] is the value appended just above.
            if len(test_loss) > 1 and test_loss[-2] < curr_test_loss:
                epochs_without_improvement += 1
            else:
                epochs_without_improvement = 0

            # Checkpoints: only request a save when test accuracy improved.
            if best_acc is None or best_acc < curr_test_accuracy:
                best_acc = curr_test_accuracy
                save_checkpoint = True

            if (early_stopping is not None
                    and early_stopping <= epochs_without_improvement):
                break
            # ========================

            # Save model checkpoint if requested
            if save_checkpoint and checkpoint_filename is not None:
                saved_state = dict(best_acc=best_acc,
                                   ewi=epochs_without_improvement,
                                   model_state=self.model.state_dict())
                torch.save(saved_state, checkpoint_filename)
                print(f'*** Saved checkpoint {checkpoint_filename} '
                      f'at epoch {epoch+1}')

            if post_epoch_fn:
                post_epoch_fn(epoch, train_result, test_result, verbose)

        return FitResult(actual_num_epochs, train_loss, train_acc, test_loss,
                         test_acc)
Esempio n. 6
0
    def fit(self, dl_train: DataLoader, dl_test: DataLoader,
            num_epochs, checkpoints: str = None,
            early_stopping: int = None,
            print_every=1, **kw) -> FitResult:
        """
        Trains the model for multiple epochs with a given training set,
        and calculates validation loss over a given validation set.
        :param dl_train: Dataloader for the training set.
        :param dl_test: Dataloader for the test set.
        :param num_epochs: Number of epochs to train for.
        :param checkpoints: Whether to save model to file every time the
            test set accuracy improves. Should be a string containing a
            filename without extension. The whole model object is saved to
            exactly this path.
        :param early_stopping: Whether to stop training early if there is no
            test loss improvement for this number of epochs.
        :param print_every: Print progress every this number of epochs.
        :param kw: Extra keyword arguments forwarded to train_epoch and
            test_epoch.
        :return: A FitResult object containing train and test losses per epoch.
        """
        actual_num_epochs = 0
        train_loss, train_acc, test_loss, test_acc = [], [], [], []

        best_acc = None
        epochs_without_improvement = 0

        # Early stopping follows the lowest mean test LOSS seen so far (as the
        # docstring specifies); checkpointing separately follows best_acc.
        min_loss = None

        for epoch in range(num_epochs):
            # Progress is shown every print_every epochs and on the last one.
            verbose = epoch % print_every == 0 or epoch == num_epochs - 1
            self._print(f'--- EPOCH {epoch+1}/{num_epochs} ---', verbose)

            # ====== YOUR CODE: ======
            train_res = self.train_epoch(dl_train, verbose=verbose, **kw)
            train_loss.append(sum(train_res.losses) / len(train_res.losses))
            train_acc.append(train_res.accuracy)

            test_res = self.test_epoch(dl_test, verbose=verbose, **kw)
            # Mean test loss of this epoch; reused for early stopping below.
            epoch_test_loss = sum(test_res.losses) / len(test_res.losses)
            test_loss.append(epoch_test_loss)
            test_acc.append(test_res.accuracy)

            # This epoch was fully executed, so count it.
            actual_num_epochs += 1

            if best_acc is None or test_res.accuracy > best_acc:
                best_acc = test_res.accuracy
                if checkpoints is not None:
                    torch.save(self.model, checkpoints)

            if early_stopping is not None:
                if min_loss is None or epoch_test_loss < min_loss:
                    min_loss = epoch_test_loss
                    epochs_without_improvement = 0
                else:
                    epochs_without_improvement += 1
                if epochs_without_improvement >= early_stopping:
                    break
            # ========================

        return FitResult(actual_num_epochs,
                         train_loss, train_acc, test_loss, test_acc)
Esempio n. 7
0
    def fit(self,
            dl_train: DataLoader,
            dl_test: DataLoader,
            num_epochs,
            checkpoints: str = None,
            early_stopping: int = None,
            print_every=1,
            **kw) -> FitResult:
        """
        Trains the model for multiple epochs with a given training set,
        and calculates validation loss over a given validation set.

        The test set is only evaluated (and early stopping only applied) when
        ``self.autoencoder_training`` is None. When ``self.scheduler`` is set
        it is stepped once per epoch and learning-rate changes are printed.
        :param dl_train: Dataloader for the training set.
        :param dl_test: Dataloader for the test set.
        :param num_epochs: Number of epochs to train for.
        :param checkpoints: Not used by this implementation.
        :param early_stopping: Whether to stop training early if the test
            loss did not decrease relative to the previous epoch for this
            number of consecutive epochs.
        :param print_every: Print progress every this number of epochs.
        :param kw: Extra keyword arguments forwarded to train_epoch and
            test_epoch.
        :return: A FitResult object containing train and test losses per epoch.
        """
        actual_num_epochs = 0
        train_loss, train_acc, test_loss, test_acc = [], [], [], []

        epochs_without_improvement = 0
        # The scheduler is optional (see the guard before .step() below), so
        # it must not be dereferenced unconditionally.
        current_learning_rate = (self.scheduler.get_lr()
                                 if self.scheduler else None)

        for epoch in range(num_epochs):
            # Progress is shown every print_every epochs and on the last one.
            verbose = epoch % print_every == 0 or epoch == num_epochs - 1
            self._print(f'--- EPOCH {epoch + 1}/{num_epochs} ---', verbose)

            if self.scheduler:
                latest_lr = self.scheduler.get_lr()
                if latest_lr != current_learning_rate:
                    current_learning_rate = latest_lr
                    self._print(
                        f' learning rate has been changed to {current_learning_rate}',
                        verbose)

            actual_num_epochs += 1
            train = self.train_epoch(dl_train, epoch, verbose=verbose, **kw)
            train_loss.append(torch.mean(torch.stack(train.losses)).item())

            # Accuracy is recorded as 0 while training an autoencoder.
            if self.autoencoder_training:
                train_acc.append(0)
            else:
                train_acc.append(train.accuracy.item())

            if self.scheduler:
                self.scheduler.step()

            # NOTE(review): evaluation is gated on `is None`, so a False flag
            # also skips the test set -- confirm that this is intended.
            if self.autoencoder_training is None:
                test = self.test_epoch(dl_test, epoch, verbose=verbose, **kw)
                test_loss.append(torch.mean(torch.stack(test.losses)).item())
                test_acc.append(test.accuracy.item())

                # The first evaluated epoch has nothing to compare against and
                # must not be counted as "no improvement".
                if len(test_loss) < 2 or test_loss[-1] < test_loss[-2]:
                    epochs_without_improvement = 0
                else:
                    epochs_without_improvement += 1
                if (early_stopping is not None
                        and epochs_without_improvement >= early_stopping):
                    break

        return FitResult(actual_num_epochs, train_loss, train_acc, test_loss,
                         test_acc)
Esempio n. 8
0
    def fit(
        self,
        dl_train: DataLoader,
        dl_test: DataLoader,
        num_epochs,
        checkpoints: str = None,
        early_stopping: int = None,
        print_every=1,
        post_epoch_fn=None,
        **kw,
    ) -> FitResult:
        """
        Trains the model for multiple epochs with a given training set,
        and calculates validation loss over a given validation set.
        :param dl_train: Dataloader for the training set.
        :param dl_test: Dataloader for the test set.
        :param num_epochs: Number of epochs to train for.
        :param checkpoints: Whether to save model to file every time the
            test set accuracy improves. Should be a string containing a
            filename without extension. An existing checkpoint file is
            loaded before training starts.
        :param early_stopping: Whether to stop training early if the test
            accuracy has not exceeded the best value seen so far for this
            number of consecutive epochs.
        :param print_every: Print progress every this number of epochs.
        :param post_epoch_fn: A function to call after each epoch completes.
            It is not called for the epoch that triggers early stopping.
        :param kw: Extra keyword arguments forwarded to train_epoch and
            test_epoch.
        :return: A FitResult object holding the number of executed epochs,
            the per-batch train and test losses of all epochs, and the
            per-epoch train and test accuracies.
        """
        actual_num_epochs = 0
        train_loss, train_acc, test_loss, test_acc = [], [], [], []

        best_acc = None
        epochs_without_improvement = 0

        checkpoint_filename = None
        if checkpoints is not None:
            checkpoint_filename = f"{checkpoints}.pt"
            Path(os.path.dirname(checkpoint_filename)).mkdir(exist_ok=True)
            if os.path.isfile(checkpoint_filename):
                # Resume: restore weights and the early-stopping bookkeeping.
                print(f"*** Loading checkpoint file {checkpoint_filename}")
                saved_state = torch.load(checkpoint_filename,
                                         map_location=self.device)
                best_acc = saved_state.get("best_acc", best_acc)
                epochs_without_improvement = saved_state.get(
                    "ewi", epochs_without_improvement)
                self.model.load_state_dict(saved_state["model_state"])

        for epoch in range(num_epochs):
            save_checkpoint = False
            # Progress is shown every print_every epochs and on the last one.
            verbose = epoch % print_every == 0 or epoch == num_epochs - 1
            self._print(f"--- EPOCH {epoch+1}/{num_epochs} ---", verbose)

            # ====== YOUR CODE: ======
            self.optimizer.zero_grad()
            train_result = self.train_epoch(dl_train, verbose=verbose, **kw)
            test_result = self.test_epoch(dl_test, verbose=verbose, **kw)
            # This epoch was fully executed, so count it.
            actual_num_epochs += 1

            train_loss.extend(train_result.losses)
            train_acc.append(train_result.accuracy)
            test_loss.extend(test_result.losses)
            test_acc.append(test_result.accuracy)

            # Track the best test accuracy and the length of the current
            # streak without improvement. Both values are persisted in the
            # checkpoint, so they must be kept up to date here.
            if best_acc is None or test_result.accuracy > best_acc:
                best_acc = test_result.accuracy
                epochs_without_improvement = 0
                save_checkpoint = True
            else:
                epochs_without_improvement += 1

            # An improving epoch resets the streak, so breaking here never
            # skips a pending checkpoint save.
            if early_stopping and epochs_without_improvement >= early_stopping:
                break
            # ========================

            # Save model checkpoint if requested
            if save_checkpoint and checkpoint_filename is not None:
                saved_state = dict(
                    best_acc=best_acc,
                    ewi=epochs_without_improvement,
                    model_state=self.model.state_dict(),
                )
                torch.save(saved_state, checkpoint_filename)
                print(f"*** Saved checkpoint {checkpoint_filename} "
                      f"at epoch {epoch+1}")

            if post_epoch_fn:
                post_epoch_fn(epoch, train_result, test_result, verbose)

        return FitResult(actual_num_epochs, train_loss, train_acc, test_loss,
                         test_acc)
Esempio n. 9
0
    def fit(self,
            dl_train: DataLoader,
            dl_test: DataLoader,
            num_epochs,
            checkpoints: str = None,
            early_stopping: int = None,
            print_every=1,
            **kw) -> FitResult:
        """
        Trains the model for multiple epochs with a given training set,
        and calculates validation loss over a given validation set.
        :param dl_train: Dataloader for the training set.
        :param dl_test: Dataloader for the test set.
        :param num_epochs: Number of epochs to train for.
        :param checkpoints: Not used by this implementation (no checkpoint
            is written).
        :param early_stopping: Whether to stop training early if the test
            accuracy stayed below the best value seen so far for this number
            of consecutive epochs.
        :param print_every: Print progress every this number of epochs.
        :param kw: Extra keyword arguments forwarded to train_epoch and
            test_epoch.
        :return: A FitResult object holding the number of executed epochs,
            the per-batch train and test losses of all epochs, and the
            per-epoch train and test accuracies.
        """
        actual_num_epochs = 0
        train_loss, train_acc, test_loss, test_acc = [], [], [], []

        # Any non-negative accuracy is at least as good as this start value.
        best_acc = 0.0
        epochs_without_improvement = 0
        for epoch in range(num_epochs):
            # Progress is shown every print_every epochs and on the last one.
            verbose = epoch % print_every == 0 or epoch == num_epochs - 1
            self._print(f'--- EPOCH {epoch+1}/{num_epochs} ---', verbose)

            # ====== YOUR CODE: ======
            actual_num_epochs += 1

            # Train first, then evaluate, so that the test metrics recorded
            # for an epoch describe the weights produced by that same epoch.
            curr_epoch_res_train = self.train_epoch(dl_train,
                                                    verbose=verbose,
                                                    **kw)
            train_loss.extend(curr_epoch_res_train.losses)
            train_acc.append(curr_epoch_res_train.accuracy)

            curr_epoch_res_test = self.test_epoch(dl_test,
                                                  verbose=verbose,
                                                  **kw)
            curr_test_accuracy = curr_epoch_res_test.accuracy
            test_loss.extend(curr_epoch_res_test.losses)
            test_acc.append(curr_test_accuracy)

            if early_stopping and (best_acc > curr_test_accuracy):
                # Strictly worse than the best accuracy: extend the streak.
                epochs_without_improvement += 1
                if epochs_without_improvement >= early_stopping:
                    break
            else:
                # Equal or better accuracy resets the streak.
                if best_acc < curr_test_accuracy:
                    best_acc = curr_test_accuracy
                epochs_without_improvement = 0
            # ========================

        return FitResult(actual_num_epochs, train_loss, train_acc, test_loss,
                         test_acc)
Esempio n. 10
0
    def fit(self,
            dl_train: DataLoader,
            dl_test: DataLoader = None,
            num_epochs=100,
            checkpoints: str = None,
            early_stopping: int = 25,
            start_epoch: int = 0,
            print_every=1,
            post_epoch_fn=None,
            never_print=False,
            **kw) -> FitResult:
        """
        Trains the model for multiple epochs with a given training set,
        and calculates validation loss over a given validation set.
        :param dl_train: Dataloader for the training set.
        :param dl_test: Dataloader for the test set. When None, no evaluation
            is performed and early stopping never triggers.
        :param num_epochs: Number of epochs to train for.
        :param checkpoints: Whether to save model to file every time the
            test set accuracy matches or exceeds the best value of this run.
            Should be a string containing a filename without extension. An
            existing checkpoint file is loaded before training starts.
        :param early_stopping: Whether to stop training early if there is no
            test accuracy improvement for this number of epochs.
        :param start_epoch: Number of the first epoch; epochs are numbered
            start_epoch .. start_epoch + num_epochs - 1.
        :param print_every: Print progress every this number of epochs.
        :param post_epoch_fn: A function to call after each epoch completes.
            Its test-result argument is None when dl_test is None. It is not
            called for the epoch that triggers early stopping.
        :param never_print: When True, verbose output is disabled entirely.
        :param kw: Extra keyword arguments forwarded to train_epoch and
            test_epoch; its 'verbose' entry is overwritten every epoch.
        :return: A FitResult object containing train and test losses per epoch.
        """
        actual_num_epochs = 0
        train_loss, train_acc, test_loss, test_acc = [], [], [], []

        best_acc = None
        epochs_without_improvement = 0

        checkpoint_filename = None
        if checkpoints is not None:
            checkpoint_filename = f'{checkpoints}.pt'
            Path(os.path.dirname(checkpoint_filename)).mkdir(exist_ok=True)
            if os.path.isfile(checkpoint_filename):
                # Resume: restore weights and the early-stopping bookkeeping.
                print(f'*** Loading checkpoint file {checkpoint_filename}')
                saved_state = torch.load(checkpoint_filename,
                                         map_location=self.device)
                best_acc = saved_state.get('best_acc', best_acc)
                epochs_without_improvement =\
                    saved_state.get('ewi', epochs_without_improvement)
                self.model.load_state_dict(saved_state['model_state'])

        # Epoch numbering is shifted by start_epoch, so the final epoch and
        # the displayed total must account for that offset as well.
        end_epoch = start_epoch + num_epochs
        for epoch in range(start_epoch, end_epoch):
            save_checkpoint = False
            is_print_epoch = (epoch % print_every == 0
                              or epoch == end_epoch - 1)
            verbose = is_print_epoch and not never_print
            self._print(f'--- EPOCH {epoch+1}/{end_epoch} ---', verbose)

            # ====== YOUR CODE: ======
            kw['verbose'] = verbose
            res = self.train_epoch(dl_train, **kw)
            train_loss.append(torch.mean((torch.tensor(res.losses))).item())
            train_acc.append(res.accuracy)
            train_result = EpochResult(train_loss[-1], train_acc[-1])
            # This epoch was fully executed, so count it.
            actual_num_epochs += 1

            # Stays None without a test set so post_epoch_fn can still run.
            test_result = None
            if dl_test is not None:
                res = self.test_epoch(dl_test, **kw)
                test_loss.append(torch.mean((torch.tensor(res.losses))).item())
                test_acc.append(res.accuracy)
                test_result = EpochResult(test_loss[-1], test_acc[-1])

                if epoch > 0:
                    # test_acc already contains this epoch, so ">= max" means
                    # this epoch matches or beats every earlier one.
                    if res.accuracy >= max(test_acc):
                        best_acc = res.accuracy
                        epochs_without_improvement = 0
                        # Let the shared block below write the checkpoint, in
                        # the same dict format the loading code above reads.
                        save_checkpoint = bool(checkpoints)
                    else:
                        epochs_without_improvement += 1

            if self.scheduler is not None:
                self.scheduler.step()
            if early_stopping:
                if epochs_without_improvement >= early_stopping:
                    break
            # ========================

            # Save model checkpoint if requested
            if save_checkpoint and checkpoint_filename is not None:
                saved_state = dict(best_acc=best_acc,
                                   ewi=epochs_without_improvement,
                                   model_state=self.model.state_dict())
                torch.save(saved_state, checkpoint_filename)
                print(f'*** Saved checkpoint {checkpoint_filename} '
                      f'at epoch {epoch+1}')

            if post_epoch_fn:
                post_epoch_fn(epoch, train_result, test_result, verbose)

        return FitResult(actual_num_epochs, train_loss, train_acc, test_loss,
                         test_acc)
Esempio n. 11
0
    def fit(
        self,
        dl_train: DataLoader,
        dl_test: DataLoader,
        num_epochs,
        checkpoints: str = None,
        early_stopping: int = None,
        print_every=1,
        **kw,
    ) -> FitResult:
        """
        Trains the model for multiple epochs with a given training set,
        and calculates validation loss over a given validation set.
        :param dl_train: Dataloader for the training set.
        :param dl_test: Dataloader for the test set.
        :param num_epochs: Maximal number of epochs to train for.
        :param checkpoints: Currently unused: this implementation does not
            save checkpoints.
        :param early_stopping: If not None, stop training once the mean test
            loss has failed to improve on the best value seen so far for
            this number of consecutive epochs.
        :param print_every: Print progress every this number of epochs.
        :param kw: Extra keyword arguments forwarded to train_epoch and
            test_epoch.
        :return: A FitResult built from the number of epochs actually run,
            the accumulated train/test losses and the per-epoch train/test
            accuracies.
        """
        actual_num_epochs = 0
        train_loss, train_acc, test_loss, test_acc = [], [], [], []

        epochs_without_improvement = 0
        best_test_loss = None

        for epoch in range(num_epochs):
            # NOTE(review): verbose only controls the header print below; it
            # is not forwarded to train_epoch/test_epoch - confirm whether it
            # should be.
            verbose = False
            if epoch % print_every == 0 or epoch == num_epochs - 1:
                verbose = True
            self._print(f"--- EPOCH {epoch+1}/{num_epochs} ---", verbose)

            # ====== YOUR CODE: ======
            actual_num_epochs += 1
            train_losses, train_accuracy = self.train_epoch(dl_train, **kw)
            test_losses, test_accuracy = self.test_epoch(dl_test, **kw)
            train_loss.extend(train_losses)
            train_acc.append(train_accuracy)
            test_loss.extend(test_losses)
            test_acc.append(test_accuracy)

            # Mean test loss of this epoch. The losses may be tensors or
            # plain numbers; an empty list yields None instead of raising
            # IndexError on test_losses[0].
            if not test_losses:
                current_test_loss = None
            elif isinstance(test_losses[0], torch.Tensor):
                current_test_loss = torch.mean(torch.stack(test_losses))
            else:
                current_test_loss = sum(test_losses) / len(test_losses)

            # An epoch without any measured loss counts as "no improvement".
            improved = current_test_loss is not None and (
                best_test_loss is None or current_test_loss < best_test_loss
            )
            if improved:
                best_test_loss = current_test_loss
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1

            if (early_stopping is not None
                    and epochs_without_improvement == early_stopping):
                break
            # ========================

        return FitResult(actual_num_epochs, train_loss, train_acc, test_loss,
                         test_acc)
Esempio n. 12
0
    def fit(self,
            dl_train: DataLoader,
            dl_test: DataLoader,
            num_epochs,
            checkpoints: str = None,
            early_stopping: int = None,
            print_every=1,
            **kw) -> FitResult:
        """
        Trains the model for multiple epochs with a given training set,
        and evaluates it on a given test set after every epoch.
        :param dl_train: Dataloader for the training set.
        :param dl_test: Dataloader for the test set.
        :param num_epochs: Maximal number of epochs to train for.
        :param checkpoints: If given, the model is saved with torch.save()
            to exactly this path every time the test set accuracy improves.
        :param early_stopping: If given, stop training once the average test
            loss has failed to improve on the previous epoch's average for
            this number of consecutive epochs.
        :param print_every: Print progress every this number of epochs.
        :param kw: Extra keyword arguments forwarded to train_epoch and
            test_epoch.
        :return: A FitResult built from the number of epochs actually run,
            the accumulated train/test loss values and the per-epoch
            train/test accuracies.
        """
        actual_num_epochs = 0
        train_loss, train_acc, test_loss, test_acc = [], [], [], []

        best_acc = None
        epochs_without_improvement = 0
        # Average test loss of the previous epoch; None before the first one.
        prev_test_avg_loss = None

        for epoch in range(num_epochs):
            # NOTE(review): verbose only controls the header print below; it
            # is not forwarded to train_epoch/test_epoch - confirm whether it
            # should be.
            verbose = False
            if epoch % print_every == 0 or epoch == num_epochs - 1:
                verbose = True
            self._print(f'--- EPOCH {epoch+1}/{num_epochs} ---', verbose)

            # ====== YOUR CODE: ======
            # Count the epoch so the returned FitResult reports how many
            # epochs actually ran (previously this stayed at 0).
            actual_num_epochs += 1

            train_res = self.train_epoch(dl_train, **kw)
            # Evaluate with test_epoch: calling train_epoch on dl_test would
            # train the model on the test set.
            test_res = self.test_epoch(dl_test, **kw)
            train_acc.append(train_res.accuracy)
            test_acc.append(test_res.accuracy)

            # float() accepts both single-element tensors and plain numbers.
            epoch_train_losses = [float(loss) for loss in train_res.losses]
            epoch_test_losses = [float(loss) for loss in test_res.losses]
            train_loss.extend(epoch_train_losses)
            test_loss.extend(epoch_test_losses)

            # Early-stopping bookkeeping: compare this epoch's average test
            # loss against the previous epoch's average.
            curr_test_avg_loss = (sum(epoch_test_losses)
                                  / len(epoch_test_losses))
            if (prev_test_avg_loss is not None
                    and prev_test_avg_loss <= curr_test_avg_loss):
                epochs_without_improvement += 1
            else:
                epochs_without_improvement = 0
            prev_test_avg_loss = curr_test_avg_loss

            # Checkpoint whenever the test accuracy improves. Compare against
            # None explicitly so a best accuracy of 0 is not taken as unset.
            if best_acc is None or best_acc < test_res.accuracy:
                best_acc = test_res.accuracy
                if checkpoints:
                    torch.save(self.model, checkpoints)

            if early_stopping and epochs_without_improvement >= early_stopping:
                break
            # ========================

        return FitResult(actual_num_epochs, train_loss, train_acc, test_loss,
                         test_acc)