    def run_experiment(self):
        """
        Runs experiment train and evaluation iterations, saving the model and best val model and val model accuracy after each epoch
        :return: The summary current_epoch_losses from starting epoch to total_epochs.
        """
        total_losses = {
            "train_acc": [],
            "train_loss": [],
            "val_acc": [],
            "val_loss": []
        }  # initialize a dict to keep the per-epoch metrics
        for i, epoch_idx in enumerate(
                range(self.starting_epoch, self.num_epochs)):
            epoch_start_time = time.time()
            current_epoch_losses = {
                "train_acc": [],
                "train_loss": [],
                "val_acc": [],
                "val_loss": []
            }

            with tqdm.tqdm(
                    total=self.train_data.num_batches
            ) as pbar_train:  # create a progress bar for training
                for idx, (x, y) in enumerate(self.train_data):  # get data batches
                    loss, accuracy = self.run_train_iter(
                        x=x, y=y)  # take a training iter step
                    current_epoch_losses["train_loss"].append(loss.cpu(
                    ))  # add current iter loss to the train loss list
                    current_epoch_losses["train_acc"].append(
                        accuracy)  # add current iter acc to the train acc list
                    pbar_train.update(1)
                    pbar_train.set_description(
                        "loss: {:.4f}, accuracy: {:.4f}".format(
                            loss, accuracy))

            with tqdm.tqdm(
                    total=self.val_data.num_batches
            ) as pbar_val:  # create a progress bar for validation
                for x, y in self.val_data:  # get data batches
                    loss, accuracy = self.run_evaluation_iter(
                        x=x, y=y)  # run a validation iter
                    current_epoch_losses["val_loss"].append(
                        loss.cpu())  # add current iter loss to val loss list.
                    current_epoch_losses["val_acc"].append(
                        accuracy)  # add current iter acc to val acc lst.
                    pbar_val.update(1)  # add 1 step to the progress bar
                    pbar_val.set_description(
                        "loss: {:.4f}, accuracy: {:.4f}".format(
                            loss, accuracy))
            val_mean_accuracy = np.mean(current_epoch_losses['val_acc'])
            if val_mean_accuracy > self.best_val_model_acc:  # if current epoch's mean val acc is greater than the saved best val acc then
                self.best_val_model_acc = val_mean_accuracy  # set the best val model acc to be current epoch's val accuracy
                self.best_val_model_idx = epoch_idx  # set the experiment-wise best val idx to be the current epoch's idx

            for key, value in current_epoch_losses.items():
                total_losses[key].append(
                    np.mean(value))  # average each metric over the epoch, ready for storage and terminal output

            save_statistics(experiment_log_dir=self.experiment_logs,
                            filename='summary.csv',
                            stats_dict=total_losses,
                            current_epoch=i,
                            continue_from_mode=(self.starting_epoch != 0
                                                or i > 0))  # save statistics to stats file, appending after the first write.

            # load_statistics(experiment_log_dir=self.experiment_logs, filename='summary.csv') # How to load a csv file if you need to

            out_string = "_".join([
                "{}_{:.4f}".format(key, np.mean(value))
                for key, value in current_epoch_losses.items()
            ])
            # create a string to use to report our epoch metrics
            epoch_elapsed_time = time.time() - epoch_start_time  # calculate time taken for epoch
            epoch_elapsed_time = "{:.4f}".format(epoch_elapsed_time)
            print("Epoch {}:".format(epoch_idx), out_string, "epoch time",
                  epoch_elapsed_time, "seconds")
            self.save_model(
                model_save_dir=self.experiment_saved_models,
                # save model and best val idx and best val acc, using the model dir, model name and model idx
                model_save_name="train_model",
                model_idx=epoch_idx,
                best_validation_model_idx=self.best_val_model_idx,
                best_validation_model_acc=self.best_val_model_acc)

        print("Generating test set evaluation metrics")
        self.load_model(
            model_save_dir=self.experiment_saved_models,
            model_idx=self.best_val_model_idx,
            # load best validation model
            model_save_name="train_model")
        print("Best validation model index:", self.best_val_model_idx)
        current_epoch_losses = {
            "test_acc": [],
            "test_f1_score": []
        }  # initialize a statistics dict
        with tqdm.tqdm(total=self.test_data.num_batches) as pbar_test:  # initialize a progress bar
            for x, y in self.test_data:  # sample batch
                f_score, accuracy = self.compute_Fscore(
                    x=x, y=y)  # compute the F1 score and accuracy by running an evaluation step
                current_epoch_losses["test_f1_score"].append(
                    f_score)  # save test F1 score
                current_epoch_losses["test_acc"].append(
                    accuracy)  # save test accuracy
                pbar_test.update(1)  # update progress bar status
                pbar_test.set_description(
                    "f1_score: {:.4f}, accuracy: {:.4f}".format(
                        f_score, accuracy))  # update progress bar string output

        test_losses = {
            key: [np.mean(value)]
            for key, value in current_epoch_losses.items()
        }  # save test set metrics in dict format
        save_statistics(
            experiment_log_dir=self.experiment_logs,
            filename='test_summary.csv',
            # save test set metrics on disk in .csv format
            stats_dict=test_losses,
            current_epoch=0,
            continue_from_mode=False)

        return total_losses, test_losses
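
The method above assumes several helpers on the experiment object. A minimal sketch of what run_train_iter, run_evaluation_iter, and compute_Fscore might look like is given below; only the method names and return shapes come from the calls above, while the class name ExperimentBuilderSketch, the bodies, and the self.model / self.optimizer / self.device attributes are illustrative assumptions, not the original code.

import torch
import torch.nn.functional as F
from sklearn.metrics import f1_score


class ExperimentBuilderSketch:  # hypothetical host class standing in for the real experiment class
    def run_train_iter(self, x, y):
        """One optimization step; returns (loss, accuracy) for the batch."""
        self.model.train()  # put the network into training mode
        x, y = x.float().to(self.device), y.long().to(self.device)
        out = self.model(x)
        loss = F.cross_entropy(out, y)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        predictions = torch.argmax(out.detach(), dim=1)
        accuracy = (predictions == y).float().mean().item()
        return loss.detach(), accuracy

    def run_evaluation_iter(self, x, y):
        """Forward pass without gradient tracking; returns (loss, accuracy)."""
        self.model.eval()  # put the network into evaluation mode
        with torch.no_grad():
            x, y = x.float().to(self.device), y.long().to(self.device)
            out = self.model(x)
            loss = F.cross_entropy(out, y)
            predictions = torch.argmax(out, dim=1)
            accuracy = (predictions == y).float().mean().item()
        return loss, accuracy

    def compute_Fscore(self, x, y):
        """Returns (macro F1 score, accuracy) for one batch, matching the test loop above."""
        self.model.eval()
        with torch.no_grad():
            x, y = x.float().to(self.device), y.long().to(self.device)
            predictions = torch.argmax(self.model(x), dim=1)
        accuracy = (predictions == y).float().mean().item()
        score = f1_score(y.cpu().numpy(), predictions.cpu().numpy(), average='macro')
        return score, accuracy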
Example #2
    def run_experiment(self):
        """
        Runs experiment train and evaluation iterations, saving the model and best val model and val model accuracy after each epoch
        :return: The summary current_epoch_losses from starting epoch to total_epochs.
        """
        total_losses = {
            "train_acc": [],
            "train_loss": [],
            "train_files": [],
            "train_labels": [],
            "val_acc": [],
            "val_loss": [],
            "val_files": [],
            "val_labels": [],
            "val_pred_labels": []
        }  # initialize a dict to keep the per-epoch metrics
        for i, epoch_idx in enumerate(
                range(self.starting_epoch, self.num_epochs)):
            epoch_start_time = time.time()
            current_epoch_losses = {
                "train_acc": [],
                "train_loss": [],
                "train_files": [],
                "train_labels": [],
                "val_acc": [],
                "val_loss": [],
                "val_files": [],
                "val_labels": [],
                "val_pred_labels": []
            }

            with tqdm.tqdm(
                    total=self.train_data.num_batches
            ) as pbar_train:  # create a progress bar for training
                for idx, (x, y, f_n_train) in enumerate(self.train_data):  # get data batches
                    loss, accuracy = self.run_train_iter(
                        x=x, y=y)  # take a training iter step
                    current_epoch_losses["train_loss"].append(
                        np.array(loss.cpu())
                    )  # add current iter loss to the train loss list
                    current_epoch_losses["train_acc"].append(
                        accuracy)  # add current iter acc to the train acc list
                    current_epoch_losses["train_files"].append(f_n_train)
                    current_epoch_losses["train_labels"].append(y)

                    pbar_train.update(1)
                    pbar_train.set_description(
                        "loss: {:.4f}, accuracy: {:.4f}".format(
                            loss, accuracy))

            with tqdm.tqdm(
                    total=self.val_data.num_batches
            ) as pbar_val:  # create a progress bar for validation
                for x, y_val, f_n_val in self.val_data:  # get data batches
                    loss, accuracy, y_targets, predicted_labels = self.run_evaluation_iter(
                        x=x, y=y_val)  # run a validation iter
                    current_epoch_losses["val_loss"].append(
                        np.array(loss.cpu())
                    )  # add current iter loss to val loss list.
                    current_epoch_losses["val_acc"].append(
                        accuracy)  # add current iter acc to val acc lst.
                    current_epoch_losses["val_files"].append(f_n_val)
                    current_epoch_losses["val_labels"].append(y_targets)
                    current_epoch_losses["val_pred_labels"].append(
                        predicted_labels)

                    pbar_val.update(1)  # add 1 step to the progress bar
                    pbar_val.set_description(
                        "loss: {:.4f}, accuracy: {:.4f}".format(
                            loss, accuracy))
            val_mean_accuracy = np.mean(current_epoch_losses['val_acc'])
            if val_mean_accuracy > self.best_val_model_acc:  # if current epoch's mean val acc is greater than the saved best val acc then
                self.best_val_model_acc = val_mean_accuracy  # set the best val model acc to be current epoch's val accuracy
                self.best_val_model_idx = epoch_idx  # set the experiment-wise best val idx to be the current epoch's idx
                total_losses["val_files"] = current_epoch_losses["val_files"]
                total_losses["val_labels"] = current_epoch_losses["val_labels"]
                total_losses["val_pred_labels"] = current_epoch_losses[
                    "val_pred_labels"]
                total_losses["train_files"] = current_epoch_losses[
                    "train_files"]
                total_losses["train_labels"] = current_epoch_losses[
                    "train_labels"]

            non_scalar_keys = [
                "train_files", "train_labels", "val_files", "val_labels",
                "val_pred_labels"
            ]  # per-batch records that cannot be averaged into a scalar
            for key, value in current_epoch_losses.items():
                if key not in non_scalar_keys:
                    total_losses[key].append(
                        np.mean(value))  # average each scalar metric over the epoch, ready for storage and terminal output

            save_statistics(experiment_log_dir=self.experiment_logs,
                            filename='summary.csv',
                            stats_dict=total_losses,
                            current_epoch=i)  # save statistics to stats file.

            # load_statistics(experiment_log_dir=self.experiment_logs, filename='summary.csv') # How to load a csv file if you need to

            out_string = "_".join([
                "{}_{:.4f}".format(key, np.mean(value))
                for key, value in current_epoch_losses.items()
                if key not in non_scalar_keys
            ])
            # create a string to use to report our epoch metrics
            epoch_elapsed_time = time.time() - epoch_start_time  # calculate time taken for epoch
            epoch_elapsed_time = "{:.4f}".format(epoch_elapsed_time)
            print("Epoch {}:".format(epoch_idx), out_string, "epoch time",
                  epoch_elapsed_time, "seconds")
            self.save_model(
                model_save_dir=self.experiment_saved_models,
                # save model and best val idx and best val acc, using the model dir, model name and model idx
                model_save_name="train_model",
                model_idx=epoch_idx,
                best_validation_model_idx=self.best_val_model_idx,
                best_validation_model_acc=self.best_val_model_acc)

        total_losses["val_files"] = [
            item for sublist in total_losses["val_files"] for item in sublist
        ]
        total_losses["val_labels"] = [
            item for sublist in total_losses["val_labels"] for item in sublist
        ]
        #total_losses["val_preds"] = [item for sublist in total_losses["val_files"] for item in sublist]
        total_losses["val_pred_labels"] = [
            item for sublist in total_losses["val_pred_labels"]
            for item in sublist
        ]
        total_losses["train_files"] = [
            item for sublist in total_losses["train_files"] for item in sublist
        ]
        total_losses["train_labels"] = [
            item for sublist in total_losses["train_labels"]
            for item in sublist
        ]

        # for i in total_losses["val_pred_labels"]:
        #     i = i.numpy()[0]
        filename = self.experiment_folder + '/total_losses_pickle.pkl'
        print("Generated file " + filename)
        with open(filename, 'wb') as outfile:  # persist the raw per-epoch records
            pickle.dump(total_losses, outfile)

        print("Generating test set evaluation metrics")
        self.load_model(
            model_save_dir=self.experiment_saved_models,
            model_idx=self.best_val_model_idx,
            # load best validation model
            model_save_name="train_model")
        current_epoch_losses = {
            "test_acc": [],
            "test_loss": [],
            "test_files": [],
            "test_labels": [],
            "test_pred_labels": []
        }  # initialize a statistics dict
        with tqdm.tqdm(total=self.test_data.num_batches) as pbar_test:  # initialize a progress bar
            for x, y, f_name_test in self.test_data:  # sample batch
                loss, accuracy, y_targets, predicted_labels = self.run_evaluation_iter(
                    x=x, y=y
                )  # compute loss and accuracy by running an evaluation step
                current_epoch_losses["test_loss"].append(np.array(
                    loss.cpu()))  # save test loss
                current_epoch_losses["test_acc"].append(
                    accuracy)  # save test accuracy
                current_epoch_losses["test_files"].append(
                    f_name_test)  # save test accuracy
                current_epoch_losses["test_labels"].append(y_targets)
                current_epoch_losses["test_pred_labels"].append(
                    predicted_labels)

                pbar_test.update(1)  # update progress bar status
                pbar_test.set_description(
                    "loss: {:.4f}, accuracy: {:.4f}".format(
                        loss, accuracy))  # update progress bar string output

            filename = self.experiment_folder + '/total_losses_pickle_test.pkl'
            print("Generated file " + filename)
            with open(filename, 'wb') as outfile:  # persist the raw test-set records
                pickle.dump(current_epoch_losses, outfile)

        test_losses = {
            key: [np.mean(value)]
            for key, value in current_epoch_losses.items()
            if key not in ["test_files", "test_labels", "test_pred_labels"]
        }  # save test set metrics in dict format
        save_statistics(
            experiment_log_dir=self.experiment_logs,
            filename='test_summary.csv',
            # save test set metrics on disk in .csv format
            stats_dict=test_losses,
            current_epoch=0)

        return total_losses, test_losses
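
Example #2 depends on a richer evaluation helper that also returns the batch targets and predicted labels, and it persists the raw records with pickle. Below is a minimal sketch of that assumed four-value interface; as before, the class name, body, and self.model / self.device attributes are illustrative assumptions, with only the call signature taken from the loops above.

import torch
import torch.nn.functional as F


class ExperimentBuilderSketch:  # hypothetical host class for the assumed helper
    def run_evaluation_iter(self, x, y):
        """Assumed interface for Example #2: returns (loss, accuracy, y_targets, predicted_labels)."""
        self.model.eval()  # assumed attribute holding the trained network
        with torch.no_grad():
            x, y = x.float().to(self.device), y.long().to(self.device)
            out = self.model(x)
            loss = F.cross_entropy(out, y)
            predicted_labels = torch.argmax(out, dim=1)
            accuracy = (predicted_labels == y).float().mean().item()
        return loss, accuracy, y.cpu(), predicted_labels.cpu()

Because the pickle files store plain dicts, they can be reloaded later for offline error analysis, for example:

import pickle

# 'experiment_folder' is an illustrative path; substitute the actual experiment directory.
with open('experiment_folder/total_losses_pickle.pkl', 'rb') as infile:
    records = pickle.load(infile)
print(records['val_pred_labels'][:10])  # first ten flattened validation predictions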