Example #1
    def run(self,
            adding_noise=False,
            lr_decay=False,
            augmentation=False,
            test_augmentation=False):
        """
        run method is written for running model
        """
        data_set = self.load_data_set()
        model, criterion, optimizer = self.init_model(data_set)

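        # track the best validation loss and test f-score seen so far for checkpointing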
        best_validation_loss = float("inf")
        best_test_f_score = 0.0

        best_val_loss_model = ""
        best_test_f_score_model = ""

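        # per-epoch loss and accuracy history (later passed to draw_curves)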
        losses_dict = dict()
        acc_dict = dict()
        losses_dict["train_loss"] = []
        losses_dict["validation_loss"] = []
        losses_dict["test_loss"] = []
        acc_dict["train_acc"] = []
        acc_dict["validation_acc"] = []
        acc_dict["test_acc"] = []

        augmentation_class = None
        augmentation_methods = None
        # create the augmentation class and augmentation methods
        if augmentation:
            augmentation_class, augmentation_methods = self.create_augmentation(
                data_set)

        # start training model
        for epoch in range(N_EPOCHS):
            start_time = time.time()

            # add Gaussian noise to the fully connected (w_s1 / w_s2) weights
            if adding_noise:
                with torch.no_grad():
                    for name, param in model.named_parameters():
                        if name.startswith("w_s1") or name.startswith("w_s2"):
                            param.add_(torch.randn(param.size()).to(DEVICE))

            # train model on train data
            if augmentation:
                train(model=model,
                      iterator=data_set.iterator_dict["train_iterator"],
                      optimizer=optimizer,
                      criterion=criterion,
                      epoch=epoch,
                      augmentation_class=augmentation_class,
                      augmentation_methods=augmentation_methods,
                      lr_decay=lr_decay)
            else:
                train(model=model,
                      iterator=data_set.iterator_dict["train_iterator"],
                      optimizer=optimizer,
                      criterion=criterion,
                      epoch=epoch,
                      lr_decay=lr_decay)

            # compute model result on train data
            train_log_dict = evaluate(
                model=model,
                iterator=data_set.iterator_dict["train_iterator"],
                criterion=criterion)

            losses_dict["train_loss"].append(train_log_dict["loss"])
            acc_dict["train_acc"].append(train_log_dict["acc"])

            # compute model result on validation data
            valid_log_dict = evaluate(
                model=model,
                iterator=data_set.iterator_dict["valid_iterator"],
                criterion=criterion)

            losses_dict["validation_loss"].append(valid_log_dict["loss"])
            acc_dict["validation_acc"].append(valid_log_dict["acc"])

            # compute model result on test data
            test_log_dict = evaluate(
                model=model,
                iterator=data_set.iterator_dict["test_iterator"],
                criterion=criterion)

            losses_dict["test_loss"].append(test_log_dict["loss"])
            acc_dict["test_acc"].append(test_log_dict["acc"])

            end_time = time.time()

            # calculate epoch time
            epoch_mins, epoch_secs = process_time(start_time, end_time)

            # checkpoint the model whenever the validation loss decreases
            if valid_log_dict["loss"] < best_validation_loss:
                best_validation_loss = valid_log_dict["loss"]
                torch.save(
                    model.state_dict(),
                    MODEL_PATH + f"model_epoch{epoch + 1}_loss_"
                    f"{valid_log_dict['loss']}.pt")
                best_val_loss_model = f"model_epoch{epoch + 1}_loss_" \
                    f"{valid_log_dict['loss']}.pt"

            # checkpoint the model whenever the test f-score improves
            if test_log_dict["total_fscore"] > best_test_f_score:
                best_test_f_score = test_log_dict["total_fscore"]
                torch.save(
                    model.state_dict(), MODEL_PATH + f"model_epoch{epoch + 1}"
                    f"_fscore_{test_log_dict['total_fscore']}.pt")
                best_test_f_score_model = f"model_epoch{epoch + 1}" \
                    f"_fscore_{test_log_dict['total_fscore']}.pt"

            # show model result
            logging.info(
                f"Epoch: {epoch + 1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s"
            )
            model_result_log(train_log_dict, valid_log_dict, test_log_dict)

            # save model result in log file
            self.log_file.write(f"Epoch: {epoch + 1:02} | Epoch Time: "
                                f"{epoch_mins}m {epoch_secs}s\n")
            model_result_save(self.log_file, train_log_dict, valid_log_dict,
                              test_log_dict)

        # save final model
        torch.save(model.state_dict(), MODEL_PATH + "final_model.pt")

        # test augmentation
        if test_augmentation:
            if not augmentation:
                augmentation_class, _ = self.create_augmentation(data_set)
            self.eval_test_augmentation(
                best_val_loss_model=best_val_loss_model,
                best_test_f_score_model=best_test_f_score_model,
                data_set=data_set,
                aug_class=augmentation_class)

        # plot the loss and accuracy curves
        self.draw_curves(train_acc=acc_dict["train_acc"],
                         validation_acc=acc_dict["validation_acc"],
                         test_acc=acc_dict["test_acc"],
                         train_loss=losses_dict["train_loss"],
                         validation_loss=losses_dict["validation_loss"],
                         test_loss=losses_dict["test_loss"])
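Example #1 relies on several project-level helpers that are not shown here (`train`, `evaluate`, `process_time`, `model_result_log`, `model_result_save`, plus the `N_EPOCHS`, `DEVICE`, and `MODEL_PATH` constants). As one small illustration, a `process_time` helper compatible with the call `process_time(start_time, end_time)` above might look like the sketch below; only the name and signature come from the call site, the body is an assumption.

def process_time(start_time, end_time):
    # Hypothetical sketch: split an elapsed wall-clock interval (seconds
    # returned by time.time()) into whole minutes and leftover seconds,
    # matching the "{epoch_mins}m {epoch_secs}s" log format used in run().
    elapsed = end_time - start_time
    elapsed_mins = int(elapsed // 60)
    elapsed_secs = int(elapsed - elapsed_mins * 60)
    return elapsed_mins, elapsed_secs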
Example #2
    def run(self,
            model_name,
            lr_decay=False,
            augmentation=False,
            test_augmentation=False):
        """
        run method is written for running model
        """
        # select the configuration for the requested model
        if model_name == "bert":
            model_config = BERT_CONFIG
        elif model_name == "parsbert":
            model_config = PARSBERT_CONFIG
        elif model_name == "albert":
            model_config = ALBERT_CONFIG
        else:
            raise ValueError(f"unknown model_name: {model_name!r}")

        # open log file
        self.log_file = open(model_config["log_path"], "w")

        # load data_set iterators
        data_set = self.load_data_set(model_config)
        # create model
        model, criterion, optimizer = self.init_model(data_set, model_config)

        best_validation_loss = float("inf")
        best_test_f_score = 0.0

        best_val_loss_model = ""
        best_test_f_score_model = ""

        losses_dict = dict()
        acc_dict = dict()
        losses_dict["train_loss"] = []
        losses_dict["dev_loss"] = []
        losses_dict["test_loss"] = []
        acc_dict["train_acc"] = []
        acc_dict["dev_acc"] = []
        acc_dict["test_acc"] = []

        augmentation_class = None
        augmentation_methods = None

        # create the augmentation class and augmentation methods
        if augmentation:
            augmentation_class, augmentation_methods = self.create_augmentation(
                data_set)

        # start training model
        for epoch in range(N_EPOCHS):
            start_time = time.time()

            # train model on train data
            if augmentation:
                train(model=model,
                      iterator=data_set.iterator_dict["train_iterator"],
                      optimizer=optimizer,
                      criterion=criterion,
                      epoch=epoch,
                      augmentation_class=augmentation_class,
                      augmentation_methods=augmentation_methods,
                      lr_decay=lr_decay)
            else:
                train(model=model,
                      iterator=data_set.iterator_dict["train_iterator"],
                      optimizer=optimizer,
                      criterion=criterion,
                      epoch=epoch,
                      lr_decay=lr_decay)

            # compute model result on train data
            train_log_dict = evaluate(
                model=model,
                iterator=data_set.iterator_dict["train_iterator"],
                criterion=criterion)

            losses_dict["train_loss"].append(train_log_dict["loss"])
            acc_dict["train_acc"].append(train_log_dict["acc"])

            # compute model result on dev data
            valid_log_dict = evaluate(
                model=model,
                iterator=data_set.iterator_dict["valid_iterator"],
                criterion=criterion)

            losses_dict["dev_loss"].append(valid_log_dict["loss"])
            acc_dict["dev_acc"].append(valid_log_dict["acc"])

            # compute model result on test data
            test_log_dict = evaluate(
                model=model,
                iterator=data_set.iterator_dict["test_iterator"],
                criterion=criterion)

            losses_dict["test_loss"].append(test_log_dict["loss"])
            acc_dict["test_acc"].append(test_log_dict["acc"])

            end_time = time.time()

            # calculate epoch time
            epoch_mins, epoch_secs = process_time(start_time, end_time)

            # checkpoint the model whenever the validation loss decreases
            if valid_log_dict["loss"] < best_validation_loss:
                best_validation_loss = valid_log_dict["loss"]
                torch.save(
                    model.state_dict(), model_config["save_model_path"] +
                    f"model_epoch{epoch + 1}_loss_"
                    f"{valid_log_dict['loss']}.pt")
                best_val_loss_model = f"model_epoch{epoch + 1}_loss_" \
                    f"{valid_log_dict['loss']}.pt"

            # checkpoint the model whenever the test f-score improves
            if test_log_dict["total_fscore"] > best_test_f_score:
                best_test_f_score = test_log_dict["total_fscore"]
                torch.save(
                    model.state_dict(),
                    model_config["save_model_path"] + f"model_epoch{epoch + 1}"
                    f"_fscore_{test_log_dict['total_fscore']}.pt")
                best_test_f_score_model = f"model_epoch{epoch + 1}" \
                    f"_fscore_{test_log_dict['total_fscore']}.pt"

            # show model result
            logging.info(
                f"Epoch: {epoch + 1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s"
            )
            model_result_log(train_log_dict, valid_log_dict, test_log_dict)

            # save model result in log file
            self.log_file.write(f"Epoch: {epoch + 1:02} | Epoch Time: "
                                f"{epoch_mins}m {epoch_secs}s\n")
            model_result_save(self.log_file, train_log_dict, valid_log_dict,
                              test_log_dict)

        # save final model
        torch.save(model.state_dict(),
                   model_config["save_model_path"] + "final_model.pt")

        # test augmentation
        if test_augmentation:
            if not augmentation:
                augmentation_class, _ = self.create_augmentation(data_set)
            self.eval_test_augmentation(
                best_val_loss_model=best_val_loss_model,
                best_test_f_score_model=best_test_f_score_model,
                data_set=data_set,
                aug_class=augmentation_class,
                model_config=model_config)

        # plot the loss and accuracy curves
        self.draw_curves(train_acc=acc_dict["train_acc"],
                         validation_acc=acc_dict["dev_acc"],
                         test_acc=acc_dict["test_acc"],
                         train_loss=losses_dict["train_loss"],
                         validation_loss=losses_dict["dev_loss"],
                         test_loss=losses_dict["test_loss"],
                         model_config=model_config)
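The only configuration keys that this `run` method reads directly are `log_path` and `save_model_path`; everything else in `BERT_CONFIG` / `PARSBERT_CONFIG` / `ALBERT_CONFIG` is consumed by `load_data_set` and `init_model`. A minimal, hypothetical stand-in config and call might look like the sketch below; the file paths, the extra-key comment, and the `Runner` class name are assumptions, not part of the original code.

# Hypothetical minimal config: the real BERT_CONFIG also carries whatever
# extra keys load_data_set() and init_model() expect (paths, hyper-parameters).
BERT_CONFIG = {
    "log_path": "logs/bert_train.log",   # epoch log written by run()
    "save_model_path": "models/bert/",   # prefix for checkpoint files
}

# runner = Runner()   # hypothetical trainer class exposing the run() method
# runner.run(model_name="bert", lr_decay=True, augmentation=False)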