Example #1
import os

import torch
from nltk.parse.corenlp import CoreNLPParser

# read_glove_vectors, get_input_vector, LSTM and device are project-level
# helpers defined elsewhere in the repository this example comes from.


def start_testing(trained_model_file):
    # requires a Stanford CoreNLP server listening on localhost:9080
    parser = CoreNLPParser(url='http://localhost:9080')

    emotions = ['happiness', 'sadness', 'anger', 'disgust', 'surprise', 'fear']

    dirname = os.path.dirname(os.path.realpath(__file__)) + "/"

    glove_model = read_glove_vectors(dirname + "Pickle/gloveModel")

    hidden_size = 256
    num_layers = 2
    bidirectional = False
    batchnorm = False
    dropout_hidden = 0.3
    dropout_output = 0.9
    model = LSTM(300, hidden_size, num_layers, bidirectional, batchnorm,
                 dropout_hidden, dropout_output).to(device)

    with torch.no_grad():
        model.load_state_dict(torch.load(trained_model_file))
        print(model)
        model.eval()
        while True:
            test_sentence = input("Give a test sentence: ")
            sentence = list(parser.tokenize(test_sentence))
            input1, sent_length = get_input_vector(glove_model, sentence)
            class_pred = model(input1, sent_length)
            print("Sentence: " + test_sentence)
            _, pred = class_pred.max(dim=1)
            print("Prediction:\t" + emotions[pred[0]])
            print("Output Values:")
            percentages = torch.nn.functional.softmax(class_pred, dim=1)[0] * 100
            for emotion, pct in zip(emotions, percentages.tolist()):
                print(f"{emotion}: {pct:.2f}%")
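
# A minimal invocation sketch (not part of the original example); the checkpoint
# path below is hypothetical. A Stanford CoreNLP server must already be running,
# e.g.: java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9080
if __name__ == "__main__":
    start_testing("models/emotion_lstm.pth")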
Example #2
#         loss = loss_function(output, target.to(device))
#         loss.backward()
#         optimizer.step()
#         #scheduler.step()
#     losses.append(loss.item())
#     print(f'Epoch : {epoch}, loss : {loss.item()}')
#     if(loss < minLoss):
#         minLoss = loss
#         torch.save({'epoch': epoch,
#                     'model_state_dict': model.state_dict(),
#                     'optimizer_state_dict': optimizer.state_dict()},
#                     './best_saved_model.pt')

checkpoint = torch.load('./best_saved_model.pt')
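# best_saved_model.pt is the checkpoint written by the (commented-out) training loop above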
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()
testDf = pd.read_csv('./chart/기아.csv')
test_x = testDf.iloc[-200:, 1:6]
# fit mm on the close-price column alone so it can later inverse-transform the
# model's single-column predictions
mm = MinMaxScaler()
mm.fit(testDf.iloc[-200:, 4:5])
# the five input features are scaled with a separate scaler
test_x = MinMaxScaler().fit_transform(test_x)
test_x = torch.tensor(test_x, dtype=torch.float32, device=device)
test_x = test_x.reshape(1, test_x.shape[0], test_x.shape[1])
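# test_x now has shape (1, sequence_length, n_features): one batch for what is
# presumably a batch_first LSTM, given the windowed indexing below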
# pad the first window_size points with the first actual close so the predicted
# series lines up with the actual closes when both are plotted
predict_y = [testDf.iloc[-100:, 4].tolist()[0] for _ in range(window_size)]
# slide a window over the sequence, one prediction per position; no_grad and
# .cpu() keep the outputs convertible to NumPy
with torch.no_grad():
    predict = [
        model(test_x[:, idx:idx + window_size, :]).cpu().numpy()
        for idx in range(test_x.shape[1] - window_size - 1)
    ]
predict = mm.inverse_transform(np.array(predict).reshape(-1, 1))
predict_y.extend(predict[:, 0])
predict_y = predict_y[-100:]
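
# A follow-up sketch (not in the original): compare predict_y against the last
# 100 actual closing prices. Column 4 is taken to be the close price, matching
# the scaler fit above; matplotlib is an assumed dependency.
import matplotlib.pyplot as plt

actual_y = testDf.iloc[-100:, 4].tolist()
plt.plot(actual_y, label='actual close')
plt.plot(predict_y, label='predicted close')
plt.legend()
plt.show()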
Example #3
def train():
    number_of_epochs = 1000
    n_splits = 5
    early_stopping_patience = 15

    outputs, filelines = zip(*filtereddata)
    outputs = torch.LongTensor(outputs).to(device)

    padded_vectors, targets, sent_lengths = ready_data(outputs, filelines)
    targets_on_cpu = targets.cpu()

    skf = StratifiedKFold(n_splits=n_splits, shuffle=False)
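    # stratified folds preserve the per-emotion class proportions in every split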
    i = 0
    train_classification_reports, test_classification_reports = [], []
    all_train_conf_matrices, all_test_conf_matrices = [], []
    for train_index, test_index in skf.split(np.zeros(len(targets)), targets_on_cpu):
        i += 1
        x_train = padded_vectors[:, train_index, :]
        x_test = padded_vectors[:, test_index, :]
        y_train, y_test = targets[train_index], targets[test_index]
        sent_len_train, sent_len_test = sent_lengths[train_index], sent_lengths[test_index]
        x_test, y_test, sent_len_test = get_batch(
            0, len(y_test), x_test, y_test, sent_len_test)

        model = LSTM(300, hidden_size, num_layers, bidirectional,
                     batchnorm, dropout_hidden, dropout_output).to(device)
        # https://discuss.pytorch.org/t/vgg-output-layer-no-softmax/9273/5
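        # CrossEntropyLoss applies log-softmax internally, so the model's output
        # layer emits raw logits (see the link above)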
        loss_function = nn.CrossEntropyLoss().to(device)
        #optimizer = optim.Adam(model.parameters(), lr=0.01)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate,
                               betas=(0.9, 0.999), eps=1e-08,
                               weight_decay=1e-5, amsgrad=False)

        early_stopping = EarlyStopping(
            patience=early_stopping_patience, verbose=True)

        N = len(y_train)

        trainlosses, testlosses = [None], [None]
        fold_train_classification_reports, fold_test_classification_reports = [], []
        fold_train_conf_matrices, fold_test_conf_matrices = [], []
        for epoch in range(1, number_of_epochs + 1):
            ###################
            # train the model #
            ###################
            model.train()  # prep model for training

            shuffleindices = torch.randperm(len(y_train))
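            # copy_ writes the permuted data back into the existing tensors, so
            # the training fold is reshuffled in place each epoch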
            x_train.copy_(x_train[:, shuffleindices, :])
            y_train.copy_(y_train[shuffleindices])
            sent_len_train.copy_(sent_len_train[shuffleindices])
            for batch in range(math.ceil(N / batch_size)):
                # clear the gradients of all optimized variables
                optimizer.zero_grad()

                # get_data gets the data from the dataset (sequence batch, size batch_size)
                x_batch, targets_batch, sent_lengths_batch = get_batch(
                    batch, batch_size, x_train, y_train, sent_len_train)

                # forward pass: compute predicted outputs by passing inputs to the model
                class_pred = model(x_batch, sent_lengths_batch)
                # calculate the loss
                loss = loss_function(class_pred, targets_batch)
                # backward pass: compute gradient of the loss with respect to model parameters
                loss.backward()
                # perform a single optimization step (parameter update)
                optimizer.step()

            """===================================================================================================================="""
            directory = dirname + "results/es_n" + str(n_splits) + "+b" + str(batch_size) + "+e" + str(number_of_epochs) + "+lr" + str(learning_rate) + "+hidden" + str(hidden_size) + "+ly" + str(num_layers) \
                + ("+bd" if bidirectional else "") + ("+bn" if batchnorm else "") + \
                "+dp_h" + str(dropout_hidden) + "+dp_o" + \
                str(dropout_output) + "/"
            fold = i
            saveFile = epoch == number_of_epochs
            """===================================================================================================================="""

            ######################
            # validate the model #
            ######################
            model.eval()  # prep model for evaluation
            """===================================================================================================================="""
            ######################
            #   FIND TEST LOSS   #
            ######################
            # forward pass: compute predicted outputs by passing inputs to the model
            class_pred = model(x_test, sent_len_test)
            # calculate the loss
            loss = loss_function(class_pred, y_test)

            _, pred = class_pred.cpu().detach().max(dim=1)

            testlosses.append(loss.item())  # record validation loss
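            # index of the epoch with the lowest validation loss so far; the -1
            # offsets the None placeholder at position 0 of testlosses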
            minposs = testlosses.index(min(testlosses[1:])) - 1
            y_test2 = y_test.cpu()
            classification_report = metrics.classification_report(
                y_true=y_test2, y_pred=pred, target_names=emotions, output_dict=True, zero_division=0)
            test_conf_matrix = metrics.confusion_matrix(
                y_true=y_test2, y_pred=pred)
            fold_test_classification_reports.append(
                fix_report(classification_report))
            fold_test_conf_matrices.append(test_conf_matrix)

            del classification_report
            del test_conf_matrix
            del loss
            del y_test2

            tobeprintedtest = ["", "", "", "", "", "", "", "", "", "", "", "                         TEST DATA",
                               report_to_string(fold_test_classification_reports[minposs])]
            tobeprintedtest = '\n'.join(tobeprintedtest)
            """===================================================================================================================="""
            """===================================================================================================================="""
            ######################
            # FIND TRAINING LOSS #
            ######################
            x_train2, y_train2, sent_len_train2 = get_batch(
                0, len(y_train), x_train, y_train, sent_len_train)
            # forward pass: compute predicted outputs by passing inputs to the model
            class_pred = model(x_train2, sent_len_train2)
            # calculate the loss
            loss = loss_function(class_pred, y_train2)

            _, pred = class_pred.cpu().detach().max(dim=1)

            trainlosses.append(loss.item())  # record training loss
            y_train2 = y_train2.cpu()
            classification_report = metrics.classification_report(
                y_true=y_train2, y_pred=pred, target_names=emotions, output_dict=True, zero_division=0)
            train_conf_matrix = metrics.confusion_matrix(
                y_true=y_train2, y_pred=pred)
            fold_train_classification_reports.append(
                fix_report(classification_report))
            fold_train_conf_matrices.append(train_conf_matrix)

            del classification_report
            del train_conf_matrix
            del loss
            del x_train2
            del y_train2
            del sent_len_train2

            tobeprintedtrain = ["LEARNING RATE = " + str(learning_rate), "BATCH SIZE = " + str(batch_size), "HIDDEN_SIZE = " + str(hidden_size),
                                str(num_layers) + " LAYERS", "BIDIRECTIONAL " + ("YES" if bidirectional else "NO"), "BATCHNORM " + (
                                    "YES" if batchnorm else "NO"), "DROPOUT HIDDEN = " + str(dropout_hidden),
                                "DROPOUT OUTPUT = " + str(dropout_output),
                                "", "FOLD " + str(fold) + "/" + str(n_splits), "EPOCH " + str(
                                    epoch) + "/" + str(number_of_epochs), "                        TRAIN DATA",
                                report_to_string(fold_train_classification_reports[minposs])]
            tobeprintedtrain = '\n'.join(tobeprintedtrain)
            """===================================================================================================================="""
            """===================================================================================================================="""
            """===================================================================================================================="""
            # early_stopping needs the validation loss to check if it has decreased,
            # and if it has, it will make a checkpoint of the current model
            early_stopping(testlosses[-1], model)
            """===================================================================================================================="""
            """===================================================================================================================="""
            if saveFile or early_stopping.early_stop:
                image1 = plottraintest(trainlosses, testlosses)
                image2 = showResults(tobeprintedtrain, tobeprintedtest,
                                     fold_train_conf_matrices[minposs], fold_test_conf_matrices[minposs], emotions)
                showSaveImage(image1, image2, directory, fold,
                              saveFile or early_stopping.early_stop, False)

                train_classification_reports.append(
                    fold_train_classification_reports[minposs])
                all_train_conf_matrices.append(
                    fold_train_conf_matrices[minposs])
                fold_train_conf_matrices = []
                test_classification_reports.append(
                    fold_test_classification_reports[minposs])
                fold_test_classification_reports = []
                all_test_conf_matrices.append(fold_test_conf_matrices[minposs])
            """===================================================================================================================="""
            if n_splits == i and (epoch == number_of_epochs or early_stopping.early_stop):
                train_conf_matrices_average = np.round(
                    np.mean(all_train_conf_matrices, axis=0), 1)
                test_conf_matrices_average = np.round(
                    np.mean(all_test_conf_matrices, axis=0), 1)

                train_classification_average = report_average(
                    train_classification_reports)
                test_classification_average = report_average(
                    test_classification_reports)

                tobeprintedtrain = ["LEARNING RATE = " + str(learning_rate), "BATCH SIZE = " + str(batch_size), "HIDDEN_SIZE = " + str(hidden_size),
                                    str(num_layers) + " LAYERS", "BIDIRECTIONAL " + ("YES" if bidirectional else "NO"), "BATCHNORM " + (
                                        "YES" if batchnorm else "NO"), "DROPOUT HIDDEN = " + str(dropout_hidden),
                                    "DROPOUT OUTPUT = " + str(dropout_output),
                                    "", str(fold) + " FOLD AVERAGE", "EPOCH " +
                                    str(epoch), "                        TRAIN DATA",
                                    train_classification_average]
                tobeprintedtrain = '\n'.join(tobeprintedtrain)

                tobeprintedtest = ["", "", "", "", "", "", "", "", "", "", "", "                         TEST DATA",
                                   test_classification_average]
                tobeprintedtest = '\n'.join(tobeprintedtest)

                averageImage2 = showResults(
                    tobeprintedtrain, tobeprintedtest, train_conf_matrices_average, test_conf_matrices_average, emotions)
                showSaveImage(None, averageImage2, directory, str(fold) + " fold average",
                              n_splits == i and (epoch == number_of_epochs or early_stopping.early_stop), True)

            if early_stopping.early_stop:
                print("Early stopping")
                break
            """===================================================================================================================="""
            print('epoch: {} memory use: {}MB'.format(
                epoch, torch.cuda.memory_allocated()/2.**20))
        torch.cuda.empty_cache()
        # load the last checkpoint with the best model
        model.load_state_dict(torch.load('checkpoint.pt'))
        torch.save(model.state_dict(),
                   f"{dirname}models/es_n{i}+b{batch_size}+e{number_of_epochs}"
                   f"+lr{learning_rate}+hidden{hidden_size}+ly{num_layers}"
                   + ("+bd" if bidirectional else "") + ("+bn" if batchnorm else "")
                   + f"+dp_h{dropout_hidden}+dp_o{dropout_output}.pth")

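        # drop references to this fold's tensors so their memory can be reclaimed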
        del x_train
        del x_test
        del y_train
        del y_test
        del pred
        del sent_len_train
        del sent_len_test
        del trainlosses
        del testlosses
        del loss_function
        del optimizer
        del early_stopping
        del model
        gc.collect()

        """