Esempio n. 1
0
def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    #os.chdir("/data2/xuhuizh/graphM_project/HAMN")
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }
    if opt.tune == 1:
        print('Fine tune the word embedding')
        freeze = False
    else:
        freeze = True
    if opt.max_sent_words == "5,5":
        max_word_length, max_sent_length = get_max_lengths(opt.train_set)
    else:
        max_word_length, max_sent_length = [
            int(x) for x in opt.max_sent_words.split(',')
        ]
    print(max_word_length, max_sent_length, flush=True)
    training_set = MyDataset(opt.train_set, opt.word2vec_path, max_sent_length,
                             max_word_length)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(opt.test_set, opt.word2vec_path, max_sent_length,
                         max_word_length)
    test_generator = DataLoader(test_set, **test_params)
    if opt.graph == 1:
        print('use graph model')
        model = HierGraphNet(opt.word_hidden_size, opt.sent_hidden_size,
                             opt.batch_size, freeze, opt.word2vec_path,
                             max_sent_length, max_word_length)
    elif opt.graph == 2:
        print('use deep graph model')
        model = DHierGraphNet(opt.word_hidden_size, opt.sent_hidden_size,
                              opt.batch_size, freeze, opt.word2vec_path,
                              max_sent_length, max_word_length)
    else:
        model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size,
                           opt.batch_size, freeze, opt.word2vec_path,
                           max_sent_length, max_word_length)

    # writer.add_graph(model, torch.zeros(opt.batch_size, max_sent_length, max_word_length))

    if torch.cuda.is_available():
        model.cuda()
    #m = nn.Sigmoid()
    #criterion = nn.CosineEmbeddingLoss()
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=opt.lr)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epoches):
        start_time = time.time()
        loss_ls = []
        te_label_ls = []
        te_pred_ls = []
        for iter, (feature1, feature2, label,
                   pos) in enumerate(training_generator):
            num_sample = len(label)
            if torch.cuda.is_available():
                feature1 = feature1.cuda()
                feature2 = feature2.cuda()
                label = label.float().cuda()
            #print(label.shape)
            #print(feature1)
            #print(feature2)
            optimizer.zero_grad()
            model._init_hidden_state()
            predictions = model(feature1, feature2)
            #print(label)
            #print(predictions)
            #cosine:
            #loss = criterion(output_1, output_2, label)
            #BCE:
            #print(predictions)
            #print(label)
            loss = criterion(predictions, label)
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(
                label.cpu().numpy(),
                predictions.cpu().detach().numpy(),
                list_metrics=["accuracy"])
            print(
                "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                .format(epoch + 1, opt.num_epoches, iter + 1,
                        num_iter_per_epoch, optimizer.param_groups[0]['lr'],
                        loss, training_metrics["accuracy"]),
                flush=True)
            print("--- %s seconds ---" % (time.time() - start_time))
            start_time = time.time()
            # writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
            # writer.add_scalar('Train/Accuracy', training_metrics["accuracy"], epoch * num_iter_per_epoch + iter)
            loss_ls.append(loss * num_sample)
            te_label_ls.extend(label.clone().cpu())
            te_pred_ls.append(predictions.clone().cpu())
            sum_all = 0
            sum_updated = 0
            '''
            for name, param in model.named_parameters():
                print('All parameters')
                print(name,torch.numel(param.data))
                sum_all += torch.numel(param.data)
                if param.requires_grad:
                    print('Updated parameters:')
                    print(name,torch.numel(param.data))
                    sum_updated+= torch.numel(param.data)
            print('all', sum_all)
            print('update', sum_updated)
            '''
        #print total train loss
        te_loss = sum(loss_ls) / test_set.__len__()
        te_pred = torch.cat(te_pred_ls, 0)
        te_label = np.array(te_label_ls)
        test_metrics = get_evaluation(
            te_label,
            te_pred.detach().numpy(),
            list_metrics=["accuracy", "confusion_matrix"])
        output_file.write(
            "Epoch: {}/{} \nTrain loss: {} Train accuracy: {} \nTrain confusion matrix: \n{}\n\n"
            .format(epoch + 1, opt.num_epoches, te_loss,
                    test_metrics["accuracy"],
                    test_metrics["confusion_matrix"]))
        print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
            epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
            te_loss, test_metrics["accuracy"]))
        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature1, te_feature2, te_label, cite_pos in test_generator:
                num_sample = len(te_label)
                #print(num_sample)
                if torch.cuda.is_available():
                    te_feature1 = te_feature1.cuda()
                    te_feature2 = te_feature2.cuda()
                    te_label = te_label.float().cuda()
                with torch.no_grad():
                    model._init_hidden_state(num_sample)
                    te_predictions = model(te_feature1, te_feature2)
                    te_predictions = te_predictions[-1]
                te_loss = criterion(te_predictions, te_label)
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(
                te_label,
                te_pred.numpy(),
                list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n"
                .format(epoch + 1, opt.num_epoches, te_loss,
                        test_metrics["accuracy"],
                        test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            for name, param in model.named_parameters():
                if param.requires_grad:
                    if name == 'fd.weight':
                        print(name, param.data)
            #writer.add_scalar('Test/Loss', te_loss, epoch)
            #writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + opt.model_name)
                torch.save(model.state_dict(),
                           opt.saved_path + os.sep + opt.model_name + '.pth')

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, te_loss))
                break
Esempio n. 2
0
def test(opt):
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }
    if os.path.isdir(opt.output):
        shutil.rmtree(opt.output)
    os.makedirs(opt.output)
    if torch.cuda.is_available():
        model = torch.load(opt.model_path + os.sep + opt.model_type + "_model")
    else:
        model = torch.load(opt.model_path + os.sep + opt.model_type + "_model",
                           map_location=lambda storage, loc: storage)
    max_news_length, max_sent_length, max_word_length = get_max_lengths(
        opt.train_set)
    stock_length = 9
    test_set = MyDataset(data_path=opt.test_set,
                         dict_path=opt.word2vec_path,
                         max_news_length=max_news_length,
                         max_sent_length=max_sent_length,
                         max_word_length=max_word_length,
                         days_num=opt.days_num,
                         stock_length=stock_length)
    test_generator = DataLoader(test_set, **test_params)

    if torch.cuda.is_available():
        model.cuda()
    model.eval()
    te_label_ls = []
    te_pred_ls = []
    for te_days_news, te_days_stock, te_label in test_generator:
        num_sample = len(te_label)
        if torch.cuda.is_available():
            te_feature = te_days_news.cuda()
            te_days_stock = te_days_stock.cuda()
            te_label = te_label.cuda()
        with torch.no_grad():
            if opt.model_type in [
                    "ori_han", "sent_ori_han", "muil_han", "sent_muil_han"
            ]:
                te_predictions = model(te_days_news)
            elif opt.model_type in ["muil_stock_han", "sent_muil_stock_han"]:
                te_predictions = model(te_days_news, te_days_stock)
            te_predictions = F.softmax(te_predictions)
        te_label_ls.extend(te_label.clone().cpu())
        te_pred_ls.append(te_predictions.clone().cpu())
    te_pred = torch.cat(te_pred_ls, 0).numpy()
    te_label = np.array(te_label_ls)

    fieldnames = ['True label', 'Predicted label', 'Content']
    with open(opt.output + os.sep + "predictions.csv", 'w') as csv_file:
        writer = csv.DictWriter(csv_file,
                                fieldnames=fieldnames,
                                quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        for i, j, k in zip(te_label, te_pred, test_set.newses,
                           test_set.stocks):
            writer.writerow({
                'True label': i + 1,
                'Predicted label': np.argmax(j) + 1,
                'Content': k
            })

    test_metrics = get_evaluation(
        te_label,
        te_pred,
        list_metrics=["accuracy", "loss", "confusion_matrix"])
    print("Prediction:\nLoss: {} Accuracy: {} \nConfusion matrix: \n{}".format(
        test_metrics["loss"], test_metrics["accuracy"],
        test_metrics["confusion_matrix"]))
Esempio n. 3
0
def train(opt, train_data_path, test_data_path, valid_data_path):
    task = opt.task_name
    if torch.cuda.is_available():
        torch.cuda.manual_seed(2019)
    else:
        torch.manual_seed(2019)
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }

    max_word_length, max_sent_length = get_max_lengths(opt.input_path)
    print("Max words: ", max_word_length, "Max sents: ", max_sent_length)

    #df_data = pd.read_csv(data_path, encoding='utf8', sep='\t')
    #df_data = df_data.sample(frac=1, random_state=2019)
    #print(df_data.shape)
    #y = df_data.readability.values

    #kf = model_selection.StratifiedKFold(n_splits=5)

    predicted_all_folds = []
    true_all_folds = []
    counter = 0
    accuracies_all_folds = []
    precision_all_folds = []
    recall_all_folds = []
    f1_all_folds = []
    qwk_all_folds = []

    #if os.path.exists(opt.vocab_path):
    #   os.remove(opt.vocab_path)

    df_train = pd.read_csv(train_data_path, encoding='utf8', sep='\t')
    df_test = pd.read_csv(test_data_path, encoding='utf8', sep='\t')
    df_valid = pd.read_csv(valid_data_path, encoding='utf8', sep='\t')

    training_set = MyDataset(df_train, opt.vocab_path, task, max_sent_length,
                             max_word_length)
    training_generator = DataLoader(training_set, **training_params)

    test_set = MyDataset(df_test, opt.vocab_path, task, max_sent_length,
                         max_word_length)
    test_generator = DataLoader(test_set, **test_params)

    valid_set = MyDataset(df_valid, opt.vocab_path, task, max_sent_length,
                          max_word_length)
    valid_generator = DataLoader(valid_set, **test_params)

    model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size,
                       opt.batch_size, training_set.num_classes,
                       opt.vocab_path, max_sent_length, max_word_length)

    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    best_loss = 1e5
    best_epoch = 0
    num_iter_per_epoch = len(training_generator)

    for epoch in range(opt.num_epoches):
        model.train()
        for iter, (feature, label) in enumerate(training_generator):
            if torch.cuda.is_available():
                feature = feature.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            model._init_hidden_state()
            predictions = model(feature)
            loss = criterion(predictions, label)
            loss.backward()
            optimizer.step()

            #`clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
            training_metrics = get_evaluation(
                label.cpu().numpy(),
                predictions.cpu().detach().numpy(),
                list_metrics=["accuracy"])
            print(
                "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                .format(epoch + 1, opt.num_epoches, iter + 1,
                        num_iter_per_epoch, optimizer.param_groups[0]['lr'],
                        loss, training_metrics["accuracy"]))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature, te_label in valid_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_feature = te_feature.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    model._init_hidden_state(num_sample)
                    te_predictions = model(te_feature)
                te_loss = criterion(te_predictions, te_label)
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(
                te_label,
                te_pred.numpy(),
                list_metrics=["accuracy", "confusion_matrix", "qwk"])

            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                print('Saving model')
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, best_loss))
                break

    print()
    print('Evaluation: ')
    print()

    model.eval()
    model = torch.load(opt.saved_path + os.sep + "whole_model_han")
    loss_ls = []
    te_label_ls = []
    te_pred_ls = []
    for te_feature, te_label in test_generator:
        num_sample = len(te_label)
        if torch.cuda.is_available():
            te_feature = te_feature.cuda()
            te_label = te_label.cuda()
        with torch.no_grad():
            model._init_hidden_state(num_sample)
            te_predictions = model(te_feature)
        te_loss = criterion(te_predictions, te_label)
        loss_ls.append(te_loss * num_sample)
        te_label_ls.extend(te_label.clone().cpu())
        te_pred_ls.append(te_predictions.clone().cpu())
    te_pred = torch.cat(te_pred_ls, 0)
    te_label = np.array(te_label_ls)
    test_metrics = get_evaluation(te_label,
                                  te_pred.numpy(),
                                  list_metrics=[
                                      "accuracy", "precision", "recall", "f1",
                                      "confusion_matrix", 'qwk'
                                  ])

    true = te_label
    preds = np.argmax(te_pred.numpy(), -1)
    predicted_all_folds.extend(preds)
    true_all_folds.extend(true)

    print("Test set accuracy: {}".format(test_metrics["accuracy"]))
    print("Test set precision: {}".format(test_metrics["precision"]))
    print("Test set recall: {}".format(test_metrics["recall"]))
    print("Test set f1: {}".format(test_metrics["f1"]))
    print("Test set cm: {}".format(test_metrics["confusion_matrix"]))
    print("Test set qwk: {}".format(test_metrics["qwk"]))

    accuracies_all_folds.append(test_metrics["accuracy"])
    precision_all_folds.append(test_metrics["precision"])
    recall_all_folds.append(test_metrics["recall"])
    f1_all_folds.append(test_metrics["f1"])
    qwk_all_folds.append(test_metrics["qwk"])
    print()

    #if task in ['newsela', 'merlin', 'capito', 'apa']:
    #   break

    print()
    print("Task: ", task)
    print("Accuracy: ", accuracy_score(true_all_folds, predicted_all_folds))
    print(
        "Precison: ",
        precision_score(true_all_folds,
                        predicted_all_folds,
                        average="weighted"))
    print(
        "Recall: ",
        recall_score(true_all_folds, predicted_all_folds, average="weighted"))
    print("F1: ",
          f1_score(true_all_folds, predicted_all_folds, average="weighted"))
    print('Confusion matrix: ',
          confusion_matrix(true_all_folds, predicted_all_folds))
    print(
        'QWK: ',
        cohen_kappa_score(true_all_folds,
                          predicted_all_folds,
                          weights="quadratic"))

    print('All folds accuracy: ', accuracies_all_folds)
    print('All folds precision: ', precision_all_folds)
    print('All folds recall: ', recall_all_folds)
    print('All folds f1: ', f1_all_folds)
    print('All folds QWK: ', qwk_all_folds)
Esempio n. 4
0
def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }

    max_word_length, max_sent_length = get_max_lengths(opt.train_set)
    training_set = MyDataset(opt.train_set, opt.word2vec_path, max_sent_length,
                             max_word_length)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(opt.test_set, opt.word2vec_path, max_sent_length,
                         max_word_length)
    test_generator = DataLoader(test_set, **test_params)

    model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size,
                       opt.batch_size, training_set.num_classes,
                       opt.word2vec_path, max_sent_length, max_word_length)

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    writer = SummaryWriter(opt.log_path)
    # writer.add_graph(model, torch.zeros(opt.batch_size, max_sent_length, max_word_length))

    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=opt.lr,
                                momentum=opt.momentum)
    # optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=opt.lr)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epoches):
        for iter, (feature1, feature2, label) in enumerate(training_generator):
            if torch.cuda.is_available():
                feature1 = feature1.cuda()
                feature2 = feature2.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            model._init_hidden_state()
            feature1 = model(feature1)
            model._init_hidden_state()
            feature2 = model(feature2)

            diff = exponent_neg_manhattan_distance(feature1, feature2)
            loss = criterion(diff, label.float())
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(label.cpu().numpy(),
                                              diff.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            print(
                "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                .format(epoch + 1, opt.num_epoches, iter + 1,
                        num_iter_per_epoch, optimizer.param_groups[0]['lr'],
                        loss, training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss,
                              epoch * num_iter_per_epoch + iter)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"],
                              epoch * num_iter_per_epoch + iter)
        # torch.save(model, opt.saved_path + os.sep + "whole_model_han_{}".format(epoch))
        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature1, te_feature2, te_label in test_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_feature1 = te_feature1.cuda()
                    te_feature2 = te_feature2.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    model._init_hidden_state(num_sample)
                    te_feature1 = model(te_feature1)
                    model._init_hidden_state(num_sample)
                    te_feature2 = model(te_feature2)
                te_diff = exponent_neg_manhattan_distance(
                    te_feature1, te_feature2)
                te_loss = criterion(te_diff, te_label.float())
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_diff.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(
                te_label,
                te_pred.numpy(),
                list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n"
                .format(epoch + 1, opt.num_epoches, te_loss,
                        test_metrics["accuracy"],
                        test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            writer.add_scalar('Test/Loss', te_loss, epoch)
            writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, te_loss))
                break