Example #1
0
def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
        print("cuda...")
    else:
        torch.manual_seed(123)
    # training setting
    output_file = open(opt.saved_path + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {
        "batch_size": opt.batch_size,
        "num_workers": opt.num_workers,
        "shuffle": True,
        "pin_memory": True,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "num_workers": opt.num_workers,
        "shuffle": False,
        "pin_memory": True,
        "drop_last": False
    }
    # training dataset info
    # max_news_length, max_sent_length, max_word_length = get_max_lengths(opt.train_set)
    # stock_length = 9

    data_init_time = datetime.datetime.now()
    training_set = MyDataset(data_path=opt.train_set)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(data_path=opt.test_set)
    test_generator = DataLoader(test_set, **test_params)
    data_end_time = datetime.datetime.now()
    print("the data loading time is: {}s...".format(
        (data_end_time - data_init_time).seconds))

    # model init
    model_init_time = datetime.datetime.now()
    if opt.model_type == "ori_han":
        model = Ori_HAN(days_num=opt.days_num,
                        days_hidden_size=opt.days_hidden_size,
                        news_hidden_size=opt.news_hidden_size,
                        num_classes=training_set.num_classes,
                        pretrained_word2vec_path=opt.word2vec_path,
                        dropout=opt.dropout)
    elif opt.model_type == "sent_ori_han":
        model = Sent_Ori_HAN(days_num=opt.days_num,
                             days_hidden_size=opt.days_hidden_size,
                             news_hidden_size=opt.news_hidden_size,
                             sent_hidden_size=opt.sent_hidden_size,
                             num_classes=training_set.num_classes,
                             pretrained_word2vec_path=opt.word2vec_path,
                             dropout=opt.dropout)
    elif opt.model_type == "muil_han":
        model = Muil_HAN(head_num=opt.head_num,
                         days_num=opt.days_num,
                         days_hidden_size=opt.days_hidden_size,
                         news_hidden_size=opt.news_hidden_size,
                         num_classes=training_set.num_classes,
                         pretrained_word2vec_path=opt.word2vec_path,
                         dropout=opt.dropout)
    elif opt.model_type == "sent_muil_han":
        model = Sent_Muil_HAN(head_num=opt.head_num,
                              days_num=opt.days_num,
                              days_hidden_size=opt.days_hidden_size,
                              news_hidden_size=opt.news_hidden_size,
                              sent_hidden_size=opt.sent_hidden_size,
                              num_classes=training_set.num_classes,
                              pretrained_word2vec_path=opt.word2vec_path,
                              dropout=opt.dropout)
    elif opt.model_type == "muil_stock_han":
        model = Muil_Stock_HAN(head_num=opt.head_num,
                               days_num=opt.days_num,
                               days_hidden_size=opt.days_hidden_size,
                               news_hidden_size=opt.news_hidden_size,
                               stock_hidden_size=opt.stock_hidden_size,
                               stock_length=stock_length,
                               num_classes=training_set.num_classes,
                               pretrained_word2vec_path=opt.word2vec_path,
                               dropout=opt.dropout)
    elif opt.model_type == "sent_muil_stock_han":
        model = Sent_Muil_Stock_HAN(head_num=opt.head_num,
                                    days_num=opt.days_num,
                                    days_hidden_size=opt.days_hidden_size,
                                    news_hidden_size=opt.news_hidden_size,
                                    sent_hidden_size=opt.sent_hidden_size,
                                    stock_hidden_size=opt.stock_hidden_size,
                                    stock_length=stock_length,
                                    num_classes=training_set.num_classes,
                                    pretrained_word2vec_path=opt.word2vec_path,
                                    dropout=opt.dropout)
    model_end_time = datetime.datetime.now()
    print("the model init time is: {}s...".format(
        (model_end_time - model_init_time).seconds))

    # other setting
    if os.path.isdir(opt.log_path + opt.model_type):
        shutil.rmtree(opt.log_path + opt.model_type)  # 递归删除文件夹下的所有子文件夹
    os.makedirs(opt.log_path + opt.model_type)
    writer = SummaryWriter(opt.log_path + opt.model_type)

    # 模型训练相关信息初始化
    if torch.cuda.is_available():
        model.cuda()
        print("model use cuda...")

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=opt.lr,
                                momentum=opt.momentum)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)

    # 训练模型
    print("start to train model...")
    for epoch in range(opt.num_epoches):
        dataloader_init_time = datetime.datetime.now()
        for iter, (days_news, days_stock,
                   label) in enumerate(training_generator):
            dataloader_end_time = datetime.datetime.now()
            print("the dataloader loading time is: {}s...".format(
                (dataloader_end_time - dataloader_init_time).seconds))
            if torch.cuda.is_available():
                days_news = days_news.cuda()
                days_stock = days_stock.cuda()
                label = label.cuda()
                print("data use cuda...")
            training_init_time = datetime.datetime.now()
            optimizer.zero_grad()
            if opt.model_type in [
                    "ori_han", "sent_ori_han", "muil_han", "sent_muil_han"
            ]:
                predictions = model(days_news)
            elif opt.model_type in ["muil_stock_han", "sent_muil_stock_han"]:
                predictions = model(days_news, days_stock)
            loss = criterion(predictions, torch.tensor(label))
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(
                label.cpu().numpy(),
                predictions.cpu().detach().numpy(),
                list_metrics=["accuracy"])
            print(
                "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                .format(epoch + 1, opt.num_epoches, iter + 1,
                        num_iter_per_epoch, optimizer.param_groups[0]['lr'],
                        loss, training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss,
                              epoch * num_iter_per_epoch + iter)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"],
                              epoch * num_iter_per_epoch + iter)
            training_end_time = datetime.datetime.now()
            print("the training time is: {}s...".format(
                (training_end_time - training_init_time).seconds))
        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_days_news, te_days_stock, te_label in test_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_days_news = te_days_news.cuda()
                    te_days_stock = te_days_stock.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    if opt.model_type in [
                            "ori_han", "sent_ori_han", "muil_han",
                            "sent_muil_han"
                    ]:
                        te_predictions = model(te_days_news)
                    elif opt.model_type in [
                            "muil_stock_han", "sent_muil_stock_han"
                    ]:
                        te_predictions = model(te_days_news, te_days_stock)
                te_loss = criterion(te_predictions, torch.tensor(te_label))
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(
                te_label,
                te_pred.numpy(),
                list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n"
                .format(epoch + 1, opt.num_epoches, te_loss,
                        test_metrics["accuracy"],
                        test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            writer.add_scalar('Test/Loss', te_loss, epoch)
            writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + opt.model_type + "_model")

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, te_loss))
                break
def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    if opt.dataset in ["agnews", "dbpedia", "yelp_review", "yelp_review_polarity", "amazon_review",
                       "amazon_polarity", "sogou_news", "yahoo_answers"]:
        opt.input, opt.output = get_default_folder(opt.dataset, opt.feature)

    if not os.path.exists(opt.output):
        os.makedirs(opt.output)
    output_file = open(opt.output + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))

    training_params = {"batch_size": opt.batch_size,
                       "shuffle": True,
                       "num_workers": 0}
    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "num_workers": 0}
    training_set = MyDataset(opt.input + os.sep + "train.csv", opt.max_length)
    test_set = MyDataset(opt.input + os.sep + "test.csv", opt.max_length)
    training_generator = DataLoader(training_set, **training_params)
    test_generator = DataLoader(test_set, **test_params)

    if opt.feature == "small":
        model = CharacterLevelCNN(input_length=opt.max_length, n_classes=training_set.num_classes,
                                  input_dim=len(opt.alphabet),
                                  n_conv_filters=256, n_fc_neurons=1024)

    elif opt.feature == "large":
        model = CharacterLevelCNN(input_length=opt.max_length, n_classes=training_set.num_classes,
                                  input_dim=len(opt.alphabet),
                                  n_conv_filters=1024, n_fc_neurons=2048)
    else:
        sys.exit("Invalid feature mode!")

    log_path = "{}_{}_{}".format(opt.log_path, opt.feature, opt.dataset)
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    writer = SummaryWriter(log_path)

    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    if opt.optimizer == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    elif opt.optimizer == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)

    for epoch in range(opt.num_epochs):
        for iter, batch in enumerate(training_generator):
            feature, label = batch
            if torch.cuda.is_available():
                feature = feature.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            predictions = model(feature)
            loss = criterion(predictions, label)
            loss.backward()
            optimizer.step()

            training_metrics = get_evaluation(label.cpu().numpy(), predictions.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1,
                opt.num_epochs,
                iter + 1,
                num_iter_per_epoch,
                optimizer.param_groups[0]['lr'],
                loss, training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"], epoch * num_iter_per_epoch + iter)
        model.eval()
        loss_ls = []
        te_label_ls = []
        te_pred_ls = []
        for batch in test_generator:
            te_feature, te_label = batch
            num_sample = len(te_label)
            if torch.cuda.is_available():
                te_feature = te_feature.cuda()
                te_label = te_label.cuda()
            with torch.no_grad():
                te_predictions = model(te_feature)
            te_loss = criterion(te_predictions, te_label)
            loss_ls.append(te_loss * num_sample)
            te_label_ls.extend(te_label.clone().cpu())
            te_pred_ls.append(te_predictions.clone().cpu())

        te_loss = sum(loss_ls) / test_set.__len__()
        te_pred = torch.cat(te_pred_ls, 0)
        te_label = np.array(te_label_ls)
        test_metrics = get_evaluation(te_label, te_pred.numpy(), list_metrics=["accuracy", "confusion_matrix"])
        output_file.write(
            "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                epoch + 1, opt.num_epochs,
                te_loss,
                test_metrics["accuracy"],
                test_metrics["confusion_matrix"]))
        print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
            epoch + 1,
            opt.num_epochs,
            optimizer.param_groups[0]['lr'],
            te_loss, test_metrics["accuracy"]))
        writer.add_scalar('Test/Loss', te_loss, epoch)
        writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
        model.train()
        if te_loss + opt.es_min_delta < best_loss:
            best_loss = te_loss
            best_epoch = epoch
            torch.save(model, "{}/char-cnn_{}_{}".format(opt.output, opt.dataset, opt.feature))
        # Early stopping
        if epoch - best_epoch > opt.es_patience > 0:
            print("Stop training at epoch {}. The lowest loss achieved is {} at epoch {}".format(epoch, te_loss, best_epoch))
            break
        if opt.optimizer == "sgd" and epoch % 3 == 0 and epoch > 0:
            current_lr = optimizer.state_dict()['param_groups'][0]['lr']
            current_lr /= 2
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_lr
Example #3
0
def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    # os.chdir("/data2/xuhuizh/graphM_project/HAMN")
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }
    if opt.tune == 1:
        print('Fine tune the word embedding')
        freeze = False
    else:
        freeze = True
    if opt.max_sent_words == "5,5":
        max_word_length, max_sent_length = get_max_lengths(opt.train_set)
    else:
        max_word_length, max_sent_length = [
            int(x) for x in opt.max_sent_words.split(',')
        ]
    print(max_word_length, max_sent_length)
    training_set = MyDataset(opt.train_set, opt.word2vec_path, max_sent_length,
                             max_word_length)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(opt.test_set, opt.word2vec_path, max_sent_length,
                         max_word_length)
    test_generator = DataLoader(test_set, **test_params)
    if opt.graph == 1:
        print('use graph model')
        model = HierGraphNet(opt.word_hidden_size, opt.sent_hidden_size,
                             opt.batch_size, freeze, opt.word2vec_path,
                             max_sent_length, max_word_length)
    elif opt.graph == 2:
        print('use deep graph model')
        model = DHierGraphNet(opt.word_hidden_size, opt.sent_hidden_size,
                              opt.batch_size, freeze, opt.word2vec_path,
                              max_sent_length, max_word_length)
    else:
        model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size,
                           opt.batch_size, freeze, opt.word2vec_path,
                           max_sent_length, max_word_length)
    # if os.path.isdir(opt.log_path):
    #    shutil.rmtree(opt.log_path)
    # os.makedirs(opt.log_path)
    #writer = SummaryWriter(opt.log_path)
    # writer.add_graph(model, torch.zeros(opt.batch_size, max_sent_length, max_word_length))

    if torch.cuda.is_available():
        model.cuda()
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=opt.lr)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epoches):
        start_time = time.time()
        loss_ls = []
        te_label_ls = []
        te_pred_ls = []
        for iter, (feature1, feature2, label) in enumerate(training_generator):
            num_sample = len(label)
            if torch.cuda.is_available():
                feature1 = feature1.cuda()
                feature2 = feature2.cuda()
                label = label.float().cuda()
            optimizer.zero_grad()
            model._init_hidden_state()
            predictions = model(feature1, feature2)
            loss = criterion(predictions, label)
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(
                label.cpu().numpy(),
                predictions.cpu().detach().numpy(),
                list_metrics=["accuracy"])
            print(
                "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                .format(epoch + 1, opt.num_epoches, iter + 1,
                        num_iter_per_epoch, optimizer.param_groups[0]['lr'],
                        loss, training_metrics["accuracy"]),
                flush=True)
            print("--- %s seconds ---" % (time.time() - start_time))
            start_time = time.time()
            # writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
            # writer.add_scalar('Train/Accuracy', training_metrics["accuracy"], epoch * num_iter_per_epoch + iter)
            loss_ls.append(loss * num_sample)
            te_label_ls.extend(label.clone().cpu())
            te_pred_ls.append(predictions.clone().cpu())
            sum_all = 0
            sum_updated = 0
            ''' 
            for name, param in model.named_parameters():
                print('All parameters')
                print(name,torch.numel(param.data))
                sum_all += torch.numel(param.data)
                if param.requires_grad:
                    print('Updated parameters:')
                    print(name,torch.numel(param.data))
                    sum_updated+= torch.numel(param.data)
            print('all', sum_all)
            print('update', sum_updated)
            '''
        #print total train loss
        te_loss = sum(loss_ls) / test_set.__len__()
        te_pred = torch.cat(te_pred_ls, 0)
        te_label = np.array(te_label_ls)
        test_metrics = get_evaluation(
            te_label,
            te_pred.detach().numpy(),
            list_metrics=["accuracy", "confusion_matrix"])
        output_file.write(
            "Epoch: {}/{} \nTrain loss: {} Train accuracy: {} \nTrain confusion matrix: \n{}\n\n"
            .format(epoch + 1, opt.num_epoches, te_loss,
                    test_metrics["accuracy"],
                    test_metrics["confusion_matrix"]))
        print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
            epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
            te_loss, test_metrics["accuracy"]))
        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature1, te_feature2, te_label in test_generator:
                num_sample = len(te_label)
                #print(num_sample)
                if torch.cuda.is_available():
                    te_feature1 = te_feature1.cuda()
                    te_feature2 = te_feature2.cuda()
                    te_label = te_label.float().cuda()
                with torch.no_grad():
                    model._init_hidden_state(num_sample)
                    te_predictions = model(te_feature1, te_feature2)
                te_loss = criterion(te_predictions, te_label)
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(
                te_label,
                te_pred.numpy(),
                list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n"
                .format(epoch + 1, opt.num_epoches, te_loss,
                        test_metrics["accuracy"],
                        test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            for name, param in model.named_parameters():
                if param.requires_grad:
                    if name == 'fd.weight':
                        print(name, param.data)
        # writer.add_scalar('Test/Loss', te_loss, epoch)
        # writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + opt.model_name)
                logger.info('saved model')

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, te_loss))
                break
Example #4
0
def train(opt, train_data_path, test_data_path, valid_data_path):
    task = opt.task_name
    if torch.cuda.is_available():
        torch.cuda.manual_seed(2019)
    else:
        torch.manual_seed(2019)
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }

    max_word_length, max_sent_length = get_max_lengths(opt.input_path)
    print("Max words: ", max_word_length, "Max sents: ", max_sent_length)

    #df_data = pd.read_csv(data_path, encoding='utf8', sep='\t')
    #df_data = df_data.sample(frac=1, random_state=2019)
    #print(df_data.shape)
    #y = df_data.readability.values

    #kf = model_selection.StratifiedKFold(n_splits=5)

    predicted_all_folds = []
    true_all_folds = []
    counter = 0
    accuracies_all_folds = []
    precision_all_folds = []
    recall_all_folds = []
    f1_all_folds = []
    qwk_all_folds = []

    #if os.path.exists(opt.vocab_path):
    #   os.remove(opt.vocab_path)

    df_train = pd.read_csv(train_data_path, encoding='utf8', sep='\t')
    df_test = pd.read_csv(test_data_path, encoding='utf8', sep='\t')
    df_valid = pd.read_csv(valid_data_path, encoding='utf8', sep='\t')

    training_set = MyDataset(df_train, opt.vocab_path, task, max_sent_length,
                             max_word_length)
    training_generator = DataLoader(training_set, **training_params)

    test_set = MyDataset(df_test, opt.vocab_path, task, max_sent_length,
                         max_word_length)
    test_generator = DataLoader(test_set, **test_params)

    valid_set = MyDataset(df_valid, opt.vocab_path, task, max_sent_length,
                          max_word_length)
    valid_generator = DataLoader(valid_set, **test_params)

    model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size,
                       opt.batch_size, training_set.num_classes,
                       opt.vocab_path, max_sent_length, max_word_length)

    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    best_loss = 1e5
    best_epoch = 0
    num_iter_per_epoch = len(training_generator)

    for epoch in range(opt.num_epoches):
        model.train()
        for iter, (feature, label) in enumerate(training_generator):
            if torch.cuda.is_available():
                feature = feature.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            model._init_hidden_state()
            predictions = model(feature)
            loss = criterion(predictions, label)
            loss.backward()
            optimizer.step()

            #`clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
            training_metrics = get_evaluation(
                label.cpu().numpy(),
                predictions.cpu().detach().numpy(),
                list_metrics=["accuracy"])
            print(
                "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                .format(epoch + 1, opt.num_epoches, iter + 1,
                        num_iter_per_epoch, optimizer.param_groups[0]['lr'],
                        loss, training_metrics["accuracy"]))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature, te_label in valid_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_feature = te_feature.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    model._init_hidden_state(num_sample)
                    te_predictions = model(te_feature)
                te_loss = criterion(te_predictions, te_label)
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(
                te_label,
                te_pred.numpy(),
                list_metrics=["accuracy", "confusion_matrix", "qwk"])

            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                print('Saving model')
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, best_loss))
                break

    print()
    print('Evaluation: ')
    print()

    model.eval()
    model = torch.load(opt.saved_path + os.sep + "whole_model_han")
    loss_ls = []
    te_label_ls = []
    te_pred_ls = []
    for te_feature, te_label in test_generator:
        num_sample = len(te_label)
        if torch.cuda.is_available():
            te_feature = te_feature.cuda()
            te_label = te_label.cuda()
        with torch.no_grad():
            model._init_hidden_state(num_sample)
            te_predictions = model(te_feature)
        te_loss = criterion(te_predictions, te_label)
        loss_ls.append(te_loss * num_sample)
        te_label_ls.extend(te_label.clone().cpu())
        te_pred_ls.append(te_predictions.clone().cpu())
    te_pred = torch.cat(te_pred_ls, 0)
    te_label = np.array(te_label_ls)
    test_metrics = get_evaluation(te_label,
                                  te_pred.numpy(),
                                  list_metrics=[
                                      "accuracy", "precision", "recall", "f1",
                                      "confusion_matrix", 'qwk'
                                  ])

    true = te_label
    preds = np.argmax(te_pred.numpy(), -1)
    predicted_all_folds.extend(preds)
    true_all_folds.extend(true)

    print("Test set accuracy: {}".format(test_metrics["accuracy"]))
    print("Test set precision: {}".format(test_metrics["precision"]))
    print("Test set recall: {}".format(test_metrics["recall"]))
    print("Test set f1: {}".format(test_metrics["f1"]))
    print("Test set cm: {}".format(test_metrics["confusion_matrix"]))
    print("Test set qwk: {}".format(test_metrics["qwk"]))

    accuracies_all_folds.append(test_metrics["accuracy"])
    precision_all_folds.append(test_metrics["precision"])
    recall_all_folds.append(test_metrics["recall"])
    f1_all_folds.append(test_metrics["f1"])
    qwk_all_folds.append(test_metrics["qwk"])
    print()

    #if task in ['newsela', 'merlin', 'capito', 'apa']:
    #   break

    print()
    print("Task: ", task)
    print("Accuracy: ", accuracy_score(true_all_folds, predicted_all_folds))
    print(
        "Precison: ",
        precision_score(true_all_folds,
                        predicted_all_folds,
                        average="weighted"))
    print(
        "Recall: ",
        recall_score(true_all_folds, predicted_all_folds, average="weighted"))
    print("F1: ",
          f1_score(true_all_folds, predicted_all_folds, average="weighted"))
    print('Confusion matrix: ',
          confusion_matrix(true_all_folds, predicted_all_folds))
    print(
        'QWK: ',
        cohen_kappa_score(true_all_folds,
                          predicted_all_folds,
                          weights="quadratic"))

    print('All folds accuracy: ', accuracies_all_folds)
    print('All folds precision: ', precision_all_folds)
    print('All folds recall: ', recall_all_folds)
    print('All folds f1: ', f1_all_folds)
    print('All folds QWK: ', qwk_all_folds)
Example #5
0
def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }

    max_word_length, max_sent_length = get_max_lengths(opt.train_set)
    training_set = MyDataset(opt.train_set, opt.word2vec_path, max_sent_length,
                             max_word_length)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(opt.test_set, opt.word2vec_path, max_sent_length,
                         max_word_length)
    test_generator = DataLoader(test_set, **test_params)

    model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size,
                       opt.batch_size, training_set.num_classes,
                       opt.word2vec_path, max_sent_length, max_word_length)

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    writer = SummaryWriter(opt.log_path)
    # writer.add_graph(model, torch.zeros(opt.batch_size, max_sent_length, max_word_length))

    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=opt.lr,
                                momentum=opt.momentum)
    # optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=opt.lr)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epoches):
        for iter, (feature1, feature2, label) in enumerate(training_generator):
            if torch.cuda.is_available():
                feature1 = feature1.cuda()
                feature2 = feature2.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            model._init_hidden_state()
            feature1 = model(feature1)
            model._init_hidden_state()
            feature2 = model(feature2)

            diff = exponent_neg_manhattan_distance(feature1, feature2)
            loss = criterion(diff, label.float())
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(label.cpu().numpy(),
                                              diff.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            print(
                "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                .format(epoch + 1, opt.num_epoches, iter + 1,
                        num_iter_per_epoch, optimizer.param_groups[0]['lr'],
                        loss, training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss,
                              epoch * num_iter_per_epoch + iter)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"],
                              epoch * num_iter_per_epoch + iter)
        # torch.save(model, opt.saved_path + os.sep + "whole_model_han_{}".format(epoch))
        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature1, te_feature2, te_label in test_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_feature1 = te_feature1.cuda()
                    te_feature2 = te_feature2.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    model._init_hidden_state(num_sample)
                    te_feature1 = model(te_feature1)
                    model._init_hidden_state(num_sample)
                    te_feature2 = model(te_feature2)
                te_diff = exponent_neg_manhattan_distance(
                    te_feature1, te_feature2)
                te_loss = criterion(te_diff, te_label.float())
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_diff.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(
                te_label,
                te_pred.numpy(),
                list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n"
                .format(epoch + 1, opt.num_epoches, te_loss,
                        test_metrics["accuracy"],
                        test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            writer.add_scalar('Test/Loss', te_loss, epoch)
            writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, te_loss))
                break
Example #6
0
def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(0)
    else:
        torch.manual_seed(0)
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }

    train_idx, test_idx, label2idx = k_fold_split(opt.data_set)
    training_set = MyDataset(opt.data_set, opt.bert_path, train_idx[0],
                             label2idx)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(opt.data_set, opt.bert_path, test_idx[0], label2idx)
    test_generator = DataLoader(test_set, **test_params)

    model = BertHierAttNet(opt.bert_size, opt.word_hidden_size,
                           opt.sent_hidden_size, len(label2idx), opt.bert_path)

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    writer = SummaryWriter(opt.log_path)
    # writer.add_graph(model, torch.zeros(opt.batch_size, max_sent_length, max_word_length))

    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=opt.lr,
                                momentum=opt.momentum)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epoches):
        for i, (x, mask, label, id) in enumerate(training_generator):
            if torch.cuda.is_available():
                x = x.cuda()  # [batch, seq_num, seq_len]
                mask = mask.cuda()  # [batch, seq_num, seq_len]
                label = label.cuda()  # [batch]
            optimizer.zero_grad()
            predictions = model(x, mask)
            loss = criterion(predictions, label)
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(
                label.cpu().numpy(),
                predictions.cpu().detach().numpy(),
                list_metrics=["accuracy"])
            print(
                "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                .format(epoch + 1, opt.num_epoches, i + 1, num_iter_per_epoch,
                        optimizer.param_groups[0]['lr'], loss,
                        training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss,
                              epoch * num_iter_per_epoch + i)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"],
                              epoch * num_iter_per_epoch + i)
        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature, te_label in test_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_feature = te_feature.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    model._init_hidden_state(num_sample)
                    te_predictions = model(te_feature)
                te_loss = criterion(te_predictions, te_label)
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(
                te_label,
                te_pred.numpy(),
                list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n"
                .format(epoch + 1, opt.num_epoches, te_loss,
                        test_metrics["accuracy"],
                        test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            writer.add_scalar('Test/Loss', te_loss, epoch)
            writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, te_loss))
                break
Example #7
0
def train(opt):
    if use_cuda:
        torch.cuda.manual_seed(RANDOM_SEED)
    else:
        torch.manual_seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    random.seed(RANDOM_SEED)

    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {"batch_size": opt.batch_size,
                       "shuffle": True,
                       "drop_last": False}
    test_params = {"batch_size": opt.batch_size,
                   "shuffle": True,
                   "drop_last": False}

    mydataset = MyDataset(superspan_HANsFile, superspan_HANs_labelsFile, label_namesFile, ImportanceFeatureMatsFile,
                          model_gensim, max_vocab, training_inds, childLabel2ParentLabelFile, None, dataset, descriptor_HANsFile, VvFile)

    # training_generator = DataLoader(mydataset, **training_params)
    testing_inds = [i for i in range(len(mydataset.text_lines)) if i not in training_inds]
    if test_these_inds:
        testing_inds = test_these_inds
    test_set = MyDataset(superspan_HANsFile, superspan_HANs_labelsFile, label_namesFile, ImportanceFeatureMatsFile, model_gensim,
                         max_vocab, testing_inds, None, test_this_label, None, None, None, mydataset.max_length_sentences, mydataset.max_length_word)
    test_generator = DataLoader(test_set, **test_params)

    model = HierAttNet(opt.sent_feature_size, phrases2feature_vector_path, dictionary_path,
                       mydataset.max_length_sentences, mydataset.max_length_word,
                       model_save_path, Vv_embedding_path, path_semanticsFile, max_vocab, use_cuda, mydataset, opt.num_bins)
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    writer = SummaryWriter(log_path)

    if use_cuda:
        model.cuda()
    criterion = nn.CrossEntropyLoss()
    att_parameters = set(model.sent_att_net.parameters()) | set(model.word_att_net.parameters())
    optimizer = torch.optim.SGD([
        {'params': filter(lambda p: p.requires_grad, set(model.parameters()) - att_parameters)},
        {'params': filter(lambda p: p.requires_grad, att_parameters), 'lr': opt.lr * 1000},
    ], lr=opt.lr, momentum=opt.momentum)

    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)

    classind2size = Counter()
    topk2classind2errorSize = {
        'top 1': Counter(),
        'top 3': Counter(),
        'top 5': Counter(),
    }
    stop_training = False
    all_labels_set = set([l for l in mydataset.labels_list if '.' in l])
    sampled_labels_set = set()

    for epoch in range(opt.num_epoches):
        for iter, (features, ImportanceFeatureMat, labels, indexes, addtional_info) in tqdm(enumerate(training_generator)):
            if use_cuda:
                features = features.cuda()
                ImportanceFeatureMat = ImportanceFeatureMat.cuda()
                labels = labels.cuda()
            optimizer.zero_grad()
            predictions, attn_score, similarity_w_attentions = model(
                features, ImportanceFeatureMat, get_concept_similarity)
            loss = criterion(predictions, labels)
            if not stop_training:
                loss.backward()
                optimizer.step()
            sampled_labels_set |= set(labels.cpu().numpy())

            if USE_TRAINING_METRICS:
                training_metrics = get_evaluation(labels.cpu().numpy(), predictions.data.cpu().numpy(), list_metrics=[
                                                  "accuracy", "top K accuracy", "top K classind2wrong_doc_ind", "top K tree score"], childLabel2ParentLabel=mydataset.childLabel2ParentLabel, labels_list=mydataset.labels_list)

                print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, top K accuracy: {}, top K tree score: {}".format(
                    epoch + 1,
                    opt.num_epoches,
                    iter + 1,
                    num_iter_per_epoch,
                    optimizer.param_groups[0]['lr'],
                    loss, training_metrics["top K accuracy"], training_metrics["top K tree score"]))

                writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Accuracy',
                                  training_metrics["accuracy"], epoch * num_iter_per_epoch + iter)

                column_names = [model.dataset.labels_list.copy()
                                for _ in range(labels.shape[0])]

                if iter % opt.log_interval == 1:
                    if get_concept_similarity:
                        pickle.dump([pd.DataFrame(s, index=model.dataset.doc_tensor2doc(features[i]), columns=column_names[i]) for i, s in enumerate(
                            model.similarity_w_attentions)], open('log/model.similarity_w_attentions{}.bin'.format(iter), 'wb'))

                    pickle.dump(pd.DataFrame(model.bin_weight_history), open(
                        'log/model.bin_weight_history_{}.bin'.format(iter), 'wb'))
                    pickle.dump(pd.DataFrame(model.sent_att_net.context_weight_history, columns=['position', 'length', 'inTitle']),
                                open('log/model.sent_att_net.context_weight_history_{}.bin'.format(iter), 'wb'))
                    pickle.dump(pd.DataFrame(model.word_att_net.context_weight_history,
                                             columns=['meaningfulness', 'purity', 'targetness', 'completeness', 'nltk', 'spacy_np', 'spacy_entity', 'autophrase']),
                                open('log/model.word_att_net.context_weight_history_{}.bin'.format(iter), 'wb')
                                )

    model.eval()
    loss_ls = []
    te_label_ls = []
    te_pred_ls = []
    for iter, (te_feature, ImportanceFeatureMat, te_label, indexes, addtional_info) in tqdm(enumerate(test_generator), total=len(test_generator)):
        num_sample = len(te_label)
        if use_cuda:
            te_feature = te_feature.cuda()
            ImportanceFeatureMat = ImportanceFeatureMat.cuda()
            te_label = te_label.cuda()
        with torch.no_grad():
            if test_these_inds or test_this_label:
                te_predictions, te_attn_score, similarity_w_attentions = model(
                    te_feature, ImportanceFeatureMat, get_concept_similarity=get_concept_similarity)
            else:
                te_predictions, te_attn_score, _ = model(te_feature, ImportanceFeatureMat)
        te_loss = criterion(te_predictions, te_label)
        loss_ls.append(te_loss * num_sample)
        te_label_ls.extend(te_label.clone().cpu())
        te_pred_ls.append(te_predictions.clone().cpu())

        if test_these_inds or test_this_label:
            column_names = [model.dataset.labels_list.copy()
                            for _ in range(te_label.shape[0])]

            training_metrics = get_evaluation(te_label.cpu().numpy(), te_predictions.data.cpu().numpy(), list_metrics=[
                                              "accuracy", "top K accuracy", "top K classind2wrong_doc_ind", "top K tree score"], childLabel2ParentLabel=mydataset.childLabel2ParentLabel, labels_list=mydataset.labels_list)

            for classind, doc_ind_in_batchs in training_metrics["top K classind2wrong_doc_ind"]['top 1'].items():
                print('error for class: ', classind, mydataset.labels_list[classind])
                for (doc_ind, preds) in doc_ind_in_batchs:
                    try:
                        print('doc_ind', doc_ind, 'predicted: ', [
                              mydataset.labels_list[pred_classind] for pred_classind in preds], addtional_info[doc_ind])
                        for i, pred_classind in enumerate(preds):
                            column_names[doc_ind][pred_classind] += "@{}".format(i)
                        print(mydataset.doc_tensor2doc(
                            te_feature[doc_ind]), 'tensor index:', indexes[doc_ind])
                        sim_save_path = 'log/model.similarity_w_attentions_docindex_{}.bin'.format(indexes.numpy()[
                                                                                                   doc_ind])
                        pickle.dump(pd.DataFrame(similarity_w_attentions[doc_ind], index=model.dataset.doc_tensor2doc(
                            te_feature[doc_ind]), columns=column_names[doc_ind]), open(sim_save_path, 'wb'))
                    except Exception as e:
                        import ipdb
                        ipdb.set_trace()
                        raise e
            continue

        if iter % 10 == 1:
            print('test iter {}/{}'.format(iter, len(test_generator)))

            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(te_label, te_pred.numpy(), list_metrics=[
                                          "accuracy", "top K accuracy", "top K tree score", "top K accuracy by class", "confusion_matrix"], childLabel2ParentLabel=mydataset.childLabel2ParentLabel, labels_list=mydataset.labels_list)

            with open(evaluationResultFile, 'w') as my_file:
                # print(str(test_metrics), file=my_file)

            pickle.dump(test_metrics, open(evaluationResultFile_bin, 'wb'))
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                    epoch + 1, opt.num_epoches,
                    te_loss,
                    test_metrics["accuracy"],
                    test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1,
                opt.num_epoches,
                optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            writer.add_scalar('Test/Loss', te_loss, epoch)
            writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(
                    epoch, te_loss))
                break

    pickle.dump(test_metrics, open(evaluationResultFileFinal_bin, 'wb'))


if __name__ == '__main__':
    if use_cuda:
        torch.cuda.set_device(dataset2device[dataset])

    train(args)
Example #8
0
def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    training_params = {"batch_size": opt.batch_size,
                       "shuffle": True}

    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False}

    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))

    training_set = MyDataset(opt.data_path, opt.total_images_per_class, opt.ratio, "train")
    training_generator = DataLoader(training_set, **training_params)
    print ("there are {} images for training phase".format(training_set.__len__()))
    test_set = MyDataset(opt.data_path, opt.total_images_per_class, opt.ratio, "test")
    test_generator = DataLoader(test_set, **test_params)
    print("there are {} images for test phase".format(test_set.__len__()))


    model = QuickDraw(num_classes=training_set.num_classes)

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    writer = SummaryWriter(opt.log_path)
    # writer.add_graph(model, torch.rand(opt.batch_size, 1, 28, 28))

    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    if opt.optimizer == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    elif opt.optimizer == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9)
    else:
        print("invalid optimizer")
        exit(0)

    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        for iter, batch in enumerate(training_generator):
            images, labels = batch
            if torch.cuda.is_available():
                images = images.cuda()
                labels = labels.cuda()
            optimizer.zero_grad()
            predictions = model(images)
            loss = criterion(predictions, labels)
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(labels.cpu().numpy(), predictions.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1,
                opt.num_epochs,
                iter + 1,
                num_iter_per_epoch,
                optimizer.param_groups[0]['lr'],
                loss, training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"], epoch * num_iter_per_epoch + iter)

        model.eval()
        loss_ls = []
        te_label_ls = []
        te_pred_ls = []
        for idx, te_batch in enumerate(test_generator):
            te_images, te_labels = te_batch
            num_samples = te_labels.size()[0]
            if torch.cuda.is_available():
                te_images = te_images.cuda()
                te_labels = te_labels.cuda()
            with torch.no_grad():
                te_predictions = model(te_images)
            te_loss = criterion(te_predictions, te_labels)
            loss_ls.append(te_loss * num_samples)
            te_label_ls.extend(te_labels.clone().cpu())
            te_pred_ls.append(te_predictions.clone().cpu())
        te_loss = sum(loss_ls) / test_set.__len__()
        te_pred = torch.cat(te_pred_ls, 0)
        te_label = np.array(te_label_ls)
        test_metrics = get_evaluation(te_label, te_pred.numpy(), list_metrics=["accuracy", "confusion_matrix"])
        output_file.write(
            "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                epoch + 1, opt.num_epochs,
                te_loss,
                test_metrics["accuracy"],
                test_metrics["confusion_matrix"]))
        print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
            epoch + 1,
            opt.num_epochs,
            optimizer.param_groups[0]['lr'],
            te_loss, test_metrics["accuracy"]))
        writer.add_scalar('Test/Loss', te_loss, epoch)
        writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
        model.train()
        if te_loss + opt.es_min_delta < best_loss:
            best_loss = te_loss
            best_epoch = epoch
            torch.save(model, opt.saved_path + os.sep + "whole_model_quickdraw")
        if epoch - best_epoch > opt.es_patience > 0:
            print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, te_loss))
            break
    writer.close()
    output_file.close()
Example #9
0
def train(opt):
    # GPUが使えるか確認
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    # データセットのパスを指定
    if opt.dataset in ["csic2010", "agnews", "dbpedia", "yelp_review", "yelp_review_polarity", "amazon_review",
                       "amazon_polarity", "sogou_news", "yahoo_answers"]:
        opt.input, opt.output = get_default_folder(opt.dataset, opt.feature)
    # outputのディレクトリが存在していなかったらディレクトリを作成
    if not os.path.exists(opt.output):
        os.makedirs(opt.output)
    # outputファイルを作成
    output_file = open(opt.output + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    # パラメータの設定
    training_params = {"batch_size": opt.batch_size,
                       "shuffle": True,
                       "num_workers": 0}
    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "num_workers": 0}
    # データセットの読み込み
    training_set = MyDataset(opt.input + os.sep + "train.csv", opt.max_length)
    test_set = MyDataset(opt.input + os.sep + "test.csv", opt.max_length)
    training_generator = DataLoader(training_set, **training_params)
    test_generator = DataLoader(test_set, **test_params)
    # データセットの大小指定
    if opt.feature == "small":
        model = CharacterLevelCNN(input_length=opt.max_length, n_classes=training_set.num_classes,
                                  input_dim=len(opt.alphabet),
                                  n_conv_filters=256, n_fc_neurons=1024)

    elif opt.feature == "large":
        model = CharacterLevelCNN(input_length=opt.max_length, n_classes=training_set.num_classes,
                                  input_dim=len(opt.alphabet),
                                  n_conv_filters=1024, n_fc_neurons=2048)
    else:
        sys.exit("Invalid feature mode!")
    # ログのパス指定とログの書き込み
    log_path = "{}_{}_{}".format(opt.log_path, opt.feature, opt.dataset)
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    writer = SummaryWriter(log_path)
    # GPUが使えるか確認
    if torch.cuda.is_available():
        model.cuda()
    # 損失関数にはクロスエントロピーを使用
    criterion = nn.CrossEntropyLoss()
    # 最適化方法の指定
    if opt.optimizer == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    elif opt.optimizer == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9)
    # パラメータの指定
    best_loss = 1e5
    best_epoch = 0
    # 学習
    model.train()
    # エポックごとのイテレーション回数
    num_iter_per_epoch = len(training_generator)
    # 学習開始
    for epoch in range(opt.num_epochs):
        for iter, batch in enumerate(training_generator):
            # featureとlabelに分ける(train)
            feature, label = batch
            # GPU使用可能か確認
            if torch.cuda.is_available():
                feature = feature.cuda()
                label = label.cuda()
            # 勾配の初期化
            optimizer.zero_grad()
            # 予測
            predictions = model(feature)
            # ロスの計算
            loss = criterion(predictions, label)
            # 勾配の計算
            loss.backward()
            # パラメータの更新
            optimizer.step()
            # モデルの評価
            training_metrics = get_evaluation(label.cpu().numpy(), predictions.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            # 現在の学習状況の表示
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1,
                opt.num_epochs,
                iter + 1,
                num_iter_per_epoch,
                optimizer.param_groups[0]['lr'],
                loss, training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"], epoch * num_iter_per_epoch + iter)
        # 推論モードへの切り替え
        model.eval()
        loss_ls = []
        te_label_ls = []
        te_pred_ls = []
        for batch in test_generator:
            # featureとlabelに分ける(test)
            te_feature, te_label = batch
            # サンプル数の取得
            num_sample = len(te_label)
            # GPUが使用可能か確認
            if torch.cuda.is_available():
                te_feature = te_feature.cuda()
                te_label = te_label.cuda()
            # パラメータの保存を止める
            with torch.no_grad():
                te_predictions = model(te_feature)
            # ロスの計算
            te_loss = criterion(te_predictions, te_label)
            # 現在のロスをリストに追加
            loss_ls.append(te_loss * num_sample)
            # テストデータのlabelをリストに追加
            te_label_ls.extend(te_label.clone().cpu())
            # テストデータの予測値をリストに追加
            te_pred_ls.append(te_predictions.clone().cpu())
        # バッチ全体でのロスを計算
        te_loss = sum(loss_ls) / test_set.__len__()
        # testの予測値のtensorを縦方向に連結
        te_pred = torch.cat(te_pred_ls, 0)
        # testのlabelをnumpy.array化
        te_label = np.array(te_label_ls)
        # モデルの評価
        test_metrics = get_evaluation(te_label, te_pred.numpy(), list_metrics=["accuracy", "confusion_matrix"])
        # 出力結果をファイルに書き込み
        output_file.write(
            "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                epoch + 1, opt.num_epochs,
                te_loss,
                test_metrics["accuracy"],
                test_metrics["confusion_matrix"]))
        # 現在のテストの評価状況を表示
        print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
            epoch + 1,
            opt.num_epochs,
            optimizer.param_groups[0]['lr'],
            te_loss, test_metrics["accuracy"]))
        writer.add_scalar('Test/Loss', te_loss, epoch)
        writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
        # 学習モードに変更
        model.train()
        # 現在のロスがあらかじめ設定したロスの閾値を下回ったら,現在のモデルを保存
        if te_loss + opt.es_min_delta < best_loss:
            best_loss = te_loss
            best_epoch = epoch
            torch.save(model, "{}/char-cnn_{}_{}".format(opt.output, opt.dataset, opt.feature))
        # Early stopping
        if epoch - best_epoch > opt.es_patience > 0:
            print("Stop training at epoch {}. The lowest loss achieved is {} at epoch {}".format(epoch, te_loss, best_epoch))
            break
        # 勾配グリッピング
        if opt.optimizer == "sgd" and epoch % 3 == 0 and epoch > 0:
            current_lr = optimizer.state_dict()['param_groups'][0]['lr']
            current_lr /= 2
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_lr