def validate(val_set, model, args):
    """Evaluate ``model`` on ``val_set`` and return dataset-wide accuracy.

    Args:
        val_set: array whose rows hold (article, summary) token-id
            sequences in columns 0 and 1 — assumed from the indexing
            below; TODO confirm against the data loader.
        model: seq2seq model; called as ``model(articles, None, False)``
            and returning ``(out_list, _)``.
        args: namespace with ``batch_size``. NOTE: this function forces
            ``args.batch_size`` to 5 for evaluation and resets it to 1
            before returning — callers appear to rely on that reset.

    Returns:
        float: accuracy averaged over all examples (0.0 for an empty set).
    """
    model.eval()

    data_size = len(val_set)
    if data_size == 0:
        # Guard the final division; original code raised ZeroDivisionError.
        args.batch_size = 1
        return 0.0

    args.batch_size = 5
    batch_num = math.ceil(data_size / args.batch_size)
    acc_all = 0.0

    with torch.no_grad():
        for print_count, start in enumerate(
                range(0, data_size, args.batch_size), start=1):
            # Slicing past the end is safe: the last batch is just shorter,
            # so no special-case branch is needed.
            cur_batch = val_set[start:start + args.batch_size]

            padded_articles = np.array(
                [np.array(tmp) for tmp in cur_batch[:, 0]])
            padded_summaries = np.array(
                [np.array(tmp) for tmp in cur_batch[:, 1]])

            tensor_art = to_cuda(
                torch.LongTensor(padded_articles.astype(np.float32)))
            tensor_sum = to_cuda(
                torch.LongTensor(padded_summaries.astype(np.float32)))

            # Third argument False: presumably disables teacher forcing
            # for inference — verify against the model's forward signature.
            out_list, _ = model(tensor_art, None, False)

            output_list = []
            target_list = []
            for j in range(out_list.shape[0]):
                # Effective length: position of the first PAD token.
                k = remove_pad(tensor_sum[j, :])
                out_tmp = ' '.join(
                    map(str,
                        torch.argmax(out_list[j, :k], 1).cpu().numpy()))
                tar_tmp = ' '.join(map(str, (tensor_sum[j, :k]).cpu().numpy()))

                output_list.append(out_tmp)
                target_list.append(tar_tmp)

            acc = accuracy(output_list, target_list)
            print('Test [' + str(print_count) + '/' + str(batch_num) + ']',
                  'Acc ', acc)

            # Weight per-batch accuracy by batch size (last batch may be short).
            acc_all += acc * len(cur_batch)

        acc_all /= data_size
        print(' *Accuracy all:', acc_all)
    args.batch_size = 1
    return acc_all
def test(val_set, model):
    """Interactively step through model predictions on ``val_set``.

    For each example, prints the predicted token ids and the target
    token ids, then blocks on ``input()`` so a human can compare them.
    Purely a debugging aid; returns None.

    Args:
        val_set: array with (article, summary) token-id sequences in
            columns 0 and 1 — assumed from the indexing; TODO confirm.
        model: seq2seq model returning ``(out_list, _)``.
    """
    model.eval()
    batch_size = 1
    data_size = len(val_set)
    with torch.no_grad():
        for start in range(0, data_size, batch_size):
            # Slicing past the end is safe; no branch needed for the tail.
            cur_batch = val_set[start:start + batch_size]

            padded_articles = np.array(
                [np.array(tmp) for tmp in cur_batch[:, 0]])
            padded_summaries = np.array(
                [np.array(tmp) for tmp in cur_batch[:, 1]])

            tensor_art = to_cuda(
                torch.LongTensor(padded_articles.astype(np.float32)))
            tensor_sum = to_cuda(
                torch.LongTensor(padded_summaries.astype(np.float32)))

            out_list, _ = model(tensor_art, None, False)

            # The original accumulated output_list/target_list here but
            # never used or returned them; that dead code is dropped.
            for j in range(out_list.shape[0]):
                # Effective length: position of the first PAD token.
                k = remove_pad(tensor_sum[j, :])
                out_tmp = torch.argmax(out_list[j, :k], 1).cpu().numpy()
                tar_tmp = (tensor_sum[j, :k]).cpu().numpy()

                print(out_tmp)
                print(tar_tmp)
                input()  # pause until the user presses Enter
def train(train_set, model, criterion, optimizer, epoch, args):
    """Run one training epoch over ``train_set``.

    NOTE: shuffles ``train_set`` in place, so the caller's array is
    reordered as a side effect.

    Args:
        train_set: array with (article, summary) token-id sequences in
            columns 0 and 1 — assumed from the indexing; TODO confirm.
        model: seq2seq model; ``model(articles, summaries)`` returns
            per-token probabilities plus a coverage loss.
        criterion: loss applied to ``torch.log(probabilities)``, so it
            is expected to be NLL-style (e.g. ``nn.NLLLoss``).
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only for logging.
        args: namespace providing ``batch_size``.
    """
    model.train()
    print('Start of Epoch: ', epoch)
    start_time = time.time()  # renamed from misleading `end`

    # Deliberate in-place shuffle (mutates the caller's array).
    np.random.shuffle(train_set)

    data_size = len(train_set)
    batch_num = math.ceil(data_size / args.batch_size)

    for print_count, start in enumerate(
            range(0, data_size, args.batch_size), start=1):
        # Slicing past the end is safe: the final batch is just shorter,
        # so the original if/else on the tail batch is unnecessary.
        cur_batch = train_set[start:start + args.batch_size]

        padded_articles = np.array(
            [np.array(tmp) for tmp in cur_batch[:, 0]])
        padded_summaries = np.array(
            [np.array(tmp) for tmp in cur_batch[:, 1]])

        tensor_art = to_cuda(
            torch.LongTensor(padded_articles.astype(np.float32)))
        tensor_sum = to_cuda(
            torch.LongTensor(padded_summaries.astype(np.float32)))

        optimizer.zero_grad()

        out_list, cov_loss = model(tensor_art, tensor_sum)

        loss = to_cuda(torch.tensor(0.))

        for j in range(out_list.shape[0]):
            # Effective length: first PAD token, clamped to both sequences.
            k = remove_pad(tensor_sum[j])
            k = min(k, len(out_list[j]), len(tensor_sum[j]))

            # Targets are shifted by one to drop <SOS>: compare
            # predictions [0, k-1) against targets [1, k).
            loss += criterion(torch.log(out_list[j, :k - 1]),
                              tensor_sum[j, 1:k])

        loss += cov_loss

        print(
            'Epoch: [' + str(epoch) + '] [' + str(print_count) + '/' +
            str(batch_num) + ']', 'Loss ',
            loss.cpu().detach().numpy(), 'cov Loss',
            cov_loss.cpu().detach().numpy())

        loss.backward()
        optimizer.step()
    print('End of Epoch', epoch, 'time cost', time.time() - start_time)
# Example #4
criterion = nn.NLLLoss()

torch.autograd.set_detect_anomaly(True)

for i in range(100):
    print('Epoch:', i + 1)

    opt.zero_grad()
    out_list, cov_loss = model(tensor_art[0:2], tensor_sum[0:2])
    # print(len(out_list[0][0]))
    loss = torch.tensor(0.)
    loss = to_cuda(loss)
    for j in range(out_list.shape[0]):
        # loss += criterion(out_list[j],tensor_sum[j,1:]) # '1:' Remove <SOS>

        k = remove_pad(tensor_sum[j, :])

        loss += criterion(torch.log(out_list[j, :k]), tensor_sum[j, :k])

        # loss += cov_loss

    # PRINT
    k = remove_pad(tensor_sum[0, :])
    #print(tensor_sum[0,:k])

    out_string = []
    for word in tensor_sum[0, :k]:
        out_string.append(dic.idx2word[word])

    #print(len(out_string))