import math
import time

import numpy as np
import torch
import torch.nn as nn


def validate(val_set, model, args):
    """Decode the validation set without teacher forcing and report accuracy."""
    model.eval()
    print_count = 0
    data_size = len(val_set)
    start_tmp = 0
    # Fixed validation batch size, kept local so args.batch_size is not
    # clobbered as a side effect of validation.
    batch_size = 5
    batch_num = math.ceil(data_size / batch_size)
    acc_all = 0.0
    with torch.no_grad():
        while start_tmp < data_size:
            print_count += 1
            # Slicing past the end of the array is safe, so the final,
            # possibly shorter batch needs no special case.
            cur_batch = val_set[start_tmp:start_tmp + batch_size]
            start_tmp += batch_size

            padded_articles = np.array([np.array(tmp) for tmp in cur_batch[:, 0]])
            padded_summaries = np.array([np.array(tmp) for tmp in cur_batch[:, 1]])

            # Token ids are integers; cast straight to int64 for LongTensor.
            tensor_art = to_cuda(torch.LongTensor(padded_articles.astype(np.int64)))
            tensor_sum = to_cuda(torch.LongTensor(padded_summaries.astype(np.int64)))

            # Decode without teacher forcing (no target fed to the model).
            out_list, _ = model(tensor_art, None, False)

            output_list = []
            target_list = []
            for j in range(out_list.shape[0]):
                k = remove_pad(tensor_sum[j, :])  # unpadded target length
                out_tmp = ' '.join(
                    map(str, torch.argmax(out_list[j, :k], 1).cpu().numpy()))
                tar_tmp = ' '.join(map(str, tensor_sum[j, :k].cpu().numpy()))
                output_list.append(out_tmp)
                target_list.append(tar_tmp)

            acc = accuracy(output_list, target_list)
            print('Test [' + str(print_count) + '/' + str(batch_num) + ']',
                  'Acc ', acc)
            # Weight by batch size: the last batch may be smaller.
            acc_all += acc * len(cur_batch)
    acc_all /= data_size
    print(' *Accuracy all:', acc_all)
    return acc_all
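# ---------------------------------------------------------------------------
# Assumed helpers (sketches). The three functions below are NOT defined in
# this file; they are minimal stand-ins for the helpers the code above calls,
# written under two assumptions: the PAD token has id 0 and appears only at
# the end of a sequence, and accuracy() means the fraction of predictions
# that exactly match their target. Swap in the project's real
# implementations if they differ.
# ---------------------------------------------------------------------------


def to_cuda(tensor):
    # Move a tensor to the GPU when one is available; no-op on CPU-only hosts.
    return tensor.cuda() if torch.cuda.is_available() else tensor


def remove_pad(seq):
    # Length of `seq` up to the first PAD token (assumes PAD id 0 and
    # trailing-only padding).
    return int((seq != 0).sum())


def accuracy(outputs, targets):
    # Fraction of predicted sequences that exactly match their target.
    correct = sum(1 for o, t in zip(outputs, targets) if o == t)
    return correct / max(len(targets), 1)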
def test(val_set, model):
    """Decode validation samples one at a time, printing prediction vs. target."""
    model.eval()
    batch_size = 1
    data_size = len(val_set)
    start_tmp = 0
    with torch.no_grad():
        while start_tmp < data_size:
            cur_batch = val_set[start_tmp:start_tmp + batch_size]
            start_tmp += batch_size

            padded_articles = np.array([np.array(tmp) for tmp in cur_batch[:, 0]])
            padded_summaries = np.array([np.array(tmp) for tmp in cur_batch[:, 1]])

            tensor_art = to_cuda(torch.LongTensor(padded_articles.astype(np.int64)))
            tensor_sum = to_cuda(torch.LongTensor(padded_summaries.astype(np.int64)))

            # Decode without teacher forcing.
            out_list, _ = model(tensor_art, None, False)

            for j in range(out_list.shape[0]):
                k = remove_pad(tensor_sum[j, :])  # unpadded target length
                out_tmp = torch.argmax(out_list[j, :k], 1).cpu().numpy()
                tar_tmp = tensor_sum[j, :k].cpu().numpy()
                print(out_tmp)
                print(tar_tmp)
                input()  # pause so each prediction/target pair can be inspected
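# The raw id arrays printed by test() are hard to read. A small sketch for
# decoding ids back to words, assuming the `dic` object with an `idx2word`
# mapping used by the sanity-check loop at the bottom of this file:
def ids_to_words(ids, dic):
    # Map a sequence of token ids to a space-joined string of words.
    return ' '.join(dic.idx2word[int(i)] for i in ids)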
def train(train_set, model, criterion, optimizer, epoch, args):
    """Train the model for one epoch with teacher forcing."""
    model.train()
    print('Start of Epoch: ', epoch)
    print_count = 0
    start_time = time.time()
    np.random.shuffle(train_set)
    data_size = len(train_set)
    start_tmp = 0
    batch_num = math.ceil(data_size / args.batch_size)
    while start_tmp < data_size:
        print_count += 1
        cur_batch = train_set[start_tmp:start_tmp + args.batch_size]
        start_tmp += args.batch_size

        padded_articles = np.array([np.array(tmp) for tmp in cur_batch[:, 0]])
        padded_summaries = np.array([np.array(tmp) for tmp in cur_batch[:, 1]])

        tensor_art = to_cuda(torch.LongTensor(padded_articles.astype(np.int64)))
        tensor_sum = to_cuda(torch.LongTensor(padded_summaries.astype(np.int64)))

        optimizer.zero_grad()
        # Teacher forcing: the target summary is fed to the decoder.
        out_list, cov_loss = model(tensor_art, tensor_sum)

        loss = to_cuda(torch.tensor(0.))
        for j in range(out_list.shape[0]):
            k = remove_pad(tensor_sum[j])  # unpadded target length
            k = min(k, len(out_list[j]), len(tensor_sum[j]))
            # Shift the target by one so step t is scored against token t + 1,
            # dropping the leading <SOS>.
            loss += criterion(torch.log(out_list[j, :k - 1]),
                              tensor_sum[j, 1:k])
        loss += cov_loss

        print('Epoch: [' + str(epoch) + '] [' + str(print_count) + '/' +
              str(batch_num) + ']',
              'Loss ', loss.cpu().detach().numpy(),
              'cov Loss', cov_loss.cpu().detach().numpy())

        loss.backward()
        optimizer.step()
    print('End of Epoch', epoch, 'time cost', time.time() - start_time)
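# A minimal driver sketch tying train() and validate() together. `model`,
# `train_set`, `val_set`, and `args` are assumed to be built elsewhere; the
# Adam optimizer and learning rate are illustrative choices, not the
# project's confirmed settings. NLLLoss matches the torch.log() applied to
# the model's probability outputs in train().
def run_training(train_set, val_set, model, args, epochs=10):
    criterion = nn.NLLLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    best_acc = 0.0
    for epoch in range(epochs):
        train(train_set, model, criterion, optimizer, epoch, args)
        best_acc = max(best_acc, validate(val_set, model, args))
    return best_acc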
# Sanity check: try to overfit the model on the first two training pairs.
# `model`, `opt`, `tensor_art`, `tensor_sum`, and `dic` are assumed to be
# defined earlier in the session.
criterion = nn.NLLLoss()
torch.autograd.set_detect_anomaly(True)
for i in range(100):
    print('Epoch:', i + 1)
    opt.zero_grad()
    out_list, cov_loss = model(tensor_art[0:2], tensor_sum[0:2])

    loss = to_cuda(torch.tensor(0.))
    for j in range(out_list.shape[0]):
        # Note: unlike train(), no <SOS> shift is applied here.
        k = remove_pad(tensor_sum[j, :])
        loss += criterion(torch.log(out_list[j, :k]), tensor_sum[j, :k])
    # loss += cov_loss  # coverage term disabled for this sanity check

    # Backpropagate and step so the overfitting check actually learns.
    loss.backward()
    opt.step()

    # PRINT: decode the first reference summary back to words.
    k = remove_pad(tensor_sum[0, :])
    # print(tensor_sum[0, :k])
    out_string = []
    for word in tensor_sum[0, :k]:
        out_string.append(dic.idx2word[int(word)])  # int() unwraps the 0-dim tensor
    # print(len(out_string))
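# For reference: the `cov_loss` the model returns is presumably the coverage
# penalty of See et al. (2017), i.e. at each decoder step the sum of the
# element-wise minimum between the attention distribution and the running
# coverage vector. A sketch of that computation, assuming `attn` holds the
# per-step attention weights with shape (steps, src_len):
def coverage_loss(attn):
    coverage = torch.zeros_like(attn[0])  # accumulated attention so far
    loss = attn.new_zeros(())
    for a_t in attn:
        loss = loss + torch.min(a_t, coverage).sum()
        coverage = coverage + a_t
    return loss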