def trainBatch(net, criterion, optimizer):
    """Run one optimization step on the next batch from the global train_iter.

    Uses module-level globals: train_iter, image, text, length, converter,
    crnn, utils. Returns the per-sample-averaged CTC cost for the batch.
    """
    # BUG FIX: iterator.next() is Python-2 only; next() works on both.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    # Stage the batch into the shared image/text/length buffers.
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)
    preds = crnn(image)  # (seq_len, batch, n_classes)
    # CTC needs every sample's prediction length; all equal the time axis.
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def val(net, dataset, criterion, max_iter=100):
    """Evaluate on up to max_iter batches; print average loss and accuracy.

    Uses module-level globals: crnn, image, text, length, converter, opt, utils.
    """
    print('Start val')
    # Freeze parameters so no gradients are tracked during evaluation.
    for p in crnn.parameters():
        p.requires_grad = False
    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize,
        num_workers=int(opt.workers))
    val_iter = iter(data_loader)
    n_correct = 0
    loss_avg = utils.averager()
    # BUG FIX: clamp max_iter once and reuse it as the accuracy denominator.
    # Previously the loop ran min(max_iter, len(data_loader)) batches but the
    # accuracy divided by the unclamped max_iter, deflating the reported value
    # whenever the loader had fewer than max_iter batches.
    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)  # next() instead of Py2-only .next()
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        # Greedy decode: argmax over classes, then flatten time-major.
        _, preds = preds.max(2)
        preds = preds.squeeze(2)  # older torch keeps the reduced dim after max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():  # case-insensitive comparison
                n_correct += 1
    # Show raw vs. decoded predictions for the last batch processed.
    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
def val(net, dataset, max_iter=100):
    """Evaluate with the segmental CTC entropy cost; print H/cost averages
    and accuracy.

    Uses module-level globals: crnn, image, text, length, converter, opt,
    utils, seg_ctc_ent_cost.
    """
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False
    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize,
        num_workers=int(opt.workers))
    val_iter = iter(data_loader)
    n_correct = 0
    # Running averages for the entropy term, the raw cost, and the
    # entropy-regularized combined cost.
    avg_h_val = utils.averager()
    avg_cost_val = utils.averager()
    avg_h_cost_val = utils.averager()
    if opt.eval_all:
        max_iter = len(data_loader)  # sweep the whole validation set
    else:
        max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)  # BUG FIX: .next() is Py2-only
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        preds = crnn(image)  # (seq_len, batch, n_classes)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        H, cost = seg_ctc_ent_cost(preds, text, preds_size, length,
                                   uni_rate=opt.uni_rate)
        # Entropy-regularized objective: (1 - h_rate) * cost - h_rate * H.
        h_cost = (1 - opt.h_rate) * cost - opt.h_rate * H
        avg_h_val.add(H / batch_size)
        avg_cost_val.add(cost / batch_size)
        avg_h_cost_val.add(h_cost / batch_size)
        # Greedy decode: argmax over classes, then flatten time-major.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for idx, (pred, target) in enumerate(zip(sim_preds, cpu_texts)):
            if pred == target.lower():  # case-insensitive comparison
                n_correct += 1
    # Show a few raw vs. decoded predictions from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-30s => %-30s, gt: %-30s' % (raw_pred, pred, gt))
    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test H: %f, Cost: %f, H Cost: %f, accuray: %f'
          % (avg_h_val.val(), avg_cost_val.val(), avg_h_cost_val.val(),
             accuracy))
def val(net, dataset, criterion, max_iter=100):
    """Evaluate on UTF-8 (bytes) labels; print average loss and accuracy.

    Uses module-level globals: crnn, image, text, length, converter, opt, utils.
    """
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False
    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize,
        num_workers=int(opt.workers))
    val_iter = iter(data_loader)
    n_correct = 0
    loss_avg = utils.averager()
    max_iter = min(max_iter, len(data_loader))
    print("max_iter", max_iter, "len(data_loader)", len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)  # BUG FIX: .next() is Py2-only
        i += 1  # 1-based batch number for the first-batch debug print below
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        # BUG FIX: the decode loop used `i` as its variable, clobbering the
        # batch counter, so `if (i == 1)` compared a label string to 1 and the
        # first-batch debug print below never fired.
        list_cpu_texts = [txt.decode('utf-8', 'strict') for txt in cpu_texts]
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        if i == 1:  # show the first batch's decoded predictions
            print(sim_preds)
            print(cpu_texts)
        for pred, target in zip(sim_preds, list_cpu_texts):
            # Accept either case-insensitive or exact match.
            if pred == target.lower() or pred == target:
                n_correct += 1
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, list_cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
def val(net, test_dataset, criterion, max_iter=100):
    """Sweep the whole test set and report average edit distance and a
    normalized (1 - CER) accuracy.

    Relies on module-level globals: crnn, image, text, length, converter,
    util, dataset, batchSize, workers, n_test_disp, editdistance.
    NOTE(review): Python 2 only — uses `unicode`, str.decode, and the
    Py2-only iterator `.next()` call.
    """
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False  # freeze weights during evaluation
    net.eval()
    data_loader = torch.utils.data.DataLoader(
        test_dataset, shuffle=False, batch_size=batchSize,
        num_workers=int(workers),
        collate_fn=dataset.alignCollate(imgH=32, imgW=100, keep_ratio=True))
    val_iter = iter(data_loader)
    i = 0
    n = 0          # number of (pred, target) pairs scored
    n_correct = 0  # NOTE(review): despite the name, accumulates edit distance
    n_text = 0     # total target length, in encoded label symbols
    loss_avg = util.averager()
    max_iter = len(data_loader)  # always sweep the full loader
    for i in range(max_iter):
        data = val_iter.next()
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        # Stage the batch into the shared image/text/length buffers.
        util.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        util.loadData(text, t)
        util.loadData(length, l)
        preds = crnn(image)  # (seq_len, batch, n_classes)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        # Greedy decode: argmax over classes, then flatten time-major.
        _, preds = preds.max(2)
        preds = preds.squeeze(2)  # older torch keeps the reduced dim
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if isinstance(target, unicode) is False:
                target = target.decode('utf-8')
            # Compare in encoded label space so multi-byte characters count once.
            pred_encode, _ = converter.encode(pred)
            target_encode, _ = converter.encode(target)
            # NOTE: t and l here shadow the batch-level encode outputs above.
            t = editdistance.eval(pred_encode, target_encode)
            l = len(target_encode)
            # chardit1 = chardet.detect(target)
            # print (chardit1)
            print(pred + '>>>>' + target)
            n_correct += t
            n_text += l
            n += 1
    # Show a few raw vs. decoded predictions from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:n_test_disp]
    for raw_pred, sim_pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, sim_pred, gt))
    len_edit = n_correct / float(n)  # mean edit distance per sample
    len_text = n_text / float(n)     # mean target length per sample
    norm = 1 - len_edit / len_text   # normalized accuracy (1 - CER)
    print('average editdistance: %f, normalized accuracy: %f' % (len_edit, norm))
def val(net, valdataset, criterionAttention, criterionCTC, max_iter=100):
    """Evaluate the joint CTC + attention model; print combined loss and
    attention-head accuracy.

    Uses module-level globals: crnn, image, textAttention, lengthAttention,
    textCTC, lengthCTC, converterAttention, converterCTC, dataset, opt, utils.
    """
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False
    net.eval()
    val_sampler = dataset.randomSequentialSampler(valdataset, opt.batchSize)
    data_loader = torch.utils.data.DataLoader(
        valdataset, batch_size=opt.batchSize, shuffle=False,
        sampler=val_sampler, num_workers=int(opt.workers),
        collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW,
                                        keep_ratio=opt.keep_ratio))
    val_iter = iter(data_loader)
    n_correct = 0
    loss_avg = utils.averager()
    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)  # .next() is Py2-only
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        # Encode the labels once per head (the heads use different alphabets).
        tAttention, lAttention = converterAttention.encode(cpu_texts)
        utils.loadData(textAttention, tAttention)
        utils.loadData(lengthAttention, lAttention)
        tCTC, lCTC = converterCTC.encode(cpu_texts)
        utils.loadData(textCTC, tCTC)
        utils.loadData(lengthCTC, lCTC)
        if opt.lang:
            predsCTC, predsAttention = crnn(image, lengthAttention,
                                            textAttention)
        else:
            # BUG FIX: this branch referenced the undefined name
            # `imageAttention` (NameError); the model input is the shared
            # `image` buffer loaded above.
            predsCTC, predsAttention = crnn(image, lengthAttention)
        costAttention = criterionAttention(predsAttention, textAttention)
        preds_size = Variable(torch.IntTensor([predsCTC.size(0)] * batch_size))
        costCTC = criterionCTC(predsCTC, textCTC, preds_size,
                               lengthCTC) / batch_size
        # Both heads contribute to the reported loss.
        loss_avg.add(costAttention)
        loss_avg.add(costCTC.cuda())
        # Attention head decodes directly over its per-step argmax.
        _, predsAttention = predsAttention.max(1)
        predsAttention = predsAttention.view(-1)
        sim_predsAttention = converterAttention.decode(predsAttention.data,
                                                       lengthAttention.data)
        for pred, target in zip(sim_predsAttention, cpu_texts):
            print(pred, target)
            if pred == target:  # exact (case-sensitive) match
                n_correct += 1
    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
# Evaluation driver: runs the CRNN over test_loader and accumulates CTC loss
# and exact-match accuracy.
# Uses names defined elsewhere in the file: crnn, image, text, length,
# test_loader, loadData, string_converter, loss_function.
crnn = torch.nn.DataParallel(crnn, device_ids=range(1))
image = image.cuda()
text = text.cuda()
length = length.cuda()
counter = 0
n_correct = 0
# BUG FIX: total_cost was read ("total_cost + cost.item()") before ever being
# initialized, raising NameError on the first batch.
total_cost = 0.0
# BUG FIX: track the true number of samples instead of assuming a constant
# batch size; the original final line divided by an undefined name `batch`.
total_samples = 0
for images, labels in test_loader:
    counter = counter + 1
    batch_size = images.size(0)
    total_samples += batch_size
    loadData(image, images)
    integer_labels, label_lengths = string_converter.convert_string_to_integer(labels, [])
    loadData(text, integer_labels)
    loadData(length, label_lengths)
    output = crnn(image)
    output = F.log_softmax(output, 2)  # CTC loss expects log-probabilities
    output_size = Variable(torch.IntTensor([output.size(0)] * batch_size))
    cost = loss_function(output, text, output_size, length) / batch_size
    total_cost = total_cost + cost.item()
    # Greedy decode: argmax over classes, then flatten time-major.
    _, output = output.max(2)
    output = output.transpose(1, 0).contiguous().view(-1)
    predicted_texts = string_converter.convert_integer_to_string(output.data, output_size.data)
    ground_truth_texts = string_converter.convert_integer_to_string(text.data, length.data)
    for pred, target in zip(predicted_texts, ground_truth_texts):
        if pred == target:
            n_correct += 1
        print('Prediction : {} GT : {}'.format(pred, target))
average_cost = total_cost / float(counter)
# float() also guards against Py2 integer division.
accuracy = n_correct / float(total_samples) * 100
def val(net, dataset, criterion, max_iter=1000, test_aug=False, n_aug=1):
    """Validate, optionally over n_aug augmented passes of the same images,
    and report loss, accuracy, and case-sensitive CER/WER statistics.

    With n_aug > 1, predictions for each file are collected across passes and
    the most common prediction per file is scored for CER/WER.
    Uses module-level globals: crnn, image, text, length, converter, opt,
    utils, Counter, cer, wer, np.
    """
    print('Start validation set')
    for p in crnn.parameters():
        p.requires_grad = False  # freeze weights during evaluation
    net.eval()
    i = 0
    n_correct = 0
    loss_avg = utils.averager()
    image_count = 0
    # Character and word error rate lists
    char_error = []
    w_error = []
    pred_dict = {}  # file -> list of predictions (one per augmentation pass)
    gt_dict = {}    # file -> ground-truth transcription
    for epoch in range(n_aug):
        # With test_aug, sweep every batch; otherwise cap at max_iter.
        max_iter = len(dataset) if test_aug else min(max_iter, len(dataset))
        val_iter = iter(dataset)
        for i in range(max_iter):
            data = val_iter.next()  # NOTE(review): Py2-only iterator call
            i += 1
            cpu_images, cpu_texts, cpu_files = data
            batch_size = cpu_images.size(0)
            image_count = image_count + batch_size
            utils.loadData(image, cpu_images)
            t, l = converter.encode(cpu_texts)
            utils.loadData(text, t)
            utils.loadData(length, l)
            preds = crnn(image)
            #print(preds.size())
            preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
            cost = criterion(preds, text, preds_size, length) / batch_size
            loss_avg.add(cost)
            # RA: While I am not sure yet, it looks like a greedy decoder and not beam search is being used here
            # Case is ignored in the accuracy, which is not ideal for an actual working system
            _, preds = preds.max(2)
            if torch.__version__ < '0.2':
                preds = preds.squeeze(2)  # https://github.com/meijieru/crnn.pytorch/issues/31
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
            for pred, target, f in zip(sim_preds, cpu_texts, cpu_files):
                if f not in gt_dict:
                    gt_dict[f] = target
                    pred_dict[f] = []
                pred_dict[f].append(pred)
                if pred == target:
                    n_correct += 1
    # Case-sensitive character and word error rates
    for f, target in gt_dict.items():
        # Finds the most commonly predicted string for all the augmented images
        best_pred = Counter(pred_dict[f]).most_common(1)[0][0]
        char_error.append(cer(best_pred, target))
        w_error.append(wer(best_pred, target))
    # Show a few raw vs. decoded predictions from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    print("Total number of images in validation set: %8d" % image_count)
    # NOTE(review): n_correct accumulates across all n_aug passes but the
    # denominator covers only one pass — accuracy looks inflated when
    # n_aug > 1; confirm intended semantics.
    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
    char_arr = np.array(char_error)
    w_arr = np.array(w_error)
    char_mean_error = np.mean(char_arr)
    word_mean_error = np.mean(w_arr)
    print("Character error rate mean: %4.4f; Character error rate sd: %4.4f" % (
        char_mean_error, np.std(char_arr, ddof=1)))
    print("Word error rate mean: %4.4f; Word error rate sd: %4.4f" % (word_mean_error, np.std(w_arr, ddof=1)))
    return char_mean_error, word_mean_error, accuracy
def val(net, valdataset, criterionAttention, criterionCTC, max_iter=100):
    """Evaluate both the CTC and attention heads; print per-head CER and
    exact-match accuracy.

    Uses module-level globals: crnn, image, textAttention, lengthAttention,
    textCTC, lengthCTC, converterAttention, converterCTC, dataset, opt,
    utils, Levenshtein. NOTE(review): `.decode` on str implies Python 2.
    """
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False
    net.eval()
    val_sampler = dataset.randomSequentialSampler(valdataset, opt.batchSize)
    data_loader = torch.utils.data.DataLoader(
        valdataset, batch_size=opt.batchSize, shuffle=False,
        sampler=val_sampler, num_workers=int(opt.workers),
        collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW,
                                        keep_ratio=opt.keep_ratio))
    val_iter = iter(data_loader)
    n_correctCTC = 0
    n_correctAttention = 0
    distanceCTC = 0
    distanceAttention = 0
    sum_charNum = 0
    loss_avg = utils.averager()
    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)  # .next() is Py2-only
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        # Encode the labels once per head (the heads use different alphabets).
        tAttention, lAttention = converterAttention.encode(cpu_texts)
        utils.loadData(textAttention, tAttention)
        utils.loadData(lengthAttention, lAttention)
        tCTC, lCTC = converterCTC.encode(cpu_texts)
        utils.loadData(textCTC, tCTC)
        utils.loadData(lengthCTC, lCTC)
        if opt.lang:
            predsCTC, predsAttention = crnn(image, lengthAttention,
                                            textAttention)
        else:
            # BUG FIX: this branch referenced the undefined name
            # `imageAttention` (NameError); use the shared `image` buffer.
            predsCTC, predsAttention = crnn(image, lengthAttention)
        costAttention = criterionAttention(predsAttention, textAttention)
        preds_size = Variable(torch.IntTensor([predsCTC.size(0)] * batch_size))
        costCTC = criterionCTC(predsCTC, textCTC, preds_size,
                               lengthCTC) / batch_size
        loss_avg.add(costAttention)
        loss_avg.add(costCTC.cuda())
        _, predsAttention = predsAttention.max(1)
        predsAttention = predsAttention.view(-1)
        sim_predsAttention = converterAttention.decode(predsAttention.data,
                                                       lengthAttention.data)
        _, predsCTC = predsCTC.max(2)
        predsCTC = predsCTC.transpose(1, 0).contiguous().view(-1)
        sim_predsCTC = converterCTC.decode(predsCTC.data, preds_size.data,
                                           raw=False)
        # Renamed from `i` to `j`: the original shadowed the batch counter.
        for j, cpu_text in enumerate(cpu_texts):
            gtText = cpu_text.decode('utf-8')
            CTCText = sim_predsCTC[j]
            if isinstance(CTCText, str):
                CTCText = CTCText.decode('utf-8')
            AttentionText = sim_predsAttention[j]
            print('gtText: %s' % gtText)
            print('CTCText: %s' % CTCText)
            print('AttentionText: %s' % AttentionText)
            if gtText == CTCText:
                n_correctCTC += 1
            if gtText == AttentionText:
                n_correctAttention += 1
            distanceCTC += Levenshtein.distance(CTCText, gtText)
            distanceAttention += Levenshtein.distance(AttentionText, gtText)
            sum_charNum = sum_charNum + len(gtText)
    # batch_size here is the size of the last batch, as in the original.
    correctCTC_accuracy = n_correctCTC / float(max_iter * batch_size)
    cerCTC = distanceCTC / float(sum_charNum)
    print('Test CERCTC: %f, accuracyCTC: %f' % (cerCTC, correctCTC_accuracy))
    correctAttention_accuracy = n_correctAttention / float(max_iter * batch_size)
    cerAttention = distanceAttention / float(sum_charNum)
    print('Test CERAttention: %f, accuricyAttention: %f'
          % (cerAttention, correctAttention_accuracy))
def val(net, _dataset1, _dataset2, _dataset3, epoch, step, criterion, max_iter=100):
    """Evaluate by averaging CRNN outputs over three aligned dataset views,
    write predictions to a per-epoch record file, and log loss/accuracy/
    recall/F1.

    Uses module-level globals: crnn, image, image2, image3, text, length,
    converter, params, utils, dataset, logger, log_dir.
    """
    logger.info('Start val')
    # for p in crnn.parameters():
    #     p.requires_grad = False
    net.eval()
    data_loader1 = torch.utils.data.DataLoader(
        _dataset1, shuffle=False, batch_size=params.batchSize,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH, imgW=params.imgW,
                                        keep_ratio=params.keep_ratio))
    data_loader2 = torch.utils.data.DataLoader(
        _dataset2, shuffle=False, batch_size=params.batchSize,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH, imgW=params.imgW,
                                        keep_ratio=params.keep_ratio))
    data_loader3 = torch.utils.data.DataLoader(
        _dataset3, shuffle=False, batch_size=params.batchSize,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH, imgW=params.imgW,
                                        keep_ratio=params.keep_ratio))
    val_iter = iter(data_loader1)
    val_iter2 = iter(data_loader2)
    val_iter3 = iter(data_loader3)
    n_correct = 0
    loss_avg = utils.averager()
    max_iter = len(data_loader1)
    record_dir = log_dir + 'epoch_%d_step_%d_data.txt' % (epoch, step)
    r = 1  # running line number for the prediction record file
    f = open(record_dir, "a")
    num_label, num_pred = params.total_num, 0
    start = time.time()
    for i in range(max_iter):
        data = next(val_iter)  # .next() is Py2-only
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        # Labels come from loader 1; loaders 2/3 supply alternative views of
        # the same images for prediction averaging.
        data2 = next(val_iter2)
        cpu_images2, _ = data2
        utils.loadData(image2, cpu_images2)
        data3 = next(val_iter3)
        cpu_images3, _ = data3
        utils.loadData(image3, cpu_images3)
        with torch.no_grad():
            # Average the three per-view outputs along a new leading axis.
            preds = torch.mean(
                torch.cat([
                    torch.unsqueeze(crnn(image), 0),
                    torch.unsqueeze(crnn(image2), 0),
                    torch.unsqueeze(crnn(image3), 0)
                ], 0), 0)
        print('preds: ', preds.shape)
        # BUG FIX: preds_size was never defined in this function, so the
        # criterion call below raised NameError on the first batch.
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        if not isinstance(sim_preds, list):
            sim_preds = [sim_preds]
        for pred in sim_preds:
            f.write(str(r).zfill(6) + ".jpg " + pred + "\n")
            r += 1
        # Renamed loop variable: the original `for i in cpu_texts` shadowed
        # the batch counter.
        list_1 = []
        for txt in cpu_texts:
            list_1.append(txt.decode('utf-8', 'strict'))
        for pred, target in zip(sim_preds, list_1):
            if pred == target:
                n_correct += 1
        num_pred += len(sim_preds)
        print("")
    f.close()
    # Show a few raw vs. decoded predictions from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:params.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, list_1):
        logger.info('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    logger.info('correct_num: %d' % (n_correct))
    logger.info('Total_num: %d' % (max_iter * params.batchSize))
    accuracy = float(n_correct) / num_pred
    recall = float(n_correct) / num_label
    logger.info(
        'Test loss: %f, accuray: %f, recall: %f, F1 score: %f, Cost : %.4fs per img'
        % (loss_avg.val(), accuracy, recall,
           2 * accuracy * recall / (accuracy + recall + 1e-2),
           (time.time() - start) / max_iter))
def val(net, dataset, criterion, max_iter=100):
    """Evaluate on up to max_iter batches of UTF-8 (bytes) labels; print
    average loss and exact-match accuracy.

    Uses module-level globals: crnn, image, text, length, converter, params,
    utils.
    """
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False
    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=params.batchSize,
        num_workers=int(params.workers))
    val_iter = iter(data_loader)
    n_correct = 0
    loss_avg = utils.averager()
    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)  # BUG FIX: .next() is Py2-only
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        # Greedy decode: argmax over classes, then flatten time-major.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        # Labels arrive as UTF-8 bytes; decode before comparing.
        list_1 = [m.decode('utf-8', 'strict') for m in cpu_texts]
        for pred, target in zip(sim_preds, list_1):
            if pred == target:  # exact (case-sensitive) match
                n_correct += 1
    # Show a few raw vs. decoded predictions from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:params.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, list_1):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    print(n_correct)
    print(max_iter * params.batchSize)
    accuracy = n_correct / float(max_iter * params.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
def val(net, dataset, criterion, max_iter=2):
    """Quick validation on up to max_iter batches; returns (testLoss, accuracy).

    Uses module-level globals: crnn, image, text, length, converter, opt,
    utils, ifUnicode, clean_txt.
    """
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False
    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=False, batch_size=opt.batchSize,
        num_workers=int(opt.workers))
    val_iter = iter(data_loader)
    n_correct = 0
    loss_avg = utils.averager()
    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)  # BUG FIX: .next() is Py2-only
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        if ifUnicode:
            # Normalize byte labels to cleaned unicode before encoding.
            cpu_texts = [clean_txt(tx.decode('utf-8')) for tx in cpu_texts]
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)
        # Greedy decode: argmax over classes, then flatten time-major.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        print(sim_preds)
        print(cpu_texts)
        for pred, target in zip(sim_preds, cpu_texts):
            # Whitespace-insensitive comparison.
            if pred.strip() == target.strip():
                n_correct += 1
    accuracy = n_correct / float(max_iter * opt.batchSize)
    testLoss = loss_avg.val()
    print('Test loss: %f, accuray: %f' % (testLoss, accuracy))
    return testLoss, accuracy
def validation(net, dataset, criterion, max_iter=100):
    """Run the model over (at most) max_iter validation batches.

    Prints a sample of raw/decoded predictions plus the average loss and
    case-sensitive accuracy. Uses the module-level crnn, image, text,
    length, converter, opt and utils objects.
    """
    print('Start val')
    # Gradients are not needed while validating.
    for param in crnn.parameters():
        param.requires_grad = False
    # Switch to inference mode (affects dropout/batchnorm).
    net.eval()
    loader = torch.utils.data.DataLoader(dataset,
                                         shuffle=True,
                                         batch_size=opt.batchSize,
                                         num_workers=int(opt.workers))
    batch_iter = iter(loader)
    n_correct = 0
    loss_avg = utils.averager()
    total_batches = min(max_iter, len(loader))
    for _ in range(total_batches):
        cpu_images, cpu_texts = batch_iter.next()
        batch_size = cpu_images.size(0)
        # Stage the batch into the shared image/text/length buffers.
        utils.loadData(image, cpu_images)
        encoded, enc_lengths = converter.encode(cpu_texts)
        utils.loadData(text, encoded)
        utils.loadData(length, enc_lengths)
        # Forward pass; output is (seq_len, batch, n_classes).
        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        # Accumulate the per-sample-averaged CTC cost for this batch.
        loss_avg.add(criterion(preds, text, preds_size, length) / batch_size)
        # Greedy decode: argmax over classes, then flatten time-major.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            # Case-sensitive comparison (use target.lower() for insensitive).
            if pred == target:
                n_correct += 1
    # Display a few raw vs. decoded predictions from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    accuracy = n_correct / float(total_batches * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
def val(net, dataset, criterion, idx, max_iter=20):
    """Validate, log loss/accuracy to tensorboard at step idx, and checkpoint
    the model whenever best accuracy improves past 0.35.

    Uses module-level globals: crnn, image, text, length, converter, opt,
    utils, writer, model_dir, best_accuracy.
    """
    print('Start val')
    for p in crnn.parameters():
        p.requires_grad = False
    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize,
        num_workers=int(opt.workers))
    val_iter = iter(data_loader)
    n_correct = 0
    loss_avg = utils.averager()
    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)  # BUG FIX: .next() is Py2-only
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)
        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        # Criterion runs on CPU copies of the predictions.
        cost = criterion(preds.cpu(), text, preds_size, length) / batch_size
        loss_avg.add(cost)
        # Greedy decode: argmax over classes, then flatten time-major.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():  # case-insensitive comparison
                n_correct += 1
    # Show a few raw vs. decoded predictions from the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
        # writer.add_text('Text', '%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt), idx)
    accuracy = n_correct / float(max_iter * opt.batchSize)
    writer.add_scalars('data/loss', {'val': loss_avg.val()}, idx)
    writer.add_scalars('data/accuracy', {'val': accuracy}, idx)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
    global best_accuracy
    # Save a checkpoint when accuracy improves and clears the 0.35 floor.
    if best_accuracy < accuracy:
        best_accuracy = accuracy
        if best_accuracy > 0.35:
            model_path = os.path.join(model_dir, '{:.5f}.pth'.format(best_accuracy))
            # NOTE(review): `epoch` is not defined in this function — it must
            # come from an enclosing/global scope; confirm before relying on it.
            print('At epoch {}, iter {}, writing model file to {}'.format(
                epoch, i, model_path))
            torch.save(crnn.state_dict(), model_path)