labels_pred2 = net.forward_ocr(features2) ctc_f = labels_pred.data.cpu().numpy() ctc_f = ctc_f.swapaxes(1, 2) labels = ctc_f.argmax(2) ind = np.unravel_index(labels, ctc_f.shape) conf = np.mean(np.exp(ctc_f.max(2)[labels > 3])) #if conf < 0.4: # print('Too low conf!') # continue conf_raw = np.exp(ctc_f[ind]) det_text, conf2, dec_s, word_splits = print_seq_ext( labels[0, :], codec) det_text = det_text.strip() if args.debug: im += 1 im *= 128 cv2.imshow('im', im.astype(np.uint8)) cv2.waitKey(0) if args.debug: print(det_text) if conf < 0.01 and len(det_text) == 3: print('Too low conf short: {0} {1}'.format(det_text, conf)) continue
def _dominant_script_index(text, scripts):
    """Return the index in *scripts* of the script that dominates *text*.

    Each character votes for the first entry of ``scripts[1:]`` whose label
    occurs in the character's Unicode name; characters matching no label
    (or having no Unicode name at all) vote for the catch-all index 0.
    Ties are resolved in favour of the lowest index.
    """
    votes = [0] * len(scripts)
    for char in text:
        # The default avoids a ValueError for unnamed code points.
        char_name = ud.name(char, '')
        for idx in range(1, len(scripts)):
            if scripts[idx] in char_name:
                votes[idx] += 1
                break
        else:
            votes[0] += 1
    return votes.index(max(votes))


def _write_word_report(rows, path, limit=1500):
    """Write the first *limit* (gt, pred, ed, image, name) rows to *path* as HTML."""
    table = np.asarray(rows, object)[0:limit, :]
    frame = pd.DataFrame({
        'gt': table[:, 0],
        'pred': table[:, 1],
        'ed': table[:, 2],
        'image': table[:, 3]
    })
    with open(path, 'w') as report:
        # escape=False keeps the <img> preview tags as markup.
        report.write(frame.to_html(escape=False))


def test(net,
         codec,
         args,
         list_file='/home/busta/data/icdar_ch8_validation/ocr_valid.txt',
         norm_height=32,
         max_samples=1000000):
    """Evaluate the OCR branch of *net* on the cropped-word list *list_file*.

    Every line of *list_file* is ``<image>,<text>`` (or space separated when
    the line holds no comma); image paths are resolved relative to the
    directory of *list_file*.  Each image is resized to *norm_height*,
    decoded with ``net.forward_features`` / ``net.forward_ocr`` and
    ``print_seq_ext``, and compared case-insensitively with the ground truth.

    Side effects: writes ``/tmp/ch8_valid.txt``, ``/tmp/ocr_valid.txt``,
    ``per_script_accuracy.csv``, ``conf_matrix.csv`` and
    ``conf_matrix_out.csv`` (the last three in the working directory), plus
    ``ocr_bad.html`` / ``ocr_not_sobad.html`` next to *list_file* when there
    are mismatches; sets ``pd.options.display.max_rows``; puts *net* into
    eval mode and back into train mode before returning.

    Args:
        net: model exposing ``eval``, ``train``, ``forward_features`` and
            ``forward_ocr``.
        codec: character table handed to ``print_seq_ext``.
        args: object whose ``cuda`` attribute selects GPU input tensors.
        list_file: path of the sample list.
        norm_height: height in pixels every image is resized to.
        max_samples: list entries with an index above this are not read.

    Returns:
        ``(accuracy, total_edit_distance)``; accuracy is 0.0 when no sample
        could be evaluated.
    """
    net = net.eval()

    dir_name = os.path.dirname(list_file)
    with open(list_file, "r") as ins:
        images = [line.strip() for line in ins]

    scripts = [
        '', 'DIGIT', 'LATIN', 'ARABIC', 'BENGALI', 'HANGUL', 'CJK',
        'HIRAGANA', 'KATAKANA'
    ]

    # Rows: ground-truth script, columns: script of the recognised text.
    conf_matrix = np.zeros((len(scripts), len(scripts)), dtype=int)

    gt_script = dict.fromkeys(scripts, 0)
    ed_script = dict.fromkeys(scripts, 0)
    correct_ed1_script = dict.fromkeys(scripts, 0)
    correct_script = dict.fromkeys(scripts, 0)
    count_script = dict.fromkeys(scripts, 0)

    evaluated = 0
    correct = 0
    correct_ed1 = 0
    ted = 0
    gt_all = 0
    images_count = 0
    bad_words = []

    with open('/tmp/ch8_valid.txt', 'w') as fout, \
            open('/tmp/ocr_valid.txt', 'w') as fout_ocr:
        for image_no, entry in enumerate(images):
            if image_no > max_samples:
                break

            fields = entry.split(",")
            delim = ","
            if len(fields) == 1:
                fields = entry.split(" ")
                delim = " "
            image_name = fields[0].strip()
            gt_txt = ''
            if len(fields) > 1:
                gt_txt = fields[1].strip()
                # NOTE(review): only the first three fields are re-joined, so
                # text containing two or more delimiters is truncated.
                if len(fields) > 2:
                    gt_txt += delim + fields[2]
                if len(gt_txt) > 1 and gt_txt[0] == '"' and gt_txt[-1] == '"':
                    gt_txt = gt_txt[1:-1]

            if len(gt_txt) == 0:
                print(entry)
                continue

            if image_name.endswith(','):
                image_name = image_name[0:-1]

            img_nameo = image_name
            image_name = os.path.join(dir_name, image_name)
            img = cv2.imread(image_name)
            if img is None:
                print(image_name)
                continue

            scale = norm_height / float(img.shape[0])
            width = int(img.shape[1] * scale)
            # Width is snapped to a multiple of 4, at least 8 pixels.
            width = max(8, int(round(width / 4)) * 4)

            scaled = cv2.resize(img, (int(width), norm_height))
            scaled = np.expand_dims(scaled, axis=0)
            scaled = np.asarray(scaled, dtype=float)
            scaled /= 128
            scaled -= 1

            try:
                scaled_var = net_utils.np_to_variable(
                    scaled, is_cuda=args.cuda).permute(0, 3, 1, 2)
                features = net.forward_features(scaled_var)
                ctc_f = net.forward_ocr(features)
                ctc_f = ctc_f.data.cpu().numpy()
                ctc_f = ctc_f.swapaxes(1, 2)
                labels = ctc_f.argmax(2)
                det_text, _, _, _ = print_seq_ext(labels[0, :], codec)
            except Exception:
                # Best effort: a failing sample counts as an empty prediction.
                print('bad image')
                det_text = ''

            det_text = det_text.strip()
            gt_txt = gt_txt.strip()

            try:
                first_char_name = ud.name(gt_txt[0])
            except (IndexError, ValueError):
                # Empty text after stripping, or an unnamed first character.
                continue
            if 'ARABIC' in first_char_name:
                det_text = det_text[::-1]

            evaluated += 1

            gt_idx = _dominant_script_index(gt_txt, scripts)
            det_idx = _dominant_script_index(det_text, scripts)
            script = scripts[gt_idx]
            conf_matrix[gt_idx, det_idx] += 1

            edit_dist = distance(det_text.lower(), gt_txt.lower())
            ted += edit_dist
            gt_all += len(gt_txt)

            gt_script[script] += len(gt_txt)
            ed_script[script] += edit_dist

            images_count += 1

            fout_ocr.write('{0}, "{1}"\n'.format(
                os.path.basename(image_name), det_text.strip()))

            if det_text.lower() == gt_txt.lower():
                correct += 1
                correct_ed1 += 1
                correct_script[script] += 1
                correct_ed1_script[script] += 1
            else:
                if edit_dist == 1:
                    correct_ed1 += 1
                    correct_ed1_script[script] += 1
                image_prev = "<img src=\"{0}\" height=\"32\" />".format(
                    img_nameo)
                bad_words.append(
                    (gt_txt, det_text, edit_dist, image_prev, img_nameo))
                print('{0} - {1} / {2:.2f} - {3:.2f}'.format(
                    det_text, gt_txt, correct / float(evaluated), ted / 3.0))

            count_script[script] += 1
            fout.write('{0}|{1}|{2}|{3}\n'.format(
                os.path.basename(image_name), gt_txt, det_text, edit_dist))

    accuracy = correct / float(max(images_count, 1))
    print('Test accuracy: {0:.3f}, {1:.2f}, {2:.3f}'.format(
        accuracy, ted / 3.0, ted / float(max(gt_all, 1))))

    row_fmt = '{0} & {1:.3f} & {5:.3f} & {2:.3f} & {3} & {4} \\\\\n'
    with open("per_script_accuracy.csv", "w") as itf:
        # Header order matches the row layout: accuracy, ed1, edit distance.
        itf.write(
            'Script & Accuracy & ed1 & Edit Distance & Ch instances & Im Instances \\\\\n'
        )
        for scr in scripts:
            correct_scr = correct_script[scr]
            correct_scr_ed1 = correct_ed1_script[scr]
            count_scr = count_script[scr]
            ted_scr = ed_script[scr]
            gt_all_scr = gt_script[scr]
            print(' Script:{3} Acc : {0:.3f}, {1:.2f}, {2:.3f}, {4}'.format(
                correct_scr / float(max(count_scr, 1)), ted_scr / 3.0,
                ted_scr / float(max(gt_all_scr, 1)), scr, gt_all_scr))
            itf.write(
                row_fmt.format(scr.title(),
                               correct_scr / float(max(count_scr, 1)),
                               ted_scr / float(max(gt_all_scr, 1)),
                               gt_all_scr, count_scr,
                               correct_scr_ed1 / float(max(count_scr, 1))))
        itf.write(
            row_fmt.format('Total', correct / float(max(images_count, 1)),
                           ted / float(max(gt_all, 1)), gt_all, images_count,
                           correct_ed1 / float(max(images_count, 1))))

    print(conf_matrix)
    np.savetxt("conf_matrix.csv",
               conf_matrix,
               delimiter=' & ',
               fmt='%d',
               newline=' \\\\\n')

    # Re-emit the matrix with a header row and a script label on every line.
    with open("conf_matrix_out.csv", "w") as itf, \
            open("conf_matrix.csv", "r") as ins:
        itf.write(' & ')
        itf.write(" & ".join(scr.title() for scr in scripts))
        itf.write('\\\\\n')
        for script_no, line in enumerate(ins):
            if script_no >= len(scripts):
                break
            itf.write(scripts[script_no].title() + " & " + line)

    net.train()

    pd.options.display.max_rows = 9999

    if len(bad_words) > 0:
        # NOTE(review): 'ocr_bad' lists the smallest edit distances first and
        # 'ocr_not_sobad' the largest; the names look swapped - confirm
        # before renaming.
        _write_word_report(sorted(bad_words, key=lambda row: row[2]),
                           os.path.join(dir_name, 'ocr_bad.html'))
        _write_word_report(
            sorted(bad_words, key=lambda row: row[2], reverse=True),
            os.path.join(dir_name, 'ocr_not_sobad.html'))

    return accuracy, ted
def evaluate_e2e_crnn(root,
                      net,
                      norm_height=48,
                      name_model='E2E',
                      normalize=False,
                      save=False,
                      cuda=True,
                      save_dir='eval'):
    """Run end-to-end (detection + recognition) evaluation over a folder.

    All ``*.jpg``, ``*.png`` and ``*.JPG`` files in *root* are processed.
    Ground truth is read with ``load_gt`` from ``<root>/gt_<stem>.txt``,
    falling back to the same name with underscores removed from the stem; an
    image without ground truth is evaluated against empty ground truth.
    Detected boxes are cropped from the network input with an affine grid,
    recognised with ``net.forward_ocr`` and scored with ``evaluate_image``.

    Relies on names from the enclosing module that are not parameters:
    ``codec``, ``load_gt``, ``resize_image``, ``get_boxes``,
    ``evaluate_image``, ``print_seq_ext`` and ``net_utils``.

    Args:
        root: directory holding the images and their ``gt_*.txt`` files.
        net: model; ``net(im_data)`` must return
            ``([iou, _], rboxes, angles, _)`` and it must expose
            ``forward_ocr``.
        norm_height: height in pixels of the crops handed to the OCR head.
        name_model: label written in front of the metrics line.
        normalize: when true, pixel values are mapped with ``x / 128 - 1``.
        save: when true, the image returned by ``evaluate_image`` is written
            to *save_dir* under the input's base name.
        cuda: forwarded to ``net_utils.np_to_variable`` as ``is_cuda``.
        save_dir: output directory; ``note_eval.txt`` inside it is appended
            with one metrics line per call.

    Returns:
        ``(tp_e2e / gt_e2e, tp / gt_e2e, tp_e2e_ed1 / gt_e2e,
        tp / detections)`` accumulated over all images, each denominator
        clamped to at least 1.
    """
    net = net.eval()

    image_paths = []
    for pattern in ('*.jpg', '*.png', '*.JPG'):
        image_paths.extend(glob.glob(os.path.join(root, pattern)))

    tp_all = 0
    gt_all = 0
    tp_e2e_all = 0
    gt_e2e_all = 0
    tp_e2e_ed1_all = 0
    detections_all = 0
    eval_text_length = 2
    segm_thresh = 0.5
    min_height = 8

    os.makedirs(save_dir, exist_ok=True)
    note_path = os.path.join(save_dir, 'note_eval.txt')

    with torch.no_grad():
        for img_name in image_paths:
            base_nam = os.path.basename(img_name)

            # splitext handles every globbed extension, including '.JPG'.
            stem = os.path.splitext(base_nam)[0]
            res_gt = '{0}/gt_{1}.txt'.format(root, stem)
            if not os.path.exists(res_gt):
                res_gt = '{0}/gt_{1}.txt'.format(root, stem.replace("_", ""))

            if os.path.exists(res_gt):
                gt_rect, gt_txts = load_gt(res_gt)
            else:
                print('missing! {0}'.format(res_gt))
                gt_rect, gt_txts = [], []

            img = cv2.imread(img_name)
            if img is None:
                # Unreadable image: nothing to evaluate.
                print(img_name)
                continue

            im_resized, _ = resize_image(img,
                                         max_size=1848 * 1024,
                                         scale_up=False)
            batch = np.asarray([im_resized], dtype=float)
            if normalize:
                batch /= 128
                batch -= 1
            im_data = net_utils.np_to_variable(batch, is_cuda=cuda).permute(
                0, 3, 1, 2)

            [iou_pred, _], rboxs, angle_pred, _ = net(im_data)
            iou = iou_pred.data.cpu()[0].numpy()
            iou = iou.squeeze(0)

            rbox = rboxs[0].data.cpu()[0].numpy()
            rbox = rbox.swapaxes(0, 1)
            rbox = rbox.swapaxes(1, 2)
            detections = get_boxes(iou, rbox,
                                   angle_pred[0].data.cpu()[0].numpy(),
                                   segm_thresh)

            # Ratio between the network input and the original image.
            im_scalex = im_resized.shape[1] / img.shape[1]
            im_scaley = im_resized.shape[0] / img.shape[0]

            input_W = im_data.size(3)
            input_H = im_data.size(2)
            target_h = norm_height

            detections_out = []
            for box in detections:
                # First eight values are read as four (x, y) corners, in
                # network-input coordinates.
                boxr = box[0:8].reshape(-1, 2)

                center = (boxr[0, :] + boxr[1, :] + boxr[2, :] +
                          boxr[3, :]) / 4

                dw = boxr[2, :] - boxr[1, :]
                dw2 = boxr[0, :] - boxr[3, :]
                dh = boxr[1, :] - boxr[0, :]
                dh2 = boxr[3, :] - boxr[2, :]

                # Mean of the two opposite edges; height is padded by 1.
                h = math.sqrt(dh[0] * dh[0] + dh[1] * dh[1]) + 1
                h_opp = math.sqrt(dh2[0] * dh2[0] + dh2[1] * dh2[1]) + 1
                h = (h + h_opp) / 2
                w = math.sqrt(dw[0] * dw[0] + dw[1] * dw[1])
                w_opp = math.sqrt(dw2[0] * dw2[0] + dw2[1] * dw2[1])
                w = (w + w_opp) / 2

                # Box height measured in original-image pixels.
                if ((h - 1) / im_scaley) < min_height:
                    continue

                scale = target_h / h
                target_gw = int(w * scale + target_h / 4)
                # Crop width is snapped to a multiple of 8, at least 8.
                target_gw = max(8, int(round(target_gw / 8)) * 8)

                xc = center[0]
                yc = center[1]

                angle = math.atan2((boxr[2][1] - boxr[1][1]),
                                   boxr[2][0] - boxr[1][0])
                angle2 = math.atan2((boxr[3][1] - boxr[0][1]),
                                    boxr[3][0] - boxr[0][0])
                angle = (angle + angle2) / 2

                # Affine parameters mapping the crop onto the input tensor.
                scalex = (w + h / 4) / input_W
                scaley = h / input_H

                th11 = scalex * math.cos(angle)
                th12 = -math.sin(angle) * scaley * input_H / input_W
                th13 = (2 * xc - input_W - 1) / (input_W - 1)

                th21 = math.sin(angle) * scalex * input_W / input_H
                th22 = scaley * math.cos(angle)
                th23 = (2 * yc - input_H - 1) / (input_H - 1)

                t = np.asarray([th11, th12, th13, th21, th22, th23],
                               dtype=float)
                t = torch.from_numpy(t).type(torch.FloatTensor)
                # The grid must live on the same device as the sampled tensor.
                t = t.to(im_data.device)
                theta = t.view(-1, 2, 3)

                # NOTE(review): affine_grid/grid_sample run with the installed
                # torch's default align_corners; confirm that matches the
                # convention this theta was derived for.
                grid = F.affine_grid(
                    theta, torch.Size((1, 3, int(target_h), int(target_gw))))
                x = F.grid_sample(im_data, grid)

                labels_pred = net.forward_ocr(x)
                labels_pred = labels_pred.permute(1, 2, 0)

                ctc_f = labels_pred.data.cpu().numpy()
                ctc_f = ctc_f.swapaxes(1, 2)
                labels = ctc_f.argmax(2)

                # Mean probability over positions whose label index is > 3.
                conf = np.mean(np.exp(ctc_f.max(2)[labels > 3]))
                if conf < 0.02:
                    continue

                det_text, _, _, _ = print_seq_ext(labels[0, :], codec)
                dtxt = det_text.strip()

                if len(dtxt) >= eval_text_length:
                    # Report the box in original-image coordinates.
                    boxw = np.copy(boxr)
                    boxw[:, 1] /= im_scaley
                    boxw[:, 0] /= im_scalex
                    boxw = boxw.reshape(8)
                    detections_out.append([boxw, dtxt])

            tp, tp_e2e, gt_e2e, tp_e2e_ed1, _, pixx = evaluate_image(
                img,
                detections_out,
                gt_rect,
                gt_txts,
                eval_text_length=eval_text_length)
            tp_all += tp
            gt_all += len(gt_txts)
            tp_e2e_all += tp_e2e
            gt_e2e_all += gt_e2e
            tp_e2e_ed1_all += tp_e2e_ed1
            detections_all += len(detections_out)

            if save:
                cv2.imwrite('{0}/{1}'.format(save_dir, base_nam), pixx)

    metrics = (tp_e2e_all / float(max(1, gt_e2e_all)),
               tp_all / float(max(1, gt_e2e_all)),
               tp_e2e_ed1_all / float(max(1, gt_e2e_all)),
               tp_all / float(max(1, detections_all)))

    with open(note_path, 'a') as note_file:
        note_file.write(
            'Model{4}---E2E recall tp_e2e:{0:.3f} / tp:{1:.3f} / e1:{2:.3f}, precision: {3:.3f} \n'
            .format(metrics[0], metrics[1], metrics[2], metrics[3],
                    name_model))

    return metrics