def eval_detection(opts, net=None):
    if net is None:
        net = OctShuffleMLT(attention=True)
        net_utils.load_net(opts.model, net)
    if opts.cuda:
        net.cuda()

    images, gt_boxes = load_annotation(opts.eval_list)

    true_positives = 0
    false_positives = 0
    false_negatives = 0

    for i in range(images.shape[0]):
        image = np.expand_dims(images[i], axis=0)
        image_boxes_gt = np.array(gt_boxes[i])

        im_data = net_utils.np_to_variable(image, is_cuda=opts.cuda).permute(0, 3, 1, 2)
        seg_pred, rboxs, angle_pred, features = net(im_data)

        rbox = rboxs[0].data.cpu()[0].numpy()
        rbox = rbox.swapaxes(0, 1)
        rbox = rbox.swapaxes(1, 2)
        angle_pred = angle_pred[0].data.cpu()[0].numpy()
        segm = seg_pred[0].data.cpu()[0].numpy()
        segm = segm.squeeze(0)

        boxes = get_boxes(segm, rbox, angle_pred, opts.segm_thresh)
        if opts.debug:
            print(boxes.shape)
            print(image_boxes_gt.shape)
            print("============")

        # Start by counting every detection as a false positive and every
        # ground-truth box as a false negative; matches convert them below.
        false_positives += boxes.shape[0]
        false_negatives += image_boxes_gt.shape[0]
        for box in boxes:
            b = box[0:8].reshape(4, -1)
            poly = Polygon.Polygon(b)
            for box_gt in image_boxes_gt:
                b_gt = box_gt[0:8].reshape(4, -1)
                poly_gt = Polygon.Polygon(b_gt)
                # In the Polygon library '&' is intersection and '|' is union;
                # the original had the two operators swapped and a skewed ratio.
                intersection = poly_gt & poly
                union = poly_gt | poly
                iou = intersection.area() / max(union.area(), 1e-10)
                if iou > 0.5:
                    true_positives += 1
                    false_negatives -= 1
                    false_positives -= 1
                    # Drop the matched ground-truth box so it cannot match twice.
                    image_boxes_gt = np.array([bgt for bgt in image_boxes_gt
                                               if not np.array_equal(bgt, box_gt)])
                    break

    print("tp: {} fp: {} fn: {}".format(true_positives, false_positives, false_negatives))
    # max(1, ...) guards against division by zero on empty result sets.
    precision = true_positives / max(1, true_positives + false_positives)
    recall = true_positives / max(1, true_positives + false_negatives)
    f_score = 2 * precision * recall / max(precision + recall, 1e-10)
    print("PRECISION: {} \t RECALL: {} \t F SCORE: {}".format(precision, recall, f_score))
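# A minimal, self-contained sketch of the greedy IoU matching used in
# eval_detection, assuming the Polygon3 package ('&' = intersection,
# '|' = union, .area()). The quads below are made up for illustration;
# only the matching rule mirrors the function above.
import Polygon

def quad_iou(q1, q2):
    # q1, q2: iterables of four (x, y) corner points.
    p1, p2 = Polygon.Polygon(q1), Polygon.Polygon(q2)
    union_area = (p1 | p2).area()
    return (p1 & p2).area() / union_area if union_area > 0 else 0.0

if __name__ == '__main__':
    det = [(0, 0), (10, 0), (10, 10), (0, 10)]
    gt = [(5, 0), (15, 0), (15, 10), (5, 10)]
    print(quad_iou(det, gt))  # 50 / 150 -> ~0.333, below the 0.5 match threshold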
from os import system
from os.path import basename, join, splitext

def draw_detection_results(detection_output_filename, target_dir):
    boxes = get_boxes(detection_output_filename)
    system('rm -rf ' + target_dir)
    system('mkdir -p ' + target_dir)
    for image_filename, bs in boxes.items():
        cmd = 'convert ' + convert_bgr_to_rgb(image_filename)
        cmd += ' -fill none -stroke chartreuse -strokewidth 2'
        for xmin, ymin, xmax, ymax, score in bs:
            cmd += (' -draw "rectangle %s,%s,%s,%s" '
                    % (int(xmin), int(ymin), int(xmax), int(ymax)))
        # Text drawing code strangely doesn't work on my machine.
        # See the question I posted on SO:
        # http://stackoverflow.com/questions/27324930/convert-non-conforming-drawing-primitive-definition-text/27332225#27332225
        #cmd += ' -pointsize 17 -fill chartreuse'
        #text = 'Score:' + "{:.2f}".format(score)
        #cmd += ' -draw "text 20%%,20%% \'%s\'"' % text
        target = join(target_dir, splitext(basename(image_filename))[0] + '.jpg')
        cmd += ' ' + target
        print(cmd)
        system(cmd)
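# The ImageMagick text-drawing issue noted above can be sidestepped by drawing
# with Pillow instead of shelling out to `convert`. A minimal sketch, assuming
# the same (xmin, ymin, xmax, ymax, score) box format as draw_detection_results;
# the function name and paths are hypothetical, not part of the original code.
from PIL import Image, ImageDraw

def draw_boxes_pil(image_filename, bs, target):
    img = Image.open(image_filename).convert('RGB')
    draw = ImageDraw.Draw(img)
    for xmin, ymin, xmax, ymax, score in bs:
        # chartreuse is (127, 255, 0)
        draw.rectangle([int(xmin), int(ymin), int(xmax), int(ymax)],
                       outline=(127, 255, 0), width=2)
        draw.text((int(xmin), max(0, int(ymin) - 12)),
                  'Score: {:.2f}'.format(score), fill=(127, 255, 0))
    img.save(target)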
images /= 128
images -= 1
im_data = net_utils.np_to_variable(images.transpose(0, 3, 1, 2), is_cuda=args.cuda)
seg_pred, rboxs, angle_pred, features = net(im_data)

rbox = rboxs[0].data.cpu()[0].numpy()
# convert to (h, w, c) layout
rbox = rbox.swapaxes(0, 1)
rbox = rbox.swapaxes(1, 2)
angle_pred = angle_pred[0].data.cpu()[0].numpy()
segm = seg_pred[0].data.cpu()[0].numpy()
segm = segm.squeeze(0)

draw2 = np.copy(im_resized)
boxes = get_boxes(segm, rbox, angle_pred, args.segm_thresh)

img = Image.fromarray(draw2)
draw = ImageDraw.Draw(img)

out_boxes = []
for box in boxes:
    pts = box[0:8]
    pts = pts.reshape(4, -1)
    # det_text, conf, dec_s = ocr_image(net, codec, im_data, box)
    det_text, conf, dec_s = align_ocr(net, converter, im_data, box, features, debug=0)
    if len(det_text) == 0:
        continue
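# The normalization above maps uint8 pixel values [0, 255] to roughly [-1, 1)
# via x/128 - 1. A quick check with dummy data:
import numpy as np

pixels = np.asarray([0, 128, 255], dtype=np.float32)
print(pixels / 128 - 1)  # [-1.  0.  0.9921875]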
cv2.imshow('iou', iou)
# cv2.imshow('ioud', ioud)
cv2.imshow('iou_pred1', iou_pred1)

# Non-maximum suppression on the segmentation map: keep a pixel only if it is
# the maximum of its 3x3 neighbourhood.
size = 3
import scipy.ndimage as ndimage
image_max = ndimage.maximum_filter(iou, size=size, mode='constant')
mask = (iou == image_max)
iou2 = iou * mask
if args.debug == 1:
    cv2.imshow('iou2', iou2)

detections = get_boxes(iou, rbox, angle_pred[0].data.cpu()[0].numpy(), args.segm_thresh)
# detectionsd = get_boxes(iou_pred1, rboxd, angle_pred[1].data.cpu()[0].numpy(), args.segm_thresh, iou_thresh=0.2)

# Rescale detections from the resized image back to the original image size.
im_scalex = im_resized.shape[1] / img.shape[1]
im_scaley = im_resized.shape[0] / img.shape[0]
detectionso = np.copy(detections)
if len(detections) > 0:
    detections[:, 0] /= im_scalex
    detections[:, 2] /= im_scalex
    detections[:, 4] /= im_scalex
    detections[:, 6] /= im_scalex
    detections[:, 1] /= im_scaley
    detections[:, 3] /= im_scaley
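# Standalone sketch of the maximum-filter trick above: a pixel survives only if
# it equals the maximum of its 3x3 neighbourhood, which suppresses non-peak
# responses in the segmentation map. Toy data, same calls as in the snippet:
import numpy as np
import scipy.ndimage as ndimage

iou_toy = np.array([[0.1, 0.2, 0.1],
                    [0.2, 0.9, 0.2],
                    [0.1, 0.2, 0.1]])
image_max = ndimage.maximum_filter(iou_toy, size=3, mode='constant')
print(iou_toy * (iou_toy == image_max))  # only the 0.9 peak survives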
def run_model_input_image(im, show_boxes=False):
    predictions = {}
    parser = argparse.ArgumentParser()
    parser.add_argument('-cuda', type=int, default=1)
    parser.add_argument('-model', default='e2e-mlt-rctw.h5')
    parser.add_argument('-segm_thresh', default=0.5)
    font2 = ImageFont.truetype("Arial-Unicode-Regular.ttf", 18)
    args = parser.parse_args()

    net = ModelResNetSep2(attention=True)
    net_utils.load_net(args.model, net)
    net = net.eval()
    if args.cuda:
        print('Using cuda ...')
        net = net.cuda()

    with torch.no_grad():
        # im = Image.open(im)
        # im = im.convert('RGB')
        im = np.asarray(im)
        im = im[..., :3]
        im_resized, (ratio_h, ratio_w) = resize_image(im, scale_up=False)
        images = np.asarray([im_resized], dtype=np.float32)
        images /= 128
        images -= 1
        im_data = net_utils.np_to_variable(images, is_cuda=args.cuda).permute(0, 3, 1, 2)
        seg_pred, rboxs, angle_pred, features = net(im_data)

        rbox = rboxs[0].data.cpu()[0].numpy()
        rbox = rbox.swapaxes(0, 1)
        rbox = rbox.swapaxes(1, 2)
        angle_pred = angle_pred[0].data.cpu()[0].numpy()
        segm = seg_pred[0].data.cpu()[0].numpy()
        segm = segm.squeeze(0)

        draw2 = np.copy(im_resized)
        boxes = get_boxes(segm, rbox, angle_pred, args.segm_thresh)

        img = Image.fromarray(draw2)
        draw = ImageDraw.Draw(img)

        # if len(boxes) > 10:
        #     boxes = boxes[0:10]
        out_boxes = []
        prediction_i = []
        for box in boxes:
            pts = box[0:8]
            pts = pts.reshape(4, -1)
            det_text, conf, dec_s = ocr_image(net, codec, im_data, box)
            if len(det_text) == 0:
                continue
            width, height = draw.textsize(det_text, font=font2)
            center = [box[0], box[1]]
            draw.text((center[0], center[1]), det_text, fill=(0, 255, 0), font=font2)
            out_boxes.append(box)
            # det_text is one prediction
            prediction_i.append(det_text.lower())
        predictions["frame"] = prediction_i
        # Show each image's boxes and output in a pop-up window.
        show_image_with_boxes(img, out_boxes, show=show_boxes)
    print(predictions)
    return predictions
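# Hypothetical usage of run_model_input_image: it expects an RGB image array
# (anything np.asarray can consume) and returns {"frame": [text, ...]}. The
# image path is an assumption for illustration. Note that the function builds
# its own argparse parser, so run it without unrelated CLI flags.
import cv2

if __name__ == '__main__':
    bgr = cv2.imread('sample.jpg')  # hypothetical input image
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    preds = run_model_input_image(rgb, show_boxes=True)
    print(preds["frame"])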
def evaluate_e2e_crnn(root, net, norm_height=48, name_model='E2E', normalize=False,
                      save=False, cuda=True, save_dir='eval'):
    # Description: evaluate an end-to-end (detection + recognition) model.
    net = net.eval()
    # if cuda:
    #     print('Using cuda ...')
    #     net = net.to(device)

    images = glob.glob(os.path.join(root, '*.jpg'))
    png = glob.glob(os.path.join(root, '*.png'))
    images.extend(png)
    png = glob.glob(os.path.join(root, '*.JPG'))
    images.extend(png)
    imagess = np.asarray(images)

    tp_all = 0
    gt_all = 0
    tp_e2e_all = 0
    gt_e2e_all = 0
    tp_e2e_ed1_all = 0
    detections_all = 0

    eval_text_length = 2
    segm_thresh = 0.5
    min_height = 8
    idx = 0

    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    note_path = os.path.join(save_dir, 'note_eval.txt')
    note_file = open(note_path, 'a')

    with torch.no_grad():
        index = np.arange(0, imagess.shape[0])
        # np.random.shuffle(index)
        for i in index:
            img_name = imagess[i]
            base_name = os.path.basename(img_name)
            # if args.evaluate == 1:
            res_gt = base_name.replace(".jpg", '.txt').replace(".png", '.txt')
            res_gt = '{0}/gt_{1}'.format(root, res_gt)
            if not os.path.exists(res_gt):
                res_gt = base_name.replace(".jpg", '.txt').replace("_", "")
                res_gt = '{0}/gt_{1}'.format(root, res_gt)
                if not os.path.exists(res_gt):
                    print('missing! {0}'.format(res_gt))
                    gt_rect, gt_txts = [], []
                    # continue
            gt_rect, gt_txts = load_gt(res_gt)

            # print(img_name)
            img = cv2.imread(img_name)
            im_resized, _ = resize_image(img, max_size=1848 * 1024, scale_up=False)  # 1348*1024 #1848*1024
            images = np.asarray([im_resized], dtype=np.float32)
            if normalize:
                images /= 128
                images -= 1
            im_data = net_utils.np_to_variable(images, is_cuda=cuda).permute(0, 3, 1, 2)

            [iou_pred, iou_pred1], rboxs, angle_pred, features = net(im_data)
            iou = iou_pred.data.cpu()[0].numpy()
            iou = iou.squeeze(0)

            rbox = rboxs[0].data.cpu()[0].numpy()
            rbox = rbox.swapaxes(0, 1)
            rbox = rbox.swapaxes(1, 2)

            detections = get_boxes(iou, rbox, angle_pred[0].data.cpu()[0].numpy(), segm_thresh)

            # Rescale detections back to the original image size.
            im_scalex = im_resized.shape[1] / img.shape[1]
            im_scaley = im_resized.shape[0] / img.shape[0]

            detections_out = []
            detectionso = np.copy(detections)
            if len(detections) > 0:
                detections[:, 0] /= im_scalex
                detections[:, 2] /= im_scalex
                detections[:, 4] /= im_scalex
                detections[:, 6] /= im_scalex
                detections[:, 1] /= im_scaley
                detections[:, 3] /= im_scaley
                detections[:, 5] /= im_scaley
                detections[:, 7] /= im_scaley

            for bid, box in enumerate(detections):
                boxo = detectionso[bid]
                # score = boxo[8]
                boxr = boxo[0:8].reshape(-1, 2)
                # box_area = area(boxr.reshape(8))
                # conf_factor = score / box_area
                center = (boxr[0, :] + boxr[1, :] + boxr[2, :] + boxr[3, :]) / 4

                # Average opposite edges to estimate box width and height.
                dw = boxr[2, :] - boxr[1, :]
                dw2 = boxr[0, :] - boxr[3, :]
                dh = boxr[1, :] - boxr[0, :]
                dh2 = boxr[3, :] - boxr[2, :]

                h = math.sqrt(dh[0] * dh[0] + dh[1] * dh[1]) + 1
                h2 = math.sqrt(dh2[0] * dh2[0] + dh2[1] * dh2[1]) + 1
                h = (h + h2) / 2
                w = math.sqrt(dw[0] * dw[0] + dw[1] * dw[1])
                w2 = math.sqrt(dw2[0] * dw2[0] + dw2[1] * dw2[1])
                w = (w + w2) / 2

                if ((h - 1) / im_scaley) < min_height:
                    continue

                input_W = im_data.size(3)
                input_H = im_data.size(2)
                target_h = norm_height

                scale = target_h / h
                target_gw = int(w * scale + target_h / 4)
                target_gw = max(8, int(round(target_gw / 8)) * 8)

                xc = center[0]
                yc = center[1]
                w2 = w
                h2 = h

                angle = math.atan2((boxr[2][1] - boxr[1][1]), boxr[2][0] - boxr[1][0])
                angle2 = math.atan2((boxr[3][1] - boxr[0][1]), boxr[3][0] - boxr[0][0])
                angle = (angle + angle2) / 2

                # show pooled image in image layer
                scalex = (w2 + h2 / 4) / input_W
                scaley = h2 / input_H

                th11 = scalex * math.cos(angle)
                th12 = -math.sin(angle) * scaley * input_H / input_W
                th13 = (2 * xc - input_W - 1) / (input_W - 1)

                th21 = math.sin(angle) * scalex * input_W / input_H
                th22 = scaley * math.cos(angle)
                th23 = (2 * yc - input_H - 1) / (input_H - 1)

                t = np.asarray([th11, th12, th13, th21, th22, th23], dtype=np.float32)
                t = torch.from_numpy(t).type(torch.FloatTensor)
                t = t.to(device)
                theta = t.view(-1, 2, 3)

                # Crop the rotated box into an axis-aligned strip for the OCR branch.
                grid = F.affine_grid(theta, torch.Size((1, 3, int(target_h), int(target_gw))))
                x = F.grid_sample(im_data, grid)

                # features = net.forward_features(x)
                # labels_pred = net.forward_ocr(features)
                labels_pred = net.forward_ocr(x)
                labels_pred = labels_pred.permute(1, 2, 0)

                ctc_f = labels_pred.data.cpu().numpy()
                ctc_f = ctc_f.swapaxes(1, 2)

                labels = ctc_f.argmax(2)
                conf = np.mean(np.exp(ctc_f.max(2)[labels > 3]))
                if conf < 0.02:
                    continue

                det_text, conf2, dec_s, word_splits = print_seq_ext(labels[0, :], codec)
                det_text = det_text.strip()

                if conf < 0.01 and len(det_text) == 3:
                    continue

                if len(det_text) > 0:
                    dtxt = det_text.strip()
                    if len(dtxt) >= eval_text_length:
                        # print('{0} - {1}'.format(dtxt, conf_factor))
                        boxw = np.copy(boxr)
                        boxw[:, 1] /= im_scaley
                        boxw[:, 0] /= im_scalex
                        boxw = boxw.reshape(8)
                        detections_out.append([boxw, dtxt])

            pix = img
            # if args.evaluate == 1:
            tp, tp_e2e, gt_e2e, tp_e2e_ed1, detection_to_gt, pixx = evaluate_image(
                pix, detections_out, gt_rect, gt_txts, eval_text_length=eval_text_length)
            tp_all += tp
            gt_all += len(gt_txts)
            tp_e2e_all += tp_e2e
            gt_e2e_all += gt_e2e
            tp_e2e_ed1_all += tp_e2e_ed1
            detections_all += len(detections_out)
            # print(gt_all)
            if save:
                cv2.imwrite('{0}/{1}'.format(save_dir, base_name), pixx)

        # print(" E2E recall tp_e2e:{0:.3f} / tp:{1:.3f} / e1:{2:.3f}, precision: {3:.3f}".format(
        #     tp_e2e_all / float(max(1, gt_e2e_all)),
        #     tp_all / float(max(1, gt_e2e_all)),
        #     tp_e2e_ed1_all / float(max(1, gt_e2e_all)),
        #     tp_all / float(max(1, detections_all))))

    note_file.write(
        'Model{4}---E2E recall tp_e2e:{0:.3f} / tp:{1:.3f} / e1:{2:.3f}, precision: {3:.3f} \n'
        .format(tp_e2e_all / float(max(1, gt_e2e_all)),
                tp_all / float(max(1, gt_e2e_all)),
                tp_e2e_ed1_all / float(max(1, gt_e2e_all)),
                tp_all / float(max(1, detections_all)),
                name_model))
    note_file.close()
    return (tp_e2e_all / float(max(1, gt_e2e_all)),
            tp_all / float(max(1, gt_e2e_all)),
            tp_e2e_ed1_all / float(max(1, gt_e2e_all)),
            tp_all / float(max(1, detections_all)))
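# Self-contained sketch of the rotated-crop step in evaluate_e2e_crnn:
# F.affine_grid takes a 2x3 theta in normalized [-1, 1] coordinates, so
# th13/th23 recentre the grid on the box centre (xc, yc) and scalex/scaley
# shrink it to the box extent. Dummy image and an axis-aligned box
# (angle = 0); all numbers here are made up for illustration.
import math
import torch
import torch.nn.functional as F

input_H, input_W = 64, 128
im = torch.arange(input_H * input_W, dtype=torch.float32).view(1, 1, input_H, input_W)

xc, yc, w, h, angle = 60.0, 30.0, 40.0, 16.0, 0.0
target_h, target_gw = 8, 24
scalex = (w + h / 4) / input_W
scaley = h / input_H
theta = torch.tensor([[scalex * math.cos(angle),
                       -math.sin(angle) * scaley * input_H / input_W,
                       (2 * xc - input_W - 1) / (input_W - 1)],
                      [math.sin(angle) * scalex * input_W / input_H,
                       scaley * math.cos(angle),
                       (2 * yc - input_H - 1) / (input_H - 1)]]).view(1, 2, 3)

# align_corners=True matches the (N - 1) denominators used above.
grid = F.affine_grid(theta, torch.Size((1, 1, target_h, target_gw)), align_corners=True)
crop = F.grid_sample(im, grid, align_corners=True)
print(crop.shape)  # torch.Size([1, 1, 8, 24])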