Code example #1
File: eval.py  Project: wisdal/NAVI-STR
                    res_gt = '{0}/gt_{1}'.format(args.images_dir, res_gt)
                if os.path.exists(res_gt):
                    gt_rect, gt_txts = load_gt(res_gt)
                else:
                    # No ground truth for this image; use empty annotations.
                    print('missing! {0}'.format(res_gt))
                    gt_rect, gt_txts = [], []

            print(img_name)

            img = cv2.imread(img_name)

            #font = cv2.FONT_HERSHEY_SIMPLEX
            #cv2.putText(img,'cs',(10,img.shape[0] -40), font, 0.8,(255,255,255),2,cv2.LINE_AA)

            im_resized, (ratio_h, ratio_w) = resize_image(
                img, max_size=1848 * 1024,
                scale_up=True)  # alternative max_size: 1348 * 1024
            #im_resized = im_resized[:, :, ::-1]
            images = np.asarray([im_resized], dtype=np.float32)
            images /= 128
            images -= 1
            im_data = net_utils.np_to_variable(images,
                                               is_cuda=args.cuda).permute(
                                                   0, 3, 1, 2)

            [iou_pred, iou_pred1], rboxs, angle_pred, features = net(im_data)
            iou = iou_pred.data.cpu()[0].numpy()
            iou = iou.squeeze(0)

            iou_pred1 = iou_pred1.data.cpu()[0].numpy()
            iou_pred1 = iou_pred1.squeeze(0)
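For context, the fragment above normalizes pixels to roughly [-1, 1] and converts the NHWC batch to the NCHW layout PyTorch expects. A minimal, self-contained sketch of that preprocessing (standing in for the repository's net_utils.np_to_variable helper, whose exact behavior is assumed here):

import numpy as np
import torch

def preprocess(im_resized: np.ndarray, cuda: bool = False) -> torch.Tensor:
    # Batch of one image, float32, shape (1, H, W, 3).
    images = np.asarray([im_resized], dtype=np.float32)
    images /= 128.0  # scale to roughly [0, 2)
    images -= 1.0    # shift to roughly [-1, 1)
    im_data = torch.from_numpy(images).permute(0, 3, 1, 2)  # NHWC -> NCHW
    return im_data.cuda() if cuda else im_data
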
Code example #2
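# Assumed imports for this example (not shown in the original listing);
# net_utils, resize_image, load_gt, get_boxes, print_seq_ext, evaluate_image,
# and codec are project-specific helpers from the NAVI-STR repository.
import glob
import math
import os

import cv2
import numpy as np
import torch
import torch.nn.functional as F
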
def evaluate_e2e_crnn(root,
                      net,
                      norm_height=48,
                      name_model='E2E',
                      normalize=False,
                      save=False,
                      cuda=True,
                      save_dir='eval'):
    # Description: evaluate the E2E model on a directory of images.
    net = net.eval()
    # if cuda:
    #   print('Using cuda ...')
    #   net = net.to(device)

    images = glob.glob(os.path.join(root, '*.jpg'))
    images.extend(glob.glob(os.path.join(root, '*.png')))
    images.extend(glob.glob(os.path.join(root, '*.JPG')))

    image_paths = np.asarray(images)

    tp_all = 0
    gt_all = 0
    tp_e2e_all = 0
    gt_e2e_all = 0
    tp_e2e_ed1_all = 0
    detections_all = 0
    eval_text_length = 2
    segm_thresh = 0.5
    min_height = 8
    idx = 0

    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    note_path = os.path.join(save_dir, 'note_eval.txt')
    note_file = open(note_path, 'a')

    with torch.no_grad():

        index = np.arange(0, image_paths.shape[0])
        # np.random.shuffle(index)
        for i in index:
            img_name = image_paths[i]
            base_name = os.path.basename(img_name)
            #
            # if args.evaluate == 1:
            res_gt = base_name.replace(".jpg", '.txt').replace(
                ".png", '.txt').replace(".JPG", '.txt')
            res_gt = '{0}/gt_{1}'.format(root, res_gt)
            if not os.path.exists(res_gt):
                res_gt = base_name.replace(".jpg", '.txt').replace("_", "")
                res_gt = '{0}/gt_{1}'.format(root, res_gt)
            if os.path.exists(res_gt):
                gt_rect, gt_txts = load_gt(res_gt)
            else:
                # No ground truth for this image; evaluate with empty annotations.
                print('missing! {0}'.format(res_gt))
                gt_rect, gt_txts = [], []

            # print(img_name)
            img = cv2.imread(img_name)

            im_resized, _ = resize_image(
                img, max_size=1848 * 1024,
                scale_up=False)  # alternative max_size: 1348 * 1024
            images = np.asarray([im_resized], dtype=np.float32)

            if normalize:
                images /= 128
                images -= 1
            im_data = net_utils.np_to_variable(images, is_cuda=cuda).permute(
                0, 3, 1, 2)

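            # Forward pass: segmentation confidence maps, rotated-box
            # geometry, per-pixel angle predictions, and backbone features.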
            [iou_pred, iou_pred1], rboxs, angle_pred, features = net(im_data)
            iou = iou_pred.data.cpu()[0].numpy()
            iou = iou.squeeze(0)

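            # Permute rbox axes (0, 1, 2) -> (1, 2, 0) so the geometry
            # channels end up in the last dimension.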
            rbox = rboxs[0].data.cpu()[0].numpy()
            rbox = rbox.swapaxes(0, 1)
            rbox = rbox.swapaxes(1, 2)

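            # Decode rotated word boxes wherever the confidence map
            # exceeds segm_thresh.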
            detections = get_boxes(iou, rbox,
                                   angle_pred[0].data.cpu()[0].numpy(),
                                   segm_thresh)

            im_scalex = im_resized.shape[1] / img.shape[1]
            im_scaley = im_resized.shape[0] / img.shape[0]

            detections_out = []
            detections_orig = np.copy(detections)
            if len(detections) > 0:
                detections[:, 0] /= im_scalex
                detections[:, 2] /= im_scalex
                detections[:, 4] /= im_scalex
                detections[:, 6] /= im_scalex

                detections[:, 1] /= im_scaley
                detections[:, 3] /= im_scaley
                detections[:, 5] /= im_scaley
                detections[:, 7] /= im_scaley

            for bid, box in enumerate(detections):

                boxo = detections_orig[bid]
                # score = boxo[8]
                boxr = boxo[0:8].reshape(-1, 2)
                # box_area = area(boxr.reshape(8))

                # conf_factor = score / box_area

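                # Box center and average side lengths from the four corners.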
                center = (boxr[0, :] + boxr[1, :] + boxr[2, :] +
                          boxr[3, :]) / 4

                dw = boxr[2, :] - boxr[1, :]
                dw2 = boxr[0, :] - boxr[3, :]
                dh = boxr[1, :] - boxr[0, :]
                dh2 = boxr[3, :] - boxr[2, :]

                h = math.sqrt(dh[0] * dh[0] + dh[1] * dh[1]) + 1
                h2 = math.sqrt(dh2[0] * dh2[0] + dh2[1] * dh2[1]) + 1
                h = (h + h2) / 2
                w = math.sqrt(dw[0] * dw[0] + dw[1] * dw[1])
                w2 = math.sqrt(dw2[0] * dw2[0] + dw2[1] * dw2[1])
                w = (w + w2) / 2

                if ((h - 1) / im_scaley) < min_height:
                    continue

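                # Canonical crop size: fixed height (norm_height), width
                # proportional to the box, rounded to a multiple of 8.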
                input_W = im_data.size(3)
                input_H = im_data.size(2)
                target_h = norm_height

                scale = target_h / h
                target_gw = int(w * scale + target_h / 4)
                target_gw = max(8, int(round(target_gw / 8)) * 8)
                xc = center[0]
                yc = center[1]
                w2 = w
                h2 = h

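                # Text orientation: average the angles of the two width edges.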
                angle = math.atan2((boxr[2][1] - boxr[1][1]),
                                   boxr[2][0] - boxr[1][0])
                angle2 = math.atan2((boxr[3][1] - boxr[0][1]),
                                    boxr[3][0] - boxr[0][0])
                angle = (angle + angle2) / 2

                # show pooled image in image layer
                scalex = (w2 + h2 / 4) / input_W
                scaley = h2 / input_H

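                # Build the 2x3 affine matrix that maps the output crop onto
                # the rotated box in the normalized [-1, 1] coordinates that
                # F.affine_grid expects.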
                th11 = scalex * math.cos(angle)
                th12 = -math.sin(angle) * scaley * input_H / input_W
                th13 = (2 * xc - input_W - 1) / (input_W - 1)

                th21 = math.sin(angle) * scalex * input_W / input_H
                th22 = scaley * math.cos(angle)
                th23 = (2 * yc - input_H - 1) / (input_H - 1)

                t = np.asarray([th11, th12, th13, th21, th22, th23],
                               dtype=np.float32)
                t = torch.from_numpy(t).type(torch.FloatTensor)
                t = t.to(im_data.device)
                theta = t.view(-1, 2, 3)

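                # Sample an axis-aligned target_h x target_gw patch of the
                # rotated text region (spatial-transformer style crop).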
                grid = F.affine_grid(
                    theta, torch.Size((1, 3, int(target_h), int(target_gw))))
                x = F.grid_sample(im_data, grid)

                # features = net.forward_features(x)
                # labels_pred = net.forward_ocr(features)
                labels_pred = net.forward_ocr(x)
                labels_pred = labels_pred.permute(1, 2, 0)

                ctc_f = labels_pred.data.cpu().numpy()
                ctc_f = ctc_f.swapaxes(1, 2)

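                # Greedy CTC decoding: arg-max label per time step; confidence
                # is the mean exponentiated score over positions whose label
                # falls above the special-token range.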
                labels = ctc_f.argmax(2)

                conf = np.mean(np.exp(ctc_f.max(2)[labels > 3]))
                if conf < 0.02:
                    continue

                det_text, conf2, dec_s, word_splits = print_seq_ext(
                    labels[0, :], codec)
                det_text = det_text.strip()

                if conf < 0.01 and len(det_text) == 3:
                    continue

                if len(det_text) > 0:
                    dtxt = det_text.strip()
                    if len(dtxt) >= eval_text_length:
                        # print('{0} - {1}'.format(dtxt, conf_factor))
                        boxw = np.copy(boxr)
                        boxw[:, 1] /= im_scaley
                        boxw[:, 0] /= im_scalex
                        boxw = boxw.reshape(8)

                        detections_out.append([boxw, dtxt])

            pix = img

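            # Score this image's detections against the ground truth and
            # accumulate end-to-end and detection-level counts.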
            # if args.evaluate == 1:
            tp, tp_e2e, gt_e2e, tp_e2e_ed1, detection_to_gt, pixx = evaluate_image(
                pix,
                detections_out,
                gt_rect,
                gt_txts,
                eval_text_length=eval_text_length)
            tp_all += tp
            gt_all += len(gt_txts)
            tp_e2e_all += tp_e2e
            gt_e2e_all += gt_e2e
            tp_e2e_ed1_all += tp_e2e_ed1
            detections_all += len(detections_out)
            # print(gt_all)
            if save:
                cv2.imwrite('{0}/{1}'.format(save_dir, base_name), pixx)

            # print("	E2E recall tp_e2e:{0:.3f} / tp:{1:.3f} / e1:{2:.3f}, precision: {3:.3f}".format(
            #   tp_e2e_all / float(max(1, gt_e2e_all)),
            #   tp_all / float(max(1, gt_e2e_all)),
            #   tp_e2e_ed1_all / float(max(1, gt_e2e_all)),
            #   tp_all / float(max(1, detections_all))))

        note_file.write(
            'Model{4}---E2E recall tp_e2e:{0:.3f} / tp:{1:.3f} / e1:{2:.3f}, precision: {3:.3f} \n'
            .format(tp_e2e_all / float(max(1, gt_e2e_all)),
                    tp_all / float(max(1, gt_e2e_all)),
                    tp_e2e_ed1_all / float(max(1, gt_e2e_all)),
                    tp_all / float(max(1, detections_all)), name_model))

        note_file.close()
    return (tp_e2e_all / float(max(1, gt_e2e_all)),
            tp_all / float(max(1, gt_e2e_all)),
            tp_e2e_ed1_all / float(max(1, gt_e2e_all)),
            tp_all / float(max(1, detections_all)))
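
A hypothetical usage sketch. The checkpoint path, the load_model helper, and the data directory layout are assumptions (the directory should contain *.jpg/*.png images with matching gt_<name>.txt annotation files); the function returns end-to-end recall, detection recall, edit-distance-1 recall, and precision, in that order.

if __name__ == '__main__':
    model = load_model('checkpoints/e2e_best.pth')  # hypothetical loader
    recall_e2e, recall_det, recall_ed1, precision = evaluate_e2e_crnn(
        'data/eval', model, norm_height=48, name_model='E2E',
        normalize=True, save=True, cuda=torch.cuda.is_available())
    print('E2E recall: {0:.3f}, precision: {1:.3f}'.format(
        recall_e2e, precision))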