Example #1
File: demo.py  Project: dipikakhullar/ocr
def show_image_with_boxes(img, out_boxes, show=False):
  if show:
    im = np.array(img)
    for box in out_boxes:
      pts  = box[0:8]
      pts = pts.reshape(4, -1)
      draw_box_points(im, pts, color=(0, 255, 0), thickness=1)

    cv2.imshow('img', im)
    cv2.waitKey(1)
Example #2
File: eval.py  Project: cnzeki/E2E-MLT
def draw_detections(img, boxes, color=(255, 0, 0)):
    draw2 = np.copy(img)
    if len(boxes) == 0:
        return draw2
    for i in range(0, boxes.shape[0]):
        pts = boxes[i]
        pts = pts[0:8]
        pts = pts.reshape(4, -1)
        pts = np.asarray(pts, dtype=np.int32)
        draw_box_points(draw2, pts, color=color, thickness=2)

    # cv2.imshow('nms', draw2)

    return draw2
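Both drawing snippets above call draw_box_points, which is defined elsewhere in these projects. A minimal sketch of such a helper, assuming it simply draws the closed quadrilateral with OpenCV (the real implementation may also draw corner markers), could look like this:

import cv2
import numpy as np

def draw_box_points(img, pts, color=(0, 255, 0), thickness=1):
    # Hypothetical sketch: draw the four corner points as a closed polygon.
    pts = np.asarray(pts, dtype=np.int32).reshape(-1, 1, 2)
    cv2.polylines(img, [pts], isClosed=True, color=color, thickness=thickness)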
Example #3
def process_boxes(images,
                  im_data,
                  iou_pred,
                  roi_pred,
                  angle_pred,
                  score_maps,
                  gt_idxs,
                  gtso,
                  lbso,
                  features,
                  net,
                  ctc_loss,
                  opts,
                  debug=False):

    ctc_loss_count = 0
    loss = torch.from_numpy(np.asarray([0])).type(torch.FloatTensor).cuda()

    for bid in range(iou_pred.size(0)):

        gts = gtso[bid]
        lbs = lbso[bid]

        gt_proc = 0
        gt_good = 0

        gts_count = {}

        iou_pred_np = iou_pred[bid].data.cpu().numpy()
        iou_map = score_maps[bid]
        to_walk = iou_pred_np.squeeze(0) * iou_map * (iou_pred_np.squeeze(0) >
                                                      0.5)

        roi_p_bid = roi_pred[bid].data.cpu().numpy()
        gt_idx = gt_idxs[bid]

        if debug:
            img = images[bid]
            img += 1
            img *= 128
            img = np.asarray(img, dtype=np.uint8)

        xy_text = np.argwhere(to_walk > 0)
        random.shuffle(xy_text)
        xy_text = xy_text[0:min(xy_text.shape[0], 100)]

        for i in range(0, xy_text.shape[0]):
            if opts.geo_type == 1:
                break
            pos = xy_text[i, :]

            gt_id = gt_idx[pos[0], pos[1]]

            if gt_id not in gts_count:
                gts_count[gt_id] = 0

            if gts_count[gt_id] > 2:
                continue

            gt = gts[gt_id]
            gt_txt = lbs[gt_id]
            if gt_txt.startswith('##'):
                continue

            angle_sin = angle_pred[bid, 0, pos[0], pos[1]]
            angle_cos = angle_pred[bid, 1, pos[0], pos[1]]

            angle = math.atan2(angle_sin, angle_cos)

            angle_gt = (math.atan2(
                (gt[2][1] - gt[1][1]), gt[2][0] - gt[1][0]) + math.atan2(
                    (gt[3][1] - gt[0][1]), gt[3][0] - gt[0][0])) / 2

            if math.fabs(angle_gt - angle) > math.pi / 16:
                continue

            offset = roi_p_bid[:, pos[0], pos[1]]
            posp = pos + 0.25
            pos_g = np.array([(posp[1] - offset[0] * math.sin(angle)) * 4,
                              (posp[0] - offset[0] * math.cos(angle)) * 4])
            pos_g2 = np.array([(posp[1] + offset[1] * math.sin(angle)) * 4,
                               (posp[0] + offset[1] * math.cos(angle)) * 4])

            pos_r = np.array([(posp[1] - offset[2] * math.cos(angle)) * 4,
                              (posp[0] - offset[2] * math.sin(angle)) * 4])
            pos_r2 = np.array([(posp[1] + offset[3] * math.cos(angle)) * 4,
                               (posp[0] + offset[3] * math.sin(angle)) * 4])

            center = (pos_g + pos_g2 + pos_r + pos_r2) / 2 - [
                4 * pos[1], 4 * pos[0]
            ]
            #center = (pos_g + pos_g2 + pos_r + pos_r2) / 4
            dw = pos_r - pos_r2
            dh = pos_g - pos_g2

            w = math.sqrt(dw[0] * dw[0] + dw[1] * dw[1])
            h = math.sqrt(dh[0] * dh[0] + dh[1] * dh[1])

            dhgt = gt[1] - gt[0]

            h_gt = math.sqrt(dhgt[0] * dhgt[0] + dhgt[1] * dhgt[1])
            if h_gt < 10:
                continue

            rect = ((center[0], center[1]), (w, h), angle * 180 / math.pi)
            pts = cv2.boxPoints(rect)

            pred_bbox = cv2.boundingRect(pts)
            pred_bbox = [
                pred_bbox[0], pred_bbox[1], pred_bbox[2], pred_bbox[3]
            ]
            pred_bbox[2] += pred_bbox[0]
            pred_bbox[3] += pred_bbox[1]

            if gt[:, 0].max() > im_data.size(3) or gt[:, 1].max() > im_data.size(3):
                continue

            gt_bbox = [
                gt[:, 0].min(), gt[:, 1].min(), gt[:, 0].max(), gt[:, 1].max()
            ]
            inter = intersect(pred_bbox, gt_bbox)

            uni = union(pred_bbox, gt_bbox)
            ratio = area(inter) / float(area(uni))

            if ratio < 0.90:
                continue

            hratio = min(h, h_gt) / max(h, h_gt)
            if hratio < 0.5:
                continue

            input_W = im_data.size(3)
            input_H = im_data.size(2)
            target_h = norm_height

            scale = target_h / h
            target_gw = (int(w * scale) + target_h)
            target_gw = max(8, int(round(target_gw / 4)) * 4)

            #show pooled image in image layer

            scalex = (w + h) / input_W
            scaley = h / input_H

            th11 = scalex * math.cos(angle)
            th12 = -math.sin(angle) * scaley
            th13 = (2 * center[0] - input_W - 1) / (
                input_W - 1
            )  #* torch.cos(angle_var) - (2 * yc - input_H - 1) / (input_H - 1) * torch.sin(angle_var)

            th21 = math.sin(angle) * scalex
            th22 = scaley * math.cos(angle)
            th23 = (2 * center[1] - input_H - 1) / (
                input_H - 1
            )  #* torch.cos(angle_var) + (2 * xc - input_W - 1) / (input_W - 1) * torch.sin(angle_var)

            t = np.asarray([th11, th12, th13, th21, th22, th23], dtype=np.float32)
            t = torch.from_numpy(t).type(torch.FloatTensor).cuda()

            #t = torch.stack((th11, th12, th13, th21, th22, th23), dim=1)
            theta = t.view(-1, 2, 3)

            grid = F.affine_grid(
                theta, torch.Size((1, 3, int(target_h), int(target_gw))))

            x = F.grid_sample(im_data[bid].unsqueeze(0), grid)

            if debug:
                x_c = x.data.cpu().numpy()[0]
                x_data_draw = x_c.swapaxes(0, 2)
                x_data_draw = x_data_draw.swapaxes(0, 1)

                x_data_draw += 1
                x_data_draw *= 128
                x_data_draw = np.asarray(x_data_draw, dtype=np.uint8)
                x_data_draw = x_data_draw[:, :, ::-1]

                cv2.circle(img, (int(center[0]), int(center[1])), 5,
                           (0, 255, 0))
                cv2.imshow('im_data', x_data_draw)

                draw_box_points(img, pts)
                draw_box_points(img, gt, color=(0, 0, 255))

                cv2.imshow('img', img)
                cv2.waitKey(100)

            gt_labels = []
            gt_labels.append(codec_rev[' '])
            for k in range(len(gt_txt)):
                if gt_txt[k] in codec_rev:
                    gt_labels.append(codec_rev[gt_txt[k]])
                else:
                    print('Unknown char: {0}'.format(gt_txt[k]))
                    gt_labels.append(3)

            if 'ARABIC' in ud.name(gt_txt[0]):
                gt_labels = gt_labels[::-1]
            gt_labels.append(codec_rev[' '])

            features = net.forward_features(x)
            labels_pred = net.forward_ocr(features)

            label_length = []
            label_length.append(len(gt_labels))
            probs_sizes = autograd.Variable(
                torch.IntTensor([(labels_pred.permute(2, 0, 1).size()[0])] *
                                (labels_pred.permute(2, 0, 1).size()[1])))
            label_sizes = autograd.Variable(
                torch.IntTensor(
                    torch.from_numpy(np.array(label_length)).int()))
            labels = autograd.Variable(
                torch.IntTensor(torch.from_numpy(np.array(gt_labels)).int()))

            loss = loss + ctc_loss(labels_pred.permute(2, 0, 1), labels,
                                   probs_sizes, label_sizes).cuda()
            ctc_loss_count += 1

            if debug:
                ctc_f = labels_pred.data.cpu().numpy()
                ctc_f = ctc_f.swapaxes(1, 2)

                labels = ctc_f.argmax(2)
                det_text, conf, dec_s, splits = print_seq_ext(
                    labels[0, :], codec)

                print('{0} \t {1}'.format(det_text, gt_txt))

            gts_count[gt_id] += 1

            if ctc_loss_count > 64 or debug:
                break

        for gt_id in range(0, len(gts)):

            gt = gts[gt_id]
            gt_txt = lbs[gt_id]

            gt_txt_low = gt_txt.lower()
            if gt_txt.startswith('##'):
                continue

            if gt[:, 0].max() > im_data.size(3) or gt[:, 1].max() > im_data.size(3):
                continue

            if gt.min() < 0:
                continue

            center = (gt[0, :] + gt[1, :] + gt[2, :] + gt[3, :]) / 4
            dw = gt[2, :] - gt[1, :]
            dh = gt[1, :] - gt[0, :]

            w = math.sqrt(dw[0] * dw[0] + dw[1] * dw[1])
            h = math.sqrt(dh[0] * dh[0] + dh[1] * dh[1]) + random.randint(
                -2, 2)

            if h < 8:
                #print('too small h!')
                continue

            angle_gt = (math.atan2(
                (gt[2][1] - gt[1][1]), gt[2][0] - gt[1][0]) + math.atan2(
                    (gt[3][1] - gt[0][1]), gt[3][0] - gt[0][0])) / 2

            input_W = im_data.size(3)
            input_H = im_data.size(2)
            target_h = norm_height

            scale = target_h / h
            target_gw = int(w * scale) + random.randint(0, int(target_h))
            target_gw = max(8, int(round(target_gw / 4)) * 4)

            xc = center[0]
            yc = center[1]
            w2 = w
            h2 = h

            #show pooled image in image layer

            scalex = (w2 + random.randint(0, int(h2))) / input_W
            scaley = h2 / input_H

            th11 = scalex * math.cos(angle_gt)
            th12 = -math.sin(angle_gt) * scaley
            th13 = (2 * xc - input_W - 1) / (
                input_W - 1
            )  #* torch.cos(angle_var) - (2 * yc - input_H - 1) / (input_H - 1) * torch.sin(angle_var)

            th21 = math.sin(angle_gt) * scalex
            th22 = scaley * math.cos(angle_gt)
            th23 = (2 * yc - input_H - 1) / (
                input_H - 1
            )  #* torch.cos(angle_var) + (2 * xc - input_W - 1) / (input_W - 1) * torch.sin(angle_var)

            t = np.asarray([th11, th12, th13, th21, th22, th23], dtype=np.float32)
            t = torch.from_numpy(t).type(torch.FloatTensor)
            t = t.cuda()
            theta = t.view(-1, 2, 3)

            grid = F.affine_grid(
                theta, torch.Size((1, 3, int(target_h), int(target_gw))))
            x = F.grid_sample(im_data[bid].unsqueeze(0), grid)

            #score_sampled = F.grid_sample(iou_pred[bid].unsqueeze(0), grid)

            gt_labels = []
            gt_labels.append(codec_rev[' '])
            for k in range(len(gt_txt)):
                if gt_txt[k] in codec_rev:
                    gt_labels.append(codec_rev[gt_txt[k]])
                else:
                    print('Unknown char: {0}'.format(gt_txt[k]))
                    gt_labels.append(3)
            gt_labels.append(codec_rev[' '])

            if 'ARABIC' in ud.name(gt_txt[0]):
                gt_labels = gt_labels[::-1]

            features = net.forward_features(x)
            labels_pred = net.forward_ocr(features)

            label_length = []
            label_length.append(len(gt_labels))
            probs_sizes = torch.IntTensor(
                [(labels_pred.permute(2, 0, 1).size()[0])] *
                (labels_pred.permute(2, 0, 1).size()[1]))
            label_sizes = torch.IntTensor(
                torch.from_numpy(np.array(label_length)).int())
            labels = torch.IntTensor(
                torch.from_numpy(np.array(gt_labels)).int())

            loss = loss + ctc_loss(labels_pred.permute(2, 0, 1), labels,
                                   probs_sizes, label_sizes).cuda()
            ctc_loss_count += 1

            if debug:
                x_d = x.data.cpu().numpy()[0]
                x_data_draw = x_d.swapaxes(0, 2)
                x_data_draw = x_data_draw.swapaxes(0, 1)

                x_data_draw += 1
                x_data_draw *= 128
                x_data_draw = np.asarray(x_data_draw, dtype=np.uint8)
                x_data_draw = x_data_draw[:, :, ::-1]
                cv2.imshow('im_data_gt', x_data_draw)
                cv2.waitKey(100)

            gt_proc += 1
            if True:
                ctc_f = labels_pred.data.cpu().numpy()
                ctc_f = ctc_f.swapaxes(1, 2)

                labels = ctc_f.argmax(2)
                det_text, conf, dec_s, splits = print_seq_ext(
                    labels[0, :], codec)
                if debug:
                    print('{0} \t {1}'.format(det_text, gt_txt))
                if det_text.lower() == gt_txt.lower():
                    gt_good += 1

            if ctc_loss_count > 128 or debug:
                break

    if ctc_loss_count > 0:
        loss /= ctc_loss_count

    return loss, gt_good, gt_proc
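The normalized text crop fed to the OCR branch in process_boxes is produced with F.affine_grid and F.grid_sample. A stripped-down, standalone sketch of that crop, assuming a 1 x C x H x W tensor, an angle in radians, and example target sizes (and ignoring the extra width padding the training code adds), might look like this:

import math
import torch
import torch.nn.functional as F

def crop_rotated_region(im_data, center, w, h, angle, target_h=40, target_w=160):
    # im_data: 1 x C x H x W tensor; center, w, h, angle describe the region to sample.
    # The 2x3 theta maps the output grid into normalized [-1, 1] input coordinates,
    # mirroring the th11..th23 terms computed in process_boxes above.
    input_H, input_W = im_data.size(2), im_data.size(3)
    scalex = w / input_W
    scaley = h / input_H
    theta = torch.tensor([[scalex * math.cos(angle), -math.sin(angle) * scaley,
                           (2 * center[0] - input_W - 1) / (input_W - 1)],
                          [math.sin(angle) * scalex, scaley * math.cos(angle),
                           (2 * center[1] - input_H - 1) / (input_H - 1)]],
                         dtype=torch.float32, device=im_data.device).unsqueeze(0)
    grid = F.affine_grid(theta, torch.Size((1, im_data.size(1), target_h, target_w)),
                         align_corners=False)
    return F.grid_sample(im_data, grid, align_corners=False)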
Example #4
      for box in boxes:

        pts  = box[0:8]
        pts = pts.reshape(4, -1)

        # det_text, conf, dec_s = ocr_image(net, codec, im_data, box)
        det_text, conf, dec_s = align_ocr(net, converter, im_data, box, features, debug=0)
        if len(det_text) == 0:
          continue

        width, height = draw.textsize(det_text, font=font2)
        center =  [box[0], box[1]]
        draw.text((center[0], center[1]), det_text, fill = (0,255,0),font=font2)
        out_boxes.append(box)
        print(det_text)

      im = np.array(img)
      for box in out_boxes:
        pts  = box[0:8]
        pts = pts.reshape(4, -1)
        draw_box_points(im, pts, color=(0, 255, 0), thickness=1)

      cv2.imshow('img', im)
      basename = os.path.basename(path)
      cv2.imwrite(os.path.join(args.output, basename), im)
      cv2.waitKey(1000)
Example #5
File: eval.py  Project: wisdal/NAVI-STR
def process_splits(trans,
                   word_splits,
                   conf,
                   splits,
                   start,
                   ctc_f,
                   rot_mat,
                   angle,
                   box_points,
                   w,
                   h,
                   draw,
                   is_dict,
                   debug=False):
    '''
  Summary : Split the transcription and its bounding-box based on the spaces predicted by the recognizer FCN.
  Description :

  Parameters
  ----------
  trans : string
      String containing the predicted transcription for the corresponding predicted bounding-box.
  word_splits : list
      List of words obtained by splitting the predicted transcription at the predicted spaces.
  conf : list
      List containing the sum of confidences for all characters from the recognizer FCN, and the start and end positions in the bounding-box for the generated transcription.
  splits : list
      List containing the indices of the spaces predicted by the recognizer FCN.
  start : matrix
      Start position of the transcription inside the recognizer output (indexed as start[0, 0]).
  ctc_f : matrix
      Matrix containing the output of the recognizer FCN for the given input bounding-box.
  rot_mat : matrix
      Rotation matrix returned by the get_normalized_image function.
  angle : float
      Rotation angle of the predicted bounding-box.
  box_points : matrix
      Corner points of the bounding-box predicted by the localization FCN in the original image.
  w, h : float
      Width and height of the normalized text crop.
  draw : matrix
      Matrix containing the input image.
  is_dict :
      Flag stored unchanged in the output tuples.
  debug : boolean
      Boolean parameter representing debug mode; if True, visualization boxes are drawn.

  Returns
  -------
  boxes_out : list of tuples
      List of tuples containing predicted bounding-box parameters, the predicted transcription and the mean confidence score from the recognizer.
  '''
    spl = word_splits
    boxout = np.copy(box_points)
    #draw_box_points(draw, boxout, color = (0, 255, 0), thickness=2)
    start_f = start[0, 0]
    mean_conf = conf[0, 0] / max(
        1, len(trans))  # Overall confidence of recognizer FCN
    boxes_out = []
    y = 0
    for s in range(len(spl)):
        text = spl[s]
        end_f = splits[0, s]
        if s < len(spl) - 1:
            try:
                if splits[0, s] > start_f:
                    end_f = splits[
                        0, s]  # New ending point of bounding-box transcription
            except IndexError:
                pass
        scalex = w / float(ctc_f.shape[1])
        poss = start_f * scalex
        pose = (end_f + 2) * scalex
        rect = [[poss, h], [poss, y], [pose, y], [pose, h]]
        rect = np.array(rect)
        int_t = rot_mat
        dst_rect = np.copy(rect)
        dst_rect[:, 0] = int_t[0, 0] * rect[:, 0] + int_t[
            0, 1] * rect[:, 1] + int_t[0, 2]
        dst_rect[:, 1] = int_t[1, 0] * rect[:, 0] + int_t[
            1, 1] * rect[:, 1] + int_t[1, 2]
        dst_rect[:, 0] += boxout[1, 0]
        dst_rect[:, 1] += boxout[1, 1]

        if debug:
            draw_box_points(draw, dst_rect, color=(0, 255, 0))
            cv2.imshow('draw', draw)
            cv2.waitKey(0)

        boxes_out.append((dst_rect, [text, mean_conf, is_dict]))
        start_f = end_f + 1
    return boxes_out
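For reference, the manual multiplication by int_t above applies the 2x3 rotation matrix to each corner of rect. An equivalent way to write the same mapping with cv2.transform (the numeric values here are made up purely for illustration) is:

import cv2
import numpy as np

rect = np.array([[10.0, 32.0], [10.0, 0.0], [80.0, 0.0], [80.0, 32.0]])  # poss/pose/y/h corners
rot_mat = cv2.getRotationMatrix2D((0, 0), -15.0, 1.0)
dst_rect = cv2.transform(rect.reshape(-1, 1, 2), rot_mat).reshape(-1, 2)
dst_rect += np.array([120.0, 45.0])  # offset by boxout[1, :], as in process_splits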
Example #6
File: eval.py  Project: wisdal/NAVI-STR
def evaluate_image(img,
                   detections,
                   gt_rect,
                   gt_txts,
                   iou_th=0.5,
                   iou_th_vis=0.5,
                   iou_th_eval=0.5,
                   eval_text_length=1):
    '''
  Summary : Returns end-to-end true-positives, detection true-positives, and the number of GT entries considered for evaluation.
  Description : Each predicted bounding-box is compared with every GT entry. From these comparisons, the number of end-to-end true
                positives, the number of detection true positives, and the number of GT entries to be considered for evaluation are computed.

  Parameters
  ----------
  img : matrix
      Input image; matched and unmatched boxes are drawn onto it.
  detections : tuple of tuples
      Tuple of predicted bounding boxes along with transcriptions and text/no-text score.
  gt_rect : list of lists
      List of ground-truth bounding boxes.
  gt_txts : list
      List of ground-truth transcriptions, one per GT bounding-box.
  iou_th : float
      Threshold value of intersection-over-union between GT and prediction.
  iou_th_vis : float
      Threshold value of intersection-over-union used for visualization when the transcription is correct but the IoU is lower.
  iou_th_eval : float
      Threshold value of intersection-over-union used for evaluation of predicted bounding-boxes.
  eval_text_length : int
      Minimum GT transcription length for an entry to be counted in the evaluation.

  Returns
  -------
  tp : int
      Number of predicted bounding-boxes having IoU with GT greater than iou_th_eval.
  tp_e2e : int
      Number of predicted bounding-boxes whose transcription exactly matches the GT.
  gt_e2e : int
      Number of GT entries considered for end-to-end evaluation.
  tp_e2e_ed1 : int
      Number of predicted bounding-boxes whose transcription is within edit distance 1 of the GT.
  detection_to_gt : dict
      Mapping from detection index to [gt_no, IoU, edit distance] for matched detections.
  '''

    gt_to_detection = {}
    detection_to_gt = {}
    tp = 0
    tp_e2e = 0
    tp_e2e_ed1 = 0
    gt_e2e = 0

    gt_matches = np.zeros(gt_rect.shape[0])
    gt_matches_ed1 = np.zeros(gt_rect.shape[0])

    for i in range(0, len(detections)):

        det = detections[i]
        box = det[0]  # Predicted bounding-box parameters
        box = np.array(
            box, dtype="int")  # Convert predicted bounding-box to numpy array
        box = box[0:8].reshape(4, 2)
        bbox = cv2.boundingRect(box)

        bbox = [bbox[0], bbox[1], bbox[2], bbox[3]]
        bbox[2] += bbox[0]  # Convert width to right-coordinate
        bbox[3] += bbox[1]  # Convert height to bottom-coordinate

        det_text = det[1]  # Predicted transcription for bounding-box

        for gt_no in range(len(gt_rect)):

            gtbox = gt_rect[gt_no]
            txt = gt_txts[gt_no]  # GT transcription for given GT bounding-box
            gtbox = np.array(gtbox, dtype="int")
            gtbox = gtbox[0:8].reshape(4, 2)
            rect_gt = cv2.boundingRect(gtbox)

            rect_gt = [rect_gt[0], rect_gt[1], rect_gt[2], rect_gt[3]]
            rect_gt[2] += rect_gt[0]  # Convert GT width to right-coordinate
            rect_gt[3] += rect_gt[1]  # Convert GT height to bottom-coordinate

            inter = intersect(
                bbox,
                rect_gt)  # Intersection of predicted and GT bounding-boxes
            uni = union(bbox,
                        rect_gt)  # Union of predicted and GT bounding-boxes
            ratio = area(inter) / float(area(
                uni))  # IoU measure between predicted and GT bounding-boxes

            # 1). Visualize the predicted-bounding box if IoU with GT is higher than IoU threshold (iou_th) (Always required)
            # 2). Visualize the predicted-bounding box if transcription matches the GT and condition 1. holds
            # 3). Visualize the predicted-bounding box if transcription matches and IoU with GT is less than iou_th_vis and 1. and 2. hold
            if ratio > iou_th:
                if gt_no not in gt_to_detection:
                    gt_to_detection[gt_no] = [0, 0]

                edit_dist = editdistance.eval(det_text.lower(), txt.lower())
                if edit_dist <= 1:
                    gt_matches_ed1[gt_no] = 1
                    draw_box_points(img, box, color=(0, 128, 0), thickness=2)

                if edit_dist == 0:  #det_text.lower().find(txt.lower()) != -1:
                    draw_box_points(img, box, color=(0, 255, 0), thickness=2)
                    gt_matches[
                        gt_no] = 1  # Change this parameter to 1 when predicted transcription is correct.

                    if ratio < iou_th_vis:
                        #draw_box_points(draw, box, color = (255, 255, 255), thickness=2)
                        #cv2.imshow('draw', draw)
                        #cv2.waitKey(0)
                        pass

                tupl = gt_to_detection[gt_no]
                if tupl[0] < ratio:
                    tupl[0] = ratio
                    tupl[1] = i
                    detection_to_gt[i] = [gt_no, ratio, edit_dist]

    # Count the number of end-to-end and detection true-positives
    for gt_no in range(gt_matches.shape[0]):
        gt = gt_matches[gt_no]
        gt_ed1 = gt_matches_ed1[gt_no]
        txt = gt_txts[gt_no]

        gtbox = gt_rect[gt_no]
        gtbox = np.array(gtbox, dtype="int")
        gtbox = gtbox[0:8].reshape(4, 2)

        if len(txt) >= eval_text_length and not txt.startswith('##'):
            gt_e2e += 1
            if gt == 1:
                tp_e2e += 1
            if gt_ed1 == 1:
                tp_e2e_ed1 += 1

        if gt_no in gt_to_detection:
            tupl = gt_to_detection[gt_no]
            if tupl[0] > iou_th_eval:  # Increment detection true-positive, if IoU is greater than iou_th_eval
                if len(txt) >= eval_text_length and not txt.startswith('##'):
                    tp += 1
            #else:
            #  draw_box_points(img, gtbox, color = (255, 255, 255), thickness=2)

    for i in range(0, len(detections)):
        det = detections[i]
        box = det[0]  # Predicted bounding-box parameters
        box = np.array(
            box, dtype="int")  # Convert predicted bounding-box to numpy array
        box = box[0:8].reshape(4, 2)

        if i not in detection_to_gt:
            draw_box_points(img, box, color=(0, 0, 255), thickness=2)
        else:
            [gt_no, ratio, edit_dist] = detection_to_gt[i]
            if edit_dist > 0:
                draw_box_points(img, box, color=(255, 0, 0), thickness=2)

    #cv2.imshow('draw', draw)
    return tp, tp_e2e, gt_e2e, tp_e2e_ed1, detection_to_gt
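evaluate_image only returns per-image counts; aggregating them over a dataset happens elsewhere. A minimal sketch of such an aggregation into recall-style numbers, assuming a hypothetical iterable of evaluation samples, could be:

tp_all = tp_e2e_all = gt_e2e_all = 0
for img, detections, gt_rect, gt_txts in eval_samples:  # hypothetical iterable
    tp, tp_e2e, gt_e2e, tp_e2e_ed1, det_to_gt = evaluate_image(
        img, detections, gt_rect, gt_txts)
    tp_all += tp
    tp_e2e_all += tp_e2e
    gt_e2e_all += gt_e2e

det_recall = tp_all / max(1, gt_e2e_all)      # detection recall over evaluated GT entries
e2e_recall = tp_e2e_all / max(1, gt_e2e_all)  # end-to-end recall (exact transcription match)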
Example #7
File: eval.py  Project: wisdal/NAVI-STR
                    rot_mat = cv2.getRotationMatrix2D(
                        (0, 0), -angle * 180 / math.pi, 1)
                    splits_raw = process_splits(
                        det_text, word_splits, conf_raw, dec_s, conf2, ctc_f,
                        rot_mat, angle, boxr, w, h, im_resized,
                        0)  # Process the split and improve the localization
                    for spl in splits_raw:

                        spl[1][0] = spl[1][0].strip()

                        if len(spl[1][0]) >= eval_text_length:
                            has_long = True
                            boxw = spl[0]
                            boxw[:, 0] /= im_scalex
                            boxw[:, 1] /= im_scaley
                            draw_box_points(img, boxw, color=(0, 255, 0))
                            #cv2.imshow('img', img)
                            #cv2.waitKey()

                            #print('{0} - {1}'.format(spl[1][0], conf_factor))
                            #if conf_factor < 0.01:
                            #  print('Skipping {0} - {1}'.format(spl[1][0], conf_factor))
                            #  continue
                            print('{0} - {1}'.format(spl[1][0], conf_factor))
                            boxw = boxw.reshape(8)
                            detections_out.append([boxw, spl[1][0]])

            pix = img

            if args.evaluate == 1:
                # detections_out = np.expand_dims(gt_rect, axis=1) # this is only for spoofing the bbox w/ gt
Example #8
    if gt_no in gt_to_detection:
      tupl = gt_to_detection[gt_no]
      if tupl[0] > iou_th_eval: # Increment detection true-positive, if IoU is greater than iou_th_eval
        if len(txt) >= eval_text_length and not txt.startswith('##'):
          tp += 1
      #else:
      #  draw_box_points(img, gtbox, color = (255, 255, 255), thickness=2)

  for i in range(0, len(detections)):
    det = detections[i]
    box = det[0] # Predicted bounding-box parameters
    box = np.array(box, dtype="int") # Convert predicted bounding-box to numpy array
    box = box[0:8].reshape(4, 2)

    if i not in detection_to_gt:
      draw_box_points(img, box, color = (0, 0, 255), thickness=2)
    else:
      [gt_no, ratio, edit_dist] = detection_to_gt[i]
      if edit_dist > 0:
        draw_box_points(img, box, color = (255, 0, 0), thickness=2)

  #cv2.imshow('draw', draw)
  return tp, tp_e2e, gt_e2e, tp_e2e_ed1, detection_to_gt