def eval_detection(opts, net=None):
    if net is None:
        net = OctShuffleMLT(attention=True)
        net_utils.load_net(opts.model, net)
    if opts.cuda:
        net.cuda()

    images, gt_boxes = load_annotation(opts.eval_list)

    true_positives = 0
    false_positives = 0
    false_negatives = 0

    for i in range(images.shape[0]):
        image = np.expand_dims(images[i], axis=0)
        image_boxes_gt = np.array(gt_boxes[i])

        im_data = net_utils.np_to_variable(image, is_cuda=opts.cuda).permute(0, 3, 1, 2)
        seg_pred, rboxs, angle_pred, features = net(im_data)

        rbox = rboxs[0].data.cpu()[0].numpy()
        rbox = rbox.swapaxes(0, 1)
        rbox = rbox.swapaxes(1, 2)
        angle_pred = angle_pred[0].data.cpu()[0].numpy()
        segm = seg_pred[0].data.cpu()[0].numpy()
        segm = segm.squeeze(0)

        boxes = get_boxes(segm, rbox, angle_pred, opts.segm_thresh)
        if opts.debug:
            print(boxes.shape)
            print(image_boxes_gt.shape)
            print("============")

        # Start by counting every detection as a false positive and every
        # ground-truth box as a false negative; matches convert them below.
        false_positives += boxes.shape[0]
        false_negatives += image_boxes_gt.shape[0]
        for box in boxes:
            b = box[0:8].reshape(4, -1)
            poly = Polygon.Polygon(b)
            for box_gt in image_boxes_gt:
                b_gt = box_gt[0:8].reshape(4, -1)
                poly_gt = Polygon.Polygon(b_gt)
                # In the Polygon library '&' is intersection and '|' is union;
                # the original had the two operators swapped and a skewed ratio.
                intersection = poly_gt & poly
                union = poly_gt | poly
                iou = intersection.area() / max(union.area(), 1e-10)
                if iou > 0.5:
                    true_positives += 1
                    false_negatives -= 1
                    false_positives -= 1
                    # Drop the matched ground-truth box so it cannot match twice.
                    image_boxes_gt = np.array([bgt for bgt in image_boxes_gt
                                               if not np.array_equal(bgt, box_gt)])
                    break

    print("tp: {} fp: {} fn: {}".format(true_positives, false_positives, false_negatives))
    # max(1, ...) guards against division by zero on empty result sets.
    precision = true_positives / max(1, true_positives + false_positives)
    recall = true_positives / max(1, true_positives + false_negatives)
    f_score = 2 * precision * recall / max(precision + recall, 1e-10)
    print("PRECISION: {} \t RECALL: {} \t F SCORE: {}".format(precision, recall, f_score))
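# A minimal, self-contained sketch of the greedy IoU matching used in
# eval_detection, assuming the Polygon3 package ('&' = intersection,
# '|' = union, .area()). The quads below are made up for illustration;
# only the matching rule mirrors the function above.
import Polygon

def quad_iou(q1, q2):
    # q1, q2: iterables of four (x, y) corner points.
    p1, p2 = Polygon.Polygon(q1), Polygon.Polygon(q2)
    union_area = (p1 | p2).area()
    return (p1 & p2).area() / union_area if union_area > 0 else 0.0

if __name__ == '__main__':
    det = [(0, 0), (10, 0), (10, 10), (0, 10)]
    gt = [(5, 0), (15, 0), (15, 10), (5, 10)]
    print(quad_iou(det, gt))  # 50 / 150 -> ~0.333, below the 0.5 match threshold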
from os import system
from os.path import basename, join, splitext

def draw_detection_results(detection_output_filename, target_dir):
    boxes = get_boxes(detection_output_filename)
    system('rm -rf ' + target_dir)
    system('mkdir -p ' + target_dir)
    for image_filename, bs in boxes.items():
        cmd = 'convert ' + convert_bgr_to_rgb(image_filename)
        cmd += ' -fill none -stroke chartreuse -strokewidth 2'
        for xmin, ymin, xmax, ymax, score in bs:
            cmd += (' -draw "rectangle %s,%s,%s,%s" '
                    % (int(xmin), int(ymin), int(xmax), int(ymax)))
        # Text drawing code strangely doesn't work on my machine.
        # See the question I posted on SO:
        # http://stackoverflow.com/questions/27324930/convert-non-conforming-drawing-primitive-definition-text/27332225#27332225
        #cmd += ' -pointsize 17 -fill chartreuse'
        #text = 'Score:' + "{:.2f}".format(score)
        #cmd += ' -draw "text 20%%,20%% \'%s\'"' % text
        target = join(target_dir, splitext(basename(image_filename))[0] + '.jpg')
        cmd += ' ' + target
        print(cmd)
        system(cmd)
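# The ImageMagick text-drawing issue noted above can be sidestepped by drawing
# with Pillow instead of shelling out to `convert`. A minimal sketch, assuming
# the same (xmin, ymin, xmax, ymax, score) box format as draw_detection_results;
# the function name and paths are hypothetical, not part of the original code.
from PIL import Image, ImageDraw

def draw_boxes_pil(image_filename, bs, target):
    img = Image.open(image_filename).convert('RGB')
    draw = ImageDraw.Draw(img)
    for xmin, ymin, xmax, ymax, score in bs:
        # chartreuse is (127, 255, 0)
        draw.rectangle([int(xmin), int(ymin), int(xmax), int(ymax)],
                       outline=(127, 255, 0), width=2)
        draw.text((int(xmin), max(0, int(ymin) - 12)),
                  'Score: {:.2f}'.format(score), fill=(127, 255, 0))
    img.save(target)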
images /= 128
images -= 1
im_data = net_utils.np_to_variable(images.transpose(0, 3, 1, 2), is_cuda=args.cuda)
seg_pred, rboxs, angle_pred, features = net(im_data)

rbox = rboxs[0].data.cpu()[0].numpy()
# convert to (h, w, c) layout
rbox = rbox.swapaxes(0, 1)
rbox = rbox.swapaxes(1, 2)
angle_pred = angle_pred[0].data.cpu()[0].numpy()
segm = seg_pred[0].data.cpu()[0].numpy()
segm = segm.squeeze(0)

draw2 = np.copy(im_resized)
boxes = get_boxes(segm, rbox, angle_pred, args.segm_thresh)

img = Image.fromarray(draw2)
draw = ImageDraw.Draw(img)

out_boxes = []
for box in boxes:
    pts = box[0:8]
    pts = pts.reshape(4, -1)
    # det_text, conf, dec_s = ocr_image(net, codec, im_data, box)
    det_text, conf, dec_s = align_ocr(net, converter, im_data, box, features, debug=0)
    if len(det_text) == 0:
        continue
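# The normalization above maps uint8 pixel values [0, 255] to roughly [-1, 1)
# via x/128 - 1. A quick check with dummy data:
import numpy as np

pixels = np.asarray([0, 128, 255], dtype=np.float32)
print(pixels / 128 - 1)  # [-1.  0.  0.9921875]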
cv2.imshow('iou', iou)
# cv2.imshow('ioud', ioud)
cv2.imshow('iou_pred1', iou_pred1)

# Non-maximum suppression on the segmentation map: keep a pixel only if it is
# the maximum of its 3x3 neighbourhood.
size = 3
import scipy.ndimage as ndimage
image_max = ndimage.maximum_filter(iou, size=size, mode='constant')
mask = (iou == image_max)
iou2 = iou * mask
if args.debug == 1:
    cv2.imshow('iou2', iou2)

detections = get_boxes(iou, rbox, angle_pred[0].data.cpu()[0].numpy(), args.segm_thresh)
# detectionsd = get_boxes(iou_pred1, rboxd, angle_pred[1].data.cpu()[0].numpy(), args.segm_thresh, iou_thresh=0.2)

# Rescale detections from the resized image back to the original image size.
im_scalex = im_resized.shape[1] / img.shape[1]
im_scaley = im_resized.shape[0] / img.shape[0]
detectionso = np.copy(detections)
if len(detections) > 0:
    detections[:, 0] /= im_scalex
    detections[:, 2] /= im_scalex
    detections[:, 4] /= im_scalex
    detections[:, 6] /= im_scalex
    detections[:, 1] /= im_scaley
    detections[:, 3] /= im_scaley
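# Standalone sketch of the maximum-filter trick above: a pixel survives only if
# it equals the maximum of its 3x3 neighbourhood, which suppresses non-peak
# responses in the segmentation map. Toy data, same calls as in the snippet:
import numpy as np
import scipy.ndimage as ndimage

iou_toy = np.array([[0.1, 0.2, 0.1],
                    [0.2, 0.9, 0.2],
                    [0.1, 0.2, 0.1]])
image_max = ndimage.maximum_filter(iou_toy, size=3, mode='constant')
print(iou_toy * (iou_toy == image_max))  # only the 0.9 peak survives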
def run_model_input_image(im, show_boxes=False):
    predictions = {}
    parser = argparse.ArgumentParser()
    parser.add_argument('-cuda', type=int, default=1)
    parser.add_argument('-model', default='e2e-mlt-rctw.h5')
    parser.add_argument('-segm_thresh', default=0.5)
    font2 = ImageFont.truetype("Arial-Unicode-Regular.ttf", 18)
    args = parser.parse_args()

    net = ModelResNetSep2(attention=True)
    net_utils.load_net(args.model, net)
    net = net.eval()
    if args.cuda:
        print('Using cuda ...')
        net = net.cuda()

    with torch.no_grad():
        # im = Image.open(im)
        # im = im.convert('RGB')
        im = np.asarray(im)
        im = im[..., :3]
        im_resized, (ratio_h, ratio_w) = resize_image(im, scale_up=False)
        images = np.asarray([im_resized], dtype=np.float32)
        images /= 128
        images -= 1
        im_data = net_utils.np_to_variable(images, is_cuda=args.cuda).permute(0, 3, 1, 2)
        seg_pred, rboxs, angle_pred, features = net(im_data)

        rbox = rboxs[0].data.cpu()[0].numpy()
        rbox = rbox.swapaxes(0, 1)
        rbox = rbox.swapaxes(1, 2)
        angle_pred = angle_pred[0].data.cpu()[0].numpy()
        segm = seg_pred[0].data.cpu()[0].numpy()
        segm = segm.squeeze(0)

        draw2 = np.copy(im_resized)
        boxes = get_boxes(segm, rbox, angle_pred, args.segm_thresh)

        img = Image.fromarray(draw2)
        draw = ImageDraw.Draw(img)

        # if len(boxes) > 10:
        #     boxes = boxes[0:10]
        out_boxes = []
        prediction_i = []
        for box in boxes:
            pts = box[0:8]
            pts = pts.reshape(4, -1)
            det_text, conf, dec_s = ocr_image(net, codec, im_data, box)
            if len(det_text) == 0:
                continue
            width, height = draw.textsize(det_text, font=font2)
            center = [box[0], box[1]]
            draw.text((center[0], center[1]), det_text, fill=(0, 255, 0), font=font2)
            out_boxes.append(box)
            # det_text is one prediction
            prediction_i.append(det_text.lower())
        predictions["frame"] = prediction_i
        # Show each image's boxes and output in a pop-up window.
        show_image_with_boxes(img, out_boxes, show=show_boxes)
    print(predictions)
    return predictions
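# Hypothetical usage of run_model_input_image: it expects an RGB image array
# (anything np.asarray can consume) and returns {"frame": [text, ...]}. The
# image path is an assumption for illustration. Note that the function builds
# its own argparse parser, so run it without unrelated CLI flags.
import cv2

if __name__ == '__main__':
    bgr = cv2.imread('sample.jpg')  # hypothetical input image
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    preds = run_model_input_image(rgb, show_boxes=True)
    print(preds["frame"])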
def evaluate_e2e_crnn(root, net, norm_height=48, name_model='E2E', normalize=False,
                      save=False, cuda=True, save_dir='eval'):
    # Description: evaluate an end-to-end (detection + recognition) model.
    net = net.eval()
    # if cuda:
    #     print('Using cuda ...')
    #     net = net.to(device)

    images = glob.glob(os.path.join(root, '*.jpg'))
    png = glob.glob(os.path.join(root, '*.png'))
    images.extend(png)
    png = glob.glob(os.path.join(root, '*.JPG'))
    images.extend(png)
    imagess = np.asarray(images)

    tp_all = 0
    gt_all = 0
    tp_e2e_all = 0
    gt_e2e_all = 0
    tp_e2e_ed1_all = 0
    detections_all = 0

    eval_text_length = 2
    segm_thresh = 0.5
    min_height = 8
    idx = 0

    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    note_path = os.path.join(save_dir, 'note_eval.txt')
    note_file = open(note_path, 'a')

    with torch.no_grad():
        index = np.arange(0, imagess.shape[0])
        # np.random.shuffle(index)
        for i in index:
            img_name = imagess[i]
            base_name = os.path.basename(img_name)
            # if args.evaluate == 1:
            res_gt = base_name.replace(".jpg", '.txt').replace(".png", '.txt')
            res_gt = '{0}/gt_{1}'.format(root, res_gt)
            if not os.path.exists(res_gt):
                res_gt = base_name.replace(".jpg", '.txt').replace("_", "")
                res_gt = '{0}/gt_{1}'.format(root, res_gt)
                if not os.path.exists(res_gt):
                    print('missing! {0}'.format(res_gt))
                    gt_rect, gt_txts = [], []
                    # continue
            gt_rect, gt_txts = load_gt(res_gt)

            # print(img_name)
            img = cv2.imread(img_name)
            im_resized, _ = resize_image(img, max_size=1848 * 1024, scale_up=False)  # 1348*1024 #1848*1024
            images = np.asarray([im_resized], dtype=np.float32)
            if normalize:
                images /= 128
                images -= 1
            im_data = net_utils.np_to_variable(images, is_cuda=cuda).permute(0, 3, 1, 2)

            [iou_pred, iou_pred1], rboxs, angle_pred, features = net(im_data)
            iou = iou_pred.data.cpu()[0].numpy()
            iou = iou.squeeze(0)

            rbox = rboxs[0].data.cpu()[0].numpy()
            rbox = rbox.swapaxes(0, 1)
            rbox = rbox.swapaxes(1, 2)

            detections = get_boxes(iou, rbox, angle_pred[0].data.cpu()[0].numpy(), segm_thresh)

            # Rescale detections back to the original image size.
            im_scalex = im_resized.shape[1] / img.shape[1]
            im_scaley = im_resized.shape[0] / img.shape[0]

            detections_out = []
            detectionso = np.copy(detections)
            if len(detections) > 0:
                detections[:, 0] /= im_scalex
                detections[:, 2] /= im_scalex
                detections[:, 4] /= im_scalex
                detections[:, 6] /= im_scalex
                detections[:, 1] /= im_scaley
                detections[:, 3] /= im_scaley
                detections[:, 5] /= im_scaley
                detections[:, 7] /= im_scaley

            for bid, box in enumerate(detections):
                boxo = detectionso[bid]
                # score = boxo[8]
                boxr = boxo[0:8].reshape(-1, 2)
                # box_area = area(boxr.reshape(8))
                # conf_factor = score / box_area
                center = (boxr[0, :] + boxr[1, :] + boxr[2, :] + boxr[3, :]) / 4

                # Average opposite edges to estimate box width and height.
                dw = boxr[2, :] - boxr[1, :]
                dw2 = boxr[0, :] - boxr[3, :]
                dh = boxr[1, :] - boxr[0, :]
                dh2 = boxr[3, :] - boxr[2, :]

                h = math.sqrt(dh[0] * dh[0] + dh[1] * dh[1]) + 1
                h2 = math.sqrt(dh2[0] * dh2[0] + dh2[1] * dh2[1]) + 1
                h = (h + h2) / 2
                w = math.sqrt(dw[0] * dw[0] + dw[1] * dw[1])
                w2 = math.sqrt(dw2[0] * dw2[0] + dw2[1] * dw2[1])
                w = (w + w2) / 2

                if ((h - 1) / im_scaley) < min_height:
                    continue

                input_W = im_data.size(3)
                input_H = im_data.size(2)
                target_h = norm_height

                scale = target_h / h
                target_gw = int(w * scale + target_h / 4)
                target_gw = max(8, int(round(target_gw / 8)) * 8)

                xc = center[0]
                yc = center[1]
                w2 = w
                h2 = h

                angle = math.atan2((boxr[2][1] - boxr[1][1]), boxr[2][0] - boxr[1][0])
                angle2 = math.atan2((boxr[3][1] - boxr[0][1]), boxr[3][0] - boxr[0][0])
                angle = (angle + angle2) / 2

                # show pooled image in image layer
                scalex = (w2 + h2 / 4) / input_W
                scaley = h2 / input_H

                th11 = scalex * math.cos(angle)
                th12 = -math.sin(angle) * scaley * input_H / input_W
                th13 = (2 * xc - input_W - 1) / (input_W - 1)

                th21 = math.sin(angle) * scalex * input_W / input_H
                th22 = scaley * math.cos(angle)
                th23 = (2 * yc - input_H - 1) / (input_H - 1)

                t = np.asarray([th11, th12, th13, th21, th22, th23], dtype=np.float32)
                t = torch.from_numpy(t).type(torch.FloatTensor)
                t = t.to(device)
                theta = t.view(-1, 2, 3)

                # Crop the rotated box into an axis-aligned strip for the OCR branch.
                grid = F.affine_grid(theta, torch.Size((1, 3, int(target_h), int(target_gw))))
                x = F.grid_sample(im_data, grid)

                # features = net.forward_features(x)
                # labels_pred = net.forward_ocr(features)
                labels_pred = net.forward_ocr(x)
                labels_pred = labels_pred.permute(1, 2, 0)

                ctc_f = labels_pred.data.cpu().numpy()
                ctc_f = ctc_f.swapaxes(1, 2)

                labels = ctc_f.argmax(2)
                conf = np.mean(np.exp(ctc_f.max(2)[labels > 3]))
                if conf < 0.02:
                    continue

                det_text, conf2, dec_s, word_splits = print_seq_ext(labels[0, :], codec)
                det_text = det_text.strip()

                if conf < 0.01 and len(det_text) == 3:
                    continue

                if len(det_text) > 0:
                    dtxt = det_text.strip()
                    if len(dtxt) >= eval_text_length:
                        # print('{0} - {1}'.format(dtxt, conf_factor))
                        boxw = np.copy(boxr)
                        boxw[:, 1] /= im_scaley
                        boxw[:, 0] /= im_scalex
                        boxw = boxw.reshape(8)
                        detections_out.append([boxw, dtxt])

            pix = img
            # if args.evaluate == 1:
            tp, tp_e2e, gt_e2e, tp_e2e_ed1, detection_to_gt, pixx = evaluate_image(
                pix, detections_out, gt_rect, gt_txts, eval_text_length=eval_text_length)
            tp_all += tp
            gt_all += len(gt_txts)
            tp_e2e_all += tp_e2e
            gt_e2e_all += gt_e2e
            tp_e2e_ed1_all += tp_e2e_ed1
            detections_all += len(detections_out)
            # print(gt_all)
            if save:
                cv2.imwrite('{0}/{1}'.format(save_dir, base_name), pixx)

        # print(" E2E recall tp_e2e:{0:.3f} / tp:{1:.3f} / e1:{2:.3f}, precision: {3:.3f}".format(
        #     tp_e2e_all / float(max(1, gt_e2e_all)),
        #     tp_all / float(max(1, gt_e2e_all)),
        #     tp_e2e_ed1_all / float(max(1, gt_e2e_all)),
        #     tp_all / float(max(1, detections_all))))

    note_file.write(
        'Model{4}---E2E recall tp_e2e:{0:.3f} / tp:{1:.3f} / e1:{2:.3f}, precision: {3:.3f} \n'
        .format(tp_e2e_all / float(max(1, gt_e2e_all)),
                tp_all / float(max(1, gt_e2e_all)),
                tp_e2e_ed1_all / float(max(1, gt_e2e_all)),
                tp_all / float(max(1, detections_all)),
                name_model))
    note_file.close()
    return (tp_e2e_all / float(max(1, gt_e2e_all)),
            tp_all / float(max(1, gt_e2e_all)),
            tp_e2e_ed1_all / float(max(1, gt_e2e_all)),
            tp_all / float(max(1, detections_all)))
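# Self-contained sketch of the rotated-crop step in evaluate_e2e_crnn:
# F.affine_grid takes a 2x3 theta in normalized [-1, 1] coordinates, so
# th13/th23 recentre the grid on the box centre (xc, yc) and scalex/scaley
# shrink it to the box extent. Dummy image and an axis-aligned box
# (angle = 0); all numbers here are made up for illustration.
import math
import torch
import torch.nn.functional as F

input_H, input_W = 64, 128
im = torch.arange(input_H * input_W, dtype=torch.float32).view(1, 1, input_H, input_W)

xc, yc, w, h, angle = 60.0, 30.0, 40.0, 16.0, 0.0
target_h, target_gw = 8, 24
scalex = (w + h / 4) / input_W
scaley = h / input_H
theta = torch.tensor([[scalex * math.cos(angle),
                       -math.sin(angle) * scaley * input_H / input_W,
                       (2 * xc - input_W - 1) / (input_W - 1)],
                      [math.sin(angle) * scalex * input_W / input_H,
                       scaley * math.cos(angle),
                       (2 * yc - input_H - 1) / (input_H - 1)]]).view(1, 2, 3)

# align_corners=True matches the (N - 1) denominators used above.
grid = F.affine_grid(theta, torch.Size((1, 1, target_h, target_gw)), align_corners=True)
crop = F.grid_sample(im, grid, align_corners=True)
print(crop.shape)  # torch.Size([1, 1, 8, 24])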