Esempio n. 1
def computeWordOvelap(imgc, word_gt, words, wordsOk, wordsFp):
    """Match detected word boxes against ground-truth boxes by overlap.

    Every detection is drawn onto ``imgc`` as a rectangle and compared with
    every ground-truth box using ``utils.intersect`` / ``utils.union`` /
    ``utils.area``.

    Args:
        imgc: Image handed to ``cv2.rectangle``; modified in place.
        word_gt: Iterable of ground-truth boxes.
        words: Iterable of detected boxes; items 0-3 are used as the two
            rectangle corners.
        wordsOk: List that collects detections accepted as matches.
        wordsFp: List that collects detections without any overlap.

    Returns:
        Tuple ``(best_match, best_match2)``: the highest
        intersection-over-union seen so far and, for that same pair, the
        intersection area divided by the ground-truth area.
    """
    best_match = 0
    best_match2 = 0
    for det_word in words:
        cv2.rectangle(imgc, (det_word[0], det_word[1]),
                      (det_word[2], det_word[3]), (0, 0, 255))
        for gt_box in word_gt:
            int_area = utils.area(utils.intersect(det_word, gt_box))
            union_area = utils.area(utils.union(det_word, gt_box))
            if not union_area:
                # Degenerate boxes: no meaningful ratio, and dividing would
                # raise ZeroDivisionError.
                continue

            ratio = int_area / float(union_area)
            if ratio > best_match:
                best_match = ratio
                # float() keeps the ratio fractional under Python 2, where
                # int / int truncates to 0 or 1.
                best_match2 = int_area / float(utils.area(gt_box))

        # NOTE(review): the two branch bodies were missing from the source;
        # appending to the otherwise unused wordsOk / wordsFp parameters is
        # inferred from their names -- confirm against the original project.
        # NOTE(review): best_match / best_match2 are not reset per detection,
        # so the classification below uses the running best -- confirm that
        # this is intended.
        if best_match2 > 0.3:
            wordsOk.append(det_word)
        elif best_match == 0:
            wordsFp.append(det_word)

    return (best_match, best_match2)
Esempio n. 2
def computeWordOvelap(imgc, word_gt, words, wordsOk, wordsFp):
    """Match detected word boxes against ground-truth boxes by overlap.

    Every detection is drawn onto ``imgc`` as a rectangle and compared with
    every ground-truth box using ``utils.intersect`` / ``utils.union`` /
    ``utils.area``.

    Args:
        imgc: Image handed to ``cv2.rectangle``; modified in place.
        word_gt: Iterable of ground-truth boxes.
        words: Iterable of detected boxes; items 0-3 are used as the two
            rectangle corners.
        wordsOk: List that collects detections accepted as matches.
        wordsFp: List that collects detections without any overlap.

    Returns:
        Tuple ``(best_match, best_match2)``: the highest
        intersection-over-union seen so far and, for that same pair, the
        intersection area divided by the ground-truth area.
    """
    best_match = 0
    best_match2 = 0
    for det_word in words:
        cv2.rectangle(imgc, (det_word[0], det_word[1]), (det_word[2], det_word[3]), (0, 0, 255))
        for gt_box in word_gt:
            rect_int = utils.intersect(det_word, gt_box)
            int_area = utils.area(rect_int)
            union_area = utils.area(utils.union(det_word, gt_box))
            if not union_area:
                # Degenerate boxes would otherwise raise ZeroDivisionError.
                continue
            ratio = int_area / float(union_area)
            if ratio > best_match:
                best_match = ratio
                # float(): avoid Python 2 integer truncation of the ratio.
                best_match2 = int_area / float(utils.area(gt_box))
        # NOTE(review): both branch bodies were absent in the source; filling
        # wordsOk / wordsFp is inferred from the unused parameters -- confirm.
        # NOTE(review): the best scores are cumulative over all detections,
        # not per detection -- confirm that this is intended.
        if best_match2 > 0.3:
            wordsOk.append(det_word)
        elif best_match == 0:
            wordsFp.append(det_word)
    return (best_match, best_match2)
Esempio n. 3
    def define_location_quality(self):
        """Update ``self.location_quality`` from the shapes containing the point.

        self : we use self.point and check it against the dictionary of
            shape_corners
        shapes = {'name1': APIshape, name2: Abstract/Section, name3: qq, ...}

        For every entry of ``self.reference_shapes`` that intersects the
        transformed point, the inverse of the shape's area in square miles is
        added to the score (a one-square-mile shape adds 1). If no shape
        contributes, the score becomes -1.

        Returns 0 without touching ``self.location_quality`` when
        ``self.point`` is falsy; otherwise returns None after storing the new
        score.
        """
        # if we were unable to calculate a point
        if not self.point:
            return 0

        score = self.location_quality
        for name, shape in self.reference_shapes.iteritems():
            # NOTE(review): the try/except was missing from the source, which
            # made the "invalid geometry" message print for every shape. The
            # exception type actually raised is not visible here, so a broad
            # catch is used -- narrow it once the failure mode is confirmed.
            try:
                shape = ensure_polygon(shape)

                if shape.intersection(transform(4269, 3857, self.point)):
                    # if within a square mile => score = 1
                    score += math.pow(area(shape, units='square miles'), -1)
            except Exception:
                # Parenthesised so the line is valid on Python 2 and 3.
                print('\tinvalid geomtery for %s' % name)

        # reset score if the point does not fall within any of the related shapes
        if score == self.location_quality:
            score = -1

        self.location_quality = score
def associate_plate_to_vehicle(cars, plates):
    """
    Associate each plate to the closest available vehicle.

    Also check that the bounding box of the license plate is inside the
    bounding box of the car it is assigned to.

    Parameters
    ----------
    cars : list
        The list of vehicles without any license plate annotation.

    plates : list
        The list of license plates detected in a frame.

    Returns
    -------
    list
        The list of vehicles updated with license plates associated to the
        correct vehicle. This is the same ``cars`` object, modified in place.
    """
    # Keep track of indexes of cars that have already been assigned
    unavailable_cars_indexes = set()

    for p in plates:
        min_dist = np.inf
        min_dist_car_index = None

        px, py = compute_center_coordinates(p['bounding_box'])

        for ic, car in enumerate(cars):

            # Skip vehicles that have already been assigned a plate
            if ic in unavailable_cars_indexes:
                continue

            # Compute coordinates of the center of the vehicle's bb
            cx, cy = compute_center_coordinates(car['bounding_box'])

            # Squared distance is enough for finding the nearest centre.
            dist = (cx - px)**2 + (cy - py)**2
            if dist < min_dist:
                min_dist = dist
                min_dist_car_index = ic

        # No free car left for this plate.
        if min_dist_car_index is None:
            continue

        # If a suitable car has been found AND there is an overlap between the
        # bounding box of the license plate and the one of the closest car, do
        # proceed to assign that license plate to that car.
        if area(p['bounding_box'],
                cars[min_dist_car_index]['bounding_box']) is not None:

            # Assign plate to the closest available car
            cars[min_dist_car_index]['plates'] = [p]

            # Mark car as unavailable
            unavailable_cars_indexes.add(min_dist_car_index)

    return cars
Esempio n. 5
    def define_location_quality(self):
        """Update ``self.location_quality`` from the shapes containing the point.

        self : we use self.point and check it against the dictionary of
            shape_corners
        shapes = {'name1': APIshape, name2: Abstract/Section, name3: qq, ...}

        Each entry of ``self.reference_shapes`` that intersects the
        transformed point adds the inverse of its area (in square miles) to
        the score, so a one-square-mile shape adds 1. When nothing
        contributes the score is set to -1.

        Returns 0 (leaving ``self.location_quality`` untouched) when
        ``self.point`` is falsy; otherwise stores the score and returns None.
        """
        # if we were unable to calculate a point
        if not self.point:
            return 0

        score = self.location_quality
        for name, shape in self.reference_shapes.iteritems():
            # NOTE(review): the source had lost its try/except, so the
            # "invalid geometry" line ran unconditionally. Which exception the
            # geometry helpers raise is not visible here; the broad catch
            # should be narrowed once that is confirmed.
            try:
                shape = ensure_polygon(shape)

                if shape.intersection(transform(4269, 3857, self.point)):
                    score += math.pow(
                        area(shape, units='square miles'),
                        -1)  # if within a square mile => score = 1
            except Exception:
                # Parenthesised form is valid on both Python 2 and 3.
                print('\tinvalid geomtery for %s' % name)

        # reset score if the point does not fall within any of the related shapes
        if score == self.location_quality:
            score = -1

        self.location_quality = score
Esempio n. 6
def computeSegmOverlap(gt_rects, segmentations, MIN_SEGM_OVRLAP=0.6):
    """Store, per ground-truth character, its best overlap with a segmentation.

    For every ground-truth rectangle the intersection-over-union with each
    row of ``segmentations`` is computed and the best value is written to
    ``gt_rect[5]``. While that best value is below ``MIN_SEGM_OVRLAP`` the
    segmentation is also compared with the union of this rectangle and the
    next one; a hit above the threshold is written to item 5 of both
    rectangles.

    Args:
        gt_rects: Sequence of mutable ground-truth records; item 4 is
            compared against punctuation characters and item 5 receives the
            overlap score.
        segmentations: 2-D array of detections (``shape[0]`` rows); column 7
            is compared with 1.0.
        MIN_SEGM_OVRLAP: Overlap threshold for the two-character fallback.

    Returns:
        None. ``gt_rects`` is updated in place.
    """
    punctuation = (',', '.', '\'', ':', '-')
    # NOTE(review): segm2chars and best_match_line are computed but never
    # returned or read in the code visible here.
    segm2chars = 0

    for k, gt_rect in enumerate(gt_rects):
        best_match = 0
        best_match_line = 0
        # Punctuation is skipped unless the module-level evalPunctuation
        # flag asks for it.
        if gt_rect[4] in punctuation and not evalPunctuation:
            continue

        best_match2 = 0
        for detId in range(segmentations.shape[0]):
            rectn = segmentations[detId, :]
            rect_int = utils.intersect(rectn, gt_rect)
            int_area = utils.area(rect_int)
            union_area = utils.area(utils.union(rectn, gt_rect))

            ratio = int_area / float(union_area)

            if ratio > best_match:
                best_match = ratio

            if ratio > best_match_line and rectn[7] == 1.0:
                best_match_line = ratio

            gt_rect[5] = best_match
            if best_match < MIN_SEGM_OVRLAP:
                if k < len(gt_rects) - 1:
                    # Try the segmentation against this character merged
                    # with the following one.
                    gt_rect2 = gt_rects[k + 1]
                    chars2Rect = utils.union(gt_rect2, gt_rect)
                    rect_int = utils.intersect(rectn, chars2Rect)
                    int_area = utils.area(rect_int)
                    union_area = utils.area(utils.union(rectn, chars2Rect))
                    ratio = int_area / float(union_area)
                    if ratio > best_match2:
                        if ratio > MIN_SEGM_OVRLAP:
                            segm2chars += 1
                            best_match2 = ratio
                            gt_rect[5] = ratio
                            gt_rect2[5] = ratio
Esempio n. 7
def main():
    """Run the live face-matching loop on the default camera.

    Loads the embedding database from ``data/embed.pkl``, creates the face
    detector and embedder on the ``MYRIAD`` plugin, reads the match threshold
    from ``config.ini`` (section ``FACE_MATCH``, key ``Threshold``) and then
    processes frames until ``q`` is pressed. For each frame the largest
    detected face is embedded and matched with ``bruteforce``; the resulting
    label is drawn on the frame shown in the ``gandalf`` window.

    NOTE(review): several statements in the source were truncated
    (``config.read``, ``cam.read()``, the ``break`` / ``continue`` / ``else``
    lines and the ``cv2.putText`` call head). They are reconstructed below;
    the font and thickness passed to ``cv2.putText`` in particular are
    assumptions to confirm.
    """
    cam = cv2.VideoCapture(0)
    try:
        # load data
        embed_file_path = "data/embed.pkl"
        face_database = loadData(embed_file_path)
        # load models
        device = "MYRIAD"
        plugin = IEPlugin(device, plugin_dirs=None)

        face_embed = FaceEmbedding(plugin)
        face_detect = MobileFaceDetect(plugin)
        # params
        config = configparser.ConfigParser()
        config.read("config.ini")
        fm_threshold = float(config["FACE_MATCH"]['Threshold'])
        label = "new"

        while True:
            ret, frame = cam.read()
            if not ret:
                print("dead cam")
                # Release the dead handle before reopening the device.
                cam.release()
                cam = cv2.VideoCapture(0)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                continue
            face_bboxes = face_detect.inference(frame)
            if len(face_bboxes) > 0:
                # Only the largest face in the frame is processed.
                areas = [area(box) for box in face_bboxes]
                max_id = np.argmax(np.asarray(areas))
                mfb = face_bboxes[max_id]
                main_face = frame[mfb[1]:mfb[3], mfb[0]:mfb[2], :]
                # TODO real face detection
                # TODO face alignment
                face_feature = face_embed.inference(main_face)
                # TODO face record
                best_match = bruteforce(face_feature, face_database,
                                        fm_threshold)
                if best_match is None:
                    label = "new"
                else:
                    label = str(best_match['id'])
                # visualize for debug
                cv2.rectangle(frame, (mfb[0], mfb[1]), (mfb[2], mfb[3]),
                              (255, 0, 0), 2)
                cv2.putText(frame,
                            label, (mfb[0], mfb[1]),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            0.6, (0, 0, 255),
                            2)
            cv2.imshow("gandalf", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always give the camera and the preview window back.
        cam.release()
        cv2.destroyAllWindows()
Esempio n. 8
def computeSegmOverlap(gt_rects, segmentations, MIN_SEGM_OVRLAP = 0.6):
    """Store, per ground-truth character, its best overlap with a segmentation.

    The intersection-over-union of each ground-truth rectangle with every row
    of ``segmentations`` is computed and the best value is written to
    ``gt_rect[5]``. While that value stays below ``MIN_SEGM_OVRLAP`` the
    segmentation is additionally compared with the union of the rectangle
    and its successor; a result above the threshold is stored in item 5 of
    both rectangles.

    Args:
        gt_rects: Sequence of mutable ground-truth records; item 4 is
            compared against punctuation characters, item 5 receives the
            score.
        segmentations: 2-D array of detections (``shape[0]`` rows); column 7
            is compared with 1.0.
        MIN_SEGM_OVRLAP: Overlap threshold for the two-character fallback.

    Returns:
        None. ``gt_rects`` is updated in place.
    """
    punctuation = (',', '.', '\'', ':', '-')
    # NOTE(review): segm2chars and best_match_line are never read or
    # returned in the code visible here.
    segm2chars = 0
    for k, gt_rect in enumerate(gt_rects):
        best_match = 0
        best_match_line = 0
        # Skip punctuation unless the module-level evalPunctuation flag is
        # set.
        if gt_rect[4] in punctuation and not evalPunctuation:
            continue

        best_match2 = 0
        for detId in range(segmentations.shape[0]):
            rectn = segmentations[detId, :]
            rect_int = utils.intersect(rectn, gt_rect)
            int_area = utils.area(rect_int)
            union_area = utils.area(utils.union(rectn, gt_rect))
            ratio = int_area / float(union_area)
            if ratio > best_match:
                best_match = ratio
            if ratio > best_match_line and rectn[7] == 1.0:
                best_match_line = ratio
            gt_rect[5] = best_match
            if best_match < MIN_SEGM_OVRLAP:
                if k < len(gt_rects) - 1:
                    # Compare against this character merged with the next.
                    gt_rect2 = gt_rects[k + 1]
                    chars2Rect = utils.union(gt_rect2, gt_rect)
                    rect_int = utils.intersect(rectn, chars2Rect)
                    int_area = utils.area(rect_int)
                    union_area = utils.area(utils.union(rectn, chars2Rect))
                    ratio = int_area / float(union_area)
                    if ratio > best_match2:
                        if ratio > MIN_SEGM_OVRLAP:
                            segm2chars += 1
                            best_match2 = ratio
                            gt_rect[5] = ratio
                            gt_rect2[5] = ratio
Esempio n. 9
def fradius(theta, a, b, X, Y):
    """Evaluate ``b**2 * 16 * A**2 - du * dv * dw`` for three mapped points.

    The first three points ``(X[i], Y[i])`` are each passed through
    ``_tr(theta, b / a, x, y)``. ``du``, ``dv`` and ``dw`` are the ``distsq``
    values of the point pairs (0, 1), (1, 2) and (0, 2), and ``A`` is
    ``area`` of the three mapped points.

    NOTE(review): this looks like the circumradius identity R = abc / (4A)
    rearranged so that the result is zero when the circumradius equals ``b``
    -- confirm against the definitions of ``distsq`` and ``area``.
    """
    mapped = []
    for idx in range(3):
        mapped.append(_tr(theta, b / a, X[idx], Y[idx]))

    xs = [pt[0] for pt in mapped]
    ys = [pt[1] for pt in mapped]

    side_01 = distsq([xs[0], xs[1]], [ys[0], ys[1]])
    side_12 = distsq([xs[1], xs[2]], [ys[1], ys[2]])
    side_02 = distsq([xs[0], xs[2]], [ys[0], ys[2]])

    tri_area = area(xs, ys)

    return b**2 * 16 * tri_area**2 - side_01 * side_12 * side_02
    def is_included_in_nutrition_table(self, word):
        """Tell whether more than half of a word lies in the nutrition table.

        Compute intersection area between a word area and nutrition table
        area and return a boolean to check if percentage of the word's area
        included in nutritional table is greater than 50%.

        Keyword arguments:
        word -- Word; its ``bounding_box`` vertices are used.

        Returns False for a word whose circumscribed rectangle has zero area.
        """
        # Get word bounding_box vertices
        word_vertices = word.bounding_box

        # Find straight circumscribed rectangle
        bounding_box = circumscribed_rectangle(word_vertices)

        word_area = area(bounding_box)
        if not word_area:
            # A degenerate box cannot be "mostly inside" anything, and the
            # division below would fail.
            return False

        # Get inclusion score in nutrition table = area of intersection /
        # area of word bounding box. float() avoids integer truncation when
        # both areas are ints under Python 2.
        inclusion_score = intersection_area(
            bounding_box, self.table_bbx) / float(word_area)

        return inclusion_score > 0.5
def remove_duplicate_plates(plates, annotations_history):
    """Drop plates that collide with another plate of the same frame.

    Two plates collide when ``area`` of their bounding boxes is not None.
    For a colliding pair: identical text drops the later plate; otherwise a
    valid plate wins over an invalid one. When both have the same validity
    nothing is dropped yet (see TODO).

    Parameters
    ----------
    plates : list
        Plate dicts with ``bounding_box``, ``plate_text`` and ``valid_plate``
        keys.
    annotations_history :
        Currently unused; reserved for the frequency-based tie-break.

    Returns
    -------
    list
        The plates that survived, in their original order.

    NOTE(review): the branch bodies and the ``for ... else`` append were
    missing from the source and are reconstructed from its comments --
    confirm against the original project.
    """
    excluded_plates_indexes = set()
    unique_plates_list = list()

    for i in range(len(plates)):

        # If it has been already excluded, skip
        if i in excluded_plates_indexes:
            continue

        p1 = plates[i]

        for j in range(i + 1, len(plates)):

            p2 = plates[j]

            # Handle collision
            if area(p1['bounding_box'], p2['bounding_box']) is not None:

                # If they're the same skip the other one
                if p1['plate_text'] == p2['plate_text']:
                    excluded_plates_indexes.add(j)
                    continue

                if p1['valid_plate'] and not p2['valid_plate']:
                    # If the first one is valid and the second one isn't,
                    # remove the second one.
                    excluded_plates_indexes.add(j)
                elif not p1['valid_plate'] and p2['valid_plate']:
                    # If the first one is NOT valid and the second one is
                    # valid, break the inner loop, thus preventing the first
                    # one to be added (it's the else part of the loop).
                    break
                # Otherwise: choose the one that appeared more frequently in
                # the past.
                # TODO
        else:
            # Inner loop finished without a break: p1 was never beaten.
            unique_plates_list.append(p1)

    return unique_plates_list
Esempio n. 12
def process_batch(nets, optim, optim2, image_size, args):
  # Processes one batch: reads the detection net's blobs, matches detected
  # boxes with ground-truth words, feeds data/labels into the CTC net and
  # updates the running statistics held in the globals `it`, `mean_loss`
  # and `mean_rec`.
  #
  # nets       -- pair (net, net_ctc)
  # optim      -- not referenced in the code visible here
  # optim2     -- not referenced in the code visible here
  # image_size -- indexed with [0] and [1] to rescale box coordinates
  # args       -- only args.debug is read (enables drawing / imshow)
  #
  # NOTE(review): this snippet has lost many lines (truncated assignments,
  # `if`/`while` statements without bodies, missing `continue`/`else`). The
  # code below is kept exactly as found; the comments mark the gaps.
  global it, mean_loss, mean_rec
  net, net_ctc = nets
  # NOTE(review): the right-hand sides of the next two assignments are missing.
  net =
  net_ctc =
  it += 1 
  im = net.blobs['data'].data[...]
  # Reorder axes for drawing; im_ctc keeps a copy taken before rescaling.
  draw = np.swapaxes(im,2,3)
  draw = np.swapaxes(draw,1,3)
  im_ctc = np.copy(draw)
  draw += 1
  draw *= 128
  draw = np.array(draw, dtype="uint8").copy() 
  if args.debug:
    # Draw a horizontal grid line every grid_step pixels on the first image.
    grid_step = 16
    line = 0
    while line < image_size[0]:
      cv2.line(draw[0], (0, line), (image_size[1], line), (128, 128, 128))
      line += grid_step
  boxes  =  net.blobs['boxes'].data[...]
  word_gtob = net.blobs['gt_boxes'].data[...]
  word_txt = net.blobs['gt_labels'].data[...]
  lines_gtob = net.blobs['line_boxes'].data[...]
  lines_txt = net.blobs['line_labels'].data[...]
  #nms = boxeso[:, 0, 0, 8] == 0
  #boxes = boxes[:, :, nms, :]
  # Rescale box columns 0/1 by the image size and columns 2/3 by the diagonal.
  boxes[:, 0, :, 0] *= image_size[0]
  boxes[:, 0, :, 1] *= image_size[1]
  normFactor = math.sqrt(image_size[1] * image_size[1] + image_size[0] * image_size[0])
  boxes[:, 0, :, 2] *= normFactor
  boxes[:, 0, :, 3] *= normFactor
  sum_cost = 0
  count = 0
  labels_gt = []
  labels_det = []
  # Maps gtnum (1000 * bid + gt_no) -> [best ratio, detection index, ratio_gt].
  gt_to_detection = {}
  batch_buckets = []    
  dummy = {} 
  matched_detections = 0
  for bid in range(im.shape[0]):
    # Load the original image for this batch item and crop it.
    o_image = net.layers[0].get_image_file_name(bid)
    o_image = cv2.imread(o_image, cv2.IMREAD_GRAYSCALE)
    cx = net.layers[0].get_crop(bid, 0)
    cy = net.layers[0].get_crop(bid, 1)
    cmx = net.layers[0].get_crop(bid, 2)
    cmy = net.layers[0].get_crop(bid, 3)
    o_image = o_image[cy:cmy, cx:cmx]
    boxes_count = 0
    for i in range(0, boxes.shape[2]):
      det_word = boxes[bid, 0, i]
      # NOTE(review): the body of this `if` is missing.
      if (det_word[0] == 0 and det_word[1] == 0) or det_word[5] < 0.01:
      boxes_count += 1
    x = [i for i in range(boxes_count)]
    bucket_images = {}
    word_gto = word_gtob[bid]
    word_gto_txt = word_txt[bid]
    gt_count = 0 
    for gt_no in range(word_gto.shape[0]):
      gt = word_gto[gt_no, :]
      gt = gt.reshape(6)
      gtnum = 1000 * bid +  gt_no
      # NOTE(review): the body of this `if` is missing.
      if gt[5] == -1:
        #print("ignore gt!")
      gt_count += 1
      txt = word_gto_txt[gt_no, :]
      # Rotated GT box -> corner points -> axis-aligned bounding rectangle.
      gtbox  = ((gt[0] * image_size[0], gt[1] * image_size[1]), (gt[2] * normFactor, gt[3] * normFactor), gt[4] * 180 / 3.14)
      gtbox = cv2.boxPoints(gtbox)
      gtbox = np.array(gtbox, dtype="int")
      rect_gt = cv2.boundingRect(gtbox)

      # NOTE(review): the body of the border check below is missing.
      if rect_gt[0] == 0 or rect_gt[1] == 0 or  rect_gt[0] + rect_gt[2]  >= image_size[0] or rect_gt[1] + rect_gt[3]  >= image_size[1]:
      if gt[3] * normFactor <  3:
        if args.debug:
          #print('too small gt!')
          vis.draw_box_points(draw[bid], gtbox, color = (255, 255, 0))
          cv2.imshow('draw', draw[bid])
      if args.debug:
        vis.draw_box_points(draw[bid], gtbox, color = (0, 0, 0), thickness=2)
      #vis.draw_box_points(draw[bid], gtbox, color = (255, 255, 255))
      #cv2.imshow('draw', draw[bid])
      # Convert (x, y, w, h) into (left, top, right, bottom).
      rect_gt = [rect_gt[0], rect_gt[1], rect_gt[2], rect_gt[3]]
      rect_gt[2] += rect_gt[0]
      rect_gt[3] += rect_gt[1]

      for i in range(0, min(100, boxes_count)):
        # NOTE(review): body missing; det_word is also read here before it
        # is assigned for this iteration.
        if math.fabs(gt[4] - det_word[4]) > math.pi / 16:
        det_word = boxes[bid, 0, x[i], :]
        # NOTE(review): the body of this `if` is missing.
        if (det_word[0] == 0 and det_word[1] == 0) or det_word[5] < 0.01:
        box  = ((det_word[0], det_word[1]), (det_word[2], det_word[3]), det_word[4] * 180 / 3.14)
        box = cv2.boxPoints(box)
        if args.debug:
          boxp = np.array(box, dtype="int")
          vis.draw_box_points(draw[bid], boxp, color = (0, 255, 0))
        box = np.array(box, dtype="int")
        bbox = cv2.boundingRect(box)
        bbox = [bbox[0], bbox[1], bbox[2], bbox[3]]
        bbox[2] += bbox[0]
        bbox[3] += bbox[1]
        #rectangle intersection ... 
        inter = intersect(bbox, rect_gt)
        uni = union(bbox, rect_gt)
        # ratio: intersection over union; ratio_gt: intersection over GT area.
        ratio = area(inter) / float(area(uni))
        ratio_gt = area(inter) / float(area(rect_gt))
        # NOTE(review): the bodies of the next two threshold checks are missing.
        if ratio_gt < 0.95:
        if ratio < 0.5:
        if not gt_to_detection.has_key(gtnum):
            gt_to_detection[gtnum] = [0, 0, 0]
        tupl = gt_to_detection[gtnum] 
        # Keep the best-overlapping detection for this ground truth.
        if tupl[0] < ratio:
          tupl[0] = ratio 
          tupl[1] = x[i]  
          tupl[2] = ratio_gt       
        det_word = boxes[bid, 0, x[i], :]
        box  = ([det_word[0], det_word[1]], [det_word[2], det_word[3]], det_word[4] * 180 / 3.14)
        boxO = get_obox(im_ctc[bid], o_image, box)
        boxO = ((boxO[0][0], boxO[0][1]), (boxO[1][0], boxO[1][1]), boxO[2])
        norm2, rot_mat = get_normalized_image(o_image, boxO)
        #norm3, rot_mat = get_normalized_image(im_ctc[bid], ([det_word[0], det_word[1]], [det_word[2] * 1.2, det_word[3] * 1.1], det_word[4] * 180 / 3.14))
        # NOTE(review): the body of this `if` is missing.
        if norm2 is None:
        #if norm3 is None:
        #  continue
        #cv2.imshow('ts', norm2)
        #cv2.imshow('ts3', norm3)
        # Scale the crop to height 32 and pick the bucket width closest to
        # 1.3 x the scaled width.
        width_scale = 32.0 / norm2.shape[0]
        width = norm2.shape[1] * width_scale
        best_diff = width
        bestb = 0
        for b in range(0, len(buckets)):
          if best_diff > abs(width * 1.3 - buckets[b]):
            best_diff = abs(width * 1.3 - buckets[b])
            bestb = b
        scaled = cv2.resize(norm2, (buckets[bestb], 32))  
        scaled = np.asarray(scaled, dtype=np.float)
        # Divide by half of the value range, then subtract the mean.
        delta = scaled.max() - scaled.min()
        scaled = (scaled) / (delta / 2)
        scaled -= scaled.mean()
        if not bucket_images.has_key(bestb):
          bucket_images[bestb] = {}
          bucket_images[bestb]['img'] = []  
          bucket_images[bestb]['sizes'] = []    
          bucket_images[bestb]['txt'] = []
          bucket_images[bestb]['gt_enc'] = []
          dummy[bestb] = 1
          # NOTE(review): both branch bodies below are missing.
          if args.debug and len(bucket_images[bestb]) > 4:
          elif  len(bucket_images[bestb]) > 32:
        gt_labels = []
        txt_enc = ''
        # NOTE(review): as found, every positive code appends both the codec
        # value and 3, and a 0 is appended for every position; an `else:`
        # line has probably been lost here.
        for k in range(txt.shape[1]):
          if txt[0, k] > 0:
            if codec_rev.has_key(txt[0, k]):                
              gt_labels.append( codec_rev[txt[0, k]] )
              gt_labels.append( 3 )
            txt_enc += unichr(txt[0, k])
            gt_labels.append( 0 )
        if scaled.ndim == 3:
          scaled = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY)
        if args.debug:
          cv2.imshow('scaled', scaled)
        matched_detections += 1   
  #and learn OCR
  # NOTE(review): nothing visible above appends to bucket_images[...]['img'],
  # ['txt'] or ['gt_enc']; those statements appear to be missing.
  for bucket in bucket_images.keys():
    imtf = np.asarray(bucket_images[bucket]['img'], dtype=np.float)
    imtf = np.reshape(imtf, (imtf.shape[0], -1, imtf.shape[1], imtf.shape[2]))    
    #imtf = imtf.reshape((imtf.shape[0], imtf.shape[1], imtf.shape[2], 1))
    #imtf = np.swapaxes(imtf,1,3)
    net_ctc.blobs['data'].reshape(imtf.shape[0],imtf.shape[1],imtf.shape[2], imtf.shape[3]) 
    net_ctc.blobs['data'].data[...] = imtf
    labels = bucket_images[bucket]['gt_enc']
    txt = bucket_images[bucket]['txt']
    max_len = 0
    for l in range(0, len(labels)):
      max_len = max(max_len, len(labels[l]))
    for l in range(0, len(labels)):
      # NOTE(review): the body of this padding loop is missing.
      while len(labels[l]) <  max_len:
    labels = np.asarray(labels, np.float)
    net_ctc.blobs['label'].reshape(labels.shape[0], labels.shape[1])
    net_ctc.blobs['label'].data[...] = labels
    if args.debug:
        cv2.imshow('draw', draw[0])
    sum_cost += net_ctc.blobs['loss'].data[...]
    # For a large loss, show the image and print the decoded sequence.
    if net_ctc.blobs['loss'].data[...] > 10:
      cv2.imshow('draw', draw[0])
      sf = net_ctc.blobs['transpose'].data[...]
      labels2 = sf.argmax(3)
      out = utils.print_seq(labels2[:, 0, :])
      print(u'{0} - {1}'.format(out, txt[0])  )
    count += imtf.shape[0]
  correct_cout = 0    
  # NOTE(review): labels_gt / labels_det are never filled in the code visible
  # here, so this loop does not run as shown.
  for i in range(len(labels_gt)):
    det_text = labels_det[i]
    gt_text = labels_gt[i]
    if it % 100 == 0:
      print( u"{0} - {1}".format(det_text, gt_text).encode('utf8') )
    if det_text == gt_text:
      correct_cout += 1
  count = max(count, 1)    
  # Exponential moving averages with weight 0.99 on the previous value.
  mean_loss = 0.99 * mean_loss + 0.01 * sum_cost / count
  mean_rec = mean_rec * 0.99 + 0.01 * correct_cout / float(max(1, len(labels_gt)))
  #count detection ratio

  tp = 0
  for bid in range(im.shape[0]):
    word_gto = word_gtob[bid]
    for gt_no in range(len(word_gto)):
      gt = word_gto[gt_no]
      gtnum = 1000 * bid +  gt_no
      if gt_to_detection.has_key(gtnum):
        tupl = gt_to_detection[gtnum] 
        # A ground truth counts as found when its best ratio exceeds 0.5.
        if tupl[0] > 0.5:
          tp += 1
  # NOTE(review): gt_count holds only the count of the last batch item,
  # while tp is summed over the whole batch.
  loc_recall = tp / float(max(1, gt_count))             
  if args.debug:
    cv2.imshow('draw', draw[0])
    if im.shape[0] > 1:
        cv2.imshow('draw2', draw[1])
  if it % 10 == 0:
    print('{0} - lr:{1:.3e} ctc:{2:.4f}/{3:.4f} wr:{4:.2f}/{5:.2f}, loc:{6:.2f} {7}'.format(it, 0.0001, sum_cost / count, mean_loss, correct_cout / float(max(1, len(labels_gt))), mean_rec, loc_recall, matched_detections))
  # NOTE(review): the body of this `if` is missing.
  if it % 1000 == 0:
Esempio n. 13
def evaluate_image(batch,
                   detections,
                   word_gto,
                   iou_th=0.3,
                   iou_th_vis=0.5,
                   iou_th_eval=0.4):
    """
    Summary : Returns end-to-end true-positives, detection true-positives, number of GT to be considered for eval (len > 2).
    Description : For each predicted bounding-box, comparison is made with each GT entry. Values of number of end-to-end true
                  positives, number of detection true positives, number of GT entries to be considered for evaluation are computed.

    Parameters
    ----------
    iou_th_eval : float
        Threshold value of intersection-over-union used for evaluation of predicted bounding-boxes
    iou_th_vis : float
        Threshold value of intersection-over-union used for visualization when transcription is true but IoU is lesser.
    iou_th : float
        Threshold value of intersection-over-union between GT and prediction.
    word_gto : list of lists
        List of ground-truth bounding boxes along with transcription.
        Item 7 of an entry is set to 1 when a detection reproduces its text.
    batch : list of lists
        List containing data (input image, image file name, ground truth).
        ``batch[4][0]`` is the image that gets drawn on.
    detections : tuple of tuples
        Tuple of predicted bounding boxes along with transcriptions and text/no-text score.

    Returns
    -------
    tp : int
        Number of GT entries whose best-overlapping prediction has IoU greater than iou_th_eval.
    tp_e2e : int
        Number of GT entries with len > 2 whose transcription was matched by a prediction.
    gt_e2e : int
        Number of GT entries for which transcription len > 2.
    """
    # Maps GT index -> [best IoU, index of the detection achieving it].
    gt_to_detection = {}
    tp = 0
    tp_e2e = 0
    gt_e2e = 0

    draw = batch[4][0]
    normFactor = math.sqrt(
        draw.shape[1] * draw.shape[1] +
        draw.shape[0] * draw.shape[0])  # Normalization factor
    for i, det in enumerate(detections):
        boxr = det[0]
        box = cv2.boxPoints(boxr)  # Predicted bounding-box parameters
        box = np.array(
            box, dtype="int")  # Convert predicted bounding-box to numpy array
        bbox = cv2.boundingRect(box)

        bbox = [bbox[0], bbox[1], bbox[2], bbox[3]]
        bbox[2] += bbox[0]  # Convert width to right-coordinate
        bbox[3] += bbox[1]  # Convert height to bottom-coordinate

        vis.draw_box_points(draw, box, color=(255, 0, 0))

        det_text = det[1][0]  # Predicted transcription for bounding-box

        for gt_no, gt in enumerate(word_gto):
            txt = gt[5]  # GT transcription for given GT bounding-box
            gtbox = ((gt[0] * draw.shape[1], gt[1] * draw.shape[0]),
                     (gt[2] * normFactor, gt[3] * normFactor),
                     gt[4] * 180 / 3.14)  # Re-scaling GT values
            gtbox = cv2.boxPoints(gtbox)
            gtbox = np.array(gtbox, dtype="int")
            rect_gt = cv2.boundingRect(gtbox)

            rect_gt = [rect_gt[0], rect_gt[1], rect_gt[2], rect_gt[3]]
            rect_gt[2] += rect_gt[0]  # Convert GT width to right-coordinate
            rect_gt[3] += rect_gt[1]  # Convert GT height to bottom-coordinate

            # Intersection / union of predicted and GT bounding-boxes
            inter = intersect(bbox, rect_gt)
            uni = union(bbox, rect_gt)
            # IoU measure between predicted and GT bounding-boxes
            ratio = area(inter) / float(area(uni))

            # 1). Visualize the predicted-bounding box if IoU with GT is higher than IoU threshold (iou_th) (Always required)
            # 2). Visualize the predicted-bounding box if transcription matches the GT and condition 1. holds
            # 3). Visualize the predicted-bounding box if transcription matches and IoU with GT is less than iou_th_vis and 1. and 2. hold
            if ratio > iou_th:
                vis.draw_box_points(draw, box, color=(0, 128, 0))
                # `in` works on Python 2 and 3; dict.has_key is Python 2 only.
                if gt_no not in gt_to_detection:
                    gt_to_detection[gt_no] = [0, 0]

                # NOTE(review): `to_cls_x` is a module-level list not visible
                # here. The source had lost the `else:` below (a match was
                # appended twice, a mismatch never) -- confirm.
                if txt.lower() == det_text.lower():
                    to_cls_x.append(
                        [len(det_text), det[1][1], det[1][2], det[1][3]])
                    vis.draw_box_points(draw, box,
                                        color=(0, 255, 0),
                                        thickness=2)
                    gt[7] = 1  # Change this parameter to 1 when predicted transcription is correct.

                    if ratio < iou_th_vis:
                        vis.draw_box_points(draw, box,
                                            color=(255, 255, 255),
                                            thickness=2)
                        cv2.imshow('draw', draw)
                else:
                    to_cls_x.append(
                        [len(det_text), det[1][1], det[1][2], det[1][3]])

                tupl = gt_to_detection[gt_no]
                if tupl[0] < ratio:
                    tupl[0] = ratio
                    tupl[1] = i

    # Count the number of end-to-end and detection true-positives
    for gt_no, gt in enumerate(word_gto):
        txt = gt[5]
        if len(txt) > 2:
            gt_e2e += 1
            if gt[7] == 1:
                tp_e2e += 1

        if gt_no in gt_to_detection:
            tupl = gt_to_detection[gt_no]
            if tupl[0] > iou_th_eval:  # Increment detection true-positive, if IoU is greater than iou_th_eval
                tp += 1

    cv2.imshow('draw', draw)
    return tp, tp_e2e, gt_e2e
Esempio n. 14
def evaluate_image(batch, detections, word_gto, iou_th=0.3, iou_th_vis=0.5, iou_th_eval=0.4):
  """
  Summary : Returns end-to-end true-positives, detection true-positives, number of GT to be considered for eval (len > 2).
  Description : For each predicted bounding-box, comparison is made with each GT entry. Values of number of end-to-end true
                positives, number of detection true positives, number of GT entries to be considered for evaluation are computed.

  Parameters
  ----------
  iou_th_eval : float
      Threshold value of intersection-over-union used for evaluation of predicted bounding-boxes
  iou_th_vis : float
      Threshold value of intersection-over-union used for visualization when transcription is true but IoU is lesser.
  iou_th : float
      Threshold value of intersection-over-union between GT and prediction.
  word_gto : list of lists
      List of ground-truth bounding boxes along with transcription.
      Item 7 of an entry is set to 1 when a detection reproduces its text.
  batch : list of lists
      List containing data (input image, image file name, ground truth).
      ``batch[4][0]`` is the image that gets drawn on.
  detections : tuple of tuples
      Tuple of predicted bounding boxes along with transcriptions and text/no-text score.

  Returns
  -------
  tp : int
      Number of GT entries whose best-overlapping prediction has IoU greater than iou_th_eval.
  tp_e2e : int
      Number of GT entries with len > 2 whose transcription was matched by a prediction.
  gt_e2e : int
      Number of GT entries for which transcription len > 2.
  """
  # Maps GT index -> [best IoU, index of the detection achieving it].
  gt_to_detection = {}
  tp = 0
  tp_e2e = 0
  gt_e2e = 0
  draw = batch[4][0]
  normFactor = math.sqrt(draw.shape[1] * draw.shape[1] + draw.shape[0] * draw.shape[0]) # Normalization factor
  for i, det in enumerate(detections):
    boxr = det[0]
    box = cv2.boxPoints(boxr) # Predicted bounding-box parameters
    box = np.array(box, dtype="int") # Convert predicted bounding-box to numpy array
    bbox = cv2.boundingRect(box)
    bbox = [bbox[0], bbox[1], bbox[2], bbox[3]]
    bbox[2] += bbox[0] # Convert width to right-coordinate
    bbox[3] += bbox[1] # Convert height to bottom-coordinate
    vis.draw_box_points(draw, box, color = (255, 0, 0))
    det_text = det[1][0] # Predicted transcription for bounding-box
    for gt_no, gt in enumerate(word_gto):
      txt = gt[5] # GT transcription for given GT bounding-box
      gtbox  = ((gt[0] * draw.shape[1], gt[1] * draw.shape[0]), (gt[2] * normFactor, gt[3] * normFactor), gt[4] * 180 / 3.14) # Re-scaling GT values
      gtbox = cv2.boxPoints(gtbox)
      gtbox = np.array(gtbox, dtype="int")
      rect_gt = cv2.boundingRect(gtbox)
      rect_gt = [rect_gt[0], rect_gt[1], rect_gt[2], rect_gt[3]]
      rect_gt[2] += rect_gt[0] # Convert GT width to right-coordinate
      rect_gt[3] += rect_gt[1] # Convert GT height to bottom-coordinate

      inter = intersect(bbox, rect_gt) # Intersection of predicted and GT bounding-boxes
      uni = union(bbox, rect_gt) # Union of predicted and GT bounding-boxes
      ratio = area(inter) / float(area(uni)) # IoU measure between predicted and GT bounding-boxes
      # 1). Visualize the predicted-bounding box if IoU with GT is higher than IoU threshold (iou_th) (Always required)
      # 2). Visualize the predicted-bounding box if transcription matches the GT and condition 1. holds
      # 3). Visualize the predicted-bounding box if transcription matches and IoU with GT is less than iou_th_vis and 1. and 2. hold
      if ratio > iou_th:
        vis.draw_box_points(draw, box, color = (0, 128, 0))
        # `in` works on Python 2 and 3; dict.has_key is Python 2 only.
        if gt_no not in gt_to_detection:
          gt_to_detection[gt_no] = [0, 0]
        # NOTE(review): `to_cls_x` is a module-level list not visible here.
        # The source had no `else:` before the second append, so a match was
        # appended twice and a mismatch never -- confirm the restored branch.
        if txt.lower() == det_text.lower():
          to_cls_x.append([len(det_text), det[1][1], det[1][2], det[1][3]])
          vis.draw_box_points(draw, box, color = (0, 255, 0), thickness=2)
          gt[7] = 1 # Change this parameter to 1 when predicted transcription is correct.
          if ratio < iou_th_vis:
              vis.draw_box_points(draw, box, color = (255, 255, 255), thickness=2)
              cv2.imshow('draw', draw)
        else:
          to_cls_x.append([len(det_text), det[1][1], det[1][2], det[1][3]])
        tupl = gt_to_detection[gt_no]
        if tupl[0] < ratio:
          tupl[0] = ratio
          tupl[1] = i
  # Count the number of end-to-end and detection true-positives
  for gt_no, gt in enumerate(word_gto):
    txt = gt[5]
    if len(txt) > 2:
      gt_e2e += 1
      if gt[7] == 1:
        tp_e2e += 1
    if gt_no in gt_to_detection:
      tupl = gt_to_detection[gt_no]
      if tupl[0] > iou_th_eval: # Increment detection true-positive, if IoU is greater than iou_th_eval
        tp += 1
  cv2.imshow('draw', draw)
  return tp, tp_e2e, gt_e2e
Esempio n. 15
def run_evaluation(inputDir, outputDir, invert=False, isFp=False):
    # Evaluate character segmentations and text lines produced by `ftext`
    # against ground truth for every '*.jpg' found in `inputDir`.
    #
    # For each image the function:
    #   1. runs ftext.getCharSegmentations / ftext.findTextLines,
    #   2. loads word-level ground truth from a text file next to the image,
    #   3. scores detections against ground truth by intersection-over-union,
    #   4. reports labelled detections through ftext.acummulateCharFeatures.
    #
    # Parameters:
    #   inputDir  -- directory globbed for '*.jpg'; segmentation ground truth
    #                is looked up under '<inputDir>/segmentations'.
    #   outputDir -- only tested for existence in the code visible here.
    #   invert    -- when true, the loaded segmentation GT image is inverted.
    #   isFp      -- when true, every segmentation is accumulated with label 0.
    #
    # No return statement is visible in this copy.
    #
    # NOTE(review): this copy of the function has lost several statements
    # (empty `if` bodies, `except` clauses without a `try:`, unclosed
    # brackets). The gaps are flagged inline; restore them from the original
    # source before running. `evalPunctuation` and `MIN_SEGM_OVRLAP` are not
    # defined in this function and must come from the enclosing module.

    # NOTE(review): the body of this `if` is missing -- presumably it creates
    # outputDir; confirm against the original.
    if not os.path.exists(outputDir):

    images = glob.glob('{0}/*.jpg'.format(inputDir))
    segmDir = '{0}/segmentations'.format(inputDir)

    for image in images:
        print('Processing {0}'.format(image))

        # Flag 0 loads the image as single-channel grayscale; imgc is the
        # default (colour) load and is the canvas used for drawing below.
        img = cv2.imread(image, 0)
        imgc = cv2.imread(image)
        imgproc = img

        imgKp = np.copy(img)

        baseName = os.path.basename(image)
        # Drop the last four characters (the '.jpg' extension).
        baseName = baseName[:-4]
        workPoint = 0.3
        segmentations = ftext.getCharSegmentations(
            imgproc)  #, outputDir, baseName)
        # Keep the first ten columns only.
        segmentations = segmentations[:, 0:10]
        # NOTE(review): this call is truncated. Columns 10 and 11 are indexed
        # further down, which suggests the two zero columns are appended to the
        # ten kept above -- confirm against the original.
        segmentations = np.column_stack([
            np.zeros((segmentations.shape[0], 2), dtype=np.float)
        # Rows whose column 8 equals -1, then those whose column 9 exceeds
        # workPoint. The meaning of these columns is defined by ftext.
        maskDuplicates = segmentations[:, 8] == -1
        segmentationsDuplicates = segmentations[maskDuplicates, :]
        maskNoNei = segmentationsDuplicates[:, 9] > workPoint
        segmentationsNoNei = segmentationsDuplicates[maskNoNei, :]
        keypoints = ftext.getLastDetectionKeypoints()
        # Mark each keypoint (x in column 0, y in column 1) as a white pixel.
        imgKp[keypoints[:, 1].astype(int), keypoints[:, 0].astype(int)] = 255
        scales = ftext.getImageScales()
        statc = ftext.getDetectionStat()
        words = ftext.findTextLines()
        segmLine = segmentations[segmentations[:, 7] == 1.0, :]
        # Add columns 0/1 onto columns 2/3 -- presumably converting
        # (x, y, width, height) into (x1, y1, x2, y2); TODO confirm.
        segmentations[:, 2] += segmentations[:, 0]
        segmentations[:, 3] += segmentations[:, 1]

        if isFp:
            for detId in range(0, segmentations.shape[0]):
                ftext.acummulateCharFeatures(0, detId)


        lineGt = '{0}/gt_{1}.txt'.format(inputDir, baseName)
        if not os.path.exists(lineGt):
            lineGt = '{0}/{1}.txt'.format(inputDir, baseName)

        # NOTE(review): this reassignment discards the fallback chosen just
        # above.
        lineGt = '{0}/gt_{1}.txt'.format(inputDir, baseName)
        # NOTE(review): the `try:` lines and the `else:` that belong to the
        # structure below are missing. The visible intent is to try the
        # ICDAR 2013 reader, retry with ',' as separator, then fall back to the
        # ICDAR 2015 reader; the last two lines look like the branch taken
        # when the 'gt_' file does not exist.
        if os.path.exists(lineGt):
                word_gt = utls.read_icdar2013_txt_gt(lineGt)
            except ValueError:
                    word_gt = utls.read_icdar2013_txt_gt(lineGt, separator=',')
                except ValueError:
                    word_gt = utls.read_icdar2015_txt_gt(lineGt, separator=',')
            lineGt = '{0}/{1}.txt'.format(inputDir, baseName)
            word_gt = utls.read_mrrc_txt_gt(lineGt, separator=',')

        # Sum over GT words of the best IoU achieved by any detected text line.
        rWcurrent = 0.0
        for gt_box in word_gt:
            # NOTE(review): body missing -- presumably skips GT entries whose
            # text (index 4) is a single character.
            if len(gt_box[4]) == 1:
            best_match = 0
            cv2.rectangle(imgc, (gt_box[0], gt_box[1]), (gt_box[2], gt_box[3]),
                          (0, 255, 0))
            for det_word in words:
                rect_int = utils.intersect(det_word, gt_box)
                int_area = utils.area(rect_int)
                union_area = utils.area(utils.union(det_word, gt_box))

                # NOTE(review): body missing -- presumably guards the division
                # below against a zero union area.
                if union_area == 0:

                ratio = int_area / float(union_area)
                # Slot 11 of a detected line keeps its best IoU with any GT word.
                det_word[11] = max(det_word[11], ratio)

                if ratio > best_match:
                    best_match = ratio
            rWcurrent += best_match

            # Second pass: score every character segmentation against the
            # same GT word box.
            best_match = 0
            for detId in range(segmentations.shape[0]):
                # Writes to rectn below are meant to update the segmentations
                # row in place (assumes a NumPy array row view -- confirm).
                rectn = segmentations[detId, :]
                rect_int = utils.intersect(rectn, gt_box)
                int_area = utils.area(rect_int)
                union_area = utils.area(utils.union(rectn, gt_box))

                ratio = int_area / float(union_area)
                rectn[11] = max(ratio, rectn[11])
                if ratio > best_match:
                    best_match = ratio
                # A segmentation covering a whole GT word is accumulated with
                # label 2.
                if ratio > 0.7:

                    #print( "Word Match!" )
                    #tmp = ftext.getSegmentationMask(detId)
                    #cv2.imshow("ts", tmp)

                    ftext.acummulateCharFeatures(2, detId)

        # Try three naming conventions for the segmentation GT image.
        segmImg = '{0}/{1}_GT.bmp'.format(segmDir, baseName)
        if not os.path.exists(segmImg):
            segmImg = '{0}/gt_{1}.png'.format(segmDir, baseName)
        if not os.path.exists(segmImg):
            segmImg = '{0}/{1}.png'.format(segmDir, baseName)
        segmImg = cv2.imread(segmImg, 0)
        if invert and segmImg is not None:
            segmImg = ~segmImg

        gt_rects = []
        miss_rects = []
        segmGt = '{0}/{1}_GT.txt'.format(segmDir, baseName)
        # `and False` makes this condition always false.
        # NOTE(review): an `else:` appears to be missing after the first line
        # of the body, and the findContours call and the `rect` list literal
        # are both truncated.
        if os.path.exists(segmGt) and False:
            (gt_rects, groups) = utls.read_icdar2013_segm_gt(segmGt)
            segmImg = '{0}/{1}_GT.bmp'.format(segmDir, baseName)
            if not os.path.exists(segmImg):
                segmImg = '{0}/gt_{1}.png'.format(segmDir, baseName)
            segmImg = cv2.imread(segmImg)
            contours = cv2.findContours(np.copy(segmImg),
            for cont in contours:
                rect = cv2.boundingRect(cont)
                rect = [
                    rect[0], rect[1], rect[0] + rect[2], rect[1] + rect[3],
                    '?', 0, 0

        # Score every segmentation against every character-level GT rectangle.
        for detId in range(segmentations.shape[0]):
            rectn = segmentations[detId, :]

            for k in range(len(gt_rects)):
                gt_rect = gt_rects[k]
                best_match = 0
                best_match_line = 0
                # NOTE(review): body missing -- presumably skips punctuation
                # characters unless evalPunctuation is set.
                if (gt_rect[4] == ',' or gt_rect[4] == '.'
                        or gt_rect[4] == '\'' or gt_rect[4] == ':'
                        or gt_rect[4] == '-') and not evalPunctuation:

                # 'i' and '!' use a fixed overlap threshold of 0.5 instead of
                # MIN_SEGM_OVRLAP.
                minSingleOverlap = MIN_SEGM_OVRLAP
                if gt_rect[4] == 'i' or gt_rect[4] == '!':
                    minSingleOverlap = 0.5

                rect_int = utils.intersect(rectn, gt_rect)
                int_area = utils.area(rect_int)
                union_area = utils.area(utils.union(rectn, gt_rect))
                ratio = int_area / float(union_area)
                # Slot 10 keeps the segmentation's best IoU with any GT char.
                rectn[10] = max(ratio, rectn[10])

                if rectn[9] > workPoint:
                    gt_rect[6] = max(ratio, gt_rect[6])

                if ratio > best_match:
                    best_match = ratio

                if ratio > best_match_line and rectn[7] == 1.0:
                    best_match_line = ratio
                if ratio > minSingleOverlap:
                    ftext.acummulateCharFeatures(1, detId)

                # Not a single-character match: test whether the segmentation
                # covers this GT character merged with the next one.
                if ratio < minSingleOverlap:
                    if k < len(gt_rects) - 1:
                        gt_rect2 = gt_rects[k + 1]
                        chars2Rect = utils.union(gt_rect2, gt_rect)
                        rect_int = utils.intersect(rectn, chars2Rect)
                        int_area = utils.area(rect_int)
                        union_area = utils.area(utils.union(rectn, chars2Rect))
                        ratio = int_area / float(union_area)
                        rectn[10] = max(ratio, rectn[10])

                        if ratio > 0.8:
                            # best_match2 is not read anywhere in the visible code.
                            best_match2 = ratio
                            gt_rect[5] = ratio
                            gt_rect2[5] = ratio
                            ftext.acummulateCharFeatures(2, detId)

                # Draw the GT rectangle: magenta by default, green when
                # matched, thicker when the match exceeds 0.7.
                thickness = 1
                color = (255, 0, 255)
                if best_match >= minSingleOverlap:
                    color = (0, 255, 0)
                if best_match > 0.7:
                    thickness = 2
                cv2.rectangle(imgc, (gt_rect[0], gt_rect[1]),
                              (gt_rect[2], gt_rect[3]), color, thickness)

            # No overlap with any GT character (slot 10) or GT word (slot 11):
            # accumulate with label 0.
            if rectn[10] == 0 and rectn[11] == 0:
                ftext.acummulateCharFeatures(0, detId)
Esempio n. 16
def run_words(inputDir, outputDir, invert=False):
    # Match character segmentations from `ftext` against word-level ground
    # truth for every '*.jpg' in `inputDir` (processed in sorted order) and
    # print a running word-recall figure after each image.
    #
    # Parameters:
    #   inputDir  -- directory globbed for '*.jpg' and searched for GT files.
    #   outputDir -- only tested for existence in the code visible here.
    #   invert    -- not read anywhere in the visible code.
    #
    # No return statement is visible in this copy.
    #
    # NOTE(review): this copy has lost several statements (empty `if` bodies,
    # `except` clauses without `try:`, an unclosed bracket); the gaps are
    # flagged inline.

    # NOTE(review): body missing -- presumably creates outputDir.
    if not os.path.exists(outputDir):

    #images = glob.glob('{0}/*.png'.format('/datagrid/personal/TextSpotter/evaluation-sets/MS-text_database'))
    #images = glob.glob('{0}/*.jpg'.format('/datagrid/personal/TextSpotter/evaluation-sets/neocr_dataset'))
    images = glob.glob('{0}/*.jpg'.format(inputDir))

    # Running totals across all images.
    matched_words = 0
    word_count = 0

    for image in sorted(images):
        print('Processing {0}'.format(image))

        # Flag 0 loads grayscale; imgc is the default (colour) load.
        img = cv2.imread(image, 0)
        imgc = cv2.imread(image)
        imgproc = img

        imgKp = np.copy(img)

        baseName = os.path.basename(image)
        # Drop the last four characters (the '.jpg' extension).
        baseName = baseName[:-4]
        workPoint = 0.3
        segmentations = ftext.getCharSegmentations(
            imgproc)  #, outputDir, baseName)
        segmentations = segmentations[:, 0:10]
        # NOTE(review): this call is truncated. Column 11 is written below,
        # which suggests two zero columns are appended to the ten kept above
        # -- confirm against the original.
        segmentations = np.column_stack([
            np.zeros((segmentations.shape[0], 2), dtype=np.float)
        maskDuplicates = segmentations[:, 8] == -1
        segmentationsDuplicates = segmentations[maskDuplicates, :]
        # maskNoNei is not read again in the visible code.
        maskNoNei = segmentationsDuplicates[:, 9] > workPoint
        keypoints = ftext.getLastDetectionKeypoints()
        imgKp[keypoints[:, 1].astype(int), keypoints[:, 0].astype(int)] = 255
        scales = ftext.getImageScales()
        statc = ftext.getDetectionStat()
        words = ftext.findTextLines()
        # Add columns 0/1 onto columns 2/3 -- presumably converting
        # (x, y, width, height) into (x1, y1, x2, y2); TODO confirm.
        segmentations[:, 2] += segmentations[:, 0]
        segmentations[:, 3] += segmentations[:, 1]

        lineGt = '{0}/gt_{1}.txt'.format(inputDir, baseName)
        if not os.path.exists(lineGt):
            lineGt = '{0}/{1}.txt'.format(inputDir, baseName)

        # NOTE(review): this reassignment discards the fallback chosen just
        # above.
        lineGt = '{0}/gt_{1}.txt'.format(inputDir, baseName)
        # NOTE(review): the `try:` lines and an `else:` are missing from the
        # structure below. The visible intent is ICDAR 2013 reader -> same
        # reader with ',' separator -> ICDAR 2015 reader, with the MRRC reader
        # used when no 'gt_' file exists.
        if os.path.exists(lineGt):
                word_gt = utls.read_icdar2013_txt_gt(lineGt)
            except ValueError:
                    word_gt = utls.read_icdar2013_txt_gt(lineGt, separator=',')
                except ValueError:
                    word_gt = utls.read_icdar2015_txt_gt(lineGt, separator=',')
            lineGt = '{0}/{1}.txt'.format(inputDir, baseName)
            word_gt = utls.read_mrrc_txt_gt(lineGt, separator=',')

        cw = 0
        for detId in range(segmentations.shape[0]):
            best_match = 0

            for gt_box in word_gt:
                # NOTE(review): both bodies below are missing -- presumably
                # they skip single-character words and entries whose text
                # starts with '#'.
                if len(gt_box[4]) == 1:
                if gt_box[4][0] == "#":
                # NOTE(review): as written, cw is incremented inside the
                # per-detection loop, so each GT word is counted once per
                # segmentation -- verify this is intended.
                cw += 1

                rectn = segmentations[detId, :]
                rect_int = utils.intersect(rectn, gt_box)
                int_area = utils.area(rect_int)
                union_area = utils.area(utils.union(rectn, gt_box))

                ratio = int_area / float(union_area)
                # Slot 11 keeps the segmentation's best IoU with any GT word.
                rectn[11] = max(ratio, rectn[11])
                if ratio > best_match:
                    best_match = ratio
                if ratio > 0.7:

                    #print( "Word Match!" )
                    #cv2.rectangle(imgc, (rectn[0], rectn[1]), (rectn[2], rectn[3]), (0, 255, 0))
                    #cv2.imshow("ts", imgc)
                    ftext.acummulateCharFeatures(2, detId)
                    # gt_box[5] is set to -1 once a word has been matched, so
                    # each GT word contributes to matched_words at most once.
                    if gt_box[5] != -1:
                        matched_words += 1
                    gt_box[5] = -1

            # The segmentation overlaps no GT word: accumulate with label 0.
            if best_match == 0:
                ftext.acummulateCharFeatures(0, detId)

        word_count += cw
        # NOTE(review): raises ZeroDivisionError while word_count is still 0.
        print("word recall: {0}".format(matched_words / float(word_count)))
Esempio n. 17
    def serve(self):
        # Main capture/display loop: grab frames from camera 0, detect the
        # largest face, gate it on size, blur and head pose, run recognition
        # against self.face_database once self.recognition_delay seconds have
        # passed since the previous attempt, and show the annotated frame in
        # the 'gandalf' window.
        #
        # Returns 2 when 'q' is pressed; no other return is visible.
        #
        # NOTE(review): this copy has lost many fragments (right-hand sides,
        # call arguments, `else:` lines, `continue` statements); the gaps are
        # flagged inline. Restore them before running.
        face_rec_delay = time.time()
        no_face_frame = 0
        cv2.namedWindow('gandalf', cv2.WINDOW_NORMAL)
        # NOTE(review): the left operand of both multiplications is missing,
        # and the setWindowProperty call below is truncated.
        cv2.resizeWindow('gandalf', int( * self.window_mult),
                         int( * self.window_mult))
        cv2.setWindowProperty('gandalf', cv2.WND_PROP_FULLSCREEN,
        cam = cv2.VideoCapture(0)
        # cam = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
        # label = "Hello"
        while (True):
            # NOTE(review): right-hand side missing -- presumably cam.read().
            ret, frame =
            # h, w, c = frame.shape
            # print(h, w, c)
            if not ret:
                # dead cam
                cam = cv2.VideoCapture(0)
                time.sleep(3.000)  # some delay to init cam
                # cam = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
                # cv2.destroyAllWindows()
                # return 1
            if self.test_face:
                frame[120:300, 280:400, :] = self.face_img
                # add test face on frame

            # go from standby to face detection phase
            # if no_face_frame > self.no_face_frame_limit:
            #     # no error
            #     cv2.destroyAllWindows()
            #     return 0
            # face detection phase
            face_bboxes = self.face_detect.inference(frame)
            if len(face_bboxes) > 0:
                no_face_frame = 0
                # Keep only the largest detected face (mfb).
                areas = [area(box) for box in face_bboxes]
                max_id = np.argmax(np.asarray(areas))
                mfb = face_bboxes[max_id]
                # print(area(mfb), self.face_min_size, frame.shape)
                # face consolidation phase, calculate face angle
                if area(mfb) > self.face_min_size:
                    # Expand the box by self.face_margin, clamped at 0.
                    # NOTE(review): the first argument of both min() calls is
                    # missing -- presumably the frame width and height.
                    x0 = max(0, mfb[0] - self.face_margin)
                    y0 = max(0, mfb[1] - self.face_margin)
                    x1 = min(, mfb[2] + self.face_margin)
                    y1 = min(, mfb[3] + self.face_margin)
                    main_head = frame[y0:y1, x0:x1, :]

                    # detect blurry face
                    h, w, c = main_head.shape
                    # print("hp img shape: ", img.shape)
                    if (h > 0) and (w > 0):
                        blur_face = cv2.resize(main_head, (112, 112))
                        # NOTE(review): the Laplacian call is truncated;
                        # presumably it ends in a variance used as a sharpness
                        # score -- confirm.
                        blur_face_var = cv2.Laplacian(blur_face,
                        if blur_face_var < self.face_lap_min_score:
                            cv2.rectangle(frame, (mfb[0], mfb[1]),
                                          (mfb[2], mfb[3]), (255, 0, 0), 2)
                            face_rec_delay_amount = time.time(
                            ) - face_rec_delay
                            if face_rec_delay_amount > self.recognition_delay:
                                frame = self.display_draw.drawLACText(frame)
                                frame = self.display_draw.drawLastText(frame)
                                # label = "please look at the camera"
                            cv2.imshow("gandalf", frame)
                            if cv2.waitKey(1) & 0xFF == ord('q'):
                                return 2
                            # NOTE(review): a `continue` was presumably here so
                            # that a blurry face skips the rest of the frame.

                    # detect head pose
                    yaw, pitch, roll = self.head_pose.inference(main_head)
                    # NOTE(review): the condition below is truncated (remaining
                    # arguments and the closing `):` are missing).
                    if not good_head_angle(yaw, pitch, roll, self.angle_min,
                        cv2.rectangle(frame, (mfb[0], mfb[1]),
                                      (mfb[2], mfb[3]), (255, 0, 0), 2)
                        face_rec_delay_amount = time.time() - face_rec_delay
                        if face_rec_delay_amount > self.recognition_delay:
                            frame = self.display_draw.drawLACText(frame)
                            frame = self.display_draw.drawLastText(frame)
                        cv2.imshow("gandalf", frame)
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            return 2

                    # TODO: face liveness detection
                    # NOTE(review): an `else:` for the face-size test appears
                    # to be missing before the "face too small" section below.
                    # face too small
                    face_rec_delay_amount = time.time() - face_rec_delay
                    if face_rec_delay_amount > self.recognition_delay:
                        frame = self.display_draw.drawMCText(frame)
                        frame = self.display_draw.drawLastText(frame)
                    cv2.imshow("gandalf", frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        return 2

                # face recognition phase
                face_rec_delay_amount = time.time() - face_rec_delay
                if face_rec_delay_amount >= self.recognition_delay:
                    main_face = frame[mfb[1]:mfb[3], mfb[0]:mfb[2], :]
                    # TODO face alignment
                    face_feature = self.face_embed.inference(main_face)
                    # NOTE(review): the bruteforce call is truncated, both
                    # `new_log` dict literals are missing entries and their
                    # closing braces, the `else:` between the failure and
                    # success branches is missing, and the line ending in
                    # `str(best_match['name']))` has lost its call head.
                    best_match = bruteforce(face_feature, self.face_database,
                    # TODO face record
                    if best_match is None:
                        new_log = {
                            'result': 'failed',
                  '%Y-%m-%d %H:%M:%S')
                        self.updateLog(new_log, main_face)
                        self.recognition_delay = self.recog_fai_delay
                        new_log = {
                            'result': 'success',
                            'face_id': best_match['_id'],
                  '%Y-%m-%d %H:%M:%S')
                        self.updateLog(new_log, main_face)
                            frame, str(best_match['name']))
                        self.recognition_delay = self.recog_suc_delay
                    # Restart the delay timer after every recognition attempt.
                    face_rec_delay = time.time()
                cv2.rectangle(frame, (mfb[0], mfb[1]), (mfb[2], mfb[3]),
                              (255, 0, 0), 2)
                # NOTE(review): an `else:` (no face detected) appears to be
                # missing before the lines below.
                no_face_frame += 1
                face_rec_delay_amount = time.time() - face_rec_delay
                if face_rec_delay_amount > self.recognition_delay:
                    frame = self.display_draw.drawDefaultText(frame)
                    frame = self.display_draw.drawLastText(frame)
            cv2.imshow("gandalf", frame)
            # Keys: 'q' quits (returns 2), 't' enables the test face overlay,
            # 'y' disables it.
            # NOTE(review): each branch issues its own cv2.waitKey(1) call, so
            # the keyboard is polled up to three times per frame and a key
            # press read by an earlier call is not seen by the later ones.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                return 2
            elif cv2.waitKey(1) & 0xFF == ord('t'):
                self.test_face = True
            elif cv2.waitKey(1) & 0xFF == ord('y'):
                self.test_face = False
Esempio n. 18
def process_batch(nets, optim, optim2, image_size, args):
    # Process one training batch: read detections and ground truth from the
    # detection net's blobs, match each ground-truth word to detections by
    # bounding-box IoU, crop and normalise the matched regions into
    # width buckets, feed each bucket to the CTC (OCR) net, and update the
    # running loss / recognition statistics held in module globals.
    #
    # Parameters:
    #   nets       -- pair (net, net_ctc).
    #   optim      -- not read in the visible code.
    #   optim2     -- not read in the visible code.
    #   image_size -- two-element size; index 0 scales x coordinates and
    #                 index 1 scales y coordinates below.
    #   args       -- options object; args.batch_size and args.debug are read.
    #
    # Globals updated: it, mean_loss, mean_rec.
    # Names expected from the enclosing module: buckets, codec_rev,
    # snapshot_interval, get_obox, get_normalized_image, intersect, union,
    # area, vis, utils.
    #
    # NOTE(review): this is Python 2 code (dict.has_key, unichr). This copy
    # has also lost many statements (right-hand sides, `continue` lines,
    # forward/backward calls, the `print(` head); gaps are flagged inline.
    global it, mean_loss, mean_rec
    it += 1  # increment the iteration counter

    net, net_ctc = nets

    # NOTE(review): both right-hand sides are missing.
    net =
    net_ctc =

    net.blobs['data'].reshape(args.batch_size, 1, image_size[1],
                              image_size[0])  # reshape the input blob for one batch of images

    # NOTE(review): no forward pass is visible between the reshape above and
    # the blob reads below -- presumably removed from this copy.

    im = net.blobs['data'].data[...]  # shape [batch_size,1,416,416]
    # Reorder the axes to (batch, dim 3, dim 2, channel) and rescale to uint8
    # (add 1, multiply by 128) to get a drawable visualisation copy.
    draw = np.swapaxes(im, 2, 3)
    draw = np.swapaxes(draw, 1, 3)
    im_ctc = np.copy(draw)
    draw += 1
    draw *= 128
    draw = np.array(draw, dtype="uint8").copy()

    if args.debug:
        # Overlay a grid with a 16-pixel step on the first image.
        grid_step = 16
        line = 0
        while line < image_size[0]:
            cv2.line(draw[0], (0, line), (image_size[1], line),
                     (128, 128, 128))
            line += grid_step

    boxes = net.blobs['boxes'].data[...]  # shape (4, 1, 500, 15)

    word_gtob = net.blobs['gt_boxes'].data[...]  # shape  (4, 6, 1, 6)
    word_txt = net.blobs['gt_labels'].data[...]  # shape (4, 6, 1, 14)

    lines_gtob = net.blobs['line_boxes'].data[...]  # shape (4, 1, 1, 5)
    lines_txt = net.blobs['line_labels'].data[...]  # shape (4, 1, 1, 7)

    #nms = boxeso[:, 0, 0, 8] == 0
    #boxes = boxes[:, :, nms, :]

    # Scale box columns 0/1 by the image size and columns 2/3 by the image
    # diagonal -- presumably de-normalising centre and size; TODO confirm.
    boxes[:, 0, :, 0] *= image_size[0]
    boxes[:, 0, :, 1] *= image_size[1]
    normFactor = math.sqrt(image_size[1] * image_size[1] +
                           image_size[0] * image_size[0])
    boxes[:, 0, :, 2] *= normFactor
    boxes[:, 0, :, 3] *= normFactor

    sum_cost = 0
    count = 0

    # NOTE(review): nothing is appended to labels_gt / labels_det in the
    # visible code.
    labels_gt = []
    labels_det = []

    # Maps gtnum -> [best IoU, detection index, GT coverage ratio].
    gt_to_detection = {}

    batch_buckets = []
    dummy = {}

    matched_detections = 0
    for bid in range(im.shape[0]):  # iterate over every sample in the batch

        # Re-read the source image and crop it with the crop bounds reported
        # by the data layer.
        o_image = net.layers[0].get_image_file_name(bid)
        o_image = cv2.imread(o_image, cv2.IMREAD_GRAYSCALE)
        cx = net.layers[0].get_crop(bid, 0)
        cy = net.layers[0].get_crop(bid, 1)
        cmx = net.layers[0].get_crop(bid, 2)
        cmy = net.layers[0].get_crop(bid, 3)
        o_image = o_image[cy:cmy, cx:cmx]

        boxes_count = 0
        for i in range(0, boxes.shape[2]):
            det_word = boxes[bid, 0, i]
            # NOTE(review): body missing -- presumably `break`/`continue` for
            # empty or low-confidence (column 5 < 0.01) detections.
            if (det_word[0] == 0 and det_word[1] == 0) or det_word[5] < 0.01:
            boxes_count += 1

        x = [i for i in range(boxes_count)]

        # NOTE(review): bucket_images is re-created for every sample here but
        # is consumed after this loop, so as written only the last sample's
        # buckets reach the OCR step -- verify against the original.
        bucket_images = {}

        word_gto = word_gtob[bid]
        word_gto_txt = word_txt[bid]
        # NOTE(review): gt_count is reset per sample, yet loc_recall below
        # divides the batch-wide tp by it.
        gt_count = 0
        for gt_no in range(word_gto.shape[0]):
            gt = word_gto[gt_no, :]
            gt = gt.reshape(6)
            # Key that is unique across the batch (assumes fewer than 1000 GT
            # entries per sample).
            gtnum = 1000 * bid + gt_no

            # NOTE(review): body missing -- presumably `continue`.
            if gt[5] == -1:
                #print("ignore gt!")

            gt_count += 1

            txt = word_gto_txt[gt_no, :]
            # Build a rotated rectangle (centre, size, angle in degrees) from
            # the GT entry and take its axis-aligned bounding box.
            gtbox = ((gt[0] * image_size[0], gt[1] * image_size[1]),
                     (gt[2] * normFactor,
                      gt[3] * normFactor), gt[4] * 180 / 3.14)
            gtbox = cv2.boxPoints(gtbox)

            gtbox = np.array(gtbox, dtype="int")
            rect_gt = cv2.boundingRect(gtbox)

            # NOTE(review): body missing -- presumably skips GT boxes that
            # touch the image border.
            if rect_gt[0] == 0 or rect_gt[
                    1] == 0 or rect_gt[0] + rect_gt[2] >= image_size[
                        0] or rect_gt[1] + rect_gt[3] >= image_size[1]:

            # NOTE(review): a `continue` presumably followed the debug print.
            if gt[3] * normFactor < 3:
                if args.debug:
                    print('too small gt!')

            # Convert (x, y, w, h) to (x1, y1, x2, y2).
            rect_gt = [rect_gt[0], rect_gt[1], rect_gt[2], rect_gt[3]]
            rect_gt[2] += rect_gt[0]
            rect_gt[3] += rect_gt[1]

            # Only the first 100 detections are compared against each GT box.
            for i in range(0, min(100, boxes_count)):
                # NOTE(review): det_word is read here before it is reassigned
                # for this iteration two lines below, so the angle test uses
                # the previous detection; the `if` body is also missing.
                if math.fabs(gt[4] - det_word[4]) > math.pi / 16:

                det_word = boxes[bid, 0, x[i], :]

                # NOTE(review): body missing.
                if (det_word[0] == 0
                        and det_word[1] == 0) or det_word[5] < 0.01:

                box = ((det_word[0], det_word[1]), (det_word[2], det_word[3]),
                       det_word[4] * 180 / 3.14)
                box = cv2.boxPoints(box)

                if args.debug:
                    boxp = np.array(box, dtype="int")
                    vis.draw_box_points(draw[bid], boxp, color=(0, 255, 0))

                # Axis-aligned bounding box of the detection as (x1, y1, x2, y2).
                box = np.array(box, dtype="int")
                bbox = cv2.boundingRect(box)
                bbox = [bbox[0], bbox[1], bbox[2], bbox[3]]
                bbox[2] += bbox[0]
                bbox[3] += bbox[1]

                #rectangle intersection ...
                inter = intersect(bbox, rect_gt)
                uni = union(bbox, rect_gt)
                ratio = area(inter) / float(area(uni))

                # Fraction of the GT box covered by the detection.
                # NOTE(review): the bodies of the two threshold tests below
                # are missing -- presumably `continue`.
                ratio_gt = area(inter) / float(area(rect_gt))
                if ratio_gt < 0.95:

                if ratio < 0.5:

                # Remember the detection with the highest IoU for this GT box.
                if not gt_to_detection.has_key(gtnum):
                    gt_to_detection[gtnum] = [0, 0, 0]
                tupl = gt_to_detection[gtnum]
                if tupl[0] < ratio:
                    tupl[0] = ratio
                    tupl[1] = x[i]
                    tupl[2] = ratio_gt

                det_word = boxes[bid, 0, x[i], :]
                box = ([det_word[0],
                        det_word[1]], [det_word[2],
                                       det_word[3]], det_word[4] * 180 / 3.14)

                # NOTE(review): the second boxO assignment is truncated (third
                # tuple element and closing parenthesis missing).
                boxO = get_obox(im_ctc[bid], o_image, box)
                boxO = ((boxO[0][0], boxO[0][1]), (boxO[1][0], boxO[1][1]),
                norm2, rot_mat = get_normalized_image(o_image, boxO)
                #norm3, rot_mat = get_normalized_image(im_ctc[bid], ([det_word[0], det_word[1]], [det_word[2] * 1.2, det_word[3] * 1.1], det_word[4] * 180 / 3.14))
                # NOTE(review): body missing -- presumably `continue`.
                if norm2 is None:
                #if norm3 is None:
                #  continue
                #cv2.imshow('ts', norm2)
                #cv2.imshow('ts3', norm3)
                # Width the crop would have at a height of 32 pixels; pick the
                # bucket whose width is closest to 1.3x that value.
                width_scale = 32.0 / norm2.shape[0]
                width = norm2.shape[1] * width_scale
                best_diff = width
                bestb = 0
                for b in range(0, len(buckets)):
                    if best_diff > abs(width * 1.3 - buckets[b]):
                        best_diff = abs(width * 1.3 - buckets[b])
                        bestb = b

                # Resize to the bucket size, scale by half the intensity
                # range, then subtract the mean.
                scaled = cv2.resize(norm2, (buckets[bestb], 32))
                scaled = np.asarray(scaled, dtype=np.float)
                delta = scaled.max() - scaled.min()
                scaled = (scaled) / (delta / 2)
                scaled -= scaled.mean()

                if not bucket_images.has_key(bestb):
                    bucket_images[bestb] = {}
                    bucket_images[bestb]['img'] = []
                    bucket_images[bestb]['sizes'] = []
                    bucket_images[bestb]['txt'] = []
                    bucket_images[bestb]['gt_enc'] = []
                    dummy[bestb] = 1
                    # NOTE(review): the two tests below have lost their
                    # bodies and look mis-indented -- presumably an `else:`
                    # branch limiting the number of crops per bucket.
                    if args.debug and len(bucket_images[bestb]) > 4:
                    elif len(bucket_images[bestb]) > 32:

                # Encode the GT text: codec_rev maps character codes to label
                # ids; txt_enc rebuilds the unicode string.
                gt_labels = []
                txt_enc = ''
                for k in range(txt.shape[1]):
                    if txt[0, k] > 0:
                        if codec_rev.has_key(txt[0, k]):
                            gt_labels.append(codec_rev[txt[0, k]])

                        txt_enc += unichr(txt[0, k])

                if scaled.ndim == 3:
                    scaled = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY)
                if args.debug:
                    cv2.imshow('scaled', scaled)
                # NOTE(review): the appends that store `scaled`, `gt_labels`
                # and `txt_enc` into bucket_images are not present in this copy.
                matched_detections += 1

    #and learn OCR
    for bucket in bucket_images.keys():

        # Stack the bucket's crops and insert a channel axis at position 1.
        imtf = np.asarray(bucket_images[bucket]['img'], dtype=np.float)
        imtf = np.reshape(imtf,
                          (imtf.shape[0], -1, imtf.shape[1], imtf.shape[2]))
        #imtf = imtf.reshape((imtf.shape[0], imtf.shape[1], imtf.shape[2], 1))
        #imtf = np.swapaxes(imtf,1,3)

        net_ctc.blobs['data'].reshape(imtf.shape[0], imtf.shape[1],
                                      imtf.shape[2], imtf.shape[3])
        net_ctc.blobs['data'].data[...] = imtf

        labels = bucket_images[bucket]['gt_enc']
        txt = bucket_images[bucket]['txt']

        # Pad every label sequence to the length of the longest one.
        # NOTE(review): the body of the `while` (the padding append) is missing.
        max_len = 0
        for l in range(0, len(labels)):
            max_len = max(max_len, len(labels[l]))
        for l in range(0, len(labels)):
            while len(labels[l]) < max_len:

        labels = np.asarray(labels, np.float)

        net_ctc.blobs['label'].reshape(labels.shape[0], labels.shape[1])

        net_ctc.blobs['label'].data[...] = labels

        if args.debug:
            cv2.imshow('draw', draw[0])

        # NOTE(review): no forward/backward/optimiser step on net_ctc is
        # visible before the loss is read -- presumably removed from this copy.
        sum_cost += net_ctc.blobs['loss'].data[...]
        # For high-loss buckets, print the decoded prediction next to the
        # first GT text of the bucket.
        if net_ctc.blobs['loss'].data[...] > 10:
            #cv2.imshow('draw', draw[0])
            sf = net_ctc.blobs['transpose'].data[...]
            labels2 = sf.argmax(3)
            out = utils.print_seq(labels2[:, 0, :])
            print(u'{0} --- {1}'.format(out, txt[0]))

        count += imtf.shape[0]

    # Count exact transcription matches.
    correct_cout = 0
    for i in range(len(labels_gt)):
        det_text = labels_det[i]
        gt_text = labels_gt[i]

        # NOTE(review): this `if` has only a commented-out body.
        if it % 100 == 0:
            #print( u"{0} -- {1}".format(det_text, gt_text).encode('utf8') )
        if det_text == gt_text:
            correct_cout += 1

    # Exponential moving averages (factor 0.99) of the loss and the
    # word-recognition rate; count is clamped to at least 1.
    count = max(count, 1)
    mean_loss = 0.99 * mean_loss + 0.01 * sum_cost / count
    mean_rec = mean_rec * 0.99 + 0.01 * correct_cout / float(
        max(1, len(labels_gt)))

    #count detection ratio

    # A GT word counts as localised when its best detection IoU exceeds 0.5.
    tp = 0
    for bid in range(im.shape[0]):
        word_gto = word_gtob[bid]
        for gt_no in range(len(word_gto)):
            gt = word_gto[gt_no]
            gtnum = 1000 * bid + gt_no
            if gt_to_detection.has_key(gtnum):
                tupl = gt_to_detection[gtnum]
                if tupl[0] > 0.5:
                    tp += 1

    loc_recall = tp / float(max(1, gt_count))

    # NOTE(review): the `print(` that opens the statement below is missing;
    # the learning rate in the message is the hard-coded literal 0.0001.
    if it % 10 == 0:
            '{0} - lr:{1:.3e} ctc:{2:.4f}/{3:.4f} wr:{4:.2f}/{5:.2f}, loc:{6:.2f} {7}'
            .format(it, 0.0001, sum_cost / count, mean_loss,
                    correct_cout / float(max(1, len(labels_gt))), mean_rec,
                    loc_recall, matched_detections))

    # NOTE(review): body missing -- presumably saves a model snapshot.
    if it % snapshot_interval == 0:
Esempio n. 19
def run_evaluation(inputDir, outputDir, process_color = 0, processTest = 0):
    """Evaluate FASTex character segmentations on a directory of JPEG images.

    Every ``*.jpg`` in ``inputDir`` is run through
    ``fastex.getCharSegmentations``.  When a ground-truth file
    ``<inputDir>/segmentations/<name>_GT.txt`` exists, each detected box is
    compared with each ground-truth box using the ratio of the intersection
    area to the area of ``utils.union`` of the two boxes; the best ratios are
    accumulated into recall / precision sums and several histograms.  The
    aggregated numbers are printed and stored with ``np.savez`` under
    ``<outputDir>/evaluation``.

    NOTE(review): this listing appears to have lost lines (several ``if``
    headers have no body, some ``else`` / ``try`` lines seem absent and two
    assignments end at ``=``), so it does not parse as-is.  Comments marked
    NOTE(review) flag those places; restore them from the original source
    before relying on the logic.

    NOTE(review): the body uses ``dict.has_key`` (removed in Python 3) and
    ``np.float`` (removed in NumPy 1.24), so it targets Python 2 / old NumPy.

    Args:
        inputDir: Directory searched for ``*.jpg``; ground truth is looked up
            in its ``segmentations`` sub-directory.
        outputDir: Passed to ``fastex.getCharSegmentations`` and used as the
            directory of the saved ``evaluation`` archive.
        process_color: When 1 the colour image is assigned to ``imgproc``
            (see the NOTE at that assignment); the grey-value histogram
            ``histSegm`` is only updated when it is not 1.
        processTest: Not referenced anywhere in the visible body.

    Returns:
        No ``return`` statement is visible; results are printed and saved.
    """
    if not os.path.exists(outputDir):
        # NOTE(review): the body of this ``if`` is missing from the listing
        # (presumably it creates outputDir -- confirm against the original).

    edgeThreshold = 14
    fastex = FASTex(edgeThreshold = edgeThreshold)
    # NOTE(review): modelFile / model are assigned but never read in the
    # visible code; a line that loads the model may have been lost.
    modelFile = '/home/busta/outModel.boost'
    model = cv2.Boost()
    images = glob.glob('{0}/*.jpg'.format(inputDir))
    segmDir = '{0}/segmentations'.format(inputDir)
    # Running sums; each *Den variable is the matching denominator.
    precision = 0;
    precisionDen = 0
    recall = 0
    recall05 = 0
    recallNonMax = 0
    recallDen = 0
    wordRecall = 0
    wordRecallDen = 0
    segm2chars = 0 
    regionsCount = 0
    regionsCountNonMax = 0
    missing_segmNonMaxCount = 0
    # Per-character counters, keyed by the ground-truth character gt_rect[4].
    letterKeypointHistogram = defaultdict(lambda : defaultdict(float))
    octaveLetterKeypointHistogram = defaultdict(lambda : defaultdict(float))
    missing_letters = {}
    letterHistogram = defaultdict(int)
    # Per-image collections, keyed by the image path.
    missing_segm = {}
    missing_segm2 = {}
    missing_segmNonMax = {}
    diffMaxOctavesMap = {}
    diffScoreOctavesMap = {}
    segmHistogram = []
    segmWordHistogram = []
    results = []  
    # Histograms threaded through collect_histograms(); they start as None.
    hist = None
    histFp = None
    histDist = None
    histDistFp = None
    histDistMax = None
    histDistMaxWhite = None
    histDistMaxFp = None
    hist2dDist =None
    hist2dDistFp = None
    hist2dDistScore = None
    hist2dDistScoreFp = None
    histDistMaxWhiteFp = None
    histSegm = np.zeros((256), dtype = np.float)
    # NOTE(review): the next statement is truncated after ``dtype =`` in this
    # listing; the dtype argument and closing parenthesis are missing.
    histSegmCount = np.zeros((256), dtype =
    stat = np.asarray([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=np.float)
    times = []
    gtSegmCount = 0
    wordsOk = []
    wordsFp = []
    keypointsTotal = 0
    keypointsTotalInside = 0
    orbTime = 0
    lineNo = 0
    perfectWords = 0;
    perfectWordsNS = 0;
    hasSegm = False
    for image in images:
        print('Processing {0}'.format(image))
        # img is the single-channel read (flag 0); imgc / imgcO are separate
        # default (colour) reads of the same file.
        img = cv2.imread(image, 0)
        imgc = cv2.imread(image)
        imgcO = cv2.imread(image)
        if process_color == 1:
            imgproc = imgc
            # NOTE(review): as listed, the next line overwrites imgproc
            # immediately; an ``else:`` between the two assignments appears
            # to be missing.
            imgproc = img
        # Strip the 4-character extension (".jpg") from the file name.
        baseName = os.path.basename(image)
        baseName = baseName[:-4]
        workPoint = 0.3
        segmentations = fastex.getCharSegmentations(imgproc, outputDir, baseName)
        # Keep the first 10 columns and append two zero columns (indices 10
        # and 11); column 10 later stores the best overlap ratio of each
        # detection, column 11 is only read (in segmWordHistogram) here.
        segmentations = segmentations[:, 0:10]
        segmentations = np.column_stack( [ segmentations , np.zeros( (segmentations.shape[0], 2), dtype = np.float ) ] )
        # Rows with column 8 == -1, then those of them with column 9 above
        # workPoint.  NOTE(review): column meanings come from FASTex and are
        # not visible here -- confirm before interpreting the names.
        maskDuplicates = segmentations[:, 8] == -1
        segmentationsDuplicates = segmentations[maskDuplicates, :]
        maskNoNei = segmentationsDuplicates[:, 9] > workPoint
        segmentationsNoNei = segmentationsDuplicates[maskNoNei, :]
        if segmentations.shape[0] > 0:
            print( 'Dupl ratio: {0} - {1}/ {2} - {3}'.format(segmentationsDuplicates.shape[0] / float(segmentations.shape[0]), segmentationsDuplicates.shape[0], segmentations.shape[0], segmentationsNoNei.shape[0] ) )
        keypoints = fastex.getLastDetectionKeypoints()
        keypointsTotal += keypoints.shape[0]
        statc =  fastex.getDetectionStat()
        times.append([ statc[1], statc[2], statc[3], statc[4], statc[5], statc[6], statc[7], statc[8], statc[9], statc[10]])
        stat += statc
        # Grey values sampled at keypoint columns (1, 0) and at columns (6, 5),
        # used as (row, col) indices into img.
        values = img[ keypoints[:, 1].astype(int), keypoints[:, 0].astype(int) ]
        valuesMax = img[keypoints[:, 6].astype(int), keypoints[:, 5].astype(int)]
        diffValMax = np.abs(values - valuesMax)
        regionsCount += segmentations.shape[0]
        regionsCountNonMax += segmentationsNoNei.shape[0]
        # Add column 0 to column 2 and column 1 to column 3 (presumably turns
        # (x, y, w, h) into (x1, y1, x2, y2) -- confirm).
        segmentations[:, 2] += segmentations[:, 0]
        segmentations[:, 3] += segmentations[:, 1]
        keypointsOrb = fastex.getLastDetectionOrbKeypoints()
        orbTime += keypointsOrb[0][9]
        segmGt = '{0}/{1}_GT.txt'.format(segmDir, baseName)
        # Per-image denominators for precision (pden) and recall (rden).
        pden = 0
        rden = 0
        if os.path.exists(segmGt):
            hasSegm = True
            # NOTE(review): ``utls`` here vs ``utils`` below -- either two
            # different modules or a typo; confirm against the imports.
            (gt_rects, groups) = utls.read_icdar2013_segm_gt(segmGt)
            # Ground-truth mask image: try <name>_GT.bmp, else gt_<name>.png.
            segmImg = '{0}/{1}_GT.bmp'.format(segmDir, baseName)
            if not os.path.exists(segmImg):
                segmImg = '{0}/gt_{1}.png'.format(segmDir, baseName)
            segmImg = cv2.imread(segmImg)
            # NOTE(review): the next statement is indented one level deeper
            # than its neighbours; the line that opened that block (a guard
            # or ``try:``) appears to be missing from this listing.
                (hist, histFp, histDist, histDistMax, histDistMaxWhite, hist2dDist, hist2dDistScore, histDistFp, histDistMaxFp, histDistMaxWhiteFp, hist2dDistFp, hist2dDistScoreFp, keypointsInside) = collect_histograms(img, segmImg, keypoints, values, diffValMax, keypointsTotalInside, diffMaxOctavesMap, diffScoreOctavesMap, hist, histFp, histDist, histDistMax, histDistMaxWhite, hist2dDist, hist2dDistScore, histDistFp, histDistMaxFp, histDistMaxWhiteFp, hist2dDistFp, hist2dDistScoreFp)
            rcurrent = 0
            rcurrent05 = 0
            rcurrentNonMax = 0
            # ---- Recall: best-matching detection for every ground-truth box.
            for k in range(len(gt_rects)):
                gt_rect = gt_rects[k]
                best_match = 0
                best_match_line = 0
                # NOTE(review): this ``if`` has no body in the listing
                # (presumably ``continue`` to skip punctuation -- confirm).
                # ``evalPunctuation`` is not defined in this function.
                if (gt_rect[4] == ',' or gt_rect[4] == '.' or gt_rect[4] == '\'' or gt_rect[4] == ':' or gt_rect[4] == '-') and not evalPunctuation:
                gtSegmCount += 1
                # Keypoints whose columns 0 / 1 fall inside the box
                # [gt_rect[0], gt_rect[2]] x [gt_rect[1], gt_rect[3]], inclusive.
                rectMask = np.bitwise_and(np.bitwise_and( keypointsInside[:, 0] >= gt_rect[0], keypointsInside[:, 0] <= gt_rect[2]), np.bitwise_and(keypointsInside[:, 1] >= gt_rect[1], keypointsInside[:, 1] <= gt_rect[3]))
                letterInside =  keypointsInside[rectMask, :]
                # Keypoint histogram: largest number of keypoints sharing one
                # value of column 2 (named "octave" here), bucketed as
                # >0, >1, >2, >3.
                if letterInside.shape[0] > 0:
                    octaves = np.unique( letterInside[:, 2])
                    maxOctave = np.max(octaves)
                    maxOctavePoints = 0
                    for i in range(int(maxOctave) + 1):
                        octavePoints = letterInside[letterInside[:, 2] == i, :]
                        maxOctavePoints = max(maxOctavePoints, octavePoints.shape[0])
                    if maxOctavePoints > 0:
                        octaveLetterKeypointHistogram[gt_rect[4]][0] += 1
                    if maxOctavePoints > 1:
                        octaveLetterKeypointHistogram[gt_rect[4]][1] += 1
                    if maxOctavePoints > 2:
                        octaveLetterKeypointHistogram[gt_rect[4]][2] += 1
                    if maxOctavePoints > 3:
                        octaveLetterKeypointHistogram[gt_rect[4]][3] += 1
                # Characters with no keypoint inside are recorded per character.
                if letterInside.shape[0] == 0:
                    if not missing_letters.has_key(gt_rect[4]):
                        missing_letters[gt_rect[4]] = []
                    missing_letters[gt_rect[4]].append( (image, gt_rect) )  
                # Same >0 / >1 / >2 / >3 buckets for the total keypoint count.
                if letterInside.shape[0] > 0:
                    letterKeypointHistogram[gt_rect[4]][0] += 1
                if letterInside.shape[0] > 1:
                    letterKeypointHistogram[gt_rect[4]][1] += 1
                if letterInside.shape[0] > 2:
                    letterKeypointHistogram[gt_rect[4]][2] += 1
                if letterInside.shape[0] > 3:
                    letterKeypointHistogram[gt_rect[4]][3] += 1
                letterHistogram[gt_rect[4]] += 1
                best_match2 = 0 
                # 'i' and '!' use a fixed 0.5 threshold instead of the
                # externally defined MIN_SEGM_OVRLAP.
                minSingleOverlap = MIN_SEGM_OVRLAP
                if gt_rect[4] == 'i' or gt_rect[4] == '!':
                    minSingleOverlap = 0.5
                for detId in range(segmentations.shape[0]):
                    # rectn is a row of segmentations, so the writes to
                    # rectn[10] below update the array in place.
                    rectn = segmentations[detId, :]
                    rect_int =  utils.intersect( rectn, gt_rect )
                    int_area = utils.area(rect_int)
                    union_area = utils.area(utils.union(rectn, gt_rect))
                    ratio = int_area / float(union_area)
                    rectn[10] = max(ratio, rectn[10])
                    # gt_rect[6]: best ratio among detections with column 9
                    # above workPoint.
                    if rectn[9] > workPoint:
                        gt_rect[6] =  max(ratio, gt_rect[6])
                    if ratio > best_match:
                        best_match = ratio
                        best_segm = segmentations[detId, :]
                    if ratio > best_match_line and rectn[7] == 1.0 :
                        best_match_line = ratio
                    # While no single-character match has been found yet, also
                    # test this detection against this box merged with the
                    # next ground-truth box (two characters in one region).
                    if best_match < minSingleOverlap: 
                        if k < len(gt_rects) - 1:
                            gt_rect2 = gt_rects[k + 1]
                            chars2Rect = utils.union(gt_rect2, gt_rect)
                            rect_int = utils.intersect( rectn, chars2Rect )
                            int_area = utils.area(rect_int)
                            union_area = utils.area(utils.union(rectn, chars2Rect))
                            ratio = int_area / float(union_area)
                            rectn[10] = max(ratio, rectn[10]) 
                            if ratio > best_match2:
                                if ratio > MIN_SEGM_OVRLAP:
                                    segm2chars += 1
                                    best_match2 = ratio
                                    gt_rect[5] = ratio
                                    gt_rect2[5] = ratio
                # Draw the ground-truth box: green when matched, magenta
                # otherwise; thicker when the best ratio exceeds 0.7.
                thickness = 1
                color = (255, 0, 255)
                if best_match >= minSingleOverlap:
                    color = (0, 255, 0)
                if best_match > 0.7:
                    thickness = 2
                cv2.rectangle(imgc, (gt_rect[0], gt_rect[1]), (gt_rect[2], gt_rect[3]), color, thickness)
                recall += best_match
                recallNonMax += gt_rect[6]
                if best_match >= minSingleOverlap:
                    recall05 += best_match
                    rcurrent05 += best_match
                    # NOTE(review): the missing_segm / missing_segm2 handling
                    # below sits in the "matched" branch and only creates
                    # empty lists; an ``else:`` and the ``append`` calls
                    # appear to be missing from this listing.
                    if not missing_segm.has_key(image):
                        missing_segm[image] = []
                    if gt_rect[5] < MIN_SEGM_OVRLAP:
                        if not missing_segm2.has_key(image):
                            missing_segm2[image] = []
                        segm2chars += 1
                if gt_rect[6] < minSingleOverlap:
                    if not missing_segmNonMax.has_key(image):
                        missing_segmNonMax[image] = []
                    missing_segmNonMaxCount += 1
                rcurrent += best_match
                rcurrentNonMax += gt_rect[6]
                recallDen +=  1   
                rden += 1
                # Accumulate the match ratio by the grey value at the best
                # segmentation's (column 5, column 4) pixel.
                if best_match > 0 and process_color != 1:
                    val = img[best_segm[5], best_segm[4]]
                    histSegm[val] += best_match
                    histSegmCount[val] += 1
            # ---- Precision: best-matching ground-truth box for every detection.
            pcurrent = 0
            for detId in range(segmentations.shape[0]):
                best_match = 0
                rectn = segmentations[detId, :]
                for gt_rect in gt_rects:
                    rect_int =  utils.intersect( rectn, gt_rect )
                    int_area = utils.area(rect_int)
                    union_area = utils.area(utils.union(rectn, gt_rect))
                    ratio = int_area / float(union_area)
                    if ratio > best_match:
                        best_match = ratio
                precision += best_match
                pcurrent += best_match
                precisionDen +=  1   
                pden += 1
        # NOTE(review): in the two guards below the division follows the
        # zero-assignment inside the same ``if``; an ``else:`` appears to be
        # missing, otherwise this divides by zero exactly when the
        # denominator is 0.
        if pden == 0:
            pcurrent = 0
            pcurrent = pcurrent / pden
        if rden == 0:
            rcurrent = 0
            rcurrent05 = 0
            rcurrentNonMax = 0
            rcurrent = rcurrent / rden
            rcurrent05 = rcurrent05 / rden
            rcurrentNonMax = rcurrentNonMax / rden
        # Per-image counts of detections whose best ratio (column 10) exceeds
        # 0.4 / 0.5 / 0.6 / 0.7.
        segmHistogram.append([ segmentations.shape[0], segmentations[segmentations[:, 10] > 0.4].shape[0], segmentations[segmentations[:, 10] > 0.5].shape[0], segmentations[segmentations[:, 10] > 0.6].shape[0], segmentations[segmentations[:, 10] > 0.7].shape[0] ])
        segmWordHistogram.append([segmentations.shape[0], segmentations[np.bitwise_or(segmentations[:, 10] > 0.5, segmentations[:, 11] > 0.5 )].shape[0]])
        results.append((baseName, rcurrent, pcurrent, rcurrent05))

    # NOTE(review): same pattern as above -- the ``else:`` lines before the
    # divisions appear to be missing; also the zero branches assign
    # pcurrent / rcurrent rather than precision / recall.
    if precisionDen == 0:
        pcurrent = 0
        precision = precision / precisionDen
    if recallDen == 0:
        rcurrent = 0
        recall = recall / recallDen
        recall05 = recall05 / recallDen
        recallNonMax = recallNonMax / recallDen
    wordRecall = wordRecall / max(1, wordRecallDen)
    # NOTE(review): the ``try:`` header before the next line and the body of
    # the ``except`` clause are missing from this listing.
        histSegm = histSegm / max(1, histSegmCount)
    except ValueError:
    print('Evalation Results:')
    print( 'recall: {0}, precision: {1}, recall 0.5: {2}, recall NonMax: {3}'.format(recall, precision, recall05, recallNonMax) )
    kpTimes = np.histogram(np.asarray(times)[:, 0], bins=20)
    print('Keypoint Time Histogram: {0}'.format(kpTimes))
    print('Detection statistics:')    
    # Turn the per-character counts into fractions of that character's
    # occurrences and convert the nested defaultdicts to plain dicts.
    for letter in letterKeypointHistogram.keys():
        for num in letterKeypointHistogram[letter].keys():
            letterKeypointHistogram[letter][num] = letterKeypointHistogram[letter][num] / float(letterHistogram[letter])
        for num in octaveLetterKeypointHistogram[letter].keys():
            octaveLetterKeypointHistogram[letter][num] = octaveLetterKeypointHistogram[letter][num] / float(letterHistogram[letter])
        letterKeypointHistogram[letter] = dict(letterKeypointHistogram[letter])
        octaveLetterKeypointHistogram[letter] = dict(octaveLetterKeypointHistogram[letter])
    print('Perfect words: {0}'.format(perfectWords))
    # NOTE(review): the right-hand side of this assignment is missing from
    # the listing.
    eval_date =
    # Persist all aggregates; np.savez appends ".npz" to the file name.
    np.savez('{0}/evaluation'.format(outputDir), recall=recall, recall05 = recall05, recallNonMax=recallNonMax, precision=precision, eval_date=eval_date, regionsCount=regionsCount, inputDir = inputDir, hist = hist, histSegm = histSegm, stat=stat, letterKeypointHistogram = dict(letterKeypointHistogram), missing_letters=missing_letters, octaveLetterKeypointHistogram=dict(octaveLetterKeypointHistogram), missing_segm=missing_segm, 
             times=np.asarray(times), histFp = histFp, gtSegmCount = gtSegmCount, wordRecall=wordRecall, histDist=histDist, histDistFp = histDistFp, histDistMax=histDistMax, histDistMaxFp=histDistMaxFp, hist2dDist=hist2dDist, hist2dDistFp=hist2dDistFp, hist2dDistScore=hist2dDistScore, hist2dDistScoreFp=hist2dDistScoreFp, histDistMaxWhite=histDistMaxWhite, histDistMaxWhiteFp=histDistMaxWhiteFp, wordsOk=wordsOk, wordsFp=wordsFp, diffMaxOctavesMap = diffMaxOctavesMap, diffScoreOctavesMap = diffScoreOctavesMap, 
             missing_segm2=missing_segm2, segmHistogram=segmHistogram, segmWordHistogram=segmWordHistogram, regionsCountNonMax=regionsCountNonMax, missing_segmNonMax=missing_segmNonMax)
    print( "GT segmentations count {0}".format(gtSegmCount) )
    # NOTE(review): divides by keypointsTotal without a zero guard.
    print('FasTex Inside {0}/{1} ({2})'.format(keypointsTotalInside, keypointsTotal, keypointsTotalInside / float(keypointsTotal) ))
    print('FasText time: {0}, Orb time: {1} '.format( np.sum(times, 0)[0], orbTime))
    print('2 Chars Segmentation: {0}'.format(segm2chars) )
    print('NonMax Regions Count: {0}/{1}'.format(regionsCountNonMax, missing_segmNonMaxCount))
def creat_mask_VOC_labelfile():
    """Black out ignore regions in each image and write a VOC label file for it.

    Reads one line per image from ``gt_label_file_path`` (image path followed
    by space-separated box coordinates, reshaped to rows of 4) and the ignore
    boxes from ``ignore_bbox_file_path``.  For every image the ignore boxes
    are zeroed in a copy of the image, each ground-truth box is compared with
    the ignore boxes by intersection area, and the result is handed to
    ``create_VOC_label_file`` while the masked image is saved with
    ``cv2.imwrite``.

    All paths (``gt_label_file_path``, ``ignore_bbox_file_path``,
    ``images_root_path``, ``image_label_save_rootpath``, ``xml_file_path``,
    ``image_path``) and the helpers ``area`` / ``intersect`` are names defined
    outside this function.

    NOTE(review): this listing appears to have lost lines -- both
    ``plt.Rectangle(`` calls are never closed and ``efficient_bboxs`` is never
    filled -- so it does not parse as-is.  Restore the missing lines from the
    original source before relying on it.
    """
    with open(gt_label_file_path, 'r') as f:
        gt_lines = f.readlines()
    with open(ignore_bbox_file_path, 'r') as f:
        ign_lines = f.readlines()
    plt.figure(figsize=(10, 10))
    for i in gt_lines:
        # info[0] is the image path relative to images_root_path; the
        # remaining fields are box coordinates.
        info = i.split(' ')
        img_origin = pimg.imread(images_root_path + info[0])
        img_copy = img_origin.copy()
        plt.imshow(img_copy, aspect='equal')

        # Ignore-box lines are matched on the first 9 characters of the image
        # path; only the first matching line (ign_[0]) is used.
        ign_ = [j.strip().split(' ')[1:] for j in ign_lines if j.split(' ')[0] == info[0][:9]]
        ign_bboxs = []
        if ign_:
            ign_bboxs = [float(b) for b in ign_[0]]
            # print(ign_box)
            ign_bboxs = np.array(ign_bboxs, dtype=np.float32).reshape(-1, 4)
            for b in ign_bboxs:
                # NOTE(review): this call is never closed in the listing; the
                # remaining keyword arguments and (presumably) the line that
                # adds the patch to the axes are missing.
                rect = plt.Rectangle((b[0], b[1]), b[2] - b[0],
                                     b[3] - b[1], fill=True,

                # add black mask to image
                img_copy[int(b[1]):int(b[3]), int(b[0]):int(b[2]), :] = 0

        #print (ign_bbox)
        bbox = [float(b) for b in info[1:]]
        boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)

        # NOTE(review): nothing in the visible code appends to this list, so
        # create_VOC_label_file below always receives it empty.
        efficient_bboxs = []

        # Judge how much of each ground-truth box is covered by an ignore box.
        for b in boxes:
            bbox_area = area(b)
            ratio_intersect = 0.0
            area_intersect = []

            for ign_bbox in ign_bboxs:
                area_intersect.append(area(intersect(b, ign_bbox)))

            # NOTE(review): when the image has no ignore boxes, area_intersect
            # is empty and max() raises ValueError; bbox_area == 0 would also
            # divide by zero.
            max_area_intersect = max(area_intersect)
            ratio_intersect = max_area_intersect / bbox_area

            if ratio_intersect >= 0.2:
                # NOTE(review): this call is never closed in the listing, and
                # whatever followed it (including any ``else`` branch) is
                # missing.
                rect = plt.Rectangle((b[0], b[1]), b[2] - b[0],
                                     b[3] - b[1], fill=False,
                                     edgecolor=(0, 1, 0),

        # Flatten "<dir>/<file>" into "<dir>_<file>" for the output name.
        image_new_name = info[0].split('/')[0] + '_' + info[0].split('/')[1]
        #print (efficient_bboxs)
        create_VOC_label_file(os.path.join(image_label_save_rootpath, xml_file_path), image_new_name, img_origin.shape, efficient_bboxs)

        # The masked copy is converted from RGB to BGR before cv2.imwrite.
        image_save_path = os.path.join(os.path.join(image_label_save_rootpath, image_path), image_new_name)
        cv2.imwrite(image_save_path, cv2.cvtColor(img_copy, cv2.COLOR_RGB2BGR))
Esempio n. 21
def main():
    """Capture faces from camera 0 and register them as a new database entry.

    Loop: read a frame, and when the 'c' key is pressed run face detection,
    crop the largest detected face and compute its embedding.  After
    ``max_num`` captures a new document ``{'name': ..., 'feats': ...}`` is
    inserted into ``face_collection`` and the collected face images are
    written below ``data/<new_id>/``.  The annotated frame is shown in a
    window titled "face registration".

    NOTE(review): this listing appears to have lost lines and tokens (see the
    NOTE(review) comments below), so it does not parse as-is.  In particular
    nothing visible appends to ``face_features`` / ``face_imgs``; restore the
    missing lines from the original source before relying on it.
    """
    cam = cv2.VideoCapture(0)
    # load data
    # NOTE(review): the next line is garbled -- it looks like two statements
    # fused together (presumably ``config.read("config.ini")`` followed the
    # constructor -- confirm).
    config = configparser.ConfigParser()"config.ini")
    face_database, face_collection = loadMongoData(config)
    # load models
    device = "MYRIAD"
    data_dir = "data"
    plugin = IEPlugin(device, plugin_dirs=None)
    face_embed = FaceEmbedding(plugin)
    face_detect = MobileFaceDetect(plugin)
    # params
    # NOTE(review): fd_conf, fm_threshold, label and period are not read
    # anywhere in the visible code.
    fd_conf = 0.5
    fm_threshold = 0.6
    label = "new"
    period = 1
    button_pressed = False
    # Number of captures collected before the entry is written.
    max_num = 3
    num = 0
    face_features = []
    face_imgs = []

    s = time.time()
    while (True):
        # NOTE(review): right-hand side missing (presumably ``cam.read()``).
        ret, frame =
        if not ret:
            # On a failed read the capture device is re-opened.
            print("dead cam")
            cam = cv2.VideoCapture(0)
            # NOTE(review): this ``if`` has no body in the listing.
            if cv2.waitKey(1) & 0xFF == ord('q'):
        face_bboxes = []
        # if time.time() - s > period:
        # Pressing 'c' arms a single capture.
        if cv2.waitKey(1) & 0xFF == ord('c'):
            button_pressed = True
        if button_pressed and (num < max_num):
            face_bboxes = face_detect.inference(frame)
        if (len(face_bboxes) > 0) and button_pressed:
            # Keep only the detection with the largest area.
            areas = [area(box) for box in face_bboxes]
            max_id = np.argmax(np.asarray(areas))
            mfb = face_bboxes[max_id]
            # Crop rows mfb[1]:mfb[3] and columns mfb[0]:mfb[2].
            main_face = frame[mfb[1]:mfb[3], mfb[0]:mfb[2], :]
            # TODO real face detection
            # TODO face alignment
            face_feature = face_embed.inference(main_face)
            face_feature = face_feature.tolist()
            # NOTE(review): face_feature / main_face are never stored in
            # face_features / face_imgs in the visible code.
            num += 1
            button_pressed = False
            s = time.time()
            # visualize for debug
            cv2.rectangle(frame, (mfb[0], mfb[1]), (mfb[2], mfb[3]),
                          (255, 0, 0), 2)
            # NOTE(review): the lines below are the tail of a call whose
            # first line and closing parenthesis are missing (presumably a
            # ``cv2.putText`` drawing the capture index -- confirm).
                        str(num), (mfb[0], mfb[1]),
                        0.6, (0, 0, 255),

        if num >= max_num:
            # add new face features to database
            # The current count() of face_database is used as the new id/name.
            new_id = face_database.count()
            new_face = {'name': str(new_id), 'feats': face_features}
            p_id = face_collection.insert_one(new_face).inserted_id
            # commit changes
            # NOTE(review): this call is never closed in the listing; its
            # remaining argument(s) are missing.
            face_collection.update_one({'_id': p_id}, {"$set": new_face},

            # save images
            # NOTE(review): img_dir is not created anywhere in the visible
            # code before files are written into it.
            img_dir = os.path.join(data_dir, str(new_id))
            for i, face in enumerate(face_imgs):
                img_path = os.path.join(img_dir, "{}.jpg".format(i))
                cv2.imwrite(img_path, face)
            # Reset the capture state for the next registration.
            face_imgs = []
            face_features = []
            num = 0
            s = time.time()

        cv2.imshow("face registration", frame)
        # NOTE(review): this ``if`` has no body in the listing (presumably
        # ``break`` to leave the loop on 'q' -- confirm).
        if cv2.waitKey(1) & 0xFF == ord('q'):
Esempio n. 22
def main():