Example #1
def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
  test_net method.
  """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            if len(dets) == 0:
                continue

            x1 = dets[:, 0]
            y1 = dets[:, 1]
            x2 = dets[:, 2]
            y2 = dets[:, 3]
            scores = dets[:, 4]
            inds = np.where((x2 > x1) & (y2 > y1))[0]
            dets = dets[inds, :]
            if len(dets) == 0:
                continue

            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
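The nms helper called throughout these examples is the repo's own kernel (often a Cython or GPU implementation) and is not shown. A minimal pure-NumPy sketch with the same interface, taking dets as an N x 5 array of (x1, y1, x2, y2, score) and returning the indices of the kept rows, might look as follows; this is an illustrative assumption, not the repo's implementation:

import numpy as np

def nms(dets, thresh):
    # Greedy NMS: repeatedly keep the highest-scoring box and drop every
    # remaining box whose IoU with it exceeds thresh.
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the winner with all remaining boxes.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]
    return keep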
def tester(fusion, NUM, bbox_vote=False, max_per_image=400):
    all_boxes = [[[] for _ in range(99999)] for _ in range(501)]

    for i in tqdm(range(99999),
                  file=sys.stdout,
                  leave=False,
                  dynamic_ncols=True):
        for j in range(1, 501):
            det_boxes = np.vstack([fusion[q][j][i] for q in range(NUM)])
            keep = nms(det_boxes, 0.4)
            det_boxes_after_nms = det_boxes[keep, :]
            if bbox_vote:
                cls_dets_after_vote = bbox_voting(det_boxes_after_nms,
                                                  det_boxes,
                                                  threshold=0.5)
                all_boxes[j][i] = cls_dets_after_vote
            else:
                all_boxes[j][i] = det_boxes_after_nms

        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, 501)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, 501):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
    # step 3: save and eval
    with open('output/model_all-test-mst-nms0.4-bbox-vote0.5.pkl', 'wb') as f:
        pickle.dump(all_boxes, f, protocol=pickle.HIGHEST_PROTOCOL)
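bbox_voting above is also left undefined. In box-voting schemes (e.g. Gidaris & Komodakis, 2015), each NMS survivor is refined by a score-weighted average of all pre-NMS boxes that overlap it. A minimal sketch under that assumption, matching the (nms_dets, all_dets, threshold) call signature used above:

def bbox_voting(nms_dets, all_dets, threshold=0.5):
    # Hypothetical sketch: replace each kept box's coordinates with the
    # score-weighted mean of all original boxes with IoU >= threshold.
    # Each kept box is itself in all_dets, so every box has at least one voter.
    voted = nms_dets.copy()
    areas = (all_dets[:, 2] - all_dets[:, 0] + 1) * \
            (all_dets[:, 3] - all_dets[:, 1] + 1)
    for k in range(nms_dets.shape[0]):
        box = nms_dets[k]
        xx1 = np.maximum(box[0], all_dets[:, 0])
        yy1 = np.maximum(box[1], all_dets[:, 1])
        xx2 = np.minimum(box[2], all_dets[:, 2])
        yy2 = np.minimum(box[3], all_dets[:, 3])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
        iou = inter / (box_area + areas - inter)
        voters = all_dets[iou >= threshold]
        weights = voters[:, 4:5]
        voted[k, :4] = (voters[:, :4] * weights).sum(axis=0) / weights.sum()
    return voted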
Example #3
    def forward(self, inputs):

        if self.training:
            img_batch, annotations = inputs
        else:
            img_batch = inputs

        x = self.conv1(img_batch)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)

        features = self.fpn([x2, x3, x4])

        regression = torch.cat(
            [self.regressionModel(feature) for feature in features], dim=1)

        classification = torch.cat(
            [self.classificationModel(feature) for feature in features], dim=1)

        anchors = self.anchors(img_batch)

        if self.training:
            return self.focalLoss(classification, regression, anchors,
                                  annotations)
        else:
            transformed_anchors = self.regressBoxes(anchors, regression)
            transformed_anchors = self.clipBoxes(transformed_anchors,
                                                 img_batch)

            scores = torch.max(classification, dim=2, keepdim=True)[0]

            scores_over_thresh = (scores > 0.05)[0, :, 0]

            if scores_over_thresh.sum() == 0:
                # no boxes to NMS, just return
                return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)]

            classification = classification[:, scores_over_thresh, :]
            transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
            scores = scores[:, scores_over_thresh, :]

            anchors_nms_idx = nms(
                torch.cat([transformed_anchors, scores],
                          dim=2)[0, :, :].cpu().numpy(), 0.5)

            nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(
                dim=1)

            return [
                nms_scores, nms_class, transformed_anchors[0,
                                                           anchors_nms_idx, :]
            ]
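A usage sketch for this forward pass (the variable names here are assumptions): in training mode the module consumes a batch plus annotations and returns the focal-loss terms, while in eval mode it consumes the batch alone and returns scored, NMS-filtered detections.

retinanet.train()
loss_terms = retinanet([img_batch, annotations])  # focal-loss output

retinanet.eval()
with torch.no_grad():
    nms_scores, nms_class, boxes = retinanet(img_batch)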
Example #4
def proposal(cls_pre, box_pre_y, box_pre_offset, img_size):
    """Compute proposals from the CTPN model outputs and filter them with NMS.

    Inputs: cls_pre, box_pre_y, box_pre_offset (the CTPN model outputs) and
    img_size (the original image size).
    Returns: scores, proposals, x_left_fixed, x_right_fixed.
    """
    h_feat, w_feat = cls_pre.shape[0:2]
    K = h_feat * w_feat
    base_anchors = gtf.gen_base_anchors()
    A = base_anchors.shape[0]
    base_anchors = base_anchors.reshape(1, A, 4)
    shift_x = np.arange(w_feat) * gtf._stripe  # x-direction anchor offset for each feature-map point
    shift_y = np.arange(h_feat) * gtf._stripe  # y-direction anchor offset for each feature-map point
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # x/y offsets over the 2D grid
    shift_x = shift_x.ravel()  # flatten 2D to 1D
    shift_y = shift_y.ravel()
    shift = np.stack([shift_x, shift_y, shift_x, shift_y]).transpose()
    shift = shift.reshape(K, 1, 4)
    all_anchors = base_anchors + shift
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = K * A

    box_pre_y = np.reshape(box_pre_y, [-1, 2])
    box_pre_offset = np.reshape(box_pre_offset, -1)
    boxes, x_left_fixed, x_right_fixed = target_calc_inv_no_side_labels(
        all_anchors, box_pre_y, box_pre_offset)
    index_inside = np.where((boxes[:, 0] >= 0) & (boxes[:, 1] >= 0)
                            & (boxes[:, 2] < img_size[1])
                            & (boxes[:, 3] < img_size[0]))[0]
    proposals = boxes[index_inside, :]
    x_left_fixed = x_left_fixed[index_inside]
    x_right_fixed = x_right_fixed[index_inside]
    ### get the confidence scores for each anchor
    cls_pre = np.reshape(cls_pre, [-1, 2])
    cls_softmax = tf.nn.softmax(cls_pre, axis=1)
    scores = cls_softmax.numpy()[:, 1]
    scores = scores[index_inside]
    ### keep the proposals with score > 0.7
    index_keep = np.where(scores > 0.7)[0]
    proposals = proposals[index_keep]
    x_left_fixed = x_left_fixed[index_keep]
    x_right_fixed = x_right_fixed[index_keep]
    scores = scores[index_keep]

    ### NMS filter
    order = scores.argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]
    x_left_fixed = x_left_fixed[order]
    x_right_fixed = x_right_fixed[order]
    scores = np.expand_dims(scores, axis=1)
    nms_input = np.hstack((proposals, scores)).astype(np.float32)
    nms_thresh = 0.2
    keep = nms.nms(nms_input, nms_thresh)
    proposals = proposals[keep, :]
    x_left_fixed = x_left_fixed[keep]
    x_right_fixed = x_right_fixed[keep]
    scores = scores[keep]
    return scores, proposals, x_left_fixed, x_right_fixed
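One caveat in this snippet: cls_softmax.numpy() requires TensorFlow eager execution. If the logits are already plain NumPy arrays, the same foreground probabilities can be computed without TensorFlow, for example with this small numerically stable softmax (an alternative sketch, not part of the original code):

def softmax_np(logits):
    # Row-wise softmax over the last axis, shifted for numerical stability.
    shifted = logits - logits.max(axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=1, keepdims=True)

scores = softmax_np(np.reshape(cls_pre, [-1, 2]))[:, 1]  # foreground prob per anchor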
Example #5
def test_net(sess, net, imdb, weights_filename, max_per_image=100, thresh=0.):
    """Test a Fast R-CNN network on an image database."""
    np.random.seed(cfg.RNG_SEED)
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #  all_boxes[cls][image] = N x 5 array of detections in
    #  (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    for i in range(num_images):
        im = cv2.imread(imdb.image_path_at(i))

        _t['im_detect'].tic()
        scores, boxes = im_detect(sess, net, im)
        _t['im_detect'].toc()

        _t['misc'].tic()

        # skip j = 0, because it's the background class
        for j in range(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
              .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                _t['misc'].average_time))

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
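The Timer objects above expose tic/toc bookkeeping with a running average; a minimal stand-in with that interface, assuming the usual py-faster-rcnn semantics:

import time

class Timer(object):
    # Minimal tic/toc timer that tracks total and average wall time.
    def __init__(self):
        self.total_time = 0.0
        self.calls = 0
        self.average_time = 0.0
        self.start_time = 0.0

    def tic(self):
        self.start_time = time.time()

    def toc(self):
        self.calls += 1
        self.total_time += time.time() - self.start_time
        self.average_time = self.total_time / self.calls
        return self.average_time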
Example #6
def _nms_filter(all_seg_cls_frm_dets, max_per_frame=20):
    all_seg_frm_cls_dets = []
    for seg_idx in range(len(all_seg_cls_frm_dets)):
        seg_dets = all_seg_cls_frm_dets[seg_idx]

        all_seg_frm_cls_dets.append([])
        cls_num = len(seg_dets)
        seg_len = len(seg_dets[0])

        # init new container
        for seg_frm_idx in range(seg_len):
            all_seg_frm_cls_dets[seg_idx].append([])
            for j in range(cls_num):
                all_seg_frm_cls_dets[seg_idx][seg_frm_idx].append([])

        # perform NMS
        for cls_idx, cls_dets in enumerate(seg_dets):
            for seg_frm_idx, frm_dets in enumerate(cls_dets):
                keep = nms(frm_dets, 0.3)
                all_seg_frm_cls_dets[seg_idx][seg_frm_idx][cls_idx] = [
                    frm_dets[i] for i in keep if frm_dets[i][4] >= 0.1
                ]
        # reserve top N per frame
        for seg_frm_idx in range(seg_len):
            frm_dets = []
            for cls_idx in range(cls_num):
                seg_frm_cls_dets = all_seg_frm_cls_dets[seg_idx][seg_frm_idx][
                    cls_idx]
                for det in seg_frm_cls_dets:
                    frm_dets.append({
                        'det': det,
                        'scr': det[-1],
                        'cls': cls_idx
                    })
            sorted_frm_dets = sorted(frm_dets,
                                     key=lambda item: item['scr'],
                                     reverse=True)[:max_per_frame]

            frm_cls_dets = [[] for _ in range(cls_num)]
            for frm_det in sorted_frm_dets:
                frm_cls_dets[frm_det['cls']].append(frm_det['det'])
            for cls_idx in range(cls_num):
                all_seg_frm_cls_dets[seg_idx][seg_frm_idx][
                    cls_idx] = frm_cls_dets[cls_idx]

    return all_seg_frm_cls_dets
Example #7
def demo(sess, net, image_dir, image_name):
    im_file = os.path.join(image_dir, image_name)
    im = cv2.imread(im_file)

    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
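vis_detections is not defined in this excerpt; a minimal matplotlib sketch with the same (im, class_name, dets, thresh) interface, offered as an assumption rather than the repo's exact helper:

import matplotlib.pyplot as plt

def vis_detections(im, class_name, dets, thresh=0.5):
    # Draw every detection scoring above thresh on the (BGR, OpenCV-style) image.
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return
    fig, ax = plt.subplots()
    ax.imshow(im[:, :, ::-1])  # BGR -> RGB for matplotlib
    for i in inds:
        x1, y1, x2, y2, score = dets[i]
        ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                   fill=False, edgecolor='red', linewidth=2))
        ax.text(x1, y1 - 2, '{:s} {:.3f}'.format(class_name, score),
                color='white', fontsize=8)
    plt.show()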
Example #8
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, num_anchors):
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # get the scores and the bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    # decode box positions from the anchors (predicted relative to the grid cell, as in YOLO v2) to generate proposals
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # clip the proposal regions to the image boundary
    proposals = clip_boxes(proposals, im_info[:2])

    # rank the proposals: flatten the scores to 1D and sort in descending order
    order = scores.ravel().argsort()[::-1]
    # keep the top pre_nms_topN (e.g. 12000) proposals
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # apply non-maximum suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # keep the top post_nms_topN (e.g. 2000) proposals
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
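bbox_transform_inv decodes the RPN's (dx, dy, dw, dh) regression deltas against the anchors; a NumPy sketch of the standard R-CNN parameterization it is assumed to implement:

def bbox_transform_inv(boxes, deltas):
    # Decode (dx, dy, dw, dh) deltas against anchors given as (x1, y1, x2, y2).
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    pred_ctr_x = deltas[:, 0] * widths + ctr_x
    pred_ctr_y = deltas[:, 1] * heights + ctr_y
    pred_w = np.exp(deltas[:, 2]) * widths
    pred_h = np.exp(deltas[:, 3]) * heights

    pred_boxes = np.zeros_like(deltas)
    pred_boxes[:, 0] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes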
Example #9
def tracking_by_match(vid_dets, thr=0.6, max_traj_num=50):

    # init: nms, add tids
    for cls_idx, cls_dets in enumerate(vid_dets):
        for frm_idx, frm_dets in enumerate(cls_dets):
            if frm_dets is None or len(frm_dets) == 0:
                continue
            keep = nms(frm_dets, 0.3)
            frm_dets = frm_dets[keep]
            frm_dets_new = np.zeros((frm_dets.shape[0], frm_dets.shape[1] + 1))
            frm_dets_new[:, :frm_dets.shape[1]] = frm_dets
            frm_dets_new[:, -1] = -1
            cls_dets[frm_idx] = frm_dets_new

    curr_tid = 0
    tid2scr = {}
    tid2cnt = {}

    for cls_idx, cls_dets in enumerate(vid_dets):
        for frm_idx in range(len(cls_dets) - 1):
            curr_frm_dets = cls_dets[frm_idx]
            next_frm_dets = cls_dets[frm_idx + 1]

            for det in curr_frm_dets:
                if det[-1] == -1:
                    det[-1] = curr_tid
                    tid2scr[curr_tid] = det[4]
                    tid2cnt[curr_tid] = 1
                    curr_tid += 1

                if next_frm_dets is None or len(next_frm_dets) == 0:
                    continue
                curr_x1, curr_y1, curr_x2, curr_y2, scr, tid = det
                next_x1s = next_frm_dets[:, 0]
                next_y1s = next_frm_dets[:, 1]
                next_x2s = next_frm_dets[:, 2]
                next_y2s = next_frm_dets[:, 3]

                i_x1s = np.maximum(curr_x1, next_x1s)
                i_y1s = np.maximum(curr_y1, next_y1s)
                i_x2s = np.minimum(curr_x2, next_x2s)
                i_y2s = np.minimum(curr_y2, next_y2s)

                i_areas = np.maximum((i_x2s - i_x1s + 1), 0) * np.maximum(
                    (i_y2s - i_y1s + 1), 0)
                u_areas = (curr_x2 - curr_x1 + 1) * (curr_y2 - curr_y1 + 1) + \
                          (next_x2s - next_x1s + 1) * (next_y2s - next_y1s + 1) - i_areas
                ious = i_areas / u_areas

                best_det_id = np.argmax(ious)
                if ious[best_det_id] > thr:
                    next_frm_dets[best_det_id, -1] = tid
                    tid2scr[tid] += next_frm_dets[best_det_id, 4]
                    tid2cnt[tid] += 1

            if frm_idx == len(cls_dets) - 2:
                for det in next_frm_dets:
                    if det[-1] == -1:
                        det[-1] = curr_tid
                        tid2scr[curr_tid] = det[-2]
                        tid2cnt[curr_tid] = 1
                        curr_tid += 1

    tid2conf = {}
    for tid in tid2cnt:
        traj_conf = tid2scr[tid] / tid2cnt[tid]
        if traj_conf >= 0.01:
            tid2conf[tid] = (tid2scr[tid] * 1.0 / tid2cnt[tid] +
                             tid2cnt[tid] * 10.0 / len(vid_dets[0]))

    reserved_tid_conf_list = sorted(tid2conf.items(),
                                    key=lambda item: item[1],
                                    reverse=True)[:max_traj_num]
    reserved_tids = {tid: conf for tid, conf in reserved_tid_conf_list}

    all_boxes = []
    for cls_idx in range(len(vid_dets)):
        cls_boxes = []
        for frm_idx in range(len(vid_dets[0])):
            cls_boxes.append({})
        all_boxes.append(cls_boxes)

    for cls_idx, cls_dets in enumerate(vid_dets):
        for frm_idx, frm_dets in enumerate(cls_dets):
            for det in frm_dets:
                tid = det[-1]
                if tid not in reserved_tids:
                    continue
                else:
                    det[4] = tid2conf[tid]
                    all_boxes[cls_idx][frm_idx][tid] = det.tolist()

    for cls_id in range(len(all_boxes)):
        cls_boxes = all_boxes[cls_id]
        for frm_id in range(len(cls_boxes)):
            cls_boxes[frm_id] = np.array(list(cls_boxes[frm_id].values()))
    return all_boxes
Example #10
    def process(self, img_name):
        txt_path = self.result_dir + img_name[:-4] + '.txt'
        if os.path.exists(txt_path):
            with open(txt_path, 'r') as f_txt:
                txt_items = f_txt.readlines()
                return len(txt_items), img_name

        img_path = os.path.join(self.img_dir, img_name)
        im = Image.open(img_path).convert('RGB')
        if cfg.predict_cut_text_line:
            im_array = np.array(im, dtype=np.float32)

        d_width, d_height = resize_image(im.size)
        scale_ratio_w = d_width / im.width
        scale_ratio_h = d_height / im.height
        im = im.resize((d_width, d_height), Image.BICUBIC)

        x = transform(im)
        x = x[np.newaxis, :]
        # lock.acquire()
        y = self.model(x.cuda()).cpu().detach().numpy()
        # lock.release()

        y = np.squeeze(y)
        y[:, :, :3] = sigmoid(y[:, :, :3])
        cond = np.greater_equal(y[:, :, 0], cfg.pixel_threshold)
        activation_pixels = np.asarray(np.where(cond), dtype=np.int32)

        quad_scores, quad_after_nms = nms(y, activation_pixels[0],
                                          activation_pixels[1])

        if self.isDraw:
            quad_im = im.copy()
            draw = ImageDraw.Draw(im)
            for i, j in zip(activation_pixels[0], activation_pixels[1]):
                px = (j + 0.5) * cfg.pixel_size
                py = (i + 0.5) * cfg.pixel_size
                line_width, line_color = 1, 'aqua'
                if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
                    if y[i, j, 2] < cfg.trunc_threshold:
                        line_width, line_color = 2, 'yellow'
                    elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                        line_width, line_color = 2, 'green'
                draw.line(
                    [(px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size),
                     (px + 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size),
                     (px + 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size),
                     (px - 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size),
                     (px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size)],
                    width=line_width,
                    fill=line_color)
            im.save(self.result_dir + img_name[:-4] + '_act.jpg')

            quad_draw = ImageDraw.Draw(quad_im)
        txt_items = []
        invalid = 0
        for score, geo, s in zip(quad_scores, quad_after_nms,
                                 range(len(quad_scores))):
            if np.amin(score) > 0:
                if self.isDraw:
                    quad_draw.line([
                        tuple(geo[0]),
                        tuple(geo[1]),
                        tuple(geo[2]),
                        tuple(geo[3]),
                        tuple(geo[0])
                    ],
                                   width=2,
                                   fill='aqua')
                if cfg.predict_cut_text_line:
                    self.cut_text_line(geo, scale_ratio_w, scale_ratio_h,
                                       im_array, img_name, s)
                rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                rescaled_geo_list = np.reshape(rescaled_geo, (8, )).tolist()
                txt_item = ','.join(map(str, rescaled_geo_list))
                txt_items.append(txt_item + '\n')
            else:
                invalid += 1
        if self.isDraw:
            quad_im.save(self.result_dir + img_name[:-4] + '_predict.jpg')

        with open(txt_path, 'w') as f_txt:
            f_txt.writelines(txt_items)
        return (len(txt_items), img_name)