Example #1
def test_net(sess, net, imdb, weights_filename):
    """Test a Fast R-CNN network on an image database."""
    timer = Timer()
    timer.tic()
    np.random.seed(cfg.RNG_SEED)
    num_images = len(imdb.image_index)
    output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    # all_boxes = []
    all_boxes = [[[] for _ in range(imdb.num_classes)]
                 for _ in range(num_images)]
    print(all_boxes)
    for i in range(num_images):
        print('***********', imdb.image_path_at(i))
        img = cv2.imread(imdb.image_path_at(i))
        img, scale = resize_im(img, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)
        scores, boxes = test_ctpn(sess, net, img)
        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
        print(('Detection took {:.3f}s for '
               '{:d} object proposals').format(timer.total_time, boxes.shape[0]))
        boxes = check_unreasonable_box(boxes, scale)
        all_boxes[i][1] += boxes
    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    imdb.evaluate_detections(all_boxes, output_dir)
    timer.toc()
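A note on the shared utility: every example in this listing times its work with the same Timer tic/toc helper from the py-faster-rcnn codebase. Below is a minimal sketch of the interface the examples assume; treat it as an approximation, and note that some variants expose total_time() and average_time() as methods instead of attributes (Examples #36 and #37 call them that way).

import time

class Timer(object):
    """Minimal tic/toc timer matching the interface used in these examples."""

    def __init__(self):
        self.total_time = 0.0    # seconds accumulated across all tic/toc pairs
        self.calls = 0
        self.start_time = 0.0
        self.diff = 0.0
        self.average_time = 0.0

    def tic(self):
        # mark the start of a timed region
        self.start_time = time.time()

    def toc(self, average=True):
        # close the timed region and update the running statistics
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff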
Example #2
def ctpn(sess, net, image_name, save_path1, save_path2):
    timer = Timer()
    timer.tic()

    # Read the image
    img = cv2.imread(image_name)
    img, scale = resize_im(img,
                           scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    # Grayscale preprocessing (disabled)
    #img2 = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
    #img2 = cv2.cvtColor(img2,cv2.COLOR_GRAY2RGB)
    #     base_name = im_name.split('\\')[-1]
    #     cv2.imwrite(os.path.join("data/results2", base_name), img2)

    scores, boxes = test_ctpn(sess, net, img)

    # Post-processing: detect() both filters and merges the text proposals
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    draw_boxes2(img, boxes, image_name, save_path2, scale)
    draw_boxes(img, boxes, image_name, save_path1, scale)

    # Post-processing: detect2() only filters out small text boxes
    #     textdetector = TextDetector()
    #     boxes = textdetector.detect2(boxes, scores[:, np.newaxis], img.shape[:2])
    #     draw_boxes3(img, boxes,image_name, scale)

    timer.toc()
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))
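The resize_im call used by the CTPN examples scales the image so its short side reaches TextLineCfg.SCALE while capping the long side at TextLineCfg.MAX_SCALE, returning both the resized image and the scale factor used later to map boxes back to the original image. A sketch consistent with the CTPN reference implementation (an approximation, not necessarily the project's exact code):

import cv2

def resize_im(im, scale, max_scale=None):
    # factor that brings the short side up (or down) to `scale`
    f = float(scale) / min(im.shape[0], im.shape[1])
    # but never let the long side exceed `max_scale`
    if max_scale is not None and f * max(im.shape[0], im.shape[1]) > max_scale:
        f = float(max_scale) / max(im.shape[0], im.shape[1])
    return cv2.resize(im, None, fx=f, fy=f, interpolation=cv2.INTER_LINEAR), f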
Example #3
def test():
    with torch.cuda.device(0):
        with torch.no_grad():
            args = parse_args()
            if args.config_file is not None:
                cfg_from_file(args.config_file)
            # test_model()
            s = Solver(args)
            model = s.model
            _t = Timer()

            batch_size = 16

            timing_array = []
            for i in range(1000):

                _t.tic()
                batch = torch.FloatTensor(batch_size, 3, cfg.DATASET.IMAGE_SIZE[0], cfg.DATASET.IMAGE_SIZE[1]).cuda(0)
                model = add_flops_counting_methods(model)
                model.eval().start_flops_count()
                out = model(batch)
                inf_time = _t.toc()
                timing_array.append(inf_time)

            print("Inference Time Mean: {:0.6f} Std Dev: {:0.6f}".format(np.mean(timing_array)*1000/batch_size, np.std(timing_array)*1000/batch_size))

            #print(model)


            # print('Output shape: {}'.format(list(out.shape)))
            print('Flops:  {}'.format(flops_to_string(model.compute_average_flops_cost())))
            print('Params: ' + get_model_parameters_number(model))
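One caveat with Example #3: CUDA kernels launch asynchronously, so wall-clock tic/toc around model(batch) can misattribute time, and the loop above also times the tensor allocation and the repeated flops-counter wrapping. A hedged variant that isolates the forward pass (it assumes the model and batch are already on the GPU):

import time
import torch

def time_forward(model, batch, iters=1000):
    """Time only the forward pass, synchronizing so queued GPU work is counted."""
    model.eval()
    timings = []
    with torch.no_grad():
        for _ in range(iters):
            torch.cuda.synchronize()   # drain pending kernels before starting the clock
            start = time.time()
            model(batch)
            torch.cuda.synchronize()   # wait for this forward pass to actually finish
            timings.append(time.time() - start)
    return timings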
Example #4
File: demo.py Project: Skii3/temp
def ctpn(sess, net, image_name, boxlabel):
    timer = Timer()
    timer.tic()

    img = cv2.imread(image_name)

    img, scale = resize_im(img,
                           scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)

    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    img = draw_boxes(img, image_name, boxes, scale, None)
    boxlabel2 = np.transpose(
        np.array([
            boxlabel[:, 0], boxlabel[:, 1], boxlabel[:, 2], boxlabel[:, 1],
            boxlabel[:, 0], boxlabel[:, 3], boxlabel[:, 2], boxlabel[:, 3],
            np.ones(len(boxlabel))
        ]))
    draw_boxes(img, image_name, boxlabel2, 1, (0, 0, 0))
    timer.toc()
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))
    boxes = boxes / scale
    return boxes
Example #5
def ctpn(sess,
         net,
         image_name,
         dst,
         draw_img=False,
         show_area=False,
         area_min=-0.1,
         area_max=1.1):
    timer = Timer()
    timer.tic()

    img = cv2.imread(image_name)
    img, scale = resize_im(img,
                           scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)

    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    ret = draw_boxes(img,
                     image_name,
                     boxes,
                     scale,
                     dst,
                     draw_img=draw_img,
                     show_area=show_area,
                     area_min=area_min,
                     area_max=area_max)
    timer.toc()
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))

    return ret
Example #6
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.FLAGS2["data_dir"], 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.1
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
Example #7
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    # Run the trained network on the image to obtain every predicted box
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        """
        对于每个类,找到对应的predict boxes的概率得分和坐标描述,先进行nms缩减相近的boxes,对于保留的boxes,当概率得分大于CONF_THRESH
        阈值时,通过vis_detections函数将box画出来。
        """
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
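The nms(dets, thresh) call used throughout these demos is greedy non-maximum suppression over (x1, y1, x2, y2, score) rows: repeatedly keep the highest-scoring box and discard every remaining box whose IoU with it exceeds thresh. The projects typically ship a compiled Cython/GPU kernel for this; a pure-NumPy sketch with the same signature:

import numpy as np

def nms(dets, thresh):
    """Greedy NMS over dets of shape (N, 5): x1, y1, x2, y2, score."""
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]          # indices sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the kept box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only boxes whose overlap with the kept box is below the threshold
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep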
Example #8
    def train_model(self, max_iters, snapshot_iters):
        """
        Train the model with max_iters.
        :return saved model paths
        """
        last_snapshot_iter = -1
        timer = Timer()
        model_paths = []

        print "Begin training the model."
        while self._solver.iter < max_iters:
            timer.tic()
            self._solver.step(1)
            timer.toc()

            # print the speed
            if self._solver.iter % 1000 == 0:
                print('speed: {:.3f}s / iter.'.format(timer.average_time))
            # snapshot the weights
            if self._solver.iter % snapshot_iters == 0:
                last_snapshot_iter = self._solver.iter
                model_paths.append(self.snapshot())

        if last_snapshot_iter != self._solver.iter:
            model_paths.append(self.snapshot())

        return model_paths
Example #9
def demo(sess, net, image_name):
    # Read the image with OpenCV from the given path
    im_file = os.path.join(cfg.FLAGS2["data_dir"], 'demo', image_name)
    im = cv2.imread(im_file)

    # Run object detection
    timer = Timer()
    timer.tic()
    # Predict scores and coordinates for the 300 proposal boxes
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    # Confidence threshold for drawing a class's detections
    CONF_THRESH = 0.1
    # Per-class NMS threshold
    NMS_THRESH = 0.1
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # +1 to skip the background class
        # Box coordinates of every proposal for this class
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        # Scores of every proposal for this class
        cls_scores = scores[:, cls_ind]
        # Stack coordinates and scores into (x1, y1, x2, y2, score) rows
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        # Suppress overlapping proposals with NMS at a 0.1 IoU threshold
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        # Draw the result
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
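The slicing in these loops assumes im_detect returns boxes with the per-class regressions laid out side by side, 4 columns per class, alongside scores of shape (N, num_classes). A small worked example of the indexing (shapes are illustrative):

import numpy as np

num_classes = 3                               # e.g. background + 2 object classes
boxes = np.zeros((300, 4 * num_classes))      # one (x1, y1, x2, y2) per class and proposal
scores = np.zeros((300, num_classes))         # per-class softmax scores

cls_ind = 1                                   # first real class; column 0 is background
cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]   # (300, 4) boxes for this class
cls_scores = scores[:, cls_ind]                        # (300,) scores for this class
dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)  # (300, 5)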
Example #10
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im = cv2.imread(image_name)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
Example #11
def boxdetect(sess, net, im_file, output_path):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the image
    im_file = im_file.replace('\\', '/')
    im = cv2.imread(im_file)
    image_name = im_file.split(r'/')[-1]
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.1
    geetcode_bbox = []
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        bbox = vis_detections(im,
                              cls,
                              dets,
                              image_name,
                              output_path,
                              thresh=CONF_THRESH)
        geetcode_bbox.append(bbox)
    return geetcode_bbox
Example #12
def ctpn(sess, net, image_name):
    timer = Timer()
    timer.tic()

    img = cv2.imread(image_name)
    height, width = img.shape[:2]
    img = img[int(2 * height / 3.0):height, :]
    img, scale = resize_im(img,
                           scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)
    # for box in boxes:
    #     color = (0, 255, 0)
    #     cv2.line(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[1])), color, 2)
    #     cv2.line(img, (int(box[0]), int(box[1])), (int(box[0]), int(box[3])), color, 2)
    #     cv2.line(img, (int(box[2]), int(box[1])), (int(box[2]), int(box[3])), color, 2)
    #     cv2.line(img, (int(box[0]), int(box[3])), (int(box[2]), int(box[3])), color, 2)
    # base_name = image_name.split('/')[-1]
    # cv2.imwrite("data/results/test_"+base_name, img)
    # draw_boxes(img, image_name, boxes, scale)
    # print(boxes)
    # assert 0
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    draw_boxes(img, image_name, boxes, scale)
    timer.toc()
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))
Example #13
    def detect(self, image):
        """Detect object classes in an image using pre-computed object proposals."""

        # Load the demo image
        # Detect all object classes and regress object bounds
        image = image_transform_1_3(image)
        timer = Timer()
        timer.tic()
        scores, boxes = self.im_detect(image)
        timer.toc()
        # print('rois--------------', scores)
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

        CONF_THRESH = 0.7
        NMS_THRESH = 0.1
        for cls_ind, cls in enumerate(self.classes_detect[1:]):
            cls_ind += 1  # because we skipped background
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
            dets = dets[inds, :]
        # NOTE: dets holds only the detections for the last class in the loop
        return dets
Example #14
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    #im_file = os.path.join(cfg.FLAGS2["data_dir"], 'demo', image_name)
    im_file = os.path.join(path1, image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.5
    NMS_THRESH = 0.1
    thresh = CONF_THRESH

    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        #vis_detections(im, cls, dets, thresh=CONF_THRESH)
        inds = np.where(dets[:, -1] >= thresh)[0]
        if len(inds) == 0:
            continue
        for i in inds:
            bbox = dets[i, :4]
            score = dets[i, -1]

            ax.add_patch(
                plt.Rectangle((bbox[0], bbox[1]),
                              bbox[2] - bbox[0],
                              bbox[3] - bbox[1],
                              fill=False,
                              edgecolor='red',
                              linewidth=3.5))
            ax.text(bbox[0],
                    bbox[1] - 2,
                    '{:s} {:.3f}'.format(cls, score),
                    bbox=dict(facecolor='blue', alpha=0.5),
                    fontsize=14,
                    color='white')

    plt.axis('off')
    plt.tight_layout()
    plt.draw()
    os.chdir(path2)
    plt.savefig(image_name)
Example #15
def demo(sess, net, image_name, thresh=0.05):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    image = PIL.Image.open(image_name)
    im = cv2.imread(image_name)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()

    im_num = os.path.split(image_name)[1].split('.')[0]
    scores, boxes = im_detect(sess,
                              net,
                              im,
                              save_feature=True,
                              feature_path='./data/conv.npy')
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]
    # fig, ax = plt.subplots(figsize=(12, 12))
    # ax.imshow(im, aspect='equal')

    CONF_THRESH = 0.7
    NMS_THRESH = 0.3
    results = []
    name = image_name.split('/')[-1]
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        cls_labels = np.full_like(cls_scores, cls_ind)
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis],
                          cls_labels[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        inds = np.where(dets[:, -2] > thresh)[0]
        dets = dets[inds]
        for i in range(dets.shape[0]):
            name = str(name)
            category = int(dets[i, -1])
            bbox = list(map(float, dets[i, :4]))
            bbox = [round(b, 2) for b in bbox]
            score = float(dets[i, -2])
            dic = collections.OrderedDict()
            dic['name'] = str(name)
            dic['category'] = int(category)
            dic['bbox'] = bbox
            dic['score'] = float(score)
            results.append(dic)
        im = vis_detections(image, cls, dets, ax=None, thresh=CONF_THRESH)

    out_path = './data/detection_result'
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    out_path = os.path.join(out_path, os.path.split(image_name)[-1])
    image.save(out_path)
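Example #15 accumulates results (a list of OrderedDicts with name, category, bbox, and score) but never writes it out; presumably it is serialized elsewhere. Assuming a JSON target, a sketch of the obvious dump (the results.json path is hypothetical):

import json

with open('./data/detection_result/results.json', 'w') as f:
    json.dump(results, f, indent=2)   # OrderedDicts serialize as plain JSON objects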
Example #16
    def detect(self, image):
        """Detect object classes in an image using pre-computed object proposals."""

        # Load the demo image
        # Detect all object classes and regress object bounds
        image = image_transform_1_3(image)
        timer = Timer()
        timer.tic()
        scores, boxes = self.im_detect(image)
        timer.toc()
        print('kkk', np.argmax(scores, axis=1))
        print('lll', scores[np.argmax(scores, axis=1) == 4, 4])
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

        CONF_THRESH = 0.3
        NMS_THRESH = 0.5
        dets_list = []
        for cls_ind, cls in enumerate(self.classes_detect[1:]):
            cls_ind += 1  # because we skipped background
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            inds = np.where(cls_scores > CONF_THRESH)[0]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            # run NMS on the thresholded subset and index back into that subset
            keep = nms(dets[inds, :], NMS_THRESH)
            dets = dets[inds, :][keep, :]
            inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
            cls_ind_list = np.empty((len(inds), 1), np.int32)
            cls_ind_list.fill(cls_ind)
            dets = np.hstack((dets[inds, :-1], cls_ind_list))
            dets_list.append(dets)
        dets = np.vstack(dets_list)
        print('jjj', dets)
        return dets
Example #17
def demo(sess, net, image_name, memory_storex, memory_storey,
         kitti_memory_0323, AN, sess2):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im = cv2.imread(image_name)
    im = cv2.resize(im, (1242, 375))
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, bbox_pred, _, rois, fc = im_detect(sess, net, im, memory_storex,
                                               memory_storey)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, bbox_pred.shape[0]))
    # Visualize detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.1
    im_shape = im.shape[:2]
    box_deltas = bbox_pred
    pred_boxes = bbox_transform_inv(rois, box_deltas)
    boxes = clip_boxes(pred_boxes, im_shape)

    # show.vis_detections(image_name, scores, boxes, dis_pre, fc, NMS_THRESH, CONF_THRESH)
    show.vis_detections(image_name, scores, boxes, fc, kitti_memory_0323, AN,
                        sess2, NMS_THRESH, CONF_THRESH)
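Unlike the other demos, Example #17 receives raw bbox_pred deltas and applies the box decoding itself. bbox_transform_inv is the standard Faster R-CNN inverse transform (deltas (dx, dy, dw, dh) relative to each RoI) and clip_boxes clamps the result to the image; a NumPy sketch of both, consistent with the py-faster-rcnn reference:

import numpy as np

def bbox_transform_inv(boxes, deltas):
    """Decode (dx, dy, dw, dh) deltas against (x1, y1, x2, y2) RoI boxes."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy, dw, dh = deltas[:, 0::4], deltas[:, 1::4], deltas[:, 2::4], deltas[:, 3::4]
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w   # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h   # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w   # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h   # y2
    return pred_boxes

def clip_boxes(boxes, im_shape):
    """Clamp boxes to lie inside an image of shape (height, width)."""
    boxes[:, 0::4] = np.clip(boxes[:, 0::4], 0, im_shape[1] - 1)
    boxes[:, 1::4] = np.clip(boxes[:, 1::4], 0, im_shape[0] - 1)
    boxes[:, 2::4] = np.clip(boxes[:, 2::4], 0, im_shape[1] - 1)
    boxes[:, 3::4] = np.clip(boxes[:, 3::4], 0, im_shape[0] - 1)
    return boxes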
Example #18
    def detect(self, image):
        """Detect object classes in an image using pre-computed object proposals."""

        # Load the demo image
        # Detect all object classes and regress object bounds
        image = image_transform_1_3(image)
        timer = Timer()
        timer.tic()
        scores, boxes = self.im_detect(image)
        timer.toc()
        print('rois--------------', scores)
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time, len(boxes)))

        CONF_THRESH = 0.3
        # print(scores)
        NMS_THRESH = 0.5
        dets = []
        for i in range(len(boxes)):
            # print('lll')
            cls_boxes = boxes[i]
            cls_scores = scores[i]
            dets_i_ = np.hstack([cls_boxes[:, 0:4], cls_scores])
            keep = nms(dets_i_, NMS_THRESH)
            dets_i = np.hstack([cls_boxes, cls_scores])
            dets_i = dets_i[keep, :]
            inds = np.where(dets_i[:, -1] >= CONF_THRESH)[0]
            dets_i = dets_i[inds, :]
            dets_i = dets_i[:, 0:5]
            dets.append(dets_i)
        return dets
Example #19
def demo_video(sess, net, frame, camera_url):
    """Detect object classes in an image using pre-computed object proposals."""
    im = frame
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    # Visualize detections for each class
    CONF_THRESH = 0.6  # threshold
    NMS_THRESH = 0.1
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
        if cls in ('crow', 'magpie', 'pigeon', 'swallow', 'sparrow') \
                and len(inds) != 0:
            if time.time() - timer_trigger.start_time > residence_time:
                images = vis_detections_video(im, cls, dets, timer.start_time, timer.total_time, inds, CONF_THRESH)
                socket_client_target_detection(cls, len(inds), images, time.ctime(), camera_url, True)
                timer_trigger.tic()  # reset the start time
            else:
                images = vis_detections_video(im, cls, dets, timer.start_time, timer.total_time, inds, CONF_THRESH)
                socket_client_target_detection(cls, len(inds), images, time.ctime(), camera_url, False)
        elif cls == 'airplane' and len(inds) != 0:
            pass
        elif cls == 'person' and len(inds) != 0:
            pass
        else:
            pass
Example #20
    def process_frame(self, video_name, im_name, CLASSES, CONF_THRESH):
        # Output frame path
        im_path_ = os.path.join(api_config.upload_folder,
                                video_name.split(".")[0],
                                "annotated-frames", os.path.basename(im_name))
        im = np.array(Image.open(im_name))
        im = im[:, :, ::-1]
        timer = Timer()
        timer.tic()
        scores, boxes = im_detect(self.sess, self.net, im)
        timer.toc()
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time,
                                             boxes.shape[0]))

        NMS_THRESH = 0.3
        im = im[:, :, (2, 1, 0)]
        fig, ax = plt.subplots(figsize=(12, 12))
        ax.imshow(im, aspect='equal')
        self.annotation = xml_setup(im_name, im.shape)
        for cls_ind, cls in enumerate(CLASSES[1:]):
            cls_ind += 1  # because we skipped background
            cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes,
                              cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            self.draw(im_path_, cls, dets, ax, thresh=CONF_THRESH)
        xml_write(video_name, os.path.basename(im_name), self.annotation)
        plt.savefig(im_path_, bbox_inches='tight')
        plt.close()
Example #21
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im = cv2.imread(image_name)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
Example #22
def video_demo(sess, net, image):
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes, _ = im_detect_bbox_kpoints(sess, net, image)
    # scores, boxes, points = im_detect(sess, net, image)
    # print("scores:", scores.shape)  --> (n, 1)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.6
    NMS_THRESH = 0.3

    inds = np.where(scores[:, 0] > CONF_THRESH)[0]
    scores = scores[inds, 0]
    boxes = boxes[inds, :]
    # points = points[inds, :]
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    # dets = np.hstack((boxes, scores[:, np.newaxis], points)).astype(np.float32, copy=False)
    keep = nms(dets, NMS_THRESH)
    dets = dets[keep, :]
    return dets
Example #23
def SignalImage_Test(sess, net, image_path):

    im = cv2.imread(image_path)
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.4
    NMS_THRESH = 0.35


    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]


        # print('\nboxes:',boxes)
        # print('\ncls_boxes:',cls_boxes)
        # print('\n ',boxes.shape)
        # print(len(cls_boxes), len(boxes))


        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
Example #24
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im = readimage(image_name)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    # print('rois--------------', scores)
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    CONF_THRESH = 0.7
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis(im, image_name, cls, dets, thresh=CONF_THRESH)
Example #25
def demo(sess, net, image_name):

    # Load the target image
    im_file = os.path.join('test_images', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    # Visualize the detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.1

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped the background class
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_dections(im, cls, dets, thresh=CONF_THRESH)
Example #26
def ctpn(sess, net, image_name):
    timer = Timer()
    timer.tic()

    img = cv2.imread(image_name)
    img, scale = resize_im(img,
                           scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)

    new_scores = scores[:, np.newaxis]

    keep_inds = np.where(new_scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
    boxes, new_scores = boxes[keep_inds], new_scores[keep_inds]

    sorted_indices = np.argsort(new_scores.ravel())[::-1]
    boxes, new_scores = boxes[sorted_indices], new_scores[sorted_indices]

    keep_inds = nms(np.hstack((boxes, new_scores)),
                    TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
    boxes, new_scores = boxes[keep_inds], new_scores[keep_inds]

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(10, 14))

    for key, box in enumerate(boxes):
        img_inside = img.copy()
        img_inside = cv2.rectangle(img_inside, (box[0], box[1]),
                                   (box[2], box[3]),
                                   color=(255, 0, 0),
                                   thickness=2)
        plt.imshow(img_inside)
        plt.title('Score: {0}'.format(new_scores[key]))
        plt.savefig('./data/fig/fig_{0}.jpg'.format(key))
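Example #26 inlines the first stage of TextDetector.detect: score thresholding, sorting by score, and NMS over the raw CTPN proposals. That stage factors naturally into a helper; a minimal sketch reusing the example's own TextLineCfg thresholds and nms function:

import numpy as np

def filter_text_proposals(boxes, scores):
    """Threshold, sort, and NMS the raw CTPN text proposals."""
    scores = scores[:, np.newaxis] if scores.ndim == 1 else scores
    keep = np.where(scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
    boxes, scores = boxes[keep], scores[keep]

    order = np.argsort(scores.ravel())[::-1]     # highest score first
    boxes, scores = boxes[order], scores[order]

    keep = nms(np.hstack((boxes, scores)), TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
    return boxes[keep], scores[keep]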
Example #27
def test_net_on_dataset(args,
                        dataset_name,
                        proposal_file,
                        output_dir,
                        multi_gpu=False,
                        gpu_id=0):
    """Run inference on a dataset."""
    dataset = JsonDataset(dataset_name)
    test_timer = Timer()
    test_timer.tic()
    if multi_gpu:
        num_images = len(dataset.get_roidb())
        all_boxes, all_segms, all_keyps = multi_gpu_test_net_on_dataset(
            args, dataset_name, proposal_file, num_images, output_dir)
    else:
        all_boxes, all_segms, all_keyps = test_net(args,
                                                   dataset_name,
                                                   proposal_file,
                                                   output_dir,
                                                   gpu_id=gpu_id)
    test_timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(
        test_timer.average_time))
    results = task_evaluation.evaluate_all(dataset, all_boxes, all_segms,
                                           all_keyps, output_dir)
    return results
Example #28
def ctpn(sess, net, image_name):
    timer = Timer()
    timer.tic()

    img = cv2.imread(image_name)
    img, scale = resize_im(img,
                           scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)

    # Convert the OpenCV image to a PIL image
    pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    # Estimate sharpness via the variance of the Laplacian
    imageVar = cv2.Laplacian(img, cv2.CV_64F).var()
    if imageVar <= 5000:
        pil_img = ImageEnhance.Sharpness(pil_img).enhance(3.0)
    # Convert the PIL image back to an OpenCV image
    img = cv2.cvtColor(np.asarray(pil_img), cv2.COLOR_RGB2BGR)

    scores, boxes = test_ctpn(sess, net, img)

    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    draw_boxes(img, image_name, boxes, scale)
    timer.toc()
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))
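Example #28 gates its sharpening step on the variance of the Laplacian, a common single-number focus measure: a blurry image has few edges, so the Laplacian response has low variance. A small helper for the same check (the 5000 cutoff is the example's own choice and is image- and resolution-dependent):

import cv2

def is_blurry(img, threshold=5000.0):
    """Variance-of-Laplacian focus measure; below the threshold counts as blurry."""
    return cv2.Laplacian(img, cv2.CV_64F).var() <= threshold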
Example #29
def demo(net, matlab, image_filepath, classes, method, par1, par2):
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    # Load pre-computed Selected Search object proposals
    obj_proposals = ROI_boxes(matlab, image_filepath, method, par1, par2)
    global OP_num
    OP_num = len(obj_proposals)
    if len(obj_proposals) == 0:
        dets = []
        timer.toc()
        return dets, timer.total_time

    # Load the demo image
    im = cv2.imread(image_filepath)
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
    return dets, timer.total_time
Example #30
    def test_epoch(self, model, data_loader, detector, output_dir, use_gpu):
        model.eval()

        dataset = data_loader.dataset
        num_images = len(dataset)
        num_classes = detector.num_classes
        all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(num_classes)]
        empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

        _t = Timer()

        for i in range(num_images):
            img = dataset.pull_image(i)
            scale = [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]
            with torch.no_grad():
                images = dataset.preproc(img)[0].unsqueeze(0).to(self.device)

            _t.tic()
            # forward
            out = model(images, phase='eval')

            # detect
            detections = detector.forward(out)

            time = _t.toc()

            # TODO: make it smart:
            for j in range(1, num_classes):
                cls_dets = list()
                for det in detections[0][j]:
                    if det[0] > 0:
                        d = det.cpu().numpy()
                        score, box = d[0], d[1:]
                        box *= scale
                        box = np.append(box, score)
                        cls_dets.append(box)
                if len(cls_dets) == 0:
                    cls_dets = empty_array
                all_boxes[j][i] = np.array(cls_dets)

            # log per iter
            log = '{iters:d}/{epoch_size:d} in {time:.3f}s [{progress}]\r'.format(
                progress='#' * int(round(10 * i / num_images)) +
                '-' * int(round(10 * (1 - i / num_images))),
                iters=i,
                epoch_size=num_images,
                time=time)
            sys.stdout.write(log)
            sys.stdout.flush()

        # write result to pkl
        with open(os.path.join(output_dir, 'detections.pkl'), 'wb') as f:
            pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

        # currently the COCO dataset does not return the mean AP or AP 0.5:0.95 values
        print('Evaluating detections')
        data_loader.dataset.evaluate_detections(all_boxes, output_dir)
Example #31
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(
        r'G:\DeepLearning\Project\LJProject\Faster-RCNN\Faster-RCNN-TensorFlow-Python3-master-NEU\data\demo',
        image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    # boxes here are already refined by bbox_pred; each class gets its own predicted box coordinates
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.1
    # draw the detections once per class
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        # NMS: drop any of the 300 proposals whose IoU with a higher-scoring proposal exceeds 0.1
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
Example #32
def time_analyse(matlab, cmd, image_filepath, par1, par2):
    timer = Timer()
    timer.tic()

    obj_proposals = ROI_boxes(matlab, image_filepath, cmd, par1, par2)

    timer.toc()
    time = timer.total_time
    box_number = len(obj_proposals)

    return time, box_number, obj_proposals
Example #33
def ctpn(sess, net, image_name):
    timer = Timer()
    timer.tic()

    img = cv2.imread(image_name)
    img, scale = resize_im(img, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)

    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    draw_boxes(img, image_name, boxes, scale)
    timer.toc()
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))
Example #34
def demo(net, matlab, image_filepath, classes, args):
    """Detect object classes in an image using pre-computed object proposals."""
    timer = Timer()
    timer.tic()
    # Load pre-computed Selected Search object proposals
    obj_proposals = ROI_boxes(matlab, image_filepath, args.OP_method)
    if len(obj_proposals) == 0:
        return

    # Load the demo image
    im = cv2.imread(image_filepath)

    # Detect all object classes and regress object bounds

    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        if len(dets) == 0:
            global count
            count += 1
            print('{} No Ear detected'.format(count))
        # print 'All {} detections with p({} | box) >= {:.1f}'.format(cls, cls,
        #                                                             CONF_THRESH)
        if args.video_mode:
            visualise(im, cls, dets, thresh=CONF_THRESH)
        elif args.image_path is not None:
            vis_detections(im, cls, dets, thresh=CONF_THRESH)
Example #35
def demo(net, image_name, classes):

    """Detect object classes in an image using pre-computed object proposals."""

    # Load pre-computed Selected Search object proposals
    box_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo',
                            image_name + '_boxes.mat')
    obj_proposals = sio.loadmat(box_file)['boxes']

    # Load the demo image
    im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name + '.jpg')
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        print('All {} detections with p({} | box) >= {:.1f}'.format(cls, cls,
                                                                    CONF_THRESH))
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
Example #36
    def train_model(self, epochs):
        #1. construct the computation graph
        self.net.init_modules()

        # save net structure to data folder
        with open(os.path.join(self.output_dir, 'nn.txt'), 'w') as net_f:
            net_f.write(str(self.net))

        #find previous snapshot 
        lsf, nfiles, sfiles = self.find_previous()

        #2. restore weights
        if lsf == 0:
            lr, last_iter, stepsizes, self.np_paths, self.ss_paths = self.initialize()
        else:
            lr, last_iter, stepsizes, self.np_paths, self.ss_paths = self.restore(str(sfiles[-1]),
                                                                                 str(nfiles[-1]))
        #3. fix weights and eval mode
        self.fix_eval_parts()

        # construct optimizer
        self.construct_optimizer(lr)

        if len(stepsizes) != 0:
            next_stepsize = stepsizes.pop(0)
        else:
            next_stepsize = -1

        train_timer = Timer()
        current_snapshot_epoch = int(last_iter / len(self.dataloader_train))
        for epoch in range(current_snapshot_epoch, epochs):
            print("start epoch {}".format(epoch))
            with output(initial_len=9, interval=0) as content:
                for iter, blobs in enumerate(tqdm(self.dataloader_train)):
                    last_iter += 1
                    # adjust learning rate
                    if last_iter == next_stepsize:
                        lr *= cfg.GAMMA
                        self.scale_lr(self.optimizer, lr)
                        if len(stepsizes) != 0:
                            next_stepsize = stepsizes.pop(0)

                    batch_size = blobs['data'].shape[0]
                    if len(blobs['gt_box']) < batch_size: #invalid sample
                        continue
                    train_timer.tic()
                    # IMAGE PART
                    if cfg.USE_IMAGES:
                        grid_shape = blobs['data'].shape[-3:]
                        projection_helper = ProjectionHelper(cfg.INTRINSIC, cfg.PROJ_DEPTH_MIN, cfg.PROJ_DEPTH_MAX, cfg.DEPTH_SHAPE, grid_shape, cfg.VOXEL_SIZE)
                        proj_mapping = [[projection_helper.compute_projection(d.cuda(), c.cuda(), t.cuda()) for d, c, t in zip(blobs['nearest_images']['depths'][i], blobs['nearest_images']['poses'][i], blobs['nearest_images']['world2grid'][i])] for i in range(batch_size)]

                        jump_flag = False
                        for i in range(batch_size):
                            if None in proj_mapping[i]: #invalid sample
                                jump_flag = True
                                break
                        if jump_flag:
                            continue
                        
                        blobs['proj_ind_3d'] = []
                        blobs['proj_ind_2d'] = []
                        for i in range(batch_size):
                            proj_mapping0, proj_mapping1 = zip(*proj_mapping[i])
                            blobs['proj_ind_3d'].append(torch.stack(proj_mapping0))
                            blobs['proj_ind_2d'].append(torch.stack(proj_mapping1))

                        
                    self.net.forward(blobs)
                    self.optimizer.zero_grad()
                    self.net._losses["total_loss"].backward()
                    self.optimizer.step()

                    train_timer.toc()

                    # Display training information
                    if iter % (cfg.DISPLAY) == 0:
                        self.log_print(epoch*len(self.dataloader_train)+iter, lr, content, train_timer.average_time())
                    self.net.delete_intermediate_states()

                    # validate if satisfying the time criterion
                    if train_timer.total_time() / 3600 >= cfg.VAL_TIME:
                        print('------------------------VALIDATION------------------------------')
                        self.validation(last_iter, 'val')
                        print('------------------------TRAINVAL--------------------------------')
                        self.validation(last_iter, 'trainval')

                        # snapshot
                        if cfg.VAL_TIME > 0.0:
                            ss_path, np_path = self.snapshot(last_iter)
                            self.np_paths.append(np_path)
                            self.ss_paths.append(ss_path)

                            #remove old snapshots if too many
                            if len(self.np_paths) > cfg.SNAPSHOT_KEPT and cfg.SNAPSHOT_KEPT:
                                self.remove_snapshot()

                        train_timer.clean_total_time()
Example #37
    def test(net, data_loader, data_logger):
        #####################################
        # Preparation
        #####################################
        os.makedirs(cfg.TEST_SAVE_DIR, exist_ok=True)
        mAP_CLASSIFICATION = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0], overlap_threshold=cfg.MAP_THRESH)
        mAP_MASK = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0], overlap_threshold=cfg.MAP_THRESH)

        ####################################
        # Accumulate data
        ####################################
        pred_all = {}
        gt_all = {}

        timer = Timer()
        timer.tic()
        print('starting test on whole scan....')
        for iter, blobs in enumerate(tqdm(data_loader)):

            try:
                gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
                gt_class = blobs['gt_box'][0][:, 6].numpy()
            except Exception:  # sample has no usable ground-truth boxes
                continue

            # color proj
            killing_inds = None
            if cfg.USE_IMAGES:
                grid_shape = blobs['data'].shape[-3:]
                projection_helper = ProjectionHelper(cfg.INTRINSIC, cfg.PROJ_DEPTH_MIN, cfg.PROJ_DEPTH_MAX, cfg.DEPTH_SHAPE, grid_shape, cfg.VOXEL_SIZE)
                if grid_shape[0]*grid_shape[1]*grid_shape[2] > cfg.MAX_VOLUME or blobs['nearest_images']['depths'][0].shape[0] > cfg.MAX_IMAGE:
                    proj_mapping = [projection_helper.compute_projection(d, c, t) for d, c, t in zip(blobs['nearest_images']['depths'][0], blobs['nearest_images']['poses'][0], blobs['nearest_images']['world2grid'][0])]
                else:
                    proj_mapping = [projection_helper.compute_projection(d.cuda(), c.cuda(), t.cuda()) for d, c, t in zip(blobs['nearest_images']['depths'][0], blobs['nearest_images']['poses'][0], blobs['nearest_images']['world2grid'][0])]
                    
                killing_inds = []
                real_proj_mapping = []
                if None in proj_mapping: #invalid sample
                    for killing_ind, killing_item in enumerate(proj_mapping):
                        if killing_item is None:
                            killing_inds.append(killing_ind)
                        else:
                            real_proj_mapping.append(killing_item)
                    print('{}: (invalid sample: no valid projection)'.format(blobs['id']))
                else:
                    real_proj_mapping = proj_mapping
                blobs['proj_ind_3d'] = []
                blobs['proj_ind_2d'] = []
                proj_mapping0, proj_mapping1 = zip(*real_proj_mapping)
                blobs['proj_ind_3d'].append(torch.stack(proj_mapping0))
                blobs['proj_ind_2d'].append(torch.stack(proj_mapping1))

            net.forward(blobs, 'TEST', killing_inds)

            # test with detection pipeline
            pred_class = net._predictions['cls_pred'].data.cpu().numpy()
            rois = net._predictions['rois'][0].cpu()
            box_reg_pre = net._predictions["bbox_pred"].data.cpu().numpy()
            box_reg = np.zeros((box_reg_pre.shape[0], 6))
            pred_conf_pre = net._predictions['cls_prob'].data.cpu().numpy()
            pred_conf = np.zeros((pred_conf_pre.shape[0]))

            for pred_ind in range(pred_class.shape[0]):
                box_reg[pred_ind, :] = box_reg_pre[pred_ind, pred_class[pred_ind]*6:(pred_class[pred_ind]+1)*6]
                pred_conf[pred_ind] = pred_conf_pre[pred_ind, pred_class[pred_ind]]

            pred_box = bbox_transform_inv(rois, torch.from_numpy(box_reg).float())
            pred_box = clip_boxes(pred_box, net._scene_info[:3]).numpy()

            os.makedirs('{}/{}'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), exist_ok=True)
            np.save('{}/{}/pred_class'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_class)
            np.save('{}/{}/pred_conf'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_conf)
            np.save('{}/{}/pred_box'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), pred_box)
            np.save('{}/{}/scene'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), np.where(blobs['data'][0,0].numpy() <= 1, 1, 0))
            np.save('{}/{}/gt_class'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), gt_class)
            np.save('{}/{}/gt_box'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), gt_box)

            # pickup
            sort_index = []
            for conf_index in range(pred_conf.shape[0]):
                if pred_conf[conf_index] > cfg.CLASS_THRESH:
                    sort_index.append(True)
                else:
                    sort_index.append(False)

            # eliminate bad box
            for idx, box in enumerate(pred_box):
                if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) or round(box[2]) >= round(box[5]):
                    sort_index[idx] = False

            mAP_CLASSIFICATION.evaluate(
                    pred_box[sort_index],
                    pred_class[sort_index],
                    pred_conf[sort_index],
                    gt_box,
                    gt_class)

            if cfg.USE_MASK:
                gt_mask = blobs['gt_mask'][0]
                # pickup
                sort_index = []
                for conf_index in range(pred_conf.shape[0]):
                    if pred_conf[conf_index] > cfg.CLASS_THRESH:
                        sort_index.append(True)
                    else:
                        sort_index.append(False)

                # eliminate bad box
                for idx, box in enumerate(pred_box):
                    if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) or round(box[2]) >= round(box[5]):
                        sort_index[idx] = False

                # test with mask pipeline
                net.mask_backbone.eval()
                net.mask_backbone.cuda()
                mask_pred_batch = []
                for net_i in range(1):
                    mask_pred = []
                    for pred_box_ind, pred_box_item in enumerate(pred_box):
                        if sort_index[pred_box_ind]:
                            mask_pred.append(net.mask_backbone(Variable(blobs['data'].cuda())[net_i:net_i+1, :, 
                                                                            int(round(pred_box_item[0])):int(round(pred_box_item[3])),
                                                                            int(round(pred_box_item[1])):int(round(pred_box_item[4])), 
                                                                            int(round(pred_box_item[2])):int(round(pred_box_item[5]))
                                                                            ], [] if cfg.USE_IMAGES else None))

                    mask_pred_batch.append(mask_pred)
                net._predictions['mask_pred'] = mask_pred_batch

                # save test result
                pred_mask = []
                mask_ind = 0
                for ind, cls in enumerate(pred_class):
                    if sort_index[ind]:
                        mask = net._predictions['mask_pred'][0][mask_ind][0][cls].data.cpu().numpy()
                        mask = np.where(mask >= cfg.MASK_THRESH, 1, 0).astype(np.float32)
                        pred_mask.append(mask)
                        mask_ind += 1

                pickle.dump(pred_mask, open('{}/{}/pred_mask'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))
                pickle.dump(sort_index, open('{}/{}/pred_mask_index'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))
                pickle.dump(gt_mask, open('{}/{}/gt_mask'.format(cfg.TEST_SAVE_DIR, blobs['id'][0].split('/')[-1][:12]), 'wb'))

                mAP_MASK.evaluate_mask(
                        pred_box[sort_index],
                        pred_class[sort_index],
                        pred_conf[sort_index],
                        pred_mask,
                        gt_box,
                        gt_class, 
                        gt_mask, 
                        net._scene_info)

        timer.toc()
        print('It took {:.3f}s for test on whole scenes'.format(timer.total_time()))

        ###################################
        # Summary
        ###################################
        if cfg.USE_CLASS:
            mAP_CLASSIFICATION.finalize()
            print('mAP of CLASSIFICATION: {}'.format(mAP_CLASSIFICATION.mAP()))
            for class_ind in range(cfg.NUM_CLASSES):
                if class_ind not in mAP_CLASSIFICATION.ignore_class:
                    print('class {}: {}'.format(class_ind, mAP_CLASSIFICATION.AP(class_ind)))

        if cfg.USE_MASK:
            mAP_MASK.finalize()
            print('mAP of mask: {}'.format(mAP_MASK.mAP()))
            for class_ind in range(cfg.NUM_CLASSES):
                if class_ind not in mAP_MASK.ignore_class:
                    print('class {}: {}'.format(class_ind, mAP_MASK.AP(class_ind)))
Example #38
    def validation(self, index, mode):
        #####################################
        # Preparation
        #####################################
        #-------------------------------
        # metric
        #-------------------------------
        mAP_RPN = Evaluate_metric(1, overlap_threshold=cfg.MAP_THRESH)
        mAP_CLASSIFICATION = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0], overlap_threshold=cfg.MAP_THRESH)
        mAP_MASK = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0], overlap_threshold=cfg.MAP_THRESH)
        if mode == 'val':
            data_loader = self.dataloader_val
            data_logger = self.logger_val
        elif mode == 'trainval':
            data_loader = self.dataloader_trainval
            data_logger = self.logger_trainval
        else:
            raise ValueError('unknown validation mode: {}'.format(mode))

        ####################################
        # Accumulate data
        ####################################
        timer = Timer()
        timer.tic()
        print('starting validation...')
        for iter, blobs in enumerate(tqdm(data_loader)):
            # if no box: skip
            if len(blobs['gt_box']) == 0:
                continue

            if cfg.USE_IMAGES:
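                # compute projection index maps linking voxels to depth-frame pixels
                # for each nearby RGB-D frame; passed to the network as
                # proj_ind_3d / proj_ind_2d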
                grid_shape = blobs['data'].shape[-3:]
                projection_helper = ProjectionHelper(cfg.INTRINSIC, cfg.PROJ_DEPTH_MIN, cfg.PROJ_DEPTH_MAX, cfg.DEPTH_SHAPE, grid_shape, cfg.VOXEL_SIZE)
                proj_mapping = [projection_helper.compute_projection(d.cuda(), c.cuda(), t.cuda()) for d, c, t in zip(blobs['nearest_images']['depths'][0], blobs['nearest_images']['poses'][0], blobs['nearest_images']['world2grid'][0])]

                if None in proj_mapping:  # invalid sample, skip it
                    continue
                
                blobs['proj_ind_3d'] = []
                blobs['proj_ind_2d'] = []
                proj_mapping0, proj_mapping1 = zip(*proj_mapping)
                blobs['proj_ind_3d'].append(torch.stack(proj_mapping0))
                blobs['proj_ind_2d'].append(torch.stack(proj_mapping1))

            self.net.forward(blobs, 'TEST', [])
            #--------------------------------------
            # RPN: loss, metric 
            #--------------------------------------
            if cfg.USE_RPN:
                # ground-truth boxes: (n, 6) min/max corners
                gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
                gt_box_label = np.zeros(gt_box.shape[0])

                try:
                    pred_box_num = (self.net._predictions['roi_scores'][0][:, 0] > cfg.ROI_THRESH).nonzero().size(0)
                    pred_box = self.net._predictions['rois'][0].cpu().numpy()[:pred_box_num]
                    pred_box_label = np.zeros(pred_box_num)
                    pred_box_score = self.net._predictions['roi_scores'][0].cpu().numpy()[:pred_box_num, 0]
                except Exception:
                    # fall back to the single top-scoring proposal
                    pred_box = self.net._predictions['rois'][0].cpu().numpy()[:1]
                    pred_box_label = np.zeros(1)
                    pred_box_score = self.net._predictions['roi_scores'][0].cpu().numpy()[:1, 0]

                #evaluation metric 
                mAP_RPN.evaluate(pred_box,
                                 pred_box_label,
                                 pred_box_score,
                                 gt_box,
                                 gt_box_label)

            #--------------------------------------
            # Classification: loss, metric 
            #--------------------------------------
            if cfg.USE_CLASS:
                # groundtruth
                gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
                gt_class = blobs['gt_box'][0][:, 6].numpy()

                # predictions
                pred_class = self.net._predictions['cls_pred'].data.cpu().numpy()

                # only _predictions['rois'] is a list of Tensors; the others are Variables
                rois = self.net._predictions['rois'][0].cpu()
                box_reg_pre = self.net._predictions["bbox_pred"].data.cpu().numpy()
                box_reg = np.zeros((box_reg_pre.shape[0], 6))
                pred_conf_pre = self.net._predictions['cls_prob'].data.cpu().numpy()
                pred_conf = np.zeros((pred_conf_pre.shape[0]))


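                # pick, for each proposal, the 6 regression deltas and the
                # confidence of its arg-max class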
                for pred_ind in range(pred_class.shape[0]):
                    box_reg[pred_ind, :] = box_reg_pre[pred_ind, pred_class[pred_ind]*6:(pred_class[pred_ind]+1)*6]
                    pred_conf[pred_ind] = pred_conf_pre[pred_ind, pred_class[pred_ind]]

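                # decode regression deltas into boxes and clip them to the scene volume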
                pred_box = bbox_transform_inv(rois, torch.from_numpy(box_reg).float())
                pred_box = clip_boxes(pred_box, self.net._scene_info[:3]).numpy()

                # keep only predictions whose confidence clears CLASS_THRESH
                sort_index = pred_conf > cfg.CLASS_THRESH

                # drop degenerate boxes (min >= max on any axis after rounding)
                for idx, box in enumerate(pred_box):
                    if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) or round(box[2]) >= round(box[5]):
                        sort_index[idx] = False

                if len(pred_box[sort_index]) == 0:
                    print('no pred box')

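                # save the first VAL_NUM chunks for qualitative inspection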
                if iter < cfg.VAL_NUM:
                    save_dir = '{}/{}'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12])
                    os.makedirs(save_dir, exist_ok=True)
                    np.save('{}/pred_class'.format(save_dir), pred_class)
                    np.save('{}/pred_conf'.format(save_dir), pred_conf)
                    np.save('{}/pred_box'.format(save_dir), pred_box)
                    np.save('{}/scene'.format(save_dir), np.where(blobs['data'][0, 0].numpy() <= 1, 1, 0))
                    np.save('{}/gt_class'.format(save_dir), gt_class)
                    np.save('{}/gt_box'.format(save_dir), gt_box)

                mAP_CLASSIFICATION.evaluate(
                        pred_box[sort_index],
                        pred_class[sort_index],
                        pred_conf[sort_index],
                        gt_box,
                        gt_class)

            #--------------------------------------
            # MASK: loss, metric 
            #--------------------------------------
            if cfg.USE_MASK:
                # gt data
                gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
                gt_class = blobs['gt_box'][0][:, 6].numpy()
                gt_mask = blobs['gt_mask'][0]

                pred_class = self.net._predictions['cls_pred'].data.cpu().numpy()
                cls_prob = self.net._predictions['cls_prob'].data.cpu().numpy()
                pred_conf = np.zeros((pred_class.shape[0]))
                for pred_ind in range(pred_class.shape[0]):
                    pred_conf[pred_ind] = cls_prob[pred_ind, pred_class[pred_ind]]

                # keep only predictions whose confidence clears CLASS_THRESH
                sort_index = pred_conf > cfg.CLASS_THRESH

                # drop degenerate boxes (min >= max on any axis after rounding)
                for idx, box in enumerate(pred_box):
                    if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) or round(box[2]) >= round(box[5]):
                        sort_index[idx] = False
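                # note: a vectorized equivalent of this filter (sketch, assuming
                # boxes are [xmin, ymin, zmin, xmax, ymax, zmax] rows):
                #   rounded = np.round(pred_box)
                #   sort_index &= (rounded[:, :3] < rounded[:, 3:]).all(axis=1)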

                pred_mask = []
                mask_ind = 0
                for ind, cls in enumerate(pred_class):
                    if sort_index[ind]:
                        mask = self.net._predictions['mask_pred'][0][mask_ind][0][cls].data.cpu().numpy()
                        mask = np.where(mask >= cfg.MASK_THRESH, 1, 0).astype(np.float32)
                        pred_mask.append(mask)
                        mask_ind += 1

                if iter < cfg.VAL_NUM: 
                    save_dir = '{}/{}'.format(cfg.VAL_SAVE_DIR, blobs['id'][0].split('/')[-1][:12])
                    for name, obj in (('pred_mask', pred_mask), ('pred_mask_index', sort_index), ('gt_mask', gt_mask)):
                        with open('{}/{}'.format(save_dir, name), 'wb') as f:
                            pickle.dump(obj, f)

                mAP_MASK.evaluate_mask(
                        pred_box[sort_index],
                        pred_class[sort_index],
                        pred_conf[sort_index],
                        pred_mask,
                        gt_box,
                        gt_class, 
                        gt_mask, 
                        self.net._scene_info)

            self.net.delete_intermediate_states()
        timer.toc()
        print('Validation on chunks took {:.3f}s'.format(timer.total_time()))

        ###################################
        # Summary
        ###################################
        if cfg.USE_RPN:
            mAP_RPN.finalize()
            print('AP of RPN: {}'.format(mAP_RPN.mAP()))
            data_logger.scalar_summary('AP_ROI', mAP_RPN.mAP(), index)

        if cfg.USE_CLASS:
            mAP_CLASSIFICATION.finalize()
            print('mAP of CLASSIFICATION: {}'.format(mAP_CLASSIFICATION.mAP()))
            for class_ind in range(cfg.NUM_CLASSES):
                if class_ind not in mAP_CLASSIFICATION.ignore_class:
                    print('class {}: {}'.format(class_ind, mAP_CLASSIFICATION.AP(class_ind)))
            data_logger.scalar_summary('mAP_CLASSIFICATION', mAP_CLASSIFICATION.mAP(), index)

        if cfg.USE_MASK:
            mAP_MASK.finalize()
            print('mAP of mask: {}'.format(mAP_MASK.mAP()))
            for class_ind in range(cfg.NUM_CLASSES):
                if class_ind not in mAP_MASK.ignore_class:
                    print('class {}: {}'.format(class_ind, mAP_MASK.AP(class_ind)))
            data_logger.scalar_summary('mAP_MASK', mAP_MASK.mAP(), index)
Example #39
0
    def train_model(self, sess, max_iters, restore=False):
        """Network training loop."""
        data_layer = get_data_layer(self.roidb, self.imdb.num_classes)
        total_loss, model_loss, rpn_cross_entropy, rpn_loss_box = self.net.build_loss(ohem=cfg.TRAIN.OHEM)
        # scalar summary
        tf.summary.scalar('rpn_reg_loss', rpn_loss_box)
        tf.summary.scalar('rpn_cls_loss', rpn_cross_entropy)
        tf.summary.scalar('model_loss', model_loss)
        tf.summary.scalar('total_loss', total_loss)
        summary_op = tf.summary.merge_all()

        log_image, log_image_data, log_image_name =\
            self.build_image_summary()

        # optimizer; every solver uses the decaying learning-rate variable below,
        # so the STEPSIZE decay in the training loop takes effect for all of them
        lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)
        if cfg.TRAIN.SOLVER == 'Adam':
            opt = tf.train.AdamOptimizer(lr)
        elif cfg.TRAIN.SOLVER == 'RMS':
            opt = tf.train.RMSPropOptimizer(lr)
        else:
            momentum = cfg.TRAIN.MOMENTUM
            opt = tf.train.MomentumOptimizer(lr, momentum)

        global_step = tf.Variable(0, trainable=False)
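        # clip the global gradient norm (at 10.0) to stabilize training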
        with_clip = True
        if with_clip:
            tvars = tf.trainable_variables()
            grads, norm = tf.clip_by_global_norm(tf.gradients(total_loss, tvars), 10.0)
            train_op = opt.apply_gradients(list(zip(grads, tvars)), global_step=global_step)
        else:
            train_op = opt.minimize(total_loss, global_step=global_step)

        # initialize variables
        sess.run(tf.global_variables_initializer())
        restore_iter = 0

        # load vgg16
        if self.pretrained_model is not None and not restore:
            try:
                print(('Loading pretrained model '
                   'weights from {:s}').format(self.pretrained_model))
                self.net.load(self.pretrained_model, sess, True)
            except Exception:
                raise Exception('Check your pretrained model {:s}'.format(self.pretrained_model))

        # resuming a trainer
        if restore:
            try:
                ckpt = tf.train.get_checkpoint_state(self.output_dir)
                print('Restoring from {}...'.format(ckpt.model_checkpoint_path), end=' ')
                self.saver.restore(sess, ckpt.model_checkpoint_path)
                stem = os.path.splitext(os.path.basename(ckpt.model_checkpoint_path))[0]
                restore_iter = int(stem.split('_')[-1])
                sess.run(global_step.assign(restore_iter))
                print('done')
            except Exception:
                raise Exception('Check your pretrained model {:s}'.format(ckpt.model_checkpoint_path))

        last_snapshot_iter = -1
        timer = Timer()
        for iter in range(restore_iter, max_iters):
            timer.tic()
            # decay learning rate every STEPSIZE iterations
            if iter != 0 and iter % cfg.TRAIN.STEPSIZE == 0:
                sess.run(tf.assign(lr, lr.eval() * cfg.TRAIN.GAMMA))
                print('learning rate decayed to {:.6f}'.format(lr.eval()))

            # get one batch
            blobs = data_layer.forward()

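            # run one optimization step and fetch losses plus the merged summary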
            feed_dict={
                self.net.data: blobs['data'],
                self.net.im_info: blobs['im_info'],
                self.net.keep_prob: 0.5,
                self.net.gt_boxes: blobs['gt_boxes'],
                self.net.gt_ishard: blobs['gt_ishard'],
                self.net.dontcare_areas: blobs['dontcare_areas']
            }
            res_fetches = []
            fetch_list = [total_loss, model_loss, rpn_cross_entropy, rpn_loss_box,
                          summary_op,
                          train_op] + res_fetches

            total_loss_val, model_loss_val, rpn_loss_cls_val, rpn_loss_box_val, \
                summary_str, _ = sess.run(fetches=fetch_list, feed_dict=feed_dict)

            self.writer.add_summary(summary=summary_str, global_step=global_step.eval())

            _diff_time = timer.toc(average=False)


            if iter % cfg.TRAIN.DISPLAY == 0:
                print('iter: %d / %d, total loss: %.4f, model loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, lr: %f' %
                      (iter, max_iters, total_loss_val, model_loss_val, rpn_loss_cls_val, rpn_loss_box_val, lr.eval()))
                print('speed: {:.3f}s / iter'.format(_diff_time))

            if (iter+1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = iter
                self.snapshot(sess, iter)

        if last_snapshot_iter != iter:
            self.snapshot(sess, iter)