Example #1
def eval_net(val_dataset,
             val_loader,
             net,
             detector,
             cfg,
             transform,
             max_per_image=300,
             thresh=0.01,
             batch_size=1):
    net.eval()
    st = time.time()  # wall-clock start used by the final timing printout below
    num_images = len(val_dataset)
    num_classes = cfg['num_classes']
    eval_save_folder = "./eval/"
    if not os.path.exists(eval_save_folder):
        os.mkdir(eval_save_folder)
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    det_file = os.path.join(eval_save_folder, 'detections.pkl')

    _t = {'im_detect': Timer(), 'misc': Timer()}

    if args.retest:
        with open(det_file, 'rb') as f:
            all_boxes = pickle.load(f)
        print('Evaluating detections')
        val_dataset.evaluate_detections(all_boxes, eval_save_folder)
        return

    for idx, (imgs, _, img_info) in enumerate(val_loader):
        with torch.no_grad():
            t1 = time.time()
            x = imgs.cuda()
            output = net(x)
            t4 = time.time()
            boxes, scores = detector.forward(output)
            t2 = time.time()
            for k in range(boxes.size(0)):
                i = idx * batch_size + k
                boxes_ = boxes[k]
                scores_ = scores[k]
                boxes_ = boxes_.cpu().numpy()
                scores_ = scores_.cpu().numpy()
                img_wh = img_info[k]
                scale = np.array([img_wh[0], img_wh[1], img_wh[0], img_wh[1]])
                boxes_ *= scale
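                # (assuming the detector outputs boxes normalized to [0, 1]:
                # multiplying by [w, h, w, h] maps them back to pixel coordinates)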
                for j in range(1, num_classes):
                    inds = np.where(scores_[:, j] > thresh)[0]
                    if len(inds) == 0:
                        all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                        continue
                    c_bboxes = boxes_[inds]
                    c_scores = scores_[inds, j]
                    c_dets = np.hstack(
                        (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                                    copy=False)
                    keep = nms(c_dets, 0.45, force_cpu=True)
                    keep = keep[:50]
                    c_dets = c_dets[keep, :]
                    all_boxes[j][i] = c_dets
            t3 = time.time()
            detect_time = t2 - t1
            nms_time = t3 - t2
            forward_time = t4 - t1
            if idx % 10 == 0:
                print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s {:.3f}s'.format(
                    i + 1, num_images, forward_time, detect_time, nms_time))

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    val_dataset.evaluate_detections(all_boxes, eval_save_folder)
    print("detect time: ", time.time() - st)
Example #2
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    lst = []  # holds one detection record at a time (image name, class, bbox coordinates, score)
    count_target = 0  # counts classes absent from the image; 4 means no target objects at all
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)
    fname = os.path.basename(im_file)  # image filename only, used in the output CSV

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time(), boxes.shape[0]))

    # Visualize detections for each class
    thresh = 0.8  # CONF_THRESH
    NMS_THRESH = 0.3

    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')
    cntr = -1
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(torch.from_numpy(cls_boxes), torch.from_numpy(cls_scores),
                   NMS_THRESH)
        dets = dets[keep.numpy(), :]
        inds = np.where(dets[:, -1] >= thresh)[0]
        if len(inds) == 0:
            count_target += 1  # this class is absent; four absent classes means the image has no targets
            if count_target == 4:
                writer(lst, count_target, fname)  # fname is passed separately so images without any target object still get a CSV row
            continue
        else:
            cntr += 1

        for i in inds:
            bbox = dets[i, :4]
            score = dets[i, -1]
            ax.add_patch(
                plt.Rectangle((bbox[0], bbox[1]),
                              bbox[2] - bbox[0],
                              bbox[3] - bbox[1],
                              fill=False,
                              edgecolor=COLORS[cntr % len(COLORS)],
                              linewidth=3.5))
            ax.text(bbox[0],
                    bbox[1] - 2,
                    '{:s} {:.3f}'.format(cls, score),
                    bbox=dict(facecolor='blue', alpha=0.5),
                    fontsize=14,
                    color='white')
            lst = [fname, cls, bbox[0], bbox[1], bbox[2], bbox[3], score]
            # one detection record: filename, class, bbox corners (x1, y1, x2, y2)
            # and score; the target-present column is filled in inside writer()
            # based on count_target
            writer(lst, count_target, fname)
        ax.set_title('All detections with threshold >= {:.1f}'.format(thresh),
                     fontsize=14)

        plt.axis('off')
        plt.tight_layout()
    plt.savefig(os.path.join('img_results', 'demo_' + image_name))
    if count_target == 4:
        print('No target objects present')
    print('Saved to `{}`'.format(
        os.path.join(os.getcwd(), 'img_results', 'demo_' + image_name)))
Example #3
def test_net(sess,
             net,
             imdb,
             weights_filename,
             max_per_image=300,
             thresh=0.05,
             vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
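    #    e.g. all_boxes[1][0] might end up holding
    #    np.array([[12., 34., 56., 78., 0.97]], dtype=np.float32):
    #    one class-1 detection in image 0 (an empty list means no detections)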
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, weights_filename)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois, so we select the rois
            # whose gt_classes field is set to 0, meaning there is no ground
            # truth for them.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(sess, net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        if vis:
            image = im[:, :, (2, 1, 0)]
            plt.cla()
            plt.imshow(image)

        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(image, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets
        if vis:
            save_path = os.path.join(output_dir, 'image_{:05d}.png'.format(i))
            print save_path
            plt.savefig(save_path)
            plt.close()
        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
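                # e.g. if 500 detections survive the score threshold and
                # max_per_image is 300, image_thresh is the 300th-highest score
                # and everything below it is dropped across all classes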
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
Example #4
def test_net(sess,
             net,
             imdb,
             weights_filename,
             max_per_image=300,
             thresh=0.05,
             vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    #    all_boxes_cnr[cls][image] = N x 25 array of detections in
    #    (x0-x7, y0-y7, z0-z7, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_boxes_img = [[[] for _ in xrange(num_images)]
                     for _ in xrange(imdb.num_classes)]
    all_boxes_cnr = [[[] for _ in xrange(num_images)]
                     for _ in xrange(imdb.num_classes)]
    all_calib = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_score = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    # conv1_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv1_1")
    # conv1_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv1_2")
    # conv2_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv2_1")
    # conv2_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv2_2")
    # conv3_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv3_1")
    # conv3_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv3_2")
    # conv3_3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv3_3")
    # conv4_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv4_1")
    # conv4_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv4_2")
    # conv4_3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv4_3")
    # conv5_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv5_1")
    # conv5_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv5_2")
    # conv5_3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv5_3")

    # rpn_w = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_conv/3x3")[0]
    # rpn_b = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_conv/3x3")[1]
    # rpn_w2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_cls_score")[0]
    # rpn_b2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_cls_score")[1]
    # rpn_w3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_bbox_pred")[0]
    # rpn_b3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_bbox_pred")[1]

    # weights = {
    # 'conv1_1' : {"weights" : conv1_1[0].eval(session=sess), "biases": conv1_1[1].eval(session=sess)},
    # 'conv1_2' : {"weights" : conv1_2[0].eval(session=sess), "biases": conv1_2[1].eval(session=sess)},
    # 'conv2_1' : {"weights" : conv2_1[0].eval(session=sess), "biases": conv2_1[1].eval(session=sess)},
    # 'conv2_2' : {"weights" : conv2_2[0].eval(session=sess), "biases": conv2_2[1].eval(session=sess)},
    # 'conv3_1' : {"weights" : conv3_1[0].eval(session=sess), "biases": conv3_1[1].eval(session=sess)},
    # 'conv3_2' : {"weights" : conv3_2[0].eval(session=sess), "biases": conv3_2[1].eval(session=sess)},
    # 'conv3_3' : {"weights" : conv3_3[0].eval(session=sess), "biases": conv3_3[1].eval(session=sess)},
    # 'conv4_1' : {"weights" : conv4_1[0].eval(session=sess), "biases": conv4_1[1].eval(session=sess)},
    # 'conv4_2' : {"weights" : conv4_2[0].eval(session=sess), "biases": conv4_2[1].eval(session=sess)},
    # 'conv4_3' : {"weights" : conv4_3[0].eval(session=sess), "biases": conv4_3[1].eval(session=sess)},
    # 'conv5_1' : {"weights" : conv5_1[0].eval(session=sess), "biases": conv5_1[1].eval(session=sess)},
    # 'conv5_2' : {"weights" : conv5_2[0].eval(session=sess), "biases": conv5_2[1].eval(session=sess)},
    # 'conv5_3' : {"weights" : conv5_3[0].eval(session=sess), "biases": conv5_3[1].eval(session=sess)},

    # 'rpn_conv/3x3' : {"weights" : rpn_w.eval(session=sess), "biases": rpn_b.eval(session=sess)},
    # 'rpn_cls_score' : {"weights" : rpn_w2.eval(session=sess), "biases": rpn_b2.eval(session=sess)},
    # 'rpn_bbox_pred' : {"weights" : rpn_w3.eval(session=sess), "biases": rpn_b3.eval(session=sess)},
    # }
    # # print rpn_w.eval(session=sess)
    # np.save('rpn_data.npy', weights)

    # deconv2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="deconv_4x_1")[0]
    # shape_conv5_3 = conv5_3.get_shape().as_list()
    # shape1 = deconv1.get_shape().as_list()
    # shape2 = deconv2.get_shape().as_list()
    # print 'conv5_3 shape', shape_conv5_3
    # print 'deconv_2x_1 shape', shape1
    # print 'deconv_4x_1 shape', shape2

    for i in xrange(num_images):

        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None

        im = cv2.imread(imdb.image_path_at(i))
        bv = np.load(imdb.lidar_path_at(i))
        lidar3D = imdb.lidar3D_path_at(i)
        GT_boxes3D_corners = imdb.GT_annotation_at(i)["boxes_corners"]
        GT_boxes3D_camera_corners = imdb.GT_annotation_at(
            i)["boxes3D_cam_corners"]

        print "GT_boxes3D_corners", GT_boxes3D_corners
        # print "GT_boxes3D_camera_corners:",GT_boxes3D_camera_corners

        calib = imdb.calib_at(i)

        print "Inference: ", imdb.lidar_path_at(i)

        _t['im_detect'].tic()
        scores, boxes_bv, boxes_cnr, boxes_cnr_r = box_detect(
            sess, net, im, bv, calib, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        if vis:
            image = im[:, :, (2, 1, 0)]
            plt.cla()
            plt.imshow(image)

        thresh = 0.05
        #thresh = 0.8

        # skip j = 0, because it's the background class

        #for j in xrange(1, imdb.num_classes):
        for j in xrange(1, 2):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]

            #cls_boxes = boxes_bv[inds, j*4:(j+1)*4]
            #cls_boxes_cnr = boxes_cnr[inds, j * 24:(j + 1) * 24]
            cls_boxes = boxes_bv[inds, 0:4]
            cls_boxes_cnr = boxes_cnr[inds, 0:24]

            cls_boxes_cnr_r = boxes_cnr_r[inds, j * 24:(j + 1) * 24]

            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            cls_dets_cnr = np.hstack((cls_boxes_cnr, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            cls_dets_cnr_r = np.hstack((cls_boxes_cnr_r, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)

            # print "scores: ", scores.shape
            # print "cls_scores: ",cls_scores.shape
            # print "boxes_bv: ", boxes_bv.shape
            # print "cls_dets: ", cls_dets.shape
            # print "inds: ",inds.shape
            # print "boxes_cnr: ", boxes_cnr.shape
            # print "cls_dets_cnr: ",cls_dets_cnr.shape

            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            cls_dets_cnr = cls_dets_cnr[keep, :]
            cls_dets_cnr_r = cls_dets_cnr_r[keep, :]
            cls_scores = cls_scores[keep]
            #img_boxes = cls_dets_cnr_r[:,4]
            # project to image
            if np.any(cls_dets_cnr):

                plt.rcParams['figure.figsize'] = (10, 10)

                img_boxes = lidar_cnr_to_img(cls_dets_cnr_r[:, :24], calib[3],
                                             calib[2], calib[0])

                img = show_image_boxes(im, img_boxes)
                # plt.imshow(img)
                # plt.show()

                all_boxes[j][i] = img_boxes

                image_bv = show_image_boxes(
                    scale_to_255(bv[:, :, 8], min=0, max=2), cls_dets[:, :4])
                image_cnr = show_lidar_corners(im, cls_dets_cnr_r[:, :24],
                                               calib)

                if True:  # 3D visualization with mayavi
                    import mayavi.mlab as mlab

                    #filename = os.path.join(imdb.lidar_path_at(i)[:-19], 'velodyne', str(3).zfill(6)+'.bin')
                    filename = lidar3D
                    print filename
                    scan = np.fromfile(filename, dtype=np.float32)
                    scan = scan.reshape((-1, 4))
                    corners = cls_dets_cnr[:, :24].reshape(
                        (-1, 3, 8)).transpose((0, 2, 1))

                    corners_r = cls_dets_cnr_r[:, :24].reshape(
                        (-1, 3, 8)).transpose((0, 2, 1))
                    GT_corners = GT_boxes3D_corners[:, :24].reshape(
                        (-1, 3, 8)).transpose((0, 2, 1))

                    # print corners_r
                    # print GT_corners

                    #print GT_corners
                    #camera_cors_r = lidar_cnr_to_camera(corners_r,calib[3])
                    fig = mlab.figure(figure=None,
                                      bgcolor=(0, 0, 0),
                                      fgcolor=None,
                                      engine=None,
                                      size=(1000, 500))
                    draw_lidar(scan, fig=fig)
                    draw_gt_boxes3d(corners, fig=fig)
                    draw_gt_boxes3d(corners_r, color=(1, 0, 0), fig=fig)
                    draw_gt_boxes3d(GT_corners, color=(0, 1, 0), fig=fig)
                    mlab.show()

                    # plt.subplot(211)
                    # plt.title('bv proposal')
                    # plt.imshow(image_bv, cmap='jet')
                    # plt.subplot(212)
                    # plt.imshow(image_cnr)
                    # plt.show()

            all_boxes_cnr[j][i] = cls_dets_cnr_r[:, :24]
            all_calib[j][i] = calib[3]
            all_score[j][i] = cls_scores

        # if vis:
        #    plt.show()
        # # Limit to max_per_image detections *over all classes*
        # if max_per_image > 0:
        #     image_scores = np.hstack([all_boxes[j][i][:, -1]
        #                               for j in xrange(1, imdb.num_classes)])
        #     if len(image_scores) > max_per_image:
        #         image_thresh = np.sort(image_scores)[-max_per_image]
        #         for j in xrange(1, imdb.num_classes):
        #             keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
        #             all_boxes[j][i] = all_boxes[j][i][keep, :]
        #             # all_boxes_img[j][i] = all_boxes_img[j][i][keep, :]
        #             all_boxes_cnr[j][i] = all_boxes_cnr[j][i][keep, :]

        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    det_cnr_file = os.path.join(output_dir, 'detections_cnr.pkl')
    with open(det_cnr_file, 'wb') as f:
        cPickle.dump(all_boxes_cnr, f, cPickle.HIGHEST_PROTOCOL)

    #print 'Evaluating detections'
    #imdb.evaluate_detections(all_boxes, all_boxes_cnr, output_dir)

    imdb.evaluate_detections3D(all_boxes, all_boxes_cnr, all_calib, all_score,
                               output_dir)
Example #5
def demo(sess, net, image_name, CONF_THRESHES):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', 'Images', image_name)
    im = cv2.imread(im_file, cv2.IMREAD_UNCHANGED)

    scene_name = image_name[:10]  # 'scene_0021'
    # scene_index = scene_name[-4:]
    image_index = image_name[11:15]  # '0003'

    theta, true_polygon_list = get_true_grasps(scene_name, image_index)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)

    timer.toc()
    print('Detection took {:.3f}s'.format(timer.total_time))

    NMS_THRESH = 0.3
    num_conf_threshes = len(CONF_THRESHES)
    # Count the TP num and total num of the image
    image_true_positive_num = np.zeros(
        num_conf_threshes
    )  # number of true positive proposed grasps in the image for EACH CONF_THRESH
    image_total_num = np.zeros(
        num_conf_threshes
    )  # number of proposed grasps in the image for EACH CONF_THRESH
    image_total_gt_num = theta.shape[
        0]  # number of ground-truth grasps in the image (a scalar, not an array)
    detected_gt_grasp_index = [
        set() for i in range(num_conf_threshes)
    ]  # the list of sets of indexes of the detected ground truth grasps
    detected_gt_grasp_num = np.zeros(
        num_conf_threshes
    )  # number of detected ground truth grasps in the image for EACH THRESH

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        class_true_positive_num = np.zeros(num_conf_threshes)
        class_total_num = np.zeros(num_conf_threshes)
        for i in range(num_conf_threshes):
            CONF_THRESH = CONF_THRESHES[i]
            class_true_positive_num[i], class_total_num[
                i] = count_true_positive(cls,
                                         dets,
                                         theta,
                                         true_polygon_list,
                                         detected_gt_grasp_index[i],
                                         thresh=CONF_THRESH)
        image_true_positive_num += class_true_positive_num
        image_total_num += class_total_num

    for j in range(num_conf_threshes):
        detected_gt_grasp_num[j] = len(detected_gt_grasp_index[j])

    return image_true_positive_num, image_total_num, image_total_gt_num, detected_gt_grasp_num
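The four arrays returned above are presumably accumulated over a test set and turned into one precision/recall point per confidence threshold. A sketch under that assumption (sess, net, CONF_THRESHES and image_names stand in for the caller's own objects):

import numpy as np

total_tp = np.zeros(len(CONF_THRESHES))        # true-positive proposed grasps per threshold
total_proposed = np.zeros(len(CONF_THRESHES))  # all proposed grasps per threshold
total_gt = 0                                   # ground-truth grasps, summed over images
total_detected_gt = np.zeros(len(CONF_THRESHES))

for image_name in image_names:  # hypothetical list of test image names
    tp, proposed, gt, detected_gt = demo(sess, net, image_name, CONF_THRESHES)
    total_tp += tp
    total_proposed += proposed
    total_gt += gt
    total_detected_gt += detected_gt

precision = total_tp / np.maximum(total_proposed, 1)  # fraction of proposals that are correct
recall = total_detected_gt / max(total_gt, 1)         # fraction of ground-truth grasps found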
Example #6
            num_devices=cfg.NUM_DEVICES,
        )

    # do validation by default
    if True:
        val_model, _, _ = \
            model_builder_rel.create(cfg.MODEL.MODEL_NAME, train=False, split='val')
        logger.info('Validation model built.')
        total_val_iters = int(
            math.ceil(
                float(len(val_model.roi_data_loader._roidb)) /
                float(cfg.NUM_DEVICES))) + 5
        val_evaluator = evaluator_rel.Evaluator(
            split=cfg.VAL.DATA_TYPE,
            roidb_size=len(val_model.roi_data_loader._roidb))
        val_timer = Timer()
        logger.info('Val epoch iters: {}'.format(total_val_iters))

        accumulated_accs = {}
        # wins are for showing different plots
        wins = {}
        for key in val_evaluator.__dict__.keys():
            if key.find('acc') >= 0:
                accumulated_accs[key] = []
                wins[key] = None

    prev_checkpointed_lr = None

    lr_iters = model_builder_rel.get_lr_steps()
Example #7
def train(cfg):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
    drop_last = True

    dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST,
                         mode=ModelPhase.TRAIN,
                         shuffle=True,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # When the sync batch norm strategy is in use, drop the last batch if it
        # holds fewer than cfg.BATCH_SIZE samples, to avoid NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPU
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE is divisible by the number of GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisible by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # In multi-GPU training mode, batch data is allocated evenly to each GPU
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    data_loader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    build_model(test_prog, fluid.Program(), phase=ModelPhase.EVAL)
    data_loader.set_sample_generator(data_generator,
                                     batch_size=batch_size_per_dev,
                                     drop_last=drop_last)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    # Clear temporary variables every 100 iterations
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()

    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        load_pretrained_weights(exe, train_prog,
                                cfg.TRAIN.PRETRAINED_MODEL_DIR)
    else:
        print_info(
            'Pretrained model dir {} does not exist, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(precision=4,
                            suppress=True,
                            linewidth=160,
                            floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_vdl:
        if not args.vdl_log_dir:
            print_info("Please specify the log directory by --vdl_log_dir.")
            exit(1)

        from visualdl import LogWriter
        log_writer = LogWriter(args.vdl_log_dir)

    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and not drop_last:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    avg_loss = 0.0
    best_mIoU = 0.0

    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError((
            "begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        data_loader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # training process is behaving as expected
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    step += 1

                    if step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_vdl:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  step)
                            log_writer.add_scalar('Train/loss', avg_loss, step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/step/sec', speed,
                                                  step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnecessary logging and calculation
                    loss, lr = exe.run(program=compiled_train_prog,
                                       fetch_list=fetch_list,
                                       return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    step += 1

                    if step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - step, speed)))
                        if args.use_vdl:
                            log_writer.add_scalar('Train/loss', avg_loss, step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/speed', speed, step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

                    # NOTE : used for benchmark, profiler tools
                    if args.is_profiler and epoch == 1 and step == args.log_steps:
                        profiler.start_profiler("All")
                    elif args.is_profiler and epoch == 1 and step == args.log_steps + 5:
                        profiler.stop_profiler("total", args.profiler_path)
                        return

            except fluid.core.EOFException:
                data_loader.reset()
                break
            except Exception as e:
                print(e)

        if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
                or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(train_prog, epoch)
            save_infer_program(test_prog, ckpt_dir)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(cfg=cfg,
                                                    ckpt_dir=ckpt_dir,
                                                    use_gpu=args.use_gpu,
                                                    use_mpio=args.use_mpio)
                if args.use_vdl:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step)

                if mean_iou > best_mIoU:
                    best_mIoU = mean_iou
                    update_best_model(ckpt_dir)
                    print_info(
                        "Save best model {} to {}, mIoU = {:.4f}".format(
                            ckpt_dir,
                            os.path.join(cfg.TRAIN.MODEL_SAVE_DIR,
                                         'best_model'), mean_iou))

            # Use VisualDL to visualize results
            if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(cfg=cfg,
                          use_gpu=args.use_gpu,
                          vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                          vis_dir="visual",
                          ckpt_dir=ckpt_dir,
                          log_writer=log_writer)

    # save final model
    if cfg.TRAINER_ID == 0:
        ckpt_dir = save_checkpoint(train_prog, 'final')
        save_infer_program(test_prog, ckpt_dir)
Example #8
    def testxx(self):
        print('Testing...', self.base_folder)
        video_frames = glob.glob(self.base_folder + '*.tif')
        video_frames = sorted([os.path.splitext(os.path.basename(frame))[0][1:] for frame in video_frames])
        #video_frames = video_frames[100:]

        #coord_factors = 0.001
        #min_cluster_size = 100
        #min_samples = 100
        #min_label_size_per_stack = 100
        tracker = EmbeddingTracker(coord_factors=self.coord_factors,
                                   stack_neighboring_slices=2,
                                   min_cluster_size=self.min_samples,
                                   min_samples=self.min_samples,
                                   min_label_size_per_stack=self.min_samples / 2,
                                   save_label_stack=True,
                                   image_ignore_border=self.border_size,
                                   parent_search_dilation_size=self.parent_dilation,
                                   max_parent_search_frames=self.parent_frame_search)

        first = True
        current_predictions = []
        current_predictions_2 = []
        current_images = []
        # reset_every_frames = 20
        for i, video_frame in enumerate(video_frames):
            #if int(video_frame) < 150 or int(video_frame) > 250:
            #    continue
            with Timer('processing video frame ' + str(video_frame)):
                dataset_entry = self.dataset_val.get({'image_id': video_frame})
                datasources = dataset_entry['datasources']
                generators = dataset_entry['generators']
                feed_dict = {self.data_val: np.expand_dims(generators['image'], axis=0)}
                # run loss and update loss accumulators
                if not first:
                    # use a distinct loop variable so the enumerate index i above is not shadowed
                    for state_idx in range(len(self.lstm_input_states_val)):
                        feed_dict[self.lstm_input_states_val[state_idx]] = current_lstm_states[state_idx]

                run_tuple = self.sess.run([self.embeddings_normalized_val, self.embeddings_normalized_2_val] + list(self.lstm_output_states_val), feed_dict=feed_dict)
                # print(iv[0].decode())
                embeddings_softmax = np.squeeze(run_tuple[0], axis=0)
                embeddings_softmax_2 = np.squeeze(run_tuple[1], axis=0)
                current_lstm_states = run_tuple[2:]
                #current_predictions.append(embeddings_softmax)
                #current_predictions_2.append(embeddings_softmax_2)
                current_images.append(generators['image'])
                # current_instances.append(instance_segmentation_test.get_instances_cosine_kmeans_2d(embeddings_softmax))
                first = False

                datasources = dataset_entry['datasources']
                input_image = datasources['image']
                transformations = dataset_entry['transformations']
                transformation = transformations['image']
                # embeddings_original = utils.sitk_image.transform_np_output_to_sitk_input(embeddings_softmax_2,
                #                                                                        output_spacing=None,
                #                                                                        channel_axis=2,
                #                                                                        input_image_sitk=input_image,
                #                                                                        transform=transformation,
                #                                                                        interpolator='linear',
                #                                                                        output_pixel_type=sitk.sitkFloat32)
                # embeddings_softmax_2 = utils.sitk_np.sitk_list_to_np(embeddings_original, axis=2)

                current_predictions_2.append(embeddings_softmax_2)

                # if not first and i % reset_every_frames != 0:
                #     run_tuple = self.sess.run([self.embeddings_normalized_val, self.embeddings_normalized_2_val] + list(self.lstm_output_states_val), feed_dict=feed_dict)
                #     embeddings_softmax_2 = np.squeeze(run_tuple[1], axis=0)
                #     tracker.add_reset_slice(np.transpose(embeddings_softmax_2, [2, 0, 1]))


        # prediction = np.stack(current_predictions, axis=self.time_stack_axis)
        # del current_predictions
        # utils.io.image.write_np(prediction, os.path.join(self.output_folder, 'embeddings.mha'), self.data_format)
        # del prediction
        prediction_2 = np.stack(current_predictions_2, axis=self.time_stack_axis)
        del current_predictions_2
        utils.io.image.write_np(prediction_2, os.path.join(self.output_folder, 'embeddings_2.mha'), self.data_format)
        del prediction_2
        images = np.stack(current_images, axis=self.time_stack_axis)
        del current_images
        utils.io.image.write_np(images, os.path.join(self.output_folder, 'image.mha'), self.data_format)
        del images
        transformations = dataset_entry['transformations']
        transformation = transformations['image']
        sitk.WriteTransform(transformation, os.path.join(self.output_folder, 'transform.txt'))
Example #9
    def test(self):
        print('Testing...', self.base_folder)
        video_frames = glob.glob(self.base_folder + '*.tif')
        video_frames = sorted([os.path.splitext(os.path.basename(frame))[0][1:] for frame in video_frames])
        video_frames = video_frames[:5]

        #coord_factors = 0.001
        #min_cluster_size = 100
        #min_samples = 100
        #min_label_size_per_stack = 100
        tracker = EmbeddingTracker(coord_factors=self.coord_factors,
                                   stack_neighboring_slices=2,
                                   min_cluster_size=self.min_samples,
                                   min_samples=self.min_samples,
                                   min_label_size_per_stack=self.min_samples / 2,
                                   save_label_stack=True,
                                   image_ignore_border=self.border_size,
                                   parent_search_dilation_size=self.parent_dilation,
                                   max_parent_search_frames=self.parent_frame_search)

        first = True
        current_predictions = []
        current_predictions_2 = []
        current_images = []
        # reset_every_frames = 20
        for i, video_frame in enumerate(video_frames):
            #if int(video_frame) < 150 or int(video_frame) > 250:
            #    continue
            with Timer('processing video frame ' + str(video_frame)):
                dataset_entry = self.dataset_val.get({'image_id': video_frame})
                datasources = dataset_entry['datasources']
                generators = dataset_entry['generators']
                feed_dict = {self.data_val: np.expand_dims(generators['image'], axis=0)}
                # run loss and update loss accumulators
                if not first:
                    # use a distinct loop variable so the enumerate index i above is not shadowed
                    for state_idx in range(len(self.lstm_input_states_val)):
                        feed_dict[self.lstm_input_states_val[state_idx]] = current_lstm_states[state_idx]

                run_tuple = self.sess.run([self.embeddings_normalized_val, self.embeddings_normalized_2_val] + list(self.lstm_output_states_val), feed_dict=feed_dict)
                # print(iv[0].decode())
                embeddings_softmax = np.squeeze(run_tuple[0], axis=0)
                embeddings_softmax_2 = np.squeeze(run_tuple[1], axis=0)
                current_lstm_states = run_tuple[2:]
                #current_predictions.append(embeddings_softmax)
                #current_predictions_2.append(embeddings_softmax_2)
                current_images.append(generators['image'])
                # current_instances.append(instance_segmentation_test.get_instances_cosine_kmeans_2d(embeddings_softmax))
                first = False

                datasources = dataset_entry['datasources']
                input_image = datasources['image']
                transformations = dataset_entry['transformations']
                transformation = transformations['image']
                # embeddings_original = utils.sitk_image.transform_np_output_to_sitk_input(embeddings_softmax_2,
                #                                                                        output_spacing=None,
                #                                                                        channel_axis=2,
                #                                                                        input_image_sitk=input_image,
                #                                                                        transform=transformation,
                #                                                                        interpolator='linear',
                #                                                                        output_pixel_type=sitk.sitkFloat32)
                # embeddings_softmax_2 = utils.sitk_np.sitk_list_to_np(embeddings_original, axis=2)

                current_predictions_2.append(embeddings_softmax_2)

                tracker.add_slice(np.transpose(embeddings_softmax_2, [2, 0, 1]))

                if tracker.stacked_label_image is not None:
                    utils.io.image.write_np(tracker.stacked_label_image, os.path.join(self.output_folder, 'merged.mha'))

                # if not first and i % reset_every_frames != 0:
                #     run_tuple = self.sess.run([self.embeddings_normalized_val, self.embeddings_normalized_2_val] + list(self.lstm_output_states_val), feed_dict=feed_dict)
                #     embeddings_softmax_2 = np.squeeze(run_tuple[1], axis=0)
                #     tracker.add_reset_slice(np.transpose(embeddings_softmax_2, [2, 0, 1]))


        # prediction = np.stack(current_predictions, axis=self.time_stack_axis)
        # del current_predictions
        # utils.io.image.write_np(prediction, os.path.join(self.output_folder, 'embeddings.mha'), self.data_format)
        # del prediction
        prediction_2 = np.stack(current_predictions_2, axis=self.time_stack_axis)
        del current_predictions_2
        utils.io.image.write_np(prediction_2, os.path.join(self.output_folder, 'embeddings_2.mha'), self.data_format)
        del prediction_2
        images = np.stack(current_images, axis=self.time_stack_axis)
        del current_images
        utils.io.image.write_np(images, os.path.join(self.output_folder, 'image.mha'), self.data_format)
        del images
        transformations = dataset_entry['transformations']
        transformation = transformations['image']
        sitk.WriteTransform(transformation, os.path.join(self.output_folder, 'transform.txt'))

        #if self.data_format == 'channels_last':
        #    prediction_2 = np.transpose(prediction_2, [3, 0, 1, 2])


        # two_slices = tracker.get_instances_cosine_dbscan_slice_by_slice(prediction_2)
        # utils.io.image.write_np(two_slices, os.path.join(self.output_folder, 'two_slices.mha'))
        # merged = tracker.merge_consecutive_slices(two_slices, slice_neighbour_size=2)
        # utils.io.image.write_np(merged, os.path.join(self.output_folder, 'merged.mha'), self.data_format)


        datasources = dataset_entry['datasources']
        input_image = datasources['image']
        if self.sigma == 1:
            interpolator = 'label_gaussian'
        else:
            interpolator = 'nearest'

        merged = tracker.stacked_label_image
        final_predictions = utils.sitk_image.transform_np_output_to_sitk_input(merged,
                                                                               output_spacing=None,
                                                                               channel_axis=0,
                                                                               input_image_sitk=input_image,
                                                                               transform=transformation,
                                                                               interpolator=interpolator,
                                                                               output_pixel_type=sitk.sitkUInt16)
        tracker.stacked_label_image = np.stack([utils.sitk_np.sitk_to_np(sitk_im) for sitk_im in final_predictions], axis=0)
        tracker.finalize()
        final_predictions = [utils.sitk_np.np_to_sitk(sitk_im) for sitk_im in tracker.stacked_label_image]
        track_tuples = tracker.track_tuples

        #final_predictions = [utils.sitk_np.np_to_sitk(np.squeeze(im), type=np.uint16) for im in np.split(merged, merged.shape[0], axis=0)]
        #final_predictions_smoothed_2 = [utils.sitk_image.apply_np_image_function(im, lambda x: self.label_smooth(x, sigma=2)) for im in final_predictions]
        if self.sigma > 1:
            final_predictions = [utils.sitk_image.apply_np_image_function(im, lambda x: self.label_smooth(x, sigma=self.sigma)) for im in final_predictions]

        for video_frame, final_prediction in zip(video_frames, final_predictions):
            utils.io.image.write(final_prediction, os.path.join(self.output_folder, self.image_prefix + video_frame + '.tif'))

        utils.io.image.write_np(np.stack(tracker.label_stack_list, axis=1), os.path.join(self.output_folder, 'label_stack.mha'))

        final_predictions_stacked = utils.sitk_image.accumulate(final_predictions)
        utils.io.image.write(final_predictions_stacked, os.path.join(self.output_folder, 'stacked.mha'))
        #utils.io.image.write(utils.sitk_image.accumulate(final_predictions_smoothed_2), os.path.join(self.output_folder, 'stacked_2.mha'))
        #utils.io.image.write(utils.sitk_image.accumulate(final_predictions_smoothed_4), os.path.join(self.output_folder, 'stacked_4.mha'))

        print(track_tuples)
        utils.io.text.save_list_csv(track_tuples, os.path.join(self.output_folder, self.track_file_name), delimiter=' ')
Example #10
def demo(sess, net, im_file, vis_file, fits_fn, conf_thresh=0.8, eval_class=True):
    """
    Detect object classes in an image using pre-computed object proposals.
    im_file:    The "fused" image file path
    vis_file:   The background image file on which detections are laid.
                Normally, this is just the IR image file path
    fits_fn:    The FITS file path
    eval_class: True - use traditional per class-based evaluation style
                False - use per RoI-based evaluation

    """
    show_img_size = cfg.TEST.SCALES[0]
    if (not os.path.exists(im_file)):
        print('%s cannot be found' % (im_file))
        return -1
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    image_name = osp.basename(im_file)
    scores, boxes = im_detect(sess, net, im, save_vis_dir=None,
                             img_name=os.path.splitext(image_name)[0])
    boxes *= float(show_img_size) / float(im.shape[0])
    timer.toc()
    sys.stdout.write('Done in {:.3f} secs'.format(timer.total_time))
    sys.stdout.flush()
    print(scores)

    im = cv2.imread(vis_file)

    my_dpi = 100
    fig = plt.figure()
    fig.set_size_inches(show_img_size / my_dpi, show_img_size / my_dpi)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    ax.set_xlim([0, show_img_size])
    ax.set_ylim([show_img_size, 0])
    #ax.set_aspect('equal')
    im = cv2.resize(im, (show_img_size, show_img_size))
    im = im[:, :, (2, 1, 0)]
    ax.imshow(im, aspect='equal')
    if (fits_fn is not None):
        patch_contour = fuse(fits_fn, im, None, sigma_level=4, mask_ir=False,
                             get_path_patch_only=True)
        ax.add_patch(patch_contour)
    NMS_THRESH = cfg.TEST.NMS #cfg.TEST.RPN_NMS_THRESH # 0.3

    tt_vis = 0
    bbox_img = []
    bscore_img = []
    num_sources = 0
    #if (eval_class):
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind : 4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis]))#.astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        num_sources += vis_detections(im, cls, dets, ax, thresh=conf_thresh)
        #dets = np.hstack((dets, np.ones([dets.shape[0], 1]) * cls_ind))
        # if (dets.shape[0] > 0):
        #     bbox_img.append(dets)
        #     bscore_img.append(np.reshape(dets[:, -2], [-1, 1]))
    # else:
    #     for eoi_ind, eoi in enumerate(boxes):
    #         eoi_scores = scores[eoi_ind, 1:] # skip background
    #         cls_ind = np.argmax(eoi_scores) + 1 # add the background index back
    #         cls_boxes = boxes[eoi_ind, 4 * cls_ind : 4 * (cls_ind + 1)]
    #         cls_scores = scores[eoi_ind, cls_ind]
    #         dets = np.hstack((np.reshape(cls_boxes, [1, -1]),
    #                           np.reshape(cls_scores, [-1, 1])))#.astype(np.float32)
    #         dets = np.hstack((dets, np.ones([dets.shape[0], 1]) * cls_ind))
    #         bbox_img.append(dets)
    #         bscore_img.append(np.reshape(dets[:, -2], [-1, 1]))
    #
    # boxes_im = np.vstack(bbox_img)
    # scores_im = np.vstack(bscore_img)
    #
    # #if (not eval_class):
    # # a numpy float is a C double, so need to use float32
    # keep = nms(boxes_im[:, :-1].astype(np.float32), NMS_THRESH)
    # boxes_im = boxes_im[keep, :]
    # scores_im = scores_im[keep, :]
    #
    # keep_indices = range(boxes_im.shape[0])
    #num_sources = vis_detections(im, None, boxes_im[keep_indices, :], ax, thresh=conf_thresh)

    print(', found %d sources' % num_sources)
    return 0
Example #11
def test_net3(net, imdb, all_boxes2_name):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # heuristic: keep an average of cfg.TEST.MAX_PER_SET_F detections per class
    # per image prior to NMS
    max_per_set = cfg.TEST.MAX_PER_SET_F * num_images
    # heuristic: keep at most cfg.TEST.MAX_PER_IMAGE detections per class per
    # image prior to NMS
    max_per_image = cfg.TEST.MAX_PER_IMAGE

    # detection threshold for each class (this is adaptively set based on the
    # max_per_set constraint)
    thresh = -np.inf * np.ones(imdb.num_classes)
    # top_scores will hold one minheap of scores per class (used to enforce
    # the max_per_set constraint)
    top_scores = [[] for _ in xrange(imdb.num_classes)]
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    roidb = imdb.roidb

    # another scores
    d = g_utils.load_variables(all_boxes2_name)
    all_boxes2 = d['all_boxes']

    for i in xrange(num_images):
        image_paths = imdb.image_path_at(i)
        im = []
        for image_path in image_paths:
            image_path2 = image_path + '_norm.png'
            im1 = cv2.imread(image_path)
            im2 = cv2.imread(image_path2)
            ims = np.zeros((im1.shape[0], im1.shape[1], 6))
            # TODO: to test on lua pre-trained model use:
            im1 = im1[:, :, ::-1]
            im2 = im2[:, :, ::-1]
            ims[:, :, 0:3] = im1
            ims[:, :, 3:6] = im2

            im.append(ims)

        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im, roidb[i]['boxes'])
        _t['im_detect'].toc()

        _t['misc'].tic()

        for j in xrange(1, imdb.num_classes):
            # adding another scores
            scores[:, j] = (scores[:, j] + all_boxes2[j][i][:, 4]) / 2
            boxes[:, j * 4:(j + 1) * 4] = (boxes[:, j * 4:(j + 1) * 4] +
                                           all_boxes2[j][i][:, 0:4]) / 2
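            # (a simple late fusion: scores and box coordinates from this
            # network are averaged element-wise with those of the second model
            # loaded into all_boxes2 above)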

            inds = np.where((scores[:, j] > thresh[j])
                            & (roidb[i]['gt_classes'] == 0))[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]

            top_inds = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[top_inds]
            cls_boxes = cls_boxes[top_inds, :]

            # push new scores onto the minheap
            for val in cls_scores:
                heapq.heappush(top_scores[j], val)
            # if we've collected more than the max number of detections,
            # then pop items off the minheap and update the class threshold
            if len(top_scores[j]) > max_per_set:
                while len(top_scores[j]) > max_per_set:
                    heapq.heappop(top_scores[j])
                thresh[j] = top_scores[j][0]

            all_boxes[j][i] = \
                    np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)

            if 0:
                keep = nms(all_boxes[j][i], 0.3)
                vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    for j in xrange(1, imdb.num_classes):
        for i in xrange(num_images):
            inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0]
            all_boxes[j][i] = all_boxes[j][i][inds, :]

    det_file = os.path.join(output_dir,
                            'detections' + cfg.TEST.DET_SALT + '.pkl')
    g_utils.save_variables(det_file, [all_boxes], ['all_boxes'],
                           overwrite=True)

    det_file = os.path.join(output_dir,
                            'detections' + cfg.TEST.DET_SALT + '.pkl')
    g_utils.scio.savemat(det_file, {'all_boxes': all_boxes},
                         do_compression=True)

    print 'Applying NMS to all detections'
    nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)

    print 'Evaluating detections'
    ap, prec, rec, classes, class_to_ind = imdb.evaluate_detections(
        nms_dets, output_dir, cfg.TEST.DET_SALT, cfg.TEST.EVAL_SALT)
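
The max_per_set bookkeeping in test_net3 is easy to get wrong, so here is a minimal, self-contained sketch of the same idea (update_class_threshold is a hypothetical helper, not from the repository): a per-class min-heap retains only the best max_per_set scores seen so far, and its root becomes that class's detection threshold.

import heapq

import numpy as np


def update_class_threshold(top_scores, new_scores, max_per_set):
    # push the new scores, then shrink the heap back to max_per_set;
    # the heap root is the smallest retained score, i.e. the threshold
    for val in new_scores:
        heapq.heappush(top_scores, float(val))
    if len(top_scores) <= max_per_set:
        return -np.inf
    while len(top_scores) > max_per_set:
        heapq.heappop(top_scores)
    return top_scores[0]


heap = []
thresh = update_class_threshold(heap, np.random.rand(100), max_per_set=40)
assert len(heap) == 40 and thresh == heap[0]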
Example #12
    def train_model(self, sess, max_iters):
        """Network training loop."""

        data_layer = get_data_layer(self.roidb, self.imdb.num_classes)

        # classification loss
        cls_score = self.net.get_output('cls_score')
        label = tf.placeholder(tf.int32, shape=[None])
        cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(cls_score, label))

        # subcategory classification loss
        if cfg.TRAIN.SUBCLS:
            subcls_score = self.net.get_output('subcls_score')
            sublabel = tf.placeholder(tf.int32, shape=[None])
            subcls_cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(subcls_score, sublabel))

        # bounding box regression L1 loss
        bbox_pred = self.net.get_output('bbox_pred')
        bbox_targets = tf.placeholder(tf.float32, shape=[None, 4 * self.imdb.num_classes])
        bbox_weights = tf.placeholder(tf.float32, shape=[None, 4 * self.imdb.num_classes])
        loss_box = tf.reduce_mean(tf.reduce_sum(tf.mul(bbox_weights, tf.abs(tf.sub(bbox_pred, bbox_targets))), reduction_indices=[1]))

        # multi-task loss
        if cfg.TRAIN.SUBCLS:
            loss = cross_entropy + subcls_cross_entropy + loss_box
        else:
            loss = cross_entropy + loss_box

        # optimizer
        lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)
        momentum = cfg.TRAIN.MOMENTUM
        train_op = tf.train.MomentumOptimizer(lr, momentum).minimize(loss)

        # initialize variables
        sess.run(tf.initialize_all_variables())
        if self.pretrained_model is not None:
            print ('Loading pretrained model '
                   'weights from {:s}').format(self.pretrained_model)
            self.net.load(self.pretrained_model, sess, True)

        last_snapshot_iter = -1
        timer = Timer()
        for iter in range(max_iters):
            # learning rate
            if iter >= cfg.TRAIN.STEPSIZE:
                sess.run(tf.assign(lr, cfg.TRAIN.LEARNING_RATE * cfg.TRAIN.GAMMA))
            else:
                sess.run(tf.assign(lr, cfg.TRAIN.LEARNING_RATE))

            # get one batch
            blobs = data_layer.forward()

            # Make one SGD update
            if cfg.TRAIN.SUBCLS:
                feed_dict={self.net.data: blobs['data'], self.net.rois: blobs['rois'], self.net.keep_prob: 0.5, \
                           label: blobs['labels'], sublabel: blobs['sublabels'], bbox_targets: blobs['bbox_targets'], bbox_weights: blobs['bbox_inside_weights']}
            else:
                feed_dict={self.net.data: blobs['data'], self.net.rois: blobs['rois'], self.net.keep_prob: 0.5, \
                           label: blobs['labels'], bbox_targets: blobs['bbox_targets'], bbox_weights: blobs['bbox_inside_weights']}
            
            timer.tic()
            if cfg.TRAIN.SUBCLS:
                loss_cls_value, loss_subcls_value, loss_box_value, _ = sess.run([cross_entropy, subcls_cross_entropy, loss_box, train_op], feed_dict=feed_dict)
            else:
                loss_cls_value, loss_box_value, _ = sess.run([cross_entropy, loss_box, train_op], feed_dict=feed_dict)
            timer.toc()

            if cfg.TRAIN.SUBCLS:
                print 'iter: %d / %d, loss_cls: %.4f, loss_subcls: %.4f, loss_box: %.4f, lr: %f, time: %f' %\
                    (iter+1, max_iters, loss_cls_value, loss_subcls_value, loss_box_value, lr.eval(), timer.diff)
            else:
                print 'iter: %d / %d, loss_cls: %.4f, loss_box: %.4f, lr: %f' %\
                    (iter+1, max_iters, loss_cls_value, loss_box_value, lr.eval())

            if (iter+1) % (10 * cfg.TRAIN.DISPLAY) == 0:
                print 'speed: {:.3f}s / iter'.format(timer.average_time)

            if (iter+1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = iter
                self.snapshot(sess, iter)

        if last_snapshot_iter != iter:
            self.snapshot(sess, iter)
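
The box loss above is built with the long-deprecated tf.mul/tf.sub names from early TensorFlow. For reference, the same objective written out in plain NumPy (a sketch of the math, not the training code): an elementwise-weighted L1 distance summed over the 4 * num_classes coordinates and averaged over the batch.

import numpy as np


def weighted_l1_box_loss(bbox_pred, bbox_targets, bbox_weights):
    # per-RoI sum of weighted absolute errors, then a batch mean --
    # mirrors the tf.reduce_mean(tf.reduce_sum(...)) expression above
    per_roi = np.sum(bbox_weights * np.abs(bbox_pred - bbox_targets), axis=1)
    return per_roi.mean()


pred = np.random.rand(8, 84)      # 8 RoIs, 21 classes * 4 coords
targets = np.zeros_like(pred)
weights = np.zeros_like(pred)
weights[:, 4:8] = 1.0             # only the ground-truth class contributes
print(weighted_l1_box_loss(pred, targets, weights))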
Example #13
def test_net(save_folder, net, detector, cuda, testset, transform, max_per_image=300, thresh=0.005):

    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    # dump predictions and assoc. ground truth to text file for now
    num_images = len(testset)
    num_classes = (21, 81)[args.dataset == 'COCO']
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]

    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(save_folder, 'detections.pkl')

    if args.retest:
        f = open(det_file,'rb')
        all_boxes = pickle.load(f)
        print('Evaluating detections')
        testset.evaluate_detections(all_boxes, save_folder)
        return


    for i in range(num_images):
        img = testset.pull_image(i)
        x = Variable(transform(img).unsqueeze(0),volatile=True)
        if cuda:
            x = x.cuda()
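
The num_classes line above picks between VOC (21 classes) and COCO (81 classes) by indexing a tuple with a boolean, which works because a Python bool is an int:

# (a, b)[cond] selects a when cond is False (0) and b when cond is True (1)
dataset = 'COCO'
num_classes = (21, 81)[dataset == 'COCO']
assert num_classes == 81
# the more explicit equivalent:
num_classes = 81 if dataset == 'COCO' else 21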
Example #14
def train(cfg):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
    drop_last = True

    dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST,
                         mode=ModelPhase.TRAIN,
                         shuffle=True,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If using the sync batch norm strategy, drop the last batch when
        # batch_data has fewer than cfg.BATCH_SIZE samples, to avoid NCCL
        # hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPU
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE is divisible by the number of GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisible by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # In multi-GPU training mode, batch data is allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    config_info = {'input_size': 769, 'output_size': 1, 'block_num': 7}
    config = ([(cfg.SLIM.NAS_SPACE_NAME, config_info)])
    factory = SearchSpaceFactory()
    space = factory.get_search_space(config)

    port = cfg.SLIM.NAS_PORT
    server_address = (cfg.SLIM.NAS_ADDRESS, port)
    sa_nas = SANAS(config,
                   server_addr=server_address,
                   search_steps=cfg.SLIM.NAS_SEARCH_STEPS,
                   is_server=cfg.SLIM.NAS_IS_SERVER)
    for step in range(cfg.SLIM.NAS_SEARCH_STEPS):
        arch = sa_nas.next_archs()[0]

        start_prog = fluid.Program()
        train_prog = fluid.Program()

        data_loader, avg_loss, lr, pred, grts, masks = build_model(
            train_prog, start_prog, arch=arch, phase=ModelPhase.TRAIN)

        cur_flops = flops(train_prog)
        print('current step:', step, 'flops:', cur_flops)

        data_loader.set_sample_generator(data_generator,
                                         batch_size=batch_size_per_dev,
                                         drop_last=drop_last)

        exe = fluid.Executor(place)
        exe.run(start_prog)

        exec_strategy = fluid.ExecutionStrategy()
        # Clear temporary variables every 100 iteration
        if args.use_gpu:
            exec_strategy.num_threads = fluid.core.get_cuda_device_count()
        exec_strategy.num_iteration_per_drop_scope = 100
        build_strategy = fluid.BuildStrategy()

        if cfg.NUM_TRAINERS > 1 and args.use_gpu:
            dist_utils.prepare_for_multi_process(exe, build_strategy,
                                                 train_prog)
            exec_strategy.num_threads = 1

        if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
            if dev_count > 1:
                # Apply sync batch norm strategy
                print_info("Sync BatchNorm strategy is effective.")
                build_strategy.sync_batch_norm = True
            else:
                print_info(
                    "Sync BatchNorm strategy will not be effective if GPU device"
                    " count <= 1")
        compiled_train_prog = fluid.CompiledProgram(
            train_prog).with_data_parallel(loss_name=avg_loss.name,
                                           exec_strategy=exec_strategy,
                                           build_strategy=build_strategy)

        # Resume training
        begin_epoch = cfg.SOLVER.BEGIN_EPOCH
        if cfg.TRAIN.RESUME_MODEL_DIR:
            begin_epoch = load_checkpoint(exe, train_prog)
        # Load pretrained model
        elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
            load_pretrained_weights(exe, train_prog,
                                    cfg.TRAIN.PRETRAINED_MODEL_DIR)
        else:
            print_info(
                'Pretrained model dir {} does not exist, training from scratch...'.
                format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

        fetch_list = [avg_loss.name, lr.name]

        global_step = 0
        all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
        if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and not drop_last:
            all_step += 1
        all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

        avg_loss = 0.0
        timer = Timer()
        timer.start()
        if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
            raise ValueError(
                ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]"
                 ).format(begin_epoch, cfg.SOLVER.NUM_EPOCHS))

        if args.use_mpio:
            print_info("Use multiprocess reader")
        else:
            print_info("Use multi-thread reader")

        best_miou = 0.0
        for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
            data_loader.start()
            while True:
                try:
                    loss, lr = exe.run(program=compiled_train_prog,
                                       fetch_list=fetch_list,
                                       return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - global_step, speed)))

                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

                except fluid.core.EOFException:
                    data_loader.reset()
                    break
                except Exception as e:
                    print(e)
            if epoch > cfg.SLIM.NAS_START_EVAL_EPOCH:
                ckpt_dir = save_checkpoint(train_prog, '{}_tmp'.format(port))
                _, mean_iou, _, mean_acc = evaluate(cfg=cfg,
                                                    arch=arch,
                                                    ckpt_dir=ckpt_dir,
                                                    use_gpu=args.use_gpu,
                                                    use_mpio=args.use_mpio)
                if best_miou < mean_iou:
                    print('search step {}, epoch {} best iou {}'.format(
                        step, epoch, mean_iou))
                    best_miou = mean_iou

        sa_nas.reward(float(best_miou))
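
The data_generator above interleaves batching with an optional flush of the final partial batch. A stripped-down sketch of that control flow (batched_items is a hypothetical name):

def batched_items(samples, batch_size, flush_partial):
    # emit items only once a full batch has accumulated; the trailing
    # partial batch is emitted only when it is safe (no sync batch norm)
    buf = []
    for s in samples:
        buf.append(s)
        if len(buf) == batch_size:
            for item in buf:
                yield item
            buf = []
    if flush_partial:
        for item in buf:
            yield item


print(list(batched_items(range(7), 3, flush_partial=False)))  # [0, 1, 2, 3, 4, 5]
print(list(batched_items(range(7), 3, flush_partial=True)))   # [0, 1, 2, 3, 4, 5, 6]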
Example #15
def test_net(net, imdb):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # heuristic: keep an average of 40 detections per class per image prior
    # to NMS
    max_per_set = 40 * num_images
    # heuristic: keep at most 100 detections per class per image prior to NMS
    max_per_image = 100
    # detection threshold for each class (this is adaptively set based on the
    # max_per_set constraint)
    thresh = -np.inf * np.ones(imdb.num_classes)
    # top_scores will hold one minheap of scores per class (used to enforce
    # the max_per_set constraint)
    top_scores = [[] for _ in xrange(imdb.num_classes)]
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh[j])[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            top_inds = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[top_inds]
            cls_boxes = cls_boxes[top_inds, :]
            # push new scores onto the minheap
            for val in cls_scores:
                heapq.heappush(top_scores[j], val)
            # if we've collected more than the max number of detections,
            # then pop items off the minheap and update the class threshold
            if len(top_scores[j]) > max_per_set:
                while len(top_scores[j]) > max_per_set:
                    heapq.heappop(top_scores[j])
                thresh[j] = top_scores[j][0]

            all_boxes[j][i] = \
                    np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)

            if 0:
                keep = nms(all_boxes[j][i], 0.3)
                vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    for j in xrange(1, imdb.num_classes):
        for i in xrange(num_images):
            inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0]
            all_boxes[j][i] = all_boxes[j][i][inds, :]

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Applying NMS to all detections'
    nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)

    print 'Evaluating detections'
    imdb.evaluate_detections(nms_dets, output_dir)
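
Several of these examples call an nms() helper on an (N, 5) array of (x1, y1, x2, y2, score) rows. For reference, a minimal pure-NumPy greedy IoU NMS with the same contract (a sketch; the repositories use compiled implementations):

import numpy as np


def py_nms(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]     # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the current best box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep


dets = np.array([[0, 0, 10, 10, 0.9],
                 [1, 1, 11, 11, 0.8],
                 [50, 50, 60, 60, 0.7]], dtype=np.float32)
print(py_nms(dets, thresh=0.5))  # [0, 2]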
Example #16
def test_net(net, imdb, max_per_image=100, thresh=0.000000001, vis=False):
    """Test a network on an image database."""
    if 'coco' in imdb.name:
        max_per_image = 100
    print 'max_per_image: ', max_per_image
    print 'thresh: ', thresh

    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_scores = [[[] for _ in xrange(num_images)]
                  for _ in xrange(imdb.num_classes)]

    all_boxes_o = [[[] for _ in xrange(num_images)]
                   for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if cfg.OPG_DEBUG:
        vis_dir = get_vis_dir(imdb, net)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    roidb = imdb.roidb

    test_scales = cfg.TEST.SCALES
    save_id = 0
    for i in xrange(num_images):
        # if imdb.image_index[i] != '001547':
        # continue
        # if i > 100:
        # break
        if vis:
            import matplotlib.pyplot as plt
            # close all windows
            # plt.close('all')

        box_proposals = roidb[i]['boxes']
        rois_per_this_image = min(cfg.TEST.ROIS_PER_IM, len(box_proposals))
        box_proposals = box_proposals[0:rois_per_this_image, :]
        if cfg.USE_ROI_SCORE:
            box_scores = roidb[i]['box_scores']
        else:
            box_scores = None

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()

        scores = None
        boxes = None
        for target_size in test_scales:
            if cfg.OPG_DEBUG:
                save_path = os.path.join(vis_dir, str(save_id) + '_.png')
                save_debug_im(im, target_size, save_path)
                save_id += 1

            cfg.TEST.SCALES = (target_size, )
            scores_scale, boxes_scale = im_detect(net, im, box_proposals,
                                                  box_scores)
            if scores is None:
                scores = scores_scale
                boxes = boxes_scale
            else:
                # TODO(YH): something to do
                scores += scores_scale
                assert np.array_equal(
                    boxes,
                    boxes_scale), 'boxes at each scale should be the same'

            if cfg.OPG_DEBUG:
                os.remove(save_path)

        if cfg.TEST.USE_FLIPPED:
            im_flip = im[:, ::-1, :]
            box_proposals_flip = box_proposals.copy()
            oldx1 = box_proposals_flip[:, 0].copy()
            oldx2 = box_proposals_flip[:, 2].copy()
            box_proposals_flip[:, 0] = im.shape[1] - oldx2 - 1
            box_proposals_flip[:, 2] = im.shape[1] - oldx1 - 1

            for target_size in test_scales:
                boxes_scale_o = boxes_scale
                if cfg.OPG_DEBUG:
                    save_path = os.path.join(vis_dir, str(save_id) + '_.png')
                    save_debug_im(im_flip, target_size, save_path)
                    save_id += 1

                cfg.TEST.SCALES = (target_size, )
                scores_scale, boxes_scale, = im_detect(net, im_flip,
                                                       box_proposals_flip,
                                                       box_scores)

                scores += scores_scale

                if cfg.OPG_DEBUG:
                    os.remove(save_path)

        _t['im_detect'].toc()

        _t['misc'].tic()
        # j = 0 is normally skipped as the background class, but here it is
        # deliberately kept, so the loop starts at 0
        for j in xrange(0, imdb.num_classes):
            if 'trainval' in imdb.name:
                if imdb.image_classes_at(i)[j] == 0:
                    all_boxes[j][i] = np.zeros((0, 5), dtype=np.float32)
                    all_boxes_o[j][i] = np.zeros((0, 5), dtype=np.float32)
                    continue

            all_scores[j][i] = sum(scores[:, j])

            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                # vis_heatmap(im, i, imdb.classes[j], cls_dets, thresh=0.3)
                # vis_detections_highest(
                # im, imdb.classes[j], cls_dets, thresh=0.3)
                vis_detections(im, imdb.classes[j], cls_dets, thresh=0.03)

            all_boxes[j][i] = cls_dets

            # keep the original detection results
            cls_scores_o = scores[:, j]
            cls_boxes_o = boxes[:, j * 4:(j + 1) * 4]
            cls_dets_o = np.hstack((cls_boxes_o, cls_scores_o[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            all_boxes_o[j][i] = cls_dets_o

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(0, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(0, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    if cfg.OPG_DEBUG:
        return

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    det_file_o = os.path.join(output_dir, 'detections_o.pkl')
    with open(det_file_o, 'wb') as f:
        cPickle.dump(all_boxes_o, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir, all_scores=all_scores)
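
The flipped-image pass above remaps proposal x-coordinates before the second im_detect call: flipping an image of width W maps column x to W - x - 1, so the box x-extents swap roles. A standalone sketch of that transform:

import numpy as np


def flip_boxes(boxes, width):
    # new x1 comes from the old x2 and vice versa
    flipped = boxes.copy()
    flipped[:, 0] = width - boxes[:, 2] - 1
    flipped[:, 2] = width - boxes[:, 0] - 1
    return flipped


b = np.array([[10, 5, 30, 25]], dtype=np.float32)
print(flip_boxes(b, width=100))  # [[69.  5. 89. 25.]]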
Example #17
  def train_model(self, sess, max_iters):
    # Build data layers for both training and validation set
    self.data_layer = RoIDataLayer(self.roidb, self.imdb.num_classes)
    self.data_layer_val = RoIDataLayer(self.valroidb, self.imdb.num_classes, random=True)

    # Construct the computation graph
    lr, train_op = self.construct_graph(sess)

    # Find previous snapshots if there is any to restore from
    lsf, nfiles, sfiles = self.find_previous()

    # Initialize the variables or restore them from the last snapshot
    if lsf == 0:
      rate, last_snapshot_iter, stepsizes, np_paths, ss_paths = self.initialize(sess)
    else:
      rate, last_snapshot_iter, stepsizes, np_paths, ss_paths = self.restore(sess, 
                                                                            str(sfiles[-1]), 
                                                                            str(nfiles[-1]))
    timer = Timer()
    iter = last_snapshot_iter + 1
    last_summary_time = time.time()
    # Make sure the lists are not empty
    stepsizes.append(max_iters)
    stepsizes.reverse()
    next_stepsize = stepsizes.pop()
    while iter < max_iters + 1:
      # Learning rate
      if iter == next_stepsize + 1:
        # Add snapshot here before reducing the learning rate
        self.snapshot(sess, iter)
        rate *= cfg.TRAIN.GAMMA
        sess.run(tf.assign(lr, rate))
        next_stepsize = stepsizes.pop()

      timer.tic()
      # Get training data, one batch at a time
      blobs = self.data_layer.forward()

      now = time.time()
      if iter == 1 or now - last_summary_time > cfg.TRAIN.SUMMARY_INTERVAL:
        # Compute the graph with summary
        rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss, summary = \
          self.net.train_step_with_summary(sess, blobs, train_op)
        self.writer.add_summary(summary, float(iter))
        # Also check the summary on the validation set
        print('train_model before self.data_layer_val.forward')
        blobs_val = self.data_layer_val.forward()
        print('train_model after self.data_layer_val.forward')

        summary_val = self.net.get_summary(sess, blobs_val)
        self.valwriter.add_summary(summary_val, float(iter))
        last_summary_time = now
      else:
        # Compute the graph without summary
        rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss = \
          self.net.train_step(sess, blobs, train_op)
      timer.toc()

      # Display training information
      if iter % (cfg.TRAIN.DISPLAY) == 0:
        print('iter: %d / %d, total loss: %.6f\n >>> rpn_loss_cls: %.6f\n '
              '>>> rpn_loss_box: %.6f\n >>> loss_cls: %.6f\n >>> loss_box: %.6f\n >>> lr: %f' % \
              (iter, max_iters, total_loss, rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, lr.eval()))
        print('speed: {:.3f}s / iter'.format(timer.average_time))

      # Snapshotting
      if iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
        last_snapshot_iter = iter
        ss_path, np_path = self.snapshot(sess, iter)
        np_paths.append(np_path)
        ss_paths.append(ss_path)

        # Remove the old snapshots if there are too many
        if len(np_paths) > cfg.TRAIN.SNAPSHOT_KEPT:
          self.remove_snapshot(np_paths, ss_paths)

      iter += 1

    if last_snapshot_iter != iter - 1:
      self.snapshot(sess, iter - 1)

    self.writer.close()
    self.valwriter.close()
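
The stepsizes list above is reversed so that pop() yields the next learning-rate boundary in ascending order, with the rate multiplied by GAMMA each time a boundary is crossed. A minimal sketch of that schedule (lr_schedule is a hypothetical helper):

def lr_schedule(base_lr, gamma, stepsizes, max_iters):
    bounds = sorted(stepsizes) + [max_iters]
    bounds.reverse()
    rate, nxt, rates = base_lr, bounds.pop(), []
    for it in range(1, max_iters + 1):
        if it == nxt + 1:          # just crossed a boundary
            rate *= gamma
            nxt = bounds.pop()
        rates.append(rate)
    return rates


print(lr_schedule(0.01, 0.1, [3], 6))  # the rate drops by 10x after iteration 3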
Example #18
def test_net_ensemble(det_dirs, imdb, max_per_image=100, thresh=0.000000001):
    print 'max_per_image: ', max_per_image
    print 'thresh: ', thresh

    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_scores = [[[] for _ in xrange(num_images)]
                  for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, None)

    # load all the detection results
    all_boxes_cache = None
    for det_dir in det_dirs:
        det_path = os.path.join(det_dir, 'detections_o.pkl')
        print 'load det: ', det_path
        assert os.path.isfile(det_path), 'no det file: ' + det_path
        with open(det_path, 'rb') as f:
            all_boxes_cache_this = cPickle.load(f)
        print 'all_boxes_cache_this: ', len(all_boxes_cache_this), len(
            all_boxes_cache_this[0])
        print 'all_boxes_cache_this[0][0]: ', all_boxes_cache_this[0][0].shape
        # print 'all_boxes_cache_this[0][0][0]: ', all_boxes_cache_this[0][0][0]
        # print 'all_boxes_cache_this[14][0]: ', all_boxes_cache_this[14][0].shape
        # print 'all_boxes_cache_this[14][0][0]: ',
        # all_boxes_cache_this[14][0][0]

        if all_boxes_cache is None:
            all_boxes_cache = all_boxes_cache_this
        else:
            print 'Summing up all results'
            print 'If an error happens here, the dimensions probably mismatch.'
            for c in xrange(imdb.num_classes):
                for n in xrange(num_images):
                    all_boxes_cache[c][n][:,
                                          4] += all_boxes_cache_this[c][n][:,
                                                                           4]

    print 'all_boxes_cache: ', len(all_boxes_cache), len(all_boxes_cache[0])
    print 'all_boxes_cache[0][0]: ', all_boxes_cache[0][0].shape
    # print 'all_boxes_cache[0][0][0]: ', all_boxes_cache[0][0][0]
    # print 'all_boxes_cache[14][0]: ', all_boxes_cache[14][0].shape
    # print 'all_boxes_cache[14][0][0]: ', all_boxes_cache[14][0][0]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    roidb = imdb.roidb

    for i in xrange(num_images):
        _t['im_detect'].tic()
        _t['im_detect'].toc()

        _t['misc'].tic()
        # j = 0 is normally skipped as the background class, but here it is
        # deliberately kept, so the loop starts at 0
        for j in xrange(0, imdb.num_classes):
            # all_scores[j][i] = sum(scores[:, j])
            all_scores[j][i] = sum(all_boxes_cache[j][i][:, -1])

            # inds = np.where(scores[:, j] > thresh)[0]
            # cls_scores = scores[inds, j]
            inds = np.where(all_boxes_cache[j][i][:, -1] > thresh)[0]
            cls_scores = all_boxes_cache[j][i][inds, -1]

            # cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_boxes = all_boxes_cache[j][i][inds, 0:4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)

            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]

            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(0, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(0, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir, all_scores=all_scores)
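
Because every model in the ensemble scores the same proposal set, fusing the cached results reduces to summing the score column across models, as the loop above does. A compact sketch (fuse_scores is a hypothetical name):

import numpy as np


def fuse_scores(per_model_dets):
    # each entry is an (N, 5) array over the *same* N proposals
    fused = per_model_dets[0].copy()
    for dets in per_model_dets[1:]:
        assert dets.shape == fused.shape, 'proposal sets must match'
        fused[:, 4] += dets[:, 4]
    return fused


a = np.array([[0, 0, 10, 10, 0.6]], dtype=np.float32)
b = np.array([[0, 0, 10, 10, 0.2]], dtype=np.float32)
print(fuse_scores([a, b]))  # score column becomes 0.8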
Example #19
import os
import caffe
import json
from core.config import cfg
import numpy as np
import numpy.random as npr
from core.bbox_transform import width_height_transform
from utils.timer import Timer

from utils.cython_bbox_maps import (get_bbox_coverage,
                                    get_objects_size_regression_matrix,
                                    get_bbox_levels)

DEBUG = False
t = Timer()


class BboxSegmentationLayer(caffe.Layer):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.
    """
    def setup(self, bottom, top):

        layer_params = json.loads(self.param_str)
        self._feat_stride = layer_params['feat_stride']
        self._iters = 0

        self._batchsize = layer_params['batchsize']
        self._fg_fraction = layer_params['fg_fraction']
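
For context on the setup above: a Caffe Python layer receives its configuration as a free-form param_str from the prototxt, which this layer parses as JSON. A minimal illustration:

import json

# the prototxt would carry something like this in python_param.param_str
param_str = '{"feat_stride": 16, "batchsize": 128, "fg_fraction": 0.25}'
layer_params = json.loads(param_str)
print(layer_params['feat_stride'], layer_params['fg_fraction'])  # 16 0.25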
Example #20
def test_net_ensemble2(det_dirs, imdb, max_per_image=100, thresh=0.000000001):
    print 'max_per_image: ', max_per_image
    print 'thresh: ', thresh

    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_scores = [[[] for _ in xrange(num_images)]
                  for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, None)

    # load all the detection results
    # all_boxes_cache = None
    all_boxes_cache = [[[] for _ in xrange(num_images)]
                       for _ in xrange(imdb.num_classes)]
    image_index = imdb.image_index
    for det_dir in det_dirs:
        p = 1.0
        if '2' in det_dir:
            p = 10.0
        for dirpath, dirnames, filenames in os.walk(det_dir):
            for filename in filenames:
                print 'load res: ', os.path.join(dirpath, filename)
                c = -1
                for c_i, cls in enumerate(imdb.classes):
                    if cls + '.txt' in filename:
                        c = c_i
                        break
                assert c > -1
                with open(os.path.join(dirpath, filename), 'r') as f:
                    for line in f.readlines():
                        line = line.strip()
                        im_id, score, xmin, ymin, xmax, ymax = line.split(' ')
                        im_i = image_index.index(im_id)
                        all_boxes_cache[c][im_i].append([
                            float(xmin) - 1,
                            float(ymin) - 1,
                            float(xmax) - 1,
                            float(ymax) - 1,
                            float(score) * p
                        ])

    for n in xrange(num_images):
        for c in xrange(imdb.num_classes):
            if len(all_boxes_cache[c][n]) == 0:
                all_boxes_cache[c][n] = np.zeros((0, 5), dtype=np.float32)
            else:
                all_boxes_cache[c][n] = np.array(all_boxes_cache[c][n],
                                                 dtype=np.float32)

    print 'all_boxes_cache: ', len(all_boxes_cache), len(all_boxes_cache[0])
    print 'all_boxes_cache[0][0]: ', all_boxes_cache[0][0].shape
    # print 'all_boxes_cache[0][0][0]: ', all_boxes_cache[0][0][0]
    # print 'all_boxes_cache[14][0]: ', all_boxes_cache[14][0].shape
    # print 'all_boxes_cache[14][0][0]: ', all_boxes_cache[14][0][0]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    roidb = imdb.roidb

    for i in xrange(num_images):
        _t['im_detect'].tic()
        _t['im_detect'].toc()

        _t['misc'].tic()
        # j = 0 is normally skipped as the background class, but here it is
        # deliberately kept, so the loop starts at 0
        for j in xrange(0, imdb.num_classes):
            # all_scores[j][i] = sum(scores[:, j])
            all_scores[j][i] = sum(all_boxes_cache[j][i][:, -1])

            # inds = np.where(scores[:, j] > thresh)[0]
            # cls_scores = scores[inds, j]
            inds = np.where(all_boxes_cache[j][i][:, -1] > thresh)[0]
            cls_scores = all_boxes_cache[j][i][inds, -1]

            # cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_boxes = all_boxes_cache[j][i][inds, 0:4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)

            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]

            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(0, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(0, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir, all_scores=all_scores)
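
The text files walked above use the PASCAL VOC results format, one detection per line ("image_id score xmin ymin xmax ymax") with 1-based pixel coordinates, which the loader shifts to 0-based. A sketch of parsing one line:

line = '000001 0.950 10.0 20.0 110.0 220.0'
im_id, score, xmin, ymin, xmax, ymax = line.strip().split(' ')
box = [float(xmin) - 1, float(ymin) - 1,
       float(xmax) - 1, float(ymax) - 1, float(score)]
print(im_id, box)  # 000001 [9.0, 19.0, 109.0, 219.0, 0.95]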
Example #21
    predict_image_paths = my_utils.get_all_file_paths(predict_folder)
    extensions = args.ext.split(",")
    predict_image_paths = my_utils.get_files_with_extension(
        predict_image_paths, extensions)
    total_pred_images = len(predict_image_paths)

    # testing scale
    # if args.dataset == "FDDB":
    #     resize = 3
    # elif args.dataset == "PASCAL":
    #     resize = 2.5
    # elif args.dataset == "AFW":
    #     resize = 1
    resize = 1

    _t = {'forward_pass': Timer(), 'misc': Timer()}

    # predicting begin
    id2det = {}
    test_time, total_detect_time, total_nms_time = 0, 0, 0
    total_boxes = 0
    num_pred_images = 0
    error_image_paths = []
    for i, image_path in enumerate(predict_image_paths):
        image_path = os.path.abspath(image_path)
        image_name = os.path.basename(image_path)
        # if i < 2:
        #     print("Image_path : {} - Image name : {}".format(image_path, image_name))

        try:
            img = np.float32(cv2.imread(image_path, cv2.IMREAD_COLOR))
Example #22
def test_net_cache(net,
                   imdb,
                   max_per_image=100,
                   thresh=0.000000001,
                   vis=False,
                   scale=1.0):
    """Test a network on an image database."""
    print 'max_per_image: ', max_per_image
    print 'thresh: ', thresh

    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_scores = [[[] for _ in xrange(num_images)]
                  for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if cfg.OPG_DEBUG:
        vis_dir = get_vis_dir(imdb, net)

    det_file = os.path.join(output_dir, 'detections.pkl')
    if not os.path.isfile(det_file):
        print 'file does not exist: ', det_file
        # make sure all regions are kept (NMS at 1.1 suppresses nothing)
        origin_NMS = cfg.TEST.NMS
        cfg.TEST.NMS = 1.1
        test_net(net, imdb, max_per_image=99999, thresh=0.0000, vis=False)
        cfg.TEST.NMS = origin_NMS

    with open(det_file, 'rb') as f:
        all_boxes_cache = cPickle.load(f)
    print 'all_boxes_cache: ', len(all_boxes_cache), len(all_boxes_cache[0])
    print 'all_boxes_cache: ', all_boxes_cache[0][0].shape
    print 'all_boxes_cache: ', all_boxes_cache[14][0].shape

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    roidb = imdb.roidb

    test_scales = cfg.TEST.SCALES
    save_id = 0
    for i in xrange(num_images):
        _t['im_detect'].tic()
        _t['im_detect'].toc()

        _t['misc'].tic()
        # j = 0 is normally skipped as the background class, but here it is
        # deliberately kept, so the loop starts at 0
        for j in xrange(0, imdb.num_classes):
            # all_scores[j][i] = sum(scores[:, j])
            all_scores[j][i] = sum(all_boxes_cache[j][i][:, -1])

            # inds = np.where(scores[:, j] > thresh)[0]
            # cls_scores = scores[inds, j]
            inds = np.where(all_boxes_cache[j][i][:, -1] > thresh)[0]
            cls_scores = all_boxes_cache[j][i][inds, -1]

            # cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_boxes = all_boxes_cache[j][i][inds, 0:4]
            cls_boxes = resize_boxes(cls_boxes, scale)

            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)

            if vis:
                vis_heatmap(im, i, imdb.classes[j], cls_dets, thresh=0.3)

            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]

            # if vis:
            # vis_detections(im, imdb.classes[j], cls_dets, thresh=thresh)
            all_boxes[j][i] = cls_dets

        if vis:
            import matplotlib.pyplot as plt
            # plt.show()
            plt.close('all')
        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(0, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(0, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir, all_scores=all_scores)
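
The "max_per_image over all classes" cap that closes each image loop above can be isolated into a small helper (cap_per_image is a hypothetical name): find the score of the k-th best detection across all classes, then keep only detections at or above it.

import numpy as np


def cap_per_image(dets_per_class, max_per_image):
    scores = np.hstack([d[:, -1] for d in dets_per_class])
    if len(scores) <= max_per_image:
        return dets_per_class
    image_thresh = np.sort(scores)[-max_per_image]
    return [d[d[:, -1] >= image_thresh] for d in dets_per_class]


a = np.array([[0, 0, 1, 1, 0.9], [0, 0, 1, 1, 0.2]], dtype=np.float32)
b = np.array([[0, 0, 1, 1, 0.5]], dtype=np.float32)
print([c.shape[0] for c in cap_per_image([a, b], max_per_image=2)])  # [1, 1]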
Example #23
 for filename in os.listdir(fromDir):
     if 'mp4' not in filename:  # skip .DS_Store files
         continue
     print(filename)
     if filename == "20180627_momo_0007.mp4":
         continue
     if filename == "20180627_momo_0023.mp4":
         continue
     video = cv2.VideoCapture(fromDir + filename)
     numberVideo += 1
     success, im = video.read()
     numFrame = 0
     while success:
         numFrame += 1
         savename = filename.split('.')[0] + '_f' + str(numFrame) + '.jpg'
         timer = Timer()
         timer.tic()
         scores, boxes = im_detect(net, im)
         timer.toc()
         print('No.{:d} - {:d} took {:.3f}s for '
               '{:d} object proposals').format(numberVideo, numFrame,
                                               timer.total_time,
                                               boxes.shape[0])
         timeUsed = timeUsed + timer.total_time
         CONF_THRESH = 0.9
         NMS_THRESH = 0.01
         numGesture = 0
         gestureboxes = {}
         for cls_ind, cls in enumerate(CLASSES[1:]):
             cls_ind += 1  # because we skipped the background class
             cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]  # 300x4 matrix
Example #24
def test_net_bbox(net, imdb, max_per_image=100, thresh=0.00000001, vis=False):
    """Test a network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_scores = [[[] for _ in xrange(num_images)]
                  for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if cfg.OPG_DEBUG:
        vis_dir = get_vis_dir(imdb, net)

    # timers
    _t = {'im_detect_bbox': Timer(), 'misc': Timer()}

    roidb = imdb.roidb

    test_scales = cfg.TEST.SCALES
    save_id = 0
    for i in xrange(num_images):
        # if imdb.image_index[i] != '001547':
        # continue
        # if i>100:
        # continue

        # filter out any ground truth boxes
        # The roidb may contain ground-truth rois (for example, if the roidb
        # comes from the training or val split). We only want to evaluate
        # detection on the *non*-ground-truth rois. We select those the rois
        # that have the gt_classes field set to 0, which means there's no
        # ground truth.
        # box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]
        box_proposals = roidb[i]['boxes']
        rois_per_this_image = min(cfg.TEST.ROIS_PER_IM, len(box_proposals))
        box_proposals = box_proposals[0:rois_per_this_image, :]
        if cfg.USE_ROI_SCORE:
            box_scores = roidb[i]['box_scores']
        else:
            box_scores = None

        im = cv2.imread(imdb.image_path_at(i))

        _t['im_detect_bbox'].tic()

        scores = None
        boxes = None
        for target_size in test_scales:
            if cfg.OPG_DEBUG:
                # save_subdir = time.strftime("%Y-%m-%d", time.gmtime())
                # save_dir = os.path.join('tmp', save_subdir)
                # if not os.path.exists(save_dir):
                # os.makedirs(save_dir)
                cv2.imwrite(os.path.join(vis_dir, str(save_id) + '_.png'), im)
                save_id += 1

            cfg.TEST.SCALES = (target_size, )
            scores_scale, boxes_scale = im_detect_bbox(net, im, box_proposals,
                                                       box_scores)
            if scores is None:
                scores = scores_scale
                boxes = boxes_scale
            else:
                scores = np.vstack((scores, scores_scale))
                boxes = np.vstack((boxes, boxes_scale))

        if cfg.TEST.USE_FLIPPED:
            im_flip = im[:, ::-1, :]
            box_proposals_flip = box_proposals.copy()
            oldx1 = box_proposals_flip[:, 0].copy()
            oldx2 = box_proposals_flip[:, 2].copy()
            box_proposals_flip[:, 0] = im.shape[1] - oldx2 - 1
            box_proposals_flip[:, 2] = im.shape[1] - oldx1 - 1

            for target_size in test_scales:
                if cfg.OPG_DEBUG:
                    # save_subdir = time.strftime("%Y-%m-%d", time.gmtime())
                    # save_dir = os.path.join('tmp', save_subdir)
                    cv2.imwrite(os.path.join(vis_dir,
                                             str(save_id) + '_.png'), im_flip)
                    save_id += 1

                cfg.TEST.SCALES = (target_size, )
                scores_scale, boxes_scale = im_detect_bbox(
                    net, im_flip, box_proposals_flip, box_scores)

                # scores = np.vstack((scores, scores_scale))
                # boxes = np.vstack((boxes, boxes_scale))

        _t['im_detect_bbox'].toc()

        _t['misc'].tic()
        # j = 0 is normally skipped as the background class, but here it is
        # deliberately kept, so the loop starts at 0
        for j in xrange(0, imdb.num_classes):
            all_scores[j][i] = sum(scores[:, j])

            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]

            # if len(cls_scores) > 0:
            # sum_score = sum(cls_scores)
            # max_score = max(cls_scores)
            # print cls_scores
            # cls_scores *= (sum_score / max_score)
            # print sum_score, max_score, sum_score / max_score
            # print cls_scores

            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)

            if vis:
                vis_heatmap(im, i, imdb.classes[j], cls_dets, thresh=0.3)

            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]

            # if vis:
            # vis_detections(im, imdb.classes[j], cls_dets, thresh=thresh)
            all_boxes[j][i] = cls_dets

        if vis:
            import matplotlib.pyplot as plt
            # plt.show()
            plt.close('all')

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(0, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(0, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect_bbox: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect_bbox'].average_time,
                    _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir, all_scores=all_scores)
Example #25
def test_net(save_folder,
             net,
             detector,
             cuda,
             testset,
             transform,
             max_per_image=300,
             thresh=0.005):
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    # dump predictions and assoc. ground truth to text file for now
    num_images = len(testset)
    num_classes = (21, 81)[args.dataset == 'COCO']
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(save_folder, 'detections.pkl')

    if args.retest:
        f = open(det_file, 'rb')
        all_boxes = pickle.load(f)
        print('Evaluating detections')
        testset.evaluate_detections(all_boxes, save_folder)
        return

    for i in range(num_images):
        img = testset.pull_image(i)
        x = Variable(transform(img).unsqueeze(0), volatile=True)
        if cuda:
            x = x.cuda()

        _t['im_detect'].tic()
        out = net(x=x, test=True)  # forward pass
        boxes, scores = detector.forward(out, priors)
        detect_time = _t['im_detect'].toc()
        boxes = boxes[0]
        scores = scores[0]

        boxes = boxes.cpu().numpy()
        scores = scores.cpu().numpy()
        # scale each detection back up to the image
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1],
             img.shape[0]]).cpu().numpy()
        boxes *= scale

        _t['misc'].tic()

        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            # GPU NMS for both datasets (the original VOC/COCO branches
            # were identical)
            cpu = False

            keep = nms(c_dets, 0.45, force_cpu=cpu)
            keep = keep[:50]
            c_dets = c_dets[keep, :]
            all_boxes[j][i] = c_dets
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                i + 1, num_images, detect_time, nms_time))
            _t['im_detect'].clear()
            _t['misc'].clear()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    if args.dataset == 'VOC':
        APs, mAP = testset.evaluate_detections(all_boxes, save_folder)
        return APs, mAP
    else:
        testset.evaluate_detections(all_boxes, save_folder)
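
The detector in this example emits boxes in normalized [0, 1] coordinates, which is why they are multiplied by a (width, height, width, height) scale vector before thresholding. In isolation:

import numpy as np

boxes = np.array([[0.1, 0.2, 0.5, 0.8]], dtype=np.float32)  # normalized
h, w = 300, 400
scale = np.array([w, h, w, h], dtype=np.float32)
print(boxes * scale)  # [[ 40.  60. 200. 240.]]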
Example #26
           model)  # loading pretrained weights into the network

if cfg.use_cuda:
    model = model.cuda()

model.eval()
print("Model loaded successfully.")
print("Setting Model to Evaluation Mode")

# pretrained_model = os.path.join(cfg.train_output_dir,
#     'darknet19_voc07trainval_exp1_63.h5')
# pretrained_model = cfg.trained_model
# net_utils.load_net(pretrained_model, net)
# model.load_from_npz(cfg.pretrained_model, num_conv=18)

t_det = Timer()
t_total = Timer()
t_cap = Timer()
cap = cv2.VideoCapture("/dev/video1")
i = 0

while (True):
    t_cap.tic()
    # Capture frame by frame
    ret, frame = cap.read()
    cap_time = t_cap.toc()

    # Our operations on the frame come here
    #gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    t_total.tic()
Example #27
def face_extract(retinaface, cfg, image: np.ndarray):

    cudnn.benchmark = True
    device = torch.device("cpu")  # inference is pinned to the CPU here
    retinaface = retinaface.to(device)

    # resize images that are very small or very large
    if image.shape[0] < 300 or image.shape[1] < 300:
        dim = (500, 500)
        image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
    elif image.shape[0] > 2000 or image.shape[1] > 2000:
        dim = (1500, 1500)
        image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)

    img_raw = image
    image, scale, im_height, im_width, resize = image_preprocessing(image)

    image = image.to(device)
    scale = scale.to(device)

    _t = {'forward_pass': Timer(), 'misc': Timer()}

    _t['forward_pass'].tic()

    loc, conf, landms = retinaface(image)  # forward pass

    _t['forward_pass'].toc()
    _t['misc'].tic()

    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data

    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()

    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([
        image.shape[3], image.shape[2], image.shape[3], image.shape[2],
        image.shape[3], image.shape[2], image.shape[3], image.shape[2],
        image.shape[3], image.shape[2]
    ])
    scale1 = scale1.to(device)

    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > 0.02)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1]
    # order = scores.argsort()[::-1][:args.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, 0.4)
    # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    dets = np.concatenate((dets, landms), axis=1)
    _t['misc'].toc()

    return dets, img_raw
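
The decode() call in face_extract follows the standard SSD/RetinaFace prior-box parameterisation. A NumPy sketch of that step, assuming (cx, cy, w, h) priors and the usual two variance terms (decode_np is a hypothetical name):

import numpy as np


def decode_np(loc, priors, variances):
    # offsets are relative to prior centers; sizes are log-scaled
    centers = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    sizes = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
    return np.concatenate((centers - sizes / 2, centers + sizes / 2), axis=1)


priors = np.array([[0.5, 0.5, 0.2, 0.2]], dtype=np.float32)
loc = np.zeros((1, 4), dtype=np.float32)
print(decode_np(loc, priors, [0.1, 0.2]))  # [[0.4 0.4 0.6 0.6]]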
Example #28
    def train_model(self, sess, max_iters):
        """Network training loop."""

        data_layer = get_data_layer(self.roidb, self.imdb.num_classes)

        # gather the per-view 'pool_5' part features from the 12 view networks
        view_nets = [
            self.net, self.net1, self.net2, self.net3, self.net4, self.net5,
            self.net6, self.net7, self.net8, self.net9, self.net10, self.net11
        ]
        part_features_list = [
            net.get_output('pool_5')[:self.proposal_number, :]
            for net in view_nets
        ]

        # learning matrix 1
        Matrix_L1_S1 = tf.get_variable(
            'L1_S1', [self.feature_size, self.feature_size],
            initializer=tf.random_normal_initializer(
                stddev=1 / math.sqrt(self.feature_size * self.feature_size)))
        # learning matrix 2
        Matrix_L1_S2 = tf.get_variable(
            'L1_S2', [self.feature_size, self.feature_size],
            initializer=tf.random_normal_initializer(
                stddev=1 / math.sqrt(self.feature_size * self.feature_size)))

        # # learning matrix 3
        # Matrix_L2_S1 = tf.get_variable('L2_S1', [self.feature_size, self.feature_size], initializer=tf.random_normal_initializer(
        #                                    stddev=1 / math.sqrt(self.feature_size * self.feature_size)))

        # learning matrix 4
        #Matrix_L1_S3 = tf.get_variable('L1_S3', [self.hidden_size, self.hidden_size],
        #                               initializer=tf.random_normal_initializer(
        #                                   stddev=1 / math.sqrt(self.hidden_size * self.hidden_size)))

        ################################
        #### get the region feature ####
        ######### max pooling ##########
        ################################
        # collapse each proposal's 7x7x512 conv map to a single 512-d vector
        part_features_list = [
            tf.reduce_max(tf.reshape(pf, [self.proposal_number, 49, 512]),
                          axis=1) for pf in part_features_list
        ]

        ####### get model parts #########
        # (commented-out alternative: stack all 12 per-view feature maps into
        #  a single [views, proposal_number, feature] tensor)

        ##############################
        ######### L1_S1 ##############
        ##############################
        # (commented-out alternative, "no part attention": weight every
        #  proposal uniformly by 1 / proposal_number instead of the learned
        #  similarity below)
        # part attention (L1_S1) per view: score every proposal against the
        # others, then use the mean similarity as that proposal's weight
        part_sums = []
        for part_features in part_features_list:
            part_similarity = tf.nn.softmax(
                tf.matmul(tf.matmul(part_features, Matrix_L1_S1),
                          tf.transpose(part_features)))
            part_weights = tf.reduce_sum(part_similarity, axis=0,
                                         keep_dims=True) / self.proposal_number
            part_weights = tf.transpose(part_weights)
            part_sums.append(
                tf.reduce_sum(tf.multiply(part_weights, part_features),
                              axis=0,
                              keep_dims=True))
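        # shape walk-through of the attention loop above (illustrative; P =
        # proposal_number, F = feature_size):
        #   similarity = softmax(feats @ M @ feats.T)     -> [P, P]
        #   weights    = similarity.sum(axis=0).T / P     -> [P, 1]
        #   part_sum   = (weights * feats).sum(axis=0)    -> [1, F]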

        # concat views
        view_parts = tf.concat(part_sums, axis=0)
        view_parts = tf.nn.l2_normalize(view_parts, 1)

        # no view attention
        #view_similarity = tf.constant([[1.0 / self.views]] * self.views, dtype=tf.float32)
        #view_sums = tf.reduce_sum(tf.multiply(view_similarity, view_parts), axis=0, keep_dims=True)
        '''L1_S2'''
        #view attention
        L1_S2_Similarity = tf.nn.softmax(
            tf.matmul(tf.matmul(view_parts, Matrix_L1_S2),
                      tf.transpose(view_parts)))
        view_similarity = tf.reduce_sum(
            L1_S2_Similarity, axis=0, keep_dims=True) / self.views
        view_similarity = tf.transpose(view_similarity)
        view_sums = tf.reduce_sum(tf.multiply(view_similarity, view_parts),
                                  axis=0,
                                  keep_dims=True)

        view_sums = tf.nn.l2_normalize(view_sums, 1)
        #
        view_sums_extend = tf.tile(view_sums, [self.views, 1])
        views_input = tf.add(view_parts, view_sums_extend)
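        # broadcast the attended global descriptor back onto every view
        # (residual-style fusion of per-view features with the view summary)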
        #view_extend = tf.expand_dims(views_input, 0)

        view_extend = [views_input]
        view_sequence = tf.unstack(view_extend, self.rnn_steps, 1)
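        # unstacking [1, views, feature] along axis 1 yields rnn_steps tensors
        # of shape [1, feature]; this implicitly assumes rnn_steps == views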

        ######RNN Part##########
        ########################
        ########################
        outputs, states = self.build_RNN(view_sequence)

        #use outputs
        outputs = tf.reshape(outputs, [-1, self.views, self.hidden_size])
        model_feature = tf.reduce_max(outputs, 1)
        #model_feature = tf.reduce_max(tf.concat(outputs, 2),1)
        # states = tf.nn.l2_normalize(states, 1)
        # states = states.h

        # output_similarity = tf.nn.softmax(tf.matmul(tf.matmul(outputs, Matrix_L1_S3), tf.transpose(outputs)))
        # output_similarity = tf.reduce_sum(output_similarity, axis=0, keep_dims=True) / self.views
        # output_similarity = tf.transpose(output_similarity)
        # output_sums = tf.reduce_sum(tf.multiply(output_similarity, outputs), axis=0, keep_dims=True)

        #second branch
        # '''L2_S1'''
        # part_features = tf.reshape(part_features, [self.views*self.proposal_number, self.feature_size])
        # L2_S1_Similarity = tf.nn.softmax(tf.matmul(tf.matmul(part_features, Matrix_L2_S1),
        #                                            tf.transpose(part_features)))
        # global_similarity = tf.reduce_sum(L2_S1_Similarity, axis=0, keep_dims=True) / (self.proposal_number * self.views)
        # global_similarity = tf.transpose(global_similarity)
        # global_sums = tf.reduce_sum(tf.multiply(global_similarity, part_features), axis=0, keep_dims=True)
        # global_sums = tf.nn.l2_normalize(global_sums, 1)
        # # #global_sums = tf.nn.softmax(global_sums)
        # #
        # model_feature = tf.concat([global_sums, states], axis=1)
        # #print(model_feature)

        # classification layer
        # the second attention part is related to the actual classes
        w_init = tf.truncated_normal_initializer(stddev=0.1)
        b_init = tf.constant_initializer(0.1)
        fc2_w = tf.get_variable('fc2_w', [self.hidden_size, self.classes],
                                dtype=tf.float32,
                                initializer=w_init)
        fc2_b = tf.get_variable('fc2_b', [self.classes],
                                dtype=tf.float32,
                                initializer=b_init)

        cls_logits = tf.matmul(model_feature, fc2_w) + fc2_b
        cls_prob = tf.nn.softmax(cls_logits)

        cls_output = tf.placeholder(tf.float32, [self.classes],
                                    name='cls_output')

        #Euclidean distance
        #loss = tf.reduce_sum((vlad_prob - cls_output) ** 2)

        #cross entropy
        loss = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits(labels=cls_output,
                                                    logits=cls_logits))

        # optimizer and learning rate, Stochastic Gradient Descent
        #global_step = tf.Variable(0, trainable=False)
        #lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step,
        #                                cfg.TRAIN.STEPSIZE, 0.9, staircase=True)
        #momentum = cfg.TRAIN.MOMENTUM
        #train_op = tf.train.MomentumOptimizer(lr, momentum).minimize(loss, global_step=global_step)

        # Adam Optimizer
        train_op = tf.train.AdamOptimizer(
            cfg.TRAIN.LEARNING_RATE).minimize(loss)

        # initialize variables
        sess.run(tf.global_variables_initializer())
        self.net.load(self.pretrained_model, sess, self.saver, True)
        print('loaded:%s' % (self.pretrained_model))

        # model saver
        saver1 = tf.train.Saver(max_to_keep=150)
        self.saver = saver1

        last_snapshot_iter = -1
        timer = Timer()

        # training steps
        for iter in range(max_iters):
            # get model label
            train_target = data_layer.model_target()
            randnum = data_layer.rand_target()

            # get one image blob per view network
            view_blobs = [data_layer.forward() for _ in range(self.views)]

            # (commented-out alternative: rotate the view order by randnum
            #  before feeding, i.e. view_blobs[randnum:] + view_blobs[:randnum])

            # build feed_dict batch
            feed_dict = {cls_output: train_target}
            for net, blob in zip(view_nets, view_blobs):
                feed_dict[net.data] = blob['data']
                feed_dict[net.im_info] = blob['im_info']
                feed_dict[net.keep_prob] = 1.0

            run_options = None
            run_metadata = None
            if cfg.TRAIN.DEBUG_TIMELINE:
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

            timer.tic()

            #training
            loss_value, _ = sess.run([loss, train_op],
                                     feed_dict=feed_dict,
                                     options=run_options,
                                     run_metadata=run_metadata)

            timer.toc()

            if cfg.TRAIN.DEBUG_TIMELINE:
                trace = timeline.Timeline(step_stats=run_metadata.step_stats)
                trace_file = open(
                    str(int(time.time() * 1000)) + '-train-timeline.ctf.json',
                    'w')
                trace_file.write(
                    trace.generate_chrome_trace_format(show_memory=False))
                trace_file.close()

            # print debug information
            if (iter + 1) % (cfg.TRAIN.DISPLAY) == 0:
                # print('iter: %d / %d, loss: %.4f, lr: %f, randnum: %d' % (iter + 1, max_iters, loss_value, cfg.TRAIN.LEARNING_RATE, randnum))
                print(
                    'iter: %d / %d, loss: %.4f, lr: %.8f' %
                    (iter + 1, max_iters, loss_value, cfg.TRAIN.LEARNING_RATE))
                #print('iter: %d / %d, loss: %.4f' % (iter + 1, max_iters, loss_value))
                # print 'speed: {:.3f}s / iter'.format(timer.average_time)

            if (iter + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = iter
                self.snapshot(sess, iter)
        if last_snapshot_iter != iter:
            self.snapshot(sess, iter)
Example #29
0
    torch.cuda.synchronize()

    return counts


if __name__ == "__main__":
    N_points = 1024 * 16 * 16
    cube_edge = 10
    points = np.random.rand(3, N_points) * cube_edge - cube_edge / 3
    points = torch.from_numpy(points.astype(np.float32)).cuda()

    points[:, 1] = points[:, 0]
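    # duplicate the first point into the second slot (a degenerate-input case
    # for the octree)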

    tree = generate_octree(points)
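    # the untimed call above presumably serves as a CUDA/JIT warm-up, so the
    # timed rebuild below measures the tree construction alone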

    with Timer(message="Octree creation"):
        tree = generate_octree(points)

    import sys
    sys.exit(0)
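    # note: this early exit means the chamfer and radius-count code below
    # never runs as written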

    with Timer(message="Chamfer calculation"):
        chamfer(points, tree, own_tree=True)

    # expected value here:
    count_radius = 0.5
    expected_neighbours_per_point = 4 / 3 * np.pi * count_radius**3 * N_points / cube_edge**3
    print("Radius count: expected roughly %f neighbours per point" %
          expected_neighbours_per_point)
    with Timer(message="Radius count"):
        point_counts = radius_count(tree, radius=count_radius)
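For reference, the expectation printed above is just uniform point density times sphere volume; a quick standalone check (a sketch, mirroring the script's values):

# with N points uniform in a cube of edge L, a ball of radius r holds
# (4/3) * pi * r**3 * N / L**3 points on average
from math import pi
N, L, r = 1024 * 16 * 16, 10, 0.5
print(4 / 3 * pi * r**3 * N / L**3)  # ~137.3 neighbours per point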
Example #30
0
def test_net(net, imdb, weights_filename, max_per_image=100, thresh=0.):
    """Test a Fast R-CNN network on an image database."""
    vis = False

    np.random.seed(cfg.RNG_SEED)
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #  all_boxes[cls][image] = N x 5 array of detections in
    #  (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    ##
    original_all_boxes = [[[] for _ in range(num_images)]
                          for _ in range(imdb.num_classes)]
    ##

    output_dir = get_output_dir(imdb, weights_filename)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    # extract gt objects for this class
    class_recs = {}
    npos = 0

    for i in range(num_images):
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im)
        _t['im_detect'].toc()

        _t['misc'].tic()

        # skip j = 0, because it's the background class
        for j in range(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
              .astype(np.float32, copy=False)
            keep = nms(torch.from_numpy(cls_dets),
                       cfg.TEST.NMS).numpy() if cls_dets.size > 0 else []
            # ##
            # original_all_boxes[j][i] = cls_dets
            # ##
            cls_dets = cls_dets[keep, :]
            all_boxes[j][i] = cls_dets
        ##
        obj_scores = net.roi_scores.cpu().data.numpy()
        inds = np.where(obj_scores > thresh)[0]
        cls_scores = obj_scores[inds]
        cls_boxes = boxes[inds, 4:8]
        # stack the filtered boxes with their scores; the scores need a
        # column axis to hstack against the [N, 4] boxes
        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
          .astype(np.float32, copy=False)

        # j is still the last class index from the loop above
        original_all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        _t['misc'].toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time(),
                _t['misc'].average_time()))

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
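The max_per_image cap above keeps only the K highest-scoring detections pooled over all classes; a standalone numpy illustration (values invented, a sketch only):

import numpy as np

scores = np.array([0.9, 0.8, 0.75, 0.6, 0.3])  # pooled scores from every class
k = 3
image_thresh = np.sort(scores)[-k]             # k-th highest score: 0.75
keep = scores >= image_thresh                  # exactly the top k (ties may keep more)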