Ejemplo n.º 1
0
def im_detect_keypoints(model, im_scale, boxes):
    """Infer instance keypoint poses. This function must be called after
    im_detect_bbox as it assumes that the Caffe2 workspace is already populated
    with the necessary blobs.

    When the environment variable INT8INFO is "1" the keypoint net is run
    through one calibration iteration (algorithm selected by INT8CALIB)
    instead of a plain ``workspace.RunNet``.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im_scale: image blob scale as returned by im_detect_bbox
        boxes (ndarray): R x 4 array of bounding box detections (e.g., as
            returned by im_detect_bbox)

    Returns:
        pred_heatmaps (ndarray): R x J x M x M array of keypoint location
            logits (softmax inputs) for each of the J keypoint types output
            by the network (must be processed by keypoint_results to convert
            into point predictions in the original image coordinate space)
    """
    M = cfg.KRCNN.HEATMAP_SIZE
    if boxes.shape[0] == 0:
        # Nothing to run the net on: return an empty, correctly shaped array.
        pred_heatmaps = np.zeros((0, cfg.KRCNN.NUM_KEYPOINTS, M, M),
                                 np.float32)
        return pred_heatmaps

    inputs = {'keypoint_rois': _get_rois_blob(boxes, im_scale)}

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois_for_test(inputs, 'keypoint_rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    if os.environ.get('INT8INFO') == "1":
        # Calibration mode: absmax is the default algorithm unless INT8CALIB
        # selects another one.
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # INT8KLNUM may be unset or empty; int(None) would raise, so
            # fall back before converting. A value of 0 also means "default".
            kl_iter_num_for_range = int(os.environ.get('INT8KLNUM') or 0)
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.keypoint_net.Proto())
    else:
        workspace.RunNet(model.keypoint_net.Proto().name)

    pred_heatmaps = workspace.FetchBlob(core.ScopedName('kps_score')).squeeze()

    # squeeze() also drops the leading axis when there is a single box;
    # restore it so the result is always 4-D.
    if pred_heatmaps.ndim == 3:
        pred_heatmaps = np.expand_dims(pred_heatmaps, axis=0)

    return pred_heatmaps
Ejemplo n.º 2
0
def im_conv_body_only(model, im, target_scale, target_max_size):
    """Runs `model.conv_body_net` on the given image `im`.

    The image is converted to a blob with ``blob_utils.get_image_blob`` and
    fed to the workspace as 'data'. When the environment variable INT8INFO
    is "1" the net is run through one calibration iteration (algorithm
    selected by INT8CALIB) instead of a plain ``workspace.RunNet``.

    Arguments:
        model: detection model exposing ``conv_body_net``
        im: image passed to ``blob_utils.get_image_blob``
        target_scale: forwarded to ``blob_utils.get_image_blob``
        target_max_size: forwarded to ``blob_utils.get_image_blob``

    Returns:
        im_scale: the scale returned by ``blob_utils.get_image_blob``
    """
    im_blob, im_scale, _ = blob_utils.get_image_blob(im, target_scale,
                                                     target_max_size)
    workspace.FeedBlob(core.ScopedName('data'), im_blob)
    if os.environ.get('INT8INFO') == "1":
        # Calibration mode: absmax is the default algorithm unless INT8CALIB
        # selects another one.
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # INT8KLNUM may be unset or empty; int(None) would raise, so
            # fall back before converting. A value of 0 also means "default".
            kl_iter_num_for_range = int(os.environ.get('INT8KLNUM') or 0)
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.conv_body_net.Proto())
    else:
        workspace.RunNet(model.conv_body_net.Proto().name)
    return im_scale
Ejemplo n.º 3
0
def im_detect_mask(model, im_scale, boxes, timers=None):
    """Infer instance segmentation masks. This function must be called after
    im_detect_bbox as it assumes that the Caffe2 workspace is already populated
    with the necessary blobs.

    When the environment variable INT8INFO is "1" the mask net is run through
    one calibration iteration (algorithm selected by INT8CALIB) instead of a
    plain ``workspace.RunNet``. When EPOCH2OLD is "1" the net is additionally
    run once, untimed, as a warm-up.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im_scale: image blob scale as returned by im_detect_bbox
        boxes (ndarray): R x 4 array of bounding box detections (e.g., as
            returned by im_detect_bbox)
        timers (dict): optional mapping of name -> Timer; the 'data_mask',
            'run_mask' and 'result_mask' entries are updated. A fresh
            defaultdict(Timer) is used when omitted.

    Returns:
        pred_masks (ndarray): R x K x M x M array of class specific soft masks
            output by the network (must be processed by segm_results to convert
            into hard masks in the original image coordinate space)
    """
    if timers is None:
        timers = defaultdict(Timer)
    timers['data_mask'].tic()
    M = cfg.MRCNN.RESOLUTION
    if boxes.shape[0] == 0:
        # No boxes: return an empty array without running the net.
        pred_masks = np.zeros((0, M, M), np.float32)
        return pred_masks

    inputs = {'mask_rois': _get_rois_blob(boxes, im_scale)}
    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois_for_test(inputs, 'mask_rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    timers['data_mask'].toc()
    # run first time to warm up (outside the 'run_mask' timer)
    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.mask_net.Proto().name)
    timers['run_mask'].tic()
    if os.environ.get('INT8INFO') == "1":
        # Calibration mode: absmax is the default algorithm unless INT8CALIB
        # selects another one.
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # INT8KLNUM may be unset or empty; int(None) would raise, so
            # fall back before converting. A value of 0 also means "default".
            kl_iter_num_for_range = int(os.environ.get('INT8KLNUM') or 0)
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.mask_net.Proto())
    else:
        workspace.RunNet(model.mask_net.Proto().name)
    timers['run_mask'].toc()
    timers['result_mask'].tic()
    # Fetch masks
    pred_masks = workspace.FetchBlob(
        core.ScopedName('mask_fcn_probs')).squeeze()

    # Restore a fixed 4-D layout after squeeze(): one channel per class when
    # masks are class specific, otherwise a single channel.
    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        pred_masks = pred_masks.reshape([-1, cfg.MODEL.NUM_CLASSES, M, M])
    else:
        pred_masks = pred_masks.reshape([-1, 1, M, M])
    timers['result_mask'].toc()
    return pred_masks
Ejemplo n.º 4
0
def im_detect_bbox(model,
                   im,
                   target_scale,
                   target_max_size,
                   size_fix=None,
                   timers=None,
                   model1=None,
                   boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Environment variables read here:
        COSIM: when set, inputs are fed to both the "__int8_ws__" and
            "__fp32_ws__" workspaces and the ops of `model` and `model1` are
            run one by one and compared; its value is passed to
            compare_utils.assert_compare as the comparison algorithm.
        EPOCH2OLD: when "1", the net is run once, untimed, as a warm-up.
        INT8INFO / INT8CALIB / INT8KLNUM: when INT8INFO is "1", the net is
            run through a calibration iteration instead of being executed
            normally.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im: image(s) to test (in BGR order); `im[0].shape` is used to clip
            the predicted boxes
        target_scale: forwarded to _get_blobs
        target_max_size: forwarded to _get_blobs
        size_fix: forwarded to _get_blobs
        timers (dict): optional mapping of name -> Timer; the 'data1', 'run'
            and 'result' entries are updated
        model1: reference model whose ops are run in the fp32 workspace when
            COSIM is set
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scale: image scale(s) used in the input blob (as returned by
            _get_blobs and for use with im_detect_mask, etc.)
        batch_indices: first column of the 'rois' blob when
            cfg.MODEL.FASTER_RCNN is set, otherwise None
    """
    if timers is None:
        timers = defaultdict(Timer)

    if model1 is None and os.environ.get('COSIM'):
        print("cosim must has model1")

    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"

    timers['data1'].tic()
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size,
                                  size_fix)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')
    for k, v in inputs.items():
        # In COSIM mode every input blob is fed to both workspaces; the fp32
        # workspace is left current afterwards.
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(int8_ws_name, True)
        workspace.FeedBlob(core.ScopedName(k), v)
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(fp32_ws_name, True)
            workspace.FeedBlob(core.ScopedName(k), v)
    timers['data1'].toc()
    # run first time to warm up
    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.net.Proto().name)
    timers['run'].tic()
    if os.environ.get('INT8INFO') == "1":
        # Calibration mode: absmax is the default algorithm unless INT8CALIB
        # selects another one.
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            kl_iter_num_for_range = os.environ.get('INT8KLNUM')
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            kl_iter_num_for_range = int(kl_iter_num_for_range)
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.net.Proto())
    else:
        if os.environ.get('COSIM'):
            cosim_alg = os.environ.get('COSIM')
            # Dump both net definitions as text. Text mode is required
            # because str(...) is written (binary mode rejects str on
            # Python 3).
            with open("int8.txt", "w") as p:
                p.write(str(model.net.Proto()))
            with open("fp32.txt", "w") as p:
                p.write(str(model1.net.Proto()))
            # Run op i of the int8 net and op i of the fp32 net in their own
            # workspaces, then compare the outputs.
            for i in range(len(model.net.Proto().op)):
                workspace.SwitchWorkspace(int8_ws_name)
                int8_inputs = []
                for inp in model.net.Proto().op[i].input:
                    int8_inputs.append(workspace.FetchBlob(str(inp)))
                logging.warning(" opint8[{0}] is  {1}".format(
                    i,
                    model.net.Proto().op[i]))
                workspace.RunOperatorOnce(model.net.Proto().op[i])
                int8_results = []
                for res in model.net.Proto().op[i].output:
                    int8_results.append(workspace.FetchBlob(str(res)))
                workspace.SwitchWorkspace(fp32_ws_name)
                fp32_inputs = []
                for inp1 in model1.net.Proto().op[i].input:
                    fp32_inputs.append(workspace.FetchBlob(str(inp1)))
                logging.warning(" opfp32[{0}] is  {1}".format(
                    i,
                    model1.net.Proto().op[i]))
                workspace.RunOperatorOnce(model1.net.Proto().op[i])
                fp32_results = []
                for res1 in model1.net.Proto().op[i].output:
                    fp32_results.append(workspace.FetchBlob(str(res1)))
                # NOTE(review): the two early returns below yield None rather
                # than the 4-tuple returned on success.
                if len(int8_inputs) != len(fp32_inputs):
                    logging.error("Wrong number of inputs")
                    return
                if len(int8_results) != len(fp32_results):
                    logging.error("Wrong number of outputs")
                    return
                logging.warning("begin to check op[{}] {} input".format(
                    i,
                    model.net.Proto().op[i].type))
                # Input comparison is currently disabled; blobs whose name
                # starts with '_' are skipped.
                for k in range(len(int8_inputs)):
                    if model.net.Proto().op[i].input[k][0] == '_':
                        continue
                logging.warning("pass checking op[{0}] {1} input".format(
                    i,
                    model.net.Proto().op[i].type))
                logging.warning("begin to check op[{0}] {1} output".format(
                    i,
                    model.net.Proto().op[i].type))
                for j, int8_result in enumerate(int8_results):
                    if model.net.Proto().op[i].output[j][0] == '_':
                        continue
                    # On a mismatch, log every input of the op to help
                    # locate the source of the divergence.
                    if not compare_utils.assert_compare(
                            int8_result, fp32_results[j], 1e-01, cosim_alg):
                        for k, int8_input in enumerate(int8_inputs):
                            logging.warning("int8_input[{}] is {}".format(
                                k, int8_input))
                            logging.warning("fp32_input[{}] is {}".format(
                                k, fp32_inputs[k]))
                logging.warning("pass checking op[{0}] {1} output".format(
                    i,
                    model.net.Proto().op[i].type))
        else:
            workspace.RunNet(model.net.Proto().name)
    timers['run'].toc()
    timers['result'].tic()
    # Read out blobs
    # batch_indices only exists for Faster R-CNN style models; default to
    # None so the return statement is valid for every configuration.
    batch_indices = None
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale
        batch_indices = rois[:, 0]

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
    timers['result'].toc()
    return scores, pred_boxes, im_scale, batch_indices
Ejemplo n.º 5
0
def test_net(weights_file,
             dataset_name,
             proposal_file,
             output_dir,
             ind_range=None,
             gpu_id=0):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.

    Environment variables read here:
        COSIM: when set, a second (int8=False) model is initialised in the
            "__fp32_ws__" workspace and passed to im_detect_all.
        INT8INFO / INT8CALIB / INT8KLNUM: calibration mode; after the run the
            quantized nets are deposited and saved to disk.
        INT8PTXT: when "1", quantized nets are saved as text (.pbtxt) instead
            of serialized binary (.pb).
        EPOCH2: when "1", a full extra pass over the roidb is run first and
            its results are discarded.
        DPROFILE: when "1", observer timings are logged per image.

    Arguments:
        weights_file: forwarded to initialize_model_from_cfg
        dataset_name, proposal_file, ind_range: forwarded to
            get_roidb_and_dataset
        output_dir: directory receiving the detections pickle (and the 'vis'
            sub-directory when cfg.VIS is set)
        gpu_id: device id used for model initialisation and the CUDA scope

    Returns:
        (all_boxes, all_segms, all_keyps) as filled by extend_results.
    """
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'
    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"
    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        dataset_name, proposal_file, ind_range)
    model1 = None
    if os.environ.get('COSIM'):
        workspace.SwitchWorkspace(int8_ws_name, True)
    model, ob, ob_mask, ob_keypoint = initialize_model_from_cfg(weights_file,
                                                                gpu_id=gpu_id)
    if os.environ.get('COSIM'):
        workspace.SwitchWorkspace(fp32_ws_name, True)
        model1, _, _, _ = initialize_model_from_cfg(weights_file,
                                                    gpu_id=gpu_id,
                                                    int8=False)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    timers = defaultdict(Timer)

    # for kl_divergence calibration, we use the first 100 images to get
    # the min and max values, and the remaining images are applied to compute
    # the hist. if the len(images) <= 100, we extend the images with themselves.
    if os.environ.get('INT8INFO') == "1" and os.environ.get(
            'INT8CALIB') == "kl_divergence":
        # INT8KLNUM may be unset or empty; int(None) would raise, so fall
        # back before converting. A value of 0 also means "default".
        kl_iter_num_for_range = int(os.environ.get('INT8KLNUM') or 0)
        if not kl_iter_num_for_range:
            kl_iter_num_for_range = 100
        # Doubling an empty roidb never grows it, so guard against an
        # endless loop.
        # NOTE(review): num_images (used to size the result containers) was
        # computed before this extension -- confirm extend_results tolerates
        # indices >= num_images.
        while roidb and len(roidb) < 2 * kl_iter_num_for_range:
            roidb += roidb
    if os.environ.get('EPOCH2') == "1":
        # Extra pass over the data; results are thrown away after each image.
        for i, entry in enumerate(roidb):
            if cfg.TEST.PRECOMPUTED_PROPOSALS:
                # The roidb may contain ground-truth rois (for example, if the roidb
                # comes from the training or val split). We only want to evaluate
                # detection on the *non*-ground-truth rois. We select only the rois
                # that have the gt_classes field set to 0, which means there's no
                # ground truth.
                box_proposals = entry['boxes'][entry['gt_classes'] == 0]
                if len(box_proposals) == 0:
                    continue
            else:
                # Faster R-CNN type models generate proposals on-the-fly with an
                # in-network RPN; 1-stage models don't require proposals.
                box_proposals = None

            im = []
            im.append(cv2.imread(entry['image']))
            print("im is {} and i is {} ".format(entry['image'], i))
            with c2_utils.NamedCudaScope(gpu_id):
                cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
                    model, im, box_proposals, timers, model1)
            extend_results(i, all_boxes, cls_boxes_i[0])
            if cls_segms_i is not None:
                extend_results(i, all_segms, cls_segms_i[0])
            if cls_keyps_i is not None:
                extend_results(i, all_keyps, cls_keyps_i[0])
            all_boxes, all_segms, all_keyps = empty_results(
                num_classes, num_images)
    logging.warning("begin to run benchmark")
    for i, entry in enumerate(roidb):
        if cfg.TEST.PRECOMPUTED_PROPOSALS:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue
        else:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN; 1-stage models don't require proposals.
            box_proposals = None

        # im_detect_all is given a one-element list of images.
        im = []
        im.append(cv2.imread(entry['image']))
        print("im is {} and i is {} ".format(entry['image'], i))
        with c2_utils.NamedCudaScope(gpu_id):
            cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
                model, im, box_proposals, timers, model1)
        if os.environ.get('DPROFILE') == "1" and ob is not None:
            logging.warning("enter profile log")
            logging.warning("net observer time = {}".format(ob.average_time()))
            logging.warning("net observer time = {}".format(
                ob.average_time_children()))
        if os.environ.get('DPROFILE') == "1" and ob_mask is not None:
            logging.warning("mask net observer time = {}".format(
                ob_mask.average_time()))
            logging.warning("mask net observer time = {}".format(
                ob_mask.average_time_children()))
        # Guard on the keypoint observer itself (not the mask observer)
        # before dereferencing it.
        if os.environ.get('DPROFILE') == "1" and ob_keypoint is not None:
            logging.warning("keypoint net observer time = {}".format(
                ob_keypoint.average_time()))
            logging.warning("keypoint net observer time = {}".format(
                ob_keypoint.average_time_children()))
        extend_results(i, all_boxes, cls_boxes_i[0])
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i[0])
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i[0])

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (timers['im_detect_bbox'].average_time +
                        timers['im_detect_mask'].average_time +
                        timers['im_detect_keypoints'].average_time)
            misc_time = (timers['misc_bbox'].average_time +
                         timers['misc_mask'].average_time +
                         timers['misc_keypoints'].average_time)
            logger.info(('im_detect: range [{:d}, {:d}] of {:d}: '
                         '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})').format(
                             start_ind + 1, end_ind, total_num_images,
                             start_ind + i + 1, start_ind + num_images,
                             det_time, misc_time, eta))
        if cfg.VIS:
            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            # `im` is a one-element list, so index the image before flipping
            # the channel axis; segms/keypoints may legitimately be None.
            vis_utils.vis_one_image(
                im[0][:, :, ::-1],
                '{:d}_{:s}'.format(i, im_name),
                os.path.join(output_dir, 'vis'),
                cls_boxes_i[0],
                segms=cls_segms_i[0] if cls_segms_i is not None else None,
                keypoints=cls_keyps_i[0] if cls_keyps_i is not None else None,
                thresh=cfg.VIS_TH,
                box_alpha=0.8,
                dataset=dataset,
                show_class=True)
        for key, value in timers.items():
            logger.info('{} : {}'.format(key, value.average_time))

    # remove observer
    if ob is not None:
        model.net.RemoveObserver(ob)
    if ob_mask is not None:
        model.mask_net.RemoveObserver(ob_mask)
    if ob_keypoint is not None:
        model.keypoint_net.RemoveObserver(ob_keypoint)
    if os.environ.get('INT8INFO') == "1":

        def save_net(net_def, init_def):
            """Write a (predict, init) net pair to files named after net_def."""
            if net_def is None or init_def is None:
                return
            if net_def.name is None or init_def.name is None:
                return
            if os.environ.get('INT8PTXT') == "1":
                # Text dump: str(...) must go to a text-mode file.
                with open(net_def.name + '_predict_int8.pbtxt', 'w') as n:
                    n.write(str(net_def))
                with open(net_def.name + '_init_int8.pbtxt', 'w') as n:
                    n.write(str(init_def))
            else:
                with open(net_def.name + '_predict_int8.pb', 'wb') as n:
                    n.write(net_def.SerializeToString())
                with open(net_def.name + '_init_int8.pb', 'wb') as n:
                    n.write(init_def.SerializeToString())

        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # kl_iter_num_for_range was set above under the same conditions.
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        if model.net:
            predict_quantized, init_quantized = calib.DepositQuantizedModule(
                workspace, model.net.Proto())
            save_net(predict_quantized, init_quantized)
        if cfg.MODEL.MASK_ON:
            predict_quantized, init_quantized = calib.DepositQuantizedModule(
                workspace, model.mask_net.Proto())
            save_net(predict_quantized, init_quantized)
        if cfg.MODEL.KEYPOINTS_ON:
            predict_quantized, init_quantized = calib.DepositQuantizedModule(
                workspace, model.keypoint_net.Proto())
            save_net(predict_quantized, init_quantized)
    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    save_object(
        dict(all_boxes=all_boxes,
             all_segms=all_segms,
             all_keyps=all_keyps,
             cfg=cfg_yaml), det_file)
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps
Ejemplo n.º 6
0
def main(args):
    """Run detection on an image or a folder of images and visualise results.

    Reads from `args`: cfg, weights, device_id, im_or_folder, image_ext,
    batch_size, output_dir, output_ext and out_when_no_box.

    Environment variables read here:
        COSIM: when set, a second (int8=False) model is initialised in the
            "__fp32_ws__" workspace and passed to im_detect_all.
        INT8INFO / INT8CALIB / INT8KLNUM: calibration mode; after the run the
            quantized nets are deposited and saved to disk.
        INT8PTXT: when "1", quantized nets are saved as text (.pbtxt) instead
            of serialized binary (.pb).
        EPOCH2: when "1", a full extra pass over the images is run first and
            its results are discarded.
    """
    logger = logging.getLogger(__name__)

    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)

    assert not cfg.MODEL.RPN_ONLY, \
        'RPN models are not supported'
    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
        'Models that require precomputed proposals are not supported'
    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"
    model1 = None
    if os.environ.get('COSIM'):
        workspace.SwitchWorkspace(int8_ws_name, True)
    model, _, _, _ = infer_engine.initialize_model_from_cfg(
        args.weights, gpu_id=args.device_id)
    if os.environ.get('COSIM'):
        workspace.SwitchWorkspace(fp32_ws_name, True)
        model1, _, _, _ = infer_engine.initialize_model_from_cfg(
            args.weights, gpu_id=args.device_id, int8=False)

    dummy_coco_dataset = dummy_datasets.get_coco_dataset()

    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]

    fnames = batch_image(im_list, args.batch_size)
    # for kl_divergence calibration, we use the first 100 images to get
    # the min and max values, and the remaining images are applied to compute
    # the hist. if the len(images) <= 100, we extend the images with themselves.
    if os.environ.get('INT8INFO') == "1" and os.environ.get(
            'INT8CALIB') == "kl_divergence":
        kl_iter_num_for_range = os.environ.get('INT8KLNUM')
        if not kl_iter_num_for_range:
            kl_iter_num_for_range = 100
        kl_iter_num_for_range = int(kl_iter_num_for_range)
        # Doubling an empty batch list never grows it, so guard against an
        # endless loop.
        while fnames and len(fnames) < 2 * kl_iter_num_for_range:
            fnames += fnames
    if os.environ.get('EPOCH2') == "1":
        # Extra pass over the data; results are discarded.
        for im_name in fnames:
            im = [cv2.imread(name) for name in im_name]

            timers = defaultdict(Timer)
            with c2_utils.NamedCudaScope(args.device_id):
                cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                    model, im, None, timers, model1)
    logger.warning("begin to run benchmark\n")
    for i, im_name in enumerate(fnames):
        # Each entry of fnames is one batch of image paths.
        im = [cv2.imread(name) for name in im_name]

        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(args.device_id):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers, model1)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))

        if i == 0:
            logger.info(
                ' \\ Note: inference on the first batch will be slower than the '
                'rest (caches and auto-tuning need to warm up)')

        # Iterate over the images actually loaded so a short final batch
        # does not index past the end.
        for bs, image in enumerate(im):
            cls_segm = cls_segms[bs] if cls_segms is not None else None
            # Test the per-batch keypoint results (cls_keyps), not the
            # per-image variable being assigned.
            cls_keyp = cls_keyps[bs] if cls_keyps is not None else None

            cls_box = cls_boxes[bs]
            image_name = fnames[i][bs].split("/")[-1]

            vis_utils.vis_one_image(
                image[:, :, ::-1],  # BGR -> RGB for visualization
                image_name,
                args.output_dir,
                cls_box,
                cls_segm,
                cls_keyp,
                dataset=dummy_coco_dataset,
                box_alpha=0.3,
                show_class=True,
                thresh=0.7,
                kp_thresh=2,
                ext=args.output_ext,
                out_when_no_box=args.out_when_no_box)

    if os.environ.get('INT8INFO') == "1":

        def save_net(net_def, init_def):
            """Write a (predict, init) net pair to files named after net_def."""
            if net_def is None or init_def is None:
                return
            if net_def.name is None or init_def.name is None:
                return
            if os.environ.get('INT8PTXT') == "1":
                # Text dump: str(...) must go to a text-mode file.
                with open(net_def.name + '_predict_int8.pbtxt', 'w') as n:
                    n.write(str(net_def))
                with open(net_def.name + '_init_int8.pbtxt', 'w') as n:
                    n.write(str(init_def))
            else:
                with open(net_def.name + '_predict_int8.pb', 'wb') as n:
                    n.write(net_def.SerializeToString())
                with open(net_def.name + '_init_int8.pb', 'wb') as n:
                    n.write(init_def.SerializeToString())

        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # kl_iter_num_for_range was set above under the same conditions.
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        if model.net:
            predict_quantized, init_quantized = calib.DepositQuantizedModule(
                workspace, model.net.Proto())
            save_net(predict_quantized, init_quantized)
        if cfg.MODEL.MASK_ON:
            predict_quantized, init_quantized = calib.DepositQuantizedModule(
                workspace, model.mask_net.Proto())
            save_net(predict_quantized, init_quantized)
        if cfg.MODEL.KEYPOINTS_ON:
            predict_quantized, init_quantized = calib.DepositQuantizedModule(
                workspace, model.keypoint_net.Proto())
            save_net(predict_quantized, init_quantized)
Ejemplo n.º 7
0
def im_detect_bbox(model, im, timers=None, model1=None):
    """Generate RetinaNet detections for the image(s) in a single blob.

    The run mode is selected through environment variables:
      * INT8INFO == "1": run one calibration iteration on ``model.net``
        (algorithm chosen by INT8CALIB; INT8KLNUM sets the KL iteration
        count, defaulting to 100).
      * COSIM set (and INT8INFO not "1"): run ``model.net`` and
        ``model1.net`` operator by operator in two separate workspaces and
        compare their outputs.
      * otherwise: run ``model.net`` normally.
      * EPOCH2OLD == "1": additionally run ``model.net`` once before the
        timed run.

    Arguments:
        model: detection model whose ``net`` is executed
        im: image input handed to ``blob_utils.get_image_blob``;
            ``im[0].shape`` is used as the clipping size for every image
            (NOTE(review): presumably a sequence of equally sized images --
            confirm against the caller)
        timers (dict or None): name -> Timer map; a fresh one is created
            when None
        model1: second model, required for the COSIM comparison

    Returns:
        list: one entry per image in the blob; each entry is a list indexed
        by class id whose elements hold that class's detections as rows of
        [x0, y0, x1, y1, score] (index 0 is left as an empty list).
        Returns None if the COSIM comparison finds operators whose input or
        output counts differ between the two nets.
    """
    if timers is None:
        timers = defaultdict(Timer)

    cosim = os.environ.get('COSIM')
    if model1 is None and cosim:
        print("cosim must has model1")

    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    timers['data1'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, cfg.TEST.SIZEFIX)

    # Names of the per-FPN-level output blobs to fetch after the run.
    levels = range(k_min, k_max + 1)
    cls_probs = [
        core.ScopedName('retnet_cls_prob_fpn{}'.format(lvl)) for lvl in levels
    ]
    box_preds = [
        core.ScopedName('retnet_bbox_pred_fpn{}'.format(lvl)) for lvl in levels
    ]

    for k, v in inputs.items():
        blob = v.astype(np.float32, copy=False)
        if cosim:
            workspace.SwitchWorkspace(int8_ws_name, True)
        workspace.FeedBlob(core.ScopedName(k), blob)
        if cosim:
            # The fp32 reference workspace needs the same inputs.
            workspace.SwitchWorkspace(fp32_ws_name, True)
            workspace.FeedBlob(core.ScopedName(k), blob)
    timers['data1'].toc()

    if os.environ.get('EPOCH2OLD') == "1":
        # Extra run outside of the 'run' timer.
        workspace.RunNet(model.net.Proto().name)
    timers['run'].tic()
    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            kl_iter_num_for_range = int(os.environ.get('INT8KLNUM') or 100)
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.net.Proto())
    elif cosim:
        int8_proto = model.net.Proto()
        fp32_proto = model1.net.Proto()
        # str() yields text, so the dumps are opened in text mode (writing
        # str to a 'wb' handle raises TypeError on Python 3).
        with open("int8.txt", "w") as p:
            p.write(str(int8_proto))
        with open("fp32.txt", "w") as p:
            p.write(str(fp32_proto))
        for i in range(len(int8_proto.op)):
            int8_op = int8_proto.op[i]
            fp32_op = fp32_proto.op[i]

            # Run operator i in the int8 workspace.
            workspace.SwitchWorkspace(int8_ws_name)
            int8_inputs = [
                workspace.FetchBlob(str(inp)) for inp in int8_op.input
            ]
            logging.warning(" opint8[{0}] is  {1}".format(i, int8_op))
            workspace.RunOperatorOnce(int8_op)
            int8_results = [
                workspace.FetchBlob(str(res)) for res in int8_op.output
            ]

            # Run the corresponding operator in the fp32 workspace.
            workspace.SwitchWorkspace(fp32_ws_name)
            fp32_inputs = [
                workspace.FetchBlob(str(inp)) for inp in fp32_op.input
            ]
            logging.warning(" opfp32[{0}] is  {1}".format(i, fp32_op))
            workspace.RunOperatorOnce(fp32_op)
            fp32_results = [
                workspace.FetchBlob(str(res)) for res in fp32_op.output
            ]

            if len(int8_inputs) != len(fp32_inputs):
                logging.error("Wrong number of inputs")
                return
            if len(int8_results) != len(fp32_results):
                logging.error("Wrong number of outputs")
                return

            # Input comparison is currently disabled; only the log lines
            # remain. Outputs are the only values actually compared.
            logging.warning("begin to check op[{}] {} input".format(
                i, int8_op.type))
            logging.warning("pass checking op[{0}] {1} input".format(
                i, int8_op.type))
            logging.warning("begin to check op[{0}] {1} output".format(
                i, int8_op.type))
            for j, int8_result in enumerate(int8_results):
                # Outputs whose blob name starts with '_' are not compared.
                if int8_op.output[j][0] == '_':
                    continue
                if not compare_utils.assert_compare(
                        int8_result, fp32_results[j], 1e-01, cosim):
                    # On mismatch, dump both sets of inputs for debugging.
                    for k, int8_input in enumerate(int8_inputs):
                        logging.warning("int8_input[{}] is {}".format(
                            k, int8_input))
                        logging.warning("fp32_input[{}] is {}".format(
                            k, fp32_inputs[k]))

            logging.warning("pass checking op[{0}] {1} output".format(
                i, int8_op.type))
    else:
        workspace.RunNet(model.net.Proto().name)
    timers['run'].toc()
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    batch_size = cls_probs[0].shape[0]
    # One independent {class: [x0, y0, x1, y1, score] rows} dict per image.
    # (`[d] * n` would repeat a single shared dict and merge the detections
    # of every image in the batch.)
    boxes_all_list = [defaultdict(list) for _ in range(batch_size)]
    for lvl, cls_prob, box_pred in zip(levels, cls_probs, box_preds):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # split the channel axis into (anchor, class) / (anchor, 4 deltas)
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, cls_prob.shape[1] // A,
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))

        if cfg.RETINANET.SOFTMAX:
            # drop class channel 0
            cls_prob = cls_prob[:, :, 1::, :, :]

        for i in range(batch_size):
            im_cls_prob = cls_prob[i, :]
            cls_prob_ravel = im_cls_prob.ravel()

            # In some cases [especially for very small img sizes], it's possible that
            # candidate_ind is empty if we impose threshold 0.05 at all levels. This
            # will lead to errors since no detections are found for this image. Hence,
            # for lvl 7 which has small spatial resolution, we take the threshold 0.0
            th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
            candidate_inds = np.where(cls_prob_ravel > th)[0]
            if len(candidate_inds) == 0:
                continue

            # keep only the pre_nms_topn highest scoring candidates
            pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N,
                               len(candidate_inds))
            inds = np.argpartition(cls_prob_ravel[candidate_inds],
                                   -pre_nms_topn)[-pre_nms_topn:]
            inds = candidate_inds[inds]

            inds_4d = np.array(
                np.unravel_index(inds, im_cls_prob.shape)).transpose()
            classes = inds_4d[:, 1]
            anchor_ids, y, x = inds_4d[:, 0], inds_4d[:, 2], inds_4d[:, 3]
            scores = cls_prob[i, anchor_ids, classes, y, x]
            # anchor box at each selected cell: cell offset + cell anchor
            boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
            boxes *= stride
            boxes += cell_anchors[anchor_ids, :]

            if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                box_deltas = box_pred[i, anchor_ids, :, y, x]
            else:
                box_cls_inds = classes * 4
                box_deltas = np.vstack([
                    box_pred[i, ind:ind + 4, yi, xi]
                    for ind, yi, xi in zip(box_cls_inds, y, x)
                ])
            pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                          if cfg.TEST.BBOX_REG else boxes)
            pred_boxes /= im_scale
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
            box_scores = np.zeros((pred_boxes.shape[0], 5))
            box_scores[:, 0:4] = pred_boxes
            box_scores[:, 4] = scores

            # predicted class index c corresponds to class id c + 1
            for cls in range(1, cfg.MODEL.NUM_CLASSES):
                inds = np.where(classes == cls - 1)[0]
                if len(inds) > 0:
                    boxes_all_list[i][cls].extend(box_scores[inds, :])

    timers['im_detect_bbox'].toc()

    # A single tic/toc pair around the whole post-processing loop, so the
    # timer covers every image rather than only the last one.
    timers['misc_bbox'].tic()
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes_list = []
    for boxes_all in boxes_all_list:
        # Combine predictions across all levels and retain the top scoring by class
        detections = []
        for cls, boxes in boxes_all.items():
            cls_dets = np.vstack(boxes).astype(dtype=np.float32)
            # do class specific nms here
            keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            out = np.zeros((len(keep), 6))
            out[:, 0:5] = cls_dets
            out[:, 5].fill(cls)
            detections.append(out)

        # detections (N, 6) format:
        #   detections[:, :4] - boxes
        #   detections[:, 4] - scores
        #   detections[:, 5] - classes
        if detections:
            detections = np.vstack(detections)
        else:
            # np.vstack([]) raises; an image without detections yields
            # empty per-class arrays instead.
            detections = np.zeros((0, 6))
        # sort all again
        inds = np.argsort(-detections[:, 4])
        detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

        # Convert the detections to image cls_ format (see core/test_engine.py)
        cls_boxes = [[] for _ in range(num_classes)]
        for c in range(1, num_classes):
            inds = np.where(detections[:, 5] == c)[0]
            cls_boxes[c] = detections[inds, :5]
        cls_boxes_list.append(cls_boxes)

    timers['misc_bbox'].toc()

    return cls_boxes_list