def run_model_pb(args, net, init_net, im, check_blobs):
    """Run the protobuf model on one image and return the requested blobs.

    The workspace is reset, ``init_net`` is run once, ``net`` is created,
    the blobs produced by ``_prepare_blobs`` are fed in, and the net is run.
    The fetched boxes get their scores appended as a last column, are passed
    through ``_sort_results``, and are written back to the workspace as
    ``result_boxes`` / ``result_classids``.

    Args:
        args: Options object; only ``args.device`` is read. When it equals
            ``'gpu'`` the ``data`` blob is fed with the CUDA device option,
            every other blob (and every blob otherwise) with the CPU option.
        net: Net to create and run.
        init_net: Initialization net, run once before ``net`` is created.
        im: Image handed to ``_prepare_blobs``.
        check_blobs: Forwarded unchanged to ``_get_result_blobs``.

    Returns:
        The value returned by ``_get_result_blobs(check_blobs)``.
    """
    workspace.ResetWorkspace()
    workspace.RunNetOnce(init_net)
    mutils.create_input_blobs_for_net(net.Proto())
    workspace.CreateNet(net)

    feeds = _prepare_blobs(
        im, cfg.PIXEL_MEANS, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

    # Only the image tensor is placed on the GPU; everything else stays on CPU.
    cuda_blob_names = ['data'] if args.device == 'gpu' else []
    for blob_name, blob_value in feeds.items():
        scoped_name = core.ScopedName(blob_name)
        if blob_name in cuda_blob_names:
            device_option = mutils.get_device_option_cuda()
        else:
            device_option = mutils.get_device_option_cpu()
        workspace.FeedBlob(scoped_name, blob_value, device_option)

    try:
        workspace.RunNet(net.Proto().name)
        scores, classids, boxes = (
            workspace.FetchBlob('score_nms'),
            workspace.FetchBlob('class_nms'),
            workspace.FetchBlob('bbox_nms'),
        )
    except Exception as err:
        print('Running pb model failed.\n{}'.format(err))
        # Best effort: the net may not detect anything at all, so fall back
        # to empty results instead of propagating the failure.
        scores = np.zeros((0,), dtype=np.float32)
        boxes = np.zeros((0, 4), dtype=np.float32)
        classids = np.zeros((0,), dtype=np.float32)

    # Append the score as the last column of each box row.
    scored_boxes = np.column_stack((boxes, scores))

    # Sort the results based on score for comparison.
    sorted_boxes, _, _, sorted_classids = _sort_results(
        scored_boxes, None, None, classids)

    # Write the final result back to the workspace.
    workspace.FeedBlob('result_boxes', sorted_boxes)
    workspace.FeedBlob('result_classids', sorted_classids)

    return _get_result_blobs(check_blobs)
def convert_model_gpu(args, net, init_net):
    """Build GPU-placed copies of ``net`` and ``init_net``.

    Both nets are deep-copied, so the arguments are not modified. In the
    copied main net every op gets the CUDA device option except the op types
    listed in ``cpu_op_filters``, whose device option is left untouched. In
    the copied init net, ops whose first output is ``im_info`` or ``anchor``
    get the CPU device option and all others the CUDA one. Finally
    ``core.InjectDeviceCopiesAmongNets`` is applied to the pair.

    Args:
        args: Options object; ``args.device`` must equal ``'gpu'``.
        net: Main net to convert.
        init_net: Initialization net to convert.

    Returns:
        A two-element list ``[net, init_net]`` taken from the first element
        of the ``InjectDeviceCopiesAmongNets`` result (assumes that call
        preserves the order of the nets it is given -- confirm).

    Raises:
        AssertionError: If ``args.device`` is not ``'gpu'``.
    """
    assert args.device == 'gpu'

    gpu_net = copy.deepcopy(net)
    gpu_init_net = copy.deepcopy(init_net)

    cuda_option = mutils.get_device_option_cuda()
    cpu_option = mutils.get_device_option_cpu()

    # (op type, inputs) pairs handed to mutils.filter_op; matching ops are
    # not moved to the GPU by this function.
    cpu_op_filters = (
        ("CollectAndDistributeFpnRpnProposals", None),
        ("GenerateProposals", None),
        ("BBoxTransform", None),
        ("BoxWithNMSLimit", None),
    )
    cpu_blob_names = ("im_info", "anchor")

    @op_filter()
    def convert_op_gpu(op):
        # Returning None leaves the op as it is (presumably the caller keeps
        # it unchanged -- see convert_op_in_proto).
        if any(
            mutils.filter_op(op, type=op_type, inputs=op_inputs)
            for op_type, op_inputs in cpu_op_filters
        ):
            return None
        op.device_option.CopyFrom(cuda_option)
        return [op]

    @op_filter()
    def convert_init_op_gpu(op):
        target = cpu_option if op.output[0] in cpu_blob_names else cuda_option
        op.device_option.CopyFrom(target)
        return [op]

    convert_op_in_proto(gpu_init_net.Proto(), convert_init_op_gpu)
    convert_op_in_proto(gpu_net.Proto(), convert_op_gpu)

    injected = core.InjectDeviceCopiesAmongNets([gpu_init_net, gpu_net])
    injected_nets = injected[0]

    # The nets were passed as [init_net, net]; hand them back as [net, init_net].
    return [injected_nets[1], injected_nets[0]]
# Example no. 3
# 0
def convert_model_gpu(args, net, init_net):
    """Return copies of ``net`` and ``init_net`` with GPU device options set.

    Works on deep copies, leaving the arguments untouched. Ops in the main
    net are assigned the CUDA device option unless they match one of the
    ``cpu_op_filters`` entries, in which case their device option is not
    changed here. Init-net ops are assigned the CPU device option when their
    first output is ``im_info`` or ``anchor`` and the CUDA one otherwise.
    ``core.InjectDeviceCopiesAmongNets`` is then run over both nets.

    Args:
        args: Options object; ``args.device`` must equal ``'gpu'``.
        net: Main net to convert.
        init_net: Initialization net to convert.

    Returns:
        ``[net, init_net]`` as a list, read from the first element of the
        ``InjectDeviceCopiesAmongNets`` result (assumes the nets come back in
        the order they were passed -- confirm).

    Raises:
        AssertionError: If ``args.device`` is not ``'gpu'``.
    """
    assert args.device == 'gpu'

    converted_net = copy.deepcopy(net)
    converted_init_net = copy.deepcopy(init_net)

    gpu_device = mutils.get_device_option_cuda()
    cpu_device = mutils.get_device_option_cpu()

    # (op type, inputs) filters for ops this function does not move to GPU.
    cpu_op_filters = (
        ("GenerateProposals", None),
        ("BBoxTransform", None),
        ("BoxWithNMSLimit", None),
    )
    cpu_blob_names = ("im_info", "anchor")

    def _matches_cpu_filter(op):
        return any(
            mutils.filter_op(op, type=op_type, inputs=op_inputs)
            for op_type, op_inputs in cpu_op_filters
        )

    @op_filter()
    def convert_op_gpu(op):
        if _matches_cpu_filter(op):
            # None means "no replacement produced"; the op itself is not
            # modified here (presumably kept as is by convert_op_in_proto).
            return None
        op.device_option.CopyFrom(gpu_device)
        return [op]

    @op_filter()
    def convert_init_op_gpu(op):
        stays_on_cpu = op.output[0] in cpu_blob_names
        op.device_option.CopyFrom(cpu_device if stays_on_cpu else gpu_device)
        return [op]

    convert_op_in_proto(converted_init_net.Proto(), convert_init_op_gpu)
    convert_op_in_proto(converted_net.Proto(), convert_op_gpu)

    result = core.InjectDeviceCopiesAmongNets(
        [converted_init_net, converted_net])
    nets_with_copies = result[0]

    # Input order was [init_net, net]; the return order is [net, init_net].
    return [nets_with_copies[1], nets_with_copies[0]]
def run_model_pb(args, net, init_net, im, check_blobs):
    """Run the protobuf model repeatedly in float16 and report total time.

    Inside ``c2_utils.NamedCudaScope(0)`` the function runs ``init_net``
    once, casts every workspace blob not listed in ``keep_dtype_blobs`` to
    ``np.float16``, creates ``net``, and then executes ``num_runs`` timed
    iterations. Each iteration prepares the input blobs from ``im``, feeds
    them and runs the net, so the reported time covers preprocessing and
    feeding as well as the forward pass. The outputs of the last iteration
    are combined, passed through ``_sort_results`` and written back to the
    workspace as ``result_boxes`` / ``result_classids``.

    Args:
        args: Options object; only ``args.device`` is read. When it equals
            ``'gpu'`` the ``data`` blob is fed with the CUDA device option,
            all other blobs with the CPU option.
        net: Net to create and run.
        init_net: Initialization net, run once.
        im: Image handed to ``_prepare_blobs``.
        check_blobs: Forwarded unchanged to ``_get_result_blobs``.

    Returns:
        The value returned by ``_get_result_blobs(check_blobs)``.
    """
    # Number of timed iterations. The timing message reports this same value
    # so the printed count can never drift from the actual loop bound.
    num_runs = 10

    # Blobs that keep the dtype produced by init_net; every other blob in
    # the workspace is converted to float16.
    keep_dtype_blobs = frozenset([
        'data', 'im_info', 'anchor2', 'anchor3', 'anchor4', 'anchor5',
        'anchor6', 'bbox_pred', 'rpn_rois', 'bbox_pred_w', 'bbox_pred_b',
    ])

    workspace.ResetWorkspace()
    with c2_utils.NamedCudaScope(0):
        workspace.RunNetOnce(init_net)

        for blob_name in workspace.Blobs():
            if blob_name not in keep_dtype_blobs:
                blob_value = workspace.FetchBlob(blob_name)
                workspace.FeedBlob(blob_name, blob_value.astype(np.float16))

        mutils.create_input_blobs_for_net(net.Proto())
        workspace.CreateNet(net)

        # Only the image tensor is fed with the CUDA device option.
        gpu_blobs = ['data'] if args.device == 'gpu' else []

        start_time = time.time()
        for _ in range(num_runs):
            input_blobs = _prepare_blobs(
                im,
                cfg.PIXEL_MEANS,
                cfg.TEST.SCALES, cfg.TEST.MAX_SIZE
            )
            for k, v in input_blobs.items():
                # Input blobs are fed under their bare (unscoped) names.
                workspace.FeedBlob(
                    k,
                    v,
                    mutils.get_device_option_cuda() if k in gpu_blobs else
                    mutils.get_device_option_cpu()
                )
            try:
                workspace.RunNet(net)
                scores = workspace.FetchBlob('score_nms')
                classids = workspace.FetchBlob('class_nms')
                boxes = workspace.FetchBlob('bbox_nms')
            except Exception as e:
                print('Running pb model failed.\n{}'.format(e))
                # Best effort: the net may not detect anything at all, so
                # fall back to empty results instead of aborting the run.
                scores = np.zeros((0,), dtype=np.float32)
                boxes = np.zeros((0, 4), dtype=np.float32)
                classids = np.zeros((0,), dtype=np.float32)
        elapsed = time.time() - start_time
        print("model2 {} times total time {}s".format(num_runs, elapsed))

        # Append the score as the last column of each box row.
        boxes = np.column_stack((boxes, scores))

        # Sort the results based on score for comparison.
        boxes, _, _, classids = _sort_results(
            boxes, None, None, classids)

        # Write the final result back to the workspace.
        workspace.FeedBlob(core.ScopedName('result_boxes'), boxes)
        workspace.FeedBlob(core.ScopedName('result_classids'), classids)

        ret = _get_result_blobs(check_blobs)

        return ret