Example #1
def ConvertTensorProtosToInitNet(net_params, input_name):
    """Takes the net_params returned from TranslateModel, and wrap it as an
    init net that contain GivenTensorFill.

    This is a very simple feature that only works with float tensors, and is
    only intended to be used in an environment where you want a single
    initialization file - for more complex cases, use a db to store the
    parameters.
    """
    init_net = caffe2_pb2.NetDef()
    for tensor in net_params.protos:
        if len(tensor.float_data) == 0:
            raise RuntimeError(
                "Only float tensors are supported in this util.")
        op = core.CreateOperator(
            "GivenTensorFill", [], [tensor.name],
            arg=[
                utils.MakeArgument("shape", list(tensor.dims)),
                utils.MakeArgument("values", tensor.float_data)
            ])
        init_net.op.extend([op])
    init_net.op.extend(
        [core.CreateOperator("ConstantFill", [], [input_name], shape=[1])])
    return init_net
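A minimal usage sketch (blob names and values are made up for illustration): build a TensorProtos with one float tensor, then convert it.

from caffe2.proto import caffe2_pb2

net_params = caffe2_pb2.TensorProtos()
tensor = net_params.protos.add()
tensor.name = "fc_w"  # hypothetical blob name
tensor.dims.extend([2, 2])
tensor.float_data.extend([1.0, 2.0, 3.0, 4.0])

init_net = ConvertTensorProtosToInitNet(net_params, "data")
# init_net now holds one GivenTensorFill per tensor, plus a ConstantFill
# placeholder for the external input blob "data".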
Example #2
def check_set_pb_arg(pb, arg_name, arg_attr, arg_value, allow_override=False):
    arg = get_pb_arg(pb, arg_name)
    if arg is None:
        arg = putils.MakeArgument(arg_name, arg_value)
        assert hasattr(arg, arg_attr)
        pb.arg.extend([arg])
    if allow_override and getattr(arg, arg_attr) != arg_value:
        logger.warning(
            "Override argument {}: {} -> {}".format(arg_name, getattr(arg, arg_attr), arg_value)
        )
        setattr(arg, arg_attr, arg_value)
    else:
        assert arg is not None
        assert getattr(arg, arg_attr) == arg_value, "Existing value {}, new value {}".format(
            getattr(arg, arg_attr), arg_value
        )
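A hedged usage sketch: assuming get_pb_arg looks up an existing Argument proto by name (as in detectron2's export helpers), "s" names the bytes field of the Argument proto that holds string values.

predict_net = caffe2_pb2.NetDef()
# the first call creates the argument
check_set_pb_arg(predict_net, "net_type", "s", b"simple")
# repeating with the same value passes the assert; changing the value
# requires allow_override=True
check_set_pb_arg(predict_net, "net_type", "s", b"dag", allow_override=True)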
Example #3
    def run_on_device(self, device_opts):
        np.random.seed(0)

        proposal_count = 5000
        input_names = []
        inputs = []

        for lvl in range(RPN_MIN_LEVEL, RPN_MAX_LEVEL + 1):
            rpn_roi = (ROI_CANONICAL_SCALE *
                       np.random.rand(proposal_count, 5).astype(np.float32))
            for i in range(proposal_count):
                # Make RoIs have positive area, since they
                # are in the format [[batch_idx, x0, y0, x1, y1], ...]
                rpn_roi[i][3] += rpn_roi[i][1]
                rpn_roi[i][4] += rpn_roi[i][2]
            input_names.append('rpn_rois_fpn{}'.format(lvl))
            inputs.append(rpn_roi)
        for lvl in range(RPN_MIN_LEVEL, RPN_MAX_LEVEL + 1):
            rpn_roi_score = np.random.rand(proposal_count).astype(np.float32)
            input_names.append('rpn_roi_probs_fpn{}'.format(lvl))
            inputs.append(rpn_roi_score)

        output_names = [
            'rois',
        ]
        for lvl in range(ROI_MIN_LEVEL, ROI_MAX_LEVEL + 1):
            output_names.append('rois_fpn{}'.format(lvl))
        output_names.append('rois_idx_restore')

        op = core.CreateOperator(
            'CollectAndDistributeFpnRpnProposals',
            input_names,
            output_names,
            arg=[
                utils.MakeArgument("roi_canonical_scale", ROI_CANONICAL_SCALE),
                utils.MakeArgument("roi_canonical_level", ROI_CANONICAL_LEVEL),
                utils.MakeArgument("roi_max_level", ROI_MAX_LEVEL),
                utils.MakeArgument("roi_min_level", ROI_MIN_LEVEL),
                utils.MakeArgument("rpn_max_level", RPN_MAX_LEVEL),
                utils.MakeArgument("rpn_min_level", RPN_MIN_LEVEL),
                utils.MakeArgument("post_nms_topN", RPN_POST_NMS_TOP_N),
            ],
            device_option=device_opts)

        self.assertReferenceChecks(
            device_option=device_opts,
            op=op,
            inputs=inputs,
            reference=collect_and_distribute_fpn_rpn_ref,
        )
Example #4
    def test_collect_and_dist(
        self,
        proposal_count,
        rpn_min_level, rpn_num_levels,
        roi_min_level, roi_num_levels,
        rpn_post_nms_topN,
        roi_canonical_scale, roi_canonical_level,
        gc, dc
    ):
        input_names, inputs = self._create_input(
            proposal_count, rpn_min_level, rpn_num_levels, roi_canonical_scale
        )

        output_names = [
            'rois',
        ]
        for lvl in range(roi_num_levels):
            output_names.append('rois_fpn{}'.format(lvl + roi_min_level))
        output_names.append('rois_idx_restore')

        op = core.CreateOperator(
            'CollectAndDistributeFpnRpnProposals',
            input_names,
            output_names,
            arg=[
                utils.MakeArgument("roi_canonical_scale", roi_canonical_scale),
                utils.MakeArgument("roi_canonical_level", roi_canonical_level),
                utils.MakeArgument("roi_max_level", roi_min_level + roi_num_levels - 1),
                utils.MakeArgument("roi_min_level", roi_min_level),
                utils.MakeArgument("rpn_max_level", rpn_min_level + rpn_num_levels - 1),
                utils.MakeArgument("rpn_min_level", rpn_min_level),
                utils.MakeArgument("rpn_post_nms_topN", rpn_post_nms_topN),
            ],
            device_option=gc)
        args = {
            'rpn_min_level' : rpn_min_level,
            'rpn_num_levels' : rpn_num_levels,
            'roi_min_level' : roi_min_level,
            'roi_num_levels' : roi_num_levels,
            'rpn_post_nms_topN' : rpn_post_nms_topN,
            'roi_canonical_scale' : roi_canonical_scale,
            'roi_canonical_level' : roi_canonical_level}

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs + [args],
            reference=collect_and_distribute_fpn_rpn_ref,
        )
Example #5
    def test_alias_with_name_op(self, shape, dtype, dc, gc):
        test_input = (100 * np.random.random(shape)).astype(dtype)
        test_inputs = [test_input]

        alias_op = core.CreateOperator(
            "AliasWithName",
            ["input"],
            ["output"],
            device_option=gc,
        )
        alias_op.arg.add().CopyFrom(utils.MakeArgument("name",
                                                       "whatever_name"))

        def reference_func(x):
            return (x, )

        self.assertReferenceChecks(gc, alias_op, test_inputs, reference_func)
Example #6
    def setUp(self):
        super(TestHeatmapMaxKeypointOp, self).setUp()
        np.random.seed(0)

        # initialize at a small size, then interpolate up to HEATMAP_SIZE below
        HEATMAP_SMALL_SIZE = 4
        bboxes_in = 500 * np.random.rand(NUM_TEST_ROI, 4).astype(np.float32)
        # make each bbox well-formed: swap coordinates so x0 <= x1 and y0 <= y1
        for i in range(NUM_TEST_ROI):
            if bboxes_in[i][0] > bboxes_in[i][2]:
                bboxes_in[i][0], bboxes_in[i][2] = bboxes_in[i][2], bboxes_in[i][0]
            if bboxes_in[i][1] > bboxes_in[i][3]:
                bboxes_in[i][1], bboxes_in[i][3] = bboxes_in[i][3], bboxes_in[i][1]

        # initialize randomized heatmaps at the small size and expand them with interpolation
        init = np.random.rand(NUM_TEST_ROI, NUM_KEYPOINTS, HEATMAP_SMALL_SIZE,
                              HEATMAP_SMALL_SIZE).astype(np.float32)
        heatmaps_in = np.zeros((NUM_TEST_ROI, NUM_KEYPOINTS, HEATMAP_SIZE,
                                HEATMAP_SIZE)).astype(np.float32)
        for roi in range(NUM_TEST_ROI):
            for keyp in range(NUM_KEYPOINTS):
                f = interpolate.interp2d(np.arange(0, 1,
                                                   1.0 / HEATMAP_SMALL_SIZE),
                                         np.arange(0, 1,
                                                   1.0 / HEATMAP_SMALL_SIZE),
                                         init[roi][keyp],
                                         kind='cubic')
                heatmaps_in[roi][keyp] = f(np.arange(0, 1, 1.0 / HEATMAP_SIZE),
                                           np.arange(0, 1, 1.0 / HEATMAP_SIZE))

        self.heatmaps_in = heatmaps_in
        self.bboxes_in = bboxes_in

        self.op = core.CreateOperator('HeatmapMaxKeypoint',
                                      ['heatmaps_in', 'bboxes_in'],
                                      ['keypoints_out'],
                                      arg=[
                                          utils.MakeArgument(
                                              "should_output_softmax", True),
                                      ],
                                      device_option=caffe2_pb2.DeviceOption())
Example #7
def CreateOperator(
    operator_type,
    inputs,
    outputs,
    name='',
    device_option=None,
    arg=None,
    engine=None,
    **kwargs
):
    """A function wrapper that allows one to create operators based on the
    operator type. The type should be a string corresponding to an operator
    registered with Caffe2.
    """
    operator = caffe2_pb2.OperatorDef()
    operator.type = operator_type
    operator.name = name
    # Add rectified inputs and outputs
    inputs = _RectifyInputOutput(inputs)
    outputs = _RectifyInputOutput(outputs)
    operator.input.extend([str(i) for i in inputs])
    operator.output.extend([str(o) for o in outputs])
    # Set device option:
    # (1) If device_option is explicitly set, use device_option.
    # (2) If not, but _DEVICESCOPE is set, then we use the _DEVICESCOPE.
    # (3) Otherwise, do not set device option.
    if device_option is not None:
        operator.device_option.CopyFrom(device_option)
    elif _DEVICESCOPE is not None:
        operator.device_option.CopyFrom(_DEVICESCOPE)
    if engine is not None:
        operator.engine = engine
    # The random seed is defined in the device option, so it needs
    # special handling here.
    if 'random_seed' in kwargs:
        operator.device_option.random_seed = kwargs['random_seed']
        del kwargs['random_seed']
    # Add given arguments that do not need parsing
    if arg is not None:
        operator.arg.extend(arg)
    # Add all other arguments
    for key, value in kwargs.items():
        operator.arg.add().CopyFrom(utils.MakeArgument(key, value))
    return operator
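For illustration (a hypothetical Conv op): plain keyword arguments are routed through utils.MakeArgument, so the two calls below build equivalent OperatorDefs.

op_a = CreateOperator("Conv", ["X", "w", "b"], ["Y"], kernel=3, stride=1)
op_b = CreateOperator(
    "Conv", ["X", "w", "b"], ["Y"],
    arg=[utils.MakeArgument("kernel", 3),
         utils.MakeArgument("stride", 1)])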
Example #8
    def get_max(self, op, tensor, tensor_idx, tensor_name, max_name):
        global iteration_idx
        name = max_name + "_" + str(tensor_idx)
        op_hist_name = tensor_name + "_" + max_name + "_" + str(tensor_idx)

        arg = self.get_arg(op, name)
        if iteration_idx < self.kl_iter_num_for_range:
            max_min = np.array([np.max(tensor), np.min(tensor)]).astype(np.float32)
            if arg is not None:
                orig_max = arg.floats[0]
                orig_min = arg.floats[1]
                cur_max = max(orig_max, max_min[0])
                cur_min = min(orig_min, max_min[1])
                max_min = np.array([cur_max, cur_min]).astype(np.float32)
                self.remove_arg(op, name)
            # save max values in predict_def as operator arguments
            max_arg = utils.MakeArgument(name, max_min)
            op.arg.extend([max_arg])
        else:
            assert arg is not None
            max_val = arg.floats[0]
            min_val = arg.floats[1]
            self.get_kl_hist(tensor, min_val, max_val, op_hist_name)
Example #9
    def update_max(self, op, max_name, tensor_idx, tensor_name):
        """update the max data of the collected data"""
        global hist
        global hist_edges
        global iteration_idx

        name = max_name + "_" + str(tensor_idx)
        hist_name = tensor_name + "_" + max_name + "_" + str(tensor_idx)

        P_sum = iteration_idx - self.kl_iter_num_for_range
        arg = self.get_arg(op, name)
        assert arg is not None
        max_val = arg.floats[0]
        min_val = arg.floats[1]

        hist_iter = hist[hist_name]
        hist_edges_iter = hist_edges[hist_name]
        layer_max = self.get_optimal_scaling_factor(hist_iter, hist_edges_iter,
                                                    P_sum, max_val, min_val)

        self.remove_arg(op, name)
        max_arg = utils.MakeArgument(name, np.array([layer_max]).astype(np.float32))
        # save max values in predict_def as operator arguments
        op.arg.extend([max_arg])
Example #10
    def testArgsToDict(self):
        args = [
            utils.MakeArgument("int1", 3),
            utils.MakeArgument("float1", 4.0),
            utils.MakeArgument("string1", "foo"),
            utils.MakeArgument("intlist1", np.array([3, 4])),
            utils.MakeArgument("floatlist1", np.array([5.0, 6.0])),
            utils.MakeArgument("stringlist1", np.array(["foo", "bar"]))
        ]
        dict_ = utils.ArgsToDict(args)
        expected = {
            "int1": 3,
            "float1": 4.0,
            "string1": b"foo",
            "intlist1": [3, 4],
            "floatlist1": [5.0, 6.0],
            "stringlist1": [b"foo", b"bar"]
        }
        self.assertEqual(
            dict_, expected, "dictionary version of arguments "
            "doesn't match original")
Example #11
def add_bbox_ops(args, net, blobs):
    new_ops = []
    new_external_outputs = []

    # Operators for bboxes
    op_box = core.CreateOperator(
        "BBoxTransform",
        ["rpn_rois", "bbox_pred", "im_info"],
        ["pred_bbox"],
        weights=cfg.MODEL.BBOX_REG_WEIGHTS,
        apply_scale=False,
        correct_transform_coords=True,
    )
    new_ops.extend([op_box])

    blob_prob = "cls_prob"
    blob_box = "pred_bbox"
    op_nms = core.CreateOperator(
        "BoxWithNMSLimit",
        [blob_prob, blob_box],
        ["score_nms", "bbox_nms", "class_nms"],
        arg=[
            putils.MakeArgument("score_thresh", cfg.TEST.SCORE_THRESH),
            putils.MakeArgument("nms", cfg.TEST.NMS),
            putils.MakeArgument("detections_per_im",
                                cfg.TEST.DETECTIONS_PER_IM),
            putils.MakeArgument("soft_nms_enabled", cfg.TEST.SOFT_NMS.ENABLED),
            putils.MakeArgument("soft_nms_method", cfg.TEST.SOFT_NMS.METHOD),
            putils.MakeArgument("soft_nms_sigma", cfg.TEST.SOFT_NMS.SIGMA),
        ],
    )
    new_ops.extend([op_nms])
    new_external_outputs.extend(["score_nms", "bbox_nms", "class_nms"])

    net.Proto().op.extend(new_ops)
    net.Proto().external_output.extend(new_external_outputs)
Example #12
# ./tests/unittests/caffe2ImporterTest.cpp
# Run $>python gen_caffe2_model.py to get the model files.

from caffe2.proto import caffe2_pb2
from caffe2.python import utils
from google.protobuf import text_format


# Define a weights network
weights = caffe2_pb2.NetDef()
weights.name = "init"

op = caffe2_pb2.OperatorDef()
op.type = "GivenTensorFill"
op.output.extend(["conv_w"])
op.arg.extend([utils.MakeArgument("shape", [1, 1, 2, 2])])
op.arg.extend([utils.MakeArgument("values", [1.0 for i in range(4)])])
weights.op.extend([op])

op = caffe2_pb2.OperatorDef()
op.type = "GivenTensorFill"
op.output.extend(["conv_b"])
op.arg.extend([utils.MakeArgument("shape", [1])])
op.arg.extend([utils.MakeArgument("values", [2.0 for i in range(1)])])
weights.op.extend([op])
weights.external_output.extend(op.output)

# Define an inference net
net = caffe2_pb2.NetDef()
net.name = "predict"
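A plausible continuation (assumed for illustration; not taken from gen_caffe2_model.py): the predict net would consume the filled weights, e.g. with a single Conv op.

op = caffe2_pb2.OperatorDef()
op.type = "Conv"
op.input.extend(["data", "conv_w", "conv_b"])
op.output.extend(["conv_out"])
op.arg.extend([utils.MakeArgument("kernel", 2)])
net.op.extend([op])
net.external_input.extend(["data", "conv_w", "conv_b"])
net.external_output.extend(["conv_out"])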
Example #13
from caffe2.proto import caffe2_pb2
from caffe2.python import utils

# Define a weights network
weights = caffe2_pb2.NetDef()
weights.name = "init"

op = caffe2_pb2.OperatorDef()
op.type = "fake_data_provider"
op.output.extend(["data"])
weights.op.extend([op])
weights.external_output.extend(op.output)

op = caffe2_pb2.OperatorDef()
op.type = "GivenTensorFill"
op.output.extend(["fc_w"])
op.arg.extend([utils.MakeArgument("shape", [1,4])])
op.arg.extend([utils.MakeArgument("values", [1.0 for i in range(4)])])
weights.op.extend([op])
weights.external_output.extend(op.output)

op = caffe2_pb2.OperatorDef()
op.type = "GivenTensorFill"
op.output.extend(["fc_b"])
op.arg.extend([utils.MakeArgument("shape", [1,4])])
op.arg.extend([utils.MakeArgument("values", [1.0 for i in range(4)])])
weights.op.extend([op])
weights.external_output.extend(op.output)

# Define an inference net
net = caffe2_pb2.NetDef()
net.name = "predict"
Example #14
def run_single_kpts(
    net,
    image,
    target_size,
    pixel_means=PIXEL_MEANS_DEFAULT,
    pixel_stds=PIXEL_STDS_DEFAULT,
    max_size=1333,
):
    inputs = utils2.prepare_blobs(
        image,
        target_size=target_size,
        max_size=max_size,
        pixel_means=pixel_means,
        pixel_stds=pixel_stds,
    )

    # Prepare inputs for AABB and Int8AABB operators
    im_info = inputs["im_info"]
    scale = im_info[0][2]
    inputs["im_infoq"] = np.rint(im_info[:,:2] * 8.0).astype(np.uint16)
    inputs["im_info2"] = im_info[:,:2]

    blob_names = []
    ser_blobs = []

    # Serialize inputs for remote device
    for k, v in inputs.items():
        workspace.FeedBlob(k, v)
        blob_names.append(k)
        ser_blobs.append(workspace.SerializeBlob(k))

    # Serialize output templates for remote device
    fully_quantized = any(op.type == "Int8AABBRoIProposals" for op in net.op)
    bbox_type = np.uint16 if fully_quantized else np.float32
    output_templates = {
            "score_nms": np.zeros((3,), np.float32),
            "keypoint_rois": np.zeros((3, 4), bbox_type),
            "keypoints_out": np.zeros((3, 17, 2), bbox_type),
            "class_nms": np.zeros((3,), np.int32),
            "keypoints_scores_out": np.zeros((3, 17), np.float32),
    }
    for out_name in net.external_output:
        fake_name = out_name + "_empty_template"
        blob_names.append(out_name)
        workspace.FeedBlob(fake_name, output_templates[out_name])
        ser_blobs.append(workspace.SerializeBlob(fake_name))

    # Package inputs and output templates
    inout_netdef = caffe2_pb2.NetDef()
    inout_netdef.arg.extend([
            utils.MakeArgument("blob_names", blob_names),
            utils.MakeArgument("ser_blobs", ser_blobs),
        ])

    # Send in/out to the remote device
    with tempfile.NamedTemporaryFile() as inout_file:
        inout_file.write(inout_netdef.SerializeToString())
        inout_file.flush()
        subprocess.check_call(["adb", "push", inout_file.name, "/data/local/tmp/input_output.pb"])

    try:
        # Run the model
        use_caffe2 = "--use_caffe2_reference true" if os.environ.get("USE_CAFFE2_REFERENCE") in ("1", "true", "yes", "on") else ""
        subprocess.check_call("adb shell 'cd /data/local/tmp ; GLOG_logtostderr=true GLOG_v=0 ./nnapi_runner %s --init_net init_net.pb --predict_net predict_net.pb --inout_net input_output.pb --out_path output_blobs.pb'" % use_caffe2, shell=True)

        # Retrieve and deserialize outputs
        with tempfile.TemporaryDirectory() as tmpdir:
            output_file = os.path.join(tmpdir, "output_blobs.pb")
            subprocess.check_call(["adb", "pull", "/data/local/tmp/output_blobs.pb", output_file])

            out_net = caffe2_pb2.NetDef()
            with open(output_file, "rb") as handle:
                out_net.ParseFromString(handle.read())

        all_outputs = utils.ArgsToDict(out_net.arg)["outputs"]
        for output in all_outputs:
            bp = caffe2_pb2.BlobProto()
            bp.ParseFromString(output)
            workspace.DeserializeBlob(bp.name, output)

        scores = workspace.FetchBlob("score_nms")
        boxes = workspace.FetchBlob("keypoint_rois")
        coords_preds = workspace.FetchBlob("keypoints_out")
        scores_preds = workspace.FetchBlob("keypoints_scores_out")
        classids = workspace.FetchBlob("class_nms")

        if boxes.dtype == np.uint16:
            boxes = boxes.astype(np.float32) * 0.125

        # New output format of AABBRoIKeypoints:
        # - XY coordinates are [num_rois, num_keypoints, 2] array in keypoints_out
        # - Scores are [num_rois, num_keypoints] array in keypoints_scores_out
        if coords_preds.dtype == np.uint16:
            coords_preds = coords_preds.astype(np.float32) * 0.125
        assert coords_preds.shape[:2] == scores_preds.shape
        num_rois, num_keypoints = coords_preds.shape[:2]
        xy_preds = np.concatenate(
            (coords_preds, scores_preds.reshape([num_rois, num_keypoints, 1]),
            np.zeros([num_rois, num_keypoints, 1], dtype=np.float32)),
            axis=2)
        assert xy_preds.shape == (num_rois, num_keypoints, 4)
        xy_preds = np.swapaxes(xy_preds, 1, 2)
        assert xy_preds.shape == (num_rois, 4, num_keypoints)


    except Exception as e:
        print(e)
        # may not detect anything at all
        R = 0
        scores = np.zeros((R,), dtype=np.float32)
        boxes = np.zeros((R, 4), dtype=np.float32)
        xy_preds = np.zeros((R, 4, 1), dtype=np.float32)
        classids = np.zeros((R,), dtype=np.float32)

    scale = inputs["im_info"][0][2]
    boxes /= scale
    if xy_preds is not None:
        xy_preds /= scale

    boxes = np.column_stack((boxes, scores))

    return boxes, xy_preds, classids
Example #15
    def test_collect_and_dist(self, proposal_count, rpn_min_level,
                              rpn_num_levels, roi_min_level, roi_num_levels,
                              rpn_post_nms_topN, roi_canonical_scale,
                              roi_canonical_level, gc, dc):

        np.random.seed(0)

        input_names = []
        inputs = []

        for lvl in range(rpn_num_levels):
            rpn_roi = (roi_canonical_scale *
                       np.random.rand(proposal_count, 5).astype(np.float32))
            for i in range(proposal_count):
                # Make RoIs have positive area, since they
                # are in the format [[batch_idx, x0, y0, x1, y1], ...]
                rpn_roi[i][3] += rpn_roi[i][1]
                rpn_roi[i][4] += rpn_roi[i][2]
            input_names.append('rpn_rois_fpn{}'.format(lvl + rpn_min_level))
            inputs.append(rpn_roi)
        for lvl in range(rpn_num_levels):
            rpn_roi_score = np.random.rand(proposal_count).astype(np.float32)
            input_names.append('rpn_roi_probs_fpn{}'.format(lvl +
                                                            rpn_min_level))
            inputs.append(rpn_roi_score)

        output_names = [
            'rois',
        ]
        for lvl in range(roi_num_levels):
            output_names.append('rois_fpn{}'.format(lvl + roi_min_level))
        output_names.append('rois_idx_restore')

        op = core.CreateOperator(
            'CollectAndDistributeFpnRpnProposals',
            input_names,
            output_names,
            arg=[
                utils.MakeArgument("roi_canonical_scale", roi_canonical_scale),
                utils.MakeArgument("roi_canonical_level", roi_canonical_level),
                utils.MakeArgument("roi_max_level",
                                   roi_min_level + roi_num_levels - 1),
                utils.MakeArgument("roi_min_level", roi_min_level),
                utils.MakeArgument("rpn_max_level",
                                   rpn_min_level + rpn_num_levels - 1),
                utils.MakeArgument("rpn_min_level", rpn_min_level),
                utils.MakeArgument("rpn_post_nms_topN", rpn_post_nms_topN),
            ],
            device_option=gc)
        args = {
            'proposal_count': proposal_count,
            'rpn_min_level': rpn_min_level,
            'rpn_num_levels': rpn_num_levels,
            'roi_min_level': roi_min_level,
            'roi_num_levels': roi_num_levels,
            'rpn_post_nms_topN': rpn_post_nms_topN,
            'roi_canonical_scale': roi_canonical_scale,
            'roi_canonical_level': roi_canonical_level
        }

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs + [args],
            reference=collect_and_distribute_fpn_rpn_ref,
        )
Example #16
def Benchmark(model_gen, arg):
    model, input_size = model_gen(arg.order)

    # In order to be able to run everything without feeding more stuff, let's
    # add the data and label blobs to the parameter initialization net as well.
    if arg.order == "NCHW":
        input_shape = [arg.batch_size, 3, input_size, input_size]
    else:
        input_shape = [arg.batch_size, input_size, input_size, 3]
    model.param_init_net.GaussianFill([],
                                      "data",
                                      shape=input_shape,
                                      mean=0.0,
                                      std=1.0)
    model.param_init_net.UniformIntFill([],
                                        "label",
                                        shape=[
                                            arg.batch_size,
                                        ],
                                        min=0,
                                        max=999)

    # Note: even when we are running things on CPU, adding a few engine-related
    # arguments will not hurt since the CPU operator registry will simply
    # ignore these options and go down the default path.
    for op in model.net.Proto().op:
        if op.type == 'Conv' or op.type == 'ConvFp16':
            op.engine = 'CUDNN'
            #op.arg.add().CopyFrom(utils.MakeArgument('ws_nbytes_limit', arg.cudnn_limit))
            op.arg.add().CopyFrom(utils.MakeArgument('exhaustive_search', 1))
            op.arg.add().CopyFrom(
                utils.MakeArgument('shared_ws_name', 'cudnn_workspace'))
        elif op.type in [
                'MaxPool', 'MaxPoolFp16', 'AveragePool', 'AveragePoolFp16',
                'Relu', 'ReluFp16', 'Softmax', 'SoftmaxFp16'
        ]:
            op.engine = 'CUDNN'
    if arg.forward_only:
        print(arg.model, ': running forward only.')
    else:
        print(arg.model, ': running forward-backward.')
        model.AddGradientOperators()
        if arg.order == 'NHWC':
            print(
                '==WARNING==\n'
                'NHWC order with CuDNN may not be supported yet, so I might\n'
                'exit suddenly.')

    if not arg.cpu:
        model.param_init_net.RunAllOnGPU()
        model.net.RunAllOnGPU()

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    for i in range(arg.warmup_iterations):
        workspace.RunNet(model.net.Proto().name)

    start = time.time()
    for i in range(arg.iterations):
        workspace.RunNet(model.net.Proto().name)
    print('Spent: ', (time.time() - start) / arg.iterations)
    if arg.layer_wise_benchmark:
        print('Layer-wise benchmark.')
        workspace.BenchmarkNet(model.net.Proto().name, 1, arg.iterations, True)
    # Writes out the pbtxt for benchmarks on e.g. Android
    with open("{0}_init_batch_{1}.pbtxt".format(arg.model, arg.batch_size),
              "w") as fid:
        fid.write(str(model.param_init_net.Proto()))
    with open("{0}.pbtxt".format(arg.model, arg.batch_size), "w") as fid:
        fid.write(str(model.net.Proto()))
Example #17
def parse_args(args):
    operator = caffe2_pb2.OperatorDef()
    for k, v in args.items():
        arg = utils.MakeArgument(k, v)
        operator.arg.add().CopyFrom(arg)
    return operator.arg
Example #18
def run_single_segms(
    net,
    image,
    target_size,
    pixel_means=PIXEL_MEANS_DEFAULT,
    pixel_stds=PIXEL_STDS_DEFAULT,
    rle_encode=True,
    max_size=1333,
):
    inputs = utils2.prepare_blobs(
        image,
        target_size=target_size,
        max_size=max_size,
        pixel_means=pixel_means,
        pixel_stds=pixel_stds,
    )

    # Prepare inputs for AABB and Int8AABB operators
    im_info = inputs["im_info"]
    scale = im_info[0][2]
    inputs["im_infoq"] = np.rint(im_info[:,:2] * 8.0).astype(np.uint16)
    inputs["im_info2"] = im_info[:,:2]

    blob_names = []
    ser_blobs = []

    # Serialize inputs for remote device
    for k, v in inputs.items():
        workspace.FeedBlob(k, v)
        blob_names.append(k)
        ser_blobs.append(workspace.SerializeBlob(k))

    # Serialize output templates for remote device
    fully_quantized = any(op.type == "Int8AABBRoIProposals" for op in net.op)
    bbox_type = np.uint16 if fully_quantized else np.float32
    output_templates = {
            "score_nms": np.zeros((LIMIT,), np.float32),
            "bbox_nms": np.zeros((LIMIT, 4), bbox_type),
            "class_nms": np.zeros((LIMIT,), np.int32),
            "mask_fcn_probs": np.zeros((LIMIT, CLASSES, RES, RES), np.float32),
            }
    for out_name in net.external_output:
        fake_name = out_name + "_empty_template"
        blob_names.append(out_name)
        workspace.FeedBlob(fake_name, output_templates[out_name])
        ser_blobs.append(workspace.SerializeBlob(fake_name))

    # Package inputs and output templates
    inout_netdef = caffe2_pb2.NetDef()
    inout_netdef.arg.extend([
            utils.MakeArgument("blob_names", blob_names),
            utils.MakeArgument("ser_blobs", ser_blobs),
        ])

    # Send in/out to the remote device
    with tempfile.NamedTemporaryFile() as inout_file:
        inout_file.write(inout_netdef.SerializeToString())
        inout_file.flush()
        subprocess.check_call(["adb", "push", inout_file.name, "/data/local/tmp/input_output.pb"])

    try:
        # Run the model
        use_caffe2 = "--use_caffe2_reference true" if os.environ.get("USE_CAFFE2_REFERENCE") in ("1", "true", "yes", "on") else ""
        subprocess.check_call("adb shell 'cd /data/local/tmp ; GLOG_logtostderr=true GLOG_v=0 ./nnapi_runner %s --init_net init_net.pb --predict_net predict_net.pb --inout_net input_output.pb --out_path output_blobs.pb'" % use_caffe2, shell=True)

        # Retrieve and deserialize outputs
        with tempfile.TemporaryDirectory() as tmpdir:
            output_file = os.path.join(tmpdir, "output_blobs.pb")
            subprocess.check_call(["adb", "pull", "/data/local/tmp/output_blobs.pb", output_file])

            out_net = caffe2_pb2.NetDef()
            with open(output_file, "rb") as handle:
                out_net.ParseFromString(handle.read())

        all_outputs = utils.ArgsToDict(out_net.arg)["outputs"]
        for output in all_outputs:
            bp = caffe2_pb2.BlobProto()
            bp.ParseFromString(output)
            workspace.DeserializeBlob(bp.name, output)

        classids = workspace.FetchBlob("class_nms")
        scores = workspace.FetchBlob("score_nms")  # bbox scores, (R, )
        boxes = workspace.FetchBlob("bbox_nms")  # i.e., boxes, (R, 4*1)
        masks = workspace.FetchBlob("mask_fcn_probs")  # (R, cls, mask_dim, mask_dim)
        if boxes.dtype == np.uint16:
            boxes = boxes.astype(np.float32) * 0.125
            boxes /= scale
    except Exception as e:
        print(e)
        # may not detect anything at all
        R = 0
        scores = np.zeros((R,), dtype=np.float32)
        boxes = np.zeros((R, 4), dtype=np.float32)
        classids = np.zeros((R,), dtype=np.float32)
        masks = np.zeros((R, 1, 1, 1), dtype=np.float32)

    # included in the model
    # scale = inputs["im_info"][0][2]
    # boxes /= scale

    R = boxes.shape[0]
    im_masks = []
    if R > 0:
        im_dims = image.shape
        im_masks = utils2.compute_segm_results(
            masks, boxes, classids, im_dims[0], im_dims[1], rle_encode=rle_encode
        )

    boxes = np.column_stack((boxes, scores))

    ret = {"classids": classids, "boxes": boxes, "masks": masks, "im_masks": im_masks}
    return ret
Example #19
def Train(args):
    # Either use specified device list or generate one
    if args.gpus is not None:
        gpus = [int(x) for x in args.gpus.split(',')]
        num_gpus = len(gpus)
    else:
        gpus = list(range(args.num_gpus))
        num_gpus = args.num_gpus

    log.info("Running on GPUs: {}".format(gpus))

    # Verify valid batch size
    total_batch_size = args.batch_size
    batch_per_device = total_batch_size // num_gpus
    assert \
        total_batch_size % num_gpus == 0, \
        "Number of GPUs must divide batch size"

    # Round down epoch size to closest multiple of batch size across machines
    global_batch_size = total_batch_size * args.num_shards
    epoch_iters = int(args.epoch_size / global_batch_size)
    args.epoch_size = epoch_iters * global_batch_size
    log.info("Using epoch size: {}".format(args.epoch_size))

    # Create ModelHelper object
    # train_arg_scope = {
    #     'order': 'NCHW',
    #     'use_cudnn': True,
    #     'cudnn_exhaustive_search': True,
    #     'ws_nbytes_limit': (args.cudnn_workspace_limit_mb * 1024 * 1024),
    # }
    # train_model = model_helper.ModelHelper(
    #     name="mobilenet", arg_scope=train_arg_scope
    # )

    num_shards = args.num_shards

    rendezvous = None

    # Model building functions
    # def create_mobilenet_model_ops(model, loss_scale):
    #     [softmax, loss] = mobilenet.create_mobilenet(
    #         model,
    #         "data",
    #         num_input_channels=args.num_channels,
    #         num_labels=args.num_labels,
    #         label="label",
    #         is_test=True,
    #     )
    #     loss = model.Scale(loss, scale=loss_scale)
    #     brew.accuracy(model, [softmax, "label"], "accuracy")
    #     return [loss]

    # def add_optimizer(model):
    #     stepsz = int(30 * args.epoch_size / total_batch_size / num_shards)
    #     optimizer.add_weight_decay(model, args.weight_decay)
    #     optimizer.build_sgd(
    #         model,
    #         args.base_learning_rate,
    #         momentum=0.9,
    #         nesterov=1,
    #         policy="step",
    #         stepsize=stepsz,
    #         gamma=0.1
    #     )

    # def add_image_input(model):
    #     AddImageInput(
    #         model,
    #         reader,
    #         batch_size=batch_per_device,
    #         img_size=args.image_size,
    #     )
    # def add_post_sync_ops(model):
    #     for param_info in model.GetOptimizationParamInfo(model.GetParams()):
    #         if param_info.blob_copy is not None:
    #             model.param_init_net.HalfToFloat(
    #                 param_info.blob,
    #                 param_info.blob_copy[core.DataType.FLOAT]
    #             )

    test_arg_scope = {
        'order': "NCHW",
        # 'use_cudnn': True,
        # 'cudnn_exhaustive_search': True,
    }
    test_model = model_helper.ModelHelper(name="mobilenet_test",
                                          arg_scope=test_arg_scope)

    deploy_arg_scope = {'order': "NCHW"}
    deploy_model = model_helper.ModelHelper(name="mobilenet_deploy",
                                            arg_scope=deploy_arg_scope)
    mobilenet.create_mobilenet(
        deploy_model,
        "data",
        num_input_channels=args.num_channels,
        num_labels=args.num_labels,
        is_test=True,
    )

    # raw_data = np.random.randn(1, 3, 224, 224).astype(np.float32)
    # workspace.FeedBlob("data", raw_data)

    # workspace.RunNetOnce(deploy_model.param_init_net)
    # workspace.CreateNet(deploy_model.net)
    # mobilenet.create_mobilenet(
    #     test_model,
    #     "gpu_0/data",
    #     num_input_channels=args.num_channels,
    #     num_labels=args.num_labels,
    #     is_test=True,
    # )
    # test_reader = test_model.CreateDB(
    #     "test_reader",
    #     db=args.test_data,
    #     db_type=args.db_type,
    # )

    # def test_input_fn(model):
    #     AddImageInput(
    #         model,
    #         test_reader,
    #         batch_size=batch_per_device,
    #         img_size=args.image_size,
    #     )

    # data_parallel_model.Parallelize_GPU(
    #     test_model,
    #     input_builder_fun=test_input_fn,
    #     forward_pass_builder_fun=create_mobilenet_model_ops,
    #     post_sync_builder_fun=add_post_sync_ops,
    #     param_update_builder_fun=None,
    #     devices=gpus,
    # )

    # inputs = np.zeros((32,3,224,224), dtype='f')
    # labels = np.zeros((32,), dtype='f')
    # workspace.FeedBlob("gpu_0/data", inputs)
    # workspace.FeedBlob("gpu_0/label", labels)

    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)

    LoadModel(args.load_model_path, test_model)

    prefix = "gpu_0/"
    for value in deploy_model.params:
        workspace.FeedBlob(value, workspace.FetchBlob(prefix + value))
    # SaveModel(args, test_model)

    # workspace.ResetWorkspace()
    # print(workspace.Blobs())
    # print(deploy_model.params)
    # print("=====================")
    # print(test_model.params)
    # print("=====================")
    # print(workspace.FetchBlob("gpu_0/comp_11_spatbn_2_rm"))
    # print(workspace.FetchBlob("comp_11_spatbn_2_rm"))
    # print(deploy_model.net.Proto())
    # print(deploy_model.param_init_net.Proto())
    # exit(0)

    init_net = caffe2_pb2.NetDef()

    # # print(len(deploy_model.params))
    # # print(deploy_model.param_init_net.Proto())
    # with open("params", 'wb') as f:
    #     f.write(str(deploy_model.param_init_net.Proto()))
    tmp_o = np.zeros((1, 1)).astype(np.float32)
    # print(tmp_o.shape)
    # print(type(tmp_o))
    # exit(0)
    init_net.name = "mobilenet_init"
    rm_riv = []
    for value in deploy_model.params:
        tmp = workspace.FetchBlob(prefix + value)
        # print(type(tmp.shape), type(tmp))

        if "spatbn" == str(value)[-10:-4]:
            # print(value)
            if "s" == str(value)[-1]:
                # print(str(value)[:-1] + "rm")
                # init_net.op.extend([core.CreateOperator("GivenTensorFill", [], [str(value)[:-1] + "rm"], arg=[utils.MakeArgument("shape", tmp_o.shape), utils.MakeArgument("values", tmp_o)])])
                rm_riv.append(
                    core.CreateOperator(
                        "GivenTensorFill", [], [str(value)[:-1] + "rm"],
                        arg=[
                            utils.MakeArgument("shape", tmp_o.shape),
                            utils.MakeArgument("values", tmp_o)
                        ]))
                rm_riv.append(
                    core.CreateOperator(
                        "GivenTensorFill", [], [str(value)[:-1] + "riv"],
                        arg=[
                            utils.MakeArgument("shape", tmp_o.shape),
                            utils.MakeArgument("values", tmp_o)
                        ]))
            # elif "b" == str(value)[-1]:
            #     # print(str(value)[:-1] + "riv")
            #     init_net.op.extend([core.CreateOperator("GivenTensorFill", [], [str(value)[:-1] + "riv"], arg=[utils.MakeArgument("shape", tmp_o.shape), utils.MakeArgument("values", tmp_o)])])
        init_net.op.extend([
            core.CreateOperator("GivenTensorFill", [], [value],
                                arg=[
                                    utils.MakeArgument("shape", tmp.shape),
                                    utils.MakeArgument("values", tmp)
                                ])
        ])
    init_net.op.extend([
        core.CreateOperator("ConstantFill", [], ["data"],
                            shape=(1, 3, 224, 224))
    ])
    # exit(0)
    # for value in rm_riv:
    # init_net.op.extend([value])

    deploy_model.net._net.external_output.extend(["softmax"])
    predict_net = deploy_model.net._net

    # print(dir(deploy_model.net._net))

    # with open("pparams", 'wb') as f:
    #     f.write(str(deploy_model.param_init_net.Proto()))
    # print(workspace.Blobs())
    # for k, value in enumerate(deploy_model.params):
    #     # print(k,value)
    #     name = k + value
    #     name = workspace.FetchBlob(prefix + value)

    # tmp_work = {value: workspace.FetchBlob(prefix + value) for value in deploy_model.params}
    # # tmp_params = (str(deploy_model.params)

    # workspace.ResetWorkspace()
    # # print(workspace.Blobs())
    # # exit(0)
    # for value in deploy_model.params:
    #     workspace.FeedBlob(value, tmp_work[value])

    # # print(workspace.Blobs())
    # print(workspace.FetchBlob("last_out_b"))
    # exit(0)

    # deploy_model.net._net.external_output.extend(["softmax"])

    # #====================================================================
    # init_net, predict_net = me.Export(workspace, deploy_model.net, deploy_model.params)
    # # print(dir(predict_net.op.remove))
    # # # print(dir(caffe2_pb2.NetDef))
    # # print("===========")
    # # init_net.op.pop(0)
    # flag_di = []
    # print(len(init_net.op))
    # for k, value in enumerate(init_net.op):
    #     for x in value.output:
    #         if ("data" == str(x)) and ("GivenTensorFill" == str(value.type)):
    #             flag_di.append(k)

    # flag_di = sorted(flag_di)
    # for k, v in enumerate(flag_di):
    #     init_net.op.pop(v - k)
    # print(len(init_net.op))

    # flag_dp = []
    # print(len(predict_net.external_input))
    # for k, value in enumerate(predict_net.external_input):
    #     if "data" == str(value):
    #         flag_dp.append(k)

    # flag_dp = sorted(flag_dp)
    # for k, v in enumerate(flag_dp):
    #     predict_net.external_input.pop(v - k)

    # print(len(predict_net.external_input))

    # predict_net.external_input.extend(["data"])
    # init_net.op.extend([core.CreateOperator("ConstantFill", [], ["data"], shape=(1, 3, 224, 224))])
    # #==============================================

    with open("pred_net", 'wb') as f:
        f.write(str(predict_net))
    # with open("e_pred_net", 'wb') as f:
    # f.write(str(e_predict_net))
    with open("init_net", 'wb') as f:
        f.write(str(init_net))

    with open(output_predict_net, 'wb') as f:
        f.write(predict_net.SerializeToString())
    print(output_predict_net)

    with open(output_init_net, 'wb') as f:
        f.write(init_net.SerializeToString())
    print(output_init_net)

    print("OK!")
Example #20
def AddArgument(op, key, value):
    """Makes an argument based on the value type."""
    op.arg.extend([utils.MakeArgument(key, value)])
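Usage sketch (the operator and argument names are hypothetical): MakeArgument dispatches on the Python type, so scalars, strings, and lists all go through the same call.

op = caffe2_pb2.OperatorDef()
op.type = "LeakyRelu"
AddArgument(op, "alpha", 0.1)      # float -> arg.f
AddArgument(op, "order", "NCHW")   # str   -> arg.s (stored as bytes)
AddArgument(op, "pads", [1, 1])    # list  -> arg.ints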
Example #21
def add_quantization_param_args_(op, q_param):
    op.arg.extend([
        utils.MakeArgument("Y_scale", q_param.scale),
        utils.MakeArgument("Y_zero_point", q_param.zero_point),
    ])
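A usage sketch, assuming q_param is any object exposing scale and zero_point attributes (a namedtuple here; the real type in the source may differ):

from collections import namedtuple

QParam = namedtuple("QParam", ["scale", "zero_point"])

op = caffe2_pb2.OperatorDef()
op.type = "Int8Relu"  # hypothetical quantized op
add_quantization_param_args_(op, QParam(scale=0.05, zero_point=128))
# op now carries Y_scale=0.05 and Y_zero_point=128 as arguments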