Esempio n. 1
0
def debug_model(model,
                in_stream=sys.stdin,
                out_stream=sys.stdout,
                exit_func=sys.exit):
    '''
    Returns a cloned model that has debug nodes inserted everywhere. When the
    graph is evaluated or trained, those nodes will allow to inspect the graph.

    Args:
      model (root node): root node until which the nodes are to be debugged
      in_stream (object behaving like sys.stdin, default stdin): `readline()`
       will be called on it to obtain user input
      out_stream (object behaving like sys.stdout, default stdout): `write()`
       and `flush()` will be called on it to output debug info to the user
      exit_func (callable, default sys.exit): callable that takes an exit code and is called,
       when the user exits the debugging process

    Returns:
      a clone of the model that has debugging enabled

    Raises:
      ValueError: if more clone passes than the original number of nodes
       were performed and more than one node is still left to debug
    '''
    nodes = _nodes_to_debug(model)
    dbg_state = _DebugState(nodes)

    # Upper bound for the number of clone passes; exceeding it means the
    # graph cannot be reduced to a single remaining node.
    orig_node_count = len(nodes)
    mod_counter = 1

    # We cannot add the DebugNodes in one clone because the replacements will
    # hide parent nodes.
    while True:
        modifications = {
            n: user_function(
                _DebugNode(n, dbg_state, in_stream, out_stream, exit_func))
            for n in nodes
        }

        model = model.clone(CloneMethod.share, modifications)

        nodes = _nodes_to_debug(model)
        if len(nodes) == 1:
            # last node is the model node, which we want to debug as well;
            # it gets the same exit_func as every other debug node so that a
            # caller-supplied exit handler is honored at the root, too
            model = user_function(
                _DebugNode(model, dbg_state, in_stream, out_stream,
                           exit_func))
            break

        if mod_counter > orig_node_count:
            raise ValueError('cannot debug this graph')

        mod_counter += 1

    return model
Esempio n. 2
0
def debug_model(model, in_stream=sys.stdin, out_stream=sys.stdout,
                exit_func=sys.exit):
    '''
    Returns a cloned model that has debug nodes inserted everywhere. When the
    graph is evaluated or trained, those nodes will allow to inspect the graph.

    Args:
      model (root node): root node until which the nodes are to be debugged
      in_stream (object behaving like sys.stdin, default stdin): `readline()`
       will be called on it to obtain user input
      out_stream (object behaving like sys.stdout, default stdout): `write()`
       and `flush()` will be called on it to output debug info to the user
      exit_func (callable, default sys.exit): callable that takes an exit code and is called,
       when the user exits the debugging process

    Returns:
      a clone of the model that has debugging enabled

    Raises:
      ValueError: if more clone passes than the original number of nodes
       were performed and more than one node is still left to debug
    '''
    nodes = _nodes_to_debug(model)
    dbg_state = _DebugState(nodes)

    # Upper bound for the number of clone passes; exceeding it means the
    # graph cannot be reduced to a single remaining node.
    orig_node_count = len(nodes)
    mod_counter = 1

    # We cannot add the DebugNodes in one clone because the replacements will
    # hide parent nodes.
    while True:
        modifications = {n: user_function(_DebugNode(n, dbg_state,
                                                     in_stream, out_stream,
                                                     exit_func))
                         for n in nodes}

        model = model.clone(CloneMethod.share, modifications)

        nodes = _nodes_to_debug(model)
        if len(nodes) == 1:
            # last node is the model node, which we want to debug as well;
            # pass exit_func here too, otherwise the root debug node would
            # fall back to its default exit handler
            model = user_function(_DebugNode(model, dbg_state,
                                             in_stream, out_stream,
                                             exit_func))
            break

        if mod_counter > orig_node_count:
            raise ValueError('cannot debug this graph')

        mod_counter += 1

    return model
def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, num_classes):
    '''
    Builds the proposal target layer used to train an object detection network as described in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    The layer assigns object detection proposals to ground-truth targets and
    produces proposal classification labels and bounding-box regression targets.
    It also adds gt_boxes to candidates and samples fg and bg rois for training.

    Args:
        rpn_rois:        The proposed ROIs, e.g. from a region proposal network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        num_classes:     The number of classes in the data set

    Returns:
        A 4-tuple, in this order:
        rpn_target_rois - a set of rois containing the ground truth and a number of sampled fg and bg ROIs
        label_targets - the target labels for the rois
        bbox_targets - the regression coefficient targets for the rois
        bbox_inside_weights - the weights for the regression loss
    '''

    # The layer receives its configuration as a parameter string.
    param_str = "'num_classes': {}".format(num_classes)
    target_layer = ProposalTargetLayer(rpn_rois, scaled_gt_boxes, param_str=param_str)
    ptl = user_function(target_layer)
    layer_outputs = ptl.outputs

    # Only the first output gets an alias; use an alias if you need to access
    # the outputs, e.g., when cloning a trained network.
    rois = alias(layer_outputs[0], name='rpn_target_rois')

    return rois, layer_outputs[1], layer_outputs[2], layer_outputs[3]
Esempio n. 4
0
def test_ext_backpropstate(payload):
    '''
    Trains a tiny graph for 100 minibatches and checks, inside the user
    function's ``backward``, that the state returned by ``forward`` is the
    ``payload`` given to this test.
    '''

    class TestBackPropState(UserFunction):
        # User function that forwards its argument unchanged and uses the
        # payload as the backprop state.

        def __init__(self, arg, payload, name='f1'):
            self.payload = payload
            super(TestBackPropState, self).__init__([arg])

        def infer_outputs(self):
            # The single output mirrors the first input.
            source = self.inputs[0]
            return [C.output_variable(source.shape, source.dtype,
                                      source.dynamic_axes)]

        def forward(self, argument, device=None, outputs_to_retain=None):
            # First element is the state that is later handed to backward().
            return self.payload, argument

        def backward(self, state, root_gradients):
            assert state == self.payload
            return root_gradients

    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    in1 = C.input_variable(dim, needs_gradient=True, name='i_var')
    z = C.user_function(TestBackPropState(in1, payload)) + p

    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    learners = [C.sgd(z.parameters, lr_per_sample)]
    trainer = C.Trainer(None, z, learners)

    for _ in range(100):
        sample = np.random.rand(dim)
        trainer.train_minibatch({in1: [sample]})
Esempio n. 5
0
def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, num_classes):
    '''
    Builds the proposal target layer used to train an object detection network as described in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    The layer assigns object detection proposals to ground-truth targets and
    produces proposal classification labels and bounding-box regression targets.
    It also adds gt_boxes to candidates and samples fg and bg rois for training.

    Args:
        rpn_rois:        The proposed ROIs, e.g. from a region proposal network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        num_classes:     The number of classes in the data set

    Returns:
        A 4-tuple of aliased outputs, in this order:
        rpn_target_rois - a set of rois containing the ground truth and a number of sampled fg and bg ROIs
        label_targets - the target labels for the rois
        bbox_targets - the regression coefficient targets for the rois
        bbox_inside_weights - the weights for the regression loss
    '''

    # The layer receives its configuration as a parameter string.
    param_str = "'num_classes': {}".format(num_classes)
    target_layer = ProposalTargetLayer(rpn_rois, scaled_gt_boxes, param_str=param_str)
    ptl = user_function(target_layer)

    # Alias names for outputs 0..3, in output order.
    alias_names = ('rpn_target_rois', 'label_targets', 'bbox_targets', 'bbox_inside_w')
    return tuple(alias(ptl.outputs[position], name=alias_name)
                 for position, alias_name in enumerate(alias_names))
Esempio n. 6
0
def test_proposal_layer():
    '''
    Feeds the same random inputs to the CNTK proposal layer and to the Caffe
    proposal layer and asserts that both produce exactly the same proposals.
    '''
    cls_prob_shape_cntk = (18, 61, 61)
    cls_prob_shape_caffe = (18, 61, 61)
    rpn_bbox_shape = (36, 61, 61)
    im_info = [1000, 1000, 1]

    # Random input tensors shared by both implementations
    cls_prob = np.random.random_sample(cls_prob_shape_cntk).astype(np.float32)
    rpn_bbox_pred = np.random.random_sample(rpn_bbox_shape).astype(np.float32)

    # CNTK side: build the layer and run a forward pass
    cls_prob_var = input_variable(cls_prob_shape_cntk)
    rpn_bbox_var = input_variable(rpn_bbox_shape)
    im_info_const = cntk.constant(im_info, (3,))

    cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var, im_info_const))
    feed = {cls_prob_var: [cls_prob], rpn_bbox_var: [rpn_bbox_pred]}
    _, cntk_output = cntk_layer.forward(feed)
    first_output = next(iter(cntk_output))
    cntk_proposals = cntk_output[first_output][0]

    # Caffe side: each bottom blob gets a leading axis of size one
    bottom = [
        np.array([cls_prob.reshape(cls_prob_shape_caffe)]),
        np.array([rpn_bbox_pred]),
        np.array([im_info]),
    ]
    top = None  # handled through return statement in caffe layer for unit testing

    caffe_layer = CaffeProposalLayer()
    caffe_layer.set_param_str("'feat_stride': 16")
    caffe_layer.setup(bottom, top)
    # The first column of the Caffe output is dropped before comparing.
    caffe_proposals = caffe_layer.forward(bottom, top)[:, 1:]

    # assert that results are exactly the same
    assert cntk_proposals.shape == caffe_proposals.shape
    assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
    print("Verified ProposalLayer")
Esempio n. 7
0
def create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg, use_native_proposal_layer=False):
    '''
    Creates the proposal layer, either as a native user function or as a
    Python user function, and returns it under the alias 'rpn_rois'.

    Args:
        rpn_cls_prob_reshape: first input handed to the proposal layer
        rpn_bbox_pred:        second input handed to the proposal layer
        im_info:              third input handed to the proposal layer
        cfg:                  configuration indexed by "MODEL", "DATA", "TRAIN" and "TEST"; the
                              attributes read from each entry are listed in the layer config below
        use_native_proposal_layer (default False): if True, the native 'ProposalLayerOp' is used
                              instead of the Python ``ProposalLayer``

    Returns:
        the proposal layer output aliased as 'rpn_rois'
    '''
    layer_config = {
        "feat_stride": cfg["MODEL"].FEATURE_STRIDE,
        "scales": cfg["DATA"].PROPOSAL_LAYER_SCALES,

        "train_pre_nms_topN": cfg["TRAIN"].RPN_PRE_NMS_TOP_N,
        "train_post_nms_topN": cfg["TRAIN"].RPN_POST_NMS_TOP_N,
        "train_nms_thresh": float(cfg["TRAIN"].RPN_NMS_THRESH),
        "train_min_size": float(cfg["TRAIN"].RPN_MIN_SIZE),

        "test_pre_nms_topN": cfg["TEST"].RPN_PRE_NMS_TOP_N,
        "test_post_nms_topN": cfg["TEST"].RPN_POST_NMS_TOP_N,
        "test_nms_thresh": float(cfg["TEST"].RPN_NMS_THRESH),
        "test_min_size": float(cfg["TEST"].RPN_MIN_SIZE),
    }

    if use_native_proposal_layer:
        native_op_name = 'ProposalLayerOp'
        # Register the native op only once so that this factory can be called
        # repeatedly within the same process.
        if not cntk.cntk_py.is_native_user_function_registered(native_op_name):
            cntk.ops.register_native_user_function(native_op_name,
                                                   'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'),
                                                   'CreateProposalLayer')
        rpn_rois_raw = ops.native_user_function(native_op_name, [rpn_cls_prob_reshape, rpn_bbox_pred, im_info],
                                                layer_config, 'native_proposal_layer')
    else:
        rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, layer_config))

    return alias(rpn_rois_raw, name='rpn_rois')
Esempio n. 8
0
def test_ext_backpropstate(payload):
    '''
    Trains a tiny graph for 100 minibatches and checks, inside the user
    function's ``backward``, that the state returned by ``forward`` is the
    ``payload`` given to this test.
    '''

    class TestBackPropState(UserFunction):
        # User function that forwards its argument unchanged and uses the
        # payload as the backprop state.

        def __init__(self, arg, payload, name='f1'):
            self.payload = payload
            super(TestBackPropState, self).__init__([arg])

        def infer_outputs(self):
            # The single output mirrors the first input.
            first_input = self.inputs[0]
            output = C.output_variable(first_input.shape, first_input.dtype,
                                       first_input.dynamic_axes)
            return [output]

        def forward(self, argument, device=None, outputs_to_retain=None):
            # First element is the state that is later handed to backward().
            return self.payload, argument

        def backward(self, state, root_gradients):
            assert state == self.payload
            return root_gradients

    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    in1 = C.input_variable(dim, needs_gradient=True, name='i_var')
    z = C.user_function(TestBackPropState(in1, payload)) + p

    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    sgd_learner = C.sgd(z.parameters, lr_per_sample)
    trainer = C.Trainer(None, z, [sgd_learner])

    for _ in range(100):
        minibatch = {in1: [np.random.rand(dim)]}
        trainer.train_minibatch(minibatch)
Esempio n. 9
0
def create_proposal_layer(rpn_cls_prob_reshape,
                          rpn_bbox_pred,
                          im_info,
                          cfg,
                          use_native_proposal_layer=False):
    '''
    Creates the proposal layer, either as a native user function or as a
    Python user function, and returns it under the alias 'rpn_rois'.

    Args:
        rpn_cls_prob_reshape: first input handed to the proposal layer
        rpn_bbox_pred: second input handed to the proposal layer
        im_info: third input handed to the proposal layer
        cfg: configuration indexed by "MODEL", "DATA", "TRAIN" and "TEST";
         the attributes read from each entry are listed in the layer config
         below
        use_native_proposal_layer (default False): if True, the native
         'ProposalLayerOp' is used instead of the Python ``ProposalLayer``

    Returns:
        the proposal layer output aliased as 'rpn_rois'
    '''
    layer_config = {
        "feat_stride": cfg["MODEL"].FEATURE_STRIDE,
        "scales": cfg["DATA"].PROPOSAL_LAYER_SCALES,

        "train_pre_nms_topN": cfg["TRAIN"].RPN_PRE_NMS_TOP_N,
        "train_post_nms_topN": cfg["TRAIN"].RPN_POST_NMS_TOP_N,
        "train_nms_thresh": float(cfg["TRAIN"].RPN_NMS_THRESH),
        "train_min_size": float(cfg["TRAIN"].RPN_MIN_SIZE),

        "test_pre_nms_topN": cfg["TEST"].RPN_PRE_NMS_TOP_N,
        "test_post_nms_topN": cfg["TEST"].RPN_POST_NMS_TOP_N,
        "test_nms_thresh": float(cfg["TEST"].RPN_NMS_THRESH),
        "test_min_size": float(cfg["TEST"].RPN_MIN_SIZE),
    }

    if use_native_proposal_layer:
        native_op_name = 'ProposalLayerOp'
        # Register the native op only once so that this factory can be called
        # repeatedly within the same process.
        if not cntk.cntk_py.is_native_user_function_registered(
                native_op_name):
            cntk.ops.register_native_user_function(
                native_op_name,
                'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'),
                'CreateProposalLayer')
        rpn_rois_raw = ops.native_user_function(
            native_op_name, [rpn_cls_prob_reshape, rpn_bbox_pred, im_info],
            layer_config, 'native_proposal_layer')
    else:
        rpn_rois_raw = user_function(
            ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                          layer_config))

    return alias(rpn_rois_raw, name='rpn_rois')
def build_test_function():
    '''
    Builds a graph that chains a MyPlus user function, the native
    'NativeUserTimesOp' and a second MyPlus user function on the CPU device.

    Returns:
        a tuple (device, w_value, c1_value, c2_value, function) holding the
        device, the values used for the weight and the two constants, and
        the root of the resulting graph
    '''
    dev = C.cpu()
    w_value = np.asarray([[0.5, 2], [-0.5, 1.5]]).astype(np.float32)
    c1_value = 2.718
    c2_value = -3.141

    # Register the native op only if that has not happened yet.
    native_op = 'NativeUserTimesOp'
    if not C.cntk_py.is_native_user_function_registered(native_op):
        library = 'Cntk.ExtensibilityExamples-' + C.__version__.rstrip('+')
        C.ops.register_native_user_function(native_op, library, 'CreateUserTimesFunction')

    x = C.input_variable(2)
    w = C.parameter((2, 2), init=w_value, device=dev)

    # x + c1  ->  native times with w  ->  + c2
    first_plus = C.user_function(MyPlus(x, C.constant(c1_value)))
    times = C.ops.native_user_function(native_op, [w, first_plus],
                                       user_function_instance_name='my_times')
    second_plus = C.user_function(MyPlus(times, C.constant(c2_value)))

    return dev, w_value, c1_value, c2_value, second_plus
Esempio n. 11
0
def build_test_function():
    '''
    Builds a graph that chains a MyPlus user function, the native
    'NativeUserTimesOp' and a second MyPlus user function on the CPU device.

    Returns:
        a tuple (device, w_value, c1_value, c2_value, function) holding the
        device, the values used for the weight and the two constants, and
        the root of the resulting graph
    '''
    dev = C.cpu()
    c1_value, c2_value = 2.718, -3.141
    w_value = np.asarray([[0.5, 2], [-0.5, 1.5]]).astype(np.float32)

    # Register the native op only if that has not happened yet.
    already_registered = C.cntk_py.is_native_user_function_registered('NativeUserTimesOp')
    if not already_registered:
        C.ops.register_native_user_function(
            'NativeUserTimesOp',
            'Cntk.ExtensibilityExamples-' + C.__version__.rstrip('+'),
            'CreateUserTimesFunction')

    x = C.input_variable(2)
    w = C.parameter((2, 2), init=w_value, device=dev)

    # x + c1  ->  native times with w  ->  + c2
    graph = C.user_function(MyPlus(x, C.constant(c1_value)))
    graph = C.ops.native_user_function(
        'NativeUserTimesOp', [w, graph],
        user_function_instance_name='my_times')
    graph = C.user_function(MyPlus(graph, C.constant(c2_value)))

    return dev, w_value, c1_value, c2_value, graph
Esempio n. 12
0
def test_anchor_target_layer():
    '''
    Feeds the same random inputs to the CNTK anchor target layer and to the
    Caffe anchor target layer (both in deterministic mode) and asserts that
    the objectness targets, bbox targets and bbox inside weights are exactly
    the same.
    '''
    from utils.rpn.anchor_target_layer import AnchorTargetLayer as CntkAnchorTargetLayer
    from utils.caffe_layers.anchor_target_layer import AnchorTargetLayer as CaffeAnchorTargetLayer

    num_gt_boxes = 50
    rpn_cls_score_shape_cntk = (1, 18, 61, 61)
    gt_boxes_shape_cntk = (num_gt_boxes, 5)
    dims_info_shape = (6,)
    im_info = [1000, 1000, 1]

    # Create input tensors with values
    rpn_cls_score_dummy = np.random.random_sample(rpn_cls_score_shape_cntk).astype(np.float32)
    dims_input = np.array([1000] * 6).astype(np.float32)

    # Random ground truth boxes as (x1, y1, x2, y2, label)
    top_left = np.random.random_sample((num_gt_boxes, 2)) * 500
    extent = np.random.random_sample((num_gt_boxes, 2)) * 400
    bottom_right = top_left + extent + 50
    label = np.random.random_sample((num_gt_boxes, 1)) * 17.0
    gt_boxes = np.hstack((top_left, bottom_right, label)).astype(np.float32)

    # Create CNTK layer and call forward
    rpn_cls_score_var = input_variable(rpn_cls_score_shape_cntk)
    gt_boxes_var = input_variable(gt_boxes_shape_cntk)
    dims_info_var = input_variable(dims_info_shape)

    cntk_layer = user_function(
        CntkAnchorTargetLayer(rpn_cls_score_var, gt_boxes_var, dims_info_var, deterministic=True))
    feed = {
        rpn_cls_score_var: [rpn_cls_score_dummy],
        gt_boxes_var: [gt_boxes],
        dims_info_var: dims_input,
    }
    _, cntk_output = cntk_layer.forward(feed)

    def first_key_containing(fragment):
        # Outputs are looked up by a fragment of their string representation.
        return [k for k in cntk_output if fragment in str(k)][0]

    output_keys = [first_key_containing(fragment)
                   for fragment in ('objectness_target', 'rpn_bbox_target', 'rpn_bbox_inside_w')]
    cntk_results = [cntk_output[key][0] for key in output_keys]

    # Create Caffe layer and call forward
    bottom = [np.array(rpn_cls_score_dummy), np.array(gt_boxes), np.array(im_info)]
    top = None  # handled through return statement in caffe layer for unit testing

    caffe_layer = CaffeAnchorTargetLayer()
    caffe_layer.set_param_str("'feat_stride': 16")
    caffe_layer.setup(bottom, top)
    caffe_layer.set_deterministic_mode()

    caffe_objectness_target, caffe_bbox_targets, caffe_bbox_inside_w = caffe_layer.forward(bottom, top)
    caffe_results = [caffe_objectness_target, caffe_bbox_targets, caffe_bbox_inside_w]

    # assert that results are exactly the same: all shapes first, then values
    for cntk_value, caffe_value in zip(cntk_results, caffe_results):
        assert cntk_value.shape == caffe_value.shape

    for cntk_value, caffe_value in zip(cntk_results, caffe_results):
        assert np.allclose(cntk_value, caffe_value, rtol=0.0, atol=0.0)
    print("Verified AnchorTargetLayer")
Esempio n. 13
0
def test_ext_eval_3_no_input():
    '''
    Evaluates a graph without any input variable: a MyPlus user function over
    a parameter (initialized to 10) and the constant 3, followed by ``+ 0``.
    The result is expected to be 10 + 3 everywhere.
    '''
    param = C.parameter(shape=(4,), init=10, name='p')
    plus_node = C.user_function(MyPlus(param, C.constant(3)))
    graph = plus_node + 0

    result = graph.eval()
    # No batch dimension since we have no input
    expected = np.zeros_like(param) + 10 + 3
    assert np.allclose(result, expected)
Esempio n. 14
0
def test_ext_eval_4_b_inside_graph():
    '''
    Evaluates a graph in which the MyPlus user function is multiplied with a
    parameter before being wrapped by ``C.user_function``. The result is
    expected to be (p_init + 3) * p_init everywhere.
    '''
    p_init = 10
    param = C.parameter(shape=(4,), init=p_init, name='p')
    product = param * MyPlus(param, C.constant(3))
    graph = C.user_function(product)

    result = graph.eval()
    # No batch dimension since we have no input
    expected = ((p_init * np.ones_like(result)) + 3) * p_init
    assert np.allclose(result, expected)
Esempio n. 15
0
def test_ext_eval_4_b_inside_graph():
    '''
    Checks evaluation when the MyPlus user function sits inside a larger
    expression: it is multiplied with a parameter and the product is passed
    to ``C.user_function``. Expected value: (p_init + 3) * p_init.
    '''
    dim, p_init = 4, 10
    weights = C.parameter(shape=(dim,), init=p_init, name='p')
    graph = C.user_function(weights * MyPlus(weights, C.constant(3)))

    result = graph.eval()
    # No batch dimension since we have no input
    plus_three = (p_init * np.ones_like(result)) + 3
    assert np.allclose(result, plus_three * p_init)
Esempio n. 16
0
def test_ext_eval_3_no_input():
    '''
    Checks evaluation of a graph that has no input variable: MyPlus is applied
    to a parameter initialized to 10 and the constant 3, then 0 is added.
    Expected value: 10 + 3 for every element.
    '''
    dim = 4
    weights = C.parameter(shape=(dim,), init=10, name='p')
    graph = C.user_function(MyPlus(weights, C.constant(3))) + 0

    result = graph.eval()
    # No batch dimension since we have no input
    expected = np.zeros_like(weights) + 10 + 3
    assert np.allclose(result, expected)
Esempio n. 17
0
def test_ext_eval_5_times():
    '''
    Evaluates ``C.times`` of a MyPlus user function (parameter of size 2 plus
    the constant 3) with a (2, 50) parameter initialized to 2. The result is
    expected to be (p_init + 3) * 2 * 2 everywhere.
    '''
    p_init = 10
    param = C.parameter(shape=(2,), init=p_init, name='p')
    plus_node = C.user_function(MyPlus(param, C.constant(3)))
    right_operand = C.parameter(shape=(2, 50), init=2)
    graph = C.times(plus_node, right_operand)

    result = graph.eval()
    # No batch dimension since we have no input
    expected = ((p_init * np.ones_like(result)) + 3) * 2 * 2
    assert np.allclose(result, expected)
Esempio n. 18
0
def test_ext_eval_1():
    '''
    Evaluates MyPlus(sequence input, constant 3) plus a parameter initialized
    to 10 on one random sample and expects input + 3 + 10.
    '''
    dim = 4
    param = C.parameter(shape=(dim,), init=10, name='p')
    seq_input = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    plus_node = C.user_function(MyPlus(seq_input, C.constant(3)))
    graph = plus_node + param

    sample = np.random.rand(dim)
    result = graph.eval([sample])
    expected = sample + 3 + 10
    assert np.allclose(result[0][0], expected)
Esempio n. 19
0
def test_ext_eval_1():
    '''
    Checks evaluation with a sequence input: the graph is
    MyPlus(input, constant 3) + parameter (initialized to 10), and the first
    entry of the result must equal input + 3 + 10.
    '''
    dim = 4
    weights = C.parameter(shape=(dim,), init=10, name='p')
    features = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    graph = C.user_function(MyPlus(features, C.constant(3))) + weights

    input_data = np.random.rand(dim)
    first_entry = graph.eval([input_data])[0][0]
    assert np.allclose(first_entry, input_data + 3 + 10)
Esempio n. 20
0
def test_ext_eval_5_times():
    '''
    Checks that the output of a MyPlus user function can be fed into
    ``C.times``: (parameter + 3) times a (2, 50) parameter initialized to 2.
    Expected value: (p_init + 3) * 2 * 2 for every element.
    '''
    dim, p_init = 2, 10
    weights = C.parameter(shape=(dim,), init=p_init, name='p')
    summed = C.user_function(MyPlus(weights, C.constant(3)))
    graph = C.times(summed, C.parameter(shape=(2, 50), init=2))

    result = graph.eval()
    # No batch dimension since we have no input
    plus_three = (p_init * np.ones_like(result)) + 3
    assert np.allclose(result, plus_three * 2 * 2)
def test_anchor_target_layer():
    '''
    Runs the CNTK anchor target layer and the Caffe anchor target layer (both
    in deterministic mode) on identical random inputs and asserts that the
    objectness targets, bbox targets and bbox inside weights match exactly.
    '''
    num_gt_boxes = 50
    rpn_cls_score_shape_cntk = (1, 18, 61, 61)
    gt_boxes_shape_cntk = (num_gt_boxes, 5)
    dims_info_shape = (6,)
    im_info = [1000, 1000, 1]

    # Create input tensors with values
    rpn_cls_score_dummy = np.random.random_sample(rpn_cls_score_shape_cntk).astype(np.float32)
    dims_input = np.array([1000] * 6).astype(np.float32)

    # Random ground truth boxes as (x1, y1, x2, y2, label)
    corner_min = np.random.random_sample((num_gt_boxes, 2)) * 500
    box_size = np.random.random_sample((num_gt_boxes, 2)) * 400
    corner_max = corner_min + box_size + 50
    label = np.random.random_sample((num_gt_boxes, 1)) * 17.0
    gt_boxes = np.hstack((corner_min, corner_max, label)).astype(np.float32)

    # Create CNTK layer and call forward
    rpn_cls_score_var = input_variable(rpn_cls_score_shape_cntk)
    gt_boxes_var = input_variable(gt_boxes_shape_cntk)
    dims_info_var = input_variable(dims_info_shape)

    cntk_layer = user_function(
        CntkAnchorTargetLayer(rpn_cls_score_var, gt_boxes_var, dims_info_var, deterministic=True))
    _, cntk_output = cntk_layer.forward({
        rpn_cls_score_var: [rpn_cls_score_dummy],
        gt_boxes_var: [gt_boxes],
        dims_info_var: dims_input,
    })

    # Outputs are looked up by a fragment of their string representation.
    name_fragments = ('objectness_target', 'rpn_bbox_target', 'rpn_bbox_inside_w')
    output_keys = [[k for k in cntk_output if fragment in str(k)][0]
                   for fragment in name_fragments]
    cntk_results = [cntk_output[key][0] for key in output_keys]

    # Create Caffe layer and call forward
    bottom = [np.array(rpn_cls_score_dummy), np.array(gt_boxes), np.array(im_info)]
    top = None  # handled through return statement in caffe layer for unit testing

    caffe_layer = CaffeAnchorTargetLayer()
    caffe_layer.set_param_str("'feat_stride': 16")
    caffe_layer.setup(bottom, top)
    caffe_layer.set_deterministic_mode()

    caffe_objectness_target, caffe_bbox_targets, caffe_bbox_inside_w = caffe_layer.forward(bottom, top)
    caffe_results = [caffe_objectness_target, caffe_bbox_targets, caffe_bbox_inside_w]

    # assert that results are exactly the same: all shapes first, then values
    for cntk_value, caffe_value in zip(cntk_results, caffe_results):
        assert cntk_value.shape == caffe_value.shape

    for cntk_value, caffe_value in zip(cntk_results, caffe_results):
        assert np.allclose(cntk_value, caffe_value, rtol=0.0, atol=0.0)
    print("Verified AnchorTargetLayer")
Esempio n. 22
0
def test_proposal_layer():
    '''
    Feeds the same random inputs to the CNTK proposal layer (configured from
    the FasterRCNN config) and to the Caffe proposal layer and asserts that
    both produce exactly the same proposals.
    '''
    from utils.rpn.proposal_layer import ProposalLayer as CntkProposalLayer
    from utils.caffe_layers.proposal_layer import ProposalLayer as CaffeProposalLayer
    from FasterRCNN.FasterRCNN_config import cfg

    cls_prob_shape_cntk = (18, 61, 61)
    cls_prob_shape_caffe = (18, 61, 61)
    rpn_bbox_shape = (36, 61, 61)
    dims_info_shape = (6,)
    im_info = [1000, 1000, 1]

    # Random input tensors shared by both implementations
    cls_prob = np.random.random_sample(cls_prob_shape_cntk).astype(np.float32)
    rpn_bbox_pred = np.random.random_sample(rpn_bbox_shape).astype(np.float32)
    dims_input = np.array([1000] * 6).astype(np.float32)

    # CNTK side: build the layer and run a forward pass
    cls_prob_var = input_variable(cls_prob_shape_cntk)
    rpn_bbox_var = input_variable(rpn_bbox_shape)
    dims_info_var = input_variable(dims_info_shape)

    layer_config = {
        "feat_stride": 16,
        "scales": [8, 16, 32],

        "train_pre_nms_topN": cfg["TRAIN"].RPN_PRE_NMS_TOP_N,
        "train_post_nms_topN": cfg["TRAIN"].RPN_POST_NMS_TOP_N,
        "train_nms_thresh": float(cfg["TRAIN"].RPN_NMS_THRESH),
        "train_min_size": float(cfg["TRAIN"].RPN_MIN_SIZE),

        "test_pre_nms_topN": cfg["TEST"].RPN_PRE_NMS_TOP_N,
        "test_post_nms_topN": cfg["TEST"].RPN_POST_NMS_TOP_N,
        "test_nms_thresh": float(cfg["TEST"].RPN_NMS_THRESH),
        "test_min_size": float(cfg["TEST"].RPN_MIN_SIZE),
    }

    cntk_layer = user_function(
        CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var, layer_config))
    feed = {
        cls_prob_var: [cls_prob],
        rpn_bbox_var: [rpn_bbox_pred],
        dims_info_var: dims_input,
    }
    _, cntk_output = cntk_layer.forward(feed)
    first_output = next(iter(cntk_output))
    cntk_proposals = cntk_output[first_output][0]

    # Caffe side: each bottom blob gets a leading axis of size one
    bottom = [
        np.array([cls_prob.reshape(cls_prob_shape_caffe)]),
        np.array([rpn_bbox_pred]),
        np.array([im_info]),
    ]
    top = None  # handled through return statement in caffe layer for unit testing

    caffe_layer = CaffeProposalLayer()
    caffe_layer.set_param_str("'feat_stride': 16")
    caffe_layer.setup(bottom, top)
    # The first column of the Caffe output is dropped before comparing.
    caffe_proposals = caffe_layer.forward(bottom, top)[:, 1:]

    # assert that results are exactly the same
    assert cntk_proposals.shape == caffe_proposals.shape
    assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
    print("Verified ProposalLayer")
Esempio n. 23
0
def test_udf_clone():
    '''
    Clones (with method 'share') a graph containing a MyPlus user function
    and checks that the clone evaluates to input + 3 + 10.
    '''
    dim = 4
    seq_input = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    udf_node = C.user_function(MyPlus(seq_input, C.constant(3)))
    param = C.parameter(shape=(dim,), init=10, name='p')
    original = udf_node + param

    cloned = original.clone('share')

    sample = np.random.rand(dim)
    result = cloned.eval([sample])
    expected = sample + 3 + 10
    assert np.allclose(result[0][0], expected)
def test_udf_input_values_no_sharing():
    '''
    Builds MyArgumentPreservingPlus(i + 1, i + 2) + w, splices the result
    with itself three times along axis 0 and checks the forward values
    returned by ``grad`` for the input value 2.
    '''
    i = C.input_variable(1, needs_gradient=True, name='i_var')
    w = C.parameter(shape=(1,), init=1)

    node = C.user_function(MyArgumentPreservingPlus(i + 1, i + 2)) + w
    # Each splice doubles the node along axis 0.
    for _ in range(3):
        node = C.splice(node, node, axis=0)

    feed = {i: np.asarray([2], dtype=np.float32)}
    grad_value, result = node.grad(feed, outputs=[node], wrt=[w, i])
    assert np.array_equal(result, [[8] * 8])
Esempio n. 25
0
def test_udf_input_values_no_sharing():
    '''
    Checks the forward values returned by ``grad`` for a graph made of
    MyArgumentPreservingPlus(i + 1, i + 2) + w that is spliced with itself
    three times along axis 0, evaluated at the input value 2.
    '''
    i = C.input_variable(1, needs_gradient=True, name='i_var')
    udf = C.user_function(MyArgumentPreservingPlus(i + 1, i + 2))

    w = C.parameter(shape=(1,), init=1)
    base = udf + w
    doubled = C.splice(base, base, axis=0)
    quadrupled = C.splice(doubled, doubled, axis=0)
    root = C.splice(quadrupled, quadrupled, axis=0)

    input_value = np.asarray([2], dtype=np.float32)
    grad_value, result = root.grad({i: input_value}, outputs=[root], wrt=[w, i])
    expected = [[8, 8, 8, 8, 8, 8, 8, 8]]
    assert np.array_equal(result, expected)
Esempio n. 26
0
def test_ext_eval_2_only_param():
    '''
    Evaluates a graph in which the MyPlus user function only sees a parameter
    (initialized to 10) and the constant 3; the sequence input is added
    afterwards. The result is expected to be input + 3 + 10.
    '''
    dim = 4
    param = C.parameter(shape=(dim,), init=10, name='p')
    seq_input = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    plus_node = C.user_function(MyPlus(param, C.constant(3)))
    # NOTE: wrapping the user function output with combine([m.output]) does
    # not work, so the input is added to it instead.
    graph = plus_node + seq_input

    sample = np.random.rand(dim)
    result = graph.eval([sample])
    expected = sample + 3 + 10
    assert np.allclose(result[0][0], expected)
Esempio n. 27
0
def test_udf_clone():
    '''
    Checks that a graph containing a MyPlus user function can be cloned with
    method 'share' and that the clone still evaluates to input + 3 + 10.
    '''
    dim = 4
    features = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m_udf = C.user_function(MyPlus(features, C.constant(3)))
    weights = C.parameter(shape=(dim,), init=10, name='p')

    z_clone = (m_udf + weights).clone('share')

    input_data = np.random.rand(dim)
    first_entry = z_clone.eval([input_data])[0][0]
    assert np.allclose(first_entry, input_data + 3 + 10)
Esempio n. 28
0
def test_ext_eval_2_only_param():
    '''
    Checks evaluation when the MyPlus user function is applied to a parameter
    (initialized to 10) and the constant 3 only, with the sequence input
    added afterwards. Expected value: input + 3 + 10.
    '''
    dim = 4
    weights = C.parameter(shape=(dim,), init=10, name='p')
    features = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    summed = C.user_function(MyPlus(weights, C.constant(3)))
    # NOTE: wrapping the user function output with combine([m.output]) does
    # not work, so the input is added to it instead.
    graph = summed + features

    input_data = np.random.rand(dim)
    first_entry = graph.eval([input_data])[0][0]
    assert np.allclose(first_entry, input_data + 3 + 10)
Esempio n. 29
0
def test_ext_eval_freedimension_input():
    '''
    Evaluates MyPlus(input, constant 3) on a sequence input declared with
    ``C.FreeDimension``, once with 3 and once with 6 random values, and
    expects input + 3 as the result and all ones as the gradient.
    '''
    i = C.sequence.input_variable(C.FreeDimension, needs_gradient=True, name='i_var')
    m = C.user_function(MyPlus(i, C.constant(3)))

    # The same function is evaluated with two different input lengths.
    for length in (3, 6):
        input_data = np.random.rand(length)
        gradient_value, result = m.grad({i: input_data}, wrt=[i], outputs=[m.output])
        assert np.allclose(result[0][0], input_data + 3)
        assert np.allclose(gradient_value[0][0], np.ones_like(input_data))
def test_ext_eval_freedimension_input():
    '''
    Checks that a MyPlus user function over a sequence input declared with
    ``C.FreeDimension`` accepts inputs of different lengths (3 and 6): the
    result must be input + 3 and the gradient all ones.
    '''
    i = C.sequence.input_variable(C.FreeDimension, needs_gradient=True, name='i_var')
    m = C.user_function(MyPlus(i, C.constant(3)))

    def check(length):
        # Forward/backward pass on `length` random values.
        input_data = np.random.rand(length)
        gradient_value, result = m.grad({i: input_data}, wrt=[i], outputs=[m.output])
        assert np.allclose(result[0][0], input_data + 3)
        assert np.allclose(gradient_value[0][0], np.ones_like(input_data))

    check(3)
    check(6)
Esempio n. 31
0
def CustomMultibitKernel(input, bit_map, mean_bits=None):
    '''
    Wraps ``MultibitKernel`` as a user function, using a bit map that has the
    same shape as ``input``.

    Args:
        input: node whose ``shape`` determines the required bit map shape
        bit_map: either a single integer (Python or numpy), which is expanded
         to every element of ``input``, or an array-like that already has the
         shape of ``input``. Ignored when ``mean_bits`` is truthy.
        mean_bits (default None): when truthy, the bit map is drawn at random
         instead: samples of a normal distribution with mean ``mean_bits``
         and standard deviation 1 are rounded, clipped to a minimum of 1 and
         stored as int32. The mean of the drawn bit map is printed.

    Returns:
        the result of ``C.user_function(MultibitKernel(input, bit_map))``

    Raises:
        AssertionError: if the shape of the bit map differs from ``input.shape``
    '''
    if mean_bits:
        sampled = np.random.normal(mean_bits, 1, input.shape)
        bit_map = np.asarray(np.maximum(np.round(sampled), 1), dtype=np.int32)
        print("Mean Bits: ", np.mean(bit_map))
    elif isinstance(bit_map, (int, np.integer)):
        # Expand the scalar directly to the input shape; no graph node is
        # needed just to count the elements.
        bit_map = np.full(input.shape, bit_map)
    else:
        bit_map = np.asarray(bit_map)

    assert bit_map.shape == input.shape, \
        'bit_map shape {} does not match input shape {}'.format(bit_map.shape, input.shape)
    return C.user_function(MultibitKernel(input, bit_map))
def test_udf_no_gradient_for_some_inputs():
    '''
    Computes gradients of MyPlusWithNoGradientToRightOperand(x, y) and checks
    that the left operand receives all ones, the right operand all zeros, and
    that the forward result is the element-wise sum.
    '''
    dim = 2
    x = C.sequence.input_variable(dim, needs_gradient=True, name='x')
    y = C.sequence.input_variable(dim, needs_gradient=True, name='y')
    op = C.user_function(MyPlusWithNoGradientToRightOperand(x, y))

    feed = {
        x: [AA([[1., 2.], [3., 4.]], dtype=np.float32)],
        y: [AA([[5., 6.], [7., 8.]], dtype=np.float32)],
    }
    gradients, result = op.grad(feed, op.arguments, [op.output])

    left_arg, right_arg = op.arguments[0], op.arguments[1]
    assert np.allclose(gradients[left_arg], [[[1., 1.], [1., 1.]]])
    assert np.allclose(gradients[right_arg], [[[0., 0.], [0., 0.]]])

    assert np.allclose(result, [[[6., 8.], [10., 12.]]])
Esempio n. 33
0
def test_udf_plus_and_last():
    '''
    Runs the PlusAndLast user function on a two-step sequence and a
    non-sequence operand and checks the forward result against [[[5., 6.]]].
    '''
    dt_precision = np.float32
    x = C.sequence.input_variable(shape=(2,))
    y = C.input_variable(shape=(2,))

    func = C.user_function(PlusAndLast(x, y))

    feed = {
        x: [AA([[1., 2.], [3., 4.]], dtype=dt_precision)],
        y: [AA([2., 2.], dtype=dt_precision)],
    }
    _, result = func.forward(feed, [func.output])

    expected_forward = AA([[[5., 6.]]], dtype=dt_precision)
    assert np.allclose(result[func.output], expected_forward)
Esempio n. 34
0
def test_udf_plus_and_last():
    '''
    Runs the PlusAndLast user function forward on one two-step sequence and
    one non-sequence operand and compares the output with the expected value.
    '''
    x = C.sequence.input_variable(shape=(2,))
    y = C.input_variable(shape=(2,))

    func = C.user_function(PlusAndLast(x, y))

    precision = np.float32
    feed = {
        x: [AA([[1., 2.], [3., 4.]], dtype=precision)],
        y: [AA([2., 2.], dtype=precision)],
    }

    # only the output map is of interest, the backprop state is discarded
    _, outputs = func.forward(feed, [func.output])

    expected_forward = AA([[[5., 6.]]], dtype=precision)
    actual_forward = outputs[func.output]
    assert np.allclose(actual_forward, expected_forward)
Esempio n. 35
0
def test_udf_no_gradient_for_some_inputs():
    '''
    Checks the gradients of MyPlusWithNoGradientToRightOperand: the first
    argument is expected to receive ones, the second zeros, while the forward
    result is the element-wise sum of both inputs.
    '''
    feature_dim = 2
    x = C.sequence.input_variable(feature_dim, needs_gradient=True, name='x')
    y = C.sequence.input_variable(feature_dim, needs_gradient=True, name='y')
    op = C.user_function(MyPlusWithNoGradientToRightOperand(x, y))

    feed = {
        x: [AA([[1., 2.], [3., 4.]], dtype=np.float32)],
        y: [AA([[5., 6.], [7., 8.]], dtype=np.float32)],
    }
    gradients, result = op.grad(feed, op.arguments, [op.output])

    grad_first = gradients[op.arguments[0]]
    assert np.allclose(grad_first, [[[1., 1.], [1., 1.]]])
    grad_second = gradients[op.arguments[1]]
    assert np.allclose(grad_second, [[[0., 0.], [0., 0.]]])

    expected_sum = [[[6., 8.], [10., 12.]]]
    assert np.allclose(result, expected_sum)
Esempio n. 36
0
def test_override_serialize(tmpdir):
    '''
    Saves a three-level nesting of MyPlusPlus user functions to disk, loads
    it again and checks that the reloaded function evaluates to the same
    result as the original.
    '''
    dev = C.cpu()
    a, b = 1.2322341, -0.29084
    inner = MyPlusPlus([C.constant(a), C.constant(b)], '++')
    middle = MyPlusPlus([inner, inner], '+++')
    outer = MyPlusPlus([middle, middle], '++++')
    op = C.user_function(outer)
    result_before = op.eval({}, device=dev)

    filepath = str(tmpdir / 'test_udf_with_renamed_deserialize.dat')
    op.save(filepath)

    op_reloaded = Function.load(filepath, device=dev)
    result_after = op_reloaded.eval({}, device=dev)

    assert result_before == result_after
Esempio n. 37
0
def test_override_serialize(tmpdir):
    '''
    Saves a three-level nesting of MyPlusPlus user functions to disk, loads
    it again and checks that the reloaded function evaluates to the same
    result as the original.
    '''
    dev = C.cpu()
    a, b = 1.2322341, -0.29084
    inner = MyPlusPlus([C.constant(a), C.constant(b)], '++')
    middle = MyPlusPlus([inner, inner], '+++')
    outer = MyPlusPlus([middle, middle], '++++')
    op = C.user_function(outer)
    result_before = op.eval({}, device=dev)

    filepath = str(tmpdir / 'test_udf_with_renamed_deserialize.dat')
    op.save(filepath)

    op_reloaded = Function.load(filepath, device=dev)
    result_after = op_reloaded.eval({}, device=dev)

    assert result_before == result_after
Esempio n. 38
0
def test_multi_freedim_output_udf():
    '''
    Evaluates MultiFreeDimensionOutputUserFunction and checks that its two
    outputs equal ``x + 2*y`` and ``2*x + y``; then checks that the sum of
    both outputs has gradient 3 with respect to each argument.
    '''
    dim = 2
    x = C.sequence.input_variable(dim, needs_gradient=True, name='x')
    y = C.sequence.input_variable(dim, needs_gradient=True, name='y')
    udf = C.user_function(MultiFreeDimensionOutputUserFunction(x, y))

    x_data = [AA([[1., 2.], [3., 4.]], dtype=np.float32)]
    y_data = [AA([[5., 6.], [7., 8.]], dtype=np.float32)]

    # forward values of both outputs
    result = udf.eval({x: x_data, y: y_data})
    expected_first = x_data[0] + 2 * y_data[0]
    assert np.allclose(result[udf.outputs[0]], expected_first)
    expected_second = 2 * x_data[0] + y_data[0]
    assert np.allclose(result[udf.outputs[1]], expected_second)

    # gradients of the summed outputs
    summed = udf.outputs[0] + udf.outputs[1]
    gradients = summed.grad({x: x_data, y: y_data}, summed.arguments)
    all_threes = [[[3., 3.], [3., 3.]]]
    assert np.allclose(gradients[summed.arguments[0]], all_threes)
    assert np.allclose(gradients[summed.arguments[1]], all_threes)
Esempio n. 39
0
def CustomMultibitKernel(input, bit_map, mean_bits=None):
    '''
    Wraps ``input`` in a ``MultibitKernel`` user function together with a
    per-element bit map.

    Args:
      input: operand whose ``shape`` determines the required bit map shape
      bit_map: either a single integer (Python ``int`` or numpy integer
       scalar) that is broadcast to ``input.shape``, or an array-like that
       already has that shape. Ignored when ``mean_bits`` is truthy.
      mean_bits (default None): if truthy, a random bit map is drawn from a
       normal distribution with this mean and unit standard deviation,
       rounded, clipped to at least 1 and stored as int32

    Returns:
      the result of ``C.user_function(MultibitKernel(input, bit_map))``

    Raises:
      AssertionError: if the final bit map shape differs from ``input.shape``
    '''
    if mean_bits:
        sampled = np.round(np.random.normal(mean_bits, 1, input.shape))
        bit_map = np.asarray(np.maximum(sampled, 1), dtype=np.int32)
        print("Mean Bits: ", np.mean(bit_map))
    elif isinstance(bit_map, (int, np.integer)):
        # Broadcast the scalar directly; isinstance also accepts numpy
        # integer scalars, which the former ``type(...) == int`` check
        # sent down the array branch where the shape assertion failed.
        bit_map = np.full(input.shape, bit_map)
    else:
        bit_map = np.asarray(bit_map)
    assert (bit_map.shape == input.shape)
    return C.user_function(MultibitKernel(input, bit_map))
Esempio n. 40
0
def test_multioutput_udf():
    '''
    Evaluates MultiOutputUserFunction and checks that its two outputs equal
    ``x + 2*y`` and ``2*x + y``; then checks that the sum of both outputs has
    gradient 3 with respect to each argument.
    '''
    dim = 2
    x = C.sequence.input_variable(dim, needs_gradient=True, name='x')
    y = C.sequence.input_variable(dim, needs_gradient=True, name='y')
    udf = C.user_function(MultiOutputUserFunction(x, y))

    x_data = [AA([[1., 2.], [3., 4.]], dtype=np.float32)]
    y_data = [AA([[5., 6.], [7., 8.]], dtype=np.float32)]

    # forward values of both outputs
    result = udf.eval({x: x_data, y: y_data})
    expected_first = x_data[0] + 2 * y_data[0]
    assert np.allclose(result[udf.outputs[0]], expected_first)
    expected_second = 2 * x_data[0] + y_data[0]
    assert np.allclose(result[udf.outputs[1]], expected_second)

    # gradients of the summed outputs
    summed = udf.outputs[0] + udf.outputs[1]
    gradients = summed.grad({x: x_data, y: y_data}, summed.arguments)
    all_threes = [[[3., 3.], [3., 3.]]]
    assert np.allclose(gradients[summed.arguments[0]], all_threes)
    assert np.allclose(gradients[summed.arguments[1]], all_threes)
Esempio n. 41
0
def test_proposal_layer():
    '''
    Feeds the same random inputs through the CNTK and the Caffe proposal
    layer and asserts that both produce exactly the same proposals.
    '''
    cls_prob_shape_cntk = (18, 61, 61)
    cls_prob_shape_caffe = (18, 61, 61)
    rpn_bbox_shape = (36, 61, 61)
    dims_info_shape = (6, )
    im_info = [1000, 1000, 1]

    # Random input tensors shared by both implementations
    cls_prob = np.random.random_sample(cls_prob_shape_cntk).astype(np.float32)
    rpn_bbox_pred = np.random.random_sample(rpn_bbox_shape).astype(np.float32)
    dims_input = np.array([1000] * 6).astype(np.float32)

    # CNTK side: build the user function and run it forward
    cls_prob_var = input_variable(cls_prob_shape_cntk)
    rpn_bbox_var = input_variable(rpn_bbox_shape)
    dims_info_var = input_variable(dims_info_shape)

    proposal_udf = CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var)
    cntk_layer = user_function(proposal_udf)
    feed = {
        cls_prob_var: [cls_prob],
        rpn_bbox_var: [rpn_bbox_pred],
        dims_info_var: dims_input,
    }
    _, cntk_output = cntk_layer.forward(feed)
    first_output = next(iter(cntk_output))
    cntk_proposals = cntk_output[first_output][0]

    # Caffe side: same data, wrapped in a leading batch axis
    cls_prob_caffe = cls_prob.reshape(cls_prob_shape_caffe)
    bottom = [np.array([blob])
              for blob in (cls_prob_caffe, rpn_bbox_pred, im_info)]
    top = None  # handled through return statement in caffe layer for unit testing

    caffe_layer = CaffeProposalLayer()
    caffe_layer.set_param_str("'feat_stride': 16")
    caffe_layer.setup(bottom, top)
    # the first column of the Caffe output is dropped before comparing
    caffe_proposals = caffe_layer.forward(bottom, top)[:, 1:]

    # assert that results are exactly the same
    assert cntk_proposals.shape == caffe_proposals.shape
    assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
    print("Verified ProposalLayer")
Esempio n. 42
0
def test_ext_train(tmpdir):
    '''
    Trains a model containing the MyPlus user function for 100 minibatches,
    then saves it and reloads it from an in-memory buffer to check that the
    forward/backward call counters are part of the serialized state.
    '''
    dim = 4

    p = C.parameter(shape=(dim, ), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (z + 0, z + 0), [
        C.momentum_sgd(z.parameters,
                       lr_per_sample,
                       momentum_time_constant,
                       True,
                       minibatch_size=0)
    ])

    # A plain counted loop; the former ``while`` counter rebound ``i`` and
    # thereby shadowed the input variable defined above.
    for _ in range(100):
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')

    z.save(filepath)

    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(filepath, 'rb') as model_file:
        buf = model_file.read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction, cannot directly downcast it to MyPlus,
    # using serialize as workaround:
    state = m1.serialize()['state']

    assert state['forward_calls'] == state['backward_calls'] == 100
Esempio n. 43
0
def test_lstm_over_lstm_thought_vectors_2(device_id):
    '''
    Builds an utterance-level LSTM whose last states are regrouped by the
    UtteranceBatchReshape user function, feeds them to a second LSTM and a
    dense classifier, and compares the per-step cross-entropy losses of three
    label sequences with reference values (absolute tolerance 0.01).
    '''
    dev = cntk_device(device_id)
    input_vocab_size = 3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2

    utterances_input = C.sequence.input_variable(
        (input_vocab_size), is_sparse=True, name='utterances')
    conversation_lengths_input = C.input_variable(
        (), name='conversation_sequence_lengths')
    label_input = C.sequence.input_variable(
        num_labels, is_sparse=True,
        sequence_axis=C.Axis('label_sequence'), name='labels')

    with C.default_options(initial_state=0.1):
        embedded = C.layers.Embedding(emb_dim, name='embed')(utterances_input)
        utterance_states = C.layers.Recurrence(
            C.layers.LSTM(hidden_dim), go_backwards=False)(embedded)
        last_states = C.sequence.last(utterance_states)
        regrouped = C.user_function(
            UtteranceBatchReshape(last_states, conversation_lengths_input))
        as_label_sequence = C.to_sequence_like(regrouped, label_input)
        conversation_states = C.layers.Recurrence(
            C.layers.LSTM(hidden_dim), go_backwards=False)(as_label_sequence)
        z = C.layers.Dense(num_labels, name='classify')(conversation_states)

    ce = C.cross_entropy_with_softmax(z, label_input)

    def sparse_rows(rows):
        # one sparse CPU NDArrayView per utterance
        return C.NDArrayView.from_csr(_to_csr(rows), device=C.cpu())

    sentinel_utt_data = sparse_rows([[0, 0, 1]])
    utterance_batch = [
        sparse_rows([[0, 1, 1], [0, 1, 0], [1, 0, 0]]),
        sparse_rows([[0, 1, 0], [0, 1, 1]]),
        sparse_rows([[0, 1, 1], [0, 1, 0]]),
        sparse_rows([[0, 1, 1]]),
        sentinel_utt_data,
        sentinel_utt_data,
        sparse_rows([[0, 1, 0], [0, 1, 1], [1, 0, 0]]),
        sparse_rows([[0, 1, 0]]),
        sentinel_utt_data,
    ]

    value_template = C.sequence.input_variable(
        (input_vocab_size), is_sparse=True)
    all_utt_data = C.Value.create(
        value_template, utterance_batch, device=C.cpu()).data
    conversation_lengths_data = np.asarray([3, 1, 2], dtype=np.float32)

    label_rows = (
        [[0, 1], [0, 1], [1, 0]],
        [[1, 0]],
        [[1, 0], [0, 1]],
    )
    label_data = [_to_csr(rows) for rows in label_rows]

    feed = {
        utterances_input: all_utt_data,
        label_input: label_data,
        conversation_lengths_input: conversation_lengths_data,
    }
    param_grads, loss_result = ce.grad(
        feed, wrt=ce.parameters, outputs=[ce], as_numpy=False)

    loss_result = loss_result.as_sequences()

    absolute_tolerance = 0.01
    expected_losses = (
        [[0.678914], [0.668076], [0.728129]],
        [[0.679029]],
        [[0.705393], [0.674243]],
    )
    assert np.allclose(loss_result[0], expected_losses[0], atol=absolute_tolerance)
    assert np.allclose(loss_result[1], expected_losses[1], atol=absolute_tolerance)
    assert np.allclose(loss_result[2], expected_losses[2], atol=absolute_tolerance)
def test_ext_lambdafunc(tmpdir):
    '''
    Saves a model containing a LambdaFunc user function, reloads it through a
    registered deserialize callback and checks that the callback counter is
    only incremented for the minibatch whose input satisfies the ``when``
    condition.
    '''
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k,
                   when=lambda arg: np.sum(arg) > 1,
                   execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    # The callback rebuilds the LambdaFunc on load with the same condition
    # and the same counter.
    Function.register_udf_deserialize_callback('conditional_exec_lambda',
                                               lambda x, *unused: LambdaFunc(x, when=lambda arg: np.sum(arg) > 1, execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size = 1)
    trainer = C.Trainer(z, (z + 0, z + 0), [C.momentum_sgd(z.parameters,
                                                           lr_per_sample,
                                                           momentum_time_constant,
                                                           True)])

    # (a dead ``i = 0`` store that clobbered the input variable name was
    # removed here)
    # four elements of 0.1 sum to 0.4: the callback is expected not to fire
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    # four elements of 0.3 sum to 1.2: the callback is expected to fire once
    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
Esempio n. 45
0
def test_lstm_over_lstm_thought_vectors_2(device_id):
    '''
    Builds an utterance-level LSTM whose last states are regrouped by the
    UtteranceBatchReshape user function, feeds them to a second LSTM and a
    dense classifier, and compares the per-step cross-entropy losses of three
    label sequences with reference values (absolute tolerance 0.01).
    '''
    dev = cntk_device(device_id)
    input_vocab_size = 3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2

    utterances_input = C.sequence.input_variable(
        (input_vocab_size), is_sparse=True, name='utterances')
    conversation_lengths_input = C.input_variable(
        (), name='conversation_sequence_lengths')
    label_input = C.sequence.input_variable(
        num_labels, is_sparse=True,
        sequence_axis=C.Axis('label_sequence'), name='labels')

    with C.default_options(initial_state=0.1):
        embedded = C.layers.Embedding(emb_dim, name='embed')(utterances_input)
        utterance_states = C.layers.Recurrence(
            C.layers.LSTM(hidden_dim), go_backwards=False)(embedded)
        last_states = C.sequence.last(utterance_states)
        regrouped = C.user_function(
            UtteranceBatchReshape(last_states, conversation_lengths_input))
        as_label_sequence = C.to_sequence_like(regrouped, label_input)
        conversation_states = C.layers.Recurrence(
            C.layers.LSTM(hidden_dim), go_backwards=False)(as_label_sequence)
        z = C.layers.Dense(num_labels, name='classify')(conversation_states)

    ce = C.cross_entropy_with_softmax(z, label_input)

    def sparse_rows(rows):
        # one sparse CPU NDArrayView per utterance
        return C.NDArrayView.from_csr(_to_csr(rows), device=C.cpu())

    sentinel_utt_data = sparse_rows([[0, 0, 1]])
    utterance_batch = [
        sparse_rows([[0, 1, 1], [0, 1, 0], [1, 0, 0]]),
        sparse_rows([[0, 1, 0], [0, 1, 1]]),
        sparse_rows([[0, 1, 1], [0, 1, 0]]),
        sparse_rows([[0, 1, 1]]),
        sentinel_utt_data,
        sentinel_utt_data,
        sparse_rows([[0, 1, 0], [0, 1, 1], [1, 0, 0]]),
        sparse_rows([[0, 1, 0]]),
        sentinel_utt_data,
    ]

    value_template = C.sequence.input_variable(
        (input_vocab_size), is_sparse=True)
    all_utt_data = C.Value.create(
        value_template, utterance_batch, device=C.cpu()).data
    conversation_lengths_data = np.asarray([3, 1, 2], dtype=np.float32)

    label_rows = (
        [[0, 1], [0, 1], [1, 0]],
        [[1, 0]],
        [[1, 0], [0, 1]],
    )
    label_data = [_to_csr(rows) for rows in label_rows]

    feed = {
        utterances_input: all_utt_data,
        label_input: label_data,
        conversation_lengths_input: conversation_lengths_data,
    }
    param_grads, loss_result = ce.grad(
        feed, wrt=ce.parameters, outputs=[ce], as_numpy=False)

    loss_result = loss_result.as_sequences()

    absolute_tolerance = 0.01
    expected_losses = (
        [[0.678914], [0.668076], [0.728129]],
        [[0.679029]],
        [[0.705393], [0.674243]],
    )
    assert np.allclose(loss_result[0], expected_losses[0], atol=absolute_tolerance)
    assert np.allclose(loss_result[1], expected_losses[1], atol=absolute_tolerance)
    assert np.allclose(loss_result[2], expected_losses[2], atol=absolute_tolerance)
Esempio n. 46
0
def test_ext_lambdafunc(tmpdir):
    '''
    Saves a model containing a LambdaFunc user function, reloads it through a
    registered deserialize callback and checks that the callback counter is
    only incremented for the minibatch whose input satisfies the ``when``
    condition.
    '''
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k,
                   when=lambda arg: np.sum(arg) > 1,
                   execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    # The callback rebuilds the LambdaFunc on load with the same condition
    # and the same counter.
    Function.register_udf_deserialize_callback('conditional_exec_lambda',
                                               lambda x, *unused: LambdaFunc(x, when=lambda arg: np.sum(arg) > 1, execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0), [C.momentum_sgd(z.parameters,
                                                           lr_per_sample,
                                                           momentum_time_constant,
                                                           True)])

    # (a dead ``i = 0`` store that clobbered the input variable name was
    # removed here)
    # four elements of 0.1 sum to 0.4: the callback is expected not to fire
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    # four elements of 0.3 sum to 1.2: the callback is expected to fire once
    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
Esempio n. 47
0
def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, cfg):
    '''
    Creates a proposal target layer that is used for training an object detection network as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Assigns object detection proposals to ground-truth targets.
    Produces proposal classification labels and bounding-box regression targets.
    It also adds gt_boxes to candidates and samples fg and bg rois for training.

    Args:
        rpn_rois:        The proposed ROIs, e.g. from a region proposal network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        cfg:             The configuration object. Read from it are cfg["DATA"].NUM_CLASSES, cfg.NUM_ROI_PROPOSALS,
                         cfg.BBOX_NORMALIZE_TARGETS / _MEANS / _STDS and cfg["TRAIN"].FG_FRACTION, FG_THRESH,
                         BG_THRESH_HI and BG_THRESH_LO.

    Returns:
        A tuple of the four outputs of the proposal target layer, in this order:
        rpn_target_rois - a set of rois containing the ground truth and a number of sampled fg and bg ROIs
        label_targets - the target labels for the rois
        bbox_targets - the regression coefficient targets for the rois
        bbox_inside_weights - the weights for the regression loss
    '''

    # the number of classes is handed to the layer through its parameter string
    ptl_param_string = "'num_classes': {}".format(cfg["DATA"].NUM_CLASSES)
    target_layer = ProposalTargetLayer(
        rpn_rois,
        scaled_gt_boxes,
        batch_size=cfg.NUM_ROI_PROPOSALS,
        fg_fraction=cfg["TRAIN"].FG_FRACTION,
        normalize_targets=cfg.BBOX_NORMALIZE_TARGETS,
        normalize_means=cfg.BBOX_NORMALIZE_MEANS,
        normalize_stds=cfg.BBOX_NORMALIZE_STDS,
        fg_thresh=cfg["TRAIN"].FG_THRESH,
        bg_thresh_hi=cfg["TRAIN"].BG_THRESH_HI,
        bg_thresh_lo=cfg["TRAIN"].BG_THRESH_LO,
        param_str=ptl_param_string)
    ptl = user_function(target_layer)

    # use an alias if you need to access the outputs, e.g., when cloning a trained network
    rois = alias(ptl.outputs[0], name='rpn_target_rois')

    return rois, ptl.outputs[1], ptl.outputs[2], ptl.outputs[3]
Esempio n. 48
0
def test_ext_train(tmpdir):
    '''
    Trains a model containing the MyPlus user function for 100 minibatches,
    then saves it and reloads it from an in-memory buffer to check that the
    forward/backward call counters are part of the serialized state.
    '''
    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0),
                        [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                                        True)])

    # A plain counted loop; the former ``while`` counter rebound ``i`` and
    # thereby shadowed the input variable defined above.
    for _ in range(100):
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')

    z.save(filepath)

    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(filepath, 'rb') as model_file:
        buf = model_file.read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction, cannot directly downcast it to MyPlus,
    # using serialize as workaround:
    state = m1.serialize()['state']

    assert state['forward_calls'] == state['backward_calls'] == 100
Esempio n. 49
0
def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, cfg):
    '''
    Creates a proposal target layer that is used for training an object detection network as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Assigns object detection proposals to ground-truth targets.
    Produces proposal classification labels and bounding-box regression targets.
    It also adds gt_boxes to candidates and samples fg and bg rois for training.

    Args:
        rpn_rois:        The proposed ROIs, e.g. from a region proposal network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        cfg:             The configuration object. Read from it are cfg["DATA"].NUM_CLASSES, cfg.NUM_ROI_PROPOSALS,
                         cfg.BBOX_NORMALIZE_TARGETS / _MEANS / _STDS and cfg["TRAIN"].FG_FRACTION, FG_THRESH,
                         BG_THRESH_HI and BG_THRESH_LO.

    Returns:
        A tuple of the four outputs of the proposal target layer, in this order:
        rpn_target_rois - a set of rois containing the ground truth and a number of sampled fg and bg ROIs
        label_targets - the target labels for the rois
        bbox_targets - the regression coefficient targets for the rois
        bbox_inside_weights - the weights for the regression loss
    '''

    # the number of classes is handed to the layer through its parameter string
    ptl_param_string = "'num_classes': {}".format(cfg["DATA"].NUM_CLASSES)
    target_layer = ProposalTargetLayer(
        rpn_rois,
        scaled_gt_boxes,
        batch_size=cfg.NUM_ROI_PROPOSALS,
        fg_fraction=cfg["TRAIN"].FG_FRACTION,
        normalize_targets=cfg.BBOX_NORMALIZE_TARGETS,
        normalize_means=cfg.BBOX_NORMALIZE_MEANS,
        normalize_stds=cfg.BBOX_NORMALIZE_STDS,
        fg_thresh=cfg["TRAIN"].FG_THRESH,
        bg_thresh_hi=cfg["TRAIN"].BG_THRESH_HI,
        bg_thresh_lo=cfg["TRAIN"].BG_THRESH_LO,
        param_str=ptl_param_string)
    ptl = user_function(target_layer)

    # use an alias if you need to access the outputs, e.g., when cloning a trained network
    rois = alias(ptl.outputs[0], name='rpn_target_rois')

    return rois, ptl.outputs[1], ptl.outputs[2], ptl.outputs[3]
Esempio n. 50
0
def test_proposal_target_layer():
    '''
    Feeds the same random ROIs and ground-truth boxes through the CNTK and
    the Caffe proposal target layer (both in deterministic mode) and asserts
    that rois, labels, bbox targets and bbox inside weights match exactly.
    '''
    num_rois = 400
    all_rois_shape_cntk = (num_rois, 4)
    num_gt_boxes = 50
    gt_boxes_shape_cntk = (num_gt_boxes, 5)
    im_info = [1000, 1000, 1]

    def random_corners(count):
        # (x1, y1) in [0, 500) and (x2, y2) offset from it by 50 to 450
        top_left = np.random.random_sample((count, 2)) * 500
        extent = np.random.random_sample((count, 2)) * 400
        return top_left, top_left + extent + 50

    # Create input tensors with values
    all_rois = np.hstack(random_corners(num_rois)).astype(np.float32)

    gt_x1y1, gt_x2y2 = random_corners(num_gt_boxes)
    label = np.random.random_sample((num_gt_boxes, 1)) * 17.0
    gt_boxes = np.hstack((gt_x1y1, gt_x2y2, label)).astype(np.float32)

    # Create CNTK layer and call forward
    all_rois_var = input_variable(all_rois_shape_cntk)
    gt_boxes_var = input_variable(gt_boxes_shape_cntk)

    target_udf = CntkProposalTargetLayer(all_rois_var,
                                         gt_boxes_var,
                                         param_str="'num_classes': 17",
                                         deterministic=True)
    cntk_layer = user_function(target_udf)
    _, cntk_output = cntk_layer.forward({all_rois_var: [all_rois],
                                         gt_boxes_var: [gt_boxes]})

    def first_sample_of(name_fragment):
        # pick the output whose string form contains the fragment and
        # return its first batch entry
        key = [k for k in cntk_output if name_fragment in str(k)][0]
        return cntk_output[key][0]

    cntk_rois = first_sample_of('rpn_target_rois_raw')
    cntk_labels_one_hot = first_sample_of('label_targets_raw')
    cntk_bbox_targets = first_sample_of('bbox_targets_raw')
    cntk_bbox_inside_weights = first_sample_of('bbox_inside_w_raw')

    cntk_labels = np.argmax(cntk_labels_one_hot, axis=1)

    # Create Caffe layer and call forward; its rois carry an extra leading
    # column, which is zero-filled here
    zeros = np.zeros((all_rois.shape[0], 1), dtype=gt_boxes.dtype)
    all_rois_caffe = np.hstack((zeros, all_rois))

    bottom = [np.array(all_rois_caffe), np.array(gt_boxes)]
    top = None  # handled through return statement in caffe layer for unit testing

    caffe_layer = CaffeProposalTargetLayer()
    caffe_layer.set_param_str("'num_classes': 17")
    caffe_layer.setup(bottom, top)
    caffe_layer.set_deterministic_mode()

    caffe_result = caffe_layer.forward(bottom, top)
    caffe_rois, caffe_labels, caffe_bbox_targets, caffe_bbox_inside_weights = caffe_result
    caffe_rois = caffe_rois[:, 1:]

    # only the first rows of the CNTK outputs are compared
    keep = caffe_rois.shape[0]
    cntk_rois = cntk_rois[:keep, :]
    cntk_labels = cntk_labels[:keep]
    cntk_bbox_targets = cntk_bbox_targets[:keep, :]
    cntk_bbox_inside_weights = cntk_bbox_inside_weights[:keep, :]

    # assert that results are exactly the same
    assert cntk_rois.shape == caffe_rois.shape
    assert cntk_labels.shape == caffe_labels.shape
    assert cntk_bbox_targets.shape == caffe_bbox_targets.shape
    assert cntk_bbox_inside_weights.shape == caffe_bbox_inside_weights.shape

    caffe_labels = [int(x) for x in caffe_labels]

    exact = dict(rtol=0.0, atol=0.0)
    assert np.allclose(cntk_rois, caffe_rois, **exact)
    assert np.allclose(cntk_labels, caffe_labels, **exact)
    assert np.allclose(cntk_bbox_targets, caffe_bbox_targets, **exact)
    assert np.allclose(cntk_bbox_inside_weights, caffe_bbox_inside_weights, **exact)
    print("Verified ProposalTargetLayer")
def print_node(v):
    '''
    Wraps ``v`` in a ``LambdaFunc`` (constructed with ``v`` as its only
    argument) and returns it as a CNTK user function.
    '''
    lambda_udf = LambdaFunc(v)
    return C.user_function(lambda_udf)
Esempio n. 52
0
def __cntk_det__(m):
    '''
    Wraps ``m`` in ``__cntk_class_det__`` and returns it as a CNTK user
    function.
    '''
    det_udf = __cntk_class_det__(m)
    return C.user_function(det_udf)
Esempio n. 53
0
def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True,
               proposal_layer_param_string=None):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         (image_width, image_height, image_scale) as CNTK variable or constant
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses
        proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer
                         (and, when losses are added, to the anchor target layer as well).

    Returns:
        A tuple (rpn_rois, rpn_losses):
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness),
                     or None if add_loss_functions is False
    '''

    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    # A shared 3x3 convolution feeds two 1x1 heads: objectness scores and box regression.
    rpn_conv_3x3 = Convolution((3, 3), 256, activation=relu, pad=True, strides=1,
                                init = normal(scale=0.01), init_bias=0.1)(conv_out)
    rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
                                init = normal(scale=0.01), init_bias=0.1)(rpn_conv_3x3)  # 2(bg/fg)  * 9(anchors)
    rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
                                init = normal(scale=0.01), init_bias=0.1)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    # num_predictions is half of the total number of score elements, one per (bg, fg) pair
    num_predictions = int(np.prod(rpn_cls_score.shape) / 2)
    rpn_cls_score_rshp = reshape(rpn_cls_score, (2, num_predictions))
    rpn_cls_prob = softmax(rpn_cls_score_rshp, axis=0, name="objness_softmax")
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape)

    # proposal layer
    rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, param_str=proposal_layer_param_string))
    # the alias gives the proposals a fixed name ('rpn_rois')
    rpn_rois = alias(rpn_rois_raw, name='rpn_rois')

    # stays None unless the loss sub-graph below is requested
    rpn_losses = None
    if(add_loss_functions):
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info, param_str=proposal_layer_param_string))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # For loss functions: ignore label predictions for the 'ignore label',
        # i.e. set target and prediction to 0 --> needs to be softmaxed before
        rpn_labels_rshp = reshape(rpn_labels, (1, num_predictions))
        ignore = user_function(IgnoreLabel(rpn_cls_prob, rpn_labels_rshp, ignore_label=-1))
        rpn_cls_prob_ignore = ignore.outputs[0]
        fg_targets = ignore.outputs[1]
        # background targets are the complement of the foreground targets;
        # both are stacked along axis 0 to line up with the (2, num_predictions) probabilities
        bg_targets = 1 - fg_targets
        rpn_labels_ignore = splice(bg_targets, fg_targets, axis=0)

        # RPN losses
        rpn_loss_cls = cross_entropy_with_softmax(rpn_cls_prob_ignore, rpn_labels_ignore, axis=0)
        rpn_loss_bbox = user_function(SmoothL1Loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights))
        # the total loss is the sum of the reduced classification and regression losses
        rpn_losses = plus(reduce_sum(rpn_loss_cls), reduce_sum(rpn_loss_bbox), name="rpn_losses")

    return rpn_rois, rpn_losses
Esempio n. 54
0
def test_udf_op_name():
    '''
    Checks that the root function of a wrapped MyPlus user function has a
    non-empty string representation.
    '''
    dim = 4
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    plus_three = MyPlus(i, C.constant(3))
    m = C.user_function(plus_three)
    root_description = str(m.root_function)
    assert root_description != ''
def dense_layer(inp, output_dim, nonlinearity):
    '''
    Applies ``linear_layer`` followed by ``nonlinearity`` to ``inp``.

    If the nonlinearity returns a ``UserFunction`` instance, it is wrapped
    with ``C.user_function`` before being returned; any other result is
    returned unchanged.
    '''
    activated = nonlinearity(linear_layer(inp, output_dim))
    if not isinstance(activated, UserFunction):
        return activated
    return C.user_function(activated)
Esempio n. 56
0
 def _(x):
     # ``mu`` and ``sig`` are not parameters of this function; they are
     # resolved from a scope outside it
     log_prob_udf = __cntk_class_mvn_log_prob__(x, mu, sig)
     return C.user_function(log_prob_udf)
def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True,
               proposal_layer_param_string=None, conv_bias_init=0.0):
    '''
    Builds the region proposal network (RPN) for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    The network outputs object detection proposals by applying estimated
    bounding-box transformations to a set of regular boxes (called "anchors").

    Besides its arguments, this function reads RPN_NUM_CHANNELS, SIGMA_RPN_L1
    and RPN_BATCHSIZE from a `cfg` object that is not passed in as a parameter.

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses
        proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer
                         (and, when losses are requested, to the anchor target layer as well).
        conv_bias_init:  Value handed as `init_bias` to each of the three RPN convolutions.

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness),
                     or None when add_loss_functions is false
    '''

    # RPN network: one 3x3 convolution feeding two sibling 1x1 convolutions.
    # Every convolution uses normal(scale=0.01) weights and conv_bias_init as bias init.
    rpn_channels = cfg["CNTK"].RPN_NUM_CHANNELS
    conv_3x3_layer = Convolution((3, 3), rpn_channels, activation=relu, pad=True, strides=1,
                                 init=normal(scale=0.01), init_bias=conv_bias_init)
    rpn_conv_3x3 = conv_3x3_layer(conv_out)

    # 18 outputs = 2(bg/fg) * 9(anchors)
    cls_score_layer = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
                                  init=normal(scale=0.01), init_bias=conv_bias_init)
    rpn_cls_score = cls_score_layer(rpn_conv_3x3)

    # 36 outputs = 4(coords) * 9(anchors)
    bbox_pred_layer = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
                                  init=normal(scale=0.01), init_bias=conv_bias_init)
    rpn_bbox_pred = bbox_pred_layer(rpn_conv_3x3)

    # Regroup the scores so that axis 0 holds the (bg, fg) pair, apply softmax
    # along that axis, then reshape the probabilities back to the score shape.
    score_shape = rpn_cls_score.shape
    num_predictions = int(score_shape[0] / 2)
    grouped_shape = (2, num_predictions, score_shape[1], score_shape[2])
    rpn_cls_score_rshp = reshape(rpn_cls_score, grouped_shape, name="rpn_cls_score_rshp")
    softmax_input = cntk.placeholder()
    softmax_body = softmax(softmax_input, axis=0)
    rpn_cls_prob = cntk.as_block(softmax_body, [(softmax_input, rpn_cls_score_rshp)],
                                 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, score_shape, name="rpn_cls_prob_reshape")

    # proposal layer
    proposal_udf = ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                                 param_str=proposal_layer_param_string)
    rpn_rois = alias(user_function(proposal_udf), name='rpn_rois')

    # Without loss functions there is nothing more to build.
    if not add_loss_functions:
        return rpn_rois, None

    # RPN targets
    # rpn_cls_score is only passed to get width and height of the conv feature map ...
    anchor_target_udf = AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info,
                                          param_str=proposal_layer_param_string)
    anchor_targets = user_function(anchor_target_udf)
    rpn_labels = anchor_targets.outputs[0]
    rpn_bbox_targets = anchor_targets.outputs[1]
    rpn_bbox_inside_weights = anchor_targets.outputs[2]

    # classification loss, expressed on placeholders and bound to the real
    # nodes through as_block below
    labels_ph = cntk.placeholder()
    scores_ph = cntk.placeholder()

    # keep_mask selects the entries whose label is >= 0
    keep_mask = cntk.greater_equal(labels_ph, 0.0)
    fg_labels = element_times(labels_ph, keep_mask, name="fg_targets")
    bg_labels = minus(1, fg_labels, name="bg_targets")
    two_class_labels = splice(bg_labels, fg_labels, axis=0)
    cross_entropy = cross_entropy_with_softmax(scores_ph, two_class_labels, axis=0)
    masked_cross_entropy = element_times(cross_entropy, keep_mask)

    # The terms that are accounted for in the cls loss are those that have a label >= 0
    kept_count = reduce_sum(keep_mask)
    cls_scale = 1.0 / kept_count
    cls_loss_body = reduce_sum(masked_cross_entropy) * cls_scale

    cls_bindings = [(labels_ph, rpn_labels), (scores_ph, rpn_cls_score_rshp)]
    reduced_rpn_loss_cls = cntk.as_block(cls_loss_body, cls_bindings,
                                         'CE_with_ignore', 'norm_rpn_cls_loss')

    # regression loss, again built on placeholders
    bbox_pred_ph = cntk.placeholder()
    bbox_targets_ph = cntk.placeholder()
    inside_weights_ph = cntk.placeholder()
    smooth_l1 = SmoothL1Loss(cfg["CNTK"].SIGMA_RPN_L1, bbox_pred_ph, bbox_targets_ph,
                             inside_weights_ph, 1.0)
    # The bbox loss is normalized by the rpn batch size
    bbox_scale = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
    bbox_loss_body = reduce_sum(smooth_l1) * bbox_scale

    bbox_bindings = [(bbox_pred_ph, rpn_bbox_pred),
                     (bbox_targets_ph, rpn_bbox_targets),
                     (inside_weights_ph, rpn_bbox_inside_weights)]
    reduced_rpn_loss_bbox = cntk.as_block(bbox_loss_body, bbox_bindings,
                                          'SmoothL1Loss', 'norm_rpn_bbox_loss')

    rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses")
    return rpn_rois, rpn_losses
Esempio n. 58
0
def create_rpn(conv_out,
               scaled_gt_boxes,
               im_info,
               cfg,
               add_loss_functions=True):
    '''
    Builds the region proposal network (RPN) for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    The network outputs object detection proposals by applying estimated
    bounding-box transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
        cfg:             The configuration dictionary. It is indexed with "MODEL", "DATA" and "TRAIN"
                         and its SIGMA_RPN_L1 attribute is read directly.
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness),
                     or None when add_loss_functions is false
    '''

    # RPN network: one 3x3 convolution feeding two sibling 1x1 convolutions.
    # Every convolution uses normal(scale=0.01) weights and a bias init of 0.0.
    rpn_channels = cfg["MODEL"].RPN_NUM_CHANNELS
    conv_3x3_layer = Convolution((3, 3), rpn_channels, activation=relu, pad=True,
                                 strides=1, init=normal(scale=0.01), init_bias=0.0)
    rpn_conv_3x3 = conv_3x3_layer(conv_out)

    # 18 outputs = 2(bg/fg) * 9(anchors)
    cls_score_layer = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
                                  init=normal(scale=0.01), init_bias=0.0)
    rpn_cls_score = cls_score_layer(rpn_conv_3x3)

    # 36 outputs = 4(coords) * 9(anchors)
    bbox_pred_layer = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
                                  init=normal(scale=0.01), init_bias=0.0)
    rpn_bbox_pred = bbox_pred_layer(rpn_conv_3x3)

    # Regroup the scores so that axis 0 holds the (bg, fg) pair, apply softmax
    # along that axis, then reshape the probabilities back to the score shape.
    score_shape = rpn_cls_score.shape
    num_predictions = int(score_shape[0] / 2)
    grouped_shape = (2, num_predictions, score_shape[1], score_shape[2])
    rpn_cls_score_rshp = reshape(rpn_cls_score, grouped_shape, name="rpn_cls_score_rshp")
    softmax_input = cntk.placeholder()
    softmax_body = softmax(softmax_input, axis=0)
    rpn_cls_prob = cntk.as_block(softmax_body, [(softmax_input, rpn_cls_score_rshp)],
                                 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, score_shape, name="rpn_cls_prob_reshape")

    # proposal layer
    rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg)

    # Without loss functions there is nothing more to build.
    if not add_loss_functions:
        return rpn_rois, None

    # RPN targets
    # The anchor target layer receives a yaml-like parameter string with the
    # feature stride and one " - <scale>" entry per proposal layer scale.
    feat_stride = cfg["MODEL"].FEATURE_STRIDE
    scale_entries = "\n - ".join(str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES)
    proposal_layer_params = "'feat_stride': {}\n'scales':\n - {}".format(feat_stride, scale_entries)

    # rpn_cls_score is only passed to get width and height of the conv feature map ...
    train_cfg = cfg["TRAIN"]
    anchor_target_udf = AnchorTargetLayer(rpn_cls_score,
                                          scaled_gt_boxes,
                                          im_info,
                                          rpn_batch_size=train_cfg.RPN_BATCHSIZE,
                                          rpn_fg_fraction=train_cfg.RPN_FG_FRACTION,
                                          clobber_positives=train_cfg.RPN_CLOBBER_POSITIVES,
                                          positive_overlap=train_cfg.RPN_POSITIVE_OVERLAP,
                                          negative_overlap=train_cfg.RPN_NEGATIVE_OVERLAP,
                                          param_str=proposal_layer_params)
    anchor_targets = user_function(anchor_target_udf)
    rpn_labels = anchor_targets.outputs[0]
    rpn_bbox_targets = anchor_targets.outputs[1]
    rpn_bbox_inside_weights = anchor_targets.outputs[2]

    # classification loss, expressed on placeholders and bound to the real
    # nodes through as_block below
    labels_ph = cntk.placeholder()
    scores_ph = cntk.placeholder()

    # keep_mask selects the entries whose label is >= 0
    keep_mask = cntk.greater_equal(labels_ph, 0.0)
    fg_labels = element_times(labels_ph, keep_mask, name="fg_targets")
    bg_labels = minus(1, fg_labels, name="bg_targets")
    two_class_labels = splice(bg_labels, fg_labels, axis=0)
    cross_entropy = cross_entropy_with_softmax(scores_ph, two_class_labels, axis=0)
    masked_cross_entropy = element_times(cross_entropy, keep_mask)

    # The terms that are accounted for in the cls loss are those that have a label >= 0
    kept_count = reduce_sum(keep_mask)
    cls_scale = 1.0 / kept_count
    cls_loss_body = reduce_sum(masked_cross_entropy) * cls_scale

    cls_bindings = [(labels_ph, rpn_labels), (scores_ph, rpn_cls_score_rshp)]
    reduced_rpn_loss_cls = cntk.as_block(cls_loss_body, cls_bindings,
                                         'CE_with_ignore', 'norm_rpn_cls_loss')

    # regression loss, again built on placeholders
    bbox_pred_ph = cntk.placeholder()
    bbox_targets_ph = cntk.placeholder()
    inside_weights_ph = cntk.placeholder()
    smooth_l1 = SmoothL1Loss(cfg.SIGMA_RPN_L1, bbox_pred_ph, bbox_targets_ph,
                             inside_weights_ph, 1.0)
    # The bbox loss is normalized by the rpn batch size
    bbox_scale = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
    bbox_loss_body = reduce_sum(smooth_l1) * bbox_scale

    bbox_bindings = [(bbox_pred_ph, rpn_bbox_pred),
                     (bbox_targets_ph, rpn_bbox_targets),
                     (inside_weights_ph, rpn_bbox_inside_weights)]
    reduced_rpn_loss_bbox = cntk.as_block(bbox_loss_body, bbox_bindings,
                                          'SmoothL1Loss', 'norm_rpn_bbox_loss')

    rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses")
    return rpn_rois, rpn_losses
def dense_layer(inp, output_dim, nonlinearity):
    '''
    Runs the input through ``linear_layer`` and then through the nonlinearity.

    Args:
      inp: input forwarded unchanged to ``linear_layer``
      output_dim: output dimension forwarded to ``linear_layer``
      nonlinearity (callable): called with the result of ``linear_layer``

    Returns:
      the result of ``nonlinearity``, passed through ``C.user_function`` when
      it is a ``UserFunction`` instance and returned as is otherwise
    '''
    projected = linear_layer(inp, output_dim)
    out = nonlinearity(projected)
    return C.user_function(out) if isinstance(out, UserFunction) else out
Esempio n. 60
0
def test_no_deadlock_init_outputs():
    '''
    Expects that wrapping ``FaultyUserFunc`` with ``user_function`` raises a
    ``RuntimeError``.
    '''
    # NOTE(review): per the test name, the point is presumably that the failure
    # surfaces as an exception instead of a hang - confirm against FaultyUserFunc.
    x = C.input_variable((3, C.FreeDimension, 2), name='x')
    from cntk import user_function
    with pytest.raises(RuntimeError):
        # The assigned value is never used; only the raised exception matters.
        s = user_function(FaultyUserFunc(x))