Example #1
def minus(left, right, name=''):
    '''
    The output of this operation is the element-wise difference of the two input tensors (left minus right). It supports broadcasting.
    In the case of scalars, the backward pass propagates the received gradient unchanged.
    The operator (-) is overloaded and can be used in place of minus().

    Example:
        >>> C.eval(C.minus([1, 2, 3], [4, 5, 6]))
        [array([[-3., -3., -3.]])]
        
        >>> C.eval(C.minus([[1,2],[3,4]], 1))
        [array([[[ 0.,  1.],
                 [ 2.,  3.]]])]

    Args:
        left: left side tensor
        right: right side tensor
        name (str): the name of the node in the network            
    Returns:
        :class:`cntk.Function`
    '''

    from cntk import minus
    left = sanitize_input(left, get_data_type(right))
    right = sanitize_input(right, get_data_type(left))
    return minus(left, right, name).output()
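Since the docstring notes that the (-) operator is overloaded, here is a minimal sketch of the equivalent operator form (assuming a standard CNTK 2.x install imported as C):

import cntk as C

# The overloaded '-' operator builds the same graph node as C.minus().
x = C.input_variable(3)
diff = x - C.constant([4.0, 5.0, 6.0])      # equivalent to C.minus(x, C.constant(...))
print(diff.eval({x: [[1.0, 2.0, 3.0]]}))    # expected: [[-3. -3. -3.]]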
Example #3
def test_minus_3():
    cntk_op = C.minus([1, 2, 3], [[4, 5, 6], [7, 8, 9]])
    cntk_ret = cntk_op.eval()

    ng_op, _ = CNTKImporter().import_model(cntk_op)
    ng_ret = ng.transformers.make_transformer().computation(ng_op)()

    assert np.array_equal(cntk_ret, ng_ret)
Example #4
def create_network(feature_dim = 40, num_classes=256, feature_mean_file=None, feature_inv_stddev_file=None,
                       feature_norm_files = None, label_prior_file = None, context=(0,0), model_type=None):

    def MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file):
        m = C.reshape(load_ascii_vector(feature_mean_file,'feature_mean'), shape=(1, feature_dim))
        s = C.reshape(load_ascii_vector(feature_inv_stddev_file,'feature_invstddev'), shape=(1,feature_dim))
        def _func(operand):
            return C.reshape(C.element_times(C.reshape(operand,shape=(1+context[0]+context[1], feature_dim)) - m, s), shape=operand.shape)
        return _func


    def MyDNNLayer(hidden_size=128, num_layers=2):
        return C.layers.Sequential([
            C.layers.For(range(num_layers), lambda: C.layers.Dense(hidden_size, activation=C.sigmoid))
        ])

    def MyBLSTMLayer(hidden_size=128, num_layers=2):
        W = C.Parameter((C.InferredDimension, hidden_size), init=C.he_normal(1.0), name='rnn_parameters')
        def _func(operand):
            return C.optimized_rnnstack(operand, weights=W, hidden_size=hidden_size, num_layers=num_layers, bidirectional=True, recurrent_op='lstm' )
        return _func

    # Input variables denoting the features and label data
    feature_var = C.sequence.input_variable(feature_dim * (1+context[0]+context[1]))
    label_var = C.sequence.input_variable(num_classes)

    feature_norm = MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file)(feature_var)
    label_prior = load_ascii_vector(label_prior_file, 'label_prior')
    log_prior = C.log(label_prior)

    if (model_type=="DNN"):
        net = MyDNNLayer(512,4)(feature_norm)
    elif (model_type=="BLSTM"):
        net = MyBLSTMLayer(512,2)(feature_norm)
    else:
        raise RuntimeError("model_type must be DNN or BLSTM")

    out = C.layers.Dense(num_classes, init=C.he_normal(scale=1/3))(net)

    # loss and metric
    ce = C.cross_entropy_with_softmax(out, label_var)
    pe = C.classification_error(out, label_var)
    ScaledLogLikelihood = C.minus(out, log_prior, name='ScaledLogLikelihood')

    # talk to the user
    C.logging.log_number_of_parameters(out)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'output': out,
        'ScaledLogLikelihood': ScaledLogLikelihood,
        'ce': ce,
        'pe': pe,
        'final_hidden': net # adding last hidden layer output for future use in CTC tutorial
    }
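For intuition on the ScaledLogLikelihood output: by Bayes' rule in the log domain, subtracting the log class prior from the network's log posterior yields a (scaled) log likelihood suitable for decoding. A rough NumPy illustration with made-up numbers:

import numpy as np

# log p(x|class) = log p(class|x) - log p(class) + const
log_posterior = np.log(np.array([0.7, 0.2, 0.1]))   # illustrative softmax outputs
log_prior     = np.log(np.array([0.5, 0.3, 0.2]))   # illustrative class priors
print(log_posterior - log_prior)                     # scaled log likelihoods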
Example #6
def SmoothL1Loss(sigma, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights):
    """
        From https://github.com/smallcorgi/Faster-RCNN_TF/blob/master/lib/fast_rcnn/train.py

        ResultLoss = outside_weights * SmoothL1(inside_weights * (bbox_pred - bbox_targets))
        SmoothL1(x) = 0.5 * (sigma * x)^2,    if |x| < 1 / sigma^2
                      |x| - 0.5 / sigma^2,    otherwise
    """
    sigma2 = sigma * sigma

    inside_mul_abs = C.abs(C.element_times(bbox_inside_weights, C.minus(bbox_pred, bbox_targets)))

    smooth_l1_sign = C.less(inside_mul_abs, 1.0 / sigma2)
    smooth_l1_option1 = C.element_times(C.element_times(inside_mul_abs, inside_mul_abs), 0.5 * sigma2)
    smooth_l1_option2 = C.minus(inside_mul_abs, 0.5 / sigma2)
    smooth_l1_result = C.plus(C.element_times(smooth_l1_option1, smooth_l1_sign),
                              C.element_times(smooth_l1_option2, C.minus(1.0, smooth_l1_sign)))

    return C.element_times(bbox_outside_weights, smooth_l1_result)
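For reference, the piecewise definition in the docstring can be checked directly in NumPy; this is an illustration only, not part of the CNTK graph:

import numpy as np

# A plain-NumPy restatement of the same piecewise Smooth L1.
def smooth_l1_numpy(x, sigma=1.0):
    sigma2 = sigma * sigma
    ax = np.abs(x)
    quadratic = ax < 1.0 / sigma2                        # |x| < 1/sigma^2 -> quadratic branch
    return np.where(quadratic, 0.5 * sigma2 * ax * ax, ax - 0.5 / sigma2)

print(smooth_l1_numpy(np.array([-2.0, -0.3, 0.0, 0.4, 3.0])))   # [1.5, 0.045, 0., 0.08, 2.5]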
Example #8
def _simple_dict():
    d = {}

    d['i1'] = C.input_variable(shape=(2, 3), name='i1')
    d['c1'] = C.constant(shape=(2, 3), value=6, name='c1')
    d['p1'] = C.parameter(shape=(3, 2), init=7, name='p1')
    d['op1'] = C.plus(d['i1'], d['c1'], name='op1')
    d['op2'] = C.times(d['op1'], d['p1'], name='op2')
    d['root'] = d['op2']

    d['target'] = C.input_variable((), name='label')
    d['all'] = C.combine([d['root'], C.minus(
        d['target'], C.constant(1, name='c2'), name='minus')], name='all')

    return d
Example #9

    def MyDNNLayer(hidden_size=128, num_layers=2):
        return C.layers.Sequential([
            C.layers.For(range(num_layers), lambda: C.layers.Dense(hidden_size) >> C.layers.BatchNormalization() >> C.sigmoid >> C.layers.Dropout(.3))
        ])

    def MyBLSTMLayer(hidden_size=128, num_layers=2):                                                        
        W = C.Parameter((C.InferredDimension, hidden_size), init=C.he_normal(1.0), name='rnn_parameters')  # C.Parameter creates a learnable parameter tensor holding the RNN weights
        def _func(operand):  # operand is the input data
            return C.optimized_rnnstack(operand, weights=W, hidden_size=hidden_size, num_layers=num_layers, bidirectional=True, recurrent_op='lstm' )
        return _func

    # Input variables denoting the features and label data
    # Shape of the input data: input_variable creates a network input, i.e. a place where
    # data such as features and labels are fed in.
    feature_var = C.sequence.input_variable(feature_dim * (1+context[0]+context[1]))
    label_var = C.sequence.input_variable(num_classes)
    # First layer: feature_var becomes the operand passed to _func inside MyMeanVarNorm
    feature_norm = MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file)(feature_var)
    label_prior = load_ascii_vector(label_prior_file, 'label_prior')
    log_prior = C.log(label_prior)  # element-wise natural logarithm of label_prior

    if (model_type=="DNN"):
        net = MyDNNLayer(512,4)(feature_norm)
    elif (model_type=="BLSTM"):
        net = MyBLSTMLayer(512,3)(feature_norm)
    else:
        raise RuntimeError("model_type must be DNN or BLSTM")
    # Last layer of either network. C.he_normal initializes the weights from a Gaussian
    # distribution with mean 0 and a scaled standard deviation.
    out = C.layers.Dense(num_classes, init=C.he_normal(scale=1/3))(net)

    # loss and metric
    ce = C.cross_entropy_with_softmax(out, label_var)  # training loss (objective function)
    pe = C.classification_error(out, label_var)        # evaluation metric
    ScaledLogLikelihood = C.minus(out, log_prior, name='ScaledLogLikelihood')

    # talk to the user
    C.logging.log_number_of_parameters(out)  # print the number of parameters in the whole model
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'output': out,
        'ScaledLogLikelihood': ScaledLogLikelihood,
        'ce': ce,
        'pe': pe,
        'final_hidden': net # adding last hidden layer output for future use in CTC tutorial
    }
Example #10

def create_binary_convolution_model():

    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 64, pad=True,
                             activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3, 3), 128, channels=64, pad=True)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3, 3), 128, channels=128, pad=True)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (1, 1), num_classes, channels=128, pad=True)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes, ))

    # Add binary regularization (ala Gang Hua)
    weight_sum = C.constant(0)
    for p in z.parameters:
        if (p.name == "filter"):
            weight_sum = C.plus(weight_sum,
                                C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, we need to apply a learnable scale
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
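The regularizer built in the loop above sums 1 - w^2 over the binary filters; a quick NumPy illustration of why this pushes weights toward +/-1:

import numpy as np

# sum(1 - w^2) is zero when every weight sits at +/-1 and grows as weights drift toward 0.
w = np.array([-1.0, -0.2, 0.0, 0.7, 1.0])
print(np.sum(1.0 - np.square(w)))   # 0 + 0.96 + 1.0 + 0.51 + 0 = 2.47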
Example #11
def create_binary_convolution_model():

    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3,3), 128, channels=32, pad=True)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3,3), 128, channels=128, pad=True)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (1,1), num_classes, channels=128, pad=True)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    # Add binary regularization (ala Gang Hua)
    weight_sum = C.constant(0)
    for p in z.parameters:
        if (p.name == "filter"):
            weight_sum = C.plus(weight_sum, C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, we need to apply a learnable scale
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
Example #12
    def build_trainer(self):

        # Set the learning rate, and the momentum parameters for the Adam optimizer.
        lr = learning_rate_schedule(self.lr, UnitType.minibatch)
        beta1 = momentum_schedule(0.9)
        beta2 = momentum_schedule(0.99)

        # Calculate the losses.
        loss_on_v = cntk.squared_error(self.R, self.v)
        pi_a_s = cntk.log(cntk.times_transpose(self.pi, self.action))

        loss_on_pi = cntk.variables.Constant(-1) * (cntk.plus(
            cntk.times(pi_a_s, cntk.minus(self.R, self.v_calc)),
            0.01 * cntk.times_transpose(self.pi, cntk.log(self.pi))))
        #loss_on_pi = cntk.times(pi_a_s, cntk.minus(self.R, self.v_calc))

        self.tensorboard_v_writer = TensorBoardProgressWriter(
            freq=10, log_dir="tensorboard_v_logs", model=self.v)
        self.tensorboard_pi_writer = TensorBoardProgressWriter(
            freq=10, log_dir="tensorboard_pi_logs", model=self.pi)

        # tensorboard --logdir=tensorboard_pi_logs  http://localhost:6006/
        # tensorboard --logdir=tensorboard_v_logs  http://localhost:6006/

        # Create the trainers.
        self.trainer_v = cntk.Trainer(self.v, (loss_on_v), [
            adam(self.pms_v,
                 lr,
                 beta1,
                 variance_momentum=beta2,
                 gradient_clipping_threshold_per_sample=2,
                 l2_regularization_weight=0.01)
        ], self.tensorboard_v_writer)
        self.trainer_pi = cntk.Trainer(self.pi, (loss_on_pi), [
            adam(self.pms_pi,
                 lr,
                 beta1,
                 variance_momentum=beta2,
                 gradient_clipping_threshold_per_sample=2,
                 l2_regularization_weight=0.01)
        ], self.tensorboard_pi_writer)
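The policy loss above combines the advantage-weighted log probability of the chosen action with a small entropy-like term; a hypothetical NumPy restatement (values are made up):

import numpy as np

# loss_on_pi = -(log pi(a|s) * (R - V) + 0.01 * sum(pi * log pi))
pi = np.array([0.7, 0.2, 0.1])          # action probabilities from the policy network
action = np.array([1.0, 0.0, 0.0])      # one-hot encoding of the chosen action
R, v = 1.0, 0.4                         # observed return and value estimate
log_pi_a = np.log(pi @ action)          # log-probability of the chosen action
loss_on_pi = -1.0 * (log_pi_a * (R - v) + 0.01 * (pi @ np.log(pi)))
print(loss_on_pi)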
Example #13
def create_binary_convolution_model():

    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    z = C.layers.Convolution((3, 3), 32, pad=True,
                             activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution((3, 3), 128, channels=32, pad=True)(z)

    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution((3, 3), 128, channels=128, pad=True)(z)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution((1, 1), num_classes, channels=128, pad=True)(z)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes, ))

    weight_sum = C.constant(0)
    for p in z.parameters:
        if (p.name == "filter"):
            weight_sum = C.plus(weight_sum,
                                C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
Example #14
def test_Sub(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        model = C.minus(
            np.array([1, 2, 3]).astype(dtype),
            np.array([4, 5, 6]).astype(dtype))
        verify_no_input(model, tmpdir, 'Sub_0')
Example #15
def test_Sub(tmpdir):
    pytest.skip('Need to support new ONNX spec.')
    model = C.minus([1, 2, 3], [4, 5, 6])
    verify_no_input(model, tmpdir, 'Sub_0')
Example #16
def gram(x):
    features = C.minus(flatten(x), C.reduce_mean(x))
    return C.times_transpose(features, features)
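A rough NumPy equivalent, under the assumption that the flatten helper reshapes a (C, H, W) feature map to (C, H*W) and that reduce_mean with no axis returns the scalar mean of the whole tensor:

import numpy as np

# Gram matrix of mean-centred features (illustration only).
def gram_numpy(x):
    feats = x.reshape(x.shape[0], -1) - x.mean()
    return feats @ feats.T               # (C, C) Gram matrix

print(gram_numpy(np.arange(12, dtype=float).reshape(2, 2, 3)))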
Example #17
def test_Sub(tmpdir, dtype):
    with C.default_options(dtype = dtype):
        model = C.minus(np.array([1, 2, 3]).astype(dtype), np.array([4, 5, 6]).astype(dtype))
        verify_no_input(model, tmpdir, 'Sub_0')
Example #18
def create_rpn(conv_out,
               scaled_gt_boxes,
               im_info,
               cfg,
               add_loss_functions=True):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
        cfg:             The configuration dictionary
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    '''

    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    num_channels = cfg["MODEL"].RPN_NUM_CHANNELS
    rpn_conv_3x3 = Convolution((3, 3),
                               num_channels,
                               activation=relu,
                               pad=True,
                               strides=1,
                               init=normal(scale=0.01),
                               init_bias=0.0)(conv_out)
    rpn_cls_score = Convolution(
        (1, 1),
        18,
        activation=None,
        name="rpn_cls_score",
        init=normal(scale=0.01),
        init_bias=0.0)(rpn_conv_3x3)  # 2(bg/fg)  * 9(anchors)
    rpn_bbox_pred = Convolution(
        (1, 1),
        36,
        activation=None,
        name="rpn_bbox_pred",
        init=normal(scale=0.01),
        init_bias=0.0)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(rpn_cls_score.shape[0] / 2)
    rpn_cls_score_rshp = reshape(
        rpn_cls_score,
        (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]),
        name="rpn_cls_score_rshp")
    p_rpn_cls_score_rshp = cntk.placeholder()
    rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0)
    rpn_cls_prob = cntk.as_block(rpn_cls_sm,
                                 [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
                                 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob,
                                   rpn_cls_score.shape,
                                   name="rpn_cls_prob_reshape")

    # proposal layer
    rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred,
                                     im_info, cfg)

    rpn_losses = None
    if (add_loss_functions):
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        proposal_layer_params = "'feat_stride': {}\n'scales':\n - {}". \
            format(cfg["MODEL"].FEATURE_STRIDE, "\n - ".join([str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES]))
        atl = user_function(
            AnchorTargetLayer(
                rpn_cls_score,
                scaled_gt_boxes,
                im_info,
                rpn_batch_size=cfg["TRAIN"].RPN_BATCHSIZE,
                rpn_fg_fraction=cfg["TRAIN"].RPN_FG_FRACTION,
                clobber_positives=cfg["TRAIN"].RPN_CLOBBER_POSITIVES,
                positive_overlap=cfg["TRAIN"].RPN_POSITIVE_OVERLAP,
                negative_overlap=cfg["TRAIN"].RPN_NEGATIVE_OVERLAP,
                param_str=proposal_layer_params))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # classification loss
        p_rpn_labels = cntk.placeholder()
        p_rpn_cls_score_rshp = cntk.placeholder()

        keeps = cntk.greater_equal(p_rpn_labels, 0.0)
        fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets")
        bg_labels = minus(1, fg_labels, name="bg_targets")
        rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0)
        rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp,
                                            rpn_labels_ignore,
                                            axis=0)
        rpn_loss_cls = element_times(rpn_ce, keeps)

        # The terms that are accounted for in the cls loss are those that have a label >= 0
        cls_num_terms = reduce_sum(keeps)
        cls_normalization_factor = 1.0 / cls_num_terms
        normalized_rpn_cls_loss = reduce_sum(
            rpn_loss_cls) * cls_normalization_factor

        reduced_rpn_loss_cls = cntk.as_block(
            normalized_rpn_cls_loss,
            [(p_rpn_labels, rpn_labels),
             (p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 'CE_with_ignore',
            'norm_rpn_cls_loss')

        # regression loss
        p_rpn_bbox_pred = cntk.placeholder()
        p_rpn_bbox_targets = cntk.placeholder()
        p_rpn_bbox_inside_weights = cntk.placeholder()
        rpn_loss_bbox = SmoothL1Loss(cfg.SIGMA_RPN_L1, p_rpn_bbox_pred,
                                     p_rpn_bbox_targets,
                                     p_rpn_bbox_inside_weights, 1.0)
        # The bbox loss is normalized by the rpn batch size
        bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
        normalized_rpn_bbox_loss = reduce_sum(
            rpn_loss_bbox) * bbox_normalization_factor

        reduced_rpn_loss_bbox = cntk.as_block(
            normalized_rpn_bbox_loss,
            [(p_rpn_bbox_pred, rpn_bbox_pred),
             (p_rpn_bbox_targets, rpn_bbox_targets),
             (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)],
            'SmoothL1Loss', 'norm_rpn_bbox_loss')

        rpn_losses = plus(reduced_rpn_loss_cls,
                          reduced_rpn_loss_bbox,
                          name="rpn_losses")

    return rpn_rois, rpn_losses
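The keeps/fg/bg construction above implements a label-ignore mechanism; a small NumPy sketch of the same masking, assuming labels are 1 (foreground), 0 (background) or -1 (ignore) and using hypothetical per-anchor cross-entropy values:

import numpy as np

labels = np.array([1.0, 0.0, -1.0, 1.0])          # rpn labels: 1 = fg, 0 = bg, -1 = ignore
keeps = (labels >= 0).astype(float)               # 1 only for anchors that should contribute
fg_targets = labels * keeps                       # foreground target row
bg_targets = 1.0 - fg_targets                     # background target row (spliced with fg in the graph)
per_anchor_ce = np.array([0.3, 0.7, 5.0, 0.2])    # hypothetical per-anchor cross-entropy values
print(np.sum(per_anchor_ce * keeps) / np.sum(keeps))   # the ignored anchor does not affect the loss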
Example #19
def get_model(f_dim, c_dim, l_dim, m_dim, num_stack_layers,
              super_res_class_weight, super_res_loss_weight,
              high_res_loss_weight):
    # Define the variables into which the minibatch data will be loaded.
    num_nlcd_classes, num_landcover_classes = c_dim
    _, block_size, _ = f_dim
    input_im = cntk.input_variable(f_dim, np.float32)
    lc = cntk.input_variable(l_dim, np.float32)
    lc_weight_map = cntk.input_variable((1, l_dim[1], l_dim[2]), np.float32)
    interval_center = cntk.input_variable(c_dim, np.float32)
    interval_radius = cntk.input_variable(c_dim, np.float32)
    mask = cntk.input_variable(m_dim, np.float32)

    # Create the model definition. c_map defines the number of filters trained
    # at layers of different depths in the model. num_stack_layers defines the
    # number of (modified) residual units per layer.
    # model = dense_fc_model(
    #     input_tensor=input_im,
    #     num_stack_layers=num_stack_layers,
    #     c_map=[32, 32, 16, 16, 16],
    #     num_classes=num_landcover_classes,
    #     bs=block_size
    # )

    model = cnn_model(input_tensor=input_im,
                      num_stack_layers=num_stack_layers,
                      c_map=[64, 32, 32, 32, 32],
                      num_classes=num_landcover_classes,
                      bs=block_size)

    # At this stage the model produces output for the whole region in the input
    # image, but we will focus only on the center of that region during
    # training. Here we drop the predictions at the edges.
    output = cntk.reshape(model,
                          (num_landcover_classes, block_size, block_size))
    probs = cntk.reshape(cntk.softmax(output, axis=0),
                         (num_landcover_classes, block_size, block_size))

    # Now we calculate the super-res loss. Note that this loss function can
    # become negative since the variance is fractional.
    # Additionally, when the nlcd mask[0, ...] is all ones, meaning there is no
    # nlcd label anywhere, the super_res_loss must come out as a constant.
    super_res_crit = 0
    mask_size = cntk.reshape(
        cntk.reduce_sum(cntk.slice(mask, 0, 1, num_nlcd_classes)),
        (1, )) + 10.0
    # Not considering nlcd class 0
    for nlcd_id in range(1, num_nlcd_classes):
        c_mask = cntk.reshape(cntk.slice(mask, 0, nlcd_id, nlcd_id + 1),
                              (1, block_size, block_size))
        c_mask_size = cntk.reshape(cntk.reduce_sum(c_mask), (1, )) + 0.000001
        c_interval_center = cntk.reshape(
            cntk.slice(interval_center, 0, nlcd_id, nlcd_id + 1),
            (num_landcover_classes, ))
        c_interval_radius = cntk.reshape(
            cntk.slice(interval_radius, 0, nlcd_id, nlcd_id + 1),
            (num_landcover_classes, ))

        # For each nlcd class, we have a landcover distribution:
        masked_probs = probs * c_mask
        # Mean of the predicted distribution over the masked region
        mean = cntk.reshape(cntk.reduce_sum(masked_probs, axis=(1, 2)),
                            (num_landcover_classes, )) / c_mask_size
        # Variance of the predicted distribution over the masked region
        var = cntk.reshape(
            cntk.reduce_sum(masked_probs * (1. - masked_probs), axis=(1, 2)),
            (num_landcover_classes, )) / c_mask_size
        c_super_res_crit = cntk.square(ddist(mean, c_interval_center, c_interval_radius)) / (
                            var / c_mask_size + c_interval_radius * c_interval_radius + 0.000001) \
                        + cntk.log(var + 0.03)
        super_res_crit += c_super_res_crit * c_mask_size / mask_size * super_res_class_weight[
            nlcd_id]

    # Weight super_res loss according to the ratio of unlabeled LC pixels
    super_res_loss = cntk.reduce_sum(super_res_crit) * cntk.reduce_mean(
        cntk.slice(lc, 0, 0, 1))

    log_probs = cntk.log(probs)
    high_res_crit = cntk.times([0.0, 1.0, 1.0, 1.0, 1.0],
                               cntk.element_times(
                                   -cntk.element_times(log_probs, lc),
                                   lc_weight_map),
                               output_rank=2)
    # Average across spatial dimensions
    # Sum over all landcover classes, only one of the landcover classes is non-zero

    #high_res_loss = cntk.reduce_mean(high_res_crit)

    print("probs", probs)
    print("lc", lc)
    print("lc_weight_map", lc_weight_map)
    print("cntk.element_times(probs, lc)", cntk.element_times(probs, lc))

    iou_loss_i = cntk.element_times([0.0, 1.0, 1.0, 1.0, 1.0],
                                    cntk.reduce_sum(cntk.element_times(
                                        cntk.element_times(probs, lc),
                                        lc_weight_map),
                                                    axis=(1, 2)))
    print("iou_loss_i", iou_loss_i)
    iou_loss_u = cntk.element_times([0.0, 1.0, 1.0, 1.0, 1.0],
                                    cntk.reduce_sum(cntk.minus(
                                        cntk.plus(probs, lc),
                                        cntk.element_times(probs, lc)),
                                                    axis=(1, 2)))
    print("iou_loss_u", iou_loss_u)

    high_res_loss = 1.0 - (
        (1 / 4.0) *
        cntk.reduce_mean(cntk.element_divide(iou_loss_i, iou_loss_u)))

    print("high_res_loss", high_res_loss)

    loss = super_res_loss_weight * super_res_loss + high_res_loss_weight * high_res_loss

    return input_im, lc, lc_weight_map, mask, interval_center, interval_radius, \
           output, high_res_loss_weight * high_res_loss, loss
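The iou_loss_i / iou_loss_u terms above form a soft IoU; a small NumPy sketch for a single landcover class (illustrative values only):

import numpy as np

probs = np.array([[0.8, 0.1], [0.6, 0.2]])    # predicted probabilities for this class
lc    = np.array([[1.0, 0.0], [1.0, 0.0]])    # one-hot ground truth for this class
intersection = np.sum(probs * lc)
union = np.sum(probs + lc - probs * lc)
print(1.0 - intersection / union)             # soft IoU loss for this class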
Example #20
# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
from __future__ import print_function

import cntk as C

import ngraph as ng
from ngraph.frontends.cntk.cntk_importer.importer import CNTKImporter

cntk_op = C.minus([1, 2, 3], [4, 5, 6])

ng_op, ng_placeholders = CNTKImporter().import_model(cntk_op)
results = ng.transformers.make_transformer().computation(ng_op)
print(results())
Example #21
def test_Sub(tmpdir):
    model = C.minus([1, 2, 3], [4, 5, 6])
    verify_no_input(model, tmpdir, 'Sub_0')
Example #22

def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True,
               proposal_layer_param_string=None, conv_bias_init=0.0):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses
        proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer.

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    '''

    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    num_channels = cfg["CNTK"].RPN_NUM_CHANNELS
    rpn_conv_3x3 = Convolution((3, 3), num_channels, activation=relu, pad=True, strides=1,
                                init = normal(scale=0.01), init_bias=conv_bias_init)(conv_out)
    rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
                                init = normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3)  # 2(bg/fg)  * 9(anchors)
    rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
                                init = normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(rpn_cls_score.shape[0] / 2)
    rpn_cls_score_rshp = reshape(rpn_cls_score, (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]), name="rpn_cls_score_rshp")
    p_rpn_cls_score_rshp = cntk.placeholder()
    rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0)
    rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")

    # proposal layer
    rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, param_str=proposal_layer_param_string))
    rpn_rois = alias(rpn_rois_raw, name='rpn_rois')

    rpn_losses = None
    if(add_loss_functions):
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info, param_str=proposal_layer_param_string))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # classification loss
        p_rpn_labels = cntk.placeholder()
        p_rpn_cls_score_rshp = cntk.placeholder()

        keeps = cntk.greater_equal(p_rpn_labels, 0.0)
        fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets")
        bg_labels = minus(1, fg_labels, name="bg_targets")
        rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0)
        rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp, rpn_labels_ignore, axis=0)
        rpn_loss_cls = element_times(rpn_ce, keeps)

        # The terms that are accounted for in the cls loss are those that have a label >= 0
        cls_num_terms = reduce_sum(keeps)
        cls_normalization_factor = 1.0 / cls_num_terms
        normalized_rpn_cls_loss = reduce_sum(rpn_loss_cls) * cls_normalization_factor

        reduced_rpn_loss_cls = cntk.as_block(normalized_rpn_cls_loss,
                                         [(p_rpn_labels, rpn_labels), (p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
                                         'CE_with_ignore', 'norm_rpn_cls_loss')

        # regression loss
        p_rpn_bbox_pred = cntk.placeholder()
        p_rpn_bbox_targets = cntk.placeholder()
        p_rpn_bbox_inside_weights = cntk.placeholder()
        rpn_loss_bbox = SmoothL1Loss(cfg["CNTK"].SIGMA_RPN_L1, p_rpn_bbox_pred, p_rpn_bbox_targets, p_rpn_bbox_inside_weights, 1.0)
        # The bbox loss is normalized by the rpn batch size
        bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
        normalized_rpn_bbox_loss = reduce_sum(rpn_loss_bbox) * bbox_normalization_factor

        reduced_rpn_loss_bbox = cntk.as_block(normalized_rpn_bbox_loss,
                                          [(p_rpn_bbox_pred, rpn_bbox_pred), (p_rpn_bbox_targets, rpn_bbox_targets),
                                           (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)],
                                          'SmoothL1Loss', 'norm_rpn_bbox_loss')

        rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses")

    return rpn_rois, rpn_losses
Example #23
import cntk
a = [1, 2, 3]
b = [4, 5, 6]
c = cntk.minus(a, b).eval()
print(c)
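For reference, the expected result can be restated in plain Python (the eval above should print the element-wise difference):

# Plain-Python restatement of the same subtraction.
print([x - y for x, y in zip([1, 2, 3], [4, 5, 6])])   # [-3, -3, -3]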
Example #24
def train_model(reader_train, reader_test, model_func, max_epochs):
    # similar to a placeholder in TensorFlow
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # preprocess
    mean_removed_features = C.minus(input_var,
                                    C.constant(114),
                                    name='mean_removed_input')

    # output of network, loss and metrics
    z = model_func(mean_removed_features, out_dims=num_classes)
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    epoch_size = 10400
    minibatch_size = 256
    target_loss = 1.50

    # learning rate, momentum coefficient and weight decay (regularization coefficient)
    lr_per_mb = [0.01] * 25 + [0.001] * 25 + [0.0001] * 25 + [0.00001] * 25 + [
        0.000001
    ]
    lr_schedule = C.learning_parameter_schedule(lr_per_mb,
                                                minibatch_size=minibatch_size,
                                                epoch_size=epoch_size)
    mm_schedule = C.learners.momentum_schedule(0.9,
                                               minibatch_size=minibatch_size)
    l2_reg_weight = 0.0005  # CNTK L2 regularization is per sample, thus same as Caffe

    # optimizer
    learner = C.learners.momentum_sgd(z.parameters,
                                      lr=lr_schedule,
                                      momentum=mm_schedule,
                                      minibatch_size=minibatch_size,
                                      unit_gain=False,
                                      l2_regularization_weight=l2_reg_weight)
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), [learner], [progress_printer])

    # similar to feed_dict in TensorFlow
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }
    C.logging.log_number_of_parameters(z)
    print()

    # train loop
    finished = False
    for epoch in range(1, max_epochs + 1):
        if finished:
            logging.info("Training finished!")
            break
        sample_count = 0
        batch_cnt = 1
        while sample_count < epoch_size:
            batch_begin_time = time.time()
            data = reader_train.next_minibatch(min(minibatch_size,
                                                   epoch_size - sample_count),
                                               input_map=input_map)
            _, dict_out = trainer.train_minibatch(data, outputs=[ce, pe])
            curr_loss = float(np.mean(dict_out[ce]))
            curr_err_rate = float(np.mean(dict_out[pe]))
            #curr_loss = np.mean(ce.eval(data), axis=0)
            logging.info(
                "epoch[%d of %d] - batch[%d] - training loss=%f - training err_rate = %f %% - %f examples/s"
                % (epoch, max_epochs, batch_cnt, curr_loss,
                   curr_err_rate * 100, data[label_var].num_samples /
                   (time.time() - batch_begin_time)))
            sample_count += data[label_var].num_samples
            batch_cnt += 1
            if curr_loss < target_loss:
                finished = True
                break
Example #25
def main(_):
    print("CNTK: " + cntk.__version__)
    print(cntk.minus([1, 2, 3], [4, 5, 6]).eval())
Example #27
# Import CNTK library
import cntk
import numpy as np

#################################
#### Mathematical operations ####
#################################

# Initial definition
a = [1, 2, 3]
b = [3, 2, 1]

# Get the type of the variable
print(type(a))

# Subtraction
print(cntk.minus(a, b).eval())

# Addition
print(cntk.plus(a, b).eval())

# Element-wise division
print(cntk.element_divide(a, b).eval())

# Defining variable
variable = cntk.input_variable((2), np.float32)
print(variable)
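As a possible follow-up to the input variable defined above (an assumption, not part of the original script), data can be bound to it at evaluation time:

# Build an op on the input variable and feed it data when evaluating.
op = cntk.minus(variable, [1.0, 1.0])
print(op.eval({variable: np.array([[5.0, 7.0]], dtype=np.float32)}))   # expected: [[4. 6.]]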
Example #28
import cntk
print("Tensor A = [1,2,3]")
print("Tensor B = [4,5,6]\n")

print("A+B:")
sum = cntk.plus([1, 2, 3], [4, 5, 6]).eval()
print("{}\n".format(sum))

print("A-B:")
minus = cntk.minus([1, 2, 3], [4, 5, 6]).eval()
print("{}\n".format(minus))

print("A*B:")
times = cntk.times([1, 3, 4], [4, 5, 6]).eval()
print("{}\n".format(times))

print("A/B:")
divide = cntk.element_divide([4, 32, 15], [2, 4, 5]).eval()
print("{}\n".format(divide))

print("A^B:")
pow = cntk.pow([1, 3, 4], [4, 2, 3]).eval()
print("{}\n".format(pow))

print("Min(A,B):")
min = cntk.element_min([1, 2, 3], [4, 5, 6], [2, 1, 0]).eval()
print("{}\n".format(min))

print("Max(A,B):")
max = cntk.element_max([1, 2, 3], [4, 5, 6], [2, 9, 0]).eval()
print("{}\n".format(max))