def rpn_layer(self, net, layer_idx, bottom_blob, channel, idx):
        """Append a conv tower and four 1x1 prediction heads to ``net``.

        Four 3x3 ConvBNReLU layers are stacked on ``bottom_blob``.  The tower
        output then feeds four independent 1x1 heads (class score,
        centerness, vertex regression, occlusion), each created without an
        activation and with a bias term.  Sigmoid is applied to the class
        score, centerness and occlusion heads, and all four results are
        permuted with order [0, 2, 3, 1] and stored in ``net``.

        ``layer_idx`` is threaded through every ConvBNReLU call and the final
        value is returned.  ``idx`` is accepted but not used in this method.
        """
        tower_channels = int(channel * self.width_mult)
        feature = bottom_blob
        for _ in range(4):
            feature, layer_idx = ConvBNReLU(net,
                                            layer_idx,
                                            feature,
                                            tower_channels,
                                            kernel_size=3,
                                            stride=1)

        # Output channels of the heads, in creation order:
        # class score, centerness, vertex regression, occlusion.
        head_channels = (self.num_classes - 1,
                         1,
                         self.max_joints * 2,
                         self.max_joints * 2)
        head_blobs = []
        for out_channels in head_channels:
            blob, layer_idx = ConvBNReLU(net,
                                         layer_idx,
                                         feature,
                                         out_channels,
                                         kernel_size=1,
                                         stride=1,
                                         use_activation=False,
                                         bias_term=True)
            head_blobs.append(blob)
        cls_score, centerness, vetex_pred, occlusion = head_blobs

        net['cls_score'] = L.Sigmoid(cls_score)
        net['centerness'] = L.Sigmoid(centerness)
        net['occlusion'] = L.Sigmoid(occlusion)

        channels_last = [0, 2, 3, 1]
        activated_maps = (('scoremap_perm', 'cls_score'),
                          ('centernessmap_perm', 'centerness'),
                          ('occlusionmap_perm', 'occlusion'))
        for perm_name, source_name in activated_maps:
            net[perm_name] = L.Permute(net[source_name], order=channels_last)
        # The regression head is permuted directly, without a Sigmoid.
        net['regressionmap_perm'] = L.Permute(vetex_pred, order=channels_last)

        return layer_idx
Beispiel #2
0
    def mute_net(self, in_data, order):
        """Permute ``in_data`` by running it through a one-layer Caffe net.

        A prototxt holding an Input layer and a Permute layer is written to
        ``temp/internal.prototxt``, loaded in TEST phase, fed with ``in_data``
        as a single-sample batch, and deleted again afterwards.  The
        ``temp/`` directory itself is left in place.

        Args:
            in_data: array whose ``shape`` unpacks into ``(cin, h, w)``.
            order: axis order handed to the Permute layer.  The blob fed to
                the layer has four axes (batch axis first).

        Returns:
            The first batch item of the ``out`` blob as a float32 array.
        """
        cin, h, w = in_data.shape
        model_path = 'temp/'
        if not os.path.exists(model_path):
            os.mkdir(model_path)

        n = caffe.NetSpec()
        # Batch size is 1: the data is reshaped to [1, cin, h, w] below.
        n.data0 = L.Input(shape=[dict(dim=[1, cin, h, w])])
        n.out = L.Permute(n.data0, order=order)
        def_file = model_path + 'internal.prototxt'
        with open(def_file, 'w') as f:
            f.write(str(n.to_proto()))

        try:
            net = caffe.Net(def_file, caffe.TEST)
            net.blobs['data0'].data[...] = np.float32(
                in_data.reshape([1, cin, h, w]))
            output = net.forward()
            pa = np.float32(output['out'][0])
        finally:
            # Always drop the temporary definition, even if Caffe fails.
            if os.path.exists(def_file):
                os.remove(def_file)

        return pa
Beispiel #3
0
def PlateNetBody(net, data_layer, time_step, num_classes):
    """Add a two-layer LSTM body and a classifier to ``net``; return ``net``.

    Layers added, in order: ``indicator`` (ContinuationIndicator with the
    given ``time_step`` and a batch size of 512), ``permuted_data``
    (``data_layer`` permuted with order [3, 0, 1, 2]), ``lstm1`` and
    ``lstm2`` (100 outputs each, both driven by ``indicator``) and ``fc1``
    (InnerProduct along axis 2 with ``num_classes + 1`` outputs).
    """
    # Fillers and learning-rate multipliers for the InnerProduct layer.
    fc_params = dict(
        param=[
            dict(lr_mult=1, decay_mult=1),
            dict(lr_mult=2, decay_mult=0),
        ],
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant', value=0),
    )

    # One recurrent configuration shared by both LSTM layers.
    lstm_params = dict(
        num_output=100,
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant', value=0),
    )

    net.indicator = L.ContinuationIndicator(time_step=time_step,
                                            batch_size=512)
    net.permuted_data = L.Permute(data_layer, order=[3, 0, 1, 2])

    sequence = net.permuted_data
    for lstm_name in ('lstm1', 'lstm2'):
        setattr(net, lstm_name,
                L.LSTM(sequence, net.indicator, recurrent_param=lstm_params))
        sequence = getattr(net, lstm_name)

    net.fc1 = L.InnerProduct(sequence,
                             num_output=num_classes + 1,
                             axis=2,
                             **fc_params)
    return net
Beispiel #4
0
def _make_module(model_path, in_shape, order):
    """Write a minimal Input -> Permute network into ``model_path``.

    The definition is saved as ``test.prototxt``; the net is then
    instantiated in TEST phase and its weights are saved next to it as
    ``test.caffemodel``.  Nothing is returned.
    """
    spec = caffe.NetSpec()
    spec.data = L.Input(name="data", input_param={"shape": {"dim": in_shape}})
    spec.perm = L.Permute(spec.data, name="permute", order=order)

    proto_path = os.path.join(model_path, 'test.prototxt')
    with open(proto_path, 'w') as proto_file:
        proto_file.write(str(spec.to_proto()))

    model = caffe.Net(proto_path, caffe.TEST)
    weights_path = os.path.join(model_path, 'test.caffemodel')
    model.save(weights_path)
Beispiel #5
0
def CreateUnifiedPredictionHead(net,
                                data_layer="data",
                                num_classes=[],
                                from_layers=[],
                                use_objectness=False,
                                normalizations=[],
                                use_batchnorm=True,
                                lr_mult=1,
                                use_scale=True,
                                min_sizes=[],
                                max_sizes=[],
                                prior_variance=[0.1],
                                aspect_ratios=[],
                                steps=[],
                                img_height=0,
                                img_width=0,
                                share_location=True,
                                flip=True,
                                clip=True,
                                offset=0.5,
                                inter_layer_depth=[],
                                kernel_size=1,
                                pad=0,
                                conf_postfix='',
                                loc_postfix='',
                                **bn_param):
    """Attach location/confidence/prior-box heads to every source layer.

    For each entry of ``from_layers`` a 3x3 location convolution and a 3x3
    confidence convolution are created.  Their parameters carry the fixed
    names ``loc_p1``/``loc_p2`` and ``conf_p1``/``conf_p2`` for every source
    layer.  Each prediction is permuted with order [0, 2, 3, 1] and
    flattened from axis 1.  A PriorBox layer is added per source layer and,
    when ``use_objectness`` is set, an objectness head built with
    ``ConvBNLayer``.

    Returns the list ``[mbox_loc, mbox_conf, mbox_priorbox]`` of Concat
    layers, followed by ``mbox_objectness`` when ``use_objectness`` is set.

    ``normalizations``, ``use_scale`` and ``share_location`` are accepted
    but not read beyond the length check on ``normalizations``.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), \
            "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), \
        "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), \
            "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(aspect_ratios), \
            "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(steps), \
            "from_layers and steps should have same length"
    assert data_layer in net.keys(), "data_layer is not in net's layers"
    if inter_layer_depth:
        assert len(from_layers) == len(inter_layer_depth), \
            "from_layers and inter_layer_depth should have same length"

    def named_conv_args(prefix):
        # Convolution kwargs whose weight/bias params get fixed names.
        return {
            'param': [
                dict(name='{}_p1'.format(prefix),
                     lr_mult=lr_mult,
                     decay_mult=1),
                dict(name='{}_p2'.format(prefix),
                     lr_mult=2 * lr_mult,
                     decay_mult=0),
            ],
            'weight_filler': dict(type='xavier'),
            'bias_filler': dict(type='constant', value=0),
        }

    def permute_and_flatten(pred_name):
        # Add "<name>_perm" and "<name>_flat"; return the flattened top.
        perm_name = "{}_perm".format(pred_name)
        net[perm_name] = L.Permute(net[pred_name], order=[0, 2, 3, 1])
        flat_name = "{}_flat".format(pred_name)
        net[flat_name] = L.Flatten(net[perm_name], axis=1)
        return net[flat_name]

    def as_list(value):
        return value if type(value) is list else [value]

    loc_args = named_conv_args('loc')
    conf_args = named_conv_args('conf')

    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    objectness_layers = []

    # NOTE(review): the loc/conf convolutions below are sized with the
    # value this variable holds when they are created: this seed for the
    # first source layer, and the estimate computed for the PREVIOUS source
    # layer afterwards.  Only the objectness head sees the current layer's
    # estimate.  Confirm this is intended.
    num_priors_per_location = 6 if flip else 3

    for i, from_layer in enumerate(from_layers):
        # Location prediction layer.
        loc_name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        net[loc_name] = L.Convolution(net[from_layer],
                                      num_output=num_priors_per_location * 4,
                                      pad=1,
                                      kernel_size=3,
                                      stride=1,
                                      **loc_args)
        loc_layers.append(permute_and_flatten(loc_name))

        # Confidence prediction layer.
        conf_name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        net[conf_name] = L.Convolution(
            net[from_layer],
            num_output=num_priors_per_location * num_classes,
            pad=1,
            kernel_size=3,
            stride=1,
            **conf_args)
        conf_layers.append(permute_and_flatten(conf_name))

        # Per-layer prior-box settings, each normalised to a list.
        min_size = as_list(min_sizes[i])
        aspect_ratio = as_list(aspect_ratios[i]) if len(aspect_ratios) > i else []
        max_size = as_list(max_sizes[i]) if len(max_sizes) > i else []
        if max_size:
            assert len(max_size) == len(min_size), \
                "max_size and min_size should have same length."

        # Estimated priors per location for this layer's settings.
        priors_per_min_size = 1 + len(aspect_ratio)
        if max_size:
            priors_per_min_size += 1
        if flip:
            priors_per_min_size += len(aspect_ratio)
        num_priors_per_location = priors_per_min_size * len(min_size)

        step = steps[i] if len(steps) > i else []

        # Prior generation layer; optional fields are patched in afterwards.
        prior_name = "{}_mbox_priorbox".format(from_layer)
        net[prior_name] = L.PriorBox(net[from_layer],
                                     net[data_layer],
                                     min_size=min_size,
                                     clip=clip,
                                     variance=prior_variance,
                                     offset=offset)
        if max_size:
            net.update(prior_name, {'max_size': max_size})
        if aspect_ratio:
            net.update(prior_name,
                       {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(prior_name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                net.update(prior_name, {'img_size': img_height})
            else:
                net.update(prior_name,
                           {'img_h': img_height, 'img_w': img_width})
        priorbox_layers.append(net[prior_name])

        # Optional objectness prediction layer.
        if use_objectness:
            obj_name = "{}_mbox_objectness".format(from_layer)
            ConvBNLayer(net,
                        from_layer,
                        obj_name,
                        use_bn=use_batchnorm,
                        use_relu=False,
                        lr_mult=lr_mult,
                        num_output=num_priors_per_location * 2,
                        kernel_size=kernel_size,
                        pad=pad,
                        stride=1,
                        **bn_param)
            objectness_layers.append(permute_and_flatten(obj_name))

    # Concatenate the per-layer outputs: (top name, inputs, concat axis).
    concat_plan = [
        ("mbox_loc", loc_layers, 1),
        ("mbox_conf", conf_layers, 1),
        ("mbox_priorbox", priorbox_layers, 2),
    ]
    if use_objectness:
        concat_plan.append(("mbox_objectness", objectness_layers, 1))

    mbox_layers = []
    for concat_name, inputs, concat_axis in concat_plan:
        net[concat_name] = L.Concat(*inputs, axis=concat_axis)
        mbox_layers.append(net[concat_name])

    return mbox_layers
Beispiel #6
0
def segmentation(n, seg_points, label, phase):
    """Build the segmentation branch on ``n`` and return ``(n, output)``.

    The branch consists of a ``BCLReshape`` Python layer, a 1x1 conv block,
    a six-stage ``bcl_bn_relu`` stack, another 1x1 conv block, and a 1x1
    convolution (``n.seg_preds``) with a single output channel.

    Args:
        n: NetSpec-like object that receives the new layers.
        seg_points: bottom blob fed to the ``BCLReshape`` layer.
        label: bottom blob passed to the loss layer (read only when
            ``phase == "train"``).
        phase: ``"train"`` adds the ``FocalLoss`` Python layer as
            ``n.seg_loss``; ``"eval"`` adds a Sigmoid as ``n.output``.

    Returns:
        ``(n, None)`` for training, ``(n, n.output)`` for evaluation.

    Raises:
        ValueError: if ``phase`` is neither ``"train"`` nor ``"eval"``.
            Previously such a call failed with UnboundLocalError only after
            layers had already been added to ``n``.
    """
    if phase not in ("train", "eval"):
        raise ValueError(
            "phase must be 'train' or 'eval', got {!r}".format(phase))

    num_cls = 1

    top_prev, top_lattice = L.Python(seg_points,
                                     ntop=2,
                                     python_param=dict(module='bcl_layers',
                                                       layer='BCLReshape'))

    top_prev = conv_bn_relu(n,
                            "conv0_seg",
                            top_prev,
                            1,
                            64,
                            stride=1,
                            pad=0,
                            loop=1)

    # Author's note: if the lattice scale is too large the network becomes
    # very slow and does not give good results.
    top_prev = bcl_bn_relu(n,
                           'bcl_seg',
                           top_prev,
                           top_lattice,
                           nout=[64, 64, 128, 128, 128, 64],
                           lattic_scale=[
                               "0*4_1*4_2*4", "0*2_1*2_2*2", "0_1_2",
                               "0/2_1/2_2/2", "0/4_1/4_2/4", "0/8_1/8_2/8"
                           ],
                           loop=6,
                           skip='concat')

    top_prev = conv_bn_relu(n,
                            "conv1_seg",
                            top_prev,
                            1,
                            64,
                            stride=1,
                            pad=0,
                            loop=1)

    n.seg_preds = L.Convolution(top_prev,
                                name="car_seg",
                                convolution_param=dict(
                                    num_output=num_cls,
                                    kernel_size=1,
                                    stride=1,
                                    pad=0,
                                    weight_filler=dict(type='xavier'),
                                    bias_term=True,
                                    bias_filler=dict(type='constant', value=0),
                                    engine=1,
                                ),
                                param=[dict(lr_mult=1),
                                       dict(lr_mult=0.1)])

    if phase == "train":
        seg_preds = L.Permute(
            n.seg_preds, permute_param=dict(
                order=[0, 2, 3, 1]))  # (B,C=1,H,W) -> (B,H,W,C=1)
        seg_preds = L.Reshape(
            seg_preds, reshape_param=dict(shape=dict(
                dim=[0, -1, num_cls])))  # (B,H,W,C=1) -> (B, -1, 1)

        # Other loss layers named by the original author for this slot:
        # WeightFocalLoss, DiceFocalLoss, DiceLoss.
        n.seg_loss = L.Python(
            seg_preds,
            label,
            name="Seg_Loss",
            loss_weight=1,
            python_param=dict(module='bcl_layers', layer='FocalLoss'),
            param_str=str(dict(focusing_parameter=2, alpha=0.25)))
        output = None
    else:  # phase == "eval", guaranteed by the check at the top.
        n.output = L.Sigmoid(n.seg_preds)
        output = n.output

    return n, output
Beispiel #7
0
 def generate_caffe_prototxt(self, caffe_net, layer):
     """Wrap ``layer`` in a Permute layer and register it in ``caffe_net``.

     The permutation order is ``self.order`` converted to a list; the new
     layer is stored under the key ``self.g_name`` and also returned.
     """
     permuted = L.Permute(layer, order=list(self.order))
     caffe_net[self.g_name] = permuted
     return permuted
Beispiel #8
0
def CreateMultiBoxHead(net, data_layer="data", num_classes=[], from_layers=[],
                       use_objectness=False, normalizations=[], use_batchnorm=True,
                       min_sizes=[], max_sizes=[], prior_variance=[0.1],
                       aspect_ratios=[], share_location=True, flip=True, clip=True,
                       inter_layer_depth=0, kernel_size=1, pad=0, conf_postfix='', loc_postfix=''):
    """Attach location/confidence/prior-box heads to every source layer.

    For each entry of ``from_layers`` this optionally inserts a Normalize
    layer (when its ``normalizations`` value is not -1) and an intermediate
    3x3 ConvBN+ReLU layer (when ``inter_layer_depth > 0``).  It then adds
    location and confidence heads via ``ConvBNLayer``, permutes them with
    order [0, 2, 3, 1], flattens them from axis 1, and adds a PriorBox
    layer.  With ``use_objectness`` an objectness head is added as well.

    Returns the list ``[mbox_loc, mbox_conf, mbox_priorbox]`` of Concat
    layers, followed by ``mbox_objectness`` when ``use_objectness`` is set.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), \
            "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), \
        "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), \
            "from_layers and max_sizes should have same length"
    assert data_layer in net.keys(), "data_layer is not in net's layers"

    def permute_and_flatten(pred_name):
        # Add "<name>_perm" and "<name>_flat"; return the flattened top.
        perm_name = "{}_perm".format(pred_name)
        net[perm_name] = L.Permute(net[pred_name], order=[0, 2, 3, 1])
        flat_name = "{}_flat".format(pred_name)
        net[flat_name] = L.Flatten(net[perm_name], axis=1)
        return net[flat_name]

    def add_head(src_name, head_name, outputs):
        # Prediction head without ReLU, using the caller's kernel/pad.
        ConvBNLayer(net, src_name, head_name, use_bn=use_batchnorm,
                    use_relu=False, num_output=outputs,
                    kernel_size=kernel_size, pad=pad, stride=1)
        return permute_and_flatten(head_name)

    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    objectness_layers = []

    for i, from_layer in enumerate(from_layers):
        # Optional per-layer normalization (-1 means "skip").
        if normalizations and normalizations[i] != -1:
            norm_name = "{}_norm".format(from_layer)
            net[norm_name] = L.Normalize(
                net[from_layer],
                scale_filler=dict(type="constant", value=normalizations[i]),
                across_spatial=False, channel_shared=False)
            from_layer = norm_name

        # Optional intermediate 3x3 layer.
        if inter_layer_depth > 0:
            inter_name = "{}_inter".format(from_layer)
            ConvBNLayer(net, from_layer, inter_name, use_bn=use_batchnorm,
                        use_relu=True, num_output=inter_layer_depth,
                        kernel_size=3, pad=1, stride=1)
            from_layer = inter_name

        # Estimate the number of priors per location.
        aspect_ratio = aspect_ratios[i] if len(aspect_ratios) > i else []
        if type(aspect_ratio) is not list:
            aspect_ratio = [aspect_ratio]
        has_max_size = bool(max_sizes and max_sizes[i])

        num_priors_per_location = 1 + len(aspect_ratio)
        if has_max_size:
            num_priors_per_location += 1
        if flip:
            num_priors_per_location += len(aspect_ratio)
        # The estimate is doubled before sizing the heads.
        num_priors_per_location *= 2

        # Location prediction layer.
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes
        loc_layers.append(add_head(
            from_layer,
            "{}_mbox_loc{}".format(from_layer, loc_postfix),
            num_loc_output))

        # Confidence prediction layer.
        conf_layers.append(add_head(
            from_layer,
            "{}_mbox_conf{}".format(from_layer, conf_postfix),
            num_priors_per_location * num_classes))

        # Prior generation layer: optional fields are passed only when set.
        prior_kwargs = dict(min_size=min_sizes[i], clip=clip,
                            variance=prior_variance)
        if has_max_size:
            prior_kwargs['max_size'] = max_sizes[i]
        if aspect_ratio:
            prior_kwargs['aspect_ratio'] = aspect_ratio
            prior_kwargs['flip'] = flip
        prior_name = "{}_mbox_priorbox".format(from_layer)
        net[prior_name] = L.PriorBox(net[from_layer], net[data_layer],
                                     **prior_kwargs)
        priorbox_layers.append(net[prior_name])

        # Optional objectness prediction layer.
        if use_objectness:
            objectness_layers.append(add_head(
                from_layer,
                "{}_mbox_objectness".format(from_layer),
                num_priors_per_location * 2))

    # Concatenate the per-layer outputs: (top name, inputs, concat axis).
    concat_plan = [
        ("mbox_loc", loc_layers, 1),
        ("mbox_conf", conf_layers, 1),
        ("mbox_priorbox", priorbox_layers, 2),
    ]
    if use_objectness:
        concat_plan.append(("mbox_objectness", objectness_layers, 1))

    mbox_layers = []
    for concat_name, inputs, concat_axis in concat_plan:
        net[concat_name] = L.Concat(*inputs, axis=concat_axis)
        mbox_layers.append(net[concat_name])

    return mbox_layers
def ACT_CreateCuboidHead(net, K=6, data_layer="data", num_classes=[], from_layers=[],
    normalizations=[], use_batchnorm=True, lr_mult=1, use_scale=True, min_sizes=[],
    max_sizes=[], prior_variance = [0.1], aspect_ratios=[], steps=[], img_height=0,
    img_width=0, share_location=True, flip=True, clip=True, offset=0.5, kernel_size=1, pad=0,
    conf_postfix='', loc_postfix='', m='', fusion="concat", **bn_param):
    """Build prediction heads on top of ``K`` per-stream feature layers.

    For every entry of ``from_layers`` the ``K`` stream layers named
    ``<layer>_stream<k><m>`` are optionally normalized and then fused:
    concatenated along axis 1 for ``fusion="concat"``, or combined with an
    Eltwise layer for ``fusion="sum"``.  On the fused layer this adds a
    location head (``4 * K`` outputs per prior), a confidence head and a
    PriorBox layer.

    Returns the list ``[mbox_loc, mbox_conf, mbox_priorbox]`` of Concat
    layers.

    Raises:
        AssertionError: on inconsistent list lengths, a missing
            ``data_layer`` or an unknown ``fusion`` value.

    ``use_scale`` is accepted but not read here.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"

    if normalizations:
        assert len(from_layers) == len(normalizations), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), "from_layers and min_sizes should have same length"

    if max_sizes:
        assert len(from_layers) == len(max_sizes), "from_layers and max_sizes should have same length"

    if aspect_ratios:
        assert len(from_layers) == len(aspect_ratios), "from_layers and aspect_ratios should have same length"

    if steps:
        assert len(from_layers) == len(steps), "from_layers and steps should have same length"

    net_layers = net.keys()

    assert data_layer in net_layers, "data_layer is not in net's layers"

    num = len(from_layers)
    priorbox_layers = []
    loc_layers = []
    conf_layers = []

    for i in range(0, num):
        from_layer = from_layers[i]

        # Normalize every stream separately (-1 means "skip").
        if normalizations:
            if normalizations[i] != -1:
                # range() instead of xrange() so this runs on Python 2 and 3.
                for stream in range(K):
                    norm_name = "{}_norm_stream{}{}".format(from_layer, stream, m)
                    net[norm_name] = L.Normalize(net[from_layer + '_stream' + str(stream) + m], scale_filler=dict(type="constant", value=normalizations[i]),
                        across_spatial=False, channel_shared=False)
                # The stream names built below now resolve to the norm layers.
                from_layer = "{}_norm".format(from_layer)

        # ACT: fuse the K streams, referenced by name through ``bottom``.
        stream_names = [from_layer + '_stream' + str(stream) + m for stream in range(K)]
        if fusion == "concat":
            net[from_layer + '_concat'] = L.Concat(bottom=stream_names, axis=1)
            from_layer += '_concat'
        else:
            assert fusion == "sum"
            # Caffe registers this layer type as "Eltwise"; "EltWise" is
            # not a known layer type and would fail when the net is loaded.
            net[from_layer + '_sum'] = L.Eltwise(bottom=stream_names)
            from_layer += '_sum'

        # Estimate number of priors per location given provided parameters.
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(min_size), "max_size and min_size should have same length."
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = []
        if len(steps) > i:
            step = steps[i]

        # ACT-detector: location prediction layer,
        # one set of 4 coordinates per prior for each of the K frames.
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        num_loc_output = num_priors_per_location * 4 * K
        if not share_location:
            num_loc_output *= num_classes

        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
            num_output=num_loc_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])

        # ACT-detector: confidence prediction layer,
        # predicted jointly for all frames (not multiplied by K).
        name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
            num_output=num_conf_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])

        # Create prior generation layer; optional fields are patched in.
        name = "{}_mbox_priorbox".format(from_layer)
        net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_size,
                clip=clip, variance=prior_variance, offset=offset)
        if max_size:
            net.update(name, {'max_size': max_size})
        if aspect_ratio:
            net.update(name, {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                net.update(name, {'img_size': img_height})
            else:
                net.update(name, {'img_h': img_height, 'img_w': img_width})
        priorbox_layers.append(net[name])

    # Concatenate priorbox, loc, and conf layers.
    mbox_layers = []
    name = "mbox_loc"
    net[name] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net[name])

    name = "mbox_conf"
    net[name] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net[name])

    name = "mbox_priorbox"
    net[name] = L.Concat(*priorbox_layers, axis=2)
    mbox_layers.append(net[name])

    return mbox_layers
def UnitLayerDetectorHeader(net, data_layer="data", num_classes=2, feature_layer="conv5", \
        use_objectness=False, normalization=-1, use_batchnorm=True, prior_variance = [0.1], \
        min_sizes=[], max_sizes=[], aspect_ratios=[], pro_widths=[], pro_heights=[], \
        share_location=True, flip=True, clip=False, inter_layer_channels=0, kernel_size=1, \
        pad=0, conf_postfix='', loc_postfix='', flat=False, use_focus_loss=False,stage=1):
    """Attach a single-feature-map detection head (loc / conf / priorbox) to ``net``.

    Layers are registered on ``net`` under names prefixed with
    ``"<feature_layer>_<stage>"``.  Priors are described either by
    ``min_sizes`` (optionally with ``max_sizes`` / ``aspect_ratios``) or by
    explicit ``pro_widths`` / ``pro_heights``; the two styles are mutually
    exclusive.

    Args:
        net: Net specification being built; must already contain
            ``data_layer`` and ``feature_layer``.
        data_layer: Name of the data layer handed to ``L.PriorBox``.
        num_classes: Number of classes; must be positive.
        feature_layer: Name of the layer the head is attached to.
        use_objectness: When true, an extra objectness branch is created and
            returned as a fourth element.
        normalization: ``-1`` disables the ``L.Normalize`` layer; any other
            value is used as the constant ``scale_filler`` value.
        use_batchnorm: Forwarded as ``use_bn`` to the intermediate layers only
            (the prediction convolutions always pass ``use_bn=False``).
        prior_variance: Forwarded to ``L.PriorBox`` as ``variance``.
        min_sizes, max_sizes, aspect_ratios: Size-based prior description.
        pro_widths, pro_heights: Explicit prior widths/heights (same length).
        share_location: When false, the location output count is additionally
            multiplied by ``num_classes``.
        flip, clip: Forwarded to ``L.PriorBox``.
        inter_layer_channels: Sequence of ``(num_output, kernel_size)`` entries
            describing intermediate conv layers; ``0`` or any empty value
            means no intermediate layers.  Kernel sizes must be positive and
            odd.
        kernel_size, pad: Used only by the objectness convolution; the loc and
            conf convolutions are fixed at ``kernel_size=3, pad=1``.
        conf_postfix, loc_postfix: Suffixes appended to the conf / loc layer
            names.
        flat: When true, each permuted prediction is additionally flattened
            (``axis=1``) and the flattened top is returned instead.
        use_focus_loss: When true, the conf convolution is created with
            ``init_xavier=False, bias_type='focal', sparse=num_classes``.
        stage: Used to build the layer-name prefix.

    Returns:
        ``(loc_layer, conf_layer, priorbox_layer)``, or
        ``(loc_layer, conf_layer, priorbox_layer, objectness_layer)`` when
        ``use_objectness`` is true.

    Raises:
        AssertionError: If the arguments are inconsistent (see the checks
            below).
        ValueError: If an intermediate layer requests a kernel size that is
            not a positive odd number.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"

    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers."
    assert feature_layer in net_layers, "feature_layer is not in net's layers."

    if min_sizes:
        assert not pro_widths, "pro_widths should not be provided when using min_sizes."
        assert not pro_heights, "pro_heights should not be provided when using min_sizes."
        if max_sizes:
            assert len(max_sizes) == len(
                min_sizes
            ), "min_sizes and max_sizes must have the same legnth."
    else:
        assert pro_widths, "Must provide proposed width/height."
        assert pro_heights, "Must provide proposed width/height."
        assert len(pro_widths) == len(
            pro_heights), "pro_widths/heights must have the same length."
        assert not min_sizes, "min_sizes should be not provided when using pro_widths/heights."
        assert not max_sizes, "max_sizes should be not provided when using pro_widths/heights."

    def _permute_prediction(conv_name):
        # Permute the conv output with order [0, 2, 3, 1] and, when `flat` is
        # set, flatten it from axis 1; returns the top callers should consume.
        permute_name = "{}_perm".format(conv_name)
        net[permute_name] = L.Permute(net[conv_name], order=[0, 2, 3, 1])
        if not flat:
            return net[permute_name]
        flatten_name = "{}_flat".format(conv_name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        return net[flatten_name]

    from_layer = feature_layer
    prefix_name = '{}_{}'.format(from_layer, stage)

    # Norm-Layer
    if normalization != -1:
        # NOTE: `stage` appears twice in this name because `prefix_name`
        # already contains it; left untouched so existing layer names (and any
        # weights keyed on them) keep matching.
        norm_name = "{}_{}_norm".format(prefix_name, stage)
        net[norm_name] = L.Normalize(net[from_layer], scale_filler=dict(type="constant", value=normalization), \
            across_spatial=False, channel_shared=False)
        from_layer = norm_name

    # Add intermediate Conv layers.
    # The default value is the integer 0, so test truthiness instead of
    # calling len() on it (which raised TypeError for the default).
    if inter_layer_channels:
        for inter_id, inter_channel_kernel in enumerate(inter_layer_channels, 1):
            inter_channel = inter_channel_kernel[0]
            inter_kernel = inter_channel_kernel[1]
            if inter_kernel < 1 or inter_kernel % 2 == 0:
                raise ValueError(
                    "inter_layer_channels kernel size must be a positive odd "
                    "number, got {!r}".format(inter_kernel))
            # Padding that keeps the spatial size at stride 1 (1 -> 0, 3 -> 1).
            # Computed per layer so a stale value from a previous iteration
            # can never be reused.
            inter_pad = (inter_kernel - 1) // 2
            inter_name = "{}_inter_{}".format(prefix_name, inter_id)
            ConvBNUnitLayer(net, from_layer, inter_name, use_bn=use_batchnorm, use_relu=True, \
                num_output=inter_channel, kernel_size=inter_kernel, pad=inter_pad, stride=1,use_scale=True, leaky=False)
            from_layer = inter_name

    # Estimate number of priors per location given provided parameters.
    if min_sizes:
        if aspect_ratios:
            num_priors_per_location = len(aspect_ratios) + 1
            if flip:
                num_priors_per_location += len(aspect_ratios)
            if max_sizes:
                num_priors_per_location += 1
            num_priors_per_location *= len(min_sizes)
        elif max_sizes:
            num_priors_per_location = 2 * len(min_sizes)
        else:
            num_priors_per_location = len(min_sizes)
    else:
        num_priors_per_location = len(pro_widths)

    # Create location prediction layer.
    # ConvBNUnitLayer is expected to register its output as net[name]; the
    # helper reads it back from there.
    name = "{}_mbox_loc{}".format(prefix_name, loc_postfix)
    num_loc_output = num_priors_per_location * 4 * (num_classes - 1)
    if not share_location:
        num_loc_output *= num_classes
    ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False, \
        num_output=num_loc_output, kernel_size=3, pad=1, stride=1)
    loc_layer = _permute_prediction(name)

    # Create confidence prediction layer.
    name = "{}_mbox_conf{}".format(prefix_name, conf_postfix)
    num_conf_output = num_priors_per_location * num_classes
    conf_kwargs = dict(use_bn=False, use_relu=False, num_output=num_conf_output,
                       kernel_size=3, pad=1, stride=1)
    if use_focus_loss:
        conf_kwargs.update(init_xavier=False, bias_type='focal', sparse=num_classes)
    ConvBNUnitLayer(net, from_layer, name, **conf_kwargs)
    conf_layer = _permute_prediction(name)

    # Create prior generation layer.  Only the arguments that were actually
    # provided are forwarded, exactly as the separate call variants did.
    name = "{}_mbox_priorbox".format(prefix_name)
    prior_kwargs = dict(flip=flip, clip=clip, variance=prior_variance)
    if min_sizes:
        prior_kwargs['min_size'] = min_sizes
        if max_sizes:
            prior_kwargs['max_size'] = max_sizes
        if aspect_ratios:
            prior_kwargs['aspect_ratio'] = aspect_ratios
    else:
        prior_kwargs['pro_width'] = pro_widths
        prior_kwargs['pro_height'] = pro_heights
    net[name] = L.PriorBox(net[from_layer], net[data_layer], **prior_kwargs)
    priorbox_layer = net[name]

    if not use_objectness:
        return loc_layer, conf_layer, priorbox_layer

    # Create objectness prediction layer (two outputs per prior).
    name = "{}_mbox_objectness".format(prefix_name)
    num_obj_output = num_priors_per_location * 2
    ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False, \
        num_output=num_obj_output, kernel_size=kernel_size, pad=pad, stride=1)
    objectness_layer = _permute_prediction(name)

    return loc_layer, conf_layer, priorbox_layer, objectness_layer
def McDetectorHeader(net, num_classes=1, feature_layer="conv5", \
        normalization=-1, use_batchnorm=False,
        boxsizes=[], aspect_ratios=[], pwidths=[], pheights=[], \
        inter_layer_channels=0, kernel_size=1, pad=0):
    """Attach a location branch and a confidence branch to ``feature_layer``.

    Priors are described either by ``boxsizes`` together with
    ``aspect_ratios``, or by explicit ``pwidths`` / ``pheights`` of equal
    length; the two descriptions are mutually exclusive.

    Args:
        net: Net specification being built; must already contain
            ``feature_layer``.
        num_classes: Number of classes; must be positive.
        feature_layer: Name of the layer the head is attached to.
        normalization: When greater than 0, an ``L.Normalize`` layer with this
            constant scale is inserted first.
        use_batchnorm: Forwarded as ``use_bn`` to the intermediate layer only.
        boxsizes, aspect_ratios: Size/ratio based prior description.
        pwidths, pheights: Explicit prior widths/heights.
        inter_layer_channels: When greater than 0, one 3x3 intermediate conv
            with this many outputs is inserted before the branches.
        kernel_size, pad: Geometry of the two prediction convolutions.

    Returns:
        A two-element list: the permuted location top followed by the
        permuted confidence top.
    """
    assert num_classes > 0, "num_classes must be positive number"
    assert feature_layer in net.keys(), "feature_layer is not in net's layers."

    if not boxsizes:
        assert pwidths, "Must provide proposed width/height."
        assert pheights, "Must provide proposed width/height."
        assert len(pwidths) == len(
            pheights), "provided widths/heights must have the same length."
        assert not boxsizes, "boxsizes should be not provided when using pro_widths/heights."
        assert not aspect_ratios, "aspect_ratios should be not provided when using pro_widths/heights."
    else:
        assert not pwidths, "pwidths should not be provided when using boxsizes."
        assert not pheights, "pheights should not be provided when using boxsizes."
        assert aspect_ratios, "aspect_ratios should be provided when using boxsizes."

    bottom = feature_layer

    # Optional normalisation in front of everything else.
    if normalization > 0:
        normalized = "{}_norm".format(bottom)
        net[normalized] = L.Normalize(
            net[bottom],
            scale_filler=dict(type="constant", value=normalization),
            across_spatial=False,
            channel_shared=False)
        bottom = normalized

    # Optional single intermediate conv (always 3x3 with pad 1).
    if inter_layer_channels > 0:
        intermediate = "{}_inter".format(bottom)
        ConvBNUnitLayer(net, bottom, intermediate, use_bn=use_batchnorm, use_relu=True,
                        num_output=inter_layer_channels, kernel_size=3, pad=1, stride=1)
        bottom = intermediate

    # One more prior than the provided description accounts for.
    if boxsizes:
        priors = len(aspect_ratios) * len(boxsizes) + 1
    else:
        priors = len(pwidths) + 1

    def _branch(suffix, channels):
        # Conv (no BN, no ReLU) followed by a permute with order [0, 2, 3, 1].
        # ConvBNUnitLayer is expected to register net[conv_name], which is
        # read back right away.
        conv_name = "{}_{}".format(bottom, suffix)
        ConvBNUnitLayer(net, bottom, conv_name, use_bn=False, use_relu=False,
                        num_output=channels, kernel_size=kernel_size, pad=pad, stride=1)
        permuted = "{}_perm".format(conv_name)
        net[permuted] = L.Permute(net[conv_name], order=[0, 2, 3, 1])
        return net[permuted]

    # Location branch first (4 values per prior), then confidence
    # (num_classes + 1 values per prior).
    return [
        _branch("loc", priors * 4),
        _branch("conf", priors * (num_classes + 1)),
    ]
# Example #12
# 0
def segmentation(n, seg_points, label, cls_labels, reg_targets, dataset_params,
                 phase):
    """Build the point-segmentation stage plus the RPN detection heads on ``n``.

    The graph is assembled in this order: a ``BCLReshape`` Python layer on
    ``seg_points``, a 1x1 conv, a four-step ``bcl_bn_relu`` stack, another
    1x1 conv, the ``seg_head`` convolution, a ``Point2FeatMap`` Python layer,
    three conv/deconv RPN stages whose deconv outputs are concatenated, and
    finally the ``cls_head`` / ``reg_head`` convolutions.

    Args:
        n: Net specification that receives the layers.
        seg_points: Bottom fed to ``BCLReshape`` and ``Point2FeatMap``.
        label: Segmentation target, used only when ``phase == "train"``.
        cls_labels: Classification target, used only when training.
        reg_targets: Regression target, used only when training.
        dataset_params: Mapping that must provide ``num_cls``,
            ``box_code_size``, ``num_anchor_per_loc``, ``num_filters``,
            ``layer_strides``, ``layer_nums``, ``num_upsample_filters``,
            ``upsample_strides``, ``feat_map_size``, ``point_cloud_range``,
            ``seg_thresh``, ``use_depth``, ``use_score`` and ``use_points``.
        phase: ``"train"`` adds the loss layers; ``"eval"`` exposes the
            reshaped predictions as ``f_cls_preds`` / ``f_box_preds``.

    Returns:
        The same ``n`` that was passed in.
    """
    # Pull every setting up front, in a fixed order.
    (num_cls, box_code_size, num_anchor_per_loc,
     num_filters, layer_strides, layer_nums,
     num_upsample_filters, upsample_strides,
     feat_map_size,  # (b,c,n,h,w) per the original annotation
     point_cloud_range, seg_thresh,
     use_depth, use_score, use_points) = [
         dataset_params[key] for key in (
             'num_cls', 'box_code_size', 'num_anchor_per_loc',
             'num_filters', 'layer_strides', 'layer_nums',
             'num_upsample_filters', 'upsample_strides',
             'feat_map_size', 'point_cloud_range', 'seg_thresh',
             'use_depth', 'use_score', 'use_points')]

    training = phase == "train"

    def _head_conv(bottom, layer_name, channels, bias_lr_mult):
        # 1x1 convolution with xavier weights and a zero-initialised bias; only
        # the bias learning-rate multiplier differs between the heads.
        return L.Convolution(
            bottom,
            name=layer_name,
            convolution_param=dict(
                num_output=channels,
                kernel_size=1,
                stride=1,
                pad=0,
                weight_filler=dict(type='xavier'),
                bias_term=True,
                bias_filler=dict(type='constant', value=0),
                engine=1,
            ),
            param=[dict(lr_mult=1), dict(lr_mult=bias_lr_mult)])

    def _channels_last_rows(blob, last_dim):
        # Permute with order [0, 2, 3, 1], then reshape to [0, -1, last_dim].
        permuted = L.Permute(blob, permute_param=dict(order=[0, 2, 3, 1]))
        return L.Reshape(
            permuted,
            reshape_param=dict(shape=dict(dim=[0, -1, last_dim])))

    # ------------------------------------------------------------------
    # Per-point feature extraction
    # ------------------------------------------------------------------
    feats, lattice = L.Python(
        seg_points,
        ntop=2,
        python_param=dict(module='bcl_layers', layer='BCLReshape'))

    feats = conv_bn_relu(n, "conv0_seg", feats, 1, 64, stride=1, pad=0, loop=1)

    # Author's note: if the lattice scale is too large the network gets very
    # slow and does not give good results.
    # Earlier six-step variant, kept for reference:
    #   nout=[64, 64, 128, 128, 128, 64],
    #   lattic_scale=["0*4_1*4_2*4","0*2_1*2_2*2","0_1_2",
    #                 "0/2_1/2_2/2","0/4_1/4_2/4","0/8_1/8_2/8"], loop=6
    # Variant the author marked "BEST NOW":
    #   nout=[64, 128, 128, 128, 64],
    #   lattic_scale=["0*2_1*2_2*2","0_1_2","0/2_1/2_2/2",
    #                 "0/4_1/4_2/4","0/8_1/8_2/8"], loop=5
    feats = bcl_bn_relu(
        n, 'bcl_seg', feats, lattice,
        nout=[64, 128, 128, 64],
        lattic_scale=["0*8_1*8_2*8", "0*4_1*4_2*4", "0*2_1*2_2*2", "0_1_2"],
        loop=4,
        skip='concat')

    feats = conv_bn_relu(n, "conv1_seg", feats, 1, 64, stride=1, pad=0, loop=1)

    # ------------------------------------------------------------------
    # Segmentation head (bias learns at 0.1x) and its training loss
    # ------------------------------------------------------------------
    n.seg_preds = _head_conv(feats, "seg_head", num_cls, 0.1)

    if training:
        seg_rows = _channels_last_rows(n.seg_preds, num_cls)
        # A separate "SegWeight" layer used to be fed in here as a third
        # bottom; the loss currently takes predictions and labels only.
        # Other loss layers named by the author: WeightFocalLoss,
        # DiceFocalLoss, DiceLoss.
        n.seg_loss = L.Python(
            seg_rows,
            label,
            name="Seg_Loss",
            loss_weight=1,
            python_param=dict(module='bcl_layers', layer='FocalLoss'),
            param_str=str(dict(focusing_parameter=2, alpha=0.25)))

    # ------------------------------------------------------------------
    # Points -> feature map
    # ------------------------------------------------------------------
    feats = conv_bn_relu(n, "P2FM_Decov", feats, 1, 32, stride=1, pad=0, loop=1)
    n.seg_output = L.Sigmoid(n.seg_preds)
    n.p2fm = L.Python(
        seg_points,
        n.seg_output,
        feats,
        name="Point2FeatMap",
        python_param=dict(module='bcl_layers', layer='Point2FeatMap'),
        param_str=str(
            dict(thresh=seg_thresh,
                 feat_map_size=feat_map_size,
                 point_cloud_range=point_cloud_range,
                 use_depth=use_depth,
                 use_score=use_score,
                 use_points=use_points)))
    feats = n.p2fm

    # ------------------------------------------------------------------
    # RPN: three stages of (strided conv, conv stack, deconv); each stage
    # continues from the previous stage's conv stack output.
    # ------------------------------------------------------------------
    upsampled = []
    for stage in range(3):
        tag = stage + 1
        feats = conv_bn_relu(n, "ini_conv{}".format(tag), feats, 3,
                             num_filters[stage],
                             stride=layer_strides[stage], pad=1, loop=1)
        feats = conv_bn_relu(n, "rpn_conv{}".format(tag), feats, 3,
                             num_filters[stage],
                             stride=1, pad=1, loop=layer_nums[stage])
        upsampled.append(
            deconv_bn_relu(n, "rpn_deconv{}".format(tag), feats,
                           upsample_strides[stage],
                           num_upsample_filters[stage],
                           stride=upsample_strides[stage], pad=0))

    n['rpn_out'] = L.Concat(*upsampled)
    feats = n['rpn_out']

    # ------------------------------------------------------------------
    # Detection heads
    # ------------------------------------------------------------------
    n.cls_preds = _head_conv(feats, "cls_head", num_anchor_per_loc * num_cls, 1)
    n.box_preds = _head_conv(feats, "reg_head",
                             num_anchor_per_loc * box_code_size, 1)

    # Class scores are reshaped with a trailing dimension of 1, box
    # regressions with a trailing dimension of box_code_size.
    cls_rows = _channels_last_rows(n.cls_preds, 1)
    box_rows = _channels_last_rows(n.box_preds, box_code_size)

    if training:
        n['cared'], n['reg_outside_weights'], n['cls_weights'] = L.Python(
            cls_labels,
            name="PrepareLossWeight",
            ntop=3,
            python_param=dict(module='bcl_layers', layer='PrepareLossWeight'))

        # Author's note: gradients cannot be computed with respect to the
        # label inputs (bottom[1]).
        n['labels_input'] = L.Python(
            cls_labels,
            n['cared'],
            name="Label_Encode",
            python_param=dict(module='bcl_layers', layer='LabelEncode'))

        n.cls_loss = L.Python(
            cls_rows,
            n['labels_input'],
            n['cls_weights'],
            name="FocalLoss",
            loss_weight=1,
            python_param=dict(module='bcl_layers', layer='WeightFocalLoss'),
            param_str=str(dict(focusing_parameter=2, alpha=0.25)))

        n.reg_loss = L.Python(
            box_rows,
            reg_targets,
            n['reg_outside_weights'],
            name="WeightedSmoothL1Loss",
            loss_weight=1,
            python_param=dict(module='bcl_layers',
                              layer='WeightedSmoothL1Loss'))

    # Flagged "Problem" by the author: the reshaped predictions are only
    # attached to the net in the eval phase.
    if phase == "eval":
        n.f_cls_preds = cls_rows
        n.f_box_preds = box_rows

    return n
# Example #13
# 0
def test_v2(phase,
            dataset_params=None,
            model_cfg = None,
            deploy=False,
            create_prototxt=True,
            save_path=None,
            ):

    #RPN config
    num_filters=list(model_cfg.rpn.num_filters)
    layer_nums=list(model_cfg.rpn.layer_nums)
    layer_strides=list(model_cfg.rpn.layer_strides)
    num_upsample_filters=list(model_cfg.rpn.num_upsample_filters)
    upsample_strides=list(model_cfg.rpn.upsample_strides)

    point_cloud_range=list(model_cfg.voxel_generator.point_cloud_range)
    voxel_size=list(model_cfg.voxel_generator.voxel_size)
    # anchors_fp_size = (point_cloud_range[3:]-point_cloud_range[:3])/voxel_size
    anchors_fp_w = 432 #1408
    anchors_fp_h = 496 #1600

    box_code_size = 7
    num_anchor_per_loc = 2

    ############################################################################
    # Voxel2BCL
    # Voxel2PointNet
    ############################################################################
    BCL_mode = 'Voxel2BCL'
    dataset_params['x2BCL'] = BCL_mode
    dataset_params['Voxel2BCL_numpoint'] = 6000 #num voxels

    ############################################################################
    # Featuer Creation
    # VoxelFeatureNet: xyzr + (cente_x, center_z, center_y), (cluster_x, cluster_z)
    # VoxelFeatureNetV2: xyzr + (cluster_x, cluster_z)
    # False: No Feature extraction only xyzr
    # SimpleVoxel: sum points in voxel and divided by num of points left 1 points
    #              if use SimpleVoxel PointNet Should disable!
    ############################################################################
    dataset_params['FeatureNet'] = 'SimpleVoxel'
    dataset_params['Save_Img'] = False
    ############################################################################
    # if PointNet == True then it means PointNet to extract high dimention features
    # and max pooling to reduce the point to 1
    # Normally except Simplex the rest of the Freature Creation must with
    # PointNet acticate
    ############################################################################


    n = caffe.NetSpec()

    if phase == "train":

        dataset_params_train = dataset_params.copy()
        dataset_params_train['subset'] = phase

        datalayer_train = L.Python(name='data', include=dict(phase=caffe.TRAIN),
                                   ntop= 4, python_param=dict(module='bcl_layers', layer='InputKittiData',
                                                     param_str=repr(dataset_params_train)))

        n.data, n.coors, n.labels, n.reg_targets = datalayer_train

    elif phase == "eval":
        dataset_params_eval = dataset_params.copy()
        dataset_params_eval['subset'] = phase

        datalayer_eval = L.Python(name='data', include=dict(phase=caffe.TEST),
                                  ntop= 9, python_param=dict(module='bcl_layers', layer='InputKittiData',
                                                     param_str=repr(dataset_params_eval)))

        n.data, n.coors, n.anchors, n.rect, n.trv2c, n.p2, n.anchors_mask, n.img_idx, n.img_shape = datalayer_eval

    if deploy:
        print("[debug] run deploy in caffe_model.py")
        # n.data = L.Input(shape=dict(dim=[1, len(input_dims), 1, sample_size]))


    top_prev = n.data

    """BCL fixed size before Scatter"""
    ############################################################################
    # Method 1
    # use new xyz as the lattice features
    # this reshape is used to make n.data from (1,Feature,Npoints,Voxels) -> (1,Feature,1,Voxels) Npoints in here should be 1
    # and n.data -> (1,Feature,Npoints,Voxels) -> (1, Feature[:3], 1, Voxeld) Npoints in here should be 1
    # this is particular for raw XYZ features as BCL data input which means there is no PointNet or any features extraction infront
    # Or must keep the origin xyz features inside the new features
    ############################################################################

    # n["input_feats"], n['lat_feats']= L.Python(n.data, ntop=2, python_param=dict(module='bcl_layers',
    #                                                                             layer='BCLReshape',
    #                                                                             param_str=str(dict(data_feature=True))))
    # top_prev, top_lat_feats = n["input_feats"], n['lat_feats']
    # top_prev = bcl_bn_relu(n, 'bcl0', top_prev, top_lat_feats, nout=[64,128,64], lattic_scale=["0*8_1*8_2*8", "0*4_1*4_2*4", "0*2_1*2_2*2"], loop=3)


    if BCL_mode=="Voxel2BCL":

        # Reshape to the (B,C,N,V) N is 1 here to fit in BCL
        n["input_feats"], n['lat_feats']= L.Python(top_prev, n.coors, ntop=2, python_param=dict(module='bcl_layers',
                                                                                    layer='BCLReshape',
                                                                                    param_str=str(dict(ReshapeMode=BCL_mode))))
        top_prev, top_lat_feats = n["input_feats"], n['lat_feats']

        # top_prev = bcl_bn_relu(n, 'bcl0', top_prev, top_lat_feats, nout=[64,128,64], lattic_scale=["0*8_1*8_2*8", "0*4_1*4_2*4", "0*2_1*2_2*2"], loop=3)
        # top_prev = bcl_bn_relu(n, 'bcl0', top_prev, top_lat_feats, nout=[64,128,64], lattic_scale=["0*1_1*1_2*1", "0*0.5_1*0.5_2*0.5", "0*0.25_1*0.25_2*0.25"], loop=3)
        top_prev = bcl_bn_relu(n, 'bcl0', top_prev, top_lat_feats, nout=[64,128,64], lattic_scale=["0*32_1*32_2*32", "0*16_1*16_2*16", "0*8_1*8_2*8"], loop=3)

        # Reshape to the (B,C,V,N) N is 1 here to fit in Scatter
        n["input_feats_inverse"]= L.Python(top_prev,python_param=dict(module='bcl_layers',
                                                                    layer='Voxel2Scatter',
                                                                        ))
        top_prev = n["input_feats_inverse"]

    if BCL_mode=="Voxel2PointNet":
        top_prev = conv_bn_relu(n, "mlp0", top_prev, 1, 64, stride=1, pad=0, loop=1)
        top_prev = conv_bn_relu(n, "mlp1", top_prev, 1, 128, stride=1, pad=0, loop=1)
        top_prev = conv_bn_relu(n, "mlp2", top_prev, 1, 64, stride=1, pad=0, loop=1)

    ###############################Scatter######################################
    n['PillarScatter'] = L.Python(top_prev, n.coors, ntop=1,python_param=dict(
                            module='bcl_layers',
                            layer='PointPillarsScatter',
                            param_str=str(dict(output_shape=[1, 1, anchors_fp_h, anchors_fp_w, 64], # [1, 1, 496, 432, 4]
                                            permutohedral=False # if true return shape is (b,c,1,h*w) else (b.c,h,w)
                                            ))))
    top_prev= n['PillarScatter']
    ###############################Scatter######################################


    #############################MODE1##########################################
    """ No Concate"""
    # top_prev = bcl_bn_relu(n, 'bcl0',
    #                         top_prev,
    #                         top_lat_feats,
    #                         nout=[64,128,128,128,64,64],
    #                         lattic_scale=["0*16_1*16_2*16", "0*8_1*8_2*8", "0*4_1*4_2*4", "0*2_1*2_2*2", "0*0.5_1*0.5_2*0.5"],
    #                         loop=6)

    #############################MODE1##########################################


    #############################MODE2##########################################
    """ Concate (might have rpn and feature extract function?)"""
    # top_prev_1 = bcl_bn_relu(n, 'bcl0',
    #                     top_prev,
    #                     top_lat_feats,
    #                     nout=[64,128],
    #                     lattic_scale=["0*8_1*8_2*8", "0*4_1*4_2*4"],
    #                     loop=2)
    #
    # top_prev_2 = bcl_bn_relu(n, 'bcl1',
    #                         top_prev_1,
    #                         top_lat_feats,
    #                         nout=[128,128],
    #                         lattic_scale=["0*2_1*2_2*2", "0*1_1*1_2*1"],
    #                         loop=2)
    #
    # top_prev_3 = bcl_bn_relu(n, 'bcl2',
    #                         top_prev_2,
    #                         top_lat_feats,
    #                         nout=[64,64],
    #                         lattic_scale=["0*0.5_1*0.5_2*0.5", "0*0.25_1*0.25_2*0.25"],
    #                         loop=2)
    #
    # n['rpn_out'] = L.Concat(top_prev_1, top_prev_2, top_prev_3)
    # top_prev = n['rpn_out']

    # n['reshape_rpn_out'] = L.Reshape(top_prev, reshape_param=dict(shape=dict(dim=[0, 0, int(anchors_fp_h/2), int(anchors_fp_w/2)])))# (B,H,W,C) -> (B, -1, C)
    # top_prev = n['reshape_rpn_out']
    #############################MODE2##########################################


    #############################MODE3##########################################
    top_prev = conv_bn_relu(n, "ini_conv1", top_prev, 3, num_filters[0], stride=layer_strides[0], pad=1, loop=1)
    top_prev = conv_bn_relu(n, "rpn_conv1", top_prev, 3, num_filters[0], stride=1, pad=1, loop=layer_nums[0]) #3
    deconv1 = deconv_bn_relu(n, "rpn_deconv1", top_prev, upsample_strides[0], num_upsample_filters[0], stride=upsample_strides[0], pad=0)

    top_prev = conv_bn_relu(n, "ini_conv2", top_prev, 3, num_filters[1], stride=layer_strides[1], pad=1, loop=1)
    top_prev = conv_bn_relu(n, "rpn_conv2", top_prev, 3, num_filters[1], stride=1, pad=1, loop=layer_nums[1]) #5
    deconv2 = deconv_bn_relu(n, "rpn_deconv2", top_prev, upsample_strides[1], num_upsample_filters[1], stride=upsample_strides[1], pad=0)

    top_prev = conv_bn_relu(n, "ini_conv3", top_prev, 3, num_filters[2], stride=layer_strides[2], pad=1, loop=1)
    top_prev = conv_bn_relu(n, "rpn_conv3", top_prev, 3, num_filters[2], stride=1, pad=1, loop=layer_nums[2]) #5
    deconv3 = deconv_bn_relu(n, "rpn_deconv3", top_prev, upsample_strides[2], num_upsample_filters[2], stride=upsample_strides[2], pad=0)

    n['rpn_out'] = L.Concat(deconv1, deconv2, deconv3)
    top_prev = n['rpn_out']
    #############################MODE3##########################################


    num_cls = 2
    n['cls_preds'] = L.Convolution(top_prev, name = "cls_head",
                         convolution_param=dict(num_output=num_cls,
                                                kernel_size=1, stride=1, pad=0,
                                                weight_filler=dict(type = 'xavier'),
                                                bias_term = True,
                                                bias_filler=dict(type='constant', value=0),
                                                engine=1,
                                                ),
                         param=[dict(lr_mult=1), dict(lr_mult=1)])
    cls_preds = n['cls_preds']


    box_code_size = 7
    num_anchor_per_loc = 2
    n['box_preds'] = L.Convolution(top_prev, name = "reg_head",
                          convolution_param=dict(num_output=num_anchor_per_loc * box_code_size,
                                                 kernel_size=1, stride=1, pad=0,
                                                 weight_filler=dict(type = 'xavier'),
                                                 bias_term = True,
                                                 bias_filler=dict(type='constant', value=0),
                                                 engine=1,
                                                 ),
                          param=[dict(lr_mult=1), dict(lr_mult=1)])
    box_preds = n['box_preds']

    if phase == "train":

        n['cared'],n['reg_outside_weights'], n['cls_weights']= L.Python(n.labels,
                                                                        name = "PrepareLossWeight",
                                                                        ntop = 3,
                                                                        python_param=dict(
                                                                                    module='bcl_layers',
                                                                                    layer='PrepareLossWeight'
                                                                                    ))
        reg_outside_weights, cared, cls_weights = n['reg_outside_weights'], n['cared'], n['cls_weights']

        # Gradients cannot be computed with respect to the label inputs (bottom[1])#
        n['labels_input'] = L.Python(n.labels, cared,
                            name = "Label_Encode",
                            python_param=dict(
                                        module='bcl_layers',
                                        layer='LabelEncode',
                                        ))
        labels_input = n['labels_input']


        n['cls_preds_permute'] = L.Permute(cls_preds, permute_param=dict(order=[0, 2, 3, 1])) #(B,C,H,W) -> (B,H,W,C)
        cls_preds_permute = n['cls_preds_permute']
        n['cls_preds_reshape'] = L.Reshape(cls_preds_permute, reshape_param=dict(shape=dict(dim=[0, -1, 1])))# (B,H,W,C) -> (B, -1, C)
        cls_preds_reshape = n['cls_preds_reshape']


        n.cls_loss= L.Python(cls_preds_reshape, labels_input, cls_weights,
                                name = "FocalLoss",
                                loss_weight = 1,
                                python_param=dict(
                                            module='bcl_layers',
                                            layer='WeightFocalLoss'
                                            ),
                                param_str=str(dict(focusing_parameter=2, alpha=0.25)))

        box_code_size = 7
        n['box_preds_permute'] = L.Permute(box_preds, permute_param=dict(order=[0, 2, 3, 1])) #(B,C,H,W) -> (B,H,W,C)
        box_preds_permute = n['box_preds_permute']
        n['box_preds_reshape'] = L.Reshape(box_preds_permute, reshape_param=dict(shape=dict(dim=[0, -1, box_code_size]))) #(B,H,W,C) -> (B, -1, C)
        box_preds_reshape = n['box_preds_reshape']

        n.reg_loss= L.Python(box_preds_reshape, n.reg_targets, reg_outside_weights,
                                name = "WeightedSmoothL1Loss",
                                loss_weight = 1,
                                python_param=dict(
                                            module='bcl_layers',
                                            layer='WeightedSmoothL1Loss'
                                            ))

        return n.to_proto()

    elif phase == "eval":

        n['e7'],n['m7'],n['h7'],n['e5'],n['m5'],n['h5']=L.Python(box_preds,cls_preds,
                                                    n.anchors, n.rect,
                                                    n.trv2c, n.p2, n.anchors_mask,
                                                    n.img_idx, n.img_shape,
                                                    name = "EvalLayer",
                                                    ntop=6,
                                                    python_param=dict(
                                                    module='bcl_layers',
                                                    layer='EvalLayer_v2',
                                                    param_str=repr(dataset_params_eval),
                                                    ))


        return n.to_proto()

    else:
        raise ValueError
# Example #14
# 0
def object_detection(n, voxels, coors, label, reg_targets, phase):
    """Attach the BCL object-detection branch to the net spec ``n``.

    Pipeline built here, in order:
      1. ``BCLReshape`` Python layer on ``(voxels, coors)`` giving a feature
         top and a lattice top.
      2. 1x1 ``conv_bn_relu`` ("conv0_obj"), a six-stage ``bcl_bn_relu``
         stack ("bcl_obj", skip='concat') and a second 1x1 ``conv_bn_relu``
         ("conv1_obj").
      3. Two 1x1 convolution heads stored as ``n.cls_preds`` ("cls_head")
         and ``n.box_preds`` ("reg_head"); the box head is followed by an
         in-place ReLU.
      4. Both heads are permuted with order [0, 2, 3, 1] and reshaped to
         ``[0, -1, num_cls]`` / ``[0, -1, box_code_size]``.

    Args:
        n: net spec the layers are attached to (used both via attribute and
            item assignment; presumably a ``caffe.NetSpec`` -- confirm).
        voxels, coors: bottoms of the ``BCLReshape`` layer.
        label: bottom of the loss-weight and label-encoding Python layers
            (only used when ``phase == "train"``).
        reg_targets: regression-target bottom of the smooth-L1 loss (only
            used when ``phase == "train"``).
        phase: ``"train"`` adds the loss layers, ``"eval"`` exposes the
            reshaped predictions as ``n.f_cls_preds`` / ``n.f_box_preds``.
            Any other value adds nothing beyond the shared trunk and heads.

    Returns:
        The same ``n`` that was passed in.
    """
    # Fixed hyper-parameters of this branch.
    box_code_size = 7
    num_anchor_per_loc = 1
    num_cls = 1

    def _head_conv_param(num_output):
        # Both prediction heads are 1x1 convolutions that differ only in
        # their number of outputs.
        return dict(
            num_output=num_output,
            kernel_size=1,
            stride=1,
            pad=0,
            weight_filler=dict(type='xavier'),
            bias_term=True,
            bias_filler=dict(type='constant', value=0),
            engine=1,
        )

    # ---- trunk -----------------------------------------------------------
    feats, lattice = L.Python(
        voxels,
        coors,
        ntop=2,
        python_param=dict(module='bcl_layers', layer='BCLReshape'))

    feats = conv_bn_relu(n, "conv0_obj", feats, 1, 64,
                         stride=1, pad=0, loop=1)

    # One lattice scale per BCL stage (six stages, coarse to fine).
    obj_lattice_scales = [
        "0*32_1*32_2*32",
        "0*16_1*16_2*16",
        "0*8_1*8_2*8",
        "0*4_1*4_2*4",
        "0*2_1*2_2*2",
        "0_1_2",
    ]
    feats = bcl_bn_relu(n, 'bcl_obj', feats, lattice,
                        nout=[64, 64, 128, 128, 128, 64],
                        lattic_scale=obj_lattice_scales,
                        loop=6,
                        skip='concat')

    feats = conv_bn_relu(n, "conv1_obj", feats, 1, 64,
                         stride=1, pad=0, loop=1)

    # ---- prediction heads ------------------------------------------------
    n.cls_preds = L.Convolution(
        feats,
        name="cls_head",
        convolution_param=_head_conv_param(num_anchor_per_loc * num_cls),
        param=[dict(lr_mult=1), dict(lr_mult=1)])

    n.box_preds = L.Convolution(
        feats,
        name="reg_head",
        convolution_param=_head_conv_param(num_anchor_per_loc * box_code_size),
        param=[dict(lr_mult=1), dict(lr_mult=1)])

    # WARNING: the regression output goes through an in-place ReLU.
    # (A 'CaLuV2' Python layer from bcl_layers was tried here as an
    # alternative and is currently disabled.)
    box_relu = L.ReLU(n.box_preds, in_place=True)

    # (B, C, H, W) -> (B, H, W, C) -> (B, -1, num_cls)
    cls_flat = L.Reshape(
        L.Permute(n.cls_preds, permute_param=dict(order=[0, 2, 3, 1])),
        reshape_param=dict(shape=dict(dim=[0, -1, num_cls])))

    # (B, C, H, W) -> (B, H, W, C) -> (B, -1, box_code_size)
    box_flat = L.Reshape(
        L.Permute(box_relu, permute_param=dict(order=[0, 2, 3, 1])),
        reshape_param=dict(shape=dict(dim=[0, -1, box_code_size])))

    if phase == "train":
        # Per-sample weights for both losses.  (The older three-output
        # 'PrepareLossWeight' layer, which also produced 'cared', is no
        # longer used by this branch.)
        n['reg_outside_weights'], n['cls_weights'] = L.Python(
            label,
            name="PrepareLossWeightV2",
            ntop=2,
            python_param=dict(module='bcl_layers',
                              layer='PrepareLossWeightV2'))

        # Gradients cannot be computed with respect to the label inputs
        # (bottom[1]), so the labels are encoded by a dedicated layer.
        n['labels_input'] = L.Python(
            label,
            name="LabelEncodeV2",
            python_param=dict(module='bcl_layers', layer='LabelEncodeV2'))

        n.cls_loss = L.Python(
            cls_flat,
            n['labels_input'],
            n['cls_weights'],
            name="FocalLoss",
            loss_weight=2,
            python_param=dict(module='bcl_layers', layer='WeightFocalLoss'),
            param_str=str(dict(focusing_parameter=2, alpha=0.25)))

        # The lattice top is passed to the regression loss as a fourth
        # bottom.  (An 'IoULossV2' loss was tried as an alternative and is
        # currently disabled.)
        n.reg_loss = L.Python(
            box_flat,
            reg_targets,
            n['reg_outside_weights'],
            lattice,
            name="WeightedSmoothL1Loss",
            loss_weight=1,
            python_param=dict(module='bcl_layers',
                              layer='WeightedSmoothL1Loss'))

    elif phase == "eval":
        # Expose the flattened predictions as named tops.
        n.f_cls_preds = cls_flat
        n.f_box_preds = box_flat

    return n
# Example #15
# 0
def segmentation(n, seg_points, label, coords, p2voxel_idx, cls_labels,
                 reg_targets, dataset_params, phase):
    """Attach the point-wise BCL branch plus voxel-level detection heads to ``n``.

    Stages built here, in order:
      1. Point branch: ``BCLReshape`` on ``seg_points`` -> 1x1 conv
         ("conv0_seg") -> three-stage BCL stack ("bcl_seg", no skip) ->
         1x1 convs "conv1_seg" (64) and "P2VX_Decov" (32).
      2. ``Point2Voxel3D`` Python layer (stored as ``n.p2vx``) combining the
         point features with ``p2voxel_idx``.
      3. Voxel branch: lattice derived from ``coords`` (permute + reshape)
         -> 1x1 conv ("conv2_seg_voxel") -> four-stage BCL stack
         ("bcl_seg_voxel", skip='concat') -> 1x1 conv ("conv3_seg_voxle").
      4. 1x1 heads ``n.cls_preds`` / ``n.box_preds`` (box head followed by
         an in-place ReLU), each permuted and reshaped.

    Args:
        n: net spec the layers are attached to (presumably a
            ``caffe.NetSpec`` -- confirm).
        seg_points: bottom of the point-level ``BCLReshape`` layer.
        label: third bottom of the ``LabelEncode`` layer (train only).
        coords: voxel coordinates used to build the voxel-branch lattice.
        p2voxel_idx: second bottom of the ``Point2Voxel3D`` layer.
        cls_labels: bottom of ``PrepareLossWeight`` / ``LabelEncode``
            (train only).
        reg_targets: regression targets for the smooth-L1 loss (train only).
        dataset_params: mapping providing 'num_cls', 'box_code_size',
            'num_anchor_per_loc', 'max_voxels' and 'points_per_voxel'.
        phase: ``"train"`` adds the loss layers, ``"eval"`` exposes
            ``n.f_cls_preds`` / ``n.f_box_preds``.

    Returns:
        The same ``n`` that was passed in.
    """
    # ---- parameters ------------------------------------------------------
    num_cls = dataset_params['num_cls']
    box_code_size = dataset_params['box_code_size']
    num_anchor_per_loc = dataset_params['num_anchor_per_loc']
    max_voxels = dataset_params['max_voxels']
    points_per_voxel = dataset_params['points_per_voxel']

    def _head_conv_param(num_output):
        # Both prediction heads are 1x1 convolutions that differ only in
        # their number of outputs.
        return dict(
            num_output=num_output,
            kernel_size=1,
            stride=1,
            pad=0,
            weight_filler=dict(type='xavier'),
            bias_term=True,
            bias_filler=dict(type='constant', value=0),
            engine=1,
        )

    # ---- point branch ----------------------------------------------------
    point_feats, point_lattice = L.Python(
        seg_points,
        ntop=2,
        python_param=dict(module='bcl_layers', layer='BCLReshape'))

    point_feats = conv_bn_relu(n, "conv0_seg", point_feats, 1, 64,
                               stride=1, pad=0, loop=1)

    # Author's note: if the lattice scale is too large the network becomes
    # really slow and does not give good results.  Deeper variants (five
    # and six stages with skip='concat', scales down to "0/8_1/8_2/8") were
    # tried; the three-stage stack below is the one currently enabled.
    point_feats = bcl_bn_relu(
        n, 'bcl_seg', point_feats, point_lattice,
        nout=[64, 128, 64],
        lattic_scale=["0*4_1*4_2*4", "0*2_1*2_2*2", "0_1_2"],
        loop=3,
        skip=None)

    point_feats = conv_bn_relu(n, "conv1_seg", point_feats, 1, 64,
                               stride=1, pad=0, loop=1)

    # A dedicated "seg_head" convolution with its own focal segmentation
    # loss used to sit here; it is disabled and the point features go
    # straight to the point-to-voxel stage.
    point_feats = conv_bn_relu(n, "P2VX_Decov", point_feats, 1, 32,
                               stride=1, pad=0, loop=1)

    # ---- point -> voxel --------------------------------------------------
    p2vx_cfg = dict(max_voxels=max_voxels, points_per_voxel=points_per_voxel)
    n.p2vx = L.Python(
        point_feats,
        p2voxel_idx,
        name="Point2Voxel3D",
        ntop=1,
        python_param=dict(module='bcl_layers', layer='Point2Voxel3D'),
        param_str=str(p2vx_cfg))
    voxel_feats = n.p2vx

    # Voxel lattice: swap the last two axes of coords, then reshape to
    # [0, -1, 1, max_voxels].
    coords_swapped = L.Permute(coords,
                               name="coords_permute",
                               permute_param=dict(order=[0, 2, 1]))
    voxel_lattice = L.Reshape(
        coords_swapped,
        name="coords_reshape",
        reshape_param=dict(shape=dict(dim=[0, -1, 1, max_voxels])))

    # ---- voxel branch ----------------------------------------------------
    voxel_feats = conv_bn_relu(n, "conv2_seg_voxel", voxel_feats, 1, 64,
                               stride=1, pad=0, loop=1)
    voxel_feats = bcl_bn_relu(
        n, 'bcl_seg_voxel', voxel_feats, voxel_lattice,
        nout=[64, 128, 128, 64],
        lattic_scale=["0*8_1*8_2*8", "0*4_1*4_2*4", "0*2_1*2_2*2", "0_1_2"],
        loop=4,
        skip='concat')
    voxel_feats = conv_bn_relu(n, "conv3_seg_voxle", voxel_feats, 1, 64,
                               stride=1, pad=0, loop=1)

    # ---- prediction heads ------------------------------------------------
    n.cls_preds = L.Convolution(
        voxel_feats,
        name="cls_head",
        convolution_param=_head_conv_param(num_anchor_per_loc * num_cls),
        param=[dict(lr_mult=1), dict(lr_mult=1)])

    n.box_preds = L.Convolution(
        voxel_feats,
        name="reg_head",
        convolution_param=_head_conv_param(num_anchor_per_loc * box_code_size),
        param=[dict(lr_mult=1), dict(lr_mult=1)])

    # The regression output goes through an in-place ReLU.
    box_relu = L.ReLU(n.box_preds, in_place=True)

    # (B, C, H, W) -> (B, H, W, C) -> (B, -1, 1)
    # NOTE: the last dim is the literal 1 here, not num_cls.
    cls_flat = L.Reshape(
        L.Permute(n.cls_preds, permute_param=dict(order=[0, 2, 3, 1])),
        reshape_param=dict(shape=dict(dim=[0, -1, 1])))

    # (B, C, H, W) -> (B, H, W, C) -> (B, -1, box_code_size)
    box_flat = L.Reshape(
        L.Permute(box_relu, permute_param=dict(order=[0, 2, 3, 1])),
        reshape_param=dict(shape=dict(dim=[0, -1, box_code_size])))

    if phase == "train":
        n['cared'], n['reg_outside_weights'], n['cls_weights'] = L.Python(
            cls_labels,
            name="PrepareLossWeight",
            ntop=3,
            python_param=dict(module='bcl_layers', layer='PrepareLossWeight'))

        # Gradients cannot be computed with respect to the label inputs
        # (bottom[1]), so the labels are encoded by a dedicated layer.
        n['labels_input'] = L.Python(
            cls_labels,
            n['cared'],
            label,
            name="Label_Encode",
            python_param=dict(module='bcl_layers', layer='LabelEncode'))

        n.cls_loss = L.Python(
            cls_flat,
            n['labels_input'],
            n['cls_weights'],
            name="FocalLoss",
            loss_weight=1,
            python_param=dict(module='bcl_layers', layer='WeightFocalLoss'),
            param_str=str(dict(focusing_parameter=2, alpha=0.25)))

        n.reg_loss = L.Python(
            box_flat,
            reg_targets,
            n['reg_outside_weights'],
            name="WeightedSmoothL1Loss",
            loss_weight=1,
            python_param=dict(module='bcl_layers',
                              layer='WeightedSmoothL1Loss'))

    elif phase == "eval":
        # Marked "Problem" by the original author, without further detail.
        n.f_cls_preds = cls_flat
        n.f_box_preds = box_flat

    return n


# Create test net: Python data layer -> WarpctcNetBody -> permute -> accuracy.
net = caffe.NetSpec()

# Switch the shared data-layer parameters to test mode and point them at the
# test label file before they are serialized into the layer's param_str.
params_str['train'] = False
params_str['label_txt'] = test_txt
net.data, net.label = L.Python(name="data", ntop=2, python_param={
  'module': "pythonLayer",
  'layer':  "WarpctcDataLayer",
  'param_str': str(params_str)
})

body_layer = WarpctcNetBody(net, net.data)
# Reorder the body output axes to [1, 0, 2] before feeding the accuracy layer.
net.premuted_fc = L.Permute(body_layer, order=[1,0,2])
net.accuracy = L.LabelsequenceAccuracy(net.premuted_fc, net.label, blank_label=10)

# Write the test prototxt (name line first, then the net) and copy it to job_dir.
with open(test_net_file, 'w') as f:
    print('name: "{}_test"'.format(model_name), file=f)
    print(net.to_proto(), file=f)
shutil.copy(test_net_file, job_dir)

# Create deploy net.
# Remove the first and last layer from test net.
deploy_net = net
with open(deploy_net_file, 'w') as f:
    net_param = deploy_net.to_proto()
    # Drop layer[0] and layer[-1] of the generated proto.  The original comment
    # named them AnnotatedData / DetectionEvaluate, but neither appears in this
    # script; presumably they are the Python "data" layer and the
    # LabelsequenceAccuracy layer here -- verify.
    # NOTE(review): nothing is written to `f` in the visible part of this
    # block, so deploy_net_file is left empty unless a write follows -- confirm.
    del net_param.layer[0]
    del net_param.layer[-1]
def UnitLayerDenseDetectorHeader(net, data_layer="data", num_classes=2, feature_layer="conv5",
        normalization=-1, use_batchnorm=True, prior_variance = [0.1],
        pro_widths=[], pro_heights=[], flip=True, clip=True,
        inter_layer_channels=0, flat=False, use_focus_loss=False, stage=1,lr_mult=1, decay_mult=1):
    """Build the loc / conf / prior-box head for one feature layer (dense variant).

    Layers are added to ``net`` under names prefixed with
    ``"<feature_layer>_<stage>"``:

      * optional ``L.Normalize`` layer (when ``normalization != -1``),
      * optional chain of intermediate Conv(+BN)+ReLU layers,
      * a 3x3 location conv with ``len(pro_widths) * 4 * (num_classes - 1)``
        outputs, permuted to order [0, 2, 3, 1] (and flattened if ``flat``),
      * a 3x3 confidence conv with ``len(pro_widths) * num_classes`` outputs,
        permuted (and flattened) the same way,
      * an ``L.PriorBox`` layer.

    Args:
        net: net spec to extend; must already contain ``data_layer`` and
            ``feature_layer``.
        data_layer: name of the data layer passed to the PriorBox layer.
        num_classes: number of classes (must be positive).
        feature_layer: name of the layer the head is attached to.
        normalization: initial scale for the Normalize layer; -1 disables it.
        use_batchnorm: whether the intermediate layers use batch norm.
        prior_variance: ``variance`` argument of the PriorBox layer.
        pro_widths, pro_heights: proposed prior sizes; same non-zero length.
        flip, clip: forwarded to the PriorBox layer.
        inter_layer_channels: sequence of ``(num_output, kernel_size)``
            entries describing the intermediate layers.  Any falsy value
            (the default ``0``, ``None`` or an empty sequence) means "no
            intermediate layers".
        flat: flatten the permuted loc / conf outputs from axis 1.
        use_focus_loss: initialise the confidence conv with the 'focal'
            bias type instead of the default initialisation.
        stage: stage index used in the layer-name prefix.
        lr_mult, decay_mult: forwarded to every ``ConvBNUnitLayer`` call.

    Returns:
        Tuple ``(loc_layer, conf_layer, priorbox_layer)``.

    Raises:
        AssertionError: if an argument precondition is violated.
        ValueError: if an intermediate kernel size is not a positive odd
            number (no symmetric size-preserving padding exists for it).
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers."
    assert feature_layer in net_layers, "feature_layer is not in net's layers."
    assert pro_widths, "Must provide proposed width/height."
    assert pro_heights, "Must provide proposed width/height."
    assert len(pro_widths) == len(pro_heights), "pro_widths/heights must have the same length."
    from_layer = feature_layer
    prefix_name = '{}_{}'.format(from_layer,stage)
    # Norm-Layer
    if normalization != -1:
        norm_name = "{}_norm".format(prefix_name)
        net[norm_name] = L.Normalize(net[from_layer], scale_filler=dict(type="constant", value=normalization),
            across_spatial=False, channel_shared=False)
        from_layer = norm_name
    # InterLayers
    # Truthiness instead of len(): the default value is the integer 0, on
    # which len() raises TypeError.
    if inter_layer_channels:
        for inter_id, inter_channel_kernel in enumerate(inter_layer_channels, 1):
            inter_channel = inter_channel_kernel[0]
            inter_kernel = inter_channel_kernel[1]
            inter_name = "{}_inter_{}".format(prefix_name, inter_id)
            # Size-preserving padding at stride 1: 1 -> 0, 3 -> 1, 5 -> 2, ...
            # Previously only 1 and 3 were handled and any other kernel either
            # crashed or silently reused the pad of the preceding layer.
            if inter_kernel < 1 or inter_kernel % 2 == 0:
                raise ValueError(
                    "inter layer kernel size must be a positive odd number, got {!r}".format(inter_kernel))
            inter_pad = int((inter_kernel - 1) // 2)
            ConvBNUnitLayer(net, from_layer, inter_name, use_bn=use_batchnorm, use_relu=True, num_output=inter_channel,
                 kernel_size=inter_kernel, pad=inter_pad, stride=1,use_scale=True, leaky=False,lr_mult=lr_mult, decay_mult=decay_mult,constant_value=0.2)
            from_layer = inter_name
    # PriorBoxes
    num_priors_per_location = len(pro_widths)
    # LOC: 4 coordinates per prior and per non-background class.
    name = "{}_mbox_loc".format(prefix_name)
    num_loc_output = num_priors_per_location * 4 * (num_classes-1)
    ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
        num_output=num_loc_output, kernel_size=3, pad=1, stride=1,lr_mult=lr_mult, decay_mult=decay_mult)
    permute_name = "{}_perm".format(name)
    net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layer = net[flatten_name]
    else:
        loc_layer = net[permute_name]
    # CONF: one score per prior and per class.
    name = "{}_mbox_conf".format(prefix_name)
    num_conf_output = num_priors_per_location * num_classes
    if use_focus_loss:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False, num_output=num_conf_output,
             kernel_size=3, pad=1, stride=1,init_xavier=False,bias_type='focal',sparse=num_classes,lr_mult=lr_mult, decay_mult=decay_mult)
    else:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
            num_output=num_conf_output, kernel_size=3, pad=1, stride=1,lr_mult=lr_mult, decay_mult=decay_mult)
    permute_name = "{}_perm".format(name)
    net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layer = net[flatten_name]
    else:
        conf_layer = net[permute_name]
    # PRIOR
    name = "{}_mbox_priorbox".format(prefix_name)
    net[name] = L.PriorBox(net[from_layer], net[data_layer], pro_width=pro_widths, pro_height=pro_heights,
        flip=flip, clip=clip, variance=prior_variance)
    priorbox_layer = net[name]
    return loc_layer,conf_layer,priorbox_layer
def UnitLayerDetectorHeader(net, data_layer="data", num_classes=2, feature_layer="conv5",
        normalization=-1, use_batchnorm=True, prior_variance = [0.1],
        pro_widths=[], pro_heights=[], flip=True, clip=True, inter_layer_channels=[],
        flat=False, use_focus_loss=False, stage=1,lr_mult=1.0,decay_mult=1.0,flag_withparamname=False,flagcreateprior = True,add_str = ""):
    """Build the loc / conf / prior-box head for one feature layer.

    Layers are added to ``net`` under names prefixed with
    ``"<feature_layer>_<stage>"``; ``add_str`` is appended to the names of
    the layers looked up or created here (permute, flatten, prior box) and
    is forwarded to ``ConvBNUnitLayer`` as ``pose_string``.

      * optional ``L.Normalize`` layer (when ``normalization != -1``),
      * optional chain of intermediate Conv(+BN)+ReLU layers,
      * a 3x3 location conv with ``len(pro_widths) * 4`` outputs, permuted
        to order [0, 2, 3, 1] (and flattened if ``flat``),
      * a 3x3 confidence conv with ``len(pro_widths) * num_classes``
        outputs, permuted (and flattened) the same way,
      * an ``L.PriorBox`` layer, unless ``flagcreateprior`` is false.

    Args:
        net: net spec to extend; must already contain ``data_layer`` and
            ``feature_layer + add_str``.
        data_layer: name of the data layer passed to the PriorBox layer.
        num_classes: number of classes (must be positive).
        feature_layer: name (without ``add_str``) of the layer the head is
            attached to.
        normalization: initial scale for the Normalize layer; -1 disables it.
        use_batchnorm: whether the intermediate layers use batch norm.
        prior_variance: ``variance`` argument of the PriorBox layer.
        pro_widths, pro_heights: proposed prior sizes; same non-zero length.
        flip, clip: forwarded to the PriorBox layer.
        inter_layer_channels: sequence of ``(num_output, kernel_size)``
            entries describing the intermediate layers; falsy means none.
        flat: flatten the permuted loc / conf outputs from axis 1.
        use_focus_loss: initialise the confidence conv with the 'focal'
            bias type instead of the default initialisation.
        stage: stage index used in the layer-name prefix.
        lr_mult, decay_mult: forwarded to every ``ConvBNUnitLayer`` call.
        flag_withparamname: forwarded to the intermediate
            ``ConvBNUnitLayer`` calls.
        flagcreateprior: when false no PriorBox layer is created and an
            empty list is returned in its place.
        add_str: suffix for layer names (see above).

    Returns:
        Tuple ``(loc_layer, conf_layer, priorbox_layer)``; the last element
        is ``[]`` when ``flagcreateprior`` is false.

    Raises:
        AssertionError: if an argument precondition is violated.
        ValueError: if an intermediate kernel size is not a positive odd
            number (no symmetric size-preserving padding exists for it).
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers."
    # Function-call form: valid on both Python 2 and Python 3 (the former
    # `print feature_layer` statement is a SyntaxError on Python 3).
    print(feature_layer)
    assert feature_layer + add_str in net_layers, "feature_layer is not in net's layers.(%s)" % feature_layer
    assert pro_widths, "Must provide proposed width/height. "
    assert pro_heights, "Must provide proposed width/height."
    assert len(pro_widths) == len(
        pro_heights), "pro_widths/heights must have the same length."
    from_layer = feature_layer
    prefix_name = '{}_{}'.format(from_layer, stage)
    from_layer += add_str
    # Norm-Layer
    if normalization != -1:
        # NOTE(review): `stage` ends up twice in this name (prefix_name already
        # contains it).  Left untouched because layer names may be referenced
        # by existing prototxts / weights -- confirm before changing.
        norm_name = "{}_{}_norm".format(prefix_name, stage)
        net[norm_name] = L.Normalize(net[from_layer], scale_filler=dict(type="constant", value=normalization),
            across_spatial=False, channel_shared=False)
        from_layer = norm_name
    print(inter_layer_channels, "inter_layer_channels")
    # InterLayers
    if inter_layer_channels:
        for inter_id, inter_channel_kernel in enumerate(inter_layer_channels, 1):
            inter_channel = inter_channel_kernel[0]
            inter_kernel = inter_channel_kernel[1]
            inter_name = "{}_inter_{}".format(prefix_name, inter_id)
            # Size-preserving padding at stride 1: 1 -> 0, 3 -> 1, 5 -> 2, ...
            # Previously only 1 and 3 were handled and any other kernel either
            # crashed or silently reused the pad of the preceding layer.
            if inter_kernel < 1 or inter_kernel % 2 == 0:
                raise ValueError(
                    "inter layer kernel size must be a positive odd number, got {!r}".format(inter_kernel))
            inter_pad = int((inter_kernel - 1) // 2)
            ConvBNUnitLayer(net, from_layer, inter_name, use_bn=use_batchnorm, use_relu=True,
                num_output=inter_channel, kernel_size=inter_kernel, pad=inter_pad, stride=1,use_scale=True, leaky=False,
                            lr_mult=lr_mult, decay_mult=decay_mult,flag_withparamname=flag_withparamname,pose_string=add_str)
            from_layer = inter_name + add_str
    # Estimate number of priors per location given provided parameters.
    num_priors_per_location = len(pro_widths)
    # Create location prediction layer (4 coordinates per prior).
    name = "{}_mbox_loc".format(prefix_name)
    num_loc_output = num_priors_per_location * 4
    ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
        num_output=num_loc_output, kernel_size=3, pad=1, stride=1,lr_mult=lr_mult, decay_mult=decay_mult,pose_string=add_str)
    permute_name = "{}_perm".format(name) + add_str
    net[permute_name] = L.Permute(net[name + add_str], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name) + add_str
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layer = net[flatten_name]
    else:
        loc_layer = net[permute_name]
    # Create confidence prediction layer (one score per prior and class).
    name = "{}_mbox_conf".format(prefix_name)
    num_conf_output = num_priors_per_location * num_classes
    if use_focus_loss:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
            num_output=num_conf_output, kernel_size=3, pad=1, stride=1,init_xavier=False,bias_type='focal',sparse=num_classes,
                        lr_mult=lr_mult, decay_mult=decay_mult,pose_string=add_str)
    else:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
            num_output=num_conf_output, kernel_size=3, pad=1, stride=1,lr_mult=lr_mult, decay_mult=decay_mult,pose_string=add_str)
    permute_name = "{}_perm".format(name) + add_str
    net[permute_name] = L.Permute(net[name + add_str], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name) + add_str
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layer = net[flatten_name]
    else:
        conf_layer = net[permute_name]

    # Create prior generation layer.
    if flagcreateprior:
        name = "{}_mbox_priorbox".format(prefix_name) + add_str
        net[name] = L.PriorBox(net[from_layer], net[data_layer], pro_width=pro_widths, pro_height=pro_heights,
            flip=flip, clip=clip, variance=prior_variance)
        priorbox_layer = net[name]
    else:
        priorbox_layer = []
    return loc_layer, conf_layer, priorbox_layer
# Example #19
# 0
def get_caffe_layer(node, net, input_dims):
    """Generate caffe layer for corresponding mxnet op.

    Args:
        node (iterable from MxnetParser): Mxnet op summary generated by
            MxnetParser. The keys read here are 'type', 'name', 'inputs'
            and 'attr'.
        net (caffe.net): Caffe netspec object. It is indexed by input name
            to obtain the bottoms of the generated layer.
        input_dims: Input dimensions. Only ``input_dims[0]`` is read, to
            scale the relative sizes and steps of '_contrib_MultiBoxPrior'.

    Returns:
        caffe.layers: Equivalent caffe layer. None is returned implicitly
        when the node type is not one of the types handled below.

    Raises:
        ValueError: If a Pooling node has a pool_type other than 'max'
            or 'avg'.
    """
    node_type = node['type']

    if node_type == 'Convolution':
        assert len(node['inputs']) == 1, \
            'Convolution layers can have only one input'
        conv_params = node['attr']
        kernel_size = make_list(conv_params['kernel'])
        num_filters = make_list(conv_params['num_filter'])[0]
        # Stride and dilation are optional; only their first entry is used
        # and both fall back to 1 when absent.
        if 'stride' in conv_params:
            stride = make_list(conv_params['stride'])[0]
        else:
            stride = 1
        padding = make_list(conv_params['pad'])
        if 'dilate' in conv_params:
            dilation = make_list(conv_params['dilate'])[0]
        else:
            dilation = 1
        convolution_param = {
            'pad': padding,
            'kernel_size': kernel_size,
            'num_output': num_filters,
            'stride': stride,
            'dilation': dilation
        }
        return layers.Convolution(net[node['inputs'][0]],
                                  convolution_param=convolution_param)

    if node_type == 'Activation':
        assert len(node['inputs']) == 1, \
            'Activation layers can have only one input'
        # Only ReLU activations are translated.
        assert node['attr']['act_type'] == 'relu'
        return layers.ReLU(net[node['inputs'][0]])

    if node_type == 'Pooling':
        assert len(node['inputs']) == 1, \
            'Pooling layers can have only one input'
        pool_attr = node['attr']
        pooling_type = pool_attr['pool_type']
        # Reject unknown pooling types explicitly; otherwise `pooling` would
        # be left unbound and fail later with an UnboundLocalError.
        if pooling_type not in ('max', 'avg'):
            raise ValueError(
                'Unsupported pooling type: %r' % (pooling_type,))
        kernel_size = make_list(pool_attr['kernel'])
        stride = make_list(pool_attr['stride'])
        if 'pad' in pool_attr:
            padding = make_list(pool_attr['pad'])
        else:
            padding = [0]
        if pooling_type == 'max':
            pooling = params.Pooling.MAX
        else:
            pooling = params.Pooling.AVG
        # Only the first entry of pad / kernel / stride is forwarded.
        pooling_param = {
            'pool': pooling,
            'pad': padding[0],
            'kernel_size': kernel_size[0],
            'stride': stride[0]
        }
        return layers.Pooling(net[node['inputs'][0]],
                              pooling_param=pooling_param)

    if node_type == 'L2Normalization':
        across_spatial = node['attr']['mode'] != 'channel'
        channel_shared = False
        scale_filler = {
            'type': "constant",
            'value': constants.NORMALIZATION_FACTOR
        }
        norm_param = {
            'across_spatial': across_spatial,
            'scale_filler': scale_filler,
            'channel_shared': channel_shared
        }
        return layers.Normalize(net[node['inputs'][0]], norm_param=norm_param)

    # Note - this layer has been implemented
    # only in WeiLiu's ssd branch of caffe not in caffe master
    if node_type == 'transpose':
        order = make_list(node['attr']['axes'])
        return layers.Permute(net[node['inputs'][0]],
                              permute_param={'order': order})

    if node_type == 'Flatten':
        # Inputs whose name ends with 'anchors' are flattened from axis 2,
        # everything else from axis 1.
        if node['inputs'][0].endswith('anchors'):
            axis = 2
        else:
            axis = 1
        return layers.Flatten(net[node['inputs'][0]],
                              flatten_param={'axis': axis})

    if node_type == 'Concat':
        # In the ssd model, always concatenate along last axis,
        # since anchor boxes have an extra dimension in caffe (that includes variance).
        axis = -1
        concat_inputs = [net[inp] for inp in node['inputs']]
        return layers.Concat(*concat_inputs, concat_param={'axis': axis})

    if node_type == 'Reshape':
        # The node named 'multibox_anchors' gets a fixed target shape; any
        # other Reshape uses the shape recorded in its attributes.
        if node['name'] == 'multibox_anchors':
            reshape_dims = [1, 2, -1]
        else:
            reshape_dims = make_list(node['attr']['shape'])
        return layers.Reshape(net[node['inputs'][0]],
                              reshape_param={'shape': {
                                  'dim': reshape_dims
                              }})

    if node_type == '_contrib_MultiBoxPrior':
        # The "data" blob is appended as an extra bottom of the PriorBox layer.
        priorbox_inputs = [net[inp] for inp in node['inputs']] + [net["data"]]
        sizes = make_list(node["attr"]["sizes"])
        min_size = sizes[0] * input_dims[0]
        # Chosen so that sqrt(min_size * max_size) equals
        # sizes[1] * input_dims[0], up to rounding.
        max_size = int(round((sizes[1] * input_dims[0])**2 / min_size))
        aspect_ratio = make_list(node["attr"]["ratios"])
        steps = make_list(node["attr"]["steps"])
        param = {
            'clip': node["attr"]["clip"] == "true",
            'flip': False,
            'min_size': min_size,
            'max_size': max_size,
            'aspect_ratio': aspect_ratio,
            'variance': [0.1, 0.1, 0.2, 0.2],
            'step': int(round(steps[0] * input_dims[0])),
        }
        return layers.PriorBox(*priorbox_inputs, prior_box_param=param)

    if node_type == '_contrib_MultiBoxDetection':
        multibox_inputs = [net[inp] for inp in node['inputs']]
        # Reorder the bottoms: mxnet input 1 first, then input 0, then 2.
        bottom_order = [1, 0, 2]
        multibox_inputs = [multibox_inputs[i] for i in bottom_order]
        param = {
            'num_classes': constants.NUM_CLASSES,
            'share_location': True,
            'background_label_id': 0,
            'nms_param': {
                'nms_threshold': float(node['attr']['nms_threshold']),
                'top_k': int(node['attr']['nms_topk'])
            },
            'keep_top_k': make_list(node['attr']['nms_topk'])[0],
            'confidence_threshold': 0.01,
            'code_type': params.PriorBox.CENTER_SIZE,
        }
        return layers.DetectionOutput(*multibox_inputs,
                                      detection_output_param=param)

    if node_type in ['SoftmaxActivation', 'SoftmaxOutput']:
        # Softmax runs over axis 1 unless a non-'channel' mode is given.
        if 'mode' not in node['attr'] or node['attr']['mode'] == 'channel':
            axis = 1
        else:
            axis = 0
        # note: caffe expects confidence scores to be flattened before detection output layer receives it
        softmax = layers.Softmax(net[node['inputs'][0]], axis=axis)
        permuted = layers.Permute(softmax, permute_param={'order': [0, 2, 1]})
        return layers.Flatten(permuted, flatten_param={'axis': 1})
Beispiel #20
0
def FSRCNN_s(img_list, label_list, batch_size, include_acc=False):
    """Build the FSRCNN-s network and return it via ``to_proto``.

    The net reads low-resolution images (resized to 640x360) and their
    targets (resized to 1280x720) from two ImageData layers, applies five
    convolutions with PReLU activations, rearranges the 12 output channels
    with a Reshape -> Permute -> Reshape sequence and attaches a
    EuclideanLoss against the target images.

    Args:
        img_list: Source list file of the input ImageData layer.
        label_list: Source list file of the target ImageData layer.
        batch_size: Batch size used by both ImageData layers.
        include_acc: Accepted but not used by this function.

    Returns:
        The result of ``to_proto`` over the data, label, activation, conv3,
        final reshape and loss tops.

    Note:
        ``root`` is read from module scope as the ImageData root folder.
    """
    print('Create FSRCNN_s')
    # data
    # https://www.cnblogs.com/houjun/p/9909764.html
    # The layer is declared with ntop=2, so it yields two tops. Unpack them
    # and use only the first one as the network input; handing the pair
    # itself to the next layer is not a valid bottom.
    data, _data_second_top = L.ImageData(
        name="data",
        ntop=2,
        source=img_list,
        batch_size=batch_size,
        is_color=True,
        new_width=640,
        new_height=360,
        root_folder=root,
        transform_param=dict(scale=0.00390625))
    # label (same two-top layout as above; only the first top is used)
    label, _label_second_top = L.ImageData(
        name="label",
        ntop=2,
        source=label_list,
        batch_size=batch_size,
        is_color=True,
        new_width=1280,
        new_height=720,
        root_folder=root,
        transform_param=dict(scale=0.00390625))

    # conv1
    conv1 = L.Convolution(
        data,
        name="conv1",
        num_output=32,
        kernel_size=5,
        stride=1,
        pad=1,
        weight_filler=dict(type='gaussian', std=0.05),
        bias_filler=dict(type='constant', value=0))
    relu1 = L.PReLU(conv1,
                    name="relu1",
                    in_place=True,
                    prelu_param={'channel_shared': 1})
    # conv2
    conv2 = L.Convolution(conv1,
                          name="conv2",
                          num_output=5,
                          kernel_size=1,
                          stride=1,
                          pad=0,
                          group=1,
                          weight_filler=dict(type='gaussian', std=0.05),
                          bias_filler=dict(type='constant', value=0))
    relu2 = L.PReLU(conv2,
                    name="relu2",
                    in_place=True,
                    prelu_param={'channel_shared': 1})
    # conv22
    conv22 = L.Convolution(conv2,
                           name="conv22",
                           num_output=5,
                           kernel_size=3,
                           stride=1,
                           pad=1,
                           group=1,
                           weight_filler=dict(type='gaussian', std=0.05),
                           bias_filler=dict(type='constant', value=0))
    relu22 = L.PReLU(conv22,
                     name="relu22",
                     in_place=True,
                     prelu_param={'channel_shared': 1})
    # conv23
    conv23 = L.Convolution(conv22,
                           name="conv23",
                           num_output=32,
                           kernel_size=1,
                           stride=1,
                           pad=1,
                           group=1,
                           weight_filler=dict(type='gaussian', std=0.05),
                           bias_filler=dict(type='constant', value=0))
    relu23 = L.PReLU(conv23,
                     name="relu23",
                     in_place=True,
                     prelu_param={'channel_shared': 1})
    # conv3
    conv3 = L.Convolution(conv23,
                          name="conv3",
                          num_output=12,
                          kernel_size=3,
                          stride=1,
                          pad=1,
                          weight_filler=dict(type='gaussian', std=0.05),
                          bias_filler=dict(type='constant', value=0))
    # shuffle
    # The dims / order values must be given as lists: a dict literal that
    # repeats the same key keeps only the last value, which would silently
    # drop all but one dimension.
    reshape1 = L.Reshape(conv3,
                         name="reshape_to_6d",
                         shape={'dim': [0, 2, 2, 3, 360, -1]})
    permute = L.Permute(reshape1,
                        name="permute",
                        permute_param={'order': [0, 3, 4, 1, 5, 2]})
    reshape2 = L.Reshape(permute,
                         name="reshape_to_4d",
                         shape={'dim': [0, 3, 720, -1]})
    # loss
    loss = L.EuclideanLoss(reshape2, label, name="loss")

    return to_proto(data, label, relu1, relu2, relu22, relu23, conv3, reshape2,
                    loss)
def mfb_coatt(mode, batchsize, T, question_vocab_size, folder):
    """Build the MFB co-attention VQA network and return its proto.

    Args:
        mode: 'val' selects the 'vqa_data_layer_hdf5' data module and a
            SoftmaxWithLoss; any other value selects
            'vqa_data_layer_kld_hdf5' and a SoftmaxKLDLoss.
        batchsize: Forwarded to the data layer through ``param_str`` and
            used as the first dimension of the DummyData blobs.
        T: Accepted but not used by this function.
        question_vocab_size: ``input_dim`` of the word Embed layer.
        folder: Forwarded to the data layer through ``param_str``.

    Returns:
        The result of ``n.to_proto()`` for the assembled NetSpec.

    Note:
        Layer sizes and dropout ratios are read from the module-level
        ``config`` object.
    """
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode':mode, 'batchsize':batchsize,'folder':folder})
    if mode == 'val':
        data_module = 'vqa_data_layer_hdf5'
    else:
        data_module = 'vqa_data_layer_kld_hdf5'
    n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(
        module=data_module, layer='VQADataProviderLayer',
        param_str=mode_str, ntop=5)
    n.embed = L.Embed(n.data, input_dim=question_vocab_size, num_output=300,
                      weight_filler=dict(type='xavier'))
    n.embed_tanh = L.TanH(n.embed)
    concat_word_embed = [n.embed_tanh, n.glove]
    n.concat_embed = L.Concat(*concat_word_embed, concat_param={'axis': 2}) # T x N x 600

    # LSTM
    n.lstm1 = L.LSTM(
        n.concat_embed, n.cont,
        recurrent_param=dict(
            num_output=config.LSTM_UNIT_NUM,
            weight_filler=dict(type='xavier')))
    n.lstm1_droped = L.Dropout(n.lstm1, dropout_param={'dropout_ratio':config.LSTM_DROPOUT_RATIO})
    n.lstm1_resh = L.Permute(n.lstm1_droped, permute_param=dict(order=[1,2,0]))
    n.lstm1_resh2 = L.Reshape(n.lstm1_resh,
                              reshape_param=dict(shape=dict(dim=[0,0,0,1])))

    # ---------------------------------------------------------------
    # Question Attention
    # ---------------------------------------------------------------
    n.qatt_conv1 = L.Convolution(n.lstm1_resh2, kernel_size=1, stride=1, num_output=512, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.qatt_relu = L.ReLU(n.qatt_conv1)
    n.qatt_conv2 = L.Convolution(n.qatt_relu, kernel_size=1, stride=1, num_output=config.NUM_QUESTION_GLIMPSE, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.qatt_reshape = L.Reshape(n.qatt_conv2, reshape_param=dict(shape=dict(dim=[-1,config.NUM_QUESTION_GLIMPSE,config.MAX_WORDS_IN_QUESTION,1]))) # N*NUM_QUESTION_GLIMPSE*15
    n.qatt_softmax = L.Softmax(n.qatt_reshape, axis=2)

    qatt_maps = L.Slice(n.qatt_softmax, ntop=config.NUM_QUESTION_GLIMPSE, slice_param={'axis':1})
    dummy_lstm = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1)
    qatt_feature_list = []
    # `range` instead of the Python-2-only `xrange`, so the builder runs on
    # both Python 2 and Python 3.
    for i in range(config.NUM_QUESTION_GLIMPSE):
        # With a single glimpse the Slice result is used directly rather
        # than indexed (presumably it is a single top, not a sequence).
        if config.NUM_QUESTION_GLIMPSE == 1:
            glimpse_map = qatt_maps
        else:
            glimpse_map = qatt_maps[i]
        setattr(n, 'qatt_feat%d'%i, L.SoftAttention(n.lstm1_resh2, glimpse_map, dummy_lstm))
        qatt_feature_list.append(getattr(n, 'qatt_feat%d'%i))
    n.qatt_feat_concat = L.Concat(*qatt_feature_list)

    # ---------------------------------------------------------------
    # Image Attention with MFB
    # ---------------------------------------------------------------
    n.q_feat_resh = L.Reshape(n.qatt_feat_concat, reshape_param=dict(shape=dict(dim=[0,-1,1,1])))
    n.i_feat_resh = L.Reshape(n.img_feature, reshape_param=dict(shape=dict(dim=[0,-1,config.IMG_FEAT_WIDTH,config.IMG_FEAT_WIDTH])))

    n.iatt_q_proj = L.InnerProduct(n.q_feat_resh, num_output = config.JOINT_EMB_SIZE,
                                   weight_filler=dict(type='xavier'))
    n.iatt_q_resh = L.Reshape(n.iatt_q_proj, reshape_param=dict(shape=dict(dim=[-1,config.JOINT_EMB_SIZE,1,1])))
    n.iatt_q_tile1 = L.Tile(n.iatt_q_resh, axis=2, tiles=config.IMG_FEAT_WIDTH)
    n.iatt_q_tile2 = L.Tile(n.iatt_q_tile1, axis=3, tiles=config.IMG_FEAT_WIDTH)

    n.iatt_i_conv = L.Convolution(n.i_feat_resh, kernel_size=1, stride=1, num_output=config.JOINT_EMB_SIZE, pad=0,
                                  weight_filler=dict(type='xavier'))
    n.iatt_i_resh1 = L.Reshape(n.iatt_i_conv, reshape_param=dict(shape=dict(dim=[-1,config.JOINT_EMB_SIZE,
                                                                      config.IMG_FEAT_WIDTH,config.IMG_FEAT_WIDTH])))
    n.iatt_iq_eltwise = L.Eltwise(n.iatt_q_tile2, n.iatt_i_resh1, eltwise_param=dict(operation=0))
    n.iatt_iq_droped = L.Dropout(n.iatt_iq_eltwise, dropout_param={'dropout_ratio':config.MFB_DROPOUT_RATIO})
    # NOTE(review): the name `iatt_iq_resh2` is assigned twice (here and two
    # statements below); the second assignment rebinds it. Kept as in the
    # original so generated layer/blob names do not change - confirm whether
    # the first one was meant to have its own name.
    n.iatt_iq_resh2 = L.Reshape(n.iatt_iq_droped, reshape_param=dict(shape=dict(dim=[-1,config.JOINT_EMB_SIZE,config.IMG_FEAT_SIZE,1])))
    n.iatt_iq_permute1 = L.Permute(n.iatt_iq_resh2, permute_param=dict(order=[0,2,1,3]))
    n.iatt_iq_resh2 = L.Reshape(n.iatt_iq_permute1, reshape_param=dict(shape=dict(dim=[-1,config.IMG_FEAT_SIZE,
                                                                       config.MFB_OUT_DIM,config.MFB_FACTOR_NUM])))
    n.iatt_iq_sumpool = L.Pooling(n.iatt_iq_resh2, pool=P.Pooling.SUM,
                                  pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.iatt_iq_permute2 = L.Permute(n.iatt_iq_sumpool, permute_param=dict(order=[0,2,1,3]))

    n.iatt_iq_sqrt = L.SignedSqrt(n.iatt_iq_permute2)
    n.iatt_iq_l2 = L.L2Normalize(n.iatt_iq_sqrt)

    ## 2 conv layers 1000 -> 512 -> 2
    n.iatt_conv1 = L.Convolution(n.iatt_iq_l2, kernel_size=1, stride=1, num_output=512, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.iatt_relu = L.ReLU(n.iatt_conv1)
    n.iatt_conv2 = L.Convolution(n.iatt_relu, kernel_size=1, stride=1, num_output=config.NUM_IMG_GLIMPSE, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.iatt_resh = L.Reshape(n.iatt_conv2, reshape_param=dict(shape=dict(dim=[-1,config.NUM_IMG_GLIMPSE,config.IMG_FEAT_SIZE])))
    n.iatt_softmax = L.Softmax(n.iatt_resh, axis=2)
    n.iatt_softmax_resh = L.Reshape(n.iatt_softmax, reshape_param=dict(shape=dict(dim=[-1,config.NUM_IMG_GLIMPSE,config.IMG_FEAT_WIDTH,config.IMG_FEAT_WIDTH])))
    iatt_maps = L.Slice(n.iatt_softmax_resh, ntop=config.NUM_IMG_GLIMPSE, slice_param={'axis':1})
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1)
    iatt_feature_list = []
    for i in range(config.NUM_IMG_GLIMPSE):
        # Same single-glimpse special case as for the question attention.
        if config.NUM_IMG_GLIMPSE == 1:
            glimpse_map = iatt_maps
        else:
            glimpse_map = iatt_maps[i]
        setattr(n, 'iatt_feat%d'%i, L.SoftAttention(n.i_feat_resh, glimpse_map, dummy))
        setattr(n, 'iatt_feat%d_resh'%i, L.Reshape(getattr(n, 'iatt_feat%d'%i),
                                                   reshape_param=dict(shape=dict(dim=[0,-1]))))
        iatt_feature_list.append(getattr(n, 'iatt_feat%d_resh'%i))
    n.iatt_feat_concat = L.Concat(*iatt_feature_list)
    n.iatt_feat_concat_resh = L.Reshape(n.iatt_feat_concat, reshape_param=dict(shape=dict(dim=[0,-1,1,1])))

    # ---------------------------------------------------------------
    # Fine-grained Image-Question MFB fusion
    # ---------------------------------------------------------------
    n.mfb_q_proj = L.InnerProduct(n.q_feat_resh, num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_i_proj = L.InnerProduct(n.iatt_feat_concat_resh, num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_iq_eltwise = L.Eltwise(n.mfb_q_proj, n.mfb_i_proj, eltwise_param=dict(operation=0))
    n.mfb_iq_drop = L.Dropout(n.mfb_iq_eltwise, dropout_param={'dropout_ratio':config.MFB_DROPOUT_RATIO})
    n.mfb_iq_resh = L.Reshape(n.mfb_iq_drop, reshape_param=dict(shape=dict(dim=[-1,1,config.MFB_OUT_DIM,config.MFB_FACTOR_NUM])))
    n.mfb_iq_sumpool = L.Pooling(n.mfb_iq_resh, pool=P.Pooling.SUM,
                                 pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.mfb_out = L.Reshape(n.mfb_iq_sumpool,
                          reshape_param=dict(shape=dict(dim=[-1,config.MFB_OUT_DIM])))
    n.mfb_sign_sqrt = L.SignedSqrt(n.mfb_out)
    n.mfb_l2 = L.L2Normalize(n.mfb_sign_sqrt)

    n.prediction = L.InnerProduct(n.mfb_l2, num_output=config.NUM_OUTPUT_UNITS,
                                  weight_filler=dict(type='xavier'))
    if mode == 'val':
        n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    else:
        n.loss = L.SoftmaxKLDLoss(n.prediction, n.label)
    return n.to_proto()
Beispiel #22
0
def test_v1(phase,
            dataset_params=None,
            model_cfg = None,
            deploy=False,
            create_prototxt=True,
            save_path=None,
            ):

    #RPN config
    num_filters=list(model_cfg.rpn.num_filters)
    layer_nums=list(model_cfg.rpn.layer_nums)
    layer_strides=list(model_cfg.rpn.layer_strides)
    num_upsample_filters=list(model_cfg.rpn.num_upsample_filters)
    upsample_strides=list(model_cfg.rpn.upsample_strides)

    box_code_size = 7
    num_anchor_per_loc = 2

    n = caffe.NetSpec()

    if phase == "train":

        dataset_params_train = dataset_params.copy()
        dataset_params_train['subset'] = phase

        datalayer_train = L.Python(name='data', include=dict(phase=caffe.TRAIN),
                                   ntop= 4, python_param=dict(module='custom_layers', layer='InputKittiData',
                                                     param_str=repr(dataset_params_train)))

        n.data, n.coors, n.labels, n.reg_targets = datalayer_train

    elif phase == "eval":
        dataset_params_eval = dataset_params.copy()
        dataset_params_eval['subset'] = phase

        datalayer_eval = L.Python(name='data', include=dict(phase=caffe.TEST),
                                  ntop= 9, python_param=dict(module='custom_layers', layer='InputKittiData',
                                                     param_str=repr(dataset_params_eval)))

        n.data, n.coors, n.anchors, n.rect, n.trv2c, n.p2, n.anchors_mask, n.img_idx, n.img_shape = datalayer_eval

    if deploy:
        print("[debug] run deploy in caffe_model.py")
        # n.data = L.Input(shape=dict(dim=[1, len(input_dims), 1, sample_size]))
        # n.coors = L.Input(shape=dict(dim=[1, len(input_dims), 1, sample_size]))
        # n.reg_targets = L.Input(shape=dict(dim=[1, len(input_dims), 1, sample_size]))


    # top_prev = L.Reshape(n.data, reshape_param=dict(shape=dict(dim=[0, 0, 1, -1])))
    #
    # n['conv' + str(idx)], top_lattice = L.Permutohedral(top_prev, top_data_lattice, top_data_lattice,
    #                                                     ntop=2,
    #                                                     permutohedral_param=dict(
    #                                                         num_output=n_out,
    #                                                         group=1,
    #                                                         neighborhood_size=bilateral_nbr,
    #                                                         bias_term=True,
    #                                                         norm_type=P.Permutohedral.AFTER,
    #                                                         offset_type=P.Permutohedral.NONE,
    #                                                         filter_filler=bltr_weight_filler,
    #                                                         bias_filler=dict(type='constant',
    #                                                                          value=0)),
    #                                                     param=[{'lr_mult': 1, 'decay_mult': 1},
    #                                                            {'lr_mult': 2, 'decay_mult': 0}])

    top_prev = conv_bn_relu(n, "mlp", n.data, 1, 64, stride=1, pad=0, loop=1)

    n['max_pool'] = L.Pooling(top_prev, pooling_param = dict(kernel_h=1, kernel_w=100, stride=1, pad=0,
                                        pool = caffe.params.Pooling.MAX)) #(1,64,voxel,1)
    top_prev = n['max_pool']

    n['PillarScatter'] = L.Python(top_prev, n.coors, python_param=dict(
                                                module='custom_layers',
                                                layer='PointPillarsScatter',
                                                param_str=str(dict(output_shape=[1, 1, 496, 432, 64],
                                                                ))))
    top_prev = n['PillarScatter']


    top_prev = conv_bn_relu(n, "ini_conv1", top_prev, 3, num_filters[0], stride=layer_strides[0], pad=1, loop=1)

    top_prev = conv_bn_relu(n, "rpn_conv1", top_prev, 3, num_filters[0], stride=1, pad=1, loop=3)

    deconv1 = deconv_bn_relu(n, "rpn_deconv1", top_prev, upsample_strides[0], num_upsample_filters[0], stride=upsample_strides[0], pad=0)


    top_prev = conv_bn_relu(n, "ini_conv2", top_prev, 3, num_filters[1], stride=layer_strides[1], pad=1, loop=1)

    top_prev = conv_bn_relu(n, "rpn_conv2", top_prev, 3, num_filters[1], stride=1, pad=1, loop=3)

    deconv2 = deconv_bn_relu(n, "rpn_deconv2", top_prev, upsample_strides[1], num_upsample_filters[1], stride=upsample_strides[1], pad=0)


    top_prev = conv_bn_relu(n, "ini_conv3", top_prev, 3, num_filters[2], stride=layer_strides[2], pad=1, loop=1)

    top_prev = conv_bn_relu(n, "rpn_conv3", top_prev, 3, num_filters[2], stride=1, pad=1, loop=3)

    deconv3 = deconv_bn_relu(n, "rpn_deconv3", top_prev, upsample_strides[2], num_upsample_filters[2], stride=upsample_strides[2], pad=0)


    n['rpn_out'] = L.Concat(deconv1, deconv2, deconv3)
    top_prev = n['rpn_out']


    num_cls = 2
    n['cls_preds'] = L.Convolution(top_prev, name = "cls_head",
                         convolution_param=dict(num_output=num_cls,
                                                kernel_size=1, stride=1, pad=0,
                                                weight_filler=dict(type = 'xavier'),
                                                bias_term = True,
                                                bias_filler=dict(type='constant', value=0),
                                                engine=1,
                                                ),
                         param=[dict(lr_mult=1), dict(lr_mult=1)])
    cls_preds = n['cls_preds']


    box_code_size = 7
    num_anchor_per_loc = 2
    n['box_preds'] = L.Convolution(top_prev, name = "reg_head",
                          convolution_param=dict(num_output=num_anchor_per_loc * box_code_size,
                                                 kernel_size=1, stride=1, pad=0,
                                                 weight_filler=dict(type = 'xavier'),
                                                 bias_term = True,
                                                 bias_filler=dict(type='constant', value=0),
                                                 engine=1,
                                                 ),
                          param=[dict(lr_mult=1), dict(lr_mult=1)])

    box_preds = n['box_preds']

    if phase == "train":

        n['cared'],n['reg_outside_weights'], n['cls_weights']= L.Python(n.labels,
                                                                        name = "PrepareLossWeight",
                                                                        ntop = 3,
                                                                        python_param=dict(
                                                                                    module='custom_layers',
                                                                                    layer='PrepareLossWeight'
                                                                                    ))
        reg_outside_weights, cared, cls_weights = n['reg_outside_weights'], n['cared'], n['cls_weights']

        # Gradients cannot be computed with respect to the label inputs (bottom[1])#
        n['labels_input'] = L.Python(n.labels, cared,
                            name = "Label_Encode",
                            python_param=dict(
                                        module='custom_layers',
                                        layer='LabelEncode',
                                        ))
        labels_input = n['labels_input']


        n['cls_preds_permute'] = L.Permute(cls_preds, permute_param=dict(order=[0, 2, 3, 1])) #(B,C,H,W) -> (B,H,W,C)
        cls_preds_permute = n['cls_preds_permute']
        n['cls_preds_reshape'] = L.Reshape(cls_preds_permute, reshape_param=dict(shape=dict(dim=[0, -1, 1])))# (B,H,W,C) -> (B, -1, C)
        cls_preds_reshape = n['cls_preds_reshape']


        n.cls_loss= L.Python(cls_preds_reshape, labels_input, cls_weights,
                                name = "FocalLoss",
                                loss_weight = 1,
                                python_param=dict(
                                            module='custom_layers',
                                            layer='WeightFocalLoss'
                                            ),
                                param_str=str(dict(focusing_parameter=2, alpha=0.25)))

        box_code_size = 7
        n['box_preds_permute'] = L.Permute(box_preds, permute_param=dict(order=[0, 2, 3, 1])) #(B,C,H,W) -> (B,H,W,C)
        box_preds_permute = n['box_preds_permute']
        n['box_preds_reshape'] = L.Reshape(box_preds_permute, reshape_param=dict(shape=dict(dim=[0, -1, box_code_size]))) #(B,H,W,C) -> (B, -1, C)
        box_preds_reshape = n['box_preds_reshape']

        n.reg_loss= L.Python(box_preds_reshape, n.reg_targets, reg_outside_weights,
                                name = "WeightedSmoothL1Loss",
                                loss_weight = 1,
                                python_param=dict(
                                            module='custom_layers',
                                            layer='WeightedSmoothL1Loss'
                                            ))

        return n.to_proto()

    elif phase == "eval":

        n['iou'] = L.Python(box_preds,
                            cls_preds,
                            n.anchors, n.rect,
                            n.trv2c, n.p2, n.anchors_mask,
                            n.img_idx, n.img_shape,
                            name = "EvalLayer",
                            python_param=dict(
                            module='custom_layers',
                            layer='EvalLayer_v2',
                            param_str=repr(dataset_params_eval),
                            ))


        return n.to_proto()

    else:
        raise ValueError
Beispiel #23
0
def CreateMultiBoxHead(net,
                       data_layer="data",
                       num_classes=[],
                       from_layers=[],
                       use_objectness=False,
                       normalizations=[],
                       use_batchnorm=True,
                       lr_mult=1,
                       use_scale=True,
                       min_sizes=[],
                       max_sizes=[],
                       prior_variance=[0.1],
                       aspect_ratios=[],
                       steps=[],
                       img_height=0,
                       img_width=0,
                       share_location=True,
                       flip=True,
                       clip=True,
                       offset=0.5,
                       inter_layer_depth=[],
                       kernel_size=1,
                       pad=0,
                       conf_postfix='',
                       loc_postfix='',
                       head_postfix='ext/pm',
                       **bn_param):
    """Attach multibox prediction branches to ``net`` and return the merged tops.

    For each entry of ``from_layers`` (position ``i``) the following layers
    are added to ``net``, in this order:

    1. an ``L.Normalize`` layer, when ``normalizations[i] != -1``;
    2. a 3x3 ``ConvBNLayer`` with ReLU, when ``inter_layer_depth[i] > 0``;
    3. a location branch (conv -> permute -> flatten);
    4. a confidence branch (conv -> permute -> flatten);
    5. an ``L.PriorBox`` layer fed by the source layer and ``data_layer``;
    6. an objectness branch, when ``use_objectness`` is true.

    Per-layer layer names are prefixed with ``head_postfix`` followed by the
    1-based position of the source layer.

    Args:
        net: layer container; it is indexed by layer name and must offer
            ``keys()`` and ``update(name, params)``.
        data_layer: name of the layer passed as second bottom to PriorBox.
        num_classes: number of classes; must be truthy and positive.
        from_layers: names of the layers the branches are attached to.
        use_objectness: also build a two-outputs-per-prior objectness branch.
        normalizations: optional per-layer scale fill value (-1 disables).
        use_batchnorm, lr_mult, kernel_size, pad, **bn_param: forwarded to
            ``ConvBNLayer``.
        use_scale: accepted but not read by this function.
        min_sizes, max_sizes, aspect_ratios, steps: per-layer prior settings;
            scalars are wrapped into one-element lists (``steps`` excepted).
        prior_variance, clip, offset, flip: forwarded to the PriorBox layer.
        img_height, img_width: when both are non-zero they are written to the
            PriorBox layer (as ``img_size`` if equal, else ``img_h``/``img_w``).
        share_location: when false, location outputs are multiplied by
            ``num_classes``.
        inter_layer_depth: optional per-layer width of the intermediate conv.
        conf_postfix, loc_postfix: suffixes of the conf / loc layer names.
        head_postfix: prefix used for every per-layer name created here.

    Returns:
        ``[mbox_loc, mbox_conf, mbox_priorbox]`` and, when ``use_objectness``
        is set, ``mbox_objectness`` as a fourth element.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"

    # Every per-layer option list has to line up with from_layers.
    layer_count = len(from_layers)
    if normalizations:
        assert layer_count == len(normalizations), \
            "from_layers and normalizations should have same length"
    assert layer_count == len(min_sizes), \
        "from_layers and min_sizes should have same length"
    if max_sizes:
        assert layer_count == len(max_sizes), \
            "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert layer_count == len(aspect_ratios), \
            "from_layers and aspect_ratios should have same length"
    if steps:
        assert layer_count == len(steps), \
            "from_layers and steps should have same length"
    assert data_layer in net.keys(), "data_layer is not in net's layers"
    if inter_layer_depth:
        assert layer_count == len(inter_layer_depth), \
            "from_layers and inter_layer_depth should have same length"

    def _as_list(value):
        # Wrap anything that is not exactly a list into a one-element list.
        return value if type(value) is list else [value]

    def _flat_branch(bottom, top, num_output):
        # Conv (no ReLU) -> NCHW to NHWC permute -> flatten from axis 1.
        ConvBNLayer(net,
                    bottom,
                    top,
                    use_bn=use_batchnorm,
                    use_relu=False,
                    lr_mult=lr_mult,
                    num_output=num_output,
                    kernel_size=kernel_size,
                    pad=pad,
                    stride=1,
                    **bn_param)
        permuted = "{}_perm".format(top)
        net[permuted] = L.Permute(net[top], order=[0, 2, 3, 1])
        flattened = "{}_flat".format(top)
        net[flattened] = L.Flatten(net[permuted], axis=1)
        return net[flattened]

    priorbox_layers, loc_layers = [], []
    conf_layers, objectness_layers = [], []

    for i, from_layer in enumerate(from_layers):
        # Shared name stem: head_postfix + 1-based layer position.
        stem = "{}{}".format(head_postfix, i + 1)

        # Optional normalization of the source layer.
        if normalizations and normalizations[i] != -1:
            norm_name = "{}_norm".format(stem)
            net[norm_name] = L.Normalize(
                net[from_layer],
                scale_filler=dict(type="constant", value=normalizations[i]),
                across_spatial=False,
                channel_shared=False)
            from_layer = norm_name

        # Optional intermediate 3x3 convolution.
        if inter_layer_depth and inter_layer_depth[i] > 0:
            inter_name = "{}_inter".format(stem)
            ConvBNLayer(net,
                        from_layer,
                        inter_name,
                        use_bn=use_batchnorm,
                        use_relu=True,
                        lr_mult=lr_mult,
                        num_output=inter_layer_depth[i],
                        kernel_size=3,
                        pad=1,
                        stride=1,
                        **bn_param)
            from_layer = inter_name

        # Work out how many priors each spatial location carries.
        min_size = _as_list(min_sizes[i])
        aspect_ratio = _as_list(aspect_ratios[i]) if len(aspect_ratios) > i else []
        max_size = _as_list(max_sizes[i]) if len(max_sizes) > i else []
        if max_size:
            assert len(max_size) == len(
                min_size), "max_size and min_size should have same length."
        per_min_size = (2 if max_size else 1) + len(aspect_ratio)
        if flip:
            # Flipping counts every aspect ratio a second time.
            per_min_size += len(aspect_ratio)
        num_priors_per_location = per_min_size * len(min_size)
        step = steps[i] if len(steps) > i else []

        # Location branch: 4 outputs per prior (per class if not shared).
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes
        loc_layers.append(
            _flat_branch(from_layer,
                         "{}_mbox_loc{}".format(stem, loc_postfix),
                         num_loc_output))

        # Confidence branch: one output per prior and class.
        conf_layers.append(
            _flat_branch(from_layer,
                         "{}_mbox_conf{}".format(stem, conf_postfix),
                         num_priors_per_location * num_classes))

        # Prior box layer, then the optional parameters applied one by one.
        prior_name = "{}_mbox_priorbox".format(stem)
        net[prior_name] = L.PriorBox(net[from_layer],
                                     net[data_layer],
                                     min_size=min_size,
                                     clip=clip,
                                     variance=prior_variance,
                                     offset=offset)
        extra_params = []
        if max_size:
            extra_params.append({'max_size': max_size})
        if aspect_ratio:
            extra_params.append({'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            extra_params.append({'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                extra_params.append({'img_size': img_height})
            else:
                extra_params.append({'img_h': img_height, 'img_w': img_width})
        for params in extra_params:
            net.update(prior_name, params)
        priorbox_layers.append(net[prior_name])

        # Objectness branch: 2 outputs per prior.
        if use_objectness:
            objectness_layers.append(
                _flat_branch(from_layer,
                             "{}_mbox_objectness".format(stem),
                             num_priors_per_location * 2))

    # Merge the per-layer outputs; prior boxes are joined along axis 2.
    concat_plan = [
        ("mbox_loc", loc_layers, 1),
        ("mbox_conf", conf_layers, 1),
        ("mbox_priorbox", priorbox_layers, 2),
    ]
    if use_objectness:
        concat_plan.append(("mbox_objectness", objectness_layers, 1))

    mbox_layers = []
    for top_name, parts, axis in concat_plan:
        net[top_name] = L.Concat(*parts, axis=axis)
        mbox_layers.append(net[top_name])
    return mbox_layers
Beispiel #24
0
def _refinedet_prior_spec(index, min_sizes, max_sizes, aspect_ratios, flip):
    """Return ``(min_size, max_size, aspect_ratio, num_priors)`` for one layer.

    The three size settings are taken from position ``index`` of their lists
    (missing ``max_sizes`` / ``aspect_ratios`` entries become ``[]``) and
    scalars are wrapped into one-element lists. ``num_priors`` is the number
    of prior boxes per spatial location implied by those settings.
    """
    min_size = min_sizes[index]
    if not isinstance(min_size, list):
        min_size = [min_size]

    aspect_ratio = []
    if len(aspect_ratios) > index:
        aspect_ratio = aspect_ratios[index]
        if not isinstance(aspect_ratio, list):
            aspect_ratio = [aspect_ratio]

    max_size = []
    if len(max_sizes) > index:
        max_size = max_sizes[index]
        if not isinstance(max_size, list):
            max_size = [max_size]
        if max_size:
            assert len(max_size) == len(min_size), "max_size and min_size should have same length."

    # One prior per min_size, one more when a max_size is given, plus one per
    # aspect ratio (counted twice when flipping is enabled).
    num_priors = ((2 if max_size else 1) + len(aspect_ratio)) * len(min_size)
    if flip:
        num_priors += len(aspect_ratio) * len(min_size)
    return min_size, max_size, aspect_ratio, num_priors


def _refinedet_source_layer(net, from_layer, index, normalizations,
                            inter_layer_depth, bn_param):
    """Add the optional normalize / intermediate layers; return the new source name."""
    if normalizations and normalizations[index] != -1:
        norm_name = "{}_norm".format(from_layer)
        net[norm_name] = L.Normalize(net[from_layer],
            scale_filler=dict(type="constant", value=normalizations[index]),
            across_spatial=False, channel_shared=False)
        from_layer = norm_name

    # inter_layer_depth[index] only switches the block on or off: the width of
    # both convolutions is fixed at 512 regardless of its value.
    if inter_layer_depth and inter_layer_depth[index] > 0:
        inter_name = "{}_inter".format(from_layer)

        # Grouped 3x3 convolution (group == num_output == 512).
        # NOTE(review): conv_postfix receives the full layer name here and
        # below; confirm the resulting conv names are intended before changing
        # them, since existing trained weights may depend on those names.
        inter_dw = inter_name + '/dw'
        DWConvBNLayer(net, from_layer, inter_dw, use_bn=True, use_relu=True,
            num_output=512, group=512, kernel_size=3, pad=1, stride=1,
            conv_prefix='', conv_postfix=inter_dw, bn_prefix='', bn_postfix='/bn',
            scale_prefix='', scale_postfix='/scale', **bn_param)

        # 1x1 convolution following the grouped one.
        inter_sep = inter_name + '/sep'
        ConvBNLayer(net, inter_dw, inter_sep, use_bn=True, use_relu=True,
            num_output=512, kernel_size=1, pad=0, stride=1,
            conv_prefix='', conv_postfix=inter_sep, bn_prefix='', bn_postfix='/bn',
            scale_prefix='', scale_postfix='/scale', **bn_param)
        from_layer = inter_sep

    return from_layer


def _refinedet_flat_prediction(net, from_layer, name, num_output, use_batchnorm,
                               lr_mult, kernel_size, pad, bn_param):
    """Add conv (no ReLU) -> permute -> flatten under ``name``; return the flat top."""
    ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
        num_output=num_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
    permute_name = "{}_perm".format(name)
    net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
    flatten_name = "{}_flat".format(name)
    net[flatten_name] = L.Flatten(net[permute_name], axis=1)
    return net[flatten_name]


def CreateRefineDetHead(net, data_layer="data", num_classes=[], from_layers=[], from_layers2=[],
        normalizations=[], use_batchnorm=True, lr_mult=1, min_sizes=[], max_sizes=[], prior_variance = [0.1],
        aspect_ratios=[], steps=[], img_height=0, img_width=0, share_location=True,
        flip=True, clip=True, offset=0.5, inter_layer_depth=[], kernel_size=1, pad=0,
        conf_postfix='', loc_postfix='', **bn_param):
    """Attach the two-stage ("arm" and "odm") prediction heads to ``net``.

    Two passes are made (presumably RefineDet's anchor-refinement and
    object-detection modules -- confirm against the model definition):

    * the "arm" pass walks ``from_layers`` and builds, per layer, a location
      branch, a two-class confidence branch and an ``L.PriorBox`` layer;
    * the "odm" pass walks ``from_layers2`` and builds a location branch and a
      ``num_classes``-way confidence branch, without prior boxes.

    Both passes read ``normalizations``, ``inter_layer_depth``, ``min_sizes``,
    ``max_sizes`` and ``aspect_ratios`` by layer position, so ``from_layers2``
    must not be longer than ``from_layers``.

    Args:
        net: layer container; it is indexed by layer name and must offer
            ``keys()`` and ``update(name, params)``.
        data_layer: name of the layer passed as second bottom to PriorBox.
        num_classes: class count used by the "odm" branches; must be positive.
        from_layers: source layer names for the "arm" pass.
        from_layers2: source layer names for the "odm" pass.
        normalizations: optional per-layer scale fill value (-1 disables).
        use_batchnorm, lr_mult, kernel_size, pad: forwarded to the prediction
            convolutions (the intermediate block always uses batch norm).
        min_sizes, max_sizes, aspect_ratios, steps: per-layer prior settings.
        prior_variance, clip, offset, flip: forwarded to the PriorBox layer.
        img_height, img_width: when both are non-zero they are written to the
            PriorBox layer (as ``img_size`` if equal, else ``img_h``/``img_w``).
        share_location: when false, location outputs are multiplied by the
            class count of the respective pass.
        inter_layer_depth: optional per-layer switch; a value > 0 inserts the
            fixed 512-wide intermediate block.
        conf_postfix, loc_postfix: suffixes of the conf / loc layer names.
        **bn_param: forwarded to every ``ConvBNLayer`` / ``DWConvBNLayer`` call.

    Returns:
        ``[arm_loc, arm_conf, arm_priorbox, odm_loc, odm_conf]``.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(aspect_ratios), "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(steps), "from_layers and steps should have same length"
    assert data_layer in net.keys(), "data_layer is not in net's layers"
    if inter_layer_depth:
        assert len(from_layers) == len(inter_layer_depth), "from_layers and inter_layer_depth should have same length"
    # The second pass indexes the per-layer lists by from_layers2 position;
    # fail up front instead of with an IndexError on a half-built net.
    assert len(from_layers2) <= len(from_layers), "from_layers2 should not be longer than from_layers"

    mbox_layers = []

    # ---- First pass ("arm"): two-class confidence plus prior boxes. ----
    num_classes_rpn = 2
    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    for i, source in enumerate(from_layers):
        from_layer = _refinedet_source_layer(net, source, i, normalizations,
                                             inter_layer_depth, bn_param)
        min_size, max_size, aspect_ratio, num_priors_per_location = _refinedet_prior_spec(
            i, min_sizes, max_sizes, aspect_ratios, flip)
        step = steps[i] if len(steps) > i else []

        # Location branch: 4 outputs per prior (per class if not shared).
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes_rpn
        loc_layers.append(_refinedet_flat_prediction(
            net, from_layer, "{}_mbox_loc{}".format(from_layer, loc_postfix),
            num_loc_output, use_batchnorm, lr_mult, kernel_size, pad, bn_param))

        # Confidence branch.
        conf_layers.append(_refinedet_flat_prediction(
            net, from_layer, "{}_mbox_conf{}".format(from_layer, conf_postfix),
            num_priors_per_location * num_classes_rpn,
            use_batchnorm, lr_mult, kernel_size, pad, bn_param))

        # Prior box layer, then its optional parameters.
        name = "{}_mbox_priorbox".format(from_layer)
        net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_size,
                clip=clip, variance=prior_variance, offset=offset)
        if max_size:
            net.update(name, {'max_size': max_size})
        if aspect_ratio:
            net.update(name, {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                net.update(name, {'img_size': img_height})
            else:
                net.update(name, {'img_h': img_height, 'img_w': img_width})
        priorbox_layers.append(net[name])

    # Merge the first-pass outputs; prior boxes are joined along axis 2.
    net["arm_loc"] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net["arm_loc"])
    net["arm_conf"] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net["arm_conf"])
    net["arm_priorbox"] = L.Concat(*priorbox_layers, axis=2)
    mbox_layers.append(net["arm_priorbox"])

    # ---- Second pass ("odm"): num_classes-way confidence, no prior boxes. ----
    loc_layers = []
    conf_layers = []
    for i, source in enumerate(from_layers2):
        from_layer = _refinedet_source_layer(net, source, i, normalizations,
                                             inter_layer_depth, bn_param)
        num_priors_per_location = _refinedet_prior_spec(
            i, min_sizes, max_sizes, aspect_ratios, flip)[3]

        # Location branch.
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes
        loc_layers.append(_refinedet_flat_prediction(
            net, from_layer, "{}_mbox_loc{}".format(from_layer, loc_postfix),
            num_loc_output, use_batchnorm, lr_mult, kernel_size, pad, bn_param))

        # Confidence branch.
        conf_layers.append(_refinedet_flat_prediction(
            net, from_layer, "{}_mbox_conf{}".format(from_layer, conf_postfix),
            num_priors_per_location * num_classes,
            use_batchnorm, lr_mult, kernel_size, pad, bn_param))

    # Merge the second-pass outputs.
    net["odm_loc"] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net["odm_loc"])
    net["odm_conf"] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net["odm_conf"])

    return mbox_layers