Example 1
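Note: the snippets below are SSD-style Caffe model builders. They assume "from caffe import layers as L" and "from caffe import params as P", plus repo-local helpers (ConvBNLayer, ConvBNUnitLayer, make_list, constants) that are not reproduced here.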
def CreateMultiBoxHead(net,
                       data_layer="data",
                       num_classes=[],
                       from_layers=[],
                       use_objectness=False,
                       normalizations=[],
                       use_batchnorm=True,
                       lr_mult=1,
                       use_scale=True,
                       min_sizes=[],
                       max_sizes=[],
                       prior_variance=[0.1],
                       aspect_ratios=[],
                       steps=[],
                       img_height=0,
                       img_width=0,
                       share_location=True,
                       flip=True,
                       clip=True,
                       offset=0.5,
                       inter_layer_depth=[],
                       kernel_size=1,
                       pad=0,
                       conf_postfix='',
                       loc_postfix='',
                       head_postfix='ext/pm',
                       **bn_param):
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(
            normalizations
        ), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(
        min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(
            max_sizes), "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(
            aspect_ratios
        ), "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(
            steps), "from_layers and steps should have same length"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers"
    if inter_layer_depth:
        assert len(from_layers) == len(
            inter_layer_depth
        ), "from_layers and inter_layer_depth should have same length"

    num = len(from_layers)
    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    objectness_layers = []
    for i in range(0, num):
        from_layer = from_layers[i]

        # Get the normalize value.
        if normalizations:
            if normalizations[i] != -1:
                norm_name = "{}{}_norm".format(head_postfix, i + 1)
                net[norm_name] = L.Normalize(net[from_layer],
                                             scale_filler=dict(
                                                 type="constant",
                                                 value=normalizations[i]),
                                             across_spatial=False,
                                             channel_shared=False)
                from_layer = norm_name

        # Add intermediate layers.
        if inter_layer_depth:
            if inter_layer_depth[i] > 0:
                inter_name = "{}{}_inter".format(head_postfix, i + 1)
                ConvBNLayer(net,
                            from_layer,
                            inter_name,
                            use_bn=use_batchnorm,
                            use_relu=True,
                            lr_mult=lr_mult,
                            num_output=inter_layer_depth[i],
                            kernel_size=3,
                            pad=1,
                            stride=1,
                            **bn_param)
                from_layer = inter_name

        # Estimate number of priors per location given provided parameters.
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(
                    min_size), "max_size and min_size should have same length."
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = []
        if len(steps) > i:
            step = steps[i]

        # Create location prediction layer.
        name = "{}{}_mbox_loc{}".format(head_postfix, i + 1, loc_postfix)
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes
        ConvBNLayer(net,
                    from_layer,
                    name,
                    use_bn=use_batchnorm,
                    use_relu=False,
                    lr_mult=lr_mult,
                    num_output=num_loc_output,
                    kernel_size=kernel_size,
                    pad=pad,
                    stride=1,
                    **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])

        # Create confidence prediction layer.
        name = "{}{}_mbox_conf{}".format(head_postfix, i + 1, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes
        ConvBNLayer(net,
                    from_layer,
                    name,
                    use_bn=use_batchnorm,
                    use_relu=False,
                    lr_mult=lr_mult,
                    num_output=num_conf_output,
                    kernel_size=kernel_size,
                    pad=pad,
                    stride=1,
                    **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])

        # Create prior generation layer.
        name = "{}{}_mbox_priorbox".format(head_postfix, i + 1)
        net[name] = L.PriorBox(net[from_layer],
                               net[data_layer],
                               min_size=min_size,
                               clip=clip,
                               variance=prior_variance,
                               offset=offset)
        if max_size:
            net.update(name, {'max_size': max_size})
        if aspect_ratio:
            net.update(name, {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                net.update(name, {'img_size': img_height})
            else:
                net.update(name, {'img_h': img_height, 'img_w': img_width})
        priorbox_layers.append(net[name])

        # Create objectness prediction layer.
        if use_objectness:
            name = "{}{}_mbox_objectness".format(head_postfix, i + 1)
            num_obj_output = num_priors_per_location * 2
            ConvBNLayer(net,
                        from_layer,
                        name,
                        use_bn=use_batchnorm,
                        use_relu=False,
                        lr_mult=lr_mult,
                        num_output=num_obj_output,
                        kernel_size=kernel_size,
                        pad=pad,
                        stride=1,
                        **bn_param)
            permute_name = "{}_perm".format(name)
            net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
            flatten_name = "{}_flat".format(name)
            net[flatten_name] = L.Flatten(net[permute_name], axis=1)
            objectness_layers.append(net[flatten_name])

    # Concatenate priorbox, loc, and conf layers.
    mbox_layers = []
    name = "mbox_loc"
    net[name] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_conf"
    net[name] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_priorbox"
    net[name] = L.Concat(*priorbox_layers, axis=2)
    mbox_layers.append(net[name])
    if use_objectness:
        name = "mbox_objectness"
        net[name] = L.Concat(*objectness_layers, axis=1)
        mbox_layers.append(net[name])

    return mbox_layers
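The prior-count arithmetic above is easy to sanity-check in isolation. A standalone sketch (plain Python, no Caffe required; the SSD300 sizes below are illustrative assumptions, not values from this source):

def count_priors_per_location(min_size, max_size, aspect_ratio, flip=True):
    # One base prior per min_size, plus one extra prior when a max_size is given.
    count = (2 if max_size else 1) + len(aspect_ratio)
    count *= len(min_size)
    if flip:
        # Flipping adds the reciprocal of every aspect ratio.
        count += len(aspect_ratio) * len(min_size)
    return count

# conv4_3 in a typical SSD300 setup: one min size, one max size, one extra ratio -> 4 priors.
assert count_priors_per_location([30.0], [60.0], [2.0]) == 4
# Middle feature maps usually add ratio 3.0 as well -> 6 priors.
assert count_priors_per_location([60.0], [111.0], [2.0, 3.0]) == 6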
Example 2
def CreateMultiBoxHead(net, data_layer="data", num_classes=[], from_layers=[],
                       use_objectness=False, normalizations=[], use_batchnorm=True,
                       min_sizes=[], max_sizes=[], prior_variance=[0.1],
                       aspect_ratios=[], share_location=True, flip=True, clip=True,
                       inter_layer_depth=0, kernel_size=1, pad=0, conf_postfix='', loc_postfix=''):
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), "from_layers and max_sizes should have same length"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers"

    num = len(from_layers)
    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    objectness_layers = []
    for i in range(0, num):
        from_layer = from_layers[i]

        # Get the normalize value.
        if normalizations:
            if normalizations[i] != -1:
                norm_name = "{}_norm".format(from_layer)
                net[norm_name] = L.Normalize(net[from_layer],
                                             scale_filler=dict(type="constant", value=normalizations[i]),
                                             across_spatial=False, channel_shared=False)
                from_layer = norm_name

        # Add intermediate layers.
        if inter_layer_depth > 0:
            inter_name = "{}_inter".format(from_layer)
            ConvBNLayer(net, from_layer, inter_name, use_bn=use_batchnorm, use_relu=True,
                        num_output=inter_layer_depth, kernel_size=3, pad=1, stride=1)
            from_layer = inter_name

        # Estimate number of priors per location given provided parameters.
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        if max_sizes and max_sizes[i]:
            num_priors_per_location = 2 + len(aspect_ratio)
        else:
            num_priors_per_location = 1 + len(aspect_ratio)
        if flip:
            num_priors_per_location += len(aspect_ratio)

        num_priors_per_location = 2 * num_priors_per_location
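        # Note: unlike the stock SSD head, this variant doubles the
        # per-location prior count; the snippet gives no rationale for it.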
        
        # Create location prediction layer.
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False,
                    num_output=num_loc_output, kernel_size=kernel_size, pad=pad, stride=1)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])

        # Create confidence prediction layer.
        name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False,
                    num_output=num_conf_output, kernel_size=kernel_size, pad=pad, stride=1)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])

        # Create prior generation layer.
        name = "{}_mbox_priorbox".format(from_layer)
        if max_sizes and max_sizes[i]:
            if aspect_ratio:
                net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_sizes[i], max_size=max_sizes[i],
                                       aspect_ratio=aspect_ratio, flip=flip, clip=clip, variance=prior_variance)
            else:
                net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_sizes[i], max_size=max_sizes[i],
                                       clip=clip, variance=prior_variance)
        else:
            if aspect_ratio:
                net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_sizes[i],
                                       aspect_ratio=aspect_ratio, flip=flip, clip=clip, variance=prior_variance)
            else:
                net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_sizes[i],
                                       clip=clip, variance=prior_variance)
        priorbox_layers.append(net[name])

        # Create objectness prediction layer.
        if use_objectness:
            name = "{}_mbox_objectness".format(from_layer)
            num_obj_output = num_priors_per_location * 2
            ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False,
                        num_output=num_obj_output, kernel_size=kernel_size, pad=pad, stride=1)
            permute_name = "{}_perm".format(name)
            net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
            flatten_name = "{}_flat".format(name)
            net[flatten_name] = L.Flatten(net[permute_name], axis=1)
            objectness_layers.append(net[flatten_name])

    # Concatenate priorbox, loc, and conf layers.
    mbox_layers = []
    name = "mbox_loc"
    net[name] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_conf"
    net[name] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_priorbox"
    net[name] = L.Concat(*priorbox_layers, axis=2)
    mbox_layers.append(net[name])
    if use_objectness:
        name = "mbox_objectness"
        net[name] = L.Concat(*objectness_layers, axis=1)
        mbox_layers.append(net[name])

    return mbox_layers
Example 3
def get_caffe_layer(node, net, input_dims):
    """Generate a Caffe layer for the corresponding MXNet op.

    Args:
        node (dict): MXNet op summary generated by MxnetParser.
        net (caffe.NetSpec): Caffe netspec object.
        input_dims (tuple): Spatial dimensions of the network input,
            e.g. (300, 300).

    Returns:
        caffe.layers: Equivalent Caffe layer.
    """
    if node['type'] == 'Convolution':
        assert len(node['inputs']) == 1, \
            'Convolution layers can have only one input'
        conv_params = node['attr']
        kernel_size = make_list(conv_params['kernel'])
        num_filters = make_list(conv_params['num_filter'])[0]
        if 'stride' in conv_params:
            stride = make_list(conv_params['stride'])[0]
        else:
            stride = 1
        padding = make_list(conv_params['pad'])
        if 'dilate' in conv_params:
            dilation = make_list(conv_params['dilate'])[0]
        else:
            dilation = 1
        convolution_param = {
            'pad': padding,
            'kernel_size': kernel_size,
            'num_output': num_filters,
            'stride': stride,
            'dilation': dilation
        }
        return layers.Convolution(net[node['inputs'][0]],
                                  convolution_param=convolution_param)
    if node['type'] == 'Activation':
        assert len(node['inputs']) == 1, \
            'Activation layers can have only one input'
        assert node['attr']['act_type'] == 'relu'
        return layers.ReLU(net[node['inputs'][0]])

    if node['type'] == 'Pooling':
        assert len(node['inputs']) == 1, \
            'Pooling layers can have only one input'
        kernel_size = make_list(node['attr']['kernel'])
        stride = make_list(node['attr']['stride'])
        pooling_type = node['attr']['pool_type']
        if 'pad' in node['attr']:
            padding = make_list(node['attr']['pad'])
        else:
            padding = [0]
        if pooling_type == 'max':
            pooling = params.Pooling.MAX
        elif pooling_type == 'avg':
            pooling = params.Pooling.AVG
        else:
            raise ValueError('Unsupported pool_type: {}'.format(pooling_type))
        pooling_param = {
            'pool': pooling,
            'pad': padding[0],
            'kernel_size': kernel_size[0],
            'stride': stride[0]
        }
        return layers.Pooling(net[node['inputs'][0]],
                              pooling_param=pooling_param)

    if node['type'] == 'L2Normalization':
        across_spatial = node['attr']['mode'] != 'channel'
        channel_shared = False
        scale_filler = {
            'type': "constant",
            'value': constants.NORMALIZATION_FACTOR
        }
        norm_param = {
            'across_spatial': across_spatial,
            'scale_filler': scale_filler,
            'channel_shared': channel_shared
        }
        return layers.Normalize(net[node['inputs'][0]], norm_param=norm_param)

    # Note: this layer is implemented only in Wei Liu's SSD branch of Caffe,
    # not in Caffe master.
    if node['type'] == 'transpose':
        order = make_list(node['attr']['axes'])
        return layers.Permute(net[node['inputs'][0]],
                              permute_param={'order': order})

    if node['type'] == 'Flatten':
        if node['inputs'][0].endswith('anchors'):
            axis = 2
        else:
            axis = 1
        return layers.Flatten(net[node['inputs'][0]],
                              flatten_param={'axis': axis})

    if node['type'] == 'Concat':
        # In the ssd model, always concatenate along last axis,
        # since anchor boxes have an extra dimension in caffe (that includes variance).
        axis = -1
        concat_inputs = [net[inp] for inp in node['inputs']]
        return layers.Concat(*concat_inputs, concat_param={'axis': axis})

    if node['type'] == 'Reshape':
        if node['name'] == 'multibox_anchors':
            reshape_dims = [1, 2, -1]
        else:
            reshape_dims = make_list(node['attr']['shape'])
        return layers.Reshape(net[node['inputs'][0]],
                              reshape_param={'shape': {
                                  'dim': reshape_dims
                              }})

    if node['type'] == '_contrib_MultiBoxPrior':
        priorbox_inputs = [net[inp] for inp in node['inputs']] + [net["data"]]
        sizes = make_list(node["attr"]["sizes"])
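        # The second MXNet size appears to encode sqrt(min * max) relative to
        # the input dimension, so squaring it and dividing by min_size recovers
        # the absolute max_size expected by Caffe's PriorBox.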
        min_size = sizes[0] * input_dims[0]
        max_size = int(round((sizes[1] * input_dims[0])**2 / min_size))
        aspect_ratio = make_list(node["attr"]["ratios"])
        steps = make_list(node["attr"]["steps"])
        param = {
            'clip': node["attr"]["clip"] == "true",
            'flip': False,
            'min_size': min_size,
            'max_size': max_size,
            'aspect_ratio': aspect_ratio,
            'variance': [0.1, 0.1, 0.2, 0.2],
            'step': int(round(steps[0] * input_dims[0])),
        }
        return layers.PriorBox(*priorbox_inputs, prior_box_param=param)

    if node['type'] == '_contrib_MultiBoxDetection':
        multibox_inputs = [net[inp] for inp in node['inputs']]
        bottom_order = [1, 0, 2]
        multibox_inputs = [multibox_inputs[i] for i in bottom_order]
        param = {
            'num_classes': constants.NUM_CLASSES,
            'share_location': True,
            'background_label_id': 0,
            'nms_param': {
                'nms_threshold': float(node['attr']['nms_threshold']),
                'top_k': int(node['attr']['nms_topk'])
            },
            'keep_top_k': make_list(node['attr']['nms_topk'])[0],
            'confidence_threshold': 0.01,
            'code_type': params.PriorBox.CENTER_SIZE,
        }
        return layers.DetectionOutput(*multibox_inputs,
                                      detection_output_param=param)

    if node['type'] in ['SoftmaxActivation', 'SoftmaxOutput']:
        if 'mode' not in node['attr']:
            axis = 1
        elif node['attr']['mode'] == 'channel':
            axis = 1
        else:
            axis = 0
        # Note: Caffe expects confidence scores to be flattened before the DetectionOutput layer receives them.
        return layers.Flatten(layers.Permute(
            layers.Softmax(net[node['inputs'][0]], axis=axis),
            permute_param={'order': [0, 2, 1]}),
                              flatten_param={'axis': 1})
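Example 3 leans on a make_list helper that is not shown. A plausible minimal version (hypothetical, not from the original source) that parses MXNet attribute strings such as "(3, 3)" or "1" into a flat Python list:

import ast

def make_list(attr_string):
    # MXNet serializes attributes as literals like "(3, 3)", "[1.0, 2.0]" or "1".
    value = ast.literal_eval(attr_string)
    if not isinstance(value, (list, tuple)):
        value = [value]
    return list(value)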
Example 4
def UnitLayerDenseDetectorHeader(net, data_layer="data", num_classes=2, feature_layer="conv5", \
        normalization=-1, use_batchnorm=True, prior_variance = [0.1], \
        pro_widths=[], pro_heights=[], flip=True, clip=True, \
        inter_layer_channels=[], flat=False, use_focus_loss=False, stage=1, lr_mult=1, decay_mult=1):
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers."
    assert feature_layer in net_layers, "feature_layer is not in net's layers."
    assert pro_widths, "Must provide proposed width/height."
    assert pro_heights, "Must provide proposed width/height."
    assert len(pro_widths) == len(pro_heights), "pro_widths/heights must have the same length."
    from_layer = feature_layer
    prefix_name = '{}_{}'.format(from_layer,stage)
    # Norm-Layer
    if normalization != -1:
        norm_name = "{}_norm".format(prefix_name)
        net[norm_name] = L.Normalize(net[from_layer], scale_filler=dict(type="constant", value=normalization), \
            across_spatial=False, channel_shared=False)
        from_layer = norm_name
    # InterLayers
    if len(inter_layer_channels) > 0:
        start_inter_id = 1
        for inter_channel_kernel in inter_layer_channels:
            inter_channel = inter_channel_kernel[0]
            inter_kernel = inter_channel_kernel[1]
            inter_name = "{}_inter_{}".format(prefix_name,start_inter_id)
            # Same padding for the odd kernel sizes used here (1 -> 0, 3 -> 1).
            inter_pad = (inter_kernel - 1) // 2
            ConvBNUnitLayer(net, from_layer, inter_name, use_bn=use_batchnorm, use_relu=True, num_output=inter_channel,\
                 kernel_size=inter_kernel, pad=inter_pad, stride=1,use_scale=True, leaky=False,lr_mult=lr_mult, decay_mult=decay_mult,constant_value=0.2)
            from_layer = inter_name
            start_inter_id = start_inter_id + 1
    # PriorBoxes
    num_priors_per_location = len(pro_widths)
    # LOC
    name = "{}_mbox_loc".format(prefix_name)
    num_loc_output = num_priors_per_location * 4 * (num_classes-1)
    ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False, \
        num_output=num_loc_output, kernel_size=3, pad=1, stride=1,lr_mult=lr_mult, decay_mult=decay_mult)
    permute_name = "{}_perm".format(name)
    net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layer = net[flatten_name]
    else:
        loc_layer = net[permute_name]
    # CONF
    name = "{}_mbox_conf".format(prefix_name)
    num_conf_output = num_priors_per_location * num_classes
    if use_focus_loss:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False, num_output=num_conf_output,\
             kernel_size=3, pad=1, stride=1,init_xavier=False,bias_type='focal',sparse=num_classes,lr_mult=lr_mult, decay_mult=decay_mult)
    else:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False, \
            num_output=num_conf_output, kernel_size=3, pad=1, stride=1,lr_mult=lr_mult, decay_mult=decay_mult)
    permute_name = "{}_perm".format(name)
    net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layer = net[flatten_name]
    else:
        conf_layer = net[permute_name]
    # PRIOR
    name = "{}_mbox_priorbox".format(prefix_name)
    net[name] = L.PriorBox(net[from_layer], net[data_layer], pro_width=pro_widths, pro_height=pro_heights, \
        flip=flip, clip=clip, variance=prior_variance)
    priorbox_layer = net[name]
    return loc_layer,conf_layer,priorbox_layer
def CreateMultiBoxHead(net,
                       data_layer="data",
                       num_classes=[],
                       from_layers=[],
                       use_objectness=False,
                       normalizations=[],
                       use_batchnorm=True,
                       lr_mult=1,
                       use_scale=True,
                       min_sizes=[],
                       max_sizes=[],
                       prior_variance=[0.1],
                       aspect_ratios=[],
                       steps=[],
                       img_height=0,
                       img_width=0,
                       share_location=True,
                       flip=True,
                       clip=True,
                       offset=0.5,
                       inter_layer_depth=[],
                       kernel_size=1,
                       pad=0,
                       conf_postfix='',
                       loc_postfix='',
                       **bn_param):
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(
            normalizations
        ), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(
        min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(
            max_sizes), "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(
            aspect_ratios
        ), "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(
            steps), "from_layers and steps should have same length"

    #assert data_layer in net_layers, "data_layer is not in net's layers"
    if inter_layer_depth:
        assert len(from_layers) == len(
            inter_layer_depth
        ), "from_layers and inter_layer_depth should have same length"

    num = len(from_layers)

    loc_layers = []
    conf_layers = []

    priorbox_layers = collections.OrderedDict()
    norm_name_layers = collections.OrderedDict()
    for i in range(0, num):
        from_layer = from_layers[i]
        # Get the normalize value.
        if normalizations:  #normalizations = [20, -1, -1, -1, -1, -1]
            if normalizations[i] != -1:
                norm_name = "{}_norm".format(from_layer)
                norm_name_layers[norm_name] = net.layer.add()
                norm_name_layers[norm_name].CopyFrom(
                    L.Normalize(scale_filler=dict(type="constant",
                                                  value=normalizations[i]),
                                across_spatial=False,
                                channel_shared=False).to_proto().layer[0])
                norm_name_layers[norm_name].name = norm_name
                norm_name_layers[norm_name].top[0] = norm_name
                norm_name_layers[norm_name].bottom.append(from_layer)
                from_layer = norm_name

        # Estimate number of priors per location given provided parameters.
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(
                    min_size), "max_size and min_size should have same length."
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = []
        if len(steps) > i:
            step = steps[i]

        # Create location prediction layer.
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes
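        # Note: this variant computes num_loc_output but never creates the
        # loc/conf convolutions; only the priorbox branch below is materialized.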

        # Create prior generation layer.
        name = "{}_mbox_priorbox".format(from_layer)
        priorbox_layers[name] = net.layer.add()
        # Build a bare PriorBox prototype and copy it into the net's proto; the
        # optional fields (max_size, aspect_ratio, flip, step) are filled in on
        # the copied message below.
        priorbox_layers[name].CopyFrom(
            L.PriorBox(min_size=min_size,
                       clip=clip,
                       variance=prior_variance,
                       offset=offset).to_proto().layer[0])
        priorbox_layers[name].name = name
        priorbox_layers[name].top[0] = name
        priorbox_layers[name].bottom.append(from_layer)
        priorbox_layers[name].bottom.append(data_layer)

        if max_size:
            priorbox_layers[name].prior_box_param.max_size.extend(max_size)
        if aspect_ratio:
            priorbox_layers[name].prior_box_param.aspect_ratio.extend(
                aspect_ratio)
        if not flip:  #default is True
            priorbox_layers[name].prior_box_param.flip = flip
        if step:
            priorbox_layers[name].prior_box_param.step = step

    # Concatenate priorbox, loc, and conf layers.
    name = "mbox_priorbox"
    cat_mbox_layer = net.layer.add()
    cat_mbox_layer.CopyFrom(L.Concat(axis=2).to_proto().layer[0])
    cat_mbox_layer.name = name
    cat_mbox_layer.top[0] = name
    for bt in priorbox_layers.keys():
        cat_mbox_layer.bottom.append(bt)
Example 6
def CreateUnifiedPredictionHead(net,
                                data_layer="data",
                                num_classes=[],
                                from_layers=[],
                                use_objectness=False,
                                normalizations=[],
                                use_batchnorm=True,
                                lr_mult=1,
                                use_scale=True,
                                min_sizes=[],
                                max_sizes=[],
                                prior_variance=[0.1],
                                aspect_ratios=[],
                                steps=[],
                                img_height=0,
                                img_width=0,
                                share_location=True,
                                flip=True,
                                clip=True,
                                offset=0.5,
                                inter_layer_depth=[],
                                kernel_size=1,
                                pad=0,
                                conf_postfix='',
                                loc_postfix='',
                                **bn_param):
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(
            normalizations
        ), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(
        min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(
            max_sizes), "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(
            aspect_ratios
        ), "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(
            steps), "from_layers and steps should have same length"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers"
    if inter_layer_depth:
        assert len(from_layers) == len(
            inter_layer_depth
        ), "from_layers and inter_layer_depth should have same length"

    num = len(from_layers)
    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    objectness_layers = []

    loc_args = {
        'param': [
            dict(name='loc_p1', lr_mult=lr_mult, decay_mult=1),
            dict(name='loc_p2', lr_mult=2 * lr_mult, decay_mult=0)
        ],
        'weight_filler':
        dict(type='xavier'),
        'bias_filler':
        dict(type='constant', value=0)
    }

    conf_args = {
        'param': [
            dict(name='conf_p1', lr_mult=lr_mult, decay_mult=1),
            dict(name='conf_p2', lr_mult=2 * lr_mult, decay_mult=0)
        ],
        'weight_filler':
        dict(type='xavier'),
        'bias_filler':
        dict(type='constant', value=0)
    }

    if flip:
        num_priors_per_location = 6
    else:
        num_priors_per_location = 3
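    # Note: this fixed count drives the loc/conf convolutions created below,
    # but it is recomputed inside the loop only after those convolutions, so
    # from the second feature map onward they use the count derived from the
    # previous iteration's prior settings.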

    for i in range(0, num):
        from_layer = from_layers[i]

        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)

        # Create location prediction layer.
        net[name] = L.Convolution(net[from_layer],
                                  num_output=num_priors_per_location * 4,
                                  pad=1,
                                  kernel_size=3,
                                  stride=1,
                                  **loc_args)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])

        # Create confidence prediction layer.
        name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        net[name] = L.Convolution(net[from_layer],
                                  num_output=num_priors_per_location *
                                  num_classes,
                                  pad=1,
                                  kernel_size=3,
                                  stride=1,
                                  **conf_args)

        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])

        # Estimate number of priors per location given provided parameters.
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(
                    min_size), "max_size and min_size should have same length."
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = []
        if len(steps) > i:
            step = steps[i]

        # Create prior generation layer.
        name = "{}_mbox_priorbox".format(from_layer)
        net[name] = L.PriorBox(net[from_layer],
                               net[data_layer],
                               min_size=min_size,
                               clip=clip,
                               variance=prior_variance,
                               offset=offset)
        if max_size:
            net.update(name, {'max_size': max_size})
        if aspect_ratio:
            net.update(name, {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                net.update(name, {'img_size': img_height})
            else:
                net.update(name, {'img_h': img_height, 'img_w': img_width})
        priorbox_layers.append(net[name])

        # Create objectness prediction layer.
        if use_objectness:
            name = "{}_mbox_objectness".format(from_layer)
            num_obj_output = num_priors_per_location * 2
            ConvBNLayer(net,
                        from_layer,
                        name,
                        use_bn=use_batchnorm,
                        use_relu=False,
                        lr_mult=lr_mult,
                        num_output=num_obj_output,
                        kernel_size=kernel_size,
                        pad=pad,
                        stride=1,
                        **bn_param)
            permute_name = "{}_perm".format(name)
            net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
            flatten_name = "{}_flat".format(name)
            net[flatten_name] = L.Flatten(net[permute_name], axis=1)
            objectness_layers.append(net[flatten_name])

    # Concatenate priorbox, loc, and conf layers.
    mbox_layers = []
    name = "mbox_loc"
    net[name] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_conf"
    net[name] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_priorbox"
    net[name] = L.Concat(*priorbox_layers, axis=2)
    mbox_layers.append(net[name])
    if use_objectness:
        name = "mbox_objectness"
        net[name] = L.Concat(*objectness_layers, axis=1)
        mbox_layers.append(net[name])

    return mbox_layers
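For context, the mbox_layers returned by these heads normally feed a MultiBoxLoss layer at training time in Wei Liu's SSD fork of Caffe. A hedged sketch (the multibox_loss_param values are illustrative, and a label top from the data layer is assumed):

multibox_loss_param = {
    'loc_loss_type': P.MultiBoxLoss.SMOOTH_L1,
    'conf_loss_type': P.MultiBoxLoss.SOFTMAX,
    'num_classes': 21,  # illustrative: 20 VOC classes plus background
    'share_location': True,
    'match_type': P.MultiBoxLoss.PER_PREDICTION,
    'overlap_threshold': 0.5,
}
mbox_layers.append(net.label)
net["mbox_loss"] = L.MultiBoxLoss(*mbox_layers,
                                  multibox_loss_param=multibox_loss_param,
                                  loss_param=dict(normalization=P.Loss.VALID),
                                  propagate_down=[True, True, False, False])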
Example 7
def UnitLayerDetectorHeader(net, data_layer="data", num_classes=2, feature_layer="conv5", \
        normalization=-1, use_batchnorm=True, prior_variance = [0.1], \
        pro_widths=[], pro_heights=[], flip=True, clip=True, inter_layer_channels=[], \
        flat=False, use_focus_loss=False, stage=1,lr_mult=1.0,decay_mult=1.0,flag_withparamname=False,flagcreateprior = True,add_str = ""):
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers."
    print(feature_layer)
    assert feature_layer + add_str in net_layers, "feature_layer is not in net's layers. (%s)" % feature_layer
    assert pro_widths, "Must provide proposed width/height."
    assert pro_heights, "Must provide proposed width/height."
    assert len(pro_widths) == len(
        pro_heights), "pro_widths/heights must have the same length."
    from_layer = feature_layer
    prefix_name = '{}_{}'.format(from_layer, stage)
    from_layer += add_str
    # Norm-Layer
    if normalization != -1:
        norm_name = "{}_{}_norm".format(prefix_name, stage)
        net[norm_name] = L.Normalize(net[from_layer], scale_filler=dict(type="constant", value=normalization), \
            across_spatial=False, channel_shared=False)
        from_layer = norm_name
    print(inter_layer_channels, "inter_layer_channels")
    if len(inter_layer_channels) > 0:
        start_inter_id = 1
        for inter_channel_kernel in inter_layer_channels:
            inter_channel = inter_channel_kernel[0]
            inter_kernel = inter_channel_kernel[1]
            inter_name = "{}_inter_{}".format(prefix_name, start_inter_id)
            # Same padding for the odd kernel sizes used here (1 -> 0, 3 -> 1).
            inter_pad = (inter_kernel - 1) // 2
            ConvBNUnitLayer(net, from_layer, inter_name, use_bn=use_batchnorm, use_relu=True, \
                num_output=inter_channel, kernel_size=inter_kernel, pad=inter_pad, stride=1,use_scale=True, leaky=False,
                            lr_mult=lr_mult, decay_mult=decay_mult,flag_withparamname=flag_withparamname,pose_string=add_str)
            from_layer = inter_name + add_str
            start_inter_id = start_inter_id + 1
    # Estimate number of priors per location given provided parameters.
    num_priors_per_location = len(pro_widths)
    # Create location prediction layer.
    name = "{}_mbox_loc".format(prefix_name)
    num_loc_output = num_priors_per_location * 4
    ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False, \
        num_output=num_loc_output, kernel_size=3, pad=1, stride=1,lr_mult=lr_mult, decay_mult=decay_mult,pose_string=add_str)
    permute_name = "{}_perm".format(name) + add_str
    net[permute_name] = L.Permute(net[name + add_str], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name) + add_str
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layer = net[flatten_name]
    else:
        loc_layer = net[permute_name]
    # Create confidence prediction layer.
    name = "{}_mbox_conf".format(prefix_name)
    num_conf_output = num_priors_per_location * num_classes
    if use_focus_loss:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False, \
            num_output=num_conf_output, kernel_size=3, pad=1, stride=1,init_xavier=False,bias_type='focal',sparse=num_classes,
                        lr_mult=lr_mult, decay_mult=decay_mult,pose_string=add_str)
    else:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False, \
            num_output=num_conf_output, kernel_size=3, pad=1, stride=1,lr_mult=lr_mult, decay_mult=decay_mult,pose_string=add_str)
    permute_name = "{}_perm".format(name) + add_str
    net[permute_name] = L.Permute(net[name + add_str], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name) + add_str
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layer = net[flatten_name]
    else:
        conf_layer = net[permute_name]

    # Create prior generation layer.
    if flagcreateprior:
        name = "{}_mbox_priorbox".format(prefix_name) + add_str
        net[name] = L.PriorBox(net[from_layer], net[data_layer], pro_width=pro_widths, pro_height=pro_heights, \
            flip=flip, clip=clip, variance=prior_variance)
        priorbox_layer = net[name]
    else:
        priorbox_layer = []
    return loc_layer, conf_layer, priorbox_layer
def ACT_CreateCuboidHead(net, K=6, data_layer="data", num_classes=[], from_layers=[], 
    normalizations=[], use_batchnorm=True, lr_mult=1, use_scale=True, min_sizes=[], 
    max_sizes=[], prior_variance = [0.1], aspect_ratios=[], steps=[], img_height=0, 
    img_width=0, share_location=True, flip=True, clip=True, offset=0.5, kernel_size=1, pad=0,
    conf_postfix='', loc_postfix='', m='', fusion="concat", **bn_param):
    
            
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    
    if normalizations:
        assert len(from_layers) == len(normalizations), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), "from_layers and min_sizes should have same length"
    
    if max_sizes:
        assert len(from_layers) == len(max_sizes), "from_layers and max_sizes should have same length"
    
    if aspect_ratios:
        assert len(from_layers) == len(aspect_ratios), "from_layers and aspect_ratios should have same length"
    
    if steps:
        assert len(from_layers) == len(steps), "from_layers and steps should have same length"
    
    net_layers = net.keys()
    
    assert data_layer in net_layers, "data_layer is not in net's layers"


    num = len(from_layers)
    priorbox_layers = []
    loc_layers = []
    conf_layers = []

    for i in range(0, num):
        from_layer = from_layers[i]

        # Get the normalize value.
        if normalizations:
            if normalizations[i] != -1:
                for stream in range(K):
                    norm_name = "{}_norm_stream{}{}".format(from_layer, stream, m)
                    net[norm_name] = L.Normalize(net[from_layer + '_stream' + str(stream) + m], scale_filler=dict(type="constant", value=normalizations[i]),
                        across_spatial=False, channel_shared=False)
                from_layer = "{}_norm".format(from_layer)

        # ACT: add a concatenation layer across streams
        if fusion == "concat":
            net[from_layer + '_concat'] = L.Concat(bottom=[from_layer + '_stream' + str(stream) + m for stream in range(K)], axis=1)
            from_layer += '_concat'
        else:
            assert fusion == "sum"
            net[from_layer + '_sum'] = L.Eltwise(bottom=[from_layer + '_stream' + str(stream) + m for stream in range(K)])
            from_layer += '_sum'

        # Estimate number of priors per location given provided parameters.
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(min_size), "max_size and min_size should have same length."
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = []
        if len(steps) > i:
            step = steps[i]

        # ACT-detector: location prediction layer 
        # location prediction for K different frames
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        num_loc_output = num_priors_per_location * 4 * K
        if not share_location:
            num_loc_output *= num_classes
                    
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
            num_output=num_loc_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])

        # ACT-detector: confidence prediction layer
        # joint prediction of all frames
        name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
            num_output=num_conf_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])

        # Create prior generation layer.
        name = "{}_mbox_priorbox".format(from_layer)
        net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_size,
                clip=clip, variance=prior_variance, offset=offset)
        if max_size:
            net.update(name, {'max_size': max_size})
        if aspect_ratio:
            net.update(name, {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                net.update(name, {'img_size': img_height})
            else:
                net.update(name, {'img_h': img_height, 'img_w': img_width})
        priorbox_layers.append(net[name])

    # Concatenate priorbox, loc, and conf layers.
    mbox_layers = []
    name = "mbox_loc"
    net[name] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net[name])

    name = "mbox_conf"
    net[name] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net[name])

    name = "mbox_priorbox"
    net[name] = L.Concat(*priorbox_layers, axis=2)
    mbox_layers.append(net[name])


    return mbox_layers
Example 9
def UnitLayerDetectorHeader(net, data_layer="data", num_classes=2, feature_layer="conv5", \
        use_objectness=False, normalization=-1, use_batchnorm=True, prior_variance = [0.1], \
        min_sizes=[], max_sizes=[], aspect_ratios=[], pro_widths=[], pro_heights=[], \
        share_location=True, flip=True, clip=False, inter_layer_channels=[], kernel_size=1, \
        pad=0, conf_postfix='', loc_postfix='', flat=False, use_focus_loss=False,stage=1):
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"

    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers."
    assert feature_layer in net_layers, "feature_layer is not in net's layers."

    if min_sizes:
        assert not pro_widths, "pro_widths should not be provided when using min_sizes."
        assert not pro_heights, "pro_heights should not be provided when using min_sizes."
        if max_sizes:
            assert len(max_sizes) == len(
                min_sizes
            ), "min_sizes and max_sizes must have the same length."
    else:
        assert pro_widths, "Must provide proposed width/height."
        assert pro_heights, "Must provide proposed width/height."
        assert len(pro_widths) == len(
            pro_heights), "pro_widths/heights must have the same length."
        assert not min_sizes, "min_sizes should not be provided when using pro_widths/heights."
        assert not max_sizes, "max_sizes should not be provided when using pro_widths/heights."

    from_layer = feature_layer
    prefix_name = '{}_{}'.format(from_layer, stage)
    # Norm-Layer
    if normalization != -1:
        norm_name = "{}_{}_norm".format(prefix_name, stage)
        net[norm_name] = L.Normalize(net[from_layer], scale_filler=dict(type="constant", value=normalization), \
            across_spatial=False, channel_shared=False)
        from_layer = norm_name

    # Add intermediate Conv layers.
    if len(inter_layer_channels) > 0:
        start_inter_id = 1
        for inter_channel_kernel in inter_layer_channels:
            inter_channel = inter_channel_kernel[0]
            inter_kernel = inter_channel_kernel[1]
            inter_name = "{}_inter_{}".format(prefix_name, start_inter_id)
            # Same padding for the odd kernel sizes used here (1 -> 0, 3 -> 1).
            inter_pad = (inter_kernel - 1) // 2
            ConvBNUnitLayer(net, from_layer, inter_name, use_bn=use_batchnorm, use_relu=True, \
                num_output=inter_channel, kernel_size=inter_kernel, pad=inter_pad, stride=1,use_scale=True, leaky=False)
            from_layer = inter_name
            start_inter_id = start_inter_id + 1
    # Estimate number of priors per location given provided parameters.
    if min_sizes:
        if aspect_ratios:
            num_priors_per_location = len(aspect_ratios) + 1
            if flip:
                num_priors_per_location += len(aspect_ratios)
            if max_sizes:
                num_priors_per_location += 1
            num_priors_per_location *= len(min_sizes)
        else:
            if max_sizes:
                num_priors_per_location = 2 * len(min_sizes)
            else:
                num_priors_per_location = len(min_sizes)
    else:
        num_priors_per_location = len(pro_widths)

    # Create location prediction layer.
    name = "{}_mbox_loc{}".format(prefix_name, loc_postfix)
    num_loc_output = num_priors_per_location * 4 * (num_classes - 1)
    if not share_location:
        num_loc_output *= num_classes
    ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False, \
        num_output=num_loc_output, kernel_size=3, pad=1, stride=1)
    permute_name = "{}_perm".format(name)
    net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layer = net[flatten_name]
    else:
        loc_layer = net[permute_name]

    # Create confidence prediction layer.
    name = "{}_mbox_conf{}".format(prefix_name, conf_postfix)
    num_conf_output = num_priors_per_location * num_classes
    if use_focus_loss:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False, \
            num_output=num_conf_output, kernel_size=3, pad=1, stride=1,init_xavier=False,bias_type='focal',sparse=num_classes)
    else:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False, \
            num_output=num_conf_output, kernel_size=3, pad=1, stride=1)
    permute_name = "{}_perm".format(name)
    net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layer = net[flatten_name]
    else:
        conf_layer = net[permute_name]

    # Create prior generation layer.
    name = "{}_mbox_priorbox".format(prefix_name)
    if min_sizes:
        if aspect_ratios:
            if max_sizes:
                net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_sizes, max_size=max_sizes, \
                    aspect_ratio=aspect_ratios, flip=flip, clip=clip, variance=prior_variance)
            else:
                net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_sizes, \
                    aspect_ratio=aspect_ratios, flip=flip, clip=clip, variance=prior_variance)
        else:
            if max_sizes:
                net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_sizes, max_size=max_sizes, \
                    flip=flip, clip=clip, variance=prior_variance)
            else:
                net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_sizes, \
                    flip=flip, clip=clip, variance=prior_variance)
        priorbox_layer = net[name]
    else:
        net[name] = L.PriorBox(net[from_layer], net[data_layer], pro_width=pro_widths, pro_height=pro_heights, \
            flip=flip, clip=clip, variance=prior_variance)
        priorbox_layer = net[name]
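    # The resulting PriorBox blob has shape 1 x 2 x (H * W * num_priors * 4):
    # the first channel holds the prior-box coordinates, the second the
    # variances used to scale the regression targets.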

    # Create objectness prediction layer.
    if use_objectness:
        name = "{}_mbox_objectness".format(prefix_name)
        num_obj_output = num_priors_per_location * 2
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False, \
            num_output=num_obj_output, kernel_size=kernel_size, pad=pad, stride=1)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        if flat:
            flatten_name = "{}_flat".format(name)
            net[flatten_name] = L.Flatten(net[permute_name], axis=1)
            objectness_layer = net[flatten_name]
        else:
            objectness_layer = net[permute_name]
        return loc_layer, conf_layer, priorbox_layer, objectness_layer

    return loc_layer, conf_layer, priorbox_layer
Example #10
def CreateRefineDetHead(net, data_layer="data", num_classes=[], from_layers=[], from_layers2=[],
        normalizations=[], use_batchnorm=True, lr_mult=1, min_sizes=[], max_sizes=[], prior_variance = [0.1],
        aspect_ratios=[], steps=[], img_height=0, img_width=0, share_location=True,
        flip=True, clip=True, offset=0.5, inter_layer_depth=[], kernel_size=1, pad=0,
        conf_postfix='', loc_postfix='', **bn_param):
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(aspect_ratios), "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(steps), "from_layers and steps should have same length"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers"
    if inter_layer_depth:
        assert len(from_layers) == len(inter_layer_depth), "from_layers and inter_layer_depth should have same length"

    use_relu = True
    conv_prefix = ''
    conv_postfix = ''
    bn_prefix = ''
    bn_postfix = '/bn'
    scale_prefix = ''
    scale_postfix = '/scale'   

    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1)],
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_term': False,
        }
    kwargs2 = {
        'param': [dict(lr_mult=1, decay_mult=1)],
        'weight_filler': dict(type='gaussian', std=0.01),
        }
    kwargs_sb = {
        'axis': 0,
        'bias_term': False,
        }
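    # The kwargs templates above (gaussian weight filler, std 0.01; bias-free,
    # biased, and scale-bias variants) are not referenced in the body below,
    # presumably kept for parity with related head builders.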

    prefix = 'arm'
    num_classes_rpn = 2
    num = len(from_layers)
    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    for i in range(0, num):
        from_layer = from_layers[i]

        # Get the normalize value.
        if normalizations:
            if normalizations[i] != -1:
                norm_name = "{}_norm".format(from_layer)
                net[norm_name] = L.Normalize(net[from_layer], scale_filler=dict(type="constant", value=normalizations[i]),
                    across_spatial=False, channel_shared=False)
                from_layer = norm_name

        # Add intermediate layers.
        if inter_layer_depth:
            if inter_layer_depth[i] > 0:
                # Inter layer from body to head.
                inter_name = "{}_inter".format(from_layer)
                # Depthwise convolution layer.
                inter_dw = inter_name + '/dw'
                DWConvBNLayer(net, from_layer, inter_dw, use_bn=True, use_relu=True, num_output=512, group=512, kernel_size=3, pad=1, stride=1,
                    conv_prefix=conv_prefix, conv_postfix=inter_dw, bn_prefix=bn_prefix, bn_postfix=bn_postfix,
                    scale_prefix=scale_prefix, scale_postfix=scale_postfix, **bn_param)
                # Pointwise (separable) convolution layer.
                inter_sep = inter_name + '/sep'
                ConvBNLayer(net, inter_dw, inter_sep, use_bn=True, use_relu=True, num_output=512, kernel_size=1, pad=0, stride=1,
                    conv_prefix=conv_prefix, conv_postfix=inter_sep, bn_prefix=bn_prefix, bn_postfix=bn_postfix,
                    scale_prefix=scale_prefix, scale_postfix=scale_postfix, **bn_param)
                # Bridge to the rest of the head.
                from_layer = inter_sep
                
        # Estimate number of priors per location given provided parameters.
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(min_size), "max_size and min_size should have same length."
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
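        # Worked example (illustrative values, not from this config): with
        # min_size=[30.0], max_size=[60.0], aspect_ratio=[2] and flip=True this
        # gives (2 + 1) * 1 + 1 * 1 = 4 priors per location: 1:1 at the min
        # scale, 1:1 at the min/max geometric mean, and 2:1 plus 1:2.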
        step = []
        if len(steps) > i:
            step = steps[i]
        # Create location prediction layer.
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes_rpn
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
            num_output=num_loc_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])        

        # Create confidence prediction layer.
        name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes_rpn
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
            num_output=num_conf_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])

        # Create prior generation layer.
        name = "{}_mbox_priorbox".format(from_layer)
        net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_size,
                clip=clip, variance=prior_variance, offset=offset)

        if max_size:
            net.update(name, {'max_size': max_size})
        if aspect_ratio:
            net.update(name, {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                net.update(name, {'img_size': img_height})
            else:
                net.update(name, {'img_h': img_height, 'img_w': img_width})
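        # Optional PriorBox fields (max_size, aspect_ratio/flip, step, image
        # size) are merged in afterwards via net.update, presumably a helper
        # provided by this NetSpec fork, so each field is only emitted when
        # actually supplied.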
        priorbox_layers.append(net[name])

    # Concatenate priorbox, loc, and conf layers.
    mbox_layers = []
    name = '{}{}'.format(prefix, "_loc")
    net[name] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net[name])
    name = '{}{}'.format(prefix, "_conf")
    net[name] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net[name])
    name = '{}{}'.format(prefix, "_priorbox")
    net[name] = L.Concat(*priorbox_layers, axis=2)
    mbox_layers.append(net[name])
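    # At this point mbox_layers holds [arm_loc, arm_conf, arm_priorbox]; the
    # ODM loc/conf outputs appended below reuse these ARM priors.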

    prefix = 'odm'
    num = len(from_layers2)
    loc_layers = []
    conf_layers = []
    for i in range(0, num):
        from_layer = from_layers2[i]

        # Get the normalize value.
        if normalizations:
            if normalizations[i] != -1:
                norm_name = "{}_norm".format(from_layer)
                net[norm_name] = L.Normalize(net[from_layer], scale_filler=dict(type="constant", value=normalizations[i]),
                    across_spatial=False, channel_shared=False)
                from_layer = norm_name

        # Add intermediate layers.
        if inter_layer_depth:
            if inter_layer_depth[i] > 0:
                # Inter layer from body to head.
                inter_name = "{}_inter".format(from_layer)
                # Depthwise convolution layer.
                inter_dw = inter_name + '/dw'
                DWConvBNLayer(net, from_layer, inter_dw, use_bn=True, use_relu=True, num_output=512, group=512, kernel_size=3, pad=1, stride=1,
                    conv_prefix=conv_prefix, conv_postfix=inter_dw, bn_prefix=bn_prefix, bn_postfix=bn_postfix,
                    scale_prefix=scale_prefix, scale_postfix=scale_postfix, **bn_param)
                # Pointwise (separable) convolution layer.
                inter_sep = inter_name + '/sep'
                ConvBNLayer(net, inter_dw, inter_sep, use_bn=True, use_relu=True, num_output=512, kernel_size=1, pad=0, stride=1,
                    conv_prefix=conv_prefix, conv_postfix=inter_sep, bn_prefix=bn_prefix, bn_postfix=bn_postfix,
                    scale_prefix=scale_prefix, scale_postfix=scale_postfix, **bn_param)
                # Bridge to the rest of the head.
                from_layer = inter_sep

        # Estimate number of priors per location given provided parameters.
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(min_size), "max_size and min_size should have same length."
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
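        # Same prior count as the corresponding ARM branch, since the same
        # min_sizes/max_sizes/aspect_ratios entries are indexed (assuming
        # from_layers2 aligns one-to-one with from_layers).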

        # Create location prediction layer.
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
                    num_output=num_loc_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])

        # Create confidence prediction layer.
        name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
                    num_output=num_conf_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])


    # Concatenate the ODM loc and conf layers (the priors are shared with the ARM).
    name = '{}{}'.format(prefix, "_loc")
    net[name] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net[name])
    name = '{}{}'.format(prefix, "_conf")
    net[name] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net[name])    

    return mbox_layers
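
# A minimal usage sketch (hypothetical layer names and sizes, not part of the
# original example; adjust to the actual backbone):
#
#   net = caffe.NetSpec()
#   ...  # build the data layer, backbone, and refined feature layers
#   mbox_layers = CreateRefineDetHead(net, data_layer='data', num_classes=21,
#       from_layers=['conv4_3', 'conv5_3', 'fc7', 'conv6_2'],
#       from_layers2=['P3', 'P4', 'P5', 'P6'],
#       min_sizes=[32, 64, 128, 256], aspect_ratios=[[2], [2], [2], [2]],
#       steps=[8, 16, 32, 64], inter_layer_depth=[1, 1, 1, 1],
#       share_location=True, flip=True, clip=False)
#   # mbox_layers == [arm_loc, arm_conf, arm_priorbox, odm_loc, odm_conf]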