def CreateMultiBoxHead(net, data_layer="data", num_classes=[], from_layers=[],
                       use_objectness=False, normalizations=[], use_batchnorm=True,
                       lr_mult=1, use_scale=True, min_sizes=[], max_sizes=[],
                       prior_variance=[0.1], aspect_ratios=[], steps=[],
                       img_height=0, img_width=0, share_location=True, flip=True,
                       clip=True, offset=0.5, inter_layer_depth=[], kernel_size=1,
                       pad=0, conf_postfix='', loc_postfix='',
                       head_postfix='ext/pm', **bn_param):
    """Build an SSD multibox prediction head on top of each source feature map.

    For every entry of ``from_layers`` this adds (layer names built from
    ``head_postfix`` plus the 1-based source index):
      * an optional Normalize layer (when ``normalizations[i] != -1``),
      * an optional 3x3 intermediate conv (when ``inter_layer_depth[i] > 0``),
      * a location conv -> Permute -> Flatten chain,
      * a confidence conv -> Permute -> Flatten chain,
      * a PriorBox layer fed by the feature map and ``data_layer``,
      * optionally an objectness conv chain (when ``use_objectness``).
    The per-source outputs are concatenated into ``mbox_loc``, ``mbox_conf``,
    ``mbox_priorbox`` (and ``mbox_objectness``).

    Args:
        net: caffe NetSpec-like object; mutated in place.
        data_layer: name of the input data layer (must already be in ``net``).
        num_classes: number of classes; despite the ``[]`` default, callers
            must pass a positive int (enforced by the asserts below).
        from_layers: names of the source feature layers.
        min_sizes/max_sizes/aspect_ratios/steps/normalizations/
        inter_layer_depth: per-source-layer lists; each must match
            ``from_layers`` in length when provided.
        **bn_param: forwarded to ConvBNLayer.

    Returns:
        list of concatenated mbox layers: [loc, conf, priorbox(, objectness)].
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    # Every per-source parameter list must line up with from_layers.
    if normalizations:
        assert len(from_layers) == len(
            normalizations
        ), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(
        min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(
            max_sizes), "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(
            aspect_ratios
        ), "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(
            steps), "from_layers and steps should have same length"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers"
    if inter_layer_depth:
        assert len(from_layers) == len(
            inter_layer_depth
        ), "from_layers and inter_layer_depth should have same length"

    num = len(from_layers)
    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    objectness_layers = []
    for i in range(0, num):
        from_layer = from_layers[i]

        # Optionally L2-normalize the feature map (a value of -1 disables it).
        if normalizations:
            if normalizations[i] != -1:
                norm_name = "{}{}_norm".format(head_postfix, i + 1)
                net[norm_name] = L.Normalize(net[from_layer],
                                             scale_filler=dict(
                                                 type="constant",
                                                 value=normalizations[i]),
                                             across_spatial=False,
                                             channel_shared=False)
                from_layer = norm_name

        # Optional 3x3 intermediate conv before the prediction convs.
        if inter_layer_depth:
            if inter_layer_depth[i] > 0:
                inter_name = "{}{}_inter".format(head_postfix, i + 1)
                ConvBNLayer(net, from_layer, inter_name, use_bn=use_batchnorm,
                            use_relu=True, lr_mult=lr_mult,
                            num_output=inter_layer_depth[i], kernel_size=3,
                            pad=1, stride=1, **bn_param)
                from_layer = inter_name

        # Estimate number of priors per location given provided parameters.
        # Scalars are promoted to one-element lists so the math is uniform.
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(
                    min_size), "max_size and min_size should have same length."
        if max_size:
            # One extra prior per min/max pair.
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            # Each aspect ratio also contributes a flipped (reciprocal) prior.
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = []
        if len(steps) > i:
            step = steps[i]

        # Create location prediction layer (4 coordinates per prior).
        name = "{}{}_mbox_loc{}".format(head_postfix, i + 1, loc_postfix)
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm,
                    use_relu=False, lr_mult=lr_mult, num_output=num_loc_output,
                    kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])

        # Create confidence prediction layer (num_classes scores per prior).
        name = "{}{}_mbox_conf{}".format(head_postfix, i + 1, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm,
                    use_relu=False, lr_mult=lr_mult,
                    num_output=num_conf_output, kernel_size=kernel_size,
                    pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])

        # Create prior generation layer; optional fields are patched in
        # afterwards via net.update so they are emitted only when set.
        name = "{}{}_mbox_priorbox".format(head_postfix, i + 1)
        net[name] = L.PriorBox(net[from_layer], net[data_layer],
                               min_size=min_size, clip=clip,
                               variance=prior_variance, offset=offset)
        if max_size:
            net.update(name, {'max_size': max_size})
        if aspect_ratio:
            net.update(name, {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                net.update(name, {'img_size': img_height})
            else:
                net.update(name, {'img_h': img_height, 'img_w': img_width})
        priorbox_layers.append(net[name])

        # Create objectness prediction layer (2-way score per prior).
        if use_objectness:
            name = "{}{}_mbox_objectness".format(head_postfix, i + 1)
            num_obj_output = num_priors_per_location * 2
            ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm,
                        use_relu=False, lr_mult=lr_mult,
                        num_output=num_obj_output, kernel_size=kernel_size,
                        pad=pad, stride=1, **bn_param)
            permute_name = "{}_perm".format(name)
            net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
            flatten_name = "{}_flat".format(name)
            net[flatten_name] = L.Flatten(net[permute_name], axis=1)
            objectness_layers.append(net[flatten_name])

    # Concatenate priorbox, loc, and conf layers across source layers.
    # Priors concatenate on axis 2 (they carry an extra dimension in caffe).
    mbox_layers = []
    name = "mbox_loc"
    net[name] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_conf"
    net[name] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_priorbox"
    net[name] = L.Concat(*priorbox_layers, axis=2)
    mbox_layers.append(net[name])
    if use_objectness:
        name = "mbox_objectness"
        net[name] = L.Concat(*objectness_layers, axis=1)
        mbox_layers.append(net[name])
    return mbox_layers
def CreateMultiBoxHead(net, data_layer="data", num_classes=[], from_layers=[],
                       use_objectness=False, normalizations=[], use_batchnorm=True,
                       min_sizes=[], max_sizes=[], prior_variance=[0.1],
                       aspect_ratios=[], share_location=True, flip=True, clip=True,
                       inter_layer_depth=0, kernel_size=1, pad=0,
                       conf_postfix='', loc_postfix=''):
    """Simpler SSD multibox head builder: names layers after the source layer.

    NOTE(review): this is a second definition of ``CreateMultiBoxHead`` in the
    same module; being defined later, it shadows the ``head_postfix`` variant
    above — confirm both are meant to coexist.

    Differences from the variant above: no steps/img-size/offset support,
    ``inter_layer_depth`` is a single int shared by all source layers,
    scalar min/max sizes only, and PriorBox arguments are chosen by explicit
    branching instead of net.update patching.

    Returns:
        list of concatenated mbox layers: [loc, conf, priorbox(, objectness)].
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), "from_layers and max_sizes should have same length"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers"

    num = len(from_layers)
    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    objectness_layers = []
    for i in range(0, num):
        from_layer = from_layers[i]

        # Optionally L2-normalize the feature map (value -1 disables it).
        if normalizations:
            if normalizations[i] != -1:
                norm_name = "{}_norm".format(from_layer)
                net[norm_name] = L.Normalize(net[from_layer],
                                             scale_filler=dict(type="constant", value=normalizations[i]),
                                             across_spatial=False, channel_shared=False)
                from_layer = norm_name

        # Optional shared-depth 3x3 intermediate conv.
        if inter_layer_depth > 0:
            inter_name = "{}_inter".format(from_layer)
            ConvBNLayer(net, from_layer, inter_name, use_bn=use_batchnorm,
                        use_relu=True, num_output=inter_layer_depth,
                        kernel_size=3, pad=1, stride=1)
            from_layer = inter_name

        # Estimate number of priors per location given provided parameters.
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        if max_sizes and max_sizes[i]:
            num_priors_per_location = 2 + len(aspect_ratio)
        else:
            num_priors_per_location = 1 + len(aspect_ratio)
        if flip:
            num_priors_per_location += len(aspect_ratio)
        # NOTE(review): the prior count is doubled here — presumably two
        # scales per location in this model; confirm against the PriorBox
        # configuration actually used with this head.
        num_priors_per_location = 2 * num_priors_per_location

        # Create location prediction layer (4 coordinates per prior).
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        num_loc_output = num_priors_per_location * 4;
        if not share_location:
            num_loc_output *= num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False,
                    num_output=num_loc_output, kernel_size=kernel_size, pad=pad, stride=1)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])

        # Create confidence prediction layer (num_classes scores per prior).
        name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes;
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False,
                    num_output=num_conf_output, kernel_size=kernel_size, pad=pad, stride=1)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])

        # Create prior generation layer.  Optional fields (max_size,
        # aspect_ratio/flip) are passed only when present, hence the branches.
        name = "{}_mbox_priorbox".format(from_layer)
        if max_sizes and max_sizes[i]:
            if aspect_ratio:
                net[name] = L.PriorBox(net[from_layer], net[data_layer],
                                       min_size=min_sizes[i], max_size=max_sizes[i],
                                       aspect_ratio=aspect_ratio, flip=flip,
                                       clip=clip, variance=prior_variance)
            else:
                net[name] = L.PriorBox(net[from_layer], net[data_layer],
                                       min_size=min_sizes[i], max_size=max_sizes[i],
                                       clip=clip, variance=prior_variance)
        else:
            if aspect_ratio:
                net[name] = L.PriorBox(net[from_layer], net[data_layer],
                                       min_size=min_sizes[i],
                                       aspect_ratio=aspect_ratio, flip=flip,
                                       clip=clip, variance=prior_variance)
            else:
                net[name] = L.PriorBox(net[from_layer], net[data_layer],
                                       min_size=min_sizes[i], clip=clip,
                                       variance=prior_variance)
        priorbox_layers.append(net[name])

        # Create objectness prediction layer (2-way score per prior).
        if use_objectness:
            name = "{}_mbox_objectness".format(from_layer)
            num_obj_output = num_priors_per_location * 2;
            ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False,
                        num_output=num_obj_output, kernel_size=kernel_size, pad=pad, stride=1)
            permute_name = "{}_perm".format(name)
            net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
            flatten_name = "{}_flat".format(name)
            net[flatten_name] = L.Flatten(net[permute_name], axis=1)
            objectness_layers.append(net[flatten_name])

    # Concatenate priorbox, loc, and conf layers across source layers.
    mbox_layers = []
    name = "mbox_loc"
    net[name] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_conf"
    net[name] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_priorbox"
    net[name] = L.Concat(*priorbox_layers, axis=2)
    mbox_layers.append(net[name])
    if use_objectness:
        name = "mbox_objectness"
        net[name] = L.Concat(*objectness_layers, axis=1)
        mbox_layers.append(net[name])
    return mbox_layers
def get_caffe_layer(node, net, input_dims):
    """Generate caffe layer for corresponding mxnet op.

    Dispatches on ``node['type']`` and returns the equivalent caffe netspec
    layer wired to the already-converted inputs in ``net``.  Returns None
    implicitly for unrecognized op types.

    Args:
        node (iterable from MxnetParser): Mxnet op summary generated by MxnetParser
        net (caffe.net): Caffe netspec object
        input_dims: network input spatial dimensions; index 0 is used to scale
            normalized MultiBoxPrior sizes/steps into pixels.

    Returns:
        caffe.layers: Equivalent caffe layer
    """
    if node['type'] == 'Convolution':
        assert len(node['inputs']) == 1, \
            'Convolution layers can have only one input'
        conv_params = node['attr']
        kernel_size = make_list(conv_params['kernel'])
        num_filters = make_list(conv_params['num_filter'])[0]
        # stride and dilation are optional in mxnet; caffe defaults are 1.
        if 'stride' in conv_params:
            stride = make_list(conv_params['stride'])[0]
        else:
            stride = 1
        padding = make_list(conv_params['pad'])
        if 'dilate' in conv_params:
            dilation = make_list(conv_params['dilate'])[0]
        else:
            dilation = 1
        convolution_param = {
            'pad': padding,
            'kernel_size': kernel_size,
            'num_output': num_filters,
            'stride': stride,
            'dilation': dilation
        }
        return layers.Convolution(net[node['inputs'][0]],
                                  convolution_param=convolution_param)
    if node['type'] == 'Activation':
        assert len(node['inputs']) == 1, \
            'Activation layers can have only one input'
        # Only ReLU activations are translatable here.
        assert node['attr']['act_type'] == 'relu'
        return layers.ReLU(net[node['inputs'][0]])
    if node['type'] == 'Pooling':
        assert len(node['inputs']) == 1, \
            'Pooling layers can have only one input'
        kernel_size = make_list(node['attr']['kernel'])
        stride = make_list(node['attr']['stride'])
        pooling_type = node['attr']['pool_type']
        if 'pad' in node['attr']:
            padding = make_list(node['attr']['pad'])
        else:
            padding = [0]
        # NOTE(review): `pooling` is unbound for pool types other than
        # max/avg — an unsupported type raises NameError below; confirm
        # whether an explicit error is wanted.
        if pooling_type == 'max':
            pooling = params.Pooling.MAX
        elif pooling_type == 'avg':
            pooling = params.Pooling.AVG
        pooling_param = {
            'pool': pooling,
            'pad': padding[0],
            'kernel_size': kernel_size[0],
            'stride': stride[0]
        }
        return layers.Pooling(net[node['inputs'][0]],
                              pooling_param=pooling_param)
    if node['type'] == 'L2Normalization':
        across_spatial = node['attr']['mode'] != 'channel'
        channel_shared = False
        scale_filler = {
            'type': "constant",
            'value': constants.NORMALIZATION_FACTOR
        }
        norm_param = {
            'across_spatial': across_spatial,
            'scale_filler': scale_filler,
            'channel_shared': channel_shared
        }
        return layers.Normalize(net[node['inputs'][0]], norm_param=norm_param)
    # Note - this layer has been implemented
    # only in WeiLiu's ssd branch of caffe not in caffe master
    if node['type'] == 'transpose':
        order = make_list(node['attr']['axes'])
        return layers.Permute(net[node['inputs'][0]],
                              permute_param={'order': order})
    if node['type'] == 'Flatten':
        # Anchor tensors keep their leading dims; everything else flattens
        # from axis 1.
        if node['inputs'][0].endswith('anchors'):
            axis = 2
        else:
            axis = 1
        return layers.Flatten(net[node['inputs'][0]],
                              flatten_param={'axis': axis})
    if node['type'] == 'Concat':
        # In the ssd model, always concatenate along last axis,
        # since anchor boxes have an extra dimension in caffe (that includes variance).
        axis = -1
        concat_inputs = [net[inp] for inp in node['inputs']]
        return layers.Concat(*concat_inputs, concat_param={'axis': axis})
    if node['type'] == 'Reshape':
        if node['name'] == 'multibox_anchors':
            reshape_dims = [1, 2, -1]
        else:
            reshape_dims = make_list(node['attr']['shape'])
        return layers.Reshape(net[node['inputs'][0]],
                              reshape_param={'shape': {
                                  'dim': reshape_dims
                              }})
    if node['type'] == '_contrib_MultiBoxPrior':
        # caffe's PriorBox also takes the data blob to recover the input size.
        priorbox_inputs = [net[inp] for inp in node['inputs']] + [net["data"]]
        # mxnet sizes are fractions of the input; convert to pixels, and
        # derive max_size so that sqrt(min*max) reproduces mxnet's second size.
        sizes = make_list(node["attr"]["sizes"])
        min_size = sizes[0] * input_dims[0]
        max_size = int(round((sizes[1] * input_dims[0])**2 / min_size))
        aspect_ratio = make_list(node["attr"]["ratios"])
        steps = make_list(node["attr"]["steps"])
        param = {
            'clip': node["attr"]["clip"] == "true",
            'flip': False,
            'min_size': min_size,
            'max_size': max_size,
            'aspect_ratio': aspect_ratio,
            'variance': [0.1, 0.1, 0.2, 0.2],
            'step': int(round(steps[0] * input_dims[0])),
        }
        return layers.PriorBox(*priorbox_inputs, prior_box_param=param)
    if node['type'] == '_contrib_MultiBoxDetection':
        multibox_inputs = [net[inp] for inp in node['inputs']]
        # caffe's DetectionOutput expects bottoms as (loc, conf, prior);
        # reorder from mxnet's input order.
        bottom_order = [1, 0, 2]
        multibox_inputs = [multibox_inputs[i] for i in bottom_order]
        param = {
            'num_classes': constants.NUM_CLASSES,
            'share_location': True,
            'background_label_id': 0,
            'nms_param': {
                'nms_threshold': float(node['attr']['nms_threshold']),
                'top_k': int(node['attr']['nms_topk'])
            },
            'keep_top_k': make_list(node['attr']['nms_topk'])[0],
            'confidence_threshold': 0.01,
            'code_type': params.PriorBox.CENTER_SIZE,
        }
        return layers.DetectionOutput(*multibox_inputs,
                                      detection_output_param=param)
    if node['type'] in ['SoftmaxActivation', 'SoftmaxOutput']:
        # 'channel' mode (or no mode) softmaxes over axis 1.
        if 'mode' not in node['attr']:
            axis = 1
        elif node['attr']['mode'] == 'channel':
            axis = 1
        else:
            axis = 0
        # note: caffe expects confidence scores to be flattened before detection output layer receives it
        return layers.Flatten(layers.Permute(
            layers.Softmax(net[node['inputs'][0]], axis=axis),
            permute_param={'order': [0, 2, 1]}),
                              flatten_param={'axis': 1})
def UnitLayerDenseDetectorHeader(net, data_layer="data", num_classes=2, feature_layer="conv5",
                                 normalization=-1, use_batchnorm=True, prior_variance=[0.1],
                                 pro_widths=[], pro_heights=[], flip=True, clip=True,
                                 inter_layer_channels=None, flat=False, use_focus_loss=False,
                                 stage=1, lr_mult=1, decay_mult=1):
    """Attach a dense detector header (loc/conf/priorbox) to one feature layer.

    Args:
        net: caffe NetSpec-like object; mutated in place.
        data_layer: name of the input data layer (must exist in net).
        num_classes: positive number of classes.
        feature_layer: name of the feature map to predict from.
        normalization: L2-norm scale for the feature map; -1 disables it.
        use_batchnorm: whether intermediate convs use batch norm.
        prior_variance: variance list forwarded to the PriorBox layer.
        pro_widths/pro_heights: proposed prior widths/heights (same length);
            one prior per (width, height) pair.
        flip, clip: PriorBox options.
        inter_layer_channels: iterable of (channels, kernel) pairs describing
            optional intermediate convs; None (or the legacy 0) means none.
        flat: if True, Flatten the permuted predictions.
        use_focus_loss: use focal-loss bias initialization for the conf conv.
        stage: suffix used to build this header's layer-name prefix.
        lr_mult, decay_mult: solver multipliers forwarded to the convs.

    Returns:
        (loc_layer, conf_layer, priorbox_layer) netspec handles.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers."
    assert feature_layer in net_layers, "feature_layer is not in net's layers."
    assert pro_widths, "Must provide proposed width/height."
    assert pro_heights, "Must provide proposed width/height."
    assert len(pro_widths) == len(pro_heights), "pro_widths/heights must have the same length."

    # Bug fix: the old default (0) crashed with TypeError in len(); treat
    # None/0 as "no intermediate layers" to stay caller-compatible.
    if not inter_layer_channels:
        inter_layer_channels = []

    from_layer = feature_layer
    prefix_name = '{}_{}'.format(from_layer, stage)

    # Optional L2 normalization of the source feature map.
    if normalization != -1:
        norm_name = "{}_norm".format(prefix_name)
        net[norm_name] = L.Normalize(net[from_layer],
                                     scale_filler=dict(type="constant", value=normalization),
                                     across_spatial=False, channel_shared=False)
        from_layer = norm_name

    # Optional stack of intermediate convs, one per (channels, kernel) pair.
    start_inter_id = 1
    for inter_channel, inter_kernel in inter_layer_channels:
        inter_name = "{}_inter_{}".format(prefix_name, start_inter_id)
        # "same" padding for odd kernels (1 -> 0, 3 -> 1).  The old code only
        # handled kernels 1 and 3 and hit NameError on anything else.
        inter_pad = (inter_kernel - 1) // 2
        ConvBNUnitLayer(net, from_layer, inter_name, use_bn=use_batchnorm, use_relu=True,
                        num_output=inter_channel, kernel_size=inter_kernel, pad=inter_pad,
                        stride=1, use_scale=True, leaky=False, lr_mult=lr_mult,
                        decay_mult=decay_mult, constant_value=0.2)
        from_layer = inter_name
        start_inter_id = start_inter_id + 1

    # One prior per proposed (width, height) pair.
    num_priors_per_location = len(pro_widths)

    # Location prediction: 4 offsets per prior per non-background class.
    name = "{}_mbox_loc".format(prefix_name)
    num_loc_output = num_priors_per_location * 4 * (num_classes - 1)
    ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                    num_output=num_loc_output, kernel_size=3, pad=1, stride=1,
                    lr_mult=lr_mult, decay_mult=decay_mult)
    permute_name = "{}_perm".format(name)
    net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layer = net[flatten_name]
    else:
        loc_layer = net[permute_name]

    # Confidence prediction: num_classes scores per prior.
    name = "{}_mbox_conf".format(prefix_name)
    num_conf_output = num_priors_per_location * num_classes
    if use_focus_loss:
        # Focal-loss style bias/weight initialization for the conf conv.
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                        num_output=num_conf_output, kernel_size=3, pad=1, stride=1,
                        init_xavier=False, bias_type='focal', sparse=num_classes,
                        lr_mult=lr_mult, decay_mult=decay_mult)
    else:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                        num_output=num_conf_output, kernel_size=3, pad=1, stride=1,
                        lr_mult=lr_mult, decay_mult=decay_mult)
    permute_name = "{}_perm".format(name)
    net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layer = net[flatten_name]
    else:
        conf_layer = net[permute_name]

    # Prior generation from the explicit width/height lists.
    name = "{}_mbox_priorbox".format(prefix_name)
    net[name] = L.PriorBox(net[from_layer], net[data_layer], pro_width=pro_widths,
                           pro_height=pro_heights, flip=flip, clip=clip,
                           variance=prior_variance)
    priorbox_layer = net[name]
    return loc_layer, conf_layer, priorbox_layer
def CreateMultiBoxHead(net, data_layer="data", num_classes=[], from_layers=[],
                       use_objectness=False, normalizations=[], use_batchnorm=True,
                       lr_mult=1, use_scale=True, min_sizes=[], max_sizes=[],
                       prior_variance=[0.1], aspect_ratios=[], steps=[],
                       img_height=0, img_width=0, share_location=True, flip=True,
                       clip=True, offset=0.5, inter_layer_depth=[], kernel_size=1,
                       pad=0, conf_postfix='', loc_postfix='', **bn_param):
    """Append Normalize/PriorBox layers and a final priorbox Concat to a raw
    NetParameter protobuf message.

    Unlike the NetSpec-based variants, ``net`` here is a caffe NetParameter:
    layers are created with ``net.layer.add()`` and filled by ``CopyFrom``-ing
    prototypes produced by the ``L.*`` helpers.  Only the priorbox side of the
    multibox head is emitted (see NOTE below); ``net`` is mutated in place and
    nothing is returned.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), \
            "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), \
        "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), \
            "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(aspect_ratios), \
            "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(steps), \
            "from_layers and steps should have same length"
    # data_layer membership cannot be asserted here: a NetParameter message
    # has no keys() view like a NetSpec does.
    if inter_layer_depth:
        assert len(from_layers) == len(inter_layer_depth), \
            "from_layers and inter_layer_depth should have same length"

    num = len(from_layers)
    loc_layers = []
    conf_layers = []
    priorbox_layers = collections.OrderedDict()
    norm_name_layers = collections.OrderedDict()
    for i in range(0, num):
        from_layer = from_layers[i]

        # Optional L2 normalization (value -1 disables), e.g.
        # normalizations = [20, -1, -1, -1, -1, -1].
        if normalizations:
            if normalizations[i] != -1:
                norm_name = "{}_norm".format(from_layer)
                norm_name_layers[norm_name] = net.layer.add()
                norm_name_layers[norm_name].CopyFrom(
                    L.Normalize(scale_filler=dict(type="constant",
                                                  value=normalizations[i]),
                                across_spatial=False,
                                channel_shared=False).to_proto().layer[0])
                norm_name_layers[norm_name].name = norm_name
                norm_name_layers[norm_name].top[0] = norm_name
                norm_name_layers[norm_name].bottom.append(from_layer)
                from_layer = norm_name

        # Estimate number of priors per location given provided parameters.
        # Scalars are promoted to one-element lists so the math is uniform.
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(min_size), \
                    "max_size and min_size should have same length."
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = []
        if len(steps) > i:
            step = steps[i]

        # NOTE(review): the loc output size is computed but no loc/conf conv
        # layer is ever emitted — this looks like unfinished porting of the
        # NetSpec variant; confirm intent before relying on this function.
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes

        # Create prior generation layer.  Start from a minimal PriorBox proto
        # and patch optional fields in afterwards: repeated proto fields
        # (max_size, aspect_ratio) must be extend()ed, not assigned.
        # (Dead debug scaffolding that built unused prototype layers here has
        # been removed.)
        name = "{}_mbox_priorbox".format(from_layer)
        priorbox_layers[name] = net.layer.add()
        priorbox_layers[name].CopyFrom(
            L.PriorBox(min_size=min_size, clip=clip, variance=prior_variance,
                       offset=offset).to_proto().layer[0])
        priorbox_layers[name].name = name
        priorbox_layers[name].top[0] = name
        priorbox_layers[name].bottom.append(from_layer)
        priorbox_layers[name].bottom.append(data_layer)
        if max_size:
            priorbox_layers[name].prior_box_param.max_size.extend(max_size)
        if aspect_ratio:
            priorbox_layers[name].prior_box_param.aspect_ratio.extend(
                aspect_ratio)
        if not flip:  # proto default is True; only write when overriding
            priorbox_layers[name].prior_box_param.flip = flip
        if step:
            priorbox_layers[name].prior_box_param.step = step

    # Concatenate all priorbox layers along axis 2.
    name = "mbox_priorbox"
    cat_mbox_layer = net.layer.add()
    cat_mbox_layer.CopyFrom(L.Concat(axis=2).to_proto().layer[0])
    cat_mbox_layer.name = name
    cat_mbox_layer.top[0] = name
    for bt in priorbox_layers.keys():
        cat_mbox_layer.bottom.append(bt)
def CreateUnifiedPredictionHead(net, data_layer="data", num_classes=[],
                                from_layers=[], use_objectness=False,
                                normalizations=[], use_batchnorm=True, lr_mult=1,
                                use_scale=True, min_sizes=[], max_sizes=[],
                                prior_variance=[0.1], aspect_ratios=[], steps=[],
                                img_height=0, img_width=0, share_location=True,
                                flip=True, clip=True, offset=0.5,
                                inter_layer_depth=[], kernel_size=1, pad=0,
                                conf_postfix='', loc_postfix='', **bn_param):
    """Build a multibox head whose loc/conf convs share weights across all
    source layers.

    The loc/conf Convolution layers pass explicit param name entries
    ('loc_p1'/'loc_p2', 'conf_p1'/'conf_p2') on every iteration, so caffe
    ties the weights/biases of all per-source prediction convs together —
    that sharing is what makes the head "unified".  PriorBox layers are
    still generated per source layer, exactly as in CreateMultiBoxHead.

    Returns:
        list of concatenated mbox layers: [loc, conf, priorbox(, objectness)].
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    # Every per-source parameter list must line up with from_layers.
    if normalizations:
        assert len(from_layers) == len(
            normalizations
        ), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(
        min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(
            max_sizes), "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(
            aspect_ratios
        ), "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(
            steps), "from_layers and steps should have same length"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers"
    if inter_layer_depth:
        assert len(from_layers) == len(
            inter_layer_depth
        ), "from_layers and inter_layer_depth should have same length"

    num = len(from_layers)
    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    objectness_layers = []
    # Shared (named) parameters: the same names are reused for every source
    # layer's prediction convs, tying their weights together.
    loc_args = {
        'param': [
            dict(name='loc_p1', lr_mult=lr_mult, decay_mult=1),
            dict(name='loc_p2', lr_mult=2 * lr_mult, decay_mult=0)
        ],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)
    }
    conf_args = {
        'param': [
            dict(name='conf_p1', lr_mult=lr_mult, decay_mult=1),
            dict(name='conf_p2', lr_mult=2 * lr_mult, decay_mult=0)
        ],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)
    }
    # Fixed prior count used to size the SHARED loc/conf convs (the shared
    # filters force one output size for all source layers).
    # NOTE(review): the per-layer count recomputed below for PriorBox must
    # also come out to this value, or loc/conf and priors will disagree —
    # confirm the prior configuration used with this head.
    if flip:
        num_priors_per_location = 6
    else:
        num_priors_per_location = 3
    for i in range(0, num):
        from_layer = from_layers[i]

        # Create location prediction layer (weights shared via loc_args).
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        net[name] = L.Convolution(net[from_layer],
                                  num_output=num_priors_per_location * 4,
                                  pad=1,
                                  kernel_size=3,
                                  stride=1,
                                  **loc_args)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])

        # Create confidence prediction layer (weights shared via conf_args).
        name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        net[name] = L.Convolution(net[from_layer],
                                  num_output=num_priors_per_location *
                                  num_classes,
                                  pad=1,
                                  kernel_size=3,
                                  stride=1,
                                  **conf_args)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])

        # Estimate number of priors per location given provided parameters.
        # Scalars are promoted to one-element lists so the math is uniform.
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(
                    min_size), "max_size and min_size should have same length."
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = []
        if len(steps) > i:
            step = steps[i]

        # Create prior generation layer; optional fields are patched in
        # afterwards via net.update so they are emitted only when set.
        name = "{}_mbox_priorbox".format(from_layer)
        net[name] = L.PriorBox(net[from_layer], net[data_layer],
                               min_size=min_size, clip=clip,
                               variance=prior_variance, offset=offset)
        if max_size:
            net.update(name, {'max_size': max_size})
        if aspect_ratio:
            net.update(name, {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                net.update(name, {'img_size': img_height})
            else:
                net.update(name, {'img_h': img_height, 'img_w': img_width})
        priorbox_layers.append(net[name])

        # Create objectness prediction layer (2-way score per prior; note
        # these convs are NOT weight-shared, unlike loc/conf above).
        if use_objectness:
            name = "{}_mbox_objectness".format(from_layer)
            num_obj_output = num_priors_per_location * 2
            ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm,
                        use_relu=False, lr_mult=lr_mult,
                        num_output=num_obj_output, kernel_size=kernel_size,
                        pad=pad, stride=1, **bn_param)
            permute_name = "{}_perm".format(name)
            net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
            flatten_name = "{}_flat".format(name)
            net[flatten_name] = L.Flatten(net[permute_name], axis=1)
            objectness_layers.append(net[flatten_name])

    # Concatenate priorbox, loc, and conf layers across source layers.
    mbox_layers = []
    name = "mbox_loc"
    net[name] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_conf"
    net[name] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_priorbox"
    net[name] = L.Concat(*priorbox_layers, axis=2)
    mbox_layers.append(net[name])
    if use_objectness:
        name = "mbox_objectness"
        net[name] = L.Concat(*objectness_layers, axis=1)
        mbox_layers.append(net[name])
    return mbox_layers
def UnitLayerDetectorHeader(net, data_layer="data", num_classes=2, feature_layer="conv5",
                            normalization=-1, use_batchnorm=True, prior_variance=[0.1],
                            pro_widths=[], pro_heights=[], flip=True, clip=True,
                            inter_layer_channels=[], flat=False, use_focus_loss=False,
                            stage=1, lr_mult=1.0, decay_mult=1.0, flag_withparamname=False,
                            flagcreateprior=True, add_str=""):
    """Attach a detector header (loc/conf and optionally priorbox) to one
    feature layer, suffixing created layer names with ``add_str``.

    Returns (loc_layer, conf_layer, priorbox_layer); ``priorbox_layer`` is []
    when ``flagcreateprior`` is False.  ``net`` is mutated in place.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers."
    # Fix: the old `print feature_layer` statement was Python-2-only syntax;
    # the call form prints the same text and also parses under Python 3.
    print(feature_layer)
    assert feature_layer + add_str in net_layers, \
        "feature_layer is not in net's layers.(%s)" % feature_layer
    assert pro_widths, "Must provide proposed width/height. "
    assert pro_heights, "Must provide proposed width/height."
    assert len(pro_widths) == len(
        pro_heights), "pro_widths/heights must have the same length."

    from_layer = feature_layer
    prefix_name = '{}_{}'.format(from_layer, stage)
    from_layer += add_str

    # Optional L2 normalization of the source feature map.
    if normalization != -1:
        norm_name = "{}_{}_norm".format(prefix_name, stage)
        net[norm_name] = L.Normalize(net[from_layer],
                                     scale_filler=dict(type="constant", value=normalization),
                                     across_spatial=False, channel_shared=False)
        from_layer = norm_name

    print(inter_layer_channels, "inter_layer_channels")
    # Optional stack of intermediate convs, one per (channels, kernel) pair.
    if len(inter_layer_channels) > 0:
        start_inter_id = 1
        for inter_channel_kernel in inter_layer_channels:
            inter_channel = inter_channel_kernel[0]
            inter_kernel = inter_channel_kernel[1]
            inter_name = "{}_inter_{}".format(prefix_name, start_inter_id)
            # "same" padding for odd kernels (1 -> 0, 3 -> 1).  The old code
            # only handled kernels 1 and 3 and hit NameError on anything else.
            inter_pad = (inter_kernel - 1) // 2
            ConvBNUnitLayer(net, from_layer, inter_name, use_bn=use_batchnorm,
                            use_relu=True, num_output=inter_channel,
                            kernel_size=inter_kernel, pad=inter_pad, stride=1,
                            use_scale=True, leaky=False, lr_mult=lr_mult,
                            decay_mult=decay_mult,
                            flag_withparamname=flag_withparamname,
                            pose_string=add_str)
            from_layer = inter_name + add_str
            start_inter_id = start_inter_id + 1

    # One prior per proposed (width, height) pair.
    num_priors_per_location = len(pro_widths)

    # Create location prediction layer (4 coordinates per prior).
    name = "{}_mbox_loc".format(prefix_name)
    num_loc_output = num_priors_per_location * 4
    ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                    num_output=num_loc_output, kernel_size=3, pad=1, stride=1,
                    lr_mult=lr_mult, decay_mult=decay_mult, pose_string=add_str)
    permute_name = "{}_perm".format(name) + add_str
    net[permute_name] = L.Permute(net[name + add_str], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name) + add_str
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layer = net[flatten_name]
    else:
        loc_layer = net[permute_name]

    # Create confidence prediction layer (num_classes scores per prior).
    name = "{}_mbox_conf".format(prefix_name)
    num_conf_output = num_priors_per_location * num_classes
    if use_focus_loss:
        # Focal-loss style bias/weight initialization for the conf conv.
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                        num_output=num_conf_output, kernel_size=3, pad=1, stride=1,
                        init_xavier=False, bias_type='focal', sparse=num_classes,
                        lr_mult=lr_mult, decay_mult=decay_mult, pose_string=add_str)
    else:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                        num_output=num_conf_output, kernel_size=3, pad=1, stride=1,
                        lr_mult=lr_mult, decay_mult=decay_mult, pose_string=add_str)
    permute_name = "{}_perm".format(name) + add_str
    net[permute_name] = L.Permute(net[name + add_str], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name) + add_str
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layer = net[flatten_name]
    else:
        conf_layer = net[permute_name]

    # Create prior generation layer, unless the caller reuses existing priors.
    if flagcreateprior:
        name = "{}_mbox_priorbox".format(prefix_name) + add_str
        net[name] = L.PriorBox(net[from_layer], net[data_layer],
                               pro_width=pro_widths, pro_height=pro_heights,
                               flip=flip, clip=clip, variance=prior_variance)
        priorbox_layer = net[name]
    else:
        priorbox_layer = []
    return loc_layer, conf_layer, priorbox_layer
def ACT_CreateCuboidHead(net, K=6, data_layer="data", num_classes=[], from_layers=[], normalizations=[], \
        use_batchnorm=True, lr_mult=1, use_scale=True, min_sizes=[], max_sizes=[], prior_variance=[0.1], \
        aspect_ratios=[], steps=[], img_height=0, img_width=0, share_location=True, flip=True, clip=True, \
        offset=0.5, kernel_size=1, pad=0, conf_postfix='', loc_postfix='', m='', fusion="concat", **bn_param):
    """Create the ACT-detector cuboid (tubelet) prediction head.

    For each layer in `from_layers`, the K per-frame streams
    ("<layer>_stream<k>") are optionally L2-normalized, then fused across
    streams (channel concat or element-wise sum). A location head predicts
    4*K regression values per prior (one box per frame of the tubelet); the
    confidence head predicts num_classes scores per prior jointly for all K
    frames (no factor K). PriorBox layers are created per source layer.

    Returns:
        [mbox_loc, mbox_conf, mbox_priorbox] concatenation layers.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(aspect_ratios), "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(steps), "from_layers and steps should have same length"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers"

    num = len(from_layers)
    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    for i in range(0, num):
        from_layer = from_layers[i]
        # Per-stream learnable L2 normalization (value given per source layer).
        if normalizations:
            if normalizations[i] != -1:
                for stream in xrange(K):
                    norm_name = "{}_norm_stream{}{}".format(from_layer, stream, m)
                    net[norm_name] = L.Normalize(net[from_layer + '_stream' + str(stream) + m],
                                                 scale_filler=dict(type="constant", value=normalizations[i]),
                                                 across_spatial=False, channel_shared=False)
                from_layer = "{}_norm".format(from_layer)
        # ACT: fuse the K streams into a single blob.
        if fusion == "concat":
            net[from_layer + '_concat'] = L.Concat(
                bottom=[from_layer + '_stream' + str(stream) + m for stream in xrange(K)], axis=1)
            from_layer += '_concat'
        else:
            assert fusion == "sum"
            # Fixed: the Caffe layer type is "Eltwise" (default op: SUM); the
            # original's L.EltWise would emit an unknown layer type "EltWise".
            net[from_layer + '_sum'] = L.Eltwise(
                bottom=[from_layer + '_stream' + str(stream) + m for stream in xrange(K)])
            from_layer += '_sum'
        # Estimate number of priors per location given provided parameters.
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(min_size), "max_size and min_size should have same length."
        # SSD prior counting: one per min_size (+1 each with max_size), plus
        # one per aspect ratio, doubled when flipped.
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = []
        if len(steps) > i:
            step = steps[i]
        # ACT-detector: location prediction layer.
        # Regresses one box (4 coords) for each of the K frames, per prior.
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        num_loc_output = num_priors_per_location * 4 * K
        if not share_location:
            num_loc_output *= num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
                    num_output=num_loc_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])
        # ACT-detector: confidence prediction layer.
        # Joint prediction over all frames, so no factor K here.
        name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
                    num_output=num_conf_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])
        # Create prior generation layer; optional fields are patched in after.
        name = "{}_mbox_priorbox".format(from_layer)
        net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_size,
                               clip=clip, variance=prior_variance, offset=offset)
        if max_size:
            net.update(name, {'max_size': max_size})
        if aspect_ratio:
            net.update(name, {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                net.update(name, {'img_size': img_height})
            else:
                net.update(name, {'img_h': img_height, 'img_w': img_width})
        priorbox_layers.append(net[name])
    # Concatenate priorbox, loc, and conf layers across all source layers.
    mbox_layers = []
    name = "mbox_loc"
    net[name] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_conf"
    net[name] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_priorbox"
    net[name] = L.Concat(*priorbox_layers, axis=2)
    mbox_layers.append(net[name])
    return mbox_layers
def UnitLayerDetectorHeader(net, data_layer="data", num_classes=2, feature_layer="conv5", \
        use_objectness=False, normalization=-1, use_batchnorm=True, prior_variance=[0.1], \
        min_sizes=[], max_sizes=[], aspect_ratios=[], pro_widths=[], pro_heights=[], \
        share_location=True, flip=True, clip=False, inter_layer_channels=[], kernel_size=1, \
        pad=0, conf_postfix='', loc_postfix='', flat=False, use_focus_loss=False, stage=1):
    """Create a one-scale detection header on top of `feature_layer`.

    Anchors are parameterized either SSD-style (min_sizes/max_sizes/
    aspect_ratios) or by explicit pro_widths/pro_heights lists — exactly one
    of the two. Builds optional Normalize and intermediate conv layers, then
    location/confidence heads (Permute + optional Flatten), a PriorBox layer,
    and an optional binary objectness head.

    NOTE(review): this redefines UnitLayerDetectorHeader (an earlier variant
    with the same name exists in this file); the later definition wins at
    import time — confirm which one callers intend.

    Returns:
        (loc_layer, conf_layer, priorbox_layer[, objectness_layer]).
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers."
    assert feature_layer in net_layers, "feature_layer is not in net's layers."
    # Exactly one anchor parameterization must be supplied.
    if min_sizes:
        assert not pro_widths, "pro_widths should not be provided when using min_sizes."
        assert not pro_heights, "pro_heights should not be provided when using min_sizes."
        if max_sizes:
            assert len(max_sizes) == len(min_sizes), "min_sizes and max_sizes must have the same length."
    else:
        assert pro_widths, "Must provide proposed width/height."
        assert pro_heights, "Must provide proposed width/height."
        assert len(pro_widths) == len(pro_heights), "pro_widths/heights must have the same length."
        assert not min_sizes, "min_sizes should be not provided when using pro_widths/heights."
        assert not max_sizes, "max_sizes should be not provided when using pro_widths/heights."

    from_layer = feature_layer
    prefix_name = '{}_{}'.format(from_layer, stage)
    # Norm-Layer (learnable per-channel L2 normalization).
    if normalization != -1:
        norm_name = "{}_{}_norm".format(prefix_name, stage)
        net[norm_name] = L.Normalize(net[from_layer],
                                     scale_filler=dict(type="constant", value=normalization),
                                     across_spatial=False, channel_shared=False)
        from_layer = norm_name
    # Intermediate conv stack: each entry is (num_output, kernel_size).
    # Fixed: the default was `inter_layer_channels=0`, which crashed at
    # len(inter_layer_channels); an empty list preserves "no inter layers".
    if len(inter_layer_channels) > 0:
        start_inter_id = 1
        for inter_channel_kernel in inter_layer_channels:
            inter_channel = inter_channel_kernel[0]
            inter_kernel = inter_channel_kernel[1]
            inter_name = "{}_inter_{}".format(prefix_name, start_inter_id)
            # "same" padding for any odd kernel; the original assigned
            # inter_pad only for kernels 1 and 3 (NameError otherwise).
            inter_pad = (inter_kernel - 1) // 2
            ConvBNUnitLayer(net, from_layer, inter_name, use_bn=use_batchnorm, use_relu=True,
                            num_output=inter_channel, kernel_size=inter_kernel, pad=inter_pad,
                            stride=1, use_scale=True, leaky=False)
            from_layer = inter_name
            start_inter_id = start_inter_id + 1
    # Estimate number of priors per location given provided parameters.
    if min_sizes:
        if aspect_ratios:
            num_priors_per_location = len(aspect_ratios) + 1
            if flip:
                num_priors_per_location += len(aspect_ratios)
            if max_sizes:
                num_priors_per_location += 1
            num_priors_per_location *= len(min_sizes)
        else:
            if max_sizes:
                num_priors_per_location = 2 * len(min_sizes)
            else:
                num_priors_per_location = len(min_sizes)
    else:
        num_priors_per_location = len(pro_widths)
    # Create location prediction layer.
    name = "{}_mbox_loc{}".format(prefix_name, loc_postfix)
    # NOTE(review): class-aware regression — 4 coords per non-background
    # class; confirm this matches the loss layer's expectation.
    num_loc_output = num_priors_per_location * 4 * (num_classes - 1)
    if not share_location:
        num_loc_output *= num_classes
    ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                    num_output=num_loc_output, kernel_size=3, pad=1, stride=1)
    permute_name = "{}_perm".format(name)
    net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layer = net[flatten_name]
    else:
        loc_layer = net[permute_name]
    # Create confidence prediction layer.
    name = "{}_mbox_conf{}".format(prefix_name, conf_postfix)
    num_conf_output = num_priors_per_location * num_classes
    if use_focus_loss:
        # bias_type='focal' — presumably a focal-loss-style bias init inside
        # ConvBNUnitLayer; confirm against that helper.
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                        num_output=num_conf_output, kernel_size=3, pad=1, stride=1,
                        init_xavier=False, bias_type='focal', sparse=num_classes)
    else:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                        num_output=num_conf_output, kernel_size=3, pad=1, stride=1)
    permute_name = "{}_perm".format(name)
    net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layer = net[flatten_name]
    else:
        conf_layer = net[permute_name]
    # Create prior generation layer (branch on the anchor parameterization).
    name = "{}_mbox_priorbox".format(prefix_name)
    if min_sizes:
        if aspect_ratios:
            if max_sizes:
                net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_sizes,
                                       max_size=max_sizes, aspect_ratio=aspect_ratios,
                                       flip=flip, clip=clip, variance=prior_variance)
            else:
                net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_sizes,
                                       aspect_ratio=aspect_ratios, flip=flip, clip=clip,
                                       variance=prior_variance)
        else:
            if max_sizes:
                net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_sizes,
                                       max_size=max_sizes, flip=flip, clip=clip,
                                       variance=prior_variance)
            else:
                net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_sizes,
                                       flip=flip, clip=clip, variance=prior_variance)
        priorbox_layer = net[name]
    else:
        net[name] = L.PriorBox(net[from_layer], net[data_layer], pro_width=pro_widths,
                               pro_height=pro_heights, flip=flip, clip=clip,
                               variance=prior_variance)
        priorbox_layer = net[name]
    # Create objectness prediction layer (binary object-vs-background).
    if use_objectness:
        name = "{}_mbox_objectness".format(prefix_name)
        num_obj_output = num_priors_per_location * 2
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                        num_output=num_obj_output, kernel_size=kernel_size, pad=pad, stride=1)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        if flat:
            flatten_name = "{}_flat".format(name)
            net[flatten_name] = L.Flatten(net[permute_name], axis=1)
            objectness_layer = net[flatten_name]
        else:
            objectness_layer = net[permute_name]
    if use_objectness:
        return loc_layer, conf_layer, priorbox_layer, objectness_layer
    else:
        return loc_layer, conf_layer, priorbox_layer
def CreateRefineDetHead(net, data_layer="data", num_classes=[], from_layers=[], from_layers2=[],
        normalizations=[], use_batchnorm=True, lr_mult=1, min_sizes=[], max_sizes=[],
        prior_variance=[0.1], aspect_ratios=[], steps=[], img_height=0, img_width=0,
        share_location=True, flip=True, clip=True, offset=0.5, inter_layer_depth=[],
        kernel_size=1, pad=0, conf_postfix='', loc_postfix='', **bn_param):
    """Build the two-step RefineDet detection head.

    Step 1 (ARM, anchor refinement): for every layer in `from_layers`,
    predicts class-agnostic (2-class) scores and box refinements, and creates
    the PriorBox layers. Step 2 (ODM, object detection): for every layer in
    `from_layers2`, predicts `num_classes` scores and final box regressions
    using the same anchor configuration; no new priors are created.

    Returns:
        [arm_loc, arm_conf, arm_priorbox, odm_loc, odm_conf] Concat layers.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(aspect_ratios), "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(steps), "from_layers and steps should have same length"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers"
    if inter_layer_depth:
        assert len(from_layers) == len(inter_layer_depth), "from_layers and inter_layer_depth should have same length"
    # NOTE(review): use_relu and the kwargs/kwargs2/kwargs_sb dicts are never
    # referenced below in this function.
    use_relu = True
    conv_prefix = ''
    conv_postfix = ''
    bn_prefix = ''
    bn_postfix = '/bn'
    scale_prefix = ''
    scale_postfix = '/scale'
    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1)],
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_term': False,
        }
    kwargs2 = {
        'param': [dict(lr_mult=1, decay_mult=1)],
        'weight_filler': dict(type='gaussian', std=0.01),
        }
    kwargs_sb = {
        'axis': 0,
        'bias_term': False
        }
    # ---- Step 1: Anchor Refinement Module (class-agnostic, 2 classes). ----
    prefix = 'arm'
    num_classes_rpn = 2
    num = len(from_layers)
    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    for i in range(0, num):
        from_layer = from_layers[i]
        # Optional learnable L2 normalization of the source layer.
        if normalizations:
            if normalizations[i] != -1:
                norm_name = "{}_norm".format(from_layer)
                net[norm_name] = L.Normalize(net[from_layer],
                                             scale_filler=dict(type="constant", value=normalizations[i]),
                                             across_spatial=False, channel_shared=False)
                from_layer = norm_name
        # Optional depthwise-separable intermediate block (3x3 depthwise +
        # 1x1 pointwise, both 512 channels) between backbone and predictors.
        if inter_layer_depth:
            if inter_layer_depth[i] > 0:
                # Inter layer from body to head
                inter_name = "{}_inter".format(from_layer)
                # Depthwise convolution layer
                inter_dw = inter_name + '/dw'
                DWConvBNLayer(net, from_layer, inter_dw, use_bn=True, use_relu=True, num_output=512,
                              group=512, kernel_size=3, pad=1, stride=1, conv_prefix=conv_prefix,
                              conv_postfix=inter_dw, bn_prefix=bn_prefix, bn_postfix=bn_postfix,
                              scale_prefix=scale_prefix, scale_postfix=scale_postfix, **bn_param)
                # Seperate layer
                inter_sep = inter_name + '/sep'
                ConvBNLayer(net, inter_dw, inter_sep, use_bn=True, use_relu=True, num_output=512,
                            kernel_size=1, pad=0, stride=1, conv_prefix=conv_prefix,
                            conv_postfix=inter_sep, bn_prefix=bn_prefix, bn_postfix=bn_postfix,
                            scale_prefix=scale_prefix, scale_postfix=scale_postfix, **bn_param)
                # Bridge of rest of head
                from_layer = inter_sep
        # Estimate number of priors per location given provided parameters.
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(min_size), "max_size and min_size should have same length."
        # SSD prior counting: one per min_size (+1 each with max_size), plus
        # one per aspect ratio, doubled when flipped.
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = []
        if len(steps) > i:
            step = steps[i]
        # Create location prediction layer (4 coords per prior).
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes_rpn
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
                    num_output=num_loc_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])
        # Create confidence prediction layer (2-class object-vs-background).
        name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes_rpn
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
                    num_output=num_conf_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])
        # Create prior generation layer; optional PriorBox fields are patched
        # into the layer afterwards via net.update.
        name = "{}_mbox_priorbox".format(from_layer)
        net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_size,
                               clip=clip, variance=prior_variance, offset=offset)
        if max_size:
            net.update(name, {'max_size': max_size})
        if aspect_ratio:
            net.update(name, {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                net.update(name, {'img_size': img_height})
            else:
                net.update(name, {'img_h': img_height, 'img_w': img_width})
        priorbox_layers.append(net[name])
    # Concatenate the ARM priorbox, loc, and conf layers.
    mbox_layers = []
    name = '{}{}'.format(prefix, "_loc")
    net[name] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net[name])
    name = '{}{}'.format(prefix, "_conf")
    net[name] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net[name])
    name = '{}{}'.format(prefix, "_priorbox")
    net[name] = L.Concat(*priorbox_layers, axis=2)
    mbox_layers.append(net[name])
    # ---- Step 2: Object Detection Module (full num_classes). ----
    prefix = 'odm'
    num = len(from_layers2)
    loc_layers = []
    conf_layers = []
    for i in range(0, num):
        from_layer = from_layers2[i]
        # NOTE(review): this pass indexes normalizations[i] and
        # inter_layer_depth[i], which were length-validated against
        # from_layers, not from_layers2 — assumes both lists have the same
        # length; confirm with callers.
        if normalizations:
            if normalizations[i] != -1:
                norm_name = "{}_norm".format(from_layer)
                net[norm_name] = L.Normalize(net[from_layer],
                                             scale_filler=dict(type="constant", value=normalizations[i]),
                                             across_spatial=False, channel_shared=False)
                from_layer = norm_name
        # Same optional depthwise-separable intermediate block as in the ARM.
        if inter_layer_depth:
            if inter_layer_depth[i] > 0:
                # Inter layer from body to head
                inter_name = "{}_inter".format(from_layer)
                # Depthwise convolution layer
                inter_dw = inter_name + '/dw'
                DWConvBNLayer(net, from_layer, inter_dw, use_bn=True, use_relu=True, num_output=512,
                              group=512, kernel_size=3, pad=1, stride=1, conv_prefix=conv_prefix,
                              conv_postfix=inter_dw, bn_prefix=bn_prefix, bn_postfix=bn_postfix,
                              scale_prefix=scale_prefix, scale_postfix=scale_postfix, **bn_param)
                # Seperate layer
                inter_sep = inter_name + '/sep'
                ConvBNLayer(net, inter_dw, inter_sep, use_bn=True, use_relu=True, num_output=512,
                            kernel_size=1, pad=0, stride=1, conv_prefix=conv_prefix,
                            conv_postfix=inter_sep, bn_prefix=bn_prefix, bn_postfix=bn_postfix,
                            scale_prefix=scale_prefix, scale_postfix=scale_postfix, **bn_param)
                # Bridge of rest of head
                from_layer = inter_sep
        # Estimate number of priors per location (same rules as ARM pass).
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(min_size), "max_size and min_size should have same length."
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        # Create location prediction layer (final box regression).
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
                    num_output=num_loc_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])
        # Create confidence prediction layer (all num_classes scores).
        name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False, lr_mult=lr_mult,
                    num_output=num_conf_output, kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])
    # Concatenate the ODM loc and conf layers (priors are shared with ARM).
    name = '{}{}'.format(prefix, "_loc")
    net[name] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net[name])
    name = '{}{}'.format(prefix, "_conf")
    net[name] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net[name])
    return mbox_layers