def rpn_layer(self, net, layer_idx, bottom_blob, channel, idx):
    """Build the shared conv tower and its four prediction branches.

    Four 3x3 ``ConvBNReLU`` layers are stacked on ``bottom_blob``.  On top of
    the last one, four 1x1 branches without activation and with a bias term
    are created: class score (``self.num_classes - 1`` outputs), centerness
    (1 output), vertex regression and occlusion (``self.max_joints * 2``
    outputs each).

    Tops registered in ``net``:
        ``cls_score``, ``centerness``, ``occlusion``: sigmoid of the
        corresponding branch.
        ``scoremap_perm``, ``centernessmap_perm``, ``occlusionmap_perm``:
        the sigmoid tops permuted with order ``[0, 2, 3, 1]``.
        ``regressionmap_perm``: the raw regression branch permuted the same
        way.

    Args:
        net: net specification, indexed by top name.
        layer_idx: running layer counter threaded through ``ConvBNReLU``.
        bottom_blob: input of the conv tower.
        channel: base channel count, multiplied by ``self.width_mult``.
        idx: not read by this method.

    Returns:
        The layer counter returned by the last ``ConvBNReLU`` call.
    """
    tower_width = int(channel * self.width_mult)
    feature = bottom_blob
    for _ in range(4):
        feature, layer_idx = ConvBNReLU(net, layer_idx, feature, tower_width,
                                        kernel_size=3, stride=1)

    # Every branch is a plain 1x1 convolution with bias and no activation.
    branch_kwargs = dict(kernel_size=1, stride=1, use_activation=False,
                         bias_term=True)
    score_raw, layer_idx = ConvBNReLU(net, layer_idx, feature,
                                      self.num_classes - 1, **branch_kwargs)
    centerness_raw, layer_idx = ConvBNReLU(net, layer_idx, feature, 1,
                                           **branch_kwargs)
    vertex_pred, layer_idx = ConvBNReLU(net, layer_idx, feature,
                                        self.max_joints * 2, **branch_kwargs)
    occlusion_raw, layer_idx = ConvBNReLU(net, layer_idx, feature,
                                          self.max_joints * 2, **branch_kwargs)

    for top_name, raw in (('cls_score', score_raw),
                          ('centerness', centerness_raw),
                          ('occlusion', occlusion_raw)):
        net[top_name] = L.Sigmoid(raw)

    channel_last = [0, 2, 3, 1]
    net['scoremap_perm'] = L.Permute(net['cls_score'], order=list(channel_last))
    net['centernessmap_perm'] = L.Permute(net['centerness'], order=list(channel_last))
    net['occlusionmap_perm'] = L.Permute(net['occlusion'], order=list(channel_last))
    net['regressionmap_perm'] = L.Permute(vertex_pred, order=list(channel_last))
    return layer_idx
def mute_net(self, in_data, order):
    """Permute the axes of one 3-D array by running a Caffe ``Permute`` layer.

    A two-layer net (``Input`` -> ``Permute``) is written to
    ``temp/internal.prototxt``, loaded in TEST mode and run once on
    ``in_data``.  The prototxt is deleted again once the net is loaded; the
    ``temp/`` directory itself is left in place.

    Args:
        in_data: array whose ``shape`` unpacks into exactly three values
            ``(cin, h, w)``; it is reshaped to ``[1, cin, h, w]`` and cast to
            ``float32`` before being fed to the net.
        order: axis order handed to the ``Permute`` layer.

    Returns:
        ``float32`` array holding item 0 of the net's ``out`` blob.
    """
    cin, h, w = in_data.shape
    model_path = 'temp/'
    if not os.path.exists(model_path):
        os.mkdir(model_path)

    n = caffe.NetSpec()
    # Exactly one sample is fed below, so the batch dimension is 1.
    n.data0 = L.Input(shape=[dict(dim=[1, cin, h, w])])
    n.out = L.Permute(n.data0, order=order)

    def_file = model_path + 'internal.prototxt'
    # The context manager closes (and flushes) the file before Caffe reads it.
    with open(def_file, 'w') as f:
        f.write(str(n.to_proto()))

    net = caffe.Net(def_file, caffe.TEST)
    # The definition is only needed while the net is being loaded.
    if os.path.exists(def_file):
        os.remove(def_file)

    net.blobs['data0'].data[...] = np.float32(in_data.reshape([1, cin, h, w]))
    output = net.forward()
    return np.float32(output['out'][0])
def PlateNetBody(net, data_layer, time_step, num_classes):
    """Attach two stacked LSTM layers and an InnerProduct layer to ``net``.

    Layers are registered on ``net`` in this order:
        ``indicator``      ContinuationIndicator (``time_step`` as given,
                           batch size fixed to 512).
        ``permuted_data``  ``data_layer`` permuted with order ``[3, 0, 1, 2]``.
        ``lstm1``          LSTM over ``permuted_data`` and ``indicator``.
        ``lstm2``          LSTM over ``lstm1`` and ``indicator``.
        ``fc1``            InnerProduct over axis 2 with ``num_classes + 1``
                           outputs.

    Both LSTM layers use 100 outputs with xavier weights and zero bias.

    Args:
        net: net specification that receives the layers.
        data_layer: top used as the input of the permutation.
        time_step: forwarded to the ContinuationIndicator layer.
        num_classes: the InnerProduct layer gets one output more than this.

    Returns:
        The same ``net`` object.
    """
    xavier = dict(type='xavier')
    zero_bias = dict(type='constant', value=0)

    lstm_param = {
        'num_output': 100,
        'weight_filler': xavier,
        'bias_filler': zero_bias,
    }
    fc_kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1),
                  dict(lr_mult=2, decay_mult=0)],
        'weight_filler': xavier,
        'bias_filler': zero_bias,
    }

    net.indicator = L.ContinuationIndicator(time_step=time_step,
                                            batch_size=512)
    net.permuted_data = L.Permute(data_layer, order=[3, 0, 1, 2])
    net.lstm1 = L.LSTM(net.permuted_data, net.indicator,
                       recurrent_param=lstm_param)
    net.lstm2 = L.LSTM(net.lstm1, net.indicator,
                       recurrent_param=lstm_param)
    net.fc1 = L.InnerProduct(net.lstm2, num_output=num_classes + 1, axis=2,
                             **fc_kwargs)
    return net
def _make_module(model_path, in_shape, order):
    """Write a minimal ``Input`` -> ``Permute`` Caffe model to ``model_path``.

    Two files are produced inside ``model_path``: ``test.prototxt`` with the
    net definition and ``test.caffemodel`` saved from a net loaded in TEST
    mode.

    Args:
        model_path: existing directory that receives both files.
        in_shape: dimensions of the ``data`` input layer.
        order: axis order of the ``permute`` layer.
    """
    spec = caffe.NetSpec()
    spec.data = L.Input(name="data", input_param={"shape": {"dim": in_shape}})
    spec.perm = L.Permute(spec.data, name="permute", order=order)

    proto_path = os.path.join(model_path, 'test.prototxt')
    weights_path = os.path.join(model_path, 'test.caffemodel')

    with open(proto_path, 'w') as proto_file:
        proto_file.write(str(spec.to_proto()))

    # The definition is read back from disk, so load it only once the
    # file above has been closed.
    loaded = caffe.Net(proto_path, caffe.TEST)
    loaded.save(weights_path)
def CreateUnifiedPredictionHead(net, data_layer="data", num_classes=[], from_layers=[],
                                use_objectness=False, normalizations=[], use_batchnorm=True,
                                lr_mult=1, use_scale=True, min_sizes=[], max_sizes=[],
                                prior_variance=[0.1], aspect_ratios=[], steps=[],
                                img_height=0, img_width=0, share_location=True, flip=True,
                                clip=True, offset=0.5, inter_layer_depth=[], kernel_size=1,
                                pad=0, conf_postfix='', loc_postfix='', **bn_param):
    """Add location/confidence/prior-box heads for every layer in ``from_layers``.

    For each source layer a 3x3 location convolution and a 3x3 confidence
    convolution are created, each followed by a Permute (order
    ``[0, 2, 3, 1]``) and a Flatten (axis 1), plus a PriorBox layer.  All
    location convolutions use the parameter names ``loc_p1``/``loc_p2`` and
    all confidence convolutions ``conf_p1``/``conf_p2``.  The per-layer
    results are concatenated into ``mbox_loc``, ``mbox_conf`` and
    ``mbox_priorbox`` (and ``mbox_objectness`` when ``use_objectness``).

    Args:
        net: net specification; must already contain ``data_layer`` and every
            entry of ``from_layers``.
        data_layer: name of the data top handed to the PriorBox layers.
        num_classes: positive number of classes.
        from_layers: names of the source layers.
        use_objectness: also create an objectness branch via ``ConvBNLayer``.
        normalizations, inter_layer_depth: only their lengths are validated.
        use_batchnorm, kernel_size, pad, **bn_param: used by the objectness
            branch only.
        lr_mult: learning-rate multiplier of the shared convolutions and of
            the objectness branch.
        use_scale, share_location: accepted but not read.
        min_sizes, max_sizes, aspect_ratios, steps: per-source-layer prior
            box settings; scalars are wrapped into one-element lists.
        prior_variance, clip, offset, flip, img_height, img_width: forwarded
            to the PriorBox layers.
        conf_postfix, loc_postfix: appended to the conf/loc layer names.

    Returns:
        List ``[mbox_loc, mbox_conf, mbox_priorbox]`` plus
        ``mbox_objectness`` when ``use_objectness`` is set.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"

    # (label, values, always_checked) in the order the checks are performed.
    length_checks = (
        ("normalizations", normalizations, False),
        ("min_sizes", min_sizes, True),
        ("max_sizes", max_sizes, False),
        ("aspect_ratios", aspect_ratios, False),
        ("steps", steps, False),
    )
    for label, values, always_checked in length_checks:
        if always_checked or values:
            assert len(from_layers) == len(values), \
                "from_layers and {} should have same length".format(label)
    assert data_layer in net.keys(), "data_layer is not in net's layers"
    if inter_layer_depth:
        assert len(from_layers) == len(inter_layer_depth), \
            "from_layers and inter_layer_depth should have same length"

    def shared_conv_args(prefix):
        # Same parameter names for every source layer of one branch.
        return {
            'param': [
                dict(name=prefix + '_p1', lr_mult=lr_mult, decay_mult=1),
                dict(name=prefix + '_p2', lr_mult=2 * lr_mult, decay_mult=0),
            ],
            'weight_filler': dict(type='xavier'),
            'bias_filler': dict(type='constant', value=0),
        }

    def flattened(conv_name):
        # conv -> Permute -> Flatten; returns the flattened top.
        perm_name = "{}_perm".format(conv_name)
        net[perm_name] = L.Permute(net[conv_name], order=[0, 2, 3, 1])
        flat_name = "{}_flat".format(conv_name)
        net[flat_name] = L.Flatten(net[perm_name], axis=1)
        return net[flat_name]

    def as_list(value):
        return value if type(value) is list else [value]

    loc_args = shared_conv_args('loc')
    conf_args = shared_conv_args('conf')

    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    objectness_layers = []

    # NOTE(review): the loc/conf convolutions of the first source layer use
    # this fixed count.  The variable is overwritten further down inside the
    # loop, so the convolutions of later source layers use the count computed
    # for the previous layer.  Confirm that this is intended.
    num_priors_per_location = 6 if flip else 3

    for i, from_layer in enumerate(from_layers):
        # Location prediction.
        loc_name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        net[loc_name] = L.Convolution(net[from_layer],
                                      num_output=num_priors_per_location * 4,
                                      pad=1, kernel_size=3, stride=1, **loc_args)
        loc_layers.append(flattened(loc_name))

        # Confidence prediction.
        conf_name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        net[conf_name] = L.Convolution(net[from_layer],
                                       num_output=num_priors_per_location * num_classes,
                                       pad=1, kernel_size=3, stride=1, **conf_args)
        conf_layers.append(flattened(conf_name))

        # Prior-box settings of this source layer.
        min_size = as_list(min_sizes[i])
        aspect_ratio = as_list(aspect_ratios[i]) if len(aspect_ratios) > i else []
        max_size = as_list(max_sizes[i]) if len(max_sizes) > i else []
        if max_size:
            assert len(max_size) == len(min_size), \
                "max_size and min_size should have same length."
        step = steps[i] if len(steps) > i else []

        # One box per min size, one more per max size, one per aspect ratio
        # and another one per aspect ratio when flipping.
        boxes_per_size = (2 if max_size else 1) + len(aspect_ratio)
        if flip:
            boxes_per_size += len(aspect_ratio)
        num_priors_per_location = boxes_per_size * len(min_size)

        # Prior generation.
        prior_name = "{}_mbox_priorbox".format(from_layer)
        net[prior_name] = L.PriorBox(net[from_layer], net[data_layer],
                                     min_size=min_size, clip=clip,
                                     variance=prior_variance, offset=offset)
        if max_size:
            net.update(prior_name, {'max_size': max_size})
        if aspect_ratio:
            net.update(prior_name, {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(prior_name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                net.update(prior_name, {'img_size': img_height})
            else:
                net.update(prior_name, {'img_h': img_height, 'img_w': img_width})
        priorbox_layers.append(net[prior_name])

        # Optional objectness prediction.
        if use_objectness:
            obj_name = "{}_mbox_objectness".format(from_layer)
            ConvBNLayer(net, from_layer, obj_name, use_bn=use_batchnorm,
                        use_relu=False, lr_mult=lr_mult,
                        num_output=num_priors_per_location * 2,
                        kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
            objectness_layers.append(flattened(obj_name))

    # Concatenate the per-layer results.
    mbox_layers = []
    for concat_name, tops, axis in (("mbox_loc", loc_layers, 1),
                                    ("mbox_conf", conf_layers, 1),
                                    ("mbox_priorbox", priorbox_layers, 2)):
        net[concat_name] = L.Concat(*tops, axis=axis)
        mbox_layers.append(net[concat_name])
    if use_objectness:
        net["mbox_objectness"] = L.Concat(*objectness_layers, axis=1)
        mbox_layers.append(net["mbox_objectness"])

    return mbox_layers
def segmentation(n, seg_points, label, phase):
    """Build the point segmentation branch and its phase-specific output.

    The trunk is ``BCLReshape`` (Python layer) -> ``conv0_seg`` ->
    six ``bcl_seg`` lattice layers with concat skip -> ``conv1_seg`` ->
    a 1x1 convolution named ``car_seg`` stored as ``n.seg_preds``.

    In the "train" phase the prediction is permuted and reshaped and fed,
    together with ``label``, to the ``FocalLoss`` Python layer stored as
    ``n.seg_loss``.  In the "eval" phase a sigmoid of the prediction is
    stored as ``n.output``.

    Args:
        n: net specification that receives the layers.
        seg_points: top fed to the ``BCLReshape`` Python layer.
        label: top holding the segmentation targets (read in "train" only).
        phase: either "train" or "eval".

    Returns:
        Tuple ``(n, output)``; ``output`` is ``None`` in the "train" phase
        and ``n.output`` in the "eval" phase.

    Raises:
        ValueError: if ``phase`` is neither "train" nor "eval".
    """
    # Reject unknown phases up front; otherwise ``output`` would never be
    # bound and the return statement would fail with UnboundLocalError.
    if phase not in ("train", "eval"):
        raise ValueError(
            "phase must be 'train' or 'eval', got {!r}".format(phase))

    num_cls = 1

    top_prev, top_lattice = L.Python(seg_points, ntop=2,
                                     python_param=dict(module='bcl_layers',
                                                       layer='BCLReshape'))

    top_prev = conv_bn_relu(n, "conv0_seg", top_prev, 1, 64,
                            stride=1, pad=0, loop=1)

    # Author's note: if the lattice scale is too large the network becomes
    # very slow and does not give good results.
    top_prev = bcl_bn_relu(n, 'bcl_seg', top_prev, top_lattice,
                           nout=[64, 64, 128, 128, 128, 64],
                           lattic_scale=[
                               "0*4_1*4_2*4", "0*2_1*2_2*2", "0_1_2",
                               "0/2_1/2_2/2", "0/4_1/4_2/4", "0/8_1/8_2/8"
                           ],
                           loop=6, skip='concat')

    top_prev = conv_bn_relu(n, "conv1_seg", top_prev, 1, 64,
                            stride=1, pad=0, loop=1)

    n.seg_preds = L.Convolution(top_prev, name="car_seg",
                                convolution_param=dict(
                                    num_output=num_cls,
                                    kernel_size=1, stride=1, pad=0,
                                    weight_filler=dict(type='xavier'),
                                    bias_term=True,
                                    bias_filler=dict(type='constant', value=0),
                                    engine=1,
                                ),
                                param=[dict(lr_mult=1), dict(lr_mult=0.1)])

    output = None
    if phase == "train":
        # (B,C=1,H,W) -> (B,H,W,C=1)
        seg_preds = L.Permute(n.seg_preds,
                              permute_param=dict(order=[0, 2, 3, 1]))
        # (B,H,W,C=1) -> (B, -1, 1)
        seg_preds = L.Reshape(seg_preds,
                              reshape_param=dict(
                                  shape=dict(dim=[0, -1, num_cls])))

        n.seg_loss = L.Python(seg_preds, label,
                              name="Seg_Loss",
                              loss_weight=1,
                              python_param=dict(
                                  module='bcl_layers',
                                  layer='FocalLoss'
                              ),
                              param_str=str(dict(focusing_parameter=2,
                                                 alpha=0.25)))
    else:  # phase == "eval"
        n.output = L.Sigmoid(n.seg_preds)
        output = n.output

    return n, output
def generate_caffe_prototxt(self, caffe_net, layer):
    """Append a ``Permute`` layer on top of ``layer`` and register it.

    Args:
        caffe_net: net specification; the new top is stored under
            ``self.g_name``.
        layer: top used as the input of the permutation.

    Returns:
        The newly created ``Permute`` top (axis order ``list(self.order)``).
    """
    permuted = L.Permute(layer, order=list(self.order))
    caffe_net[self.g_name] = permuted
    return permuted
def CreateMultiBoxHead(net, data_layer="data", num_classes=[], from_layers=[],
                       use_objectness=False, normalizations=[], use_batchnorm=True,
                       min_sizes=[], max_sizes=[], prior_variance=[0.1],
                       aspect_ratios=[], share_location=True, flip=True, clip=True,
                       inter_layer_depth=0, kernel_size=1, pad=0,
                       conf_postfix='', loc_postfix=''):
    """Add location/confidence/prior-box heads for every layer in ``from_layers``.

    Per source layer: an optional Normalize layer, an optional 3x3
    intermediate ``ConvBNLayer``, a location and a confidence ``ConvBNLayer``
    (each followed by Permute with order ``[0, 2, 3, 1]`` and Flatten on
    axis 1), a PriorBox layer and, if requested, an objectness branch.  The
    per-layer results are concatenated into ``mbox_loc``, ``mbox_conf`` and
    ``mbox_priorbox`` (and ``mbox_objectness``).

    Args:
        net: net specification; must already contain ``data_layer`` and every
            entry of ``from_layers``.
        data_layer: name of the data top handed to the PriorBox layers.
        num_classes: positive number of classes.
        from_layers: names of the source layers.
        use_objectness: also create an objectness branch.
        normalizations: per-layer Normalize scale; ``-1`` skips the layer.
        use_batchnorm: forwarded to every ``ConvBNLayer`` call.
        min_sizes, max_sizes, aspect_ratios: per-layer prior box settings.
        prior_variance, flip, clip: forwarded to the PriorBox layers.
        share_location: when false the location output is multiplied by
            ``num_classes``.
        inter_layer_depth: output count of the intermediate layer; ``0``
            disables it.
        kernel_size, pad: geometry of the prediction convolutions.
        conf_postfix, loc_postfix: appended to the conf/loc layer names.

    Returns:
        List ``[mbox_loc, mbox_conf, mbox_priorbox]`` plus
        ``mbox_objectness`` when ``use_objectness`` is set.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), \
            "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), \
        "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), \
            "from_layers and max_sizes should have same length"
    assert data_layer in net.keys(), "data_layer is not in net's layers"

    def predict(source, conv_name, num_output):
        # ConvBNLayer without ReLU -> Permute -> Flatten; returns the flat top.
        ConvBNLayer(net, source, conv_name, use_bn=use_batchnorm,
                    use_relu=False, num_output=num_output,
                    kernel_size=kernel_size, pad=pad, stride=1)
        perm_name = "{}_perm".format(conv_name)
        net[perm_name] = L.Permute(net[conv_name], order=[0, 2, 3, 1])
        flat_name = "{}_flat".format(conv_name)
        net[flat_name] = L.Flatten(net[perm_name], axis=1)
        return net[flat_name]

    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    objectness_layers = []

    for i, source in enumerate(from_layers):
        from_layer = source

        # Optional normalization of the source layer.
        if normalizations and normalizations[i] != -1:
            norm_name = "{}_norm".format(from_layer)
            net[norm_name] = L.Normalize(
                net[from_layer],
                scale_filler=dict(type="constant", value=normalizations[i]),
                across_spatial=False, channel_shared=False)
            from_layer = norm_name

        # Optional intermediate layer.
        if inter_layer_depth > 0:
            inter_name = "{}_inter".format(from_layer)
            ConvBNLayer(net, from_layer, inter_name, use_bn=use_batchnorm,
                        use_relu=True, num_output=inter_layer_depth,
                        kernel_size=3, pad=1, stride=1)
            from_layer = inter_name

        # Number of priors per location for this source layer.
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        has_max_size = bool(max_sizes and max_sizes[i])
        num_priors_per_location = (2 if has_max_size else 1) + len(aspect_ratio)
        if flip:
            num_priors_per_location += len(aspect_ratio)
        # NOTE(review): the count is doubled for every source layer,
        # independent of ``flip`` -- confirm against the PriorBox layer in use.
        num_priors_per_location = 2 * num_priors_per_location

        # Location prediction.
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes
        loc_layers.append(predict(
            from_layer, "{}_mbox_loc{}".format(from_layer, loc_postfix),
            num_loc_output))

        # Confidence prediction.
        conf_layers.append(predict(
            from_layer, "{}_mbox_conf{}".format(from_layer, conf_postfix),
            num_priors_per_location * num_classes))

        # Prior generation: optional settings are passed only when present.
        prior_kwargs = {'min_size': min_sizes[i]}
        if has_max_size:
            prior_kwargs['max_size'] = max_sizes[i]
        if aspect_ratio:
            prior_kwargs['aspect_ratio'] = aspect_ratio
            prior_kwargs['flip'] = flip
        prior_kwargs['clip'] = clip
        prior_kwargs['variance'] = prior_variance
        prior_name = "{}_mbox_priorbox".format(from_layer)
        net[prior_name] = L.PriorBox(net[from_layer], net[data_layer],
                                     **prior_kwargs)
        priorbox_layers.append(net[prior_name])

        # Optional objectness prediction.
        if use_objectness:
            objectness_layers.append(predict(
                from_layer, "{}_mbox_objectness".format(from_layer),
                num_priors_per_location * 2))

    # Concatenate the per-layer results.
    mbox_layers = []
    for concat_name, tops, axis in (("mbox_loc", loc_layers, 1),
                                    ("mbox_conf", conf_layers, 1),
                                    ("mbox_priorbox", priorbox_layers, 2)):
        net[concat_name] = L.Concat(*tops, axis=axis)
        mbox_layers.append(net[concat_name])
    if use_objectness:
        net["mbox_objectness"] = L.Concat(*objectness_layers, axis=1)
        mbox_layers.append(net["mbox_objectness"])

    return mbox_layers
def ACT_CreateCuboidHead(net, K=6, data_layer="data", num_classes=[], from_layers=[],
                         normalizations=[], use_batchnorm=True, lr_mult=1, use_scale=True,
                         min_sizes=[], max_sizes=[], prior_variance=[0.1],
                         aspect_ratios=[], steps=[], img_height=0, img_width=0,
                         share_location=True, flip=True, clip=True, offset=0.5,
                         kernel_size=1, pad=0, conf_postfix='', loc_postfix='',
                         m='', fusion="concat", **bn_param):
    """Add multi-stream location/confidence/prior-box heads to ``net``.

    Every source layer exists once per stream under the name
    ``<from_layer>_stream<k><m>`` for ``k`` in ``0..K-1``.  Per source layer
    the streams are optionally normalized, fused (Concat on axis 1 or
    element-wise sum), and the fused layer feeds a location ``ConvBNLayer``
    (``4 * K`` values per prior), a confidence ``ConvBNLayer`` and a PriorBox
    layer.  The per-layer results are concatenated into ``mbox_loc``,
    ``mbox_conf`` and ``mbox_priorbox``.

    Args:
        net: net specification; must contain ``data_layer`` and the
            per-stream source layers.
        K: number of streams.
        data_layer: name of the data top handed to the PriorBox layers.
        num_classes: positive number of classes.
        from_layers: base names of the source layers.
        normalizations: per-layer Normalize scale; ``-1`` skips it.
        use_batchnorm, lr_mult, kernel_size, pad, **bn_param: forwarded to
            ``ConvBNLayer``.
        use_scale: accepted but not read.
        min_sizes, max_sizes, aspect_ratios, steps: per-layer prior box
            settings; scalar sizes/ratios are wrapped into lists.
        prior_variance, clip, offset, flip, img_height, img_width: forwarded
            to the PriorBox layers.
        share_location: when false the location output is multiplied by
            ``num_classes``.
        conf_postfix, loc_postfix: appended to the conf/loc layer names.
        m: suffix of the per-stream layer names.
        fusion: ``"concat"`` or ``"sum"``.

    Returns:
        List ``[mbox_loc, mbox_conf, mbox_priorbox]``.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(aspect_ratios), "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(steps), "from_layers and steps should have same length"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers"

    num = len(from_layers)
    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    for i in range(0, num):
        from_layer = from_layers[i]

        # Normalize every stream of this source layer if requested.
        if normalizations:
            if normalizations[i] != -1:
                # range (not xrange) keeps the function usable on Python 3.
                for stream in range(K):
                    norm_name = "{}_norm_stream{}{}".format(from_layer, stream, m)
                    net[norm_name] = L.Normalize(
                        net[from_layer + '_stream' + str(stream) + m],
                        scale_filler=dict(type="constant", value=normalizations[i]),
                        across_spatial=False, channel_shared=False)
                from_layer = "{}_norm".format(from_layer)

        # ACT: fuse the K streams into a single layer.
        stream_names = [from_layer + '_stream' + str(stream) + m
                        for stream in range(K)]
        if fusion == "concat":
            net[from_layer + '_concat'] = L.Concat(bottom=stream_names, axis=1)
            from_layer += '_concat'
        else:
            assert fusion == "sum"
            # The Caffe layer type is spelled "Eltwise"; "EltWise" is not a
            # registered layer type and the generated net would not load.
            net[from_layer + '_sum'] = L.Eltwise(bottom=stream_names)
            from_layer += '_sum'

        # Estimate number of priors per location given provided parameters.
        min_size = min_sizes[i]
        if type(min_size) is not list:
            min_size = [min_size]
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = aspect_ratios[i]
            if type(aspect_ratio) is not list:
                aspect_ratio = [aspect_ratio]
        max_size = []
        if len(max_sizes) > i:
            max_size = max_sizes[i]
            if type(max_size) is not list:
                max_size = [max_size]
            if max_size:
                assert len(max_size) == len(min_size), "max_size and min_size should have same length."
        if max_size:
            num_priors_per_location = (2 + len(aspect_ratio)) * len(min_size)
        else:
            num_priors_per_location = (1 + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = []
        if len(steps) > i:
            step = steps[i]

        # ACT-detector: location prediction for K different frames.
        name = "{}_mbox_loc{}".format(from_layer, loc_postfix)
        num_loc_output = num_priors_per_location * 4 * K
        if not share_location:
            num_loc_output *= num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False,
                    lr_mult=lr_mult, num_output=num_loc_output,
                    kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layers.append(net[flatten_name])

        # ACT-detector: joint confidence prediction of all frames.
        name = "{}_mbox_conf{}".format(from_layer, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes
        ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False,
                    lr_mult=lr_mult, num_output=num_conf_output,
                    kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layers.append(net[flatten_name])

        # Create prior generation layer.
        name = "{}_mbox_priorbox".format(from_layer)
        net[name] = L.PriorBox(net[from_layer], net[data_layer], min_size=min_size,
                               clip=clip, variance=prior_variance, offset=offset)
        if max_size:
            net.update(name, {'max_size': max_size})
        if aspect_ratio:
            net.update(name, {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                net.update(name, {'img_size': img_height})
            else:
                net.update(name, {'img_h': img_height, 'img_w': img_width})
        priorbox_layers.append(net[name])

    # Concatenate priorbox, loc, and conf layers.
    mbox_layers = []
    name = "mbox_loc"
    net[name] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_conf"
    net[name] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net[name])
    name = "mbox_priorbox"
    net[name] = L.Concat(*priorbox_layers, axis=2)
    mbox_layers.append(net[name])

    return mbox_layers
def UnitLayerDetectorHeader(net, data_layer="data", num_classes=2, feature_layer="conv5",
                            use_objectness=False, normalization=-1, use_batchnorm=True,
                            prior_variance=[0.1], min_sizes=[], max_sizes=[],
                            aspect_ratios=[], pro_widths=[], pro_heights=[],
                            share_location=True, flip=True, clip=False,
                            inter_layer_channels=0, kernel_size=1, pad=0,
                            conf_postfix='', loc_postfix='', flat=False,
                            use_focus_loss=False, stage=1):
    """Add a detection head (loc, conf, prior boxes) on one feature layer.

    Layers are named after ``'<feature_layer>_<stage>'``.  On top of the
    feature layer an optional Normalize layer and optional intermediate
    ``ConvBNUnitLayer`` blocks are created, followed by 3x3 location and
    confidence convolutions (each permuted with order ``[0, 2, 3, 1]`` and,
    when ``flat`` is set, flattened on axis 1), a PriorBox layer and an
    optional objectness branch.

    Args:
        net: net specification; must contain ``data_layer`` and
            ``feature_layer``.
        data_layer: name of the data top handed to the PriorBox layer.
        num_classes: positive number of classes.
        feature_layer: name of the source layer.
        use_objectness: also create and return an objectness branch.
        normalization: Normalize scale; ``-1`` skips the layer.
        use_batchnorm: forwarded to the intermediate layers.
        prior_variance, flip, clip: forwarded to the PriorBox layer.
        min_sizes, max_sizes, aspect_ratios: prior boxes by size; mutually
            exclusive with ``pro_widths``/``pro_heights``.
        pro_widths, pro_heights: prior boxes by explicit width/height.
        share_location: when false the location output is multiplied by
            ``num_classes``.
        inter_layer_channels: sequence of ``(channels, kernel)`` entries, one
            per intermediate layer; a falsy value (``0``, ``None``, ``[]``)
            adds none.  Only kernels 1 and 3 are supported.
        kernel_size, pad: geometry of the objectness convolution.
        conf_postfix, loc_postfix: appended to the conf/loc layer names.
        flat: flatten the permuted predictions.
        use_focus_loss: initialise the confidence convolution with the
            ``focal`` bias type.
        stage: stage index used in the layer names.

    Returns:
        ``(loc_layer, conf_layer, priorbox_layer)``, with ``objectness_layer``
        appended when ``use_objectness`` is set.

    Raises:
        ValueError: if an intermediate layer requests a kernel size other
            than 1 or 3.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers."
    assert feature_layer in net_layers, "feature_layer is not in net's layers."
    if min_sizes:
        assert not pro_widths, "pro_widths should not be provided when using min_sizes."
        assert not pro_heights, "pro_heights should not be provided when using min_sizes."
        if max_sizes:
            assert len(max_sizes) == len(min_sizes), "min_sizes and max_sizes must have the same legnth."
    else:
        assert pro_widths, "Must provide proposed width/height."
        assert pro_heights, "Must provide proposed width/height."
        assert len(pro_widths) == len(pro_heights), "pro_widths/heights must have the same length."
        assert not min_sizes, "min_sizes should be not provided when using pro_widths/heights."
        assert not max_sizes, "max_sizes should be not provided when using pro_widths/heights."

    def finish(conv_name):
        # Permute the prediction and optionally flatten it; returns the top.
        permute_name = "{}_perm".format(conv_name)
        net[permute_name] = L.Permute(net[conv_name], order=[0, 2, 3, 1])
        if not flat:
            return net[permute_name]
        flatten_name = "{}_flat".format(conv_name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        return net[flatten_name]

    from_layer = feature_layer
    prefix_name = '{}_{}'.format(from_layer, stage)

    # Norm-Layer
    if normalization != -1:
        norm_name = "{}_{}_norm".format(prefix_name, stage)
        net[norm_name] = L.Normalize(net[from_layer],
                                     scale_filler=dict(type="constant", value=normalization),
                                     across_spatial=False, channel_shared=False)
        from_layer = norm_name

    # Intermediate conv layers.  A plain truth test is used so the default
    # value 0 simply means "no intermediate layers" (len(0) would raise).
    if inter_layer_channels:
        pad_for_kernel = {1: 0, 3: 1}
        for inter_id, inter_channel_kernel in enumerate(inter_layer_channels, 1):
            inter_channel = inter_channel_kernel[0]
            inter_kernel = inter_channel_kernel[1]
            if inter_kernel not in pad_for_kernel:
                # Without this check the padding would be unbound (or left
                # over from the previous entry).
                raise ValueError(
                    "unsupported intermediate kernel size {!r}; expected 1 or 3".format(inter_kernel))
            inter_name = "{}_inter_{}".format(prefix_name, inter_id)
            ConvBNUnitLayer(net, from_layer, inter_name, use_bn=use_batchnorm, use_relu=True,
                            num_output=inter_channel, kernel_size=inter_kernel,
                            pad=pad_for_kernel[inter_kernel], stride=1,
                            use_scale=True, leaky=False)
            from_layer = inter_name

    # Estimate number of priors per location given provided parameters.
    if min_sizes:
        boxes_per_size = 1 + len(aspect_ratios)
        if flip:
            boxes_per_size += len(aspect_ratios)
        if max_sizes:
            boxes_per_size += 1
        num_priors_per_location = boxes_per_size * len(min_sizes)
    else:
        num_priors_per_location = len(pro_widths)

    # Create location prediction layer.
    name = "{}_mbox_loc{}".format(prefix_name, loc_postfix)
    num_loc_output = num_priors_per_location * 4 * (num_classes - 1)
    if not share_location:
        num_loc_output *= num_classes
    ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                    num_output=num_loc_output, kernel_size=3, pad=1, stride=1)
    loc_layer = finish(name)

    # Create confidence prediction layer.
    name = "{}_mbox_conf{}".format(prefix_name, conf_postfix)
    num_conf_output = num_priors_per_location * num_classes
    if use_focus_loss:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                        num_output=num_conf_output, kernel_size=3, pad=1, stride=1,
                        init_xavier=False, bias_type='focal', sparse=num_classes)
    else:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                        num_output=num_conf_output, kernel_size=3, pad=1, stride=1)
    conf_layer = finish(name)

    # Create prior generation layer; optional settings are passed only when
    # they are provided.
    name = "{}_mbox_priorbox".format(prefix_name)
    if min_sizes:
        prior_kwargs = {'min_size': min_sizes}
        if max_sizes:
            prior_kwargs['max_size'] = max_sizes
        if aspect_ratios:
            prior_kwargs['aspect_ratio'] = aspect_ratios
    else:
        prior_kwargs = {'pro_width': pro_widths, 'pro_height': pro_heights}
    prior_kwargs['flip'] = flip
    prior_kwargs['clip'] = clip
    prior_kwargs['variance'] = prior_variance
    net[name] = L.PriorBox(net[from_layer], net[data_layer], **prior_kwargs)
    priorbox_layer = net[name]

    if not use_objectness:
        return loc_layer, conf_layer, priorbox_layer

    # Create objectness prediction layer.
    name = "{}_mbox_objectness".format(prefix_name)
    num_obj_output = num_priors_per_location * 2
    ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                    num_output=num_obj_output, kernel_size=kernel_size, pad=pad, stride=1)
    objectness_layer = finish(name)
    return loc_layer, conf_layer, priorbox_layer, objectness_layer
def McDetectorHeader(net, num_classes=1, feature_layer="conv5",
                     normalization=-1, use_batchnorm=False, boxsizes=[], aspect_ratios=[],
                     pwidths=[], pheights=[],
                     inter_layer_channels=0, kernel_size=1, pad=0):
    """Add permuted location and confidence predictions on one feature layer.

    On top of ``feature_layer`` an optional Normalize layer (when
    ``normalization > 0``) and an optional 3x3 intermediate
    ``ConvBNUnitLayer`` (when ``inter_layer_channels > 0``) are created,
    followed by a ``<layer>_loc`` and a ``<layer>_conf`` convolution, each
    permuted with order ``[0, 2, 3, 1]``.

    Args:
        net: net specification; must contain ``feature_layer``.
        num_classes: positive number of classes; the confidence branch
            predicts ``num_classes + 1`` values per prior.
        feature_layer: name of the source layer.
        normalization: Normalize scale; values ``<= 0`` skip the layer.
        use_batchnorm: forwarded to the intermediate layer.
        boxsizes, aspect_ratios: priors by size and ratio; mutually exclusive
            with ``pwidths``/``pheights``.
        pwidths, pheights: priors by explicit width/height.
        inter_layer_channels: output count of the intermediate layer.
        kernel_size, pad: geometry of the two prediction convolutions.

    Returns:
        List ``[loc_perm, conf_perm]`` with the two permuted tops.
    """
    assert num_classes > 0, "num_classes must be positive number"
    assert feature_layer in net.keys(), "feature_layer is not in net's layers."
    if boxsizes:
        assert not pwidths, "pwidths should not be provided when using boxsizes."
        assert not pheights, "pheights should not be provided when using boxsizes."
        assert aspect_ratios, "aspect_ratios should be provided when using boxsizes."
    else:
        assert pwidths, "Must provide proposed width/height."
        assert pheights, "Must provide proposed width/height."
        assert len(pwidths) == len(pheights), "provided widths/heights must have the same length."
        assert not boxsizes, "boxsizes should be not provided when using pro_widths/heights."
        assert not aspect_ratios, "aspect_ratios should be not provided when using pro_widths/heights."

    source = feature_layer

    # Optional normalization.
    if normalization > 0:
        norm_name = "{}_norm".format(source)
        net[norm_name] = L.Normalize(net[source],
                                     scale_filler=dict(type="constant", value=normalization),
                                     across_spatial=False, channel_shared=False)
        source = norm_name

    # Optional intermediate convolution.
    if inter_layer_channels > 0:
        inter_name = "{}_inter".format(source)
        ConvBNUnitLayer(net, source, inter_name, use_bn=use_batchnorm, use_relu=True,
                        num_output=inter_layer_channels, kernel_size=3, pad=1, stride=1)
        source = inter_name

    # One extra prior is always added to the configured ones.
    if boxsizes:
        priors = len(aspect_ratios) * len(boxsizes) + 1
    else:
        priors = len(pwidths) + 1

    def branch(conv_name, num_output):
        # Prediction convolution followed by the channel-last permutation.
        ConvBNUnitLayer(net, source, conv_name, use_bn=False, use_relu=False,
                        num_output=num_output, kernel_size=kernel_size, pad=pad, stride=1)
        perm_name = "{}_perm".format(conv_name)
        net[perm_name] = L.Permute(net[conv_name], order=[0, 2, 3, 1])
        return net[perm_name]

    loc_top = branch("{}_loc".format(source), priors * 4)
    conf_top = branch("{}_conf".format(source), priors * (num_classes + 1))
    return [loc_top, conf_top]
def segmentation(n, seg_points, label, cls_labels, reg_targets, dataset_params,
                 phase):
    """Add the point-segmentation stage followed by an RPN detection head.

    Layers are attached to ``n`` in this order: a BCL segmentation trunk and
    ``seg_head``, a ``Point2FeatMap`` projection, three conv/deconv RPN
    stages concatenated into ``rpn_out``, and the ``cls_head``/``reg_head``
    convolutions.  In the ``"train"`` phase the segmentation, classification
    and regression losses are added; in the ``"eval"`` phase the reshaped
    predictions are exposed as ``f_cls_preds`` / ``f_box_preds``.

    Args:
        n: ``caffe.NetSpec`` being populated (mutated in place).
        seg_points: Top holding the input points.
        label: Top holding the segmentation labels.
        cls_labels: Top holding the anchor classification labels.
        reg_targets: Top holding the box regression targets.
        dataset_params: Mapping providing the keys read at the top of the
            function body.
        phase: ``"train"`` or ``"eval"``; any other value adds neither the
            losses nor the eval outputs.

    Returns:
        The same ``n`` that was passed in.
    """
    num_cls = dataset_params['num_cls']
    box_code_size = dataset_params['box_code_size']
    num_anchor_per_loc = dataset_params['num_anchor_per_loc']
    num_filters = dataset_params['num_filters']
    layer_strides = dataset_params['layer_strides']
    layer_nums = dataset_params['layer_nums']
    num_upsample_filters = dataset_params['num_upsample_filters']
    upsample_strides = dataset_params["upsample_strides"]
    feat_map_size = dataset_params['feat_map_size']  # (b,c,n,h,w)
    point_cloud_range = dataset_params['point_cloud_range']
    seg_thresh = dataset_params['seg_thresh']
    use_depth = dataset_params['use_depth']
    use_score = dataset_params['use_score']
    use_points = dataset_params['use_points']

    is_train = phase == "train"
    focal_param_str = str(dict(focusing_parameter=2, alpha=0.25))

    def head_conv_param(num_output):
        # Shared settings of every 1x1 prediction head in this function.
        return dict(
            num_output=num_output,
            kernel_size=1,
            stride=1,
            pad=0,
            weight_filler=dict(type='xavier'),
            bias_term=True,
            bias_filler=dict(type='constant', value=0),
            engine=1,
        )

    def to_rows(blob, last_dim):
        # Permute with order (0,2,3,1), then reshape to dim=[0, -1, last_dim].
        permuted = L.Permute(blob, permute_param=dict(order=[0, 2, 3, 1]))
        return L.Reshape(
            permuted,
            reshape_param=dict(shape=dict(dim=[0, -1, last_dim])))

    # ---- segmentation trunk -------------------------------------------
    feats, lattice = L.Python(
        seg_points,
        ntop=2,
        python_param=dict(module='bcl_layers', layer='BCLReshape'))
    feats = conv_bn_relu(n, "conv0_seg", feats, 1, 64, stride=1, pad=0, loop=1)
    # Author's note: if the lattice scale is too large the network becomes
    # really slow and does not give good results.
    feats = bcl_bn_relu(
        n, 'bcl_seg', feats, lattice,
        nout=[64, 128, 128, 64],
        lattic_scale=["0*8_1*8_2*8", "0*4_1*4_2*4", "0*2_1*2_2*2", "0_1_2"],
        loop=4,
        skip='concat')
    feats = conv_bn_relu(n, "conv1_seg", feats, 1, 64, stride=1, pad=0, loop=1)

    n.seg_preds = L.Convolution(
        feats,
        name="seg_head",
        convolution_param=head_conv_param(num_cls),
        param=[dict(lr_mult=1), dict(lr_mult=0.1)])

    if is_train:
        n.seg_loss = L.Python(
            to_rows(n.seg_preds, num_cls),
            label,
            name="Seg_Loss",
            loss_weight=1,
            python_param=dict(module='bcl_layers', layer='FocalLoss'),
            param_str=focal_param_str)

    # ---- project the per-point features onto a feature map --------------
    feats = conv_bn_relu(n, "P2FM_Decov", feats, 1, 32, stride=1, pad=0, loop=1)
    n.seg_output = L.Sigmoid(n.seg_preds)
    n.p2fm = L.Python(
        seg_points,
        n.seg_output,
        feats,
        name="Point2FeatMap",
        python_param=dict(module='bcl_layers', layer='Point2FeatMap'),
        param_str=str(
            dict(
                thresh=seg_thresh,
                feat_map_size=feat_map_size,  # (B,C,N,H,W)
                point_cloud_range=point_cloud_range,
                use_depth=use_depth,
                use_score=use_score,
                use_points=use_points)))
    feats = n.p2fm

    # ---- RPN: three down-sampling stages, each with its own deconv ------
    upsampled = []
    for stage in range(3):
        tag = stage + 1
        feats = conv_bn_relu(n, "ini_conv{}".format(tag), feats, 3,
                             num_filters[stage], stride=layer_strides[stage],
                             pad=1, loop=1)
        feats = conv_bn_relu(n, "rpn_conv{}".format(tag), feats, 3,
                             num_filters[stage], stride=1, pad=1,
                             loop=layer_nums[stage])
        upsampled.append(
            deconv_bn_relu(n, "rpn_deconv{}".format(tag), feats,
                           upsample_strides[stage],
                           num_upsample_filters[stage],
                           stride=upsample_strides[stage], pad=0))
    n['rpn_out'] = L.Concat(*upsampled)
    feats = n['rpn_out']

    # ---- detection heads ------------------------------------------------
    unit_lr = [dict(lr_mult=1), dict(lr_mult=1)]
    n.cls_preds = L.Convolution(
        feats,
        name="cls_head",
        convolution_param=head_conv_param(num_anchor_per_loc * num_cls),
        param=unit_lr)
    n.box_preds = L.Convolution(
        feats,
        name="reg_head",
        convolution_param=head_conv_param(num_anchor_per_loc * box_code_size),
        param=[dict(lr_mult=1), dict(lr_mult=1)])

    cls_rows = to_rows(n.cls_preds, 1)
    box_rows = to_rows(n.box_preds, box_code_size)

    if is_train:
        n['cared'], n['reg_outside_weights'], n['cls_weights'] = L.Python(
            cls_labels,
            name="PrepareLossWeight",
            ntop=3,
            python_param=dict(module='bcl_layers', layer='PrepareLossWeight'))

        # Gradients cannot be computed with respect to the label inputs
        # (bottom[1]), so the labels are encoded by a dedicated layer.
        n['labels_input'] = L.Python(
            cls_labels,
            n['cared'],
            name="Label_Encode",
            python_param=dict(module='bcl_layers', layer='LabelEncode'))

        n.cls_loss = L.Python(
            cls_rows,
            n['labels_input'],
            n['cls_weights'],
            name="FocalLoss",
            loss_weight=1,
            python_param=dict(module='bcl_layers', layer='WeightFocalLoss'),
            param_str=focal_param_str)

        n.reg_loss = L.Python(
            box_rows,
            reg_targets,
            n['reg_outside_weights'],
            name="WeightedSmoothL1Loss",
            loss_weight=1,
            python_param=dict(module='bcl_layers',
                              layer='WeightedSmoothL1Loss'))

    if phase == "eval":
        n.f_cls_preds = cls_rows
        n.f_box_preds = box_rows

    return n
def test_v2(phase, dataset_params=None, model_cfg = None, deploy=False, create_prototxt=True, save_path=None, ): #RPN config num_filters=list(model_cfg.rpn.num_filters) layer_nums=list(model_cfg.rpn.layer_nums) layer_strides=list(model_cfg.rpn.layer_strides) num_upsample_filters=list(model_cfg.rpn.num_upsample_filters) upsample_strides=list(model_cfg.rpn.upsample_strides) point_cloud_range=list(model_cfg.voxel_generator.point_cloud_range) voxel_size=list(model_cfg.voxel_generator.voxel_size) # anchors_fp_size = (point_cloud_range[3:]-point_cloud_range[:3])/voxel_size anchors_fp_w = 432 #1408 anchors_fp_h = 496 #1600 box_code_size = 7 num_anchor_per_loc = 2 ############################################################################ # Voxel2BCL # Voxel2PointNet ############################################################################ BCL_mode = 'Voxel2BCL' dataset_params['x2BCL'] = BCL_mode dataset_params['Voxel2BCL_numpoint'] = 6000 #num voxels ############################################################################ # Featuer Creation # VoxelFeatureNet: xyzr + (cente_x, center_z, center_y), (cluster_x, cluster_z) # VoxelFeatureNetV2: xyzr + (cluster_x, cluster_z) # False: No Feature extraction only xyzr # SimpleVoxel: sum points in voxel and divided by num of points left 1 points # if use SimpleVoxel PointNet Should disable! 
############################################################################ dataset_params['FeatureNet'] = 'SimpleVoxel' dataset_params['Save_Img'] = False ############################################################################ # if PointNet == True then it means PointNet to extract high dimention features # and max pooling to reduce the point to 1 # Normally except Simplex the rest of the Freature Creation must with # PointNet acticate ############################################################################ n = caffe.NetSpec() if phase == "train": dataset_params_train = dataset_params.copy() dataset_params_train['subset'] = phase datalayer_train = L.Python(name='data', include=dict(phase=caffe.TRAIN), ntop= 4, python_param=dict(module='bcl_layers', layer='InputKittiData', param_str=repr(dataset_params_train))) n.data, n.coors, n.labels, n.reg_targets = datalayer_train elif phase == "eval": dataset_params_eval = dataset_params.copy() dataset_params_eval['subset'] = phase datalayer_eval = L.Python(name='data', include=dict(phase=caffe.TEST), ntop= 9, python_param=dict(module='bcl_layers', layer='InputKittiData', param_str=repr(dataset_params_eval))) n.data, n.coors, n.anchors, n.rect, n.trv2c, n.p2, n.anchors_mask, n.img_idx, n.img_shape = datalayer_eval if deploy: print("[debug] run deploy in caffe_model.py") # n.data = L.Input(shape=dict(dim=[1, len(input_dims), 1, sample_size])) top_prev = n.data """BCL fixed size before Scatter""" ############################################################################ # Method 1 # use new xyz as the lattice features # this reshape is used to make n.data from (1,Feature,Npoints,Voxels) -> (1,Feature,1,Voxels) Npoints in here should be 1 # and n.data -> (1,Feature,Npoints,Voxels) -> (1, Feature[:3], 1, Voxeld) Npoints in here should be 1 # this is particular for raw XYZ features as BCL data input which means there is no PointNet or any features extraction infront # Or must keep the origin xyz features inside the new 
features ############################################################################ # n["input_feats"], n['lat_feats']= L.Python(n.data, ntop=2, python_param=dict(module='bcl_layers', # layer='BCLReshape', # param_str=str(dict(data_feature=True)))) # top_prev, top_lat_feats = n["input_feats"], n['lat_feats'] # top_prev = bcl_bn_relu(n, 'bcl0', top_prev, top_lat_feats, nout=[64,128,64], lattic_scale=["0*8_1*8_2*8", "0*4_1*4_2*4", "0*2_1*2_2*2"], loop=3) if BCL_mode=="Voxel2BCL": # Reshape to the (B,C,N,V) N is 1 here to fit in BCL n["input_feats"], n['lat_feats']= L.Python(top_prev, n.coors, ntop=2, python_param=dict(module='bcl_layers', layer='BCLReshape', param_str=str(dict(ReshapeMode=BCL_mode)))) top_prev, top_lat_feats = n["input_feats"], n['lat_feats'] # top_prev = bcl_bn_relu(n, 'bcl0', top_prev, top_lat_feats, nout=[64,128,64], lattic_scale=["0*8_1*8_2*8", "0*4_1*4_2*4", "0*2_1*2_2*2"], loop=3) # top_prev = bcl_bn_relu(n, 'bcl0', top_prev, top_lat_feats, nout=[64,128,64], lattic_scale=["0*1_1*1_2*1", "0*0.5_1*0.5_2*0.5", "0*0.25_1*0.25_2*0.25"], loop=3) top_prev = bcl_bn_relu(n, 'bcl0', top_prev, top_lat_feats, nout=[64,128,64], lattic_scale=["0*32_1*32_2*32", "0*16_1*16_2*16", "0*8_1*8_2*8"], loop=3) # Reshape to the (B,C,V,N) N is 1 here to fit in Scatter n["input_feats_inverse"]= L.Python(top_prev,python_param=dict(module='bcl_layers', layer='Voxel2Scatter', )) top_prev = n["input_feats_inverse"] if BCL_mode=="Voxel2PointNet": top_prev = conv_bn_relu(n, "mlp0", top_prev, 1, 64, stride=1, pad=0, loop=1) top_prev = conv_bn_relu(n, "mlp1", top_prev, 1, 128, stride=1, pad=0, loop=1) top_prev = conv_bn_relu(n, "mlp2", top_prev, 1, 64, stride=1, pad=0, loop=1) ###############################Scatter###################################### n['PillarScatter'] = L.Python(top_prev, n.coors, ntop=1,python_param=dict( module='bcl_layers', layer='PointPillarsScatter', param_str=str(dict(output_shape=[1, 1, anchors_fp_h, anchors_fp_w, 64], # [1, 1, 496, 432, 4] 
permutohedral=False # if true return shape is (b,c,1,h*w) else (b.c,h,w) )))) top_prev= n['PillarScatter'] ###############################Scatter###################################### #############################MODE1########################################## """ No Concate""" # top_prev = bcl_bn_relu(n, 'bcl0', # top_prev, # top_lat_feats, # nout=[64,128,128,128,64,64], # lattic_scale=["0*16_1*16_2*16", "0*8_1*8_2*8", "0*4_1*4_2*4", "0*2_1*2_2*2", "0*0.5_1*0.5_2*0.5"], # loop=6) #############################MODE1########################################## #############################MODE2########################################## """ Concate (might have rpn and feature extract function?)""" # top_prev_1 = bcl_bn_relu(n, 'bcl0', # top_prev, # top_lat_feats, # nout=[64,128], # lattic_scale=["0*8_1*8_2*8", "0*4_1*4_2*4"], # loop=2) # # top_prev_2 = bcl_bn_relu(n, 'bcl1', # top_prev_1, # top_lat_feats, # nout=[128,128], # lattic_scale=["0*2_1*2_2*2", "0*1_1*1_2*1"], # loop=2) # # top_prev_3 = bcl_bn_relu(n, 'bcl2', # top_prev_2, # top_lat_feats, # nout=[64,64], # lattic_scale=["0*0.5_1*0.5_2*0.5", "0*0.25_1*0.25_2*0.25"], # loop=2) # # n['rpn_out'] = L.Concat(top_prev_1, top_prev_2, top_prev_3) # top_prev = n['rpn_out'] # n['reshape_rpn_out'] = L.Reshape(top_prev, reshape_param=dict(shape=dict(dim=[0, 0, int(anchors_fp_h/2), int(anchors_fp_w/2)])))# (B,H,W,C) -> (B, -1, C) # top_prev = n['reshape_rpn_out'] #############################MODE2########################################## #############################MODE3########################################## top_prev = conv_bn_relu(n, "ini_conv1", top_prev, 3, num_filters[0], stride=layer_strides[0], pad=1, loop=1) top_prev = conv_bn_relu(n, "rpn_conv1", top_prev, 3, num_filters[0], stride=1, pad=1, loop=layer_nums[0]) #3 deconv1 = deconv_bn_relu(n, "rpn_deconv1", top_prev, upsample_strides[0], num_upsample_filters[0], stride=upsample_strides[0], pad=0) top_prev = conv_bn_relu(n, "ini_conv2", top_prev, 3, 
num_filters[1], stride=layer_strides[1], pad=1, loop=1) top_prev = conv_bn_relu(n, "rpn_conv2", top_prev, 3, num_filters[1], stride=1, pad=1, loop=layer_nums[1]) #5 deconv2 = deconv_bn_relu(n, "rpn_deconv2", top_prev, upsample_strides[1], num_upsample_filters[1], stride=upsample_strides[1], pad=0) top_prev = conv_bn_relu(n, "ini_conv3", top_prev, 3, num_filters[2], stride=layer_strides[2], pad=1, loop=1) top_prev = conv_bn_relu(n, "rpn_conv3", top_prev, 3, num_filters[2], stride=1, pad=1, loop=layer_nums[2]) #5 deconv3 = deconv_bn_relu(n, "rpn_deconv3", top_prev, upsample_strides[2], num_upsample_filters[2], stride=upsample_strides[2], pad=0) n['rpn_out'] = L.Concat(deconv1, deconv2, deconv3) top_prev = n['rpn_out'] #############################MODE3########################################## num_cls = 2 n['cls_preds'] = L.Convolution(top_prev, name = "cls_head", convolution_param=dict(num_output=num_cls, kernel_size=1, stride=1, pad=0, weight_filler=dict(type = 'xavier'), bias_term = True, bias_filler=dict(type='constant', value=0), engine=1, ), param=[dict(lr_mult=1), dict(lr_mult=1)]) cls_preds = n['cls_preds'] box_code_size = 7 num_anchor_per_loc = 2 n['box_preds'] = L.Convolution(top_prev, name = "reg_head", convolution_param=dict(num_output=num_anchor_per_loc * box_code_size, kernel_size=1, stride=1, pad=0, weight_filler=dict(type = 'xavier'), bias_term = True, bias_filler=dict(type='constant', value=0), engine=1, ), param=[dict(lr_mult=1), dict(lr_mult=1)]) box_preds = n['box_preds'] if phase == "train": n['cared'],n['reg_outside_weights'], n['cls_weights']= L.Python(n.labels, name = "PrepareLossWeight", ntop = 3, python_param=dict( module='bcl_layers', layer='PrepareLossWeight' )) reg_outside_weights, cared, cls_weights = n['reg_outside_weights'], n['cared'], n['cls_weights'] # Gradients cannot be computed with respect to the label inputs (bottom[1])# n['labels_input'] = L.Python(n.labels, cared, name = "Label_Encode", python_param=dict( module='bcl_layers', 
layer='LabelEncode', )) labels_input = n['labels_input'] n['cls_preds_permute'] = L.Permute(cls_preds, permute_param=dict(order=[0, 2, 3, 1])) #(B,C,H,W) -> (B,H,W,C) cls_preds_permute = n['cls_preds_permute'] n['cls_preds_reshape'] = L.Reshape(cls_preds_permute, reshape_param=dict(shape=dict(dim=[0, -1, 1])))# (B,H,W,C) -> (B, -1, C) cls_preds_reshape = n['cls_preds_reshape'] n.cls_loss= L.Python(cls_preds_reshape, labels_input, cls_weights, name = "FocalLoss", loss_weight = 1, python_param=dict( module='bcl_layers', layer='WeightFocalLoss' ), param_str=str(dict(focusing_parameter=2, alpha=0.25))) box_code_size = 7 n['box_preds_permute'] = L.Permute(box_preds, permute_param=dict(order=[0, 2, 3, 1])) #(B,C,H,W) -> (B,H,W,C) box_preds_permute = n['box_preds_permute'] n['box_preds_reshape'] = L.Reshape(box_preds_permute, reshape_param=dict(shape=dict(dim=[0, -1, box_code_size]))) #(B,H,W,C) -> (B, -1, C) box_preds_reshape = n['box_preds_reshape'] n.reg_loss= L.Python(box_preds_reshape, n.reg_targets, reg_outside_weights, name = "WeightedSmoothL1Loss", loss_weight = 1, python_param=dict( module='bcl_layers', layer='WeightedSmoothL1Loss' )) return n.to_proto() elif phase == "eval": n['e7'],n['m7'],n['h7'],n['e5'],n['m5'],n['h5']=L.Python(box_preds,cls_preds, n.anchors, n.rect, n.trv2c, n.p2, n.anchors_mask, n.img_idx, n.img_shape, name = "EvalLayer", ntop=6, python_param=dict( module='bcl_layers', layer='EvalLayer_v2', param_str=repr(dataset_params_eval), )) return n.to_proto() else: raise ValueError
def object_detection(n, voxels, coors, label, reg_targets, phase):
    """Add a BCL-based detection head (one anchor, one class) to ``n``.

    A BCL trunk feeds ``cls_head`` and ``reg_head`` convolutions; the box
    predictions go through an in-place ReLU.  In the ``"eval"`` phase the
    reshaped predictions are exposed as ``f_cls_preds`` / ``f_box_preds``;
    in the ``"train"`` phase the focal and smooth-L1 losses are attached.

    Args:
        n: ``caffe.NetSpec`` being populated (mutated in place).
        voxels: Top holding the voxel features.
        coors: Top holding the voxel coordinates.
        label: Top holding the classification labels.
        reg_targets: Top holding the box regression targets.
        phase: ``"train"`` or ``"eval"``; other values add neither branch.

    Returns:
        The same ``n`` that was passed in.
    """
    box_code_size = 7
    num_anchor_per_loc = 1
    num_cls = 1

    def head_conv_param(num_output):
        # Shared settings of the two 1x1 prediction heads.
        return dict(
            num_output=num_output,
            kernel_size=1,
            stride=1,
            pad=0,
            weight_filler=dict(type='xavier'),
            bias_term=True,
            bias_filler=dict(type='constant', value=0),
            engine=1,
        )

    feats, lattice = L.Python(
        voxels, coors,
        ntop=2,
        python_param=dict(module='bcl_layers', layer='BCLReshape'))

    feats = conv_bn_relu(n, "conv0_obj", feats, 1, 64, stride=1, pad=0, loop=1)
    feats = bcl_bn_relu(
        n, 'bcl_obj', feats, lattice,
        nout=[64, 64, 128, 128, 128, 64],
        lattic_scale=[
            "0*32_1*32_2*32", "0*16_1*16_2*16", "0*8_1*8_2*8",
            "0*4_1*4_2*4", "0*2_1*2_2*2", "0_1_2",
        ],
        loop=6,
        skip='concat')
    feats = conv_bn_relu(n, "conv1_obj", feats, 1, 64, stride=1, pad=0, loop=1)

    n.cls_preds = L.Convolution(
        feats,
        name="cls_head",
        convolution_param=head_conv_param(num_anchor_per_loc * num_cls),
        param=[dict(lr_mult=1), dict(lr_mult=1)])
    n.box_preds = L.Convolution(
        feats,
        name="reg_head",
        convolution_param=head_conv_param(num_anchor_per_loc * box_code_size),
        param=[dict(lr_mult=1), dict(lr_mult=1)])

    # WARNING (from the author): the box regression output is rectified.
    rectified_boxes = L.ReLU(n.box_preds, in_place=True)

    nhwc = dict(order=[0, 2, 3, 1])
    cls_rows = L.Reshape(
        L.Permute(n.cls_preds, permute_param=nhwc),
        reshape_param=dict(shape=dict(dim=[0, -1, num_cls])))
    box_rows = L.Reshape(
        L.Permute(rectified_boxes, permute_param=dict(order=[0, 2, 3, 1])),
        reshape_param=dict(shape=dict(dim=[0, -1, box_code_size])))

    if phase == "eval":
        n.f_cls_preds = cls_rows
        n.f_box_preds = box_rows
        return n

    if phase == "train":
        n['reg_outside_weights'], n['cls_weights'] = L.Python(
            label,
            name="PrepareLossWeightV2",
            ntop=2,
            python_param=dict(module='bcl_layers',
                              layer='PrepareLossWeightV2'))

        # Gradients cannot be computed with respect to the label inputs
        # (bottom[1]), so the labels are encoded by a dedicated layer.
        n['labels_input'] = L.Python(
            label,
            name="LabelEncodeV2",
            python_param=dict(module='bcl_layers', layer='LabelEncodeV2'))

        n.cls_loss = L.Python(
            cls_rows,
            n['labels_input'],
            n['cls_weights'],
            name="FocalLoss",
            loss_weight=2,
            python_param=dict(module='bcl_layers', layer='WeightFocalLoss'),
            param_str=str(dict(focusing_parameter=2, alpha=0.25)))

        # NOTE(review): the lattice top is passed as a fourth bottom of the
        # regression loss - confirm the loss layer expects it.
        n.reg_loss = L.Python(
            box_rows,
            reg_targets,
            n['reg_outside_weights'],
            lattice,
            name="WeightedSmoothL1Loss",
            loss_weight=1,
            python_param=dict(module='bcl_layers',
                              layer='WeightedSmoothL1Loss'))

    return n
def segmentation(n, seg_points, label, coords, p2voxel_idx, cls_labels,
                 reg_targets, dataset_params, phase):
    """Add a point-level BCL trunk, a voxel-level BCL trunk and detection heads.

    Point features are computed with BCL layers, gathered into voxels by the
    ``Point2Voxel3D`` Python layer, refined by a second BCL trunk whose
    lattice is built from ``coords``, and finally fed to the ``cls_head`` /
    ``reg_head`` convolutions.  Losses are attached in the ``"train"``
    phase; the reshaped predictions are exposed as ``f_cls_preds`` /
    ``f_box_preds`` in the ``"eval"`` phase.

    Args:
        n: ``caffe.NetSpec`` being populated (mutated in place).
        seg_points: Top holding the input points.
        label: Top passed as the third bottom of the label-encoding layer.
        coords: Top holding the voxel coordinates.
        p2voxel_idx: Top passed as the second bottom of ``Point2Voxel3D``.
        cls_labels: Top holding the anchor classification labels.
        reg_targets: Top holding the box regression targets.
        dataset_params: Mapping providing ``num_cls``, ``box_code_size``,
            ``num_anchor_per_loc``, ``max_voxels`` and ``points_per_voxel``.
        phase: ``"train"`` or ``"eval"``; other values add neither branch.

    Returns:
        The same ``n`` that was passed in.
    """
    num_cls = dataset_params['num_cls']
    box_code_size = dataset_params['box_code_size']
    num_anchor_per_loc = dataset_params['num_anchor_per_loc']
    max_voxels = dataset_params['max_voxels']
    points_per_voxel = dataset_params['points_per_voxel']

    def head_conv_param(num_output):
        # Shared settings of the two 1x1 prediction heads.
        return dict(
            num_output=num_output,
            kernel_size=1,
            stride=1,
            pad=0,
            weight_filler=dict(type='xavier'),
            bias_term=True,
            bias_filler=dict(type='constant', value=0),
            engine=1,
        )

    def to_rows(blob, last_dim):
        # Permute with order (0,2,3,1), then reshape to dim=[0, -1, last_dim].
        permuted = L.Permute(blob, permute_param=dict(order=[0, 2, 3, 1]))
        return L.Reshape(
            permuted,
            reshape_param=dict(shape=dict(dim=[0, -1, last_dim])))

    # ---- point-level trunk ------------------------------------------------
    feats, point_lattice = L.Python(
        seg_points,
        ntop=2,
        python_param=dict(module='bcl_layers', layer='BCLReshape'))
    feats = conv_bn_relu(n, "conv0_seg", feats, 1, 64, stride=1, pad=0, loop=1)
    # Author's note: if the lattice scale is too large the network becomes
    # really slow and does not give good results.
    feats = bcl_bn_relu(
        n, 'bcl_seg', feats, point_lattice,
        nout=[64, 128, 64],
        lattic_scale=["0*4_1*4_2*4", "0*2_1*2_2*2", "0_1_2"],
        loop=3,
        skip=None)
    feats = conv_bn_relu(n, "conv1_seg", feats, 1, 64, stride=1, pad=0, loop=1)

    # ---- points -> voxels ---------------------------------------------------
    feats = conv_bn_relu(n, "P2VX_Decov", feats, 1, 32, stride=1, pad=0, loop=1)
    n.p2vx = L.Python(
        feats,
        p2voxel_idx,
        name="Point2Voxel3D",
        ntop=1,
        python_param=dict(module='bcl_layers', layer='Point2Voxel3D'),
        param_str=str(
            dict(max_voxels=max_voxels, points_per_voxel=points_per_voxel)))
    feats = n.p2vx

    # ---- voxel-level trunk; its lattice comes from the voxel coordinates ----
    voxel_lattice = L.Permute(
        coords,
        name="coords_permute",
        permute_param=dict(order=[0, 2, 1]))
    voxel_lattice = L.Reshape(
        voxel_lattice,
        name="coords_reshape",
        reshape_param=dict(shape=dict(dim=[0, -1, 1, max_voxels])))

    feats = conv_bn_relu(n, "conv2_seg_voxel", feats, 1, 64, stride=1, pad=0,
                         loop=1)
    feats = bcl_bn_relu(
        n, 'bcl_seg_voxel', feats, voxel_lattice,
        nout=[64, 128, 128, 64],
        lattic_scale=["0*8_1*8_2*8", "0*4_1*4_2*4", "0*2_1*2_2*2", "0_1_2"],
        loop=4,
        skip='concat')
    feats = conv_bn_relu(n, "conv3_seg_voxle", feats, 1, 64, stride=1, pad=0,
                         loop=1)

    # ---- detection heads ------------------------------------------------------
    n.cls_preds = L.Convolution(
        feats,
        name="cls_head",
        convolution_param=head_conv_param(num_anchor_per_loc * num_cls),
        param=[dict(lr_mult=1), dict(lr_mult=1)])
    n.box_preds = L.Convolution(
        feats,
        name="reg_head",
        convolution_param=head_conv_param(num_anchor_per_loc * box_code_size),
        param=[dict(lr_mult=1), dict(lr_mult=1)])

    rectified_boxes = L.ReLU(n.box_preds, in_place=True)
    cls_rows = to_rows(n.cls_preds, 1)
    box_rows = to_rows(rectified_boxes, box_code_size)

    if phase == "train":
        n['cared'], n['reg_outside_weights'], n['cls_weights'] = L.Python(
            cls_labels,
            name="PrepareLossWeight",
            ntop=3,
            python_param=dict(module='bcl_layers', layer='PrepareLossWeight'))

        # Gradients cannot be computed with respect to the label inputs
        # (bottom[1]), so the labels are encoded by a dedicated layer.
        n['labels_input'] = L.Python(
            cls_labels,
            n['cared'],
            label,
            name="Label_Encode",
            python_param=dict(module='bcl_layers', layer='LabelEncode'))

        n.cls_loss = L.Python(
            cls_rows,
            n['labels_input'],
            n['cls_weights'],
            name="FocalLoss",
            loss_weight=1,
            python_param=dict(module='bcl_layers', layer='WeightFocalLoss'),
            param_str=str(dict(focusing_parameter=2, alpha=0.25)))

        n.reg_loss = L.Python(
            box_rows,
            reg_targets,
            n['reg_outside_weights'],
            name="WeightedSmoothL1Loss",
            loss_weight=1,
            python_param=dict(module='bcl_layers',
                              layer='WeightedSmoothL1Loss'))

    if phase == "eval":
        n.f_cls_preds = cls_rows
        n.f_box_preds = box_rows

    return n
# Build the test net: Python data layer -> WarpctcNetBody -> sequence accuracy.
net = caffe.NetSpec()
params_str['train'] = False
params_str['label_txt'] = test_txt
net.data, net.label = L.Python(
    name="data",
    ntop=2,
    python_param=dict(
        module="pythonLayer",
        layer="WarpctcDataLayer",
        param_str=str(params_str),
    ),
)
body_layer = WarpctcNetBody(net, net.data)
net.premuted_fc = L.Permute(body_layer, order=[1, 0, 2])
net.accuracy = L.LabelsequenceAccuracy(
    net.premuted_fc, net.label, blank_label=10)

# Write the test prototxt and keep a copy in the job directory.
with open(test_net_file, 'w') as f:
    print('name: "{}_test"'.format(model_name), file=f)
    print(net.to_proto(), file=f)
shutil.copy(test_net_file, job_dir)

# Derive the deploy net from the test net by dropping its first and last
# layers (presumably the Python data layer and the accuracy layer above).
# NOTE(review): nothing is written to the deploy file in this excerpt -
# confirm the write follows in the part of the script not shown here.
deploy_net = net
with open(deploy_net_file, 'w') as f:
    net_param = deploy_net.to_proto()
    del net_param.layer[0]
    del net_param.layer[-1]
def UnitLayerDenseDetectorHeader(net, data_layer="data", num_classes=2, feature_layer="conv5",
                                 normalization=-1, use_batchnorm=True, prior_variance=[0.1],
                                 pro_widths=[], pro_heights=[], flip=True, clip=True,
                                 inter_layer_channels=0, flat=False, use_focus_loss=False,
                                 stage=1, lr_mult=1, decay_mult=1):
    """Attach loc / conf / prior-box heads for one detection stage.

    Args:
        net: Net container supporting ``keys()`` and item get/set.
        data_layer: Name of the data layer (second bottom of the prior box).
        num_classes: Number of classes; must be positive.  The location head
            emits ``4 * (num_classes - 1)`` values per prior.
        feature_layer: Name of the layer in ``net`` to build on.
        normalization: When not -1, an ``L.Normalize`` layer with this
            constant scale is inserted first.
        use_batchnorm: Forwarded as ``use_bn`` to the intermediate convs.
        prior_variance: Forwarded as ``variance`` to ``L.PriorBox``.
        pro_widths, pro_heights: Proposed prior sizes; same non-zero length.
        flip, clip: Forwarded to ``L.PriorBox``.
        inter_layer_channels: Sequence of ``(num_output, kernel_size)``
            entries describing intermediate convs.  Any falsy value
            (including the default ``0``) means "no intermediate layers".
        flat: When true the permuted outputs are flattened from axis 1.
        use_focus_loss: When true the confidence conv is created with the
            ``bias_type='focal'`` initialisation options.
        stage: Stage index used in the generated layer names.
        lr_mult, decay_mult: Forwarded to every ``ConvBNUnitLayer`` call.

    Returns:
        ``(loc_layer, conf_layer, priorbox_layer)``.

    Raises:
        AssertionError: If the arguments are inconsistent.
        ValueError: If an intermediate kernel size is not a positive odd
            number.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers."
    assert feature_layer in net_layers, "feature_layer is not in net's layers."
    assert pro_widths, "Must provide proposed width/height."
    assert pro_heights, "Must provide proposed width/height."
    assert len(pro_widths) == len(pro_heights), "pro_widths/heights must have the same length."

    from_layer = feature_layer
    prefix_name = '{}_{}'.format(from_layer, stage)

    # Norm-Layer
    if normalization != -1:
        norm_name = "{}_norm".format(prefix_name)
        net[norm_name] = L.Normalize(
            net[from_layer],
            scale_filler=dict(type="constant", value=normalization),
            across_spatial=False, channel_shared=False)
        from_layer = norm_name

    # InterLayers.  The default is the integer 0, which has no len(); treat
    # every falsy value as "no intermediate layers".
    for inter_id, inter_channel_kernel in enumerate(inter_layer_channels or [], 1):
        inter_channel = inter_channel_kernel[0]
        inter_kernel = inter_channel_kernel[1]
        if inter_kernel < 1 or inter_kernel % 2 == 0:
            raise ValueError(
                "inter layer kernel size must be a positive odd number, "
                "got {!r}".format(inter_kernel))
        # Size-preserving padding at stride 1: 1 -> 0, 3 -> 1, 5 -> 2, ...
        inter_pad = (inter_kernel - 1) // 2
        inter_name = "{}_inter_{}".format(prefix_name, inter_id)
        ConvBNUnitLayer(net, from_layer, inter_name, use_bn=use_batchnorm, use_relu=True,
                        num_output=inter_channel, kernel_size=inter_kernel, pad=inter_pad,
                        stride=1, use_scale=True, leaky=False, lr_mult=lr_mult,
                        decay_mult=decay_mult, constant_value=0.2)
        from_layer = inter_name

    # PriorBoxes
    num_priors_per_location = len(pro_widths)

    # LOC
    name = "{}_mbox_loc".format(prefix_name)
    num_loc_output = num_priors_per_location * 4 * (num_classes - 1)
    ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                    num_output=num_loc_output, kernel_size=3, pad=1, stride=1,
                    lr_mult=lr_mult, decay_mult=decay_mult)
    permute_name = "{}_perm".format(name)
    net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layer = net[flatten_name]
    else:
        loc_layer = net[permute_name]

    # CONF
    name = "{}_mbox_conf".format(prefix_name)
    num_conf_output = num_priors_per_location * num_classes
    if use_focus_loss:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                        num_output=num_conf_output, kernel_size=3, pad=1, stride=1,
                        init_xavier=False, bias_type='focal', sparse=num_classes,
                        lr_mult=lr_mult, decay_mult=decay_mult)
    else:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                        num_output=num_conf_output, kernel_size=3, pad=1, stride=1,
                        lr_mult=lr_mult, decay_mult=decay_mult)
    permute_name = "{}_perm".format(name)
    net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layer = net[flatten_name]
    else:
        conf_layer = net[permute_name]

    # PRIOR
    name = "{}_mbox_priorbox".format(prefix_name)
    net[name] = L.PriorBox(net[from_layer], net[data_layer],
                           pro_width=pro_widths, pro_height=pro_heights,
                           flip=flip, clip=clip, variance=prior_variance)
    priorbox_layer = net[name]
    return loc_layer, conf_layer, priorbox_layer
def UnitLayerDetectorHeader(net, data_layer="data", num_classes=2, feature_layer="conv5",
                            normalization=-1, use_batchnorm=True, prior_variance=[0.1],
                            pro_widths=[], pro_heights=[], flip=True, clip=True,
                            inter_layer_channels=[], flat=False, use_focus_loss=False,
                            stage=1, lr_mult=1.0, decay_mult=1.0, flag_withparamname=False,
                            flagcreateprior=True, add_str=""):
    """Attach a loc / conf / (optional) prior-box head to one feature layer.

    Layers are added to ``net`` in place.  New layer names are prefixed with
    ``"<feature_layer>_<stage>"``; ``add_str`` is appended to the names used to
    look layers up in ``net`` and is forwarded to ``ConvBNUnitLayer`` as
    ``pose_string``.

    Args:
        net: NetSpec-like container; must already hold ``data_layer`` and
            ``feature_layer + add_str``.
        data_layer: name of the input data layer (second bottom of PriorBox).
        num_classes: positive class count; the conf head outputs
            ``priors * num_classes`` channels.
        feature_layer: base name of the layer the head is built on.
        normalization: when not -1, an ``L.Normalize`` layer whose constant
            scale filler has this value is inserted first.
        use_batchnorm: forwarded as ``use_bn`` to the intermediate conv layers.
        prior_variance: forwarded to ``L.PriorBox`` as ``variance``.
        pro_widths, pro_heights: equally long, non-empty sequences forwarded to
            ``L.PriorBox`` as ``pro_width`` / ``pro_height``; their length is
            the number of priors per location.
        flip, clip: forwarded to ``L.PriorBox``.
        inter_layer_channels: sequence of ``(num_output, kernel_size)`` entries
            describing conv layers inserted before the heads.
        flat: when true, the permuted loc/conf outputs are also flattened
            (``axis=1``) and the flattened tops are returned.
        use_focus_loss: when true, the conf conv is created with
            ``init_xavier=False, bias_type='focal', sparse=num_classes``.
        stage: suffix used in the layer-name prefix.
        lr_mult, decay_mult: forwarded to every ``ConvBNUnitLayer`` call.
        flag_withparamname: forwarded to the intermediate conv layers only.
        flagcreateprior: when false, no PriorBox layer is created.
        add_str: suffix appended to layer names (see above).

    Returns:
        Tuple ``(loc_layer, conf_layer, priorbox_layer)``; ``priorbox_layer`` is
        an empty list when ``flagcreateprior`` is false.

    Raises:
        AssertionError: if a required argument is missing or inconsistent.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    net_layers = net.keys()
    assert data_layer in net_layers, "data_layer is not in net's layers."
    # Debug output; the parenthesised form prints the same text under
    # Python 2 and is also valid Python 3.
    print(feature_layer)
    assert feature_layer + add_str in net_layers, \
        "feature_layer is not in net's layers.(%s)" % feature_layer
    assert pro_widths, "Must provide proposed width/height. "
    assert pro_heights, "Must provide proposed width/height."
    assert len(pro_widths) == len(pro_heights), "pro_widths/heights must have the same length."

    from_layer = feature_layer
    prefix_name = '{}_{}'.format(from_layer, stage)
    from_layer += add_str

    # Norm-Layer
    if normalization != -1:
        # NOTE: prefix_name already ends with the stage, so the stage appears
        # twice in this name; kept as-is so generated layer names do not change.
        norm_name = "{}_{}_norm".format(prefix_name, stage)
        net[norm_name] = L.Normalize(net[from_layer],
                                     scale_filler=dict(type="constant", value=normalization),
                                     across_spatial=False, channel_shared=False)
        from_layer = norm_name

    print(inter_layer_channels, "inter_layer_channels")
    if inter_layer_channels:
        for inter_id, inter_channel_kernel in enumerate(inter_layer_channels, 1):
            inter_channel = inter_channel_kernel[0]
            inter_kernel = inter_channel_kernel[1]
            inter_name = "{}_inter_{}".format(prefix_name, inter_id)
            # (k - 1) // 2 yields 0 for 1x1 and 1 for 3x3 exactly as before;
            # other kernel sizes no longer hit an unbound or stale pad value.
            inter_pad = (inter_kernel - 1) // 2
            ConvBNUnitLayer(net, from_layer, inter_name, use_bn=use_batchnorm, use_relu=True,
                            num_output=inter_channel, kernel_size=inter_kernel, pad=inter_pad,
                            stride=1, use_scale=True, leaky=False, lr_mult=lr_mult,
                            decay_mult=decay_mult, flag_withparamname=flag_withparamname,
                            pose_string=add_str)
            from_layer = inter_name + add_str

    # Estimate number of priors per location given provided parameters.
    num_priors_per_location = len(pro_widths)

    # Create location prediction layer.
    name = "{}_mbox_loc".format(prefix_name)
    num_loc_output = num_priors_per_location * 4
    ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                    num_output=num_loc_output, kernel_size=3, pad=1, stride=1,
                    lr_mult=lr_mult, decay_mult=decay_mult, pose_string=add_str)
    permute_name = "{}_perm".format(name) + add_str
    net[permute_name] = L.Permute(net[name + add_str], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name) + add_str
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        loc_layer = net[flatten_name]
    else:
        loc_layer = net[permute_name]

    # Create confidence prediction layer.
    name = "{}_mbox_conf".format(prefix_name)
    num_conf_output = num_priors_per_location * num_classes
    if use_focus_loss:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                        num_output=num_conf_output, kernel_size=3, pad=1, stride=1,
                        init_xavier=False, bias_type='focal', sparse=num_classes,
                        lr_mult=lr_mult, decay_mult=decay_mult, pose_string=add_str)
    else:
        ConvBNUnitLayer(net, from_layer, name, use_bn=False, use_relu=False,
                        num_output=num_conf_output, kernel_size=3, pad=1, stride=1,
                        lr_mult=lr_mult, decay_mult=decay_mult, pose_string=add_str)
    permute_name = "{}_perm".format(name) + add_str
    net[permute_name] = L.Permute(net[name + add_str], order=[0, 2, 3, 1])
    if flat:
        flatten_name = "{}_flat".format(name) + add_str
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        conf_layer = net[flatten_name]
    else:
        conf_layer = net[permute_name]

    # Create prior generation layer.
    if flagcreateprior:
        name = "{}_mbox_priorbox".format(prefix_name) + add_str
        net[name] = L.PriorBox(net[from_layer], net[data_layer],
                               pro_width=pro_widths, pro_height=pro_heights,
                               flip=flip, clip=clip, variance=prior_variance)
        priorbox_layer = net[name]
    else:
        priorbox_layer = []
    return loc_layer, conf_layer, priorbox_layer
def get_caffe_layer(node, net, input_dims):
    """Generate caffe layer for corresponding mxnet op.

    Args:
        node (iterable from MxnetParser): Mxnet op summary generated by
            MxnetParser; read via its ``'type'``, ``'name'``, ``'inputs'`` and
            ``'attr'`` keys.
        net (caffe.net): Caffe netspec object, indexed by input layer name.
        input_dims: network input dimensions; only ``input_dims[0]`` is read,
            to scale the ``_contrib_MultiBoxPrior`` sizes and steps.

    Returns:
        caffe.layers: Equivalent caffe layer, or ``None`` when the node type is
        not one of the handled ops.

    Raises:
        AssertionError: if a single-input op has a different number of inputs,
            or an Activation is not ``relu``.
        ValueError: if a Pooling node has a ``pool_type`` other than
            ``'max'`` / ``'avg'``.
    """
    node_type = node['type']

    if node_type == 'Convolution':
        assert len(node['inputs']) == 1, \
            'Convolution layers can have only one input'
        conv_params = node['attr']
        kernel_size = make_list(conv_params['kernel'])
        num_filters = make_list(conv_params['num_filter'])[0]
        if 'stride' in conv_params:
            stride = make_list(conv_params['stride'])[0]
        else:
            stride = 1
        # 'pad' is optional, like 'stride' and 'dilate'; fall back to zero
        # padding (same default as the Pooling branch) instead of KeyError.
        if 'pad' in conv_params:
            padding = make_list(conv_params['pad'])
        else:
            padding = [0]
        if 'dilate' in conv_params:
            dilation = make_list(conv_params['dilate'])[0]
        else:
            dilation = 1
        convolution_param = {
            'pad': padding,
            'kernel_size': kernel_size,
            'num_output': num_filters,
            'stride': stride,
            'dilation': dilation
        }
        return layers.Convolution(net[node['inputs'][0]],
                                  convolution_param=convolution_param)

    if node_type == 'Activation':
        assert len(node['inputs']) == 1, \
            'Activation layers can have only one input'
        assert node['attr']['act_type'] == 'relu'
        return layers.ReLU(net[node['inputs'][0]])

    if node_type == 'Pooling':
        assert len(node['inputs']) == 1, \
            'Pooling layers can have only one input'
        pool_attr = node['attr']
        # Resolve the pooling method first so an unsupported type fails with a
        # clear error rather than an unbound local further down.
        pooling_type = pool_attr['pool_type']
        if pooling_type == 'max':
            pooling = params.Pooling.MAX
        elif pooling_type == 'avg':
            pooling = params.Pooling.AVG
        else:
            raise ValueError(
                "Unsupported pool_type for Pooling layer: %r" % (pooling_type,))
        kernel_size = make_list(pool_attr['kernel'])
        # 'stride' may be omitted from the op attributes; default to 1.
        if 'stride' in pool_attr:
            stride = make_list(pool_attr['stride'])
        else:
            stride = [1]
        if 'pad' in pool_attr:
            padding = make_list(pool_attr['pad'])
        else:
            padding = [0]
        pooling_param = {
            'pool': pooling,
            'pad': padding[0],
            'kernel_size': kernel_size[0],
            'stride': stride[0]
        }
        return layers.Pooling(net[node['inputs'][0]],
                              pooling_param=pooling_param)

    if node_type == 'L2Normalization':
        across_spatial = node['attr']['mode'] != 'channel'
        channel_shared = False
        scale_filler = {
            'type': "constant",
            'value': constants.NORMALIZATION_FACTOR
        }
        norm_param = {
            'across_spatial': across_spatial,
            'scale_filler': scale_filler,
            'channel_shared': channel_shared
        }
        return layers.Normalize(net[node['inputs'][0]], norm_param=norm_param)

    # Note - this layer has been implemented
    # only in WeiLiu's ssd branch of caffe not in caffe master
    if node_type == 'transpose':
        order = make_list(node['attr']['axes'])
        return layers.Permute(net[node['inputs'][0]],
                              permute_param={'order': order})

    if node_type == 'Flatten':
        if node['inputs'][0].endswith('anchors'):
            axis = 2
        else:
            axis = 1
        return layers.Flatten(net[node['inputs'][0]],
                              flatten_param={'axis': axis})

    if node_type == 'Concat':
        # In the ssd model, always concatenate along last axis,
        # since anchor boxes have an extra dimension in caffe (that includes variance).
        axis = -1
        concat_inputs = [net[inp] for inp in node['inputs']]
        return layers.Concat(*concat_inputs, concat_param={'axis': axis})

    if node_type == 'Reshape':
        if node['name'] == 'multibox_anchors':
            reshape_dims = [1, 2, -1]
        else:
            reshape_dims = make_list(node['attr']['shape'])
        return layers.Reshape(net[node['inputs'][0]],
                              reshape_param={'shape': {'dim': reshape_dims}})

    if node_type == '_contrib_MultiBoxPrior':
        priorbox_inputs = [net[inp] for inp in node['inputs']] + [net["data"]]
        sizes = make_list(node["attr"]["sizes"])
        min_size = sizes[0] * input_dims[0]
        max_size = int(round((sizes[1] * input_dims[0])**2 / min_size))
        aspect_ratio = make_list(node["attr"]["ratios"])
        steps = make_list(node["attr"]["steps"])
        param = {
            'clip': node["attr"]["clip"] == "true",
            'flip': False,
            'min_size': min_size,
            'max_size': max_size,
            'aspect_ratio': aspect_ratio,
            'variance': [0.1, 0.1, 0.2, 0.2],
            'step': int(round(steps[0] * input_dims[0])),
        }
        return layers.PriorBox(*priorbox_inputs, prior_box_param=param)

    if node_type == '_contrib_MultiBoxDetection':
        multibox_inputs = [net[inp] for inp in node['inputs']]
        # Reorder the bottoms: input 1 first, then input 0, then input 2.
        bottom_order = [1, 0, 2]
        multibox_inputs = [multibox_inputs[i] for i in bottom_order]
        param = {
            'num_classes': constants.NUM_CLASSES,
            'share_location': True,
            'background_label_id': 0,
            'nms_param': {
                'nms_threshold': float(node['attr']['nms_threshold']),
                'top_k': int(node['attr']['nms_topk'])
            },
            'keep_top_k': make_list(node['attr']['nms_topk'])[0],
            'confidence_threshold': 0.01,
            'code_type': params.PriorBox.CENTER_SIZE,
        }
        return layers.DetectionOutput(*multibox_inputs,
                                      detection_output_param=param)

    if node_type in ['SoftmaxActivation', 'SoftmaxOutput']:
        if 'mode' not in node['attr']:
            axis = 1
        elif node['attr']['mode'] == 'channel':
            axis = 1
        else:
            axis = 0
        # note: caffe expects confidence scores to be flattened before detection output layer receives it
        return layers.Flatten(
            layers.Permute(layers.Softmax(net[node['inputs'][0]], axis=axis),
                           permute_param={'order': [0, 2, 1]}),
            flatten_param={'axis': 1})

    # Unhandled op types yield no layer (same as the original fall-through).
    return None
def FSRCNN_s(img_list, label_list, batch_size, include_acc=False):
    """Build the FSRCNN-s training network and return its NetParameter proto.

    The network reads input images (resized to 640x360) and target images
    (resized to 1280x720) from two ImageData layers, applies five convolutions
    with PReLU activations, rearranges the 12-channel output with a
    reshape / permute / reshape sequence and compares it with the target
    through a Euclidean loss.

    Args:
        img_list: ``source`` list file for the input ImageData layer.
        label_list: ``source`` list file for the target ImageData layer.
        batch_size: batch size of both ImageData layers.
        include_acc: accepted for interface compatibility; not referenced.

    Returns:
        The result of ``to_proto`` over the created tops.

    Note:
        Relies on the module-level name ``root`` for ``root_folder``.
    """
    print('Create FSRCNN_s')

    # ImageData reference: https://www.cnblogs.com/houjun/p/9909764.html
    # With ntop=2 the layer call returns a tuple of two tops.  Only the first
    # top of each layer is used below, so unpack it here; passing the whole
    # tuple on as a bottom breaks proto generation.
    data, _data_aux = L.ImageData(
        name="data",
        ntop=2,
        source=img_list,
        batch_size=batch_size,
        is_color=True,
        new_width=640,
        new_height=360,
        root_folder=root,
        transform_param=dict(scale=0.00390625))

    label, _label_aux = L.ImageData(
        name="label",
        ntop=2,
        source=label_list,
        batch_size=batch_size,
        is_color=True,
        new_width=1280,
        new_height=720,
        root_folder=root,
        transform_param=dict(scale=0.00390625))

    # conv1
    conv1 = L.Convolution(data,
                          name="conv1",
                          num_output=32,
                          kernel_size=5,
                          stride=1,
                          pad=1,
                          weight_filler=dict(type='gaussian', std=0.05),
                          bias_filler=dict(type='constant', value=0))
    relu1 = L.PReLU(conv1, name="relu1", in_place=True,
                    prelu_param={'channel_shared': 1})

    # conv2
    conv2 = L.Convolution(conv1,
                          name="conv2",
                          num_output=5,
                          kernel_size=1,
                          stride=1,
                          pad=0,
                          group=1,
                          weight_filler=dict(type='gaussian', std=0.05),
                          bias_filler=dict(type='constant', value=0))
    relu2 = L.PReLU(conv2, name="relu2", in_place=True,
                    prelu_param={'channel_shared': 1})

    # conv22
    conv22 = L.Convolution(conv2,
                           name="conv22",
                           num_output=5,
                           kernel_size=3,
                           stride=1,
                           pad=1,
                           group=1,
                           weight_filler=dict(type='gaussian', std=0.05),
                           bias_filler=dict(type='constant', value=0))
    relu22 = L.PReLU(conv22, name="relu22", in_place=True,
                     prelu_param={'channel_shared': 1})

    # conv23
    # NOTE(review): kernel_size=1 with pad=1 looks intended to offset conv1's
    # kernel_size=5 / pad=1 — confirm before changing either.
    conv23 = L.Convolution(conv22,
                           name="conv23",
                           num_output=32,
                           kernel_size=1,
                           stride=1,
                           pad=1,
                           group=1,
                           weight_filler=dict(type='gaussian', std=0.05),
                           bias_filler=dict(type='constant', value=0))
    relu23 = L.PReLU(conv23, name="relu23", in_place=True,
                     prelu_param={'channel_shared': 1})

    # conv3
    conv3 = L.Convolution(conv23,
                          name="conv3",
                          num_output=12,
                          kernel_size=3,
                          stride=1,
                          pad=1,
                          weight_filler=dict(type='gaussian', std=0.05),
                          bias_filler=dict(type='constant', value=0))

    # shuffle
    # A dict literal with a repeated key keeps only the last value, so the
    # original {'dim': 0, 'dim': 2, ...} collapsed to {'dim': -1} (and likewise
    # for 'order').  Repeated proto fields must be given as lists.
    reshape1 = L.Reshape(conv3,
                         name="reshape_to_6d",
                         shape=dict(dim=[0, 2, 2, 3, 360, -1]))

    permute = L.Permute(reshape1,
                        name="permute",
                        permute_param=dict(order=[0, 3, 4, 1, 5, 2]))

    reshape2 = L.Reshape(permute,
                         name="reshape_to_4d",
                         shape=dict(dim=[0, 3, 720, -1]))

    # loss
    loss = L.EuclideanLoss(reshape2, label, name="loss")

    return to_proto(data, label, relu1, relu2, relu22, relu23, conv3, reshape2, loss)
def mfb_coatt(mode, batchsize, T, question_vocab_size, folder):
    """Build the MFB co-attention VQA network and return its proto.

    Args:
        mode: data split name.  ``'val'`` selects the
            ``vqa_data_layer_hdf5`` data module and ``SoftmaxWithLoss``; any
            other value selects ``vqa_data_layer_kld_hdf5`` and
            ``SoftmaxKLDLoss``.
        batchsize: batch size; serialised into the data layer's ``param_str``
            and used as the first dimension of the DummyData blobs.
        T: accepted for interface compatibility; not referenced in the body.
        question_vocab_size: ``input_dim`` of the word embedding layer.
        folder: serialised into the data layer's ``param_str``.

    Returns:
        ``n.to_proto()`` for the assembled ``caffe.NetSpec``.

    Note:
        Hyper-parameters are read from the module-level ``config`` object.
    """
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize, 'folder': folder})

    # Both branches create the same five tops; only the data module differs.
    if mode == 'val':
        data_module = 'vqa_data_layer_hdf5'
    else:
        data_module = 'vqa_data_layer_kld_hdf5'
    n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(
        module=data_module, layer='VQADataProviderLayer',
        param_str=mode_str, ntop=5)

    n.embed = L.Embed(n.data, input_dim=question_vocab_size, num_output=300,
                      weight_filler=dict(type='xavier'))
    n.embed_tanh = L.TanH(n.embed)
    concat_word_embed = [n.embed_tanh, n.glove]
    n.concat_embed = L.Concat(*concat_word_embed, concat_param={'axis': 2})  # T x N x 600

    # LSTM
    n.lstm1 = L.LSTM(
        n.concat_embed, n.cont,
        recurrent_param=dict(
            num_output=config.LSTM_UNIT_NUM,
            weight_filler=dict(type='xavier')))
    n.lstm1_droped = L.Dropout(n.lstm1, dropout_param={'dropout_ratio': config.LSTM_DROPOUT_RATIO})
    n.lstm1_resh = L.Permute(n.lstm1_droped, permute_param=dict(order=[1, 2, 0]))
    n.lstm1_resh2 = L.Reshape(n.lstm1_resh,
                              reshape_param=dict(shape=dict(dim=[0, 0, 0, 1])))

    # ---- Question Attention ----
    n.qatt_conv1 = L.Convolution(n.lstm1_resh2, kernel_size=1, stride=1, num_output=512, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.qatt_relu = L.ReLU(n.qatt_conv1)
    n.qatt_conv2 = L.Convolution(n.qatt_relu, kernel_size=1, stride=1,
                                 num_output=config.NUM_QUESTION_GLIMPSE, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.qatt_reshape = L.Reshape(
        n.qatt_conv2,
        reshape_param=dict(shape=dict(dim=[-1, config.NUM_QUESTION_GLIMPSE,
                                           config.MAX_WORDS_IN_QUESTION, 1])))  # N*NUM_QUESTION_GLIMPSE*15
    n.qatt_softmax = L.Softmax(n.qatt_reshape, axis=2)

    qatt_maps = L.Slice(n.qatt_softmax, ntop=config.NUM_QUESTION_GLIMPSE, slice_param={'axis': 1})
    dummy_lstm = L.DummyData(shape=dict(dim=[batchsize, 1]),
                             data_filler=dict(type='constant', value=1), ntop=1)
    qatt_feature_list = []
    # range() instead of the Python-2-only xrange(); the count is tiny.
    for i in range(config.NUM_QUESTION_GLIMPSE):
        # With ntop == 1 the Slice call yields a single top rather than a
        # tuple, so it must not be indexed.
        if config.NUM_QUESTION_GLIMPSE == 1:
            att_map = qatt_maps
        else:
            att_map = qatt_maps[i]
        feat_name = 'qatt_feat%d' % i
        n[feat_name] = L.SoftAttention(n.lstm1_resh2, att_map, dummy_lstm)
        qatt_feature_list.append(n[feat_name])
    n.qatt_feat_concat = L.Concat(*qatt_feature_list)

    # ---- Image Attention with MFB ----
    n.q_feat_resh = L.Reshape(n.qatt_feat_concat,
                              reshape_param=dict(shape=dict(dim=[0, -1, 1, 1])))
    n.i_feat_resh = L.Reshape(
        n.img_feature,
        reshape_param=dict(shape=dict(dim=[0, -1, config.IMG_FEAT_WIDTH, config.IMG_FEAT_WIDTH])))

    n.iatt_q_proj = L.InnerProduct(n.q_feat_resh, num_output=config.JOINT_EMB_SIZE,
                                   weight_filler=dict(type='xavier'))
    n.iatt_q_resh = L.Reshape(n.iatt_q_proj,
                              reshape_param=dict(shape=dict(dim=[-1, config.JOINT_EMB_SIZE, 1, 1])))
    n.iatt_q_tile1 = L.Tile(n.iatt_q_resh, axis=2, tiles=config.IMG_FEAT_WIDTH)
    n.iatt_q_tile2 = L.Tile(n.iatt_q_tile1, axis=3, tiles=config.IMG_FEAT_WIDTH)
    n.iatt_i_conv = L.Convolution(n.i_feat_resh, kernel_size=1, stride=1,
                                  num_output=config.JOINT_EMB_SIZE, pad=0,
                                  weight_filler=dict(type='xavier'))
    n.iatt_i_resh1 = L.Reshape(
        n.iatt_i_conv,
        reshape_param=dict(shape=dict(dim=[-1, config.JOINT_EMB_SIZE,
                                           config.IMG_FEAT_WIDTH, config.IMG_FEAT_WIDTH])))
    # operation=0 is PROD in Caffe's EltwiseParameter.EltwiseOp enum.
    n.iatt_iq_eltwise = L.Eltwise(n.iatt_q_tile2, n.iatt_i_resh1,
                                  eltwise_param=dict(operation=0))
    n.iatt_iq_droped = L.Dropout(n.iatt_iq_eltwise,
                                 dropout_param={'dropout_ratio': config.MFB_DROPOUT_RATIO})
    n.iatt_iq_resh2 = L.Reshape(
        n.iatt_iq_droped,
        reshape_param=dict(shape=dict(dim=[-1, config.JOINT_EMB_SIZE, config.IMG_FEAT_SIZE, 1])))
    n.iatt_iq_permute1 = L.Permute(n.iatt_iq_resh2, permute_param=dict(order=[0, 2, 1, 3]))
    # NOTE: this rebinds the name `iatt_iq_resh2` to a second Reshape; the
    # first one stays in the graph as the bottom of iatt_iq_permute1.  Kept
    # as-is so the generated layer/top names do not change.
    n.iatt_iq_resh2 = L.Reshape(
        n.iatt_iq_permute1,
        reshape_param=dict(shape=dict(dim=[-1, config.IMG_FEAT_SIZE,
                                           config.MFB_OUT_DIM, config.MFB_FACTOR_NUM])))
    n.iatt_iq_sumpool = L.Pooling(n.iatt_iq_resh2, pool=P.Pooling.SUM,
                                  pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.iatt_iq_permute2 = L.Permute(n.iatt_iq_sumpool, permute_param=dict(order=[0, 2, 1, 3]))

    n.iatt_iq_sqrt = L.SignedSqrt(n.iatt_iq_permute2)
    n.iatt_iq_l2 = L.L2Normalize(n.iatt_iq_sqrt)

    ## 2 conv layers 1000 -> 512 -> 2
    n.iatt_conv1 = L.Convolution(n.iatt_iq_l2, kernel_size=1, stride=1, num_output=512, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.iatt_relu = L.ReLU(n.iatt_conv1)
    n.iatt_conv2 = L.Convolution(n.iatt_relu, kernel_size=1, stride=1,
                                 num_output=config.NUM_IMG_GLIMPSE, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.iatt_resh = L.Reshape(
        n.iatt_conv2,
        reshape_param=dict(shape=dict(dim=[-1, config.NUM_IMG_GLIMPSE, config.IMG_FEAT_SIZE])))
    n.iatt_softmax = L.Softmax(n.iatt_resh, axis=2)
    n.iatt_softmax_resh = L.Reshape(
        n.iatt_softmax,
        reshape_param=dict(shape=dict(dim=[-1, config.NUM_IMG_GLIMPSE,
                                           config.IMG_FEAT_WIDTH, config.IMG_FEAT_WIDTH])))
    iatt_maps = L.Slice(n.iatt_softmax_resh, ntop=config.NUM_IMG_GLIMPSE, slice_param={'axis': 1})
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1), ntop=1)
    iatt_feature_list = []
    for i in range(config.NUM_IMG_GLIMPSE):
        if config.NUM_IMG_GLIMPSE == 1:
            att_map = iatt_maps
        else:
            att_map = iatt_maps[i]
        feat_name = 'iatt_feat%d' % i
        resh_name = 'iatt_feat%d_resh' % i
        n[feat_name] = L.SoftAttention(n.i_feat_resh, att_map, dummy)
        n[resh_name] = L.Reshape(n[feat_name],
                                 reshape_param=dict(shape=dict(dim=[0, -1])))
        iatt_feature_list.append(n[resh_name])
    n.iatt_feat_concat = L.Concat(*iatt_feature_list)
    n.iatt_feat_concat_resh = L.Reshape(n.iatt_feat_concat,
                                        reshape_param=dict(shape=dict(dim=[0, -1, 1, 1])))

    # ---- Fine-grained Image-Question MFB fusion ----
    n.mfb_q_proj = L.InnerProduct(n.q_feat_resh, num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_i_proj = L.InnerProduct(n.iatt_feat_concat_resh, num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_iq_eltwise = L.Eltwise(n.mfb_q_proj, n.mfb_i_proj, eltwise_param=dict(operation=0))
    n.mfb_iq_drop = L.Dropout(n.mfb_iq_eltwise,
                              dropout_param={'dropout_ratio': config.MFB_DROPOUT_RATIO})
    n.mfb_iq_resh = L.Reshape(
        n.mfb_iq_drop,
        reshape_param=dict(shape=dict(dim=[-1, 1, config.MFB_OUT_DIM, config.MFB_FACTOR_NUM])))
    n.mfb_iq_sumpool = L.Pooling(n.mfb_iq_resh, pool=P.Pooling.SUM,
                                 pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.mfb_out = L.Reshape(n.mfb_iq_sumpool,
                          reshape_param=dict(shape=dict(dim=[-1, config.MFB_OUT_DIM])))
    n.mfb_sign_sqrt = L.SignedSqrt(n.mfb_out)
    n.mfb_l2 = L.L2Normalize(n.mfb_sign_sqrt)

    n.prediction = L.InnerProduct(n.mfb_l2, num_output=config.NUM_OUTPUT_UNITS,
                                  weight_filler=dict(type='xavier'))
    if mode == 'val':
        n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    else:
        n.loss = L.SoftmaxKLDLoss(n.prediction, n.label)
    return n.to_proto()
def test_v1(phase,
            dataset_params=None,
            model_cfg=None,
            deploy=False,
            create_prototxt=True,
            save_path=None,
            ):
    """Build the detection network for one phase and return its proto.

    The graph is: Python data layer -> "mlp" conv block -> max pooling ->
    ``PointPillarsScatter`` Python layer -> three conv stages, each followed
    by a deconv branch -> concat -> 1x1 classification and box-regression
    convolutions.  ``"train"`` appends the loss layers, ``"eval"`` appends the
    ``EvalLayer_v2`` Python layer.

    Args:
        phase: ``"train"`` or ``"eval"``.
        dataset_params: dict copied, tagged with ``'subset' = phase`` and
            passed (via ``repr``) to the data / eval Python layers.
        model_cfg: config object; ``model_cfg.rpn`` supplies ``num_filters``,
            ``layer_strides``, ``num_upsample_filters`` and
            ``upsample_strides``.
        deploy: when true, only a debug line is printed.
        create_prototxt: accepted for interface compatibility; not referenced.
        save_path: accepted for interface compatibility; not referenced.

    Returns:
        ``n.to_proto()`` for the assembled ``caffe.NetSpec``.

    Raises:
        ValueError: if ``phase`` is neither ``"train"`` nor ``"eval"``.
    """
    # Validate up front: previously an unknown phase failed on the missing
    # `n.data` top long before reaching the trailing `raise ValueError`.
    if phase not in ("train", "eval"):
        raise ValueError("phase must be 'train' or 'eval', got %r" % (phase,))

    # RPN config
    num_filters = list(model_cfg.rpn.num_filters)
    layer_strides = list(model_cfg.rpn.layer_strides)
    num_upsample_filters = list(model_cfg.rpn.num_upsample_filters)
    upsample_strides = list(model_cfg.rpn.upsample_strides)

    box_code_size = 7
    num_anchor_per_loc = 2
    num_cls = 2

    n = caffe.NetSpec()

    if phase == "train":
        dataset_params_train = dataset_params.copy()
        dataset_params_train['subset'] = phase
        datalayer_train = L.Python(name='data', include=dict(phase=caffe.TRAIN),
                                   ntop=4,
                                   python_param=dict(module='custom_layers',
                                                     layer='InputKittiData',
                                                     param_str=repr(dataset_params_train)))
        n.data, n.coors, n.labels, n.reg_targets = datalayer_train
    else:
        dataset_params_eval = dataset_params.copy()
        dataset_params_eval['subset'] = phase
        datalayer_eval = L.Python(name='data', include=dict(phase=caffe.TEST),
                                  ntop=9,
                                  python_param=dict(module='custom_layers',
                                                    layer='InputKittiData',
                                                    param_str=repr(dataset_params_eval)))
        (n.data, n.coors, n.anchors, n.rect, n.trv2c, n.p2,
         n.anchors_mask, n.img_idx, n.img_shape) = datalayer_eval

    if deploy:
        print("[debug] run deploy in caffe_model.py")

    top_prev = conv_bn_relu(n, "mlp", n.data, 1, 64, stride=1, pad=0, loop=1)

    n['max_pool'] = L.Pooling(top_prev,
                              pooling_param=dict(kernel_h=1, kernel_w=100, stride=1, pad=0,
                                                 pool=caffe.params.Pooling.MAX))  # (1,64,voxel,1)
    top_prev = n['max_pool']

    n['PillarScatter'] = L.Python(top_prev, n.coors,
                                  python_param=dict(
                                      module='custom_layers',
                                      layer='PointPillarsScatter',
                                      param_str=str(dict(output_shape=[1, 1, 496, 432, 64], ))))
    top_prev = n['PillarScatter']

    # Stage 1
    top_prev = conv_bn_relu(n, "ini_conv1", top_prev, 3, num_filters[0],
                            stride=layer_strides[0], pad=1, loop=1)
    top_prev = conv_bn_relu(n, "rpn_conv1", top_prev, 3, num_filters[0],
                            stride=1, pad=1, loop=3)
    deconv1 = deconv_bn_relu(n, "rpn_deconv1", top_prev, upsample_strides[0],
                             num_upsample_filters[0], stride=upsample_strides[0], pad=0)

    # Stage 2
    top_prev = conv_bn_relu(n, "ini_conv2", top_prev, 3, num_filters[1],
                            stride=layer_strides[1], pad=1, loop=1)
    top_prev = conv_bn_relu(n, "rpn_conv2", top_prev, 3, num_filters[1],
                            stride=1, pad=1, loop=3)
    deconv2 = deconv_bn_relu(n, "rpn_deconv2", top_prev, upsample_strides[1],
                             num_upsample_filters[1], stride=upsample_strides[1], pad=0)

    # Stage 3
    top_prev = conv_bn_relu(n, "ini_conv3", top_prev, 3, num_filters[2],
                            stride=layer_strides[2], pad=1, loop=1)
    top_prev = conv_bn_relu(n, "rpn_conv3", top_prev, 3, num_filters[2],
                            stride=1, pad=1, loop=3)
    deconv3 = deconv_bn_relu(n, "rpn_deconv3", top_prev, upsample_strides[2],
                             num_upsample_filters[2], stride=upsample_strides[2], pad=0)

    n['rpn_out'] = L.Concat(deconv1, deconv2, deconv3)
    top_prev = n['rpn_out']

    def _head_conv_param(num_output):
        # Shared settings of the two 1x1 prediction heads.
        return dict(num_output=num_output,
                    kernel_size=1, stride=1, pad=0,
                    weight_filler=dict(type='xavier'),
                    bias_term=True,
                    bias_filler=dict(type='constant', value=0),
                    engine=1,
                    )

    n['cls_preds'] = L.Convolution(top_prev, name="cls_head",
                                   convolution_param=_head_conv_param(num_cls),
                                   param=[dict(lr_mult=1), dict(lr_mult=1)])
    cls_preds = n['cls_preds']

    n['box_preds'] = L.Convolution(top_prev, name="reg_head",
                                   convolution_param=_head_conv_param(
                                       num_anchor_per_loc * box_code_size),
                                   param=[dict(lr_mult=1), dict(lr_mult=1)])
    box_preds = n['box_preds']

    if phase == "train":
        n['cared'], n['reg_outside_weights'], n['cls_weights'] = L.Python(
            n.labels,
            name="PrepareLossWeight",
            ntop=3,
            python_param=dict(
                module='custom_layers',
                layer='PrepareLossWeight'
            ))
        reg_outside_weights = n['reg_outside_weights']
        cared = n['cared']
        cls_weights = n['cls_weights']

        # Gradients cannot be computed with respect to the label inputs (bottom[1])
        n['labels_input'] = L.Python(n.labels, cared,
                                     name="Label_Encode",
                                     python_param=dict(
                                         module='custom_layers',
                                         layer='LabelEncode',
                                     ))
        labels_input = n['labels_input']

        n['cls_preds_permute'] = L.Permute(
            cls_preds, permute_param=dict(order=[0, 2, 3, 1]))  # (B,C,H,W) -> (B,H,W,C)
        cls_preds_permute = n['cls_preds_permute']
        n['cls_preds_reshape'] = L.Reshape(
            cls_preds_permute,
            reshape_param=dict(shape=dict(dim=[0, -1, 1])))  # (B,H,W,C) -> (B, -1, C)
        cls_preds_reshape = n['cls_preds_reshape']

        # param_str belongs to PythonParameter; it is set inside python_param
        # here, which produces the same proto as the former separate kwarg.
        n.cls_loss = L.Python(cls_preds_reshape, labels_input, cls_weights,
                              name="FocalLoss",
                              loss_weight=1,
                              python_param=dict(
                                  module='custom_layers',
                                  layer='WeightFocalLoss',
                                  param_str=str(dict(focusing_parameter=2, alpha=0.25))
                              ))

        n['box_preds_permute'] = L.Permute(
            box_preds, permute_param=dict(order=[0, 2, 3, 1]))  # (B,C,H,W) -> (B,H,W,C)
        box_preds_permute = n['box_preds_permute']
        n['box_preds_reshape'] = L.Reshape(
            box_preds_permute,
            reshape_param=dict(shape=dict(dim=[0, -1, box_code_size])))  # (B,H,W,C) -> (B, -1, C)
        box_preds_reshape = n['box_preds_reshape']

        n.reg_loss = L.Python(box_preds_reshape, n.reg_targets, reg_outside_weights,
                              name="WeightedSmoothL1Loss",
                              loss_weight=1,
                              python_param=dict(
                                  module='custom_layers',
                                  layer='WeightedSmoothL1Loss'
                              ))
        return n.to_proto()

    # phase == "eval" (validated at the top)
    n['iou'] = L.Python(box_preds, cls_preds,
                        n.anchors, n.rect, n.trv2c, n.p2, n.anchors_mask,
                        n.img_idx, n.img_shape,
                        name="EvalLayer",
                        python_param=dict(
                            module='custom_layers',
                            layer='EvalLayer_v2',
                            param_str=repr(dataset_params_eval),
                        ))
    return n.to_proto()
def CreateMultiBoxHead(net, data_layer="data", num_classes=[], from_layers=[],
                       use_objectness=False, normalizations=[], use_batchnorm=True,
                       lr_mult=1, use_scale=True, min_sizes=[], max_sizes=[],
                       prior_variance=[0.1], aspect_ratios=[], steps=[],
                       img_height=0, img_width=0, share_location=True, flip=True,
                       clip=True, offset=0.5, inter_layer_depth=[], kernel_size=1,
                       pad=0, conf_postfix='', loc_postfix='', head_postfix='ext/pm',
                       **bn_param):
    """Add loc / conf / prior-box (and optional objectness) heads to ``net``.

    For every entry of ``from_layers`` this creates, in order: an optional
    Normalize layer, an optional 3x3 intermediate conv, a location head, a
    confidence head, a PriorBox layer and (if ``use_objectness``) an
    objectness head.  Per-layer outputs are finally concatenated into
    ``mbox_loc``, ``mbox_conf``, ``mbox_priorbox`` and ``mbox_objectness``.

    Layer names are built from ``head_postfix`` and the 1-based source index.
    ``use_scale`` is accepted but not referenced in the body.

    Returns:
        List ``[mbox_loc, mbox_conf, mbox_priorbox]`` plus ``mbox_objectness``
        when ``use_objectness`` is true.

    Raises:
        AssertionError: if ``num_classes`` is missing/non-positive, a per-layer
            list has the wrong length, or ``data_layer`` is not in ``net``.
    """
    # --- argument validation (same order and messages as before) ---
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    source_count = len(from_layers)
    assert not normalizations or source_count == len(normalizations), \
        "from_layers and normalizations should have same length"
    assert source_count == len(min_sizes), \
        "from_layers and min_sizes should have same length"
    assert not max_sizes or source_count == len(max_sizes), \
        "from_layers and max_sizes should have same length"
    assert not aspect_ratios or source_count == len(aspect_ratios), \
        "from_layers and aspect_ratios should have same length"
    assert not steps or source_count == len(steps), \
        "from_layers and steps should have same length"
    assert data_layer in net.keys(), "data_layer is not in net's layers"
    assert not inter_layer_depth or source_count == len(inter_layer_depth), \
        "from_layers and inter_layer_depth should have same length"

    def _listify(value):
        # Wrap anything that is not exactly a list into a one-element list.
        return value if type(value) is list else [value]

    def _add_flat_head(bottom_name, head_name, num_output):
        # Conv -> permute (0, 2, 3, 1) -> flatten; returns the flattened top.
        ConvBNLayer(net, bottom_name, head_name, use_bn=use_batchnorm, use_relu=False,
                    lr_mult=lr_mult, num_output=num_output, kernel_size=kernel_size,
                    pad=pad, stride=1, **bn_param)
        perm_name = "{}_perm".format(head_name)
        net[perm_name] = L.Permute(net[head_name], order=[0, 2, 3, 1])
        flat_name = "{}_flat".format(head_name)
        net[flat_name] = L.Flatten(net[perm_name], axis=1)
        return net[flat_name]

    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    objectness_layers = []

    for idx, source_name in enumerate(from_layers):
        level = idx + 1
        from_layer = source_name

        # Get the normalize value.
        if normalizations and normalizations[idx] != -1:
            norm_name = "{}{}_norm".format(head_postfix, level)
            net[norm_name] = L.Normalize(
                net[from_layer],
                scale_filler=dict(type="constant", value=normalizations[idx]),
                across_spatial=False,
                channel_shared=False)
            from_layer = norm_name

        # Add intermediate layers.
        if inter_layer_depth and inter_layer_depth[idx] > 0:
            inter_name = "{}{}_inter".format(head_postfix, level)
            ConvBNLayer(net, from_layer, inter_name, use_bn=use_batchnorm, use_relu=True,
                        lr_mult=lr_mult, num_output=inter_layer_depth[idx],
                        kernel_size=3, pad=1, stride=1, **bn_param)
            from_layer = inter_name

        # Estimate number of priors per location given provided parameters.
        min_size = _listify(min_sizes[idx])
        aspect_ratio = _listify(aspect_ratios[idx]) if len(aspect_ratios) > idx else []
        max_size = _listify(max_sizes[idx]) if len(max_sizes) > idx else []
        if max_size:
            assert len(max_size) == len(min_size), \
                "max_size and min_size should have same length."
        priors_without_ratio = 2 if max_size else 1
        num_priors_per_location = (priors_without_ratio + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = steps[idx] if len(steps) > idx else []

        # Create location prediction layer.
        loc_name = "{}{}_mbox_loc{}".format(head_postfix, level, loc_postfix)
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes
        loc_layers.append(_add_flat_head(from_layer, loc_name, num_loc_output))

        # Create confidence prediction layer.
        conf_name = "{}{}_mbox_conf{}".format(head_postfix, level, conf_postfix)
        num_conf_output = num_priors_per_location * num_classes
        conf_layers.append(_add_flat_head(from_layer, conf_name, num_conf_output))

        # Create prior generation layer.
        prior_name = "{}{}_mbox_priorbox".format(head_postfix, level)
        net[prior_name] = L.PriorBox(net[from_layer], net[data_layer],
                                     min_size=min_size, clip=clip,
                                     variance=prior_variance, offset=offset)
        if max_size:
            net.update(prior_name, {'max_size': max_size})
        if aspect_ratio:
            net.update(prior_name, {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(prior_name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                image_size_param = {'img_size': img_height}
            else:
                image_size_param = {'img_h': img_height, 'img_w': img_width}
            net.update(prior_name, image_size_param)
        priorbox_layers.append(net[prior_name])

        # Create objectness prediction layer.
        if use_objectness:
            obj_name = "{}{}_mbox_objectness".format(head_postfix, level)
            num_obj_output = num_priors_per_location * 2
            objectness_layers.append(_add_flat_head(from_layer, obj_name, num_obj_output))

    # Concatenate priorbox, loc, and conf layers.
    concat_specs = [
        ("mbox_loc", loc_layers, 1),
        ("mbox_conf", conf_layers, 1),
        ("mbox_priorbox", priorbox_layers, 2),
    ]
    if use_objectness:
        concat_specs.append(("mbox_objectness", objectness_layers, 1))

    mbox_layers = []
    for concat_name, parts, concat_axis in concat_specs:
        net[concat_name] = L.Concat(*parts, axis=concat_axis)
        mbox_layers.append(net[concat_name])

    return mbox_layers
def CreateRefineDetHead(net, data_layer="data", num_classes=[], from_layers=[],
        from_layers2=[], normalizations=[], use_batchnorm=True, lr_mult=1,
        min_sizes=[], max_sizes=[], prior_variance=[0.1], aspect_ratios=[],
        steps=[], img_height=0, img_width=0, share_location=True, flip=True,
        clip=True, offset=0.5, inter_layer_depth=[], kernel_size=1, pad=0,
        conf_postfix='', loc_postfix='', **bn_param):
    """Add the two prediction heads ('arm' and 'odm') to `net`.

    The first head is built on `from_layers` and produces location,
    confidence and prior-box blobs; its confidence branch always has 2
    outputs per prior. The second head is built on `from_layers2` and
    produces location and confidence blobs with `num_classes` outputs per
    prior; it creates no prior boxes of its own.
    (Presumably these are RefineDet's anchor refinement and object detection
    modules -- the code itself only fixes the 'arm' / 'odm' blob prefixes.)

    Args:
        net: net specification being extended; must expose `keys()`, item
            access/assignment and `update(name, params)`.
        data_layer: name of the data blob handed to every PriorBox layer.
        num_classes: positive class count for the second head. The `[]`
            default is only a placeholder and fails validation.
        from_layers: source layer names for the first head.
        from_layers2: source layer names for the second head. Entry `i`
            reuses the per-layer settings at index `i` (normalizations,
            inter_layer_depth, min/max sizes, aspect ratios), so it may not
            be longer than `from_layers`.
        normalizations: optional per-layer Normalize scale; -1 skips the
            Normalize layer for that entry.
        use_batchnorm, lr_mult, kernel_size, pad: forwarded to the
            prediction ConvBNLayer calls.
        min_sizes, max_sizes, aspect_ratios, steps: per-layer PriorBox
            settings; scalars are wrapped into one-element lists (steps are
            passed through as given).
        prior_variance, clip, offset, flip: forwarded to PriorBox.
        img_height, img_width: when both are non-zero they are set on the
            PriorBox layer (`img_size` if equal, else `img_h` / `img_w`).
        share_location: when False, location outputs are multiplied by the
            head's class count (2 for the first head, `num_classes` for the
            second).
        inter_layer_depth: optional per-layer switch; a value > 0 inserts a
            depthwise 3x3 + 1x1 conv pair before the predictions. The value
            is used only as a flag -- the inserted width is fixed at 512.
        conf_postfix, loc_postfix: suffixes for the prediction layer names.
        **bn_param: extra keyword arguments forwarded to ConvBNLayer /
            DWConvBNLayer.

    Returns:
        List of net tops in this order:
        [arm_loc, arm_conf, arm_priorbox, odm_loc, odm_conf].

    Raises:
        AssertionError: if any of the argument checks below fails.
    """
    # NOTE: the list defaults are kept for interface compatibility; this
    # function only reads them and never mutates them.
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(aspect_ratios), "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(steps), "from_layers and steps should have same length"
    assert data_layer in net.keys(), "data_layer is not in net's layers"
    if inter_layer_depth:
        assert len(from_layers) == len(inter_layer_depth), "from_layers and inter_layer_depth should have same length"
    # The second head indexes min_sizes (and friends) by its own position, so
    # a longer from_layers2 would otherwise die with an IndexError only after
    # the net has been partially built.
    assert len(from_layers2) <= len(from_layers), "from_layers2 should not be longer than from_layers"

    # Fixed 2-way confidence output for the first head.
    num_classes_rpn = 2

    # ---- First head ('arm'): loc + conf + priorbox per source layer. ----
    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    for i, from_layer in enumerate(from_layers):
        from_layer = _refinedet_head_input(
            net, from_layer, i, normalizations, inter_layer_depth, bn_param)
        min_size, max_size, aspect_ratio, num_priors_per_location = \
            _refinedet_prior_spec(i, min_sizes, max_sizes, aspect_ratios, flip)
        step = steps[i] if len(steps) > i else []

        # Create location prediction layer.
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes_rpn
        loc_layers.append(_refinedet_add_prediction(
            net, from_layer, "{}_mbox_loc{}".format(from_layer, loc_postfix),
            num_loc_output, use_batchnorm, lr_mult, kernel_size, pad, bn_param))

        # Create confidence prediction layer.
        conf_layers.append(_refinedet_add_prediction(
            net, from_layer, "{}_mbox_conf{}".format(from_layer, conf_postfix),
            num_priors_per_location * num_classes_rpn,
            use_batchnorm, lr_mult, kernel_size, pad, bn_param))

        # Create prior generation layer.
        priorbox_layers.append(_refinedet_add_priorbox(
            net, from_layer, data_layer, min_size, max_size, aspect_ratio,
            step, flip, clip, prior_variance, offset, img_height, img_width))

    # Concatenate priorbox, loc, and conf layers.
    mbox_layers = []
    net["arm_loc"] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net["arm_loc"])
    net["arm_conf"] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net["arm_conf"])
    net["arm_priorbox"] = L.Concat(*priorbox_layers, axis=2)
    mbox_layers.append(net["arm_priorbox"])

    # ---- Second head ('odm'): loc + conf only, num_classes outputs. ----
    loc_layers = []
    conf_layers = []
    for i, from_layer in enumerate(from_layers2):
        from_layer = _refinedet_head_input(
            net, from_layer, i, normalizations, inter_layer_depth, bn_param)
        # Only the prior count is needed here; no PriorBox layer is created.
        num_priors_per_location = _refinedet_prior_spec(
            i, min_sizes, max_sizes, aspect_ratios, flip)[3]

        # Create location prediction layer.
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes
        loc_layers.append(_refinedet_add_prediction(
            net, from_layer, "{}_mbox_loc{}".format(from_layer, loc_postfix),
            num_loc_output, use_batchnorm, lr_mult, kernel_size, pad, bn_param))

        # Create confidence prediction layer.
        conf_layers.append(_refinedet_add_prediction(
            net, from_layer, "{}_mbox_conf{}".format(from_layer, conf_postfix),
            num_priors_per_location * num_classes,
            use_batchnorm, lr_mult, kernel_size, pad, bn_param))

    # Concatenate loc and conf layers.
    net["odm_loc"] = L.Concat(*loc_layers, axis=1)
    mbox_layers.append(net["odm_loc"])
    net["odm_conf"] = L.Concat(*conf_layers, axis=1)
    mbox_layers.append(net["odm_conf"])

    return mbox_layers


def _refinedet_head_input(net, from_layer, i, normalizations,
        inter_layer_depth, bn_param):
    """Insert the optional Normalize / intermediate layers; return the name to predict from."""
    # Get the normalize value.
    if normalizations and normalizations[i] != -1:
        norm_name = "{}_norm".format(from_layer)
        net[norm_name] = L.Normalize(net[from_layer],
            scale_filler=dict(type="constant", value=normalizations[i]),
            across_spatial=False, channel_shared=False)
        from_layer = norm_name

    # Add intermediate layers.
    if inter_layer_depth and inter_layer_depth[i] > 0:
        # Inter layer from body to head
        inter_name = "{}_inter".format(from_layer)
        # Depthwise convolution layer
        # NOTE(review): the layer name is also passed as conv_postfix --
        # confirm against ConvBNLayer's naming scheme that this is intended.
        inter_dw = inter_name + '/dw'
        DWConvBNLayer(net, from_layer, inter_dw, use_bn=True, use_relu=True,
            num_output=512, group=512, kernel_size=3, pad=1, stride=1,
            conv_prefix='', conv_postfix=inter_dw,
            bn_prefix='', bn_postfix='/bn',
            scale_prefix='', scale_postfix='/scale', **bn_param)
        # Separable (pointwise) layer
        inter_sep = inter_name + '/sep'
        ConvBNLayer(net, inter_dw, inter_sep, use_bn=True, use_relu=True,
            num_output=512, kernel_size=1, pad=0, stride=1,
            conv_prefix='', conv_postfix=inter_sep,
            bn_prefix='', bn_postfix='/bn',
            scale_prefix='', scale_postfix='/scale', **bn_param)
        # Bridge to the rest of the head
        from_layer = inter_sep

    return from_layer


def _refinedet_prior_spec(i, min_sizes, max_sizes, aspect_ratios, flip):
    """Return (min_size, max_size, aspect_ratio, num_priors_per_location) for layer `i`."""
    min_size = min_sizes[i]
    if not isinstance(min_size, list):
        min_size = [min_size]

    aspect_ratio = []
    if len(aspect_ratios) > i:
        aspect_ratio = aspect_ratios[i]
        if not isinstance(aspect_ratio, list):
            aspect_ratio = [aspect_ratio]

    max_size = []
    if len(max_sizes) > i:
        max_size = max_sizes[i]
        if not isinstance(max_size, list):
            max_size = [max_size]
        if max_size:
            assert len(max_size) == len(min_size), "max_size and min_size should have same length."

    # One prior per min_size, one more per max_size when given, plus one per
    # aspect ratio (doubled when flip is set).
    base = 2 if max_size else 1
    num_priors_per_location = (base + len(aspect_ratio)) * len(min_size)
    if flip:
        num_priors_per_location += len(aspect_ratio) * len(min_size)

    return min_size, max_size, aspect_ratio, num_priors_per_location


def _refinedet_add_prediction(net, from_layer, name, num_output,
        use_batchnorm, lr_mult, kernel_size, pad, bn_param):
    """Add a conv -> permute -> flatten prediction branch; return the flattened top."""
    ConvBNLayer(net, from_layer, name, use_bn=use_batchnorm, use_relu=False,
        lr_mult=lr_mult, num_output=num_output, kernel_size=kernel_size,
        pad=pad, stride=1, **bn_param)
    permute_name = "{}_perm".format(name)
    net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
    flatten_name = "{}_flat".format(name)
    net[flatten_name] = L.Flatten(net[permute_name], axis=1)
    return net[flatten_name]


def _refinedet_add_priorbox(net, from_layer, data_layer, min_size, max_size,
        aspect_ratio, step, flip, clip, prior_variance, offset,
        img_height, img_width):
    """Add the PriorBox layer for `from_layer`; return its top."""
    name = "{}_mbox_priorbox".format(from_layer)
    net[name] = L.PriorBox(net[from_layer], net[data_layer],
        min_size=min_size, clip=clip, variance=prior_variance, offset=offset)
    if max_size:
        net.update(name, {'max_size': max_size})
    if aspect_ratio:
        net.update(name, {'aspect_ratio': aspect_ratio, 'flip': flip})
    if step:
        net.update(name, {'step': step})
    if img_height != 0 and img_width != 0:
        if img_height == img_width:
            net.update(name, {'img_size': img_height})
        else:
            net.update(name, {'img_h': img_height, 'img_w': img_width})
    return net[name]