# Common imports for the snippets below (these functions come from several
# Caffe model-definition scripts; torch/torchvision are only needed by
# get_me_my_prototxt, and numpy/math by the Keras converter).
from __future__ import print_function
import math
import numpy as np
import caffe
from caffe import layers as L
from caffe import params as P
from caffe.proto import caffe_pb2
import torch          # used by get_me_my_prototxt
import torchvision    # used by get_me_my_prototxt


def ConvBNLayer(net, from_layer, out_layer, use_bn, use_relu, num_output,
                kernel_size, pad, stride, dilation=1, use_scale=True, lr_mult=1,
                conv_prefix='', conv_postfix='', bn_prefix='', bn_postfix='_bn',
                scale_prefix='', scale_postfix='_scale', bias_prefix='',
                bias_postfix='_bias', **bn_params):
    if use_bn:
        # parameters for convolution layer with batchnorm.
        kwargs = {
            'param': [dict(lr_mult=lr_mult, decay_mult=1)],
            'weight_filler': dict(type='gaussian', std=0.01),
            'bias_term': False,
        }
        eps = bn_params.get('eps', 0.001)
        moving_average_fraction = bn_params.get('moving_average_fraction', 0.999)
        use_global_stats = bn_params.get('use_global_stats', False)
        # parameters for batchnorm layer.
        bn_kwargs = {
            'param': [
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0)],
            'eps': eps,
            'moving_average_fraction': moving_average_fraction,
        }
        bn_lr_mult = lr_mult
        if use_global_stats:
            # only specify if use_global_stats is explicitly provided;
            # otherwise, use_global_stats_ = this->phase_ == TEST;
            bn_kwargs = {
                'param': [
                    dict(lr_mult=0, decay_mult=0),
                    dict(lr_mult=0, decay_mult=0),
                    dict(lr_mult=0, decay_mult=0)],
                'eps': eps,
                'use_global_stats': use_global_stats,
            }
            # not updating scale/bias parameters
            bn_lr_mult = 0
        # parameters for scale bias layer after batchnorm.
        if use_scale:
            sb_kwargs = {
                'bias_term': True,
                'param': [
                    dict(lr_mult=bn_lr_mult, decay_mult=0),
                    dict(lr_mult=bn_lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=1.0),
                'bias_filler': dict(type='constant', value=0.0),
            }
        else:
            bias_kwargs = {
                'param': [dict(lr_mult=bn_lr_mult, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
            }
    else:
        kwargs = {
            'param': [
                dict(lr_mult=lr_mult, decay_mult=1),
                dict(lr_mult=2 * lr_mult, decay_mult=0)],
            'weight_filler': dict(type='xavier'),
            'bias_filler': dict(type='constant', value=0)
        }

    conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix)
    # UnpackVariable is the companion helper from SSD's model_libs.py.
    [kernel_h, kernel_w] = UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = UnpackVariable(pad, 2)
    [stride_h, stride_w] = UnpackVariable(stride, 2)
    if kernel_h == kernel_w:
        net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
                                       kernel_size=kernel_h, pad=pad_h,
                                       stride=stride_h, **kwargs)
    else:
        net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
                                       kernel_h=kernel_h, kernel_w=kernel_w,
                                       pad_h=pad_h, pad_w=pad_w,
                                       stride_h=stride_h, stride_w=stride_w,
                                       **kwargs)
    if dilation > 1:
        net.update(conv_name, {'dilation': dilation})
    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNorm(net[conv_name], in_place=True, **bn_kwargs)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_relu:
        relu_name = '{}_relu'.format(conv_name)
        net[relu_name] = L.ReLU(net[conv_name], in_place=True)

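# A minimal usage sketch (not from the original source). It assumes SSD's
# model_libs context, i.e. that UnpackVariable is defined alongside
# ConvBNLayer; the layer names and shapes below are illustrative.
def _example_conv_bn_layer():
    net = caffe.NetSpec()
    net.data = L.Input(input_param=dict(shape=dict(dim=[1, 3, 300, 300])))
    # Emits conv1, conv1_bn, conv1_scale and conv1_relu.
    ConvBNLayer(net, 'data', 'conv1', use_bn=True, use_relu=True,
                num_output=64, kernel_size=3, pad=1, stride=2)
    return net.to_proto()
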
def tdm(net, current_from_layer, high_from_layer, featuremap_num, freeze=False):
    if freeze:
        kwargs = {
            'param': [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)],
            'weight_filler': dict(type='gaussian', std=0.01),
            'bias_filler': dict(type='constant', value=0)
        }
        de_kwargs = {
            'param': [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)],
        }
    else:
        kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
            'weight_filler': dict(type='gaussian', std=0.01),
            'bias_filler': dict(type='constant', value=0)
        }
        de_kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        }
    # net[high_from_layer + '_lateral'] = L.Convolution(net[high_from_layer],
    #     num_output=128, kernel_size=1, pad=0, stride=1, group=1, **kwargs)
    net[high_from_layer + '_Deconv'] = L.Deconvolution(
        net[high_from_layer],
        convolution_param=dict(weight_filler=dict(type='gaussian', std=0.01),
                               bias_filler=dict(type='constant', value=0),
                               num_output=128, kernel_size=2, pad=0, stride=2),
        **de_kwargs)
    net[current_from_layer + '_addtop'] = L.Convolution(
        net[current_from_layer], num_output=128, kernel_size=1, pad=0, stride=1,
        group=1, **kwargs)
    # net[current_from_layer + '_addtop' + '_relu'] = L.ReLU(
    #     net[current_from_layer + '_addtop'], in_place=True)
    net['featuremap' + str(featuremap_num)] = L.Eltwise(
        net[current_from_layer + '_addtop'], net[high_from_layer + '_Deconv'])
    net['featuremap' + str(featuremap_num) + '_relu'] = L.ReLU(
        net['featuremap' + str(featuremap_num)], in_place=True)
    return net, 'featuremap' + str(featuremap_num)

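# Minimal sketch (assumed shapes): fuse a fine feature map with an upsampled
# coarse one. The c4/c5 blobs are illustrative stand-ins; the 2x Deconvolution
# brings 19x19 up to 38x38 so the Eltwise sum matches the 1x1 lateral branch.
def _example_tdm():
    net = caffe.NetSpec()
    net.c4 = L.Input(input_param=dict(shape=dict(dim=[1, 512, 38, 38])))
    net.c5 = L.Input(input_param=dict(shape=dict(dim=[1, 1024, 19, 19])))
    net, top = tdm(net, current_from_layer='c4', high_from_layer='c5',
                   featuremap_num=4)
    return net.to_proto(), top
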
def convert(keras_model, keras_format, caffe_net_file, caffe_params_file):
    caffe_net = caffe.NetSpec()
    net_params = dict()
    outputs = dict()
    shape = ()
    input_str = ''

    # tensorflow 2.0
    if len(caffe_net.tops) == 0 and False:
        input_name = 'data'
        input_shape = [1, keras_model.input.shape[1], keras_model.input.shape[2],
                       keras_model.input.shape[3]]
        input_param = {'shape': {'dim': list(input_shape)}}
        caffe_net[input_name] = L.Input(input_param=input_param)
        input_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input_name + '"', 1, input_shape[1], input_shape[2], input_shape[3])
        top = keras_model.input.name
        outputs[top] = input_name

    for layer in keras_model.layers:
        name = layer.name
        layer_type = type(layer).__name__
        config = layer.get_config()
        blobs = layer.get_weights()
        blobs_num = len(blobs)

        if type(layer.output) == list:
            raise Exception('Layers with multiple outputs are not supported')
        else:
            top = layer.output.name
        if type(layer.input) != list:
            bottom = layer.input.name

        # first we need to create Input layer
        '''
        if layer_type == 'InputLayer' or len(caffe_net.tops) == 0:
            input_name = 'data'
            caffe_net[input_name] = L.Layer()
            input_shape = config['batch_input_shape']
            input_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
                '"' + input_name + '"', 1, input_shape[3], input_shape[1], input_shape[2])
            outputs[layer.input.name] = input_name
            if layer_type == 'InputLayer':
                continue
        '''

        if layer_type == 'InputLayer' and len(caffe_net.tops) == 0:
            name = 'data'
            input_shape = config['batch_input_shape']
            if "first" in keras_format:
                input_shape = [1, input_shape[1], input_shape[2], input_shape[3]]
            else:
                input_shape = [1, input_shape[3], input_shape[1], input_shape[2]]
            input_param = {'shape': {'dim': list(input_shape)}}
            caffe_net[name] = L.Input(input_param=input_param)

        elif layer_type == 'Conv2D' or layer_type == 'Convolution2D':
            strides = config['strides']
            kernel_size = config['kernel_size']
            kwargs = {'num_output': config['filters']}
            if kernel_size[0] == kernel_size[1]:
                kwargs['kernel_size'] = kernel_size[0]
            else:
                kwargs['kernel_h'] = kernel_size[0]
                kwargs['kernel_w'] = kernel_size[1]
            if strides[0] == strides[1]:
                kwargs['stride'] = strides[0]
            else:
                kwargs['stride_h'] = strides[0]
                kwargs['stride_w'] = strides[1]
            if not config['use_bias']:
                kwargs['bias_term'] = False
                # kwargs['param'] = [dict(lr_mult=0)]
            else:
                # kwargs['param'] = [dict(lr_mult=0), dict(lr_mult=0)]
                pass
            # set_padding is this converter's helper mapping Keras 'same'/'valid'
            # padding onto Caffe pad parameters.
            set_padding(config, layer.input_shape, kwargs)
            caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], **kwargs)
            blobs[0] = np.array(blobs[0]).transpose(3, 2, 0, 1)
            net_params[name] = blobs
            if config['activation'] == 'relu':
                name_s = name + 's'
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)
            elif config['activation'] == 'sigmoid':
                name_s = name + 's'
                caffe_net[name_s] = L.Sigmoid(caffe_net[name], in_place=True)
            elif config['activation'] == 'linear':
                # do nothing
                pass
            else:
                raise Exception('Unsupported activation ' + config['activation'])

        elif layer_type == 'Permute':
            # skip the layer
            name = outputs[bottom]

        elif layer_type == 'DepthwiseConv2D':
            strides = config['strides']
            kernel_size = config['kernel_size']
            kwargs = {'num_output': layer.input_shape[3]}
            if kernel_size[0] == kernel_size[1]:
                kwargs['kernel_size'] = kernel_size[0]
            else:
                kwargs['kernel_h'] = kernel_size[0]
                kwargs['kernel_w'] = kernel_size[1]
            if strides[0] == strides[1]:
                kwargs['stride'] = strides[0]
            else:
                kwargs['stride_h'] = strides[0]
                kwargs['stride_w'] = strides[1]
            set_padding(config, layer.input_shape, kwargs)
            kwargs['group'] = layer.input_shape[3]
            kwargs['bias_term'] = False
            caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], **kwargs)
            blob = np.array(blobs[0]).transpose(2, 3, 0, 1)
            blob.shape = (1,) + blob.shape
            net_params[name] = blob
            if config['activation'] == 'relu':
                name_s = name + 's'
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)
            elif config['activation'] == 'sigmoid':
                name_s = name + 's'
                caffe_net[name_s] = L.Sigmoid(caffe_net[name], in_place=True)
            elif config['activation'] == 'linear':
                # do nothing
                pass
            else:
                raise Exception('Unsupported activation ' + config['activation'])

        elif layer_type == 'SeparableConv2D':
            strides = config['strides']
            kernel_size = config['kernel_size']
            kwargs = {'num_output': layer.input_shape[3]}
            if kernel_size[0] == kernel_size[1]:
                kwargs['kernel_size'] = kernel_size[0]
            else:
                kwargs['kernel_h'] = kernel_size[0]
                kwargs['kernel_w'] = kernel_size[1]
            if strides[0] == strides[1]:
                kwargs['stride'] = strides[0]
            else:
                kwargs['stride_h'] = strides[0]
                kwargs['stride_w'] = strides[1]
            set_padding(config, layer.input_shape, kwargs)
            kwargs['group'] = layer.input_shape[3]
            kwargs['bias_term'] = False
            caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], **kwargs)
            blob = np.array(blobs[0]).transpose(2, 3, 0, 1)
            blob.shape = (1,) + blob.shape
            net_params[name] = blob
            # pointwise part of the separable convolution
            name2 = name + '_'
            kwargs = {'num_output': config['filters'], 'kernel_size': 1,
                      'bias_term': config['use_bias']}
            caffe_net[name2] = L.Convolution(caffe_net[name], **kwargs)
            if config['use_bias']:
                blob2 = []
                blob2.append(np.array(blobs[1]).transpose(3, 2, 0, 1))
                blob2.append(np.array(blobs[2]))
                blob2[0].shape = (1,) + blob2[0].shape
            else:
                blob2 = np.array(blobs[1]).transpose(3, 2, 0, 1)
                blob2.shape = (1,) + blob2.shape
            net_params[name2] = blob2
            name = name2

        elif layer_type == 'BatchNormalization':
            param = dict()
            variance = np.array(blobs[-1])
            mean = np.array(blobs[-2])
            if config['scale']:
                gamma = np.array(blobs[0])
                sparam = [dict(lr_mult=1), dict(lr_mult=1)]
            else:
                gamma = np.ones(mean.shape, dtype=np.float32)
                # sparam = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=1, decay_mult=1)]
                sparam = [dict(lr_mult=0), dict(lr_mult=1)]
                # sparam = [dict(lr_mult=0), dict(lr_mult=0)]
            if config['center']:
                beta = np.array(blobs[-3])
                param['bias_term'] = True
            else:
                beta = np.zeros(mean.shape, dtype=np.float32)
                param['bias_term'] = False
            caffe_net[name] = L.BatchNorm(caffe_net[outputs[bottom]], in_place=True)
            # param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=1),
            #        dict(lr_mult=0, decay_mult=0)])
            # param=[dict(lr_mult=1), dict(lr_mult=1), dict(lr_mult=0)])
            net_params[name] = (mean, variance, np.array(1.0))
            name_s = name + 's'
            caffe_net[name_s] = L.Scale(caffe_net[name], in_place=True, param=sparam,
                                        scale_param={'bias_term': config['center']})
            net_params[name_s] = (gamma, beta)

        elif layer_type == 'Dense':
            caffe_net[name] = L.InnerProduct(caffe_net[outputs[bottom]],
                                             num_output=config['units'],
                                             weight_filler=dict(type='xavier'))
            if config['use_bias']:
                weight = np.array(blobs[0]).transpose(1, 0)
                if False and type(layer._inbound_nodes[0].inbound_layers[0]).__name__ == 'Flatten':
                    flatten_shape = layer._inbound_nodes[0].inbound_layers[0].input_shape
                    for i in range(weight.shape[0]):
                        weight[i] = np.array(
                            weight[i].reshape(flatten_shape[1], flatten_shape[2],
                                              flatten_shape[3]).transpose(2, 0, 1)
                            .reshape(weight.shape[1]))
                net_params[name] = (weight, np.array(blobs[1]))
            else:
                weight = np.array(blobs[0]).transpose(1, 0)
                net_params[name] = (weight, np.zeros(weight.shape[0], dtype=weight.dtype))
            name_s = name + 's'
            if config['activation'] == 'softmax':
                caffe_net[name_s] = L.Softmax(caffe_net[name], in_place=True)
            elif config['activation'] == 'relu':
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)

        elif layer_type == 'Activation':
            if config['activation'] == 'relu':
                # caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]], in_place=True)
                if len(layer.input.consumers()) > 1:
                    caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]])
                else:
                    caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]], in_place=True)
            elif config['activation'] == 'relu6':
                # TODO
                caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]])
            elif config['activation'] == 'softmax':
                caffe_net[name] = L.Softmax(caffe_net[outputs[bottom]], in_place=True)
            elif config['activation'] == 'sigmoid':
                # name_s = name + 's'
                caffe_net[name] = L.Sigmoid(caffe_net[outputs[bottom]], in_place=True)
            # used to finish the image normalization.
            elif config['activation'] == 'linear':
                name = name + '_linear'
                caffe_net[name] = L.Scale(caffe_net[outputs[bottom]],
                                          filler=dict(type="constant", value=0.003921))
            else:
                raise Exception('Unsupported activation ' + config['activation'])

        elif layer_type == 'Cropping2D':
            shape = layer.output_shape
            ddata = L.DummyData(shape=dict(dim=[1, shape[3], shape[1], shape[2]]))
            layers = []
            layers.append(caffe_net[outputs[bottom]])
            layers.append(ddata)
            # TODO
            caffe_net[name] = L.Crop(*layers)

        elif layer_type == 'Concatenate' or layer_type == 'Merge':
            layers = []
            for i in layer.input:
                layers.append(caffe_net[outputs[i.name]])
            caffe_net[name] = L.Concat(*layers, axis=1)

        elif layer_type == 'Add':
            '''PROD = 0; SUM = 1; MAX = 2;'''
            layers = []
            for i in layer.input:
                layers.append(caffe_net[outputs[i.name]])
            caffe_net[name] = L.Eltwise(*layers, eltwise_param={'operation': 1})

        elif layer_type == 'Flatten':
            caffe_net[name] = L.Flatten(caffe_net[outputs[bottom]])

        elif layer_type == 'Reshape':
            shape = config['target_shape']
            if len(shape) == 3:
                # shape = (layer.input_shape[0], shape[2], shape[0], shape[1])
                shape = (1, shape[2], shape[0], shape[1])
            elif len(shape) == 1:
                # shape = (layer.input_shape[0], 1, 1, shape[0])
                shape = (1, 1, 1, shape[0])
            caffe_net[name] = L.Reshape(
                caffe_net[outputs[bottom]],
                reshape_param={'shape': {'dim': list(shape)}})

        elif layer_type == 'MaxPooling2D' or layer_type == 'AveragePooling2D':
            kwargs = {}
            if layer_type == 'MaxPooling2D':
                kwargs['pool'] = P.Pooling.MAX
            else:
                kwargs['pool'] = P.Pooling.AVE
            pool_size = config['pool_size']
            strides = config['strides']
            if pool_size[0] != pool_size[1]:
                raise Exception('Unsupported pool_size')
            if strides[0] != strides[1]:
                raise Exception('Unsupported strides')
            set_padding(config, layer.input_shape, kwargs)
            caffe_net[name] = L.Pooling(caffe_net[outputs[bottom]],
                                        kernel_size=pool_size[0],
                                        stride=strides[0], **kwargs)

        elif layer_type == 'Dropout':
            caffe_net[name] = L.Dropout(
                caffe_net[outputs[bottom]],
                dropout_param=dict(dropout_ratio=config['rate']))

        elif layer_type == 'GlobalAveragePooling2D':
            caffe_net[name] = L.Pooling(caffe_net[outputs[bottom]],
                                        pool=P.Pooling.AVE,
                                        pooling_param=dict(global_pooling=True))

        elif layer_type == 'UpSampling2D':
            if config['size'][0] != config['size'][1]:
                raise Exception('Unsupported upsampling factor')
            factor = config['size'][0]
            kernel_size = 2 * factor - factor % 2
            stride = factor
            pad = int(math.ceil((factor - 1) / 2.0))
            channels = layer.input_shape[-1]
            caffe_net[name] = L.Deconvolution(
                caffe_net[outputs[bottom]],
                convolution_param=dict(num_output=channels, group=channels,
                                       kernel_size=kernel_size, stride=stride,
                                       pad=pad,
                                       weight_filler=dict(type='bilinear'),
                                       bias_term=False),
                param=dict(lr_mult=0, decay_mult=0))

        elif layer_type == 'LeakyReLU':
            caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]],
                                     negative_slope=config['alpha'], in_place=True)

        # TODO
        elif layer_type == 'ZeroPadding2D':
            padding = config['padding']
            # ch = layer.input_shape[3]
            # caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]],
            #     num_output=ch, kernel_size=1, stride=1, group=ch,
            #     pad_h=padding[0][0], pad_w=padding[1][0],
            #     convolution_param=dict(bias_term=False))
            # params = np.ones((1, ch, 1, 1))
            # net_params[name] = np.ones((1, ch, 1, 1, 1))
            # net_params[name] = np.ones(layer.output_shape)
            caffe_net[name] = L.Pooling(caffe_net[outputs[bottom]],
                                        kernel_size=1, stride=1,
                                        pad_h=padding[0][0] + padding[0][1],
                                        pad_w=padding[1][0] + padding[1][1],
                                        pool=P.Pooling.AVE)

        else:
            raise Exception('Unsupported layer type: ' + layer_type)

        outputs[top] = name

    # replace empty layer with input blob
    # net_proto = input_str + '\n' + 'layer {' + 'layer {'.join(
    #     str(caffe_net.to_proto()).split('layer {')[2:])
    net_proto = str(caffe_net.to_proto())

    f = open(caffe_net_file, 'w')
    f.write(net_proto)
    f.close()

    # Copy the collected Keras weights into the freshly built Caffe net.
    caffe_model = caffe.Net(caffe_net_file, caffe.TEST)
    for layer in caffe_model.params.keys():
        print(layer)
        if 'up_sampling2d' in layer:
            continue
        if "activation_linear" in layer:
            continue
        for n in range(0, len(caffe_model.params[layer])):
            caffe_model.params[layer][n].data[...] = net_params[layer][n]
    caffe_model.save(caffe_params_file)

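# A hypothetical driver for convert(). VGG16 uses only layer types handled
# above (InputLayer, Conv2D with 'relu' activations, MaxPooling2D, Flatten,
# Dense); the file names and the 'channels_last' format tag are illustrative,
# not from the original script.
def _example_convert_vgg16():
    from tensorflow import keras
    model = keras.applications.VGG16(weights=None)
    convert(model, 'channels_last', 'vgg16.prototxt', 'vgg16.caffemodel')
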
def fcn(split):
    n = caffe.NetSpec()
    n.data, n.label = L.Python(
        module='pascalcontext_layers', layer='PASCALContextSegDataLayer', ntop=2,
        param_str=str(dict(voc_dir='../../data/pascal',
                           context_dir='../../data/pascal-context',
                           split=split, seed=1337)))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)
    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)
    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(n.drop7, num_output=60, kernel_size=1, pad=0,
                               param=[dict(lr_mult=1, decay_mult=1),
                                      dict(lr_mult=2, decay_mult=0)])
    n.upscore2 = L.Deconvolution(n.score_fr,
                                 convolution_param=dict(num_output=60, kernel_size=4,
                                                        stride=2, bias_term=False),
                                 param=[dict(lr_mult=0)])
    n.score_pool4 = L.Convolution(n.pool4, num_output=60, kernel_size=1, pad=0,
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)])
    n.score_pool4c = crop(n.score_pool4, n.upscore2)
    n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c, operation=P.Eltwise.SUM)
    n.upscore_pool4 = L.Deconvolution(n.fuse_pool4,
                                      convolution_param=dict(num_output=60,
                                                             kernel_size=4, stride=2,
                                                             bias_term=False),
                                      param=[dict(lr_mult=0)])
    n.score_pool3 = L.Convolution(n.pool3, num_output=60, kernel_size=1, pad=0,
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)])
    n.score_pool3c = crop(n.score_pool3, n.upscore_pool4)
    n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c, operation=P.Eltwise.SUM)
    n.upscore8 = L.Deconvolution(n.fuse_pool3,
                                 convolution_param=dict(num_output=60, kernel_size=16,
                                                        stride=8, bias_term=False),
                                 param=[dict(lr_mult=0)])
    n.score = crop(n.upscore8, n.data)
    n.loss = L.SoftmaxWithLoss(n.score, n.label,
                               loss_param=dict(normalize=False, ignore_label=255))
    return n.to_proto()

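# Typical driver for this style of net generator (sketch): write one prototxt
# per split, mirroring the FCN repos' net.py scripts. conv_relu/max_pool/crop
# are the usual FCN helper functions assumed to be defined in this module.
def _example_write_fcn_prototxts():
    for split in ('train', 'val'):
        with open('{}.prototxt'.format(split), 'w') as f:
            f.write(str(fcn(split)))
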
def long_range_unet(name):
    # Start a network
    net = caffe.NetSpec()
    # Data input layer
    net.data = L.MemoryData(dim=[1, 1], ntop=1)
    n_channels = 12  # TODO
    # Label input layer
    # I guess the second number is the number of channels
    net.aff_label = L.MemoryData(dim=[1, n_channels], ntop=1,
                                 include=[dict(phase=0)])
    # Components label layer
    # No idea about this one...
    net.comp_label = L.MemoryData(dim=[1, 2], ntop=1,
                                  include=[dict(phase=0, stage='malis')])
    # Scale input layer
    # again second = channels ?!
    net.scale = L.MemoryData(dim=[1, n_channels], ntop=1,
                             include=[dict(phase=0, stage='euclid')])
    # Silence the not needed data and label integer values
    # is this correct ????
    net.nhood = L.MemoryData(dim=[1, 1, n_channels, 3], ntop=1,
                             include=[dict(phase=0, stage='malis')])
    # USK-Net metalayer
    net.unet = ML.UNet(net.data, fmap_start=12, depth=3,
                       fmap_inc_rule=lambda fmaps: int(math.ceil(float(fmaps) * 5)),
                       fmap_dec_rule=lambda fmaps: int(math.ceil(float(fmaps) / 5)),
                       downsampling_strategy=[[1, 3, 3], [1, 3, 3], [1, 3, 3]],
                       dropout=0.0, use_deconv_uppath=False, use_stable_upconv=True)
    net.aff_out = L.Convolution(net.unet, kernel_size=[1], num_output=n_channels,
                                param=[dict(lr_mult=1), dict(lr_mult=2)],
                                weight_filler=dict(type='msra'),
                                bias_filler=dict(type='constant'))
    # Choose output activation functions
    net.aff_pred = L.Sigmoid(net.aff_out, ntop=1, in_place=False)
    # Choose a loss function and input data, label and scale inputs.
    # Only include it during the training phase (phase = 0).
    net.euclid_loss = L.EuclideanLoss(net.aff_pred, net.aff_label, net.scale,
                                      ntop=0, loss_weight=1.0,
                                      include=[dict(phase=0, stage='euclid')])
    net.malis_loss = L.MalisLoss(net.aff_pred, net.aff_label, net.comp_label,
                                 net.nhood, ntop=0, loss_weight=1.0,
                                 include=[dict(phase=0, stage='malis')])
    # Fix the spatial input dimensions. Note that only spatial dimensions get
    # modified; the minibatch size and the channels/feature maps must be set
    # correctly by the user (since this code can definitely not figure out the
    # user's intent). If the code does not seem to terminate, the issue is most
    # likely a wrong number of feature maps / channels in either the
    # MemoryData layers or the network output.
    # This function takes as input:
    # - The network
    # - A list of other inputs to test (note: the nhood input is static and
    #   not spatially testable, thus excluded here)
    # - A list of the maximal shapes for each input
    # - A list of spatial dependencies; here [-1, 0] means the Y axis is a free
    #   parameter, and the X axis should be identical to the Y axis.
    caffe.fix_input_dims(net,
                         [net.data, net.aff_label, net.comp_label, net.scale],
                         max_shapes=[[84, 268, 268], [100, 100, 100],
                                     [100, 100, 100], [100, 100, 100]],
                         shape_coupled=[-1, -1, 1])

    protonet = net.to_proto()
    protonet.name = name
    # Store the network as prototxt
    with open(protonet.name + '.prototxt', 'w') as f:
        print(protonet, file=f)

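# Example call (sketch). ML.UNet and caffe.fix_input_dims are metalayer
# features of a Caffe fork (e.g. caffe_gt), not of stock BVLC Caffe; the
# network name below is illustrative.
def _example_long_range_unet():
    long_range_unet('long_range_unet')  # writes long_range_unet.prototxt
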
def get_me_my_prototxt(m):
    """Build a Caffe prototxt mirroring a torchvision SqueezeNet model `m`.

    The parameter is named `m` to match its use throughout the body (the
    original signature said `model` but never used it). The hard-coded feature
    indices follow torchvision's squeezenet1_1 layout.
    """
    n = caffe.NetSpec()
    n.data = L.Input(input_param={'shape': {'dim': [1, 3, 224, 224]}})

    def conv_from_torch(name, t, bottom):
        # Translate a torch Conv2d into an L.Convolution with matching
        # hyperparameters.
        setattr(n, name, L.Convolution(bottom, num_output=t.out_channels,
                                       kernel_size=t.kernel_size[0],
                                       stride=t.stride[0], pad=t.padding[0],
                                       group=t.groups))
        return getattr(n, name)

    def fire(idx, t, bottom):
        # squeeze -> (expand1x1 | expand3x3) -> concat, as in torchvision's
        # Fire module; each conv gets an in-place ReLU.
        p = 'fire{}'.format(idx)
        squeeze = conv_from_torch(p + '_squeeze', t.squeeze, bottom)
        setattr(n, p + '_squeeze_relu', L.ReLU(squeeze, in_place=True))
        e1 = conv_from_torch(p + '_expand1x1', t.expand1x1, squeeze)
        setattr(n, p + '_expand1x1_relu', L.ReLU(e1, in_place=True))
        e3 = conv_from_torch(p + '_expand3x3', t.expand3x3, squeeze)
        setattr(n, p + '_expand3x3_relu', L.ReLU(e3, in_place=True))
        setattr(n, p + '_concat', L.Concat(e1, e3))
        return getattr(n, p + '_concat')

    # Features: pooling layers sit at indices 2, 5 and 8; Fire modules at
    # 3, 4, 6, 7, 9, 10, 11 and 12.
    pool_names = {2: 'pool1', 5: 'pool2', 8: 'pool3'}
    pool_bottoms = {2: 'conv1', 5: 'fire2_concat', 8: 'fire4_concat'}
    fire_numbers = {3: 1, 4: 2, 6: 3, 7: 4, 9: 5, 10: 6, 11: 7, 12: 8}
    fire_bottoms = {3: 'pool1', 4: 'fire1_concat', 6: 'pool2', 7: 'fire3_concat',
                    9: 'pool3', 10: 'fire5_concat', 11: 'fire6_concat',
                    12: 'fire7_concat'}
    for i_ in range(len(m.features)):
        f = m.features[i_]
        if isinstance(f, torch.nn.modules.conv.Conv2d) and i_ == 0:
            # first convolution layer
            conv_from_torch('conv1', f, n.data)
        elif isinstance(f, torch.nn.modules.activation.ReLU) and i_ == 1:
            n.relu1 = L.ReLU(n.conv1, in_place=True)
        elif isinstance(f, torch.nn.modules.pooling.MaxPool2d) and i_ in pool_names:
            setattr(n, pool_names[i_],
                    L.Pooling(getattr(n, pool_bottoms[i_]),
                              kernel_size=f.kernel_size, stride=f.stride,
                              pool=P.Pooling.MAX))
        elif isinstance(f, torchvision.models.squeezenet.Fire):
            fire(fire_numbers[i_], f, getattr(n, fire_bottoms[i_]))

    # Classifier: dropout -> final conv -> relu -> average pool -> softmax.
    for i_ in range(len(m.classifier)):
        c = m.classifier[i_]
        if isinstance(c, torch.nn.modules.dropout.Dropout):
            n.drop1 = L.Dropout(n.fire8_concat,
                                dropout_param=dict(dropout_ratio=0.5))
        elif isinstance(c, torch.nn.modules.conv.Conv2d):
            n.conv2 = L.Convolution(n.drop1, num_output=c.out_channels,
                                    kernel_size=c.kernel_size[0],
                                    stride=c.stride[0], pad=c.padding[0])
        elif isinstance(c, torch.nn.modules.activation.ReLU):
            n.relu2 = L.ReLU(n.conv2, in_place=True)
        elif isinstance(c, torch.nn.modules.pooling.AvgPool2d):
            n.pool_final = L.Pooling(n.conv2, kernel_size=c.kernel_size,
                                     stride=c.stride, pool=P.Pooling.AVE)
    n.prob = L.Softmax(n.pool_final)
    return n.to_proto()

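# Illustrative call. Note the AvgPool2d check above: it assumes an older
# torchvision where squeezenet1_1's classifier ends in nn.AvgPool2d; newer
# releases use AdaptiveAvgPool2d, which this code does not handle.
def _example_squeezenet_prototxt():
    m = torchvision.models.squeezenet1_1()
    with open('squeezenet1_1.prototxt', 'w') as f:
        f.write(str(get_me_my_prototxt(m)))
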
def conv_factory(bottom, ks, nout, stride=1, pad=0):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=nout,
                         pad=pad, bias_term=True, weight_filler=dict(type='msra'),
                         bias_filler=dict(type='constant'))
    batch_norm = L.BatchNorm(conv, in_place=True,
                             param=[dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True)
    return scale

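# Minimal sketch: chain two Conv + BatchNorm + Scale blocks from conv_factory.
# The blob names and shapes are illustrative.
def _example_conv_factory():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=[1, 3, 32, 32])))
    n.block1 = conv_factory(n.data, ks=3, nout=16, stride=1, pad=1)
    n.block2 = conv_factory(n.block1, ks=3, nout=32, stride=2, pad=1)
    return n.to_proto()
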
def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
                         seed=1337)
    if split == 'train':
        pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
        pylayer = 'SBDDSegDataLayer'
    else:
        pydata_params['voc_dir'] = '../data/pascal/VOC2011'
        pylayer = 'VOCSegDataLayer'
    n.data, n.label = L.Python(module='voc_layers', layer=pylayer, ntop=2,
                               param_str=str(pydata_params))

    # the base net
    n.conv1, n.relu1 = conv_relu(n.data, 96, ks=11, stride=4, pad=100)
    n.pool1 = max_pool(n.relu1)
    n.lrn1 = lrn(n.pool1)
    n.conv2, n.relu2 = conv_relu(n.lrn1, 256, ks=5, stride=1, pad=2, group=2)
    n.pool2 = max_pool(n.relu2)
    n.lrn2 = lrn(n.pool2)
    n.conv3, n.relu3 = conv_relu(n.lrn2, 384, ks=3, stride=1, pad=1)
    n.conv4, n.relu4 = conv_relu(n.relu3, 384, ks=3, stride=1, pad=1, group=2)
    n.conv5, n.relu5 = conv_relu(n.relu4, 256, ks=3, stride=1, pad=1, group=2)
    n.pool5 = max_pool(n.relu5)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=6, stride=1, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, stride=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
                               stride=1,
                               param=[dict(lr_mult=1, decay_mult=1),
                                      dict(lr_mult=2, decay_mult=0)])
    n.upscore2 = L.Deconvolution(n.score_fr,
                                 convolution_param=dict(num_output=21,
                                                        kernel_size=5, stride=2,
                                                        bias_term=False),
                                 param=[dict(lr_mult=0)])
    n.score_pool2 = L.Convolution(n.lrn2, num_output=21, kernel_size=1, pad=0,
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)])
    n.score_pool2c = crop(n.score_pool2, n.upscore2)
    n.fuse_pool2 = L.Eltwise(n.upscore2, n.score_pool2c, operation=P.Eltwise.SUM)
    n.upscore16 = L.Deconvolution(n.fuse_pool2,
                                  convolution_param=dict(num_output=21,
                                                         kernel_size=31, stride=16,
                                                         bias_term=False),
                                  param=[dict(lr_mult=0)])
    n.score = crop(n.upscore16, n.data)
    n.loss = L.SoftmaxWithLoss(n.score, n.label,
                               loss_param=dict(normalize=True, ignore_label=255))
    return n.to_proto()

def vgg_net(mode, batch_size=1):
    # This is not the whole network! Missing ReLU etc.
    if mode == "cl":
        pad_init = 1
    elif mode == "sg":
        pad_init = 96
    else:
        raise ValueError
    n = caffe.NetSpec()
    p = 1
    pl = P.Pooling.MAX
    n.data = L.DummyData(shape=[dict(dim=[batch_size, 3, 224, 224])], ntop=1)
    n.conv1_1 = L.Convolution(n.data, kernel_size=3, pad=pad_init, num_output=64)
    n.conv1_2 = L.Convolution(n.conv1_1, kernel_size=3, pad=p, num_output=64)
    n.pool1 = L.Pooling(n.conv1_2, kernel_size=2, stride=2, pool=pl)
    n.conv2_1 = L.Convolution(n.pool1, kernel_size=3, pad=p, num_output=128)
    n.conv2_2 = L.Convolution(n.conv2_1, kernel_size=3, pad=p, num_output=128)
    n.pool2 = L.Pooling(n.conv2_2, kernel_size=2, stride=2, pool=pl)
    n.conv3_1 = L.Convolution(n.pool2, kernel_size=3, pad=p, num_output=256)
    n.conv3_2 = L.Convolution(n.conv3_1, kernel_size=3, pad=p, num_output=256)
    n.conv3_3 = L.Convolution(n.conv3_2, kernel_size=3, pad=p, num_output=256)
    n.pool3 = L.Pooling(n.conv3_3, kernel_size=2, stride=2, pool=pl)
    n.conv4_1 = L.Convolution(n.pool3, kernel_size=3, pad=p, num_output=512)
    n.conv4_2 = L.Convolution(n.conv4_1, kernel_size=3, pad=p, num_output=512)
    n.conv4_3 = L.Convolution(n.conv4_2, kernel_size=3, pad=p, num_output=512)
    n.pool4 = L.Pooling(n.conv4_3, kernel_size=2, stride=2, pool=pl)
    n.conv5_1 = L.Convolution(n.pool4, kernel_size=3, pad=p, num_output=512)
    n.conv5_2 = L.Convolution(n.conv5_1, kernel_size=3, pad=p, num_output=512)
    n.conv5_3 = L.Convolution(n.conv5_2, kernel_size=3, pad=p, num_output=512)
    n.pool5 = L.Pooling(n.conv5_3, kernel_size=2, stride=2, pool=pl)
    if mode == "cl":
        n.fc6 = L.InnerProduct(n.pool5, num_output=4096)
        n.fc7 = L.InnerProduct(n.fc6, num_output=4096)
    elif mode == "sg":
        n.fc6 = L.Convolution(n.pool5, kernel_size=7, pad=0, num_output=4096)
        n.fc7 = L.Convolution(n.fc6, kernel_size=1, pad=0, num_output=4096)
    else:
        raise ValueError
    return n

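# Sketch: emit both variants. "cl" keeps the InnerProduct classifier head;
# "sg" is the fully-convolutional variant whose initial pad of 96 follows the
# FCN trick. File names are illustrative.
def _example_vgg_variants():
    for mode in ('cl', 'sg'):
        with open('vgg_{}.prototxt'.format(mode), 'w') as f:
            f.write(str(vgg_net(mode).to_proto()))
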
def attribute_cam_network(output_net, train=True, num_classes=25,
                          learn_all=False, batch_size=32):
    """
    Creates attribute_cam_network from deep_attribute_network. Follows the
    recommended architecture from the paper "Learning Deep Features for
    Discriminative Localization" (https://goo.gl/vWgH3w).
    """
    n = caffe.NetSpec()
    if train:
        n.data, n.dummy_label1 = L.Data(
            batch_size=batch_size, backend=P.Data.LMDB,
            source=project_root + train_data_lmdb,
            transform_param=dict(mean_value=[104, 117, 123], mirror=True,
                                 crop_size=224),
            ntop=2)
        n.label_1, n.dummy_label2 = L.Data(
            batch_size=batch_size, backend=P.Data.LMDB,
            source=project_root + train_label_lmdb, ntop=2)
        n.label = L.Flatten(n.label_1)
        n.silence = L.Silence(n.dummy_label1, n.dummy_label2, ntop=0)
    else:
        n.data, n.dummy_label1 = L.Data(
            batch_size=batch_size, backend=P.Data.LMDB,
            source=project_root + val_data_lmdb,
            transform_param=dict(mean_value=[104, 117, 123], mirror=False,
                                 crop_size=224),
            ntop=2)
        n.label_1, n.dummy_label2 = L.Data(
            batch_size=batch_size, backend=P.Data.LMDB,
            source=project_root + val_label_lmdb, ntop=2)
        n.label = L.Flatten(n.label_1)
        n.silence = L.Silence(n.dummy_label1, n.dummy_label2, ntop=0)

    param = learned_param if learn_all else frozen_param
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 3, 64, pad=1, param=param)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 3, 64, pad=1, param=param)
    n.pool1 = max_pool(n.relu1_2, 2, stride=2)
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 3, 128, pad=1, param=param)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 3, 128, pad=1, param=param)
    n.pool2 = max_pool(n.relu2_2, 2, stride=2)
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 3, 256, pad=1, param=param)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 3, 256, pad=1, param=param)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 3, 256, pad=1, param=param)
    n.pool3 = max_pool(n.relu3_3, 2, stride=2)
    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 3, 512, pad=1, param=param)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 3, 512, pad=1, param=param)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 3, 512, pad=1, param=param)
    n.pool4 = max_pool(n.relu4_3, 2, stride=2)
    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 3, 512, pad=1, param=param)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 3, 512, pad=1, param=param)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 3, 512, pad=1, param=param)

    n.conv6 = L.Convolution(n.relu5_3, kernel_size=3, stride=1, num_output=512,
                            pad=1, group=1, param=learned_param,
                            weight_filler=dict(type='gaussian', std=0.01),
                            bias_filler=dict(type='constant', value=0.1))
    n.gap7 = L.Pooling(n.conv6, pool=P.Pooling.AVE, global_pooling=True)
    n.fc8_ = L.InnerProduct(n.gap7, num_output=num_classes, param=learned_param)
    if not train:
        n.scores = L.Sigmoid(n.fc8_)
        n.accuracy = L.MultiLabelAccuracy(n.scores, n.label)
    n.class_weights = L.LossWeight(n.label)
    n.loss = L.WeightedSigmoidCrossEntropyLoss(n.fc8_, n.label, n.class_weights)

    with open(project_root + 'out/' + output_net, 'w') as f:
        f.write(str(n.to_proto()))

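# Illustrative calls (assumes the module-level globals used above exist:
# project_root, the LMDB path variables, learned_param/frozen_param, and the
# conv_relu/max_pool helpers; MultiLabelAccuracy, LossWeight and
# WeightedSigmoidCrossEntropyLoss require a Caffe build that provides them).
def _example_attribute_cam():
    attribute_cam_network('attribute_cam_train.prototxt', train=True)
    attribute_cam_network('attribute_cam_val.prototxt', train=False)
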
def fcn(obj_cls, part, split):
    n = caffe.NetSpec()
    n.data, n.label = L.Python(
        module='pascalpart_layers', layer='PASCALPartSegDataLayer', ntop=2,
        param_str=str(dict(voc_dir='/home/cv/hdl/caffe/data/pascal/VOC',
                           part_dir='/home/cv/hdl/caffe/data/pascal/pascal-part',
                           obj_cls=obj_cls, part=part, split=split, seed=1337)))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)
    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)
    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(n.drop7, num_output=25, kernel_size=1, pad=0,
                               param=[dict(lr_mult=1, decay_mult=1),
                                      dict(lr_mult=2, decay_mult=0)])
    n.upscore = L.Deconvolution(n.score_fr,
                                convolution_param=dict(num_output=25,
                                                       kernel_size=64, stride=32,
                                                       bias_term=False),
                                param=[dict(lr_mult=0)])
    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(n.score, n.label,
                               loss_param=dict(normalize=False, ignore_label=255))
    return n.to_proto()

def match_block(bottom, base_output=64, stride=2, card=32):
    """
    input:  4*base_output x n x n
    output: 4*base_output x n x n
    :param bottom: bottom layer
    :param base_output: base num_output of branch2
    :return: layers
    """
    # Integer division (//) keeps num_output an int under Python 3 as well.
    conv1 = L.Convolution(bottom, num_output=base_output * (card // 16),
                          kernel_size=1, stride=1, pad=0, bias_term=False,
                          param=[dict(lr_mult=1, decay_mult=1)],
                          weight_filler=dict(type='xavier'))
    conv1_bn = L.BatchNorm(conv1, use_global_stats=False, in_place=True)
    conv1_scale = L.Scale(conv1, scale_param=dict(bias_term=True), in_place=True)
    conv1_relu = L.ReLU(conv1, in_place=True)

    conv2 = L.Convolution(conv1, num_output=base_output * (card // 16),
                          kernel_size=3, stride=stride, pad=1, group=card,
                          bias_term=False, param=[dict(lr_mult=1, decay_mult=1)],
                          weight_filler=dict(type='xavier'))
    conv2_bn = L.BatchNorm(conv2, use_global_stats=False, in_place=True)
    conv2_scale = L.Scale(conv2, scale_param=dict(bias_term=True), in_place=True)
    conv2_relu = L.ReLU(conv2, in_place=True)

    conv3 = L.Convolution(conv2, num_output=base_output * 4, kernel_size=1,
                          stride=1, pad=0, bias_term=False,
                          param=[dict(lr_mult=1, decay_mult=1)],
                          weight_filler=dict(type='xavier'))
    conv3_bn = L.BatchNorm(conv3, use_global_stats=False, in_place=True)
    conv3_scale = L.Scale(conv3, scale_param=dict(bias_term=True), in_place=True)

    match = L.Convolution(bottom, num_output=base_output * 4, kernel_size=1,
                          stride=stride, pad=0, bias_term=False,
                          param=[dict(lr_mult=1, decay_mult=1)],
                          weight_filler=dict(type='xavier'))
    match_bn = L.BatchNorm(match, use_global_stats=False, in_place=True)
    match_scale = L.Scale(match, scale_param=dict(bias_term=True), in_place=True)

    eltwise = L.Eltwise(match, conv3, eltwise_param=dict(operation=1))
    eltwise_relu = L.ReLU(eltwise, in_place=True)

    return conv1, conv1_bn, conv1_scale, conv1_relu, \
        conv2, conv2_bn, conv2_scale, conv2_relu, \
        conv3, conv3_bn, conv3_scale, \
        match, match_bn, match_scale, eltwise, eltwise_relu

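# Sketch wiring one match_block onto a NetSpec. Binding every returned top to
# an attribute makes all layers appear in the prototxt; the resx1_* names are
# illustrative, chosen to match the naming scheme used by the exec templates
# in resnext_layers_proto below.
def _example_match_block():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=[1, 64, 56, 56])))
    (n.resx1_conv1, n.resx1_conv1_bn, n.resx1_conv1_scale, n.resx1_conv1_relu,
     n.resx1_conv2, n.resx1_conv2_bn, n.resx1_conv2_scale, n.resx1_conv2_relu,
     n.resx1_conv3, n.resx1_conv3_bn, n.resx1_conv3_scale,
     n.resx1_match, n.resx1_match_bn, n.resx1_match_scale,
     n.resx1_elewise, n.resx1_elewise_relu) = match_block(
        n.data, base_output=64, stride=1, card=32)
    return n.to_proto()
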
def resnext_layers_proto(self, batch_size, card=32, phase='TRAIN',
                         stages=(3, 4, 6, 3)):
    """
    :param batch_size: the batch_size of the train and test phase
    :param phase: TRAIN or TEST
    :param stages: the num of layers = 2 + 3*sum(stages); layers are usually
        chosen from [50, 101, 152]. Every stage is composed of 1
        residual_branch_shortcut module and stages[i]-1 residual_branch
        modules, and each module consists of 3 conv layers.
        (3, 4, 6, 3) for 50 layers; (3, 4, 23, 3) for 101 layers;
        (3, 8, 36, 3) for 152 layers.
    """
    n = caffe.NetSpec()
    if phase == 'TRAIN':
        source_data = self.train_data
        mirror = True
    else:
        source_data = self.test_data
        mirror = False
    n.data, n.label = L.Data(source=source_data, backend=P.Data.LMDB,
                             batch_size=batch_size, ntop=2,
                             transform_param=dict(crop_size=224,
                                                  mean_value=[104, 117, 123],
                                                  mirror=mirror))

    n.conv1 = L.Convolution(n.data, num_output=64, kernel_size=7, stride=2,
                            pad=3, bias_term=False,
                            param=[dict(lr_mult=1, decay_mult=1)],
                            weight_filler=dict(type='xavier'))
    n.conv1_bn = L.BatchNorm(n.conv1, use_global_stats=False, in_place=True)
    n.conv1_scale = L.Scale(n.conv1, scale_param=dict(bias_term=True),
                            in_place=True)
    n.conv1_relu = L.ReLU(n.conv1, in_place=True)  # 64x112x112
    n.pool1 = L.Pooling(n.conv1, kernel_size=3, stride=2, pad=1,
                        ceil_mode=False, pool=P.Pooling.MAX)  # 64x56x56

    # match_string / resnext_string are module-level code templates; exec()
    # fills in the (bottom)/(base)/(n)/(s)/(c) placeholders and builds the
    # layers on n.
    for num in range(len(stages)):  # num = 0, 1, 2, 3
        for i in range(stages[num]):
            if i == 0:
                stage_string = match_string
                bottom_string = ['n.pool1',
                                 'n.resx{}_elewise'.format(str(sum(stages[:1]))),
                                 'n.resx{}_elewise'.format(str(sum(stages[:2]))),
                                 'n.resx{}_elewise'.format(str(sum(stages[:3])))][num]
            else:
                stage_string = resnext_string
                bottom_string = 'n.resx{}_elewise'.format(str(sum(stages[:num]) + i))
            print(num, i)
            exec(stage_string.replace('(bottom)', bottom_string)
                             .replace('(base)', str(2 ** num * 64))
                             .replace('(n)', str(sum(stages[:num]) + i + 1))
                             .replace('(s)', str(int(num > 0) + 1))
                             .replace('(c)', str(card)))
    exec('n.pool_ave = L.Pooling(n.resx{}_elewise, pool=P.Pooling.AVE, '
         'global_pooling=True)'.format(str(sum(stages))))

    n.classifier = L.InnerProduct(n.pool_ave, num_output=self.classifier_num,
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)],
                                  weight_filler=dict(type='xavier'),
                                  bias_filler=dict(type='constant', value=0))
    n.loss = L.SoftmaxWithLoss(n.classifier, n.label)
    if phase == 'TRAIN':
        pass
    else:
        n.accuracy_top1 = L.Accuracy(n.classifier, n.label, include=dict(phase=1))
        n.accuracy_top5 = L.Accuracy(n.classifier, n.label, include=dict(phase=1),
                                     accuracy_param=dict(top_k=5))
    return n.to_proto()

def VGGNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False,
               dilated=False, nopool=False, dropout=True, freeze_layers=[],
               dilate_pool4=False):
    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)}

    assert from_layer in net.keys()
    net.conv1_1 = L.Convolution(net[from_layer], num_output=64, pad=1,
                                kernel_size=3, **kwargs)
    net.relu1_1 = L.ReLU(net.conv1_1, in_place=True)
    net.conv1_2 = L.Convolution(net.relu1_1, num_output=64, pad=1,
                                kernel_size=3, **kwargs)
    net.relu1_2 = L.ReLU(net.conv1_2, in_place=True)
    if nopool:
        name = 'conv1_3'
        net[name] = L.Convolution(net.relu1_2, num_output=64, pad=1,
                                  kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool1'
        net.pool1 = L.Pooling(net.relu1_2, pool=P.Pooling.MAX, kernel_size=2,
                              stride=2)

    net.conv2_1 = L.Convolution(net[name], num_output=128, pad=1,
                                kernel_size=3, **kwargs)
    net.relu2_1 = L.ReLU(net.conv2_1, in_place=True)
    net.conv2_2 = L.Convolution(net.relu2_1, num_output=128, pad=1,
                                kernel_size=3, **kwargs)
    net.relu2_2 = L.ReLU(net.conv2_2, in_place=True)
    if nopool:
        name = 'conv2_3'
        net[name] = L.Convolution(net.relu2_2, num_output=128, pad=1,
                                  kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool2'
        net[name] = L.Pooling(net.relu2_2, pool=P.Pooling.MAX, kernel_size=2,
                              stride=2)

    net.conv3_1 = L.Convolution(net[name], num_output=256, pad=1,
                                kernel_size=3, **kwargs)
    net.relu3_1 = L.ReLU(net.conv3_1, in_place=True)
    net.conv3_2 = L.Convolution(net.relu3_1, num_output=256, pad=1,
                                kernel_size=3, **kwargs)
    net.relu3_2 = L.ReLU(net.conv3_2, in_place=True)
    net.conv3_3 = L.Convolution(net.relu3_2, num_output=256, pad=1,
                                kernel_size=3, **kwargs)
    net.relu3_3 = L.ReLU(net.conv3_3, in_place=True)
    if nopool:
        name = 'conv3_4'
        net[name] = L.Convolution(net.relu3_3, num_output=256, pad=1,
                                  kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool3'
        net[name] = L.Pooling(net.relu3_3, pool=P.Pooling.MAX, kernel_size=2,
                              stride=2)

    net.conv4_1 = L.Convolution(net[name], num_output=512, pad=1,
                                kernel_size=3, **kwargs)
    net.relu4_1 = L.ReLU(net.conv4_1, in_place=True)
    net.conv4_2 = L.Convolution(net.relu4_1, num_output=512, pad=1,
                                kernel_size=3, **kwargs)
    net.relu4_2 = L.ReLU(net.conv4_2, in_place=True)
    net.conv4_3 = L.Convolution(net.relu4_2, num_output=512, pad=1,
                                kernel_size=3, **kwargs)
    net.relu4_3 = L.ReLU(net.conv4_3, in_place=True)
    if nopool:
        name = 'conv4_4'
        net[name] = L.Convolution(net.relu4_3, num_output=512, pad=1,
                                  kernel_size=3, stride=2, **kwargs)
        dilation = 1  # keep dilation defined on this path as well
    else:
        name = 'pool4'
        if dilate_pool4:
            net[name] = L.Pooling(net.relu4_3, pool=P.Pooling.MAX,
                                  kernel_size=3, stride=1, pad=1)
            dilation = 2
        else:
            net[name] = L.Pooling(net.relu4_3, pool=P.Pooling.MAX,
                                  kernel_size=2, stride=2)
            dilation = 1

    kernel_size = 3
    pad = ((kernel_size + (dilation - 1) * (kernel_size - 1)) - 1) // 2
    net.conv5_1 = L.Convolution(net[name], num_output=512, pad=pad,
                                kernel_size=kernel_size, dilation=dilation, **kwargs)
    net.relu5_1 = L.ReLU(net.conv5_1, in_place=True)
    net.conv5_2 = L.Convolution(net.relu5_1, num_output=512, pad=pad,
                                kernel_size=kernel_size, dilation=dilation, **kwargs)
    net.relu5_2 = L.ReLU(net.conv5_2, in_place=True)
    net.conv5_3 = L.Convolution(net.relu5_2, num_output=512, pad=pad,
                                kernel_size=kernel_size, dilation=dilation, **kwargs)
    net.relu5_3 = L.ReLU(net.conv5_3, in_place=True)

    if need_fc:
        if dilated:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3, num_output=512, pad=1,
                                          kernel_size=3, stride=1, **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX, pad=1,
                                      kernel_size=3, stride=1)
        else:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3, num_output=512, pad=1,
                                          kernel_size=3, stride=2, **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX,
                                      kernel_size=2, stride=2)
        if fully_conv:
            if dilated:
                if reduced:
                    dilation = dilation * 6
                    kernel_size = 3
                    num_output = 1024
                else:
                    dilation = dilation * 2
                    kernel_size = 7
                    num_output = 4096
            else:
                if reduced:
                    dilation = dilation * 3
                    kernel_size = 3
                    num_output = 1024
                else:
                    kernel_size = 7
                    num_output = 4096
            pad = ((kernel_size + (dilation - 1) * (kernel_size - 1)) - 1) // 2
            net.fc6 = L.Convolution(net[name], num_output=num_output, pad=pad,
                                    kernel_size=kernel_size, dilation=dilation,
                                    **kwargs)
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True)
            if reduced:
                net.fc7 = L.Convolution(net.relu6, num_output=1024,
                                        kernel_size=1, **kwargs)
            else:
                net.fc7 = L.Convolution(net.relu6, num_output=4096,
                                        kernel_size=1, **kwargs)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True)
        else:
            net.fc6 = L.InnerProduct(net.pool5, num_output=4096)
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True)
            net.fc7 = L.InnerProduct(net.relu6, num_output=4096)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True)

    # Update freeze layers.
    kwargs['param'] = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]
    layers = net.keys()
    for freeze_layer in freeze_layers:
        if freeze_layer in layers:
            net.update(freeze_layer, kwargs)

    return net

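# Sketch of the usual SSD-style invocation; the Input layer stands in for a
# real data layer. With reduced=True and dilated=True, fc6/fc7 become
# 1024-channel dilated convolutions as in the SSD backbone. The frozen layer
# list is illustrative.
def _example_vgg_body():
    net = caffe.NetSpec()
    net.data = L.Input(input_param=dict(shape=dict(dim=[1, 3, 300, 300])))
    VGGNetBody(net, from_layer='data', need_fc=True, fully_conv=True,
               reduced=True, dilated=True, dropout=False,
               freeze_layers=['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2'])
    return net.to_proto()
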
def create_googlenet(input_shape, classes=1000, deploy=False): net_name = "googlenet" data_root_dir = "/home/tim/datasets/cifar10.224x224/" if deploy: net_filename = "{0}_deploy.prototxt".format(net_name) else: net_filename = "{0}_train_test.prototxt".format(net_name) # net name with open(net_filename, "w") as f: f.write('name: "{0}"\n'.format(net_name)) if deploy: net = caffe.NetSpec() """ The conventional blob dimensions for batches of image data are number N x channel K x height H x width W. Blob memory is row-major in layout, so the last / rightmost dimension changes fastest. For example, in a 4D blob, the value at index (n, k, h, w) is physically located at index ((n * K + k) * H + h) * W + w. """ # batch_size, channel, height, width net.data = L.Input(input_param=dict( shape=[dict(dim=list(input_shape))])) else: net = caffe.NetSpec() batch_size = 32 lmdb = data_root_dir + "train_lmdb" net.data, net.label = L.Data( batch_size=batch_size, backend=P.Data.LMDB, source=lmdb, transform_param=dict( mirror=True, crop_size=224, #mean_file=data_root_dir + "mean.binaryproto"), mean_value=[104, 117, 123]), ntop=2, include=dict(phase=caffe_pb2.Phase.Value("TRAIN"))) with open(net_filename, "a") as f: f.write(str(net.to_proto())) del net net = caffe.NetSpec() batch_size = 50 lmdb = data_root_dir + "test_lmdb" net.data, net.label = L.Data( batch_size=batch_size, backend=P.Data.LMDB, source=lmdb, transform_param=dict( mirror=False, crop_size=224, # mean_file=data_root_dir + "mean.binaryproto"), mean_value=[104, 117, 123]), ntop=2, include=dict(phase=caffe_pb2.Phase.Value("TEST"))) # padding = 'same', equal to pad = 1 net.conv1_7x7_2s = L.Convolution( net.data, kernel_size=7, num_output=64, pad=3, stride=2, weight_filler=dict(type="xavier"), bias_filler=dict(type="constant", value=0), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) net.conv1_7x7_2s_relu = L.ReLU(net.conv1_7x7_2s, in_place=True) net.conv1_maxpool1_3x3_2s = L.Pooling(net.conv1_7x7_2s_relu, kernel_size=3, stride=2, pool=P.Pooling.MAX) net.conv1_norm1 = L.LRN(net.conv1_maxpool1_3x3_2s, local_size=5, alpha=0.0001, beta=0.75) net.conv2_1x1_1v = L.Convolution( net.conv1_norm1, kernel_size=1, num_output=64, weight_filler=dict(type="xavier"), bias_filler=dict(type="constant", value=0), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) net.conv2_1x1_1v_relu = L.ReLU(net.conv2_1x1_1v, in_place=True) net.conv2_3x3_1s = L.Convolution( net.conv2_1x1_1v_relu, kernel_size=3, num_output=192, pad=1, weight_filler=dict(type="xavier"), bias_filler=dict(type="constant", value=0), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) net.conv2_3x3_1s_relu = L.ReLU(net.conv2_3x3_1s, in_place=True) net.conv2_norm2 = L.LRN(net.conv2_3x3_1s_relu, local_size=5, alpha=0.0001, beta=0.75) net.conv2_pool_3x3_2s = L.Pooling(net.conv2_norm2, kernel_size=3, stride=2, pool=P.Pooling.MAX) # inception(3a) inception3a_output = inception(net=net, pre_layer=net.conv2_pool_3x3_2s, conv1x1_num=64, conv3x3_reduce_num=96, conv3x3_num=128, conv5x5_reduce_num=16, conv5x5_num=32, maxpool3x3_proj1x1_num=32, name="inception3a") # inception(3b) inception3b_output = inception(net=net, pre_layer=inception3a_output, conv1x1_num=128, conv3x3_reduce_num=128, conv3x3_num=192, conv5x5_reduce_num=32, conv5x5_num=96, maxpool3x3_proj1x1_num=64, name="inception3b") # max pool net.inception3_maxpool = L.Pooling(inception3b_output, kernel_size=3, stride=2, pool=P.Pooling.MAX) # inception(4a) inception4a_output = inception(net=net, 
pre_layer=net.inception3_maxpool, conv1x1_num=192, conv3x3_reduce_num=96, conv3x3_num=208, conv5x5_reduce_num=16, conv5x5_num=48, maxpool3x3_proj1x1_num=64, name="inception4a") # loss1 if not deploy: # avg pool net.loss1_avgpool5x5_3v = L.Pooling(inception4a_output, kernel_size=5, stride=3, pool=P.Pooling.AVE) # conv1x1_1s net.loss1_conv1x1_1s = L.Convolution(net.loss1_avgpool5x5_3v, kernel_size=1, num_output=128, weight_filler=dict(type="xavier"), bias_filler=dict(type="constant", value=0.2), param=[ dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0) ]) net.loss1_conv1x1_1s_relu = L.ReLU(net.loss1_conv1x1_1s, in_place=True) net.loss1_fc1 = L.InnerProduct(net.loss1_conv1x1_1s_relu, num_output=1024, weight_filler=dict(type="xavier"), bias_filler=dict(type="constant", value=0), param=[ dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0) ]) net.loss1_fc1_relu1 = L.ReLU(net.loss1_fc1, in_place=True) net.loss1_dropout = L.Dropout(net.loss1_fc1_relu1, dropout_param=dict(dropout_ratio=0.7), in_place=True) net.loss1_pred_fc = L.InnerProduct(net.loss1_dropout, num_output=classes, weight_filler=dict(type="xavier"), bias_filler=dict(type="constant", value=0), param=[ dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0) ]) net.loss1 = L.SoftmaxWithLoss(net.loss1_pred_fc, net.label, loss_weight=0.3) net.loss1_accuracy_top_1 = L.Accuracy( net.loss1_pred_fc, net.label, include=dict(phase=caffe_pb2.Phase.Value('TEST'))) net.loss1_accuracy_top_5 = L.Accuracy( net.loss1_pred_fc, net.label, include=dict(phase=caffe_pb2.Phase.Value('TEST')), accuracy_param=dict(top_k=5)) # inception(4b) inception4b_output = inception(net=net, pre_layer=inception4a_output, conv1x1_num=160, conv3x3_reduce_num=112, conv3x3_num=224, conv5x5_reduce_num=24, conv5x5_num=64, maxpool3x3_proj1x1_num=64, name="inception4b") # inception(4c) inception4c_output = inception(net=net, pre_layer=inception4b_output, conv1x1_num=128, conv3x3_reduce_num=128, conv3x3_num=256, conv5x5_reduce_num=24, conv5x5_num=64, maxpool3x3_proj1x1_num=64, name="inception4c") # inception(4d) inception4d_output = inception(net=net, pre_layer=inception4c_output, conv1x1_num=112, conv3x3_reduce_num=144, conv3x3_num=288, conv5x5_reduce_num=32, conv5x5_num=64, maxpool3x3_proj1x1_num=64, name="inception4d") # loss2 if not deploy: # avg pool net.loss2_avgpool5x5_3v = L.Pooling(inception4d_output, kernel_size=5, stride=3, pool=P.Pooling.AVE) # conv1x1_1s net.loss2_conv1x1_1s = L.Convolution(net.loss2_avgpool5x5_3v, kernel_size=1, num_output=128, weight_filler=dict(type="xavier"), bias_filler=dict(type="constant", value=0.2), param=[ dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0) ]) net.loss2_conv1x1_1s_relu = L.ReLU(net.loss2_conv1x1_1s, in_place=True) net.loss2_fc1 = L.InnerProduct(net.loss2_conv1x1_1s_relu, num_output=1024, weight_filler=dict(type="xavier"), bias_filler=dict(type="constant", value=0), param=[ dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0) ]) net.loss2_fc1_relu1 = L.ReLU(net.loss2_fc1, in_place=True) net.loss2_dropout = L.Dropout(net.loss2_fc1_relu1, dropout_param=dict(dropout_ratio=0.7), in_place=True) net.loss2_pred_fc = L.InnerProduct(net.loss2_dropout, num_output=classes, weight_filler=dict(type="xavier"), bias_filler=dict(type="constant", value=0), param=[ dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0) ]) net.loss2 = L.SoftmaxWithLoss(net.loss2_pred_fc, net.label, loss_weight=0.3) net.loss2_accuracy_top_1 = L.Accuracy( net.loss2_pred_fc, net.label, 
include=dict(phase=caffe_pb2.Phase.Value('TEST'))) net.loss2_accuracy_top_5 = L.Accuracy( net.loss2_pred_fc, net.label, include=dict(phase=caffe_pb2.Phase.Value('TEST')), accuracy_param=dict(top_k=5)) # inception(4e) inception4e_output = inception(net=net, pre_layer=inception4d_output, conv1x1_num=256, conv3x3_reduce_num=160, conv3x3_num=320, conv5x5_reduce_num=32, conv5x5_num=128, maxpool3x3_proj1x1_num=128, name="inception4e") # max pool net.inception4_maxpool = L.Pooling(inception4e_output, kernel_size=3, stride=2, pool=P.Pooling.MAX) # inception(5a) inception5a_output = inception(net=net, pre_layer=net.inception4_maxpool, conv1x1_num=256, conv3x3_reduce_num=160, conv3x3_num=320, conv5x5_reduce_num=32, conv5x5_num=128, maxpool3x3_proj1x1_num=128, name="inception5a") # inception(5b) inception5b_output = inception(net=net, pre_layer=inception5a_output, conv1x1_num=384, conv3x3_reduce_num=192, conv3x3_num=384, conv5x5_reduce_num=48, conv5x5_num=128, maxpool3x3_proj1x1_num=128, name="inception5b") # avg pool net.avgpool7x7_s1 = L.Pooling(inception5b_output, kernel_size=7, stride=1, pool=P.Pooling.AVE) # dropout net.avgpool7x7_s1_dropout = L.Dropout( net.avgpool7x7_s1, dropout_param=dict(dropout_ratio=0.4), in_place=True) # pred fc net.loss3_pred_fc = L.InnerProduct( net.avgpool7x7_s1_dropout, num_output=classes, weight_filler=dict(type="xavier"), bias_filler=dict(type="constant", value=0), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) # loss3 if deploy: net.prob = L.Softmax(net.loss3_pred_fc) else: net.loss3 = L.SoftmaxWithLoss(net.loss3_pred_fc, net.label) net.loss3_accuracy_top_1 = L.Accuracy( net.loss3_pred_fc, net.label, include=dict(phase=caffe_pb2.Phase.Value('TEST'))) net.loss3_accuracy_top_5 = L.Accuracy( net.loss3_pred_fc, net.label, include=dict(phase=caffe_pb2.Phase.Value('TEST')), accuracy_param=dict(top_k=5)) with open(net_filename, "a") as f: f.write(str(net.to_proto()))
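# A minimal driver sketch for create_googlenet(), assuming caffe, L, P and
# caffe_pb2 are imported at module level as the functions above require.
# classes=10 is an assumption matching the CIFAR-10 LMDBs under
# data_root_dir; the function writes the prototxt files itself.
create_googlenet(input_shape=(1, 3, 224, 224), classes=10, deploy=False)
create_googlenet(input_shape=(1, 3, 224, 224), classes=10, deploy=True)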
def bottleneck(n, prev_layer, stage, num_bottle, num_output, type, param_add=None): scale_factor = 4 input_layer = prev_layer # save input layer of this bottleneck module = 0 phase = 'TRAIN' bn_mode = 0 if args.mode == 'test': phase = 'TEST' bn_mode = 1 param_str = str({'phase': phase, 'p': '0.01'}) # first module conv_name = 'conv{}_{}_{}'.format(stage, num_bottle, module) bn_name = 'bn{}_{}_{}'.format(stage, num_bottle, module) prelu_name = 'prelu{}_{}_{}'.format(stage, num_bottle, module) kernel_size = 1 stride = 1 if stage >= 2: param_str = str({'phase': phase, 'p': '0.1'}) if type == 'downsampling': kernel_size = 2 stride = 2 setattr( n, conv_name, L.Convolution(getattr(n, prev_layer), num_output=num_output / scale_factor, bias_term=0, kernel_size=kernel_size, stride=stride, weight_filler=dict(type='msra'))) setattr( n, bn_name, L.BN(getattr(n, conv_name), scale_filler=dict(type='constant', value=1), bn_mode=bn_mode, shift_filler=dict(type='constant', value=0.001), param=[ dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=0) ])) if param_add == 'relu': setattr(n, prelu_name, L.ReLU(getattr(n, bn_name))) else: setattr(n, prelu_name, L.PReLU(getattr(n, bn_name))) prev_layer = getattr(n, prelu_name) # second module conv conv_name = 'conv{}_{}_{}'.format(stage, num_bottle, module + 1) bn_name = 'bn{}_{}_{}'.format(stage, num_bottle, module + 1) prelu_name = 'prelu{}_{}_{}'.format(stage, num_bottle, module + 1) if type == 'dilated': setattr( n, conv_name, L.Convolution(prev_layer, num_output=num_output / scale_factor, bias_term=1, kernel_size=3, stride=1, pad=param_add, dilation=param_add, weight_filler=dict(type='msra'))) elif type == 'asymmetric': conv_name2 = 'conv{}_{}_{}_a'.format(stage, num_bottle, module + 1) setattr( n, conv_name2, L.Convolution(prev_layer, num_output=num_output / scale_factor, bias_term=0, kernel_h=param_add, kernel_w=1, stride=1, pad=1, weight_filler=dict(type='msra'))) setattr( n, conv_name, L.Convolution(getattr(n, conv_name2), num_output=num_output / scale_factor, bias_term=1, kernel_h=1, kernel_w=param_add, stride=1, pad=1, weight_filler=dict(type='msra'))) elif type == 'upsampling': conv_name = 'deconv{}_{}_{}'.format(stage, num_bottle, module + 1) setattr( n, conv_name, L.Deconvolution(prev_layer, convolution_param=dict(num_output=num_output / scale_factor, bias_term=1, kernel_size=2, stride=2))) else: setattr( n, conv_name, L.Convolution(prev_layer, num_output=num_output / scale_factor, bias_term=1, kernel_size=3, stride=1, pad=1, weight_filler=dict(type='msra'))) setattr( n, bn_name, L.BN(getattr(n, conv_name), scale_filler=dict(type='constant', value=1), bn_mode=bn_mode, shift_filler=dict(type='constant', value=0.001), param=[ dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=0) ])) if param_add == 'relu': setattr(n, prelu_name, L.ReLU(getattr(n, bn_name))) else: setattr(n, prelu_name, L.PReLU(getattr(n, bn_name))) prev_layer = getattr(n, prelu_name) # third module 1x1 conv_name = 'conv{}_{}_{}'.format(stage, num_bottle, module + 2) bn_name = 'bn{}_{}_{}'.format(stage, num_bottle, module + 2) prelu_name = 'prelu{}_{}_{}'.format(stage, num_bottle, module + 2) setattr( n, conv_name, L.Convolution(prev_layer, num_output=num_output, bias_term=0, kernel_size=1, stride=1, weight_filler=dict(type='msra'))) setattr( n, bn_name, L.BN(getattr(n, conv_name), scale_filler=dict(type='constant', value=1), bn_mode=bn_mode, shift_filler=dict(type='constant', value=0.001), param=[ dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=0) ])) prev_layer 
= getattr(n, bn_name) # regularizer (fourth module) drop_name = 'drop{}_{}_{}'.format(stage, num_bottle, module + 3) setattr( n, drop_name, L.Python(prev_layer, python_param=dict(module="spatial_dropout", layer="SpatialDropoutLayer", param_str=param_str))) prev_layer1 = getattr(n, drop_name) eltwise_name = 'eltwise{}_{}_{}'.format(stage, num_bottle, module + 4) prelu_name = 'prelu{}_{}_{}'.format(stage, num_bottle, module + 4) # main branch; pool and pad, just for type == downsampling if type == 'downsampling': pool_name = 'pool{}_{}_{}'.format(stage, num_bottle, module + 4) conv_name = 'conv{}_{}_{}'.format(stage, num_bottle, module + 4) bn_name = 'bn{}_{}_{}'.format(stage, num_bottle, module + 4) if stage == 1 and args.mode != 'train_encoder': n.pool1_0_4, n.pool1_0_4_mask = L.Pooling(getattr(n, input_layer), kernel_size=2, stride=2, pool=P.Pooling.MAX, ntop=2) elif stage == 2 and args.mode != 'train_encoder': n.pool2_0_4, n.pool2_0_4_mask = L.Pooling(getattr(n, input_layer), kernel_size=2, stride=2, pool=P.Pooling.MAX, ntop=2) elif stage == 1 and args.mode == 'train_encoder': n.pool1_0_4 = L.Pooling(getattr(n, input_layer), kernel_size=2, stride=2, pool=P.Pooling.MAX) elif stage == 2 and args.mode == 'train_encoder': n.pool2_0_4 = L.Pooling(getattr(n, input_layer), kernel_size=2, stride=2, pool=P.Pooling.MAX) else: print 'downsampling is just available for stage 1 and 2' setattr( n, conv_name, L.Convolution(getattr(n, pool_name), num_output=num_output, bias_term=0, kernel_size=1, stride=1, weight_filler=dict(type='msra'))) setattr( n, bn_name, L.BN(getattr(n, conv_name), scale_filler=dict(type='constant', value=1), bn_mode=bn_mode, shift_filler=dict(type='constant', value=0.001), param=[ dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=0) ])) prev_layer2 = getattr(n, bn_name) elif type == 'upsampling': conv_name = 'conv{}_{}_{}'.format(stage, num_bottle, module + 4) bn_name = 'bn{}_{}_{}'.format(stage, num_bottle, module + 4) upsample_name = 'upsample{}_{}_{}'.format(stage, num_bottle, module + 4) setattr( n, conv_name, L.Convolution(getattr(n, input_layer), num_output=num_output, bias_term=0, kernel_size=1, stride=1, weight_filler=dict(type='msra'))) setattr( n, bn_name, L.BN(getattr(n, conv_name), scale_filler=dict(type='constant', value=1), bn_mode=bn_mode, shift_filler=dict(type='constant', value=0.001), param=[ dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=0) ])) if stage == 4: setattr(n, upsample_name, L.Upsample(getattr(n, bn_name), n.pool2_0_4_mask, scale=2)) elif stage == 5: setattr(n, upsample_name, L.Upsample(getattr(n, bn_name), n.pool1_0_4_mask, scale=2)) else: print 'upsampling is just available for stage 4 and 5' prev_layer2 = getattr(n, upsample_name) else: prev_layer2 = getattr( n, input_layer ) # if not type==downsampling: bottom layer of eltwise is input layer of # bottleneck setattr(n, eltwise_name, L.Eltwise(prev_layer1, prev_layer2)) if param_add == 'relu': setattr(n, prelu_name, L.ReLU(getattr(n, eltwise_name))) else: setattr(n, prelu_name, L.PReLU(getattr(n, eltwise_name))) last_layer = prelu_name return n.to_proto(), last_layer
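# bottleneck() reads a module-level `args.mode` that is never defined in
# this file; a sketch of the argument parsing it presumably relies on
# (flag name and choices are assumptions inferred from the usages above):
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--mode', default='train',
                    choices=['train', 'test', 'train_encoder'])
args = parser.parse_args()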
def inception(net, pre_layer, conv1x1_num, conv3x3_reduce_num, conv3x3_num,
              conv5x5_reduce_num, conv5x5_num, maxpool3x3_proj1x1_num, name):
    # 1x1
    net.conv1x1 = L.Convolution(
        pre_layer, kernel_size=1, num_output=conv1x1_num,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type="constant", value=0),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        name="{0}_conv1x1".format(name))
    net.conv1x1_relu = L.ReLU(net.conv1x1, in_place=True,
                              name="{0}_conv1x1_relu".format(name))
    # 3x3
    net.conv3x3_reduce = L.Convolution(
        pre_layer, kernel_size=1, num_output=conv3x3_reduce_num,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type="constant", value=0),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        name="{0}_conv3x3_reduce".format(name))
    net.conv3x3_reduce_relu = L.ReLU(
        net.conv3x3_reduce, in_place=True,
        name="{0}_conv3x3_reduce_relu".format(name))
    net.conv3x3 = L.Convolution(
        net.conv3x3_reduce_relu, kernel_size=3, num_output=conv3x3_num, pad=1,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type="constant", value=0),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        name="{0}_conv3x3".format(name))
    net.conv3x3_relu = L.ReLU(net.conv3x3, in_place=True,
                              name="{0}_conv3x3_relu".format(name))
    # 5x5
    net.conv5x5_reduce = L.Convolution(
        pre_layer, kernel_size=1, num_output=conv5x5_reduce_num,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type="constant", value=0),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        name="{0}_conv5x5_reduce".format(name))
    net.conv5x5_reduce_relu = L.ReLU(
        net.conv5x5_reduce, in_place=True,
        name="{0}_conv5x5_reduce_relu".format(name))
    net.conv5x5 = L.Convolution(
        net.conv5x5_reduce_relu, kernel_size=5, num_output=conv5x5_num, pad=2,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type="constant", value=0),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        name="{0}_conv5x5".format(name))
    net.conv5x5_relu = L.ReLU(net.conv5x5, in_place=True,
                              name="{0}_conv5x5_relu".format(name))
    # pool
    net.maxpool3x3 = L.Pooling(pre_layer, kernel_size=3, stride=1, pad=1,
                               pool=P.Pooling.MAX,
                               name="{0}_maxpool3x3".format(name))
    net.maxpool3x3_proj1x1 = L.Convolution(
        net.maxpool3x3, kernel_size=1, num_output=maxpool3x3_proj1x1_num,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type="constant", value=0),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        name="{0}_maxpool3x3_proj1x1".format(name))
    net.maxpool3x3_proj1x1_relu = L.ReLU(
        net.maxpool3x3_proj1x1, in_place=True,
        name="{0}_maxpool3x3_proj1x1_relu".format(name))
    # concat
    net.inception_output = L.Concat(net.conv1x1_relu, net.conv3x3_relu,
                                    net.conv5x5_relu,
                                    net.maxpool3x3_proj1x1_relu,
                                    name="{0}_output".format(name))
    return net.inception_output
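# Illustrative helper (not part of the original code): the Concat top of an
# inception module stacks the four branch outputs along the channel axis,
# so its depth is the sum of the branch widths; e.g. inception3a gives
# 64 + 128 + 32 + 32 = 256 channels.
def inception_output_channels(conv1x1_num, conv3x3_num, conv5x5_num,
                              maxpool3x3_proj1x1_num):
    return conv1x1_num + conv3x3_num + conv5x5_num + maxpool3x3_proj1x1_num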
def part_block(bottom, nout, stride=2): conv = L.Convolution(bottom, kernel_size=1, stride=stride, num_output=nout, pad=0, weight_filler=dict(type='msra'), bias_term=False) batch_norm = L.BatchNorm(conv, in_place=True, batch_norm_param=dict(moving_average_fraction=0.9)) scale = L.Scale(batch_norm, bias_term=True, in_place=True) return scale
def conv_relu(bottom, nout, ks=3, stride=1, pad=1): conv = L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=nout, pad=pad, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) return conv, L.ReLU(conv, in_place=True)
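# fcn() below also calls max_pool() and crop(), which are not defined in
# this file. A minimal max_pool sketch under the usual FCN conventions;
# the crop import assumes the coord_map utilities from the FCN reference
# code are available on PYTHONPATH.
from caffe.coord_map import crop  # assumed project dependency

def max_pool(bottom, ks=2, stride=2):
    # 2x2/2 max pooling between the VGG-style conv stages
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)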
def Lenet(img_list, batch_size, include_acc=False):
    # data layer: reads (image path, label) pairs in ImageData format;
    # scale=1/256 maps pixel values from [0, 255] into [0, 1)
    data, label = L.ImageData(source=img_list, batch_size=batch_size, ntop=2,
                              root_folder=root,
                              transform_param=dict(scale=0.00390625))
    # convolution layer
    conv1 = L.Convolution(data, kernel_size=5, stride=1, num_output=20,
                          pad=4, weight_filler=dict(type='xavier'))
    # activation layer
    relu1 = L.ReLU(conv1, in_place=True)
    # convolution layer
    conv2 = L.Convolution(relu1, kernel_size=5, stride=1, num_output=50,
                          pad=4, weight_filler=dict(type='xavier'))
    # activation layer
    relu2 = L.ReLU(conv2, in_place=True)
    # pooling layer
    pool1 = L.Pooling(relu2, pool=P.Pooling.MAX, kernel_size=2, stride=2)
    # convolution layer
    conv3 = L.Convolution(pool1, kernel_size=5, stride=1, num_output=20,
                          pad=4, weight_filler=dict(type='xavier'))
    # activation layer
    relu3 = L.ReLU(conv3, in_place=True)
    # convolution layer
    conv4 = L.Convolution(relu3, kernel_size=5, stride=1, num_output=50,
                          pad=4, weight_filler=dict(type='xavier'))
    # activation layer
    relu4 = L.ReLU(conv4, in_place=True)
    # fully connected layer
    fc3 = L.InnerProduct(relu4, num_output=500,
                         weight_filler=dict(type='xavier'))
    # activation layer (renamed from relu3 to avoid shadowing the conv3 activation)
    relu5 = L.ReLU(fc3, in_place=True)
    # fully connected layer
    fc4 = L.InnerProduct(relu5, num_output=10,
                         weight_filler=dict(type='xavier'))
    # softmax and loss combined
    loss = L.SoftmaxWithLoss(fc4, label)
    if include_acc:  # the test phase needs an Accuracy layer
        acc = L.Accuracy(fc4, label)
        return to_proto(loss, acc)
    else:
        return to_proto(loss)
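# Hedged usage sketch for Lenet(): `root` is a module-level path prefix for
# the images, and the two list files are placeholders for this environment.
with open('lenet_train.prototxt', 'w') as f:
    f.write(str(Lenet('train_list.txt', batch_size=64)))
with open('lenet_test.prototxt', 'w') as f:
    f.write(str(Lenet('test_list.txt', batch_size=100, include_acc=True)))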
def fcn(split): n = caffe.NetSpec() pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892), seed=1337) if split.startswith('train'): pydata_params['sbdd_dir'] = '../data/sbdd/dataset' pylayer = 'PartialSBDDSegDataLayer' else: pydata_params['voc_dir'] = '../data/pascal-obfuscated/VOC2011' pylayer = 'PartialVOCSegDataLayer' n.data, n.label = L.Python(module='voc_layers', layer=pylayer, ntop=2, param_str=str(pydata_params)) # the base net n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) n.pool1 = max_pool(n.relu1_2) n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) n.pool2 = max_pool(n.relu2_2) n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) n.pool3 = max_pool(n.relu3_3) n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) n.pool4 = max_pool(n.relu4_3) n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) n.pool5 = max_pool(n.relu5_3) # fully conv n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) n.score_fr = L.Convolution( n.drop7, num_output=6, kernel_size=1, pad=0, weight_filler=dict(type='xavier'), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) n.upscore2 = L.Deconvolution(n.score_fr, convolution_param=dict( num_output=6, kernel_size=4, stride=2, weight_filler=dict(type='xavier'), bias_term=False), param=[dict(lr_mult=0)]) # scale pool4 skip for compatibility n.scale_pool4 = L.Scale(n.pool4, filler=dict(type='constant', value=0.01), param=[dict(lr_mult=0)]) n.score_pool4 = L.Convolution( n.scale_pool4, num_output=6, kernel_size=1, pad=0, weight_filler=dict(type='xavier'), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) n.score_pool4c = crop(n.score_pool4, n.upscore2) n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c, operation=P.Eltwise.SUM) n.upscore_pool4 = L.Deconvolution(n.fuse_pool4, convolution_param=dict( num_output=6, kernel_size=4, stride=2, weight_filler=dict(type='xavier'), bias_term=False), param=[dict(lr_mult=0)]) # scale pool3 skip for compatibility n.scale_pool3 = L.Scale(n.pool3, filler=dict(type='constant', value=0.0001), param=[dict(lr_mult=0)]) n.score_pool3 = L.Convolution( n.scale_pool3, num_output=6, kernel_size=1, pad=0, weight_filler=dict(type='xavier'), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) n.score_pool3c = crop(n.score_pool3, n.upscore_pool4) n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c, operation=P.Eltwise.SUM) n.upscore8 = L.Deconvolution(n.fuse_pool3, convolution_param=dict( num_output=6, kernel_size=16, stride=8, weight_filler=dict(type='xavier'), bias_term=False), param=[dict(lr_mult=0)]) n.score = crop(n.upscore8, n.data) n.loss = L.SoftmaxWithLoss(n.score, n.label, loss_param=dict(normalize=False, ignore_label=255)) return n.to_proto()
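# A quick end-to-end check of the FCN definition above: write the proto and
# let pycaffe build it (assumes voc_layers and the dataset paths resolve;
# the file name is arbitrary).
with open('fcn8s_train.prototxt', 'w') as f:
    f.write(str(fcn('train')))
sanity_net = caffe.Net('fcn8s_train.prototxt', caffe.TRAIN)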
def create_net(phase): global train_transform_param global test_transform_param train_transform_param = { 'mirror': False, 'mean_file': Params['mean_file'] } test_transform_param = { 'mean_file': Params['mean_file'] } if phase == 'train': lmdb_file = Params['train_lmdb'] transform_param = train_transform_param batch_size = Params['batch_size_per_device'] else: lmdb_file = Params['test_lmdb'] transform_param = test_transform_param batch_size = Params['test_batch_size'] net = caffe.NetSpec() net.data, net.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb_file, transform_param=transform_param, ntop=2) #include=dict(phase=caffe_pb2.Phase.Value('TRAIN')), kwargs = { 'param': [dict(lr_mult=1), dict(lr_mult=2)], #'weight_filler': dict(type='xavier'), 'weight_filler': dict(type='gaussian', std=0.0001), 'bias_filler': dict(type='constant')} net.conv1_1 = L.Convolution(net.data, num_output=32, kernel_size=5, pad=2, **kwargs) net.relu1_1 = L.ReLU(net.conv1_1, in_place=True) #net.conv1_2 = L.Convolution(net.conv1_1, num_output=32, kernel_size=3, pad=1, **kwargs) #net.relu1_2 = L.ReLU(net.conv1_2, in_place=True) net.pool1 = L.Pooling(net.conv1_1, pool=P.Pooling.MAX, kernel_size=3, pad=1, stride=2) kwargs = { 'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], #'weight_filler': dict(type='xavier'), 'weight_filler': dict(type='gaussian', std=0.005), 'bias_filler': dict(type='constant')} net.fc2 = L.InnerProduct(net.pool1, num_output=64, **kwargs) net.relu2 = L.ReLU(net.fc2, in_place=True) net.drop2 = L.Dropout(net.fc2, in_place=True, dropout_param=dict(dropout_ratio=0.5)) kwargs = { 'param': [dict(lr_mult=1, decay_mult=100), dict(lr_mult=2, decay_mult=0)], #'weight_filler': dict(type='xavier'), 'weight_filler': dict(type='gaussian', std=0.01), 'bias_filler': dict(type='constant', value=0)} net.fc3 = L.InnerProduct(net.fc2, num_output=45, **kwargs) if phase == 'train': net.loss = L.SoftmaxWithLoss(net.fc3, net.label) elif phase == 'test': net.accuracy = L.Accuracy(net.fc3, net.label) else: net.prob = L.Softmax(net.fc3) net_proto = net.to_proto() if phase == 'deploy': del net_proto.layer[0] #del net_proto.layer[-1] net_proto.input.extend(['data']) net_proto.input_dim.extend([1,3,12,36]) net_proto.name = '{}_{}'.format(Params['model_name'], phase) return net_proto
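# The deploy branch of create_net() strips the Data layer and declares a
# 1x3x12x36 input; a small sanity check of that surgery (a sketch, assuming
# the protobuf fields as written above):
deploy_proto = create_net('deploy')
assert deploy_proto.input[0] == 'data'
assert list(deploy_proto.input_dim) == [1, 3, 12, 36]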
def create_neural_net(input_file, batch_size=50):
    net = caffe.NetSpec()
    net.data, net.label = L.Data(batch_size=batch_size, source=input_file,
                                 backend=caffe.params.Data.LMDB, ntop=2,
                                 include=dict(phase=caffe.TEST),
                                 name='juniward04')
    ## pre-process
    net.conv1 = L.Convolution(net.data, num_output=16, kernel_size=4,
                              stride=1, pad=1,
                              weight_filler=dict(type='dct4'),
                              param=[{'lr_mult': 0, 'decay_mult': 0}],
                              bias_term=False)
    TRUNCABS = caffe_pb2.QuantTruncAbsParameter.TRUNCABS
    net.quanttruncabs = L.QuantTruncAbs(net.conv1, process=TRUNCABS,
                                        threshold=8, in_place=True)
    ## block 1 16
    [net.conv1_proj, net.bn2, net.scale2, net.conv512_1, net.bn2_1,
     net.scale2_1, net.relu512_1, net.conv512_to_256, net.bn2_2,
     net.scale2_2, net.res512_to_256,
     net.relu512_to_256] = add_downsampling_block_1(net.quanttruncabs, 12)
    # [net.conv1_proj, net.bn2, net.scale2, net.conv512_1, net.bn2_1, net.scale2_1,
    #  net.relu512_1, net.conv512_2, net.bn2_2, net.scale2_2, net.relu512_2,
    #  net.conv512_to_256, net.bn2_3, net.scale2_3, net.res512_to_256,
    #  net.relu512_to_256] = add_downsampling_block(net.quanttruncabs, 12)
    ## block 2 13
    [net.conv256_1, net.bn2_4, net.scale2_4, net.relu256_1, net.conv256_2,
     net.bn2_5, net.scale2_5, net.relu256_2, net.conv256_3, net.bn2_6,
     net.scale2_6, net.res256_3,
     net.relu256_3] = add_skip_block(net.res512_to_256, 24)
    ## block 3 16
    [net.res256_3_proj, net.bn2_7, net.scale2_7, net.conv256_4, net.bn2_8,
     net.scale2_8, net.relu256_4, net.conv256_5, net.bn2_9, net.scale2_9,
     net.relu256_5, net.conv256_to_128, net.bn2_10, net.scale2_10,
     net.res256_to_128,
     net.relu256_to_128] = add_downsampling_block(net.res256_3, 24)
    ## block 4 13
    [net.conv128_1, net.bn2_11, net.scale2_11, net.relu128_1, net.conv128_2,
     net.bn2_12, net.scale2_12, net.relu128_2, net.conv128_3, net.bn2_13,
     net.scale2_13, net.res128_3,
     net.relu128_3] = add_skip_block(net.res256_to_128, 48)
    ## block 5 16
    [net.res128_3_proj, net.bn2_14, net.scale2_14, net.conv128_4, net.bn2_15,
     net.scale2_15, net.relu128_4, net.conv128_5, net.bn2_16, net.scale2_16,
     net.relu128_5, net.conv128_to_64, net.bn2_17, net.scale2_17,
     net.res128_to_64,
     net.relu128_to_64] = add_downsampling_block(net.res128_3, 48)
    ## block 6 13
    [net.conv64_1, net.bn2_18, net.scale2_18, net.relu64_1, net.conv64_2,
     net.bn2_19, net.scale2_19, net.relu64_2, net.conv64_3, net.bn2_20,
     net.scale2_20, net.res64_3,
     net.relu64_3] = add_skip_block(net.res128_to_64, 96)
    ## ## block 7 16
    ## [net.res64_3_proj, net.bn2_21, net.scale2_21, net.conv64_4, net.bn2_22,
    ##  net.scale2_22, net.relu64_4, net.conv64_5, net.bn2_23, net.scale2_23,
    ##  net.relu64_5, net.conv64_to_32, net.bn2_24, net.scale2_24,
    ##  net.res64_to_32, net.relu64_to_32] = add_downsampling_block(net.res64_3, 96)
    [net.res64_3_proj, net.bn2_21,
     net.scale2_21] = add_module(net.res64_3, 2 * 96, 1, 3, 2)
    ## block 8 13
    [net.conv32_1, net.bn2_25, net.scale2_25, net.relu32_1, net.conv32_2,
     net.bn2_26, net.scale2_26, net.relu32_2, net.conv32_3, net.bn2_27,
     net.scale2_27, net.res32_3,
     net.relu32_3] = add_skip_block(net.res64_3_proj, 192)
    ## block 9 16
    [net.res32_3_proj, net.bn2_28, net.scale2_28, net.conv32_4, net.bn2_29,
     net.scale2_29, net.relu32_4, net.conv32_5, net.bn2_30, net.scale2_30,
     net.relu32_5, net.conv32_to_16, net.bn2_31, net.scale2_31,
     net.res32_to_16,
     net.relu32_to_16] = add_downsampling_block(net.res32_3, 192)
    ## block 10 13
    [net.conv16_1, net.bn2_32, net.scale2_32, net.relu16_1, net.conv16_2,
     net.bn2_33, net.scale2_33, net.relu16_2, net.conv16_3, net.bn2_34,
     net.scale2_34, net.res16_3,
     net.relu16_3] = add_skip_block(net.res32_to_16, 384)
    ## global pool
    AVE = caffe_pb2.PoolingParameter.AVE
    net.global_pool = L.Pooling(net.res16_3, pool=AVE, kernel_size=8, stride=1)
    ## fully connected
    net.fc = L.InnerProduct(net.global_pool,
                            param=[{'lr_mult': 1}, {'lr_mult': 2}],
                            num_output=2, weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'))
    ## accuracy
    net.accuracy = L.Accuracy(net.fc, net.label,
                              include=dict(phase=caffe.TEST))
    ## loss
    net.loss = L.SoftmaxWithLoss(net.fc, net.label)
    return net.to_proto()
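# create_neural_net() leans on the add_module/add_skip_block/
# add_downsampling_block(_1) helpers defined elsewhere in this project;
# given those, emitting the prototxt follows the usual pattern (the LMDB
# path is a placeholder):
with open('juniward04_test.prototxt', 'w') as f:
    f.write(str(create_neural_net('test_lmdb', batch_size=50)))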
def generate_caffe_prototxt(m, caffe_net, layer): if hasattr(m, 'generate_caffe_prototxt'): return m.generate_caffe_prototxt(caffe_net, layer) if isinstance(m, nn.Sequential): for module in m: layer = generate_caffe_prototxt(module, caffe_net, layer) return layer if isinstance(m, nn.Conv2d): if m.bias is None: param=[dict(lr_mult=1, decay_mult=1)] else: param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=0)] assert m.dilation[0] == m.dilation[1] convolution_param=dict( num_output=m.out_channels, group=m.groups, bias_term=(m.bias is not None), weight_filler=dict(type='msra'), dilation=m.dilation[0], ) if m.kernel_size[0] == m.kernel_size[1]: convolution_param['kernel_size'] = m.kernel_size[0] else: convolution_param['kernel_h'] = m.kernel_size[0] convolution_param['kernel_w'] = m.kernel_size[1] if m.stride[0] == m.stride[1]: convolution_param['stride'] = m.stride[0] else: convolution_param['stride_h'] = m.stride[0] convolution_param['stride_w'] = m.stride[1] if m.padding[0] == m.padding[1]: convolution_param['pad'] = m.padding[0] else: convolution_param['pad_h'] = m.padding[0] convolution_param['pad_w'] = m.padding[1] layer = L.Convolution( layer, param=param, convolution_param=convolution_param, ) caffe_net.tops[m.g_name] = layer return layer if isinstance(m, nn.ConvTranspose2d): if m.bias is None: param=[dict(lr_mult=1, decay_mult=1)] else: param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=0)] assert m.dilation[0] == m.dilation[1] convolution_param=dict( num_output=m.out_channels, group=m.groups, bias_term=(m.bias is not None), weight_filler=dict(type='msra'), dilation=m.dilation[0], ) if m.kernel_size[0] == m.kernel_size[1]: convolution_param['kernel_size'] = m.kernel_size[0] else: convolution_param['kernel_h'] = m.kernel_size[0] convolution_param['kernel_w'] = m.kernel_size[1] if m.stride[0] == m.stride[1]: convolution_param['stride'] = m.stride[0] else: convolution_param['stride_h'] = m.stride[0] convolution_param['stride_w'] = m.stride[1] if m.padding[0] == m.padding[1]: convolution_param['pad'] = m.padding[0] else: convolution_param['pad_h'] = m.padding[0] convolution_param['pad_w'] = m.padding[1] layer = L.Deconvolution( layer, param=param, convolution_param=convolution_param, ) caffe_net.tops[m.g_name] = layer return layer if isinstance(m, nn.BatchNorm2d): layer = L.BatchNorm( layer, in_place=True, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)], ) caffe_net[m.g_name] = layer if m.affine: layer = L.Scale( layer, in_place=True, bias_term=True, filler=dict(type='constant', value=1), bias_filler=dict(type='constant', value=0), param=[dict(lr_mult=1, decay_mult=0), dict(lr_mult=1, decay_mult=0)], ) caffe_net[m.g_name + '/scale'] = layer return layer if isinstance(m, nn.ReLU): layer = L.ReLU(layer, in_place=True) caffe_net.tops[m.g_name] = layer return layer if isinstance(m, nn.PReLU): layer = L.PReLU(layer) caffe_net.tops[m.g_name] = layer return layer if isinstance(m, nn.AvgPool2d) or isinstance(m, nn.MaxPool2d): if isinstance(m, nn.AvgPool2d): pooling_param = dict(pool=P.Pooling.AVE) else: pooling_param = dict(pool=P.Pooling.MAX) if isinstance(m.kernel_size, tuple) or isinstance(m.kernel_size, list): pooling_param['kernel_h'] = m.kernel_size[0] pooling_param['kernel_w'] = m.kernel_size[1] else: pooling_param['kernel_size'] = m.kernel_size if isinstance(m.stride, tuple) or isinstance(m.stride, list): pooling_param['stride_h'] = m.stride[0] pooling_param['stride_w'] = m.stride[1] else: 
            pooling_param['stride'] = m.stride
        if isinstance(m.padding, tuple) or isinstance(m.padding, list):
            pooling_param['pad_h'] = m.padding[0]
            pooling_param['pad_w'] = m.padding[1]
        else:
            pooling_param['pad'] = m.padding
        layer = L.Pooling(layer, pooling_param=pooling_param)
        caffe_net.tops[m.g_name] = layer
        return layer
    raise Exception("Unknown module '%s' to generate caffe prototxt." % m)
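# Conversion sketch for generate_caffe_prototxt(): each PyTorch module is
# expected to carry a g_name attribute naming its Caffe layer, which the
# caller must assign. The torch/caffe imports and the g_name convention are
# assumptions about this project's setup.
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(inplace=True))
for i, m in enumerate(model):
    m.g_name = 'layer{}'.format(i)
caffe_net = caffe.NetSpec()
caffe_net.data = L.Input(shape=dict(dim=[1, 3, 224, 224]))
generate_caffe_prototxt(model, caffe_net, caffe_net.data)
print(caffe_net.to_proto())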
def conv_relu(name, bottom, nout, ks=3, stride=1, pad=1, group=1, lr1=1, lr2=2): conv = L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=nout, pad=pad, group=group, weight_filler=dict(type='xavier'), param=[dict(name=name+"_w", lr_mult=lr1, decay_mult=1), dict(name=name+"_b", lr_mult=lr2, decay_mult=0)]) return conv, L.ReLU(conv, in_place=True)
def create_bnn_cnn_net(num_input_points, phase=None): n = caffe.NetSpec() n.input_color = L.Input(shape=[dict(dim=[1, 2, 1, num_input_points])]) n.in_features = L.Input(shape=[dict(dim=[1, 4, 1, num_input_points])]) n.out_features = L.Input(shape=[dict(dim=[1, 4, 480, 854])]) n.scales = L.Input(shape=[dict(dim=[1, 4, 1, 1])]) n.flatten_scales = L.Flatten(n.scales, flatten_param=dict(axis=0)) n.in_scaled_features = L.Scale(n.in_features, n.flatten_scales, scale_param=dict(axis=1)) n.out_scaled_features = L.Scale(n.out_features, n.flatten_scales, scale_param=dict(axis=1)) ### Start of BNN # BNN - stage - 1 n.out_color1 = L.Permutohedral( n.input_color, n.in_scaled_features, n.out_scaled_features, permutohedral_param=dict(num_output=32, group=1, neighborhood_size=0, bias_term=True, norm_type=P.Permutohedral.AFTER, offset_type=P.Permutohedral.NONE), filter_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0.5), param=[{ 'lr_mult': 1, 'decay_mult': 1 }, { 'lr_mult': 2, 'decay_mult': 0 }]) n.bnn_out_relu_1 = L.ReLU(n.out_color1, in_place=True) # BNN - stage - 2 n.out_color2 = L.Permutohedral(n.bnn_out_relu_1, n.out_scaled_features, n.out_scaled_features, permutohedral_param=dict( num_output=32, group=1, neighborhood_size=0, bias_term=True, norm_type=P.Permutohedral.AFTER, offset_type=P.Permutohedral.NONE), filter_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0), param=[{ 'lr_mult': 1, 'decay_mult': 1 }, { 'lr_mult': 2, 'decay_mult': 0 }]) n.bnn_out_relu_2 = L.ReLU(n.out_color2, in_place=True) # BNN - combination n.connection_out = L.Concat(n.bnn_out_relu_1, n.bnn_out_relu_2) n.out_color_bilateral = L.Convolution( n.connection_out, convolution_param=dict(num_output=2, kernel_size=1, stride=1, weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0)), param=[{ 'lr_mult': 1, 'decay_mult': 1 }, { 'lr_mult': 2, 'decay_mult': 0 }]) n.out_color_bilateral_relu = L.ReLU(n.out_color_bilateral, in_place=True) ### Start of CNN # CNN - Stage 1 n.out_color_spatial1 = L.Convolution( n.out_color_bilateral_relu, convolution_param=dict(num_output=32, kernel_size=3, stride=1, pad_h=1, pad_w=1, weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0)), param=[{ 'lr_mult': 1, 'decay_mult': 1 }, { 'lr_mult': 2, 'decay_mult': 0 }]) n.out_color_spatial_relu1 = L.ReLU(n.out_color_spatial1, in_place=True) # CNN - Stage 2 n.out_color_spatial2 = L.Convolution( n.out_color_spatial_relu1, convolution_param=dict(num_output=32, kernel_size=3, stride=1, pad_h=1, pad_w=1, weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0)), param=[{ 'lr_mult': 1, 'decay_mult': 1 }, { 'lr_mult': 2, 'decay_mult': 0 }]) n.out_color_spatial_relu2 = L.ReLU(n.out_color_spatial2, in_place=True) # CNN - Stage 3 n.out_color_spatial = L.Convolution(n.out_color_spatial_relu2, convolution_param=dict( num_output=2, kernel_size=3, stride=1, pad_h=1, pad_w=1, weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0)), param=[{ 'lr_mult': 1, 'decay_mult': 1 }, { 'lr_mult': 2, 'decay_mult': 0 }]) n.out_color_spatial_relu = L.ReLU(n.out_color_spatial, in_place=True) n.final_connection_out = L.Concat(n.out_color_bilateral_relu, n.out_color_spatial_relu) n.out_color_result = L.Convolution(n.final_connection_out, convolution_param=dict( num_output=2, kernel_size=1, stride=1, weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0.0)), 
param=[{ 'lr_mult': 1, 'decay_mult': 1 }, { 'lr_mult': 2, 'decay_mult': 0 }]) return n.to_proto()
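# The Permutohedral layers above exist only in the bilateral neural network
# fork of Caffe, so the emitted prototxt loads under that build alone;
# printing the definition itself needs no custom layers:
print(create_bnn_cnn_net(num_input_points=1000))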
def fcn(split): n = caffe.NetSpec() pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892), seed=1337) if split == 'train': pydata_params['sbdd_dir'] = '/data02/bioinf/treml/pascal/SBDD/dataset' pylayer = 'SBDDSegDataLayer' else: pydata_params['voc_dir'] = '/data02/bioinf/treml/pascal/VOC2011' pylayer = 'VOCSegDataLayer' n.data, n.label = L.Python(module='voc_layers', layer=pylayer, ntop=2, param_str=str(pydata_params)) # the base net n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) n.pool1 = max_pool(n.relu1_2) n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) n.pool2 = max_pool(n.relu2_2) n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) n.pool3 = max_pool(n.relu3_3) n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) n.pool4 = max_pool(n.relu4_3) n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) n.pool5 = max_pool(n.relu5_3) # fully conv n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) n.score_fr = L.Convolution( n.drop7, num_output=21, kernel_size=1, pad=0, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) n.upscore2 = L.Deconvolution(n.score_fr, convolution_param=dict(num_output=21, kernel_size=4, stride=2, bias_term=False), param=[dict(lr_mult=0)]) n.score_pool4 = L.Convolution( n.pool4, num_output=21, kernel_size=1, pad=0, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) n.score_pool4c = crop(n.score_pool4, n.upscore2) n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c, operation=P.Eltwise.SUM) n.upscore16 = L.Deconvolution(n.fuse_pool4, convolution_param=dict(num_output=21, kernel_size=32, stride=16, bias_term=False), param=[dict(lr_mult=0)]) n.score = crop(n.upscore16, n.data) n.loss = L.SoftmaxWithLoss(n.score, n.label, loss_param=dict(normalize=False, ignore_label=255)) return n.to_proto()
def define_structure(self, stage): n = caffe.NetSpec() if stage != NetStage.DEPLOY: source_params = dict(stage=stage) source_params['dataset_dir'] = self.DATASET_DIR source_params['patch_dir'] = self.PATCH_DIR source_params['average_image'] = self.AVERAGE_IMAGE source_params['training_patches'] = self.TRAINING_PATCHES source_params['validation_patches'] = self.VALIDATION_PATCHES source_params['random_training'] = self.RANDOM_TRAINING source_params['include_rotations'] = self.INCLUDE_ROTATIONS n.data, n.label = L.Python(module='DataLayer', layer='DataLayer', ntop=2, param_str=str(source_params)) else: n.data = L.Input(shape=dict(dim=[1, 3, self.WSIZE, self.WSIZE])) # the base net n.conv1_1, n.relu1_1 = conv_relu(n.data, 32, pad=7) n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 32) n.pool1 = max_pool(n.conv1_2) n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 64) n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 64) n.pool2 = max_pool(n.relu2_2) n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 128) n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 128) n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 128) n.pool3 = max_pool(n.relu3_3) n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 256) n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 256) n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 256) n.pool4 = max_pool(n.relu4_3) n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 256) n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 256) n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 256) n.pool5 = max_pool(n.relu5_3) # fully conv n.fc6, n.relu6 = conv_relu(n.pool5, 2048, ks=2, pad=0) n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) n.fc7, n.relu7 = conv_relu(n.drop6, 2048, ks=1, pad=0) n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) n.score_fr = L.Convolution( n.drop7, num_output=self.NUM_LABELS, kernel_size=1, pad=0, param=[ dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0) ], weight_filler=dict(type='xavier'), bias_filler=dict( type='constant')) # must be 1 x num_classes x 1 x 1 n.deconv = L.Deconvolution( n.score_fr, convolution_param=dict(num_output=self.NUM_LABELS, kernel_size=64, stride=32, bias_term=False, weight_filler=dict(type='xavier'), bias_filler=dict(type='constant')), # param=[dict(lr_mult=0)], #do not learn this filter? param=[dict(lr_mult=1, decay_mult=1)]) n.score = crop(n.deconv, n.data) if stage != NetStage.DEPLOY: n.loss = L.SoftmaxWithLoss( n.score, n.label, loss_param=dict(normalize=False)) # , ignore_label=0 #n.loss = L.Python(n.score, n.label, module='LossLayer', layer='TopoLossLayer', loss_weight=1) return n.to_proto()
def shortcut(bottom, nout, stride):
    # 1x1 projection for the residual path; bias_term=False, so no
    # bias_filler is needed
    conv = L.Convolution(bottom, kernel_size=1, stride=stride,
                         num_output=nout, pad=0, bias_term=False,
                         weight_filler=dict(type='msra'))
    return conv
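# shortcut() is the 1x1 projection path of a residual unit; a minimal
# sketch of how it pairs with a separately built main branch via an
# element-wise sum (main_branch_top is a hypothetical top of matching
# shape, not defined in this file):
def residual_unit(bottom, main_branch_top, nout, stride):
    proj = shortcut(bottom, nout, stride)
    return L.Eltwise(proj, main_branch_top, operation=P.Eltwise.SUM)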
def ZFNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False, dilated=False, dropout=True, need_fc8=False, freeze_layers=[]): kwargs = { 'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], 'weight_filler': dict(type='xavier'), 'bias_filler': dict(type='constant', value=0)} assert from_layer in net.keys() net.conv1 = L.Convolution(net[from_layer], num_output=96, pad=3, kernel_size=7, stride=2, **kwargs) net.relu1 = L.ReLU(net.conv1, in_place=True) net.norm1 = L.LRN(net.relu1, local_size=3, alpha=0.00005, beta=0.75, norm_region=P.LRN.WITHIN_CHANNEL, engine=P.LRN.CAFFE) net.pool1 = L.Pooling(net.norm1, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=2) net.conv2 = L.Convolution(net.pool1, num_output=256, pad=2, kernel_size=5, stride=2, **kwargs) net.relu2 = L.ReLU(net.conv2, in_place=True) net.norm2 = L.LRN(net.relu2, local_size=3, alpha=0.00005, beta=0.75, norm_region=P.LRN.WITHIN_CHANNEL, engine=P.LRN.CAFFE) net.pool2 = L.Pooling(net.norm2, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=2) net.conv3 = L.Convolution(net.pool2, num_output=384, pad=1, kernel_size=3, **kwargs) net.relu3 = L.ReLU(net.conv3, in_place=True) net.conv4 = L.Convolution(net.relu3, num_output=384, pad=1, kernel_size=3, **kwargs) net.relu4 = L.ReLU(net.conv4, in_place=True) net.conv5 = L.Convolution(net.relu4, num_output=256, pad=1, kernel_size=3, **kwargs) net.relu5 = L.ReLU(net.conv5, in_place=True) if need_fc: if dilated: name = 'pool5' net[name] = L.Pooling(net.relu5, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=1) else: name = 'pool5' net[name] = L.Pooling(net.relu5, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=2) if fully_conv: if dilated: if reduced: net.fc6 = L.Convolution(net[name], num_output=1024, pad=5, kernel_size=3, dilation=5, **kwargs) else: net.fc6 = L.Convolution(net[name], num_output=4096, pad=5, kernel_size=6, dilation=2, **kwargs) else: if reduced: net.fc6 = L.Convolution(net[name], num_output=1024, pad=2, kernel_size=3, dilation=2, **kwargs) else: net.fc6 = L.Convolution(net[name], num_output=4096, pad=2, kernel_size=6, **kwargs) net.relu6 = L.ReLU(net.fc6, in_place=True) if dropout: net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True) if reduced: net.fc7 = L.Convolution(net.relu6, num_output=1024, kernel_size=1, **kwargs) else: net.fc7 = L.Convolution(net.relu6, num_output=4096, kernel_size=1, **kwargs) net.relu7 = L.ReLU(net.fc7, in_place=True) if dropout: net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True) else: net.fc6 = L.InnerProduct(net.pool5, num_output=4096) net.relu6 = L.ReLU(net.fc6, in_place=True) if dropout: net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True) net.fc7 = L.InnerProduct(net.relu6, num_output=4096) net.relu7 = L.ReLU(net.fc7, in_place=True) if dropout: net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True) if need_fc8: from_layer = net.keys()[-1] if fully_conv: net.fc8 = L.Convolution(net[from_layer], num_output=1000, kernel_size=1, **kwargs) else: net.fc8 = L.InnerProduct(net[from_layer], num_output=1000) net.prob = L.Softmax(net.fc8) # Update freeze layers. kwargs['param'] = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)] layers = net.keys() for freeze_layer in freeze_layers: if freeze_layer in layers: net.update(freeze_layer, kwargs) return net
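# ZFNetBody() mutates a NetSpec in place and asserts that from_layer is
# already a key of the net; a minimal deploy-style driver (the input size
# is an assumption):
zf_net = caffe.NetSpec()
zf_net.data = L.Input(shape=dict(dim=[1, 3, 224, 224]))
ZFNetBody(zf_net, 'data', need_fc=True, fully_conv=False, need_fc8=True)
print(zf_net.to_proto())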