def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
                         seed=1337)
    if split == 'train':
        pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
        pylayer = 'SBDDSegDataLayer'
    else:
        pydata_params['voc_dir'] = '../data/pascal/VOC2012'
        pylayer = 'VOCSegDataLayer'
    n.data, n.label = L.Python(module='voc_layers', layer=pylayer, ntop=2,
                               param_str=str(pydata_params))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
                               param=[dict(lr_mult=1, decay_mult=1),
                                      dict(lr_mult=2, decay_mult=0)])
    n.upscore = L.Deconvolution(n.score_fr,
                                convolution_param=dict(num_output=21,
                                                       kernel_size=64,
                                                       stride=32,
                                                       bias_term=False),
                                param=[dict(lr_mult=0)])
    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(n.score, n.label,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    return n.to_proto()
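
# The builders in this file are collected from several Caffe projects and
# assume helpers along these lines. This is a minimal sketch modeled on the
# public FCN reference code; later snippets use their own variants (e.g. a
# name-first conv_relu with lr1/lr2 arguments, fc/fc_relu helpers, a
# project-specific 'lt' layer module, and a 'config' module), so the exact
# defaults may differ.
import json

import numpy as np

import caffe
from caffe import layers as L, params as P
from caffe.coord_map import coord_map_from_to, crop
from caffe.proto import caffe_pb2


def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad,
                         param=[dict(lr_mult=1, decay_mult=1),
                                dict(lr_mult=2, decay_mult=0)])
    return conv, L.ReLU(conv, in_place=True)


def max_pool(bottom, ks=2, stride=2):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks,
                     stride=stride)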
def fcn(train, mask, batch_size=8):
    n = caffe.NetSpec()
    # n.data, n.sem, n.geo = L.Python(module='siftflow_layers',
    #         layer='SIFTFlowSegDataLayer', ntop=3,
    #         param_str=str(dict(siftflow_dir='../data/sift-flow',
    #                            split=split, seed=1337)))
    n.data = L.Data(backend=P.Data.LMDB, batch_size=batch_size, source=train,
                    transform_param=dict(scale=1. / 255), ntop=1)
    n.geo = L.Data(backend=P.Data.LMDB, batch_size=batch_size, source=mask,
                   ntop=1)

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    dropout = True
    Deconv_filters = 300
    if dropout:
        n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
        n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
        n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
        n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
        n.score_fr_geo = L.Convolution(n.drop7, num_output=Deconv_filters,
                                       kernel_size=1, pad=0,
                                       param=[dict(lr_mult=1, decay_mult=1),
                                              dict(lr_mult=2, decay_mult=0)])
    else:
        n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
        n.fc7, n.relu7 = conv_relu(n.relu6, 4096, ks=1, pad=0)
        # upsampling
        n.score_fr_geo = L.Convolution(n.relu7, num_output=Deconv_filters,
                                       kernel_size=1, pad=0,
                                       param=[dict(lr_mult=1, decay_mult=1),
                                              dict(lr_mult=2, decay_mult=0)])

    n.upscore2_geo = L.Deconvolution(
        n.score_fr_geo,
        convolution_param=dict(num_output=Deconv_filters, kernel_size=4,
                               stride=2, bias_term=False,
                               weight_filler=dict(type="msra")),
        param=[dict(lr_mult=0)])

    n.score_pool4_geo = L.Convolution(n.pool4, num_output=Deconv_filters,
                                      kernel_size=1, pad=0,
                                      param=[dict(lr_mult=1, decay_mult=1),
                                             dict(lr_mult=2, decay_mult=0)])
    n.score_pool4_geoc = crop(n.score_pool4_geo, n.upscore2_geo)
    n.fuse_pool4_geo = L.Eltwise(n.upscore2_geo, n.score_pool4_geoc,
                                 operation=P.Eltwise.SUM)
    n.upscore_pool4_geo = L.Deconvolution(
        n.fuse_pool4_geo,
        convolution_param=dict(num_output=Deconv_filters, kernel_size=4,
                               stride=2, bias_term=False,
                               weight_filler=dict(type="msra")),
        param=[dict(lr_mult=0)])

    n.score_pool3_geo = L.Convolution(n.pool3, num_output=Deconv_filters,
                                      kernel_size=1, pad=0,
                                      param=[dict(lr_mult=1, decay_mult=1),
                                             dict(lr_mult=2, decay_mult=0)])
    n.score_pool3_geoc = crop(n.score_pool3_geo, n.upscore_pool4_geo)
    n.fuse_pool3_geo = L.Eltwise(n.upscore_pool4_geo, n.score_pool3_geoc,
                                 operation=P.Eltwise.SUM)
    n.upscore8_geo = L.Deconvolution(
        n.fuse_pool3_geo,
        convolution_param=dict(num_output=Deconv_filters,
                               kernel_size=16, stride=8,  # ks 16
                               bias_term=False,
                               weight_filler=dict(type="msra")),
        param=[dict(lr_mult=0)])

    b = L.Convolution(n.upscore8_geo, kernel_size=3, stride=1, num_output=2,
                      pad=1,
                      param=[dict(lr_mult=1, decay_mult=1),
                             dict(lr_mult=2, decay_mult=0)],
                      weight_filler=dict(type='msra'))
    n.score_geo = max_pool(crop(b, n.data))
    # n.score_geo = max_pool(crop(n.upscore8_geo, n.data))
    n.loss_geo = L.SoftmaxWithLoss(n.score_geo, n.geo,
                                   loss_param=dict(normalize=False))
    #, ignore_label=255))

    return n.to_proto()
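
# Hypothetical usage sketch for the LMDB-driven variant above; the LMDB
# paths and output filename are placeholders, not from the original repo.
with open('train_geo.prototxt', 'w') as f:
    f.write(str(fcn('../data/train_images_lmdb', '../data/train_masks_lmdb',
                    batch_size=8)))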
def act_proto(mode, batchsize, exp_vocab_size, use_gt=True):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize})
    n.img_feature, n.label, n.exp, n.exp_out, n.exp_cont_1, n.exp_cont_2 = \
        L.Python(module='activity_data_provider_layer',
                 layer='ActivityDataProviderLayer',
                 param_str=mode_str, ntop=6)

    # Attention
    n.att_conv1 = L.Convolution(n.img_feature, kernel_size=1, stride=1,
                                num_output=512, pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1,
                                num_output=1, pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_reshaped = L.Reshape(
        n.att_conv2, reshape_param=dict(shape=dict(dim=[-1, 1, 14 * 14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att_map = L.Reshape(
        n.att_softmax, reshape_param=dict(shape=dict(dim=[-1, 1, 14, 14])))

    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1), ntop=1)
    n.att_feature = L.SoftAttention(n.img_feature, n.att_map, dummy)
    n.att_feature_resh = L.Reshape(
        n.att_feature, reshape_param=dict(shape=dict(dim=[-1, 2048])))

    # Prediction
    n.prediction = L.InnerProduct(n.att_feature_resh,
                                  num_output=config.NUM_OUTPUT_UNITS,
                                  weight_filler=dict(type='xavier'),
                                  param=fixed_weights)

    # Take the GT answer, or take the logits of the VQA model and get the
    # predicted answer to embed
    if use_gt:
        n.exp_emb_ans = L.Embed(n.label, input_dim=config.NUM_OUTPUT_UNITS,
                                num_output=300,
                                weight_filler=dict(type='uniform',
                                                   min=-0.08, max=0.08))
    else:
        n.vqa_ans = L.ArgMax(n.prediction, axis=1)
        n.exp_emb_ans = L.Embed(n.vqa_ans, input_dim=config.NUM_OUTPUT_UNITS,
                                num_output=300,
                                weight_filler=dict(type='uniform',
                                                   min=-0.08, max=0.08))
    n.exp_emb_ans_tanh = L.TanH(n.exp_emb_ans)
    n.exp_emb_ans2 = L.InnerProduct(n.exp_emb_ans_tanh, num_output=2048,
                                    weight_filler=dict(type='xavier'))

    # Merge activity answer and visual feature
    n.exp_emb_resh = L.Reshape(
        n.exp_emb_ans2, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.exp_emb_tiled_1 = L.Tile(n.exp_emb_resh, axis=2, tiles=14)
    n.exp_emb_tiled = L.Tile(n.exp_emb_tiled_1, axis=3, tiles=14)
    n.img_embed = L.Convolution(n.img_feature, kernel_size=1, stride=1,
                                num_output=2048, pad=0,
                                weight_filler=dict(type='xavier'))
    n.exp_eltwise = L.Eltwise(n.img_embed, n.exp_emb_tiled,
                              eltwise_param={'operation': P.Eltwise.PROD})
    n.exp_eltwise_sqrt = L.SignedSqrt(n.exp_eltwise)
    n.exp_eltwise_l2 = L.L2Normalize(n.exp_eltwise_sqrt)
    n.exp_eltwise_drop = L.Dropout(n.exp_eltwise_l2,
                                   dropout_param={'dropout_ratio': 0.3})

    # Attention for Explanation
    n.exp_att_conv1 = L.Convolution(n.exp_eltwise_drop, kernel_size=1,
                                    stride=1, num_output=512, pad=0,
                                    weight_filler=dict(type='xavier'))
    n.exp_att_conv1_relu = L.ReLU(n.exp_att_conv1)
    n.exp_att_conv2 = L.Convolution(n.exp_att_conv1_relu, kernel_size=1,
                                    stride=1, num_output=1, pad=0,
                                    weight_filler=dict(type='xavier'))
    n.exp_att_reshaped = L.Reshape(
        n.exp_att_conv2,
        reshape_param=dict(shape=dict(dim=[-1, 1, 14 * 14])))
    n.exp_att_softmax = L.Softmax(n.exp_att_reshaped, axis=2)
    n.exp_att_map = L.Reshape(
        n.exp_att_softmax,
        reshape_param=dict(shape=dict(dim=[-1, 1, 14, 14])))

    exp_dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                            data_filler=dict(type='constant', value=1),
                            ntop=1)
    n.exp_att_feature_prev = L.SoftAttention(n.img_feature, n.exp_att_map,
                                             exp_dummy)
    n.exp_att_feature_resh = L.Reshape(
        n.exp_att_feature_prev,
        reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.exp_att_feature_embed = L.InnerProduct(
        n.exp_att_feature_resh, num_output=2048,
        weight_filler=dict(type='xavier'))
    n.exp_att_feature = L.Eltwise(n.exp_emb_ans2, n.exp_att_feature_embed,
                                  eltwise_param={'operation': P.Eltwise.PROD})
    n.silence_exp_att = L.Silence(n.exp_att_feature, ntop=0)

    return n.to_proto()
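
# Minimal sketch of serializing the activity/explanation net above. The
# mode, batch size, and vocabulary size are illustrative placeholders;
# exp_vocab_size is accepted by the builder but not referenced in the body
# shown here.
with open('act_train.prototxt', 'w') as f:
    f.write(str(act_proto('train', batchsize=64, exp_vocab_size=1000)))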
def qlstm(mode, batchsize, T, question_vocab_size):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize})
    n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(
        module='vqa_data_provider_layer', layer='VQADataProviderLayer',
        param_str=mode_str, ntop=5)

    n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size,
                         num_output=300,
                         weight_filler=dict(type='uniform',
                                            min=-0.08, max=0.08))
    n.embed = L.TanH(n.embed_ba)
    concat_word_embed = [n.embed, n.glove]
    n.concat_embed = L.Concat(*concat_word_embed,
                              concat_param={'axis': 2})  # T x N x 600

    # LSTM1
    n.lstm1 = L.LSTM(n.concat_embed, n.cont,
                     recurrent_param=dict(
                         num_output=1024,
                         weight_filler=dict(type='uniform',
                                            min=-0.08, max=0.08),
                         bias_filler=dict(type='constant', value=0)))
    tops1 = L.Slice(n.lstm1, ntop=T, slice_param={'axis': 0})
    # keep only the last timestep; silence the rest
    for i in range(T - 1):
        n.__setattr__('slice_first' + str(i), tops1[int(i)])
        n.__setattr__('silence_data_first' + str(i),
                      L.Silence(tops1[int(i)], ntop=0))
    n.lstm1_out = tops1[T - 1]
    n.lstm1_reshaped = L.Reshape(
        n.lstm1_out, reshape_param=dict(shape=dict(dim=[-1, 1024])))
    n.lstm1_reshaped_droped = L.Dropout(n.lstm1_reshaped,
                                        dropout_param={'dropout_ratio': 0.3})
    n.lstm1_droped = L.Dropout(n.lstm1, dropout_param={'dropout_ratio': 0.3})

    # LSTM2
    n.lstm2 = L.LSTM(n.lstm1_droped, n.cont,
                     recurrent_param=dict(
                         num_output=1024,
                         weight_filler=dict(type='uniform',
                                            min=-0.08, max=0.08),
                         bias_filler=dict(type='constant', value=0)))
    tops2 = L.Slice(n.lstm2, ntop=T, slice_param={'axis': 0})
    for i in range(T - 1):
        n.__setattr__('slice_second' + str(i), tops2[int(i)])
        n.__setattr__('silence_data_second' + str(i),
                      L.Silence(tops2[int(i)], ntop=0))
    n.lstm2_out = tops2[T - 1]
    n.lstm2_reshaped = L.Reshape(
        n.lstm2_out, reshape_param=dict(shape=dict(dim=[-1, 1024])))
    n.lstm2_reshaped_droped = L.Dropout(n.lstm2_reshaped,
                                        dropout_param={'dropout_ratio': 0.3})
    concat_botom = [n.lstm1_reshaped_droped, n.lstm2_reshaped_droped]
    n.lstm_12 = L.Concat(*concat_botom)

    n.q_emb_tanh_droped_resh = L.Reshape(
        n.lstm_12, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.q_emb_tanh_droped_resh_tiled_1 = L.Tile(n.q_emb_tanh_droped_resh,
                                              axis=2, tiles=14)
    n.q_emb_tanh_droped_resh_tiled = L.Tile(n.q_emb_tanh_droped_resh_tiled_1,
                                            axis=3, tiles=14)
    n.i_emb_tanh_droped_resh = L.Reshape(
        n.img_feature, reshape_param=dict(shape=dict(dim=[-1, 2048, 14, 14])))
    n.blcf = L.CompactBilinear(n.q_emb_tanh_droped_resh_tiled,
                               n.i_emb_tanh_droped_resh,
                               compact_bilinear_param=dict(num_output=16000,
                                                           sum_pool=False))
    n.blcf_sign_sqrt = L.SignedSqrt(n.blcf)
    n.blcf_sign_sqrt_l2 = L.L2Normalize(n.blcf_sign_sqrt)
    n.blcf_droped = L.Dropout(n.blcf_sign_sqrt_l2,
                              dropout_param={'dropout_ratio': 0.1})

    # multi-channel attention
    n.att_conv1 = L.Convolution(n.blcf_droped, kernel_size=1, stride=1,
                                num_output=512, pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1,
                                num_output=2, pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_reshaped = L.Reshape(
        n.att_conv2, reshape_param=dict(shape=dict(dim=[-1, 2, 14 * 14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att = L.Reshape(n.att_softmax,
                      reshape_param=dict(shape=dict(dim=[-1, 2, 14, 14])))
    att_maps = L.Slice(n.att, ntop=2, slice_param={'axis': 1})
    n.att_map0 = att_maps[0]
    n.att_map1 = att_maps[1]
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1), ntop=1)
    n.att_feature0 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map0,
                                     dummy)
    n.att_feature1 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map1,
                                     dummy)
    n.att_feature0_resh = L.Reshape(
        n.att_feature0, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.att_feature1_resh = L.Reshape(
        n.att_feature1, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.att_feature = L.Concat(n.att_feature0_resh, n.att_feature1_resh)

    # merge attention and lstm with compact bilinear pooling
    n.att_feature_resh = L.Reshape(
        n.att_feature, reshape_param=dict(shape=dict(dim=[-1, 4096, 1, 1])))
    n.lstm_12_resh = L.Reshape(
        n.lstm_12, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.bc_att_lstm = L.CompactBilinear(n.att_feature_resh, n.lstm_12_resh,
                                      compact_bilinear_param=dict(
                                          num_output=16000, sum_pool=False))
    n.bc_sign_sqrt = L.SignedSqrt(n.bc_att_lstm)
    n.bc_sign_sqrt_l2 = L.L2Normalize(n.bc_sign_sqrt)
    n.bc_dropped = L.Dropout(n.bc_sign_sqrt_l2,
                             dropout_param={'dropout_ratio': 0.1})
    n.bc_dropped_resh = L.Reshape(
        n.bc_dropped, reshape_param=dict(shape=dict(dim=[-1, 16000])))

    n.prediction = L.InnerProduct(n.bc_dropped_resh, num_output=3000,
                                  weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    return n.to_proto()
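
# Sketch of writing out the VQA net above; T is the padded question length
# and must match what VQADataProviderLayer emits (the values here are
# placeholders).
with open('qlstm_train.prototxt', 'w') as f:
    f.write(str(qlstm('train', batchsize=64, T=15,
                      question_vocab_size=10000)))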
def body(self):
    n = caffe.NetSpec()
    conv_defaults = dict(
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type="gaussian", std=0.01),
    )
    lrn_defaults = dict(local_size=5, alpha=0.0001, beta=0.75)
    fc_defaults = dict(
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type="gaussian", std=0.005),
        bias_filler=dict(type="constant", value=1))

    n.conv1 = L.Convolution(bottom="data", num_output=96, kernel_size=11,
                            stride=4,
                            bias_filler=dict(type="constant", value=0),
                            **conv_defaults)
    n.relu1 = L.ReLU(n.conv1, in_place=True)
    n.pool1 = L.Pooling(n.relu1, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    n.norm1 = L.LRN(n.pool1, **lrn_defaults)

    n.conv2 = L.Convolution(n.norm1, num_output=256, kernel_size=5, stride=1,
                            pad=2, group=2,
                            bias_filler=dict(type="constant", value=1),
                            **conv_defaults)
    n.relu2 = L.ReLU(n.conv2, in_place=True)
    n.pool2 = L.Pooling(n.relu2, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    n.norm2 = L.LRN(n.pool2, **lrn_defaults)

    n.conv3 = L.Convolution(n.norm2, num_output=384, kernel_size=3, stride=1,
                            pad=1,
                            bias_filler=dict(type="constant", value=0),
                            **conv_defaults)
    n.relu3 = L.ReLU(n.conv3, in_place=True)
    n.conv4 = L.Convolution(n.relu3, num_output=384, kernel_size=3, stride=1,
                            pad=1, group=2,
                            bias_filler=dict(type="constant", value=1),
                            **conv_defaults)
    n.relu4 = L.ReLU(n.conv4, in_place=True)
    n.conv5 = L.Convolution(n.relu4, num_output=256, kernel_size=3, stride=1,
                            pad=1, group=2,
                            bias_filler=dict(type="constant", value=1),
                            **conv_defaults)
    n.relu5 = L.ReLU(n.conv5, in_place=True)
    n.pool5 = L.Pooling(n.relu5, pool=P.Pooling.MAX, kernel_size=3, stride=2)

    n.fc6 = L.InnerProduct(n.pool5, num_output=4096, **fc_defaults)
    n.relu6 = L.ReLU(n.fc6, in_place=True)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7 = L.InnerProduct(n.drop6, num_output=4096, **fc_defaults)
    n.relu7 = L.ReLU(n.fc7, in_place=True)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
    n.out = L.InnerProduct(n.drop7, num_output=self.params['num_output'],
                           param=[dict(lr_mult=1, decay_mult=1),
                                  dict(lr_mult=2, decay_mult=0)],
                           weight_filler=dict(type="gaussian", std=0.01),
                           bias_filler=dict(type="constant", value=0))
    return n.to_proto(), "out"
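
# Sketch: body() wires conv1 to a bottom literally named "data", so callers
# prepend their own data layer; it returns the proto plus the name of the
# final top. 'builder' is a hypothetical instance of the enclosing class.
proto, last_top = builder.body()
print(last_top)  # -> "out"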
def simple_net(split, initialize_fc8=False, cur_shape=None, next_shape=None,
               batch_size=1, num_threads=1, max_queue_size=5):
    # Get crop layer parameters by tracing a throwaway net
    tmp_net = caffe.NetSpec()
    tmp_net.im, tmp_net.label = L.MemoryData(batch_size=1, channels=3,
                                             height=244, width=244, ntop=2)
    conv_vgg(tmp_net, tmp_net.im, suffix='', last_layer_pad=0,
             first_layer_pad=100)
    tmp_net.fc6, tmp_net.relu6 = conv_relu(tmp_net.conv5_3, 4096, ks=7,
                                           dilation=4)
    tmp_net.fc7, tmp_net.relu7 = conv_relu(tmp_net.relu6, 4096, ks=1, pad=0)
    tmp_net.fc8 = L.Convolution(tmp_net.relu7, kernel_size=1, num_output=2)
    tmp_net.upscore = L.Deconvolution(tmp_net.fc8,
                                      convolution_param=dict(kernel_size=16,
                                                             stride=8,
                                                             num_output=2))
    ax, a, b = coord_map_from_to(tmp_net.upscore, tmp_net.im)
    assert (a == 1).all(), 'scale mismatch on crop (a = {})'.format(a)
    assert (b <= 0).all(), 'cannot crop negative offset (b = {})'.format(b)
    assert (np.round(b) == b).all(), \
        'cannot crop noninteger offset (b = {})'.format(b)

    # Create network
    n = caffe.NetSpec()
    if split in ('train', 'val'):
        pydata_params = dict(batch_size=batch_size,
                             im_shape=tuple(next_shape),
                             num_threads=num_threads,
                             max_queue_size=max_queue_size)
        n.cur_im, n.masked_im, n.next_im, n.label = L.Python(
            module='coco_transformed_datalayers_prefetch',
            layer='CocoTransformedDataLayerPrefetch', ntop=4,
            param_str=str(pydata_params))
    elif split == 'deploy':
        n.cur_im, n.label_1 = L.MemoryData(batch_size=1, channels=3,
                                           height=244, width=244, ntop=2)
        n.masked_im, n.label_2 = L.MemoryData(batch_size=1, channels=3,
                                              height=244, width=244, ntop=2)
        n.next_im, n.label_3 = L.MemoryData(batch_size=1, channels=3,
                                            height=244, width=244, ntop=2)
    else:
        raise Exception

    if cur_shape is None or next_shape is None:
        concat_pad = np.zeros((2,))
    else:
        concat_pad = (next_shape - cur_shape) / 2.0 / 8.0
    if not all(concat_pad == np.round(concat_pad)):
        raise Exception

    conv_vgg(n, n.cur_im, suffix='c', last_layer_pad=concat_pad,
             first_layer_pad=100)
    conv_vgg(n, n.masked_im, suffix='m', last_layer_pad=concat_pad,
             first_layer_pad=100)
    conv_vgg(n, n.next_im, suffix='n', last_layer_pad=0, first_layer_pad=100)

    # concatenation
    n.concat1 = L.Concat(n.relu5_3c, n.relu5_3m, n.relu5_3n)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.concat1, 4096, ks=7, dilation=4)
    if split == 'train':
        n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
        n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
        n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
        n.fc8 = L.Convolution(n.drop7, kernel_size=1,
                              param=[dict(lr_mult=1, decay_mult=1),
                                     dict(lr_mult=2, decay_mult=0)],
                              num_output=2)
    else:
        n.fc7, n.relu7 = conv_relu(n.relu6, 4096, ks=1, pad=0)
        if initialize_fc8:
            n.fc8 = L.Convolution(n.relu7, kernel_size=1,
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)],
                                  weight_filler=dict(type='gaussian',
                                                     std=.01),
                                  num_output=2)
        else:
            n.fc8 = L.Convolution(n.relu7, kernel_size=1,
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)],
                                  num_output=2)

    n.upscore = L.Deconvolution(n.fc8,
                                convolution_param=dict(
                                    kernel_size=16, stride=8, num_output=2,
                                    group=2,
                                    weight_filler=dict(type='bilinear'),
                                    bias_term=False),
                                param=dict(lr_mult=0, decay_mult=0))
    n.score = L.Crop(n.upscore, n.next_im,
                     crop_param=dict(
                         axis=ax + 1,  # +1 for first cropping dim.
                         offset=list(-np.round(b).astype(int))))

    if split != 'deploy':
        n.loss = L.SoftmaxWithLoss(n.score, n.label,
                                   loss_param=dict(ignore_label=255))
    else:
        n.prop = L.Softmax(n.score)
    return n
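
# Sketch: simple_net returns the NetSpec itself rather than a proto, so
# serialize via to_proto(). The shapes below are placeholders.
spec = simple_net('deploy', cur_shape=np.array([244, 244]),
                  next_shape=np.array([244, 244]))
with open('deploy.prototxt', 'w') as f:
    f.write(str(spec.to_proto()))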
def vgg_16(lmdb, bs_train=16, bs_val=50, rate=0, lmdb_flag=False,
           not_deploy=True):
    # NOTE: despite the rate=0 default, 'rate' is indexed below and must be
    # a sequence of 13 per-conv-layer channel multipliers.
    n = caffe.NetSpec()
    if not_deploy:
        if lmdb_flag:
            n.data, n.label = L.Data(source=lmdb + 'ilsvrc12_train_lmdb',
                                     backend=P.Data.LMDB,
                                     include=dict(phase=caffe_pb2.TRAIN),
                                     batch_size=bs_train, ntop=2,
                                     transform_param=dict(
                                         crop_size=224,
                                         mean_value=[104, 117, 123],
                                         mirror=True))
            data_str = n.to_proto()
            n.data, n.label = L.Data(source=lmdb + 'ilsvrc12_val_lmdb',
                                     backend=P.Data.LMDB,
                                     include=dict(phase=caffe_pb2.TEST),
                                     batch_size=bs_val, ntop=2,
                                     transform_param=dict(
                                         crop_size=224,
                                         mean_value=[104, 117, 123],
                                         mirror=False))
        else:
            n.data, n.label = L.Data(source=lmdb + 'ilsvrc12_train_leveldb',
                                     backend=P.Data.LEVELDB,
                                     include=dict(phase=caffe_pb2.TRAIN),
                                     batch_size=bs_train, ntop=2,
                                     transform_param=dict(
                                         crop_size=224,
                                         mean_value=[104, 117, 123],
                                         mirror=True))
            data_str = n.to_proto()
            n.data, n.label = L.Data(source=lmdb + 'ilsvrc12_val_leveldb',
                                     backend=P.Data.LEVELDB,
                                     include=dict(phase=caffe_pb2.TEST),
                                     batch_size=bs_val, ntop=2,
                                     transform_param=dict(
                                         crop_size=224,
                                         mean_value=[104, 117, 123],
                                         mirror=False))
    else:
        # deploy: a raw input stanza replaces the data layers; the dummy
        # L.Data layer is stripped from the proto text below
        data_str = ('input: "data"\ninput_dim: 1\ninput_dim: 3\n'
                    'input_dim: 224\ninput_dim: 224')
        n.data = L.Data()

    # the net itself
    n.conv1_1, n.relu1_1 = conv_relu(n.data, nout=int(rate[0] * 64), pad=1,
                                     ks=3)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, nout=int(rate[1] * 64),
                                     pad=1, ks=3)
    n.pool1 = max_pool(n.relu1_2, ks=2, stride=2)
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, nout=int(rate[2] * 128), pad=1,
                                     ks=3)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, nout=int(rate[3] * 128),
                                     pad=1, ks=3)
    n.pool2 = max_pool(n.relu2_2, ks=2, stride=2)
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, nout=int(rate[4] * 256), pad=1,
                                     ks=3)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, nout=int(rate[5] * 256),
                                     pad=1, ks=3)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, nout=int(rate[6] * 256),
                                     pad=1, ks=3)
    n.pool3 = max_pool(n.relu3_3, ks=2, stride=2)
    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, nout=int(rate[7] * 512), pad=1,
                                     ks=3)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, nout=int(rate[8] * 512),
                                     pad=1, ks=3)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, nout=int(rate[9] * 512),
                                     pad=1, ks=3)
    n.pool4 = max_pool(n.relu4_3, ks=2, stride=2)
    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, nout=int(rate[10] * 512),
                                     pad=1, ks=3)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, nout=int(rate[11] * 512),
                                     pad=1, ks=3)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, nout=int(rate[12] * 512),
                                     pad=1, ks=3)
    n.pool5 = max_pool(n.relu5_3, ks=2, stride=2)

    n.fc6, n.relu6 = fc_relu(n.pool5, nout=4096)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = fc_relu(n.relu6, nout=4096)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
    n.fc8 = L.InnerProduct(n.relu7, num_output=1000,
                           param=[dict(lr_mult=1, decay_mult=1),
                                  dict(lr_mult=2, decay_mult=0)])

    if not_deploy:
        n.loss = L.SoftmaxWithLoss(n.fc8, n.label)
        n.acc_top_1 = L.Accuracy(n.fc8, n.label, top_k=1)
        n.acc_top_5 = L.Accuracy(n.fc8, n.label, top_k=5)
    else:
        n.prob = L.Softmax(n.fc8)
    model_str = str(n.to_proto())
    if not not_deploy:
        # strip the dummy data layer's text (its first 54 characters) and
        # the trailing newline so only the real layers remain
        model_str = model_str[54:-1]
    return str(data_str) + '\n' + model_str
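
# Sketch: [1.0] * 13 recovers the stock VGG-16 widths; smaller multipliers
# shrink individual layers. The LMDB root below is a placeholder.
vgg_proto = vgg_16('/data/ilsvrc12/', rate=[1.0] * 13, lmdb_flag=True,
                   not_deploy=True)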
def __wide_basic(self, net, top, basename, num_input, num_output, stride,
                 generate_deploy):
    if config['bottleneck']:
        conv_params = [
            {'kernel_size': 1, 'stride': stride, 'pad': 0,
             'num_output': num_output / 4},
            {'kernel_size': 3, 'stride': 1, 'pad': 1,
             'num_output': num_output / 4},
            {'kernel_size': 1, 'stride': 1, 'pad': 0,
             'num_output': num_output},
        ]
        dropout_layers = [1]
    else:
        conv_params = [
            {'kernel_size': 3, 'stride': stride, 'pad': 1,
             'num_output': num_output},
            {'kernel_size': 3, 'stride': 1, 'pad': 1,
             'num_output': num_output},
        ]
        dropout_layers = [1]

    resunit_layer = top
    shortcut_layer = top
    for i, p in enumerate(conv_params):
        branch_layer_name = '%sa_%d' % (basename, i + 1)
        add_dropout = i in dropout_layers and config['dropout']
        if generate_deploy:
            bn = L.BatchNorm(resunit_layer, in_place=i > 0,
                             batch_norm_param={'use_global_stats': True})
        else:
            bn = L.BatchNorm(resunit_layer, in_place=i > 0)
        scale = L.Scale(bn, in_place=True, scale_param={'bias_term': True})
        relu = L.ReLU(scale, in_place=True)
        if add_dropout:
            drop = L.Dropout(relu, in_place=True,
                             dropout_ratio=config['dropout'])
        conv = L.Convolution(drop if add_dropout else relu,
                             weight_filler={'type': 'msra'},
                             bias_term=False, **p)
        net[branch_layer_name + '_bn'] = bn
        net[branch_layer_name + '_scale'] = scale
        net[branch_layer_name + '_relu'] = relu
        if add_dropout:
            net[branch_layer_name + '_dropout'] = drop
        net[branch_layer_name + '_%dx%d_s%d' %
            (p['kernel_size'], p['kernel_size'], p['stride'])] = conv
        resunit_layer = conv

    # projection shortcut when the channel count changes
    if num_input != num_output:
        conv = L.Convolution(shortcut_layer, kernel_size=1, stride=stride,
                             pad=0, num_output=num_output,
                             weight_filler={'type': 'xavier'},
                             bias_term=False)
        net['%sb_1x1_s%d' % (basename, stride)] = conv
        shortcut_layer = conv

    eltwise = L.Eltwise(resunit_layer, shortcut_layer,
                        operation=P.Eltwise.SUM)
    net[basename] = eltwise
    return eltwise
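
# Hypothetical sketch of stacking the wide-basic units above into a group
# (inside the same class, with 'net' a NetSpec and 'config' in scope; the
# unit names and widths are illustrative).
top = self.__wide_basic(net, top, 'res2a', num_input=16, num_output=160,
                        stride=1, generate_deploy=False)
top = self.__wide_basic(net, top, 'res2b', num_input=160, num_output=160,
                        stride=1, generate_deploy=False)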
def cnn(split):
    n = caffe.NetSpec()
    pydata_params = dict(
        dataset_dir='/home/kevin/dataset/washington_rgbd_dataset',
        split=split, mean=(104.00698793, 116.66876762, 122.67891434),
        seed=1337, img_size=(224, 224), crop_size=(224, 224, 224, 224))
    if split == 'train':
        pylayer = 'WashingtonDataLayer'
        pydata_params['randomize'] = True
        pydata_params['batch_size'] = 32
    elif split == 'test':
        pylayer = 'WashingtonDataLayer'
        pydata_params['randomize'] = False
        pydata_params['batch_size'] = 1
    else:
        # deploy: single-image input. The original passed ntop=2 to a
        # one-top assignment and four single-dim shapes; a single
        # 1x1x224x224 blob with ntop=1 is used here. Note that 'pylayer' is
        # left undefined on this path, so the Python data layer below still
        # assumes a train/test split.
        n.img = L.Input(name='input', ntop=1,
                        shape=[dict(dim=[1, 1, 224, 224])])

    #---------------------------------Data Layer---------------------------------------#
    n.rgb, n.depth, n.label = L.Python(
        name="data", module='data_layers.washington_data_layer',
        layer=pylayer, ntop=3, param_str=str(pydata_params))

    #---------------------------------RGB-Net---------------------------------------#
    # the vgg 16 base net (lr1=lr2=0 freezes these layers)
    n.conv1_1, n.relu1_1 = conv_relu("conv1_1", n.rgb, 64, pad=1, lr1=0,
                                     lr2=0)
    n.conv1_2, n.relu1_2 = conv_relu("conv1_2", n.relu1_1, 64, lr1=0, lr2=0)
    n.rgb_pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu("conv2_1", n.rgb_pool1, 128, lr1=0,
                                     lr2=0)
    n.conv2_2, n.relu2_2 = conv_relu("conv2_2", n.relu2_1, 128, lr1=0, lr2=0)
    n.rgb_pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu("conv3_1", n.rgb_pool2, 256, lr1=0,
                                     lr2=0)
    n.conv3_2, n.relu3_2 = conv_relu("conv3_2", n.relu3_1, 256, lr1=0, lr2=0)
    n.conv3_3, n.relu3_3 = conv_relu("conv3_3", n.relu3_2, 256, lr1=0, lr2=0)
    n.rgb_pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu("conv4_1", n.rgb_pool3, 512, lr1=0,
                                     lr2=0)
    n.conv4_2, n.relu4_2 = conv_relu("conv4_2", n.relu4_1, 512, lr1=0, lr2=0)
    n.conv4_3, n.relu4_3 = conv_relu("conv4_3", n.relu4_2, 512, lr1=0, lr2=0)
    n.rgb_pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu("conv5_1", n.rgb_pool4, 512, lr1=0,
                                     lr2=0)
    n.conv5_2, n.relu5_2 = conv_relu("conv5_2", n.relu5_1, 512, lr1=0, lr2=0)
    n.conv5_3, n.relu5_3 = conv_relu("conv5_3", n.relu5_2, 512, lr1=0, lr2=0)
    n.rgb_pool5 = max_pool(n.relu5_3)

    # fully connected
    n.rgb_fc6, n.rgb_relu6 = fc_relu(n.rgb_pool5, 4096, lr1=0, lr2=0)
    n.rgb_drop6 = L.Dropout(n.rgb_relu6, dropout_ratio=0.5, in_place=True)
    n.rgb_fc7, n.rgb_relu7 = fc_relu(n.rgb_drop6, 4096, lr1=0, lr2=0)
    n.rgb_drop7 = L.Dropout(n.rgb_relu7, dropout_ratio=0.5, in_place=True)
    n.rgb_fc8 = fc(n.rgb_drop7, 51, lr1=0, lr2=0)

    #---------------------------------Depth-Net---------------------------------------#
    # the base net
    n.conv1, n.relu1 = conv_relu("conv1", n.depth, 128, ks=5, stride=2,
                                 pad=2, lr1=0, lr2=0)
    n.depth_pool1 = max_pool(n.relu1, ks=3)
    n.norm1 = L.LRN(n.depth_pool1,
                    lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75,
                                   k=2))
    n.conv2, n.relu2 = conv_relu("conv2", n.norm1, 256, ks=5, stride=1,
                                 pad=2, lr1=0, lr2=0)
    n.depth_pool2 = max_pool(n.relu2, ks=3)
    n.norm2 = L.LRN(n.depth_pool2,
                    lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75,
                                   k=2))
    n.conv3, n.relu3 = conv_relu("conv3", n.norm2, 384, ks=3, pad=1, group=2,
                                 lr1=0, lr2=0)
    n.depth_pool3 = max_pool(n.relu3, ks=3)
    n.conv4, n.relu4 = conv_relu("conv4", n.depth_pool3, 512, ks=3, pad=1,
                                 group=1, lr1=0, lr2=0)
    n.conv5, n.relu5 = conv_relu("conv5", n.relu4, 512, ks=3, pad=1, group=1,
                                 lr1=0, lr2=0)
    n.depth_pool5 = max_pool(n.relu5, ks=3)

    n.depth_fc6, n.depth_relu6 = fc_relu(n.depth_pool5, 4096, lr1=0, lr2=0)
    n.depth_drop6 = L.Dropout(n.depth_relu6, dropout_ratio=0.5,
                              in_place=True)
    n.depth_fc7, n.depth_relu7 = fc_relu(n.depth_drop6, 4096, lr1=0, lr2=0)
    n.depth_drop7 = L.Dropout(n.depth_relu7, dropout_ratio=0.5,
                              in_place=True)
    n.depth_fc8 = fc(n.depth_drop7, 51, lr1=0, lr2=0)

    #-----------------------------------final output---------------------------------#
    # Concatenation of the two modality features
    n.concat = L.Concat(n.rgb_drop7, n.depth_drop7, axis=1)
    # n.fuse_fc1 = fc(n.concat, 4096, lr1=1, lr2=2)
    # n.fuse_drop1 = L.Dropout(n.fuse_fc1, dropout_ratio=0.9, in_place=True)
    # n.fuse_fc2 = fc(n.fuse_drop1, 4096, lr1=1, lr2=2)
    # n.fuse_drop2 = L.Dropout(n.fuse_fc2, dropout_ratio=0.9, in_place=True)
    n.rgbd_fc8 = fc(n.concat, 51, lr1=1, lr2=2)

    if split != 'deploy':
        n.rgb_accuracy = L.Accuracy(n.rgb_fc8, n.label)
        n.rgb_loss = L.SoftmaxWithLoss(n.rgb_fc8, n.label)
        n.depth_accuracy = L.Accuracy(n.depth_fc8, n.label)
        n.depth_loss = L.SoftmaxWithLoss(n.depth_fc8, n.label)
        n.rgbd_accuracy = L.Accuracy(n.rgbd_fc8, n.label)
        n.rgbd_loss = L.SoftmaxWithLoss(n.rgbd_fc8, n.label)

    return n.to_proto()
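
# The RGB-D builders above and below lean on name-aware helpers (a
# conv_relu taking a layer name first, plus fc/fc_relu with lr1/lr2
# learning-rate multipliers). A minimal sketch of plausible fc/fc_relu
# signatures inferred from the call sites; the originals may differ.
def fc(bottom, nout, lr1=1, lr2=2):
    return L.InnerProduct(bottom, num_output=nout,
                          param=[dict(lr_mult=lr1, decay_mult=1),
                                 dict(lr_mult=lr2, decay_mult=0)])


def fc_relu(bottom, nout, lr1=1, lr2=2):
    fc_layer = fc(bottom, nout, lr1, lr2)
    return fc_layer, L.ReLU(fc_layer, in_place=True)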
def buildnet(inputdb, mean_file, batch_size, height, width, nchannels,
             net_type="train"):
    # 'augment_data', 'use_batch_norm', 'use_dropout', and the layer-helper
    # module 'lt' are module-level names in the original file
    net = caffe.NetSpec()
    crop_size = -1
    if augment_data:
        crop_size = width
    train = False
    if net_type == "train":
        train = True
    # NOTE: the computed crop_size is shadowed by the hard-coded
    # crop_size=768 passed below
    data_layers, label = lt.data_layer_trimese(net, inputdb, mean_file,
                                               batch_size, net_type, height,
                                               width, nchannels, [4, 8],
                                               crop_size=768)

    # First conv layer: one branch per plane
    branch_ends = []
    for n, layer in enumerate(data_layers):
        conv1 = lt.convolution_layer(net, layer, "plane%d_conv1" % (n),
                                     "tri_conv1", 32, 2, 7, 3, 0.05,
                                     addbatchnorm=True, train=train)
        pool1 = lt.pool_layer(net, conv1, "plane%d_pool1" % (n), 3, 1)
        conv2 = lt.convolution_layer(net, pool1, "plane%d_conv2" % (n),
                                     "tri_conv2", 16, 2, 3, 3, 0.05,
                                     addbatchnorm=True, train=train)
        conv3 = lt.convolution_layer(net, conv2, "plane%d_conv3" % (n),
                                     "tri_conv3", 16, 2, 3, 3, 0.05,
                                     addbatchnorm=True, train=train)
        pool3 = lt.pool_layer(net, conv3, "plane%d_pool3" % (n), 3, 1)
        branch_ends.append(pool3)
    concat = lt.concat_layer(net, "mergeplanes", *branch_ends)

    resnet1 = lt.resnet_module(net, concat, "resnet1", 16 * 3, 3, 1, 1, 8,
                               16, use_batch_norm, train)
    resnet2 = lt.resnet_module(net, resnet1, "resnet2", 16, 3, 1, 1, 8, 16,
                               use_batch_norm, train)
    resnet3 = lt.resnet_module(net, resnet2, "resnet3", 16, 3, 1, 1, 8, 32,
                               use_batch_norm, train)
    resnet4 = lt.resnet_module(net, resnet3, "resnet4", 32, 3, 1, 1, 8, 32,
                               use_batch_norm, train)
    resnet5 = lt.resnet_module(net, resnet4, "resnet5", 32, 3, 1, 1, 8, 32,
                               use_batch_norm, train)
    resnet6 = lt.resnet_module(net, resnet5, "resnet6", 32, 3, 1, 1, 16, 64,
                               use_batch_norm, train)
    resnet7 = lt.resnet_module(net, resnet6, "resnet7", 64, 3, 1, 1, 16, 64,
                               use_batch_norm, train)
    resnet8 = lt.resnet_module(net, resnet7, "resnet8", 64, 3, 1, 1, 16, 64,
                               use_batch_norm, train)
    resnet9 = lt.resnet_module(net, resnet8, "resnet9", 64, 3, 1, 1, 32, 128,
                               use_batch_norm, train)

    net.lastpool = lt.pool_layer(net, resnet9, "lastpool", 7, 1,
                                 P.Pooling.AVE)
    lastpool_layer = net.lastpool
    if use_dropout:
        net.lastpool_dropout = L.Dropout(
            net.lastpool, in_place=True,
            dropout_param=dict(dropout_ratio=0.5))
        lastpool_layer = net.lastpool_dropout

    fc1 = lt.final_fully_connect(net, lastpool_layer, nclasses=256)
    fc2 = lt.final_fully_connect(net, fc1, nclasses=4096)
    fc3 = lt.final_fully_connect(net, fc2, nclasses=2)

    if train:
        net.loss = L.SoftmaxWithLoss(fc3, net.label)
        net.acc = L.Accuracy(fc3, net.label)
    else:
        net.probt = L.Softmax(fc3)
        net.acc = L.Accuracy(fc3, net.label)
    return net
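
# Sketch: buildnet returns the NetSpec, so serialize it with to_proto().
# The database and mean-file paths are placeholders.
trimese_net = buildnet('train.db', 'mean.binaryproto', batch_size=4,
                       height=768, width=768, nchannels=3, net_type='train')
with open('trimese_train.prototxt', 'w') as f:
    f.write(str(trimese_net.to_proto()))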
def cnn(split):
    n = caffe.NetSpec()
    pydata_params = dict(
        dataset_dir='/home/kevin/dataset/washington_rgbd_dataset',
        split=split, mean=(104.00698793, 116.66876762, 122.67891434),
        seed=1337, batch_size=128, img_size=(227, 227))
    if split == 'deploy':
        # single-image input (ntop=1; the original passed ntop=2, which
        # cannot be bound to a single attribute, and four single-dim shapes)
        n.img = L.Input(name='input', ntop=1,
                        shape=[dict(dim=[1, 1, 224, 224])])
    else:
        pylayer = 'WashingtonDataLayer'

        #---------------------------------Data Layer---------------------------------------#
        n.rgb, n.depth, n.label = L.Python(
            name="data", module='data_layers.washington_data_layer',
            layer=pylayer, ntop=3, param_str=str(pydata_params))

    #---------------------------------RGB-Net---------------------------------------#
    # the caffe-net (alex-net)
    n.rgb_conv1, n.rgb_relu1 = conv_relu(n.rgb, 96, ks=11, stride=4, pad=0)
    n.rgb_pool1 = max_pool(n.rgb_relu1, ks=3)
    n.rgb_norm1 = L.LRN(n.rgb_pool1,
                        lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75,
                                       k=2))
    n.rgb_conv2, n.rgb_relu2 = conv_relu(n.rgb_norm1, 256, ks=5, pad=2,
                                         group=2)
    n.rgb_pool2 = max_pool(n.rgb_relu2, ks=3)
    n.rgb_norm2 = L.LRN(n.rgb_pool2,
                        lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75,
                                       k=2))
    n.rgb_conv3, n.rgb_relu3 = conv_relu(n.rgb_norm2, 384, ks=3, pad=1,
                                         lr1=1, lr2=2)
    n.rgb_conv4, n.rgb_relu4 = conv_relu(n.rgb_relu3, 384, ks=3, pad=1,
                                         group=2, lr1=1, lr2=2)
    n.rgb_conv5, n.rgb_relu5 = conv_relu(n.rgb_relu4, 256, ks=3, pad=1,
                                         group=2, lr1=1, lr2=2)
    n.rgb_pool5 = max_pool(n.rgb_relu5, ks=3)

    # fully connected
    n.rgb_fc6, n.rgb_relu6 = fc_relu(n.rgb_pool5, 4096, lr1=1, lr2=2)
    n.rgb_drop6 = L.Dropout(n.rgb_relu6, dropout_ratio=0.5, in_place=True)
    n.rgb_fc7, n.rgb_relu7 = fc_relu(n.rgb_drop6, 4096, lr1=1, lr2=2)
    n.rgb_drop7 = L.Dropout(n.rgb_relu7, dropout_ratio=0.5, in_place=True)
    n.rgb_fc8 = fc(n.rgb_drop7, 51, lr1=1, lr2=2)

    #---------------------------------Depth-Net---------------------------------------#
    # the caffe-net (alex-net)
    n.depth_conv1, n.depth_relu1 = conv_relu(n.depth, 96, ks=11, stride=4,
                                             pad=0)
    n.depth_pool1 = max_pool(n.depth_relu1, ks=3)
    n.depth_norm1 = L.LRN(n.depth_pool1,
                          lrn_param=dict(local_size=5, alpha=0.0005,
                                         beta=0.75, k=2))
    n.depth_conv2, n.depth_relu2 = conv_relu(n.depth_norm1, 256, ks=5, pad=2,
                                             group=2)
    n.depth_pool2 = max_pool(n.depth_relu2, ks=3)
    n.depth_norm2 = L.LRN(n.depth_pool2,
                          lrn_param=dict(local_size=5, alpha=0.0005,
                                         beta=0.75, k=2))
    n.depth_conv3, n.depth_relu3 = conv_relu(n.depth_norm2, 384, ks=3, pad=1,
                                             lr1=1, lr2=2)
    n.depth_conv4, n.depth_relu4 = conv_relu(n.depth_relu3, 384, ks=3, pad=1,
                                             group=2, lr1=1, lr2=2)
    n.depth_conv5, n.depth_relu5 = conv_relu(n.depth_relu4, 256, ks=3, pad=1,
                                             group=2, lr1=1, lr2=2)
    n.depth_pool5 = max_pool(n.depth_relu5, ks=3)

    # fully connected
    n.depth_fc6, n.depth_relu6 = fc_relu(n.depth_pool5, 4096, lr1=1, lr2=2)
    n.depth_drop6 = L.Dropout(n.depth_relu6, dropout_ratio=0.5,
                              in_place=True)
    n.depth_fc7, n.depth_relu7 = fc_relu(n.depth_drop6, 4096, lr1=1, lr2=2)
    n.depth_drop7 = L.Dropout(n.depth_relu7, dropout_ratio=0.5,
                              in_place=True)
    n.depth_fc8 = fc(n.depth_drop7, 51, lr1=1, lr2=2)

    #-----------------------------------final output---------------------------------#
    # Concatenation of the two modality features
    n.concat = L.Concat(n.rgb_drop7, n.depth_drop7, axis=1)
    n.rgbd_fc8 = fc(n.concat, 51, lr1=1, lr2=2)

    if split != 'deploy':
        n.rgb_accuracy = L.Accuracy(n.rgb_fc8, n.label)
        n.rgb_loss = L.SoftmaxWithLoss(n.rgb_fc8, n.label)
        n.depth_accuracy = L.Accuracy(n.depth_fc8, n.label)
        n.depth_loss = L.SoftmaxWithLoss(n.depth_fc8, n.label)
        n.overall_accuracy = L.Accuracy(n.rgbd_fc8, n.label)
        n.overall_loss = L.SoftmaxWithLoss(n.rgbd_fc8, n.label)

    return n.to_proto()
def create_net(phase):
    global train_transform_param
    global test_transform_param
    train_transform_param = {
        'mirror': True,
        'mean_file': Params['mean_file']
    }
    test_transform_param = {
        'mean_file': Params['mean_file']
    }
    if phase == 'train':
        lmdb_file = Params['train_lmdb']
        transform_param = train_transform_param
        batch_size = Params['batch_size_per_device']
    else:
        lmdb_file = Params['test_lmdb']
        transform_param = test_transform_param
        batch_size = Params['test_batch_size']

    net = caffe.NetSpec()
    net.data, net.label = L.Data(
        batch_size=batch_size, backend=P.Data.LMDB, source=lmdb_file,
        transform_param=transform_param,
        # include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
        ntop=2)

    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1),
                  dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.0001),
        'bias_filler': dict(type='constant')}
    net.conv1 = L.Convolution(net.data, num_output=16, kernel_size=3,
                              **kwargs)
    net.pool1 = L.Pooling(net.conv1, pool=P.Pooling.MAX, kernel_size=3,
                          stride=2)
    net.relu1 = L.ReLU(net.pool1, in_place=True)

    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1),
                  dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.005),
        'bias_filler': dict(type='constant')}
    net.fc2 = L.InnerProduct(net.pool1, num_output=16, **kwargs)
    net.relu2 = L.ReLU(net.fc2, in_place=True)
    net.drop2 = L.Dropout(net.fc2, in_place=True,
                          dropout_param=dict(dropout_ratio=0.5))

    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=100),
                  dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0)}
    net.fc3 = L.InnerProduct(net.fc2, num_output=2, **kwargs)

    if phase == 'train':
        net.loss = L.SoftmaxWithLoss(net.fc3, net.label)
    elif phase == 'test':
        net.accuracy = L.Accuracy(net.fc3, net.label)
    else:
        net.prob = L.Softmax(net.fc3)

    net_proto = net.to_proto()
    if phase == 'deploy':
        # strip the Data layer and declare a raw input blob instead
        del net_proto.layer[0]
        # del net_proto.layer[-1]
        net_proto.input.extend(['data'])
        net_proto.input_dim.extend([64, 3, 12, 36])
    net_proto.name = '{}_{}'.format(Params['model_name'], phase)
    return net_proto
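
# Sketch: emitting all three phases; Params is the module-level config dict
# this builder reads (keys inferred from the code above).
for phase in ('train', 'test', 'deploy'):
    with open('{}.prototxt'.format(phase), 'w') as f:
        f.write(str(create_net(phase)))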
def fcn(split, tops):
    n = caffe.NetSpec()
    n.data, n.label = L.Python(
        module='nyud_layers', layer='NYUDSegDataLayer', ntop=2,
        param_str=str(
            dict(image_path='/media/ssd500/autocity_dataset/images/',
                 image_list="/media/ssd500/autocity_dataset/image_train.txt",
                 label_list="/media/ssd500/autocity_dataset/label_train.txt",
                 label_path="/media/ssd500/autocity_dataset/labels/0/",
                 split=split, tops=tops, seed=1337)))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(n.drop7, num_output=2, kernel_size=1, pad=0,
                               param=[dict(lr_mult=1, decay_mult=1),
                                      dict(lr_mult=2, decay_mult=0)])
    n.upscore = L.Deconvolution(n.score_fr,
                                convolution_param=dict(num_output=2,
                                                       kernel_size=64,
                                                       stride=32,
                                                       bias_term=False),
                                param=[dict(lr_mult=0)])
    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(n.score, n.label,
                               loss_param=dict(normalize=False))
    #, ignore_label=255

    return n.to_proto()
def create_cifar10_googlenet(input_shape, classes=1000, deploy=False):
    net_name = "cifar10_googlenet"
    data_root_dir = "/home/tim/datasets/cifar10/"
    if deploy:
        net_filename = "{0}_deploy.prototxt".format(net_name)
    else:
        net_filename = "{0}_train_test.prototxt".format(net_name)

    # net name
    with open(net_filename, "w") as f:
        f.write('name: "{0}"\n'.format(net_name))

    if deploy:
        net = caffe.NetSpec()
        """
        The conventional blob dimensions for batches of image data are
        number N x channel K x height H x width W. Blob memory is row-major
        in layout, so the last / rightmost dimension changes fastest. For
        example, in a 4D blob, the value at index (n, k, h, w) is physically
        located at index ((n * K + k) * H + h) * W + w.
        """
        # batch_size, channel, height, width
        net.data = L.Input(input_param=dict(
            shape=[dict(dim=list(input_shape))]))
    else:
        net = caffe.NetSpec()
        batch_size = 32
        lmdb = data_root_dir + "train_lmdb"
        net.data, net.label = L.Data(
            batch_size=batch_size, backend=P.Data.LMDB, source=lmdb,
            transform_param=dict(
                mirror=True,
                # crop_size=32,
                mean_file=data_root_dir + "mean.binaryproto"),
            # mean_value=[104, 117, 123]),
            ntop=2,
            include=dict(phase=caffe_pb2.Phase.Value("TRAIN")))
        with open(net_filename, "a") as f:
            f.write(str(net.to_proto()))
        del net

        net = caffe.NetSpec()
        batch_size = 50
        lmdb = data_root_dir + "test_lmdb"
        net.data, net.label = L.Data(
            batch_size=batch_size, backend=P.Data.LMDB, source=lmdb,
            transform_param=dict(
                mirror=False,
                # crop_size=224,
                mean_file=data_root_dir + "mean.binaryproto"),
            # mean_value=[104, 117, 123]),
            ntop=2,
            include=dict(phase=caffe_pb2.Phase.Value("TEST")))

    # padding = 'same', equal to pad = 1
    net.conv1_7x7_2s = L.Convolution(
        net.data, kernel_size=7, num_output=64, pad=3, stride=2,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type="constant", value=0),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    net.conv1_7x7_2s_relu = L.ReLU(net.conv1_7x7_2s, in_place=True)
    # net.conv1_maxpool1_3x3_2s = L.Pooling(net.conv1_7x7_2s_relu,
    #     kernel_size=3, stride=2, pool=P.Pooling.MAX)
    net.conv1_norm1 = L.LRN(net.conv1_7x7_2s_relu, local_size=5,
                            alpha=0.0001, beta=0.75)

    net.conv2_1x1_1v = L.Convolution(
        net.conv1_norm1, kernel_size=1, num_output=64,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type="constant", value=0),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    net.conv2_1x1_1v_relu = L.ReLU(net.conv2_1x1_1v, in_place=True)
    net.conv2_3x3_1s = L.Convolution(
        net.conv2_1x1_1v_relu, kernel_size=3, num_output=192, pad=1,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type="constant", value=0),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    net.conv2_3x3_1s_relu = L.ReLU(net.conv2_3x3_1s, in_place=True)
    net.conv2_norm2 = L.LRN(net.conv2_3x3_1s_relu, local_size=5,
                            alpha=0.0001, beta=0.75)
    # net.conv2_pool_3x3_2s = L.Pooling(net.conv2_norm2, kernel_size=3,
    #     stride=2, pool=P.Pooling.MAX)

    # inception(3a)
    inception3a_output = inception(net=net, pre_layer=net.conv2_norm2,
                                   conv1x1_num=64, conv3x3_reduce_num=96,
                                   conv3x3_num=128, conv5x5_reduce_num=16,
                                   conv5x5_num=32,
                                   maxpool3x3_proj1x1_num=32,
                                   name="inception3a")
    # inception(3b)
    inception3b_output = inception(net=net, pre_layer=inception3a_output,
                                   conv1x1_num=128, conv3x3_reduce_num=128,
                                   conv3x3_num=192, conv5x5_reduce_num=32,
                                   conv5x5_num=96,
                                   maxpool3x3_proj1x1_num=64,
                                   name="inception3b")
    # max pool
    net.inception3_maxpool = L.Pooling(inception3b_output, kernel_size=3,
                                       stride=2, pool=P.Pooling.MAX)

    # inception(4a)
    inception4a_output = inception(net=net,
                                   pre_layer=net.inception3_maxpool,
                                   conv1x1_num=192, conv3x3_reduce_num=96,
                                   conv3x3_num=208, conv5x5_reduce_num=16,
                                   conv5x5_num=48,
                                   maxpool3x3_proj1x1_num=64,
                                   name="inception4a")

    # loss1 (auxiliary classifier, train/test only)
    if not deploy:
        # avg pool
        net.loss1_avgpool5x5_3v = L.Pooling(inception4a_output,
                                            kernel_size=5, stride=3,
                                            pool=P.Pooling.AVE)
        # conv1x1_1s
        net.loss1_conv1x1_1s = L.Convolution(
            net.loss1_avgpool5x5_3v, kernel_size=1, num_output=128,
            weight_filler=dict(type="xavier"),
            bias_filler=dict(type="constant", value=0.2),
            param=[dict(lr_mult=1, decay_mult=1),
                   dict(lr_mult=2, decay_mult=0)])
        net.loss1_conv1x1_1s_relu = L.ReLU(net.loss1_conv1x1_1s,
                                           in_place=True)
        net.loss1_fc1 = L.InnerProduct(
            net.loss1_conv1x1_1s_relu, num_output=1024,
            weight_filler=dict(type="xavier"),
            bias_filler=dict(type="constant", value=0),
            param=[dict(lr_mult=1, decay_mult=1),
                   dict(lr_mult=2, decay_mult=0)])
        net.loss1_fc1_relu1 = L.ReLU(net.loss1_fc1, in_place=True)
        net.loss1_dropout = L.Dropout(net.loss1_fc1_relu1,
                                      dropout_param=dict(dropout_ratio=0.7),
                                      in_place=True)
        net.loss1_pred_fc = L.InnerProduct(
            net.loss1_dropout, num_output=classes,
            weight_filler=dict(type="xavier"),
            bias_filler=dict(type="constant", value=0),
            param=[dict(lr_mult=1, decay_mult=1),
                   dict(lr_mult=2, decay_mult=0)])
        net.loss1 = L.SoftmaxWithLoss(net.loss1_pred_fc, net.label,
                                      loss_weight=0.3)
        # net.loss1_accuracy_top_1 = L.Accuracy(net.loss1_pred_fc, net.label,
        #     include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        # net.loss1_accuracy_top_5 = L.Accuracy(net.loss1_pred_fc, net.label,
        #     include=dict(phase=caffe_pb2.Phase.Value('TEST')),
        #     accuracy_param=dict(top_k=5))

    # inception(4b)
    inception4b_output = inception(net=net, pre_layer=inception4a_output,
                                   conv1x1_num=160, conv3x3_reduce_num=112,
                                   conv3x3_num=224, conv5x5_reduce_num=24,
                                   conv5x5_num=64,
                                   maxpool3x3_proj1x1_num=64,
                                   name="inception4b")
    # inception(4c)
    inception4c_output = inception(net=net, pre_layer=inception4b_output,
                                   conv1x1_num=128, conv3x3_reduce_num=128,
                                   conv3x3_num=256, conv5x5_reduce_num=24,
                                   conv5x5_num=64,
                                   maxpool3x3_proj1x1_num=64,
                                   name="inception4c")
    # inception(4d)
    inception4d_output = inception(net=net, pre_layer=inception4c_output,
                                   conv1x1_num=112, conv3x3_reduce_num=144,
                                   conv3x3_num=288, conv5x5_reduce_num=32,
                                   conv5x5_num=64,
                                   maxpool3x3_proj1x1_num=64,
                                   name="inception4d")

    # loss2 (auxiliary classifier, train/test only)
    if not deploy:
        # avg pool
        net.loss2_avgpool5x5_3v = L.Pooling(inception4d_output,
                                            kernel_size=5, stride=3,
                                            pool=P.Pooling.AVE)
        # conv1x1_1s
        net.loss2_conv1x1_1s = L.Convolution(
            net.loss2_avgpool5x5_3v, kernel_size=1, num_output=128,
            weight_filler=dict(type="xavier"),
            bias_filler=dict(type="constant", value=0.2),
            param=[dict(lr_mult=1, decay_mult=1),
                   dict(lr_mult=2, decay_mult=0)])
        net.loss2_conv1x1_1s_relu = L.ReLU(net.loss2_conv1x1_1s,
                                           in_place=True)
        net.loss2_fc1 = L.InnerProduct(
            net.loss2_conv1x1_1s_relu, num_output=1024,
            weight_filler=dict(type="xavier"),
            bias_filler=dict(type="constant", value=0),
            param=[dict(lr_mult=1, decay_mult=1),
                   dict(lr_mult=2, decay_mult=0)])
        net.loss2_fc1_relu1 = L.ReLU(net.loss2_fc1, in_place=True)
        net.loss2_dropout = L.Dropout(net.loss2_fc1_relu1,
                                      dropout_param=dict(dropout_ratio=0.7),
                                      in_place=True)
        net.loss2_pred_fc = L.InnerProduct(
            net.loss2_dropout, num_output=classes,
            weight_filler=dict(type="xavier"),
            bias_filler=dict(type="constant", value=0),
            param=[dict(lr_mult=1, decay_mult=1),
                   dict(lr_mult=2, decay_mult=0)])
        net.loss2 = L.SoftmaxWithLoss(net.loss2_pred_fc, net.label,
                                      loss_weight=0.3)
        # net.loss2_accuracy_top_1 = L.Accuracy(net.loss2_pred_fc, net.label,
        #     include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        # net.loss2_accuracy_top_5 = L.Accuracy(net.loss2_pred_fc, net.label,
        #     include=dict(phase=caffe_pb2.Phase.Value('TEST')),
        #     accuracy_param=dict(top_k=5))

    # inception(4e)
    inception4e_output = inception(net=net, pre_layer=inception4d_output,
                                   conv1x1_num=256, conv3x3_reduce_num=160,
                                   conv3x3_num=320, conv5x5_reduce_num=32,
                                   conv5x5_num=128,
                                   maxpool3x3_proj1x1_num=128,
                                   name="inception4e")
    # max pool
    net.inception4_maxpool = L.Pooling(inception4e_output, kernel_size=2,
                                       stride=2, pool=P.Pooling.MAX)

    # inception(5a)
    inception5a_output = inception(net=net,
                                   pre_layer=net.inception4_maxpool,
                                   conv1x1_num=256, conv3x3_reduce_num=160,
                                   conv3x3_num=320, conv5x5_reduce_num=32,
                                   conv5x5_num=128,
                                   maxpool3x3_proj1x1_num=128,
                                   name="inception5a")
    # inception(5b)
    inception5b_output = inception(net=net, pre_layer=inception5a_output,
                                   conv1x1_num=384, conv3x3_reduce_num=192,
                                   conv3x3_num=384, conv5x5_reduce_num=48,
                                   conv5x5_num=128,
                                   maxpool3x3_proj1x1_num=128,
                                   name="inception5b")
    # avg pool
    net.avgpool7x7_s1 = L.Pooling(inception5b_output, kernel_size=4,
                                  stride=1, pool=P.Pooling.AVE)
    # dropout
    net.avgpool7x7_s1_dropout = L.Dropout(
        net.avgpool7x7_s1, dropout_param=dict(dropout_ratio=0.4),
        in_place=True)
    # pred fc
    net.loss_pred_fc = L.InnerProduct(
        net.avgpool7x7_s1_dropout, num_output=classes,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type="constant", value=0),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    # loss
    if deploy:
        net.prob = L.Softmax(net.loss_pred_fc)
    else:
        net.loss = L.SoftmaxWithLoss(net.loss_pred_fc, net.label)
        net.accuracy = L.Accuracy(
            net.loss_pred_fc, net.label,
            include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    with open(net_filename, "a") as f:
        f.write(str(net.to_proto()))
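
# Sketch: input_shape follows Caffe's N x C x H x W blob layout; the
# CIFAR-10 class count and shapes here are illustrative.
create_cifar10_googlenet((1, 3, 32, 32), classes=10, deploy=True)
create_cifar10_googlenet((32, 3, 32, 32), classes=10, deploy=False)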
def create_net(lmdb, batch_size, mean_file, model):
    n = caffe.NetSpec()
    # data layer: model=False builds the TRAIN-phase net (phase=0),
    # model=True the TEST-phase net (phase=1)
    if not model:
        n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB,
                                 source=lmdb, include=dict(phase=0),
                                 transform_param=dict(scale=1. / 255,
                                                      mirror=True,
                                                      crop_size=227,
                                                      mean_file=mean_file),
                                 ntop=2)
    else:
        n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB,
                                 source=lmdb, include=dict(phase=1),
                                 transform_param=dict(scale=1. / 255,
                                                      mirror=True,
                                                      crop_size=227,
                                                      mean_file=mean_file),
                                 ntop=2)

    # convolution layer conv1
    n.conv1 = L.Convolution(
        n.data,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=11, stride=4, num_output=96,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0))
    # ReLU layer relu1
    n.relu1 = L.ReLU(n.conv1, in_place=True)
    # LRN layer norm1
    n.norm1 = L.LRN(n.conv1, local_size=5, alpha=0.0001, beta=0.75)
    # pooling layer pool1
    n.pool1 = L.Pooling(n.norm1, kernel_size=3, stride=2,
                        pool=P.Pooling.MAX)

    # convolution layer conv2
    n.conv2 = L.Convolution(
        n.pool1,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=5, num_output=256, pad=2, group=2,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0.1))
    # ReLU layer relu2
    n.relu2 = L.ReLU(n.conv2, in_place=True)
    # LRN layer norm2
    n.norm2 = L.LRN(n.conv2, local_size=5, alpha=0.0001, beta=0.75)
    # pooling layer pool2
    n.pool2 = L.Pooling(n.norm2, kernel_size=3, stride=2,
                        pool=P.Pooling.MAX)

    # convolution layer conv3
    n.conv3 = L.Convolution(
        n.pool2,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=3, num_output=384, pad=1,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0))
    # ReLU layer relu3
    n.relu3 = L.ReLU(n.conv3, in_place=True)

    # convolution layer conv4
    n.conv4 = L.Convolution(
        n.conv3,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=3, num_output=384, pad=1, group=2,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0.1))
    # ReLU layer relu4
    n.relu4 = L.ReLU(n.conv4, in_place=True)

    # convolution layer conv5
    n.conv5 = L.Convolution(
        n.conv4,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=3, num_output=256, pad=1, group=2,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0.1))
    # ReLU layer relu5
    n.relu5 = L.ReLU(n.conv5, in_place=True)
    # pooling layer pool5
    n.pool5 = L.Pooling(n.conv5, kernel_size=3, stride=2,
                        pool=P.Pooling.MAX)

    # fully connected layer fc6
    n.fc6 = L.InnerProduct(
        n.pool5,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        num_output=4096,
        weight_filler=dict(type="gaussian", std=0.005),
        bias_filler=dict(type='constant', value=0.1))
    n.relu6 = L.ReLU(n.fc6, in_place=True)
    # dropout layer drop6 (probability of dropping activations)
    n.drop6 = L.Dropout(n.fc6, dropout_ratio=0.5, in_place=True)

    # fully connected layer fc7
    n.fc7 = L.InnerProduct(
        n.fc6,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        num_output=4096,
        weight_filler=dict(type="gaussian", std=0.005),
        bias_filler=dict(type='constant', value=0.1))
    # ReLU layer relu7
    n.relu7 = L.ReLU(n.fc7, in_place=True)
    # dropout layer drop7 (probability of dropping activations)
    n.drop7 = L.Dropout(n.fc7, dropout_ratio=0.5, in_place=True)

    # fully connected layer fc8
    n.fc8 = L.InnerProduct(
        n.fc7,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        num_output=1000,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0))

    if model:
        n.acc = L.Accuracy(n.fc8, n.label)
    else:
        n.loss = L.SoftmaxWithLoss(n.fc8, n.label)
    return n.to_proto()
def lenet(lmdb_data, lmdb_label, batch_size, deploy, crop=64, mirror=False):
    """Simple LeNet to predict cdf."""
    data_transforms = dict(scale=1.)
    if crop:
        # will crop images to [crop]x[crop] with random center
        data_transforms['crop_size'] = crop
    if mirror:
        # will randomly flip images
        data_transforms['mirror'] = 1

    n = caffe.NetSpec()
    if deploy:
        input_ = "data"
        dim1 = batch_size
        dim2 = 3  # need to change these manually
        dim3 = 64
        dim4 = 64
        # placeholder layer; its text is spliced out of the proto below
        n.data = L.Layer()
    else:
        n.data = L.Data(batch_size=batch_size, backend=P.Data.LMDB,
                        source=lmdb_data, transform_param=data_transforms,
                        ntop=1)
        n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB,
                         source=lmdb_label, ntop=1)

    # first convolutional layer
    n.conv1 = L.Convolution(n.data, kernel_size=5, num_output=40,
                            weight_filler=dict(type='xavier'))
    n.norm1 = L.BatchNorm(n.conv1)
    n.relu1 = L.ReLU(n.norm1, in_place=True)
    n.pool1 = L.Pooling(n.relu1, kernel_size=2, stride=2,
                        pool=P.Pooling.MAX)

    # second convolutional layer
    n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=40,
                            weight_filler=dict(type='xavier'))
    n.norm2 = L.BatchNorm(n.conv2)
    n.relu2 = L.ReLU(n.norm2, in_place=True)
    n.pool2 = L.Pooling(n.relu2, kernel_size=2, stride=2,
                        pool=P.Pooling.MAX)

    # fully connected layers
    n.drop = L.Dropout(n.pool2, dropout_ratio=0.5)
    n.ip1 = L.InnerProduct(n.drop, num_output=600,
                           weight_filler=dict(type='xavier'))
    n.out = L.Sigmoid(n.ip1)

    if deploy:
        deploy_str = ('input: {}\ninput_dim: {}\n'
                      'input_dim: {}\ninput_dim: {}\n'
                      'input_dim: {}').format('"%s"' % input_, dim1, dim2,
                                              dim3, dim4)
        # drop the proto header and the placeholder data layer, keeping
        # everything from the first real layer on
        return (deploy_str + '\n' + 'layer {' +
                'layer {'.join(str(n.to_proto()).split('layer {')[2:]))
    else:
        n.loss = L.EuclideanLoss(n.out, n.label)
        return str(n.to_proto())
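
# Sketch: lenet() returns prototxt text in both modes; in deploy mode it
# splices a raw input stanza in place of the two LMDB Data layers. The
# LMDB names are placeholders.
train_str = lenet('imgs_lmdb', 'labels_lmdb', batch_size=32, deploy=False)
deploy_str = lenet('imgs_lmdb', 'labels_lmdb', batch_size=1, deploy=True)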
def VGGNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False,
               dilated=False, nopool=False, dropout=True, freeze_layers=[]):
    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)}

    assert from_layer in net.keys()
    net.conv1_1 = L.Convolution(net[from_layer], num_output=64, pad=1,
                                kernel_size=3, **kwargs)
    net.relu1_1 = L.ReLU(net.conv1_1, in_place=True)
    net.conv1_2 = L.Convolution(net.relu1_1, num_output=64, pad=1,
                                kernel_size=3, **kwargs)
    net.relu1_2 = L.ReLU(net.conv1_2, in_place=True)

    if nopool:
        name = 'conv1_3'
        net[name] = L.Convolution(net.relu1_2, num_output=64, pad=1,
                                  kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool1'
        net.pool1 = L.Pooling(net.relu1_2, pool=P.Pooling.MAX, kernel_size=2,
                              stride=2)

    net.conv2_1 = L.Convolution(net[name], num_output=128, pad=1,
                                kernel_size=3, **kwargs)
    net.relu2_1 = L.ReLU(net.conv2_1, in_place=True)
    net.conv2_2 = L.Convolution(net.relu2_1, num_output=128, pad=1,
                                kernel_size=3, **kwargs)
    net.relu2_2 = L.ReLU(net.conv2_2, in_place=True)

    if nopool:
        name = 'conv2_3'
        net[name] = L.Convolution(net.relu2_2, num_output=128, pad=1,
                                  kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool2'
        net[name] = L.Pooling(net.relu2_2, pool=P.Pooling.MAX, kernel_size=2,
                              stride=2)

    net.conv3_1 = L.Convolution(net[name], num_output=256, pad=1,
                                kernel_size=3, **kwargs)
    net.relu3_1 = L.ReLU(net.conv3_1, in_place=True)
    net.conv3_2 = L.Convolution(net.relu3_1, num_output=256, pad=1,
                                kernel_size=3, **kwargs)
    net.relu3_2 = L.ReLU(net.conv3_2, in_place=True)
    net.conv3_3 = L.Convolution(net.relu3_2, num_output=256, pad=1,
                                kernel_size=3, **kwargs)
    net.relu3_3 = L.ReLU(net.conv3_3, in_place=True)

    if nopool:
        name = 'conv3_4'
        net[name] = L.Convolution(net.relu3_3, num_output=256, pad=1,
                                  kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool3'
        net[name] = L.Pooling(net.relu3_3, pool=P.Pooling.MAX, kernel_size=2,
                              stride=2)

    net.conv4_1 = L.Convolution(net[name], num_output=512, pad=1,
                                kernel_size=3, **kwargs)
    net.relu4_1 = L.ReLU(net.conv4_1, in_place=True)
    net.conv4_2 = L.Convolution(net.relu4_1, num_output=512, pad=1,
                                kernel_size=3, **kwargs)
    net.relu4_2 = L.ReLU(net.conv4_2, in_place=True)
    net.conv4_3 = L.Convolution(net.relu4_2, num_output=512, pad=1,
                                kernel_size=3, **kwargs)
    net.relu4_3 = L.ReLU(net.conv4_3, in_place=True)

    if nopool:
        name = 'conv4_4'
        net[name] = L.Convolution(net.relu4_3, num_output=512, pad=1,
                                  kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool4'
        net[name] = L.Pooling(net.relu4_3, pool=P.Pooling.MAX, kernel_size=2,
                              stride=2)

    net.conv5_1 = L.Convolution(net[name], num_output=512, pad=1,
                                kernel_size=3, **kwargs)
    net.relu5_1 = L.ReLU(net.conv5_1, in_place=True)
    net.conv5_2 = L.Convolution(net.relu5_1, num_output=512, pad=1,
                                kernel_size=3, **kwargs)
    net.relu5_2 = L.ReLU(net.conv5_2, in_place=True)
    net.conv5_3 = L.Convolution(net.relu5_2, num_output=512, pad=1,
                                kernel_size=3, **kwargs)
    net.relu5_3 = L.ReLU(net.conv5_3, in_place=True)

    if need_fc:
        if dilated:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3, num_output=512, pad=1,
                                          kernel_size=3, stride=1, **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX, pad=1,
                                      kernel_size=3, stride=1)
        else:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3, num_output=512, pad=1,
                                          kernel_size=3, stride=2, **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX,
                                      kernel_size=2, stride=2)

        if fully_conv:
            if dilated:
                if reduced:
                    net.fc6 = L.Convolution(net[name], num_output=1024, pad=6,
                                            kernel_size=3, dilation=6,
                                            **kwargs)
                else:
                    net.fc6 = L.Convolution(net[name], num_output=4096, pad=6,
                                            kernel_size=7, dilation=2,
                                            **kwargs)
            else:
                if reduced:
                    net.fc6 = L.Convolution(net[name], num_output=1024, pad=3,
                                            kernel_size=3, dilation=3,
                                            **kwargs)
                else:
                    net.fc6 = L.Convolution(net[name], num_output=4096, pad=3,
                                            kernel_size=7, **kwargs)

            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5,
                                      in_place=True)

            if reduced:
                net.fc7 = L.Convolution(net.relu6, num_output=1024,
                                        kernel_size=1, **kwargs)
            else:
                net.fc7 = L.Convolution(net.relu6, num_output=4096,
                                        kernel_size=1, **kwargs)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5,
                                      in_place=True)
        else:
            # use net[name] (was net.pool5) so this branch also works with
            # nopool=True, where the last layer is 'conv5_4'
            net.fc6 = L.InnerProduct(net[name], num_output=4096)
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5,
                                      in_place=True)
            net.fc7 = L.InnerProduct(net.relu6, num_output=4096)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5,
                                      in_place=True)

    # Update freeze layers.
    kwargs['param'] = [dict(lr_mult=0, decay_mult=0),
                       dict(lr_mult=0, decay_mult=0)]
    layers = net.keys()
    for freeze_layer in freeze_layers:
        if freeze_layer in layers:
            net.update(freeze_layer, kwargs)

    return net
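# --- Usage sketch (assumption, not from the original source): VGGNetBody()
# mutates a NetSpec that already holds the blob named by from_layer. Note
# that keys() and update() on NetSpec come from the SSD fork of Caffe. The
# LMDB path below is a hypothetical placeholder.
def vgg_body_example():
    net = caffe.NetSpec()
    net.data, net.label = L.Data(batch_size=32, backend=P.Data.LMDB,
                                 source='train_lmdb', ntop=2)
    VGGNetBody(net, from_layer='data', need_fc=True, fully_conv=True,
               reduced=True, dilated=True, dropout=False,
               freeze_layers=['conv1_1', 'conv1_2'])
    return str(net.to_proto())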
def mfb_coatt(mode, batchsize, T, question_vocab_size, folder):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize,
                           'folder': folder})
    if mode == 'val':
        n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(
            module='vqa_data_layer', layer='VQADataProviderLayer',
            param_str=mode_str, ntop=5)
    else:
        n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(
            module='vqa_data_layer_kld', layer='VQADataProviderLayer',
            param_str=mode_str, ntop=5)

    n.embed = L.Embed(n.data, input_dim=question_vocab_size, num_output=300,
                      weight_filler=dict(type='xavier'))
    n.embed_tanh = L.TanH(n.embed)
    concat_word_embed = [n.embed_tanh, n.glove]
    n.concat_embed = L.Concat(*concat_word_embed,
                              concat_param={'axis': 2})  # T x N x 600

    # LSTM
    n.lstm1 = L.LSTM(n.concat_embed, n.cont,
                     recurrent_param=dict(
                         num_output=config.LSTM_UNIT_NUM,
                         weight_filler=dict(type='xavier')))
    n.lstm1_droped = L.Dropout(
        n.lstm1, dropout_param={'dropout_ratio': config.LSTM_DROPOUT_RATIO})
    n.lstm1_resh = L.Permute(n.lstm1_droped,
                             permute_param=dict(order=[1, 2, 0]))
    n.lstm1_resh2 = L.Reshape(n.lstm1_resh,
                              reshape_param=dict(shape=dict(dim=[0, 0, 0, 1])))

    '''
    Question Attention
    '''
    n.qatt_conv1 = L.Convolution(n.lstm1_resh2, kernel_size=1, stride=1,
                                 num_output=512, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.qatt_relu = L.ReLU(n.qatt_conv1)
    n.qatt_conv2 = L.Convolution(n.qatt_relu, kernel_size=1, stride=1,
                                 num_output=config.NUM_QUESTION_GLIMPSE, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.qatt_reshape = L.Reshape(
        n.qatt_conv2,
        reshape_param=dict(shape=dict(dim=[
            -1, config.NUM_QUESTION_GLIMPSE, config.MAX_WORDS_IN_QUESTION, 1
        ])))  # N*NUM_QUESTION_GLIMPSE*15
    n.qatt_softmax = L.Softmax(n.qatt_reshape, axis=2)
    qatt_maps = L.Slice(n.qatt_softmax, ntop=config.NUM_QUESTION_GLIMPSE,
                        slice_param={'axis': 1})
    dummy_lstm = L.DummyData(shape=dict(dim=[batchsize, 1]),
                             data_filler=dict(type='constant', value=1),
                             ntop=1)
    qatt_feature_list = []
    for i in xrange(config.NUM_QUESTION_GLIMPSE):
        if config.NUM_QUESTION_GLIMPSE == 1:
            n.__setattr__('qatt_feat%d' % i,
                          L.SoftAttention(n.lstm1_resh2, qatt_maps,
                                          dummy_lstm))
        else:
            n.__setattr__('qatt_feat%d' % i,
                          L.SoftAttention(n.lstm1_resh2, qatt_maps[i],
                                          dummy_lstm))
        qatt_feature_list.append(n.__getattr__('qatt_feat%d' % i))
    n.qatt_feat_concat = L.Concat(*qatt_feature_list)

    '''
    Image Attention with MFB
    '''
    n.q_feat_resh = L.Reshape(
        n.qatt_feat_concat, reshape_param=dict(shape=dict(dim=[0, -1, 1, 1])))
    n.i_feat_resh = L.Reshape(
        n.img_feature,
        reshape_param=dict(shape=dict(
            dim=[0, -1, config.IMG_FEAT_WIDTH, config.IMG_FEAT_WIDTH])))
    n.iatt_q_proj = L.InnerProduct(n.q_feat_resh,
                                   num_output=config.JOINT_EMB_SIZE,
                                   weight_filler=dict(type='xavier'))
    n.iatt_q_resh = L.Reshape(
        n.iatt_q_proj,
        reshape_param=dict(shape=dict(dim=[-1, config.JOINT_EMB_SIZE, 1, 1])))
    n.iatt_q_tile1 = L.Tile(n.iatt_q_resh, axis=2, tiles=config.IMG_FEAT_WIDTH)
    n.iatt_q_tile2 = L.Tile(n.iatt_q_tile1, axis=3,
                            tiles=config.IMG_FEAT_WIDTH)
    n.iatt_i_conv = L.Convolution(n.i_feat_resh, kernel_size=1, stride=1,
                                  num_output=config.JOINT_EMB_SIZE, pad=0,
                                  weight_filler=dict(type='xavier'))
    n.iatt_i_resh1 = L.Reshape(
        n.iatt_i_conv,
        reshape_param=dict(shape=dict(dim=[
            -1, config.JOINT_EMB_SIZE, config.IMG_FEAT_WIDTH,
            config.IMG_FEAT_WIDTH
        ])))
    n.iatt_iq_eltwise = L.Eltwise(n.iatt_q_tile2, n.iatt_i_resh1,
                                  eltwise_param=dict(operation=0))  # 0 = PROD
    n.iatt_iq_droped = L.Dropout(
        n.iatt_iq_eltwise,
        dropout_param={'dropout_ratio': config.MFB_DROPOUT_RATIO})
    n.iatt_iq_resh2 = L.Reshape(
        n.iatt_iq_droped,
        reshape_param=dict(shape=dict(
            dim=[-1, config.JOINT_EMB_SIZE, 196, 1])))  # 196 spatial positions
    n.iatt_iq_permute1 = L.Permute(n.iatt_iq_resh2,
                                   permute_param=dict(order=[0, 2, 1, 3]))
    # renamed from a second 'iatt_iq_resh2', which silently shadowed the
    # reshape top above in the NetSpec top dictionary
    n.iatt_iq_resh3 = L.Reshape(
        n.iatt_iq_permute1,
        reshape_param=dict(shape=dict(dim=[
            -1, config.IMG_FEAT_SIZE, config.MFB_OUT_DIM,
            config.MFB_FACTOR_NUM
        ])))
    n.iatt_iq_sumpool = L.Pooling(
        n.iatt_iq_resh3, pool=P.Pooling.SUM,
        pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.iatt_iq_permute2 = L.Permute(n.iatt_iq_sumpool,
                                   permute_param=dict(order=[0, 2, 1, 3]))
    n.iatt_iq_sqrt = L.SignedSqrt(n.iatt_iq_permute2)
    n.iatt_iq_l2 = L.L2Normalize(n.iatt_iq_sqrt)

    ## 2 conv layers 1000 -> 512 -> 2
    n.iatt_conv1 = L.Convolution(n.iatt_iq_l2, kernel_size=1, stride=1,
                                 num_output=512, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.iatt_relu = L.ReLU(n.iatt_conv1)
    n.iatt_conv2 = L.Convolution(n.iatt_relu, kernel_size=1, stride=1,
                                 num_output=config.NUM_IMG_GLIMPSE, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.iatt_resh = L.Reshape(
        n.iatt_conv2,
        reshape_param=dict(shape=dict(
            dim=[-1, config.NUM_IMG_GLIMPSE, config.IMG_FEAT_SIZE])))
    n.iatt_softmax = L.Softmax(n.iatt_resh, axis=2)
    n.iatt_softmax_resh = L.Reshape(
        n.iatt_softmax,
        reshape_param=dict(shape=dict(dim=[
            -1, config.NUM_IMG_GLIMPSE, config.IMG_FEAT_WIDTH,
            config.IMG_FEAT_WIDTH
        ])))
    iatt_maps = L.Slice(n.iatt_softmax_resh, ntop=config.NUM_IMG_GLIMPSE,
                        slice_param={'axis': 1})
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1), ntop=1)
    iatt_feature_list = []
    for i in xrange(config.NUM_IMG_GLIMPSE):
        if config.NUM_IMG_GLIMPSE == 1:
            n.__setattr__('iatt_feat%d' % i,
                          L.SoftAttention(n.i_feat_resh, iatt_maps, dummy))
        else:
            n.__setattr__('iatt_feat%d' % i,
                          L.SoftAttention(n.i_feat_resh, iatt_maps[i], dummy))
        n.__setattr__('iatt_feat%d_resh' % i,
                      L.Reshape(n.__getattr__('iatt_feat%d' % i),
                                reshape_param=dict(shape=dict(dim=[0, -1]))))
        iatt_feature_list.append(n.__getattr__('iatt_feat%d_resh' % i))
    n.iatt_feat_concat = L.Concat(*iatt_feature_list)
    n.iatt_feat_concat_resh = L.Reshape(
        n.iatt_feat_concat, reshape_param=dict(shape=dict(dim=[0, -1, 1, 1])))

    '''
    Fine-grained Image-Question MFB fusion
    '''
    n.mfb_q_proj = L.InnerProduct(n.q_feat_resh,
                                  num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_i_proj = L.InnerProduct(n.iatt_feat_concat_resh,
                                  num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_iq_eltwise = L.Eltwise(n.mfb_q_proj, n.mfb_i_proj,
                                 eltwise_param=dict(operation=0))  # 0 = PROD
    n.mfb_iq_drop = L.Dropout(
        n.mfb_iq_eltwise,
        dropout_param={'dropout_ratio': config.MFB_DROPOUT_RATIO})
    n.mfb_iq_resh = L.Reshape(
        n.mfb_iq_drop,
        reshape_param=dict(shape=dict(
            dim=[-1, 1, config.MFB_OUT_DIM, config.MFB_FACTOR_NUM])))
    n.mfb_iq_sumpool = L.Pooling(
        n.mfb_iq_resh, pool=P.Pooling.SUM,
        pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.mfb_out = L.Reshape(
        n.mfb_iq_sumpool,
        reshape_param=dict(shape=dict(dim=[-1, config.MFB_OUT_DIM])))
    n.mfb_sign_sqrt = L.SignedSqrt(n.mfb_out)
    n.mfb_l2 = L.L2Normalize(n.mfb_sign_sqrt)

    n.prediction = L.InnerProduct(n.mfb_l2,
                                  num_output=config.NUM_OUTPUT_UNITS,
                                  weight_filler=dict(type='xavier'))
    if mode == 'val':
        n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    else:
        n.loss = L.SoftmaxKLDLoss(n.prediction, n.label)
    return n.to_proto()
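# --- Hedged NumPy sketch (not part of the original source) of the MFB fusion
# the layers above implement: elementwise product of two JOINT_EMB_SIZE
# projections, sum-pooled in windows of MFB_FACTOR_NUM, then signed square
# root and L2 normalization.
import numpy as np

def mfb_fuse(q_proj, i_proj, out_dim, factor):
    # q_proj, i_proj: 1-D arrays of length out_dim * factor (JOINT_EMB_SIZE)
    z = q_proj * i_proj                           # Eltwise PROD
    z = z.reshape(out_dim, factor).sum(axis=1)    # SUM pooling, kernel_w=factor
    z = np.sign(z) * np.sqrt(np.abs(z))           # SignedSqrt
    return z / (np.linalg.norm(z) + 1e-8)         # L2Normalize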
def modified_u_net(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split, mean=(41.4661, 69.1061, 126.993),
                         seed=1337)
    if split == 'train':
        pydata_params['train_dir'] = '../image_augmentor/DRIVE/training'
        pylayer = 'TRAINSegDataLayer'
    else:
        pydata_params['val_dir'] = '../image_augmentor/DRIVE/val'
        pylayer = 'VALSegDataLayer'
    n.data, n.label = L.Python(module='train_val', layer=pylayer, ntop=2,
                               param_str=str(pydata_params))

    # layer group 1
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 32)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 32)
    n.pool1 = max_pool(n.relu1_2)
    # layer group 2
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 64)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 64)
    n.pool2 = max_pool(n.relu2_2)
    # layer group 3
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 128)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 128)
    # layer group 4
    n.upconv4 = L.Deconvolution(n.relu3_2,
                                param=[dict(lr_mult=1), dict(lr_mult=2)],
                                convolution_param=dict(num_output=128,
                                                       kernel_size=2,
                                                       stride=2))
    n.concat4 = L.Concat(n.upconv4, n.relu2_2, axis=1)
    n.conv4_1, n.relu4_1 = conv_relu(n.concat4, 64)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 64)
    # layer group 5
    n.upconv5 = L.Deconvolution(n.relu4_2,
                                param=[dict(lr_mult=1), dict(lr_mult=2)],
                                convolution_param=dict(num_output=64,
                                                       kernel_size=2,
                                                       stride=2))
    n.concat5 = L.Concat(n.upconv5, n.conv1_2, axis=1)
    n.conv5_1, n.relu5_1 = conv_relu(n.concat5, 32)
    # n.drop5 = L.Dropout(n.relu5_1, dropout_ratio=0.2, in_place=True)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 32)
    # layer group 6
    n.score = L.Convolution(n.relu5_2, pad=0, kernel_size=1, num_output=2,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant', value=0),
                            param=[dict(lr_mult=1, decay_mult=1),
                                   dict(lr_mult=2, decay_mult=0)])
    n.seg = L.Dropout(n.score, dropout_ratio=0.5, in_place=True)
    # others
    n.loss = L.SoftmaxWithLoss(n.seg, n.label,
                               loss_param=dict(normalize=False))
    # n.softmax = L.Softmax(n.seg, include={'phase': caffe.TEST})
    # n.argmax = L.ArgMax(n.softmax, axis=1, include={'phase': caffe.TEST})
    # n.accuracy = L.Accuracy(n.seg, n.label, exclude={'stage': 'deploy'})
    return n.to_proto()
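# --- Hedged sketch of the conv_relu / max_pool / crop helpers that
# modified_u_net() and the FCN-style nets here assume. These follow the
# canonical fcn.berkeleyvision.org definitions, but the originals are not
# shown in this file, so treat them as assumptions. (Some nets below use
# their own variants with extra arguments, e.g. a layer name and group, or
# fix_param/finetune flags.)
from caffe.coord_map import crop  # coordinate-mapped crop from the FCN code

def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad,
                         param=[dict(lr_mult=1, decay_mult=1),
                                dict(lr_mult=2, decay_mult=0)])
    return conv, L.ReLU(conv, in_place=True)

def max_pool(bottom, ks=2, stride=2):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)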
def define_structure(self, stage):
    n = caffe.NetSpec()
    if stage != CaffeLocations.STAGE_DEPLOY:
        source_params = dict(stage=stage)
        source_params['data_dir'] = self.DATA_DIR
        source_params['split_dir'] = self.SPLIT_DIR
        n.data, n.label = L.Python(module='DataLayer', layer='DataLayer',
                                   ntop=2, param_str=str(source_params))
    else:
        n.data = L.Input(shape=dict(dim=[1, 3, self.WSIZE, self.WSIZE]))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 32, pad=85)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 32)
    n.pool1 = max_pool(n.conv1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 64)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 64)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 128)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 128)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 128)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 256)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 256)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 256)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 256)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 256)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 256)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 2048, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 2048, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(
        n.drop7, num_output=CaffeLocations.NUM_LABELS, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant'))  # must be 1 x num_classes x 1 x 1
    n.upscore_a = L.Deconvolution(
        n.score_fr,
        convolution_param=dict(num_output=CaffeLocations.NUM_LABELS,
                               kernel_size=4, stride=2, bias_term=False,
                               weight_filler=dict(type='xavier'),
                               bias_filler=dict(type='constant')),
        param=[dict(lr_mult=1, decay_mult=1)])

    n.score_pool4 = L.Convolution(
        n.pool4, num_output=CaffeLocations.NUM_LABELS, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))
    n.score_pool4c = crop(n.score_pool4, n.upscore_a)
    n.fuse_pool4 = L.Eltwise(n.upscore_a, n.score_pool4c,
                             operation=P.Eltwise.SUM)
    n.upscore_pool4 = L.Deconvolution(
        n.fuse_pool4,
        convolution_param=dict(num_output=CaffeLocations.NUM_LABELS,
                               kernel_size=4, stride=2, bias_term=False),
        param=[dict(lr_mult=1, decay_mult=1)])

    n.score_pool3 = L.Convolution(
        n.pool3, num_output=CaffeLocations.NUM_LABELS, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))
    n.score_pool3c = crop(n.score_pool3, n.upscore_pool4)
    n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c,
                             operation=P.Eltwise.SUM)
    n.upscore8 = L.Deconvolution(
        n.fuse_pool3,
        convolution_param=dict(num_output=CaffeLocations.NUM_LABELS,
                               kernel_size=16, stride=8, bias_term=False),
        param=[dict(lr_mult=1, decay_mult=1)])

    n.score = crop(n.upscore8, n.data)
    if stage != CaffeLocations.STAGE_DEPLOY:
        n.loss = L.SoftmaxWithLoss(n.score, n.label,
                                   loss_param=dict(normalize=False))
    # else:
    #     n.output = L.Softmax(n.score)
    # n.loss = L.Python(n.score, n.label, module='LossLayer',
    #                   layer='TopoLossLayer', loss_weight=1)
    return n.to_proto()
def setLayers(data_source, batch_size, layername, kernel, stride, outCH,
              label_name, transform_param_in, deploy=False):
    # It is tricky to produce the deploy prototxt file, as the data input is
    # not from a layer, so we have to create a workaround. Producing training
    # and testing prototxt files is pretty straightforward.
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)

    # produce data definition for deploy net
    if deploy == False:
        n.data, n.tops['label'] = L.CPMData(
            cpmdata_param=dict(backend=1, source=data_source,
                               batch_size=batch_size),
            transform_param=transform_param_in, ntop=2)
        n.tops[label_name[1]], n.tops[label_name[0]] = L.Slice(
            n.label, slice_param=dict(axis=1, slice_point=15), ntop=2)
    else:
        input = "data"
        dim1 = 1
        dim2 = 4
        dim3 = 368
        dim4 = 368
        # make an empty "data" layer so the next layer accepting input will be
        # able to take the correct blob name "data"; we will later have to
        # remove this layer from the serialization string, since this is just
        # a placeholder
        n.data = L.Layer()

    # something special before everything
    n.image, n.center_map = L.Slice(n.data,
                                    slice_param=dict(axis=1, slice_point=3),
                                    ntop=2)
    n.pool_center_lower = L.Pooling(n.center_map, kernel_size=9, stride=8,
                                    pool=P.Pooling.AVE)

    # just follow arrays..CPCPCPCPCCCC....
    last_layer = 'image'
    stage = 1
    conv_counter = 1
    pool_counter = 1
    drop_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0

    for l in range(0, len(layername)):
        if layername[l] == 'C':
            if state == 'image':
                conv_name = 'conv%d_stage%d' % (conv_counter, stage)
            else:
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
            if stage == 1:
                lr_m = 5
            else:
                lr_m = 1
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer], kernel_size=kernel[l],
                num_output=outCH[l], pad=int(math.floor(kernel[l] / 2)),
                param=[dict(lr_mult=lr_m, decay_mult=1),
                       dict(lr_mult=lr_m * 2, decay_mult=0)],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer = conv_name
            if layername[l + 1] != 'L':
                if state == 'image':
                    ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer],
                                              in_place=True)
                else:
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer],
                                              in_place=True)
                last_layer = ReLUname
            conv_counter += 1
        elif layername[l] == 'P':  # Pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(
                n.tops[last_layer], kernel_size=kernel[l], stride=stride[l],
                pool=P.Pooling.MAX)
            last_layer = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
        elif layername[l] == 'L':
            # Loss: the loss layer is only in the training and testing nets,
            # not in the deploy net.
            if deploy == False:
                if stage == 1:
                    n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                        n.tops[last_layer], n.tops[label_name[0]])
                else:
                    n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                        n.tops[last_layer], n.tops[label_name[1]])
            stage += 1
            last_connect = last_layer
            last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            state = 'image'
        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer], in_place=True,
                    dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            n.tops['concat_stage%d' % stage] = L.Concat(
                n.tops[last_layer], n.tops[last_connect],
                n.pool_center_lower, concat_param=dict(axis=1))
            conv_counter = 1
            state = 'fuse'
            last_layer = 'concat_stage%d' % stage
        elif layername[l] == '$':
            if not share_point:
                share_point = last_layer
            else:
                last_layer = share_point

    # final process
    stage -= 1
    if stage == 1:
        n.silence = L.Silence(n.pool_center_lower, ntop=0)

    if deploy == False:
        return str(n.to_proto())
    else:
        # for generating the deploy net: generate the input information
        # header string
        deploy_str = ('input: {}\ninput_dim: {}\ninput_dim: {}\n'
                      'input_dim: {}\ninput_dim: {}').format(
                          '"' + input + '"', dim1, dim2, dim3, dim4)
        # assemble the input header with the net layers string; remove the
        # first placeholder layer from the net string
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(
            str(n.to_proto()).split('layer {')[2:])
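# --- Hedged usage sketch (hypothetical, untested values): setLayers() walks
# four parallel per-layer arrays, where 'C' adds a conv (plus ReLU unless a
# loss follows), 'P' a max pool, 'D' a dropout, 'L' closes a stage with a
# EuclideanLoss, '@' concatenates the previous stage's output with the image
# path and pooled center map, and '$' marks a shared branch point.
def cpm_toy_example():
    layername = ['C', 'P', 'C', 'P', 'C', 'L', '@', 'C', 'C', 'L']
    kernel    = [  9,   3,   9,   3,   5,   0,   0,  11,   1,   0]
    stride    = [  1,   2,   1,   2,   1,   0,   0,   1,   1,   0]
    outCH     = [128,   0, 128,   0,  15,   0,   0, 128,  15,   0]
    label_name = ['label_1st_stage', 'label_lower']  # hypothetical names
    transform_param = dict(stride=8, crop_size_x=368,
                           crop_size_y=368)  # hypothetical values
    return setLayers('train_lmdb', 16, layername, kernel, stride, outCH,
                     label_name, transform_param, deploy=False)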
def cnn(split):
    n = caffe.NetSpec()
    pydata_params = dict(dataset_dir='/home/kevin/dataset/normal_feature',
                         variable='normal_map', split=split, mean=(0, 0, 0),
                         seed=1337, batch_size=256, img_size=(250, 250))
    if split == 'deploy':
        # single input blob of shape 1 x 3 x 130 x 130 (was ntop=2 with four
        # one-dim shapes, which does not build)
        n.img = L.Input(name='input', shape=dict(dim=[1, 3, 130, 130]))
    else:
        if split == 'train':  # 'is' compared identity, not equality; use ==
            pydata_params['dtype'] = 'frame'
            pylayer = 'ModelNetDataLayer'
        else:
            pydata_params['dtype'] = 'object'
            pylayer = 'ModelNetDataLayer'
        n.img, n.label = L.Python(module='data_layers.model_net_layer',
                                  layer=pylayer, ntop=2,
                                  param_str=str(pydata_params))

    # the base net
    n.conv1, n.relu1 = conv_relu("conv1", n.img, 96, ks=11, stride=4, pad=0)
    n.pool1 = max_pool(n.relu1, ks=3)
    n.norm1 = L.LRN(n.pool1, lrn_param=dict(local_size=5, alpha=0.0005,
                                            beta=0.75, k=2))
    # n.bn1 = L.BatchNorm(n.pool1, param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)],
    #                     batch_norm_param=dict(use_global_stats=True))
    n.conv2, n.relu2 = conv_relu("conv2", n.norm1, 256, ks=5, pad=2, group=2)
    n.pool2 = max_pool(n.relu2, ks=3)
    n.norm2 = L.LRN(n.pool2, lrn_param=dict(local_size=5, alpha=0.0005,
                                            beta=0.75, k=2))
    # n.bn2 = L.BatchNorm(n.pool2, param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)],
    #                     batch_norm_param=dict(use_global_stats=True))
    n.conv3, n.relu3 = conv_relu("conv3", n.norm2, 384, ks=3, pad=1, group=2)
    n.conv4, n.relu4 = conv_relu("conv4", n.relu3, 256, ks=3, pad=1, group=2)
    n.pool5 = max_pool(n.relu4, ks=3)

    n.fc6, n.relu6 = fc_relu(n.pool5, 4096, lr1=1, lr2=2)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = fc_relu(n.drop6, 4096, lr1=1, lr2=2)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
    n.fc8 = fc(n.drop7, 40, lr1=1, lr2=2)

    if split != 'deploy':
        # n.accuracyt = L.Accuracy(n.predictT, n.labelT)
        # n.losst = L.SoftmaxWithLoss(n.predictT, n.labelT)
        n.accuracy = L.Accuracy(n.fc8, n.label)
        n.loss = L.SoftmaxWithLoss(n.fc8, n.label)
    # n.display = L.Scale(n.corr, param=[dict(lr_mult=0)],
    #                     filler=dict(type='constant', value=1.0))
    # n.fc9_bn = L.BatchNorm(n.relu9, param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)],
    #                        batch_norm_param=dict(use_global_stats=True))
    return n.to_proto()
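# --- Hedged sketch of the fc_relu / fc helpers cnn() assumes; the signatures
# are inferred from the call sites (lr1/lr2 set the weight and bias lr_mults)
# and the fillers are assumptions.
def fc_relu(bottom, nout, lr1=1, lr2=2):
    fc = L.InnerProduct(bottom, num_output=nout,
                        param=[dict(lr_mult=lr1, decay_mult=1),
                               dict(lr_mult=lr2, decay_mult=0)],
                        weight_filler=dict(type='gaussian', std=0.005),
                        bias_filler=dict(type='constant', value=0.1))
    return fc, L.ReLU(fc, in_place=True)

def fc(bottom, nout, lr1=1, lr2=2):
    return L.InnerProduct(bottom, num_output=nout,
                          param=[dict(lr_mult=lr1, decay_mult=1),
                                 dict(lr_mult=lr2, decay_mult=0)],
                          weight_filler=dict(type='gaussian', std=0.01),
                          bias_filler=dict(type='constant', value=0))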
def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
                         seed=1337)
    if split.startswith('train'):
        pydata_params['sbdd_dir'] = '../data/sbdd-subsampl/dataset'
        pylayer = 'SBDDSegDataLayer'
    else:
        pydata_params['voc_dir'] = '../data/pascal-subsampl/VOC2011'
        pylayer = 'VOCSegDataLayer'
    n.data, n.label = L.Python(module='voc_layers', layer=pylayer, ntop=2,
                               param_str=str(pydata_params))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(n.drop7, num_output=16, kernel_size=1, pad=0,
                               weight_filler=dict(type='xavier'),
                               param=[dict(lr_mult=1, decay_mult=1),
                                      dict(lr_mult=2, decay_mult=0)])
    n.upscore2 = L.Deconvolution(
        n.score_fr,
        convolution_param=dict(num_output=16, kernel_size=4, stride=2,
                               weight_filler=dict(type='xavier'),
                               bias_term=False),
        param=[dict(lr_mult=0)])

    # scale pool4 skip for compatibility
    n.scale_pool4 = L.Scale(n.pool4, filler=dict(type='constant', value=0.01),
                            param=[dict(lr_mult=0)])
    n.score_pool4 = L.Convolution(n.scale_pool4, num_output=16, kernel_size=1,
                                  pad=0, weight_filler=dict(type='xavier'),
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)])
    n.score_pool4c = crop(n.score_pool4, n.upscore2)
    n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c,
                             operation=P.Eltwise.SUM)
    n.upscore_pool4 = L.Deconvolution(
        n.fuse_pool4,
        convolution_param=dict(num_output=16, kernel_size=4, stride=2,
                               weight_filler=dict(type='xavier'),
                               bias_term=False),
        param=[dict(lr_mult=0)])

    # scale pool3 skip for compatibility
    n.scale_pool3 = L.Scale(n.pool3,
                            filler=dict(type='constant', value=0.0001),
                            param=[dict(lr_mult=0)])
    n.score_pool3 = L.Convolution(n.scale_pool3, num_output=16, kernel_size=1,
                                  pad=0, weight_filler=dict(type='xavier'),
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)])
    n.score_pool3c = crop(n.score_pool3, n.upscore_pool4)
    n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c,
                             operation=P.Eltwise.SUM)
    n.upscore8 = L.Deconvolution(
        n.fuse_pool3,
        convolution_param=dict(num_output=16, kernel_size=16, stride=8,
                               weight_filler=dict(type='xavier'),
                               bias_term=False),
        param=[dict(lr_mult=0)])

    n.score = crop(n.upscore8, n.data)
    n.loss = L.SoftmaxWithLoss(n.score, n.label,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))
    return n.to_proto()
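# --- Usage sketch (assumption, not from the original source): materializing
# train/val prototxts from fcn(). The split and file names follow the usual
# FCN convention but are placeholders here.
def write_fcn_prototxts():
    with open('train.prototxt', 'w') as f:
        f.write(str(fcn('train')))
    with open('val.prototxt', 'w') as f:
        f.write(str(fcn('seg11valid')))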
def setLayers_twoBranches(data_source, batch_size, layername, kernel, stride,
                          outCH, label_name, transform_param_in, deploy=False,
                          batchnorm=0, lr_mult_distro=[1, 1, 1]):
    # It is tricky to produce the deploy prototxt file, as the data input is
    # not from a layer, so we have to create a workaround. Producing training
    # and testing prototxt files is pretty straightforward.
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)
    # read num_parts from the passed-in transform parameters (the original
    # referenced an undefined global 'transform_param' here)
    num_parts = transform_param_in['num_parts']

    if deploy == False and "lmdb" not in data_source:
        if len(label_name) == 1:
            n.data, n.tops[label_name[0]] = L.HDF5Data(
                hdf5_data_param=dict(batch_size=batch_size,
                                     source=data_source), ntop=2)
        elif len(label_name) == 2:
            n.data, n.tops[label_name[0]], n.tops[label_name[1]] = L.HDF5Data(
                hdf5_data_param=dict(batch_size=batch_size,
                                     source=data_source), ntop=3)
    # produce data definition for deploy net
    elif deploy == False:
        n.data, n.tops['label'] = L.CPMData(
            data_param=dict(backend=1, source=data_source,
                            batch_size=batch_size),
            cpm_transform_param=transform_param_in, ntop=2)
        n.tops[label_name[2]], n.tops[label_name[3]], n.tops[label_name[4]], \
            n.tops[label_name[5]] = L.Slice(
                n.label,
                slice_param=dict(axis=1,
                                 slice_point=[38, num_parts + 1,
                                              num_parts + 39]),
                ntop=4)
        n.tops[label_name[0]] = L.Eltwise(n.tops[label_name[2]],
                                          n.tops[label_name[4]],
                                          operation=P.Eltwise.PROD)
        n.tops[label_name[1]] = L.Eltwise(n.tops[label_name[3]],
                                          n.tops[label_name[5]],
                                          operation=P.Eltwise.PROD)
    else:
        input = "data"
        dim1 = 1
        dim2 = 4
        dim3 = 368
        dim4 = 368
        # make an empty "data" layer so the next layer accepting input will be
        # able to take the correct blob name "data"; we will later have to
        # remove this layer from the serialization string, since this is just
        # a placeholder
        n.data = L.Layer()

    # something special before everything
    n.image, n.center_map = L.Slice(n.data,
                                    slice_param=dict(axis=1, slice_point=3),
                                    ntop=2)
    n.silence2 = L.Silence(n.center_map, ntop=0)
    # n.pool_center_lower = L.Pooling(n.center_map, kernel_size=9, stride=8,
    #                                 pool=P.Pooling.AVE)

    # just follow arrays..CPCPCPCPCCCC....
    last_layer = ['image', 'image']
    stage = 1
    conv_counter = 1
    pool_counter = 1
    drop_counter = 1
    local_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0

    for l in range(0, len(layername)):
        if layername[l] == 'V':  # pretrained VGG layers
            conv_name = 'conv%d_%d' % (pool_counter, local_counter)
            lr_m = lr_mult_distro[0]
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]], kernel_size=kernel[l],
                num_output=outCH[l], pad=int(math.floor(kernel[l] / 2)),
                param=[dict(lr_mult=lr_m, decay_mult=1),
                       dict(lr_mult=lr_m * 2, decay_mult=0)],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print '%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m)
            ReLUname = 'relu%d_%d' % (pool_counter, local_counter)
            n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
            local_counter += 1
            print ReLUname
        if layername[l] == 'B':
            pool_counter += 1
            local_counter = 1
        if layername[l] == 'C':
            if state == 'image':
                # conv_name = 'conv%d_stage%d' % (conv_counter, stage)
                # no image state in subsequent stages
                conv_name = 'conv%d_%d_CPM' % (pool_counter, local_counter)
                if stage == 1:
                    lr_m = lr_mult_distro[1]
                else:
                    lr_m = lr_mult_distro[1]
            else:  # fuse
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
                lr_m = lr_mult_distro[2]
                conv_counter += 1
            # if stage == 1:
            #     lr_m = 1
            # else:
            #     lr_m = lr_sub
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]], kernel_size=kernel[l],
                num_output=outCH[l], pad=int(math.floor(kernel[l] / 2)),
                param=[dict(lr_mult=lr_m, decay_mult=1),
                       dict(lr_mult=lr_m * 2, decay_mult=0)],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print '%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m)

            if layername[l + 1] != 'L':
                if state == 'image':
                    if batchnorm == 1:
                        batchnorm_name = 'bn%d_stage%d' % (conv_counter,
                                                           stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[dict(lr_mult=0), dict(lr_mult=0),
                                   dict(lr_mult=0)])
                        # scale_filler=dict(type='constant', value=1),
                        # shift_filler=dict(type='constant', value=0.001))
                        last_layer[0] = batchnorm_name
                    # ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                    ReLUname = 'relu%d_%d_CPM' % (pool_counter, local_counter)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]],
                                              in_place=True)
                else:
                    if batchnorm == 1:
                        batchnorm_name = 'Mbn%d_stage%d' % (conv_counter,
                                                            stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[dict(lr_mult=0), dict(lr_mult=0),
                                   dict(lr_mult=0)])
                        # scale_filler=dict(type='constant', value=1),
                        # shift_filler=dict(type='constant', value=0.001))
                        last_layer[0] = batchnorm_name
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]],
                                              in_place=True)
                # last_layer = ReLUname
                print ReLUname
            # conv_counter += 1
            local_counter += 1
        elif layername[l] == 'C2':
            for level in range(0, 2):
                if state == 'image':
                    # conv_name = 'conv%d_stage%d' % (conv_counter, stage)
                    # no image state in subsequent stages
                    conv_name = 'conv%d_%d_CPM_L%d' % (pool_counter,
                                                       local_counter,
                                                       level + 1)
                    if stage == 1:
                        lr_m = lr_mult_distro[1]
                    else:
                        lr_m = lr_mult_distro[1]
                else:  # fuse
                    conv_name = 'Mconv%d_stage%d_L%d' % (conv_counter, stage,
                                                         level + 1)
                    lr_m = lr_mult_distro[2]
                # conv_counter += 1
                # if stage == 1:
                #     lr_m = 1
                # else:
                #     lr_m = lr_sub
                if layername[l + 1] == 'L2' or layername[l + 1] == 'L3':
                    if level == 0:
                        outCH[l] = 38
                    else:
                        outCH[l] = 19
                n.tops[conv_name] = L.Convolution(
                    n.tops[last_layer[level]], kernel_size=kernel[l],
                    num_output=outCH[l], pad=int(math.floor(kernel[l] / 2)),
                    param=[dict(lr_mult=lr_m, decay_mult=1),
                           dict(lr_mult=lr_m * 2, decay_mult=0)],
                    weight_filler=dict(type='gaussian', std=0.01),
                    bias_filler=dict(type='constant'))
                last_layer[level] = conv_name
                print '%s\tch=%d\t%.1f' % (last_layer[level], outCH[l], lr_m)

                if layername[l + 1] != 'L2' and layername[l + 1] != 'L3':
                    if state == 'image':
                        if batchnorm == 1:
                            batchnorm_name = 'bn%d_stage%d_L%d' % (
                                conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[dict(lr_mult=0), dict(lr_mult=0),
                                       dict(lr_mult=0)])
                            # scale_filler=dict(type='constant', value=1),
                            # shift_filler=dict(type='constant', value=0.001))
                            last_layer[level] = batchnorm_name
                        # ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                        ReLUname = 'relu%d_%d_CPM_L%d' % (pool_counter,
                                                          local_counter,
                                                          level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]],
                                                  in_place=True)
                    else:
                        if batchnorm == 1:
                            batchnorm_name = 'Mbn%d_stage%d_L%d' % (
                                conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[dict(lr_mult=0), dict(lr_mult=0),
                                       dict(lr_mult=0)])
                            # scale_filler=dict(type='constant', value=1),
                            # shift_filler=dict(type='constant', value=0.001))
                            last_layer[level] = batchnorm_name
                        ReLUname = 'Mrelu%d_stage%d_L%d' % (conv_counter,
                                                            stage, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]],
                                                  in_place=True)
                    print ReLUname
            conv_counter += 1
            local_counter += 1
        elif layername[l] == 'P':  # Pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(
                n.tops[last_layer[0]], kernel_size=kernel[l],
                stride=stride[l], pool=P.Pooling.MAX)
            last_layer[0] = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
            local_counter = 1
            conv_counter += 1
            print last_layer[0]
        elif layername[l] == 'L':
            # Loss: the loss layer is only in the training and testing nets,
            # not in the deploy net.
            if deploy == False and "lmdb" not in data_source:
                n.tops['map_vec_stage%d' % stage] = L.Flatten(
                    n.tops[last_layer[0]])
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops['map_vec_stage%d' % stage], n.tops[label_name[1]])
            elif deploy == False:
                level = 1
                name = 'weight_stage%d' % stage
                n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                         n.tops[label_name[(level + 2)]],
                                         operation=P.Eltwise.PROD)
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops[name], n.tops[label_name[level]])
            print 'loss %d' % stage
            stage += 1
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'
        elif layername[l] == 'L2':
            # Loss: the loss layer is only in the training and testing nets,
            # not in the deploy net.
            weight = [lr_mult_distro[3], 1]
            # print lr_mult_distro[3]
            for level in range(0, 2):
                if deploy == False and "lmdb" not in data_source:
                    n.tops['map_vec_stage%d_L%d' %
                           (stage, level + 1)] = L.Flatten(
                               n.tops[last_layer[level]])
                    # index the flattened top per level (the original looked
                    # up 'map_vec_stage%d', a key that is never defined here)
                    n.tops['loss_stage%d_L%d' %
                           (stage, level + 1)] = L.EuclideanLoss(
                               n.tops['map_vec_stage%d_L%d' %
                                      (stage, level + 1)],
                               n.tops[label_name[level]],
                               loss_weight=weight[level])
                elif deploy == False:
                    name = 'weight_stage%d_L%d' % (stage, level + 1)
                    n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                             n.tops[label_name[(level + 2)]],
                                             operation=P.Eltwise.PROD)
                    n.tops['loss_stage%d_L%d' %
                           (stage, level + 1)] = L.EuclideanLoss(
                               n.tops[name], n.tops[label_name[level]],
                               loss_weight=weight[level])
                print 'loss %d level %d' % (stage, level + 1)
            stage += 1
            # last_connect = last_layer
            # last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'
        elif layername[l] == 'L3':
            # Loss: the loss layer is only in the training and testing nets,
            # not in the deploy net.
            weight = [lr_mult_distro[3], 1]
            # print lr_mult_distro[3]
            if deploy == False:
                level = 0
                n.tops['loss_stage%d_L%d' %
                       (stage, level + 1)] = L.Euclidean2Loss(
                           n.tops[last_layer[level]],
                           n.tops[label_name[level]], n.tops[label_name[2]],
                           loss_weight=weight[level])
                print 'loss %d level %d' % (stage, level + 1)
                level = 1
                n.tops['loss_stage%d_L%d' %
                       (stage, level + 1)] = L.EuclideanLoss(
                           n.tops[last_layer[level]],
                           n.tops[label_name[level]],
                           loss_weight=weight[level])
                print 'loss %d level %d' % (stage, level + 1)
            stage += 1
            # last_connect = last_layer
            # last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'
        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer[0]], in_place=True,
                    dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            # if not share_point:
            #     share_point = last_layer
            n.tops['concat_stage%d' % stage] = L.Concat(
                n.tops[last_layer[0]], n.tops[last_layer[1]],
                n.tops[share_point], concat_param=dict(axis=1))
            local_counter = 1
            state = 'fuse'
            last_layer[0] = 'concat_stage%d' % stage
            last_layer[1] = 'concat_stage%d' % stage
            print last_layer
        elif layername[l] == '$':
            share_point = last_layer[0]
            pool_counter += 1
            local_counter = 1
            print 'share'

    # final process
    stage -= 1
    # if stage == 1:
    #     n.silence = L.Silence(n.pool_center_lower, ntop=0)

    if deploy == False:
        return str(n.to_proto())
    else:
        # for generating the deploy net: generate the input information
        # header string
        deploy_str = ('input: {}\ninput_dim: {}\ninput_dim: {}\n'
                      'input_dim: {}\ninput_dim: {}').format(
                          '"' + input + '"', dim1, dim2, dim3, dim4)
        # assemble the input header with the net layers string; remove the
        # first placeholder layer from the net string
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(
            str(n.to_proto()).split('layer {')[2:])
def generate_model(split, config):
    n = caffe.NetSpec()
    batch_size = config.N
    mode_str = str(dict(split=split, batch_size=batch_size))
    n.language, n.cont, n.image, n.spatial, n.label = L.Python(
        module=config.data_provider, layer=config.data_provider_layer,
        param_str=mode_str, ntop=5)

    # the base net (VGG-16)
    n.conv1_1, n.relu1_1 = conv_relu(n.image, 64, fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64, fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128, fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128, fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256, fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256, fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256, fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512, fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512, fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512, fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512, fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512, fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512, fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fcn_fc6, n.fcn_relu6 = conv_relu(n.pool5, 4096, ks=7, pad=3)
    if config.vgg_dropout:
        n.fcn_drop6 = L.Dropout(n.fcn_relu6, dropout_ratio=0.5, in_place=True)
        n.fcn_fc7, n.fcn_relu7 = conv_relu(n.fcn_drop6, 4096, ks=1, pad=0)
        n.fcn_drop7 = L.Dropout(n.fcn_relu7, dropout_ratio=0.5, in_place=True)
        n.fcn_fc8 = conv(n.fcn_drop7, 1000, ks=1, pad=0)
    else:
        n.fcn_fc7, n.fcn_relu7 = conv_relu(n.fcn_relu6, 4096, ks=1, pad=0)
        n.fcn_fc8 = conv(n.fcn_relu7, 1000, ks=1, pad=0)

    # embedding
    n.embed = L.Embed(n.language, input_dim=config.vocab_size,
                      num_output=config.embed_dim,
                      weight_filler=dict(type='uniform', min=-0.08, max=0.08))

    # LSTM
    n.lstm = L.LSTM(
        n.embed, n.cont,
        recurrent_param=dict(num_output=config.lstm_dim,
                             weight_filler=dict(type='uniform', min=-0.08,
                                                max=0.08),
                             bias_filler=dict(type='constant', value=0)))
    tops = L.Slice(n.lstm, ntop=config.T, slice_param=dict(axis=0))
    for i in range(config.T - 1):
        n.__setattr__('slice' + str(i), tops[i])
        n.__setattr__('silence' + str(i), L.Silence(tops[i], ntop=0))
    n.lstm_out = tops[-1]
    n.lstm_feat = L.Reshape(
        n.lstm_out, reshape_param=dict(shape=dict(dim=[-1, config.lstm_dim])))

    # Dynamic conv filters
    n.dyn_l, n.dyn_sig = fc_sigmoid(n.lstm_feat, 1000 + 8)
    n.lstm_dyn_kernel = L.Reshape(
        n.dyn_sig,
        reshape_param=dict(shape=dict(dim=[-1, 1, config.lstm_dim + 8, 1,
                                           1])))

    # Tile LSTM feature
    # n.lstm_resh = L.Reshape(n.lstm_feat, reshape_param=dict(shape=dict(dim=[-1, config.lstm_dim, 1, 1])))
    # n.lstm_tile_1 = L.Tile(n.lstm_resh, axis=2, tiles=config.featmap_H)
    # n.lstm_tile_2 = L.Tile(n.lstm_tile_1, axis=3, tiles=config.featmap_W)

    # L2 Normalize image and language features
    # n.img_l2norm = L.L2Normalize(n.fcn_fc8)
    # n.lstm_l2norm = L.L2Normalize(n.lstm_tile_2)

    # Concatenate
    # n.feat_all = L.Concat(n.lstm_l2norm, n.img_l2norm, n.spatial, concat_param=dict(axis=1))
    n.feat_all = L.Concat(n.fcn_fc8, n.spatial, concat_param=dict(axis=1))

    # MLP Classifier over concatenated feature
    # n.fcn_l1, n.fcn_relu1 = conv_relu(n.feat_all, config.mlp_hidden_dims, ks=1, pad=0)
    # if config.mlp_dropout:
    #     n.fcn_drop1 = L.Dropout(n.fcn_relu1, dropout_ratio=0.5, in_place=True)
    #     n.fcn_scores = conv(n.fcn_drop1, 1, ks=1, pad=0)
    # else:
    #     n.fcn_scores = conv(n.fcn_relu1, 1, ks=1, pad=0)

    # Dyn conv layer
    n.fcn_scores = L.DynamicConvolution(
        n.feat_all, n.lstm_dyn_kernel,
        convolution_param=dict(num_output=1, kernel_size=1, stride=1, pad=0,
                               bias_term=False))

    # Loss Layer
    n.loss = L.SigmoidCrossEntropyLoss(n.fcn_scores, n.label)
    return n.to_proto()
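# --- Hedged sketches of the conv and fc_sigmoid helpers generate_model()
# assumes; the signatures are inferred from the call sites, so treat them as
# assumptions.
def conv(bottom, nout, ks=3, stride=1, pad=1):
    return L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad,
                         param=[dict(lr_mult=1, decay_mult=1),
                                dict(lr_mult=2, decay_mult=0)])

def fc_sigmoid(bottom, nout):
    fc = L.InnerProduct(bottom, num_output=nout,
                        weight_filler=dict(type='xavier'))
    return fc, L.Sigmoid(fc, in_place=True)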
def drop(bottom, dropout_ratio):
    # pass the ratio through instead of hard-coding 0.25
    return L.Dropout(bottom, dropout_ratio=dropout_ratio, in_place=True)
def yolo_net(data_lmdb, label_lmdb, batch_size):
    # YOLO-style detection backbone: a series of conv / leaky-ReLU / max-pool
    # blocks followed by two fully connected layers
    n = caffe.NetSpec()
    # input
    n.data = L.Data(batch_size=batch_size, backend=P.Data.LMDB,
                    source=data_lmdb, transform_param=dict(scale=1. / 255),
                    ntop=1)
    n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB,
                     source=label_lmdb, ntop=1)
    # 7x7x64-s-2
    n.conv1 = ConvLayer(n.data, 64, 7, 2, 1)
    n.leaky1 = LeakyLayer(n.conv1)
    n.pool1 = MaxpoolingLayer(n.leaky1, 2, 2)
    # 3x3x192
    n.conv2 = ConvLayer(n.pool1, 192, 3, 1, 1)
    n.leaky2 = LeakyLayer(n.conv2)
    n.pool2 = MaxpoolingLayer(n.leaky2, 2, 2)
    n.conv3 = ConvLayer(n.pool2, 128, 1, 1, 1)
    n.leaky3 = LeakyLayer(n.conv3)
    n.conv4 = ConvLayer(n.leaky3, 256, 3, 1, 1)
    n.leaky4 = LeakyLayer(n.conv4)
    n.conv5 = ConvLayer(n.leaky4, 256, 1, 1, 1)
    n.leaky5 = LeakyLayer(n.conv5)
    n.conv6 = ConvLayer(n.leaky5, 512, 3, 1, 1)
    n.leaky6 = LeakyLayer(n.conv6)
    n.pool3 = MaxpoolingLayer(n.leaky6, 2, 2)
    n.conv7 = ConvLayer(n.pool3, 256, 1, 1, 1)
    n.leaky7 = LeakyLayer(n.conv7)
    n.conv8 = ConvLayer(n.leaky7, 512, 3, 1, 1)
    n.leaky8 = LeakyLayer(n.conv8)
    n.conv9 = ConvLayer(n.leaky8, 256, 1, 1, 1)
    n.leaky9 = LeakyLayer(n.conv9)
    n.conv10 = ConvLayer(n.leaky9, 512, 3, 1, 1)
    n.leaky10 = LeakyLayer(n.conv10)
    n.conv11 = ConvLayer(n.leaky10, 256, 1, 1, 1)
    n.leaky11 = LeakyLayer(n.conv11)
    n.conv12 = ConvLayer(n.leaky11, 512, 3, 1, 1)
    n.leaky12 = LeakyLayer(n.conv12)
    n.conv13 = ConvLayer(n.leaky12, 256, 1, 1, 1)
    n.leaky13 = LeakyLayer(n.conv13)
    n.conv14 = ConvLayer(n.leaky13, 512, 3, 1, 1)
    n.leaky14 = LeakyLayer(n.conv14)
    n.conv15 = ConvLayer(n.leaky14, 512, 1, 1, 1)
    n.leaky15 = LeakyLayer(n.conv15)
    n.conv16 = ConvLayer(n.leaky15, 1024, 3, 1, 1)
    n.leaky16 = LeakyLayer(n.conv16)
    n.pool4 = MaxpoolingLayer(n.leaky16, 2, 2)
    n.conv17 = ConvLayer(n.pool4, 512, 1, 1, 1)
    n.leaky17 = LeakyLayer(n.conv17)
    n.conv18 = ConvLayer(n.leaky17, 1024, 3, 1, 1)
    n.leaky18 = LeakyLayer(n.conv18)
    n.conv19 = ConvLayer(n.leaky18, 512, 1, 1, 1)
    n.leaky19 = LeakyLayer(n.conv19)
    n.conv20 = ConvLayer(n.leaky19, 1024, 3, 1, 1)
    n.leaky20 = LeakyLayer(n.conv20)
    n.pool5 = MaxpoolingLayer(n.leaky20, 2, 2)
    n.conv21 = ConvLayer(n.pool5, 512, 1, 1, 1)
    n.leaky21 = LeakyLayer(n.conv21)
    n.conv22 = ConvLayer(n.leaky21, 1024, 3, 1, 1)
    n.leaky22 = LeakyLayer(n.conv22)
    n.conv23 = ConvLayer(n.leaky22, 512, 1, 1, 1)
    n.leaky23 = LeakyLayer(n.conv23)
    n.conv24 = ConvLayer(n.leaky23, 1024, 3, 1, 1)
    n.leaky24 = LeakyLayer(n.conv24)
    n.fc1 = L.InnerProduct(n.leaky24, num_output=4096,
                           weight_filler=dict(type='xavier'))
    n.leaky25 = LeakyLayer(n.fc1)
    n.dropout = L.Dropout(n.leaky25, dropout_ratio=0.5, in_place=True)
    n.fc2 = L.InnerProduct(n.dropout, num_output=1470,
                           weight_filler=dict(type='xavier'))
    return n.to_proto()
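# --- Hedged sketch of the wrappers yolo_net() assumes (ConvLayer, LeakyLayer,
# MaxpoolingLayer); they are not defined in this file, so the signatures are
# inferred from the call sites (bottom, num_output, kernel, stride, pad) and
# the 0.1 leaky slope is an assumption following the YOLO paper.
def ConvLayer(bottom, nout, ks, stride, pad):
    return L.Convolution(bottom, num_output=nout, kernel_size=ks,
                         stride=stride, pad=pad,
                         weight_filler=dict(type='xavier'))

def LeakyLayer(bottom, negative_slope=0.1):
    return L.ReLU(bottom, in_place=True,
                  relu_param=dict(negative_slope=negative_slope))

def MaxpoolingLayer(bottom, ks, stride):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)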
def ResNet(split):
    # NOTE: despite the name, this builds a plain conv stack (HGG_*) with no
    # residual connections.
    data, labels = L.Python(
        module='readDataLayer', layer='input_layer', ntop=2,
        param_str=str(dict(split=split,
                           data_dir=this_dir + '/data/',
                           train_data_name='train_',
                           test_data_name='test',
                           train_batches=128,
                           test_batches=128,
                           crop_size_x=33,
                           crop_size_y=33,
                           train_pack_nums=9,
                           test_pack_nums=1)))
    HGG_1, _ = conv_BN_scale_relu(split, data, 64, 3, 1, 0)
    HGG_2, _ = conv_BN_scale_relu(split, HGG_1, 64, 3, 1, 0)
    HGG_3, _ = conv_BN_scale_relu(split, HGG_2, 64, 3, 1, 0)
    HGG_4 = L.Pooling(HGG_3, pool=P.Pooling.MAX, global_pooling=False,
                      stride=2, kernel_size=3)
    HGG_5, _ = conv_BN_scale_relu(split, HGG_4, 128, 3, 1, 0)
    HGG_6, _ = conv_BN_scale_relu(split, HGG_5, 128, 3, 1, 0)
    HGG_7, _ = conv_BN_scale_relu(split, HGG_6, 128, 3, 1, 0)
    HGG_8 = L.Pooling(HGG_7, pool=P.Pooling.MAX, global_pooling=False,
                      stride=2, kernel_size=3)
    HGG_8a = L.Flatten(HGG_8)
    HGG_9 = L.ReLU(HGG_8a)
    HGG_9a = L.InnerProduct(L.Dropout(HGG_9, dropout_ratio=0.1),
                            num_output=256,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'))
    # HGG_9a = L.InnerProduct(HGG_9, num_output=256)
    HGG_10 = L.ReLU(HGG_9a)
    HGG_10a = L.InnerProduct(L.Dropout(HGG_10, dropout_ratio=0.1),
                             num_output=256,
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant'))
    # HGG_10a = L.InnerProduct(HGG_10, num_output=256)
    HGG_11 = L.Dropout(HGG_10a, dropout_ratio=0.1)
    HGG_11a = L.InnerProduct(HGG_11, num_output=5,
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant'))
    acc = L.Accuracy(HGG_11a, labels)
    loss = L.SoftmaxWithLoss(HGG_11a, labels)
    return to_proto(loss, acc)
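# --- Hedged sketch of conv_BN_scale_relu as used by ResNet() above: a
# Convolution -> BatchNorm -> Scale -> ReLU block with BatchNorm statistics
# frozen outside of training. The signature is inferred from the call sites
# (split, bottom, nout, ks, stride, pad); treat the details as assumptions.
def conv_BN_scale_relu(split, bottom, nout, ks, stride, pad):
    conv = L.Convolution(bottom, num_output=nout, kernel_size=ks,
                         stride=stride, pad=pad,
                         weight_filler=dict(type='xavier'),
                         bias_filler=dict(type='constant'))
    use_global = (split != 'train')  # running stats except when training
    bn = L.BatchNorm(conv, in_place=True,
                     batch_norm_param=dict(use_global_stats=use_global),
                     param=[dict(lr_mult=0), dict(lr_mult=0),
                            dict(lr_mult=0)])
    scale = L.Scale(bn, in_place=True, scale_param=dict(bias_term=True))
    relu = L.ReLU(scale, in_place=True)
    # both tops alias the same blob because everything is in-place, so using
    # the first return value as the next bottom still sees rectified values
    return scale, relu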
def fcn(split):
    n = caffe.NetSpec()
    n.data, n.sem, n.geo = L.Python(
        module='siftflow_layers', layer='SIFTFlowSegDataLayer', ntop=3,
        param_str=str(dict(siftflow_dir='../data/sift-flow', split=split,
                           seed=1337)))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr_sem = L.Convolution(
        n.drop7, num_output=33, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.upscore_sem = L.Deconvolution(
        n.score_fr_sem,
        convolution_param=dict(num_output=33, kernel_size=64, stride=32,
                               bias_term=False),
        param=[dict(lr_mult=0)])
    n.score_sem = crop(n.upscore_sem, n.data)
    # loss to make score happy (o.w. loss_sem)
    n.loss = L.SoftmaxWithLoss(n.score_sem, n.sem,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    n.score_fr_geo = L.Convolution(
        n.drop7, num_output=3, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.upscore_geo = L.Deconvolution(
        n.score_fr_geo,
        convolution_param=dict(num_output=3, kernel_size=64, stride=32,
                               bias_term=False),
        param=[dict(lr_mult=0)])
    n.score_geo = crop(n.upscore_geo, n.data)
    n.loss_geo = L.SoftmaxWithLoss(n.score_geo, n.geo,
                                   loss_param=dict(normalize=False,
                                                   ignore_label=255))
    return n.to_proto()
def custom_net(hdf5, batch_size):
    # define your own net!
    n = caffe.NetSpec()

    # keep this data layer for all networks
    # HDF5 DATA LAYER
    n.data, n.label = L.HDF5Data(batch_size=batch_size, source=hdf5, ntop=2)

    # n.conv_d0a_b = L.Convolution(n.data, kernel_size=3, num_output=64, pad=0, weight_filler=dict(type='xavier'))
    # n.relu_d0b = L.ReLU(n.conv_d0a_b)
    # n.conv_d0b_c = L.Convolution(n.relu_d0b, kernel_size=3, num_output=64, pad=0, weight_filler=dict(type='xavier'))
    # n.relu_d0c = L.ReLU(n.conv_d0b_c)
    # n.pool_d0c_1a = L.Pooling(n.relu_d0c, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.conv_d0a_b, n.relu_d0b = conv_relu(n.data, 64)
    n.conv_d0b_c, n.relu_d0c = conv_relu(n.relu_d0b, 64)
    n.pool_d0c_1a = max_pool(n.relu_d0c)

    # n.conv_d1a_b = L.Convolution(n.pool_d0c_1a, kernel_size=3, num_output=128, pad=0, weight_filler=dict(type='xavier'))
    # n.relu_d1b = L.ReLU(n.conv_d1a_b)
    # n.conv_d1b_c = L.Convolution(n.relu_d1b, kernel_size=3, num_output=128, pad=0, weight_filler=dict(type='xavier'))
    # n.relu_d1c = L.ReLU(n.conv_d1b_c)
    # n.pool_d1c_2a = L.Pooling(n.relu_d1c, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.conv_d1a_b, n.relu_d1b = conv_relu(n.pool_d0c_1a, 128)
    n.conv_d1b_c, n.relu_d1c = conv_relu(n.relu_d1b, 128)
    n.pool_d1c_2a = max_pool(n.relu_d1c)

    # n.conv_d2a_b = L.Convolution(n.pool_d1c_2a, kernel_size=3, num_output=256, pad=0, weight_filler=dict(type='xavier'))
    # n.relu_d2b = L.ReLU(n.conv_d2a_b)
    # n.conv_d2b_c = L.Convolution(n.relu_d2b, kernel_size=3, num_output=256, pad=0, weight_filler=dict(type='xavier'))
    # n.relu_d2c = L.ReLU(n.conv_d2b_c)
    # n.pool_d2c_3a = L.Pooling(n.relu_d2c, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.conv_d2a_b, n.relu_d2b = conv_relu(n.pool_d1c_2a, 256)
    n.conv_d2b_c, n.relu_d2c = conv_relu(n.relu_d2b, 256)
    n.pool_d2c_3a = max_pool(n.relu_d2c)

    # n.conv_d3a_b = L.Convolution(n.pool_d2c_3a, kernel_size=3, num_output=512, pad=0, weight_filler=dict(type='xavier'))
    # n.relu_d3b = L.ReLU(n.conv_d3a_b)
    # n.conv_d3b_c = L.Convolution(n.relu_d3b, kernel_size=3, num_output=512, pad=0, weight_filler=dict(type='xavier'))
    # n.relu_d3c = L.ReLU(n.conv_d3b_c)
    # n.dropout_d3c = L.Dropout(n.relu_d3c, dropout_ratio=0.5)
    # n.pool_d3c_4a = L.Pooling(n.relu_d3c, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.conv_d3a_b, n.relu_d3b = conv_relu(n.pool_d2c_3a, 512)
    n.conv_d3b_c, n.relu_d3c = conv_relu(n.relu_d3b, 512)
    n.dropout_d3c = L.Dropout(n.relu_d3c, dropout_ratio=0.5)
    n.pool_d3c_4a = max_pool(n.dropout_d3c)

    # n.conv_d4a_b = L.Convolution(n.pool_d3c_4a, kernel_size=3, num_output=1024, pad=0, weight_filler=dict(type='xavier'))
    # n.relu_d4b = L.ReLU(n.conv_d4a_b)
    # n.conv_d4b_c = L.Convolution(n.relu_d4b, kernel_size=3, num_output=1024, pad=0, weight_filler=dict(type='xavier'))
    # n.relu_d4c = L.ReLU(n.conv_d4b_c)
    # n.dropout_d4c = L.Dropout(n.relu_d4c, dropout_ratio=0.5)
    # n.upconv_d4c_u3a = L.Deconvolution(n.dropout_d4c, num_output=512, pad=0, kernel_size=2, stride=2, weight_filler=dict(type='xavier'))
    # n.relu_u3a = L.ReLU(n.upconv_d4c_u3a)
    n.conv_d4a_b, n.relu_d4b = conv_relu(n.pool_d3c_4a, 1024)
    n.conv_d4b_c, n.relu_d4c = conv_relu(n.relu_d4b, 1024)
    n.dropout_d4c = L.Dropout(n.relu_d4c, dropout_ratio=0.5)
    n.upconv_d4c_u3a, n.relu_u3a = deconv_relu(n.dropout_d4c, 512)

    # n.crop_d3c_d3cc = L.Crop(n.relu_d3c, n.relu_u3a)
    # n.concat_d3cc_u3a_b = L.Concat(n.relu_u3a, n.crop_d3c_d3cc)
    # n.conv_u3b_c = L.Convolution(n.concat_d3cc_u3a_b, num_output=512, pad=0, kernel_size=3, weight_filler=dict(type='xavier'))
    # n.relu_u3c = L.ReLU(n.conv_u3b_c)
    # n.conv_u3c_d = L.Convolution(n.relu_u3c, num_output=512, pad=0, kernel_size=3, weight_filler=dict(type='xavier'))
    # n.relu_u3d = L.ReLU(n.conv_u3c_d)
    # n.upconv_u3d_u2a = L.Deconvolution(n.relu_u3d, num_output=256, pad=0, kernel_size=2, stride=2, weight_filler=dict(type='xavier'))
    # n.relu_u2a = L.ReLU(n.upconv_u3d_u2a)
    n.crop_d3c_d3cc = L.Crop(n.relu_d3c, n.relu_u3a)
    n.concat_d3cc_u3a_b = L.Concat(n.relu_u3a, n.crop_d3c_d3cc)
    n.conv_u3b_c, n.relu_u3c = conv_relu(n.concat_d3cc_u3a_b, 512)
    n.conv_u3c_d, n.relu_u3d = conv_relu(n.relu_u3c, 512)
    n.upconv_u3d_u2a, n.relu_u2a = deconv_relu(n.relu_u3d, 256)

    # n.crop_d2c_d2cc = L.Crop(n.relu_d2c, n.relu_u2a)
    # n.concat_d2cc_u2a_b = L.Concat(n.relu_u2a, n.crop_d2c_d2cc)
    # n.conv_u2b_c = L.Convolution(n.concat_d2cc_u2a_b, num_output=256, pad=0, kernel_size=3, weight_filler=dict(type='xavier'))
    # n.relu_u2c = L.ReLU(n.conv_u2b_c)
    # n.conv_u2c_d = L.Convolution(n.relu_u2c, num_output=256, pad=0, kernel_size=3, weight_filler=dict(type='xavier'))
    # n.relu_u2d = L.ReLU(n.conv_u2c_d)
    # n.upconv_u2d_u1a = L.Deconvolution(n.relu_u2d, num_output=128, pad=0, kernel_size=2, stride=2, weight_filler=dict(type='xavier'))
    # n.relu_u1a = L.ReLU(n.upconv_u2d_u1a)
    n.crop_d2c_d2cc = L.Crop(n.relu_d2c, n.relu_u2a)
    n.concat_d2cc_u2a_b = L.Concat(n.relu_u2a, n.crop_d2c_d2cc)
    n.conv_u2b_c, n.relu_u2c = conv_relu(n.concat_d2cc_u2a_b, 256)
    n.conv_u2c_d, n.relu_u2d = conv_relu(n.relu_u2c, 256)
    n.upconv_u2d_u1a, n.relu_u1a = deconv_relu(n.relu_u2d, 128)

    # n.crop_d1c_d1cc = L.Crop(n.relu_d1c, n.relu_u1a)
    # n.concat_d1cc_u1a_b = L.Concat(n.relu_u1a, n.crop_d1c_d1cc)
    # n.conv_u1b_c = L.Convolution(n.concat_d1cc_u1a_b, num_output=128, pad=0, kernel_size=3, weight_filler=dict(type='xavier'))
    # n.relu_u1c = L.ReLU(n.conv_u1b_c)
    # n.conv_u1c_d = L.Convolution(n.relu_u1c, num_output=128, pad=0, kernel_size=3, weight_filler=dict(type='xavier'))
    # n.relu_u1d = L.ReLU(n.conv_u1c_d)
    # n.upconv_u1d_u0a = L.Deconvolution(n.relu_u1d, num_output=64, pad=0, kernel_size=2, stride=2, weight_filler=dict(type='xavier'))
    # n.relu_u0a = L.ReLU(n.upconv_u1d_u0a)
    n.crop_d1c_d1cc = L.Crop(n.relu_d1c, n.relu_u1a)
    n.concat_d1cc_u1a_b = L.Concat(n.relu_u1a, n.crop_d1c_d1cc)
    n.conv_u1b_c, n.relu_u1c = conv_relu(n.concat_d1cc_u1a_b, 128)
    n.conv_u1c_d, n.relu_u1d = conv_relu(n.relu_u1c, 128)
    # NB: 128 output channels here; the commented-out original upsampled to 64
    n.upconv_u1d_u0a, n.relu_u0a = deconv_relu(n.relu_u1d, 128)

    # n.crop_d0c_d0cc = L.Crop(n.relu_d0c, n.relu_u0a)
    # n.concat_d0cc_u0a_b = L.Concat(n.relu_u0a, n.crop_d0c_d0cc)
    # n.conv_u0b_c = L.Convolution(n.concat_d0cc_u0a_b, num_output=64, pad=0, kernel_size=3, weight_filler=dict(type='xavier'))
    # n.relu_u0c = L.ReLU(n.conv_u0b_c)
    # n.conv_u0c_d = L.Convolution(n.relu_u0c, num_output=64, pad=0, kernel_size=3, weight_filler=dict(type='xavier'))
    # n.relu_u0d = L.ReLU(n.conv_u0c_d)
    n.crop_d0c_d0cc = L.Crop(n.relu_d0c, n.relu_u0a)
    n.concat_d0cc_u0a_b = L.Concat(n.relu_u0a, n.crop_d0c_d0cc)
    n.conv_u0b_c, n.relu_u0c = conv_relu(n.concat_d0cc_u0a_b, 64)
    n.conv_u0c_d, n.relu_u0d = conv_relu(n.relu_u0c, 64)

    n.conv_u0d_score = L.Convolution(
        n.relu_u0d, num_output=2, pad=0, kernel_size=1,
        weight_filler=dict(type='xavier'),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])

    # keep this loss layer for all networks
    n.loss = L.SoftmaxWithLoss(n.conv_u0d_score, n.label,
                               loss_param=dict(ignore_label=2))
    return n.to_proto()
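# --- Hedged sketch of the deconv_relu helper custom_net() assumes (it is not
# shown in this file): a 2x2, stride-2 learned upsampling followed by ReLU,
# mirroring the commented-out Deconvolution lines above; the lr_mult settings
# are assumptions.
def deconv_relu(bottom, nout):
    deconv = L.Deconvolution(
        bottom,
        convolution_param=dict(num_output=nout, kernel_size=2, stride=2,
                               pad=0, weight_filler=dict(type='xavier')),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    return deconv, L.ReLU(deconv, in_place=True)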