def net():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.Log(n.data, base=_base, scale=_scale, shift=_shift)
    return n.to_proto()
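
# Minimal usage sketch (not part of the original snippet): it assumes the
# module-level globals data_shape, _base, _scale and _shift used by net()
# above are already defined, writes the generated NetParameter to disk, and
# loads it back with caffe.Net for a quick check.
def _write_and_load_log_net(prototxt_path='log_net.prototxt'):
    with open(prototxt_path, 'w') as f:
        f.write(str(net()))
    return caffe.Net(prototxt_path, caffe.TEST)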
def fcn(split):
    n = caffe.NetSpec()
    n.data, n.sem, n.geo = L.Python(module='siftflow_layers',
            layer='SIFTFlowSegDataLayer', ntop=3,
            param_str=str(dict(siftflow_dir='../data/sift-flow/data/', split=split, seed=1337)))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr_sem = L.Convolution(n.drop7, num_output=33, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.upscore2_sem = L.Deconvolution(n.score_fr_sem,
        convolution_param=dict(num_output=33, kernel_size=4, stride=2, bias_term=False),
        param=[dict(lr_mult=0)])

    n.score_pool4_sem = L.Convolution(n.pool4, num_output=33, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.score_pool4_semc = crop(n.score_pool4_sem, n.upscore2_sem)
    n.fuse_pool4_sem = L.Eltwise(n.upscore2_sem, n.score_pool4_semc, operation=P.Eltwise.SUM)
    n.upscore_pool4_sem = L.Deconvolution(n.fuse_pool4_sem,
        convolution_param=dict(num_output=33, kernel_size=4, stride=2, bias_term=False),
        param=[dict(lr_mult=0)])

    n.score_pool3_sem = L.Convolution(n.pool3, num_output=33, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.score_pool3_semc = crop(n.score_pool3_sem, n.upscore_pool4_sem)
    n.fuse_pool3_sem = L.Eltwise(n.upscore_pool4_sem, n.score_pool3_semc, operation=P.Eltwise.SUM)
    n.upscore8_sem = L.Deconvolution(n.fuse_pool3_sem,
        convolution_param=dict(num_output=33, kernel_size=16, stride=8, bias_term=False),
        param=[dict(lr_mult=0)])

    n.score_sem = crop(n.upscore8_sem, n.data)
    # loss to make score happy (o.w. loss_sem)
    n.loss = L.SoftmaxWithLoss(n.score_sem, n.sem,
            loss_param=dict(normalize=False, ignore_label=255))

    n.score_fr_geo = L.Convolution(n.drop7, num_output=3, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.upscore2_geo = L.Deconvolution(n.score_fr_geo,
        convolution_param=dict(num_output=3, kernel_size=4, stride=2, bias_term=False),
        param=[dict(lr_mult=0)])

    n.score_pool4_geo = L.Convolution(n.pool4, num_output=3, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.score_pool4_geoc = crop(n.score_pool4_geo, n.upscore2_geo)
    n.fuse_pool4_geo = L.Eltwise(n.upscore2_geo, n.score_pool4_geoc, operation=P.Eltwise.SUM)
    n.upscore_pool4_geo = L.Deconvolution(n.fuse_pool4_geo,
        convolution_param=dict(num_output=3, kernel_size=4, stride=2, bias_term=False),
        param=[dict(lr_mult=0)])

    n.score_pool3_geo = L.Convolution(n.pool3, num_output=3, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.score_pool3_geoc = crop(n.score_pool3_geo, n.upscore_pool4_geo)
    n.fuse_pool3_geo = L.Eltwise(n.upscore_pool4_geo, n.score_pool3_geoc, operation=P.Eltwise.SUM)
    n.upscore8_geo = L.Deconvolution(n.fuse_pool3_geo,
        convolution_param=dict(num_output=3, kernel_size=16, stride=8, bias_term=False),
        param=[dict(lr_mult=0)])

    n.score_geo = crop(n.upscore8_geo, n.data)
    n.loss_geo = L.SoftmaxWithLoss(n.score_geo, n.geo,
            loss_param=dict(normalize=False, ignore_label=255))

    return n.to_proto()
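
# Driver sketch (an assumption modeled on the usual FCN net.py convention, not
# taken from the original file): serialize the train and test variants of the
# SIFT Flow net defined by fcn() above.
def make_net():
    with open('trainval.prototxt', 'w') as f:
        f.write(str(fcn('trainval')))
    with open('test.prototxt', 'w') as f:
        f.write(str(fcn('test')))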
    'evaluate_difficult_gt': False,
    'name_size_file': name_size_file,
}

### Hopefully you don't need to change the following ###
# Check file.
check_if_exist(train_data)
check_if_exist(test_data)
check_if_exist(label_map_file)
check_if_exist(pretrain_model)
make_if_not_exist(save_dir)
make_if_not_exist(job_dir)
make_if_not_exist(snapshot_dir)

# Create train net.
net = caffe.NetSpec()
net.data, net.label = CreateAnnotatedDataLayer(train_data, batch_size=batch_size_per_device,
        train=True, output_label=True, label_map_file=label_map_file,
        transform_param=train_transform_param, batch_sampler=batch_sampler)

VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True, dilated=True,
    dropout=False)
def net(split):
    n = caffe.NetSpec()
    loss_param = dict(normalize=False)
    if split == 'train':
        data_params = dict(mean=(104.00699, 116.66877, 122.67892))
        # images and labels
        data_params['root'] = './datasets/ICDAR2013_TCB'
        data_params['source'] = "ICDAR2013_TCB.lst"
        data_params['shuffle'] = True
        data_params['ignore_label'] = -1
        n.data, n.label = L.Python(module='pylayer_old', layer='ImageLabelmapDataLayer', ntop=2,
                                   param_str=str(data_params))
        if data_params.has_key('ignore_label'):
            loss_param['ignore_label'] = int(data_params['ignore_label'])
    elif split == 'test':
        n.data = L.Input(name='data', input_param=dict(shape=dict(dim=[1, 3, 500, 500])))
    else:
        raise Exception("Invalid phase")

    # The first conv stage
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    # The second conv stage
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    # The third conv stage
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)

    # After the last conv layer of the third stage, attach an MCFE module (channels: 64, kernel: 3x3)
    n.conv3_dilation1 = conv_dilation01(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation2 = conv_dilation03(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation3 = conv_dilation05(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation4 = conv_dilation07(n.conv3_3, mult=[100, 1, 200, 0])
    # Concatenate along the channel dimension
    n.concat_conv33 = L.Concat(n.conv3_dilation1, n.conv3_dilation2, n.conv3_dilation3, n.conv3_dilation4,
                               concat_param=dict({'concat_dim': 1}))

    # BLSTM module after the MCFE module
    # ===================== prepare lstm inputs =====================
    n.im2col_conv33 = L.Im2col(n.concat_conv33, convolution_param=dict(kernel_size=3, pad=1))
    n.im2col_transpose_conv33 = L.Transpose(n.im2col_conv33, transpose_param=dict(dim=[3, 2, 0, 1]))
    n.lstm_input_conv33 = L.Reshape(n.im2col_transpose_conv33,
                                    reshape_param=dict(shape=dict(dim=-1), axis=1, num_axes=2))
    # Forward LSTM
    n.lstm_conv33 = L.Lstm(n.lstm_input_conv33,
                           lstm_param=dict(num_output=128,
                                           weight_filler=dict(type='gaussian', std=0.01),
                                           bias_filler=dict(type='constant'),
                                           clipping_threshold=1))
    # Backward LSTM
    n.rlstm_input_conv33 = L.Reverse(n.lstm_input_conv33, name='lstm_reverse1_conv33',
                                     reverse_param=dict(axis=0))
    n.rlstm_output_conv33 = L.Lstm(n.rlstm_input_conv33, name='rlstm_conv33',
                                   lstm_param=dict(num_output=128))
    n.rlstm_conv33 = L.Reverse(n.rlstm_output_conv33, name='lstm_reverse2_conv33',
                               reverse_param=dict(axis=0))
    # Concatenate lstm_conv33 and rlstm_conv33: n*c*(h1+h2+...+hk)*w
    n.merge_lstm_rlstm_conv33 = L.Concat(n.lstm_conv33, n.rlstm_conv33, concat_param=dict(axis=2))
    n.lstm_output_reshape_conv33 = L.Reshape(n.merge_lstm_rlstm_conv33,
                                             reshape_param=dict(shape=dict(dim=[-1, 1]), axis=1, num_axes=1))
    # transpose size of output as (N, C, H, W)
    n.lstm_output_conv33 = L.Transpose(n.lstm_output_reshape_conv33, transpose_param=dict(dim=[2, 3, 1, 0]))

    n.pool3 = max_pool(n.relu3_3)

    # The fourth conv stage
    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)

    # After the last conv layer of the fourth stage, attach an MCFE module (channels: 128, kernel: 3x3)
    n.conv4_dilation1 = conv_dilation1(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation2 = conv_dilation3(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation3 = conv_dilation5(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation4 = conv_dilation7(n.conv4_3, mult=[100, 1, 200, 0])
    # Concatenate along the channel dimension: n*(c1+c2+...+ck)*h*w
    n.concat_conv43 = L.Concat(n.conv4_dilation1, n.conv4_dilation2, n.conv4_dilation3, n.conv4_dilation4,
                               concat_param=dict({'concat_dim': 1}))

    # BLSTM module
    # ===================== prepare lstm inputs =====================
    n.im2col_conv43 = L.Im2col(n.concat_conv43, convolution_param=dict(kernel_size=3, pad=1))
    n.im2col_transpose_conv43 = L.Transpose(n.im2col_conv43, transpose_param=dict(dim=[3, 2, 0, 1]))
    n.lstm_input_conv43 = L.Reshape(n.im2col_transpose_conv43,
                                    reshape_param=dict(shape=dict(dim=-1), axis=1, num_axes=2))
    # Forward LSTM
    n.lstm_conv43 = L.Lstm(n.lstm_input_conv43,
                           lstm_param=dict(num_output=256,
                                           weight_filler=dict(type='gaussian', std=0.01),
                                           bias_filler=dict(type='constant'),
                                           clipping_threshold=1))
    # Backward LSTM
    n.rlstm_input_conv43 = L.Reverse(n.lstm_input_conv43, name='lstm_reverse1_conv43',
                                     reverse_param=dict(axis=0))
    n.rlstm_output_conv43 = L.Lstm(n.rlstm_input_conv43, name='rlstm_conv43',
                                   lstm_param=dict(num_output=256))
    n.rlstm_conv43 = L.Reverse(n.rlstm_output_conv43, name='lstm_reverse2_conv43',
                               reverse_param=dict(axis=0))
    # Concatenate lstm_conv43 and rlstm_conv43: n*c*(h1+h2+...+hk)*w
    n.merge_lstm_rlstm_conv43 = L.Concat(n.lstm_conv43, n.rlstm_conv43, concat_param=dict(axis=2))
    n.lstm_output_reshape_conv43 = L.Reshape(n.merge_lstm_rlstm_conv43,
                                             reshape_param=dict(shape=dict(dim=[-1, 1]), axis=1, num_axes=1))
    # transpose size of output as (N, C, H, W)
    n.lstm_output_conv43 = L.Transpose(n.lstm_output_reshape_conv43, transpose_param=dict(dim=[2, 3, 1, 0]))

    n.pool4 = max_pool(n.relu4_3)

    # The fifth conv stage
    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)

    # MCFE inception module, channels: 128, kernel: 3x3
    n.conv5_dilation1 = conv_dilation1(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation2 = conv_dilation3(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation3 = conv_dilation5(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation4 = conv_dilation7(n.conv5_3, mult=[100, 1, 200, 0])
    n.concat_conv53 = L.Concat(n.conv5_dilation1, n.conv5_dilation2, n.conv5_dilation3, n.conv5_dilation4,
                               concat_param=dict({'concat_dim': 1}))

    # BLSTM module
    # ===================== prepare lstm inputs =====================
    n.im2col_conv53 = L.Im2col(n.concat_conv53, convolution_param=dict(kernel_size=3, pad=1))
    n.im2col_transpose_conv53 = L.Transpose(n.im2col_conv53, transpose_param=dict(dim=[3, 2, 0, 1]))
    n.lstm_input_conv53 = L.Reshape(n.im2col_transpose_conv53,
                                    reshape_param=dict(shape=dict(dim=-1), axis=1, num_axes=2))
    # Forward LSTM
    n.lstm_conv53 = L.Lstm(n.lstm_input_conv53,
                           lstm_param=dict(num_output=256,
                                           weight_filler=dict(type='gaussian', std=0.01),
                                           bias_filler=dict(type='constant'),
                                           clipping_threshold=1))
    # Backward LSTM
    n.rlstm_input_conv53 = L.Reverse(n.lstm_input_conv53, name='lstm_reverse1_conv53',
                                     reverse_param=dict(axis=0))
    n.rlstm_output_conv53 = L.Lstm(n.rlstm_input_conv53, name='rlstm_conv53',
                                   lstm_param=dict(num_output=256))
    n.rlstm_conv53 = L.Reverse(n.rlstm_output_conv53, name='lstm_reverse2_conv53',
                               reverse_param=dict(axis=0))
    # Concatenate lstm_conv53 and rlstm_conv53: n*c*(h1+h2+...+hk)*w
    n.merge_lstm_rlstm_conv53 = L.Concat(n.lstm_conv53, n.rlstm_conv53, concat_param=dict(axis=2))
    n.lstm_output_reshape_conv53 = L.Reshape(n.merge_lstm_rlstm_conv53,
                                             reshape_param=dict(shape=dict(dim=[-1, 1]), axis=1, num_axes=1))
    # transpose size of output as (N, C, H, W)
    n.lstm_output_conv53 = L.Transpose(n.lstm_output_reshape_conv53, transpose_param=dict(dim=[2, 3, 1, 0]))

    # Stage 3: the BLSTM output is reduced with a 1x1 conv, upsampled 4x, and cropped to the input size
    n.score_dsn3 = conv1x1(n.lstm_output_conv33, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01))
    n.score_dsn3_up = upsample(n.score_dsn3, stride=4)
    n.upscore_dsn3 = L.Crop(n.score_dsn3_up, n.data)
    # BalanceCrossEntropyLoss
    if split == 'train':
        n.loss3 = L.BalanceCrossEntropyLoss(n.upscore_dsn3, n.label, loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn3 = L.Sigmoid(n.upscore_dsn3)

    # Stage 4: the BLSTM output is reduced with a 1x1 conv, upsampled 8x, and cropped to the input size
    n.score_dsn4 = conv1x1(n.lstm_output_conv43, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01))
    n.score_dsn4_up = upsample(n.score_dsn4, stride=8)
    n.upscore_dsn4 = L.Crop(n.score_dsn4_up, n.data)
    # BalanceCrossEntropyLoss
    if split == 'train':
        n.loss4 = L.BalanceCrossEntropyLoss(n.upscore_dsn4, n.label, loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn4 = L.Sigmoid(n.upscore_dsn4)

    # Stage 5: the BLSTM output is reduced with a 1x1 conv, upsampled 16x, and cropped to the input size
    n.score_dsn5 = conv1x1(n.lstm_output_conv53, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01))
    n.score_dsn5_up = upsample(n.score_dsn5, stride=16)
    n.upscore_dsn5 = L.Crop(n.score_dsn5_up, n.data)
    # BalanceCrossEntropyLoss
    if split == 'train':
        n.loss5 = L.BalanceCrossEntropyLoss(n.upscore_dsn5, n.label, loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn5 = L.Sigmoid(n.upscore_dsn5)

    # Concatenate the three stage outputs along the channel dimension as the input of the attention module
    n.concat_upscore = L.Concat(n.upscore_dsn3, n.upscore_dsn4, n.upscore_dsn5, name='concat',
                                concat_param=dict({'concat_dim': 1}))

    # Reduce upscore_dsn3 / upscore_dsn4 / upscore_dsn5 with 3x3 convolutions
    n.output_mask_product03 = L.Convolution(n.upscore_dsn3, num_output=1, kernel_size=3, pad=1,
        param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant'), engine=1)
    n.output_mask_product04 = L.Convolution(n.upscore_dsn4, num_output=1, kernel_size=3, pad=1,
        param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant'), engine=1)
    n.output_mask_product05 = L.Convolution(n.upscore_dsn5, num_output=1, kernel_size=3, pad=1,
        param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant'), engine=1)

    ### Attention module
    # First conv layer: num_output=512, kernel_size 3x3
    n.att_conv1_mask_512 = L.Convolution(n.concat_upscore, num_output=512, kernel_size=3, pad=1,
        param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)], engine=1)
    n.relu_att_conv1 = L.ReLU(n.att_conv1_mask_512, in_place=True)
    n.drop_att_conv1_mask = L.Dropout(n.relu_att_conv1, dropout_ratio=0.5, in_place=True)
    # Second conv layer: num_output=3, kernel_size 1x1
    n.att_fc_mask_512 = L.Convolution(n.drop_att_conv1_mask, num_output=3, kernel_size=1,
        param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)], engine=1)
    n.attention = L.Softmax(n.att_fc_mask_512)
    # Produce the three attention weights
    n.attention3, n.attention4, n.attention5 = L.Slice(n.attention, name='slice_attention',
        slice_param=dict(axis=1, slice_point=[1, 2]), ntop=3)

    # Multiply the attention weights with the feature maps and fuse them
    n.output_mask3 = L.Eltwise(n.attention3, n.output_mask_product03, operation=P.Eltwise.PROD)
    n.output_mask4 = L.Eltwise(n.attention4, n.output_mask_product04, operation=P.Eltwise.PROD)
    n.output_mask5 = L.Eltwise(n.attention5, n.output_mask_product05, operation=P.Eltwise.PROD)
    n.output_fusion = L.Eltwise(n.output_mask3, n.output_mask4, n.output_mask5, operation=P.Eltwise.SUM)

    # For comparison, without the attention module: concatenate the three stage outputs
    # along the channel dimension and apply a 1x1 convolution
    n.upscore_fuse = L.Convolution(n.concat_upscore, name='new-score-weighting', num_output=1, kernel_size=1,
        param=[dict(lr_mult=0.001, decay_mult=1), dict(lr_mult=0.002, decay_mult=0)],
        weight_filler=dict(type='constant', value=0.2), engine=1)
    if split == 'train':
        n.loss_fuse = L.BalanceCrossEntropyLoss(n.upscore_fuse, n.label, loss_param=loss_param)
        n.loss_output_fusion = L.BalanceCrossEntropyLoss(n.output_fusion, n.label, loss_param=loss_param)
    if split == 'test':
        n.sigmoid_fuse = L.Sigmoid(n.upscore_fuse)
        n.sigmoid_output_fusion = L.Sigmoid(n.output_fusion)

    return n.to_proto()
def write_prototxt(is_train, output_folder, filename, main_branch,
                   num_output_stage1, blocks, sync_bn, uni_bn):
    netspec = caffe.NetSpec()

    #### Input Setting ####
    crop_size = 112
    width = 170
    height = 128
    length = 16
    step = 1
    num_segments = 1

    if is_train:
        use_global_stats = False
    else:
        use_global_stats = True

    #### Data layer ####
    if is_train:
        data_train_params = dict(name='data',
                                 ntop=2,
                                 video4d_data_param=dict(
                                     source="../kinetics_train_list.txt",
                                     batch_size=24,
                                     new_width=width,
                                     new_height=height,
                                     new_length=length,
                                     num_segments=num_segments,
                                     modality=0,
                                     step=step,
                                     rand_step=True,
                                     name_pattern='image_%06d.jpg',
                                     shuffle=True),
                                 transform_param=dict(
                                     crop_size=crop_size,
                                     mirror=True,
                                     multi_scale=True,
                                     max_distort=1,
                                     scale_ratios=[1, 0.875, 0.75, 0.66],
                                     mean_value=[104] * length + [117] * length + [123] * length),
                                 include=dict(phase=0))
        data_val_params = dict(name='vdata',
                               ntop=2,
                               video4d_data_param=dict(
                                   source="../kinetics_val_list.txt",
                                   batch_size=1,
                                   new_width=width,
                                   new_height=height,
                                   new_length=length,
                                   num_segments=num_segments,
                                   modality=0,
                                   step=step,
                                   name_pattern='image_%06d.jpg'),
                               transform_param=dict(
                                   crop_size=crop_size,
                                   mirror=False,
                                   mean_value=[104] * length + [117] * length + [123] * length),
                               include=dict(phase=1))
        # pdb.set_trace()
        netspec.data, netspec.label = BaseModule('Video4dData', data_train_params).attach(netspec, [])
        netspec.vdata, netspec.vlabel = BaseModule('Video4dData', data_val_params).attach(netspec, [])
    else:
        data_params = dict(name='data',
                           dummy_data_param=dict(
                               shape=dict(dim=[10, 3, length, crop_size, crop_size])))
        netspec.data = BaseModule('DummyData', data_params).attach(netspec, [])

    #### (Optional) Reshape Layer ####
    if is_train:
        reshape_params = dict(name='data_reshape',
                              reshape_param=dict(
                                  shape=dict(dim=[-1, 3, length, crop_size, crop_size])))
        netspec.data_reshape = BaseModule('Reshape', reshape_params).attach(netspec, [netspec.data])

    #### Stage 1 ####
    channels = 3 * 7 * 7 * 3 * 64 / (7 * 7 * 3 + 3 * 64)
    name = '1_s'
    conv1xdxd_params = dict(name='conv' + name,
                            num_output=channels,
                            kernel_size=[1, 7, 7],
                            pad=[0, 3, 3],
                            stride=[1, 2, 2],
                            engine=2)
    conv1xdxd = BaseModule('Convolution', conv1xdxd_params).attach(
        netspec, [netspec.data_reshape if is_train else netspec.data])
    name = '1_t'
    stage1 = SgpAttenModule(name_template=name,
                            bn_params=dict(frozen=False),
                            stride=2,
                            num_output=64,
                            t_conv=False,
                            sync_bn=sync_bn,
                            uni_bn=uni_bn).attach(netspec, [conv1xdxd])
    num_output = num_output_stage1

    #### Stages 2 - 5 ####
    last = stage1
    for stage in range(4):
        for block in range(blocks[stage]):
            # First block usually projection
            if block == 0:
                shortcut = 'projection'
                stride = 2
                if stage == 0:
                    shortcut = 'identity'
                    stride = 1
            else:
                shortcut = 'identity'
                stride = 1
            name = str(stage + 2) + num2letter[int(block)]
            curr_num_output = num_output * (2**stage)
            if uni_bn:
                params = dict(name=name, num_output=curr_num_output, shortcut=shortcut,
                              main_branch=main_branch, stride=stride, frozen=False)
            else:
                params = dict(name=name, num_output=curr_num_output, shortcut=shortcut,
                              main_branch=main_branch, stride=stride,
                              use_global_stats=use_global_stats)
            last = PreActWiderDecoupSgpAttenMixBlock(name_template=name,
                                                     shortcut=shortcut,
                                                     num_output=curr_num_output,
                                                     stride=stride,
                                                     t_conv=False,
                                                     sync_bn=sync_bn,
                                                     uni_bn=uni_bn).attach(netspec, [last])
            # else:
            #     last = PreActWiderDecoupSgpAttenStrongestBlock(name_template=name,
            #                                                    shortcut=shortcut,
            #                                                    num_output=curr_num_output,
            #                                                    stride=stride,
            #                                                    t_conv=True,
            #                                                    sync_bn=sync_bn,
            #                                                    uni_bn=uni_bn).attach(netspec, [last])

    #### Last Norm & ReLU ####
    if uni_bn:
        bn_params = dict(frozen=False)
    else:
        bn_params = dict(use_global_stats=use_global_stats)
    last = BNReLUModule(name_template='5b',
                        bn_params=bn_params,
                        sync_bn=sync_bn,
                        uni_bn=uni_bn).attach(netspec, [last])

    #### pool5 ####
    pool_params = dict(global_pooling=True, pool=P.Pooling.AVE, name='pool5')
    pool = BaseModule('Pooling', pool_params).attach(netspec, [last])

    #### pool5_reshape ####
    reshape_params = dict(shape=dict(dim=[-1, num_output_stage1 * 8]), name='pool5_reshape')
    reshape = BaseModule('Reshape', reshape_params).attach(netspec, [pool])

    #### dropout ####
    dropout_params = dict(dropout_ratio=0.2, name='dropout')
    dropout = BaseModule('Dropout', dropout_params).attach(netspec, [reshape])

    #### ip ####
    ip_params = dict(name='fc400', num_output=400)
    ip = BaseModule('InnerProduct', ip_params).attach(netspec, [dropout])

    if is_train:
        #### Softmax Loss ####
        smax_params = dict(name='loss')
        smax_loss = BaseModule('SoftmaxWithLoss', smax_params).attach(netspec, [ip, netspec.label])

        #### Top1 Accuracy ####
        top1_params = dict(name='top1', accuracy_param=dict(top_k=1), include=dict(phase=1))
        top1 = BaseModule('Accuracy', top1_params).attach(netspec, [ip, netspec.label])

        #### Top5 Accuracy ####
        top5_params = dict(name='top5', accuracy_param=dict(top_k=5), include=dict(phase=1))
        top5 = BaseModule('Accuracy', top5_params).attach(netspec, [ip, netspec.label])

    filepath = os.path.join(output_folder, filename)
    fp = open(filepath, 'w')
    print >> fp, netspec.to_proto()
    fp.close()
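
# Hypothetical invocation (the block counts, main_branch name and output
# folder below are placeholders, not values taken from the original training
# script): write a training and a deploy prototxt for this video model.
def write_example_prototxts(output_folder='prototxts'):
    write_prototxt(is_train=True, output_folder=output_folder, filename='train.prototxt',
                   main_branch='inception', num_output_stage1=64,
                   blocks=[3, 4, 6, 3], sync_bn=False, uni_bn=True)
    write_prototxt(is_train=False, output_folder=output_folder, filename='deploy.prototxt',
                   main_branch='inception', num_output_stage1=64,
                   blocks=[3, 4, 6, 3], sync_bn=False, uni_bn=True)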
def build_VGG16Net(split, num_classes, batch_size, resize_w, resize_h, crop_w=0, crop_h=0,
                   crop_margin=0, mirror=0, rotate=0, HSV_prob=0, HSV_jitter=0,
                   train=True, deploy=False):
    weight_param = dict(lr_mult=1, decay_mult=1)
    bias_param = dict(lr_mult=2, decay_mult=0)
    learned_param = [weight_param, bias_param]
    frozen_param = [dict(lr_mult=0)] * 2
    # boosted_param is used for the fully connected layers below but was not
    # defined in the original snippet; assume it simply reuses the learned parameters.
    boosted_param = learned_param

    n = caffe.NetSpec()

    pydata_params = dict(split=split, mean=(103.939, 116.779, 123.68))  # For VGG16, different mean than AlexNet
    pydata_params['dir'] = '../../../datasets/SocialMedia'
    pydata_params['train'] = train
    pydata_params['batch_size'] = batch_size
    pydata_params['resize_w'] = resize_w
    pydata_params['resize_h'] = resize_h
    pydata_params['crop_w'] = crop_w
    pydata_params['crop_h'] = crop_h
    pydata_params['crop_margin'] = crop_margin
    pydata_params['mirror'] = mirror
    pydata_params['rotate'] = rotate
    pydata_params['HSV_prob'] = HSV_prob
    pydata_params['HSV_jitter'] = HSV_jitter
    pydata_params['num_classes'] = num_classes
    pylayer = 'customDataLayer'
    n.data, n.label = L.Python(module='layers', layer=pylayer, ntop=2, param_str=str(pydata_params))

    # conv
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 3, 64, pad=1, param=frozen_param)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 3, 64, pad=1, param=frozen_param)
    n.pool1 = max_pool(n.relu1_2, 2, stride=2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 3, 128, pad=1, param=frozen_param)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 3, 128, pad=1, param=frozen_param)
    n.pool2 = max_pool(n.relu2_2, 2, stride=2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 3, 256, pad=1, param=frozen_param)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 3, 256, pad=1, param=frozen_param)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 3, 256, pad=1, param=frozen_param)
    n.pool3 = max_pool(n.relu3_3, 2, stride=2)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 3, 512, pad=1, param=frozen_param)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 3, 512, pad=1, param=frozen_param)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 3, 512, pad=1, param=frozen_param)
    n.pool4 = max_pool(n.relu4_3, 2, stride=2)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 3, 512, pad=1, param=learned_param)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 3, 512, pad=1, param=learned_param)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 3, 512, pad=1, param=learned_param)
    n.pool5 = max_pool(n.relu5_3, 2, stride=2)

    # fully connected
    n.fc6, n.relu6 = fc_relu(n.pool5, 4096, param=boosted_param)
    if train:
        n.drop6 = fc7input = L.Dropout(n.relu6, in_place=True, dropout_ratio=0.5)
    else:
        fc7input = n.relu6
    n.fc7, n.relu7 = fc_relu(fc7input, 4096, param=boosted_param)
    if train:
        n.drop7 = fc8input = L.Dropout(n.relu7, in_place=True, dropout_ratio=0.5)
    else:
        fc8input = n.relu7
    n.fc8C = L.InnerProduct(fc8input, num_output=num_classes, param=boosted_param)

    if not deploy:
        n.loss = L.SigmoidCrossEntropyLoss(n.fc8C, n.label)

    if deploy:
        n.probs = L.Sigmoid(n.fc8C)
        with open('deploy.prototxt', 'w') as f:
            f.write(str(n.to_proto()))
        return f.name
    else:
        if train:
            with open('train.prototxt', 'w') as f:
                f.write(str(n.to_proto()))
            return f.name
        else:
            with open('val.prototxt', 'w') as f:
                f.write(str(n.to_proto()))
            return f.name
def dump_model(operation='create', redo=False):
    # Creates graph from saved GraphDef.
    create_graph()
    sess = tf.InteractiveSession()

    # Creates caffe model.
    deploy_net_file = 'models/inception_v3/inception_v3_deploy.prototxt'
    model_file = 'models/inception_v3/inception_v3.caffemodel'
    net = []
    if operation == 'create' and (not os.path.exists(deploy_net_file) or redo):
        net = caffe.NetSpec()
    elif operation == 'save' and (not os.path.exists(model_file) or redo):
        caffe.set_device(1)
        caffe.set_mode_gpu()
        net = caffe.Net(deploy_net_file, caffe.TEST)
    else:
        return

    # dump the preprocessing parameters
    dump_inputlayer(sess, net, operation)

    # dump the filters
    dump_convbn(sess, net, 'data', 'conv', operation)
    dump_convbn(sess, net, 'conv', 'conv_1', operation)
    dump_convbn(sess, net, 'conv_1', 'conv_2', operation)
    dump_pool(sess, net, 'conv_2', 'pool', operation)
    dump_convbn(sess, net, 'pool', 'conv_3', operation)
    dump_convbn(sess, net, 'conv_3', 'conv_4', operation)
    dump_pool(sess, net, 'conv_4', 'pool_1', operation)

    # inceptions with 1x1, 3x3, 5x5 convolutions
    from_layer = 'pool_1'
    for inception_id in range(0, 3):
        if inception_id == 0:
            out_layer = 'mixed'
        else:
            out_layer = 'mixed_{}'.format(inception_id)
        dump_tower(sess, net, from_layer, out_layer, ['conv'], operation)
        dump_tower(sess, net, from_layer, '{}/tower'.format(out_layer),
                   ['conv', 'conv_1'], operation)
        dump_tower(sess, net, from_layer, '{}/tower_1'.format(out_layer),
                   ['conv', 'conv_1', 'conv_2'], operation)
        dump_tower(sess, net, from_layer, '{}/tower_2'.format(out_layer),
                   ['pool', 'conv'], operation)
        dump_inception(sess, net, out_layer,
                       ['conv', 'tower/conv_1', 'tower_1/conv_2', 'tower_2/conv'], operation)
        from_layer = '{}/join'.format(out_layer)

    # inceptions with 1x1, 3x3 (in sequence) convolutions
    out_layer = 'mixed_3'
    dump_tower(sess, net, from_layer, out_layer, ['conv'], operation)
    dump_tower(sess, net, from_layer, '{}/tower'.format(out_layer),
               ['conv', 'conv_1', 'conv_2'], operation)
    dump_tower(sess, net, from_layer, out_layer, ['pool'], operation)
    dump_inception(sess, net, out_layer, ['conv', 'tower/conv_2', 'pool'], operation)
    from_layer = '{}/join'.format(out_layer)

    # inceptions with 1x1, 7x1, 1x7 convolutions
    for inception_id in range(4, 8):
        out_layer = 'mixed_{}'.format(inception_id)
        dump_tower(sess, net, from_layer, out_layer, ['conv'], operation)
        dump_tower(sess, net, from_layer, '{}/tower'.format(out_layer),
                   ['conv', 'conv_1', 'conv_2'], operation)
        dump_tower(sess, net, from_layer, '{}/tower_1'.format(out_layer),
                   ['conv', 'conv_1', 'conv_2', 'conv_3', 'conv_4'], operation)
        dump_tower(sess, net, from_layer, '{}/tower_2'.format(out_layer),
                   ['pool', 'conv'], operation)
        dump_inception(sess, net, out_layer,
                       ['conv', 'tower/conv_2', 'tower_1/conv_4', 'tower_2/conv'], operation)
        from_layer = '{}/join'.format(out_layer)

    # inceptions with 1x1, 3x3, 1x7, 7x1 filters
    out_layer = 'mixed_8'
    dump_tower(sess, net, from_layer, '{}/tower'.format(out_layer),
               ['conv', 'conv_1'], operation)
    dump_tower(sess, net, from_layer, '{}/tower_1'.format(out_layer),
               ['conv', 'conv_1', 'conv_2', 'conv_3'], operation)
    dump_tower(sess, net, from_layer, out_layer, ['pool'], operation)
    dump_inception(sess, net, out_layer, ['tower/conv_1', 'tower_1/conv_3', 'pool'], operation)
    from_layer = '{}/join'.format(out_layer)

    for inception_id in range(9, 11):
        out_layer = 'mixed_{}'.format(inception_id)
        dump_tower(sess, net, from_layer, out_layer, ['conv'], operation)
        dump_tower(sess, net, from_layer, '{}/tower'.format(out_layer), ['conv'], operation)
        dump_tower(sess, net, '{}/tower/conv'.format(out_layer),
                   '{}/tower/mixed'.format(out_layer), ['conv'], operation)
        dump_tower(sess, net, '{}/tower/conv'.format(out_layer),
                   '{}/tower/mixed'.format(out_layer), ['conv_1'], operation)
        dump_inception(sess, net, '{}/tower/mixed'.format(out_layer),
                       ['conv', 'conv_1'], operation, False)
        dump_tower(sess, net, from_layer, '{}/tower_1'.format(out_layer),
                   ['conv', 'conv_1'], operation)
        dump_tower(sess, net, '{}/tower_1/conv_1'.format(out_layer),
                   '{}/tower_1/mixed'.format(out_layer), ['conv'], operation)
        dump_tower(sess, net, '{}/tower_1/conv_1'.format(out_layer),
                   '{}/tower_1/mixed'.format(out_layer), ['conv_1'], operation)
        dump_inception(sess, net, '{}/tower_1/mixed'.format(out_layer),
                       ['conv', 'conv_1'], operation, False)
        dump_tower(sess, net, from_layer, '{}/tower_2'.format(out_layer),
                   ['pool', 'conv'], operation)
        dump_inception(sess, net, out_layer,
                       ['conv', 'tower/mixed', 'tower_1/mixed', 'tower_2/conv'], operation)
        from_layer = '{}/join'.format(out_layer)

    dump_pool(sess, net, from_layer, 'pool_3', operation)
    dump_softmax(sess, net, 'pool_3', 'softmax', operation)

    if operation == 'create' and (not os.path.exists(deploy_net_file) or redo):
        model_dir = os.path.dirname(deploy_net_file)
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        with open(deploy_net_file, 'w') as f:
            print('name: "inception_v3_deploy"', file=f)
            print(net.to_proto(), file=f)
    elif operation == 'save' and (not os.path.exists(model_file) or redo):
        net.save(model_file)

    sess.close()
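
# Usage sketch based on the two operation modes handled above: the first pass
# writes inception_v3_deploy.prototxt, the second pass copies the TensorFlow
# weights into inception_v3.caffemodel.
def convert_inception_v3():
    dump_model(operation='create')
    dump_model(operation='save')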
def fcn(split, tops):
    n = caffe.NetSpec()
    n.data, n.label = L.Python(module='nyud_layers',
            layer='NYUDSegDataLayer', ntop=2,
            param_str=str(dict(nyud_dir='../data', split=split, tops=tops, seed=1337)))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6_new, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7_new, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(n.drop7, num_output=40, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.upscore = L.Deconvolution(n.score_fr,
        convolution_param=dict(num_output=40, kernel_size=64, stride=32, bias_term=False),
        param=[dict(lr_mult=0)])
    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(n.score, n.label,
            loss_param=dict(normalize=False, ignore_label=255))

    return n.to_proto()
def alexnet(train_lmdb, test_lmdb, mean_file, train_batch_size, test_batch_size, output_path):
    # train
    ntrain = caffe.NetSpec()
    # val
    nval = caffe.NetSpec()
    # deploy
    ndeploy = caffe.NetSpec()

    # --------------------------------------------------
    # train + val
    ntrain.data, ntrain.label = L.Data(name='data', batch_size=train_batch_size, backend=P.Data.LMDB,
        source=train_lmdb,
        transform_param=dict(mirror=True, crop_size=227, mean_file=mean_file),
        include=dict(phase=caffe.TRAIN), ntop=2)
    nval.data, nval.label = L.Data(name='data', batch_size=test_batch_size, backend=P.Data.LMDB,
        source=test_lmdb,
        transform_param=dict(mirror=False, crop_size=227, mean_file=mean_file),
        include=dict(phase=caffe.TEST), ntop=2)

    ntrain.conv1 = L.Convolution(ntrain.data, name='conv1', kernel_size=11, num_output=96, stride=4,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.relu1 = L.ReLU(ntrain.conv1, name='relu1', in_place=True)
    ntrain.norm1 = L.LRN(ntrain.relu1, name='norm1', local_size=5, alpha=1e-4, beta=0.75)
    ntrain.pool1 = L.Pooling(ntrain.norm1, name='pool1', kernel_size=3, stride=2, pool=P.Pooling.MAX)

    ntrain.conv2 = L.Convolution(ntrain.pool1, name='conv2', kernel_size=5, num_output=256, pad=2, group=2,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0.1),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.relu2 = L.ReLU(ntrain.conv2, name='relu2', in_place=True)
    ntrain.norm2 = L.LRN(ntrain.relu2, name='norm2', local_size=5, alpha=1e-4, beta=0.75)
    ntrain.pool2 = L.Pooling(ntrain.norm2, name='pool2', kernel_size=3, stride=2, pool=P.Pooling.MAX)

    ntrain.conv3 = L.Convolution(ntrain.pool2, name='conv3', kernel_size=3, num_output=384, pad=1,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.relu3 = L.ReLU(ntrain.conv3, name='relu3', in_place=True)

    ntrain.conv4 = L.Convolution(ntrain.relu3, name='conv4', kernel_size=3, num_output=384, pad=1, group=2,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0.1),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.relu4 = L.ReLU(ntrain.conv4, name='relu4', in_place=True)

    ntrain.conv5 = L.Convolution(ntrain.relu4, name='conv5', kernel_size=3, num_output=256, pad=1, group=2,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0.1),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.relu5 = L.ReLU(ntrain.conv5, name='relu5', in_place=True)
    ntrain.pool5 = L.Pooling(ntrain.relu5, name='pool5', kernel_size=3, stride=2, pool=P.Pooling.MAX)

    ntrain.fc6 = L.InnerProduct(ntrain.pool5, name='fc6', num_output=4096,
        weight_filler=dict(type='gaussian', std=0.005),
        bias_filler=dict(type='constant', value=1e-1),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.relu6 = L.ReLU(ntrain.fc6, name='relu6', in_place=True)
    ntrain.drop6 = L.Dropout(ntrain.relu6, name='drop6', dropout_ratio=0.5, in_place=True)

    ntrain.fc7 = L.InnerProduct(ntrain.drop6, name='fc7', num_output=4096,
        weight_filler=dict(type='gaussian', std=0.005),
        bias_filler=dict(type='constant', value=1e-1),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.relu7 = L.ReLU(ntrain.fc7, name='relu7', in_place=True)
    ntrain.drop7 = L.Dropout(ntrain.relu7, name='drop7', dropout_ratio=0.5, in_place=True)

    ntrain.fc8 = L.InnerProduct(ntrain.drop7, name='fc8', num_output=1000,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.accuracy = L.Accuracy(ntrain.fc8, ntrain.label, name='accuracy', include=dict(phase=caffe.TEST))
    ntrain.loss = L.SoftmaxWithLoss(ntrain.fc8, ntrain.label, name='loss')

    # --------------------------------------------------
    # deploy: drop lr_mult, decay_mult, weight_filler and bias_filler
    # ({'shape': {'dim': [batch_size, channels, n_rows, n_cols]}})
    ndeploy.data = L.Input(input_param={'shape': {'dim': [10, 3, 227, 227]}})
    ndeploy.conv1 = L.Convolution(ndeploy.data, name='conv1', kernel_size=11, num_output=96, stride=4)
    ndeploy.relu1 = L.ReLU(ndeploy.conv1, name='relu1', in_place=True)
    ndeploy.norm1 = L.LRN(ndeploy.relu1, name='norm1', local_size=5, alpha=1e-4, beta=0.75)
    ndeploy.pool1 = L.Pooling(ndeploy.norm1, name='pool1', kernel_size=3, stride=2, pool=P.Pooling.MAX)
    ndeploy.conv2 = L.Convolution(ndeploy.pool1, name='conv2', kernel_size=5, num_output=256, pad=2, group=2)
    ndeploy.relu2 = L.ReLU(ndeploy.conv2, name='relu2', in_place=True)
    ndeploy.norm2 = L.LRN(ndeploy.relu2, name='norm2', local_size=5, alpha=1e-4, beta=0.75)
    ndeploy.pool2 = L.Pooling(ndeploy.norm2, name='pool2', kernel_size=3, stride=2, pool=P.Pooling.MAX)
    ndeploy.conv3 = L.Convolution(ndeploy.pool2, name='conv3', kernel_size=3, num_output=384, pad=1)
    ndeploy.relu3 = L.ReLU(ndeploy.conv3, name='relu3', in_place=True)
    ndeploy.conv4 = L.Convolution(ndeploy.relu3, name='conv4', kernel_size=3, num_output=384, pad=1, group=2)
    ndeploy.relu4 = L.ReLU(ndeploy.conv4, name='relu4', in_place=True)
    ndeploy.conv5 = L.Convolution(ndeploy.relu4, name='conv5', kernel_size=3, num_output=256, pad=1, group=2)
    ndeploy.relu5 = L.ReLU(ndeploy.conv5, name='relu5', in_place=True)
    ndeploy.pool5 = L.Pooling(ndeploy.relu5, name='pool5', kernel_size=3, stride=2, pool=P.Pooling.MAX)
    ndeploy.fc6 = L.InnerProduct(ndeploy.pool5, name='fc6', num_output=4096)
    ndeploy.relu6 = L.ReLU(ndeploy.fc6, name='relu6', in_place=True)
    ndeploy.drop6 = L.Dropout(ndeploy.relu6, name='drop6', dropout_ratio=5e-1, in_place=True)
    ndeploy.fc7 = L.InnerProduct(ndeploy.drop6, name='fc7', num_output=4096)
    ndeploy.relu7 = L.ReLU(ndeploy.fc7, name='relu7', in_place=True)
    ndeploy.drop7 = L.Dropout(ndeploy.relu7, name='drop7', dropout_ratio=5e-1, in_place=True)
    ndeploy.fc8 = L.InnerProduct(ndeploy.drop7, name='fc8', num_output=1000)
    ndeploy.prob = L.Softmax(ndeploy.fc8, name='prob')

    out_train_val = str('name: "AlexNet"\n') + str(nval.to_proto()) + str(ntrain.to_proto())
    with open(output_path + '/alexnet_train_val.prototxt', 'w') as f:
        f.write(out_train_val)

    out_deploy = str('name: "AlexNet"\n') + str(ndeploy.to_proto())
    with open(output_path + '/alexnet_deploy.prototxt', 'w') as f:
        f.write(out_deploy)
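
# Hypothetical usage (every path and batch size below is a placeholder, not
# taken from the original script): writes alexnet_train_val.prototxt and
# alexnet_deploy.prototxt into the current directory.
if __name__ == '__main__':
    alexnet(train_lmdb='examples/imagenet/ilsvrc12_train_lmdb',
            test_lmdb='examples/imagenet/ilsvrc12_val_lmdb',
            mean_file='data/ilsvrc12/imagenet_mean.binaryproto',
            train_batch_size=256, test_batch_size=50, output_path='.')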
def net():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.Reduction(n.data, axis=0, coeff=1, operation=_operation)
    return n.to_proto()
def net():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.Tile(n.data, axis=3, tiles=3)
    return n.to_proto()
def net():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.Flatten(n.data, axis=_axis, end_axis=_end_axis)
    return n.to_proto()
def net():
    n = caffe.NetSpec()
    n.data1 = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.data2 = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.Crop(n.data1, n.data2, axis=_axis, offset=_offset)
    return n.to_proto()
def net():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.AbsVal(n.data)
    return n.to_proto()
def gen_prototxt(nangles, max_order, scales, filter_size_factor=wavelet.DEFAULT_SIZE,
                 nchannels_input=3, intput_shape=[256, 256], data=None, verbose=False,
                 output_path=None):
    n = caffe.NetSpec()

    if data is None:
        data = L.Input(shape=dict(dim=[1, nchannels_input] + intput_shape))
        n.data = data

    scat_count = -1
    dim_total = nchannels_input
    layers = [[(data, [None], 0)]]
    for o in range(max_order):
        layer = []
        for s in scales:
            kernel_size = s * filter_size_factor * 2
            delta_offset = kernel_size // 2
            for c0, s0, offset in layers[-1]:
                if s0[-1] is not None and s <= s0[-1]:
                    continue
                scat_count += 1
                dim_in = nchannels_input * nangles**o
                dim_out = nchannels_input * nangles**(o + 1)
                dim_total += dim_out
                name = 'scat%i_%i_%ito%i' % (s, scat_count, dim_in, dim_out)
                c = scat_layer(c0, dim=dim_out, kernel_size=kernel_size, name=name, group=dim_in)
                layer.append((c, s0 + [s], offset + delta_offset))
                if verbose:
                    print "%s:" % name
                    print " kernel size: %i" % kernel_size
                    print " %s (%i)" % ("->".join(map(str, (s0 + [s]))), dim_out)
        layers.append(layer)

    if verbose:
        print "Total output dimensionality: %i" % dim_total

    # Crop the coefficients before concatenation.
    # The last coefficient is the smallest because it has the highest order.
    last_coefficient = layers[-1][-1][0]
    max_offset = layers[-1][-1][2]
    coefficients = []
    for layer in layers:
        for c, _, offset in layer:
            coefficients.append(L.Crop(c, last_coefficient, offset=max_offset - offset))
    concat = L.Concat(*coefficients)

    # Do the final gaussian blur and resampling
    kernel_size = scales[-1] * filter_size_factor * 2
    stride = scales[-1]
    c = conv_layer(concat, dim=dim_total, group=dim_total, kernel_size=kernel_size,
                   name='psi', stride=stride)
    n.output = c

    proto_str = str(n.to_proto())
    if output_path:
        with open(output_path, 'w+') as f:
            f.write(proto_str)
            return f.name
    else:
        with tempfile.NamedTemporaryFile(delete=False) as f:
            f.write(proto_str)
            return f.name
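
# Usage sketch (illustrative only; the nangles, max_order and scales values
# are placeholders, not defaults from the original module): generate a
# scattering-network prototxt and load it for inference.
def _load_scatnet():
    path = gen_prototxt(nangles=8, max_order=2, scales=[2, 4])
    return caffe.Net(path, caffe.TEST)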
def inception_resnet_v2_proto(self, batch_size, phase='TRAIN'):
    n = caffe.NetSpec()
    if phase == 'TRAIN':
        source_data = self.train_data
        mirror = True
    else:
        source_data = self.test_data
        mirror = False
    n.data, n.label = L.Data(source=source_data, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                             transform_param=dict(crop_size=299, mean_value=[104, 117, 123], mirror=mirror))

    # stem
    n.conv1_3x3_s2, n.conv1_3x3_s2_bn, n.conv1_3x3_s2_scale, n.conv1_3x3_s2_relu, n.conv2_3x3_s1, n.conv2_3x3_s1_bn, \
    n.conv2_3x3_s1_scale, n.conv2_3x3_s1_relu, n.conv3_3x3_s1, n.conv3_3x3_s1_bn, n.conv3_3x3_s1_scale, n.conv3_3x3_s1_relu, \
    n.inception_stem1_3x3_s2, n.inception_stem1_3x3_s2_bn, n.inception_stem1_3x3_s2_scale, n.inception_stem1_3x3_s2_relu, \
    n.inception_stem1_pool, n.inception_stem1, n.inception_stem2_3x3_reduce, n.inception_stem2_3x3_reduce_bn, \
    n.inception_stem2_3x3_reduce_scale, n.inception_stem2_3x3_reduce_relu, n.inception_stem2_3x3, \
    n.inception_stem2_3x3_bn, n.inception_stem2_3x3_scale, n.inception_stem2_3x3_relu, n.inception_stem2_7x1_reduce, \
    n.inception_stem2_7x1_reduce_bn, n.inception_stem2_7x1_reduce_scale, n.inception_stem2_7x1_reduce_relu, \
    n.inception_stem2_7x1, n.inception_stem2_7x1_bn, n.inception_stem2_7x1_scale, n.inception_stem2_7x1_relu, \
    n.inception_stem2_1x7, n.inception_stem2_1x7_bn, n.inception_stem2_1x7_scale, n.inception_stem2_1x7_relu, \
    n.inception_stem2_3x3_2, n.inception_stem2_3x3_2_bn, n.inception_stem2_3x3_2_scale, n.inception_stem2_3x3_2_relu, \
    n.inception_stem2, n.inception_stem3_3x3_s2, n.inception_stem3_3x3_s2_bn, n.inception_stem3_3x3_s2_scale, \
    n.inception_stem3_3x3_s2_relu, n.inception_stem3_pool, n.inception_stem3 = \
        stem_299x299(n.data)  # 384x35x35

    # 5 x inception_resnet_a
    for i in xrange(5):
        if i == 0:
            bottom = 'n.inception_stem3'
        else:
            bottom = 'n.inception_resnet_a(order)_residual_eltwise'.replace('(order)', str(i))
        exec (inception_resnet_a.replace('(order)', str(i + 1)).replace('bottom', bottom))  # 384x35x35

    # reduction_a
    n.reduction_a_pool, n.reduction_a_3x3, n.reduction_a_3x3_bn, n.reduction_a_3x3_scale, n.reduction_a_3x3_relu, \
    n.reduction_a_3x3_2_reduce, n.reduction_a_3x3_2_reduce_bn, n.reduction_a_3x3_2_reduce_scale, \
    n.reduction_a_3x3_2_reduce_relu, n.reduction_a_3x3_2, n.reduction_a_3x3_2_bn, n.reduction_a_3x3_2_scale, \
    n.reduction_a_3x3_2_relu, n.reduction_a_3x3_3, n.reduction_a_3x3_3_bn, n.reduction_a_3x3_3_scale, \
    n.reduction_a_3x3_3_relu, n.reduction_a_concat = \
        reduction_a(n.inception_resnet_a5_residual_eltwise)  # 1152x17x17

    # 10 x inception_resnet_b
    for i in xrange(10):
        if i == 0:
            bottom = 'n.reduction_a_concat'
        else:
            bottom = 'n.inception_resnet_b(order)_residual_eltwise'.replace('(order)', str(i))
        exec (inception_resnet_b.replace('(order)', str(i + 1)).replace('bottom', bottom))  # 1152x17x17

    # reduction_b
    n.reduction_b_pool, n.reduction_b_3x3_reduce, n.reduction_b_3x3_reduce_bn, n.reduction_b_3x3_reduce_scale, \
    n.reduction_b_3x3_reduce_relu, n.reduction_b_3x3, n.reduction_b_3x3_bn, n.reduction_b_3x3_scale, \
    n.reduction_b_3x3_relu, n.reduction_b_3x3_2_reduce, n.reduction_b_3x3_2_reduce_bn, n.reduction_b_3x3_2_reduce_scale, \
    n.reduction_b_3x3_2_reduce_relu, n.reduction_b_3x3_2, n.reduction_b_3x3_2_bn, n.reduction_b_3x3_2_scale, \
    n.reduction_b_3x3_2_relu, n.reduction_b_3x3_3_reduce, n.reduction_b_3x3_3_reduce_bn, n.reduction_b_3x3_3_reduce_scale, \
    n.reduction_b_3x3_3_reduce_relu, n.reduction_b_3x3_3, n.reduction_b_3x3_3_bn, n.reduction_b_3x3_3_scale, \
    n.reduction_b_3x3_3_relu, n.reduction_b_3x3_4, n.reduction_b_3x3_4_bn, n.reduction_b_3x3_4_scale, \
    n.reduction_b_3x3_4_relu, n.reduction_b_concat = \
        reduction_b(n.inception_resnet_b10_residual_eltwise)  # 2048x8x8

    # 5 x inception_resnet_c
    for i in xrange(5):
        if i == 0:
            bottom = 'n.reduction_b_concat'
        else:
            bottom = 'n.inception_resnet_c(order)_residual_eltwise'.replace('(order)', str(i))
        exec (inception_resnet_c.replace('(order)', str(i + 1)).replace('bottom', bottom))  # 2048x8x8

    n.pool_8x8_s1 = L.Pooling(n.inception_resnet_c5_residual_eltwise, pool=P.Pooling.AVE,
                              global_pooling=True)  # 2048x1x1
    n.pool_8x8_s1_drop = L.Dropout(n.pool_8x8_s1, dropout_param=dict(dropout_ratio=0.2))
    n.classifier = L.InnerProduct(n.pool_8x8_s1_drop, num_output=self.classifier_num,
                                  param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                  weight_filler=dict(type='xavier'),
                                  bias_filler=dict(type='constant', value=0))
    n.loss = L.SoftmaxWithLoss(n.classifier, n.label)
    if phase == 'TRAIN':
        pass
    else:
        n.accuracy_top1 = L.Accuracy(n.classifier, n.label, include=dict(phase=1))
        n.accuracy_top5 = L.Accuracy(n.classifier, n.label, include=dict(phase=1),
                                     accuracy_param=dict(top_k=5))

    return n.to_proto()
def DAPNet_Train():
    time_postfix = time.strftime("%m-%d_%H-%M-%S", time.localtime())
    ################################################################################
    os.chdir(caffe_root)
    ################################################################################
    # work dir
    ProjectName = "{}_{}_{}".format(BaseNet, Models, Ver)
    work_dir = "{}/{}/{}".format(Results_dir, Project, ProjectName)
    make_if_not_exist(work_dir)
    ################################################################################
    # work and model dirs
    proto_dir = "{}/Proto".format(work_dir)
    log_dir = "{}/Logs".format(work_dir)
    model_dir = "{}/Models".format(work_dir)
    pic_dir = "{}/Pics".format(work_dir)
    job_dir = "{}/Job".format(work_dir)
    make_if_not_exist(proto_dir)
    make_if_not_exist(log_dir)
    make_if_not_exist(model_dir)
    make_if_not_exist(pic_dir)
    make_if_not_exist(job_dir)
    ################################################################################
    # work file
    log_file = "{}/{}.log".format(log_dir, time_postfix)
    train_net_file = "{}/train.prototxt".format(proto_dir)
    test_net_file = "{}/test.prototxt".format(proto_dir)
    solver_file = "{}/solver.prototxt".format(proto_dir)
    snapshot_prefix = "{}/{}".format(model_dir, ProjectName)
    job_file = "{}/train.sh".format(job_dir)
    ################################################################################
    # TRAIN
    net = caffe.NetSpec()
    net = get_DAPDataLayer(net, train=True, batchsize=batchsize_per_device)
    net = FaceBoxFPNNet(net, train=True, data_layer="data", gt_label="label",
                        net_width=resized_width, net_height=resized_height)
    # net = DAPNet_hand_pool1(net, train=True, data_layer="data", gt_label="label",
    #                         net_width=resized_width, net_height=resized_height)
    with open(train_net_file, 'w') as f:
        print('name: "{}_train"'.format(ProjectName), file=f)
        print(net.to_proto(), file=f)
    ################################################################################
    # TEST
    if isinstance(val_list, list):
        test_net_files = []
        for id_val in xrange(len(val_list)):
            net = caffe.NetSpec()
            net = get_DAPDataLayer(net, train=False, batchsize=1, id_val=id_val)
            test_net_file = "{}/test{}.prototxt".format(proto_dir, id_val)
            net = FaceBoxFPNNet(net, train=False, data_layer="data", gt_label="label",
                                net_width=resized_width, net_height=resized_height)
            with open(test_net_file, 'w') as f:
                print('name: "{}_test{}"'.format(ProjectName, id_val), file=f)
                print(net.to_proto(), file=f)
            test_net_files.append(test_net_file)
        test_net_file = test_net_files
    else:
        net = caffe.NetSpec()
        net = get_DAPDataLayer(net, train=False, batchsize=1)
        net = FaceBoxFPNNet(net, train=False, data_layer="data", gt_label="label",
                            net_width=resized_width, net_height=resized_height)
        # net = DAPNet_hand_pool1(net, train=False, data_layer="data", gt_label="label",
        #                         net_width=resized_width, net_height=resized_height)
        with open(test_net_file, 'w') as f:
            print('name: "{}_test"'.format(ProjectName), file=f)
            print(net.to_proto(), file=f)
        test_net_file = [test_net_file, ]
    ################################################################################
    # Solver
    solver_param = get_solver_param()
    solver = caffe_pb2.SolverParameter(train_net=train_net_file, test_net=test_net_file,
                                       snapshot_prefix=snapshot_prefix, **solver_param)
    with open(solver_file, 'w') as f:
        print(solver, file=f)
    ################################################################################
    # CaffeModel & Snapshot
    max_iter = 0
    for file in os.listdir(model_dir):
        if file.endswith(".solverstate"):
            basename = os.path.splitext(file)[0]
            iter = int(basename.split("{}_iter_".format(ProjectName))[1])
            if iter > max_iter:
                max_iter = iter
    if fine_tuning:
        train_param = '--weights="{}" \\\n'.format(get_pretained_model())
    else:
        train_param = ''
    if resume_training:
        if max_iter > 0:
            train_param = '--snapshot="{}_iter_{}.solverstate" \\\n'.format(snapshot_prefix, max_iter)
    ################################################################################
    # job scripts
    with open(job_file, 'w') as f:
        f.write('cd {}\n'.format(caffe_root))
        f.write('./build/tools/caffe train \\\n')
        f.write('--solver="{}" \\\n'.format(solver_file))
        f.write(train_param)
        if solver_param['solver_mode'] == P.Solver.GPU:
            f.write('--gpu {} 2>&1 | tee {}\n'.format(get_gpus(), log_file))
        else:
            f.write('2>&1 | tee {}.log\n'.format(log_file))
    os.chmod(job_file, stat.S_IRWXU)
    # ==========================================================================
    # Training
    subprocess.call(job_file, shell=True)
def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892), seed=1337)
    pydata_params['cocostuff_dir'] = 'cocostuff'
    pylayer = 'COCOSTUFFSegDataLayer'
    n.data, n.label = L.Python(module='cocostuff_layers', layer=pylayer, ntop=2,
                               param_str=str(pydata_params))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(n.drop7, num_output=182, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.upscore = L.Deconvolution(n.score_fr,
        convolution_param=dict(num_output=182, kernel_size=64, stride=32, bias_term=False),
        param=[dict(lr_mult=0)])
    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(n.score, n.label,
            loss_param=dict(normalize=False, ignore_label=255))

    return n.to_proto()
def deploy_pose_ren_net(dataset):
    n = caffe.NetSpec()
    point_num_ = util.get_joint_num(dataset)
    # input layers
    # n.data = 'input: "data" \
    #           input_shape { \
    #             dim: 1 \
    #             dim: 1 \
    #             dim: 96 \
    #             dim: 96 \
    #           }'
    # n.prev_pose = 'input: "prev_pose" \
    #                input_shape { \
    #                  dim: 1 \
    #                  dim: {} \
    #                }'.format(point_num_*3)
    n.data = L.Input(name="data", shape=dict(dim=[1, 1, 96, 96]))
    n.prev_pose = L.Input(name="prev_pose", shape=dict(dim=[1, point_num_ * 3]))
    print str(n.to_proto())

    # the base net
    n.conv0, n.relu0 = conv_relu(n.data, 16)
    n.conv1 = conv(n.relu0, 16)
    n.pool1 = max_pool(n.conv1)
    n.relu1 = L.ReLU(n.pool1, in_place=True)

    n.conv2_0, n.relu2_0 = conv_relu(n.pool1, 32, ks=1, pad=0)
    n.conv2, n.relu2 = conv_relu(n.relu2_0, 32)
    n.conv3 = conv(n.relu2, 32)
    n.res1 = L.Eltwise(n.conv2_0, n.conv3)
    n.pool2 = max_pool(n.res1)
    n.relu3 = L.ReLU(n.pool2, in_place=True)

    n.conv3_0, n.relu3_0 = conv_relu(n.relu3, 64, ks=1, pad=0)
    n.conv4, n.relu4 = conv_relu(n.relu3_0, 64)
    n.conv5 = conv(n.relu4, 64)
    n.res2 = L.Eltwise(n.conv3_0, n.conv5)
    n.pool3 = max_pool(n.res2)
    n.relu5 = L.ReLU(n.pool3, in_place=True)

    # pose guided region ensemble
    for idx in xrange(point_num_):
        if idx not in get_guided_joints(dataset):
            continue
        rois = 'rois_{}'.format(idx)
        n[rois] = L.Python(n.prev_pose, module='python_layers.py_generate_roi_layer',
                           layer='PyGenerateROILayer', ntop=1,
                           param_str=str(dict(joint_idx=idx, roi_h=6, roi_w=6,
                                              img_h=96, img_w=96, spatial_mul=8)))
        roipool = 'roi_pool_{}'.format(idx)
        n[roipool] = L.ROIPooling(n.pool3, n[rois],
                                  roi_pooling_param=dict(pooled_w=7, pooled_h=7, spatial_scale=0.125))
        # fc
        fc1 = 'fc1_{}'.format(idx)
        relu6 = 'relu6_{}'.format(idx)
        drop1 = 'drop1_{}'.format(idx)
        n[fc1], n[relu6], n[drop1] = fc_relu_dropout(n[roipool], 2048, 0.5)

    # structure connection
    # connect_structure_1 = [[0,1,3], [0,4,6], [0,7,9], [0,10,12], [0,13,15]]
    connect_structure_1 = get_connect_structure(dataset)
    concate_bottom_final = []
    for idx in xrange(len(connect_structure_1)):
        concate_bottom = []
        for jdx in xrange(len(connect_structure_1[idx])):
            drop1 = 'drop1_{}'.format(connect_structure_1[idx][jdx])
            concate_bottom.append(n[drop1])
        concate_1 = 'concate_1_{}'.format(idx)
        n[concate_1] = L.Concat(*concate_bottom)
        fc2 = 'fc2_{}'.format(idx)
        relu7 = 'relu7_{}'.format(idx)
        drop2 = 'drop2_{}'.format(idx)
        n[fc2], n[relu7], n[drop2] = fc_relu_dropout(n[concate_1], 2048, 0.5)
        concate_bottom_final.append(n[drop2])

    n.fc_concat = L.Concat(*concate_bottom_final)
    n.fc3_0 = fc(n.fc_concat, point_num_ * 3)
    return str(n.to_proto())
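
# Usage sketch (illustrative only; the dataset key and output path are
# placeholders): write the generated Pose-REN deploy definition to disk.
def write_pose_ren_deploy(dataset='icvl', path='deploy_pose_ren.prototxt'):
    with open(path, 'w') as f:
        f.write(deploy_pose_ren_net(dataset))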
def silent_net():
    n = caffe.NetSpec()
    n.data, n.data2 = L.DummyData(shape=dict(dim=3), ntop=2)
    n.silence_data = L.Silence(n.data, ntop=0)
    n.silence_data2 = L.Silence(n.data2, ntop=0)
    return n.to_proto()
def net(split): n = caffe.NetSpec() loss_param = dict(normalize=False) if split == 'train': data_params = dict(mean=(104.00699, 116.66877, 122.67892)) # 图像与标签 data_params['root'] = './datasets/CTW1500_Total_TCB' data_params['source'] = "CTW1500_Total_TCB.lst" data_params['shuffle'] = True data_params['ignore_label'] = -1 n.data, n.label = L.Python(module='pylayer_old', layer='ImageLabelmapDataLayer', ntop=2, \ param_str=str(data_params)) if data_params.has_key('ignore_label'): loss_param['ignore_label'] = int(data_params['ignore_label']) elif split == 'test': n.data = L.Input(name='data', input_param=dict(shape=dict(dim=[1, 3, 500, 500]))) else: raise Exception("Invalid phase") # The first conv stage n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1) n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) n.pool1 = max_pool(n.relu1_2) # The second conv stage n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) n.pool2 = max_pool(n.relu2_2) # The third conv stage n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) n.pool3 = max_pool(n.relu3_3) n.conv3_dilation1 = conv_dilation01(n.conv3_3, mult=[100, 1, 200, 0]) n.conv3_dilation2 = conv_dilation03(n.conv3_3, mult=[100, 1, 200, 0]) n.conv3_dilation3 = conv_dilation05(n.conv3_3, mult=[100, 1, 200, 0]) n.conv3_dilation4 = conv_dilation07(n.conv3_3, mult=[100, 1, 200, 0]) n.concat_conv33 = L.Concat(n.conv3_dilation1, n.conv3_dilation2, n.conv3_dilation3, n.conv3_dilation4, concat_param=dict({'concat_dim': 1})) # The fourth conv stage n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) n.pool4 = max_pool(n.relu4_3) n.conv4_dilation1 = conv_dilation1(n.conv4_3, mult=[100, 1, 200, 0]) n.conv4_dilation2 = conv_dilation3(n.conv4_3, mult=[100, 1, 200, 0]) n.conv4_dilation3 = conv_dilation5(n.conv4_3, mult=[100, 1, 200, 0]) n.conv4_dilation4 = conv_dilation7(n.conv4_3, mult=[100, 1, 200, 0]) n.concat_conv43 = L.Concat(n.conv4_dilation1, n.conv4_dilation2, n.conv4_dilation3, n.conv4_dilation4, concat_param=dict({'concat_dim': 1})) n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) n.conv5_dilation1 = conv_dilation1(n.conv5_3, mult=[100, 1, 200, 0]) n.conv5_dilation2 = conv_dilation3(n.conv5_3, mult=[100, 1, 200, 0]) n.conv5_dilation3 = conv_dilation5(n.conv5_3, mult=[100, 1, 200, 0]) n.conv5_dilation4 = conv_dilation7(n.conv5_3, mult=[100, 1, 200, 0]) n.concat_conv53 = L.Concat(n.conv5_dilation1, n.conv5_dilation2, n.conv5_dilation3, n.conv5_dilation4, concat_param=dict({'concat_dim': 1})) # # DSN3 n.score_dsn3 = conv1x1(n.concat_conv33, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) n.score_dsn3_up = upsample(n.score_dsn3, stride=4) n.upscore_dsn3 = L.Crop(n.score_dsn3_up, n.data) if split == 'train': n.loss3 = L.BalanceCrossEntropyLoss(n.upscore_dsn3, n.label, loss_param=loss_param) if split == 'test': n.sigmoid_dsn3 = L.Sigmoid(n.upscore_dsn3) # # DSN4 n.score_dsn4 = conv1x1(n.concat_conv43, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) n.score_dsn4_up = upsample(n.score_dsn4, stride=8) n.upscore_dsn4 = L.Crop(n.score_dsn4_up, n.data) if split == 'train': n.loss4 = L.BalanceCrossEntropyLoss(n.upscore_dsn4, n.label, loss_param=loss_param) if split == 'test': n.sigmoid_dsn4 = L.Sigmoid(n.upscore_dsn4) # DSN5 
n.score_dsn5 = conv1x1(n.concat_conv53, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) n.score_dsn5_up = upsample(n.score_dsn5, stride=16) n.upscore_dsn5 = L.Crop(n.score_dsn5_up, n.data) if split == 'train': n.loss5 = L.BalanceCrossEntropyLoss(n.upscore_dsn5, n.label, loss_param=loss_param) if split == 'test': n.sigmoid_dsn5 = L.Sigmoid(n.upscore_dsn5) # ############### concatenation and pass through attention model ######### n.concat_upscore = L.Concat(n.upscore_dsn3, n.upscore_dsn4, n.upscore_dsn5, name='concat', concat_param=dict({'concat_dim': 1})) n.output_mask_product03 = L.Convolution( n.upscore_dsn3, num_output=1, kernel_size=3, pad=1, param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)], weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant'), engine=1) n.output_mask_product04 = L.Convolution( n.upscore_dsn4, num_output=1, kernel_size=3, pad=1, param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)], weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant'), engine=1) n.output_mask_product05 = L.Convolution( n.upscore_dsn5, num_output=1, kernel_size=3, pad=1, param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)], weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant'), engine=1) ### attention model n.att_conv1_mask_512 = L.Convolution( n.concat_upscore, num_output=512, kernel_size=3, pad=1, param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)], engine=1) n.relu_att_conv1 = L.ReLU(n.att_conv1_mask_512, in_place=True) n.drop_att_conv1_mask = L.Dropout(n.relu_att_conv1, dropout_ratio=0.5, in_place=True) n.att_fc_mask_512 = L.Convolution( n.drop_att_conv1_mask, num_output=3, kernel_size=1, param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)], engine=1) n.attention = L.Softmax(n.att_fc_mask_512) n.attention3, n.attention4, n.attention5 = L.Slice(n.attention, name='slice_attention', slice_param=dict( axis=1, slice_point=[1, 2]), ntop=3) # # ---- multiply attention weights ---- n.output_mask3 = L.Eltwise(n.attention3, n.output_mask_product03, operation=P.Eltwise.PROD) n.output_mask4 = L.Eltwise(n.attention4, n.output_mask_product04, operation=P.Eltwise.PROD) n.output_mask5 = L.Eltwise(n.attention5, n.output_mask_product05, operation=P.Eltwise.PROD) n.output_fusion = L.Eltwise(n.output_mask3, n.output_mask4, n.output_mask5, operation=P.Eltwise.SUM) n.upscore_fuse = L.Convolution(n.concat_upscore, name='new-score-weighting', num_output=1, kernel_size=1, param=[ dict(lr_mult=0.001, decay_mult=1), dict(lr_mult=0.002, decay_mult=0) ], weight_filler=dict(type='constant', value=0.2), engine=1) if split == 'test': n.sigmoid_fuse = L.Sigmoid(n.upscore_fuse) n.sigmoid_output_fusion = L.Sigmoid(n.output_fusion) if split == 'train': n.loss_fuse = L.BalanceCrossEntropyLoss(n.upscore_fuse, n.label, loss_param=loss_param) n.loss_output_fusion = L.BalanceCrossEntropyLoss(n.output_fusion, n.label, loss_param=loss_param) # n.loss_fuse = L.BalanceCrossEntropyLoss(n.upscore_fuse, n.label, loss_param=loss_param) return n.to_proto()
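The upsample() helper used by the DSN branches above is not defined in this listing. A common pattern, shown here only as a hedged sketch and not as the author's exact code, is a Deconvolution layer whose kernel size and stride follow the upsampling factor, with learning disabled so the weights can later be filled with bilinear interpolation values:

from caffe import layers as L

def upsample(bottom, stride, nout=1):
    # Hypothetical helper: kernel = 2*stride, lr_mult=0 freezes the weights,
    # which are typically set to a bilinear kernel after the net is created.
    return L.Deconvolution(
        bottom,
        convolution_param=dict(num_output=nout, kernel_size=2 * stride,
                               stride=stride, bias_term=False),
        param=[dict(lr_mult=0, decay_mult=0)])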
def fcn(split): n = caffe.NetSpec() pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892), seed=1337) if split == 'train': pydata_params[ 'sbdd_dir'] = '/home/wen/caffe-master/semantic/fcn/data/sbdd/benchmark/benchmark_RELEASE/dataset' pylayer = 'SBDDSegDataLayer' else: pydata_params[ 'voc_dir'] = '/home/wen/caffe-master/semantic/fcn/data/pascal/VOC2012' pylayer = 'VOCSegDataLayer' n.data, n.label = L.Python(module='voc_layers', layer=pylayer, ntop=2, param_str=str(pydata_params)) n.conv1_1, n.relu1_1 = conv_relu(n.data, 16 * 4, pad=10) n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 16 * 4) n.pool1 = max_pool(n.relu1_2) n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 32 * 4) n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 32 * 4) n.pool2 = max_pool(n.relu2_2) n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 64 * 4) n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 64 * 4) n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 64 * 4) n.pool3 = max_pool(n.relu3_3) n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 128 * 4) n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 128 * 4) n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 128 * 4) n.pool4 = max_pool(n.relu4_3) n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 128 * 4) n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 128 * 4) n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 128 * 4) n.pool5 = max_pool(n.relu5_3) n.fc6_new, n.relu6 = conv_relu(n.pool5, 1024, ks=3, pad=0) n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) n.fc7_new, n.relu7 = conv_relu(n.drop6, 1024, ks=1, pad=0) n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) n.score_fr_new = L.Convolution( n.drop7, num_output=21, kernel_size=1, pad=0, weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) n.upscore_new = L.Deconvolution(n.score_fr_new, convolution_param=dict( num_output=21, kernel_size=128, stride=8, bias_term=False, ), param=[dict(lr_mult=0)]) n.score = L.Crop(n.upscore_new, n.data) n.loss = L.SoftmaxWithLoss(n.score, n.label, loss_param=dict(normalize=False, ignore_label=255)) return n.to_proto()
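For readability, here is a sketch of the conv_relu()/max_pool() helpers that this FCN-style definition relies on; the defaults follow the usual FCN convention, but the exact values in the original project may differ, and other functions in this file use differently parameterized helpers of the same names.

from caffe import layers as L, params as P

def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
    # Convolution followed by an in-place ReLU; weight/bias lr_mults of 1/2
    # are the customary FCN settings (an assumption here).
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=nout,
                         pad=pad, param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)])
    return conv, L.ReLU(conv, in_place=True)

def max_pool(bottom, ks=2, stride=2):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)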
def cnn(split): n = caffe.NetSpec() if split == 'train': pydata_params = dict( dataset_dir='/home/kevin/dataset/ws_exp/gp_labelled', split=split, mean=(104.00698793, 116.66876762, 122.67891434), seed=1337, img_size=(224, 224), crop_size=(224, 224, 224, 224)) pylayer = 'WashingtonDataLayerSS' pydata_params['randomize'] = True pydata_params['batch_size'] = 64 elif split == 'test': pydata_params = dict( dataset_dir='/home/kevin/dataset/washington_rgbd_dataset', split=split, mean=(104.00698793, 116.66876762, 122.67891434), seed=1337, img_size=(224, 224), crop_size=(224, 224, 224, 224)) pylayer = 'WashingtonDataLayer' pydata_params['randomize'] = False pydata_params['batch_size'] = 1 else: n.img = L.Input( name='input', ntop=2, shape=[dict(dim=1), dict(dim=1), dict(dim=224), dict(dim=224)]) #---------------------------------Data Layer---------------------------------------# n.rgb, n.depth, n.label = L.Python( name="data", module='data_layers.washington_data_layer', layer=pylayer, ntop=3, param_str=str(pydata_params)) #n.rgb_crop = L.Python(n.rgb, name="crop_rgb", module='data_layers.random_crop_layer', layer='RandomCropLayer', ntop=1, param_str=str(dict(h=224,w=224))) #n.depth_crop = L.Python(n.depth, name="crop_depth", module='data_layers.random_crop_layer', layer='RandomCropLayer', ntop=1, param_str=str(dict(h=227,w=227))) #---------------------------------RGB-Net---------------------------------------# # the vgg 16 base net n.conv1_1, n.relu1_1 = conv_relu("conv1_1", n.rgb, 64, pad=1, lr1=0, lr2=0) n.conv1_2, n.relu1_2 = conv_relu("conv1_2", n.relu1_1, 64, lr1=0, lr2=0) n.rgb_pool1 = max_pool(n.relu1_2) n.conv2_1, n.relu2_1 = conv_relu("conv2_1", n.rgb_pool1, 128, lr1=0, lr2=0) n.conv2_2, n.relu2_2 = conv_relu("conv2_2", n.relu2_1, 128, lr1=0, lr2=0) n.rgb_pool2 = max_pool(n.relu2_2) n.conv3_1, n.relu3_1 = conv_relu("conv3_1", n.rgb_pool2, 256, lr1=0, lr2=0) n.conv3_2, n.relu3_2 = conv_relu("conv3_2", n.relu3_1, 256, lr1=0, lr2=0) n.conv3_3, n.relu3_3 = conv_relu("conv3_3", n.relu3_2, 256, lr1=0, lr2=0) n.rgb_pool3 = max_pool(n.relu3_3) n.conv4_1, n.relu4_1 = conv_relu("conv4_1", n.rgb_pool3, 512, lr1=0, lr2=0) n.conv4_2, n.relu4_2 = conv_relu("conv4_2", n.relu4_1, 512, lr1=0, lr2=0) n.conv4_3, n.relu4_3 = conv_relu("conv4_3", n.relu4_2, 512, lr1=0, lr2=0) n.rgb_pool4 = max_pool(n.relu4_3) n.conv5_1, n.relu5_1 = conv_relu("conv5_1", n.rgb_pool4, 512, lr1=0, lr2=0) n.conv5_2, n.relu5_2 = conv_relu("conv5_2", n.relu5_1, 512, lr1=0, lr2=0) n.conv5_3, n.relu5_3 = conv_relu("conv5_3", n.relu5_2, 512, lr1=0, lr2=0) n.rgb_pool5 = max_pool(n.relu5_3) # fully conv n.rgb_fc6, n.rgb_relu6 = fc_relu(n.rgb_pool5, 4096, lr1=0, lr2=0) n.rgb_drop6 = L.Dropout(n.rgb_relu6, dropout_ratio=0.5, in_place=True) n.rgb_fc7, n.rgb_relu7 = fc_relu(n.rgb_drop6, 4096, lr1=0, lr2=0) n.rgb_drop7 = L.Dropout(n.rgb_relu7, dropout_ratio=0.5, in_place=True) n.rgb_fc8 = fc(n.rgb_drop7, 51, lr1=0, lr2=0) #---------------------------------Depth-Net---------------------------------------# # the base net n.conv1, n.relu1 = conv_relu("conv1", n.depth, 128, ks=5, stride=2, pad=2, lr1=1, lr2=2) n.depth_pool1 = max_pool(n.relu1) n.norm1 = L.LRN(n.depth_pool1, lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2)) n.conv2, n.relu2 = conv_relu("conv2", n.norm1, 256, ks=5, stride=1, pad=2, lr1=1, lr2=2) n.depth_pool2 = max_pool(n.relu2) n.norm2 = L.LRN(n.depth_pool2, lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2)) n.conv3, n.relu3 = conv_relu("conv3", n.norm2, 384, ks=3, pad=1, group=2, lr1=1, lr2=2) n.depth_pool3 = 
max_pool(n.relu3) n.conv4, n.relu4 = conv_relu("conv4", n.depth_pool3, 512, ks=3, pad=1, group=1, lr1=1, lr2=2) n.conv5, n.relu5 = conv_relu("conv5", n.relu4, 512, ks=3, pad=1, group=1, lr1=1, lr2=2) n.depth_pool5 = max_pool(n.relu5) n.depth_fc6, n.depth_relu6 = fc_relu(n.depth_pool5, 4096, lr1=1, lr2=2) n.depth_drop6 = L.Dropout(n.depth_relu6, dropout_ratio=0.5, in_place=True) n.depth_fc7, n.depth_relu7 = fc_relu(n.depth_drop6, 4096, lr1=1, lr2=2) n.depth_drop7 = L.Dropout(n.depth_relu7, dropout_ratio=0.5, in_place=True) n.depth_fc8 = fc(n.depth_drop7, 51, lr1=1, lr2=2) #-----------------------------------final output---------------------------------# # Concatenation #n.concat = L.Concat(n.rgb_drop7, n.depth_drop7, axis=1) #n.rgbd_fc8 = fc(n.concat, 6, lr1=1, lr2=2) if split != 'deploy': n.rgb_accuracy = L.Accuracy(n.rgb_fc8, n.label) n.rgb_loss = L.SoftmaxWithLoss(n.rgb_fc8, n.label) n.depth_accuracy = L.Accuracy(n.depth_fc8, n.label) n.depth_loss = L.SoftmaxWithLoss(n.depth_fc8, n.label) #n.accuracy = L.Accuracy(n.rgbd_fc8, n.label) #n.loss = L.SoftmaxWithLoss(n.rgbd_fc8, n.label) return n.to_proto()
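The fc()/fc_relu() helpers used by both the RGB and depth streams are likewise external to this listing. A plausible sketch, assuming lr1/lr2 map to the weight and bias learning-rate multipliers:

from caffe import layers as L

def fc(bottom, nout, lr1=1, lr2=2):
    # Hypothetical InnerProduct helper; decay_mults are assumptions.
    return L.InnerProduct(bottom, num_output=nout,
                          param=[dict(lr_mult=lr1, decay_mult=1),
                                 dict(lr_mult=lr2, decay_mult=0)])

def fc_relu(bottom, nout, lr1=1, lr2=2):
    fc_layer = fc(bottom, nout, lr1, lr2)
    return fc_layer, L.ReLU(fc_layer, in_place=True)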
def generate_net(lmdb, label_file, PHASE, batch_size): net = caffe.NetSpec() if(PHASE=="TRAIN"): # data layer net.data, net.label = caffe.layers.AnnotatedData(ntop=2, include={'phase':caffe.TRAIN}, transform_param=dict(mirror=True, mean_value=[104, 117, 123], resize_param=dict(prob=1.0, resize_mode=caffe.params.Resize.WARP, height=300, width=300, interp_mode=[caffe.params.Resize.LINEAR,caffe.params.Resize.AREA,caffe.params.Resize.NEAREST,caffe.params.Resize.CUBIC,caffe.params.Resize.LANCZOS4]), emit_constraint=dict(emit_type=0), distort_param=dict(brightness_prob=0.5, brightness_delta=32.0, contrast_prob=0.5, contrast_lower=0.5, contrast_upper=1.5, hue_prob=0.5, hue_delta=18.0, saturation_prob=0.5, saturation_lower=0.5, saturation_upper=1.5, random_order_prob=0.0), expand_param=dict(prob=0.5, max_expand_ratio=4.0)), data_param=dict(source=lmdb, batch_size=batch_size, backend=caffe.params.Data.LMDB), annotated_data_param=dict( batch_sampler=[dict(max_sample=1, max_trials=1), dict(sampler=dict(min_scale=0.3, max_scale=1.0,min_aspect_ratio=0.5, max_aspect_ratio=2.0), sample_constraint=dict(min_jaccard_overlap=0.1), max_sample=1, max_trials=50), dict(sampler=dict(min_scale=0.3, max_scale=1.0,min_aspect_ratio=0.5, max_aspect_ratio=2.0), sample_constraint=dict(min_jaccard_overlap=0.3), max_sample=1, max_trials=50), dict(sampler=dict(min_scale=0.3, max_scale=1.0,min_aspect_ratio=0.5, max_aspect_ratio=2.0), sample_constraint=dict(min_jaccard_overlap=0.5), max_sample=1, max_trials=50), dict(sampler=dict(min_scale=0.3, max_scale=1.0,min_aspect_ratio=0.5, max_aspect_ratio=2.0), sample_constraint=dict(min_jaccard_overlap=0.7), max_sample=1, max_trials=50), dict(sampler=dict(min_scale=0.3, max_scale=1.0,min_aspect_ratio=0.5, max_aspect_ratio=2.0), sample_constraint=dict(min_jaccard_overlap=0.9), max_sample=1, max_trials=50), dict(sampler=dict(min_scale=0.3, max_scale=1.0,min_aspect_ratio=0.5, max_aspect_ratio=2.0), sample_constraint=dict(min_jaccard_overlap=1.0), max_sample=1, max_trials=50)], label_map_file=label_file)) elif(PHASE=="DEPLOY"): net.data = caffe.layers.Input(shape={'dim':[1,3,300,300]}) # bone net.conv1 = caffe.layers.Convolution(net.data, num_output=57, kernel_size=3, stride=2, weight_filler={"type":"xavier"}, param=[dict(lr_mult=1.0,decay_mult=0.0),dict(lr_mult=1.0,decay_mult=0.0)]) net.relu_conv1 = caffe.layers.ReLU(net.conv1,in_place=True) net.pool1 = caffe.layers.Pooling(net.relu_conv1, pool=caffe.params.Pooling.MAX, kernel_size=3, stride=2) # fire1 net.tops['fire2/concat'] = fire(net, net.pool1, 'fire2', 15,49, 53) # fire2 net.tops['fire3/concat'] = fire(net, net.tops['fire2/concat'], 'fire3', 15, 54, 52) net.pool3 = caffe.layers.Pooling(net.tops['fire3/concat'], pool=caffe.params.Pooling.MAX, kernel_size=3, stride=2) # fire3 net.tops['fire4/concat'] = fire(net, net.pool3, 'fire4', 29, 92, 94) # fire4 net.tops['fire5/concat'] = fire(net, net.tops['fire4/concat'], 'fire5', 29, 90, 83) net.pool5 = caffe.layers.Pooling(net.tops['fire5/concat'], pool=caffe.params.Pooling.MAX, kernel_size=3, stride=2) # fire5 net.tops['fire6/concat'] = fire(net, net.pool5, 'fire6', 44, 166, 161) # fire6 net.tops['fire7/concat'] = fire(net, net.tops['fire6/concat'], 'fire7', 45, 155, 146) # fire7 net.tops['fire8/concat'] = fire(net, net.tops['fire7/concat'], 'fire8', 49, 163, 171) # fire8 net.tops['fire9/concat'] = fire(net, net.tops['fire8/concat'], 'fire9', 25, 29, 54) net.pool9 = caffe.layers.Pooling(net.tops['fire9/concat'], pool=caffe.params.Pooling.MAX, kernel_size=3, stride=2) # fire9 
net.tops['fire10/concat'] = fire(net, net.pool9, 'fire10', 37, 45, 56) net.pool10 = caffe.layers.Pooling(net.tops['fire10/concat'], pool=caffe.params.Pooling.MAX, kernel_size=3, stride=2) # fire10 net.tops['fire11/concat'] = fire(net, net.pool10, 'fire11', 38, 41, 44) # conv12 net.conv12_1 = caffe.layers.Convolution(net.tops['fire11/concat'], param=[dict(lr_mult=1.0, decay_mult=1.0)], convolution_param={'num_output':51, 'bias_term':False, 'kernel_size':1, 'weight_filler':{'type':'msra'}}) net.tops['conv12_1/bn'] = caffe.layers.BatchNorm(net.conv12_1, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)], in_place=True) net.tops['conv12_1/scale'] = caffe.layers.Scale(net.tops['conv12_1/bn'], param=[dict(lr_mult=1.0, decay_mult=0.0), dict(lr_mult=2.0, decay_mult=0.0)], scale_param={'filler':{'value':1}, 'bias_term':True, 'bias_filler':{'value':0}}, in_place=True) net.tops['conv12_1/relu'] = caffe.layers.ReLU(net.tops['conv12_1/scale'], in_place=True) net.conv12_2 = caffe.layers.Convolution(net.tops['conv12_1/relu'], param=[dict(lr_mult=1.0, decay_mult=1.0)], convolution_param={'num_output':46, 'bias_term':False, 'pad':1, 'kernel_size':3, 'stride':2, 'weight_filler':{'type':'msra'}}) net.tops['conv12_2/bn'] = caffe.layers.BatchNorm(net.conv12_2, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)], in_place=True) net.tops['conv12_2/scale'] = caffe.layers.Scale(net.tops['conv12_2/bn'], param=[dict(lr_mult=1.0, decay_mult=0.0), dict(lr_mult=2.0, decay_mult=0.0)], scale_param={'filler':{'value':1}, 'bias_term':True, 'bias_filler':{'value':0}}, in_place=True) net.tops['conv12_2/relu'] = caffe.layers.ReLU(net.tops['conv12_2/scale'], in_place=True) # conv13 net.conv13_1 = caffe.layers.Convolution(net.tops['conv12_2/relu'], param=[dict(lr_mult=1.0, decay_mult=1.0)], convolution_param={'num_output':55, 'bias_term':False, 'kernel_size':1, 'weight_filler':{'type':'msra'}}) net.tops['conv13_1/bn'] = caffe.layers.BatchNorm(net.conv13_1, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)], in_place=True) net.tops['conv13_1/scale'] = caffe.layers.Scale(net.tops['conv13_1/bn'], param=[dict(lr_mult=1.0, decay_mult=0.0), dict(lr_mult=2.0, decay_mult=0.0)], scale_param={'filler':{'value':1}, 'bias_term':True, 'bias_filler':{'value':0}}, in_place=True) net.tops['conv13_1/relu'] = caffe.layers.ReLU(net.conv13_1, in_place=True) net.conv13_2 = caffe.layers.Convolution(net.tops['conv13_1/relu'], param=[dict(lr_mult=1.0, decay_mult=1.0)], convolution_param={'num_output':85, 'bias_term':False, 'pad':1, 'kernel_size':3, 'stride':2, 'weight_filler':{'type':'msra'}}) net.tops['conv13_2/bn'] = caffe.layers.BatchNorm(net.conv13_2, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)], in_place=True) net.tops['conv13_2/scale'] = caffe.layers.Scale(net.tops['conv13_2/bn'], param=[dict(lr_mult=1.0, decay_mult=0.0), dict(lr_mult=2.0, decay_mult=0.0)], scale_param={'filler':{'value':1}, 'bias_term':True, 'bias_filler':{'value':0}}, in_place=True) net.tops['conv13_2/relu'] = caffe.layers.ReLU(net.tops['conv13_2/scale'], in_place=True) # fire 5 prior box prior_box(net, net.tops['fire5/concat'], 'fire5', 16, 84, 21.0, 45.0, [2.0], 8) # fire 9 prior box prior_box(net, net.tops['fire9/concat'], 'fire9', 24, 126, 45.0, 99.0, [2.0, 3.0], 16) # fire 10 prior box prior_box(net, net.tops['fire10/concat'], 'fire10', 24, 126, 99.0, 153.0, [2.0, 3.0], 32) 
# fire 11 prior box prior_box(net, net.tops['fire11/concat'], 'fire11', 24, 126, 153.0, 207.0, [2.0, 3.0], 64) # conv12_2 prior box prior_box(net, net.tops['conv12_2'], 'conv12_2', 24, 126, 207.0, 261.0, [2.0, 3.0], 100) # conv13_2 prior box prior_box(net, net.tops['conv13_2'], 'conv13_2', 16, 84, 261.0, 315.0, [2.0], 300) # last process net.tops['mbox_loc'] = caffe.layers.Concat(net.tops['fire5_mbox_loc_flat'], net.tops['fire9_mbox_loc_flat'], net.tops['fire10_mbox_loc_flat'], net.tops['fire11_mbox_loc_flat'], net.tops['conv12_2_mbox_loc_flat'], net.tops['conv13_2_mbox_loc_flat'], concat_param={'axis':1}) net.tops['mbox_conf'] = caffe.layers.Concat(net.tops['fire5_mbox_conf_flat'], net.tops['fire9_mbox_conf_flat'], net.tops['fire10_mbox_conf_flat'], net.tops['fire11_mbox_conf_flat'], net.tops['conv12_2_mbox_conf_flat'], net.tops['conv13_2_mbox_conf_flat'], concat_param={'axis':1}) net.tops['mbox_priorbox'] = caffe.layers.Concat(net.tops['fire5_mbox_priorbox'], net.tops['fire9_mbox_priorbox'], net.tops['fire10_mbox_priorbox'], net.tops['fire11_mbox_priorbox'], net.tops['conv12_2_mbox_priorbox'], net.tops['conv13_2_mbox_priorbox'], concat_param={'axis':2}) if(PHASE=='TRAIN'): net.tops['mbox_loss'] = caffe.layers.MultiBoxLoss(net.tops['mbox_loc'], net.tops['mbox_conf'], net.tops['mbox_priorbox'], net.label, include={'phase':caffe.TRAIN}, propagate_down=[True, True, False, False], loss_param={'normalization':caffe.params.Loss.VALID}, multibox_loss_param={'loc_loss_type':caffe.params.MultiBoxLoss.SMOOTH_L1, 'conf_loss_type':caffe.params.MultiBoxLoss.SOFTMAX, 'loc_weight':1.0, 'num_classes':21, 'share_location':True, 'match_type':caffe.params.MultiBoxLoss.PER_PREDICTION, 'overlap_threshold':0.5, 'use_prior_for_matching':True, 'background_label_id':0, 'use_difficult_gt':True, 'neg_pos_ratio':3.0, 'neg_overlap':0.5, 'code_type':caffe.params.PriorBox.CENTER_SIZE, 'ignore_cross_boundary_bbox':False, 'mining_type':caffe.params.MultiBoxLoss.MAX_NEGATIVE}) elif(PHASE=='DEPLOY'): net.tops['mbox_conf_reshape'] = caffe.layers.Reshape(net.tops['mbox_conf'], reshape_param={'shape':{'dim':[0,-1,21]}}) net.tops['mbox_conf_softmax'] = caffe.layers.Softmax(net.tops['mbox_conf_reshape'], softmax_param={'axis':2}) net.tops['mbox_conf_flatten'] = caffe.layers.Flatten(net.tops['mbox_conf_softmax'], flatten_param={'axis':1}) net.tops['detection_out'] = caffe.layers.DetectionOutput(net.tops['mbox_loc'], net.tops['mbox_conf_flatten'], net.tops['mbox_priorbox'], include={ 'phase':caffe.TEST}, detection_output_param={'num_classes':21, 'share_location':True, 'background_label_id':0, 'nms_param':{'nms_threshold':0.45, 'top_k':100}, 'code_type':caffe.params.PriorBox.CENTER_SIZE, 'keep_top_k':100, 'confidence_threshold':0.25}) return str(net.to_proto())
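The fire() helper that builds each 'fireN/concat' block above is not shown. Below is a hedged sketch of a SqueezeNet-style squeeze/expand module that matches how it is called (the real implementation may add BatchNorm/Scale layers and ReLUs after the expand convolutions):

def fire(net, bottom, prefix, n_squeeze, n_expand1x1, n_expand3x3):
    # Hypothetical fire module: 1x1 squeeze, then parallel 1x1 and 3x3 expands,
    # concatenated along the channel axis.
    net.tops[prefix + '/squeeze1x1'] = caffe.layers.Convolution(
        bottom, num_output=n_squeeze, kernel_size=1,
        weight_filler={'type': 'xavier'})
    net.tops[prefix + '/relu_squeeze1x1'] = caffe.layers.ReLU(
        net.tops[prefix + '/squeeze1x1'], in_place=True)
    net.tops[prefix + '/expand1x1'] = caffe.layers.Convolution(
        net.tops[prefix + '/relu_squeeze1x1'], num_output=n_expand1x1,
        kernel_size=1, weight_filler={'type': 'xavier'})
    net.tops[prefix + '/expand3x3'] = caffe.layers.Convolution(
        net.tops[prefix + '/relu_squeeze1x1'], num_output=n_expand3x3,
        kernel_size=3, pad=1, weight_filler={'type': 'xavier'})
    net.tops[prefix + '/concat'] = caffe.layers.Concat(
        net.tops[prefix + '/expand1x1'], net.tops[prefix + '/expand3x3'],
        concat_param={'axis': 1})
    return net.tops[prefix + '/concat']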
def fcn(obj_cls, part, split): n = caffe.NetSpec() n.data, n.label = L.Python( module='pascalpart_layers', layer='PASCALPartSegDataLayer', ntop=2, param_str=str( dict(voc_dir='/home/cv/hdl/caffe/data/pascal/VOC', part_dir='/home/cv/hdl/caffe/data/pascal/pascal-part', obj_cls=obj_cls, part=part, split=split, seed=1337))) # the base net n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) n.pool1 = max_pool(n.relu1_2) n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) n.pool2 = max_pool(n.relu2_2) n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) n.pool3 = max_pool(n.relu3_3) n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) n.pool4 = max_pool(n.relu4_3) n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) n.pool5 = max_pool(n.relu5_3) # fully conv n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) n.score_fr = L.Convolution( n.drop7, num_output=11, kernel_size=1, pad=0, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) n.upscore2 = L.Deconvolution(n.score_fr, convolution_param=dict(num_output=11, kernel_size=4, stride=2, bias_term=False), param=[dict(lr_mult=0)]) n.score_pool4 = L.Convolution( n.pool4, num_output=11, kernel_size=1, pad=0, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) n.score_pool4c = crop(n.score_pool4, n.upscore2) n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c, operation=P.Eltwise.SUM) n.upscore16 = L.Deconvolution(n.fuse_pool4, convolution_param=dict(num_output=11, kernel_size=32, stride=16, bias_term=False), param=[dict(lr_mult=0)]) n.score = crop(n.upscore16, n.data) n.loss = L.SoftmaxWithLoss(n.score, n.label, loss_param=dict(normalize=False, ignore_label=255)) return n.to_proto()
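The crop() helper used here (and by the other FCN-style definitions in this file) is the coordinate-mapping crop that ships with pycaffe; it computes the Crop layer's axis and offset automatically from the induced coordinate mapping:

from caffe.coord_map import crop  # provided in python/caffe/coord_map.py of BVLC Caffe

# crop(top_from, top_to) returns an L.Crop(top_from, top_to, crop_param=...)
# whose offset aligns the upsampled score map with the reference blob, e.g.
# n.score = crop(n.upscore16, n.data)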
def convert_symbol2proto(symbol): def looks_like_weight(name): """Internal helper to figure out if node should be hidden with `hide_weights`. """ if name.endswith("_weight"): return True if name.endswith("_bias"): return True if name.endswith("_beta") or name.endswith("_gamma") or name.endswith("_moving_var") or name.endswith( "_moving_mean"): return True return False json_symbol = json.loads(symbol.tojson()) all_nodes = json_symbol['nodes'] no_weight_nodes = [] for node in all_nodes: op = node['op'] name = node['name'] if op == 'null': if looks_like_weight(name): continue no_weight_nodes.append(node) # build next node dict next_node = dict() for node in no_weight_nodes: node_name = node['name'] for input in node['inputs']: last_node_name = all_nodes[input[0]]['name'] if last_node_name in next_node: next_node[last_node_name].append(node_name) else: next_node[last_node_name] = [node_name] supported_op_type = ['null', 'BatchNorm', 'Convolution', 'Activation', 'Pooling', 'elemwise_add', 'SliceChannel', 'FullyConnected', 'SoftmaxOutput', '_maximum', 'add_n', 'Concat', '_mul_scalar', 'Deconvolution', 'UpSampling'] top_dict = dict() caffe_net = caffe.NetSpec() for node in no_weight_nodes: if node['op'] == 'null': input_param = dict() if node['name'] == 'data': input_param['shape'] = dict(dim=[1, 3, 160, 160]) else: input_param['shape'] = dict(dim=[1]) top_data = CL.Input(ntop=1, input_param=input_param) top_dict[node['name']] = [top_data] setattr(caffe_net, node['name'], top_data) elif node['op'].endswith('_copy'): pass elif node['op'] == 'BatchNorm': input = node['inputs'][0] while True: if all_nodes[input[0]]['op'] not in supported_op_type: input = all_nodes[input[0]]['inputs'][0] else: break bottom_node_name = all_nodes[input[0]]['name'] attr = node['attrs'] in_place = False if len(next_node[bottom_node_name]) == 1: in_place = True if 'momentum' in attr: momentum = float(attr['momentum']) else: momentum = 0.9 if 'eps' in attr: eps = float(attr['eps']) else: eps = 0.001 if NO_INPLACE: in_place = False bn_top = CL.BatchNorm(top_dict[bottom_node_name][input[1]], ntop=1, batch_norm_param=dict(use_global_stats=True, moving_average_fraction=momentum, eps=eps), in_place=in_place) setattr(caffe_net, node['name'], bn_top) scale_top = CL.Scale(bn_top, ntop=1, scale_param=dict(bias_term=True), in_place=not NO_INPLACE) top_dict[node['name']] = [scale_top] setattr(caffe_net, node['name'] + '_scale', scale_top) elif node['op'] == 'Convolution': input = node['inputs'][0] while True: if all_nodes[input[0]]['op'] not in supported_op_type: input = all_nodes[input[0]]['inputs'][0] else: break bottom_node_name = all_nodes[input[0]]['name'] attr = node['attrs'] convolution_param = dict() if 'kernel' in attr: kernel_size = eval(attr['kernel']) assert kernel_size[0] == kernel_size[1] convolution_param['kernel_size'] = kernel_size[0] else: convolution_param['kernel_size'] = 1 if 'no_bias' in attr: convolution_param['bias_term'] = not eval(attr['no_bias']) if 'num_group' in attr: convolution_param['group'] = int(attr['num_group']) convolution_param['num_output'] = int(attr['num_filter']) if 'pad' in attr: pad_size = eval(attr['pad']) assert pad_size[0] == pad_size[1] convolution_param['pad'] = pad_size[0] if 'stride' in attr: stride_size = eval(attr['stride']) assert stride_size[0] == stride_size[1] convolution_param['stride'] = stride_size[0] conv_top = CL.Convolution(top_dict[bottom_node_name][input[1]], ntop=1, convolution_param=convolution_param) top_dict[node['name']] = [conv_top] setattr(caffe_net, 
node['name'], conv_top) elif node['op'] == 'Deconvolution': input = node['inputs'][0] while True: if all_nodes[input[0]]['op'] not in supported_op_type: input = all_nodes[input[0]]['inputs'][0] else: break bottom_node_name = all_nodes[input[0]]['name'] attr = node['attrs'] convolution_param = dict() if 'kernel' in attr: kernel_size = eval(attr['kernel']) assert kernel_size[0] == kernel_size[1] convolution_param['kernel_size'] = kernel_size[0] else: convolution_param['kernel_size'] = 1 if 'no_bias' in attr: convolution_param['bias_term'] = not eval(attr['no_bias']) else: convolution_param['bias_term'] = False if 'num_group' in attr: convolution_param['group'] = int(attr['num_group']) convolution_param['num_output'] = int(attr['num_filter']) if 'pad' in attr: pad_size = eval(attr['pad']) assert pad_size[0] == pad_size[1] convolution_param['pad'] = pad_size[0] if 'stride' in attr: stride_size = eval(attr['stride']) assert stride_size[0] == stride_size[1] convolution_param['stride'] = stride_size[0] conv_top = CL.Deconvolution(top_dict[bottom_node_name][input[1]], ntop=1, convolution_param=convolution_param) top_dict[node['name']] = [conv_top] setattr(caffe_net, node['name'], conv_top) elif node['op'] == 'UpSampling': input = node['inputs'][0] while True: if all_nodes[input[0]]['op'] not in supported_op_type: input = all_nodes[input[0]]['inputs'][0] else: break bottom_node_name = all_nodes[input[0]]['name'] attr = node['attrs'] convolution_param = dict() if 'scale' in attr: kernel_size = 2 * eval(attr['scale']) - eval(attr['scale']) % 2 convolution_param['kernel_size'] = kernel_size else: convolution_param['kernel_size'] = 1 convolution_param['bias_term'] = False convolution_param['num_output'] = int(attr['num_filter']) convolution_param['group'] = int(attr['num_filter']) convolution_param['pad'] = int(math.ceil((eval(attr['scale']) - 1) / 2.)) convolution_param['stride'] = eval(attr['scale']) conv_top = CL.Deconvolution(top_dict[bottom_node_name][input[1]], ntop=1, convolution_param=convolution_param) top_dict[node['name']] = [conv_top] setattr(caffe_net, node['name'], conv_top) elif node['op'] == 'Activation': input = node['inputs'][0] while True: if all_nodes[input[0]]['op'] not in supported_op_type: input = all_nodes[input[0]]['inputs'][0] else: break bottom_node_name = all_nodes[input[0]]['name'] attr = node['attrs'] in_place = False if len(next_node[bottom_node_name]) == 1: in_place = True if NO_INPLACE: in_place = False if attr['act_type'] == 'relu': ac_top = CL.ReLU(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place) elif attr['act_type'] == 'sigmoid': ac_top = CL.Sigmoid(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place) elif attr['act_type'] == 'tanh': ac_top = CL.TanH(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place) top_dict[node['name']] = [ac_top] setattr(caffe_net, node['name'], ac_top) elif node['op'] == 'Pooling': input = node['inputs'][0] while True: if all_nodes[input[0]]['op'] not in supported_op_type: input = all_nodes[input[0]]['inputs'][0] else: break bottom_node_name = all_nodes[input[0]]['name'] attr = node['attrs'] pooling_param = dict() if attr['pool_type'] == 'avg': pooling_param['pool'] = 1 elif attr['pool_type'] == 'max': pooling_param['pool'] = 0 else: assert False, attr['pool_type'] if 'global_pool' in attr and eval(attr['global_pool']) is True: pooling_param['global_pooling'] = True else: if 'kernel' in attr: kernel_size = eval(attr['kernel']) assert kernel_size[0] == kernel_size[1] pooling_param['kernel_size'] = 
kernel_size[0] if 'pad' in attr: pad_size = eval(attr['pad']) assert pad_size[0] == pad_size[1] pooling_param['pad'] = pad_size[0] if 'stride' in attr: stride_size = eval(attr['stride']) assert stride_size[0] == stride_size[1] pooling_param['stride'] = stride_size[0] pool_top = CL.Pooling(top_dict[bottom_node_name][input[1]], ntop=1, pooling_param=pooling_param) top_dict[node['name']] = [pool_top] setattr(caffe_net, node['name'], pool_top) elif node['op'] == 'elemwise_add' or node['op'] == 'add_n': input_a = node['inputs'][0] while True: if all_nodes[input_a[0]]['op'] not in supported_op_type: input_a = all_nodes[input_a[0]]['inputs'][0] else: break input_b = node['inputs'][1] while True: if all_nodes[input_b[0]]['op'] not in supported_op_type: input_b = all_nodes[input_b[0]]['inputs'][0] else: break bottom_node_name_a = all_nodes[input_a[0]]['name'] bottom_node_name_b = all_nodes[input_b[0]]['name'] eltwise_param = dict() eltwise_param['operation'] = 1 ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]], top_dict[bottom_node_name_b][input_b[1]], ntop=1, eltwise_param=eltwise_param) top_dict[node['name']] = [ele_add_top] setattr(caffe_net, node['name'], ele_add_top) elif node['op'] == '_maximum': input_a = node['inputs'][0] while True: if all_nodes[input_a[0]]['op'] not in supported_op_type: input_a = all_nodes[input_a[0]]['inputs'][0] else: break input_b = node['inputs'][1] while True: if all_nodes[input_b[0]]['op'] not in supported_op_type: input_b = all_nodes[input_b[0]]['inputs'][0] else: break bottom_node_name_a = all_nodes[input_a[0]]['name'] bottom_node_name_b = all_nodes[input_b[0]]['name'] eltwise_param = dict() eltwise_param['operation'] = 2 ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]], top_dict[bottom_node_name_b][input_b[1]], ntop=1, eltwise_param=eltwise_param) top_dict[node['name']] = [ele_add_top] setattr(caffe_net, node['name'], ele_add_top) elif node['op'] == '_mul_scalar': input = node['inputs'][0] while True: if all_nodes[input[0]]['op'] not in supported_op_type: input = all_nodes[input[0]]['inputs'][0] else: break bottom_node_name = all_nodes[input[0]]['name'] attr = node['attrs'] in_place = False if len(next_node[bottom_node_name]) == 1: in_place = True if NO_INPLACE: in_place = False scale_top = CL.Scale(top_dict[bottom_node_name][input[1]], ntop=1, scale_param=dict(bias_term=False, filler=dict(value=-1)), in_place=in_place) # scale_top = CL.Power(top_dict[bottom_node_name][input[1]], power=1.0, scale=float(attr['scalar']), shift=0, in_place=in_place) top_dict[node['name']] = [scale_top] setattr(caffe_net, node['name'], scale_top) elif node['op'] == 'SliceChannel': input = node['inputs'][0] while True: if all_nodes[input[0]]['op'] not in supported_op_type: input = all_nodes[input[0]]['inputs'][0] else: break bottom_node_name = all_nodes[input[0]]['name'] slice_param = dict() slice_param['slice_dim'] = 1 slice_num = 2 slice_outputs = CL.Slice(top_dict[bottom_node_name][input[1]], ntop=slice_num, slice_param=slice_param) top_dict[node['name']] = slice_outputs for idx, output in enumerate(slice_outputs): setattr(caffe_net, node['name'] + '_' + str(idx), output) elif node['op'] == 'FullyConnected': input = node['inputs'][0] while True: if all_nodes[input[0]]['op'] not in supported_op_type: input = all_nodes[input[0]]['inputs'][0] else: break bottom_node_name = all_nodes[input[0]]['name'] attr = node['attrs'] inner_product_param = dict() inner_product_param['num_output'] = int(attr['num_hidden']) fc_top = 
CL.InnerProduct(top_dict[bottom_node_name][input[1]], ntop=1, inner_product_param=inner_product_param) top_dict[node['name']] = [fc_top] setattr(caffe_net, node['name'], fc_top) elif node['op'] == 'SoftmaxOutput': input_a = node['inputs'][0] while True: if all_nodes[input_a[0]]['op'] not in supported_op_type: input_a = all_nodes[input_a[0]]['inputs'][0] else: break input_b = node['inputs'][1] while True: if all_nodes[input_b[0]]['op'] not in supported_op_type: input_b = all_nodes[input_b[0]]['inputs'][0] else: break bottom_node_name_a = all_nodes[input_a[0]]['name'] bottom_node_name_b = all_nodes[input_b[0]]['name'] softmax_loss = CL.SoftmaxWithLoss(top_dict[bottom_node_name_a][input_a[1]], top_dict[bottom_node_name_b][input_b[1]], ntop=1) top_dict[node['name']] = [softmax_loss] setattr(caffe_net, node['name'], softmax_loss) elif node['op'] == 'Concat': if len(node['inputs']) == 2: input_a = node['inputs'][0] while True: if all_nodes[input_a[0]]['op'] not in supported_op_type: input_a = all_nodes[input_a[0]]['inputs'][0] else: break input_b = node['inputs'][1] while True: if all_nodes[input_b[0]]['op'] not in supported_op_type: input_b = all_nodes[input_b[0]]['inputs'][0] else: break bottom_node_name_a = all_nodes[input_a[0]]['name'] bottom_node_name_b = all_nodes[input_b[0]]['name'] concat_top = CL.Concat(top_dict[bottom_node_name_a][input_a[1]], top_dict[bottom_node_name_b][input_b[1]], ntop=1) top_dict[node['name']] = [concat_top] setattr(caffe_net, node['name'], concat_top) elif len(node['inputs']) == 3: input_a = node['inputs'][0] while True: if all_nodes[input_a[0]]['op'] not in supported_op_type: input_a = all_nodes[input_a[0]]['inputs'][0] else: break input_b = node['inputs'][1] while True: if all_nodes[input_b[0]]['op'] not in supported_op_type: input_b = all_nodes[input_b[0]]['inputs'][0] else: break input_c = node['inputs'][2] while True: if all_nodes[input_c[0]]['op'] not in supported_op_type: input_c = all_nodes[input_c[0]]['inputs'][0] else: break bottom_node_name_a = all_nodes[input_a[0]]['name'] bottom_node_name_b = all_nodes[input_b[0]]['name'] bottom_node_name_c = all_nodes[input_c[0]]['name'] concat_top = CL.Concat(top_dict[bottom_node_name_a][input_a[1]], top_dict[bottom_node_name_b][input_b[1]], top_dict[bottom_node_name_c][input_c[1]], ntop=1) top_dict[node['name']] = [concat_top] setattr(caffe_net, node['name'], concat_top) else: logging.warn('unknown op type = %s' % node['op']) return caffe_net.to_proto()
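A usage sketch for the MXNet-to-Caffe converter above. Assumptions: mxnet is importable, the module-level NO_INPLACE flag referenced by the converter is defined in the same module, and the symbol's JSON stores node attributes under 'attrs' (true for recent MXNet versions; older releases used 'attr'/'param').

import mxnet as mx

NO_INPLACE = False  # assumed module-level flag read by convert_symbol2proto

data = mx.sym.Variable('data')  # the converter expects the input node to be named 'data'
conv = mx.sym.Convolution(data, num_filter=16, kernel=(3, 3), pad=(1, 1), name='conv0')
relu = mx.sym.Activation(conv, act_type='relu', name='relu0')

with open('converted.prototxt', 'w') as f:
    f.write(str(convert_symbol2proto(relu)))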
def create_neural_net(input_file, batch_size=50): net = caffe.NetSpec() net.data, net.label = L.Data(batch_size=batch_size, source=input_file, backend=caffe.params.Data.LMDB, ntop=2, include=dict(phase=caffe.TEST), name='juniward04') ## pre-process net.conv1 = L.Convolution(net.data, num_output=16, kernel_size=4, stride=1, pad=1, weight_filler=dict(type='dct4'), param=[{ 'lr_mult': 0, 'decay_mult': 0 }], bias_term=False) TRUNCABS = caffe_pb2.QuantTruncAbsParameter.TRUNCABS net.quanttruncabs = L.QuantTruncAbs(net.conv1, process=TRUNCABS, threshold=8, in_place=True) ## block 1 [ net.conv1_proj, net.bn2, net.scale2, net.conv512_1, net.bn2_1, net.scale2_1, net.relu512_1, net.conv512_to_256, net.bn2_2, net.scale2_2, net.res512_to_256, net.relu512_to_256 ] = add_downsampling_block(net.quanttruncabs, 12) ## block 2 [ net.conv256_1, net.bn2_3, net.scale2_3, net.relu256_1, net.conv256_2, net.bn2_4, net.scale2_4, net.res256_2, net.relu256_2 ] = add_skip_block(net.res512_to_256, 24) ## block 2_1 [ net.conv256_4, net.bn3_1, net.scale3_1, net.relu256_4, net.conv256_5, net.bn3_2, net.scale3_2, net.res256_5, net.relu256_5 ] = add_skip_block(net.res256_2, 24) ## block 2_2 [ net.conv256_6, net.bn4_1, net.scale4_1, net.relu256_6, net.conv256_7, net.bn4_2, net.scale4_2, net.res256_7, net.relu256_7 ] = add_skip_block(net.res256_5, 24) ## block 3 [ net.res256_2_proj, net.bn2_5, net.scale2_5, net.conv256_3, net.bn2_6, net.scale2_6, net.relu256_3, net.conv256_to_128, net.bn2_7, net.scale2_7, net.res256_to_128, net.relu256_to_128 ] = add_downsampling_block(net.res256_7, 24) ## block 4 cur [ net.conv128_1, net.bn2_8, net.scale2_8, net.relu128_1, net.conv128_2, net.bn2_9, net.scale2_9, net.res128_2, net.relu128_2 ] = add_skip_block(net.res256_to_128, 48) ## block 4_1 [ net.conv128_4, net.bn3_3, net.scale3_3, net.relu128_4, net.conv128_5, net.bn3_4, net.scale3_4, net.res128_5, net.relu128_5 ] = add_skip_block(net.res128_2, 48) ## block 4_2 [ net.conv128_6, net.bn4_3, net.scale4_3, net.relu128_6, net.conv128_7, net.bn4_4, net.scale4_4, net.res128_7, net.relu128_7 ] = add_skip_block(net.res128_5, 48) ## block 5 [ net.res128_2_proj, net.bn2_10, net.scale2_10, net.conv128_3, net.bn2_11, net.scale2_11, net.relu128_3, net.conv128_to_64, net.bn2_12, net.scale2_12, net.res128_to_64, net.relu128_to_64 ] = add_downsampling_block(net.res128_7, 48) ## block 6 [ net.conv64_1, net.bn2_13, net.scale2_13, net.relu64_1, net.conv64_2, net.bn2_14, net.scale2_14, net.res64_2, net.relu64_2 ] = add_skip_block(net.res128_to_64, 96) ## block 6_1 [ net.conv64_4, net.bn3_5, net.scale3_5, net.relu64_4, net.conv64_5, net.bn3_6, net.scale3_6, net.res64_5, net.relu64_5 ] = add_skip_block(net.res64_2, 96) ## block 6_2 [ net.conv64_6, net.bn4_5, net.scale4_5, net.relu64_6, net.conv64_7, net.bn4_6, net.scale4_6, net.res64_7, net.relu64_7 ] = add_skip_block(net.res64_5, 96) ## block 7 [ net.res64_2_proj, net.bn2_15, net.scale2_15, net.conv64_3, net.bn2_16, net.scale2_16, net.relu64_3, net.conv64_to_32, net.bn2_17, net.scale2_17, net.res64_to_32, net.relu64_to_32 ] = add_downsampling_block(net.res64_7, 96) ## block 8 [ net.conv32_1, net.bn2_18, net.scale2_18, net.relu32_1, net.conv32_2, net.bn2_19, net.scale2_19, net.res32_2, net.relu32_2 ] = add_skip_block(net.res64_to_32, 192) ## block 8_1 [ net.conv32_4, net.bn3_7, net.scale3_7, net.relu32_4, net.conv32_5, net.bn3_8, net.scale3_8, net.res32_5, net.relu32_5 ] = add_skip_block(net.res32_2, 192) ## block 8_2 [ net.conv32_6, net.bn4_7, net.scale4_7, net.relu32_6, net.conv32_7, net.bn4_8, net.scale4_8, 
net.res32_7, net.relu32_7 ] = add_skip_block(net.res32_5, 192) ## block 9 [ net.res32_2_proj, net.bn2_20, net.scale2_20, net.conv32_3, net.bn2_21, net.scale2_21, net.relu32_3, net.conv32_to_16, net.bn2_22, net.scale2_22, net.res32_to_16, net.relu32_to_16 ] = add_downsampling_block(net.res32_7, 192) ## block 10 [ net.conv16_1, net.bn2_23, net.scale2_23, net.relu16_1, net.conv16_2, net.bn2_24, net.scale2_24, net.res16_2, net.relu16_2 ] = add_skip_block(net.res32_to_16, 384) ## block 10_1 [ net.conv16_3, net.bn3_9, net.scale3_9, net.relu16_3, net.conv16_4, net.bn3_10, net.scale3_10, net.res16_4, net.relu16_4 ] = add_skip_block(net.res16_2, 384) ## ## block 10_2 ## [net.conv16_5, net.bn4_9, net.scale4_9, net.relu16_5, net.conv16_6, net.bn4_10, ## net.scale4_10, net.res16_6, net.relu16_6] = add_skip_block(net.res16_4, 384) ## global pool AVE = caffe_pb2.PoolingParameter.AVE net.global_pool = L.Pooling(net.res16_4, pool=AVE, kernel_size=8, stride=1) ## full connecting net.fc = L.InnerProduct(net.global_pool, param=[{ 'lr_mult': 1 }, { 'lr_mult': 2 }], num_output=2, weight_filler=dict(type='xavier'), bias_filler=dict(type='constant')) ## accuracy net.accuracy = L.Accuracy(net.fc, net.label, include=dict(phase=caffe.TEST)) ## loss net.loss = L.SoftmaxWithLoss(net.fc, net.label) return net.to_proto()
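The add_downsampling_block()/add_skip_block() builders are defined elsewhere in the project. As a rough sketch, and only an assumption that matches the number and order of the tops unpacked above, add_skip_block() is a residual unit of two 3x3 convolutions with BatchNorm/Scale and an identity shortcut:

from caffe import layers as L, params as P

def add_skip_block(bottom, num_filters):
    # Hypothetical residual unit returning the nine tops unpacked above:
    # conv, bn, scale, relu, conv, bn, scale, eltwise-sum, relu.
    conv1 = L.Convolution(bottom, num_output=num_filters, kernel_size=3, pad=1,
                          bias_term=False, weight_filler=dict(type='msra'))
    bn1 = L.BatchNorm(conv1, in_place=True)
    scale1 = L.Scale(bn1, bias_term=True, in_place=True)
    relu1 = L.ReLU(scale1, in_place=True)
    conv2 = L.Convolution(relu1, num_output=num_filters, kernel_size=3, pad=1,
                          bias_term=False, weight_filler=dict(type='msra'))
    bn2 = L.BatchNorm(conv2, in_place=True)
    scale2 = L.Scale(bn2, bias_term=True, in_place=True)
    res = L.Eltwise(bottom, scale2, operation=P.Eltwise.SUM)
    relu2 = L.ReLU(res, in_place=True)
    return [conv1, bn1, scale1, relu1, conv2, bn2, scale2, res, relu2]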
def net(split): n=caffe.NetSpec() loss_param=dict(normalize=False) if split == 'train': data_params=dict(mean=(104.00699, 116.66877, 122.67892)) data_params['root']='/home/liang/rcf/data/HED-BSDS' data_params['source']='bsds_pascal_train_pair.lst' data_params['shuffle']=True n.data,n.label=L.Python(module='ImageLabelmapData',layer='ImageLabelmapDataLayer',ntop=2,param_str=str(data_params)) elif split == 'test': n.data=L.Input(name='data',input_param=dict(shape=dict(dim=[1,3,500,500]))) else: raise Exception('Invalid split') #vgg architecture n.conv1_1,n.relu1_1=conv_relu(n.data,num_out=64) n.conv1_2,n.relu1_2=conv_relu(n.relu1_1,num_out=64) n.pool1=max_pool(n.relu1_2) n.conv2_1,n.relu2_1=conv_relu(n.pool1,num_out=128) n.conv2_2,n.relu2_2=conv_relu(n.relu2_1,num_out=128) n.pool2=max_pool(n.relu2_2) n.conv3_1,n.relu3_1=conv_relu(n.pool2,num_out=256) n.conv3_2,n.relu3_2=conv_relu(n.relu3_1,num_out=256) n.conv3_3,n.relu3_3=conv_relu(n.relu3_2,num_out=256) n.pool3=max_pool(n.relu3_3) n.conv4_1,n.relu4_1=conv_relu(n.pool3,num_out=512) n.conv4_2,n.relu4_2=conv_relu(n.relu4_1,num_out=512) n.conv4_3,n.relu4_3=conv_relu(n.relu4_2,num_out=512) n.pool4=max_pool(n.relu4_3) n.conv5_1,n.relu5_1=conv_relu(n.pool4,num_out=512,lr=[100,1,200,0]) n.conv5_2,n.relu5_2=conv_relu(n.relu5_1,num_out=512,lr=[100,1,200,0]) n.conv5_3,n.relu5_3=conv_relu(n.relu5_2,num_out=512,lr=[100,1,200,0]) #conv1 n.dsn1=conv1x1(n.conv1_2) n.dsn1_crop=crop(n.dsn1,n.data) if split=='train': n.dsn1_loss=L.SigmoidCrossEntropyLoss(n.dsn1_crop,n.label) else: n.sigmoid_dsn1=L.Sigmoid(n.dsn1_crop) #conv2 n.dsn2=conv1x1(n.conv2_2) n.dsn2_up=upsample(n.dsn2,stride=2) n.dsn2_crop=crop(n.dsn2_up,n.data) if split=='train': n.dsn2_loss=L.SigmoidCrossEntropyLoss(n.dsn2_crop,n.label) else: n.sigmoid_dsn2=L.Sigmoid(n.dsn2_crop) #conv3 n.dsn3=conv1x1(n.conv3_3) n.dsn3_up=upsample(n.dsn3,stride=4) n.dsn3_crop=crop(n.dsn3_up,n.data) if split=='train': n.dsn3_loss=L.SigmoidCrossEntropyLoss(n.dsn3_crop,n.label) else: n.sigmoid_dsn3=L.Sigmoid(n.dsn3_crop) #conv4 n.dsn4=conv1x1(n.conv4_3) n.dsn4_up=upsample(n.dsn4,stride=8) n.dsn4_crop=crop(n.dsn4_up,n.data) if split=='train': n.dsn4_loss=L.SigmoidCrossEntropyLoss(n.dsn4_crop,n.label) else: n.sigmoid_dsn4=L.Sigmoid(n.dsn4_crop) #conv5 n.dsn5=conv1x1(n.conv5_3) n.dsn5_up=upsample(n.dsn5,stride=16) n.dsn5_crop=crop(n.dsn5_up,n.data) if split=='train': n.dsn5_loss=L.SigmoidCrossEntropyLoss(n.dsn5_crop,n.label) else: n.sigmoid_dsn5=L.Sigmoid(n.dsn5_crop) #concat n.concat_5=L.Concat(n.dsn1_crop,n.dsn2_crop,n.dsn3_crop,n.dsn4_crop,n.dsn5_crop,name='concat',concat_param=dict(concat_dim=1)) n.dsn=L.Convolution(n.concat_5,name='dsn',num_output=1,kernel_size=1,param=[dict(lr_mult=0.001, decay_mult=1), dict(lr_mult=0.002, decay_mult=0)],weight_filler=dict(type='constant', value=0.2)) if split=='train': n.fuse_loss=L.SigmoidCrossEntropyLoss(n.dsn,n.label) else: n.sigmoid_fuss=L.Sigmoid(n.dsn) return n.to_proto()
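A small driver sketch (output file names are placeholders) for emitting the train and test prototxts from the function directly above:

def write_prototxts():
    # net() here refers to the edge-detection generator defined above.
    with open('train.prototxt', 'w') as f:
        f.write(str(net('train')))
    with open('test.prototxt', 'w') as f:
        f.write(str(net('test')))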
def vgg_lowmem(data, labels=None, train=False, param=learned_param, num_classes=100, with_labels=True): """ Returns a protobuf text file specifying a variant of VGG - The Fully Connected (FC) layers (fc6 and fc7) have smaller dimensions due to the lower resolution of mini-places images (128x128) compared with ImageNet images (usually resized to 256x256) """ n = caffe.NetSpec() n.data = data conv_kwargs = dict(param=param, train=train) n.conv1_1, n.relu1_1 = conv_relu(n.data, 3, 64, stride=1, pad=1, **conv_kwargs) n.pool1 = max_pool(n.relu1_1, 3, stride=2, train=train) n.conv2, n.relu2 = conv_relu(n.pool1, 5, 256, pad=2, group=2, **conv_kwargs) n.pool2 = max_pool(n.relu2, 3, stride=2, train=train) n.conv3, n.relu3 = conv_relu(n.pool2, 3, 384, pad=1, **conv_kwargs) n.conv4, n.relu4 = conv_relu(n.relu3, 3, 384, pad=1, group=2, **conv_kwargs) n.conv5, n.relu5 = conv_relu(n.relu4, 3, 256, pad=1, group=2, **conv_kwargs) n.pool5 = max_pool(n.relu5, 3, stride=2, train=train) n.fc6, n.relu6 = fc_relu(n.pool5, 1024, param=param) n.drop6 = L.Dropout(n.relu6, in_place=True) n.fc7, n.relu7 = fc_relu(n.drop6, 1024, param=param) n.drop7 = L.Dropout(n.relu7, in_place=True) n.fc8 = L.InnerProduct(n.drop7, num_output=num_classes, param=param) preds = n.fc8 if not train: # Compute the per-label probabilities at test/inference time. preds = n.probs = L.Softmax(n.fc8) if with_labels: n.label = labels n.loss = L.SoftmaxWithLoss(n.fc8, n.label) n.accuracy_at_1 = L.Accuracy(preds, n.label) n.accuracy_at_5 = L.Accuracy(preds, n.label, accuracy_param=dict(top_k=5)) else: n.ignored_label = labels n.silence_label = L.Silence(n.ignored_label, ntop=0) return to_tempfile(str(n.to_proto()))
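A hedged usage sketch for vgg_lowmem(): the data/label tops normally come from an upstream data layer, and learned_param plus the to_tempfile() helper (assumed here to write the proto text to a temporary file and return its path) are defined in the surrounding module; the LMDB path below is a placeholder.

import caffe
from caffe import layers as L, params as P

data, labels = L.Data(source='places_train_lmdb', backend=P.Data.LMDB,
                      batch_size=64, ntop=2,
                      transform_param=dict(mirror=True, mean_value=[104, 117, 123]))
train_proto_path = vgg_lowmem(data, labels, train=True, num_classes=100)
train_net = caffe.Net(train_proto_path, caffe.TRAIN)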
def net(): n = caffe.NetSpec() n.data = L.Input(input_param=dict(shape=dict(dim=data_shape))) n.dataout = L.Power(n.data, power=_power, scale=_scale, shift=_shift) return n.to_proto()
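This generator reads module-level globals; example values (assumptions, shown only to make the snippet self-contained) and a small driver:

# Caffe's Power layer computes y = (shift + scale * x) ** power.
data_shape = [1, 3, 32, 32]            # assumed input shape
_power, _scale, _shift = 2.0, 1.0, 0.0  # assumed example parameters

with open('power_net.prototxt', 'w') as f:
    f.write(str(net()))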