Example 1
def net():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.Log(n.data, base=_base, scale=_scale, shift=_shift)
    return n.to_proto()
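A minimal driver for the snippet above; data_shape, _base, _scale and _shift are module-level placeholders the original does not show, so the values here are assumptions:

import caffe
from caffe import layers as L

data_shape = [1, 3, 224, 224]            # assumed N, C, H, W
_base, _scale, _shift = -1.0, 1.0, 0.0   # base=-1 selects the natural log

with open('log_net.prototxt', 'w') as f:
    f.write(str(net()))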
Example 2
def fcn(split):
    n = caffe.NetSpec()
    n.data, n.sem, n.geo = L.Python(
        module='siftflow_layers',
        layer='SIFTFlowSegDataLayer',
        ntop=3,
        param_str=str(
            dict(siftflow_dir='../data/sift-flow/data/',
                 split=split,
                 seed=1337)))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr_sem = L.Convolution(
        n.drop7,
        num_output=33,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore2_sem = L.Deconvolution(n.score_fr_sem,
                                     convolution_param=dict(num_output=33,
                                                            kernel_size=4,
                                                            stride=2,
                                                            bias_term=False),
                                     param=[dict(lr_mult=0)])

    n.score_pool4_sem = L.Convolution(
        n.pool4,
        num_output=33,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.score_pool4_semc = crop(n.score_pool4_sem, n.upscore2_sem)
    n.fuse_pool4_sem = L.Eltwise(n.upscore2_sem,
                                 n.score_pool4_semc,
                                 operation=P.Eltwise.SUM)
    n.upscore_pool4_sem = L.Deconvolution(n.fuse_pool4_sem,
                                          convolution_param=dict(
                                              num_output=33,
                                              kernel_size=4,
                                              stride=2,
                                              bias_term=False),
                                          param=[dict(lr_mult=0)])

    n.score_pool3_sem = L.Convolution(
        n.pool3,
        num_output=33,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.score_pool3_semc = crop(n.score_pool3_sem, n.upscore_pool4_sem)
    n.fuse_pool3_sem = L.Eltwise(n.upscore_pool4_sem,
                                 n.score_pool3_semc,
                                 operation=P.Eltwise.SUM)
    n.upscore8_sem = L.Deconvolution(n.fuse_pool3_sem,
                                     convolution_param=dict(num_output=33,
                                                            kernel_size=16,
                                                            stride=8,
                                                            bias_term=False),
                                     param=[dict(lr_mult=0)])

    n.score_sem = crop(n.upscore8_sem, n.data)
    # loss to make score happy (o.w. loss_sem)
    n.loss = L.SoftmaxWithLoss(n.score_sem,
                               n.sem,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    n.score_fr_geo = L.Convolution(
        n.drop7,
        num_output=3,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])

    n.upscore2_geo = L.Deconvolution(n.score_fr_geo,
                                     convolution_param=dict(num_output=3,
                                                            kernel_size=4,
                                                            stride=2,
                                                            bias_term=False),
                                     param=[dict(lr_mult=0)])

    n.score_pool4_geo = L.Convolution(
        n.pool4,
        num_output=3,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.score_pool4_geoc = crop(n.score_pool4_geo, n.upscore2_geo)
    n.fuse_pool4_geo = L.Eltwise(n.upscore2_geo,
                                 n.score_pool4_geoc,
                                 operation=P.Eltwise.SUM)
    n.upscore_pool4_geo = L.Deconvolution(n.fuse_pool4_geo,
                                          convolution_param=dict(
                                              num_output=3,
                                              kernel_size=4,
                                              stride=2,
                                              bias_term=False),
                                          param=[dict(lr_mult=0)])

    n.score_pool3_geo = L.Convolution(
        n.pool3,
        num_output=3,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.score_pool3_geoc = crop(n.score_pool3_geo, n.upscore_pool4_geo)
    n.fuse_pool3_geo = L.Eltwise(n.upscore_pool4_geo,
                                 n.score_pool3_geoc,
                                 operation=P.Eltwise.SUM)
    n.upscore8_geo = L.Deconvolution(n.fuse_pool3_geo,
                                     convolution_param=dict(num_output=3,
                                                            kernel_size=16,
                                                            stride=8,
                                                            bias_term=False),
                                     param=[dict(lr_mult=0)])

    n.score_geo = crop(n.upscore8_geo, n.data)
    n.loss_geo = L.SoftmaxWithLoss(n.score_geo,
                                   n.geo,
                                   loss_param=dict(normalize=False,
                                                   ignore_label=255))

    return n.to_proto()
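A driver sketch in the style of the FCN reference scripts; the split names and file names are assumptions:

def make_nets():
    # Write one prototxt per split (illustrative file names).
    for split in ('trainval', 'test'):
        with open(split + '.prototxt', 'w') as f:
            f.write(str(fcn(split)))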
Example 3
    'evaluate_difficult_gt': False,
    'name_size_file': name_size_file,
}

### Hopefully you don't need to change the following ###
# Check file.
check_if_exist(train_data)
check_if_exist(test_data)
check_if_exist(label_map_file)
check_if_exist(pretrain_model)
make_if_not_exist(save_dir)
make_if_not_exist(job_dir)
make_if_not_exist(snapshot_dir)

# Create train net.
net = caffe.NetSpec()
net.data, net.label = CreateAnnotatedDataLayer(
    train_data,
    batch_size=batch_size_per_device,
    train=True,
    output_label=True,
    label_map_file=label_map_file,
    transform_param=train_transform_param,
    batch_sampler=batch_sampler)

VGGNetBody(net,
           from_layer='data',
           fully_conv=True,
           reduced=True,
           dilated=True,
           dropout=False)
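From here the SSD-style generation scripts typically attach the extra SSD feature layers and then serialize the assembled net; a hedged sketch of the serialization step, with the net name assumed:

# Hypothetical continuation: write the assembled train net to disk.
train_net_file = os.path.join(save_dir, 'train.prototxt')
with open(train_net_file, 'w') as f:
    f.write('name: "VGG_SSD_train"\n')
    f.write(str(net.to_proto()))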
Example 4
def net(split):
    n = caffe.NetSpec()
    loss_param = dict(normalize=False)
    if split == 'train':
        data_params = dict(mean=(104.00699, 116.66877, 122.67892))
        # images and labels
        data_params['root'] = './datasets/ICDAR2013_TCB'
        data_params['source'] = "ICDAR2013_TCB.lst"

        data_params['shuffle'] = True
        data_params['ignore_label'] = -1
        n.data, n.label = L.Python(module='pylayer_old',
                                   layer='ImageLabelmapDataLayer',
                                   ntop=2,
                                   param_str=str(data_params))
        if 'ignore_label' in data_params:
            loss_param['ignore_label'] = int(data_params['ignore_label'])
    elif split == 'test':
        n.data = L.Input(name='data',
                         input_param=dict(shape=dict(dim=[1, 3, 500, 500])))
    else:
        raise Exception("Invalid phase")

    # first conv stage
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    # second conv stage
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    # third conv stage
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)

    # attach an MCFE module after the last conv layer of the third stage, Channel: 64, kernel: 3*3
    n.conv3_dilation1 = conv_dilation01(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation2 = conv_dilation03(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation3 = conv_dilation05(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation4 = conv_dilation07(n.conv3_3, mult=[100, 1, 200, 0])
    # concatenate along the channel dimension
    n.concat_conv33 = L.Concat(n.conv3_dilation1,
                               n.conv3_dilation2,
                               n.conv3_dilation3,
                               n.conv3_dilation4,
                               concat_param=dict({'concat_dim': 1}))

    # the MCFE module is followed by a BLSTM module
    # ===================== prepare lstm inputs =====================
    n.im2col_conv33 = L.Im2col(n.concat_conv33,
                               convolution_param=dict(kernel_size=3, pad=1))
    n.im2col_transpose_conv33 = L.Transpose(
        n.im2col_conv33, transpose_param=dict(dim=[3, 2, 0, 1]))
    n.lstm_input_conv33 = L.Reshape(n.im2col_transpose_conv33,
                                    reshape_param=dict(shape=dict(dim=-1),
                                                       axis=1,
                                                       num_axes=2))

    # forward LSTM
    n.lstm_conv33 = L.Lstm(n.lstm_input_conv33,
                           lstm_param=dict(num_output=128,
                                           weight_filler=dict(type='gaussian',
                                                              std=0.01),
                                           bias_filler=dict(type='constant'),
                                           clipping_threshold=1))
    # backward LSTM
    n.rlstm_input_conv33 = L.Reverse(n.lstm_input_conv33,
                                     name='lstm_reverse1_conv33',
                                     reverse_param=dict(axis=0))
    n.rlstm_output_conv33 = L.Lstm(n.rlstm_input_conv33,
                                   name='rlstm_conv33',
                                   lstm_param=dict(num_output=128))
    n.rlstm_conv33 = L.Reverse(n.rlstm_output_conv33,
                               name='lstm_reverse2_conv33',
                               reverse_param=dict(axis=0))

    # concat lstm_conv33 and rlstm_conv33: n*c*(h1+h2+...+hk)*w
    n.merge_lstm_rlstm_conv33 = L.Concat(n.lstm_conv33,
                                         n.rlstm_conv33,
                                         concat_param=dict(axis=2))
    n.lstm_output_reshape_conv33 = L.Reshape(n.merge_lstm_rlstm_conv33,
                                             reshape_param=dict(
                                                 shape=dict(dim=[-1, 1]),
                                                 axis=1,
                                                 num_axes=1))
    # transpose size of output as (N, C, H, W)
    n.lstm_output_conv33 = L.Transpose(n.lstm_output_reshape_conv33,
                                       transpose_param=dict(dim=[2, 3, 1, 0]))
    n.pool3 = max_pool(n.relu3_3)

    # fourth conv stage
    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)

    # attach an MCFE module after the last conv layer of the fourth stage, Channel: 128, kernel: 3*3
    n.conv4_dilation1 = conv_dilation1(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation2 = conv_dilation3(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation3 = conv_dilation5(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation4 = conv_dilation7(n.conv4_3, mult=[100, 1, 200, 0])
    # concatenate along the channel dimension: n*(c1+c2+...+ck)*h*w
    n.concat_conv43 = L.Concat(n.conv4_dilation1,
                               n.conv4_dilation2,
                               n.conv4_dilation3,
                               n.conv4_dilation4,
                               concat_param=dict({'concat_dim': 1}))

    # BLSTM module
    # ===================== prepare lstm inputs =====================
    n.im2col_conv43 = L.Im2col(n.concat_conv43,
                               convolution_param=dict(kernel_size=3, pad=1))
    n.im2col_transpose_conv43 = L.Transpose(
        n.im2col_conv43, transpose_param=dict(dim=[3, 2, 0, 1]))
    n.lstm_input_conv43 = L.Reshape(n.im2col_transpose_conv43,
                                    reshape_param=dict(shape=dict(dim=-1),
                                                       axis=1,
                                                       num_axes=2))
    # forward LSTM
    n.lstm_conv43 = L.Lstm(n.lstm_input_conv43,
                           lstm_param=dict(num_output=256,
                                           weight_filler=dict(type='gaussian',
                                                              std=0.01),
                                           bias_filler=dict(type='constant'),
                                           clipping_threshold=1))
    # backward LSTM
    n.rlstm_input_conv43 = L.Reverse(n.lstm_input_conv43,
                                     name='lstm_reverse1_conv43',
                                     reverse_param=dict(axis=0))
    n.rlstm_output_conv43 = L.Lstm(n.rlstm_input_conv43,
                                   name='rlstm_conv43',
                                   lstm_param=dict(num_output=256))
    n.rlstm_conv43 = L.Reverse(n.rlstm_output_conv43,
                               name='lstm_reverse2_conv43',
                               reverse_param=dict(axis=0))

    # concat lstm_conv43 and rlstm_conv43: n*c*(h1+h2+...+hk)*w
    n.merge_lstm_rlstm_conv43 = L.Concat(n.lstm_conv43,
                                         n.rlstm_conv43,
                                         concat_param=dict(axis=2))
    n.lstm_output_reshape_conv43 = L.Reshape(n.merge_lstm_rlstm_conv43,
                                             reshape_param=dict(
                                                 shape=dict(dim=[-1, 1]),
                                                 axis=1,
                                                 num_axes=1))
    # transpose size of output as (N, C, H, W)
    n.lstm_output_conv43 = L.Transpose(n.lstm_output_reshape_conv43,
                                       transpose_param=dict(dim=[2, 3, 1, 0]))
    n.pool4 = max_pool(n.relu4_3)

    # the fifth conv stage
    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)

    # MCFE inception module, Channel: 128, kernel: 3*3
    n.conv5_dilation1 = conv_dilation1(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation2 = conv_dilation3(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation3 = conv_dilation5(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation4 = conv_dilation7(n.conv5_3, mult=[100, 1, 200, 0])
    n.concat_conv53 = L.Concat(n.conv5_dilation1,
                               n.conv5_dilation2,
                               n.conv5_dilation3,
                               n.conv5_dilation4,
                               concat_param=dict({'concat_dim': 1}))

    #  BLSTM module
    # ===================== prepare lstm inputs =====================
    n.im2col_conv53 = L.Im2col(n.concat_conv53,
                               convolution_param=dict(kernel_size=3, pad=1))
    n.im2col_transpose_conv53 = L.Transpose(
        n.im2col_conv53, transpose_param=dict(dim=[3, 2, 0, 1]))
    n.lstm_input_conv53 = L.Reshape(n.im2col_transpose_conv53,
                                    reshape_param=dict(shape=dict(dim=-1),
                                                       axis=1,
                                                       num_axes=2))

    # forward LSTM
    n.lstm_conv53 = L.Lstm(n.lstm_input_conv53,
                           lstm_param=dict(num_output=256,
                                           weight_filler=dict(type='gaussian',
                                                              std=0.01),
                                           bias_filler=dict(type='constant'),
                                           clipping_threshold=1))

    # backward LSTM
    n.rlstm_input_conv53 = L.Reverse(n.lstm_input_conv53,
                                     name='lstm_reverse1_conv53',
                                     reverse_param=dict(axis=0))
    n.rlstm_output_conv53 = L.Lstm(n.rlstm_input_conv53,
                                   name='rlstm_conv53',
                                   lstm_param=dict(num_output=256))
    n.rlstm_conv53 = L.Reverse(n.rlstm_output_conv53,
                               name='lstm_reverse2_conv53',
                               reverse_param=dict(axis=0))
    # concat lstm_conv53 and rlstm_conv53: n*c*(h1+h2+...+hk)*w
    n.merge_lstm_rlstm_conv53 = L.Concat(n.lstm_conv53,
                                         n.rlstm_conv53,
                                         concat_param=dict(axis=2))
    n.lstm_output_reshape_conv53 = L.Reshape(n.merge_lstm_rlstm_conv53,
                                             reshape_param=dict(
                                                 shape=dict(dim=[-1, 1]),
                                                 axis=1,
                                                 num_axes=1))
    # transpose size of output as (N, C, H, W)
    n.lstm_output_conv53 = L.Transpose(n.lstm_output_reshape_conv53,
                                       transpose_param=dict(dim=[2, 3, 1, 0]))

    # stage 3: reduce the BLSTM output with a 1x1 conv, upsample 4x, crop to the input size
    n.score_dsn3 = conv1x1(n.lstm_output_conv33,
                           lr=[0.01, 1, 0.02, 0],
                           wf=dict(type='gaussian', std=0.01))
    n.score_dsn3_up = upsample(n.score_dsn3, stride=4)
    n.upscore_dsn3 = L.Crop(n.score_dsn3_up, n.data)

    # BalanceCrossEntropyLoss
    if split == 'train':
        n.loss3 = L.BalanceCrossEntropyLoss(n.upscore_dsn3,
                                            n.label,
                                            loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn3 = L.Sigmoid(n.upscore_dsn3)

    # stage 4: reduce the BLSTM output with a 1x1 conv, upsample 8x, crop to the input size
    n.score_dsn4 = conv1x1(n.lstm_output_conv43,
                           lr=[0.01, 1, 0.02, 0],
                           wf=dict(type='gaussian', std=0.01))
    n.score_dsn4_up = upsample(n.score_dsn4, stride=8)
    n.upscore_dsn4 = L.Crop(n.score_dsn4_up, n.data)

    # BalanceCrossEntropyLoss
    if split == 'train':
        n.loss4 = L.BalanceCrossEntropyLoss(n.upscore_dsn4,
                                            n.label,
                                            loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn4 = L.Sigmoid(n.upscore_dsn4)

    # stage 5: reduce the BLSTM output with a 1x1 conv, upsample 16x, crop to the input size
    n.score_dsn5 = conv1x1(n.lstm_output_conv53,
                           lr=[0.01, 1, 0.02, 0],
                           wf=dict(type='gaussian', std=0.01))
    n.score_dsn5_up = upsample(n.score_dsn5, stride=16)
    n.upscore_dsn5 = L.Crop(n.score_dsn5_up, n.data)

    # BalanceCrossEntropyLoss
    if split == 'train':
        n.loss5 = L.BalanceCrossEntropyLoss(n.upscore_dsn5,
                                            n.label,
                                            loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn5 = L.Sigmoid(n.upscore_dsn5)


    # concatenate the three stage outputs along the channel dimension as input to the attention module
    n.concat_upscore = L.Concat(n.upscore_dsn3,
                                n.upscore_dsn4,
                                n.upscore_dsn5,
                                name='concat',
                                concat_param=dict({'concat_dim': 1}))

    # reduce upscore_dsn3, upscore_dsn4 and upscore_dsn5 with 3x3 convolutions
    n.output_mask_product03 = L.Convolution(
        n.upscore_dsn3,
        num_output=1,
        kernel_size=3,
        pad=1,
        param=[dict(lr_mult=10, decay_mult=1),
               dict(lr_mult=20, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant'),
        engine=1)
    n.output_mask_product04 = L.Convolution(
        n.upscore_dsn4,
        num_output=1,
        kernel_size=3,
        pad=1,
        param=[dict(lr_mult=10, decay_mult=1),
               dict(lr_mult=20, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant'),
        engine=1)
    n.output_mask_product05 = L.Convolution(
        n.upscore_dsn5,
        num_output=1,
        kernel_size=3,
        pad=1,
        param=[dict(lr_mult=10, decay_mult=1),
               dict(lr_mult=20, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant'),
        engine=1)

    ### attention module
    # first conv layer: num_output=512, kernel_size 3x3
    n.att_conv1_mask_512 = L.Convolution(
        n.concat_upscore,
        num_output=512,
        kernel_size=3,
        pad=1,
        param=[dict(lr_mult=10, decay_mult=1),
               dict(lr_mult=20, decay_mult=0)],
        engine=1)
    n.relu_att_conv1 = L.ReLU(n.att_conv1_mask_512, in_place=True)
    n.drop_att_conv1_mask = L.Dropout(n.relu_att_conv1,
                                      dropout_ratio=0.5,
                                      in_place=True)
    # second conv layer: num_output=3, kernel_size 1x1
    n.att_fc_mask_512 = L.Convolution(
        n.drop_att_conv1_mask,
        num_output=3,
        kernel_size=1,
        param=[dict(lr_mult=10, decay_mult=1),
               dict(lr_mult=20, decay_mult=0)],
        engine=1)
    n.attention = L.Softmax(n.att_fc_mask_512)
    # produce the three attention weights
    n.attention3, n.attention4, n.attention5 = L.Slice(n.attention,
                                                       name='slice_attention',
                                                       slice_param=dict(
                                                           axis=1,
                                                           slice_point=[1, 2]),
                                                       ntop=3)

    # multiply each attention weight with its feature map and fuse
    n.output_mask3 = L.Eltwise(n.attention3,
                               n.output_mask_product03,
                               operation=P.Eltwise.PROD)
    n.output_mask4 = L.Eltwise(n.attention4,
                               n.output_mask_product04,
                               operation=P.Eltwise.PROD)
    n.output_mask5 = L.Eltwise(n.attention5,
                               n.output_mask_product05,
                               operation=P.Eltwise.PROD)

    n.output_fusion = L.Eltwise(n.output_mask3,
                                n.output_mask4,
                                n.output_mask5,
                                operation=P.Eltwise.SUM)

    # for comparison, bypass the attention module: concatenate the three stage outputs and reduce with a 1x1 convolution
    n.upscore_fuse = L.Convolution(n.concat_upscore,
                                   name='new-score-weighting',
                                   num_output=1,
                                   kernel_size=1,
                                   param=[
                                       dict(lr_mult=0.001, decay_mult=1),
                                       dict(lr_mult=0.002, decay_mult=0)
                                   ],
                                   weight_filler=dict(type='constant',
                                                      value=0.2),
                                   engine=1)

    if split == 'train':
        n.loss_fuse = L.BalanceCrossEntropyLoss(n.upscore_fuse,
                                                n.label,
                                                loss_param=loss_param)
        n.loss_output_fusion = L.BalanceCrossEntropyLoss(n.output_fusion,
                                                         n.label,
                                                         loss_param=loss_param)
    if split == 'test':
        n.sigmoid_fuse = L.Sigmoid(n.upscore_fuse)
        n.sigmoid_output_fusion = L.Sigmoid(n.output_fusion)

    return n.to_proto()
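A driver sketch for both phases; the output file names are assumptions:

with open('train.prototxt', 'w') as f:
    f.write(str(net('train')))
with open('test.prototxt', 'w') as f:
    f.write(str(net('test')))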
Example 5
def write_prototxt(is_train, output_folder, \
                    filename, main_branch, \
                    num_output_stage1, \
                    blocks, sync_bn, uni_bn):

    netspec = caffe.NetSpec()

    #### Input Setting ####
    crop_size = 112
    width = 170
    height = 128
    length = 16
    step = 1
    num_segments = 1

    if is_train:
        use_global_stats = False
    else:
        use_global_stats = True

    #### Data layer ####
    if is_train:
        data_train_params = dict(name='data', \
                            ntop=2, \
                            video4d_data_param=dict( \
                                source="../kinetics_train_list.txt", \
                                batch_size=24, \
                                new_width=width, \
                                new_height=height, \
                                new_length=length, \
                                num_segments=num_segments, \
                                modality=0, \
                                step=step, \
                                rand_step=True, \
                                name_pattern='image_%06d.jpg', \
                                shuffle=True), \
                            transform_param=dict(
                                crop_size=crop_size, \
                                mirror=True, \
                                multi_scale=True, \
                                max_distort=1, \
                                scale_ratios=[1, 0.875, 0.75, 0.66], \
                                mean_value=[104]*length+[117]*length+[123]*length), \
                            include=dict(phase=0))

        data_val_params = dict(name='vdata', \
                                ntop=2, \
                                video4d_data_param=dict(
                                    source="../kinetics_val_list.txt", \
                                    batch_size=1, \
                                    new_width=width, \
                                    new_height=height, \
                                    new_length=length, \
                                    num_segments=num_segments, \
                                    modality=0, \
                                    step=step, \
                                    name_pattern='image_%06d.jpg'), \
                                transform_param=dict(
                                    crop_size=crop_size, \
                                    mirror=False, \
                                    mean_value=[104]*length+[117]*length+[123]*length), \
                                include=dict(phase=1))
        # pdb.set_trace()
        netspec.data, netspec.label = BaseModule('Video4dData',
                                                 data_train_params).attach(
                                                     netspec, [])
        netspec.vdata, netspec.vlabel = BaseModule('Video4dData',
                                                   data_val_params).attach(
                                                       netspec, [])
    else:
        data_params = dict(name='data', \
                            dummy_data_param=dict( \
                                shape=dict(\
                                    dim=[10, 3, length, crop_size, crop_size])))
        netspec.data = BaseModule('DummyData', data_params).attach(netspec, [])

    #### (Optional) Reshape Layer ####
    if is_train:
        reshape_params = dict(name='data_reshape', \
                            reshape_param=dict( \
                                shape=dict(dim=[-1, 3, length, crop_size, crop_size])))
        netspec.data_reshape = BaseModule('Reshape', reshape_params).attach(
            netspec, [netspec.data])

    #### Stage 1 ####
    channels = 3 * 7 * 7 * 3 * 64 // (7 * 7 * 3 + 3 * 64)  # keep the parameter count of a full 3x7x7 conv
    name = '1_s'
    conv1xdxd_params = dict(name='conv'+name, \
                            num_output=channels, \
                            kernel_size=[1, 7, 7], \
                            pad=[0, 3, 3], \
                            stride=[1, 2, 2], \
                            engine=2)
    conv1xdxd = BaseModule('Convolution', conv1xdxd_params).attach(
        netspec, [netspec.data_reshape if is_train else netspec.data])
    name = '1_t'
    stage1 = SgpAttenModule(name_template=name, \
                            bn_params=dict(frozen=False), \
                            stride=2, \
                            num_output=64, \
                            t_conv=False, \
                            sync_bn=sync_bn, \
                            uni_bn=uni_bn).attach(netspec, [conv1xdxd])
    num_output = num_output_stage1

    #### Stages 2 - 5 ####
    last = stage1
    for stage in range(4):
        for block in range(blocks[stage]):
            # First block usually projection
            if block == 0:
                shortcut = 'projection'
                stride = 2
                if stage == 0:
                    shortcut = 'identity'
                    stride = 1
            else:
                shortcut = 'identity'
                stride = 1

            name = str(stage + 2) + num2letter[int(block)]
            curr_num_output = num_output * (2**(stage))

            if uni_bn:
                params = dict(name=name,
                              num_output=curr_num_output,
                              shortcut=shortcut,
                              main_branch=main_branch,
                              stride=stride,
                              frozen=False)
            else:
                params = dict(name=name,
                              num_output=curr_num_output,
                              shortcut=shortcut,
                              main_branch=main_branch,
                              stride=stride,
                              use_global_stats=use_global_stats)

            last = PreActWiderDecoupSgpAttenMixBlock(name_template=name, \
                shortcut=shortcut, \
                num_output=curr_num_output, \
                stride=stride, \
                t_conv=False, \
                sync_bn=sync_bn, \
                uni_bn=uni_bn).attach(netspec, [last])

            # else:
            #     last = PreActWiderDecoupSgpAttenStrongestBlock(name_template=name, \
            #         shortcut=shortcut, \
            #         num_output=curr_num_output, \
            #         stride=stride, \
            #         t_conv=True, \
            #         sync_bn=sync_bn, \
            #         uni_bn=uni_bn).attach(netspec, [last])

    #### Last Norm & ReLU ####
    if uni_bn:
        bn_params = dict(frozen=False)
    else:
        bn_params = dict(use_global_stats=use_global_stats)
    last = BNReLUModule(name_template='5b', \
                        bn_params=bn_params, \
                        sync_bn=sync_bn, \
                        uni_bn=uni_bn).attach(netspec, [last])

    #### pool5 ####
    pool_params = dict(global_pooling=True, pool=P.Pooling.AVE, name='pool5')
    pool = BaseModule('Pooling', pool_params).attach(netspec, [last])

    #### pool5_reshape ####
    reshape_params = dict(shape=dict(dim=[-1, num_output_stage1 * 8]),
                          name='pool5_reshape')
    reshape = BaseModule('Reshape', reshape_params).attach(netspec, [pool])

    #### dropout ####
    dropout_params = dict(dropout_ratio=0.2, name='dropout')
    dropout = BaseModule('Dropout', dropout_params).attach(netspec, [reshape])

    #### ip ####
    ip_params = dict(name='fc400', num_output=400)
    ip = BaseModule('InnerProduct', ip_params).attach(netspec, [dropout])

    if is_train:

        #### Softmax Loss ####
        smax_params = dict(name='loss')
        smax_loss = BaseModule('SoftmaxWithLoss',
                               smax_params).attach(netspec,
                                                   [ip, netspec.label])

        #### Top1 Accuracy ####
        top1_params = dict(name='top1',
                           accuracy_param=dict(top_k=1),
                           include=dict(phase=1))
        top1 = BaseModule('Accuracy',
                          top1_params).attach(netspec, [ip, netspec.label])

        #### Top5 Accuracy ####
        top5_params = dict(name='top5',
                           accuracy_param=dict(top_k=5),
                           include=dict(phase=1))
        top5 = BaseModule('Accuracy',
                          top5_params).attach(netspec, [ip, netspec.label])

    filepath = os.path.join(output_folder, filename)
    with open(filepath, 'w') as fp:
        fp.write(str(netspec.to_proto()))
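A hypothetical invocation; the block layout and flags below are illustrative, not a published configuration:

write_prototxt(is_train=True, output_folder='./prototxts',
               filename='train.prototxt', main_branch='normal',
               num_output_stage1=64, blocks=[3, 4, 6, 3],
               sync_bn=False, uni_bn=True)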
Example 6
def build_VGG16Net(split,
                   num_classes,
                   batch_size,
                   resize_w,
                   resize_h,
                   crop_w=0,
                   crop_h=0,
                   crop_margin=0,
                   mirror=0,
                   rotate=0,
                   HSV_prob=0,
                   HSV_jitter=0,
                   train=True,
                   deploy=False):

    weight_param = dict(lr_mult=1, decay_mult=1)
    bias_param = dict(lr_mult=2, decay_mult=0)
    learned_param = [weight_param, bias_param]

    frozen_param = [dict(lr_mult=0)] * 2

    # 'boosted_param' is used below but was undefined in the original snippet;
    # assuming a higher learning rate for the newly initialized layers
    # (hypothetical values).
    boosted_param = [dict(lr_mult=10, decay_mult=1),
                     dict(lr_mult=20, decay_mult=0)]

    n = caffe.NetSpec()

    pydata_params = dict(split=split,
                         mean=(103.939, 116.779,
                               123.68))  #For VGG16 different mean than AlexNet

    pydata_params['dir'] = '../../../datasets/SocialMedia'
    pydata_params['train'] = train
    pydata_params['batch_size'] = batch_size
    pydata_params['resize_w'] = resize_w
    pydata_params['resize_h'] = resize_h
    pydata_params['crop_w'] = crop_w
    pydata_params['crop_h'] = crop_h
    pydata_params['crop_margin'] = crop_margin
    pydata_params['mirror'] = mirror
    pydata_params['rotate'] = rotate
    pydata_params['HSV_prob'] = HSV_prob
    pydata_params['HSV_jitter'] = HSV_jitter
    pydata_params['num_classes'] = num_classes

    pylayer = 'customDataLayer'

    n.data, n.label = L.Python(module='layers',
                               layer=pylayer,
                               ntop=2,
                               param_str=str(pydata_params))

    # conv
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 3, 64, pad=1, param=frozen_param)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1,
                                     3,
                                     64,
                                     pad=1,
                                     param=frozen_param)
    n.pool1 = max_pool(n.relu1_2, 2, stride=2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1,
                                     3,
                                     128,
                                     pad=1,
                                     param=frozen_param)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1,
                                     3,
                                     128,
                                     pad=1,
                                     param=frozen_param)
    n.pool2 = max_pool(n.relu2_2, 2, stride=2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2,
                                     3,
                                     256,
                                     pad=1,
                                     param=frozen_param)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1,
                                     3,
                                     256,
                                     pad=1,
                                     param=frozen_param)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2,
                                     3,
                                     256,
                                     pad=1,
                                     param=frozen_param)
    n.pool3 = max_pool(n.relu3_3, 2, stride=2)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3,
                                     3,
                                     512,
                                     pad=1,
                                     param=frozen_param)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1,
                                     3,
                                     512,
                                     pad=1,
                                     param=frozen_param)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2,
                                     3,
                                     512,
                                     pad=1,
                                     param=frozen_param)
    n.pool4 = max_pool(n.relu4_3, 2, stride=2)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4,
                                     3,
                                     512,
                                     pad=1,
                                     param=learned_param)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1,
                                     3,
                                     512,
                                     pad=1,
                                     param=learned_param)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2,
                                     3,
                                     512,
                                     pad=1,
                                     param=learned_param)
    n.pool5 = max_pool(n.relu5_3, 2, stride=2)

    # fully conn
    n.fc6, n.relu6 = fc_relu(n.pool5, 4096, param=boosted_param)
    if train:
        n.drop6 = fc7input = L.Dropout(n.relu6,
                                       in_place=True,
                                       dropout_ratio=0.5)  #0.5
    else:
        fc7input = n.relu6

    n.fc7, n.relu7 = fc_relu(fc7input, 4096, param=boosted_param)
    if train:
        n.drop7 = fc8input = L.Dropout(n.relu7,
                                       in_place=True,
                                       dropout_ratio=0.5)  #0.5
    else:
        fc8input = n.relu7

    n.fc8C = L.InnerProduct(fc8input,
                            num_output=num_classes,
                            param=boosted_param)

    if not deploy:
        n.loss = L.SigmoidCrossEntropyLoss(n.fc8C, n.label)

    if deploy:
        n.probs = L.Sigmoid(n.fc8C)
        with open('deploy.prototxt', 'w') as f:
            f.write(str(n.to_proto()))
            return f.name
    else:
        if train:
            with open('train.prototxt', 'w') as f:
                f.write(str(n.to_proto()))
                return f.name
        else:
            with open('val.prototxt', 'w') as f:
                f.write(str(n.to_proto()))
                return f.name

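A hypothetical call; the class count and image geometry are placeholders:

train_proto = build_VGG16Net('train', num_classes=100, batch_size=32,
                             resize_w=256, resize_h=256,
                             crop_w=224, crop_h=224, crop_margin=0,
                             mirror=1, train=True, deploy=False)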
Example 7
def dump_model(operation='create', redo=False):
    # Creates graph from saved GraphDef.
    create_graph()
    sess = tf.InteractiveSession()

    # Creates caffe model.
    deploy_net_file = 'models/inception_v3/inception_v3_deploy.prototxt'
    model_file = 'models/inception_v3/inception_v3.caffemodel'
    net = []

    if operation == 'create' and (not os.path.exists(deploy_net_file) or redo):
        net = caffe.NetSpec()
    elif operation == 'save' and (not os.path.exists(model_file) or redo):
        caffe.set_device(1)
        caffe.set_mode_gpu()
        net = caffe.Net(deploy_net_file, caffe.TEST)
    else:
        return

    # dump the preprocessing parameters
    dump_inputlayer(sess, net, operation)

    # dump the filters
    dump_convbn(sess, net, 'data', 'conv', operation)
    dump_convbn(sess, net, 'conv', 'conv_1', operation)
    dump_convbn(sess, net, 'conv_1', 'conv_2', operation)
    dump_pool(sess, net, 'conv_2', 'pool', operation)
    dump_convbn(sess, net, 'pool', 'conv_3', operation)
    dump_convbn(sess, net, 'conv_3', 'conv_4', operation)
    dump_pool(sess, net, 'conv_4', 'pool_1', operation)

    # inceptions with 1x1, 3x3, 5x5 convolutions
    from_layer = 'pool_1'
    for inception_id in range(0, 3):
        if inception_id == 0:
            out_layer = 'mixed'
        else:
            out_layer = 'mixed_{}'.format(inception_id)
        dump_tower(sess, net, from_layer, out_layer, ['conv'], operation)
        dump_tower(sess, net, from_layer, '{}/tower'.format(out_layer),
                   ['conv', 'conv_1'], operation)
        dump_tower(sess, net, from_layer, '{}/tower_1'.format(out_layer),
                   ['conv', 'conv_1', 'conv_2'], operation)
        dump_tower(sess, net, from_layer, '{}/tower_2'.format(out_layer),
                   ['pool', 'conv'], operation)
        dump_inception(
            sess, net, out_layer,
            ['conv', 'tower/conv_1', 'tower_1/conv_2', 'tower_2/conv'],
            operation)
        from_layer = '{}/join'.format(out_layer)

    # inceptions with 1x1, 3x3(in sequence) convolutions
    out_layer = 'mixed_3'
    dump_tower(sess, net, from_layer, out_layer, ['conv'], operation)
    dump_tower(sess, net, from_layer, '{}/tower'.format(out_layer),
               ['conv', 'conv_1', 'conv_2'], operation)
    dump_tower(sess, net, from_layer, out_layer, ['pool'], operation)
    dump_inception(sess, net, out_layer, ['conv', 'tower/conv_2', 'pool'],
                   operation)
    from_layer = '{}/join'.format(out_layer)

    # inceptions with 1x1, 7x1, 1x7 convolutions
    for inception_id in range(4, 8):
        out_layer = 'mixed_{}'.format(inception_id)
        dump_tower(sess, net, from_layer, out_layer, ['conv'], operation)
        dump_tower(sess, net, from_layer, '{}/tower'.format(out_layer),
                   ['conv', 'conv_1', 'conv_2'], operation)
        dump_tower(sess, net, from_layer, '{}/tower_1'.format(out_layer),
                   ['conv', 'conv_1', 'conv_2', 'conv_3', 'conv_4'], operation)
        dump_tower(sess, net, from_layer, '{}/tower_2'.format(out_layer),
                   ['pool', 'conv'], operation)
        dump_inception(
            sess, net, out_layer,
            ['conv', 'tower/conv_2', 'tower_1/conv_4', 'tower_2/conv'],
            operation)
        from_layer = '{}/join'.format(out_layer)

    # inceptions with 1x1, 3x3, 1x7, 7x1 filters
    out_layer = 'mixed_8'
    dump_tower(sess, net, from_layer, '{}/tower'.format(out_layer),
               ['conv', 'conv_1'], operation)
    dump_tower(sess, net, from_layer, '{}/tower_1'.format(out_layer),
               ['conv', 'conv_1', 'conv_2', 'conv_3'], operation)
    dump_tower(sess, net, from_layer, out_layer, ['pool'], operation)
    dump_inception(sess, net, out_layer,
                   ['tower/conv_1', 'tower_1/conv_3', 'pool'], operation)
    from_layer = '{}/join'.format(out_layer)

    for inception_id in range(9, 11):
        out_layer = 'mixed_{}'.format(inception_id)
        dump_tower(sess, net, from_layer, out_layer, ['conv'], operation)
        dump_tower(sess, net, from_layer, '{}/tower'.format(out_layer),
                   ['conv'], operation)
        dump_tower(sess, net, '{}/tower/conv'.format(out_layer),
                   '{}/tower/mixed'.format(out_layer), ['conv'], operation)
        dump_tower(sess, net, '{}/tower/conv'.format(out_layer),
                   '{}/tower/mixed'.format(out_layer), ['conv_1'], operation)
        dump_inception(sess, net, '{}/tower/mixed'.format(out_layer),
                       ['conv', 'conv_1'], operation, False)
        dump_tower(sess, net, from_layer, '{}/tower_1'.format(out_layer),
                   ['conv', 'conv_1'], operation)
        dump_tower(sess, net, '{}/tower_1/conv_1'.format(out_layer),
                   '{}/tower_1/mixed'.format(out_layer), ['conv'], operation)
        dump_tower(sess, net, '{}/tower_1/conv_1'.format(out_layer),
                   '{}/tower_1/mixed'.format(out_layer), ['conv_1'], operation)
        dump_inception(sess, net, '{}/tower_1/mixed'.format(out_layer),
                       ['conv', 'conv_1'], operation, False)
        dump_tower(sess, net, from_layer, '{}/tower_2'.format(out_layer),
                   ['pool', 'conv'], operation)
        dump_inception(
            sess, net, out_layer,
            ['conv', 'tower/mixed', 'tower_1/mixed', 'tower_2/conv'],
            operation)
        from_layer = '{}/join'.format(out_layer)

    dump_pool(sess, net, from_layer, 'pool_3', operation)
    dump_softmax(sess, net, 'pool_3', 'softmax', operation)

    if operation == 'create' and (not os.path.exists(deploy_net_file) or redo):
        model_dir = os.path.dirname(deploy_net_file)
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        with open(deploy_net_file, 'w') as f:
            print('name: "inception_v3_deploy"', file=f)
            print(net.to_proto(), file=f)
    elif operation == 'save' and (not os.path.exists(model_file) or redo):
        net.save(model_file)
    sess.close()
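Typical two-pass use of dump_model(): first emit the prototxt, then reload it and copy the TensorFlow weights into a caffemodel:

dump_model(operation='create')   # writes inception_v3_deploy.prototxt
dump_model(operation='save')     # fills and saves inception_v3.caffemodel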
Example 8
def fcn(split, tops):
    n = caffe.NetSpec()
    n.data, n.label = L.Python(module='nyud_layers',
                               layer='NYUDSegDataLayer',
                               ntop=2,
                               param_str=str(
                                   dict(nyud_dir='../data',
                                        split=split,
                                        tops=tops,
                                        seed=1337)))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6_new, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7_new, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(
        n.drop7,
        num_output=40,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore = L.Deconvolution(n.score_fr,
                                convolution_param=dict(num_output=40,
                                                       kernel_size=64,
                                                       stride=32,
                                                       bias_term=False),
                                param=[dict(lr_mult=0)])
    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(n.score,
                               n.label,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    return n.to_proto()
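A driver sketch; the tops tuple is an assumption following the NYUDv2 data layer's color/label convention:

with open('trainval.prototxt', 'w') as f:
    f.write(str(fcn('trainval', ('color', 'label'))))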
Example 9
def alexnet(train_lmdb, test_lmdb, mean_file, train_batch_size, test_batch_size, output_path):
    # train
    ntrain = caffe.NetSpec()
    # val
    nval = caffe.NetSpec()
    # deploy
    ndeploy = caffe.NetSpec()

    #--------------------------------------------------
    # train + val
    ntrain.data, ntrain.label = L.Data(name='data', batch_size=train_batch_size, backend=P.Data.LMDB,
                                       source=train_lmdb, transform_param=dict(mirror=True, crop_size=227, mean_file=mean_file), include=dict(phase=caffe.TRAIN), ntop=2)
    nval.data, nval.label = L.Data(name='data', batch_size=test_batch_size, backend=P.Data.LMDB,
                                   source=test_lmdb, transform_param=dict(mirror=False, crop_size=227, mean_file=mean_file), include=dict(phase=caffe.TEST), ntop=2)

    ntrain.conv1 = L.Convolution(ntrain.data, name='conv1', kernel_size=11, num_output=96, stride=4, weight_filler=dict(
        type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.relu1 = L.ReLU(ntrain.conv1, name='relu1', in_place=True)
    ntrain.norm1 = L.LRN(ntrain.relu1, name='norm1',
                         local_size=5, alpha=1e-4, beta=0.75)
    ntrain.pool1 = L.Pooling(ntrain.norm1, name='pool1', kernel_size=3,
                             stride=2, pool=P.Pooling.MAX)

    ntrain.conv2 = L.Convolution(ntrain.pool1, name='conv2', kernel_size=5, num_output=256, pad=2, group=2, weight_filler=dict(
        type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0.1), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.relu2 = L.ReLU(ntrain.conv2, name='relu2', in_place=True)
    ntrain.norm2 = L.LRN(ntrain.relu2, name='norm2',
                         local_size=5, alpha=1e-4, beta=0.75)
    ntrain.pool2 = L.Pooling(ntrain.norm2, name='pool2', kernel_size=3,
                             stride=2, pool=P.Pooling.MAX)

    ntrain.conv3 = L.Convolution(ntrain.pool2, name='conv3', kernel_size=3, num_output=384, pad=1, weight_filler=dict(
        type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.relu3 = L.ReLU(ntrain.conv3, name='relu3', in_place=True)

    ntrain.conv4 = L.Convolution(ntrain.relu3, name='conv4', kernel_size=3, num_output=384, pad=1, group=2, weight_filler=dict(
        type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0.1), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.relu4 = L.ReLU(ntrain.conv4, name='relu4', in_place=True)

    ntrain.conv5 = L.Convolution(ntrain.relu4, name='conv5', kernel_size=3, num_output=256, pad=1, group=2, weight_filler=dict(
        type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0.1), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.relu5 = L.ReLU(ntrain.conv5, name='relu5', in_place=True)
    ntrain.pool5 = L.Pooling(ntrain.relu5, name='pool5', kernel_size=3,
                             stride=2, pool=P.Pooling.MAX)

    ntrain.fc6 = L.InnerProduct(ntrain.pool5, name='fc6', num_output=4096,
                                weight_filler=dict(type='gaussian', std=0.005), bias_filler=dict(type='constant', value=1e-1), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.relu6 = L.ReLU(ntrain.fc6, name='relu6', in_place=True)
    ntrain.drop6 = L.Dropout(ntrain.relu6, name='drop6',
                             dropout_ratio=0.5, in_place=True)

    ntrain.fc7 = L.InnerProduct(ntrain.drop6, name='fc7', num_output=4096,
                                weight_filler=dict(type='gaussian', std=0.005), bias_filler=dict(type='constant', value=1e-1), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    ntrain.relu7 = L.ReLU(ntrain.fc7, name='relu7', in_place=True)
    ntrain.drop7 = L.Dropout(ntrain.relu7, name='drop7',
                             dropout_ratio=0.5, in_place=True)

    ntrain.fc8 = L.InnerProduct(ntrain.drop7, name='fc8', num_output=1000,
                                weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])

    ntrain.accuracy = L.Accuracy(ntrain.fc8, ntrain.label, name='accuracy',
                                 include=dict(phase=caffe.TEST))
    ntrain.loss = L.SoftmaxWithLoss(ntrain.fc8, ntrain.label, name='loss')

    #--------------------------------------------------
    # deploy: drop lr_mult, decay_mult, weight_filler and bias_filler
    # ({'shape': {'dim': [batch_size, channels, n_rows, n_cols]}})
    ndeploy.data = L.Input(input_param={'shape': {'dim': [10, 3, 227, 227]}})

    ndeploy.conv1 = L.Convolution(
        ndeploy.data, name='conv1', kernel_size=11, num_output=96, stride=4)
    ndeploy.relu1 = L.ReLU(ndeploy.conv1, name='relu1', in_place=True)
    ndeploy.norm1 = L.LRN(ndeploy.relu1, name='norm1',
                          local_size=5, alpha=1e-4, beta=0.75)
    ndeploy.pool1 = L.Pooling(ndeploy.norm1, name='pool1', kernel_size=3,
                              stride=2, pool=P.Pooling.MAX)

    ndeploy.conv2 = L.Convolution(
        ndeploy.pool1, name='conv2', kernel_size=5, num_output=256, pad=2, group=2)
    ndeploy.relu2 = L.ReLU(ndeploy.conv2, name='relu2', in_place=True)
    ndeploy.norm2 = L.LRN(ndeploy.relu2, name='norm2',
                          local_size=5, alpha=1e-4, beta=0.75)
    ndeploy.pool2 = L.Pooling(ndeploy.norm2, name='pool2', kernel_size=3,
                              stride=2, pool=P.Pooling.MAX)

    ndeploy.conv3 = L.Convolution(
        ndeploy.pool2, name='conv3', kernel_size=3, num_output=384, pad=1)
    ndeploy.relu3 = L.ReLU(ndeploy.conv3, name='relu3', in_place=True)

    ndeploy.conv4 = L.Convolution(
        ndeploy.relu3, name='conv4', kernel_size=3, num_output=384, pad=1, group=2)
    ndeploy.relu4 = L.ReLU(ndeploy.conv4, name='relu4', in_place=True)

    ndeploy.conv5 = L.Convolution(
        ndeploy.relu4, name='conv5', kernel_size=3, num_output=256, pad=1, group=2)
    ndeploy.relu5 = L.ReLU(ndeploy.conv5, name='relu5', in_place=True)
    ndeploy.pool5 = L.Pooling(ndeploy.relu5, name='pool5', kernel_size=3,
                              stride=2, pool=P.Pooling.MAX)

    ndeploy.fc6 = L.InnerProduct(ndeploy.pool5, name='fc6', num_output=4096)
    ndeploy.relu6 = L.ReLU(ndeploy.fc6, name='relu6', in_place=True)
    ndeploy.drop6 = L.Dropout(ndeploy.relu6, name='drop6',
                              dropout_ratio=5e-1, in_place=True)

    ndeploy.fc7 = L.InnerProduct(ndeploy.drop6, name='fc7', num_output=4096)
    ndeploy.relu7 = L.ReLU(ndeploy.fc7, name='relu7', in_place=True)
    ndeploy.drop7 = L.Dropout(ndeploy.relu7, name='drop7',
                              dropout_ratio=5e-1, in_place=True)

    ndeploy.fc8 = L.InnerProduct(ndeploy.drop7, name='fc8', num_output=1000)

    ndeploy.prob = L.Softmax(ndeploy.fc8, name='prob')

    out_train_val = str('name: "AlexNet"\n') + \
        str(nval.to_proto()) + str(ntrain.to_proto())
    with open(output_path + '/alexnet_train_val.prototxt', 'w') as f:
        f.write(out_train_val)

    out_deploy = str('name: "AlexNet"\n') + str(ndeploy.to_proto())
    with open(output_path + '/alexnet_deploy.prototxt', 'w') as f:
        f.write(out_deploy)
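A hypothetical invocation; the LMDB and mean-file paths follow the stock ILSVRC12 layout and may differ in practice:

alexnet('examples/imagenet/ilsvrc12_train_lmdb',
        'examples/imagenet/ilsvrc12_val_lmdb',
        'data/ilsvrc12/imagenet_mean.binaryproto',
        train_batch_size=256, test_batch_size=50, output_path='.')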
Example 10
def net():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.Reduction(n.data, axis=0, coeff=1, operation=_operation)
    return n.to_proto()
Example 11
def net():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.Tile(n.data, axis=3, tiles=3)
    return n.to_proto()
Example 12
def net():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.Flatten(n.data, axis=_axis, end_axis=_end_axis)
    return n.to_proto()
Example 13
def net():
    n = caffe.NetSpec()
    n.data1 = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.data2 = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.Crop(n.data1, n.data2, axis=_axis, offset=_offset)
    return n.to_proto()
Example 14
0
 def net():
     n = caffe.NetSpec()
     n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
     n.dataout = L.AbsVal(n.data)
     return n.to_proto()
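
Examples no. 10 through no. 14 all follow one pattern: a single Input layer feeding the layer under test, with the test parameters (data_shape, _operation, _axis, _end_axis, _offset, and so on) closed over from module scope. A self-contained sketch of how one of these generators might be driven is below; the bindings are assumptions, not part of the originals:

import caffe
from caffe import layers as L, params as P

data_shape = [1, 3, 8, 8]      # assumed test shape
_operation = P.Reduction.SUM   # assumed reduction op

def net():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.Reduction(n.data, axis=0, coeff=1, operation=_operation)
    return n.to_proto()

# Serialize the generated net definition to disk.
with open('reduction_test.prototxt', 'w') as f:
    f.write(str(net()))
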
Example no. 15
0
def gen_prototxt(nangles,
                 max_order,
                 scales,
                 filter_size_factor=wavelet.DEFAULT_SIZE,
                 nchannels_input=3,
                 input_shape=[256, 256],
                 data=None,
                 verbose=False,
                 output_path=None):
    n = caffe.NetSpec()
    if data is None:
        data = L.Input(shape=dict(dim=[1, nchannels_input] + input_shape))

    n.data = data

    scat_count = -1
    dim_total = nchannels_input
    layers = [[(data, [None], 0)]]
    for o in range(max_order):
        layer = []
        for s in scales:
            kernel_size = s * filter_size_factor * 2
            delta_offset = kernel_size // 2

            for c0, s0, offset in layers[-1]:
                if s0[-1] is not None and s <= s0[-1]:
                    continue
                scat_count += 1
                dim_in = nchannels_input * nangles**o
                dim_out = nchannels_input * nangles**(o + 1)
                dim_total += dim_out
                name = 'scat%i_%i_%ito%i' % (s, scat_count, dim_in, dim_out)

                c = scat_layer(c0,
                               dim=dim_out,
                               kernel_size=kernel_size,
                               name=name,
                               group=dim_in)

                layer.append((c, s0 + [s], offset + delta_offset))

                if verbose:
                    print "%s:" % name
                    print "  kernel size: %i" % kernel_size
                    print "  %s (%i)" % ("->".join(map(str,
                                                       (s0 + [s]))), dim_out)

        layers.append(layer)

    if verbose:
        print "Total output dimensionality: %i" % dim_total

    # Crop the coefficients before concatenation
    # The last coefficient is the smallest because it has the highest order.
    last_coefficient = layers[-1][-1][0]
    max_offset = layers[-1][-1][2]
    coefficients = []
    for layer in layers:
        for c, _, offset in layer:
            coefficients.append(
                L.Crop(c, last_coefficient, offset=max_offset - offset))

    concat = L.Concat(*coefficients)

    # Do the final gaussian blur and resampling
    kernel_size = scales[-1] * filter_size_factor * 2
    stride = scales[-1]
    c = conv_layer(concat,
                   dim=dim_total,
                   group=dim_total,
                   kernel_size=kernel_size,
                   name='psi',
                   stride=stride)

    n.output = c

    proto_str = str(n.to_proto())

    if output_path:
        with open(output_path, 'w+') as f:
            f.write(proto_str)
            return f.name
    else:
        with tempfile.NamedTemporaryFile(delete=False) as f:
            f.write(proto_str)
            return f.name
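
scat_layer and conv_layer are helpers from the surrounding scattering-network code and are not shown. Given how they are called (grouped filtering with a kernel size derived from the scale), a plausible reading is a grouped convolution with frozen, precomputed wavelet filters, with the complex modulus approximated by AbsVal. The signatures and lr policy below are assumptions, not the original helpers:

from caffe import layers as L

def conv_layer(bottom, dim, kernel_size, name, group=1, stride=1):
    # Grouped convolution with frozen weights (lr_mult=0): the wavelet
    # filters are expected to be precomputed, not learned.
    return L.Convolution(bottom,
                         name=name,
                         num_output=dim,
                         kernel_size=kernel_size,
                         stride=stride,
                         group=group,
                         bias_term=False,
                         param=[dict(lr_mult=0)])

def scat_layer(bottom, dim, kernel_size, name, group=1):
    # One scattering stage: wavelet filtering followed by a modulus,
    # approximated here with AbsVal.
    c = conv_layer(bottom, dim, kernel_size, name, group=group)
    return L.AbsVal(c)
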
Example no. 16
0
    def inception_resnet_v2_proto(self, batch_size, phase='TRAIN'):
        n = caffe.NetSpec()
        if phase == 'TRAIN':
            source_data = self.train_data
            mirror = True
        else:
            source_data = self.test_data
            mirror = False
        n.data, n.label = L.Data(source=source_data, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                                 transform_param=dict(crop_size=299, mean_value=[104, 117, 123], mirror=mirror))

        # stem
        n.conv1_3x3_s2, n.conv1_3x3_s2_bn, n.conv1_3x3_s2_scale, n.conv1_3x3_s2_relu, n.conv2_3x3_s1, n.conv2_3x3_s1_bn, \
        n.conv2_3x3_s1_scale, n.conv2_3x3_s1_relu, n.conv3_3x3_s1, n.conv3_3x3_s1_bn, n.conv3_3x3_s1_scale, n.conv3_3x3_s1_relu, \
        n.inception_stem1_3x3_s2, n.inception_stem1_3x3_s2_bn, n.inception_stem1_3x3_s2_scale, n.inception_stem1_3x3_s2_relu, \
        n.inception_stem1_pool, n.inception_stem1, n.inception_stem2_3x3_reduce, n.inception_stem2_3x3_reduce_bn, \
        n.inception_stem2_3x3_reduce_scale, n.inception_stem2_3x3_reduce_relu, n.inception_stem2_3x3, \
        n.inception_stem2_3x3_bn, n.inception_stem2_3x3_scale, n.inception_stem2_3x3_relu, n.inception_stem2_7x1_reduce, \
        n.inception_stem2_7x1_reduce_bn, n.inception_stem2_7x1_reduce_scale, n.inception_stem2_7x1_reduce_relu, \
        n.inception_stem2_7x1, n.inception_stem2_7x1_bn, n.inception_stem2_7x1_scale, n.inception_stem2_7x1_relu, \
        n.inception_stem2_1x7, n.inception_stem2_1x7_bn, n.inception_stem2_1x7_scale, n.inception_stem2_1x7_relu, \
        n.inception_stem2_3x3_2, n.inception_stem2_3x3_2_bn, n.inception_stem2_3x3_2_scale, n.inception_stem2_3x3_2_relu, \
        n.inception_stem2, n.inception_stem3_3x3_s2, n.inception_stem3_3x3_s2_bn, n.inception_stem3_3x3_s2_scale, \
        n.inception_stem3_3x3_s2_relu, n.inception_stem3_pool, n.inception_stem3 = \
            stem_299x299(n.data)  # 384x35x35

        # 5 x inception_resnet_a
        for i in xrange(5):
            if i == 0:
                bottom = 'n.inception_stem3'
            else:
                bottom = 'n.inception_resnet_a(order)_residual_eltwise'.replace('(order)', str(i))
            exec (inception_resnet_a.replace('(order)', str(i + 1)).replace('bottom', bottom))  # 384x35x35

        # reduction_a
        n.reduction_a_pool, n.reduction_a_3x3, n.reduction_a_3x3_bn, n.reduction_a_3x3_scale, n.reduction_a_3x3_relu, \
        n.reduction_a_3x3_2_reduce, n.reduction_a_3x3_2_reduce_bn, n.reduction_a_3x3_2_reduce_scale, \
        n.reduction_a_3x3_2_reduce_relu, n.reduction_a_3x3_2, n.reduction_a_3x3_2_bn, n.reduction_a_3x3_2_scale, \
        n.reduction_a_3x3_2_relu, n.reduction_a_3x3_3, n.reduction_a_3x3_3_bn, n.reduction_a_3x3_3_scale, \
        n.reduction_a_3x3_3_relu, n.reduction_a_concat = \
            reduction_a(n.inception_resnet_a5_residual_eltwise)  # 1152x17x17

        # 10 x inception_resnet_b
        for i in xrange(10):
            if i == 0:
                bottom = 'n.reduction_a_concat'
            else:
                bottom = 'n.inception_resnet_b(order)_residual_eltwise'.replace('(order)', str(i))
            exec (inception_resnet_b.replace('(order)', str(i + 1)).replace('bottom', bottom))  # 1152x17x17

        # reduction_b
        n.reduction_b_pool, n.reduction_b_3x3_reduce, n.reduction_b_3x3_reduce_bn, n.reduction_b_3x3_reduce_scale, \
        n.reduction_b_3x3_reduce_relu, n.reduction_b_3x3, n.reduction_b_3x3_bn, n.reduction_b_3x3_scale, \
        n.reduction_b_3x3_relu, n.reduction_b_3x3_2_reduce, n.reduction_b_3x3_2_reduce_bn, n.reduction_b_3x3_2_reduce_scale, \
        n.reduction_b_3x3_2_reduce_relu, n.reduction_b_3x3_2, n.reduction_b_3x3_2_bn, n.reduction_b_3x3_2_scale, \
        n.reduction_b_3x3_2_relu, n.reduction_b_3x3_3_reduce, n.reduction_b_3x3_3_reduce_bn, n.reduction_b_3x3_3_reduce_scale, \
        n.reduction_b_3x3_3_reduce_relu, n.reduction_b_3x3_3, n.reduction_b_3x3_3_bn, n.reduction_b_3x3_3_scale, \
        n.reduction_b_3x3_3_relu, n.reduction_b_3x3_4, n.reduction_b_3x3_4_bn, n.reduction_b_3x3_4_scale, \
        n.reduction_b_3x3_4_relu, n.reduction_b_concat = \
            reduction_b(n.inception_resnet_b10_residual_eltwise)  # 2048x8x8

        # 5 x inception_resnet_c
        for i in xrange(5):
            if i == 0:
                bottom = 'n.reduction_b_concat'
            else:
                bottom = 'n.inception_resnet_c(order)_residual_eltwise'.replace('(order)', str(i))
            exec (inception_resnet_c.replace('(order)', str(i + 1)).replace('bottom', bottom))  # 2048x8x8

        n.pool_8x8_s1 = L.Pooling(n.inception_resnet_c5_residual_eltwise,
                                  pool=P.Pooling.AVE,
                                  global_pooling=True)  # 2048x1x1
        n.pool_8x8_s1_drop = L.Dropout(n.pool_8x8_s1, dropout_param=dict(dropout_ratio=0.2))
        n.classifier = L.InnerProduct(n.pool_8x8_s1_drop, num_output=self.classifier_num,
                                      param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                      weight_filler=dict(type='xavier'),
                                      bias_filler=dict(type='constant', value=0))
        n.loss = L.SoftmaxWithLoss(n.classifier, n.label)
        if phase != 'TRAIN':
            n.accuracy_top1 = L.Accuracy(n.classifier, n.label, include=dict(phase=1))
            n.accuracy_top5 = L.Accuracy(n.classifier, n.label, include=dict(phase=1),
                                         accuracy_param=dict(top_k=5))

        return n.to_proto()
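
The repeated inception_resnet_a/b/c blocks are stamped out by exec on module-level string templates in which '(order)' and 'bottom' are plain-text placeholders, substituted before execution. A miniature, hypothetical version of the mechanism (the real templates build the full residual branches):

import caffe
from caffe import layers as L

# Hypothetical miniature template, not the original string.
inception_resnet_a = (
    "n.inception_resnet_a(order)_1x1 = L.Convolution(bottom, num_output=3, kernel_size=1)\n"
    "n.inception_resnet_a(order)_residual_eltwise = "
    "L.Eltwise(bottom, n.inception_resnet_a(order)_1x1)\n")

n = caffe.NetSpec()
n.data = L.Input(shape=dict(dim=[1, 3, 35, 35]))
bottom = 'n.data'
for i in range(2):
    # num_output matches the input channels so the Eltwise sum is well-formed.
    exec(inception_resnet_a
         .replace('(order)', str(i + 1))
         .replace('bottom', bottom))
    bottom = 'n.inception_resnet_a{}_residual_eltwise'.format(i + 1)
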
Example no. 17
0
def DAPNet_Train():
    time_postfix = time.strftime("%m-%d_%H-%M-%S", time.localtime())
    ################################################################################
    os.chdir(caffe_root)
    ################################################################################
    # work dir
    ProjectName = "{}_{}_{}".format(BaseNet, Models, Ver)
    work_dir = "{}/{}/{}".format(Results_dir, Project, ProjectName)
    make_if_not_exist(work_dir)
    ################################################################################
    # work and model dirs
    proto_dir = "{}/Proto".format(work_dir)
    log_dir = "{}/Logs".format(work_dir)
    model_dir = "{}/Models".format(work_dir)
    pic_dir = "{}/Pics".format(work_dir)
    job_dir = "{}/Job".format(work_dir)
    make_if_not_exist(proto_dir)
    make_if_not_exist(log_dir)
    make_if_not_exist(model_dir)
    make_if_not_exist(pic_dir)
    make_if_not_exist(job_dir)
    ################################################################################
    # work file
    log_file = "{}/{}.log".format(log_dir, time_postfix)
    train_net_file = "{}/train.prototxt".format(proto_dir)
    test_net_file = "{}/test.prototxt".format(proto_dir)
    solver_file = "{}/solver.prototxt".format(proto_dir)
    snapshot_prefix = "{}/{}".format(model_dir, ProjectName)
    job_file = "{}/train.sh".format(job_dir)
    ################################################################################
    # TRAIN
    net = caffe.NetSpec()
    net = get_DAPDataLayer(net, train=True, batchsize=batchsize_per_device)
    net = FaceBoxFPNNet(net, train=True, data_layer="data", gt_label="label",\
        net_width=resized_width, net_height=resized_height)
    # net = DAPNet_hand_pool1(net, train=True, data_layer="data", gt_label="label", \
    # 			 net_width=resized_width, net_height=resized_height)

    with open(train_net_file, 'w') as f:
        print('name: "{}_train"'.format(ProjectName), file=f)
        print(net.to_proto(), file=f)
    ################################################################################
    # TEST
    if isinstance(val_list, list):
        test_net_files = []
        for id_val in xrange(len(val_list)):
            net = caffe.NetSpec()
            net = get_DAPDataLayer(net,
                                   train=False,
                                   batchsize=1,
                                   id_val=id_val)
            test_net_file = "{}/test{}.prototxt".format(proto_dir, id_val)
            net = FaceBoxFPNNet(net, train=False, data_layer="data", gt_label="label", \
                     net_width=resized_width, net_height=resized_height)
            with open(test_net_file, 'w') as f:
                print('name: "{}_test{}"'.format(ProjectName, id_val), file=f)
                print(net.to_proto(), file=f)
            test_net_files.append(test_net_file)
        test_net_file = test_net_files
    else:
        net = caffe.NetSpec()
        net = get_DAPDataLayer(net, train=False, batchsize=1)
        net = FaceBoxFPNNet(net, train=False, data_layer="data", gt_label="label", \
           net_width=resized_width, net_height=resized_height)
        # net = DAPNet_hand_pool1(net, train=False, data_layer="data", gt_label="label", \
        # 			 net_width=resized_width, net_height=resized_height)

        with open(test_net_file, 'w') as f:
            print('name: "{}_test"'.format(ProjectName), file=f)
            print(net.to_proto(), file=f)
        test_net_file = [
            test_net_file,
        ]
    ################################################################################
    # Solver
    solver_param = get_solver_param()
    solver = caffe_pb2.SolverParameter(train_net=train_net_file,
                                       test_net=test_net_file,
                                       snapshot_prefix=snapshot_prefix,
                                       **solver_param)
    with open(solver_file, 'w') as f:
        print(solver, file=f)
    ################################################################################
    # CaffeModel & Snapshot
    max_iter = 0
    for fname in os.listdir(model_dir):
        if fname.endswith(".solverstate"):
            basename = os.path.splitext(fname)[0]
            snapshot_iter = int(basename.split("{}_iter_".format(ProjectName))[1])
            if snapshot_iter > max_iter:
                max_iter = snapshot_iter
    if fine_tuning:
        train_param = '--weights="{}" \\\n'.format(get_pretained_model())
    else:
        train_param = ''

    if resume_training:
        if max_iter > 0:
            train_param = '--snapshot="{}_iter_{}.solverstate" \\\n'.format(
                snapshot_prefix, max_iter)
    ################################################################################
    # job scripts
    with open(job_file, 'w') as f:
        f.write('cd {}\n'.format(caffe_root))
        f.write('./build/tools/caffe train \\\n')
        f.write('--solver="{}" \\\n'.format(solver_file))
        f.write(train_param)
        if solver_param['solver_mode'] == P.Solver.GPU:
            f.write('--gpu {} 2>&1 | tee {}\n'.format(get_gpus(), log_file))
        else:
            f.write('2>&1 | tee {}\n'.format(log_file))
    os.chmod(job_file, stat.S_IRWXU)
    # ==========================================================================
    # Training
    subprocess.call(job_file, shell=True)
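
get_solver_param is defined elsewhere; whatever it returns is splatted into caffe_pb2.SolverParameter above. A hypothetical minimal version, for orientation only (the values are placeholders, not the project's actual schedule):

from caffe import params as P

def get_solver_param():
    # Hypothetical schedule; the real project supplies its own values.
    return dict(base_lr=0.001,
                lr_policy="multistep",
                stepvalue=[80000, 120000],
                gamma=0.1,
                momentum=0.9,
                weight_decay=0.0005,
                max_iter=150000,
                snapshot=10000,
                test_iter=[1000],      # one entry per test net
                test_interval=5000,
                display=20,
                solver_mode=P.Solver.GPU)
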
Example no. 18
0
def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split,
                         mean=(104.00699, 116.66877, 122.67892),
                         seed=1337)
    pydata_params['cocostuff_dir'] = 'cocostuff'
    pylayer = 'COCOSTUFFSegDataLayer'
    n.data, n.label = L.Python(module='cocostuff_layers',
                               layer=pylayer,
                               ntop=2,
                               param_str=str(pydata_params))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
    n.score_fr = L.Convolution(
        n.drop7,
        num_output=182,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore = L.Deconvolution(n.score_fr,
                                convolution_param=dict(num_output=182,
                                                       kernel_size=64,
                                                       stride=32,
                                                       bias_term=False),
                                param=[dict(lr_mult=0)])
    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(n.score,
                               n.label,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    return n.to_proto()
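
The Deconvolution above is created with lr_mult=0, so its weights never update during training; in the FCN reference code such layers are filled once with bilinear interpolation kernels via net surgery. A sketch of that standard filler:

import numpy as np

def upsample_filt(size):
    # 2-D bilinear interpolation kernel of side `size`.
    factor = (size + 1) // 2
    if size % 2 == 1:
        center = factor - 1
    else:
        center = factor - 0.5
    og = np.ogrid[:size, :size]
    return ((1 - abs(og[0] - center) / float(factor)) *
            (1 - abs(og[1] - center) / float(factor)))

# e.g., for the 'upscore' layer above, set each (class, class) slice of the
# (182, 182, 64, 64) weight blob to upsample_filt(64) before training.
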
Example no. 19
0
def deploy_pose_ren_net(dataset):
    n = caffe.NetSpec()
    point_num_ = util.get_joint_num(dataset)
    # input layers
    n.data = L.Input(name="data", shape=dict(dim=[1, 1, 96, 96]))
    n.prev_pose = L.Input(name="prev_pose",
                          shape=dict(dim=[1, point_num_ * 3]))

    print str(n.to_proto())

    # the base net
    n.conv0, n.relu0 = conv_relu(n.data, 16)
    n.conv1 = conv(n.relu0, 16)
    n.pool1 = max_pool(n.conv1)
    n.relu1 = L.ReLU(n.pool1, in_place=True)

    n.conv2_0, n.relu2_0 = conv_relu(n.pool1, 32, ks=1, pad=0)
    n.conv2, n.relu2 = conv_relu(n.relu2_0, 32)
    n.conv3 = conv(n.relu2, 32)
    n.res1 = L.Eltwise(n.conv2_0, n.conv3)
    n.pool2 = max_pool(n.res1)
    n.relu3 = L.ReLU(n.pool2, in_place=True)

    n.conv3_0, n.relu3_0 = conv_relu(n.relu3, 64, ks=1, pad=0)
    n.conv4, n.relu4 = conv_relu(n.relu3_0, 64)
    n.conv5 = conv(n.relu4, 64)
    n.res2 = L.Eltwise(n.conv3_0, n.conv5)
    n.pool3 = max_pool(n.res2)
    n.relu5 = L.ReLU(n.pool3, in_place=True)

    # pose guided region ensemble
    for idx in xrange(point_num_):
        if idx not in get_guided_joints(dataset):
            continue
        rois = 'rois_{}'.format(idx)
        n[rois] = L.Python(n.prev_pose,
                           module='python_layers.py_generate_roi_layer',
                           layer='PyGenerateROILayer',
                           ntop=1,
                           param_str=str(
                               dict(joint_idx=idx,
                                    roi_h=6,
                                    roi_w=6,
                                    img_h=96,
                                    img_w=96,
                                    spatial_mul=8)))
        roipool = 'roi_pool_{}'.format(idx)
        n[roipool] = L.ROIPooling(n.pool3,
                                  n[rois],
                                  roi_pooling_param=dict(pooled_w=7,
                                                         pooled_h=7,
                                                         spatial_scale=0.125))
        # fc
        fc1 = 'fc1_{}'.format(idx)
        relu6 = 'relu6_{}'.format(idx)
        drop1 = 'drop1_{}'.format(idx)
        n[fc1], n[relu6], n[drop1] = fc_relu_dropout(n[roipool], 2048, 0.5)
    # structure connection
    # connect_structure_1 = [[0,1,3], [0,4,6], [0,7,9], [0,10,12], [0,13,15]]
    connect_structure_1 = get_connect_structure(dataset)
    concate_bottom_final = []
    for idx in xrange(len(connect_structure_1)):
        concate_bottom = []
        for jdx in xrange(len(connect_structure_1[idx])):
            drop1 = 'drop1_{}'.format(connect_structure_1[idx][jdx])
            concate_bottom.append(n[drop1])
        concate_1 = 'concate_1_{}'.format(idx)
        n[concate_1] = L.Concat(*concate_bottom)
        fc2 = 'fc2_{}'.format(idx)
        relu7 = 'relu7_{}'.format(idx)
        drop2 = 'drop2_{}'.format(idx)
        n[fc2], n[relu7], n[drop2] = fc_relu_dropout(n[concate_1], 2048, 0.5)
        concate_bottom_final.append(n[drop2])

    n.fc_concat = L.Concat(*concate_bottom_final)
    n.fc3_0 = fc(n.fc_concat, point_num_ * 3)

    return str(n.to_proto())
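
conv, conv_relu, fc and fc_relu_dropout are project helpers that are not reproduced here. Judging from the call sites (three tops returned, in-place ReLU and Dropout), fc_relu_dropout plausibly looks like the sketch below; the parameter layout is an assumption:

from caffe import layers as L

def fc(bottom, nout):
    return L.InnerProduct(bottom, num_output=nout,
                          param=[dict(lr_mult=1, decay_mult=1),
                                 dict(lr_mult=2, decay_mult=0)])

def fc_relu_dropout(bottom, nout, dropout_ratio):
    # Fully connected layer followed by in-place ReLU and Dropout,
    # returned as three separate tops as the call sites expect.
    fc_ = fc(bottom, nout)
    relu = L.ReLU(fc_, in_place=True)
    drop = L.Dropout(relu, dropout_ratio=dropout_ratio, in_place=True)
    return fc_, relu, drop
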
Example no. 20
0
def silent_net():
    n = caffe.NetSpec()
    n.data, n.data2 = L.DummyData(shape=dict(dim=3), ntop=2)
    n.silence_data = L.Silence(n.data, ntop=0)
    n.silence_data2 = L.Silence(n.data2, ntop=0)
    return n.to_proto()
Example no. 21
0
def net(split):
    n = caffe.NetSpec()
    loss_param = dict(normalize=False)
    if split == 'train':
        data_params = dict(mean=(104.00699, 116.66877, 122.67892))
        # images and labels

        data_params['root'] = './datasets/CTW1500_Total_TCB'
        data_params['source'] = "CTW1500_Total_TCB.lst"

        data_params['shuffle'] = True
        data_params['ignore_label'] = -1
        n.data, n.label = L.Python(module='pylayer_old',
                                   layer='ImageLabelmapDataLayer',
                                   ntop=2,
                                   param_str=str(data_params))
        if 'ignore_label' in data_params:
            loss_param['ignore_label'] = int(data_params['ignore_label'])
    elif split == 'test':
        n.data = L.Input(name='data',
                         input_param=dict(shape=dict(dim=[1, 3, 500, 500])))
    else:
        raise Exception("Invalid phase")

    # The first conv stage
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    # The second conv stage
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    # The third conv stage
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv3_dilation1 = conv_dilation01(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation2 = conv_dilation03(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation3 = conv_dilation05(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation4 = conv_dilation07(n.conv3_3, mult=[100, 1, 200, 0])
    n.concat_conv33 = L.Concat(n.conv3_dilation1,
                               n.conv3_dilation2,
                               n.conv3_dilation3,
                               n.conv3_dilation4,
                               concat_param=dict(concat_dim=1))

    # The fourth conv stage
    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv4_dilation1 = conv_dilation1(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation2 = conv_dilation3(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation3 = conv_dilation5(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation4 = conv_dilation7(n.conv4_3, mult=[100, 1, 200, 0])
    n.concat_conv43 = L.Concat(n.conv4_dilation1,
                               n.conv4_dilation2,
                               n.conv4_dilation3,
                               n.conv4_dilation4,
                               concat_param=dict(concat_dim=1))

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)

    n.conv5_dilation1 = conv_dilation1(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation2 = conv_dilation3(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation3 = conv_dilation5(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation4 = conv_dilation7(n.conv5_3, mult=[100, 1, 200, 0])
    n.concat_conv53 = L.Concat(n.conv5_dilation1,
                               n.conv5_dilation2,
                               n.conv5_dilation3,
                               n.conv5_dilation4,
                               concat_param=dict(concat_dim=1))

    # DSN3
    n.score_dsn3 = conv1x1(n.concat_conv33,
                           lr=[0.01, 1, 0.02, 0],
                           wf=dict(type='gaussian', std=0.01))
    n.score_dsn3_up = upsample(n.score_dsn3, stride=4)
    n.upscore_dsn3 = L.Crop(n.score_dsn3_up, n.data)

    if split == 'train':
        n.loss3 = L.BalanceCrossEntropyLoss(n.upscore_dsn3,
                                            n.label,
                                            loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn3 = L.Sigmoid(n.upscore_dsn3)

    # DSN4
    n.score_dsn4 = conv1x1(n.concat_conv43,
                           lr=[0.01, 1, 0.02, 0],
                           wf=dict(type='gaussian', std=0.01))
    n.score_dsn4_up = upsample(n.score_dsn4, stride=8)
    n.upscore_dsn4 = L.Crop(n.score_dsn4_up, n.data)

    if split == 'train':
        n.loss4 = L.BalanceCrossEntropyLoss(n.upscore_dsn4,
                                            n.label,
                                            loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn4 = L.Sigmoid(n.upscore_dsn4)

    # DSN5
    n.score_dsn5 = conv1x1(n.concat_conv53,
                           lr=[0.01, 1, 0.02, 0],
                           wf=dict(type='gaussian', std=0.01))
    n.score_dsn5_up = upsample(n.score_dsn5, stride=16)
    n.upscore_dsn5 = L.Crop(n.score_dsn5_up, n.data)

    if split == 'train':
        n.loss5 = L.BalanceCrossEntropyLoss(n.upscore_dsn5,
                                            n.label,
                                            loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn5 = L.Sigmoid(n.upscore_dsn5)


    # concatenation and pass through the attention model
    n.concat_upscore = L.Concat(n.upscore_dsn3,
                                n.upscore_dsn4,
                                n.upscore_dsn5,
                                name='concat',
                                concat_param=dict(concat_dim=1))

    n.output_mask_product03 = L.Convolution(
        n.upscore_dsn3,
        num_output=1,
        kernel_size=3,
        pad=1,
        param=[dict(lr_mult=10, decay_mult=1),
               dict(lr_mult=20, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant'),
        engine=1)
    n.output_mask_product04 = L.Convolution(
        n.upscore_dsn4,
        num_output=1,
        kernel_size=3,
        pad=1,
        param=[dict(lr_mult=10, decay_mult=1),
               dict(lr_mult=20, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant'),
        engine=1)
    n.output_mask_product05 = L.Convolution(
        n.upscore_dsn5,
        num_output=1,
        kernel_size=3,
        pad=1,
        param=[dict(lr_mult=10, decay_mult=1),
               dict(lr_mult=20, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant'),
        engine=1)

    ### attention model
    n.att_conv1_mask_512 = L.Convolution(
        n.concat_upscore,
        num_output=512,
        kernel_size=3,
        pad=1,
        param=[dict(lr_mult=10, decay_mult=1),
               dict(lr_mult=20, decay_mult=0)],
        engine=1)
    n.relu_att_conv1 = L.ReLU(n.att_conv1_mask_512, in_place=True)
    n.drop_att_conv1_mask = L.Dropout(n.relu_att_conv1,
                                      dropout_ratio=0.5,
                                      in_place=True)
    n.att_fc_mask_512 = L.Convolution(
        n.drop_att_conv1_mask,
        num_output=3,
        kernel_size=1,
        param=[dict(lr_mult=10, decay_mult=1),
               dict(lr_mult=20, decay_mult=0)],
        engine=1)
    n.attention = L.Softmax(n.att_fc_mask_512)
    n.attention3, n.attention4, n.attention5 = L.Slice(n.attention,
                                                       name='slice_attention',
                                                       slice_param=dict(
                                                           axis=1,
                                                           slice_point=[1, 2]),
                                                       ntop=3)

    # ---- multiply attention weights ----
    n.output_mask3 = L.Eltwise(n.attention3,
                               n.output_mask_product03,
                               operation=P.Eltwise.PROD)
    n.output_mask4 = L.Eltwise(n.attention4,
                               n.output_mask_product04,
                               operation=P.Eltwise.PROD)
    n.output_mask5 = L.Eltwise(n.attention5,
                               n.output_mask_product05,
                               operation=P.Eltwise.PROD)

    n.output_fusion = L.Eltwise(n.output_mask3,
                                n.output_mask4,
                                n.output_mask5,
                                operation=P.Eltwise.SUM)
    n.upscore_fuse = L.Convolution(n.concat_upscore,
                                   name='new-score-weighting',
                                   num_output=1,
                                   kernel_size=1,
                                   param=[
                                       dict(lr_mult=0.001, decay_mult=1),
                                       dict(lr_mult=0.002, decay_mult=0)
                                   ],
                                   weight_filler=dict(type='constant',
                                                      value=0.2),
                                   engine=1)

    if split == 'test':
        n.sigmoid_fuse = L.Sigmoid(n.upscore_fuse)
        n.sigmoid_output_fusion = L.Sigmoid(n.output_fusion)
    if split == 'train':
        n.loss_fuse = L.BalanceCrossEntropyLoss(n.upscore_fuse,
                                                n.label,
                                                loss_param=loss_param)
        n.loss_output_fusion = L.BalanceCrossEntropyLoss(n.output_fusion,
                                                         n.label,
                                                         loss_param=loss_param)
    return n.to_proto()
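
upsample and conv1x1 come from the surrounding HED-style project. From the call sites, upsample wraps a fixed deconvolution whose kernel size is tied to the stride; a hedged sketch (kernel 2*stride, single-channel output, and lr_mult=0 are assumptions modeled on HED-style nets):

from caffe import layers as L

def upsample(bottom, stride):
    # Fixed (lr_mult=0) deconvolution producing a stride-x upsampled
    # single-channel score map.
    return L.Deconvolution(bottom,
                           convolution_param=dict(num_output=1,
                                                  kernel_size=2 * stride,
                                                  stride=stride,
                                                  bias_term=False),
                           param=[dict(lr_mult=0)])
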
Example no. 22
0
def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split,
                         mean=(104.00699, 116.66877, 122.67892),
                         seed=1337)

    if split == 'train':
        pydata_params[
            'sbdd_dir'] = '/home/wen/caffe-master/semantic/fcn/data/sbdd/benchmark/benchmark_RELEASE/dataset'
        pylayer = 'SBDDSegDataLayer'
    else:
        pydata_params[
            'voc_dir'] = '/home/wen/caffe-master/semantic/fcn/data/pascal/VOC2012'
        pylayer = 'VOCSegDataLayer'
    n.data, n.label = L.Python(module='voc_layers',
                               layer=pylayer,
                               ntop=2,
                               param_str=str(pydata_params))

    n.conv1_1, n.relu1_1 = conv_relu(n.data, 16 * 4, pad=10)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 16 * 4)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 32 * 4)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 32 * 4)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 64 * 4)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 64 * 4)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 64 * 4)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 128 * 4)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 128 * 4)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 128 * 4)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 128 * 4)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 128 * 4)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 128 * 4)
    n.pool5 = max_pool(n.relu5_3)

    n.fc6_new, n.relu6 = conv_relu(n.pool5, 1024, ks=3, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7_new, n.relu7 = conv_relu(n.drop6, 1024, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr_new = L.Convolution(
        n.drop7,
        num_output=21,
        kernel_size=1,
        pad=0,
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant'),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])

    n.upscore_new = L.Deconvolution(n.score_fr_new,
                                    convolution_param=dict(
                                        num_output=21,
                                        kernel_size=128,
                                        stride=8,
                                        bias_term=False,
                                    ),
                                    param=[dict(lr_mult=0)])

    n.score = L.Crop(n.upscore_new, n.data)
    n.loss = L.SoftmaxWithLoss(n.score,
                               n.label,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    return n.to_proto()
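
The final L.Crop only succeeds if the deconvolution output covers the data blob. Caffe computes convolution output as floor((in + 2*pad - k) / stride) + 1 (pooling rounds up instead) and deconvolution output as stride * (in - 1) + k - 2*pad. Two small helpers for sanity-checking that geometry:

def conv_out(in_size, kernel_size, stride=1, pad=0):
    # Convolution output size per spatial dimension (Caffe rounds down).
    return (in_size + 2 * pad - kernel_size) // stride + 1

def deconv_out(in_size, kernel_size, stride=1, pad=0):
    # Deconvolution output size per spatial dimension.
    return stride * (in_size - 1) + kernel_size - 2 * pad
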
Example no. 23
0
def cnn(split):
    n = caffe.NetSpec()

    if split == 'train':
        pydata_params = dict(
            dataset_dir='/home/kevin/dataset/ws_exp/gp_labelled',
            split=split,
            mean=(104.00698793, 116.66876762, 122.67891434),
            seed=1337,
            img_size=(224, 224),
            crop_size=(224, 224, 224, 224))
        pylayer = 'WashingtonDataLayerSS'
        pydata_params['randomize'] = True
        pydata_params['batch_size'] = 64
    elif split == 'test':
        pydata_params = dict(
            dataset_dir='/home/kevin/dataset/washington_rgbd_dataset',
            split=split,
            mean=(104.00698793, 116.66876762, 122.67891434),
            seed=1337,
            img_size=(224, 224),
            crop_size=(224, 224, 224, 224))
        pylayer = 'WashingtonDataLayer'
        pydata_params['randomize'] = False
        pydata_params['batch_size'] = 1
    else:
        # deploy: no Python data layer; take RGB and depth as direct inputs
        # (shapes are assumed to match the 224x224 train/test crops)
        n.rgb = L.Input(name='rgb', shape=dict(dim=[1, 3, 224, 224]))
        n.depth = L.Input(name='depth', shape=dict(dim=[1, 1, 224, 224]))

    #---------------------------------Data Layer---------------------------------------#
    if split != 'deploy':
        n.rgb, n.depth, n.label = L.Python(
            name="data",
            module='data_layers.washington_data_layer',
            layer=pylayer,
            ntop=3,
            param_str=str(pydata_params))

    #n.rgb_crop = L.Python(n.rgb, name="crop_rgb", module='data_layers.random_crop_layer', layer='RandomCropLayer', ntop=1, param_str=str(dict(h=224,w=224)))
    #n.depth_crop = L.Python(n.depth, name="crop_depth", module='data_layers.random_crop_layer', layer='RandomCropLayer', ntop=1, param_str=str(dict(h=227,w=227)))

    #---------------------------------RGB-Net---------------------------------------#

    # the vgg 16 base net
    n.conv1_1, n.relu1_1 = conv_relu("conv1_1", n.rgb, 64, pad=1, lr1=0, lr2=0)
    n.conv1_2, n.relu1_2 = conv_relu("conv1_2", n.relu1_1, 64, lr1=0, lr2=0)
    n.rgb_pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu("conv2_1", n.rgb_pool1, 128, lr1=0, lr2=0)
    n.conv2_2, n.relu2_2 = conv_relu("conv2_2", n.relu2_1, 128, lr1=0, lr2=0)
    n.rgb_pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu("conv3_1", n.rgb_pool2, 256, lr1=0, lr2=0)
    n.conv3_2, n.relu3_2 = conv_relu("conv3_2", n.relu3_1, 256, lr1=0, lr2=0)
    n.conv3_3, n.relu3_3 = conv_relu("conv3_3", n.relu3_2, 256, lr1=0, lr2=0)
    n.rgb_pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu("conv4_1", n.rgb_pool3, 512, lr1=0, lr2=0)
    n.conv4_2, n.relu4_2 = conv_relu("conv4_2", n.relu4_1, 512, lr1=0, lr2=0)
    n.conv4_3, n.relu4_3 = conv_relu("conv4_3", n.relu4_2, 512, lr1=0, lr2=0)
    n.rgb_pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu("conv5_1", n.rgb_pool4, 512, lr1=0, lr2=0)
    n.conv5_2, n.relu5_2 = conv_relu("conv5_2", n.relu5_1, 512, lr1=0, lr2=0)
    n.conv5_3, n.relu5_3 = conv_relu("conv5_3", n.relu5_2, 512, lr1=0, lr2=0)
    n.rgb_pool5 = max_pool(n.relu5_3)

    # fully conv
    n.rgb_fc6, n.rgb_relu6 = fc_relu(n.rgb_pool5, 4096, lr1=0, lr2=0)
    n.rgb_drop6 = L.Dropout(n.rgb_relu6, dropout_ratio=0.5, in_place=True)
    n.rgb_fc7, n.rgb_relu7 = fc_relu(n.rgb_drop6, 4096, lr1=0, lr2=0)
    n.rgb_drop7 = L.Dropout(n.rgb_relu7, dropout_ratio=0.5, in_place=True)

    n.rgb_fc8 = fc(n.rgb_drop7, 51, lr1=0, lr2=0)

    #---------------------------------Depth-Net---------------------------------------#

    # the base net
    n.conv1, n.relu1 = conv_relu("conv1",
                                 n.depth,
                                 128,
                                 ks=5,
                                 stride=2,
                                 pad=2,
                                 lr1=1,
                                 lr2=2)
    n.depth_pool1 = max_pool(n.relu1)
    n.norm1 = L.LRN(n.depth_pool1,
                    lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))

    n.conv2, n.relu2 = conv_relu("conv2",
                                 n.norm1,
                                 256,
                                 ks=5,
                                 stride=1,
                                 pad=2,
                                 lr1=1,
                                 lr2=2)
    n.depth_pool2 = max_pool(n.relu2)
    n.norm2 = L.LRN(n.depth_pool2,
                    lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))

    n.conv3, n.relu3 = conv_relu("conv3",
                                 n.norm2,
                                 384,
                                 ks=3,
                                 pad=1,
                                 group=2,
                                 lr1=1,
                                 lr2=2)
    n.depth_pool3 = max_pool(n.relu3)

    n.conv4, n.relu4 = conv_relu("conv4",
                                 n.depth_pool3,
                                 512,
                                 ks=3,
                                 pad=1,
                                 group=1,
                                 lr1=1,
                                 lr2=2)

    n.conv5, n.relu5 = conv_relu("conv5",
                                 n.relu4,
                                 512,
                                 ks=3,
                                 pad=1,
                                 group=1,
                                 lr1=1,
                                 lr2=2)

    n.depth_pool5 = max_pool(n.relu5)

    n.depth_fc6, n.depth_relu6 = fc_relu(n.depth_pool5, 4096, lr1=1, lr2=2)
    n.depth_drop6 = L.Dropout(n.depth_relu6, dropout_ratio=0.5, in_place=True)
    n.depth_fc7, n.depth_relu7 = fc_relu(n.depth_drop6, 4096, lr1=1, lr2=2)
    n.depth_drop7 = L.Dropout(n.depth_relu7, dropout_ratio=0.5, in_place=True)

    n.depth_fc8 = fc(n.depth_drop7, 51, lr1=1, lr2=2)

    #-----------------------------------final output---------------------------------#
    # Concatenation
    #n.concat = L.Concat(n.rgb_drop7, n.depth_drop7, axis=1)
    #n.rgbd_fc8 = fc(n.concat, 6, lr1=1, lr2=2)

    if split != 'deploy':
        n.rgb_accuracy = L.Accuracy(n.rgb_fc8, n.label)
        n.rgb_loss = L.SoftmaxWithLoss(n.rgb_fc8, n.label)
        n.depth_accuracy = L.Accuracy(n.depth_fc8, n.label)
        n.depth_loss = L.SoftmaxWithLoss(n.depth_fc8, n.label)
    #n.accuracy = L.Accuracy(n.rgbd_fc8, n.label)
    #n.loss = L.SoftmaxWithLoss(n.rgbd_fc8, n.label)

    return n.to_proto()
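
Note that conv_relu here has a different signature from the FCN examples: the layer name comes first, and lr1/lr2 set the weight and bias lr_mult, which is how the whole VGG RGB branch is frozen (lr1=0, lr2=0). A plausible sketch of that helper, with the exact defaults being assumptions:

from caffe import layers as L

def conv_relu(name, bottom, nout, ks=3, stride=1, pad=1, group=1, lr1=1, lr2=2):
    # lr1/lr2 control weight/bias learning rates; 0/0 freezes the layer.
    conv = L.Convolution(bottom, name=name,
                         num_output=nout, kernel_size=ks,
                         stride=stride, pad=pad, group=group,
                         param=[dict(lr_mult=lr1, decay_mult=1),
                                dict(lr_mult=lr2, decay_mult=0)])
    return conv, L.ReLU(conv, in_place=True)
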
Example no. 24
0
def generate_net(lmdb, label_file, PHASE, batch_size):
    net = caffe.NetSpec()

    if(PHASE=="TRAIN"):
        # data layer
        net.data, net.label = caffe.layers.AnnotatedData(ntop=2, include={'phase':caffe.TRAIN}, 
                transform_param=dict(mirror=True, mean_value=[104, 117, 123],
                    resize_param=dict(prob=1.0, resize_mode=caffe.params.Resize.WARP, height=300, width=300, 
                    interp_mode=[caffe.params.Resize.LINEAR,caffe.params.Resize.AREA,caffe.params.Resize.NEAREST,caffe.params.Resize.CUBIC,caffe.params.Resize.LANCZOS4]),
                    emit_constraint=dict(emit_type=0), 
                    distort_param=dict(brightness_prob=0.5, brightness_delta=32.0,
                            contrast_prob=0.5, contrast_lower=0.5, contrast_upper=1.5, hue_prob=0.5, hue_delta=18.0,
                            saturation_prob=0.5, saturation_lower=0.5, saturation_upper=1.5, random_order_prob=0.0),
                    expand_param=dict(prob=0.5, max_expand_ratio=4.0)),

                data_param=dict(source=lmdb, batch_size=batch_size, backend=caffe.params.Data.LMDB), 
               
                annotated_data_param=dict(
                batch_sampler=[dict(max_sample=1, max_trials=1),
                dict(sampler=dict(min_scale=0.3, max_scale=1.0,min_aspect_ratio=0.5, max_aspect_ratio=2.0), 
                    sample_constraint=dict(min_jaccard_overlap=0.1), max_sample=1, max_trials=50),
                dict(sampler=dict(min_scale=0.3, max_scale=1.0,min_aspect_ratio=0.5, max_aspect_ratio=2.0), 
                    sample_constraint=dict(min_jaccard_overlap=0.3), max_sample=1, max_trials=50),
                dict(sampler=dict(min_scale=0.3, max_scale=1.0,min_aspect_ratio=0.5, max_aspect_ratio=2.0), 
                    sample_constraint=dict(min_jaccard_overlap=0.5), max_sample=1, max_trials=50),
                dict(sampler=dict(min_scale=0.3, max_scale=1.0,min_aspect_ratio=0.5, max_aspect_ratio=2.0), 
                    sample_constraint=dict(min_jaccard_overlap=0.7), max_sample=1, max_trials=50),
                dict(sampler=dict(min_scale=0.3, max_scale=1.0,min_aspect_ratio=0.5, max_aspect_ratio=2.0), 
                    sample_constraint=dict(min_jaccard_overlap=0.9), max_sample=1, max_trials=50),
                dict(sampler=dict(min_scale=0.3, max_scale=1.0,min_aspect_ratio=0.5, max_aspect_ratio=2.0), 
                    sample_constraint=dict(min_jaccard_overlap=1.0), max_sample=1, max_trials=50)],
                label_map_file=label_file))
    elif(PHASE=="DEPLOY"):
        net.data = caffe.layers.Input(shape={'dim':[1,3,300,300]})
   
    # bone
    net.conv1 = caffe.layers.Convolution(net.data, num_output=57, kernel_size=3, stride=2, weight_filler={"type":"xavier"},
            param=[dict(lr_mult=1.0,decay_mult=0.0),dict(lr_mult=1.0,decay_mult=0.0)])
    net.relu_conv1 = caffe.layers.ReLU(net.conv1,in_place=True)
    net.pool1 = caffe.layers.Pooling(net.relu_conv1, pool=caffe.params.Pooling.MAX, kernel_size=3, stride=2)
   
    # fire2
    net.tops['fire2/concat'] = fire(net, net.pool1, 'fire2', 15, 49, 53)
    # fire3
    net.tops['fire3/concat'] = fire(net, net.tops['fire2/concat'], 'fire3', 15, 54, 52)
    net.pool3 = caffe.layers.Pooling(net.tops['fire3/concat'], pool=caffe.params.Pooling.MAX, kernel_size=3, stride=2)
    # fire4
    net.tops['fire4/concat'] = fire(net, net.pool3, 'fire4', 29, 92, 94)
    # fire5
    net.tops['fire5/concat'] = fire(net, net.tops['fire4/concat'], 'fire5', 29, 90, 83)
    net.pool5 = caffe.layers.Pooling(net.tops['fire5/concat'], pool=caffe.params.Pooling.MAX, kernel_size=3, stride=2)
    # fire6
    net.tops['fire6/concat'] = fire(net, net.pool5, 'fire6', 44, 166, 161)
    # fire7
    net.tops['fire7/concat'] = fire(net, net.tops['fire6/concat'], 'fire7', 45, 155, 146)
    # fire8
    net.tops['fire8/concat'] = fire(net, net.tops['fire7/concat'], 'fire8', 49, 163, 171)
    # fire9
    net.tops['fire9/concat'] = fire(net, net.tops['fire8/concat'], 'fire9', 25, 29, 54)
    net.pool9 = caffe.layers.Pooling(net.tops['fire9/concat'], pool=caffe.params.Pooling.MAX, kernel_size=3, stride=2)
    # fire10
    net.tops['fire10/concat'] = fire(net, net.pool9, 'fire10', 37, 45, 56)
    net.pool10 = caffe.layers.Pooling(net.tops['fire10/concat'], pool=caffe.params.Pooling.MAX, kernel_size=3, stride=2)
    # fire11
    net.tops['fire11/concat'] = fire(net, net.pool10, 'fire11', 38, 41, 44)

    # conv12
    net.conv12_1 = caffe.layers.Convolution(net.tops['fire11/concat'], param=[dict(lr_mult=1.0, decay_mult=1.0)],
            convolution_param={'num_output':51, 'bias_term':False, 'kernel_size':1, 'weight_filler':{'type':'msra'}})
    net.tops['conv12_1/bn'] = caffe.layers.BatchNorm(net.conv12_1, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)], in_place=True)
    net.tops['conv12_1/scale'] = caffe.layers.Scale(net.tops['conv12_1/bn'], param=[dict(lr_mult=1.0, decay_mult=0.0), dict(lr_mult=2.0, decay_mult=0.0)], 
            scale_param={'filler':{'value':1}, 'bias_term':True, 'bias_filler':{'value':0}}, in_place=True)
    net.tops['conv12_1/relu'] = caffe.layers.ReLU(net.tops['conv12_1/scale'], in_place=True)
    net.conv12_2 = caffe.layers.Convolution(net.tops['conv12_1/relu'], param=[dict(lr_mult=1.0, decay_mult=1.0)],
            convolution_param={'num_output':46, 'bias_term':False, 'pad':1, 'kernel_size':3, 'stride':2, 'weight_filler':{'type':'msra'}})
    net.tops['conv12_2/bn'] = caffe.layers.BatchNorm(net.conv12_2, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)], in_place=True)
    net.tops['conv12_2/scale'] = caffe.layers.Scale(net.tops['conv12_2/bn'], param=[dict(lr_mult=1.0, decay_mult=0.0), dict(lr_mult=2.0, decay_mult=0.0)], 
            scale_param={'filler':{'value':1}, 'bias_term':True, 'bias_filler':{'value':0}}, in_place=True)
    net.tops['conv12_2/relu'] = caffe.layers.ReLU(net.tops['conv12_2/scale'], in_place=True)

    # conv13
    net.conv13_1 = caffe.layers.Convolution(net.tops['conv12_2/relu'], param=[dict(lr_mult=1.0, decay_mult=1.0)],
            convolution_param={'num_output':55, 'bias_term':False, 'kernel_size':1, 'weight_filler':{'type':'msra'}})
    net.tops['conv13_1/bn'] = caffe.layers.BatchNorm(net.conv13_1, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)], in_place=True)
    net.tops['conv13_1/scale'] = caffe.layers.Scale(net.tops['conv13_1/bn'], param=[dict(lr_mult=1.0, decay_mult=0.0), dict(lr_mult=2.0, decay_mult=0.0)], 
            scale_param={'filler':{'value':1}, 'bias_term':True, 'bias_filler':{'value':0}}, in_place=True)
    net.tops['conv13_1/relu'] = caffe.layers.ReLU(net.tops['conv13_1/scale'], in_place=True)
    net.conv13_2 = caffe.layers.Convolution(net.tops['conv13_1/relu'], param=[dict(lr_mult=1.0, decay_mult=1.0)],
            convolution_param={'num_output':85, 'bias_term':False, 'pad':1, 'kernel_size':3, 'stride':2, 'weight_filler':{'type':'msra'}})
    net.tops['conv13_2/bn'] = caffe.layers.BatchNorm(net.conv13_2, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)], in_place=True)
    net.tops['conv13_2/scale'] = caffe.layers.Scale(net.tops['conv13_2/bn'], param=[dict(lr_mult=1.0, decay_mult=0.0), dict(lr_mult=2.0, decay_mult=0.0)], 
            scale_param={'filler':{'value':1}, 'bias_term':True, 'bias_filler':{'value':0}}, in_place=True)
    net.tops['conv13_2/relu'] = caffe.layers.ReLU(net.tops['conv13_2/scale'], in_place=True)


    # fire 5 prior box
    prior_box(net, net.tops['fire5/concat'], 'fire5', 16, 84, 21.0, 45.0, [2.0], 8)
    # fire 9 prior box
    prior_box(net, net.tops['fire9/concat'], 'fire9', 24, 126, 45.0, 99.0, [2.0, 3.0], 16)
    # fire 10 prior box
    prior_box(net, net.tops['fire10/concat'], 'fire10', 24, 126, 99.0, 153.0, [2.0, 3.0], 32)
    # fire 11 prior box
    prior_box(net, net.tops['fire11/concat'], 'fire11', 24, 126, 153.0, 207.0, [2.0, 3.0], 64)
    # conv12_2 prior box
    prior_box(net, net.tops['conv12_2'], 'conv12_2', 24, 126, 207.0, 261.0, [2.0, 3.0], 100)
    # conv13_2 prior box
    prior_box(net, net.tops['conv13_2'], 'conv13_2', 16, 84, 261.0, 315.0, [2.0], 300)

    # last process
    net.tops['mbox_loc'] = caffe.layers.Concat(net.tops['fire5_mbox_loc_flat'], net.tops['fire9_mbox_loc_flat'], net.tops['fire10_mbox_loc_flat'], net.tops['fire11_mbox_loc_flat'],
            net.tops['conv12_2_mbox_loc_flat'], net.tops['conv13_2_mbox_loc_flat'], concat_param={'axis':1})
    net.tops['mbox_conf'] = caffe.layers.Concat(net.tops['fire5_mbox_conf_flat'], net.tops['fire9_mbox_conf_flat'], net.tops['fire10_mbox_conf_flat'], net.tops['fire11_mbox_conf_flat'],
            net.tops['conv12_2_mbox_conf_flat'], net.tops['conv13_2_mbox_conf_flat'], concat_param={'axis':1})
    net.tops['mbox_priorbox'] = caffe.layers.Concat(net.tops['fire5_mbox_priorbox'], net.tops['fire9_mbox_priorbox'], net.tops['fire10_mbox_priorbox'], net.tops['fire11_mbox_priorbox'],
            net.tops['conv12_2_mbox_priorbox'], net.tops['conv13_2_mbox_priorbox'], concat_param={'axis':2})
    if PHASE == 'TRAIN':
        net.tops['mbox_loss'] = caffe.layers.MultiBoxLoss(net.tops['mbox_loc'], net.tops['mbox_conf'], net.tops['mbox_priorbox'], net.label, include={'phase':caffe.TRAIN},
                propagate_down=[True, True, False, False], loss_param={'normalization':caffe.params.Loss.VALID}, multibox_loss_param={'loc_loss_type':caffe.params.MultiBoxLoss.SMOOTH_L1, 
                    'conf_loss_type':caffe.params.MultiBoxLoss.SOFTMAX, 'loc_weight':1.0, 
                    'num_classes':21, 'share_location':True, 'match_type':caffe.params.MultiBoxLoss.PER_PREDICTION, 'overlap_threshold':0.5, 'use_prior_for_matching':True, 
                    'background_label_id':0, 'use_difficult_gt':True, 'neg_pos_ratio':3.0, 'neg_overlap':0.5, 
                    'code_type':caffe.params.PriorBox.CENTER_SIZE, 'ignore_cross_boundary_bbox':False, 'mining_type':caffe.params.MultiBoxLoss.MAX_NEGATIVE})
    elif PHASE == 'DEPLOY':
        net.tops['mbox_conf_reshape'] = caffe.layers.Reshape(net.tops['mbox_conf'], reshape_param={'shape':{'dim':[0,-1,21]}})
        net.tops['mbox_conf_softmax'] = caffe.layers.Softmax(net.tops['mbox_conf_reshape'], softmax_param={'axis':2})
        net.tops['mbox_conf_flatten'] = caffe.layers.Flatten(net.tops['mbox_conf_softmax'], flatten_param={'axis':1})
        net.tops['detection_out'] = caffe.layers.DetectionOutput(net.tops['mbox_loc'], net.tops['mbox_conf_flatten'], net.tops['mbox_priorbox'], include={
            'phase':caffe.TEST}, detection_output_param={'num_classes':21, 'share_location':True, 'background_label_id':0, 
            'nms_param':{'nms_threshold':0.45, 'top_k':100}, 'code_type':caffe.params.PriorBox.CENTER_SIZE, 'keep_top_k':100, 'confidence_threshold':0.25})

    return str(net.to_proto())
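
fire and prior_box are project helpers. The fire block is the standard SqueezeNet module: a 1x1 squeeze convolution followed by parallel 1x1 and 3x3 expand convolutions whose outputs are concatenated. A hedged sketch matching the call signature fire(net, bottom, name, n_squeeze, n_expand1x1, n_expand3x3), with the expand ReLUs omitted for brevity:

import caffe

def fire(net, bottom, name, n_squeeze, n_expand1x1, n_expand3x3):
    # 1x1 squeeze
    net.tops[name + '/squeeze1x1'] = caffe.layers.Convolution(
        bottom, num_output=n_squeeze, kernel_size=1,
        weight_filler={'type': 'xavier'})
    net.tops[name + '/relu_squeeze1x1'] = caffe.layers.ReLU(
        net.tops[name + '/squeeze1x1'], in_place=True)
    # parallel 1x1 and 3x3 expands
    net.tops[name + '/expand1x1'] = caffe.layers.Convolution(
        net.tops[name + '/relu_squeeze1x1'], num_output=n_expand1x1,
        kernel_size=1, weight_filler={'type': 'xavier'})
    net.tops[name + '/expand3x3'] = caffe.layers.Convolution(
        net.tops[name + '/relu_squeeze1x1'], num_output=n_expand3x3,
        kernel_size=3, pad=1, weight_filler={'type': 'xavier'})
    # channel-wise concatenation is the block output
    net.tops[name + '/concat'] = caffe.layers.Concat(
        net.tops[name + '/expand1x1'], net.tops[name + '/expand3x3'])
    return net.tops[name + '/concat']
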
Example no. 25
0
File: net.py Project: gadkins/caffe
def fcn(obj_cls, part, split):
    n = caffe.NetSpec()
    n.data, n.label = L.Python(
        module='pascalpart_layers',
        layer='PASCALPartSegDataLayer',
        ntop=2,
        param_str=str(
            dict(voc_dir='/home/cv/hdl/caffe/data/pascal/VOC',
                 part_dir='/home/cv/hdl/caffe/data/pascal/pascal-part',
                 obj_cls=obj_cls,
                 part=part,
                 split=split,
                 seed=1337)))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(
        n.drop7,
        num_output=11,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore2 = L.Deconvolution(n.score_fr,
                                 convolution_param=dict(num_output=11,
                                                        kernel_size=4,
                                                        stride=2,
                                                        bias_term=False),
                                 param=[dict(lr_mult=0)])

    n.score_pool4 = L.Convolution(
        n.pool4,
        num_output=11,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.score_pool4c = crop(n.score_pool4, n.upscore2)
    n.fuse_pool4 = L.Eltwise(n.upscore2,
                             n.score_pool4c,
                             operation=P.Eltwise.SUM)
    n.upscore16 = L.Deconvolution(n.fuse_pool4,
                                  convolution_param=dict(num_output=11,
                                                         kernel_size=32,
                                                         stride=16,
                                                         bias_term=False),
                                  param=[dict(lr_mult=0)])

    n.score = crop(n.upscore16, n.data)
    n.loss = L.SoftmaxWithLoss(n.score,
                               n.label,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    return n.to_proto()
Example no. 26
0
def convert_symbol2proto(symbol):
    def looks_like_weight(name):
        """Internal helper to figure out if node should be hidden with `hide_weights`.
        """
        if name.endswith("_weight"):
            return True
        if name.endswith("_bias"):
            return True
        if name.endswith("_beta") or name.endswith("_gamma") or name.endswith("_moving_var") or name.endswith(
                "_moving_mean"):
            return True
        return False

    json_symbol = json.loads(symbol.tojson())
    all_nodes = json_symbol['nodes']
    no_weight_nodes = []
    for node in all_nodes:
        op = node['op']
        name = node['name']
        if op == 'null':
            if looks_like_weight(name):
                continue
        no_weight_nodes.append(node)

    # build next node dict
    next_node = dict()
    for node in no_weight_nodes:
        node_name = node['name']
        for input in node['inputs']:
            last_node_name = all_nodes[input[0]]['name']
            if last_node_name in next_node:
                next_node[last_node_name].append(node_name)
            else:
                next_node[last_node_name] = [node_name]

    supported_op_type = ['null', 'BatchNorm', 'Convolution', 'Activation', 'Pooling', 'elemwise_add', 'SliceChannel',
                         'FullyConnected', 'SoftmaxOutput', '_maximum', 'add_n', 'Concat', '_mul_scalar', 'Deconvolution', 'UpSampling']
    top_dict = dict()
    caffe_net = caffe.NetSpec()
    for node in no_weight_nodes:
        if node['op'] == 'null':
            input_param = dict()
            if node['name'] == 'data':
                input_param['shape'] = dict(dim=[1, 3, 160, 160])
            else:
                input_param['shape'] = dict(dim=[1])
            top_data = CL.Input(ntop=1, input_param=input_param)
            top_dict[node['name']] = [top_data]
            setattr(caffe_net, node['name'], top_data)
        elif node['op'].endswith('_copy'):
            pass
        elif node['op'] == 'BatchNorm':
            # Walk back through unsupported ops to the real bottom blob
            # (the same back-tracking pattern repeats for every layer below).
            input = node['inputs'][0]
            while all_nodes[input[0]]['op'] not in supported_op_type:
                input = all_nodes[input[0]]['inputs'][0]
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            # In-place is only safe when the bottom blob has a single consumer.
            in_place = len(next_node[bottom_node_name]) == 1 and not NO_INPLACE
            momentum = float(attr.get('momentum', 0.9))
            eps = float(attr.get('eps', 0.001))
            bn_top = CL.BatchNorm(top_dict[bottom_node_name][input[1]], ntop=1,
                                  batch_norm_param=dict(use_global_stats=True,
                                                        moving_average_fraction=momentum,
                                                        eps=eps), in_place=in_place)
            setattr(caffe_net, node['name'], bn_top)
            # MXNet's BatchNorm fuses scale/shift; Caffe needs a separate Scale layer.
            scale_top = CL.Scale(bn_top, ntop=1, scale_param=dict(bias_term=True),
                                 in_place=not NO_INPLACE)
            top_dict[node['name']] = [scale_top]
            setattr(caffe_net, node['name'] + '_scale', scale_top)
        elif node['op'] == 'Convolution':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'kernel' in attr:
                kernel_size = eval(attr['kernel'])
                assert kernel_size[0] == kernel_size[1]
                convolution_param['kernel_size'] = kernel_size[0]
            else:
                convolution_param['kernel_size'] = 1
            if 'no_bias' in attr:
                convolution_param['bias_term'] = not eval(attr['no_bias'])
            if 'num_group' in attr:
                convolution_param['group'] = int(attr['num_group'])
            convolution_param['num_output'] = int(attr['num_filter'])
            if 'pad' in attr:
                pad_size = eval(attr['pad'])
                assert pad_size[0] == pad_size[1]
                convolution_param['pad'] = pad_size[0]
            if 'stride' in attr:
                stride_size = eval(attr['stride'])
                assert stride_size[0] == stride_size[1]
                convolution_param['stride'] = stride_size[0]
            conv_top = CL.Convolution(top_dict[bottom_node_name][input[1]], ntop=1, convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'Deconvolution':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'kernel' in attr:
                kernel_size = eval(attr['kernel'])
                assert kernel_size[0] == kernel_size[1]
                convolution_param['kernel_size'] = kernel_size[0]
            else:
                convolution_param['kernel_size'] = 1
            if 'no_bias' in attr:
                convolution_param['bias_term'] = not eval(attr['no_bias'])
            else:
                convolution_param['bias_term'] = False
            if 'num_group' in attr:
                convolution_param['group'] = int(attr['num_group'])
            convolution_param['num_output'] = int(attr['num_filter'])
            if 'pad' in attr:
                pad_size = eval(attr['pad'])
                assert pad_size[0] == pad_size[1]
                convolution_param['pad'] = pad_size[0]
            if 'stride' in attr:
                stride_size = eval(attr['stride'])
                assert stride_size[0] == stride_size[1]
                convolution_param['stride'] = stride_size[0]
            conv_top = CL.Deconvolution(top_dict[bottom_node_name][input[1]], ntop=1, convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'UpSampling':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            # UpSampling maps to a grouped deconvolution with the standard
            # bilinear sizing: kernel = 2s - (s mod 2), pad = ceil((s-1)/2),
            # stride = s. (The original crashed when 'scale' was absent.)
            if 'scale' in attr:
                scale = int(attr['scale'])
                convolution_param['kernel_size'] = 2 * scale - scale % 2
            else:
                scale = 1
                convolution_param['kernel_size'] = 1
            convolution_param['bias_term'] = False
            convolution_param['num_output'] = int(attr['num_filter'])
            convolution_param['group'] = int(attr['num_filter'])
            convolution_param['pad'] = int(math.ceil((scale - 1) / 2.))
            convolution_param['stride'] = scale
            conv_top = CL.Deconvolution(top_dict[bottom_node_name][input[1]], ntop=1,
                                        convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'Activation':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if NO_INPLACE:
                in_place = False
            if attr['act_type'] == 'relu':
                ac_top = CL.ReLU(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            elif attr['act_type'] == 'sigmoid':
                ac_top = CL.Sigmoid(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            elif attr['act_type'] == 'tanh':
                ac_top = CL.TanH(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            else:
                # Previously fell through with ac_top unbound (NameError).
                raise ValueError('unsupported act_type: %s' % attr['act_type'])
            top_dict[node['name']] = [ac_top]
            setattr(caffe_net, node['name'], ac_top)
        elif node['op'] == 'Pooling':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            pooling_param = dict()
            if attr['pool_type'] == 'avg':
                pooling_param['pool'] = 1  # PoolingParameter.AVE
            elif attr['pool_type'] == 'max':
                pooling_param['pool'] = 0  # PoolingParameter.MAX
            else:
                assert False, 'unsupported pool_type: %s' % attr['pool_type']
            if 'global_pool' in attr and eval(attr['global_pool']) is True:
                pooling_param['global_pooling'] = True
            else:
                if 'kernel' in attr:
                    kernel_size = eval(attr['kernel'])
                    assert kernel_size[0] == kernel_size[1]
                    pooling_param['kernel_size'] = kernel_size[0]
                if 'pad' in attr:
                    pad_size = eval(attr['pad'])
                    assert pad_size[0] == pad_size[1]
                    pooling_param['pad'] = pad_size[0]
                if 'stride' in attr:
                    stride_size = eval(attr['stride'])
                    assert stride_size[0] == stride_size[1]
                    pooling_param['stride'] = stride_size[0]
            pool_top = CL.Pooling(top_dict[bottom_node_name][input[1]], ntop=1, pooling_param=pooling_param)
            top_dict[node['name']] = [pool_top]
            setattr(caffe_net, node['name'], pool_top)
        elif node['op'] == 'elemwise_add' or node['op'] == 'add_n':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            eltwise_param = dict()
            eltwise_param['operation'] = 1  # EltwiseParameter.SUM
            ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]], top_dict[bottom_node_name_b][input_b[1]],
                                     ntop=1, eltwise_param=eltwise_param)
            top_dict[node['name']] = [ele_add_top]
            setattr(caffe_net, node['name'], ele_add_top)
        elif node['op'] == '_maximum':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            eltwise_param = dict()
            eltwise_param['operation'] = 2  # EltwiseParameter.MAX
            ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]], top_dict[bottom_node_name_b][input_b[1]],
                                     ntop=1, eltwise_param=eltwise_param)
            top_dict[node['name']] = [ele_add_top]
            setattr(caffe_net, node['name'], ele_add_top)
        elif node['op'] == '_mul_scalar':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if NO_INPLACE:
                in_place = False

            # The original hard-coded a filler value of -1, which only covers
            # multiplication by -1; use the actual scalar from the attributes.
            # (CL.Power with scale=float(attr['scalar']) is an equivalent mapping.)
            scale_top = CL.Scale(top_dict[bottom_node_name][input[1]], ntop=1,
                                 scale_param=dict(bias_term=False,
                                                  filler=dict(value=float(attr['scalar']))),
                                 in_place=in_place)

            top_dict[node['name']] = [scale_top]
            setattr(caffe_net, node['name'], scale_top)
        elif node['op'] == 'SliceChannel':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            slice_param = dict()
            slice_param['slice_dim'] = 1
            # The original hard-coded two outputs; honor num_outputs when present.
            slice_num = int(node.get('attrs', {}).get('num_outputs', 2))
            slice_outputs = CL.Slice(top_dict[bottom_node_name][input[1]], ntop=slice_num, slice_param=slice_param)
            top_dict[node['name']] = slice_outputs
            for idx, output in enumerate(slice_outputs):
                setattr(caffe_net, node['name'] + '_' + str(idx), output)
        elif node['op'] == 'FullyConnected':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            inner_product_param = dict()
            inner_product_param['num_output'] = int(attr['num_hidden'])
            fc_top = CL.InnerProduct(top_dict[bottom_node_name][input[1]], ntop=1,
                                     inner_product_param=inner_product_param)
            top_dict[node['name']] = [fc_top]
            setattr(caffe_net, node['name'], fc_top)
        elif node['op'] == 'SoftmaxOutput':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            softmax_loss = CL.SoftmaxWithLoss(top_dict[bottom_node_name_a][input_a[1]],
                                              top_dict[bottom_node_name_b][input_b[1]], ntop=1)
            top_dict[node['name']] = [softmax_loss]
            setattr(caffe_net, node['name'], softmax_loss)
        elif node['op'] == 'Concat':
            # Generalized from the original two-/three-input special cases:
            # back-track each input and concatenate however many there are.
            bottoms = []
            for inp in node['inputs']:
                while all_nodes[inp[0]]['op'] not in supported_op_type:
                    inp = all_nodes[inp[0]]['inputs'][0]
                bottoms.append(top_dict[all_nodes[inp[0]]['name']][inp[1]])
            concat_top = CL.Concat(*bottoms, ntop=1)
            top_dict[node['name']] = [concat_top]
            setattr(caffe_net, node['name'], concat_top)
        else:
            logging.warning('unknown op type = %s', node['op'])

    return caffe_net.to_proto()
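
A minimal sketch of how this converter might be driven, assuming both mxnet and pycaffe are importable and an MXNet version whose graph JSON uses the 'attrs' key, as the converter expects (the toy symbol below is an illustration, not from the original code):

import mxnet as mx

# Build a toy MXNet symbol and convert its graph to a Caffe NetSpec proto.
data = mx.sym.Variable('data')
conv = mx.sym.Convolution(data=data, num_filter=8, kernel=(3, 3), pad=(1, 1), name='conv0')
act = mx.sym.Activation(data=conv, act_type='relu', name='relu0')

proto = convert_symbol2proto(act)
with open('converted.prototxt', 'w') as f:
    f.write(str(proto))
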
# Requires: import caffe; from caffe import layers as L;
# from caffe.proto import caffe_pb2; plus the add_downsampling_block /
# add_skip_block helpers defined elsewhere in this project.
def create_neural_net(input_file, batch_size=50):
    net = caffe.NetSpec()
    net.data, net.label = L.Data(batch_size=batch_size,
                                 source=input_file,
                                 backend=caffe.params.Data.LMDB,
                                 ntop=2,
                                 include=dict(phase=caffe.TEST),
                                 name='juniward04')

    ## pre-processing: fixed DCT filter bank followed by quantize-truncate-abs
    ## (both the 'dct4' filler and the QuantTruncAbs layer are custom
    ## extensions, not available in mainline Caffe)
    net.conv1 = L.Convolution(net.data,
                              num_output=16,
                              kernel_size=4,
                              stride=1,
                              pad=1,
                              weight_filler=dict(type='dct4'),
                              param=[{
                                  'lr_mult': 0,
                                  'decay_mult': 0
                              }],
                              bias_term=False)
    TRUNCABS = caffe_pb2.QuantTruncAbsParameter.TRUNCABS
    net.quanttruncabs = L.QuantTruncAbs(net.conv1,
                                        process=TRUNCABS,
                                        threshold=8,
                                        in_place=True)

    ## block 1
    [
        net.conv1_proj, net.bn2, net.scale2, net.conv512_1, net.bn2_1,
        net.scale2_1, net.relu512_1, net.conv512_to_256, net.bn2_2,
        net.scale2_2, net.res512_to_256, net.relu512_to_256
    ] = add_downsampling_block(net.quanttruncabs, 12)
    ## block 2
    [
        net.conv256_1, net.bn2_3, net.scale2_3, net.relu256_1, net.conv256_2,
        net.bn2_4, net.scale2_4, net.res256_2, net.relu256_2
    ] = add_skip_block(net.res512_to_256, 24)
    ## block 2_1
    [
        net.conv256_4, net.bn3_1, net.scale3_1, net.relu256_4, net.conv256_5,
        net.bn3_2, net.scale3_2, net.res256_5, net.relu256_5
    ] = add_skip_block(net.res256_2, 24)
    ## block 2_2
    [
        net.conv256_6, net.bn4_1, net.scale4_1, net.relu256_6, net.conv256_7,
        net.bn4_2, net.scale4_2, net.res256_7, net.relu256_7
    ] = add_skip_block(net.res256_5, 24)
    ## block 3
    [
        net.res256_2_proj, net.bn2_5, net.scale2_5, net.conv256_3, net.bn2_6,
        net.scale2_6, net.relu256_3, net.conv256_to_128, net.bn2_7,
        net.scale2_7, net.res256_to_128, net.relu256_to_128
    ] = add_downsampling_block(net.res256_7, 24)
    ## block 4 cur
    [
        net.conv128_1, net.bn2_8, net.scale2_8, net.relu128_1, net.conv128_2,
        net.bn2_9, net.scale2_9, net.res128_2, net.relu128_2
    ] = add_skip_block(net.res256_to_128, 48)
    ## block 4_1
    [
        net.conv128_4, net.bn3_3, net.scale3_3, net.relu128_4, net.conv128_5,
        net.bn3_4, net.scale3_4, net.res128_5, net.relu128_5
    ] = add_skip_block(net.res128_2, 48)
    ## block 4_2
    [
        net.conv128_6, net.bn4_3, net.scale4_3, net.relu128_6, net.conv128_7,
        net.bn4_4, net.scale4_4, net.res128_7, net.relu128_7
    ] = add_skip_block(net.res128_5, 48)
    ## block 5
    [
        net.res128_2_proj, net.bn2_10, net.scale2_10, net.conv128_3,
        net.bn2_11, net.scale2_11, net.relu128_3, net.conv128_to_64,
        net.bn2_12, net.scale2_12, net.res128_to_64, net.relu128_to_64
    ] = add_downsampling_block(net.res128_7, 48)
    ## block 6
    [
        net.conv64_1, net.bn2_13, net.scale2_13, net.relu64_1, net.conv64_2,
        net.bn2_14, net.scale2_14, net.res64_2, net.relu64_2
    ] = add_skip_block(net.res128_to_64, 96)
    ## block 6_1
    [
        net.conv64_4, net.bn3_5, net.scale3_5, net.relu64_4, net.conv64_5,
        net.bn3_6, net.scale3_6, net.res64_5, net.relu64_5
    ] = add_skip_block(net.res64_2, 96)
    ## block 6_2
    [
        net.conv64_6, net.bn4_5, net.scale4_5, net.relu64_6, net.conv64_7,
        net.bn4_6, net.scale4_6, net.res64_7, net.relu64_7
    ] = add_skip_block(net.res64_5, 96)
    ## block 7
    [
        net.res64_2_proj, net.bn2_15, net.scale2_15, net.conv64_3, net.bn2_16,
        net.scale2_16, net.relu64_3, net.conv64_to_32, net.bn2_17,
        net.scale2_17, net.res64_to_32, net.relu64_to_32
    ] = add_downsampling_block(net.res64_7, 96)
    ## block 8
    [
        net.conv32_1, net.bn2_18, net.scale2_18, net.relu32_1, net.conv32_2,
        net.bn2_19, net.scale2_19, net.res32_2, net.relu32_2
    ] = add_skip_block(net.res64_to_32, 192)
    ## block 8_1
    [
        net.conv32_4, net.bn3_7, net.scale3_7, net.relu32_4, net.conv32_5,
        net.bn3_8, net.scale3_8, net.res32_5, net.relu32_5
    ] = add_skip_block(net.res32_2, 192)
    ## block 8_2
    [
        net.conv32_6, net.bn4_7, net.scale4_7, net.relu32_6, net.conv32_7,
        net.bn4_8, net.scale4_8, net.res32_7, net.relu32_7
    ] = add_skip_block(net.res32_5, 192)
    ## block 9
    [
        net.res32_2_proj, net.bn2_20, net.scale2_20, net.conv32_3, net.bn2_21,
        net.scale2_21, net.relu32_3, net.conv32_to_16, net.bn2_22,
        net.scale2_22, net.res32_to_16, net.relu32_to_16
    ] = add_downsampling_block(net.res32_7, 192)
    ## block 10
    [
        net.conv16_1, net.bn2_23, net.scale2_23, net.relu16_1, net.conv16_2,
        net.bn2_24, net.scale2_24, net.res16_2, net.relu16_2
    ] = add_skip_block(net.res32_to_16, 384)
    ## block 10_1
    [
        net.conv16_3, net.bn3_9, net.scale3_9, net.relu16_3, net.conv16_4,
        net.bn3_10, net.scale3_10, net.res16_4, net.relu16_4
    ] = add_skip_block(net.res16_2, 384)
    ## block 10_2 (disabled)
    # [net.conv16_5, net.bn4_9, net.scale4_9, net.relu16_5, net.conv16_6, net.bn4_10,
    #  net.scale4_10, net.res16_6, net.relu16_6] = add_skip_block(net.res16_4, 384)

    ## global pool
    AVE = caffe_pb2.PoolingParameter.AVE
    net.global_pool = L.Pooling(net.res16_4, pool=AVE, kernel_size=8, stride=1)

    ## fully connected
    net.fc = L.InnerProduct(net.global_pool,
                            param=[{
                                'lr_mult': 1
                            }, {
                                'lr_mult': 2
                            }],
                            num_output=2,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'))
    ## accuracy
    net.accuracy = L.Accuracy(net.fc,
                              net.label,
                              include=dict(phase=caffe.TEST))
    ## loss
    net.loss = L.SoftmaxWithLoss(net.fc, net.label)

    return net.to_proto()
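
A sketch of how this definition would typically be serialized (the LMDB path and file name are assumptions, and the custom layers above mean the resulting prototxt only loads in a Caffe build that provides them):

# Hypothetical usage: write the TEST-phase network definition to disk.
with open('steg_test.prototxt', 'w') as f:
    f.write(str(create_neural_net('/path/to/test_lmdb', batch_size=40)))
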
Example n. 28
# Requires the conv_relu / max_pool / conv1x1 / upsample / crop helpers
# defined elsewhere in this project.
def net(split):
    n = caffe.NetSpec()
    loss_param = dict(normalize=False)  # defined but not passed to the losses below
    if split == 'train':
        data_params = dict(mean=(104.00699, 116.66877, 122.67892))
        data_params['root'] = '/home/liang/rcf/data/HED-BSDS'
        data_params['source'] = 'bsds_pascal_train_pair.lst'
        data_params['shuffle'] = True
        n.data, n.label = L.Python(module='ImageLabelmapData',
                                   layer='ImageLabelmapDataLayer',
                                   ntop=2,
                                   param_str=str(data_params))
    elif split == 'test':
        n.data = L.Input(name='data',
                         input_param=dict(shape=dict(dim=[1, 3, 500, 500])))
    else:
        raise ValueError('Invalid split: %s' % split)

    # VGG-16 backbone
    n.conv1_1, n.relu1_1 = conv_relu(n.data, num_out=64)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, num_out=64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, num_out=128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, num_out=128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, num_out=256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, num_out=256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, num_out=256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, num_out=512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, num_out=512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, num_out=512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, num_out=512, lr=[100, 1, 200, 0])
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, num_out=512, lr=[100, 1, 200, 0])
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, num_out=512, lr=[100, 1, 200, 0])

    # Side output 1 (from conv1_2, full resolution)
    n.dsn1 = conv1x1(n.conv1_2)
    n.dsn1_crop = crop(n.dsn1, n.data)
    if split == 'train':
        n.dsn1_loss = L.SigmoidCrossEntropyLoss(n.dsn1_crop, n.label)
    else:
        n.sigmoid_dsn1 = L.Sigmoid(n.dsn1_crop)

    # Side output 2 (upsample x2)
    n.dsn2 = conv1x1(n.conv2_2)
    n.dsn2_up = upsample(n.dsn2, stride=2)
    n.dsn2_crop = crop(n.dsn2_up, n.data)
    if split == 'train':
        n.dsn2_loss = L.SigmoidCrossEntropyLoss(n.dsn2_crop, n.label)
    else:
        n.sigmoid_dsn2 = L.Sigmoid(n.dsn2_crop)

    # Side output 3 (upsample x4)
    n.dsn3 = conv1x1(n.conv3_3)
    n.dsn3_up = upsample(n.dsn3, stride=4)
    n.dsn3_crop = crop(n.dsn3_up, n.data)
    if split == 'train':
        n.dsn3_loss = L.SigmoidCrossEntropyLoss(n.dsn3_crop, n.label)
    else:
        n.sigmoid_dsn3 = L.Sigmoid(n.dsn3_crop)

    # Side output 4 (upsample x8)
    n.dsn4 = conv1x1(n.conv4_3)
    n.dsn4_up = upsample(n.dsn4, stride=8)
    n.dsn4_crop = crop(n.dsn4_up, n.data)
    if split == 'train':
        n.dsn4_loss = L.SigmoidCrossEntropyLoss(n.dsn4_crop, n.label)
    else:
        n.sigmoid_dsn4 = L.Sigmoid(n.dsn4_crop)

    # Side output 5 (upsample x16)
    n.dsn5 = conv1x1(n.conv5_3)
    n.dsn5_up = upsample(n.dsn5, stride=16)
    n.dsn5_crop = crop(n.dsn5_up, n.data)
    if split == 'train':
        n.dsn5_loss = L.SigmoidCrossEntropyLoss(n.dsn5_crop, n.label)
    else:
        n.sigmoid_dsn5 = L.Sigmoid(n.dsn5_crop)

    # Fuse the five side outputs with a fixed-init 1x1 convolution
    n.concat_5 = L.Concat(n.dsn1_crop, n.dsn2_crop, n.dsn3_crop, n.dsn4_crop,
                          n.dsn5_crop, name='concat',
                          concat_param=dict(concat_dim=1))
    n.dsn = L.Convolution(n.concat_5, name='dsn', num_output=1, kernel_size=1,
                          param=[dict(lr_mult=0.001, decay_mult=1),
                                 dict(lr_mult=0.002, decay_mult=0)],
                          weight_filler=dict(type='constant', value=0.2))
    if split == 'train':
        n.fuse_loss = L.SigmoidCrossEntropyLoss(n.dsn, n.label)
    else:
        n.sigmoid_fuse = L.Sigmoid(n.dsn)
    return n.to_proto()
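
As above, a minimal driver for this definition (file names are assumptions; the train net additionally needs the custom ImageLabelmapData Python layer on PYTHONPATH):

# Hypothetical usage: emit prototxt files for both splits.
for split in ('train', 'test'):
    with open('hed_%s.prototxt' % split, 'w') as f:
        f.write(str(net(split)))
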
# Requires the conv_relu / fc_relu / max_pool / to_tempfile helpers and the
# learned_param default defined elsewhere in this project.
def vgg_lowmem(data,
               labels=None,
               train=False,
               param=learned_param,
               num_classes=100,
               with_labels=True):
    """
    Returns a protobuf text file specifying a variant of VGG
        - The Fully Connected (FC) layers (fc6 and fc7) have smaller dimensions
          due to the lower resolution of mini-places images (128x128) compared
          with ImageNet images (usually resized to 256x256)
    """
    n = caffe.NetSpec()
    n.data = data
    conv_kwargs = dict(param=param, train=train)
    n.conv1_1, n.relu1_1 = conv_relu(n.data,
                                     3,
                                     64,
                                     stride=1,
                                     pad=1,
                                     **conv_kwargs)
    # (The original defined conv1_1 twice; the second stage is conv1_2.)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1,
                                     3,
                                     64,
                                     stride=1,
                                     pad=1,
                                     **conv_kwargs)
    n.pool1 = max_pool(n.relu1_2, 3, stride=2, train=train)
    n.conv2, n.relu2 = conv_relu(n.pool1,
                                 5,
                                 256,
                                 pad=2,
                                 group=2,
                                 **conv_kwargs)
    n.pool2 = max_pool(n.relu2, 3, stride=2, train=train)
    n.conv3, n.relu3 = conv_relu(n.pool2, 3, 384, pad=1, **conv_kwargs)
    n.conv4, n.relu4 = conv_relu(n.relu3,
                                 3,
                                 384,
                                 pad=1,
                                 group=2,
                                 **conv_kwargs)
    n.conv5, n.relu5 = conv_relu(n.relu4,
                                 3,
                                 256,
                                 pad=1,
                                 group=2,
                                 **conv_kwargs)
    n.pool5 = max_pool(n.relu5, 3, stride=2, train=train)

    n.fc6, n.relu6 = fc_relu(n.pool5, 1024, param=param)
    n.drop6 = L.Dropout(n.relu6, in_place=True)

    n.fc7, n.relu7 = fc_relu(n.drop6, 1024, param=param)
    n.drop7 = L.Dropout(n.relu7, in_place=True)

    n.fc8 = L.InnerProduct(n.drop7, num_output=num_classes, param=param)

    preds = n.fc8

    if not train:
        # Compute the per-label probabilities at test/inference time.
        preds = n.probs = L.Softmax(n.fc8)
    if with_labels:
        n.label = labels
        n.loss = L.SoftmaxWithLoss(n.fc8, n.label)
        n.accuracy_at_1 = L.Accuracy(preds, n.label)
        n.accuracy_at_5 = L.Accuracy(preds,
                                     n.label,
                                     accuracy_param=dict(top_k=5))
    else:
        n.ignored_label = labels
        n.silence_label = L.Silence(n.ignored_label, ntop=0)
    return to_tempfile(str(n.to_proto()))
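
A sketch of building a test-time copy of this net with dummy inputs (the DummyData shapes and the 128x128 resolution follow the docstring; the variable names are assumptions):

# Hypothetical usage: two dummy tops stand in for data and labels.
data, labels = L.DummyData(shape=[dict(dim=[50, 3, 128, 128]),
                                  dict(dim=[50])],
                           ntop=2)
proto_path = vgg_lowmem(data, labels=labels, train=False)
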
Example n. 30
# (data_shape, _power, _scale and _shift are defined in the surrounding scope.)
def net():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.Power(n.data, power=_power, scale=_scale, shift=_shift)
    return n.to_proto()
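
For reference, Caffe's Power layer computes y = (shift + scale * x) ** power elementwise, so a NumPy check of the net above could look like this (the sample array is an assumption):

import numpy as np

# NumPy reference for the Power layer: y = (shift + scale * x) ** power.
def power_reference(x, power=1.0, scale=1.0, shift=0.0):
    return (shift + scale * x) ** power

print(power_reference(np.array([1.0, 2.0]), power=2.0, scale=3.0, shift=1.0))
# -> [16. 49.]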