Example #1
def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split,
                         mean=(104.00699, 116.66877, 122.67892),
                         seed=1337)
    if split == 'train':
        pydata_params['sbdd_dir'] = '../data/sbdd/dataset'
        pylayer = 'SBDDSegDataLayer'
    else:
        pydata_params['voc_dir'] = '../data/pascal/VOC2012'
        pylayer = 'VOCSegDataLayer'
    n.data, n.label = L.Python(module='voc_layers',
                               layer=pylayer,
                               ntop=2,
                               param_str=str(pydata_params))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
    n.score_fr = L.Convolution(
        n.drop7,
        num_output=21,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore = L.Deconvolution(n.score_fr,
                                convolution_param=dict(num_output=21,
                                                       kernel_size=64,
                                                       stride=32,
                                                       bias_term=False),
                                param=[dict(lr_mult=0)])
    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(n.score,
                               n.label,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    return n.to_proto()
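The snippets on this page call conv_relu, max_pool, and crop without defining them. A minimal sketch of those helpers, following the FCN reference code (the exact defaults are an assumption, and some later examples use richer variants that also take dilation, group, or a layer name):

import caffe
from caffe import layers as L, params as P
from caffe.coord_map import crop  # crop(top, reference) aligns two blobs

def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
    # convolution followed by an in-place ReLU
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad,
                         param=[dict(lr_mult=1, decay_mult=1),
                                dict(lr_mult=2, decay_mult=0)])
    return conv, L.ReLU(conv, in_place=True)

def max_pool(bottom, ks=2, stride=2):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)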
Example #2
def fcn(train, mask, batch_size=8):
    n = caffe.NetSpec()
    # n.data, n.sem, n.geo = L.Python(module='siftflow_layers',
    #         layer='SIFTFlowSegDataLayer', ntop=3,
    #         param_str=str(dict(siftflow_dir='../data/sift-flow',
    #             split=split, seed=1337)))

    n.data = L.Data(backend=P.Data.LMDB,
                    batch_size=batch_size,
                    source=train,
                    transform_param=dict(scale=1. / 255),
                    ntop=1)

    n.geo = L.Data(backend=P.Data.LMDB,
                   batch_size=batch_size,
                   source=mask,
                   ntop=1)
    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv

    dropout = True
    Deconv_filters = 300
    if dropout:
        n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
        n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
        n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
        n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
        n.score_fr_geo = L.Convolution(n.drop7,
                                       num_output=Deconv_filters,
                                       kernel_size=1,
                                       pad=0,
                                       param=[
                                           dict(lr_mult=1, decay_mult=1),
                                           dict(lr_mult=2, decay_mult=0)
                                       ])
    else:
        n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
        n.fc7, n.relu7 = conv_relu(n.relu6, 4096, ks=1, pad=0)
        # upsampling
        n.score_fr_geo = L.Convolution(n.relu7,
                                       num_output=Deconv_filters,
                                       kernel_size=1,
                                       pad=0,
                                       param=[
                                           dict(lr_mult=1, decay_mult=1),
                                           dict(lr_mult=2, decay_mult=0)
                                       ])

    n.upscore2_geo = L.Deconvolution(
        n.score_fr_geo,
        convolution_param=dict(num_output=Deconv_filters,
                               kernel_size=4,
                               stride=2,
                               bias_term=False,
                               weight_filler=dict(type="msra")),
        param=[dict(lr_mult=0)],
    )

    n.score_pool4_geo = L.Convolution(
        n.pool4,
        num_output=Deconv_filters,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.score_pool4_geoc = crop(n.score_pool4_geo, n.upscore2_geo)
    n.fuse_pool4_geo = L.Eltwise(n.upscore2_geo,
                                 n.score_pool4_geoc,
                                 operation=P.Eltwise.SUM)
    n.upscore_pool4_geo = L.Deconvolution(n.fuse_pool4_geo,
                                          convolution_param=dict(
                                              num_output=Deconv_filters,
                                              kernel_size=4,
                                              stride=2,
                                              bias_term=False,
                                              weight_filler=dict(type="msra")),
                                          param=[dict(lr_mult=0)])

    n.score_pool3_geo = L.Convolution(
        n.pool3,
        num_output=Deconv_filters,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.score_pool3_geoc = crop(n.score_pool3_geo, n.upscore_pool4_geo)
    n.fuse_pool3_geo = L.Eltwise(n.upscore_pool4_geo,
                                 n.score_pool3_geoc,
                                 operation=P.Eltwise.SUM)
    n.upscore8_geo = L.Deconvolution(
        n.fuse_pool3_geo,
        convolution_param=dict(
            num_output=Deconv_filters,
            kernel_size=16,
            stride=8,  # 8x upsampling with the 16-wide kernel
            bias_term=False,
            weight_filler=dict(type="msra")),
        param=[dict(lr_mult=0)])

    # final 3x3 scoring convolution; kept as a plain local because its output
    # is immediately cropped and pooled into n.score_geo below
    b = L.Convolution(
        n.upscore8_geo,
        kernel_size=3,
        stride=1,
        num_output=2,
        pad=1,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='msra'))

    n.score_geo = max_pool(crop(b, n.data))
    #n.score_geo = max_pool(crop(n.upscore8_geo, n.data))

    n.loss_geo = L.SoftmaxWithLoss(
        n.score_geo, n.geo,
        loss_param=dict(normalize=False))  #, ignore_label=255))

    return n.to_proto()
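A hypothetical call to the LMDB-backed fcn() above, writing the generated definition to disk (the LMDB paths are placeholders):

with open('train.prototxt', 'w') as f:
    f.write(str(fcn('data/train_lmdb', 'data/mask_lmdb', batch_size=8)))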
Example #3
def act_proto(mode, batchsize, exp_vocab_size, use_gt=True):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize})
    n.img_feature, n.label, n.exp, n.exp_out, n.exp_cont_1, n.exp_cont_2 = \
        L.Python(module='activity_data_provider_layer',
                 layer='ActivityDataProviderLayer',
                 param_str=mode_str, ntop=6)

    # Attention
    n.att_conv1 = L.Convolution(n.img_feature,
                                kernel_size=1,
                                stride=1,
                                num_output=512,
                                pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu,
                                kernel_size=1,
                                stride=1,
                                num_output=1,
                                pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_reshaped = L.Reshape(
        n.att_conv2, reshape_param=dict(shape=dict(dim=[-1, 1, 14 * 14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att_map = L.Reshape(n.att_softmax,
                          reshape_param=dict(shape=dict(dim=[-1, 1, 14, 14])))

    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1),
                        ntop=1)
    n.att_feature = L.SoftAttention(n.img_feature, n.att_map, dummy)
    n.att_feature_resh = L.Reshape(
        n.att_feature, reshape_param=dict(shape=dict(dim=[-1, 2048])))

    # Prediction
    n.prediction = L.InnerProduct(n.att_feature_resh,
                                  num_output=config.NUM_OUTPUT_UNITS,
                                  weight_filler=dict(type='xavier'),
                                  param=fixed_weights)

    # Embed either the ground-truth answer or the VQA model's argmax prediction
    if use_gt:
        n.exp_emb_ans = L.Embed(n.label,
                                input_dim=config.NUM_OUTPUT_UNITS,
                                num_output=300,
                                weight_filler=dict(type='uniform',
                                                   min=-0.08,
                                                   max=0.08))
    else:
        n.vqa_ans = L.ArgMax(n.prediction, axis=1)
        n.exp_emb_ans = L.Embed(n.vqa_ans,
                                input_dim=config.NUM_OUTPUT_UNITS,
                                num_output=300,
                                weight_filler=dict(type='uniform',
                                                   min=-0.08,
                                                   max=0.08))
    n.exp_emb_ans_tanh = L.TanH(n.exp_emb_ans)
    n.exp_emb_ans2 = L.InnerProduct(n.exp_emb_ans_tanh,
                                    num_output=2048,
                                    weight_filler=dict(type='xavier'))

    # Merge activity answer and visual feature
    n.exp_emb_resh = L.Reshape(
        n.exp_emb_ans2, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.exp_emb_tiled_1 = L.Tile(n.exp_emb_resh, axis=2, tiles=14)
    n.exp_emb_tiled = L.Tile(n.exp_emb_tiled_1, axis=3, tiles=14)

    n.img_embed = L.Convolution(n.img_feature,
                                kernel_size=1,
                                stride=1,
                                num_output=2048,
                                pad=0,
                                weight_filler=dict(type='xavier'))
    n.exp_eltwise = L.Eltwise(n.img_embed,
                              n.exp_emb_tiled,
                              eltwise_param={'operation': P.Eltwise.PROD})
    n.exp_eltwise_sqrt = L.SignedSqrt(n.exp_eltwise)
    n.exp_eltwise_l2 = L.L2Normalize(n.exp_eltwise_sqrt)
    n.exp_eltwise_drop = L.Dropout(n.exp_eltwise_l2,
                                   dropout_param={'dropout_ratio': 0.3})

    # Attention for Explanation
    n.exp_att_conv1 = L.Convolution(n.exp_eltwise_drop,
                                    kernel_size=1,
                                    stride=1,
                                    num_output=512,
                                    pad=0,
                                    weight_filler=dict(type='xavier'))
    n.exp_att_conv1_relu = L.ReLU(n.exp_att_conv1)
    n.exp_att_conv2 = L.Convolution(n.exp_att_conv1_relu,
                                    kernel_size=1,
                                    stride=1,
                                    num_output=1,
                                    pad=0,
                                    weight_filler=dict(type='xavier'))
    n.exp_att_reshaped = L.Reshape(
        n.exp_att_conv2, reshape_param=dict(shape=dict(dim=[-1, 1, 14 * 14])))
    n.exp_att_softmax = L.Softmax(n.exp_att_reshaped, axis=2)
    n.exp_att_map = L.Reshape(
        n.exp_att_softmax, reshape_param=dict(shape=dict(dim=[-1, 1, 14, 14])))

    exp_dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                            data_filler=dict(type='constant', value=1),
                            ntop=1)
    n.exp_att_feature_prev = L.SoftAttention(n.img_feature, n.exp_att_map,
                                             exp_dummy)
    n.exp_att_feature_resh = L.Reshape(
        n.exp_att_feature_prev, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.exp_att_feature_embed = L.InnerProduct(n.exp_att_feature_resh,
                                             num_output=2048,
                                             weight_filler=dict(type='xavier'))
    n.exp_att_feature = L.Eltwise(n.exp_emb_ans2,
                                  n.exp_att_feature_embed,
                                  eltwise_param={'operation': P.Eltwise.PROD})

    n.silence_exp_att = L.Silence(n.exp_att_feature, ntop=0)

    return n.to_proto()
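act_proto() relies on custom layers (SoftAttention, SignedSqrt, L2Normalize come from a VQA fork of Caffe, not stock Caffe) and on two module-level names it never defines. Plausible stand-ins, assumptions rather than the original values:

import json
import caffe
from caffe import layers as L, params as P

class config:
    NUM_OUTPUT_UNITS = 3000  # answer-vocabulary size (assumed)

# freeze the prediction layer: zero learning rate and decay on both blobs
fixed_weights = [dict(lr_mult=0, decay_mult=0),
                 dict(lr_mult=0, decay_mult=0)]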
Example #4
def qlstm(mode, batchsize, T, question_vocab_size):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize})
    n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(
        module='vqa_data_provider_layer', layer='VQADataProviderLayer',
        param_str=mode_str, ntop=5)

    n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=300,
                         weight_filler=dict(type='uniform', min=-0.08, max=0.08))
    n.embed = L.TanH(n.embed_ba)
    concat_word_embed = [n.embed, n.glove]
    n.concat_embed = L.Concat(*concat_word_embed,
                              concat_param={'axis': 2})  # T x N x 600

    # LSTM1
    n.lstm1 = L.LSTM(
        n.concat_embed, n.cont,
        recurrent_param=dict(
            num_output=1024,
            weight_filler=dict(type='uniform', min=-0.08, max=0.08),
            bias_filler=dict(type='constant', value=0)))
    tops1 = L.Slice(n.lstm1, ntop=T, slice_param={'axis': 0})
    for i in range(T - 1):
        n.__setattr__('slice_first' + str(i), tops1[int(i)])
        n.__setattr__('silence_data_first' + str(i),
                      L.Silence(tops1[int(i)], ntop=0))
    n.lstm1_out = tops1[T - 1]
    n.lstm1_reshaped = L.Reshape(
        n.lstm1_out, reshape_param=dict(shape=dict(dim=[-1, 1024])))
    n.lstm1_reshaped_droped = L.Dropout(n.lstm1_reshaped,
                                        dropout_param={'dropout_ratio': 0.3})
    n.lstm1_droped = L.Dropout(n.lstm1, dropout_param={'dropout_ratio': 0.3})
    # LSTM2
    n.lstm2 = L.LSTM(
        n.lstm1_droped, n.cont,
        recurrent_param=dict(
            num_output=1024,
            weight_filler=dict(type='uniform', min=-0.08, max=0.08),
            bias_filler=dict(type='constant', value=0)))
    tops2 = L.Slice(n.lstm2, ntop=T, slice_param={'axis': 0})
    for i in range(T - 1):
        n.__setattr__('slice_second' + str(i), tops2[int(i)])
        n.__setattr__('silence_data_second' + str(i),
                      L.Silence(tops2[int(i)], ntop=0))
    n.lstm2_out = tops2[T - 1]
    n.lstm2_reshaped = L.Reshape(
        n.lstm2_out, reshape_param=dict(shape=dict(dim=[-1, 1024])))
    n.lstm2_reshaped_droped = L.Dropout(n.lstm2_reshaped,
                                        dropout_param={'dropout_ratio': 0.3})
    concat_bottom = [n.lstm1_reshaped_droped, n.lstm2_reshaped_droped]
    n.lstm_12 = L.Concat(*concat_bottom)

    n.q_emb_tanh_droped_resh = L.Reshape(
        n.lstm_12, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.q_emb_tanh_droped_resh_tiled_1 = L.Tile(n.q_emb_tanh_droped_resh,
                                              axis=2,
                                              tiles=14)
    n.q_emb_tanh_droped_resh_tiled = L.Tile(n.q_emb_tanh_droped_resh_tiled_1,
                                            axis=3,
                                            tiles=14)
    n.i_emb_tanh_droped_resh = L.Reshape(
        n.img_feature, reshape_param=dict(shape=dict(dim=[-1, 2048, 14, 14])))
    n.blcf = L.CompactBilinear(n.q_emb_tanh_droped_resh_tiled,
                               n.i_emb_tanh_droped_resh,
                               compact_bilinear_param=dict(num_output=16000,
                                                           sum_pool=False))
    n.blcf_sign_sqrt = L.SignedSqrt(n.blcf)
    n.blcf_sign_sqrt_l2 = L.L2Normalize(n.blcf_sign_sqrt)
    n.blcf_droped = L.Dropout(n.blcf_sign_sqrt_l2,
                              dropout_param={'dropout_ratio': 0.1})

    # multi-channel attention
    n.att_conv1 = L.Convolution(n.blcf_droped,
                                kernel_size=1,
                                stride=1,
                                num_output=512,
                                pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu,
                                kernel_size=1,
                                stride=1,
                                num_output=2,
                                pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_reshaped = L.Reshape(
        n.att_conv2, reshape_param=dict(shape=dict(dim=[-1, 2, 14 * 14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att = L.Reshape(n.att_softmax,
                      reshape_param=dict(shape=dict(dim=[-1, 2, 14, 14])))
    att_maps = L.Slice(n.att, ntop=2, slice_param={'axis': 1})
    n.att_map0 = att_maps[0]
    n.att_map1 = att_maps[1]
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1),
                        ntop=1)
    n.att_feature0 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map0,
                                     dummy)
    n.att_feature1 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map1,
                                     dummy)
    n.att_feature0_resh = L.Reshape(
        n.att_feature0, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.att_feature1_resh = L.Reshape(
        n.att_feature1, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.att_feature = L.Concat(n.att_feature0_resh, n.att_feature1_resh)

    # merge attention and lstm with compact bilinear pooling
    n.att_feature_resh = L.Reshape(
        n.att_feature, reshape_param=dict(shape=dict(dim=[-1, 4096, 1, 1])))
    n.lstm_12_resh = L.Reshape(
        n.lstm_12, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.bc_att_lstm = L.CompactBilinear(n.att_feature_resh,
                                      n.lstm_12_resh,
                                      compact_bilinear_param=dict(
                                          num_output=16000, sum_pool=False))
    n.bc_sign_sqrt = L.SignedSqrt(n.bc_att_lstm)
    n.bc_sign_sqrt_l2 = L.L2Normalize(n.bc_sign_sqrt)

    n.bc_dropped = L.Dropout(n.bc_sign_sqrt_l2,
                             dropout_param={'dropout_ratio': 0.1})
    n.bc_dropped_resh = L.Reshape(
        n.bc_dropped, reshape_param=dict(shape=dict(dim=[-1, 16000])))

    n.prediction = L.InnerProduct(n.bc_dropped_resh,
                                  num_output=3000,
                                  weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    return n.to_proto()
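CompactBilinear, SignedSqrt, L2Normalize, and SoftAttention are again non-stock layers (they ship with the multimodal compact bilinear pooling VQA code). A hypothetical call with illustrative values, T being the padded question length:

with open('mcb_att_train.prototxt', 'w') as f:
    f.write(str(qlstm('train', batchsize=64, T=22, question_vocab_size=10000)))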
Example #5
    def body(self):
        n = caffe.NetSpec()

        conv_defaults = dict(
            param=[
                dict(lr_mult=1, decay_mult=1),
                dict(lr_mult=2, decay_mult=0)
            ],
            weight_filler=dict(type="gaussian", std=0.01),
        )

        lrn_defaults = dict(local_size=5, alpha=0.0001, beta=0.75)

        fc_defaults = dict(
            param=[
                dict(lr_mult=1, decay_mult=1),
                dict(lr_mult=2, decay_mult=0)
            ],
            weight_filler=dict(type="gaussian", std=0.005),
            bias_filler=dict(type="constant", value=1),
        )

        n.conv1 = L.Convolution(bottom="data",
                                num_output=96,
                                kernel_size=11,
                                stride=4,
                                bias_filler=dict(type="constant", value=0),
                                **conv_defaults)
        n.relu1 = L.ReLU(n.conv1, in_place=True)
        n.pool1 = L.Pooling(n.relu1,
                            pool=P.Pooling.MAX,
                            kernel_size=3,
                            stride=2)
        n.norm1 = L.LRN(n.pool1, **lrn_defaults)

        n.conv2 = L.Convolution(n.norm1,
                                num_output=256,
                                kernel_size=5,
                                stride=1,
                                pad=2,
                                group=2,
                                bias_filler=dict(type="constant", value=1),
                                **conv_defaults)
        n.relu2 = L.ReLU(n.conv2, in_place=True)
        n.pool2 = L.Pooling(n.relu2,
                            pool=P.Pooling.MAX,
                            kernel_size=3,
                            stride=2)
        n.norm2 = L.LRN(n.pool2, **lrn_defaults)

        n.conv3 = L.Convolution(n.norm2,
                                num_output=384,
                                kernel_size=3,
                                stride=1,
                                pad=1,
                                bias_filler=dict(type="constant", value=0),
                                **conv_defaults)
        n.relu3 = L.ReLU(n.conv3, in_place=True)

        n.conv4 = L.Convolution(n.relu3,
                                num_output=384,
                                kernel_size=3,
                                stride=1,
                                pad=1,
                                group=2,
                                bias_filler=dict(type="constant", value=1),
                                **conv_defaults)
        n.relu4 = L.ReLU(n.conv4, in_place=True)

        n.conv5 = L.Convolution(n.relu4,
                                num_output=256,
                                kernel_size=3,
                                stride=1,
                                pad=1,
                                group=2,
                                bias_filler=dict(type="constant", value=1),
                                **conv_defaults)
        n.relu5 = L.ReLU(n.conv5, in_place=True)
        n.pool5 = L.Pooling(n.relu5,
                            pool=P.Pooling.MAX,
                            kernel_size=3,
                            stride=2)

        n.fc6 = L.InnerProduct(n.pool5, num_output=4096, **fc_defaults)
        n.relu6 = L.ReLU(n.fc6, in_place=True)
        n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)

        n.fc7 = L.InnerProduct(n.drop6, num_output=4096, **fc_defaults)
        n.relu7 = L.ReLU(n.fc7, in_place=True)
        n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

        n.out = L.InnerProduct(n.drop7,
                               num_output=self.params['num_output'],
                               param=[
                                   dict(lr_mult=1, decay_mult=1),
                                   dict(lr_mult=2, decay_mult=0)
                               ],
                               weight_filler=dict(type="gaussian", std=0.01),
                               bias_filler=dict(type="constant", value=0))

        return n.to_proto(), "out"
Example #6
def simple_net(split,
               initialize_fc8=False,
               cur_shape=None,
               next_shape=None,
               batch_size=1,
               num_threads=1,
               max_queue_size=5):

    # Get crop layer parameters by probing a throwaway copy of the net
    tmp_net = caffe.NetSpec()
    tmp_net.im, tmp_net.label = L.MemoryData(batch_size=1,
                                             channels=3,
                                             height=244,
                                             width=244,
                                             ntop=2)
    conv_vgg(tmp_net,
             tmp_net.im,
             suffix='',
             last_layer_pad=0,
             first_layer_pad=100)
    tmp_net.fc6, tmp_net.relu6 = conv_relu(tmp_net.conv5_3,
                                           4096,
                                           ks=7,
                                           dilation=4)
    tmp_net.fc7, tmp_net.relu7 = conv_relu(tmp_net.relu6, 4096, ks=1, pad=0)
    tmp_net.fc8 = L.Convolution(tmp_net.relu7, kernel_size=1, num_output=2)
    tmp_net.upscore = L.Deconvolution(tmp_net.fc8,
                                      convolution_param=dict(kernel_size=16,
                                                             stride=8,
                                                             num_output=2))

    ax, a, b = coord_map_from_to(tmp_net.upscore, tmp_net.im)
    assert (a == 1).all(), 'scale mismatch on crop (a = {})'.format(a)
    assert (b <= 0).all(), 'cannot crop negative offset (b = {})'.format(b)
    assert (np.round(b) == b
            ).all(), 'cannot crop noninteger offset (b = {})'.format(b)

    # Create the network
    n = caffe.NetSpec()

    if split in ('train', 'val'):
        pydata_params = dict(batch_size=batch_size,
                             im_shape=tuple(next_shape),
                             num_threads=num_threads,
                             max_queue_size=max_queue_size)
        n.cur_im, n.masked_im, n.next_im, n.label = L.Python(
            module='coco_transformed_datalayers_prefetch',
            layer='CocoTransformedDataLayerPrefetch',
            ntop=4,
            param_str=str(pydata_params))
    elif split == 'deploy':
        n.cur_im, n.label_1 = L.MemoryData(batch_size=1,
                                           channels=3,
                                           height=244,
                                           width=244,
                                           ntop=2)
        n.masked_im, n.label_2 = L.MemoryData(batch_size=1,
                                              channels=3,
                                              height=244,
                                              width=244,
                                              ntop=2)
        n.next_im, n.label_3 = L.MemoryData(batch_size=1,
                                            channels=3,
                                            height=244,
                                            width=244,
                                            ntop=2)
    else:
        raise ValueError('unknown split: {}'.format(split))

    if cur_shape is None or next_shape is None:
        concat_pad = np.zeros((2, ))
    else:
        concat_pad = (next_shape - cur_shape) / 2.0 / 8.0
    if not all(concat_pad == np.round(concat_pad)):
        raise ValueError('cur_shape/next_shape difference must be divisible '
                         'by 16 (concat_pad = {})'.format(concat_pad))

    conv_vgg(n,
             n.cur_im,
             suffix='c',
             last_layer_pad=concat_pad,
             first_layer_pad=100)
    conv_vgg(n,
             n.masked_im,
             suffix='m',
             last_layer_pad=concat_pad,
             first_layer_pad=100)
    conv_vgg(n, n.next_im, suffix='n', last_layer_pad=0, first_layer_pad=100)

    # concatenation
    n.concat1 = L.Concat(n.relu5_3c, n.relu5_3m, n.relu5_3n)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.concat1, 4096, ks=7, dilation=4)
    if split == 'train':
        n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
        n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
        n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
        n.fc8 = L.Convolution(n.drop7,
                              kernel_size=1,
                              param=[
                                  dict(lr_mult=1, decay_mult=1),
                                  dict(lr_mult=2, decay_mult=0)
                              ],
                              num_output=2)
    else:
        n.fc7, n.relu7 = conv_relu(n.relu6, 4096, ks=1, pad=0)
        if initialize_fc8:
            n.fc8 = L.Convolution(n.relu7,
                                  kernel_size=1,
                                  param=[
                                      dict(lr_mult=1, decay_mult=1),
                                      dict(lr_mult=2, decay_mult=0)
                                  ],
                                  weight_filler=dict(type='gaussian', std=.01),
                                  num_output=2)
        else:
            n.fc8 = L.Convolution(n.relu7,
                                  kernel_size=1,
                                  param=[
                                      dict(lr_mult=1, decay_mult=1),
                                      dict(lr_mult=2, decay_mult=0)
                                  ],
                                  num_output=2)

    n.upscore = L.Deconvolution(n.fc8,
                                convolution_param=dict(
                                    kernel_size=16,
                                    stride=8,
                                    num_output=2,
                                    group=2,
                                    weight_filler=dict(type='bilinear'),
                                    bias_term=False),
                                param=dict(lr_mult=0, decay_mult=0))

    n.score = L.Crop(
        n.upscore,
        n.next_im,
        crop_param=dict(
            axis=ax + 1,  # +1 for first cropping dim.
            offset=list(-np.round(b).astype(int))))

    if split != 'deploy':
        n.loss = L.SoftmaxWithLoss(n.score,
                                   n.label,
                                   loss_param=dict(ignore_label=255))
    else:
        n.prop = L.Softmax(n.score)
    return n
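coord_map_from_to comes from Caffe's coordinate-mapping utilities; conv_vgg is a project helper that stacks the five VGG conv blocks with a per-stream suffix. Note this function returns the NetSpec itself rather than a proto, so serialization looks like (path is a placeholder):

import numpy as np
from caffe.coord_map import coord_map_from_to

with open('deploy.prototxt', 'w') as f:
    f.write(str(simple_net('deploy').to_proto()))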
Example #7
def vgg_16(lmdb,
           bs_train=16,
           bs_val=50,
           rate=(1.0,) * 13,  # one channel-scaling factor per VGG-16 conv layer
           lmdb_flag=False,
           not_deploy=True):
    n = caffe.NetSpec()
    if not_deploy:
        if lmdb_flag:
            n.data, n.label = L.Data(source=lmdb + 'ilsvrc12_train_lmdb',
                                     backend=P.Data.LMDB,
                                     include=dict(phase=caffe_pb2.TRAIN),
                                     batch_size=bs_train,
                                     ntop=2,
                                     transform_param=dict(
                                         crop_size=224,
                                         mean_value=[104, 117, 123],
                                         mirror=True))
            data_str = n.to_proto()
            n.data, n.label = L.Data(source=lmdb + 'ilsvrc12_val_lmdb',
                                     backend=P.Data.LMDB,
                                     include=dict(phase=caffe_pb2.TEST),
                                     batch_size=bs_val,
                                     ntop=2,
                                     transform_param=dict(
                                         crop_size=224,
                                         mean_value=[104, 117, 123],
                                         mirror=False))
        else:
            n.data, n.label = L.Data(source=lmdb + 'ilsvrc12_train_leveldb',
                                     backend=P.Data.LEVELDB,
                                     include=dict(phase=caffe_pb2.TRAIN),
                                     batch_size=bs_train,
                                     ntop=2,
                                     transform_param=dict(
                                         crop_size=224,
                                         mean_value=[104, 117, 123],
                                         mirror=True))
            data_str = n.to_proto()
            n.data, n.label = L.Data(source=lmdb + 'ilsvrc12_val_leveldb',
                                     backend=P.Data.LEVELDB,
                                     include=dict(phase=caffe_pb2.TEST),
                                     batch_size=bs_val,
                                     ntop=2,
                                     transform_param=dict(
                                         crop_size=224,
                                         mean_value=[104, 117, 123],
                                         mirror=False))
    else:
        data_str = 'input: "data"\ninput_dim: 1\ninput_dim: 3\ninput_dim: 224\ninput_dim: 224'
        n.data = L.Data()

    # the net itself
    n.conv1_1, n.relu1_1 = conv_relu(n.data,
                                     nout=int(rate[0] * 64),
                                     pad=1,
                                     ks=3)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1,
                                     nout=int(rate[1] * 64),
                                     pad=1,
                                     ks=3)
    n.pool1 = max_pool(n.relu1_2, ks=2, stride=2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1,
                                     nout=int(rate[2] * 128),
                                     pad=1,
                                     ks=3)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1,
                                     nout=int(rate[3] * 128),
                                     pad=1,
                                     ks=3)
    n.pool2 = max_pool(n.relu2_2, ks=2, stride=2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2,
                                     nout=int(rate[4] * 256),
                                     pad=1,
                                     ks=3)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1,
                                     nout=int(rate[5] * 256),
                                     pad=1,
                                     ks=3)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2,
                                     nout=int(rate[6] * 256),
                                     pad=1,
                                     ks=3)
    n.pool3 = max_pool(n.relu3_3, ks=2, stride=2)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3,
                                     nout=int(rate[7] * 512),
                                     pad=1,
                                     ks=3)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1,
                                     nout=int(rate[8] * 512),
                                     pad=1,
                                     ks=3)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2,
                                     nout=int(rate[9] * 512),
                                     pad=1,
                                     ks=3)
    n.pool4 = max_pool(n.relu4_3, ks=2, stride=2)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4,
                                     nout=int(rate[10] * 512),
                                     pad=1,
                                     ks=3)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1,
                                     nout=int(rate[11] * 512),
                                     pad=1,
                                     ks=3)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2,
                                     nout=int(rate[12] * 512),
                                     pad=1,
                                     ks=3)
    n.pool5 = max_pool(n.relu5_3, ks=2, stride=2)

    n.fc6, n.relu6 = fc_relu(n.pool5, nout=4096)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = fc_relu(n.drop6, nout=4096)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.fc8 = L.InnerProduct(
        n.drop7,
        num_output=1000,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])

    if not_deploy:
        n.loss = L.SoftmaxWithLoss(n.fc8, n.label)
        n.acc_top_1 = L.Accuracy(n.fc8, n.label, top_k=1)
        n.acc_top_5 = L.Accuracy(n.fc8, n.label, top_k=5)
    else:
        n.prob = L.Softmax(n.fc8)
    model_str = str(n.to_proto())
    if not not_deploy:
        # strip the placeholder Data layer that NetSpec emits for n.data
        model_str = model_str[54:-1]
    return str(data_str) + '\n' + model_str
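rate is indexed thirteen times, once per VGG-16 conv layer, so it must be a 13-element sequence of channel-scaling factors. A hypothetical full-width call (paths are placeholders):

prototxt = vgg_16('examples/imagenet/', rate=[1.0] * 13, lmdb_flag=True)
with open('vgg16_train_val.prototxt', 'w') as f:
    f.write(prototxt)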
Example #8
    def __wide_basic(self, net, top, basename, num_input, num_output, stride,
                     generate_deploy):
        if config['bottleneck']:
            conv_params = [{
                'kernel_size': 1,
                'stride': stride,
                'pad': 0,
                'num_output': num_output // 4
            }, {
                'kernel_size': 3,
                'stride': 1,
                'pad': 1,
                'num_output': num_output // 4
            }, {
                'kernel_size': 1,
                'stride': 1,
                'pad': 0,
                'num_output': num_output
            }]
            dropout_layers = [1]
        else:
            conv_params = [{
                'kernel_size': 3,
                'stride': stride,
                'pad': 1,
                'num_output': num_output
            }, {
                'kernel_size': 3,
                'stride': 1,
                'pad': 1,
                'num_output': num_output
            }]
            dropout_layers = [1]

        resunit_layer = top
        shortcut_layer = top

        for i, p in enumerate(conv_params):
            branch_layer_name = '%sa_%d' % (basename, i + 1)
            add_dropout = i in dropout_layers and config['dropout']

            if generate_deploy:
                bn = L.BatchNorm(resunit_layer,
                                 in_place=i > 0,
                                 batch_norm_param={'use_global_stats': True})
            else:
                bn = L.BatchNorm(resunit_layer, in_place=i > 0)
            scale = L.Scale(bn, in_place=True, scale_param={'bias_term': True})
            relu = L.ReLU(scale, in_place=True)
            if add_dropout:
                drop = L.Dropout(relu,
                                 in_place=True,
                                 dropout_ratio=config['dropout'])
            conv = L.Convolution(drop if add_dropout else relu,
                                 weight_filler={'type': 'msra'},
                                 bias_term=False,
                                 **p)

            net[branch_layer_name + '_bn'] = bn
            net[branch_layer_name + '_scale'] = scale
            net[branch_layer_name + '_relu'] = relu
            if add_dropout:
                net[branch_layer_name + '_dropout'] = drop
            net[branch_layer_name + '_%dx%d_s%d' %
                (p['kernel_size'], p['kernel_size'], p['stride'])] = conv
            resunit_layer = conv

        if num_input != num_output:
            conv = L.Convolution(shortcut_layer,
                                 kernel_size=1,
                                 stride=stride,
                                 pad=0,
                                 num_output=num_output,
                                 weight_filler={'type': 'xavier'},
                                 bias_term=False)
            net['%sb_1x1_s%d' % (basename, stride)] = conv
            shortcut_layer = conv
        eltwise = L.Eltwise(resunit_layer,
                            shortcut_layer,
                            operation=P.Eltwise.SUM)
        net[basename] = eltwise
        return eltwise
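__wide_basic() reads a module-level config dict; a plausible stand-in (values assumed):

config = {
    'bottleneck': False,  # basic 3x3/3x3 blocks, as in wide residual networks
    'dropout': 0.3,       # a falsy value disables the Dropout layers entirely
}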
Example #9
def cnn(split):
    n = caffe.NetSpec()
    pydata_params = dict(
        dataset_dir='/home/kevin/dataset/washington_rgbd_dataset',
        split=split,
        mean=(104.00698793, 116.66876762, 122.67891434),
        seed=1337,
        img_size=(224, 224),
        crop_size=(224, 224, 224, 224))

    if split == 'train':
        pylayer = 'WashingtonDataLayer'
        pydata_params['randomize'] = True
        pydata_params['batch_size'] = 32
    elif split == 'test':
        pylayer = 'WashingtonDataLayer'
        pydata_params['randomize'] = False
        pydata_params['batch_size'] = 1
    else:
        # deploy-style input: a single 1x1x224x224 blob. NOTE: the Python data
        # layer below still expects pylayer, so only 'train'/'test' run as written.
        n.img = L.Input(name='input',
                        shape=[dict(dim=[1, 1, 224, 224])])

    #---------------------------------Data Layer---------------------------------------#
    n.rgb, n.depth, n.label = L.Python(
        name="data",
        module='data_layers.washington_data_layer',
        layer=pylayer,
        ntop=3,
        param_str=str(pydata_params))

    #---------------------------------RGB-Net---------------------------------------#

    # the vgg 16 base net
    n.conv1_1, n.relu1_1 = conv_relu("conv1_1", n.rgb, 64, pad=1, lr1=0, lr2=0)
    n.conv1_2, n.relu1_2 = conv_relu("conv1_2", n.relu1_1, 64, lr1=0, lr2=0)
    n.rgb_pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu("conv2_1", n.rgb_pool1, 128, lr1=0, lr2=0)
    n.conv2_2, n.relu2_2 = conv_relu("conv2_2", n.relu2_1, 128, lr1=0, lr2=0)
    n.rgb_pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu("conv3_1", n.rgb_pool2, 256, lr1=0, lr2=0)
    n.conv3_2, n.relu3_2 = conv_relu("conv3_2", n.relu3_1, 256, lr1=0, lr2=0)
    n.conv3_3, n.relu3_3 = conv_relu("conv3_3", n.relu3_2, 256, lr1=0, lr2=0)
    n.rgb_pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu("conv4_1", n.rgb_pool3, 512, lr1=0, lr2=0)
    n.conv4_2, n.relu4_2 = conv_relu("conv4_2", n.relu4_1, 512, lr1=0, lr2=0)
    n.conv4_3, n.relu4_3 = conv_relu("conv4_3", n.relu4_2, 512, lr1=0, lr2=0)
    n.rgb_pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu("conv5_1", n.rgb_pool4, 512, lr1=0, lr2=0)
    n.conv5_2, n.relu5_2 = conv_relu("conv5_2", n.relu5_1, 512, lr1=0, lr2=0)
    n.conv5_3, n.relu5_3 = conv_relu("conv5_3", n.relu5_2, 512, lr1=0, lr2=0)
    n.rgb_pool5 = max_pool(n.relu5_3)

    # fully conv
    n.rgb_fc6, n.rgb_relu6 = fc_relu(n.rgb_pool5, 4096, lr1=0, lr2=0)
    n.rgb_drop6 = L.Dropout(n.rgb_relu6, dropout_ratio=0.5, in_place=True)
    n.rgb_fc7, n.rgb_relu7 = fc_relu(n.rgb_drop6, 4096, lr1=0, lr2=0)
    n.rgb_drop7 = L.Dropout(n.rgb_relu7, dropout_ratio=0.5, in_place=True)

    n.rgb_fc8 = fc(n.rgb_drop7, 51, lr1=0, lr2=0)

    #---------------------------------Depth-Net---------------------------------------#

    # the base net
    n.conv1, n.relu1 = conv_relu("conv1",
                                 n.depth,
                                 128,
                                 ks=5,
                                 stride=2,
                                 pad=2,
                                 lr1=0,
                                 lr2=0)
    n.depth_pool1 = max_pool(n.relu1, ks=3)
    n.norm1 = L.LRN(n.depth_pool1,
                    lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))

    n.conv2, n.relu2 = conv_relu("conv2",
                                 n.norm1,
                                 256,
                                 ks=5,
                                 stride=1,
                                 pad=2,
                                 lr1=0,
                                 lr2=0)
    n.depth_pool2 = max_pool(n.relu2, ks=3)
    n.norm2 = L.LRN(n.depth_pool2,
                    lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))

    n.conv3, n.relu3 = conv_relu("conv3",
                                 n.norm2,
                                 384,
                                 ks=3,
                                 pad=1,
                                 group=2,
                                 lr1=0,
                                 lr2=0)
    n.depth_pool3 = max_pool(n.relu3, ks=3)

    n.conv4, n.relu4 = conv_relu("conv4",
                                 n.depth_pool3,
                                 512,
                                 ks=3,
                                 pad=1,
                                 group=1,
                                 lr1=0,
                                 lr2=0)

    n.conv5, n.relu5 = conv_relu("conv5",
                                 n.relu4,
                                 512,
                                 ks=3,
                                 pad=1,
                                 group=1,
                                 lr1=0,
                                 lr2=0)

    n.depth_pool5 = max_pool(n.relu5, ks=3)

    n.depth_fc6, n.depth_relu6 = fc_relu(n.depth_pool5, 4096, lr1=0, lr2=0)
    n.depth_drop6 = L.Dropout(n.depth_relu6, dropout_ratio=0.5, in_place=True)
    n.depth_fc7, n.depth_relu7 = fc_relu(n.depth_drop6, 4096, lr1=0, lr2=0)
    n.depth_drop7 = L.Dropout(n.depth_relu7, dropout_ratio=0.5, in_place=True)

    n.depth_fc8 = fc(n.depth_drop7, 51, lr1=0, lr2=0)

    #-----------------------------------final output---------------------------------#
    # Concatenation
    n.concat = L.Concat(n.rgb_drop7, n.depth_drop7, axis=1)
    #n.fuse_fc1 = fc(n.concat, 4096, lr1=1, lr2=2)
    #n.fuse_drop1 = L.Dropout(n.fuse_fc1, dropout_ratio=0.9, in_place=True)
    #n.fuse_fc2 = fc(n.fuse_drop1, 4096, lr1=1, lr2=2)
    #n.fuse_drop2 = L.Dropout(n.fuse_fc2, dropout_ratio=0.9, in_place=True)
    n.rgbd_fc8 = fc(n.concat, 51, lr1=1, lr2=2)

    if split != 'deploy':
        n.rgb_accuracy = L.Accuracy(n.rgb_fc8, n.label)
        n.rgb_loss = L.SoftmaxWithLoss(n.rgb_fc8, n.label)
        n.depth_accuracy = L.Accuracy(n.depth_fc8, n.label)
        n.depth_loss = L.SoftmaxWithLoss(n.depth_fc8, n.label)
        n.rgbd_accuracy = L.Accuracy(n.rgbd_fc8, n.label)
        n.rgbd_loss = L.SoftmaxWithLoss(n.rgbd_fc8, n.label)

    return n.to_proto()
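The RGB-D examples use richer helpers than the FCN ones sketched earlier: here conv_relu also takes a layer name (plausibly so layer names match the pretrained weights to be loaded) plus per-blob learning-rate multipliers, and fc/fc_relu wrap InnerProduct layers. Signatures are inferred from the call sites, so treat this as an assumption (Example #11 below calls a nameless conv_relu variant with the same tail arguments):

def conv_relu(name, bottom, nout, ks=3, stride=1, pad=1, group=1, lr1=1, lr2=2):
    conv = L.Convolution(bottom, name=name, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, group=group,
                         param=[dict(lr_mult=lr1, decay_mult=1),
                                dict(lr_mult=lr2, decay_mult=0)])
    return conv, L.ReLU(conv, in_place=True)

def fc_relu(bottom, nout, lr1=1, lr2=2):
    fc = L.InnerProduct(bottom, num_output=nout,
                        param=[dict(lr_mult=lr1, decay_mult=1),
                               dict(lr_mult=lr2, decay_mult=0)])
    return fc, L.ReLU(fc, in_place=True)

def fc(bottom, nout, lr1=1, lr2=2):
    return L.InnerProduct(bottom, num_output=nout,
                          param=[dict(lr_mult=lr1, decay_mult=1),
                                 dict(lr_mult=lr2, decay_mult=0)])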
Example #10
def buildnet(inputdb,
             mean_file,
             batch_size,
             height,
             width,
             nchannels,
             net_type="train"):
    net = caffe.NetSpec()

    crop_size = -1
    if augment_data:
        # NOTE: computed but unused; the data layer call below hardcodes crop_size=768
        crop_size = width

    train = False
    if net_type == "train":
        train = True

    data_layers, label = lt.data_layer_trimese(net,
                                               inputdb,
                                               mean_file,
                                               batch_size,
                                               net_type,
                                               height,
                                               width,
                                               nchannels, [4, 8],
                                               crop_size=768)

    # First conv  layer
    branch_ends = []
    for n, layer in enumerate(data_layers):
        conv1 = lt.convolution_layer(net,
                                     layer,
                                     "plane%d_conv1" % (n),
                                     "tri_conv1",
                                     32,
                                     2,
                                     7,
                                     3,
                                     0.05,
                                     addbatchnorm=True,
                                     train=train)
        pool1 = lt.pool_layer(net, conv1, "plane%d_pool1" % (n), 3, 1)

        conv2 = lt.convolution_layer(net,
                                     pool1,
                                     "plane%d_conv2" % (n),
                                     "tri_conv2",
                                     16,
                                     2,
                                     3,
                                     3,
                                     0.05,
                                     addbatchnorm=True,
                                     train=train)

        conv3 = lt.convolution_layer(net,
                                     conv2,
                                     "plane%d_conv3" % (n),
                                     "tri_conv3",
                                     16,
                                     2,
                                     3,
                                     3,
                                     0.05,
                                     addbatchnorm=True,
                                     train=train)

        pool3 = lt.pool_layer(net, conv3, "plane%d_pool3" % (n), 3, 1)

        branch_ends.append(pool3)

    concat = lt.concat_layer(net, "mergeplanes", *branch_ends)

    resnet1 = lt.resnet_module(net, concat, "resnet1", 16 * 3, 3, 1, 1, 8, 16,
                               use_batch_norm, train)
    resnet2 = lt.resnet_module(net, resnet1, "resnet2", 16, 3, 1, 1, 8, 16,
                               use_batch_norm, train)
    resnet3 = lt.resnet_module(net, resnet2, "resnet3", 16, 3, 1, 1, 8, 32,
                               use_batch_norm, train)

    resnet4 = lt.resnet_module(net, resnet3, "resnet4", 32, 3, 1, 1, 8, 32,
                               use_batch_norm, train)
    resnet5 = lt.resnet_module(net, resnet4, "resnet5", 32, 3, 1, 1, 8, 32,
                               use_batch_norm, train)
    resnet6 = lt.resnet_module(net, resnet5, "resnet6", 32, 3, 1, 1, 16, 64,
                               use_batch_norm, train)

    resnet7 = lt.resnet_module(net, resnet6, "resnet7", 64, 3, 1, 1, 16, 64,
                               use_batch_norm, train)
    resnet8 = lt.resnet_module(net, resnet7, "resnet8", 64, 3, 1, 1, 16, 64,
                               use_batch_norm, train)
    resnet9 = lt.resnet_module(net, resnet8, "resnet9", 64, 3, 1, 1, 32, 128,
                               use_batch_norm, train)

    net.lastpool = lt.pool_layer(net, resnet9, "lastpool", 7, 1, P.Pooling.AVE)
    lastpool_layer = net.lastpool

    if use_dropout:
        net.lastpool_dropout = L.Dropout(net.lastpool,
                                         in_place=True,
                                         dropout_param=dict(dropout_ratio=0.5))
        lastpool_layer = net.lastpool_dropout

    fc1 = lt.final_fully_connect(net, lastpool_layer, nclasses=256)
    fc2 = lt.final_fully_connect(net, fc1, nclasses=4096)
    fc3 = lt.final_fully_connect(net, fc2, nclasses=2)

    if train:
        net.loss = L.SoftmaxWithLoss(fc3, net.label)
        net.acc = L.Accuracy(fc3, net.label)
    else:
        net.probt = L.Softmax(fc3)
        net.acc = L.Accuracy(fc3, net.label)

    return net
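buildnet() depends on a project-local helper module lt (layer tools) and three module-level flags it never defines; plausible stand-ins (values assumed):

augment_data = False    # when True, crop inputs to `width` (see the NOTE above)
use_batch_norm = True   # threaded through every resnet_module
use_dropout = False     # if True, insert dropout after the global average pool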
Example #11
def cnn(split):
    n = caffe.NetSpec()
    pydata_params = dict(
        dataset_dir='/home/kevin/dataset/washington_rgbd_dataset',
        split=split,
        mean=(104.00698793, 116.66876762, 122.67891434),
        seed=1337,
        batch_size=128,
        img_size=(227, 227))
    if split == 'deploy':
        # deploy-style input: a single 1x1x224x224 blob. NOTE: the Python data
        # layer below still references pylayer, so only the non-deploy splits
        # run as written.
        n.img = L.Input(name='input',
                        shape=[dict(dim=[1, 1, 224, 224])])
    else:
        pylayer = 'WashingtonDataLayer'

    #---------------------------------Data Layer---------------------------------------#
    n.rgb, n.depth, n.label = L.Python(
        name="data",
        module='data_layers.washington_data_layer',
        layer=pylayer,
        ntop=3,
        param_str=str(pydata_params))

    #---------------------------------RGB-Net---------------------------------------#

    # the caffe-net (alex-net)
    n.rgb_conv1, n.rgb_relu1 = conv_relu(n.rgb, 96, ks=11, stride=4, pad=0)
    n.rgb_pool1 = max_pool(n.rgb_relu1, ks=3)
    n.rgb_norm1 = L.LRN(n.rgb_pool1,
                        lrn_param=dict(local_size=5,
                                       alpha=0.0005,
                                       beta=0.75,
                                       k=2))

    n.rgb_conv2, n.rgb_relu2 = conv_relu(n.rgb_norm1,
                                         256,
                                         ks=5,
                                         pad=2,
                                         group=2)
    n.rgb_pool2 = max_pool(n.rgb_relu2, ks=3)
    n.rgb_norm2 = L.LRN(n.rgb_pool2,
                        lrn_param=dict(local_size=5,
                                       alpha=0.0005,
                                       beta=0.75,
                                       k=2))

    n.rgb_conv3, n.rgb_relu3 = conv_relu(n.rgb_norm2,
                                         384,
                                         ks=3,
                                         pad=1,
                                         lr1=1,
                                         lr2=2)
    n.rgb_conv4, n.rgb_relu4 = conv_relu(n.rgb_relu3,
                                         384,
                                         ks=3,
                                         pad=1,
                                         group=2,
                                         lr1=1,
                                         lr2=2)

    n.rgb_conv5, n.rgb_relu5 = conv_relu(n.rgb_relu4,
                                         256,
                                         ks=3,
                                         pad=1,
                                         group=2,
                                         lr1=1,
                                         lr2=2)
    n.rgb_pool5 = max_pool(n.rgb_relu5, ks=3)

    # fully conv
    n.rgb_fc6, n.rgb_relu6 = fc_relu(n.rgb_pool5, 4096, lr1=1, lr2=2)
    n.rgb_drop6 = L.Dropout(n.rgb_relu6, dropout_ratio=0.5, in_place=True)
    n.rgb_fc7, n.rgb_relu7 = fc_relu(n.rgb_drop6, 4096, lr1=1, lr2=2)
    n.rgb_drop7 = L.Dropout(n.rgb_relu7, dropout_ratio=0.5, in_place=True)

    n.rgb_fc8 = fc(n.rgb_drop7, 51, lr1=1, lr2=2)

    #---------------------------------Depth-Net---------------------------------------#

    # the caffe-net (alex-net)
    n.depth_conv1, n.depth_relu1 = conv_relu(n.depth,
                                             96,
                                             ks=11,
                                             stride=4,
                                             pad=0)
    n.depth_pool1 = max_pool(n.depth_relu1, ks=3)
    n.depth_norm1 = L.LRN(n.depth_pool1,
                          lrn_param=dict(local_size=5,
                                         alpha=0.0005,
                                         beta=0.75,
                                         k=2))

    n.depth_conv2, n.depth_relu2 = conv_relu(n.depth_norm1,
                                             256,
                                             ks=5,
                                             pad=2,
                                             group=2)
    n.depth_pool2 = max_pool(n.depth_relu2, ks=3)
    n.depth_norm2 = L.LRN(n.depth_pool2,
                          lrn_param=dict(local_size=5,
                                         alpha=0.0005,
                                         beta=0.75,
                                         k=2))

    n.depth_conv3, n.depth_relu3 = conv_relu(n.depth_norm2,
                                             384,
                                             ks=3,
                                             pad=1,
                                             lr1=1,
                                             lr2=2)
    n.depth_conv4, n.depth_relu4 = conv_relu(n.depth_relu3,
                                             384,
                                             ks=3,
                                             pad=1,
                                             group=2,
                                             lr1=1,
                                             lr2=2)

    n.depth_conv5, n.depth_relu5 = conv_relu(n.depth_relu4,
                                             256,
                                             ks=3,
                                             pad=1,
                                             group=2,
                                             lr1=1,
                                             lr2=2)

    n.depth_pool5 = max_pool(n.depth_relu5, ks=3)

    # fully connected
    n.depth_fc6, n.depth_relu6 = fc_relu(n.depth_pool5, 4096, lr1=1, lr2=2)
    n.depth_drop6 = L.Dropout(n.depth_relu6, dropout_ratio=0.5, in_place=True)
    n.depth_fc7, n.depth_relu7 = fc_relu(n.depth_drop6, 4096, lr1=1, lr2=2)
    n.depth_drop7 = L.Dropout(n.depth_relu7, dropout_ratio=0.5, in_place=True)

    n.depth_fc8 = fc(n.depth_drop7, 51, lr1=1, lr2=2)

    #-----------------------------------final output---------------------------------#
    # Concatenation
    n.concat = L.Concat(n.rgb_drop7, n.depth_drop7, axis=1)
    n.rgbd_fc8 = fc(n.concat, 51, lr1=1, lr2=2)

    if split != 'deploy':
        n.rgb_accuracy = L.Accuracy(n.rgb_fc8, n.label)
        n.rgb_loss = L.SoftmaxWithLoss(n.rgb_fc8, n.label)
        n.depth_accuracy = L.Accuracy(n.depth_fc8, n.label)
        n.depth_loss = L.SoftmaxWithLoss(n.depth_fc8, n.label)
        n.overall_accuracy = L.Accuracy(n.rgbd_fc8, n.label)
        n.overall_loss = L.SoftmaxWithLoss(n.rgbd_fc8, n.label)

    return n.to_proto()
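
The two-stream net above fuses RGB and depth by concatenating the two drop7 blobs and training a joint rgbd_fc8 alongside the per-stream classifiers. A minimal driver sketch (assuming the enclosing function takes the split name, as its body suggests; file names are hypothetical):

for split in ('train', 'val', 'deploy'):
    with open('rgbd_{}.prototxt'.format(split), 'w') as f:
        f.write(str(fcn(split)))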
Example #12
def create_net(phase): 
    global train_transform_param
    global test_transform_param
    train_transform_param = {
            'mirror': True,
            'mean_file': Params['mean_file'] 
            }
    test_transform_param = {
            'mean_file': Params['mean_file'] 
            }
    if phase == 'train':
        lmdb_file = Params['train_lmdb']
        transform_param = train_transform_param
        batch_size = Params['batch_size_per_device']
    else:
        lmdb_file = Params['test_lmdb']
        transform_param = test_transform_param
        batch_size = Params['test_batch_size']

    net = caffe.NetSpec()
    net.data, net.label = L.Data(batch_size=batch_size,
        backend=P.Data.LMDB,
        source=lmdb_file,
        transform_param=transform_param,
        ntop=2) 
        #include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
   
    kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
            'weight_filler': dict(type='gaussian', std=0.0001),
            'bias_filler': dict(type='constant')}
    net.conv1 = L.Convolution(net.data, num_output=16, kernel_size=3, **kwargs)
    net.pool1 = L.Pooling(net.conv1, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    net.relu1 = L.ReLU(net.pool1, in_place=True)
    kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
            'weight_filler': dict(type='gaussian', std=0.005),
            'bias_filler': dict(type='constant')}
    net.fc2 = L.InnerProduct(net.pool1, num_output=16, **kwargs)
    net.relu2 = L.ReLU(net.fc2, in_place=True)
    net.drop2 = L.Dropout(net.fc2, in_place=True, dropout_param=dict(dropout_ratio=0.5))
    kwargs = {
            'param': [dict(lr_mult=1, decay_mult=100), dict(lr_mult=2, decay_mult=0)],
            'weight_filler': dict(type='gaussian', std=0.01),
            'bias_filler': dict(type='constant', value=0)}
    net.fc3 = L.InnerProduct(net.fc2, num_output=2, **kwargs)
    if phase == 'train':
        net.loss = L.SoftmaxWithLoss(net.fc3, net.label)
    elif phase == 'test':
        net.accuracy = L.Accuracy(net.fc3, net.label)
    else:
        net.prob = L.Softmax(net.fc3)

    net_proto = net.to_proto()
    if phase == 'deploy':
        del net_proto.layer[0]
        #del net_proto.layer[-1]
        net_proto.input.extend(['data'])
        net_proto.input_dim.extend([64,3,12,36])
    net_proto.name = '{}_{}'.format(Params['model_name'], phase)
    return net_proto
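
create_net builds train, test, and deploy variants from one spec; for deploy it deletes the LMDB data layer from the serialized proto and declares a raw input blob instead. A minimal driver sketch (assuming the Params dict above is populated; output file names are hypothetical):

for phase in ('train', 'test', 'deploy'):
    proto = create_net(phase)
    with open('{}.prototxt'.format(proto.name), 'w') as f:
        f.write(str(proto))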
Example #13
def fcn(split, tops):
    n = caffe.NetSpec()
    n.data, n.label = L.Python(
        module='nyud_layers',
        layer='NYUDSegDataLayer',
        ntop=2,
        param_str=str(
            dict(image_path='/media/ssd500/autocity_dataset/images/',
                 image_list="/media/ssd500/autocity_dataset/image_train.txt",
                 label_list="/media/ssd500/autocity_dataset/label_train.txt",
                 label_path="/media/ssd500/autocity_dataset/labels/0/",
                 split=split,
                 tops=tops,
                 seed=1337)))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(
        n.drop7,
        num_output=2,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore = L.Deconvolution(n.score_fr,
                                convolution_param=dict(num_output=2,
                                                       kernel_size=64,
                                                       stride=32,
                                                       bias_term=False),
                                param=[dict(lr_mult=0)])
    n.score = crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(
        n.score, n.label,
        loss_param=dict(normalize=False))  #, ignore_label=255

    return n.to_proto()
def create_cifar10_googlenet(input_shape, classes=1000, deploy=False):
    net_name = "cifar10_googlenet"
    data_root_dir = "/home/tim/datasets/cifar10/"

    if deploy:
        net_filename = "{0}_deploy.prototxt".format(net_name)
    else:
        net_filename = "{0}_train_test.prototxt".format(net_name)

    # net name
    with open(net_filename, "w") as f:
        f.write('name: "{0}"\n'.format(net_name))

    if deploy:
        net = caffe.NetSpec()
        """
        The conventional blob dimensions for batches of image data are 
        number N x channel K x height H x width W. Blob memory is row-major in layout, 
        so the last / rightmost dimension changes fastest. 
        For example, in a 4D blob, the value at index (n, k, h, w) is 
        physically located at index ((n * K + k) * H + h) * W + w.
        """
        # batch_size, channel, height, width
        net.data = L.Input(input_param=dict(
            shape=[dict(dim=list(input_shape))]))
    else:
        net = caffe.NetSpec()
        batch_size = 32
        lmdb = data_root_dir + "train_lmdb"
        net.data, net.label = L.Data(
            batch_size=batch_size,
            backend=P.Data.LMDB,
            source=lmdb,
            transform_param=dict(
                mirror=True,
                # crop_size=32,
                mean_file=data_root_dir + "mean.binaryproto"),
            # mean_value=[104, 117, 123]),
            ntop=2,
            include=dict(phase=caffe_pb2.Phase.Value("TRAIN")))

        with open(net_filename, "a") as f:
            f.write(str(net.to_proto()))

        del net
        net = caffe.NetSpec()
        batch_size = 50
        lmdb = data_root_dir + "test_lmdb"
        net.data, net.label = L.Data(
            batch_size=batch_size,
            backend=P.Data.LMDB,
            source=lmdb,
            transform_param=dict(
                mirror=False,
                # crop_size=224,
                mean_file=data_root_dir + "mean.binaryproto"),
            # mean_value=[104, 117, 123]),
            ntop=2,
            include=dict(phase=caffe_pb2.Phase.Value("TEST")))

    # padding = 'same' for the 7x7 kernel, i.e. pad = 3
    net.conv1_7x7_2s = L.Convolution(
        net.data,
        kernel_size=7,
        num_output=64,
        pad=3,
        stride=2,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type="constant", value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.conv1_7x7_2s_relu = L.ReLU(net.conv1_7x7_2s, in_place=True)
    # net.conv1_maxpool1_3x3_2s = L.Pooling(net.conv1_7x7_2s_relu, kernel_size=3, stride=2, pool=P.Pooling.MAX)
    net.conv1_norm1 = L.LRN(net.conv1_7x7_2s_relu,
                            local_size=5,
                            alpha=0.0001,
                            beta=0.75)

    net.conv2_1x1_1v = L.Convolution(
        net.conv1_norm1,
        kernel_size=1,
        num_output=64,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type="constant", value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.conv2_1x1_1v_relu = L.ReLU(net.conv2_1x1_1v, in_place=True)
    net.conv2_3x3_1s = L.Convolution(
        net.conv2_1x1_1v_relu,
        kernel_size=3,
        num_output=192,
        pad=1,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type="constant", value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.conv2_3x3_1s_relu = L.ReLU(net.conv2_3x3_1s, in_place=True)
    net.conv2_norm2 = L.LRN(net.conv2_3x3_1s_relu,
                            local_size=5,
                            alpha=0.0001,
                            beta=0.75)
    # net.conv2_pool_3x3_2s = L.Pooling(net.conv2_norm2, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    # inception(3a)
    inception3a_output = inception(net=net,
                                   pre_layer=net.conv2_norm2,
                                   conv1x1_num=64,
                                   conv3x3_reduce_num=96,
                                   conv3x3_num=128,
                                   conv5x5_reduce_num=16,
                                   conv5x5_num=32,
                                   maxpool3x3_proj1x1_num=32,
                                   name="inception3a")
    # inception(3b)
    inception3b_output = inception(net=net,
                                   pre_layer=inception3a_output,
                                   conv1x1_num=128,
                                   conv3x3_reduce_num=128,
                                   conv3x3_num=192,
                                   conv5x5_reduce_num=32,
                                   conv5x5_num=96,
                                   maxpool3x3_proj1x1_num=64,
                                   name="inception3b")

    # max pool
    net.inception3_maxpool = L.Pooling(inception3b_output,
                                       kernel_size=3,
                                       stride=2,
                                       pool=P.Pooling.MAX)

    # inception(4a)
    inception4a_output = inception(net=net,
                                   pre_layer=net.inception3_maxpool,
                                   conv1x1_num=192,
                                   conv3x3_reduce_num=96,
                                   conv3x3_num=208,
                                   conv5x5_reduce_num=16,
                                   conv5x5_num=48,
                                   maxpool3x3_proj1x1_num=64,
                                   name="inception4a")

    # loss1
    if not deploy:
        # avg pool
        net.loss1_avgpool5x5_3v = L.Pooling(inception4a_output,
                                            kernel_size=5,
                                            stride=3,
                                            pool=P.Pooling.AVE)

        # conv1x1_1s
        net.loss1_conv1x1_1s = L.Convolution(net.loss1_avgpool5x5_3v,
                                             kernel_size=1,
                                             num_output=128,
                                             weight_filler=dict(type="xavier"),
                                             bias_filler=dict(type="constant",
                                                              value=0.2),
                                             param=[
                                                 dict(lr_mult=1, decay_mult=1),
                                                 dict(lr_mult=2, decay_mult=0)
                                             ])
        net.loss1_conv1x1_1s_relu = L.ReLU(net.loss1_conv1x1_1s, in_place=True)

        net.loss1_fc1 = L.InnerProduct(net.loss1_conv1x1_1s_relu,
                                       num_output=1024,
                                       weight_filler=dict(type="xavier"),
                                       bias_filler=dict(type="constant",
                                                        value=0),
                                       param=[
                                           dict(lr_mult=1, decay_mult=1),
                                           dict(lr_mult=2, decay_mult=0)
                                       ])
        net.loss1_fc1_relu1 = L.ReLU(net.loss1_fc1, in_place=True)

        net.loss1_dropout = L.Dropout(net.loss1_fc1_relu1,
                                      dropout_param=dict(dropout_ratio=0.7),
                                      in_place=True)
        net.loss1_pred_fc = L.InnerProduct(net.loss1_dropout,
                                           num_output=classes,
                                           weight_filler=dict(type="xavier"),
                                           bias_filler=dict(type="constant",
                                                            value=0),
                                           param=[
                                               dict(lr_mult=1, decay_mult=1),
                                               dict(lr_mult=2, decay_mult=0)
                                           ])
        net.loss1 = L.SoftmaxWithLoss(net.loss1_pred_fc,
                                      net.label,
                                      loss_weight=0.3)
        # net.loss1_accuracy_top_1 = L.Accuracy(net.loss1_pred_fc, net.label,
        #                       include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        # net.loss1_accuracy_top_5 = L.Accuracy(net.loss1_pred_fc, net.label,
        #                                   include=dict(phase=caffe_pb2.Phase.Value('TEST')),
        #                                       accuracy_param=dict(top_k=5))

    # inception(4b)
    inception4b_output = inception(net=net,
                                   pre_layer=inception4a_output,
                                   conv1x1_num=160,
                                   conv3x3_reduce_num=112,
                                   conv3x3_num=224,
                                   conv5x5_reduce_num=24,
                                   conv5x5_num=64,
                                   maxpool3x3_proj1x1_num=64,
                                   name="inception4b")

    # inception(4c)
    inception4c_output = inception(net=net,
                                   pre_layer=inception4b_output,
                                   conv1x1_num=128,
                                   conv3x3_reduce_num=128,
                                   conv3x3_num=256,
                                   conv5x5_reduce_num=24,
                                   conv5x5_num=64,
                                   maxpool3x3_proj1x1_num=64,
                                   name="inception4c")

    # inception(4d)
    inception4d_output = inception(net=net,
                                   pre_layer=inception4c_output,
                                   conv1x1_num=112,
                                   conv3x3_reduce_num=144,
                                   conv3x3_num=288,
                                   conv5x5_reduce_num=32,
                                   conv5x5_num=64,
                                   maxpool3x3_proj1x1_num=64,
                                   name="inception4d")

    # loss2
    if not deploy:
        # avg pool
        net.loss2_avgpool5x5_3v = L.Pooling(inception4d_output,
                                            kernel_size=5,
                                            stride=3,
                                            pool=P.Pooling.AVE)

        # conv1x1_1s
        net.loss2_conv1x1_1s = L.Convolution(net.loss2_avgpool5x5_3v,
                                             kernel_size=1,
                                             num_output=128,
                                             weight_filler=dict(type="xavier"),
                                             bias_filler=dict(type="constant",
                                                              value=0.2),
                                             param=[
                                                 dict(lr_mult=1, decay_mult=1),
                                                 dict(lr_mult=2, decay_mult=0)
                                             ])
        net.loss2_conv1x1_1s_relu = L.ReLU(net.loss2_conv1x1_1s, in_place=True)

        net.loss2_fc1 = L.InnerProduct(net.loss2_conv1x1_1s_relu,
                                       num_output=1024,
                                       weight_filler=dict(type="xavier"),
                                       bias_filler=dict(type="constant",
                                                        value=0),
                                       param=[
                                           dict(lr_mult=1, decay_mult=1),
                                           dict(lr_mult=2, decay_mult=0)
                                       ])
        net.loss2_fc1_relu1 = L.ReLU(net.loss2_fc1, in_place=True)

        net.loss2_dropout = L.Dropout(net.loss2_fc1_relu1,
                                      dropout_param=dict(dropout_ratio=0.7),
                                      in_place=True)
        net.loss2_pred_fc = L.InnerProduct(net.loss2_dropout,
                                           num_output=classes,
                                           weight_filler=dict(type="xavier"),
                                           bias_filler=dict(type="constant",
                                                            value=0),
                                           param=[
                                               dict(lr_mult=1, decay_mult=1),
                                               dict(lr_mult=2, decay_mult=0)
                                           ])
        net.loss2 = L.SoftmaxWithLoss(net.loss2_pred_fc,
                                      net.label,
                                      loss_weight=0.3)
        # net.loss2_accuracy_top_1 = L.Accuracy(net.loss2_pred_fc, net.label,
        #                                   include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        # net.loss2_accuracy_top_5 = L.Accuracy(net.loss2_pred_fc, net.label,
        #                                   include=dict(phase=caffe_pb2.Phase.Value('TEST')),
        #                                   accuracy_param=dict(top_k=5))

    # inception(4e)
    inception4e_output = inception(net=net,
                                   pre_layer=inception4d_output,
                                   conv1x1_num=256,
                                   conv3x3_reduce_num=160,
                                   conv3x3_num=320,
                                   conv5x5_reduce_num=32,
                                   conv5x5_num=128,
                                   maxpool3x3_proj1x1_num=128,
                                   name="inception4e")

    # max pool
    net.inception4_maxpool = L.Pooling(inception4e_output,
                                       kernel_size=2,
                                       stride=2,
                                       pool=P.Pooling.MAX)

    # inception(5a)
    inception5a_output = inception(net=net,
                                   pre_layer=net.inception4_maxpool,
                                   conv1x1_num=256,
                                   conv3x3_reduce_num=160,
                                   conv3x3_num=320,
                                   conv5x5_reduce_num=32,
                                   conv5x5_num=128,
                                   maxpool3x3_proj1x1_num=128,
                                   name="inception5a")

    # inception(5b)
    inception5b_output = inception(net=net,
                                   pre_layer=inception5a_output,
                                   conv1x1_num=384,
                                   conv3x3_reduce_num=192,
                                   conv3x3_num=384,
                                   conv5x5_reduce_num=48,
                                   conv5x5_num=128,
                                   maxpool3x3_proj1x1_num=128,
                                   name="inception5b")

    # avg pool
    net.avgpool7x7_s1 = L.Pooling(inception5b_output,
                                  kernel_size=4,
                                  stride=1,
                                  pool=P.Pooling.AVE)

    # dropout
    net.avgpool7x7_s1_dropout = L.Dropout(
        net.avgpool7x7_s1,
        dropout_param=dict(dropout_ratio=0.4),
        in_place=True)

    # pred fc
    net.loss_pred_fc = L.InnerProduct(
        net.avgpool7x7_s1_dropout,
        num_output=classes,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type="constant", value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    # loss
    if deploy:
        net.prob = L.Softmax(net.loss_pred_fc)
    else:
        net.loss = L.SoftmaxWithLoss(net.loss_pred_fc, net.label)
        net.accuracy = L.Accuracy(
            net.loss_pred_fc,
            net.label,
            include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    with open(net_filename, "a") as f:
        f.write(str(net.to_proto()))
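
A hypothetical invocation of the builder above (CIFAR-10 inputs are 3x32x32; the batch dimension and class count here are assumptions):

# deploy net: input_shape is N x C x H x W
create_cifar10_googlenet([1, 3, 32, 32], classes=10, deploy=True)
# train/test net: input_shape is ignored on this path
create_cifar10_googlenet(None, classes=10, deploy=False)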
Example #15
def create_net(lmdb, batch_size, mean_file, model):

    n = caffe.NetSpec()
    # data layer
    if not model:  # training net (include phase 0 = TRAIN)
        n.data, n.label = L.Data(batch_size=batch_size,
                                 backend=P.Data.LMDB,
                                 source=lmdb,
                                 include=dict(phase=0),
                                 transform_param=dict(scale=1. / 255,
                                                      mirror=True,
                                                      crop_size=227,
                                                      mean_file=mean_file),
                                 ntop=2)
    else:  # test net (include phase 1 = TEST)
        n.data, n.label = L.Data(batch_size=batch_size,
                                 backend=P.Data.LMDB,
                                 source=lmdb,
                                 include=dict(phase=1),
                                 transform_param=dict(scale=1. / 255,
                                                      mirror=True,
                                                      crop_size=227,
                                                      mean_file=mean_file),
                                 ntop=2)

    # convolution layer conv1
    n.conv1 = L.Convolution(
        n.data,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=11,
        stride=4,
        num_output=96,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0))
    # ReLU layer
    n.relu1 = L.ReLU(n.conv1, in_place=True)

    # LRN layer
    n.norm1 = L.LRN(n.conv1, local_size=5, alpha=0.0001, beta=0.75)

    # pooling layer
    n.pool1 = L.Pooling(n.norm1, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    # convolution layer conv2
    n.conv2 = L.Convolution(
        n.pool1,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=5,
        num_output=256,
        pad=2,
        group=2,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0.1))

    # ReLU layer 2
    n.relu2 = L.ReLU(n.conv2, in_place=True)

    # LRN layer 2
    n.norm2 = L.LRN(n.conv2, local_size=5, alpha=0.0001, beta=0.75)

    # pooling layer 2
    n.pool2 = L.Pooling(n.norm2, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    # convolution layer conv3
    n.conv3 = L.Convolution(
        n.pool2,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=3,
        num_output=384,
        pad=1,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0))
    # ReLU layer 3
    n.relu3 = L.ReLU(n.conv3, in_place=True)

    # convolution layer conv4
    n.conv4 = L.Convolution(
        n.conv3,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=3,
        num_output=384,
        pad=1,
        group=2,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0.1))
    # ReLU layer 4
    n.relu4 = L.ReLU(n.conv4, in_place=True)

    # convolution layer conv5
    n.conv5 = L.Convolution(
        n.conv4,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=3,
        num_output=256,
        pad=1,
        group=2,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0.1))
    # ReLU layer 5
    n.relu5 = L.ReLU(n.conv5, in_place=True)

    # pooling layer 5
    n.pool5 = L.Pooling(n.conv5, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    # fully-connected layer fc6
    n.fc6 = L.InnerProduct(
        n.pool5,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        num_output=4096,
        weight_filler=dict(type="gaussian", std=0.005),
        bias_filler=dict(type='constant', value=0.1))

    n.relu6 = L.ReLU(n.fc6, in_place=True)

    # dropout layer 6
    n.drop6 = L.Dropout(n.fc6, dropout_ratio=0.5, in_place=True)  # dropout probability 0.5

    # fully-connected layer fc7
    n.fc7 = L.InnerProduct(
        n.fc6,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        num_output=4096,
        weight_filler=dict(type="gaussian", std=0.005),
        bias_filler=dict(type='constant', value=0.1))

    # ReLU layer 7
    n.relu7 = L.ReLU(n.fc7, in_place=True)

    # dropout layer 7
    n.drop7 = L.Dropout(n.fc7, dropout_ratio=0.5, in_place=True)  # dropout probability 0.5

    # fully-connected layer fc8

    n.fc8 = L.InnerProduct(
        n.fc7,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        num_output=1000,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0))

    if model:
        n.acc = L.Accuracy(n.fc8, n.label)
    else:
        n.loss = L.SoftmaxWithLoss(n.fc8, n.label)

    return n.to_proto()
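
A minimal driver sketch for the AlexNet-style builder above (LMDB and mean-file paths are hypothetical; model=False emits the training net with a loss, model=True the test net with an accuracy top):

with open('train.prototxt', 'w') as f:
    f.write(str(create_net('train_lmdb', 256, 'mean.binaryproto', False)))
with open('test.prototxt', 'w') as f:
    f.write(str(create_net('test_lmdb', 50, 'mean.binaryproto', True)))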
Example #16
def lenet(lmdb_data, lmdb_label, batch_size, deploy, crop=64, mirror=False):
    """Simple LeNet to predict cdf."""
    data_transforms = dict(scale=1.)
    if crop:  # will crop images to [crop]x[crop] with random center
        data_transforms['crop_size'] = crop
    if mirror:  # will randomly flip images
        data_transforms['mirror'] = 1

    n = caffe.NetSpec()
    if deploy:
        input_ = "data"
        dim1 = batch_size
        dim2 = 3  # need to change these manually
        dim3 = 64
        dim4 = 64
        n.data = L.Layer()
    else:
        n.data = L.Data(batch_size=batch_size,
                        backend=P.Data.LMDB,
                        source=lmdb_data,
                        transform_param=data_transforms,
                        ntop=1)
        n.label = L.Data(batch_size=batch_size,
                         backend=P.Data.LMDB,
                         source=lmdb_label,
                         ntop=1)

    # first convolutional layer
    n.conv1 = L.Convolution(n.data,
                            kernel_size=5,
                            num_output=40,
                            weight_filler=dict(type='xavier'))
    n.norm1 = L.BatchNorm(n.conv1)
    n.relu1 = L.ReLU(n.norm1, in_place=True)
    n.pool1 = L.Pooling(n.relu1, kernel_size=2, stride=2, pool=P.Pooling.MAX)

    # second convolutional layer
    n.conv2 = L.Convolution(n.pool1,
                            kernel_size=5,
                            num_output=40,
                            weight_filler=dict(type='xavier'))
    n.norm2 = L.BatchNorm(n.conv2)
    n.relu2 = L.ReLU(n.norm2, in_place=True)
    n.pool2 = L.Pooling(n.relu2, kernel_size=2, stride=2, pool=P.Pooling.MAX)

    # fully connected layers
    n.drop = L.Dropout(n.pool2, dropout_ratio=0.5)
    n.ip1 = L.InnerProduct(n.drop,
                           num_output=600,
                           weight_filler=dict(type='xavier'))
    n.out = L.Sigmoid(n.ip1)
    if deploy:
        deploy_str = ('input: {}\ninput_dim: {}\n'
                      'input_dim: {}\ninput_dim: {}\n'
                      'input_dim: {}').format('"%s"' % input_, dim1, dim2,
                                              dim3, dim4)
        return (deploy_str + '\n' + 'layer {' +
                'layer {'.join(str(n.to_proto()).split('layer {')[2:]))
    else:
        n.loss = L.EuclideanLoss(n.out, n.label)
        return str(n.to_proto())
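
The deploy branch above does its surgery on the serialized string: splitting on 'layer {' and rejoining from the third piece drops exactly one layer, the L.Layer() data placeholder, before the input header is prepended. An equivalent sketch that edits the protobuf instead, mirroring the del net_proto.layer[0] approach used in create_net earlier (helper name and default dims are assumptions):

def lenet_deploy_proto(n, batch_size, channels=3, height=64, width=64):
    net_proto = n.to_proto()
    del net_proto.layer[0]                    # drop the L.Layer() placeholder
    net_proto.input.extend(['data'])          # declare the raw input blob
    net_proto.input_dim.extend([batch_size, channels, height, width])
    return str(net_proto)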
Example #17
def VGGNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False,
               dilated=False, nopool=False, dropout=True, freeze_layers=[]):
    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)}

    assert from_layer in net.keys()
    net.conv1_1 = L.Convolution(net[from_layer], num_output=64, pad=1, kernel_size=3, **kwargs)

    net.relu1_1 = L.ReLU(net.conv1_1, in_place=True)
    net.conv1_2 = L.Convolution(net.relu1_1, num_output=64, pad=1, kernel_size=3, **kwargs)
    net.relu1_2 = L.ReLU(net.conv1_2, in_place=True)

    if nopool:
        name = 'conv1_3'
        net[name] = L.Convolution(net.relu1_2, num_output=64, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool1'
        net[name] = L.Pooling(net.relu1_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv2_1 = L.Convolution(net[name], num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu2_1 = L.ReLU(net.conv2_1, in_place=True)
    net.conv2_2 = L.Convolution(net.relu2_1, num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu2_2 = L.ReLU(net.conv2_2, in_place=True)

    if nopool:
        name = 'conv2_3'
        net[name] = L.Convolution(net.relu2_2, num_output=128, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool2'
        net[name] = L.Pooling(net.relu2_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv3_1 = L.Convolution(net[name], num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_1 = L.ReLU(net.conv3_1, in_place=True)
    net.conv3_2 = L.Convolution(net.relu3_1, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_2 = L.ReLU(net.conv3_2, in_place=True)
    net.conv3_3 = L.Convolution(net.relu3_2, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_3 = L.ReLU(net.conv3_3, in_place=True)

    if nopool:
        name = 'conv3_4'
        net[name] = L.Convolution(net.relu3_3, num_output=256, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool3'
        net[name] = L.Pooling(net.relu3_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv4_1 = L.Convolution(net[name], num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu4_1 = L.ReLU(net.conv4_1, in_place=True)
    net.conv4_2 = L.Convolution(net.relu4_1, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu4_2 = L.ReLU(net.conv4_2, in_place=True)
    net.conv4_3 = L.Convolution(net.relu4_2, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu4_3 = L.ReLU(net.conv4_3, in_place=True)

    if nopool:
        name = 'conv4_4'
        net[name] = L.Convolution(net.relu4_3, num_output=512, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool4'
        net[name] = L.Pooling(net.relu4_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv5_1 = L.Convolution(net[name], num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu5_1 = L.ReLU(net.conv5_1, in_place=True)
    net.conv5_2 = L.Convolution(net.relu5_1, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu5_2 = L.ReLU(net.conv5_2, in_place=True)
    net.conv5_3 = L.Convolution(net.relu5_2, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu5_3 = L.ReLU(net.conv5_3, in_place=True)

    if need_fc:
        if dilated:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3, num_output=512, pad=1, kernel_size=3, stride=1, **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=1)
        else:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3, num_output=512, pad=1, kernel_size=3, stride=2, **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)

        if fully_conv:
            if dilated:
                if reduced:
                    net.fc6 = L.Convolution(net[name], num_output=1024, pad=6, kernel_size=3, dilation=6, **kwargs)
                else:
                    net.fc6 = L.Convolution(net[name], num_output=4096, pad=6, kernel_size=7, dilation=2, **kwargs)
            else:
                if reduced:
                    net.fc6 = L.Convolution(net[name], num_output=1024, pad=3, kernel_size=3, dilation=3, **kwargs)
                else:
                    net.fc6 = L.Convolution(net[name], num_output=4096, pad=3, kernel_size=7, **kwargs)

            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True)

            if reduced:
                net.fc7 = L.Convolution(net.relu6, num_output=1024, kernel_size=1, **kwargs)
            else:
                net.fc7 = L.Convolution(net.relu6, num_output=4096, kernel_size=1, **kwargs)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True)
        else:
            net.fc6 = L.InnerProduct(net[name], num_output=4096)  # net[name] also covers the nopool case
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True)
            net.fc7 = L.InnerProduct(net.relu6, num_output=4096)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True)

    # Update freeze layers.
    kwargs['param'] = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]
    layers = net.keys()
    for freeze_layer in freeze_layers:
        if freeze_layer in layers:
            net.update(freeze_layer, kwargs)

    return net
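
VGGNetBody only appends layers to an existing NetSpec, so the caller supplies the data layer and points at it via from_layer. A hypothetical attachment sketch (the LMDB path is an assumption):

net = caffe.NetSpec()
net.data, net.label = L.Data(batch_size=32, backend=P.Data.LMDB,
                             source='train_lmdb', ntop=2)
# the reduced, dilated, fully-convolutional variant
VGGNetBody(net, from_layer='data', need_fc=True, fully_conv=True,
           reduced=True, dilated=True)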
def mfb_coatt(mode, batchsize, T, question_vocab_size, folder):
    n = caffe.NetSpec()
    mode_str = json.dumps({
        'mode': mode,
        'batchsize': batchsize,
        'folder': folder
    })
    if mode == 'val':
        n.data, n.cont, n.img_feature, n.label, n.glove = L.Python( \
            module='vqa_data_layer', layer='VQADataProviderLayer', \
            param_str=mode_str, ntop=5 )
    else:
        n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(\
            module='vqa_data_layer_kld', layer='VQADataProviderLayer', \
            param_str=mode_str, ntop=5 )
    n.embed = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \
                         weight_filler=dict(type='xavier'))
    n.embed_tanh = L.TanH(n.embed)
    concat_word_embed = [n.embed_tanh, n.glove]
    n.concat_embed = L.Concat(*concat_word_embed,
                              concat_param={'axis': 2})  # T x N x 600

    # LSTM
    n.lstm1 = L.LSTM(\
                   n.concat_embed, n.cont,\
                   recurrent_param=dict(\
                       num_output=config.LSTM_UNIT_NUM,\
                       weight_filler=dict(type='xavier')))
    n.lstm1_droped = L.Dropout(
        n.lstm1, dropout_param={'dropout_ratio': config.LSTM_DROPOUT_RATIO})
    n.lstm1_resh = L.Permute(n.lstm1_droped,
                             permute_param=dict(order=[1, 2, 0]))
    n.lstm1_resh2 = L.Reshape(n.lstm1_resh, \
            reshape_param=dict(shape=dict(dim=[0,0,0,1])))
    '''
    Question Attention
    '''
    n.qatt_conv1 = L.Convolution(n.lstm1_resh2,
                                 kernel_size=1,
                                 stride=1,
                                 num_output=512,
                                 pad=0,
                                 weight_filler=dict(type='xavier'))
    n.qatt_relu = L.ReLU(n.qatt_conv1)
    n.qatt_conv2 = L.Convolution(n.qatt_relu,
                                 kernel_size=1,
                                 stride=1,
                                 num_output=config.NUM_QUESTION_GLIMPSE,
                                 pad=0,
                                 weight_filler=dict(type='xavier'))
    n.qatt_reshape = L.Reshape(
        n.qatt_conv2,
        reshape_param=dict(shape=dict(dim=[
            -1, config.NUM_QUESTION_GLIMPSE, config.MAX_WORDS_IN_QUESTION, 1
        ])))  # N*NUM_QUESTION_GLIMPSE*15
    n.qatt_softmax = L.Softmax(n.qatt_reshape, axis=2)

    qatt_maps = L.Slice(n.qatt_softmax,
                        ntop=config.NUM_QUESTION_GLIMPSE,
                        slice_param={'axis': 1})
    dummy_lstm = L.DummyData(shape=dict(dim=[batchsize, 1]),
                             data_filler=dict(type='constant', value=1),
                             ntop=1)
    qatt_feature_list = []
    for i in xrange(config.NUM_QUESTION_GLIMPSE):
        if config.NUM_QUESTION_GLIMPSE == 1:
            n.__setattr__(
                'qatt_feat%d' % i,
                L.SoftAttention(n.lstm1_resh2, qatt_maps, dummy_lstm))
        else:
            n.__setattr__(
                'qatt_feat%d' % i,
                L.SoftAttention(n.lstm1_resh2, qatt_maps[i], dummy_lstm))
        qatt_feature_list.append(n.__getattr__('qatt_feat%d' % i))
    n.qatt_feat_concat = L.Concat(*qatt_feature_list)
    '''
    Image Attention with MFB
    '''
    n.q_feat_resh = L.Reshape(
        n.qatt_feat_concat, reshape_param=dict(shape=dict(dim=[0, -1, 1, 1])))
    n.i_feat_resh = L.Reshape(
        n.img_feature,
        reshape_param=dict(shape=dict(
            dim=[0, -1, config.IMG_FEAT_WIDTH, config.IMG_FEAT_WIDTH])))

    n.iatt_q_proj = L.InnerProduct(n.q_feat_resh,
                                   num_output=config.JOINT_EMB_SIZE,
                                   weight_filler=dict(type='xavier'))
    n.iatt_q_resh = L.Reshape(
        n.iatt_q_proj,
        reshape_param=dict(shape=dict(dim=[-1, config.JOINT_EMB_SIZE, 1, 1])))
    n.iatt_q_tile1 = L.Tile(n.iatt_q_resh, axis=2, tiles=config.IMG_FEAT_WIDTH)
    n.iatt_q_tile2 = L.Tile(n.iatt_q_tile1,
                            axis=3,
                            tiles=config.IMG_FEAT_WIDTH)

    n.iatt_i_conv = L.Convolution(n.i_feat_resh,
                                  kernel_size=1,
                                  stride=1,
                                  num_output=config.JOINT_EMB_SIZE,
                                  pad=0,
                                  weight_filler=dict(type='xavier'))
    n.iatt_i_resh1 = L.Reshape(n.iatt_i_conv,
                               reshape_param=dict(shape=dict(dim=[
                                   -1, config.JOINT_EMB_SIZE,
                                   config.IMG_FEAT_WIDTH, config.IMG_FEAT_WIDTH
                               ])))
    n.iatt_iq_eltwise = L.Eltwise(n.iatt_q_tile2,
                                  n.iatt_i_resh1,
                                  eltwise_param=dict(operation=0))  # 0 = PROD (element-wise product)
    n.iatt_iq_droped = L.Dropout(
        n.iatt_iq_eltwise,
        dropout_param={'dropout_ratio': config.MFB_DROPOUT_RATIO})
    n.iatt_iq_resh1 = L.Reshape(n.iatt_iq_droped,
                                reshape_param=dict(shape=dict(
                                    dim=[-1, config.JOINT_EMB_SIZE, 196, 1])))
    n.iatt_iq_permute1 = L.Permute(n.iatt_iq_resh1,
                                   permute_param=dict(order=[0, 2, 1, 3]))
    n.iatt_iq_resh2 = L.Reshape(
        n.iatt_iq_permute1,
        reshape_param=dict(shape=dict(dim=[
            -1, config.IMG_FEAT_SIZE, config.MFB_OUT_DIM, config.MFB_FACTOR_NUM
        ])))
    n.iatt_iq_sumpool = L.Pooling(n.iatt_iq_resh2, pool=P.Pooling.SUM, \
                              pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.iatt_iq_permute2 = L.Permute(n.iatt_iq_sumpool,
                                   permute_param=dict(order=[0, 2, 1, 3]))

    n.iatt_iq_sqrt = L.SignedSqrt(n.iatt_iq_permute2)
    n.iatt_iq_l2 = L.L2Normalize(n.iatt_iq_sqrt)

    ## 2 conv layers 1000 -> 512 -> 2
    n.iatt_conv1 = L.Convolution(n.iatt_iq_l2,
                                 kernel_size=1,
                                 stride=1,
                                 num_output=512,
                                 pad=0,
                                 weight_filler=dict(type='xavier'))
    n.iatt_relu = L.ReLU(n.iatt_conv1)
    n.iatt_conv2 = L.Convolution(n.iatt_relu,
                                 kernel_size=1,
                                 stride=1,
                                 num_output=config.NUM_IMG_GLIMPSE,
                                 pad=0,
                                 weight_filler=dict(type='xavier'))
    n.iatt_resh = L.Reshape(
        n.iatt_conv2,
        reshape_param=dict(shape=dict(
            dim=[-1, config.NUM_IMG_GLIMPSE, config.IMG_FEAT_SIZE])))
    n.iatt_softmax = L.Softmax(n.iatt_resh, axis=2)
    n.iatt_softmax_resh = L.Reshape(
        n.iatt_softmax,
        reshape_param=dict(shape=dict(dim=[
            -1, config.NUM_IMG_GLIMPSE, config.IMG_FEAT_WIDTH,
            config.IMG_FEAT_WIDTH
        ])))
    iatt_maps = L.Slice(n.iatt_softmax_resh,
                        ntop=config.NUM_IMG_GLIMPSE,
                        slice_param={'axis': 1})
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1),
                        ntop=1)
    iatt_feature_list = []
    for i in xrange(config.NUM_IMG_GLIMPSE):
        if config.NUM_IMG_GLIMPSE == 1:
            n.__setattr__('iatt_feat%d' % i,
                          L.SoftAttention(n.i_feat_resh, iatt_maps, dummy))
        else:
            n.__setattr__('iatt_feat%d' % i,
                          L.SoftAttention(n.i_feat_resh, iatt_maps[i], dummy))
        n.__setattr__('iatt_feat%d_resh'%i, L.Reshape(n.__getattr__('iatt_feat%d'%i), \
                                reshape_param=dict(shape=dict(dim=[0,-1]))))
        iatt_feature_list.append(n.__getattr__('iatt_feat%d_resh' % i))
    n.iatt_feat_concat = L.Concat(*iatt_feature_list)
    n.iatt_feat_concat_resh = L.Reshape(
        n.iatt_feat_concat, reshape_param=dict(shape=dict(dim=[0, -1, 1, 1])))
    '''
    Fine-grained Image-Question MFB fusion
    '''

    n.mfb_q_proj = L.InnerProduct(n.q_feat_resh,
                                  num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_i_proj = L.InnerProduct(n.iatt_feat_concat_resh,
                                  num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_iq_eltwise = L.Eltwise(n.mfb_q_proj,
                                 n.mfb_i_proj,
                                 eltwise_param=dict(operation=0))  # 0 = PROD (element-wise product)
    n.mfb_iq_drop = L.Dropout(
        n.mfb_iq_eltwise,
        dropout_param={'dropout_ratio': config.MFB_DROPOUT_RATIO})
    n.mfb_iq_resh = L.Reshape(
        n.mfb_iq_drop,
        reshape_param=dict(shape=dict(
            dim=[-1, 1, config.MFB_OUT_DIM, config.MFB_FACTOR_NUM])))
    n.mfb_iq_sumpool = L.Pooling(n.mfb_iq_resh, pool=P.Pooling.SUM, \
                                      pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.mfb_out = L.Reshape(n.mfb_iq_sumpool,\
                                    reshape_param=dict(shape=dict(dim=[-1,config.MFB_OUT_DIM])))
    n.mfb_sign_sqrt = L.SignedSqrt(n.mfb_out)
    n.mfb_l2 = L.L2Normalize(n.mfb_sign_sqrt)

    n.prediction = L.InnerProduct(n.mfb_l2,
                                  num_output=config.NUM_OUTPUT_UNITS,
                                  weight_filler=dict(type='xavier'))
    if mode == 'val':
        n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    else:
        n.loss = L.SoftmaxKLDLoss(n.prediction, n.label)
    return n.to_proto()
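
A hypothetical driver for the MFB co-attention net above (batch size, vocabulary size, and folder are assumptions; T mirrors config.MAX_WORDS_IN_QUESTION):

proto = mfb_coatt('train', 64, config.MAX_WORDS_IN_QUESTION, 20000, './model')
with open('./model/train.prototxt', 'w') as f:
    f.write(str(proto))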
def modified_u_net(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split,
                         mean=(41.4661, 69.1061, 126.993),
                         seed=1337)
    if split == 'train':
        pydata_params['train_dir'] = '../image_augmentor/DRIVE/training'
        pylayer = 'TRAINSegDataLayer'
    else:
        pydata_params['val_dir'] = '../image_augmentor/DRIVE/val'
        pylayer = 'VALSegDataLayer'
    n.data, n.label = L.Python(module='train_val',
                               layer=pylayer,
                               ntop=2,
                               param_str=str(pydata_params))

    # layer group 1
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 32)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 32)
    n.pool1 = max_pool(n.relu1_2)

    # layer group 2
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 64)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 64)
    n.pool2 = max_pool(n.relu2_2)

    # layer group 3
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 128)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 128)

    # layer group 4
    n.upconv4 = L.Deconvolution(n.relu3_2,
                                param=[dict(lr_mult=1),
                                       dict(lr_mult=2)],
                                convolution_param=dict(num_output=128,
                                                       kernel_size=2,
                                                       stride=2))
    n.concat4 = L.Concat(n.upconv4, n.relu2_2, axis=1)
    n.conv4_1, n.relu4_1 = conv_relu(n.concat4, 64)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 64)

    # layer group 5
    n.upconv5 = L.Deconvolution(n.relu4_2,
                                param=[dict(lr_mult=1),
                                       dict(lr_mult=2)],
                                convolution_param=dict(num_output=64,
                                                       kernel_size=2,
                                                       stride=2))
    n.concat5 = L.Concat(n.upconv5, n.conv1_2, axis=1)
    n.conv5_1, n.relu5_1 = conv_relu(n.concat5, 32)
    # n.drop5 = L.Dropout(n.relu5_1, dropout_ratio=0.2, in_place=True)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 32)

    # layer group 6
    n.score = L.Convolution(
        n.relu5_2,
        pad=0,
        kernel_size=1,
        num_output=2,
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.seg = L.Dropout(n.score, dropout_ratio=0.5, in_place=True)

    # others
    n.loss = L.SoftmaxWithLoss(n.seg,
                               n.label,
                               loss_param=dict(normalize=False))
    # n.softmax = L.Softmax(n.seg,
    #                       include={'phase':caffe.TEST})
    # n.argmax = L.ArgMax(n.softmax, axis=1,
    #                     include={'phase':caffe.TEST})
    # n.accuracy = L.Accuracy(n.seg, n.label, exclude={'stage': 'deploy'})

    return n.to_proto()
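
Each decoder stage above upsamples with a learned 2x2, stride-2 deconvolution and concatenates the matching encoder feature map along the channel axis before two 3x3 convolutions. A minimal driver sketch (file name is hypothetical):

with open('unet_train.prototxt', 'w') as f:
    f.write(str(modified_u_net('train')))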
Example #20
    def define_structure(self, stage):

        n = caffe.NetSpec()

        if stage != CaffeLocations.STAGE_DEPLOY:
            source_params = dict(stage=stage)
            source_params['data_dir'] = self.DATA_DIR
            source_params['split_dir'] = self.SPLIT_DIR
            n.data, n.label = L.Python(module='DataLayer',
                                       layer='DataLayer',
                                       ntop=2,
                                       param_str=str(source_params))
        else:
            n.data = L.Input(shape=dict(dim=[1, 3, self.WSIZE, self.WSIZE]))

        # the base net
        n.conv1_1, n.relu1_1 = conv_relu(n.data, 32, pad=85)
        n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 32)
        n.pool1 = max_pool(n.conv1_2)

        n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 64)
        n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 64)
        n.pool2 = max_pool(n.relu2_2)

        n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 128)
        n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 128)
        n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 128)
        n.pool3 = max_pool(n.relu3_3)

        n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 256)
        n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 256)
        n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 256)
        n.pool4 = max_pool(n.relu4_3)

        n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 256)
        n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 256)
        n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 256)
        n.pool5 = max_pool(n.relu5_3)

        # fully conv
        n.fc6, n.relu6 = conv_relu(n.pool5, 2048, ks=7, pad=0)
        n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)

        n.fc7, n.relu7 = conv_relu(n.drop6, 2048, ks=1, pad=0)
        n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

        n.score_fr = L.Convolution(
            n.drop7,
            num_output=CaffeLocations.NUM_LABELS,
            kernel_size=1,
            pad=0,
            param=[
                dict(lr_mult=1, decay_mult=1),
                dict(lr_mult=2, decay_mult=0)
            ],
            weight_filler=dict(type='xavier'),
            bias_filler=dict(
                type='constant'))  # must be 1 x num_classes x 1 x 1

        n.upscore_a = L.Deconvolution(n.score_fr,
                                      convolution_param=dict(
                                          num_output=CaffeLocations.NUM_LABELS,
                                          kernel_size=4,
                                          stride=2,
                                          bias_term=False,
                                          weight_filler=dict(type='xavier'),
                                          bias_filler=dict(type='constant')),
                                      param=[dict(lr_mult=1, decay_mult=1)])
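
        # FCN-8s-style skip connection: project pool4 to class scores, crop
        # them to the 2x-upsampled coarse scores, then fuse by element-wise sum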

        n.score_pool4 = L.Convolution(n.pool4,
                                      num_output=CaffeLocations.NUM_LABELS,
                                      kernel_size=1,
                                      pad=0,
                                      param=[
                                          dict(lr_mult=1, decay_mult=1),
                                          dict(lr_mult=2, decay_mult=0)
                                      ],
                                      weight_filler=dict(type='xavier'),
                                      bias_filler=dict(type='constant'))

        n.score_pool4c = crop(n.score_pool4, n.upscore_a)

        n.fuse_pool4 = L.Eltwise(n.upscore_a,
                                 n.score_pool4c,
                                 operation=P.Eltwise.SUM)

        n.upscore_pool4 = L.Deconvolution(
            n.fuse_pool4,
            convolution_param=dict(num_output=CaffeLocations.NUM_LABELS,
                                   kernel_size=4,
                                   stride=2,
                                   bias_term=False),
            param=[dict(lr_mult=1, decay_mult=1)])

        n.score_pool3 = L.Convolution(n.pool3,
                                      num_output=CaffeLocations.NUM_LABELS,
                                      kernel_size=1,
                                      pad=0,
                                      param=[
                                          dict(lr_mult=1, decay_mult=1),
                                          dict(lr_mult=2, decay_mult=0)
                                      ],
                                      weight_filler=dict(type='xavier'),
                                      bias_filler=dict(type='constant'))

        n.score_pool3c = crop(n.score_pool3, n.upscore_pool4)
        n.fuse_pool3 = L.Eltwise(n.upscore_pool4,
                                 n.score_pool3c,
                                 operation=P.Eltwise.SUM)

        n.upscore8 = L.Deconvolution(n.fuse_pool3,
                                     convolution_param=dict(
                                         num_output=CaffeLocations.NUM_LABELS,
                                         kernel_size=16,
                                         stride=8,
                                         bias_term=False),
                                     param=[dict(lr_mult=1, decay_mult=1)])

        n.score = crop(n.upscore8, n.data)

        if stage != CaffeLocations.STAGE_DEPLOY:
            n.loss = L.SoftmaxWithLoss(n.score,
                                       n.label,
                                       loss_param=dict(normalize=False))
        #else:
        #    n.output = L.Softmax(n.score)

        # n.loss = L.Python(n.score, n.label, module='LossLayer', layer='TopoLossLayer', loss_weight=1)

        return n.to_proto()
Example #21
def setLayers(data_source,
              batch_size,
              layername,
              kernel,
              stride,
              outCH,
              label_name,
              transform_param_in,
              deploy=False):
    # it is tricky to produce the deploy prototxt file, as the data input is not from a layer, so we have to create a workaround
    # producing training and testing prototxt files is pretty straightforward
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)

    # produce data definition for deploy net
    if deploy == False:
        n.data, n.tops['label'] = L.CPMData(cpmdata_param=dict(
            backend=1, source=data_source, batch_size=batch_size),
                                            transform_param=transform_param_in,
                                            ntop=2)
        n.tops[label_name[1]], n.tops[label_name[0]] = L.Slice(
            n.label, slice_param=dict(axis=1, slice_point=15), ntop=2)
    else:
        input = "data"
        dim1 = 1
        dim2 = 4
        dim3 = 368
        dim4 = 368
        # make an empty "data" layer so the next layer accepting input will be able to take the correct blob name "data",
        # we will later have to remove this layer from the serialization string, since this is just a placeholder
        n.data = L.Layer()

    # split the 4-channel input into the 3-channel image and the 1-channel center map
    n.image, n.center_map = L.Slice(n.data,
                                    slice_param=dict(axis=1, slice_point=3),
                                    ntop=2)
    n.pool_center_lower = L.Pooling(n.center_map,
                                    kernel_size=9,
                                    stride=8,
                                    pool=P.Pooling.AVE)

    # now just follow the layername tokens, e.g. 'CPCPCPCPCCCC...'
    last_layer = 'image'
    stage = 1
    conv_counter = 1
    pool_counter = 1
    drop_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0
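
    # layername token legend (as handled below):
    #   C = convolution (+ReLU unless followed by L)   P = max pooling
    #   L = Euclidean loss (closes a stage)            D = dropout (train/test nets only)
    #   @ = concat with previous stage + center map    $ = share-point marker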

    for l in range(0, len(layername)):
        if layername[l] == 'C':
            if state == 'image':
                conv_name = 'conv%d_stage%d' % (conv_counter, stage)
            else:
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
            if stage == 1:
                lr_m = 5
            else:
                lr_m = 1
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer = conv_name
            if layername[l + 1] != 'L':
                if (state == 'image'):
                    ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer],
                                              in_place=True)
                else:
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer],
                                              in_place=True)
                last_layer = ReLUname
            conv_counter += 1
        elif layername[l] == 'P':  # Pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(
                n.tops[last_layer],
                kernel_size=kernel[l],
                stride=stride[l],
                pool=P.Pooling.MAX)
            last_layer = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
        elif layername[l] == 'L':
            # Loss: the loss layer exists only in the training/testing nets, not in the deploy net.
            if deploy == False:
                if stage == 1:
                    n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                        n.tops[last_layer], n.tops[label_name[0]])
                else:
                    n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                        n.tops[last_layer], n.tops[label_name[1]])

            stage += 1
            last_connect = last_layer
            last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            state = 'image'
        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer],
                    in_place=True,
                    dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            n.tops['concat_stage%d' % stage] = L.Concat(
                n.tops[last_layer],
                n.tops[last_connect],
                n.pool_center_lower,
                concat_param=dict(axis=1))
            conv_counter = 1
            state = 'fuse'
            last_layer = 'concat_stage%d' % stage
        elif layername[l] == '$':
            if not share_point:
                share_point = last_layer
            else:
                last_layer = share_point

    # final process
    stage -= 1
    if stage == 1:
        n.silence = L.Silence(n.pool_center_lower, ntop=0)

    if deploy == False:
        return str(n.to_proto())
    else:
        # for generating the deploy net
        # generate the input information header string
        deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input + '"', dim1, dim2, dim3, dim4)
        # assemble the input header with the net layers string.  remove the first placeholder layer from the net string.
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(
            str(n.to_proto()).split('layer {')[2:])
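
# Hedged usage sketch (an assumption, not part of the original example): driving
# setLayers() above to emit train/test and deploy prototxt files. The layer-code
# string and hyperparameters here are hypothetical placeholders, and serializing
# the CPMData layer assumes a Caffe fork that defines it.
def write_cpm_nets():
    layername = ['C', 'P', 'C', 'P', 'C', 'C', 'L']
    kernel    = [9,   3,   9,   3,   5,   1,   0]
    stride    = [1,   2,   1,   2,   1,   1,   0]
    outCH     = [128, 128, 128, 128, 512, 15,  0]
    labels    = ['label_1st', 'label_lower']
    with open('pose_train_test.prototxt', 'w') as f:
        f.write(setLayers('train.lmdb', 8, layername, kernel, stride, outCH,
                          labels, dict(), deploy=False))
    # the deploy net replaces the data layer with a fixed-size input header
    with open('pose_deploy.prototxt', 'w') as f:
        f.write(setLayers('', 1, layername, kernel, stride, outCH,
                          labels, dict(), deploy=True))
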
Exemple #22
0
def cnn(split):
    n = caffe.NetSpec()
    pydata_params = dict(dataset_dir='/home/kevin/dataset/normal_feature',
                         variable='normal_map',
                         split=split,
                         mean=(0, 0, 0),
                         seed=1337,
                         batch_size=256,
                         img_size=(250, 250))
    if split == 'deploy':
        # one Input top with shape 1x3x130x130 (the original ntop=2 with a single
        # assigned top, and one dict per dimension, would not build a valid net)
        n.img = L.Input(name='input',
                        ntop=1,
                        shape=[dict(dim=[1, 3, 130, 130])])
    else:
        if split == 'train':
            pydata_params['dtype'] = 'frame'
            pylayer = 'ModelNetDataLayer'
        else:
            pydata_params['dtype'] = 'object'
            pylayer = 'ModelNetDataLayer'

        n.img, n.label = L.Python(module='data_layers.model_net_layer',
                                  layer=pylayer,
                                  ntop=2,
                                  param_str=str(pydata_params))

    # the base net
    n.conv1, n.relu1 = conv_relu("conv1", n.img, 96, ks=11, stride=4, pad=0)
    n.pool1 = max_pool(n.relu1, ks=3)
    n.norm1 = L.LRN(n.pool1,
                    lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))
    # n.bn1 = L.BatchNorm(n.pool1, param=[dict(lr_mult=0),dict(lr_mult=0),dict(lr_mult=0)], batch_norm_param=dict(use_global_stats=True))

    n.conv2, n.relu2 = conv_relu("conv2", n.norm1, 256, ks=5, pad=2, group=2)
    n.pool2 = max_pool(n.relu2, ks=3)
    n.norm2 = L.LRN(n.pool2,
                    lrn_param=dict(local_size=5, alpha=0.0005, beta=0.75, k=2))
    # n.bn2 = L.BatchNorm(n.pool2, param=[dict(lr_mult=0),dict(lr_mult=0),dict(lr_mult=0)], batch_norm_param=dict(use_global_stats=True))

    n.conv3, n.relu3 = conv_relu("conv3", n.norm2, 384, ks=3, pad=1, group=2)

    n.conv4, n.relu4 = conv_relu("conv4", n.relu3, 256, ks=3, pad=1, group=2)

    n.pool5 = max_pool(n.relu4, ks=3)

    n.fc6, n.relu6 = fc_relu(n.pool5, 4096, lr1=1, lr2=2)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = fc_relu(n.drop6, 4096, lr1=1, lr2=2)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
    n.fc8 = fc(n.drop7, 40, lr1=1, lr2=2)

    if split != 'deploy':
        #n.accuracyt = L.Accuracy(n.predictT, n.labelT)
        #n.losst = L.SoftmaxWithLoss(n.predictT, n.labelT)

        n.accuracy = L.Accuracy(n.fc8, n.label)
        n.loss = L.SoftmaxWithLoss(n.fc8, n.label)

    # n.display = L.Scale(n.corr, param=[dict(lr_mult=0)], filler=dict(type='constant',value=1.0))
    # n.fc9_bn = L.BatchNorm(n.relu9, param=[dict(lr_mult=0),dict(lr_mult=0),dict(lr_mult=0)], batch_norm_param=dict(use_global_stats=True))

    return n.to_proto()
Exemple #23
0
def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
            seed=1337)
    if split.startswith('train'):
        pydata_params['sbdd_dir'] = '../data/sbdd-subsampl/dataset'
        pylayer = 'SBDDSegDataLayer'
    else:
        pydata_params['voc_dir'] = '../data/pascal-subsampl/VOC2011'
        pylayer = 'VOCSegDataLayer'

    n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
            ntop=2, param_str=str(pydata_params))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr = L.Convolution(n.drop7, num_output=16, kernel_size=1, pad=0,
        weight_filler=dict(type='xavier'),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.upscore2 = L.Deconvolution(n.score_fr,
        convolution_param=dict(num_output=16, kernel_size=4, stride=2,
            weight_filler=dict(type='xavier'),
            bias_term=False),
        param=[dict(lr_mult=0)])

    # scale pool4 skip for compatibility
    n.scale_pool4 = L.Scale(n.pool4, filler=dict(type='constant',
        value=0.01), param=[dict(lr_mult=0)])
    n.score_pool4 = L.Convolution(n.scale_pool4, num_output=16, kernel_size=1, pad=0,
        weight_filler=dict(type='xavier'),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.score_pool4c = crop(n.score_pool4, n.upscore2)
    n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c,
            operation=P.Eltwise.SUM)
    n.upscore_pool4 = L.Deconvolution(n.fuse_pool4,
        convolution_param=dict(num_output=16, kernel_size=4, stride=2,
            weight_filler=dict(type='xavier'),
            bias_term=False),
        param=[dict(lr_mult=0)])

    # scale pool3 skip for compatibility
    n.scale_pool3 = L.Scale(n.pool3, filler=dict(type='constant',
        value=0.0001), param=[dict(lr_mult=0)])
    n.score_pool3 = L.Convolution(n.scale_pool3, num_output=16, kernel_size=1, pad=0,
        weight_filler=dict(type='xavier'),
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.score_pool3c = crop(n.score_pool3, n.upscore_pool4)
    n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c,
            operation=P.Eltwise.SUM)
    n.upscore8 = L.Deconvolution(n.fuse_pool3,
        convolution_param=dict(num_output=16, kernel_size=16, stride=8,
            weight_filler=dict(type='xavier'),
            bias_term=False),
        param=[dict(lr_mult=0)])

    n.score = crop(n.upscore8, n.data)
    n.loss = L.SoftmaxWithLoss(n.score, n.label,
            loss_param=dict(normalize=False, ignore_label=255))

    return n.to_proto()
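
# Hedged usage sketch (an assumption, not part of the original example): how a
# split-driven generator like fcn() above is typically materialized to disk; the
# split and file names here are hypothetical.
def write_fcn_nets():
    for split in ('train', 'val'):
        with open('%s.prototxt' % split, 'w') as f:
            f.write(str(fcn(split)))
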
def setLayers_twoBranches(data_source,
                          batch_size,
                          layername,
                          kernel,
                          stride,
                          outCH,
                          label_name,
                          transform_param_in,
                          deploy=False,
                          batchnorm=0,
                          lr_mult_distro=[1, 1, 1]):
    # it is tricky to produce the deploy prototxt file, as the data input is not from a layer, so we have to create a workaround
    # producing training and testing prototxt files is pretty straightforward
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)
    num_parts = transform_param_in['num_parts']

    if deploy == False and "lmdb" not in data_source:
        if (len(label_name) == 1):
            n.data, n.tops[label_name[0]] = L.HDF5Data(hdf5_data_param=dict(
                batch_size=batch_size, source=data_source),
                                                       ntop=2)
        elif (len(label_name) == 2):
            n.data, n.tops[label_name[0]], n.tops[label_name[1]] = L.HDF5Data(
                hdf5_data_param=dict(batch_size=batch_size,
                                     source=data_source),
                ntop=3)
    # produce data definition for the train/test net reading CPMData from LMDB (the deploy case is handled below)
    elif deploy == False:
        n.data, n.tops['label'] = L.CPMData(
            data_param=dict(backend=1,
                            source=data_source,
                            batch_size=batch_size),
            cpm_transform_param=transform_param_in,
            ntop=2)
        n.tops[label_name[2]], n.tops[label_name[3]], n.tops[
            label_name[4]], n.tops[label_name[5]] = L.Slice(
                n.label,
                slice_param=dict(
                    axis=1, slice_point=[38, num_parts + 1, num_parts + 39]),
                ntop=4)
        n.tops[label_name[0]] = L.Eltwise(n.tops[label_name[2]],
                                          n.tops[label_name[4]],
                                          operation=P.Eltwise.PROD)
        n.tops[label_name[1]] = L.Eltwise(n.tops[label_name[3]],
                                          n.tops[label_name[5]],
                                          operation=P.Eltwise.PROD)

    else:
        input = "data"
        dim1 = 1
        dim2 = 4
        dim3 = 368
        dim4 = 368
        # make an empty "data" layer so the next layer accepting input will be able to take the correct blob name "data",
        # we will later have to remove this layer from the serialization string, since this is just a placeholder
        n.data = L.Layer()

    # something special before everything
    n.image, n.center_map = L.Slice(n.data,
                                    slice_param=dict(axis=1, slice_point=3),
                                    ntop=2)
    n.silence2 = L.Silence(n.center_map, ntop=0)
    #n.pool_center_lower = L.Pooling(n.center_map, kernel_size=9, stride=8, pool=P.Pooling.AVE)

    # walk the layer-code array (e.g. CPCPCPCPCCCC...): V=pretrained VGG conv, B=VGG block break,
    # C/C2=conv (single/two-branch), P=pool, D=dropout, L/L2/L3=loss, @=concat, $=share point
    last_layer = ['image', 'image']
    stage = 1
    conv_counter = 1
    pool_counter = 1
    drop_counter = 1
    local_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0

    for l in range(0, len(layername)):
        if layername[l] == 'V':  #pretrained VGG layers
            conv_name = 'conv%d_%d' % (pool_counter, local_counter)
            lr_m = lr_mult_distro[0]
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print('%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m))
            ReLUname = 'relu%d_%d' % (pool_counter, local_counter)
            n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
            local_counter += 1
            print(ReLUname)
        if layername[l] == 'B':
            pool_counter += 1
            local_counter = 1
        if layername[l] == 'C':
            if state == 'image':
                #conv_name = 'conv%d_stage%d' % (conv_counter, stage)
                conv_name = 'conv%d_%d_CPM' % (
                    pool_counter, local_counter
                )  # no image state in subsequent stages
                lr_m = lr_mult_distro[1]  # same multiplier regardless of stage
            else:  # fuse
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
                lr_m = lr_mult_distro[2]
                conv_counter += 1
            #if stage == 1:
            #    lr_m = 1
            #else:
            #    lr_m = lr_sub
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print('%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m))

            if layername[l + 1] != 'L':
                if (state == 'image'):
                    if (batchnorm == 1):
                        batchnorm_name = 'bn%d_stage%d' % (conv_counter, stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[
                                dict(lr_mult=0),
                                dict(lr_mult=0),
                                dict(lr_mult=0)
                            ])
                        #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                        last_layer[0] = batchnorm_name
                    #ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                    ReLUname = 'relu%d_%d_CPM' % (pool_counter, local_counter)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]],
                                              in_place=True)
                else:
                    if (batchnorm == 1):
                        batchnorm_name = 'Mbn%d_stage%d' % (conv_counter,
                                                            stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[
                                dict(lr_mult=0),
                                dict(lr_mult=0),
                                dict(lr_mult=0)
                            ])
                        #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                        last_layer[0] = batchnorm_name
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]],
                                              in_place=True)
                #last_layer = ReLUname
                print(ReLUname)

            #conv_counter += 1
            local_counter += 1

        elif layername[l] == 'C2':
            for level in range(0, 2):
                if state == 'image':
                    #conv_name = 'conv%d_stage%d' % (conv_counter, stage)
                    conv_name = 'conv%d_%d_CPM_L%d' % (
                        pool_counter, local_counter, level + 1
                    )  # no image state in subsequent stages
                    lr_m = lr_mult_distro[1]  # same multiplier regardless of stage
                else:  # fuse
                    conv_name = 'Mconv%d_stage%d_L%d' % (conv_counter, stage,
                                                         level + 1)
                    lr_m = lr_mult_distro[2]
                    #conv_counter += 1
                #if stage == 1:
                #    lr_m = 1
                #else:
                #    lr_m = lr_sub
                if layername[l + 1] == 'L2' or layername[l + 1] == 'L3':
                    if level == 0:
                        outCH[l] = 38
                    else:
                        outCH[l] = 19

                n.tops[conv_name] = L.Convolution(
                    n.tops[last_layer[level]],
                    kernel_size=kernel[l],
                    num_output=outCH[l],
                    pad=int(math.floor(kernel[l] / 2)),
                    param=[
                        dict(lr_mult=lr_m, decay_mult=1),
                        dict(lr_mult=lr_m * 2, decay_mult=0)
                    ],
                    weight_filler=dict(type='gaussian', std=0.01),
                    bias_filler=dict(type='constant'))
                last_layer[level] = conv_name
                print('%s\tch=%d\t%.1f' % (last_layer[level], outCH[l], lr_m))

                if layername[l + 1] != 'L2' and layername[l + 1] != 'L3':
                    if (state == 'image'):
                        if (batchnorm == 1):
                            batchnorm_name = 'bn%d_stage%d_L%d' % (
                                conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[
                                    dict(lr_mult=0),
                                    dict(lr_mult=0),
                                    dict(lr_mult=0)
                                ])
                            #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                            last_layer[level] = batchnorm_name
                        #ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                        ReLUname = 'relu%d_%d_CPM_L%d' % (
                            pool_counter, local_counter, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]],
                                                  in_place=True)
                    else:
                        if (batchnorm == 1):
                            batchnorm_name = 'Mbn%d_stage%d_L%d' % (
                                conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[
                                    dict(lr_mult=0),
                                    dict(lr_mult=0),
                                    dict(lr_mult=0)
                                ])
                            #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                            last_layer[level] = batchnorm_name
                        ReLUname = 'Mrelu%d_stage%d_L%d' % (conv_counter,
                                                            stage, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]],
                                                  in_place=True)
                    print(ReLUname)

            conv_counter += 1
            local_counter += 1

        elif layername[l] == 'P':  # Pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                stride=stride[l],
                pool=P.Pooling.MAX)
            last_layer[0] = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
            local_counter = 1
            conv_counter += 1
            print(last_layer[0])

        elif layername[l] == 'L':
            # Loss: the loss layer exists only in the training/testing nets, not in the deploy net.
            if deploy == False and "lmdb" not in data_source:
                n.tops['map_vec_stage%d' % stage] = L.Flatten(
                    n.tops[last_layer[0]])
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops['map_vec_stage%d' % stage], n.tops[label_name[1]])
            elif deploy == False:
                level = 1
                name = 'weight_stage%d' % stage
                n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                         n.tops[label_name[(level + 2)]],
                                         operation=P.Eltwise.PROD)
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops[name], n.tops[label_name[level]])

            print('loss %d' % stage)
            stage += 1
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'L2':
            # Loss: the loss layer exists only in the training/testing nets, not in the deploy net.
            weight = [lr_mult_distro[3], 1]
            # print lr_mult_distro[3]
            for level in range(0, 2):
                if deploy == False and "lmdb" not in data_source:
                    n.tops['map_vec_stage%d_L%d' %
                           (stage, level + 1)] = L.Flatten(
                               n.tops[last_layer[level]])
                    n.tops['loss_stage%d_L%d' %
                           (stage, level + 1)] = L.EuclideanLoss(
                               n.tops['map_vec_stage%d_L%d' % (stage, level + 1)],
                               n.tops[label_name[level]],
                               loss_weight=weight[level])
                elif deploy == False:
                    name = 'weight_stage%d_L%d' % (stage, level + 1)
                    n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                             n.tops[label_name[(level + 2)]],
                                             operation=P.Eltwise.PROD)
                    n.tops['loss_stage%d_L%d' %
                           (stage, level + 1)] = L.EuclideanLoss(
                               n.tops[name],
                               n.tops[label_name[level]],
                               loss_weight=weight[level])

                print('loss %d level %d' % (stage, level + 1))

            stage += 1
            #last_connect = last_layer
            #last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'L3':
            # Loss: the loss layer exists only in the training/testing nets, not in the deploy net.
            weight = [lr_mult_distro[3], 1]
            # print lr_mult_distro[3]
            if deploy == False:
                level = 0
                n.tops['loss_stage%d_L%d' %
                       (stage, level + 1)] = L.Euclidean2Loss(
                           n.tops[last_layer[level]],
                           n.tops[label_name[level]],
                           n.tops[label_name[2]],
                           loss_weight=weight[level])
                print('loss %d level %d' % (stage, level + 1))
                level = 1
                n.tops['loss_stage%d_L%d' %
                       (stage, level + 1)] = L.EuclideanLoss(
                           n.tops[last_layer[level]],
                           n.tops[label_name[level]],
                           loss_weight=weight[level])
                print('loss %d level %d' % (stage, level + 1))

            stage += 1
            #last_connect = last_layer
            #last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer[0]],
                    in_place=True,
                    dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            #if not share_point:
            #    share_point = last_layer
            n.tops['concat_stage%d' % stage] = L.Concat(
                n.tops[last_layer[0]],
                n.tops[last_layer[1]],
                n.tops[share_point],
                concat_param=dict(axis=1))

            local_counter = 1
            state = 'fuse'
            last_layer[0] = 'concat_stage%d' % stage
            last_layer[1] = 'concat_stage%d' % stage
            print(last_layer)
        elif layername[l] == '$':
            share_point = last_layer[0]
            pool_counter += 1
            local_counter = 1
            print('share')

    # final process
    stage -= 1
    #if stage == 1:
    #    n.silence = L.Silence(n.pool_center_lower, ntop=0)

    if deploy == False:
        return str(n.to_proto())
    else:
        # for generating the deploy net
        # generate the input information header string
        deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input + '"', dim1, dim2, dim3, dim4)
        # assemble the input header with the net layers string.  remove the first placeholder layer from the net string.
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(
            str(n.to_proto()).split('layer {')[2:])
def generate_model(split, config):
    n = caffe.NetSpec()
    batch_size = config.N
    mode_str = str(dict(split=split, batch_size=batch_size))
    n.language, n.cont, n.image, n.spatial, n.label = L.Python(
        module=config.data_provider,
        layer=config.data_provider_layer,
        param_str=mode_str,
        ntop=5)

    # the base net (VGG-16)
    n.conv1_1, n.relu1_1 = conv_relu(n.image,
                                     64,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1,
                                     64,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1,
                                     128,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1,
                                     128,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2,
                                     256,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1,
                                     256,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2,
                                     256,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3,
                                     512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1,
                                     512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2,
                                     512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4,
                                     512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1,
                                     512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2,
                                     512,
                                     fix_param=config.fix_vgg,
                                     finetune=(not config.fix_vgg))
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fcn_fc6, n.fcn_relu6 = conv_relu(n.pool5, 4096, ks=7, pad=3)
    if config.vgg_dropout:
        n.fcn_drop6 = L.Dropout(n.fcn_relu6, dropout_ratio=0.5, in_place=True)
        n.fcn_fc7, n.fcn_relu7 = conv_relu(n.fcn_drop6, 4096, ks=1, pad=0)
        n.fcn_drop7 = L.Dropout(n.fcn_relu7, dropout_ratio=0.5, in_place=True)
        n.fcn_fc8 = conv(n.fcn_drop7, 1000, ks=1, pad=0)
    else:
        n.fcn_fc7, n.fcn_relu7 = conv_relu(n.fcn_relu6, 4096, ks=1, pad=0)
        n.fcn_fc8 = conv(n.fcn_relu7, 1000, ks=1, pad=0)

    # embedding
    n.embed = L.Embed(n.language,
                      input_dim=config.vocab_size,
                      num_output=config.embed_dim,
                      weight_filler=dict(type='uniform', min=-0.08, max=0.08))

    # LSTM
    n.lstm = L.LSTM(n.embed,
                    n.cont,
                    recurrent_param=dict(num_output=config.lstm_dim,
                                         weight_filler=dict(type='uniform',
                                                            min=-0.08,
                                                            max=0.08),
                                         bias_filler=dict(type='constant',
                                                          value=0)))
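    # slice the T-step LSTM output along the time axis; every step except the last
    # is routed to a Silence layer, and the final step is kept as the phrase feature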
    tops = L.Slice(n.lstm, ntop=config.T, slice_param=dict(axis=0))
    for i in range(config.T - 1):
        n.__setattr__('slice' + str(i), tops[i])
        n.__setattr__('silence' + str(i), L.Silence(tops[i], ntop=0))
    n.lstm_out = tops[-1]
    n.lstm_feat = L.Reshape(
        n.lstm_out, reshape_param=dict(shape=dict(dim=[-1, config.lstm_dim])))

    # Dynamic conv filters
    n.dyn_l, n.dyn_sig = fc_sigmoid(n.lstm_feat, 1000 + 8)
    n.lstm_dyn_kernel = L.Reshape(
        n.dyn_sig,
        reshape_param=dict(shape=dict(dim=[-1, 1, config.lstm_dim + 8, 1, 1])))
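    # note: the dynamic kernel reshaped above has config.lstm_dim + 8 channels, which
    # matches the 1000 + 8 channels of feat_all below only if config.lstm_dim == 1000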

    # Tile LSTM feature
    #n.lstm_resh = L.Reshape(n.lstm_feat, reshape_param=dict(shape=dict(dim=[-1, config.lstm_dim, 1, 1])))
    #n.lstm_tile_1 = L.Tile(n.lstm_resh, axis=2, tiles=config.featmap_H)
    #n.lstm_tile_2 = L.Tile(n.lstm_tile_1, axis=3, tiles=config.featmap_W)

    # L2 Normalize image and language features
    #n.img_l2norm = L.L2Normalize(n.fcn_fc8)
    #n.lstm_l2norm = L.L2Normalize(n.lstm_tile_2)

    # Concatenate
    #n.feat_all = L.Concat(n.lstm_l2norm, n.img_l2norm, n.spatial, concat_param=dict(axis=1))
    n.feat_all = L.Concat(n.fcn_fc8, n.spatial, concat_param=dict(axis=1))

    # MLP Classifier over concatenated feature
    #n.fcn_l1, n.fcn_relu1 = conv_relu(n.feat_all, config.mlp_hidden_dims, ks=1, pad=0)
    #if config.mlp_dropout:
    #    n.fcn_drop1 = L.Dropout(n.fcn_relu1, dropout_ratio=0.5, in_place=True)
    #    n.fcn_scores = conv(n.fcn_drop1, 1, ks=1, pad=0)
    #else:
    #    n.fcn_scores = conv(n.fcn_relu1, 1, ks=1, pad=0)

    # Dyn conv layer
    n.fcn_scores = L.DynamicConvolution(n.feat_all,
                                        n.lstm_dyn_kernel,
                                        convolution_param=dict(
                                            num_output=1,
                                            kernel_size=1,
                                            stride=1,
                                            pad=0,
                                            bias_term=False))

    # Loss Layer
    n.loss = L.SigmoidCrossEntropyLoss(n.fcn_scores, n.label)

    return n.to_proto()
def drop(bottom, dropout_ratio):
    # use the ratio passed in instead of a hard-coded 0.25
    return L.Dropout(bottom, dropout_ratio=dropout_ratio, in_place=True)
def yolo_net(data_lmdb, label_lmdb, batch_size):
    # YOLO v1-style detection net: a deep conv/pool backbone followed by two fully connected layers
    n = caffe.NetSpec()
    # input
    n.data = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=data_lmdb, transform_param=dict(scale=1./255), ntop=1)
    n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=label_lmdb, ntop=1)

    # 7x7x64-s-2
    n.conv1  = ConvLayer(n.data, 64, 7, 2, 1)
    n.leaky1 = LeakyLayer(n.conv1)
    n.pool1  = MaxpoolingLayer(n.leaky1, 2, 2)

    # 3x3x192
    n.conv2  = ConvLayer(n.pool1, 192, 3, 1, 1)
    n.leaky2 = LeakyLayer(n.conv2)
    n.pool2  = MaxpoolingLayer(n.leaky2, 2, 2)

    n.conv3  = ConvLayer(n.pool2, 128, 1, 1, 1)
    n.leaky3 = LeakyLayer(n.conv3)
    n.conv4  = ConvLayer(n.leaky3, 256, 3, 1, 1)
    n.leaky4 = LeakyLayer(n.conv4)
    n.conv5  = ConvLayer(n.leaky4, 256, 1, 1, 1)
    n.leaky5 = LeakyLayer(n.conv5)
    n.conv6  = ConvLayer(n.leaky5, 512, 3, 1, 1)
    n.leaky6 = LeakyLayer(n.conv6)
    n.pool3  = MaxpoolingLayer(n.leaky6, 2, 2)

    n.conv7  = ConvLayer(n.pool3, 256, 1, 1, 1)
    n.leaky7 = LeakyLayer(n.conv7)
    n.conv8  = ConvLayer(n.leaky7, 512, 3, 1, 1)
    n.leaky8 = LeakyLayer(n.conv8)
    n.conv9  = ConvLayer(n.leaky8, 256, 1, 1, 1)
    n.leaky9 = LeakyLayer(n.conv9)
    n.conv10  = ConvLayer(n.leaky9, 512, 3, 1, 1)
    n.leaky10 = LeakyLayer(n.conv10)
    n.conv11  = ConvLayer(n.leaky10, 256, 1, 1, 1)
    n.leaky11 = LeakyLayer(n.conv11)
    n.conv12  = ConvLayer(n.leaky11, 512, 3, 1, 1)
    n.leaky12 = LeakyLayer(n.conv12)
    n.conv13  = ConvLayer(n.leaky12, 256, 1, 1, 1)
    n.leaky13 = LeakyLayer(n.conv13)
    n.conv14  = ConvLayer(n.leaky13, 512, 3, 1, 1)
    n.leaky14 = LeakyLayer(n.conv14)
    n.conv15  = ConvLayer(n.leaky14, 512, 1, 1, 1)
    n.leaky15 = LeakyLayer(n.conv15)
    n.conv16  = ConvLayer(n.leaky15, 1024, 3, 1, 1)
    n.leaky16 = LeakyLayer(n.conv16)
    n.pool4  = MaxpoolingLayer(n.leaky16, 2, 2)

    n.conv17  = ConvLayer(n.pool4, 512, 1, 1, 1)
    n.leaky17 = LeakyLayer(n.conv17)
    n.conv18  = ConvLayer(n.leaky17, 1024, 3, 1, 1)
    n.leaky18 = LeakyLayer(n.conv18)
    n.conv19  = ConvLayer(n.leaky18, 512, 1, 1, 1)
    n.leaky19 = LeakyLayer(n.conv19)
    n.conv20  = ConvLayer(n.leaky19, 1024, 3, 1, 1)
    n.leaky20 = LeakyLayer(n.conv20)
    n.pool5  = MaxpoolingLayer(n.leaky20, 2, 2)

    n.conv21  = ConvLayer(n.pool5, 512, 1, 1, 1)
    n.leaky21 = LeakyLayer(n.conv21)
    n.conv22  = ConvLayer(n.leaky21, 1024, 3, 1, 1)
    n.leaky22 = LeakyLayer(n.conv22)
    n.conv23  = ConvLayer(n.leaky22, 512, 1, 1, 1)
    n.leaky23 = LeakyLayer(n.conv23)
    n.conv24  = ConvLayer(n.leaky23, 1024, 3, 1, 1)
    n.leaky24 = LeakyLayer(n.conv24)

    n.fc1 = L.InnerProduct(n.leaky24, num_output=4096, weight_filler=dict(type='xavier'))
    n.leaky25 = LeakyLayer(n.fc1)
    n.dropout = L.Dropout(n.leaky25, dropout_ratio=0.5, in_place=True)
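    # 1470 = 7 * 7 * 30: the YOLO v1 output grid S*S*(B*5 + C) with S=7, B=2, C=20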
    n.fc2 = L.InnerProduct(n.dropout, num_output=1470, weight_filler=dict(type='xavier'))

    return n.to_proto()
Exemple #28
0
def ResNet(split):
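    # note: despite the function name, this net has no residual shortcut connections;
    # it is a plain stack of conv-BN-scale-ReLU blocks with two pooling stages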

    data, labels = L.Python(module='readDataLayer',
                            layer='input_layer',
                            ntop=2,
                            param_str=str(
                                dict(split=split,
                                     data_dir=this_dir + '/data/',
                                     train_data_name='train_',
                                     test_data_name='test',
                                     train_batches=128,
                                     test_batches=128,
                                     crop_size_x=33,
                                     crop_size_y=33,
                                     train_pack_nums=9,
                                     test_pack_nums=1)))
    HGG_1, _ = conv_BN_scale_relu(split, data, 64, 3, 1, 0)
    HGG_2, _ = conv_BN_scale_relu(split, HGG_1, 64, 3, 1, 0)
    HGG_3, _ = conv_BN_scale_relu(split, HGG_2, 64, 3, 1, 0)
    HGG_4 = L.Pooling(HGG_3,
                      pool=P.Pooling.MAX,
                      global_pooling=False,
                      stride=2,
                      kernel_size=3)

    HGG_5, _ = conv_BN_scale_relu(split, HGG_4, 128, 3, 1, 0)

    HGG_6, _ = conv_BN_scale_relu(split, HGG_5, 128, 3, 1, 0)

    HGG_7, _ = conv_BN_scale_relu(split, HGG_6, 128, 3, 1, 0)

    HGG_8 = L.Pooling(HGG_7,
                      pool=P.Pooling.MAX,
                      global_pooling=False,
                      stride=2,
                      kernel_size=3)

    HGG_8a = L.Flatten(HGG_8)

    HGG_9 = L.ReLU(HGG_8a)
    HGG_9a = L.InnerProduct(L.Dropout(HGG_9, dropout_ratio=0.1),
                            num_output=256,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'))
    #    HGG_9a = L.InnerProduct(HGG_9, num_output = 256)

    HGG_10 = L.ReLU(HGG_9a)
    HGG_10a = L.InnerProduct(L.Dropout(HGG_10, dropout_ratio=0.1),
                             num_output=256,
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant'))
    #    HGG_10a = L.InnerProduct(HGG_10,num_output = 256)

    HGG_11 = L.Dropout(HGG_10a, dropout_ratio=0.1)
    HGG_11a = L.InnerProduct(HGG_11,
                             num_output=5,
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant'))

    acc = L.Accuracy(HGG_11a, labels)
    loss = L.SoftmaxWithLoss(HGG_11a, labels)
    return to_proto(loss, acc)
Exemple #29
0
def fcn(split):
    n = caffe.NetSpec()
    n.data, n.sem, n.geo = L.Python(module='siftflow_layers',
                                    layer='SIFTFlowSegDataLayer',
                                    ntop=3,
                                    param_str=str(
                                        dict(siftflow_dir='../data/sift-flow',
                                             split=split,
                                             seed=1337)))

    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)

    n.score_fr_sem = L.Convolution(
        n.drop7,
        num_output=33,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore_sem = L.Deconvolution(n.score_fr_sem,
                                    convolution_param=dict(num_output=33,
                                                           kernel_size=64,
                                                           stride=32,
                                                           bias_term=False),
                                    param=[dict(lr_mult=0)])
    n.score_sem = crop(n.upscore_sem, n.data)
    # top is named plain 'loss' (rather than loss_sem) so existing scoring code picks it up
    n.loss = L.SoftmaxWithLoss(n.score_sem,
                               n.sem,
                               loss_param=dict(normalize=False,
                                               ignore_label=255))

    n.score_fr_geo = L.Convolution(
        n.drop7,
        num_output=3,
        kernel_size=1,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    n.upscore_geo = L.Deconvolution(n.score_fr_geo,
                                    convolution_param=dict(num_output=3,
                                                           kernel_size=64,
                                                           stride=32,
                                                           bias_term=False),
                                    param=[dict(lr_mult=0)])
    n.score_geo = crop(n.upscore_geo, n.data)
    n.loss_geo = L.SoftmaxWithLoss(n.score_geo,
                                   n.geo,
                                   loss_param=dict(normalize=False,
                                                   ignore_label=255))

    return n.to_proto()
Exemple #30
0
def custom_net(hdf5, batch_size):
    # define your own net!
    n = caffe.NetSpec()

    #keep this data layer for all networks
    #HDF5 DATA LAYER
    n.data, n.label = L.HDF5Data(batch_size=batch_size, source=hdf5, ntop=2)
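
    # U-Net style encoder-decoder: contracting path d0..d4 below, then an expanding
    # path u3..u0 with crop + concat skip connections at each resolution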

    #    n.conv_d0a_b = L.Convolution(n.data,kernel_size=3,num_output=64,pad=0,weight_filler=dict(type='xavier'))
    #    n.relu_d0b = L.ReLU(n.conv_d0a_b)
    #    n.conv_d0b_c = L.Convolution(n.relu_d0b,kernel_size=3,num_output=64,pad=0,weight_filler=dict(type='xavier'))
    #    n.relu_d0c = L.ReLU(n.conv_d0b_c)
    #    n.pool_d0c_1a = L.Pooling(n.relu_d0c, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.conv_d0a_b, n.relu_d0b = conv_relu(n.data, 64)
    n.conv_d0b_c, n.relu_d0c = conv_relu(n.relu_d0b, 64)
    n.pool_d0c_1a = max_pool(n.relu_d0c)

    #    n.conv_d1a_b = L.Convolution(n.pool_d0c_1a,kernel_size=3,num_output=128,pad=0,weight_filler=dict(type='xavier'))
    #    n.relu_d1b = L.ReLU(n.conv_d1a_b)
    #    n.conv_d1b_c = L.Convolution(n.relu_d1b,kernel_size=3,num_output=128,pad=0,weight_filler=dict(type='xavier'))
    #    n.relu_d1c = L.ReLU(n.conv_d1b_c)
    #    n.pool_d1c_2a = L.Pooling(n.relu_d1c, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.conv_d1a_b, n.relu_d1b = conv_relu(n.pool_d0c_1a, 128)
    n.conv_d1b_c, n.relu_d1c = conv_relu(n.relu_d1b, 128)
    n.pool_d1c_2a = max_pool(n.relu_d1c)

    #    n.conv_d2a_b = L.Convolution(n.pool_d1c_2a,kernel_size=3,num_output=256,pad=0,weight_filler=dict(type='xavier'))
    #    n.relu_d2b = L.ReLU(n.conv_d2a_b)
    #    n.conv_d2b_c = L.Convolution(n.relu_d2b,kernel_size=3,num_output=256,pad=0,weight_filler=dict(type='xavier'))
    #    n.relu_d2c = L.ReLU(n.conv_d2b_c)
    #    n.pool_d2c_3a = L.Pooling(n.relu_d2c, kernel_size=2,stride = 2,pool = P.Pooling.MAX)
    n.conv_d2a_b, n.relu_d2b = conv_relu(n.pool_d1c_2a, 256)
    n.conv_d2b_c, n.relu_d2c = conv_relu(n.relu_d2b, 256)
    n.pool_d2c_3a = max_pool(n.relu_d2c)

    #    n.conv_d3a_b = L.Convolution(n.pool_d2c_3a,kernel_size=3,num_output=512,pad=0,weight_filler=dict(type='xavier'))
    #    n.relu_d3b = L.ReLU(n.conv_d3a_b)
    #    n.conv_d3b_c = L.Convolution(n.relu_d3b,kernel_size=3,num_output=512,pad=0,weight_filler=dict(type='xavier'))
    #    n.relu_d3c = L.ReLU(n.conv_d3b_c)
    #    n.dropout_d3c = L.Dropout(n.relu_d3c,dropout_ratio=0.5)
    #    n.pool_d3c_4a = L.Pooling(n.relu_d3c, kernel_size=2,stride = 2,pool = P.Pooling.MAX)
    n.conv_d3a_b, n.relu_d3b = conv_relu(n.pool_d2c_3a, 512)
    n.conv_d3b_c, n.relu_d3c = conv_relu(n.relu_d3b, 512)
    n.dropout_d3c = L.Dropout(n.relu_d3c, dropout_ratio=0.5)
    n.pool_d3c_4a = max_pool(n.dropout_d3c)

    #    n.conv_d4a_b = L.Convolution(n.pool_d3c_4a,kernel_size=3,num_output=1024,pad=0,weight_filler=dict(type='xavier'))
    #    n.relu_d4b = L.ReLU(n.conv_d4a_b)
    #    n.conv_d4b_c = L.Convolution(n.relu_d4b,kernel_size=3,num_output=1024,pad=0,weight_filler=dict(type='xavier'))
    #    n.relu_d4c = L.ReLU(n.conv_d4b_c)
    #    n.dropout_d4c = L.Dropout(n.relu_d4c,dropout_ratio=0.5)
    #    #n.upconv_d4c_u3a = L.DeConvolution(n.dropout_d4c,num_output = 512, pad=0, kernel_size=2,stride=2,weight_filler=dict(type='xavier'))
    #    n.upconv_d4c_u3a = L.Deconvolution(n.dropout_d4c)
    #    n.relu_u3a = L.ReLU(n.upconv_d4c_u3a)
    n.conv_d4a_b, n.relu_d4b = conv_relu(n.pool_d3c_4a, 1024)
    n.conv_d4b_c, n.relu_d4c = conv_relu(n.relu_d4b, 1024)
    n.dropout_d4c = L.Dropout(n.relu_d4c, dropout_ratio=0.5)
    n.upconv_d4c_u3a, n.relu_u3a = deconv_relu(n.dropout_d4c, 512)

    #    n.crop_d3c_d3cc = L.Crop(n.relu_d3c,n.relu_u3a)
    #    n.concat_d3cc_u3a_b = L.Concat(n.relu_u3a,n.crop_d3c_d3cc)
    #    n.conv_u3b_c = L.Convolution(n.concat_d3cc_u3a_b,num_output=512,pad=0,kernel_size=3,weight_filler=dict(type='xavier'))
    #    n.relu_u3c = L.ReLU(n.conv_u3b_c)
    #    n.conv_u3c_d = L.Convolution(n.relu_u3c, num_output=512,pad=0,kernel_size=3,weight_filler=dict(type='xavier'))
    #    n.relu_u3d = L.ReLU(n.conv_u3c_d)
    #    #n.upconv_u3d_u2a = L.Deconvolution(n.relu_u3d, num_output=256,pad =0,kernel_size=2,stride=2,weight_filler=dict(type='xavier'))
    #    n.upconv_u3d_u2a = L.Deconvolution(n.relu_u3d)
    #    n.relu_u2a = L.ReLU(n.upconv_u3d_u2a)
    n.crop_d3c_d3cc = L.Crop(n.relu_d3c, n.relu_u3a)
    n.concat_d3cc_u3a_b = L.Concat(n.relu_u3a, n.crop_d3c_d3cc)
    n.conv_u3b_c, n.relu_u3c = conv_relu(n.concat_d3cc_u3a_b, 512)
    n.conv_u3c_d, n.relu_u3d = conv_relu(n.relu_u3c, 512)
    n.upconv_u3d_u2a, n.relu_u2a = deconv_relu(n.relu_u3d, 256)

    #    n.crop_d2c_d2cc = L.Crop(n.relu_d2c,n.relu_u2a)
    #    n.concat_d2cc_u2a_b = L.Concat(n.relu_u2a,n.crop_d2c_d2cc)
    #    n.conv_u2b_c = L.Convolution(n.concat_d2cc_u2a_b,num_output=256,pad=0,kernel_size=3,weight_filler=dict(type='xavier'))
    #    n.relu_u2c = L.ReLU(n.conv_u2b_c)
    #    n.conv_u2c_d = L.Convolution(n.relu_u2c, num_output=256,pad=0,kernel_size=3,weight_filler=dict(type='xavier'))
    #    n.relu_u2d = L.ReLU(n.conv_u2c_d)
    #    #n.upconv_u2d_u1a = L.Deconvolution(n.relu_u2d, num_output=128,pad =0,kernel_size=2,stride=2,weight_filler=dict(type='xavier'))
    #    n.upconv_u2d_u1a = L.Deconvolution(n.relu_u2d)
    #    n.relu_u1a = L.ReLU(n.upconv_u2d_u1a)
    n.crop_d2c_d2cc = L.Crop(n.relu_d2c, n.relu_u2a)
    n.concat_d2cc_u2a_b = L.Concat(n.relu_u2a, n.crop_d2c_d2cc)
    n.conv_u2b_c, n.relu_u2c = conv_relu(n.concat_d2cc_u2a_b, 256)
    n.conv_u2c_d, n.relu_u2d = conv_relu(n.relu_u2c, 256)
    n.upconv_u2d_u1a, n.relu_u1a = deconv_relu(n.relu_u2d, 128)

    #    n.crop_d1c_d1cc = L.Crop(n.relu_d1c,n.relu_u1a)
    #    n.concat_d1cc_u1a_b = L.Concat(n.relu_u1a,n.crop_d1c_d1cc)
    #    n.conv_u1b_c = L.Convolution(n.concat_d1cc_u1a_b,num_output=128,pad=0,kernel_size=3,weight_filler=dict(type='xavier'))
    #    n.relu_u1c = L.ReLU(n.conv_u1b_c)
    #    n.conv_u1c_d = L.Convolution(n.relu_u1c, num_output=128,pad=0,kernel_size=3,weight_filler=dict(type='xavier'))
    #    n.relu_u1d = L.ReLU(n.conv_u1c_d)
    #    #n.upconv_u1d_u0a = L.Deconvolution(n.relu_u1d, num_output=64,pad =0,kernel_size=2,stride=2,weight_filler=dict(type='xavier'))
    #    n.upconv_u1d_u0a = L.Deconvolution(n.relu_u1d)
    #    n.relu_u0a = L.ReLU(n.upconv_u1d_u0a)
    n.crop_d1c_d1cc = L.Crop(n.relu_d1c, n.relu_u1a)
    n.concat_d1cc_u1a_b = L.Concat(n.relu_u1a, n.crop_d1c_d1cc)
    n.conv_u1b_c, n.relu_u1c = conv_relu(n.concat_d1cc_u1a_b, 128)
    n.conv_u1c_d, n.relu_u1d = conv_relu(n.relu_u1c, 128)
    n.upconv_u1d_u0a, n.relu_u0a = deconv_relu(n.relu_u1d, 128)

    #    n.crop_d0c_d0cc = L.Crop(n.relu_d0c,n.relu_u0a)
    #    n.concat_d0cc_u0a_b = L.Concat(n.relu_u0a,n.crop_d0c_d0cc)
    #    n.conv_u0b_c = L.Convolution(n.concat_d0cc_u0a_b,num_output=64,pad=0,kernel_size=3,weight_filler=dict(type='xavier'))
    #    n.relu_u0c = L.ReLU(n.conv_u0b_c)
    #    n.conv_u0c_d = L.Convolution(n.relu_u0c, num_output=64,pad=0,kernel_size=3,weight_filler=dict(type='xavier'))
    #    n.relu_u0d = L.ReLU(n.conv_u0c_d)
    n.crop_d0c_d0cc = L.Crop(n.relu_d0c, n.relu_u0a)
    n.concat_d0cc_u0a_b = L.Concat(n.relu_u0a, n.crop_d0c_d0cc)
    n.conv_u0b_c, n.relu_u0c = conv_relu(n.concat_d0cc_u0a_b, 64)
    n.conv_u0c_d, n.relu_u0d = conv_relu(n.relu_u0c, 64)

    n.conv_u0d_score = L.Convolution(
        n.relu_u0d,
        num_output=2,
        pad=0,
        kernel_size=1,
        weight_filler=dict(type='xavier'),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])

    # keep this loss layer for all networks
    n.loss = L.SoftmaxWithLoss(n.conv_u0d_score,
                               n.label,
                               loss_param=dict(ignore_label=2))

    return n.to_proto()