Example #1
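All of the snippets below build Caffe networks with the pycaffe NetSpec API. A minimal sketch of the imports they assume (standard pycaffe names; project-specific helpers such as factorization_conv_bn_scale_relu, LT, and config come from each example's own codebase):

import caffe
from caffe import layers as L, params as P  # layer constructors and parameter enums
from caffe.proto import caffe_pb2           # raw protobuf definitions (Phase, PoolingParameter, ...)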
    def alexnet_bn_proto(self, batch_size, phase='TRAIN'):
        n = caffe.NetSpec()
        if phase == 'TRAIN':
            source_data = self.train_data
            mirror = True
        else:
            source_data = self.test_data
            mirror = False
        n.data, n.label = L.Data(source=source_data,
                                 backend=P.Data.LMDB,
                                 batch_size=batch_size,
                                 ntop=2,
                                 transform_param=dict(
                                     crop_size=227,
                                     mean_value=[104, 117, 123],
                                     mirror=mirror))

        n.conv1, n.conv1_bn, n.conv1_scale, n.conv1_relu = \
            factorization_conv_bn_scale_relu(n.data, num_output=96, kernel_size=11, stride=4)  # 96x55x55
        n.pool1 = L.Pooling(n.conv1,
                            kernel_size=3,
                            stride=2,
                            pool=P.Pooling.MAX)  # 96x27x27

        n.conv2, n.conv2_bn, n.conv2_scale, n.conv2_relu = \
            factorization_conv_bn_scale_relu(n.pool1, num_output=256, kernel_size=5, pad=2)  # 256x27x27
        n.pool2 = L.Pooling(n.conv2,
                            kernel_size=3,
                            stride=2,
                            pool=P.Pooling.MAX)  # 256x13x13

        n.conv3, n.conv3_bn, n.conv3_scale, n.conv3_relu = \
            factorization_conv_bn_scale_relu(n.pool2, num_output=384, kernel_size=3, pad=1)  # 384x13x13

        n.conv4, n.conv4_bn, n.conv4_scale, n.conv4_relu = \
            factorization_conv_bn_scale_relu(n.conv3, num_output=384, kernel_size=3, pad=1)  # 384x13x13

        n.conv5, n.conv5_bn, n.conv5_scale, n.conv5_relu = \
            factorization_conv_bn_scale_relu(n.conv4, num_output=256, kernel_size=3, pad=1)  # 256x13x13
        n.pool5 = L.Pooling(n.conv5,
                            kernel_size=3,
                            stride=2,
                            pool=P.Pooling.MAX)  # 256x6x6

        n.fc6, n.relu6, n.drop6 = fc_relu_drop(n.pool5,
                                               num_output=2048)  # 2048x1x1
        n.fc7, n.relu7, n.drop7 = fc_relu_drop(n.fc6,
                                               num_output=2048)  # 2048x1x1
        n.fc8 = L.InnerProduct(n.fc7,
                               num_output=self.classifier_num,
                               param=[
                                   dict(lr_mult=1, decay_mult=1),
                                   dict(lr_mult=2, decay_mult=0)
                               ],
                               weight_filler=dict(type='gaussian', std=0.01),
                               bias_filler=dict(type='constant', value=0))
        if phase != 'TRAIN':
            n.accuracy_top1 = L.Accuracy(n.fc8, n.label, include=dict(phase=1))  # phase=1 is TEST
            n.accuracy_top5 = L.Accuracy(n.fc8,
                                         n.label,
                                         include=dict(phase=1),
                                         accuracy_param=dict(top_k=5))
        n.loss = L.SoftmaxWithLoss(n.fc8, n.label)

        return n.to_proto()
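A hedged usage sketch: to_proto() returns a NetParameter message whose str() form is a valid prototxt. The wrapper class AlexNetBN and its constructor arguments are assumptions for illustration, not part of the original snippet.

# Hypothetical driver; AlexNetBN and its constructor are assumed.
builder = AlexNetBN(train_data='train_lmdb', test_data='val_lmdb', classifier_num=1000)
with open('train.prototxt', 'w') as f:
    f.write(str(builder.alexnet_bn_proto(batch_size=256, phase='TRAIN')))
with open('test.prototxt', 'w') as f:
    f.write(str(builder.alexnet_bn_proto(batch_size=50, phase='TEST')))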
Example #2
def create_neural_net(input_file, batch_size=50):
    net = caffe.NetSpec()
    net.data, net.label = L.Data(batch_size=batch_size, source=input_file,
                                 backend=caffe.params.Data.LMDB, ntop=2,
                                 include=dict(phase=caffe.TEST), name='juniward04')

    ## pre-process
    net.conv1 = L.Convolution(net.data, num_output=16, kernel_size=4, stride=1,
                               pad=1, weight_filler=dict(type='dct4'),
                               param=[{'lr_mult':0, 'decay_mult':0}],
                               bias_term=False)
    TRUNCABS = caffe_pb2.QuantTruncAbsParameter.TRUNCABS
    net.quanttruncabs = L.QuantTruncAbs(net.conv1, process=TRUNCABS, threshold=8, in_place=True)

    ## block 1
    [net.conv1_proj, net.bn2, net.scale2, net.conv512_1, net.bn2_1, net.scale2_1,
     net.relu512_1, net.conv512_to_256, net.bn2_2, net.scale2_2, net.res512_to_256,
     net.relu512_to_256] = add_downsampling_block(net.quanttruncabs, 12)
    ## block 2
    [net.conv256_1, net.bn2_3, net.scale2_3, net.relu256_1, net.conv256_2, net.bn2_4, 
     net.scale2_4, net.res256_2, net.relu256_2] = add_skip_block(net.res512_to_256, 24)
    ## block 2_1
    [net.conv256_4, net.bn3_1, net.scale3_1, net.relu256_4, net.conv256_5, net.bn3_2, 
     net.scale3_2, net.res256_5, net.relu256_5] = add_skip_block(net.res256_2, 24)
    ## block 2_2
    [net.conv256_6, net.bn4_1, net.scale4_1, net.relu256_6, net.conv256_7, net.bn4_2, 
     net.scale4_2, net.res256_7, net.relu256_7] = add_skip_block(net.res256_5, 24)
    ## block 2_3
    [net.conv256_8, net.bn5_1, net.scale5_1, net.relu256_8, net.conv256_9, net.bn5_2, 
     net.scale5_2, net.res256_9, net.relu256_9] = add_skip_block(net.res256_7, 24)
    ## block 3
    [net.res256_2_proj, net.bn2_5, net.scale2_5, net.conv256_3, net.bn2_6, net.scale2_6, 
     net.relu256_3, net.conv256_to_128, net.bn2_7, net.scale2_7, net.res256_to_128, 
     net.relu256_to_128] = add_downsampling_block(net.res256_9, 24)
    ## block 4 
    [net.conv128_1, net.bn2_8, net.scale2_8, net.relu128_1, net.conv128_2, net.bn2_9, 
     net.scale2_9, net.res128_2, net.relu128_2] = add_skip_block(net.res256_to_128, 48)
    ## block 4_1
    [net.conv128_4, net.bn3_3, net.scale3_3, net.relu128_4, net.conv128_5, net.bn3_4, 
     net.scale3_4, net.res128_5, net.relu128_5] = add_skip_block(net.res128_2, 48)
    ## block 4_2
    [net.conv128_6, net.bn4_3, net.scale4_3, net.relu128_6, net.conv128_7, net.bn4_4, 
     net.scale4_4, net.res128_7, net.relu128_7] = add_skip_block(net.res128_5, 48)
    ## block 4_3
    [net.conv128_8, net.bn5_3, net.scale5_3, net.relu128_8, net.conv128_9, net.bn5_4, 
     net.scale5_4, net.res128_9, net.relu128_9] = add_skip_block(net.res128_7, 48)
    ## block 5
    [net.res128_2_proj, net.bn2_10, net.scale2_10, net.conv128_3, net.bn2_11, net.scale2_11, 
     net.relu128_3, net.conv128_to_64, net.bn2_12, net.scale2_12, net.res128_to_64, 
     net.relu128_to_64] = add_downsampling_block(net.res128_9, 48)
    ## block 6
    [net.conv64_1, net.bn2_13, net.scale2_13, net.relu64_1, net.conv64_2, net.bn2_14, 
     net.scale2_14, net.res64_2, net.relu64_2] = add_skip_block(net.res128_to_64, 96)
    ## block 6_1
    [net.conv64_4, net.bn3_5, net.scale3_5, net.relu64_4, net.conv64_5, net.bn3_6, 
     net.scale3_6, net.res64_5, net.relu64_5] = add_skip_block(net.res64_2, 96)
    ## block 6_2
    [net.conv64_6, net.bn4_5, net.scale4_5, net.relu64_6, net.conv64_7, net.bn4_6, 
     net.scale4_6, net.res64_7, net.relu64_7] = add_skip_block(net.res64_5, 96)
    ## block 6_3
    [net.conv64_8, net.bn5_5, net.scale5_5, net.relu64_8, net.conv64_9, net.bn5_6, 
     net.scale5_6, net.res64_9, net.relu64_9] = add_skip_block(net.res64_7, 96)
    ## block 7
    [net.res64_2_proj, net.bn2_15, net.scale2_15, net.conv64_3, net.bn2_16, net.scale2_16, 
     net.relu64_3, net.conv64_to_32, net.bn2_17, net.scale2_17, net.res64_to_32, 
     net.relu64_to_32] = add_downsampling_block(net.res64_9, 96)
    ## block 8
    [net.conv32_1, net.bn2_18, net.scale2_18, net.relu32_1, net.conv32_2, net.bn2_19, 
     net.scale2_19, net.res32_2, net.relu32_2] = add_skip_block(net.res64_to_32, 192)
    ## block 8_1
    [net.conv32_4, net.bn3_7, net.scale3_7, net.relu32_4, net.conv32_5, net.bn3_8, 
     net.scale3_8, net.res32_5, net.relu32_5] = add_skip_block(net.res32_2, 192)
    ## block 8_2
    [net.conv32_6, net.bn4_7, net.scale4_7, net.relu32_6, net.conv32_7, net.bn4_8, 
     net.scale4_8, net.res32_7, net.relu32_7] = add_skip_block(net.res32_5, 192)
    ## block 8_3
    [net.conv32_8, net.bn5_7, net.scale5_7, net.relu32_8, net.conv32_9, net.bn5_8, 
     net.scale5_8, net.res32_9, net.relu32_9] = add_skip_block(net.res32_7, 192)
    ## block 9
    [net.res32_2_proj, net.bn2_20, net.scale2_20, net.conv32_3, net.bn2_21, net.scale2_21, 
     net.relu32_3, net.conv32_to_16, net.bn2_22, net.scale2_22, net.res32_to_16, 
     net.relu32_to_16] = add_downsampling_block(net.res32_9, 192)
    ## block 10_2
    [net.conv16_5, net.bn4_9, net.scale4_9, net.relu16_5, net.conv16_6, net.bn4_10, 
     net.scale4_10, net.res16_6, net.relu16_6] = add_skip_block(net.res32_to_16, 384)
    ## block 10_3
    [net.conv16_7, net.bn5_9, net.scale5_9, net.relu16_7, net.conv16_8, net.bn5_10, 
     net.scale5_10, net.res16_8, net.relu16_8] = add_skip_block(net.res16_6, 384)
    ## block 10_1
    [net.conv16_3, net.bn3_9, net.scale3_9, net.relu16_3, net.conv16_4, net.bn3_10, 
     net.scale3_10, net.res16_4, net.relu16_4] = add_skip_block(net.res16_8, 384)
    ## block 10
    [net.conv16_1, net.bn2_23, net.scale2_23, net.relu16_1, net.conv16_2, net.bn2_24, 
     net.scale2_24, net.res16_2, net.relu16_2] = add_skip_block(net.res16_4, 384)
    
    ## global pool
    AVE = caffe_pb2.PoolingParameter.AVE
    net.global_pool = L.Pooling(net.res16_2, pool=AVE, kernel_size=8, stride=1)
    
    ## full connecting
    net.fc = L.InnerProduct(net.global_pool, param=[{'lr_mult':1}, {'lr_mult':2}], num_output=2, 
                            weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))
    ## accuracy
    net.accuracy = L.Accuracy(net.fc, net.label, include=dict(phase=caffe.TEST))
    ## loss
    net.loss = L.SoftmaxWithLoss(net.fc, net.label)
    
    return net.to_proto()
Example #3
def InceptionV3Body(net, from_layer, output_pred=False):
    # scale is fixed to 1, thus we ignore it.
    use_scale = False

    out_layer = 'conv'
    ConvBNLayer(net,
                from_layer,
                out_layer,
                use_bn=True,
                use_relu=True,
                num_output=32,
                kernel_size=3,
                pad=0,
                stride=2,
                use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'conv_1'
    ConvBNLayer(net,
                from_layer,
                out_layer,
                use_bn=True,
                use_relu=True,
                num_output=32,
                kernel_size=3,
                pad=0,
                stride=1,
                use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'conv_2'
    ConvBNLayer(net,
                from_layer,
                out_layer,
                use_bn=True,
                use_relu=True,
                num_output=64,
                kernel_size=3,
                pad=1,
                stride=1,
                use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'pool'
    net[out_layer] = L.Pooling(net[from_layer],
                               pool=P.Pooling.MAX,
                               kernel_size=3,
                               stride=2,
                               pad=0)
    from_layer = out_layer

    out_layer = 'conv_3'
    ConvBNLayer(net,
                from_layer,
                out_layer,
                use_bn=True,
                use_relu=True,
                num_output=80,
                kernel_size=1,
                pad=0,
                stride=1,
                use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'conv_4'
    ConvBNLayer(net,
                from_layer,
                out_layer,
                use_bn=True,
                use_relu=True,
                num_output=192,
                kernel_size=3,
                pad=0,
                stride=1,
                use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'pool_1'
    net[out_layer] = L.Pooling(net[from_layer],
                               pool=P.Pooling.MAX,
                               kernel_size=3,
                               stride=2,
                               pad=0)
    from_layer = out_layer

    # inceptions with 1x1, 3x3, 5x5 convolutions
    for inception_id in xrange(0, 3):
        if inception_id == 0:
            out_layer = 'mixed'
            tower_2_conv_num_output = 32
        else:
            out_layer = 'mixed_{}'.format(inception_id)
            tower_2_conv_num_output = 64
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=48, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=64, kernel_size=5, pad=2, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=96, kernel_size=3, pad=1, stride=1),
            dict(name='conv_2', num_output=96, kernel_size=3, pad=1, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool',
                 pool=P.Pooling.AVE,
                 kernel_size=3,
                 pad=1,
                 stride=1),
            dict(name='conv',
                 num_output=tower_2_conv_num_output,
                 kernel_size=1,
                 pad=0,
                 stride=1),
        ])
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    # inceptions with 1x1, 3x3(in sequence) convolutions
    out_layer = 'mixed_3'
    towers = []
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=384, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}/tower'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=96, kernel_size=3, pad=1, stride=1),
        dict(name='conv_2', num_output=96, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='pool', pool=P.Pooling.MAX, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    out_layer = '{}/join'.format(out_layer)
    net[out_layer] = L.Concat(*towers, axis=1)
    from_layer = out_layer

    # inceptions with 1x1, 7x1, 1x7 convolutions
    for inception_id in xrange(4, 8):
        if inception_id == 4:
            num_output = 128
        elif inception_id == 5 or inception_id == 6:
            num_output = 160
        elif inception_id == 7:
            num_output = 192
        out_layer = 'mixed_{}'.format(inception_id)
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv',
                 num_output=num_output,
                 kernel_size=1,
                 pad=0,
                 stride=1),
            dict(name='conv_1',
                 num_output=num_output,
                 kernel_size=[1, 7],
                 pad=[0, 3],
                 stride=[1, 1]),
            dict(name='conv_2',
                 num_output=192,
                 kernel_size=[7, 1],
                 pad=[3, 0],
                 stride=[1, 1]),
        ])
        towers.append(tower)
        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv',
                 num_output=num_output,
                 kernel_size=1,
                 pad=0,
                 stride=1),
            dict(name='conv_1',
                 num_output=num_output,
                 kernel_size=[7, 1],
                 pad=[3, 0],
                 stride=[1, 1]),
            dict(name='conv_2',
                 num_output=num_output,
                 kernel_size=[1, 7],
                 pad=[0, 3],
                 stride=[1, 1]),
            dict(name='conv_3',
                 num_output=num_output,
                 kernel_size=[7, 1],
                 pad=[3, 0],
                 stride=[1, 1]),
            dict(name='conv_4',
                 num_output=192,
                 kernel_size=[1, 7],
                 pad=[0, 3],
                 stride=[1, 1]),
        ])
        towers.append(tower)
        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool',
                 pool=P.Pooling.AVE,
                 kernel_size=3,
                 pad=1,
                 stride=1),
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    # inceptions with 1x1, 3x3, 1x7, 7x1 filters
    out_layer = 'mixed_8'
    towers = []
    tower_name = '{}/tower'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=320, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}/tower_1'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1',
             num_output=192,
             kernel_size=[1, 7],
             pad=[0, 3],
             stride=[1, 1]),
        dict(name='conv_2',
             num_output=192,
             kernel_size=[7, 1],
             pad=[3, 0],
             stride=[1, 1]),
        dict(name='conv_3', num_output=192, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='pool', pool=P.Pooling.MAX, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    out_layer = '{}/join'.format(out_layer)
    net[out_layer] = L.Concat(*towers, axis=1)
    from_layer = out_layer

    for inception_id in xrange(9, 11):
        num_output = 384
        num_output2 = 448
        if inception_id == 9:
            pool = P.Pooling.AVE
        else:
            pool = P.Pooling.MAX
        out_layer = 'mixed_{}'.format(inception_id)
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=320, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)

        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv',
                 num_output=num_output,
                 kernel_size=1,
                 pad=0,
                 stride=1),
        ])
        subtowers = []
        subtower_name = '{}/mixed'.format(tower_name)
        subtower = InceptionTower(net, '{}/conv'.format(tower_name),
                                  subtower_name, [
                                      dict(name='conv',
                                           num_output=num_output,
                                           kernel_size=[1, 3],
                                           pad=[0, 1],
                                           stride=[1, 1]),
                                  ])
        subtowers.append(subtower)
        subtower = InceptionTower(net, '{}/conv'.format(tower_name),
                                  subtower_name, [
                                      dict(name='conv_1',
                                           num_output=num_output,
                                           kernel_size=[3, 1],
                                           pad=[1, 0],
                                           stride=[1, 1]),
                                  ])
        subtowers.append(subtower)
        net[subtower_name] = L.Concat(*subtowers, axis=1)
        towers.append(net[subtower_name])

        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv',
                 num_output=num_output2,
                 kernel_size=1,
                 pad=0,
                 stride=1),
            dict(name='conv_1',
                 num_output=num_output,
                 kernel_size=3,
                 pad=1,
                 stride=1),
        ])
        subtowers = []
        subtower_name = '{}/mixed'.format(tower_name)
        subtower = InceptionTower(net, '{}/conv_1'.format(tower_name),
                                  subtower_name, [
                                      dict(name='conv',
                                           num_output=num_output,
                                           kernel_size=[1, 3],
                                           pad=[0, 1],
                                           stride=[1, 1]),
                                  ])
        subtowers.append(subtower)
        subtower = InceptionTower(net, '{}/conv_1'.format(tower_name),
                                  subtower_name, [
                                      dict(name='conv_1',
                                           num_output=num_output,
                                           kernel_size=[3, 1],
                                           pad=[1, 0],
                                           stride=[1, 1]),
                                  ])
        subtowers.append(subtower)
        net[subtower_name] = L.Concat(*subtowers, axis=1)
        towers.append(net[subtower_name])

        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool', pool=pool, kernel_size=3, pad=1, stride=1),
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    if output_pred:
        net.pool_3 = L.Pooling(net[from_layer],
                               pool=P.Pooling.AVE,
                               kernel_size=8,
                               pad=0,
                               stride=1)
        net.softmax = L.InnerProduct(net.pool_3, num_output=1008)
        net.softmax_prob = L.Softmax(net.softmax)

    return net
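A hedged sketch of driving this body function to emit a deploy prototxt; the 299x299 input follows the Inception-v3 paper, and the input layer name 'data' is an assumption.

# Assumed driver code, not part of the original example.
net = caffe.NetSpec()
net.data = L.Input(shape=dict(dim=[1, 3, 299, 299]))  # canonical Inception-v3 input
InceptionV3Body(net, from_layer='data', output_pred=True)
with open('inception_v3_deploy.prototxt', 'w') as f:
    f.write(str(net.to_proto()))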
Example #4
def shuffle_net(group,
                scale_f,
                input_size,
                se=False,
                num_classes=1000,
                asoft=True):
    # figure out network structure
    group_defs = {
        1: [36, 72, 144],
        2: [50, 100, 200],
        3: [60, 120, 240],
        4: [68, 136, 272],
        8: [96, 192, 384],
    }
    nouts_list = [int(v * scale_f) for v in group_defs[group]]
    nunits_list = [3, 7, 3]
    f_size = 24

    # setup the first couple of layers
    n = caffe.NetSpec()
    net = n.__dict__['tops']  # direct handle on the dict of tops (NetSpec overrides attribute access)

    n.data, n.label = L.ImageData(batch_size=128,
                                  source="../data/train.list",
                                  root_folder="/",
                                  ntop=2,
                                  include=dict(phase=0),
                                  transform_param=dict(crop_size=input_size,
                                                       mirror=True,
                                                       scale=1 / 128.))

    # stem: first 3x3/2 convolution
    n.conv1 = L.Convolution(n.data,
                            kernel_size=3,
                            stride=2,
                            num_output=f_size,
                            pad=1,
                            bias_term=False,
                            param=[dict(lr_mult=1, decay_mult=1)],
                            weight_filler=dict(type="msra"))
    n.conv1_bn = L.BatchNorm(
        n.conv1,
        param=[dict(lr_mult=0),
               dict(lr_mult=0),
               dict(lr_mult=0)],
        in_place=False)
    n.conv1_scale = L.Scale(
        n.conv1_bn,
        scale_param=dict(bias_term=True),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=1)],
        in_place=True)
    n.conv1_relu = L.ReLU(n.conv1_scale, in_place=True)
    n.conv1_pool = L.Pooling(n.conv1_relu, stride=2, kernel_size=3)
    # make the convolutional body

    last_size = f_size // 4  # integer division keeps the channel count an int under Python 3
    for i, (nout, nunit) in enumerate(zip(nouts_list, nunits_list)):
        s = 'Step' + str(i + 1) + '_reduction_'
        if i == 0:
            standard_unit(n,
                          nout - last_size,
                          s,
                          group,
                          se=se,
                          newdepth=True,
                          is_first=True)
        else:
            standard_unit(n, nout - last_size, s, group, se=se, newdepth=True)
        last_size = nout
        for unit in range(nunit):
            s = 'Step' + str(i + 1) + '_' + str(unit + 1) + '_'
            standard_unit(n, nout, s, group, se=se)

    # add the end layers

    net = n.__dict__['tops']
    bottom = net[list(net.keys())[-1]]  #find the last layer in netspec

    n.global_pool = L.Pooling(bottom,
                              pooling_param=dict(pool=1, global_pooling=True))  # pool=1 is AVE
    n.score = L.InnerProduct(n.global_pool,
                             num_output=num_classes,
                             bias_term=False,
                             param=[dict(lr_mult=1, decay_mult=1)],
                             weight_filler=dict(type="msra"))
    n.loss = L.SoftmaxWithLoss(n.score, n.label)
    n.accuracy = L.Accuracy(n.score, n.label)

    return n
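A hedged usage sketch: group must be a key of group_defs, and the function returns the NetSpec itself, so the caller serializes it. The file name and settings are assumptions.

# Assumed driver; group=3, scale_f=1.0 is the standard ShuffleNet 1x setting.
spec = shuffle_net(group=3, scale_f=1.0, input_size=224)
with open('shufflenet_train.prototxt', 'w') as f:
    f.write(str(spec.to_proto()))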
Example #5
    def generate(self):
        """Returns a NetSpec specifying CaffeNet, following the original proto text
               specification (./models/bvlc_reference_caffenet/train_val.prototxt)."""
        conf = self
        n = caffe.NetSpec()
        param = LT.learned_param if conf.train else LT.frozen_param

        if self.train:

            n.data = L.Python(top=[
                "rois", 'labels', 'bbox_targets', 'bbox_inside_weights',
                'bbox_outside_weights'
            ],
                              python_param=dict(module='roi_data_layer.layer',
                                                layer='RoIDataLayer',
                                                param_str="num_classes: " +
                                                str(conf.num_classes)))
        else:
            n.data, n.im_info = LT.input()

        conv15_param = LT.learned_param if (
            conf.conv_1_to_5_learn) else LT.frozen_param
        LT.conv1_to_5(n, conv15_param)

        if not self.train:
            n.rpn_conv1, n.rpn_relu1, n.rpn_cls_score, n.rpn_bbox_pred = LT.rpn_class_and_bbox_predictors(
                n, self, param)
            n.rpn_cls_score_reshape = LT.reshape(n.rpn_cls_score,
                                                 [0, 2, -1, 0])
            n.rpn_cls_prob, n.rpn_cls_prob_reshape, n.rois = LT.roi_proposal(
                n, self)

        n.roi_pool = L.ROIPooling(bottom=["conv5", "rois"],
                                  pooled_w=6,
                                  pooled_h=6,
                                  spatial_scale=0.0625)

        n.fc6, n.relu6 = LT.fc_relu(n.roi_pool, 4096, param=param)

        n.drop6 = fc7input = L.Dropout(n.relu6,
                                       in_place=True,
                                       dropout_ratio=0.5,
                                       scale_train=False)
        n.fc7, n.relu7 = LT.fc_relu(fc7input, 4096, param=param)
        n.drop7 = layer7 = L.Dropout(n.relu7,
                                     in_place=True,
                                     dropout_ratio=0.5,
                                     scale_train=False)
        weight_filler = (LT.WEIGHT_FILLER if conf.train else dict())
        bias_filler = (LT.BIAS_FILLER if conf.train else dict())
        n.cls_score = L.InnerProduct(layer7,
                                     num_output=conf.num_classes,
                                     weight_filler=weight_filler,
                                     bias_filler=bias_filler,
                                     param=LT.learned_param)

        n.bbox_pred = L.InnerProduct(layer7,
                                     num_output=conf.num_classes * 4,
                                     weight_filler=weight_filler,
                                     bias_filler=bias_filler,
                                     param=LT.learned_param)

        if conf.train:
            n.loss_cls = LT.soft_max_with_loss(["cls_score", "labels"])
            n.loss_bbox = L.SmoothL1Loss(bottom=[
                "bbox_pred", "bbox_targets", "bbox_inside_weights",
                "bbox_outside_weights"
            ],
                                         loss_weight=1)
        else:
            n.cls_prob = L.Softmax(n.cls_score,
                                   loss_param=dict(ignore_label=-1,
                                                   normalize=True))

        if self.train:
            n.rpn_conv1, n.rpn_relu1, n.rpn_cls_score, n.rpn_bbox_pred = LT.rpn_class_and_bbox_predictors(
                n, self, LT.frozen_param)

        n.silence_rpn_cls_score = LT.silence(n.rpn_cls_score)
        n.silence_rpn_bbox_pred = LT.silence(n.rpn_bbox_pred)
        # write the net to a temporary file and return its filename
        return self.save(n)
Example #6
def mfb_baseline(mode, batchsize, T, question_vocab_size, folder):
    n = caffe.NetSpec()
    mode_str = json.dumps({
        'mode': mode,
        'batchsize': batchsize,
        'folder': folder
    })
    if mode == 'val':
        n.data, n.cont, n.img_feature, n.label = L.Python( \
            module='vqa_data_layer', layer='VQADataProviderLayer', \
            param_str=mode_str, ntop=4)
    else:
        n.data, n.cont, n.img_feature, n.label = L.Python( \
            module='vqa_data_layer_kld', layer='VQADataProviderLayer', \
            param_str=mode_str, ntop=4)
    n.embed = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \
                      weight_filler=dict(type='xavier'))
    n.embed_tanh = L.TanH(n.embed)

    # LSTM
    n.lstm1 = L.LSTM( \
        n.embed_tanh, n.cont, \
        recurrent_param=dict( \
            num_output=config.LSTM_UNIT_NUM, \
            weight_filler=dict(type='xavier')))
    tops1 = L.Slice(n.lstm1,
                    ntop=config.MAX_WORDS_IN_QUESTION,
                    slice_param={'axis': 0})
    for i in xrange(config.MAX_WORDS_IN_QUESTION - 1):
        n.__setattr__('slice_first' + str(i), tops1[int(i)])
        n.__setattr__('silence_data_first' + str(i),
                      L.Silence(tops1[int(i)], ntop=0))
    n.lstm1_out = tops1[config.MAX_WORDS_IN_QUESTION - 1]
    n.lstm1_reshaped = L.Reshape(n.lstm1_out, \
                                 reshape_param=dict( \
                                     shape=dict(dim=[-1, 1024])))
    n.q_feat = L.Dropout(
        n.lstm1_reshaped,
        dropout_param={'dropout_ratio': config.LSTM_DROPOUT_RATIO})
    '''
    Coarse Image-Question MFB fusion
    '''

    n.mfb_q_proj = L.InnerProduct(n.q_feat,
                                  num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_i_proj = L.InnerProduct(n.img_feature,
                                  num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_iq_eltwise = L.Eltwise(n.mfb_q_proj,
                                 n.mfb_i_proj,
                                 eltwise_param=dict(operation=0))  # operation=0 is PROD (element-wise product)
    n.mfb_iq_drop = L.Dropout(
        n.mfb_iq_eltwise,
        dropout_param={'dropout_ratio': config.MFB_DROPOUT_RATIO})
    n.mfb_iq_resh = L.Reshape(
        n.mfb_iq_drop,
        reshape_param=dict(shape=dict(
            dim=[-1, 1, config.MFB_OUT_DIM, config.MFB_FACTOR_NUM])))
    n.mfb_iq_sumpool = L.Pooling(n.mfb_iq_resh, pool=P.Pooling.SUM, \
                                 pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.mfb_out = L.Reshape(n.mfb_iq_sumpool, \
                          reshape_param=dict(shape=dict(dim=[-1, config.MFB_OUT_DIM])))
    n.mfb_sign_sqrt = L.SignedSqrt(n.mfb_out)
    n.mfb_l2 = L.L2Normalize(n.mfb_sign_sqrt)

    n.prediction = L.InnerProduct(n.mfb_l2,
                                  num_output=config.NUM_OUTPUT_UNITS,
                                  weight_filler=dict(type='xavier'))
    if mode == 'val':
        n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    else:
        n.loss = L.SoftmaxKLDLoss(n.prediction, n.label)
    return n.to_proto()
Example #7
def fc_relu(bottom, nout):
    fc = L.InnerProduct(bottom, num_output=nout)
    return fc, L.ReLU(fc, in_place=True)
Example #8
def qlstm(mode, batchsize, T, question_vocab_size):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode':mode, 'batchsize':batchsize})
    # n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(\
    #     module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=5 )
    n.data, n.cont, n.img_feature, n.label = L.Python(\
        module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=4 )
    
    # word embedding
    n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \
        weight_filler=dict(type='uniform',min=-0.08,max=0.08))
    # n.embed = L.TanH(n.embed_ba)
    n.embed_scale = L.Scale(n.embed_ba, n.cont, scale_param=dict(axis=0))
    n.embed_scale_resh = L.Reshape(n.embed_scale,\
                          reshape_param=dict(\
                              shape=dict(dim=[batchsize,1,T,-1])))
    
    # convolution
    n.word_feature_3_1 = L.Convolution(n.embed_scale_resh, kernel_h=1, kernel_w=300, stride=1, num_output=256, pad_h=0, pad_w=0, weight_filler=dict(type='xavier'))
    n.word_relu_3_1_r = L.ReLU(n.word_feature_3_1)
    n.word_feature_3_2 = L.Convolution(n.word_relu_3_1_r, kernel_h=3, kernel_w=1, stride=1, num_output=256, pad_h=1, pad_w=0, weight_filler=dict(type='xavier'))
    n.word_relu_3_2_r = L.ReLU(n.word_feature_3_2)
    n.word_feature_3 = L.Convolution(n.word_relu_3_2_r, kernel_h=1, kernel_w=1, stride=1, num_output=1024, pad_h=0, pad_w=0, weight_filler=dict(type='xavier'))

    n.word_feature_5_1 = L.Convolution(n.embed_scale_resh, kernel_h=1, kernel_w=300, stride=1, num_output=256, pad_h=0, pad_w=0, weight_filler=dict(type='xavier'))
    n.word_relu_5_1_r = L.ReLU(n.word_feature_5_1)
    n.word_feature_5_2 = L.Convolution(n.word_relu_5_1_r, kernel_h=5, kernel_w=1, stride=1, num_output=256, pad_h=2, pad_w=0, weight_filler=dict(type='xavier'))
    n.word_relu_5_2_r = L.ReLU(n.word_feature_5_2)
    n.word_feature_5 = L.Convolution(n.word_relu_5_2_r, kernel_h=1, kernel_w=1, stride=1, num_output=1024, pad_h=0, pad_w=0, weight_filler=dict(type='xavier'))
    
    n.word_relu_3 = L.ReLU(n.word_feature_3)
    n.word_relu_5 = L.ReLU(n.word_feature_5)

    n.word_vec_3 = L.Pooling(n.word_relu_3, kernel_h=T, kernel_w=1, stride=T, pool=P.Pooling.MAX)
    n.word_vec_5 = L.Pooling(n.word_relu_5, kernel_h=T, kernel_w=1, stride=T, pool=P.Pooling.MAX)
    
    word_vec = [n.word_vec_3, n.word_vec_5]
    n.concat_vec = L.Concat(*word_vec, concat_param={'axis': 1}) # N x 2*d_w x 1 x 1
    n.concat_vec_dropped = L.Dropout(n.concat_vec,dropout_param={'dropout_ratio':0.5})

    n.q_emb_tanh_droped_resh_tiled_1 = L.Tile(n.concat_vec_dropped, axis=2, tiles=14)
    n.q_emb_tanh_droped_resh_tiled = L.Tile(n.q_emb_tanh_droped_resh_tiled_1, axis=3, tiles=14)
    n.i_emb_tanh_droped_resh = L.Reshape(n.img_feature,reshape_param=dict(shape=dict(dim=[-1,2048,14,14])))
    n.blcf = L.CompactBilinear(n.q_emb_tanh_droped_resh_tiled, n.i_emb_tanh_droped_resh, compact_bilinear_param=dict(num_output=16000,sum_pool=False))
    n.blcf_sign_sqrt = L.SignedSqrt(n.blcf)
    n.blcf_sign_sqrt_l2 = L.L2Normalize(n.blcf_sign_sqrt)
    n.blcf_droped = L.Dropout(n.blcf_sign_sqrt_l2,dropout_param={'dropout_ratio':0.1})

    # multi-channel attention
    n.att_conv1 = L.Convolution(n.blcf_droped, kernel_size=1, stride=1, num_output=512, pad=0, weight_filler=dict(type='xavier'))
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1, num_output=2, pad=0, weight_filler=dict(type='xavier'))
    n.att_reshaped = L.Reshape(n.att_conv2,reshape_param=dict(shape=dict(dim=[-1,2,14*14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att = L.Reshape(n.att_softmax,reshape_param=dict(shape=dict(dim=[-1,2,14,14])))
    att_maps = L.Slice(n.att, ntop=2, slice_param={'axis':1})
    n.att_map0 = att_maps[0]
    n.att_map1 = att_maps[1]
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1)
    n.att_feature0  = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map0, dummy)
    n.att_feature1  = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map1, dummy)
    n.att_feature0_resh = L.Reshape(n.att_feature0, reshape_param=dict(shape=dict(dim=[-1,2048])))
    n.att_feature1_resh = L.Reshape(n.att_feature1, reshape_param=dict(shape=dict(dim=[-1,2048])))
    n.att_feature = L.Concat(n.att_feature0_resh, n.att_feature1_resh)

    # merge attention and lstm with compact bilinear pooling
    n.att_feature_resh = L.Reshape(n.att_feature, reshape_param=dict(shape=dict(dim=[-1,4096,1,1])))
    #n.lstm_12_resh = L.Reshape(n.lstm_12, reshape_param=dict(shape=dict(dim=[-1,2048,1,1])))
    n.bc_att_lstm = L.CompactBilinear(n.att_feature_resh, n.concat_vec_dropped, 
                                      compact_bilinear_param=dict(num_output=16000,sum_pool=False))
    n.bc_sign_sqrt = L.SignedSqrt(n.bc_att_lstm)
    n.bc_sign_sqrt_l2 = L.L2Normalize(n.bc_sign_sqrt)

    n.bc_dropped = L.Dropout(n.bc_sign_sqrt_l2, dropout_param={'dropout_ratio':0.1})
    n.bc_dropped_resh = L.Reshape(n.bc_dropped, reshape_param=dict(shape=dict(dim=[-1, 16000])))

    n.prediction = L.InnerProduct(n.bc_dropped_resh, num_output=3000, weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    return n.to_proto()
Example #9
def ZFNetBody(net, from_layer, for_training=True):
    net.conv1 = L.Convolution(
        net[from_layer],
        kernel_size=k_conv1,
        stride=s_conv1,
        num_output=d_conv1,
        pad=p_conv1,
        bias_term=True,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.relu1 = L.ReLU(net.conv1, in_place=True)
    net.pool1 = L.Pooling(net.relu1,
                          pool=P.Pooling.MAX,
                          kernel_size=k_pool1,
                          stride=s_pool1)
    net.norm1 = L.LRN(net.pool1,
                      lrn_param=dict(local_size=local_size_norm1,
                                     alpha=alpha_norm1,
                                     beta=beta_norm1))

    net.conv2 = L.Convolution(
        net.norm1,
        kernel_size=k_conv2,
        stride=s_conv2,
        num_output=d_conv2,  #pad=p_conv2, 
        bias_term=True,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.relu2 = L.ReLU(net.conv2, in_place=True)
    net.pool2 = L.Pooling(net.relu2,
                          pool=P.Pooling.MAX,
                          kernel_size=k_pool2,
                          stride=s_pool2)
    net.norm2 = L.LRN(net.pool2,
                      lrn_param=dict(local_size=local_size_norm2,
                                     alpha=alpha_norm2,
                                     beta=beta_norm2))

    net.conv3 = L.Convolution(
        net.norm2,
        kernel_size=k_conv3,
        stride=s_conv3,
        num_output=d_conv3,
        pad=p_conv3,
        bias_term=True,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.relu3 = L.ReLU(net.conv3, in_place=True)

    net.conv4 = L.Convolution(
        net.relu3,
        kernel_size=k_conv4,
        stride=s_conv4,
        num_output=d_conv4,
        pad=p_conv4,
        bias_term=True,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.relu4 = L.ReLU(net.conv4, in_place=True)

    net.conv5 = L.Convolution(
        net.relu4,
        kernel_size=k_conv5,
        stride=s_conv5,
        num_output=d_conv5,
        pad=p_conv5,
        bias_term=True,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.relu5 = L.ReLU(net.conv5, in_place=True)
    net.pool5 = L.Pooling(net.relu5,
                          pool=P.Pooling.MAX,
                          kernel_size=k_pool5,
                          stride=s_pool5)

    net.fc6 = L.InnerProduct(
        net.pool5,
        num_output=k_ip6,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.relu6 = L.ReLU(net.fc6, in_place=True)
    net.drop6 = L.Dropout(net.relu6,
                          dropout_param=dict(dropout_ratio=r_drop6),
                          in_place=True)

    net.fc7 = L.InnerProduct(
        net.fc6,
        num_output=k_ip7,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    net.relu7 = L.ReLU(net.fc7, in_place=True)
    net.drop7 = L.Dropout(net.relu7,
                          dropout_param=dict(dropout_ratio=r_drop7),
                          in_place=True)

    net.fc8 = L.InnerProduct(
        net.fc7,
        num_output=k_ip8,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])
    if not for_training:
        net.acc = L.Accuracy(net.fc8,
                             net.label,
                             include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    net.loss = L.SoftmaxWithLoss(net.fc8, net.label)

    return net
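ZFNetBody reads its hyperparameters (k_conv1, s_conv1, ...) from module-level globals that the snippet does not show. A hedged sketch of plausible values in the spirit of Zeiler & Fergus (2013); the exact numbers are assumptions, not recovered from the original module:

# Assumed module-level hyperparameters (illustrative only).
k_conv1, s_conv1, d_conv1, p_conv1 = 7, 2, 96, 1     # conv1: 7x7/2, 96 maps
k_pool1, s_pool1 = 3, 2
local_size_norm1, alpha_norm1, beta_norm1 = 5, 1e-4, 0.75
k_conv2, s_conv2, d_conv2 = 5, 2, 256                # conv2: 5x5/2, 256 maps
k_pool2, s_pool2 = 3, 2
local_size_norm2, alpha_norm2, beta_norm2 = 5, 1e-4, 0.75
k_conv3, s_conv3, d_conv3, p_conv3 = 3, 1, 384, 1
k_conv4, s_conv4, d_conv4, p_conv4 = 3, 1, 384, 1
k_conv5, s_conv5, d_conv5, p_conv5 = 3, 1, 256, 1
k_pool5, s_pool5 = 3, 2
k_ip6, r_drop6 = 4096, 0.5
k_ip7, r_drop7 = 4096, 0.5
k_ip8 = 1000                                         # ImageNet classes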
Example #10
    def test_workdir_setup(self):

        solver = bct.CaffeSolver(debug=True)

        solver.write(osp.join(self.workdir, 'solver.prototxt'))

        n = caffe.NetSpec()
        n.data, n.label = L.ImageData(transform_param=dict(crop_size=224,
                                                           mean_value=128),
                                      source='../static/imlist.txt',
                                      batch_size=50,
                                      ntop=2)
        net = vgg_core(n, learn=True)

        net.score = L.InnerProduct(net.fc7,
                                   num_output=2,
                                   param=[
                                       dict(lr_mult=5, decay_mult=1),
                                       dict(lr_mult=10, decay_mult=0)
                                   ])
        net.loss = L.SoftmaxWithLoss(net.score, n.label)

        with open(osp.join(self.workdir, 'trainnet.prototxt'), 'w') as w:
            w.write(str(net.to_proto()))

        with open(osp.join(self.workdir, 'testnet.prototxt'), 'w') as w:
            w.write(str(net.to_proto()))

        caffefile = '/runs/templates/VGG_ILSVRC_16_layers_initial.caffemodel'
        if osp.isfile(caffefile):
            shutil.copyfile(caffefile,
                            osp.join(self.workdir, 'initial.caffemodel'))

        bct.run(self.workdir, nbr_iters=3)

        self.assertTrue(osp.isfile(osp.join(self.workdir, 'train.log')))
        self.assertTrue(
            osp.isfile(osp.join(self.workdir, 'snapshot_iter_3.caffemodel')))

        caffemodel, iter_ = bct.find_latest_caffemodel(self.workdir)

        self.assertEqual(iter_, 3)
        net = bct.load_model(self.workdir,
                             caffemodel,
                             gpuid=0,
                             net_prototxt='testnet.prototxt',
                             phase=caffe.TEST)
        estlist, scorelist = bct.classify_from_datalayer(net,
                                                         n_testinstances=3,
                                                         batch_size=50,
                                                         scorelayer='score')

        self.assertEqual(len(scorelist), 3)
        self.assertEqual(len(estlist), 3)
        self.assertEqual(len(scorelist[0]), 2)

        img = np.asarray(Image.open('../static/bbc.jpg'))[:224, :224, :]
        imglist = []
        for itt in range(6):
            imglist.append(img)

        estlist, scorelist = bct.classify_from_imlist(imglist, net,
                                                      bct.Transformer(), 4)

        self.assertEqual(len(scorelist), 6)
        self.assertEqual(len(estlist), 6)
        self.assertEqual(len(scorelist[0]), 2)
Example #11
    def resnet_layers_proto(self,
                            batch_size,
                            phase='TRAIN',
                            stages=(3, 4, 6, 3)):
        n = caffe.NetSpec()
        if phase == 'TRAIN':
            source_data = self.train_data
            need_mirror = True
        else:
            source_data = self.test_data
            need_mirror = False
        n.data, n.label = L.Data(source=source_data,
                                 backend=P.Data.LMDB,
                                 batch_size=batch_size,
                                 ntop=2,
                                 transform_param=dict(
                                     crop_size=224,
                                     mean_value=[128, 128, 128],
                                     mirror=need_mirror))

        n.conv1, n.conv1_bn, n.conv1_scale, n.conv1_relu = \
            block_conv_bn_scale_relu(n.data, num_output=64, kernel_size=7, stride=2, pad=3)  # 64x112x112
        n.pool1 = L.Pooling(n.conv1,
                            kernel_size=3,
                            stride=2,
                            pool=P.Pooling.MAX)

        residual_num = 0
        for num in xrange(len(stages)):
            for i in xrange(stages[num]):
                residual_num = residual_num + 1

                if num == 0 and i == 0:
                    stage_string = skip_connect_with_dimen_match_no_patch_reduce
                    if residual_num == 1:
                        bottom_string = 'n.pool1'
                    else:
                        bottom_string = 'n.res%s_eletwise' % (
                            str(residual_num - 1))
                elif i == 0 and num > 0:
                    stage_string = skip_connect_with_dimen_match
                    if residual_num == 1:
                        bottom_string = 'n.pool1'
                    else:
                        bottom_string = 'n.res%s_eletwise' % (
                            str(residual_num - 1))
                else:
                    stage_string = skip_connect_no_dimen_match
                    bottom_string = 'n.res%s_eletwise' % str(residual_num - 1)
                exec(
                    stage_string.replace('(stage)', str(residual_num)).replace(
                        '(bottom)',
                        bottom_string).replace('(num)', str(2**num * 64)))

        exec('n.pool5 = L.Pooling(bottom_string, kernel_size=7, stride=1, pool=P.Pooling.AVE)'.replace(
            'bottom_string', 'n.res%s_eletwise' % str(residual_num)))

        n.classifier = L.InnerProduct(n.pool5,
                                      num_output=self.classifier_num,
                                      param=[
                                          dict(lr_mult=1, decay_mult=1),
                                          dict(lr_mult=2, decay_mult=0)
                                      ],
                                      weight_filler=dict(type='xavier'),
                                      bias_filler=dict(type='constant',
                                                       value=0))
        n.loss = L.SoftmaxWithLoss(n.classifier, n.label)
        if phase != 'TRAIN':
            n.accuracy_top1 = L.Accuracy(n.classifier,
                                         n.label,
                                         include=dict(phase=1))  # phase=1 is TEST
            n.accuracy_top5 = L.Accuracy(n.classifier,
                                         n.label,
                                         include=dict(phase=1),
                                         accuracy_param=dict(top_k=5))

        return n.to_proto()
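A hedged usage note: with this builder, stages=(3, 4, 6, 3) gives a ResNet-50-style depth and (3, 4, 23, 3) a ResNet-101-style depth. The wrapper class ResNetBuilder below is an assumption for illustration.

# Hypothetical driver; ResNetBuilder and its constructor are assumed.
builder = ResNetBuilder(train_data='train_lmdb', test_data='val_lmdb', classifier_num=1000)
with open('resnet50_train.prototxt', 'w') as f:
    f.write(str(builder.resnet_layers_proto(batch_size=32, phase='TRAIN',
                                            stages=(3, 4, 6, 3))))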
Example #12
def generate_layer(blobs, layer, n, net_params):
    """
    Parameters: blobs: weights for keras, layer: keras layer, n: Caffe NetSpec,
    net_params: Dictionary to store Caffe weights
    """
    if type(layer) == keras.layers.InputLayer:
        name = layer.name
        input_shape = list(layer.batch_input_shape)
        # Keras stores shapes as NHWC; Caffe expects NCHW, so reorder to [N, C, H, W] with N=1.
        input_shape = [1, input_shape[3], input_shape[1], input_shape[2]]
        n[name] = L.Input(shape=[dict(dim=input_shape)])
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.Dense:
        name = layer.name
        config = layer.get_config()
        use_bias = config['use_bias']
        if use_bias is None:
            use_bias = False
        if use_bias:
            net_params[name] = (np.array(blobs[0]).transpose(1, 0),
                                np.array(blobs[1]))
        else:
            net_params[name] = (blobs[0])
        in_nodes = get_input_nodes(layer)
        n[name] = L.InnerProduct(n[in_nodes[0].name],
                                 num_output=layer.units,
                                 bias_term=use_bias)
        if layer.activation is not None and layer.activation.__name__ != 'linear':
            name_act = name + "_activation_" + layer.activation.__name__  # get function string
            n[name_act] = apply_activation(layer, n[name])
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.Flatten:
        raise Exception(f"{layer.name} is not implemented")

    elif type(layer) == keras.layers.Dropout:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.Dropout(n[in_nodes[0].name],
                            dropout_ratio=layer.rate,
                            in_place=True)
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.Add:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        network_layers = []
        for ref in in_nodes:
            network_layers.append(n[ref.name])
        n[name] = L.Eltwise(*network_layers, operation=1)  # 1 is SUM
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.Multiply:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        network_layers = []
        for ref in in_nodes:
            network_layers.append(n[ref.name])
        n[name] = L.Eltwise(*network_layers, operation=0)  # 0 is PROD
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.Concatenate:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        network_layers = []
        for ref in in_nodes:
            network_layers.append(n[ref.name])
        n[name] = L.Concat(*network_layers, axis=1)
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.Maximum:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        network_layers = []
        for ref in in_nodes:
            network_layers.append(n[ref.name])
        n[name] = L.Eltwise(*network_layers, operation=2)  # 2 is MAX
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.Conv2DTranspose:
        '''
            DeconvolutionLayer:
            output = (input - 1) * stride + kernel_size - 2 * pad;
            kernel_size: {{2 * factor - factor % 2}} stride: {{factor}}
            num_output: {{C}} group: {{C}}
            pad: {{ceil((factor - 1) / 2.)}}
        '''
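        # Worked example (assumed numbers): input 56, kernel 4, stride 2, pad 1
        # -> (56 - 1) * 2 + 4 - 2 * 1 = 112, i.e. a clean 2x upsampling.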
        name = layer.name
        in_nodes = get_input_nodes(layer)
        # Stride
        if layer.strides is None:
            stride = (1, 1)
        else:
            stride = layer.strides

        # if layer.padding == 'same':  # Calculate the padding for 'same'
        #     padding = [layer.kernel_size[0] // 2, layer.kernel_size[1] // 2]
        # else:
        #     padding = [0, 0]  # If padding is valid(aka no padding)

        config = layer.get_config()
        use_bias = config['use_bias']
        if use_bias is None:
            use_bias = False
        n[name] = L.Deconvolution(n[in_nodes[0].name],
                                  convolution_param=dict(
                                      kernel_h=layer.kernel_size[0],
                                      kernel_w=layer.kernel_size[1],
                                      stride_h=stride[0],
                                      stride_w=stride[1],
                                      num_output=layer.filters,
                                      pad_h=math.ceil((stride[0] - 1) / 2.),
                                      pad_w=math.ceil((stride[1] - 1) / 2.),
                                      bias_term=use_bias))
        blobs[0] = np.array(blobs[0]).transpose(3, 2, 0, 1)
        net_params[name] = blobs
        if layer.activation is not None and layer.activation.__name__ != 'linear':
            name_act = name + "_activation_" + layer.activation.__name__  # get function string
            n[name_act] = apply_activation(layer, n[name])

        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.BatchNormalization:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.BatchNorm(n[in_nodes[0].name],
                              moving_average_fraction=layer.momentum,
                              eps=layer.epsilon,
                              use_global_stats=True)
        variance = np.array(blobs[-1])
        mean = np.array(blobs[-2])

        config = layer.get_config()
        param = dict()
        if config['scale']:
            gamma = np.array(blobs[0])
        else:
            gamma = np.ones(mean.shape, dtype=np.float32)

        if config['center']:
            beta = np.array(blobs[1])
            param['bias_term'] = True
        else:
            beta = np.zeros(mean.shape, dtype=np.float32)
            param['bias_term'] = False

        net_params[name] = (mean, variance, np.array([1.0]))

        # Scale after batchNorm
        name_scale = name + '_scale'
        n[name_scale] = L.Scale(n[name], in_place=True, scale_param=param)
        net_params[name_scale] = (gamma, beta)
        print(f'generate {name} ok...')

    # TODO Needs to be implemented
    elif type(layer) == keras.layers.Conv1D:
        raise Exception(f"{layer.name} is not implemented")

    elif type(layer) == keras.layers.ZeroPadding2D:
        print(f"{layer.name} is passed...")

    elif type(layer) == keras.layers.Conv2D:
        '''
           ConvolutionLayer:
           output = (input + 2 * pad  - kernel_size)  / stride + 1;
           kernel_shape: [out,in,k_size_h,k_size_w]
        '''
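        # Worked example (assumed numbers): input 224, kernel 3, pad 1, stride 2
        # -> floor((224 + 2 * 1 - 3) / 2) + 1 = 112.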
        name = layer.name

        # Padding
        if layer.padding == 'same':  # Calculate the padding for 'same'
            padding = [layer.kernel_size[0] // 2, layer.kernel_size[1] // 2]
        else:
            padding = [0, 0]  # If padding is valid(aka no padding)

        in_nodes = get_input_nodes(layer)
        if type(in_nodes[0]) == keras.layers.ZeroPadding2D:
            in_nodes = get_input_nodes(in_nodes[0])
            padding = [layer.kernel_size[0] // 2, layer.kernel_size[1] // 2]
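            # NOTE: assumes the preceding ZeroPadding2D pads by kernel_size // 2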

        if layer.strides is None:
            stride = (1, 1)
        else:
            stride = layer.strides

        # TODO: the rest of the arguments, including bias, regularizers, dilation
        config = layer.get_config()
        # print(config)
        # get bias parameter
        use_bias = config['use_bias']
        if use_bias is None:
            use_bias = False
        n[name] = L.Convolution(n[in_nodes[0].name],
                                kernel_h=layer.kernel_size[0],
                                kernel_w=layer.kernel_size[1],
                                stride_h=stride[0],
                                stride_w=stride[1],
                                num_output=layer.filters,
                                pad_h=padding[0],
                                pad_w=padding[1],
                                bias_term=use_bias)
        # Keras Conv2D kernels are [kh, kw, in, out]; Caffe expects [out, in, kh, kw]
        blobs[0] = np.array(blobs[0]).transpose((3, 2, 0, 1))
        # print(blobs[0].shape)
        net_params[name] = blobs
        if layer.activation is not None and layer.activation.__name__ != 'linear':
            name_act = name + "_activation_" + layer.activation.__name__  # get function string
            n[name_act] = apply_activation(layer, n[name])

        print(f'generate {name} ok...')

    elif type(layer) in (keras.layers.MaxPooling2D, keras.layers.AveragePooling2D):
        name = layer.name
        in_nodes = get_input_nodes(layer)
        if type(layer) == keras.layers.MaxPooling2D:
            pool = P.Pooling.MAX
        else:  # NOTE AveragePooling needs to be implemented
            pool = P.Pooling.AVE
        # Padding
        # TODO: the rest of the arguments, including bias, regularizers, dilation
        if layer.strides is None:
            stride = (1, 1)
        else:
            stride = layer.strides
        # Padding
        if layer.padding == 'same':  # Calculate the padding for 'same'
            padding = [layer.pool_size[0] // 2, layer.pool_size[1] // 2]
        else:
            padding = [0, 0]  # If padding is valid(aka no padding)
        n[name] = L.Pooling(n[in_nodes[0].name],
                            kernel_h=layer.pool_size[0],
                            kernel_w=layer.pool_size[1],
                            stride_h=stride[0],
                            stride_w=stride[1],
                            pad_h=padding[0],
                            pad_w=padding[1],
                            pool=pool)
        print(f'generate {name} ok...')

    # Activation (wrapper for activations) and Advanced Activation Layers
    elif type(layer) == keras.layers.Activation:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = apply_activation(layer,
                                   n[in_nodes[0].name])  # TODO: Assert only 1
        print(f'generate {name} ok...')

    # Caffe lacks initializer, regularizer, and constraint params
    elif type(layer) == keras.layers.LeakyReLU:
        # TODO: figure out how to pass Leaky params
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.PReLU(n[in_nodes[0].name])
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.PReLU:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.PReLU(n[in_nodes[0].name])
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.ELU:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.ELU(n[in_nodes[0].name], alpha=layer.alpha)
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.GlobalAveragePooling2D:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.Pooling(n[in_nodes[0].name],
                            pool=P.Pooling.AVE,
                            global_pooling=True)  # GlobalAveragePooling2D defines no kernel_size/strides
        print(f'generate {name} ok...')

    else:
        raise Exception("Cannot convert model." + layer.name +
                        " is not supported.")
Example #13
def InceptionResNetV2(train_lmdb,
                      test_lmdb,
                      input_size=299,
                      batch_size=256,
                      stages=[0, 5, 10, 5],
                      first_output=32,
                      include_acc=False):
    # NOTE: this code cannot handle the include phase cleanly, so only the last (TEST-phase) data layer ends up in the generated net
    data, label = L.Data(source=train_lmdb,
                         backend=P.Data.LMDB,
                         batch_size=batch_size,
                         ntop=2,
                         transform_param=dict(crop_size=input_size,
                                              mean_value=[104, 117, 123],
                                              mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TRAIN')))
    data, label = L.Data(source=test_lmdb,
                         backend=P.Data.LMDB,
                         batch_size=batch_size,
                         ntop=2,
                         transform_param=dict(crop_size=input_size,
                                              mean_value=[104, 117, 123],
                                              mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TEST')))
    data, label = L.MemoryData(batch_size=batch_size,
                               height=input_size,
                               width=input_size,
                               channels=3,
                               ntop=2,
                               transform_param=dict(mean_value=[104, 117, 123],
                                                    mirror=True),
                               include=dict(phase=getattr(caffe_pb2, 'TEST')))

    Inception_ResNet_A_input = stem(bottom=data,
                                    conv1_num=32,
                                    conv2_num=32,
                                    conv3_num=64,
                                    conv4_num=96,
                                    conv5_num=64,
                                    conv6_num=96,
                                    conv7_num=64,
                                    conv8_num=64,
                                    conv9_num=64,
                                    conv10_num=96,
                                    conv11_num=192)
    for i in range(stages[1]):
        Inception_ResNet_A_input = Inception_ResNet_A(
            bottom=Inception_ResNet_A_input,
            bottom_size=384,
            num1x1=32,
            num3x3=48,
            num3x3double=64)

    Inception_ResNet_B_input = ReductionA(bottom=Inception_ResNet_A_input,
                                          num1x1_k=256,
                                          num3x3_l=256,
                                          num3x3_n=384,
                                          num3x3_m=384)

    for i in range(stages[2]):
        Inception_ResNet_B_input = Inception_ResNet_B(
            bottom=Inception_ResNet_B_input,
            bottom_size=1152,
            num1x1=192,
            num1x1double=128,
            num7x1=160,
            num1x7=192)

    Inception_ResNet_C_input = ReductionB(bottom=Inception_ResNet_B_input,
                                          num1x1=256,
                                          num3x3=384,
                                          num3x3double=288,
                                          num3x3three=320)

    for i in range(stages[3]):
        Inception_ResNet_C_input = Inception_ResNet_C(
            bottom=Inception_ResNet_C_input,
            bottom_size=2144,
            num1x1=192,
            num1x3=224,
            num3x1=256)

    glb_pool = L.Pooling(Inception_ResNet_C_input,
                         pool=P.Pooling.AVE,
                         global_pooling=True)
    dropout = L.Dropout(glb_pool, dropout_ratio=0.2)
    fc = L.InnerProduct(dropout, num_output=1000)
    loss = L.SoftmaxWithLoss(fc, label)
    acc = L.Accuracy(fc, label, include=dict(phase=getattr(caffe_pb2, 'TEST')))
    return to_proto(loss, acc)
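A minimal usage sketch for the builder above (the LMDB paths are placeholders): to_proto returns a NetParameter message, so str() yields prototxt text ready to write to disk.

proto = InceptionResNetV2('train_lmdb', 'test_lmdb', batch_size=64)
with open('inception_resnet_v2.prototxt', 'w') as f:
    f.write(str(proto))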
Example #14
def yolonet():
    # Python data layer
    pydata_params = dict(list_root='/home/zehao/WorkSpace/caffe/examples/yolo/lists')
    pydata_params['split'] = 'train'
    pydata_params['mean'] = (104.00699, 116.66877, 122.67892)
    pydata_params['batch_size'] = 16
    pydata_params['im_shape'] = (448, 448)
    pydata_params['classes'] = 20
    pydata_params['coords'] = 4
    pydata_params['num'] = 2
    pydata_params['side'] = 7
    pylayer = 'VOCLocDataLayerSyncSync'
    data, label = L.Python(module='voc_data_layer',name='DataLayer', layer=pylayer,
                            ntop=2, param_str=str(pydata_params))

    # the net itself
    conv1, relu1 = conv_relu(data, 7, 64, stride=2, pad=3)
    pool1 = max_pool(relu1, 2, stride=2)

    conv2, relu2 = conv_relu(pool1, 3, 192, stride=1, pad=1)
    pool2 = max_pool(relu2, 2, stride=2)

    conv3, relu3 = conv_relu(pool2, 1, 128, stride=1, pad=0)
    conv4, relu4 = conv_relu(relu3, 3, 256, stride=1, pad=1)
    conv5, relu5 = conv_relu(relu4, 1, 256, stride=1, pad=0)
    conv6, relu6 = conv_relu(relu5, 3, 512, stride=1, pad=1)
    pool6 = max_pool(relu6, 2, stride=2)

    conv7, relu7 = conv_relu(pool6, 1, 256, stride=1, pad=0)
    conv8, relu8 = conv_relu(relu7, 3, 512, stride=1, pad=1)
    conv9, relu9 = conv_relu(relu8, 1, 256, stride=1, pad=0)
    conv10, relu10 = conv_relu(relu9, 3, 512, stride=1, pad=1)
    conv11, relu11 = conv_relu(relu10, 1, 256, stride=1, pad=0)
    conv12, relu12 = conv_relu(relu11, 3, 512, stride=1, pad=1)
    conv13, relu13 = conv_relu(relu12, 1, 256, stride=1, pad=0)
    conv14, relu14 = conv_relu(relu13, 3, 512, stride=1, pad=1)

    conv15, relu15 = conv_relu(relu14, 1, 512, stride=1, pad=0)
    conv16, relu16 = conv_relu(relu15, 3, 1024, stride=1, pad=1)
    pool16 = max_pool(relu16, 2, stride=2)

    conv17, relu17 = conv_relu(pool16, 1, 512, stride=1, pad=0)
    conv18, relu18 = conv_relu(relu17, 3, 1024, stride=1, pad=1)
    conv19, relu19 = conv_relu(relu18, 1, 512, stride=1, pad=0)
    conv20, relu20 = conv_relu(relu19, 3, 1024, stride=1, pad=1)

    conv21, relu21 = conv_relu(relu20, 3, 1024, stride=1, pad=1)
    conv22, relu22 = conv_relu(relu21, 3, 1024, stride=2, pad=1)

    conv23, relu23 = conv_relu(relu22, 3, 1024, stride=1, pad=1)
    conv24, relu24 = conv_relu(relu23, 3, 1024, stride=1, pad=1)

    fc25, relu25 = fc_relu(relu24, 4096)
    result = L.InnerProduct(relu25, num_output=1470,
                        weight_filler=dict(type='gaussian', std=0.01),
                        bias_filler=dict(type='constant', value=0))
    # Python loss layer
    pydata_params = dict(classes=20)
    pydata_params['coords'] = 4
    pydata_params['side'] = 7
    pydata_params['num'] = 2
    pydata_params['object_scale'] = 1
    pydata_params['noobject_scale'] = 0.5
    pydata_params['class_scale'] = 1
    pydata_params['coord_scale'] = 5
    pydata_params['sqrt'] = True
    pylayer = 'YoloLossLayer'
    loss = L.Python(result, label, name='YoloLoss', module='yolo_loss_layer', layer=pylayer,
                            ntop=1, param_str=str(pydata_params))
    return to_proto(loss)
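The 1470 outputs of the final InnerProduct follow YOLOv1's grid encoding with the parameters configured above (side=7, num=2, coords=4, classes=20): each grid cell predicts num boxes of coords plus one confidence value, plus class scores.

side, num, coords, classes = 7, 2, 4, 20
assert side * side * (num * (coords + 1) + classes) == 1470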
Example #15
    def inception_v3_proto(self, batch_size, phase='TRAIN'):
        n = caffe.NetSpec()
        if phase == 'TRAIN':
            source_data = self.train_data
            mirror = True
        else:
            source_data = self.test_data
            mirror = False
        n.data, n.label = L.Data(source=source_data,
                                 backend=P.Data.LMDB,
                                 batch_size=batch_size,
                                 ntop=2,
                                 transform_param=dict(
                                     crop_size=299,
                                     mean_value=[104, 117, 123],
                                     mirror=mirror))
        # stage 1
        n.conv1_3x3_s2, n.conv1_3x3_s2_bn, n.conv1_3x3_relu, n.conv2_3x3_s1, n.conv2_3x3_s1_bn, n.conv2_3x3_relu, \
        n.conv3_3x3_s1, n.conv3_3x3_s1_bn, n.conv3_3x3_relu = \
            conv_bn_stack_3(n.data, dict(num_output=[32, 32, 64], kernel_size=[3, 3, 3], stride=[2, 1, 1],
                                         pad=[0, 0, 1], group=[1, 1, 1], weight_type=['xavier', 'xavier', 'xavier'],
                                         weight_std=[0.01, 0.01, 0.01],
                                         bias_type=['constant', 'constant', 'constant'], bias_value=[0.2, 0.2, 0.2]))
        n.pool1_3x3_s2 = L.Pooling(n.conv3_3x3_s1_bn,
                                   kernel_size=3,
                                   stride=2,
                                   pool=P.Pooling.MAX)
        # stage 2
        n.conv4_3x3_reduce, n.conv4_3x3_reduce_bn, n.conv4_relu_3x3_reduce, n.conv4_3x3, n.conv4_3x3_bn, n.conv4_relu_3x3 = \
            conv_bn_stack_2(n.pool1_3x3_s2,
                            dict(num_output=[80, 192], kernel_size=[1, 3], stride=[1, 1], pad=[0, 0], group=[1, 1],
                                 weight_type=['xavier', 'xavier'], weight_std=[0.01, 0.01],
                                 bias_type=['constant', 'constant'], bias_value=[0.2, 0.2]))
        n.pool2_3x3_s2 = L.Pooling(n.conv4_3x3_bn,
                                   kernel_size=3,
                                   stride=2,
                                   pool=P.Pooling.MAX)

        # stage 3
        n.inception_3a_1x1, n.inception_3a_1x1_bn, n.inception_3a_relu_1x1, n.inception_3a_5x5_reduce, \
        n.inception_3a_5x5_reduce_bn, n.inception_3a_relu_5x5_reduce, n.inception_3a_5x5, n.inception_3a_5x5_bn, \
        n.inception_3a_relu_5x5, n.inception_3a_3x3_reduce, n.inception_3a_3x3_reduce_bn, n.inception_3a_relu_3x3_reduce, \
        n.inception_3a_3x3_1, n.inception_3a_3x3_1_bn, n.inception_3a_relu_3x3_1, n.inception_3a_3x3_2, \
        n.inception_3a_3x3_2_bn, n.inception_3a_relu_3x3_2, n.inception_3a_pool, n.inception_3a_pool_proj, \
        n.inception_3a_pool_proj_bn, n.inception_3a_relu_pool_proj, n.inception_3a_output = \
            inception_v3_7a(n.pool2_3x3_s2,
                            dict(conv_1x1=64, conv_5x5_reduce=48, conv_5x5=64, conv_3x3_reduce=64, conv_3x3_1=96,
                                 conv_3x3_2=96, pool_proj=32))
        n.inception_3b_1x1, n.inception_3b_1x1_bn, n.inception_3b_relu_1x1, n.inception_3b_5x5_reduce, \
        n.inception_3b_5x5_reduce_bn, n.inception_3b_relu_5x5_reduce, n.inception_3b_5x5, n.inception_3b_5x5_bn, \
        n.inception_3b_relu_5x5, n.inception_3b_3x3_reduce, n.inception_3b_3x3_reduce_bn, n.inception_3b_relu_3x3_reduce, \
        n.inception_3b_3x3_1, n.inception_3b_3x3_1_bn, n.inception_3b_relu_3x3_1, n.inception_3b_3x3_2, \
        n.inception_3b_3x3_2_bn, n.inception_3b_relu_3x3_2, n.inception_3b_pool, n.inception_3b_pool_proj, \
        n.inception_3b_pool_proj_bn, n.inception_3b_relu_pool_proj, n.inception_3b_output = \
            inception_v3_7a(n.inception_3a_output,
                            dict(conv_1x1=64, conv_5x5_reduce=48, conv_5x5=64, conv_3x3_reduce=64, conv_3x3_1=96,
                                 conv_3x3_2=96, pool_proj=64))
        n.inception_3c_1x1, n.inception_3c_1x1_bn, n.inception_3c_relu_1x1, n.inception_3c_5x5_reduce, \
        n.inception_3c_5x5_reduce_bn, n.inception_3c_relu_5x5_reduce, n.inception_3c_5x5, n.inception_3c_5x5_bn, \
        n.inception_3c_relu_5x5, n.inception_3c_3x3_reduce, n.inception_3c_3x3_reduce_bn, n.inception_3c_relu_3x3_reduce, \
        n.inception_3c_3x3_1, n.inception_3c_3x3_1_bn, n.inception_3c_relu_3x3_1, n.inception_3c_3x3_2, \
        n.inception_3c_3x3_2_bn, n.inception_3c_relu_3x3_2, n.inception_3c_pool, n.inception_3c_pool_proj, \
        n.inception_3c_pool_proj_bn, n.inception_3c_relu_pool_proj, n.inception_3c_output = \
            inception_v3_7a(n.inception_3b_output,
                            dict(conv_1x1=64, conv_5x5_reduce=48, conv_5x5=64, conv_3x3_reduce=64, conv_3x3_1=96,
                                 conv_3x3_2=96, pool_proj=64))
        n.inception_3d_3x3_0, n.inception_3d_3x3_0_bn, n.inception_3d_relu_3x3_0, n.inception_3d_3x3_reduce, \
        n.inception_3d_3x3_reduce_bn, n.inception_3d_relu_3x3_reduce, n.inception_3d_3x3_1, n.inception_3d_3x3_1_bn, \
        n.inception_3d_relu_3x3_1, n.inception_3d_3x3_2, n.inception_3d_3x3_2_bn, n.inception_3d_relu_3x3_2, \
        n.inception_3d_pool, n.inception_3d_output = \
            inception_v3_7b(n.inception_3c_output,
                            dict(conv_3x3_0=384, conv_3x3_reduce=64, conv_3x3_1=96, conv_3x3_2=96))

        # stage 4
        n.inception_4a_1x1, n.inception_4a_1x1_bn, n.inception_4a_relu_1x1, n.inception_4a_1x7_reduce, \
        n.inception_4a_1x7_reduce_bn, n.inception_4a_relu_1x7_reduce, n.inception_4a_1x7_0, n.inception_4a_1x7_0_bn, \
        n.inception_4a_relu_1x7_0, n.inception_4a_7x1_0, n.inception_4a_7x1_0_bn, n.inception_4a_relu_7x1_0, \
        n.inception_4a_7x1_reduce, n.inception_4a_7x1_reduce_bn, n.inception_4a_relu_7x1_reduce, \
        n.inception_4a_7x1_1, n.inception_4a_7x1_1_bn, n.inception_4a_relu_7x1_1, n.inception_4a_1x7_1, \
        n.inception_4a_1x7_1_bn, n.inception_4a_relu_1x7_1, n.inception_4a_7x1_2, n.inception_4a_7x1_2_bn, \
        n.inception_4a_relu_7x1_2, n.inception_4a_1x7_2, n.inception_4a_1x7_2_bn, n.inception_4a_relu_1x7_2, \
        n.inception_4a_pool, n.inception_4a_pool_proj, n.inception_4a_pool_proj_bn, n.inception_4a_relu_pool_proj, \
        n.inception_4a_output = \
            inception_v3_7c(n.inception_3d_output,
                            dict(conv_1x1=192, conv_1x7_reduce=128, conv_1x7_0=128, conv_7x1_0=192, conv_7x1_reduce=128,
                                 conv_1x7_1=128, conv_7x1_1=128, conv_1x7_2=128, conv_7x1_2=192, pool_proj=192))
        n.inception_4b_1x1, n.inception_4b_1x1_bn, n.inception_4b_relu_1x1, n.inception_4b_1x7_reduce, \
        n.inception_4b_1x7_reduce_bn, n.inception_4b_relu_1x7_reduce, n.inception_4b_1x7_0, n.inception_4b_1x7_0_bn, \
        n.inception_4b_relu_1x7_0, n.inception_4b_7x1_0, n.inception_4b_7x1_0_bn, n.inception_4b_relu_7x1_0, \
        n.inception_4b_7x1_reduce, n.inception_4b_7x1_reduce_bn, n.inception_4b_relu_7x1_reduce, \
        n.inception_4b_7x1_1, n.inception_4b_7x1_1_bn, n.inception_4b_relu_7x1_1, n.inception_4b_1x7_1, \
        n.inception_4b_1x7_1_bn, n.inception_4b_relu_1x7_1, n.inception_4b_7x1_2, n.inception_4b_7x1_2_bn, \
        n.inception_4b_relu_7x1_2, n.inception_4b_1x7_2, n.inception_4b_1x7_2_bn, n.inception_4b_relu_1x7_2, \
        n.inception_4b_pool, n.inception_4b_pool_proj, n.inception_4b_pool_proj_bn, n.inception_4b_relu_pool_proj, \
        n.inception_4b_output = \
            inception_v3_7c(n.inception_4a_output,
                            dict(conv_1x1=192, conv_1x7_reduce=160, conv_1x7_0=160, conv_7x1_0=192, conv_7x1_reduce=160,
                                 conv_1x7_1=160, conv_7x1_1=160, conv_1x7_2=160, conv_7x1_2=160, pool_proj=192))
        n.inception_4c_1x1, n.inception_4c_1x1_bn, n.inception_4c_relu_1x1, n.inception_4c_1x7_reduce, \
        n.inception_4c_1x7_reduce_bn, n.inception_4c_relu_1x7_reduce, n.inception_4c_1x7_0, n.inception_4c_1x7_0_bn, \
        n.inception_4c_relu_1x7_0, n.inception_4c_7x1_0, n.inception_4c_7x1_0_bn, n.inception_4c_relu_7x1_0, \
        n.inception_4c_7x1_reduce, n.inception_4c_7x1_reduce_bn, n.inception_4c_relu_7x1_reduce, \
        n.inception_4c_7x1_1, n.inception_4c_7x1_1_bn, n.inception_4c_relu_7x1_1, n.inception_4c_1x7_1, \
        n.inception_4c_1x7_1_bn, n.inception_4c_relu_1x7_1, n.inception_4c_7x1_2, n.inception_4c_7x1_2_bn, \
        n.inception_4c_relu_7x1_2, n.inception_4c_1x7_2, n.inception_4c_1x7_2_bn, n.inception_4c_relu_1x7_2, \
        n.inception_4c_pool, n.inception_4c_pool_proj, n.inception_4c_pool_proj_bn, n.inception_4c_relu_pool_proj, \
        n.inception_4c_output = \
            inception_v3_7c(n.inception_4b_output,
                            dict(conv_1x1=192, conv_1x7_reduce=160, conv_1x7_0=160, conv_7x1_0=192, conv_7x1_reduce=160,
                                 conv_1x7_1=160, conv_7x1_1=160, conv_1x7_2=160, conv_7x1_2=160, pool_proj=192))
        n.inception_4d_1x1, n.inception_4d_1x1_bn, n.inception_4d_relu_1x1, n.inception_4d_1x7_reduce, \
        n.inception_4d_1x7_reduce_bn, n.inception_4d_relu_1x7_reduce, n.inception_4d_1x7_0, n.inception_4d_1x7_0_bn, \
        n.inception_4d_relu_1x7_0, n.inception_4d_7x1_0, n.inception_4d_7x1_0_bn, n.inception_4d_relu_7x1_0, \
        n.inception_4d_7x1_reduce, n.inception_4d_7x1_reduce_bn, n.inception_4d_relu_7x1_reduce, \
        n.inception_4d_7x1_1, n.inception_4d_7x1_1_bn, n.inception_4d_relu_7x1_1, n.inception_4d_1x7_1, \
        n.inception_4d_1x7_1_bn, n.inception_4d_relu_1x7_1, n.inception_4d_7x1_2, n.inception_4d_7x1_2_bn, \
        n.inception_4d_relu_7x1_2, n.inception_4d_1x7_2, n.inception_4d_1x7_2_bn, n.inception_4d_relu_1x7_2, \
        n.inception_4d_pool, n.inception_4d_pool_proj, n.inception_4d_pool_proj_bn, n.inception_4d_relu_pool_proj, \
        n.inception_4d_output = \
            inception_v3_7c(n.inception_4c_output,
                            dict(conv_1x1=192, conv_1x7_reduce=192, conv_1x7_0=192, conv_7x1_0=192, conv_7x1_reduce=192,
                                 conv_1x7_1=192, conv_7x1_1=192, conv_1x7_2=192, conv_7x1_2=192, pool_proj=192))
        n.inception_4e_3x3_reduce, n.inception_4e_3x3_reduce_bn, n.inception_4e_relu_3x3_reduce, n.inception_4e_3x3_0, \
        n.inception_4e_3x3_0_bn, n.inception_4e_relu_3x3_0, n.inception_4e_1x7_reduce, n.inception_4e_1x7_reduce_bn, \
        n.inception_4e_relu_1x7_reduce, n.inception_4e_1x7, n.inception_4e_1x7_bn, n.inception_4e_relu_1x7, \
        n.inception_4e_7x1, n.inception_4e_7x1_bn, n.inception_4e_relu_7x1, n.inception_4e_3x3_1, \
        n.inception_4e_3x3_1_bn, n.inception_4e_relu_3x3_1, n.inception_4e_pool, n.inception_4e_output = \
            inception_v3_7d(n.inception_4d_output,
                            dict(conv_3x3_reduce=192, conv_3x3_0=320, conv_1x7_reduce=192, conv_1x7=192, conv_7x1=192,
                                 conv_3x3_1=192))

        # stage 5
        n.inception_5a_1x1, n.inception_5a_1x1_bn, n.inception_5a_relu_1x1, n.inception_5a_3x3_0_reduce, \
        n.inception_5a_3x3_0_reduce_bn, n.inception_5a_relu_3x3_0_reduce, n.inception_5a_1x3_0, n.inception_5a_1x3_0_bn, \
        n.inception_5a_relu_1x3_0, n.inception_5a_3x1_0, n.inception_5a_3x1_0_bn, n.inception_5a_relu_3x1_0, \
        n.inception_5a_3x3_1_reduce, n.inception_5a_3x3_1_reduce_bn, n.inception_5a_relu_3x3_1_reduce, n.inception_5a_3x3_1, \
        n.inception_5a_3x3_1_bn, n.inception_5a_relu_3x3_1, n.inception_5a_1x3_1, n.inception_5a_1x3_1_bn, \
        n.inception_5a_relu_1x3_1, n.inception_5a_3x1_1, n.inception_5a_3x1_1_bn, n.inception_5a_relu_3x1_1, \
        n.inception_5a_pool, n.inception_5a_pool_proj, n.inception_5a_pool_proj_bn, n.inception_5a_relu_pool_proj, \
        n.inception_5a_output = \
            inception_v3_7e(n.inception_4e_output,
                            dict(conv_1x1=320, conv_3x3_0_reduce=384, conv_1x3_0=384, conv_3x1_0=384,
                                 conv_3x3_1_reduce=448, conv_3x3_1=384, conv_1x3_1=384, conv_3x1_1=384,
                                 pooling=P.Pooling.AVE, pool_proj=192))
        n.inception_5b_1x1, n.inception_5b_1x1_bn, n.inception_5b_relu_1x1, n.inception_5b_3x3_0_reduce, \
        n.inception_5b_3x3_0_reduce_bn, n.inception_5b_relu_3x3_0_reduce, n.inception_5b_1x3_0, n.inception_5b_1x3_0_bn, \
        n.inception_5b_relu_1x3_0, n.inception_5b_3x1_0, n.inception_5b_3x1_0_bn, n.inception_5b_relu_3x1_0, \
        n.inception_5b_3x3_1_reduce, n.inception_5b_3x3_1_reduce_bn, n.inception_5b_relu_3x3_1_reduce, n.inception_5b_3x3_1, \
        n.inception_5b_3x3_1_bn, n.inception_5b_relu_3x3_1, n.inception_5b_1x3_1, n.inception_5b_1x3_1_bn, \
        n.inception_5b_relu_1x3_1, n.inception_5b_3x1_1, n.inception_5b_3x1_1_bn, n.inception_5b_relu_3x1_1, \
        n.inception_5b_pool, n.inception_5b_pool_proj, n.inception_5b_pool_proj_bn, n.inception_5b_relu_pool_proj, \
        n.inception_5b_output = \
            inception_v3_7e(n.inception_5a_output,
                            dict(conv_1x1=320, conv_3x3_0_reduce=384, conv_1x3_0=384, conv_3x1_0=384,
                                 conv_3x3_1_reduce=448, conv_3x3_1=384, conv_1x3_1=384, conv_3x1_1=384,
                                 pooling=P.Pooling.MAX, pool_proj=192))

        n.pool3_7x7_s1 = L.Pooling(n.inception_5b_output,
                                   kernel_size=7,
                                   stride=1,
                                   pool=P.Pooling.AVE)
        n.classifier = L.InnerProduct(n.pool3_7x7_s1,
                                      num_output=self.classifier_num,
                                      param=[
                                          dict(lr_mult=1, decay_mult=1),
                                          dict(lr_mult=2, decay_mult=0)
                                      ],
                                      weight_filler=dict(type='xavier'),
                                      bias_filler=dict(type='constant',
                                                       value=0))

        n.loss = L.SoftmaxWithLoss(n.classifier, n.label)
        if phase == 'TRAIN':
            pass
        else:
            n.loss_top1, n.loss_top5 = accuracy_top1_top5(
                n.classifier, n.label)
        return n.to_proto()
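A hedged usage sketch ('model' stands for an instance whose train_data, test_data, and classifier_num attributes are already set): generate one proto per phase and serialize each.

train_proto = model.inception_v3_proto(batch_size=32, phase='TRAIN')
test_proto = model.inception_v3_proto(batch_size=32, phase='TEST')
for fname, proto in [('inception_v3_train.prototxt', train_proto),
                     ('inception_v3_test.prototxt', test_proto)]:
    with open(fname, 'w') as f:
        f.write(str(proto))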
Example #16
    def inference_proto(self, bottom, mult=1., truncate_at=None, deploy=False):
        ns = self.netspec
        w_params = dict(lr_mult=mult, decay_mult=mult)
        b_params = dict(lr_mult=mult, decay_mult=0)
        conv_opt_params = dict(weight_filler=CONV_W_INIT,
                               bias_filler=CONV_B_INIT,
                               param=[w_params, b_params
                                      ]) if not deploy else {}
        fc_opt_params = dict(weight_filler=FC_W_INIT,
                             bias_filler=FC_B_INIT,
                             param=[w_params, b_params]) if not deploy else {}

        ns.conv1 = L.Convolution(bottom,
                                 num_output=96,
                                 kernel_size=11,
                                 stride=4,
                                 **conv_opt_params)
        ns.relu1 = L.ReLU(ns.conv1, in_place=True)
        ns.norm1 = L.LRN(ns.relu1, local_size=5, alpha=0.0001, beta=0.75)
        ns.pool1 = L.Pooling(ns.norm1,
                             pool=P.Pooling.MAX,
                             kernel_size=3,
                             stride=2)
        if truncate_at == 'pool1':
            return ns.pool1

        ns.conv2 = L.Convolution(ns.pool1,
                                 num_output=256,
                                 kernel_size=5,
                                 pad=2,
                                 group=2,
                                 **conv_opt_params)
        ns.relu2 = L.ReLU(ns.conv2, in_place=True)
        ns.norm2 = L.LRN(ns.relu2, local_size=5, alpha=0.0001, beta=0.75)
        ns.pool2 = L.Pooling(ns.norm2,
                             pool=P.Pooling.MAX,
                             kernel_size=3,
                             stride=2)
        if truncate_at == 'pool2':
            return ns.pool2

        ns.conv3 = L.Convolution(ns.pool2,
                                 num_output=384,
                                 kernel_size=3,
                                 pad=1,
                                 **conv_opt_params)
        ns.relu3 = L.ReLU(ns.conv3, in_place=True)
        if truncate_at == 'conv3':
            return ns.relu3

        ns.conv4 = L.Convolution(ns.relu3,
                                 num_output=384,
                                 kernel_size=3,
                                 pad=1,
                                 group=2,
                                 **conv_opt_params)
        ns.relu4 = L.ReLU(ns.conv4, in_place=True)
        if truncate_at == 'conv4':
            return ns.relu4

        ns.conv5 = L.Convolution(ns.relu4,
                                 num_output=256,
                                 kernel_size=3,
                                 pad=1,
                                 group=2,
                                 **conv_opt_params)
        ns.relu5 = L.ReLU(ns.conv5, in_place=True)
        ns.pool5 = L.Pooling(ns.relu5,
                             pool=P.Pooling.MAX,
                             kernel_size=3,
                             stride=2)
        if truncate_at == 'pool5':
            return ns.pool5

        ns.fc6 = L.InnerProduct(ns.pool5, num_output=4096, **fc_opt_params)
        ns.relu6 = L.ReLU(ns.fc6, in_place=True)
        ns.drop6 = L.Dropout(ns.relu6, dropout_ratio=0.5, in_place=True)
        if truncate_at == 'fc6':
            return ns.drop6

        ns.fc7 = L.InnerProduct(ns.drop6, num_output=4096, **fc_opt_params)
        ns.relu7 = L.ReLU(ns.fc7, in_place=True)
        ns.drop7 = L.Dropout(ns.relu7, dropout_ratio=0.5, in_place=True)
        if truncate_at == 'fc7':
            return ns.drop7

        ns.fc8 = L.InnerProduct(ns.fc7, num_output=1000, **fc_opt_params)
        return ns.fc8
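A hedged sketch of how truncate_at is meant to be used ('builder' and 'bottom' are assumed: an instance with its netspec prepared, and a data blob): returning an intermediate top lets callers branch features off the trunk.

pool5 = builder.inference_proto(bottom, truncate_at='pool5')  # conv trunk only
fc8 = builder.inference_proto(bottom)                         # full net up to fc8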
Example #17
def ZFNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False,
        dilated=False, dropout=True, need_fc8=False, freeze_layers=[]):
    kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
            'weight_filler': dict(type='xavier'),
            'bias_filler': dict(type='constant', value=0)}

    assert from_layer in net.keys()
    net.conv1 = L.Convolution(net[from_layer], num_output=96, pad=3, kernel_size=7, stride=2, **kwargs)
    net.relu1 = L.ReLU(net.conv1, in_place=True)

    net.norm1 = L.LRN(net.relu1, local_size=3, alpha=0.00005, beta=0.75,
            norm_region=P.LRN.WITHIN_CHANNEL, engine=P.LRN.CAFFE)

    net.pool1 = L.Pooling(net.norm1, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=2)

    net.conv2 = L.Convolution(net.pool1, num_output=256, pad=2, kernel_size=5, stride=2, **kwargs)
    net.relu2 = L.ReLU(net.conv2, in_place=True)

    net.norm2 = L.LRN(net.relu2, local_size=3, alpha=0.00005, beta=0.75,
            norm_region=P.LRN.WITHIN_CHANNEL, engine=P.LRN.CAFFE)

    net.pool2 = L.Pooling(net.norm2, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=2)

    net.conv3 = L.Convolution(net.pool2, num_output=384, pad=1, kernel_size=3, **kwargs)
    net.relu3 = L.ReLU(net.conv3, in_place=True)
    net.conv4 = L.Convolution(net.relu3, num_output=384, pad=1, kernel_size=3, **kwargs)
    net.relu4 = L.ReLU(net.conv4, in_place=True)
    net.conv5 = L.Convolution(net.relu4, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu5 = L.ReLU(net.conv5, in_place=True)

    if need_fc:
        if dilated:
            name = 'pool5'
            net[name] = L.Pooling(net.relu5, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=1)
        else:
            name = 'pool5'
            net[name] = L.Pooling(net.relu5, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=2)

        if fully_conv:
            if dilated:
                if reduced:
                    net.fc6 = L.Convolution(net[name], num_output=1024, pad=5, kernel_size=3, dilation=5, **kwargs)
                else:
                    net.fc6 = L.Convolution(net[name], num_output=4096, pad=5, kernel_size=6, dilation=2, **kwargs)
            else:
                if reduced:
                    net.fc6 = L.Convolution(net[name], num_output=1024, pad=2, kernel_size=3, dilation=2,  **kwargs)
                else:
                    net.fc6 = L.Convolution(net[name], num_output=4096, pad=2, kernel_size=6, **kwargs)

            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True)

            if reduced:
                net.fc7 = L.Convolution(net.relu6, num_output=1024, kernel_size=1, **kwargs)
            else:
                net.fc7 = L.Convolution(net.relu6, num_output=4096, kernel_size=1, **kwargs)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True)
        else:
            net.fc6 = L.InnerProduct(net.pool5, num_output=4096)
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True)
            net.fc7 = L.InnerProduct(net.relu6, num_output=4096)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True)
    if need_fc8:
        from_layer = net.keys()[-1]
        if fully_conv:
            net.fc8 = L.Convolution(net[from_layer], num_output=1000, kernel_size=1, **kwargs)
        else:
            net.fc8 = L.InnerProduct(net[from_layer], num_output=1000)
        net.prob = L.Softmax(net.fc8)

    # Update freeze layers.
    kwargs['param'] = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]
    layers = net.keys()
    for freeze_layer in freeze_layers:
        if freeze_layer in layers:
            net.update(freeze_layer, kwargs)

    return net
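A minimal usage sketch (the LMDB path is a placeholder): ZFNetBody expects a NetSpec that already contains the from_layer blob.

import caffe
from caffe import layers as L, params as P

net = caffe.NetSpec()
net.data, net.label = L.Data(source='train_lmdb', backend=P.Data.LMDB,
                             batch_size=32, ntop=2)
net = ZFNetBody(net, from_layer='data', need_fc=True, need_fc8=True)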
Example #18
def densenet(mode, data_file, bs, nlayer, nclass, first_nout=16, growth_rate=16, dropout=0.2):

  net = caffe.NetSpec()

  # data layer ---------------------------------------------------------------
  mirror  = True
  shuffle = True
  if mode == 1: # TEST phase
    mirror  = False
    shuffle = False
  
  transform = dict(scale = 0.0078125,
                    mirror = mirror,
                    #crop_size = 224,
                    mean_value = [127.5, 127.5, 127.5])

  net.data, net.label = L.Data(#include = dict(phase = mode),
                              transform_param = transform,
                              source = data_file,
                              batch_size = bs, 
                              backend = P.Data.LMDB,
                              ntop = 2)
  # net.data, net.label = L.ImageData(#include = dict(phase = mode),
                                    # transform_param = transform,
                                    # source = data_file,
                                    # batch_size = bs,
                                    # shuffle = shuffle,
                                    # #new_height = 256,
                                    # #new_width = 256,
                                    # #is_color = True,
                                    # ntop = 2)

  pre_fmap = 0 # total number of previous feature maps
  
  # first convolution --------------------------------------------------------
  net.conv_1 = L.Convolution(net.data, num_output=first_nout,
                             kernel_size=7, stride=2, pad=3, 
                             weight_filler=dict(type='msra'), 
                             bias_filler=dict(type='constant'),
                             param=[dict(lr_mult=1, decay_mult=1),
                                    dict(lr_mult=2, decay_mult=0)])
  
  net.relu_1 = L.PReLU(net.conv_1, in_place=True)
  
  net.pool_1 = L.Pooling(net.relu_1, pool=P.Pooling.MAX,
                         kernel_size=3, stride=2)
  
  pre_layer = net.pool_1
  pre_fmap += first_nout
  
  # DB + TD ------------------------------------------------------------------
  # +1 in order to make the index values from 1
  for major in range(len(nlayer)-1):
    # DB
    for minor in range(nlayer[major]):
      pre_layer = cat_layer(net, mode, major+1, minor+1, pre_layer, growth_rate, dropout)
      pre_fmap += growth_rate
    # TD
    pre_layer = transition_down(net, mode, major+1, pre_layer, pre_fmap, dropout)
    pre_fmap = pre_fmap // 2
  
  # last DB, without TD
  major = len(nlayer)
  for minor in range(nlayer[-1]):
    pre_layer = cat_layer(net, mode, major, minor+1, pre_layer, growth_rate, dropout)
    pre_fmap += growth_rate
  
  # final layers -------------------------------------------------------------
  use_global_stats = False
  if mode == 1: # TEST phase
    use_global_stats = True
  net.bn_final = L.BatchNorm(pre_layer, in_place=False, 
                             batch_norm_param = dict(use_global_stats=use_global_stats),
                             param=[dict(lr_mult=0, decay_mult=0), 
                                    dict(lr_mult=0, decay_mult=0), 
                                    dict(lr_mult=0, decay_mult=0)])
  net.scale_final = L.Scale(net.bn_final, bias_term=True, in_place=True,
                            filler=dict(value=1), bias_filler=dict(value=0))
  net.relu_final = L.PReLU(net.scale_final, in_place=True)
  net.pool_final = L.Pooling(net.relu_final, pool=P.Pooling.AVE, global_pooling=True)
  
  net.fc_class = L.InnerProduct(net.pool_final, num_output=nclass,
                                weight_filler=dict(type='xavier'), 
                                bias_filler=dict(type='constant'),
                                param=[dict(lr_mult=1, decay_mult=1),
                                       dict(lr_mult=2, decay_mult=0)])
  
  net.loss = L.SoftmaxWithLoss(net.fc_class, net.label)
  
  if mode == 1:
    net.accuracy = L.Accuracy(net.fc_class, net.label)
  
  return str(net.to_proto())
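A sanity sketch of the feature-map bookkeeping above: each dense-block layer adds growth_rate maps and each transition halves the running count, while the final block has no transition.

def count_fmaps(nlayer, first_nout=16, growth_rate=16):
    fmaps = first_nout
    for n in nlayer[:-1]:
        fmaps = (fmaps + n * growth_rate) // 2  # dense block, then transition
    return fmaps + nlayer[-1] * growth_rate     # last dense block, no transition

print(count_fmaps([6, 12, 24, 16]))  # 510 maps enter the final global pooling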
Example #19
    def unpack_item(self,
                    layer,
                    previous_image_size,
                    layer_number,
                    bottom,
                    label=None):

        if layer.terminate == 1:
            # Softmax Accuracy/Loss
            # loss = cl.SoftmaxWithLoss(bottom, label)
            bottom = cl.InnerProduct(bottom,
                                     num_output=self.hp.NUM_CLASSES,
                                     weight_filler=dict(type='xavier'))
            return bottom

        if layer.layer_type == 'conv':
            out_depth = layer.filter_depth
            kernel_size = layer.filter_size
            stride = layer.stride
            pad = self.get_pad(kernel_size)
            bottom = cl.Convolution(bottom,
                                    kernel_size=kernel_size,
                                    num_output=out_depth,
                                    stride=stride,
                                    pad=pad,
                                    weight_filler=dict(type='xavier'))
            if self.ssp.batch_norm:
                bottom = self.add_batchnorm(bottom)
            return self.add_activate(bottom)

        if layer.layer_type == 'nin':
            out_depth = layer.filter_depth
            bottom = cl.Convolution(bottom,
                                    kernel_size=1,
                                    num_output=out_depth,
                                    weight_filler=dict(type='xavier'))
            bottom = self.add_activate(bottom)

            bottom = cl.Convolution(bottom,
                                    kernel_size=1,
                                    num_output=out_depth,
                                    weight_filler=dict(type='xavier'))
            bottom = self.add_activate(bottom)
            return bottom

        if layer.layer_type == 'gap':
            out_depth = self.hp.NUM_CLASSES
            bottom = cl.Convolution(bottom,
                                    kernel_size=1,
                                    num_output=out_depth,
                                    weight_filler=dict(type='xavier'))
            bottom = self.add_activate(bottom)
            bottom = cl.Pooling(bottom,
                                kernel_size=previous_image_size,
                                pool=P.Pooling.AVE)
            return bottom

        if layer.layer_type == 'fc':
            num_output = layer.fc_size
            bottom = cl.InnerProduct(bottom,
                                     num_output=num_output,
                                     weight_filler=dict(type='xavier'))
            bottom = self.add_activate(bottom)
            return bottom

        if layer.layer_type == 'dropout':
            dropout_ratio = 0.5 * float(layer.filter_depth) / layer.fc_size
            return cl.Dropout(bottom, dropout_ratio=dropout_ratio)

        if layer.layer_type == 'pool':
            kernel_size = layer.filter_size
            stride = layer.stride
            if self.ssp.batch_norm:
                bottom = self.add_batchnorm(bottom)
            return cl.Pooling(bottom,
                              kernel_size=kernel_size,
                              stride=stride,
                              pool=P.Pooling.MAX)
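The conv branch above calls self.get_pad(kernel_size), which is not shown in this example; a plausible definition (an assumption, not taken from the source) is 'same'-style symmetric padding so that stride-1 convolutions preserve spatial size:

    def get_pad(self, kernel_size):
        # Assumed helper: out = (in + 2*pad - k) // stride + 1 == in for stride 1
        return (kernel_size - 1) // 2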
Example #20
    def addDANStage(self, net, istrain):
        #CONNNECTION LAYERS OF PREVIOUS STAGE
        # TRANSFORM ESTIMATION
        net.s1_transform_params = L.Python(
            net.s1_landmarks,
            module="LandmarkTranFormLayer",
            layer="LandmarkTranFormLayer",
            param_str=str(dict(mean_shape=self.initlandmarks.tolist())))
        # IMAGE TRANSFORM
        net.s1_img_output = L.Python(net.s1_input,
                                     net.s1_transform_params,
                                     module="AffineTransformLayer",
                                     layer="AffineTransformLayer")
        # LANDMARK TRANSFORM
        net.s1_landmarks_affine = L.Python(net.s1_landmarks,
                                           net.s1_transform_params,
                                           module="LandmarkTransformLayer",
                                           layer="LandmarkTransformLayer")
        # HEATMAP GENERATION
        net.s1_img_heatmap = L.Python(net.s1_landmarks_affine,
                                      module="GetHeatMapLayer",
                                      layer="GetHeatMapLayer")
        # FEATURE GENERATION
        # 56*56 is used instead of 112*112 to reduce parameters; the final performance difference is negligible
        net.s1_img_feature = fc_relu(net.s1_fc1_batch, 56 * 56)
        net.s1_img_feature = L.Reshape(net.s1_img_feature,
                                       shape=dict(dim=[-1, 1, 56, 56]))
        net.s1_img_feature = L.Python(net.s1_img_feature,
                                      module="Upscale2DLayer",
                                      layer="Upscale2DLayer",
                                      param_str=str(dict(scale_factor=2)))

        # CURRENT STAGE
        net.s2_input = L.Concat(net.s1_img_output, net.s1_img_heatmap,
                                net.s1_img_feature)
        net.s2_input_batch = L.BatchNorm(net.s2_input)

        net.s2_conv1_1, net.s2_relu1_1 = conv_relu(net.s2_input_batch, 3, 64)
        net.s2_batch1_1 = L.BatchNorm(net.s2_relu1_1)
        net.s2_conv1_2, net.s2_relu1_2 = conv_relu(net.s2_batch1_1, 3, 64)
        net.s2_batch1_2 = L.BatchNorm(net.s2_relu1_2)
        net.s2_pool1 = max_pool(net.s2_batch1_2, 2)

        net.s2_conv2_1, net.s2_relu2_1 = conv_relu(net.s2_pool1, 3, 128)
        net.s2_batch2_1 = L.BatchNorm(net.s2_relu2_1)
        net.s2_conv2_2, net.s2_relu2_2 = conv_relu(net.s2_batch2_1, 3, 128)
        net.s2_batch2_2 = L.BatchNorm(net.s2_relu2_2)
        net.s2_pool2 = max_pool(net.s2_batch2_2)

        net.s2_conv3_1, net.s2_relu3_1 = conv_relu(net.s2_pool2, 3, 256)
        net.s2_batch3_1 = L.BatchNorm(net.s2_relu3_1)
        net.s2_conv3_2, net.s2_relu3_2 = conv_relu(net.s2_batch3_1, 3, 256)
        net.s2_batch3_2 = L.BatchNorm(net.s2_relu3_2)
        net.s2_pool3 = max_pool(net.s2_batch3_2)

        net.s2_conv4_1, net.s2_relu4_1 = conv_relu(net.s2_pool3, 3, 512)
        net.s2_batch4_1 = L.BatchNorm(net.s2_relu4_1)
        net.s2_conv4_2, net.s2_relu4_2 = conv_relu(net.s2_batch4_1, 3, 512)
        net.s2_batch4_2 = L.BatchNorm(net.s2_relu4_2)
        net.s2_pool4 = max_pool(net.s2_batch4_2)

        net.s2_pool4_flatten = L.Flatten(net.s2_pool4)
        if istrain:
            net.s2_fc1_dropout = L.Dropout(net.s2_pool4_flatten,
                                           dropout_ratio=0.5,
                                           in_place=True)
            # , include=dict(phase=caffe.TRAIN)
        else:
            net.s2_fc1_dropout = net.s2_pool4_flatten
        net.s2_fc1, net.s2_fc1_relu = fc_relu(net.s2_fc1_dropout, 256)
        net.s2_fc1_batch = L.BatchNorm(net.s2_fc1_relu)

        net.s2_output = L.InnerProduct(net.s2_fc1_batch,
                                       num_output=136,
                                       bias_filler=dict(type='constant',
                                                        value=0))
        net.s2_landmarks = L.Eltwise(net.s2_output, net.s1_landmarks_affine)
        net.s2_landmarks = L.Python(net.s2_landmarks,
                                    net.s1_transform_params,
                                    module="LandmarkTranFormLayer",
                                    layer="LandmarkTranFormLayer")
Example #21
    def MakeNetwork(self, db, batch_size, layers, deploy, act, input_dropout,
                    hidden_dropout, L2, filler):

        #Create Data layer
        data, label = L.HDF5Data(source=db, batch_size=batch_size, ntop=2)

        #Add hidden layers
        top = data
        if (input_dropout != 0):
            top = L.Dropout(top, in_place=True, dropout_ratio=input_dropout)

        test = 0
        for x in range(0, len(layers)):
            if (L2):
                if (filler == 1):
                    top = L.InnerProduct(top,
                                         num_output=layers[x],
                                         weight_filler=dict(type='xavier'),
                                         bias_filler=dict(type='xavier'),
                                         param=[dict(decay_mult=1)])
                elif (filler == 2):
                    top = L.InnerProduct(top,
                                         num_output=layers[x],
                                         weight_filler=dict(type='gaussian',
                                                            std=0.01),
                                         bias_filler=dict(type='gaussian',
                                                          std=0.01),
                                         param=[dict(decay_mult=1)])

            else:
                if (filler == 1):
                    top = L.InnerProduct(top,
                                         num_output=layers[x],
                                         weight_filler=dict(type='xavier'),
                                         bias_filler=dict(type='xavier'),
                                         param=[dict(decay_mult=0)])
                elif (filler == 2):
                    top = L.InnerProduct(top,
                                         num_output=layers[x],
                                         weight_filler=dict(type='gaussian',
                                                            std=0.01),
                                         bias_filler=dict(type='gaussian',
                                                          std=0.01),
                                         param=[dict(decay_mult=0)])

            if (act == 1):
                top = L.ReLU(top, in_place=True)
            elif (act == 2):
                top = L.Sigmoid(top, in_place=True)
            elif (act == 3):
                top = L.TanH(top, in_place=True)
            else:
                print "Error, invalid activation function choice "
            if (hidden_dropout != 0):
                top = L.Dropout(top,
                                in_place=True,
                                dropout_ratio=hidden_dropout)

        #Add Output Layers
        if (filler == 1):
            output = L.InnerProduct(top,
                                    num_output=self._numClasses,
                                    weight_filler=dict(type='xavier'),
                                    bias_filler=dict(type='xavier'))
        elif (filler == 2):
            output = L.InnerProduct(top,
                                    num_output=self._numClasses,
                                    weight_filler=dict(type='gaussian',
                                                       std=0.01),
                                    bias_filler=dict(type='gaussian',
                                                     std=0.01))

        if (deploy == False):
            loss = L.SoftmaxWithLoss(output, label)
            return to_proto(loss)
        else:
            prob = L.Softmax(output)
            return to_proto(prob)
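A hedged usage sketch ('builder' stands for an instance with _numClasses already set; the HDF5 source path is a placeholder): a two-hidden-layer MLP with ReLU activations, xavier fillers, and L2 weight decay enabled.

proto = builder.MakeNetwork(db='train_h5_list.txt', batch_size=64,
                            layers=[512, 256], deploy=False, act=1,
                            input_dropout=0.2, hidden_dropout=0.5,
                            L2=True, filler=1)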
Example #22
def fc_relu(bottom, nout):
    fc = L.InnerProduct(bottom,
                        num_output=nout,
                        weight_filler=dict(type='xavier'),
                        bias_filler=dict(type='constant', value=0))
    return fc, L.ReLU(fc, in_place=True)
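Several examples above also rely on conv_relu and max_pool helpers that are not shown. Hedged sketches consistent with call sites such as conv_relu(data, 7, 64, stride=2, pad=3) and max_pool(relu1, 2, stride=2) follow; the defaults are guesses, and vgg_face in the next example uses a conv_relu variant with a different signature.

from caffe import layers as L, params as P

def conv_relu(bottom, ks, nout, stride=1, pad=0):
    # Assumed helper matching the yolonet/DAN call sites above.
    conv = L.Convolution(bottom, kernel_size=ks, num_output=nout,
                         stride=stride, pad=pad,
                         weight_filler=dict(type='xavier'),
                         bias_filler=dict(type='constant', value=0))
    return conv, L.ReLU(conv, in_place=True)

def max_pool(bottom, ks=2, stride=2):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)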
Example #23
def vgg_face(split, mean, opt):
    n = caffe.NetSpec()

    # config python data layer
    if split == 'train':
        batch_size = opt.train_batch_size
    if split == 'val':
        batch_size = opt.val_batch_size
    if split == 'test':
        batch_size = opt.test_batch_size

    if split == 'train' or split == 'val':
        dataset_name = opt.train_dataset_name
    else:
        dataset_name = opt.test_dataset_name

    pydata_params = dict(split=split,
                         data_dir=opt.data_dir,
                         batch_size=batch_size,
                         mean=mean,
                         dataset=dataset_name,
                         load_size=opt.load_size,
                         crop_size=opt.crop_size)
    n.data, n.label = L.Python(module='faceData_layers',
                               layer='FaceDataLayer',
                               ntop=2,
                               param_str=str(pydata_params))

    # vgg-face net
    # conv layers
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)

    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)

    # drop out and fc layers
    n.fc6, n.relu6, n.drop6 = fc_relu_dropout(n.pool5, 4096, 0.5)
    n.fc7, n.relu7, n.drop7 = fc_relu_dropout(n.fc6, 4096, 0.5)

    lr_ratio = 100  # lr multiplier for truncated layers
    n.fc8_face = L.InnerProduct(n.fc7,
                                num_output=1024,
                                param=[
                                    dict(lr_mult=1 * lr_ratio, decay_mult=1),
                                    dict(lr_mult=2 * lr_ratio, decay_mult=0)
                                ],
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))
    n.fc9_face = L.InnerProduct(n.fc8_face,
                                num_output=2,
                                param=[
                                    dict(lr_mult=1 * lr_ratio, decay_mult=1),
                                    dict(lr_mult=2 * lr_ratio, decay_mult=0)
                                ],
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))

    # loss layer
    n.loss = L.SoftmaxWithLoss(n.fc9_face, n.label)

    # loss and accuracy layer
    n.acc = L.Accuracy(n.fc9_face, n.label)
    return n.to_proto()
Example #24
    def createCNN(self, istrain):
        net = caffe.NetSpec()
        # The TRAIN and TEST branches were identical, so one MemoryData layer suffices
        net.s1_input, net.label = L.MemoryData(batch_size=self.batchsize,
                                               channels=self.nChannels,
                                               height=self.imageHeight,
                                               width=self.imageWidth,
                                               ntop=2)

        # STAGE 1
        net.s1_conv1_1, net.s1_relu1_1 = conv_relu(net.s1_input, 3, 64)
        net.s1_batch1_1 = L.BatchNorm(net.s1_relu1_1)
        net.s1_conv1_2, net.s1_relu1_2 = conv_relu(net.s1_batch1_1, 3, 64)
        net.s1_batch1_2 = L.BatchNorm(net.s1_relu1_2)
        net.s1_pool1 = max_pool(net.s1_batch1_2, 2)

        net.s1_conv2_1, net.s1_relu2_1 = conv_relu(net.s1_pool1, 3, 128)
        net.s1_batch2_1 = L.BatchNorm(net.s1_relu2_1)
        net.s1_conv2_2, net.s1_relu2_2 = conv_relu(net.s1_batch2_1, 3, 128)
        net.s1_batch2_2 = L.BatchNorm(net.s1_relu2_2)
        net.s1_pool2 = max_pool(net.s1_batch2_2)

        net.s1_conv3_1, net.s1_relu3_1 = conv_relu(net.s1_pool2, 3, 256)
        net.s1_batch3_1 = L.BatchNorm(net.s1_relu3_1)
        net.s1_conv3_2, net.s1_relu3_2 = conv_relu(net.s1_batch3_1, 3, 256)
        net.s1_batch3_2 = L.BatchNorm(net.s1_relu3_2)
        net.s1_pool3 = max_pool(net.s1_batch3_2)

        net.s1_conv4_1, net.s1_relu4_1 = conv_relu(net.s1_pool3, 3, 512)
        net.s1_batch4_1 = L.BatchNorm(net.s1_relu4_1)
        net.s1_conv4_2, net.s1_relu4_2 = conv_relu(net.s1_batch4_1, 3, 512)
        net.s1_batch4_2 = L.BatchNorm(net.s1_relu4_2)
        net.s1_pool4 = max_pool(net.s1_batch4_2)
        if istrain:
            net.s1_fc1_dropout = L.Dropout(net.s1_pool4,
                                           dropout_ratio=0.5,
                                           in_place=True)
        else:
            net.s1_fc1_dropout = net.s1_pool4
        net.s1_fc1, net.s1_fc1_relu = fc_relu(net.s1_fc1_dropout, 256)
        net.s1_fc1_batch = L.BatchNorm(net.s1_fc1_relu)

        net.s1_output = L.InnerProduct(net.s1_fc1_batch,
                                       num_output=136,
                                       bias_filler=dict(type='constant',
                                                        value=0))
        net.s1_landmarks = L.Python(
            net.s1_output,
            module="InitLandmark",
            layer="InitLandmark",
            param_str=str(dict(initlandmarks=self.initLandmarks.tolist())))

        if self.nStages == 2:
            self.addDANStage(net, istrain)
            net.output = net.s2_landmarks
        else:
            net.output = net.s1_landmarks

        net.loss = L.Python(net.output,
                            net.label,
                            module="SumOfSquaredLossLayer",
                            layer="SumOfSquaredLossLayer",
                            loss_weight=1)
        return str(net.to_proto())
Example #25
def pj_x(mode, batchsize, T, exp_T, question_vocab_size, exp_vocab_size):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode':mode, 'batchsize':batchsize})
    n.data, n.cont, n.img_feature, n.label, n.exp, n.exp_out, n.exp_cont_1, n.exp_cont_2 = \
        L.Python(module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=8)

    n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \
        weight_filler=dict(type='uniform',min=-0.08,max=0.08), param=fixed_weights)
    n.embed = L.TanH(n.embed_ba) 

    n.exp_embed_ba = L.Embed(n.exp, input_dim=exp_vocab_size, num_output=300, \
        weight_filler=dict(type='uniform', min=-0.08, max=0.08))
    n.exp_embed = L.TanH(n.exp_embed_ba)

    # LSTM1
    n.lstm1 = L.LSTM(\
                   n.embed, n.cont,\
                   recurrent_param=dict(\
                       num_output=1024,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)),
                   param=fixed_weights_lstm)
    tops1 = L.Slice(n.lstm1, ntop=T, slice_param={'axis':0})
    for i in range(T-1):
        n.__setattr__('slice_first'+str(i), tops1[int(i)])
        n.__setattr__('silence_data_first'+str(i), L.Silence(tops1[int(i)],ntop=0))
    n.lstm1_out = tops1[T-1]
    n.lstm1_reshaped = L.Reshape(n.lstm1_out,\
                          reshape_param=dict(\
                              shape=dict(dim=[-1,1024])))
    n.lstm1_reshaped_droped = L.Dropout(n.lstm1_reshaped,dropout_param={'dropout_ratio':0.3})
    n.lstm1_droped = L.Dropout(n.lstm1,dropout_param={'dropout_ratio':0.3})
    # LSTM2
    n.lstm2 = L.LSTM(\
                   n.lstm1_droped, n.cont,\
                   recurrent_param=dict(\
                       num_output=1024,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)),
                   param=fixed_weights_lstm)
    tops2 = L.Slice(n.lstm2, ntop=T, slice_param={'axis':0})
    for i in range(T-1):
        n.__setattr__('slice_second'+str(i), tops2[int(i)])
        n.__setattr__('silence_data_second'+str(i), L.Silence(tops2[int(i)],ntop=0))
    n.lstm2_out = tops2[T-1]
    n.lstm2_reshaped = L.Reshape(n.lstm2_out,\
                          reshape_param=dict(\
                              shape=dict(dim=[-1,1024])))
    n.lstm2_reshaped_droped = L.Dropout(n.lstm2_reshaped,dropout_param={'dropout_ratio':0.3})
    concat_bottom = [n.lstm1_reshaped_droped, n.lstm2_reshaped_droped]
    n.lstm_12 = L.Concat(*concat_bottom)


    # Tile question feature
    n.q_emb_resh = L.Reshape(n.lstm_12, reshape_param=dict(shape=dict(dim=[-1,2048,1,1])))
    n.q_emb_tiled_1 = L.Tile(n.q_emb_resh, axis=2, tiles=14)
    n.q_emb_resh_tiled = L.Tile(n.q_emb_tiled_1, axis=3, tiles=14)

    # Embed image feature
    n.i_emb = L.Convolution(n.img_feature, kernel_size=1, stride=1,
                            num_output=2048, pad=0, weight_filler=dict(type='xavier'),
                            param=fixed_weights)

    # Eltwise product and normalization
    n.eltwise = L.Eltwise(n.q_emb_resh_tiled, n.i_emb, eltwise_param={'operation': P.Eltwise.PROD})
    n.eltwise_sqrt = L.SignedSqrt(n.eltwise)
    n.eltwise_l2 = L.L2Normalize(n.eltwise_sqrt)
    n.eltwise_drop = L.Dropout(n.eltwise_l2, dropout_param={'dropout_ratio': 0.3})

    # Attention for VQA
    n.att_conv1 = L.Convolution(n.eltwise_drop, kernel_size=1, stride=1, num_output=512, pad=0, weight_filler=dict(type='xavier'), param=fixed_weights)
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1, num_output=1, pad=0, weight_filler=dict(type='xavier'), param=fixed_weights)
    n.att_reshaped = L.Reshape(n.att_conv2,reshape_param=dict(shape=dict(dim=[-1,1,14*14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att_map = L.Reshape(n.att_softmax,reshape_param=dict(shape=dict(dim=[-1,1,14,14])))
    
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1)
    n.att_feature  = L.SoftAttention(n.img_feature, n.att_map, dummy)
    n.att_feature_resh = L.Reshape(n.att_feature, reshape_param=dict(shape=dict(dim=[-1,2048])))

    # eltwise product + normalization again for VQA
    n.i_emb2 = L.InnerProduct(n.att_feature_resh, num_output=2048, weight_filler=dict(type='xavier'), param=fixed_weights)
    n.eltwise2 = L.Eltwise(n.lstm_12, n.i_emb2, eltwise_param={'operation': P.Eltwise.PROD})
    n.eltwise2_sqrt = L.SignedSqrt(n.eltwise2)
    n.eltwise2_l2 = L.L2Normalize(n.eltwise2_sqrt)
    n.eltwise2_drop = L.Dropout(n.eltwise2_l2, dropout_param={'dropout_ratio': 0.3})

    n.prediction = L.InnerProduct(n.eltwise2_drop, num_output=3000, weight_filler=dict(type='xavier'), param=fixed_weights)
    n.loss = L.SoftmaxWithLoss(n.prediction, n.label)

    # Embed VQA GT answer during training
    n.exp_emb_ans = L.Embed(n.label, input_dim=3000, num_output=300, \
        weight_filler=dict(type='uniform', min=-0.08, max=0.08))
    n.exp_emb_ans_tanh = L.TanH(n.exp_emb_ans)
    n.exp_emb_ans2 = L.InnerProduct(n.exp_emb_ans_tanh, num_output=2048, weight_filler=dict(type='xavier'))

    # Merge VQA answer and visual+textual feature
    n.exp_emb_resh = L.Reshape(n.exp_emb_ans2, reshape_param=dict(shape=dict(dim=[-1,2048,1,1])))
    n.exp_emb_tiled_1 = L.Tile(n.exp_emb_resh, axis=2, tiles=14)
    n.exp_emb_tiled = L.Tile(n.exp_emb_tiled_1, axis=3, tiles=14)
    n.eltwise_emb = L.Convolution(n.eltwise, kernel_size=1, stride=1, num_output=2048, pad=0, weight_filler=dict(type='xavier'))
    n.exp_eltwise = L.Eltwise(n.eltwise_emb,  n.exp_emb_tiled, eltwise_param={'operation': P.Eltwise.PROD})
    n.exp_eltwise_sqrt = L.SignedSqrt(n.exp_eltwise)
    n.exp_eltwise_l2 = L.L2Normalize(n.exp_eltwise_sqrt)
    n.exp_eltwise_drop = L.Dropout(n.exp_eltwise_l2, dropout_param={'dropout_ratio': 0.3})

    # Attention for Explanation
    n.exp_att_conv1 = L.Convolution(n.exp_eltwise_drop, kernel_size=1, stride=1, num_output=512, pad=0, weight_filler=dict(type='xavier'))
    n.exp_att_conv1_relu = L.ReLU(n.exp_att_conv1)
    n.exp_att_conv2 = L.Convolution(n.exp_att_conv1_relu, kernel_size=1, stride=1, num_output=1, pad=0, weight_filler=dict(type='xavier'))
    n.exp_att_reshaped = L.Reshape(n.exp_att_conv2,reshape_param=dict(shape=dict(dim=[-1,1,14*14])))
    n.exp_att_softmax = L.Softmax(n.exp_att_reshaped, axis=2)
    n.exp_att_map = L.Reshape(n.exp_att_softmax,reshape_param=dict(shape=dict(dim=[-1,1,14,14])))
    
    exp_dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1)
    n.exp_att_feature_prev  = L.SoftAttention(n.img_feature, n.exp_att_map, exp_dummy)
    n.exp_att_feature_resh = L.Reshape(n.exp_att_feature_prev, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.exp_att_feature_embed = L.InnerProduct(n.exp_att_feature_resh, num_output=2048, weight_filler=dict(type='xavier'))
    n.exp_lstm12_embed = L.InnerProduct(n.lstm_12, num_output=2048, weight_filler=dict(type='xavier'))
    n.exp_eltwise2 = L.Eltwise(n.exp_lstm12_embed, n.exp_att_feature_embed, eltwise_param={'operation': P.Eltwise.PROD})
    n.exp_att_feature = L.Eltwise(n.exp_emb_ans2, n.exp_eltwise2, eltwise_param={'operation': P.Eltwise.PROD})


    # LSTM1 for Explanation
    n.exp_lstm1 = L.LSTM(\
                   n.exp_embed, n.exp_cont_1,\
                   recurrent_param=dict(\
                       num_output=2048,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)))

    n.exp_lstm1_dropped = L.Dropout(n.exp_lstm1,dropout_param={'dropout_ratio':0.3})

    # merge with LSTM1 for explanation
    n.exp_att_resh = L.Reshape(n.exp_att_feature, reshape_param=dict(shape=dict(dim=[1, -1, 2048])))
    n.exp_att_tiled = L.Tile(n.exp_att_resh, axis=0, tiles=exp_T)
    n.exp_eltwise_all = L.Eltwise(n.exp_lstm1_dropped, n.exp_att_tiled, eltwise_param={'operation': P.Eltwise.PROD})
    n.exp_eltwise_all_sqrt = L.SignedSqrt(n.exp_eltwise_all)
    n.exp_eltwise_all_l2 = L.L2Normalize(n.exp_eltwise_all_sqrt)
    n.exp_eltwise_all_drop = L.Dropout(n.exp_eltwise_all_l2, dropout_param={'dropout_ratio': 0.3})

    # LSTM2 for Explanation
    n.exp_lstm2 = L.LSTM(\
                   n.exp_eltwise_all_drop, n.exp_cont_2,\
                   recurrent_param=dict(\
                       num_output=1024,\
                       weight_filler=dict(type='uniform',min=-0.08,max=0.08),\
                       bias_filler=dict(type='constant',value=0)))
    n.exp_lstm2_dropped = L.Dropout(n.exp_lstm2,dropout_param={'dropout_ratio':0.3})
    
    n.exp_prediction = L.InnerProduct(n.exp_lstm2_dropped, num_output=exp_vocab_size, weight_filler=dict(type='xavier'), axis=2)

    n.exp_loss = L.SoftmaxWithLoss(n.exp_prediction, n.exp_out,
                                   loss_param=dict(ignore_label=-1),
                                   softmax_param=dict(axis=2))
    n.exp_accuracy = L.Accuracy(n.exp_prediction, n.exp_out, axis=2, ignore_label=-1)

    return n.to_proto()
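A minimal usage sketch for pj_x: it assumes the module-level fixed_weights and fixed_weights_lstm param specs referenced above are defined earlier in the file, and every size below is a placeholder that must match what VQADataProviderLayer emits:

# T / exp_T are the padded question / explanation sequence lengths.
with open('pjx_train.prototxt', 'w') as f:
    f.write(str(pj_x('train', batchsize=64, T=15, exp_T=24,
                     question_vocab_size=15000, exp_vocab_size=8000)))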
Example #26
    def make_caffenet(self,
                      bottom,
                      return_layer,
                      weight_filler=None,
                      bias_filler=None,
                      learning_param=None):
        # Use None defaults: mutable default dicts would leak the entries
        # filled in below between calls.
        weight_filler = weight_filler or {}
        bias_filler = bias_filler or {}
        learning_param = learning_param or {}
        default_weight_filler = self.gaussian_filler()
        default_bias_filler = self.gaussian_filler(1)
        default_learning_param = self.learning_params([[1, 1], [2, 0]])
        for layer in [
                'conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'fc6', 'fc7',
                'fc8'
        ]:
            if layer not in weight_filler:
                weight_filler[layer] = default_weight_filler
            if layer not in bias_filler:
                bias_filler[layer] = default_bias_filler
            if layer not in learning_param:
                learning_param[layer] = default_learning_param

        self.n.tops['conv1'], self.n.tops['relu1'] = self.conv_relu(
            bottom,
            11,
            96,
            stride=4,
            weight_filler=weight_filler['conv1'],
            bias_filler=bias_filler['conv1'],
            learning_param=learning_param['conv1'])
        if return_layer in self.n.tops.keys(): return
        self.n.tops['pool1'] = self.max_pool(self.n.tops['relu1'], 3, stride=2)
        if return_layer in self.n.tops.keys(): return
        self.n.tops['norm1'] = L.LRN(self.n.tops['pool1'],
                                     local_size=5,
                                     alpha=1e-4,
                                     beta=0.75)
        if return_layer in self.n.tops.keys(): return

        self.n.tops['conv2'], self.n.tops['relu2'] = self.conv_relu(
            self.n.tops['norm1'],
            5,
            256,
            pad=2,
            group=2,
            weight_filler=weight_filler['conv2'],
            bias_filler=bias_filler['conv2'],
            learning_param=learning_param['conv2'])
        if return_layer in self.n.tops.keys(): return
        self.n.tops['pool2'] = self.max_pool(self.n.tops['relu2'], 3, stride=2)
        if return_layer in self.n.tops.keys(): return
        self.n.tops['norm2'] = L.LRN(self.n.tops['pool2'],
                                     local_size=5,
                                     alpha=1e-4,
                                     beta=0.75)
        if return_layer in self.n.tops.keys(): return

        self.n.tops['conv3'], self.n.tops['relu3'] = self.conv_relu(
            self.n.tops['norm2'],
            3,
            384,
            pad=1,
            weight_filler=weight_filler['conv3'],
            bias_filler=bias_filler['conv3'],
            learning_param=learning_param['conv3'])
        if return_layer in self.n.tops.keys(): return

        self.n.tops['conv4'], self.n.tops['relu4'] = self.conv_relu(
            self.n.tops['relu3'],
            3,
            384,
            pad=1,
            group=2,
            weight_filler=weight_filler['conv4'],
            bias_filler=bias_filler['conv4'],
            learning_param=learning_param['conv4'])
        if return_layer in self.n.tops.keys(): return

        self.n.tops['conv5'], self.n.tops['relu5'] = self.conv_relu(
            self.n.tops['relu4'],
            3,
            256,
            pad=1,
            group=2,
            weight_filler=weight_filler['conv5'],
            bias_filler=bias_filler['conv5'],
            learning_param=learning_param['conv5'])
        if return_layer in self.n.tops.keys(): return
        self.n.tops['pool5'] = self.max_pool(self.n.tops['relu5'], 3, stride=2)
        if return_layer in self.n.tops.keys(): return

        self.n.tops['fc6'], self.n.tops['relu6'] = self.fc_relu(
            self.n.tops['pool5'],
            4096,
            weight_filler=weight_filler['fc6'],
            bias_filler=bias_filler['fc6'],
            learning_param=learning_param['fc6'])
        if return_layer in self.n.tops.keys(): return
        self.n.tops['drop6'] = L.Dropout(self.n.tops['relu6'], in_place=True)
        if return_layer in self.n.tops.keys(): return
        self.n.tops['fc7'], self.n.tops['relu7'] = self.fc_relu(
            self.n.tops['drop6'],
            4096,
            weight_filler=weight_filler['fc7'],
            bias_filler=bias_filler['fc7'],
            learning_param=learning_param['fc7'])
        if return_layer in self.n.tops.keys(): return
        self.n.tops['drop7'] = L.Dropout(self.n.tops['relu7'], in_place=True)
        if return_layer in self.n.tops.keys(): return
        self.n.tops['fc8'] = L.InnerProduct(self.n.tops['drop7'],
                                            num_output=1000,
                                            weight_filler=weight_filler['fc8'],
                                            bias_filler=bias_filler['fc8'],
                                            param=learning_param['fc8'])
Example #27
def VGGNetBody(net,
               from_layer,
               need_fc=True,
               fully_conv=False,
               reduced=False,
               dilated=False,
               nopool=False,
               dropout=True,
               freeze_layers=[]):
    kwargs = {
        'param':
        [dict(lr_mult=1, decay_mult=1),
         dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)
    }

    assert from_layer in net.keys()
    net.conv1_1 = L.Convolution(net[from_layer],
                                num_output=64,
                                pad=1,
                                kernel_size=3,
                                **kwargs)

    net.relu1_1 = L.ReLU(net.conv1_1, in_place=True)
    net.conv1_2 = L.Convolution(net.relu1_1,
                                num_output=64,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu1_2 = L.ReLU(net.conv1_2, in_place=True)

    if nopool:
        name = 'conv1_3'
        net[name] = L.Convolution(net.relu1_2,
                                  num_output=64,
                                  pad=1,
                                  kernel_size=3,
                                  stride=2,
                                  **kwargs)
    else:
        name = 'pool1'
        net[name] = L.Pooling(net.relu1_2,
                              pool=P.Pooling.MAX,
                              kernel_size=2,
                              stride=2)

    net.conv2_1 = L.Convolution(net[name],
                                num_output=128,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu2_1 = L.ReLU(net.conv2_1, in_place=True)
    net.conv2_2 = L.Convolution(net.relu2_1,
                                num_output=128,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu2_2 = L.ReLU(net.conv2_2, in_place=True)

    if nopool:
        name = 'conv2_3'
        net[name] = L.Convolution(net.relu2_2,
                                  num_output=128,
                                  pad=1,
                                  kernel_size=3,
                                  stride=2,
                                  **kwargs)
    else:
        name = 'pool2'
        net[name] = L.Pooling(net.relu2_2,
                              pool=P.Pooling.MAX,
                              kernel_size=2,
                              stride=2)

    net.conv3_1 = L.Convolution(net[name],
                                num_output=256,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu3_1 = L.ReLU(net.conv3_1, in_place=True)
    net.conv3_2 = L.Convolution(net.relu3_1,
                                num_output=256,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu3_2 = L.ReLU(net.conv3_2, in_place=True)
    net.conv3_3 = L.Convolution(net.relu3_2,
                                num_output=256,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu3_3 = L.ReLU(net.conv3_3, in_place=True)

    if nopool:
        name = 'conv3_4'
        net[name] = L.Convolution(net.relu3_3,
                                  num_output=256,
                                  pad=1,
                                  kernel_size=3,
                                  stride=2,
                                  **kwargs)
    else:
        name = 'pool3'
        net[name] = L.Pooling(net.relu3_3,
                              pool=P.Pooling.MAX,
                              kernel_size=2,
                              stride=2)

    net.conv4_1 = L.Convolution(net[name],
                                num_output=512,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu4_1 = L.ReLU(net.conv4_1, in_place=True)
    net.conv4_2 = L.Convolution(net.relu4_1,
                                num_output=512,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu4_2 = L.ReLU(net.conv4_2, in_place=True)
    net.conv4_3 = L.Convolution(net.relu4_2,
                                num_output=512,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu4_3 = L.ReLU(net.conv4_3, in_place=True)

    if nopool:
        name = 'conv4_4'
        net[name] = L.Convolution(net.relu4_3,
                                  num_output=512,
                                  pad=1,
                                  kernel_size=3,
                                  stride=2,
                                  **kwargs)
    else:
        name = 'pool4'
        net[name] = L.Pooling(net.relu4_3,
                              pool=P.Pooling.MAX,
                              kernel_size=2,
                              stride=2)

    net.conv5_1 = L.Convolution(net[name],
                                num_output=512,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu5_1 = L.ReLU(net.conv5_1, in_place=True)
    net.conv5_2 = L.Convolution(net.relu5_1,
                                num_output=512,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu5_2 = L.ReLU(net.conv5_2, in_place=True)
    net.conv5_3 = L.Convolution(net.relu5_2,
                                num_output=512,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu5_3 = L.ReLU(net.conv5_3, in_place=True)

    if need_fc:
        if dilated:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3,
                                          num_output=512,
                                          pad=1,
                                          kernel_size=3,
                                          stride=1,
                                          **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3,
                                      pool=P.Pooling.MAX,
                                      pad=1,
                                      kernel_size=3,
                                      stride=1)
        else:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3,
                                          num_output=512,
                                          pad=1,
                                          kernel_size=3,
                                          stride=2,
                                          **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3,
                                      pool=P.Pooling.MAX,
                                      kernel_size=2,
                                      stride=2)

        if fully_conv:
            if dilated:
                if reduced:
                    net.fc6 = L.Convolution(net[name],
                                            num_output=1024,
                                            pad=6,
                                            kernel_size=3,
                                            dilation=6,
                                            **kwargs)
                else:
                    net.fc6 = L.Convolution(net[name],
                                            num_output=4096,
                                            pad=6,
                                            kernel_size=7,
                                            dilation=2,
                                            **kwargs)
            else:
                if reduced:
                    net.fc6 = L.Convolution(net[name],
                                            num_output=1024,
                                            pad=3,
                                            kernel_size=3,
                                            dilation=3,
                                            **kwargs)
                else:
                    net.fc6 = L.Convolution(net[name],
                                            num_output=4096,
                                            pad=3,
                                            kernel_size=7,
                                            **kwargs)

            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6,
                                      dropout_ratio=0.5,
                                      in_place=True)

            if reduced:
                net.fc7 = L.Convolution(net.relu6,
                                        num_output=1024,
                                        kernel_size=1,
                                        **kwargs)
            else:
                net.fc7 = L.Convolution(net.relu6,
                                        num_output=4096,
                                        kernel_size=1,
                                        **kwargs)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7,
                                      dropout_ratio=0.5,
                                      in_place=True)
        else:
            net.fc6 = L.InnerProduct(net[name], num_output=4096)
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6,
                                      dropout_ratio=0.5,
                                      in_place=True)
            net.fc7 = L.InnerProduct(net.relu6, num_output=4096)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7,
                                      dropout_ratio=0.5,
                                      in_place=True)

    # Update freeze layers.
    kwargs['param'] = [
        dict(lr_mult=0, decay_mult=0),
        dict(lr_mult=0, decay_mult=0)
    ]
    layers = net.keys()
    for freeze_layer in freeze_layers:
        if freeze_layer in layers:
            net.update(freeze_layer, kwargs)

    return net
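A minimal usage sketch: the flag combination below (fully_conv, reduced, dilated, no dropout) is the variant SSD-style detectors build, and it assumes the SSD fork of caffe, whose NetSpec provides the keys()/update() methods used above. The 300x300 dummy input is illustrative only:

import caffe
from caffe import layers as L, params as P

net = caffe.NetSpec()
net.data = L.DummyData(shape=dict(dim=[1, 3, 300, 300]))
VGGNetBody(net, from_layer='data', need_fc=True, fully_conv=True,
           reduced=True, dilated=True, dropout=False)
print(net.to_proto())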
Example #28
    def lstm_unit(self,
                  prefix,
                  x,
                  cont,
                  static=None,
                  h=None,
                  c=None,
                  batch_size=100,
                  timestep=0,
                  lstm_hidden=1000,
                  weight_filler=None,
                  bias_filler=None,
                  weight_lr_mult=1,
                  bias_lr_mult=2,
                  weight_decay_mult=1,
                  bias_decay_mult=0,
                  concat_hidden=True):

        #assume static is already transformed
        if not weight_filler:
            weight_filler = self.uniform_weight_filler(-0.08, 0.08)
        if not bias_filler:
            bias_filler = self.constant_filler(0)
        if not h:
            h = self.dummy_data_layer([1, batch_size, lstm_hidden], 1)
        if not c:
            c = self.dummy_data_layer([1, batch_size, lstm_hidden], 1)
        gate_dim = self.gate_dim

        def get_name(name):
            return '%s_%s' % (prefix, name)

        def get_param(weight_name, bias_name=None):
            w = dict(lr_mult=weight_lr_mult,
                     decay_mult=weight_decay_mult,
                     name=get_name(weight_name))
            if bias_name is not None:
                b = dict(lr_mult=bias_lr_mult,
                         decay_mult=bias_decay_mult,
                         name=get_name(bias_name))
                return [w, b]
            return [w]

        # gate_dim is the dimension of the cell-state inputs: 4 gates
        # (i, f, o, g), each of size lstm_hidden, so gate_dim = 4 * lstm_hidden.
        # Add a layer to transform all timesteps of x to the gate dimension:
        #     x_transform = W_xc * x + b_c
        cont_reshape = L.Reshape(cont, shape=dict(dim=[1, 1, -1]))
        x = L.InnerProduct(x,
                           num_output=gate_dim,
                           axis=2,
                           weight_filler=weight_filler,
                           bias_filler=bias_filler,
                           param=get_param('W_xc', 'b_c'))
        setattr(self.n, get_name('%d_x_transform' % timestep), x)
        h_conted = L.Eltwise(h, cont_reshape, coeff_blob=True)
        h = L.InnerProduct(h_conted,
                           num_output=gate_dim,
                           axis=2,
                           bias_term=False,
                           weight_filler=weight_filler,
                           param=get_param('W_hc'))
        h_name = get_name('%d_h_transform' % timestep)
        if not hasattr(self.n, h_name):
            setattr(self.n, h_name, h)
        gate_input_args = x, h
        if static is not None:
            gate_input_args += (static, )
        gate_input = L.Eltwise(*gate_input_args)
        assert cont is not None
        c, h = L.LSTMUnit(c, gate_input, cont_reshape, ntop=2)
        return h, c
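Because get_param reuses the W_xc, W_hc, and b_c blob names for a given prefix, calling this unit once per timestep ties the weights across all steps. A hypothetical unrolling helper in the same class might look like:

    def unroll_lstm(self, prefix, x_steps, cont_steps, batch_size):
        # Hypothetical helper: chain lstm_unit across timesteps, feeding each
        # step's hidden and cell state into the next call. On the first step,
        # h and c default to zero-filled dummy blobs inside lstm_unit.
        h, c = None, None
        for t, (x_t, cont_t) in enumerate(zip(x_steps, cont_steps)):
            h, c = self.lstm_unit(prefix, x_t, cont_t,
                                  batch_size=batch_size, timestep=t,
                                  h=h, c=c)
        return h, c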
Example #29
def create_deploy():
    # The first (data) layer is omitted in a deploy net
    # Layer 2: convolution
    conv1 = L.Convolution(
        bottom='data',
        kernel_size=11,
        stride=4,
        num_output=96,
        pad=0,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0))
    # Layer 3: activation (ReLU)
    relu1 = L.ReLU(conv1, in_place=True)
    # Layer 4: pooling
    pool1 = L.Pooling(relu1, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    # Layer 5: LRN
    norm1 = L.LRN(pool1, local_size=5, alpha=1e-4, beta=0.75)
    # Layer 6: convolution
    conv2 = L.Convolution(
        norm1,
        kernel_size=5,
        stride=1,
        num_output=256,
        pad=2,
        group=2,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=1))

    # Layer 7: activation (ReLU)
    relu2 = L.ReLU(conv2, in_place=True)
    # Layer 8: pooling
    pool2 = L.Pooling(relu2, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    # Layer 9: LRN
    norm2 = L.LRN(pool2, local_size=5, alpha=1e-4, beta=0.75)
    # Layer 10: convolution
    conv3 = L.Convolution(
        norm2,
        kernel_size=3,
        stride=1,
        num_output=384,
        pad=1,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0))
    # Layer 11: activation (ReLU)
    relu3 = L.ReLU(conv3, in_place=True)
    # Layer 12: convolution
    conv4 = L.Convolution(
        relu3,
        kernel_size=3,
        stride=1,
        num_output=384,
        pad=1,
        group=2,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=1))

    # Layer 13: activation (ReLU)
    relu4 = L.ReLU(conv4, in_place=True)
    # Layer 14: convolution
    conv5 = L.Convolution(
        relu4,
        kernel_size=3,
        stride=1,
        num_output=256,
        pad=1,
        group=2,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=1))

    # Layer 15: activation (ReLU)
    relu5 = L.ReLU(conv5, in_place=True)
    # Layer 16: pooling
    pool5 = L.Pooling(relu5, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    # Layer 17: fully connected
    fc6 = L.InnerProduct(
        pool5,
        num_output=4096,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.005),
        bias_filler=dict(type='constant', value=1))
    # Layer 18: activation (ReLU)
    relu6 = L.ReLU(fc6, in_place=True)
    # Layer 19: dropout
    drop6 = L.Dropout(relu6, dropout_ratio=0.5, in_place=True)
    # Layer 20: fully connected
    fc7 = L.InnerProduct(
        drop6,
        num_output=4096,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.005),
        bias_filler=dict(type='constant', value=1))
    # Layer 21: activation (ReLU)
    relu7 = L.ReLU(fc7, in_place=True)
    # Layer 22: dropout
    drop7 = L.Dropout(relu7, dropout_ratio=0.5, in_place=True)
    # Layer 23: fully connected
    fc8 = L.InnerProduct(
        drop7,
        num_output=1000,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0))

    # No accuracy layer at the end; a Softmax layer produces probabilities instead
    prob = L.Softmax(fc8)
    return to_proto(prob)
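Because conv1 takes a bottom named 'data' that nothing in this function produces, the deploy file needs an input declaration prepended ahead of the generated layers. A minimal sketch, using the standard 227x227 AlexNet crop:

def write_deploy(path='deploy.prototxt'):
    # Prepend the input declaration, then append the generated layers.
    with open(path, 'w') as f:
        f.write('input: "data"\n')
        f.write('input_shape { dim: 1 dim: 3 dim: 227 dim: 227 }\n')
        f.write(str(create_deploy()))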
Example #30
    def net(self, params=[]):

        # Slowly-learned conv parameters: [weights, biases]
        conv_param = [
            dict(lr_mult=0.01, decay_mult=1),  # weights
            dict(lr_mult=0.02, decay_mult=0)  # biases
        ]

        # Fully-learned fc parameters: [weights, biases]
        fc_param = [
            dict(lr_mult=1, decay_mult=1),  # weights
            dict(lr_mult=2, decay_mult=0)  # biases
        ]

        wfiller = dict(type='gaussian', std=0.01)
        wfiller_fc = dict(type='gaussian', std=0.005)
        bfiller = dict(type='constant', value=0.1)

        # initialize net and data layer
        n = caffe.NetSpec()

        # layer 0
        n.data = self.data
        # layer 1
        n.conv1 = L.Convolution(n.data,
                                kernel_size=11,
                                num_output=96,
                                stride=4,
                                pad=0,
                                group=1,
                                param=conv_param,
                                weight_filler=wfiller,
                                bias_filler=bfiller)
        self.receptiveFieldStride.append(4)
        if self.last_layer == 'conv1':
            self.__network_end(n, n.conv1, params)
            return

        n.relu1 = L.ReLU(n.conv1, in_place=True)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'relu1':
            self.__network_end(n, n.relu1, params)
            return

        n.norm1 = L.LRN(n.relu1, local_size=5, alpha=1e-4, beta=0.75)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'norm1':
            self.__network_end(n, n.norm1, params)
            return

        n.pool1 = L.Pooling(n.norm1,
                            pool=P.Pooling.MAX,
                            kernel_size=3,
                            stride=2)
        self.receptiveFieldStride.append(2)
        if self.last_layer == 'pool1':
            self.__network_end(n, n.pool1, params)
            return

        # layer 2
        n.conv2 = L.Convolution(n.pool1,
                                kernel_size=5,
                                num_output=256,
                                stride=1,
                                pad=2,
                                group=2,
                                param=conv_param,
                                weight_filler=wfiller,
                                bias_filler=bfiller)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'conv2':
            self.__network_end(n, n.conv2, params)
            return

        n.relu2 = L.ReLU(n.conv2, in_place=True)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'relu2':
            self.__network_end(n, n.relu2, params)
            return

        n.norm2 = L.LRN(n.relu2, local_size=5, alpha=1e-4, beta=0.75)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'norm2':
            self.__network_end(n, n.norm2, params)
            return

        n.pool2 = L.Pooling(n.norm2,
                            pool=P.Pooling.MAX,
                            kernel_size=3,
                            stride=2)
        self.receptiveFieldStride.append(2)
        if self.last_layer == 'pool2':
            self.__network_end(n, n.pool2, params)
            return

        # layer 3
        n.conv3 = L.Convolution(n.pool2,
                                kernel_size=3,
                                num_output=384,
                                stride=1,
                                pad=1,
                                group=1,
                                param=conv_param,
                                weight_filler=wfiller,
                                bias_filler=bfiller)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'conv3':
            self.__network_end(n, n.conv3, params)
            return

        n.relu3 = L.ReLU(n.conv3, in_place=True)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'relu3':
            self.__network_end(n, n.relu3, params)
            return

        # layer 4
        n.conv4 = L.Convolution(n.relu3,
                                kernel_size=3,
                                num_output=384,
                                stride=1,
                                pad=1,
                                group=2,
                                param=conv_param,
                                weight_filler=wfiller,
                                bias_filler=bfiller)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'conv4':
            self.__network_end(n, n.conv4, params)
            return

        n.relu4 = L.ReLU(n.conv4, in_place=True)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'relu4':
            self.__network_end(n, n.relu4, params)
            return

        # layer 5
        n.conv5 = L.Convolution(n.relu4,
                                kernel_size=3,
                                num_output=256,
                                stride=1,
                                pad=1,
                                group=2,
                                param=conv_param,
                                weight_filler=wfiller,
                                bias_filler=bfiller)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'conv5':
            self.__network_end(n, n.conv5, params)
            return

        n.relu5 = L.ReLU(n.conv5, in_place=True)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'relu5':
            self.__network_end(n, n.relu5, params)
            return

        n.pool5 = L.Pooling(n.relu5,
                            pool=P.Pooling.MAX,
                            kernel_size=3,
                            stride=2)
        self.receptiveFieldStride.append(2)
        if self.last_layer == 'pool5':
            self.__network_end(n, n.pool5, params)
            return

        # layer 6
        n.fc6 = L.InnerProduct(n.pool5,
                               num_output=4096,
                               param=fc_param,
                               weight_filler=wfiller_fc,
                               bias_filler=bfiller)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'fc6':
            self.__network_end(n, n.fc6, params)
            return

        n.relu6 = L.ReLU(n.fc6, in_place=True)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'relu6':
            self.__network_end(n, n.relu6, params)
            return

        # layer 7
        n.fc7 = L.InnerProduct(n.relu6,
                               num_output=4096,
                               param=fc_param,
                               weight_filler=wfiller_fc,
                               bias_filler=bfiller)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'fc7':
            self.__network_end(n, n.fc7, params)
            return

        n.relu7 = L.ReLU(n.fc7, in_place=True)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'relu7':
            self.__network_end(n, n.relu7, params)
            return

        # layer 8: always learn fc8 (param=fc_param)
        n.fc8 = L.InnerProduct(n.relu7,
                               num_output=1000,
                               param=fc_param,
                               weight_filler=wfiller_fc,
                               bias_filler=bfiller)
        self.receptiveFieldStride.append(1)
        if self.last_layer == 'fc8':
            self.__network_end(n, n.fc8, params)
            return
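The early-exit check after every layer lets the caller truncate the graph at an arbitrary point, e.g. to reuse the net as a feature extractor. A hypothetical usage sketch; the class name and constructor are placeholders for whatever encloses this method:

extractor = AlexNetBuilder()    # placeholder for the enclosing class
extractor.last_layer = 'pool5'  # stop the graph right after pool5
extractor.net()                 # __network_end() receives n.pool5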