Example #1
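(All snippets below assume the standard pycaffe imports, which the originals omit: import caffe; from caffe import layers as L, params as P; plus caffe.proto.caffe_pb2 where referenced. A few snippets use the aliases cl/cp or CL for the same modules.)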
def create_net(phase): 
    global train_transform_param
    global test_transform_param
    train_transform_param = {
            'mirror': True,
            'mean_file': Params['mean_file'] 
            }
    test_transform_param = {
            'mean_file': Params['mean_file'] 
            }
    if phase == 'train':
        lmdb_file = Params['train_lmdb']
        transform_param = train_transform_param
        batch_size = Params['batch_size_per_device']
    else:
        lmdb_file = Params['test_lmdb']
        transform_param = test_transform_param
        batch_size = Params['test_batch_size']

    net = caffe.NetSpec()
    net.data, net.label = L.Data(batch_size=batch_size,
        backend=P.Data.LMDB,
        source=lmdb_file,
        transform_param=transform_param,
        ntop=2) 
        #include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
   
    kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
            'weight_filler': dict(type='gaussian', std=0.0001),
            'bias_filler': dict(type='constant')}
    net.conv1 = L.Convolution(net.data, num_output=16, kernel_size=3, **kwargs)
    net.pool1 = L.Pooling(net.conv1, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    net.relu1 = L.ReLU(net.pool1, in_place=True)
    kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
            'weight_filler': dict(type='gaussian', std=0.005),
            'bias_filler': dict(type='constant')}
    net.fc2 = L.InnerProduct(net.pool1, num_output=16, **kwargs)
    net.relu2 = L.ReLU(net.fc2, in_place=True)
    net.drop2 = L.Dropout(net.fc2, in_place=True, dropout_param=dict(dropout_ratio=0.5))
    kwargs = {
            'param': [dict(lr_mult=1, decay_mult=100), dict(lr_mult=2, decay_mult=0)],
            'weight_filler': dict(type='gaussian', std=0.01),
            'bias_filler': dict(type='constant', value=0)}
    net.fc3 = L.InnerProduct(net.fc2, num_output=2, **kwargs)
    if phase == 'train':
        net.loss = L.SoftmaxWithLoss(net.fc3, net.label)
    elif phase == 'test':
        net.accuracy = L.Accuracy(net.fc3, net.label)
    else:
        net.prob = L.Softmax(net.fc3)

    net_proto = net.to_proto()
    if phase == 'deploy':
        # deploy: drop the LMDB data layer and declare a plain input instead
        del net_proto.layer[0]
        #del net_proto.layer[-1]
        net_proto.input.extend(['data'])
        net_proto.input_dim.extend([64, 3, 12, 36])
    net_proto.name = '{}_{}'.format(Params['model_name'], phase)
    return net_proto
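A minimal driver sketch for Example #1 (the file naming is an assumption) that writes all three prototxt variants:

# hypothetical usage: emit train/test/deploy prototxt files
for phase in ('train', 'test', 'deploy'):
    with open('{}_{}.prototxt'.format(Params['model_name'], phase), 'w') as f:
        f.write(str(create_net(phase)))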
Example #2
def mobilenet(split):
    train_data_file = root_str + 'train_lmdb'
    test_data_file = root_str + 'test_lmdb'
    #mean_file = root_str + 'imagenet_mean.binaryproto'


    if split == 'train':
        data, labels = L.Data(source=train_data_file,
                              backend=P.Data.LMDB,
                              ntop=2,
                              batch_size=128,
                              # note: image_data_param has no effect on an LMDB Data layer
                              image_data_param=dict(shuffle=True),
                              #include={'phase': caffe.TRAIN},
                              transform_param=dict(#scale=0.00390625,
                                                   crop_size=28,
                                                   #mean_file=mean_file,
                                                   mirror=True))
    else:
        data, labels = L.Data(source=test_data_file,
                              backend=P.Data.LMDB,
                              ntop=2,
                              batch_size=100,
                              # note: image_data_param has no effect on an LMDB Data layer
                              image_data_param=dict(shuffle=True),
                              #include={'phase': caffe.TRAIN},
                              transform_param=dict(#scale=0.00390625,
                                                   crop_size=28
                                                   #mean_file=mean_file,
                                                   #mirror=True
                                                   ))
        
    
    if split == 'deploy':
        scale, result = conv_BN_scale_relu(split, bottom="data", nout=32, ks=3, stride=1, pad=1, group=1)  # conv1: standard 3x3
    else:
        scale, result = conv_BN_scale_relu(split, bottom=data, nout=32, ks=3, stride=1, pad=1, group=1)    # conv1: standard 3x3

    # depthwise-separable stacks: 3x3 depthwise (group == nout) followed by 1x1 pointwise
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=32, ks=3, stride=1, pad=1, group=32)     # conv2 depthwise
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=64, ks=1, stride=1, pad=0, group=1)      # conv2 pointwise

    scale, result = conv_BN_scale_relu(split, bottom=result, nout=64, ks=3, stride=2, pad=1, group=64)     # conv3 depthwise
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=128, ks=1, stride=1, pad=0, group=1)     # conv3 pointwise

    scale, result = conv_BN_scale_relu(split, bottom=result, nout=128, ks=3, stride=2, pad=1, group=128)   # conv4 depthwise
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=256, ks=1, stride=1, pad=0, group=1)     # conv4 pointwise

    scale, result = conv_BN_scale_relu(split, bottom=result, nout=256, ks=3, stride=2, pad=1, group=256)   # conv5 depthwise
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=512, ks=1, stride=1, pad=0, group=1)     # conv5 pointwise

    scale, result = conv_BN_scale_relu(split, bottom=result, nout=512, ks=3, stride=2, pad=1, group=512)   # conv6 depthwise
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=1024, ks=1, stride=1, pad=0, group=1)    # conv6 pointwise
    
    pool = L.Pooling(result, pool = P.Pooling.AVE, global_pooling = True)
    #pool = L.Pooling(result, pool=P.Pooling.AVE, kernel_size=4, stride=1,pad=0)  
    IP = L.InnerProduct(pool, num_output = 10,
                        weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))

    if split == 'deploy':
        prob=L.Softmax(IP)
        return to_proto(prob)
    else:
        loss = L.SoftmaxWithLoss(IP, labels)
        if split == 'train':
            return to_proto(loss)
         
        acc = L.Accuracy(IP, labels)
        return to_proto(acc, loss)
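conv_BN_scale_relu is referenced here (and in Example #7) but never defined. A plausible sketch that matches both call sites; the fillers and in-place chaining are assumptions, and because the ReLU is in place, either returned top can be fed to the next block:

# assumed helper: Convolution -> BatchNorm -> Scale -> ReLU
def conv_BN_scale_relu(split, bottom, nout, ks, stride, pad, group=1):
    conv = L.Convolution(bottom, num_output=nout, kernel_size=ks,
                         stride=stride, pad=pad, group=group,
                         weight_filler=dict(type='xavier'),
                         bias_filler=dict(type='constant'))
    # freeze BN statistics outside of training
    bn = L.BatchNorm(conv, in_place=True,
                     batch_norm_param=dict(use_global_stats=(split != 'train')))
    scale = L.Scale(bn, in_place=True, scale_param=dict(bias_term=True))
    relu = L.ReLU(scale, in_place=True)
    return scale, relu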
Example #3
def max_pool(bottom, ks=2, stride=2):
  return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
Example #4
def create_net(lmdb, batch_size, mean_file, model):

    n = caffe.NetSpec()
    # data layer (model selects the phase: False -> TRAIN, True -> TEST)
    if not model:
        n.data, n.label = L.Data(batch_size=batch_size,
                                 backend=P.Data.LMDB,
                                 source=lmdb,
                                 include=dict(phase=0),
                                 transform_param=dict(scale=1. / 255,
                                                      mirror=True,
                                                      crop_size=227,
                                                      mean_file=mean_file),
                                 ntop=2)
    else:
        n.data, n.label = L.Data(batch_size=batch_size,
                                 backend=P.Data.LMDB,
                                 source=lmdb,
                                 include=dict(phase=1),
                                 transform_param=dict(scale=1. / 255,
                                                      mirror=True,
                                                      crop_size=227,
                                                      mean_file=mean_file),
                                 ntop=2)

    # convolution layer conv1
    n.conv1 = L.Convolution(
        n.data,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=11,
        stride=4,
        num_output=96,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0))
    # ReLU layer
    n.relu1 = L.ReLU(n.conv1, in_place=True)

    # LRN layer
    n.norm1 = L.LRN(n.conv1, local_size=5, alpha=0.0001, beta=0.75)

    # pooling layer
    n.pool1 = L.Pooling(n.norm1, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    # convolution layer conv2
    n.conv2 = L.Convolution(
        n.pool1,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=5,
        num_output=256,
        pad=2,
        group=2,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0.1))

    # ReLU layer 2
    n.relu2 = L.ReLU(n.conv2, in_place=True)

    # LRN layer 2
    n.norm2 = L.LRN(n.conv2, local_size=5, alpha=0.0001, beta=0.75)

    # pooling layer 2
    n.pool2 = L.Pooling(n.norm2, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    # convolution layer conv3
    n.conv3 = L.Convolution(
        n.pool2,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=3,
        num_output=384,
        pad=1,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0))
    # ReLU layer 3
    n.relu3 = L.ReLU(n.conv3, in_place=True)

    # convolution layer conv4
    n.conv4 = L.Convolution(
        n.conv3,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=3,
        num_output=384,
        pad=1,
        group=2,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0.1))
    # ReLU layer 4
    n.relu4 = L.ReLU(n.conv4, in_place=True)

    # convolution layer conv5
    n.conv5 = L.Convolution(
        n.conv4,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=3,
        num_output=256,
        pad=1,
        group=2,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0.1))
    # ReLU layer 5
    n.relu5 = L.ReLU(n.conv5, in_place=True)

    # pooling layer 5
    n.pool5 = L.Pooling(n.conv5, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    # fully connected layer fc6
    n.fc6 = L.InnerProduct(
        n.pool5,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        num_output=4096,
        weight_filler=dict(type="gaussian", std=0.005),
        bias_filler=dict(type='constant', value=0.1))

    n.relu6 = L.ReLU(n.fc6, in_place=True)

    # dropout layer 6
    n.drop6 = L.Dropout(n.fc6, dropout_ratio=0.5, in_place=True)  # probability of dropping activations

    # fully connected layer fc7
    n.fc7 = L.InnerProduct(
        n.fc6,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        num_output=4096,
        weight_filler=dict(type="gaussian", std=0.005),
        bias_filler=dict(type='constant', value=0.1))

    # ReLU layer 7
    n.relu7 = L.ReLU(n.fc7, in_place=True)

    # dropout layer 7
    n.drop7 = L.Dropout(n.fc7, dropout_ratio=0.5, in_place=True)  # probability of dropping activations

    # fully connected layer fc8

    n.fc8 = L.InnerProduct(
        n.fc7,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        num_output=1000,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0))

    if model:
        n.acc = L.Accuracy(n.fc8, n.label)
    else:
        n.loss = L.SoftmaxWithLoss(n.fc8, n.label)

    return n.to_proto()
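A hedged usage sketch (paths and batch sizes are placeholders); the model flag selects the phase, False for TRAIN and True for TEST:

# hypothetical usage of the AlexNet-style builder above
with open('train.prototxt', 'w') as f:
    f.write(str(create_net('train_lmdb', 256, 'mean.binaryproto', model=False)))
with open('val.prototxt', 'w') as f:
    f.write(str(create_net('val_lmdb', 50, 'mean.binaryproto', model=True)))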
Example #5
def InceptionV3Body(net, from_layer, output_pred=False):
    # scale is fixed to 1, thus we ignore it.
    use_scale = False

    out_layer = 'conv'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=32, kernel_size=3, pad=0, stride=2, use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'conv_1'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=32, kernel_size=3, pad=0, stride=1, use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'conv_2'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=64, kernel_size=3, pad=1, stride=1, use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'pool'
    net[out_layer] = L.Pooling(net[from_layer], pool=P.Pooling.MAX,
                               kernel_size=3, stride=2, pad=0)
    from_layer = out_layer

    out_layer = 'conv_3'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=80, kernel_size=1, pad=0, stride=1, use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'conv_4'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=192, kernel_size=3, pad=0, stride=1, use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'pool_1'
    net[out_layer] = L.Pooling(net[from_layer], pool=P.Pooling.MAX,
                               kernel_size=3, stride=2, pad=0)
    from_layer = out_layer

    # inceptions with 1x1, 3x3, 5x5 convolutions
    for inception_id in range(0, 3):
        if inception_id == 0:
            out_layer = 'mixed'
            tower_2_conv_num_output = 32
        else:
            out_layer = 'mixed_{}'.format(inception_id)
            tower_2_conv_num_output = 64
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=48, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=64, kernel_size=5, pad=2, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=96, kernel_size=3, pad=1, stride=1),
            dict(name='conv_2', num_output=96, kernel_size=3, pad=1, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool', pool=P.Pooling.AVE, kernel_size=3, pad=1, stride=1),
            dict(name='conv', num_output=tower_2_conv_num_output, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    # inceptions with 1x1, 3x3(in sequence) convolutions
    out_layer = 'mixed_3'
    towers = []
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=384, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}/tower'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=96, kernel_size=3, pad=1, stride=1),
        dict(name='conv_2', num_output=96, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='pool', pool=P.Pooling.MAX, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    out_layer = '{}/join'.format(out_layer)
    net[out_layer] = L.Concat(*towers, axis=1)
    from_layer = out_layer

    # inceptions with 1x1, 7x1, 1x7 convolutions
    for inception_id in range(4, 8):
        if inception_id == 4:
            num_output = 128
        elif inception_id == 5 or inception_id == 6:
            num_output = 160
        elif inception_id == 7:
            num_output = 192
        out_layer = 'mixed_{}'.format(inception_id)
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=num_output, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
            dict(name='conv_2', num_output=192, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
        ])
        towers.append(tower)
        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=num_output, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
            dict(name='conv_2', num_output=num_output, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
            dict(name='conv_3', num_output=num_output, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
            dict(name='conv_4', num_output=192, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
        ])
        towers.append(tower)
        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool', pool=P.Pooling.AVE, kernel_size=3, pad=1, stride=1),
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    # inceptions with 1x1, 3x3, 1x7, 7x1 filters
    out_layer = 'mixed_8'
    towers = []
    tower_name = '{}/tower'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=320, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}/tower_1'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=192, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
        dict(name='conv_2', num_output=192, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
        dict(name='conv_3', num_output=192, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='pool', pool=P.Pooling.MAX, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    out_layer = '{}/join'.format(out_layer)
    net[out_layer] = L.Concat(*towers, axis=1)
    from_layer = out_layer

    for inception_id in range(9, 11):
        num_output = 384
        num_output2 = 448
        if inception_id == 9:
            pool = P.Pooling.AVE
        else:
            pool = P.Pooling.MAX
        out_layer = 'mixed_{}'.format(inception_id)
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=320, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)

        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output, kernel_size=1, pad=0, stride=1),
        ])
        subtowers = []
        subtower_name = '{}/mixed'.format(tower_name)
        subtower = InceptionTower(net, '{}/conv'.format(tower_name), subtower_name, [
            dict(name='conv', num_output=num_output, kernel_size=[1, 3], pad=[0, 1], stride=[1, 1]),
        ])
        subtowers.append(subtower)
        subtower = InceptionTower(net, '{}/conv'.format(tower_name), subtower_name, [
            dict(name='conv_1', num_output=num_output, kernel_size=[3, 1], pad=[1, 0], stride=[1, 1]),
        ])
        subtowers.append(subtower)
        net[subtower_name] = L.Concat(*subtowers, axis=1)
        towers.append(net[subtower_name])

        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output2, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=num_output, kernel_size=3, pad=1, stride=1),
        ])
        subtowers = []
        subtower_name = '{}/mixed'.format(tower_name)
        subtower = InceptionTower(net, '{}/conv_1'.format(tower_name), subtower_name, [
            dict(name='conv', num_output=num_output, kernel_size=[1, 3], pad=[0, 1], stride=[1, 1]),
        ])
        subtowers.append(subtower)
        subtower = InceptionTower(net, '{}/conv_1'.format(tower_name), subtower_name, [
            dict(name='conv_1', num_output=num_output, kernel_size=[3, 1], pad=[1, 0], stride=[1, 1]),
        ])
        subtowers.append(subtower)
        net[subtower_name] = L.Concat(*subtowers, axis=1)
        towers.append(net[subtower_name])

        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool', pool=pool, kernel_size=3, pad=1, stride=1),
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    if output_pred:
        net.pool_3 = L.Pooling(net[from_layer], pool=P.Pooling.AVE, kernel_size=8, pad=0, stride=1)
        net.softmax = L.InnerProduct(net.pool_3, num_output=1008)
        net.softmax_prob = L.Softmax(net.softmax)

    return net
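ConvBNLayer and InceptionTower are helpers from SSD-style model_libs code and are not shown here. A condensed sketch of ConvBNLayer under the same calling convention; the real helper also handles asymmetric kernels such as [1, 7] via kernel_h/kernel_w, and the fillers and name suffixes below are assumptions:

# condensed sketch: Convolution with optional BatchNorm/Scale and ReLU,
# registered on `net` under names derived from out_layer
def ConvBNLayer(net, from_layer, out_layer, use_bn, use_relu,
                num_output, kernel_size, pad, stride, use_scale=True):
    net[out_layer] = L.Convolution(net[from_layer], num_output=num_output,
                                   kernel_size=kernel_size, pad=pad,
                                   stride=stride, bias_term=not use_bn,
                                   weight_filler=dict(type='xavier'))
    if use_bn:
        net[out_layer + '/bn'] = L.BatchNorm(net[out_layer], in_place=True)
        if use_scale:
            net[out_layer + '/scale'] = L.Scale(net[out_layer + '/bn'],
                                                in_place=True,
                                                scale_param=dict(bias_term=True))
    if use_relu:
        net[out_layer + '/relu'] = L.ReLU(net[out_layer], in_place=True)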
Example #6
def max_pooling_layer(previous, name, params):
    """ create a max pooling layer """
    return cl.Pooling(
        previous, name=name, pool=cp.Pooling.MAX,
        kernel_size=int(params["size"]), stride=int(params["stride"]))
Example #7
def ResNet(split):

    data, labels = L.Python(module='readDataLayer',
                            layer='input_layer',
                            ntop=2,
                            param_str=str(
                                dict(split=split,
                                     data_dir=this_dir + '/data/',
                                     train_data_name='train_',
                                     test_data_name='test',
                                     train_batches=128,
                                     test_batches=128,
                                     crop_size_x=33,
                                     crop_size_y=33,
                                     train_pack_nums=9,
                                     test_pack_nums=1)))
    HGG_1, _ = conv_BN_scale_relu(split, data, 64, 3, 1, 0)
    HGG_2, _ = conv_BN_scale_relu(split, HGG_1, 64, 3, 1, 0)
    HGG_3, _ = conv_BN_scale_relu(split, HGG_2, 64, 3, 1, 0)
    HGG_4 = L.Pooling(HGG_3,
                      pool=P.Pooling.MAX,
                      global_pooling=False,
                      stride=2,
                      kernel_size=3)

    HGG_5, _ = conv_BN_scale_relu(split, HGG_4, 128, 3, 1, 0)

    HGG_6, _ = conv_BN_scale_relu(split, HGG_5, 128, 3, 1, 0)

    HGG_7, _ = conv_BN_scale_relu(split, HGG_6, 128, 3, 1, 0)

    HGG_8 = L.Pooling(HGG_7,
                      pool=P.Pooling.MAX,
                      global_pooling=False,
                      stride=2,
                      kernel_size=3)

    HGG_8a = L.Flatten(HGG_8)

    HGG_9 = L.ReLU(HGG_8a)
    HGG_9a = L.InnerProduct(L.Dropout(HGG_9, dropout_ratio=0.1),
                            num_output=256,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'))
    #    HGG_9a = L.InnerProduct(HGG_9, num_output = 256)

    HGG_10 = L.ReLU(HGG_9a)
    HGG_10a = L.InnerProduct(L.Dropout(HGG_10, dropout_ratio=0.1),
                             num_output=256,
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant'))
    #    HGG_10a = L.InnerProduct(HGG_10,num_output = 256)

    HGG_11 = L.Dropout(HGG_10a, dropout_ratio=0.1)
    HGG_11a = L.InnerProduct(HGG_11,
                             num_output=5,
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant'))

    acc = L.Accuracy(HGG_11a, labels)
    loss = L.SoftmaxWithLoss(HGG_11a, labels)
    return to_proto(loss, acc)
Example #8
def generate_net(train_lmdb, val_lmdb, train_batch_size, test_batch_size):
    net = caffe.NetSpec()

    net.data, net.label = L.Data(source=train_lmdb,
                                 backend=caffe.params.Data.LMDB,
                                 batch_size=train_batch_size,
                                 ntop=2,
                                 transform_param=dict(
                                     crop_size=224,
                                     mean_value=[103.94, 116.78, 123.68],
                                     scale=0.017),
                                 include=dict(phase=caffe.TRAIN))
    # note: capture the train-phase data layer now, before the test-phase
    # definition below overwrites net.data/net.label
    train_data_layer_str = str(net.to_proto())

    net.data, net.label = L.Data(source=val_lmdb,
                                 backend=caffe.params.Data.LMDB,
                                 batch_size=test_batch_size,
                                 ntop=2,
                                 transform_param=dict(
                                     crop_size=224,
                                     mean_value=[103.94, 116.78, 123.68],
                                     scale=0.017),
                                 include=dict(phase=caffe.TEST))
    # bone
    net.conv1 = L.Convolution(
        net.data,
        num_output=32,
        kernel_size=3,
        stride=2,
        pad=1,
        weight_filler={"type": "xavier"},
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])

    net.tops['conv1/bn'] = L.BatchNorm(net.conv1,
                                       param=[
                                           dict(lr_mult=0, decay_mult=0),
                                           dict(lr_mult=0, decay_mult=0),
                                           dict(lr_mult=0, decay_mult=0)
                                       ],
                                       in_place=False)

    net.tops['conv1/scale'] = L.Scale(
        net.tops['conv1/bn'],
        param=[dict(lr_mult=1, decay_mult=0),
               dict(lr_mult=2, decay_mult=0)],
        scale_param={
            'filler': {
                'value': 1
            },
            'bias_term': True,
            'bias_filler': {
                'value': 0
            }
        },
        in_place=True)

    # ReLU6 is not in stock BVLC Caffe; it comes from MobileNet-oriented forks
    net.conv1_relu = L.ReLU6(net.tops['conv1/scale'], in_place=True)

    bottleneck(net, net.conv1_relu, 'conv2', 32, 1, 16, 1)

    bottleneck(net, net.tops['conv2/1x1_down/scale'], 'conv3_1', 16, 6, 24, 2)

    bottleneck(net, net.tops['conv3_1/1x1_down/scale'], 'conv3_2', 24, 6, 24,
               1)

    bottleneck(net, net.tops['conv3_2/add'], 'conv4_1', 24, 6, 32, 2)

    bottleneck(net, net.tops['conv4_1/1x1_down/scale'], 'conv4_2', 32, 6, 32,
               1)

    bottleneck(net, net.tops['conv4_2/add'], 'conv4_3', 32, 6, 32, 1)

    bottleneck(net, net.tops['conv4_3/add'], 'conv5_1', 32, 6, 64, 2)

    bottleneck(net, net.tops['conv5_1/1x1_down/scale'], 'conv5_2', 64, 6, 64,
               1)

    bottleneck(net, net.tops['conv5_2/add'], 'conv5_3', 64, 6, 64, 1)

    bottleneck(net, net.tops['conv5_3/add'], 'conv5_4', 64, 6, 64, 1)

    bottleneck(net, net.tops['conv5_4/add'], 'conv6_1', 64, 6, 96, 1)

    bottleneck(net, net.tops['conv6_1/1x1_down/scale'], 'conv6_2', 96, 6, 96,
               1)

    bottleneck(net, net.tops['conv6_2/add'], 'conv6_3', 96, 6, 96, 1)

    bottleneck(net, net.tops['conv6_3/add'], 'conv7_1', 96, 6, 160, 2)

    bottleneck(net, net.tops['conv7_1/1x1_down/scale'], 'conv7_2', 160, 6, 160,
               1)

    bottleneck(net, net.tops['conv7_2/add'], 'conv7_3', 160, 6, 160, 1)

    bottleneck(net, net.tops['conv7_3/add'], 'conv8', 160, 6, 320, 1)

    net.conv9 = L.Convolution(
        net.tops['conv8/1x1_down/scale'],
        num_output=1280,
        kernel_size=1,
        weight_filler={"type": "xavier"},
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])

    net.tops['conv9/bn'] = L.BatchNorm(net.conv9,
                                       param=[
                                           dict(lr_mult=0, decay_mult=0),
                                           dict(lr_mult=0, decay_mult=0),
                                           dict(lr_mult=0, decay_mult=0)
                                       ],
                                       in_place=False)

    net.tops['conv9/scale'] = L.Scale(
        net.tops['conv9/bn'],
        param=[dict(lr_mult=1, decay_mult=0),
               dict(lr_mult=2, decay_mult=0)],
        scale_param={
            'filler': {
                'value': 1
            },
            'bias_term': True,
            'bias_filler': {
                'value': 0
            }
        },
        in_place=True)
    net.conv9_relu = L.ReLU6(net.tops['conv9/scale'], in_place=True)

    # global average pooling
    net.pool10 = L.Pooling(net.conv9_relu,
                           pool=caffe.params.Pooling.AVE,
                           global_pooling=True)

    # 1000 cls
    net.conv11 = L.Convolution(
        net.pool10,
        num_output=1000,
        kernel_size=1,
        weight_filler={
            "type": "gaussian",
            "mean": 0,
            "std": 0.01
        },
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)])

    # softmax loss
    net.loss = L.SoftmaxWithLoss(net.conv11,
                                 net.label,
                                 include=dict(phase=caffe.TRAIN))

    # accuracy
    net.accuracy = L.Accuracy(net.conv11,
                              net.label,
                              include=dict(phase=caffe.TEST))
    net.accuracy_top5 = L.Accuracy(net.conv11,
                                   net.label,
                                   include=dict(phase=caffe.TEST),
                                   accuracy_param=dict(top_k=5))

    return train_data_layer_str + str(net.to_proto())
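The bottleneck helper is not defined in this snippet. A compact sketch of a MobileNetV2 inverted residual consistent with the top names generate_net relies on ('<name>/1x1_down/scale' for the projection output, '<name>/add' when a skip connection applies); fillers and BN/Scale parameters are assumptions:

# assumed helper: 1x1 expand -> 3x3 depthwise -> 1x1 project (+ residual add)
def bottleneck(net, bottom, name, in_c, t, out_c, stride):
    def conv_bn(bottom, suffix, nout, ks, stride, group, relu):
        net.tops[name + suffix] = L.Convolution(
            bottom, num_output=nout, kernel_size=ks, stride=stride,
            pad=ks // 2, group=group, bias_term=False,
            weight_filler={'type': 'xavier'})
        net.tops[name + suffix + '/bn'] = L.BatchNorm(net.tops[name + suffix])
        net.tops[name + suffix + '/scale'] = L.Scale(
            net.tops[name + suffix + '/bn'], in_place=True,
            scale_param=dict(bias_term=True))
        top = net.tops[name + suffix + '/scale']
        if relu:
            net.tops[name + suffix + '/relu6'] = L.ReLU6(top, in_place=True)
        return top

    mid = in_c * t
    top = conv_bn(bottom, '/1x1_up', mid, 1, 1, 1, relu=True)      # expand
    top = conv_bn(top, '/3x3_dw', mid, 3, stride, mid, relu=True)  # depthwise
    top = conv_bn(top, '/1x1_down', out_c, 1, 1, 1, relu=False)    # project
    if stride == 1 and in_c == out_c:
        net.tops[name + '/add'] = L.Eltwise(bottom, top)           # skip connection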
Example #9
    def compile_time_operation(self, learning_option, cluster):
        """
        define pooling(max/average pooling) operation for input blob
        """
        # get input
        input_ = self.get_input('input')
        indim = self.get_dimension('input')  # NCHW
        # get attr
        # required field
        pool_type = self.get_attr('pool_type', default=None)
        if pool_type is None:
            raise Exception(
                '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                    'pool_type', self.name))
        kernel_size = self.get_attr('kernel_size', default=None)
        if kernel_size is None:
            raise Exception(
                '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                    'kernel_size', self.name))

        # optional field
        padding = self.get_attr('padding', default='VALID')
        stride = self.get_attr('stride', default=1)
        engine = self.get_attr('engine', default='DEFAULT')
        global_pooling = self.get_attr('global_pooling', default=False)

        # padding: compute spatial output dims over the H/W axes (indim is NCHW)
        if padding == 'SAME':
            outdim = [
                np.ceil(float(indim[i + 2]) / float(stride)) for i in range(2)
            ]
            outdim.insert(0, indim[0])
            outdim.insert(1, indim[1])
            p = [
                int(((outdim[i + 2] - 1) * stride + kernel_size[i] -
                     indim[i + 2]) / 2) for i in range(2)
            ]
        else:
            outdim = [
                np.ceil(float(indim[i + 2] - kernel_size[i] + 1) / float(stride))
                for i in range(2)
            ]
            outdim.insert(0, indim[0])
            outdim.insert(1, indim[1])
            p = [0, 0]

        if engine == 'DEFAULT':
            engine_idx = 0
        elif engine == 'CAFFE':
            engine_idx = 1
        elif engine == 'CUDNN':
            engine_idx = 2
        else:
            raise Exception(
                '[DLMDL ERROR]: unknown engine {0} in {1} layer.'.format(
                    engine, self.name))

        # pool=0: max pooling, pool=1: average pooling
        if pool_type == 'MAX':
            pool_type_idx = 0
        elif pool_type == 'AVG':
            pool_type_idx = 1
        else:
            raise Exception(
                '[DLMDL ERROR]: unknown pool_type {0} in {1} layer.'.format(
                    pool_type, self.name))
        pool = L.Pooling(input_,
                         name=self.name,
                         pool=pool_type_idx,
                         kernel_h=kernel_size[0],
                         kernel_w=kernel_size[1],
                         stride=stride,
                         pad_h=p[0],
                         pad_w=p[1],
                         engine=engine_idx,
                         global_pooling=global_pooling)

        self.set_output('output', pool)
        self.set_dimension('output', outdim)
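A quick numeric check of the SAME-padding arithmetic above (shapes assumed for illustration): a 28x28 input with a 3x3 kernel and stride 1 keeps outdim = ceil(28/1) = 28 and needs p = ((28 - 1) * 1 + 3 - 28) // 2 = 1 on each spatial axis:

# illustration only; indim is NCHW as in compile_time_operation
import numpy as np
indim, kernel_size, stride = [1, 64, 28, 28], [3, 3], 1
outdim = [int(np.ceil(float(indim[i + 2]) / stride)) for i in range(2)]                  # [28, 28]
p = [((outdim[i] - 1) * stride + kernel_size[i] - indim[i + 2]) // 2 for i in range(2)]  # [1, 1]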
Example #10
def resnet18(split, mean, opt):
    n = caffe.NetSpec()

    # config python data layer
    if split == 'train':
        batch_size = opt.train_batch_size
    if split == 'val':
        batch_size = opt.val_batch_size
    if split == 'test':
        batch_size = opt.test_batch_size

    if split == 'train' or split == 'val':
        dataset_name = opt.train_dataset_name
    else:
        dataset_name = opt.test_dataset_name

    pydata_params = dict(split=split,
                         data_dir=opt.data_dir,
                         batch_size=batch_size,
                         mean=mean,
                         dataset=dataset_name,
                         use_HSV=opt.use_HSV,
                         load_size=opt.load_size,
                         crop_size=opt.crop_size)
    n.data, n.label = L.Python(module='faceData_layers',
                               layer='FaceDataLayer',
                               ntop=2,
                               param_str=str(pydata_params))

    # start building main body of network
    # The main differences:
    #   1. do not use 4*nout for certain convolution layers
    #   2. do not use bias_term for the convolution layer before the start of the residual blocks
    #   3. do not set the BN layer parameter, moving_average_fraction, to 0.9 (using default value 0.999)
    #   4. for weight filler initialization, we do not specify 'msra'
    n.conv1, n.bn_conv1, n.scale_conv1 = _conv_bn_scale(n.data,
                                                        64,
                                                        bias_term=False,
                                                        kernel_size=7,
                                                        pad=3,
                                                        stride=2)
    n.conv1_relu = L.ReLU(n.scale_conv1, in_place=True)
    n.pool1 = L.Pooling(n.conv1_relu,
                        kernel_size=3,
                        stride=2,
                        pool=P.Pooling.MAX)

    _resnet_block_2stages('2a', n, n.pool1, 64, branch1=True, initial_stride=1)
    _resnet_block_2stages('2b', n, n.res2a_relu, 64)

    _resnet_block_2stages('3a', n, n.res2b_relu, 128, branch1=True)
    _resnet_block_2stages('3b', n, n.res3a_relu, 128)

    _resnet_block_2stages('4a', n, n.res3b_relu, 256, branch1=True)
    _resnet_block_2stages('4b', n, n.res4a_relu, 256)

    _resnet_block_2stages('5a', n, n.res4b_relu, 512, branch1=True)
    _resnet_block_2stages('5b', n, n.res5a_relu, 512)

    n.pool5 = L.Pooling(n.res5b_relu,
                        kernel_size=7,
                        stride=1,
                        pool=P.Pooling.AVE)

    # fully connected classifier
    lr_ratio = 100  # lr multiplier for truncated layers
    n.fc_face1 = L.InnerProduct(n.pool5,
                                num_output=1000,
                                param=[
                                    dict(lr_mult=1 * lr_ratio, decay_mult=1),
                                    dict(lr_mult=2 * lr_ratio, decay_mult=0)
                                ],
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))
    n.fc_face2 = L.InnerProduct(n.fc_face1,
                                num_output=2,
                                param=[
                                    dict(lr_mult=1 * lr_ratio, decay_mult=1),
                                    dict(lr_mult=2 * lr_ratio, decay_mult=0)
                                ],
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))

    # loss and accuracy layer
    n.loss = L.SoftmaxWithLoss(n.fc_face2, n.label)
    n.acc = L.Accuracy(n.fc_face2, n.label)
    return n.to_proto()
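_conv_bn_scale is used by both ResNet builders but not shown. A minimal sketch consistent with its call sites (it returns the conv/bn/scale triple that the callers unpack; the filler is an assumption):

# assumed helper for the resnet18/resnet50 builders
def _conv_bn_scale(bottom, nout, bias_term=False, **kwargs):
    conv = L.Convolution(bottom, num_output=nout, bias_term=bias_term,
                         weight_filler=dict(type='gaussian', std=0.01),
                         **kwargs)
    bn = L.BatchNorm(conv, in_place=True)
    scale = L.Scale(bn, in_place=True, scale_param=dict(bias_term=True))
    return conv, bn, scale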
Example #11
def resnet50(split, mean, opt):
    n = caffe.NetSpec()

    # config python data layer
    if split == 'train':
        batch_size = opt.train_batch_size
    if split == 'val':
        batch_size = opt.val_batch_size
    if split == 'test':
        batch_size = opt.test_batch_size

    if split == 'train' or split == 'val':
        dataset_name = opt.train_dataset_name
    else:
        dataset_name = opt.test_dataset_name

    pydata_params = dict(split=split,
                         data_dir=opt.data_dir,
                         batch_size=batch_size,
                         mean=mean,
                         dataset=dataset_name,
                         use_HSV=opt.use_HSV,
                         load_size=opt.load_size,
                         crop_size=opt.crop_size)
    n.data, n.label = L.Python(module='faceData_layers',
                               layer='FaceDataLayer',
                               ntop=2,
                               param_str=str(pydata_params))

    # start building main body of network
    n.conv1, n.bn_conv1, n.scale_conv1 = _conv_bn_scale(n.data,
                                                        64,
                                                        bias_term=True,
                                                        kernel_size=7,
                                                        pad=3,
                                                        stride=2)
    n.conv1_relu = L.ReLU(n.scale_conv1)
    n.pool1 = L.Pooling(n.conv1_relu,
                        kernel_size=3,
                        stride=2,
                        pool=P.Pooling.MAX)

    _resnet_block_3stages('2a', n, n.pool1, 64, branch1=True, initial_stride=1)
    _resnet_block_3stages('2b', n, n.res2a_relu, 64)
    _resnet_block_3stages('2c', n, n.res2b_relu, 64)

    _resnet_block_3stages('3a', n, n.res2c_relu, 128, branch1=True)
    _resnet_block_3stages('3b', n, n.res3a_relu, 128)
    _resnet_block_3stages('3c', n, n.res3b_relu, 128)
    _resnet_block_3stages('3d', n, n.res3c_relu, 128)

    _resnet_block_3stages('4a', n, n.res3d_relu, 256, branch1=True)
    _resnet_block_3stages('4b', n, n.res4a_relu, 256)
    _resnet_block_3stages('4c', n, n.res4b_relu, 256)
    _resnet_block_3stages('4d', n, n.res4c_relu, 256)
    _resnet_block_3stages('4e', n, n.res4d_relu, 256)
    _resnet_block_3stages('4f', n, n.res4e_relu, 256)

    _resnet_block_3stages('5a', n, n.res4f_relu, 512, branch1=True)
    _resnet_block_3stages('5b', n, n.res5a_relu, 512)
    _resnet_block_3stages('5c', n, n.res5b_relu, 512)

    n.pool5 = L.Pooling(n.res5c_relu,
                        kernel_size=7,
                        stride=1,
                        pool=P.Pooling.AVE)

    # fully connected classifier
    lr_ratio = 100  # lr multiplier for truncated layers
    n.fc_face1 = L.InnerProduct(n.pool5,
                                num_output=1000,
                                param=[
                                    dict(lr_mult=1 * lr_ratio, decay_mult=1),
                                    dict(lr_mult=2 * lr_ratio, decay_mult=0)
                                ],
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))
    n.fc_face2 = L.InnerProduct(n.fc_face1,
                                num_output=2,
                                param=[
                                    dict(lr_mult=1 * lr_ratio, decay_mult=1),
                                    dict(lr_mult=2 * lr_ratio, decay_mult=0)
                                ],
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))

    # loss and accuracy layer
    n.loss = L.SoftmaxWithLoss(n.fc_face2, n.label)
    n.acc = L.Accuracy(n.fc_face2, n.label)
    return n.to_proto()
Example #12
def mfb_coatt(mode, batchsize, T, question_vocab_size, folder):
    n = caffe.NetSpec()
    mode_str = json.dumps({
        'mode': mode,
        'batchsize': batchsize,
        'folder': folder
    })
    if mode == 'val':
        n.data, n.cont, n.img_feature, n.label, n.glove = L.Python( \
            module='vqa_data_layer', layer='VQADataProviderLayer', \
            param_str=mode_str, ntop=5 )
    else:
        n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(\
            module='vqa_data_layer_kld', layer='VQADataProviderLayer', \
            param_str=mode_str, ntop=5 )
    n.embed = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \
                         weight_filler=dict(type='xavier'))
    n.embed_tanh = L.TanH(n.embed)
    concat_word_embed = [n.embed_tanh, n.glove]
    n.concat_embed = L.Concat(*concat_word_embed,
                              concat_param={'axis': 2})  # T x N x 600

    # LSTM
    n.lstm1 = L.LSTM(\
                   n.concat_embed, n.cont,\
                   recurrent_param=dict(\
                       num_output=config.LSTM_UNIT_NUM,\
                       weight_filler=dict(type='xavier')))
    n.lstm1_droped = L.Dropout(
        n.lstm1, dropout_param={'dropout_ratio': config.LSTM_DROPOUT_RATIO})
    n.lstm1_resh = L.Permute(n.lstm1_droped,
                             permute_param=dict(order=[1, 2, 0]))
    n.lstm1_resh2 = L.Reshape(n.lstm1_resh, \
            reshape_param=dict(shape=dict(dim=[0,0,0,1])))
    '''
    Question Attention
    '''
    n.qatt_conv1 = L.Convolution(n.lstm1_resh2,
                                 kernel_size=1,
                                 stride=1,
                                 num_output=512,
                                 pad=0,
                                 weight_filler=dict(type='xavier'))
    n.qatt_relu = L.ReLU(n.qatt_conv1)
    n.qatt_conv2 = L.Convolution(n.qatt_relu,
                                 kernel_size=1,
                                 stride=1,
                                 num_output=config.NUM_QUESTION_GLIMPSE,
                                 pad=0,
                                 weight_filler=dict(type='xavier'))
    n.qatt_reshape = L.Reshape(
        n.qatt_conv2,
        reshape_param=dict(shape=dict(dim=[
            -1, config.NUM_QUESTION_GLIMPSE, config.MAX_WORDS_IN_QUESTION, 1
        ])))  # N*NUM_QUESTION_GLIMPSE*15
    n.qatt_softmax = L.Softmax(n.qatt_reshape, axis=2)

    qatt_maps = L.Slice(n.qatt_softmax,
                        ntop=config.NUM_QUESTION_GLIMPSE,
                        slice_param={'axis': 1})
    dummy_lstm = L.DummyData(shape=dict(dim=[batchsize, 1]),
                             data_filler=dict(type='constant', value=1),
                             ntop=1)
    qatt_feature_list = []
    for i in range(config.NUM_QUESTION_GLIMPSE):
        if config.NUM_QUESTION_GLIMPSE == 1:
            n.__setattr__(
                'qatt_feat%d' % i,
                L.SoftAttention(n.lstm1_resh2, qatt_maps, dummy_lstm))
        else:
            n.__setattr__(
                'qatt_feat%d' % i,
                L.SoftAttention(n.lstm1_resh2, qatt_maps[i], dummy_lstm))
        qatt_feature_list.append(n.__getattr__('qatt_feat%d' % i))
    n.qatt_feat_concat = L.Concat(*qatt_feature_list)
    '''
    Image Attention with MFB
    '''
    n.q_feat_resh = L.Reshape(
        n.qatt_feat_concat, reshape_param=dict(shape=dict(dim=[0, -1, 1, 1])))
    n.i_feat_resh = L.Reshape(
        n.img_feature,
        reshape_param=dict(shape=dict(
            dim=[0, -1, config.IMG_FEAT_WIDTH, config.IMG_FEAT_WIDTH])))

    n.iatt_q_proj = L.InnerProduct(n.q_feat_resh,
                                   num_output=config.JOINT_EMB_SIZE,
                                   weight_filler=dict(type='xavier'))
    n.iatt_q_resh = L.Reshape(
        n.iatt_q_proj,
        reshape_param=dict(shape=dict(dim=[-1, config.JOINT_EMB_SIZE, 1, 1])))
    n.iatt_q_tile1 = L.Tile(n.iatt_q_resh, axis=2, tiles=config.IMG_FEAT_WIDTH)
    n.iatt_q_tile2 = L.Tile(n.iatt_q_tile1,
                            axis=3,
                            tiles=config.IMG_FEAT_WIDTH)

    n.iatt_i_conv = L.Convolution(n.i_feat_resh,
                                  kernel_size=1,
                                  stride=1,
                                  num_output=config.JOINT_EMB_SIZE,
                                  pad=0,
                                  weight_filler=dict(type='xavier'))
    n.iatt_i_resh1 = L.Reshape(n.iatt_i_conv,
                               reshape_param=dict(shape=dict(dim=[
                                   -1, config.JOINT_EMB_SIZE,
                                   config.IMG_FEAT_WIDTH, config.IMG_FEAT_WIDTH
                               ])))
    n.iatt_iq_eltwise = L.Eltwise(n.iatt_q_tile2,
                                  n.iatt_i_resh1,
                                  eltwise_param=dict(operation=0))  # operation=0 is PROD
    n.iatt_iq_droped = L.Dropout(
        n.iatt_iq_eltwise,
        dropout_param={'dropout_ratio': config.MFB_DROPOUT_RATIO})
    n.iatt_iq_resh2 = L.Reshape(n.iatt_iq_droped,
                                reshape_param=dict(shape=dict(
                                    dim=[-1, config.JOINT_EMB_SIZE, 196, 1])))
    n.iatt_iq_permute1 = L.Permute(n.iatt_iq_resh2,
                                   permute_param=dict(order=[0, 2, 1, 3]))
    n.iatt_iq_resh2 = L.Reshape(
        n.iatt_iq_permute1,
        reshape_param=dict(shape=dict(dim=[
            -1, config.IMG_FEAT_SIZE, config.MFB_OUT_DIM, config.MFB_FACTOR_NUM
        ])))
    # P.Pooling.SUM, SignedSqrt, L2Normalize, and SoftAttention are custom
    # layers from the MFB authors' Caffe fork, not stock BVLC Caffe
    n.iatt_iq_sumpool = L.Pooling(n.iatt_iq_resh2, pool=P.Pooling.SUM, \
                              pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.iatt_iq_permute2 = L.Permute(n.iatt_iq_sumpool,
                                   permute_param=dict(order=[0, 2, 1, 3]))

    n.iatt_iq_sqrt = L.SignedSqrt(n.iatt_iq_permute2)
    n.iatt_iq_l2 = L.L2Normalize(n.iatt_iq_sqrt)

    ## two 1x1 conv layers: MFB_OUT_DIM -> 512 -> NUM_IMG_GLIMPSE (typically 1000 -> 512 -> 2)
    n.iatt_conv1 = L.Convolution(n.iatt_iq_l2,
                                 kernel_size=1,
                                 stride=1,
                                 num_output=512,
                                 pad=0,
                                 weight_filler=dict(type='xavier'))
    n.iatt_relu = L.ReLU(n.iatt_conv1)
    n.iatt_conv2 = L.Convolution(n.iatt_relu,
                                 kernel_size=1,
                                 stride=1,
                                 num_output=config.NUM_IMG_GLIMPSE,
                                 pad=0,
                                 weight_filler=dict(type='xavier'))
    n.iatt_resh = L.Reshape(
        n.iatt_conv2,
        reshape_param=dict(shape=dict(
            dim=[-1, config.NUM_IMG_GLIMPSE, config.IMG_FEAT_SIZE])))
    n.iatt_softmax = L.Softmax(n.iatt_resh, axis=2)
    n.iatt_softmax_resh = L.Reshape(
        n.iatt_softmax,
        reshape_param=dict(shape=dict(dim=[
            -1, config.NUM_IMG_GLIMPSE, config.IMG_FEAT_WIDTH,
            config.IMG_FEAT_WIDTH
        ])))
    iatt_maps = L.Slice(n.iatt_softmax_resh,
                        ntop=config.NUM_IMG_GLIMPSE,
                        slice_param={'axis': 1})
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1),
                        ntop=1)
    iatt_feature_list = []
    for i in range(config.NUM_IMG_GLIMPSE):
        if config.NUM_IMG_GLIMPSE == 1:
            n.__setattr__('iatt_feat%d' % i,
                          L.SoftAttention(n.i_feat_resh, iatt_maps, dummy))
        else:
            n.__setattr__('iatt_feat%d' % i,
                          L.SoftAttention(n.i_feat_resh, iatt_maps[i], dummy))
        n.__setattr__('iatt_feat%d_resh'%i, L.Reshape(n.__getattr__('iatt_feat%d'%i), \
                                reshape_param=dict(shape=dict(dim=[0,-1]))))
        iatt_feature_list.append(n.__getattr__('iatt_feat%d_resh' % i))
    n.iatt_feat_concat = L.Concat(*iatt_feature_list)
    n.iatt_feat_concat_resh = L.Reshape(
        n.iatt_feat_concat, reshape_param=dict(shape=dict(dim=[0, -1, 1, 1])))
    '''
    Fine-grained Image-Question MFB fusion
    '''

    n.mfb_q_proj = L.InnerProduct(n.q_feat_resh,
                                  num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_i_proj = L.InnerProduct(n.iatt_feat_concat_resh,
                                  num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_iq_eltwise = L.Eltwise(n.mfb_q_proj,
                                 n.mfb_i_proj,
                                 eltwise_param=dict(operation=0))  # operation=0 is PROD
    n.mfb_iq_drop = L.Dropout(
        n.mfb_iq_eltwise,
        dropout_param={'dropout_ratio': config.MFB_DROPOUT_RATIO})
    n.mfb_iq_resh = L.Reshape(
        n.mfb_iq_drop,
        reshape_param=dict(shape=dict(
            dim=[-1, 1, config.MFB_OUT_DIM, config.MFB_FACTOR_NUM])))
    n.mfb_iq_sumpool = L.Pooling(n.mfb_iq_resh, pool=P.Pooling.SUM, \
                                      pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.mfb_out = L.Reshape(n.mfb_iq_sumpool,\
                                    reshape_param=dict(shape=dict(dim=[-1,config.MFB_OUT_DIM])))
    n.mfb_sign_sqrt = L.SignedSqrt(n.mfb_out)
    n.mfb_l2 = L.L2Normalize(n.mfb_sign_sqrt)

    n.prediction = L.InnerProduct(n.mfb_l2,
                                  num_output=config.NUM_OUTPUT_UNITS,
                                  weight_filler=dict(type='xavier'))
    if mode == 'val':
        n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    else:
        n.loss = L.SoftmaxKLDLoss(n.prediction, n.label)
    return n.to_proto()
Example #13
def lenet(lmdb_data, lmdb_label, batch_size, deploy, crop=64, mirror=False):
    """Simple LeNet to predict cdf."""
    data_transforms = dict(scale=1.)
    if crop:  # will crop images to [crop]x[crop] with random center
        data_transforms['crop_size'] = crop
    if mirror:  # will randomly flip images
        data_transforms['mirror'] = 1

    n = caffe.NetSpec()
    if deploy:
        input_ = "data"
        dim1 = batch_size
        dim2 = 3  # need to change these manually
        dim3 = 64
        dim4 = 64
        # placeholder top; the real input fields are spliced in via deploy_str below
        n.data = L.Layer()
    else:
        n.data = L.Data(batch_size=batch_size,
                        backend=P.Data.LMDB,
                        source=lmdb_data,
                        transform_param=data_transforms,
                        ntop=1)
        n.label = L.Data(batch_size=batch_size,
                         backend=P.Data.LMDB,
                         source=lmdb_label,
                         ntop=1)

    # first convolutional layer
    n.conv1 = L.Convolution(n.data,
                            kernel_size=5,
                            num_output=40,
                            weight_filler=dict(type='xavier'))
    n.norm1 = L.BatchNorm(n.conv1)
    n.relu1 = L.ReLU(n.norm1, in_place=True)
    n.pool1 = L.Pooling(n.relu1, kernel_size=2, stride=2, pool=P.Pooling.MAX)

    # second convolutional layer
    n.conv2 = L.Convolution(n.pool1,
                            kernel_size=5,
                            num_output=40,
                            weight_filler=dict(type='xavier'))
    n.norm2 = L.BatchNorm(n.conv2)
    n.relu2 = L.ReLU(n.norm2, in_place=True)
    n.pool2 = L.Pooling(n.relu2, kernel_size=2, stride=2, pool=P.Pooling.MAX)

    # fully connected layers
    n.drop = L.Dropout(n.pool2, dropout_ratio=0.5)
    n.ip1 = L.InnerProduct(n.drop,
                           num_output=600,
                           weight_filler=dict(type='xavier'))
    n.out = L.Sigmoid(n.ip1)
    if deploy:
        deploy_str = ('input: {}\ninput_dim: {}\n'
                      'input_dim: {}\ninput_dim: {}\n'
                      'input_dim: {}').format('"%s"' % input_, dim1, dim2,
                                              dim3, dim4)
        return (deploy_str + '\n' + 'layer {' +
                'layer {'.join(str(n.to_proto()).split('layer {')[2:]))
    else:
        n.loss = L.EuclideanLoss(n.out, n.label)
        return str(n.to_proto())
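A hedged usage sketch for the LeNet builder (paths and the deploy batch size are placeholders):

# hypothetical usage: deploy=False returns a trainable prototxt string,
# deploy=True returns an input-stub deploy string
with open('train.prototxt', 'w') as f:
    f.write(lenet('data_lmdb', 'label_lmdb', 64, deploy=False))
with open('deploy.prototxt', 'w') as f:
    f.write(lenet(None, None, 1, deploy=True))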
Example #14
def convert_symbol2proto(symbol):
    def looks_like_weight(name):
        """Internal helper to figure out if node should be hidden with `hide_weights`.
        """
        if name.endswith("_weight"):
            return True
        if name.endswith("_bias"):
            return True
        if name.endswith("_beta") or name.endswith("_gamma") or name.endswith("_moving_var") or name.endswith(
                "_moving_mean"):
            return True
        return False

    json_symbol = json.loads(symbol.tojson())
    all_nodes = json_symbol['nodes']
    no_weight_nodes = []
    for node in all_nodes:
        op = node['op']
        name = node['name']
        if op == 'null':
            if looks_like_weight(name):
                continue
        no_weight_nodes.append(node)

    # build next node dict
    next_node = dict()
    for node in no_weight_nodes:
        node_name = node['name']
        for input in node['inputs']:
            last_node_name = all_nodes[input[0]]['name']
            if last_node_name in next_node:
                next_node[last_node_name].append(node_name)
            else:
                next_node[last_node_name] = [node_name]

    supported_op_type = ['null', 'BatchNorm', 'Convolution', 'Activation', 'Pooling', 'elemwise_add', 'SliceChannel',
                         'FullyConnected', 'SoftmaxOutput', '_maximum', 'add_n', 'Concat']
    top_dict = dict()
    caffe_net = caffe.NetSpec()
    for node in no_weight_nodes:
        if node['op'] == 'null':
            input_param = dict()
            if node['name'] == 'data':
                input_param['shape'] = dict(dim=[1, 3, 160, 160])
            else:
                input_param['shape'] = dict(dim=[1])
            top_data = CL.Input(ntop=1, input_param=input_param)
            top_dict[node['name']] = [top_data]
            setattr(caffe_net, node['name'], top_data)
        elif node['op'].endswith('_copy'):
            pass
        elif node['op'] == 'BatchNorm':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if 'momentum' in attr:
                momentum = float(attr['momentum'])
            else:
                momentum = 0.9
            if 'eps' in attr:
                eps = float(attr['eps'])
            else:
                eps = 0.001
            if NO_INPLACE:
                in_place = False
            bn_top = CL.BatchNorm(top_dict[bottom_node_name][input[1]], ntop=1,
                                  batch_norm_param=dict(use_global_stats=True,
                                                        moving_average_fraction=momentum,
                                                        eps=eps), in_place=in_place)
            setattr(caffe_net, node['name'], bn_top)
            scale_top = CL.Scale(bn_top, ntop=1, scale_param=dict(bias_term=True), in_place=not NO_INPLACE)
            top_dict[node['name']] = [scale_top]
            setattr(caffe_net, node['name'] + '_scale', scale_top)
        elif node['op'] == 'Convolution':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'kernel' in attr:
                kernel_size = eval(attr['kernel'])
                assert kernel_size[0] == kernel_size[1]
                convolution_param['kernel_size'] = kernel_size[0]
            else:
                convolution_param['kernel_size'] = 1
            if 'no_bias' in attr:
                convolution_param['bias_term'] = not eval(attr['no_bias'])
            if 'num_group' in attr:
                convolution_param['group'] = int(attr['num_group'])
            convolution_param['num_output'] = int(attr['num_filter'])
            if 'pad' in attr:
                pad_size = eval(attr['pad'])
                assert pad_size[0] == pad_size[1]
                convolution_param['pad'] = pad_size[0]
            if 'stride' in attr:
                stride_size = eval(attr['stride'])
                assert stride_size[0] == stride_size[1]
                convolution_param['stride'] = stride_size[0]
            conv_top = CL.Convolution(top_dict[bottom_node_name][input[1]], ntop=1, convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'Activation':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if NO_INPLACE:
                in_place = False
            if attr['act_type'] == 'relu':
                ac_top = CL.ReLU(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            elif attr['act_type'] == 'sigmoid':
                ac_top = CL.Sigmoid(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            elif attr['act_type'] == 'tanh':
                ac_top = CL.TanH(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            else:
                raise ValueError('unsupported act_type: %s' % attr['act_type'])
            top_dict[node['name']] = [ac_top]
            setattr(caffe_net, node['name'], ac_top)
        elif node['op'] == 'Pooling':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            pooling_param = dict()
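            # PoolingParameter.PoolMethod enum: MAX = 0, AVE = 1, STOCHASTIC = 2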
            if attr['pool_type'] == 'avg':
                pooling_param['pool'] = 1
            elif attr['pool_type'] == 'max':
                pooling_param['pool'] = 0
            else:
                assert False, attr['pool_type']
            if 'global_pool' in attr and eval(attr['global_pool']) is True:
                pooling_param['global_pooling'] = True
            else:
                if 'kernel' in attr:
                    kernel_size = eval(attr['kernel'])
                    assert kernel_size[0] == kernel_size[1]
                    pooling_param['kernel_size'] = kernel_size[0]
                if 'pad' in attr:
                    pad_size = eval(attr['pad'])
                    assert pad_size[0] == pad_size[1]
                    pooling_param['pad'] = pad_size[0]
                if 'stride' in attr:
                    stride_size = eval(attr['stride'])
                    assert stride_size[0] == stride_size[1]
                    pooling_param['stride'] = stride_size[0]
            pool_top = CL.Pooling(top_dict[bottom_node_name][input[1]], ntop=1, pooling_param=pooling_param)
            top_dict[node['name']] = [pool_top]
            setattr(caffe_net, node['name'], pool_top)
        elif node['op'] == 'elemwise_add' or node['op'] == 'add_n':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            eltwise_param = dict()
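            # EltwiseParameter.EltwiseOp enum: PROD = 0, SUM = 1, MAX = 2;
            # SUM matches MXNet's elemwise_add/add_n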
            eltwise_param['operation'] = 1
            ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]], top_dict[bottom_node_name_b][input_b[1]],
                                     ntop=1, eltwise_param=eltwise_param)
            top_dict[node['name']] = [ele_add_top]
            setattr(caffe_net, node['name'], ele_add_top)
        elif node['op'] == '_maximum':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            eltwise_param = dict()
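            # operation 2 is the element-wise MAX, matching MXNet's _maximum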
            eltwise_param['operation'] = 2
            ele_max_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]], top_dict[bottom_node_name_b][input_b[1]],
                                     ntop=1, eltwise_param=eltwise_param)
            top_dict[node['name']] = [ele_max_top]
            setattr(caffe_net, node['name'], ele_max_top)
        elif node['op'] == 'SliceChannel':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            slice_param = dict()
            slice_param['slice_dim'] = 1
            slice_num = 2  # hard-coded: assumes the SliceChannel op always splits into two outputs
            slice_outputs = CL.Slice(top_dict[bottom_node_name][input[1]], ntop=slice_num, slice_param=slice_param)
            top_dict[node['name']] = slice_outputs
            for idx, output in enumerate(slice_outputs):
                setattr(caffe_net, node['name'] + '_' + str(idx), output)
        elif node['op'] == 'FullyConnected':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            inner_product_param = dict()
            inner_product_param['num_output'] = int(attr['num_hidden'])
            fc_top = CL.InnerProduct(top_dict[bottom_node_name][input[1]], ntop=1,
                                     inner_product_param=inner_product_param)
            top_dict[node['name']] = [fc_top]
            setattr(caffe_net, node['name'], fc_top)
        elif node['op'] == 'SoftmaxOutput':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            softmax_loss = CL.SoftmaxWithLoss(top_dict[bottom_node_name_a][input_a[1]],
                                              top_dict[bottom_node_name_b][input_b[1]], ntop=1)
            top_dict[node['name']] = [softmax_loss]
            setattr(caffe_net, node['name'], softmax_loss)
        elif node['op'] == 'Concat':
            if len(node['inputs']) == 2:
                input_a = node['inputs'][0]
                while True:
                    if all_nodes[input_a[0]]['op'] not in supported_op_type:
                        input_a = all_nodes[input_a[0]]['inputs'][0]
                    else:
                        break
                input_b = node['inputs'][1]
                while True:
                    if all_nodes[input_b[0]]['op'] not in supported_op_type:
                        input_b = all_nodes[input_b[0]]['inputs'][0]
                    else:
                        break
                bottom_node_name_a = all_nodes[input_a[0]]['name']
                bottom_node_name_b = all_nodes[input_b[0]]['name']
                concat_top = CL.Concat(top_dict[bottom_node_name_a][input_a[1]], top_dict[bottom_node_name_b][input_b[1]], ntop=1)
                top_dict[node['name']] = [concat_top]
                setattr(caffe_net, node['name'], concat_top)
            elif len(node['inputs']) == 3:
                input_a = node['inputs'][0]
                while True:
                    if all_nodes[input_a[0]]['op'] not in supported_op_type:
                        input_a = all_nodes[input_a[0]]['inputs'][0]
                    else:
                        break
                input_b = node['inputs'][1]
                while True:
                    if all_nodes[input_b[0]]['op'] not in supported_op_type:
                        input_b = all_nodes[input_b[0]]['inputs'][0]
                    else:
                        break
                input_c = node['inputs'][2]
                while True:
                    if all_nodes[input_c[0]]['op'] not in supported_op_type:
                        input_c = all_nodes[input_c[0]]['inputs'][0]
                    else:
                        break
                bottom_node_name_a = all_nodes[input_a[0]]['name']
                bottom_node_name_b = all_nodes[input_b[0]]['name']
                bottom_node_name_c = all_nodes[input_c[0]]['name']
                concat_top = CL.Concat(top_dict[bottom_node_name_a][input_a[1]],
                                       top_dict[bottom_node_name_b][input_b[1]],
                                       top_dict[bottom_node_name_c][input_c[1]], ntop=1)
                top_dict[node['name']] = [concat_top]
                setattr(caffe_net, node['name'], concat_top)
        else:
            logging.warning('unknown op type = %s' % node['op'])

    return caffe_net.to_proto()
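
A minimal usage sketch for the converter above (not part of the original snippet). It assumes `import mxnet as mx`, an MXNet version whose symbol JSON stores operator attributes under the 'attrs' key, and that the module-level names used above (json, caffe, CL, NO_INPLACE, logging) are already in scope.

import mxnet as mx

data = mx.sym.Variable('data')
conv = mx.sym.Convolution(data, kernel=(3, 3), num_filter=16, pad=(1, 1), name='conv0')
relu = mx.sym.Activation(conv, act_type='relu', name='relu0')

# conv0_weight/conv0_bias are filtered out by looks_like_weight();
# 'data' becomes a Caffe Input layer with the hard-coded 1x3x160x160 shape.
net_proto = convert_symbol2proto(relu)
with open('converted.prototxt', 'w') as f:
    f.write(str(net_proto))
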
def setLayers(data_source,
              batch_size,
              layername,
              kernel,
              stride,
              outCH,
              label_name,
              transform_param_in,
              deploy=False):
    # Producing the training and testing prototxt files is straightforward; the deploy
    # prototxt is trickier because its data input does not come from a layer, so we
    # have to create a workaround.
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)

    # data definition: real data layers for train/test; the deploy placeholder is built in the else branch
    if deploy == False:
        n.data, n.tops['label'] = L.CPMData(data_param=dict(
            backend=1, source=data_source, batch_size=batch_size),
                                            transform_param=transform_param_in,
                                            ntop=2)
        n.tops[label_name[1]], n.tops[label_name[0]] = L.Slice(
            n.label, slice_param=dict(axis=1, slice_point=15), ntop=2)
    else:
        input = "data"
        dim1 = 1
        dim2 = 4
        dim3 = 368
        dim4 = 368
        # make an empty "data" layer so the next layer accepting input will be able to take the correct blob name "data",
        # we will later have to remove this layer from the serialization string, since this is just a placeholder
        n.data = L.Layer()

    # something special before everything
    n.image, n.center_map = L.Slice(n.data,
                                    slice_param=dict(axis=1, slice_point=3),
                                    ntop=2)
    n.pool_center_lower = L.Pooling(n.center_map,
                                    kernel_size=9,
                                    stride=8,
                                    pool=P.Pooling.AVE)

    # walk the layer-spec array (e.g. C P C P ... C C C C) and emit one layer per code
    last_layer = 'image'
    stage = 1
    conv_counter = 1
    pool_counter = 1
    drop_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0

    for l in range(0, len(layername)):
        if layername[l] == 'C':
            if state == 'image':
                conv_name = 'conv%d_stage%d' % (conv_counter, stage)
            else:
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
            if stage == 1:
                lr_m = 5
            else:
                lr_m = 1
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer = conv_name
            if layername[l + 1] != 'L':
                if (state == 'image'):
                    ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer],
                                              in_place=True)
                else:
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer],
                                              in_place=True)
                last_layer = ReLUname
            conv_counter += 1
        elif layername[l] == 'P':  # Pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(
                n.tops[last_layer],
                kernel_size=kernel[l],
                stride=stride[l],
                pool=P.Pooling.MAX)
            last_layer = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
        elif layername[l] == 'L':
            # Loss layers exist only in the training/testing nets, not in the deploy net.
            if deploy == False:
                if stage == 1:
                    n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                        n.tops[last_layer], n.tops[label_name[0]])
                else:
                    n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                        n.tops[last_layer], n.tops[label_name[1]])

            stage += 1
            last_connect = last_layer
            last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            state = 'image'
        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer],
                    in_place=True,
                    dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            n.tops['concat_stage%d' % stage] = L.Concat(
                n.tops[last_layer],
                n.tops[last_connect],
                n.pool_center_lower,
                concat_param=dict(axis=1))
            conv_counter = 1
            state = 'fuse'
            last_layer = 'concat_stage%d' % stage
        elif layername[l] == '$':
            if not share_point:
                share_point = last_layer
            else:
                last_layer = share_point

    # final process
    stage -= 1
    if stage == 1:
        n.silence = L.Silence(n.pool_center_lower, ntop=0)

    if deploy == False:
        return str(n.to_proto())
        # for generating the deploy net
    else:
        # generate the input information header string
        deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input + '"', dim1, dim2, dim3, dim4)
        # assemble the input header with the net layers string.  remove the first placeholder layer from the net string.
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(
            str(n.to_proto()).split('layer {')[2:])
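
A hypothetical call (label names, paths, and dimensions are placeholders, not from the original) illustrating the spec convention used above: 'C' = conv (+ReLU), 'P' = max-pool, 'D' = dropout, 'L' = Euclidean loss / stage boundary, '@' = concat fusion, '$' = share point. CPMData comes from the convolutional-pose-machines Caffe fork; an empty transform_param is passed here for brevity.

layername = ['C', 'P', 'C', 'P', 'C', 'C', 'L']
kernel    = [ 9,   3,   9,   3,   5,   1,   0]
stride    = [ 1,   2,   1,   2,   1,   1,   0]
outCH     = [128,  0,  128,   0, 512,  15,   0]
proto = setLayers('/path/to/train_lmdb', 16, layername, kernel, stride,
                  outCH, ['label_1st_lower', 'label_lower'], dict(),
                  deploy=False)
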
def setLayers_twoBranches(data_source,
                          batch_size,
                          layername,
                          kernel,
                          stride,
                          outCH,
                          label_name,
                          transform_param_in,
                          deploy=False,
                          batchnorm=0,
                          lr_mult_distro=[1, 1, 1]):
    # Producing the training and testing prototxt files is straightforward; the deploy
    # prototxt is trickier because its data input does not come from a layer, so we
    # have to create a workaround.
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)
    num_parts = transform_param_in['num_parts']

    if deploy == False and "lmdb" not in data_source:
        if (len(label_name) == 1):
            n.data, n.tops[label_name[0]] = L.HDF5Data(hdf5_data_param=dict(
                batch_size=batch_size, source=data_source),
                                                       ntop=2)
        elif (len(label_name) == 2):
            n.data, n.tops[label_name[0]], n.tops[label_name[1]] = L.HDF5Data(
                hdf5_data_param=dict(batch_size=batch_size,
                                     source=data_source),
                ntop=3)
    # LMDB input via the CPM fork's CPMData layer for train/test
    elif deploy == False:
        n.data, n.tops['label'] = L.CPMData(
            data_param=dict(backend=1,
                            source=data_source,
                            batch_size=batch_size),
            cpm_transform_param=transform_param_in,
            ntop=2)
        n.tops[label_name[2]], n.tops[label_name[3]], n.tops[
            label_name[4]], n.tops[label_name[5]] = L.Slice(
                n.label,
                slice_param=dict(
                    axis=1, slice_point=[38, num_parts + 1, num_parts + 39]),
                ntop=4)
        n.tops[label_name[0]] = L.Eltwise(n.tops[label_name[2]],
                                          n.tops[label_name[4]],
                                          operation=P.Eltwise.PROD)
        n.tops[label_name[1]] = L.Eltwise(n.tops[label_name[3]],
                                          n.tops[label_name[5]],
                                          operation=P.Eltwise.PROD)

    else:
        input = "data"
        dim1 = 1
        dim2 = 4
        dim3 = 368
        dim4 = 368
        # make an empty "data" layer so the next layer accepting input will be able to take the correct blob name "data",
        # we will later have to remove this layer from the serialization string, since this is just a placeholder
        n.data = L.Layer()

    # something special before everything
    n.image, n.center_map = L.Slice(n.data,
                                    slice_param=dict(axis=1, slice_point=3),
                                    ntop=2)
    n.silence2 = L.Silence(n.center_map, ntop=0)
    #n.pool_center_lower = L.Pooling(n.center_map, kernel_size=9, stride=8, pool=P.Pooling.AVE)

    # walk the layer-spec array (e.g. C P C P ... C C C C) and emit one layer per code
    last_layer = ['image', 'image']
    stage = 1
    conv_counter = 1
    pool_counter = 1
    drop_counter = 1
    local_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0

    for l in range(0, len(layername)):
        if layername[l] == 'V':  #pretrained VGG layers
            conv_name = 'conv%d_%d' % (pool_counter, local_counter)
            lr_m = lr_mult_distro[0]
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print '%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m)
            ReLUname = 'relu%d_%d' % (pool_counter, local_counter)
            n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
            local_counter += 1
            print ReLUname
        elif layername[l] == 'B':
            pool_counter += 1
            local_counter = 1
        elif layername[l] == 'C':
            if state == 'image':
                #conv_name = 'conv%d_stage%d' % (conv_counter, stage)
                conv_name = 'conv%d_%d_CPM' % (
                    pool_counter, local_counter
                )  # no image state in subsequent stages
                lr_m = lr_mult_distro[1]
            else:  # fuse
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
                lr_m = lr_mult_distro[2]
                conv_counter += 1
            #if stage == 1:
            #    lr_m = 1
            #else:
            #    lr_m = lr_sub
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print '%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m)

            if layername[l + 1] != 'L':
                if (state == 'image'):
                    if (batchnorm == 1):
                        batchnorm_name = 'bn%d_stage%d' % (conv_counter, stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[
                                dict(lr_mult=0),
                                dict(lr_mult=0),
                                dict(lr_mult=0)
                            ])
                        #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                        last_layer[0] = batchnorm_name
                    #ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                    ReLUname = 'relu%d_%d_CPM' % (pool_counter, local_counter)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]],
                                              in_place=True)
                else:
                    if (batchnorm == 1):
                        batchnorm_name = 'Mbn%d_stage%d' % (conv_counter,
                                                            stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[
                                dict(lr_mult=0),
                                dict(lr_mult=0),
                                dict(lr_mult=0)
                            ])
                        #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                        last_layer[0] = batchnorm_name
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]],
                                              in_place=True)
                #last_layer = ReLUname
                print ReLUname

            #conv_counter += 1
            local_counter += 1

        elif layername[l] == 'C2':
            for level in range(0, 2):
                if state == 'image':
                    #conv_name = 'conv%d_stage%d' % (conv_counter, stage)
                    conv_name = 'conv%d_%d_CPM_L%d' % (
                        pool_counter, local_counter, level + 1
                    )  # no image state in subsequent stages
                    lr_m = lr_mult_distro[1]
                else:  # fuse
                    conv_name = 'Mconv%d_stage%d_L%d' % (conv_counter, stage,
                                                         level + 1)
                    lr_m = lr_mult_distro[2]
                    #conv_counter += 1
                #if stage == 1:
                #    lr_m = 1
                #else:
                #    lr_m = lr_sub
                if layername[l + 1] == 'L2' or layername[l + 1] == 'L3':
                    if level == 0:
                        outCH[l] = 38
                    else:
                        outCH[l] = 19

                n.tops[conv_name] = L.Convolution(
                    n.tops[last_layer[level]],
                    kernel_size=kernel[l],
                    num_output=outCH[l],
                    pad=int(math.floor(kernel[l] / 2)),
                    param=[
                        dict(lr_mult=lr_m, decay_mult=1),
                        dict(lr_mult=lr_m * 2, decay_mult=0)
                    ],
                    weight_filler=dict(type='gaussian', std=0.01),
                    bias_filler=dict(type='constant'))
                last_layer[level] = conv_name
                print '%s\tch=%d\t%.1f' % (last_layer[level], outCH[l], lr_m)

                if layername[l + 1] != 'L2' and layername[l + 1] != 'L3':
                    if (state == 'image'):
                        if (batchnorm == 1):
                            batchnorm_name = 'bn%d_stage%d_L%d' % (
                                conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[
                                    dict(lr_mult=0),
                                    dict(lr_mult=0),
                                    dict(lr_mult=0)
                                ])
                            #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                            last_layer[level] = batchnorm_name
                        #ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                        ReLUname = 'relu%d_%d_CPM_L%d' % (
                            pool_counter, local_counter, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]],
                                                  in_place=True)
                    else:
                        if (batchnorm == 1):
                            batchnorm_name = 'Mbn%d_stage%d_L%d' % (
                                conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[
                                    dict(lr_mult=0),
                                    dict(lr_mult=0),
                                    dict(lr_mult=0)
                                ])
                            #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                            last_layer[level] = batchnorm_name
                        ReLUname = 'Mrelu%d_stage%d_L%d' % (conv_counter,
                                                            stage, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]],
                                                  in_place=True)
                    print ReLUname

            conv_counter += 1
            local_counter += 1

        elif layername[l] == 'P':  # Pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(
                n.tops[last_layer[0]],
                kernel_size=kernel[l],
                stride=stride[l],
                pool=P.Pooling.MAX)
            last_layer[0] = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
            local_counter = 1
            conv_counter += 1
            print last_layer[0]

        elif layername[l] == 'L':
            # Loss layers exist only in the training/testing nets, not in the deploy net.
            if deploy == False and "lmdb" not in data_source:
                n.tops['map_vec_stage%d' % stage] = L.Flatten(
                    n.tops[last_layer[0]])
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops['map_vec_stage%d' % stage], n.tops[label_name[1]])
            elif deploy == False:
                level = 1
                name = 'weight_stage%d' % stage
                n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                         n.tops[label_name[(level + 2)]],
                                         operation=P.Eltwise.PROD)
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops[name], n.tops[label_name[level]])

            print 'loss %d' % stage
            stage += 1
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'L2':
            # Loss layers exist only in the training/testing nets, not in the deploy net.
            weight = [lr_mult_distro[3], 1]
            # print lr_mult_distro[3]
            for level in range(0, 2):
                if deploy == False and "lmdb" not in data_source:
                    n.tops['map_vec_stage%d_L%d' %
                           (stage, level + 1)] = L.Flatten(
                               n.tops[last_layer[level]])
                    n.tops['loss_stage%d_L%d' %
                           (stage, level + 1)] = L.EuclideanLoss(
                               n.tops['map_vec_stage%d_L%d' % (stage, level + 1)],
                               n.tops[label_name[level]],
                               loss_weight=weight[level])
                elif deploy == False:
                    name = 'weight_stage%d_L%d' % (stage, level + 1)
                    n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                             n.tops[label_name[(level + 2)]],
                                             operation=P.Eltwise.PROD)
                    n.tops['loss_stage%d_L%d' %
                           (stage, level + 1)] = L.EuclideanLoss(
                               n.tops[name],
                               n.tops[label_name[level]],
                               loss_weight=weight[level])

                print 'loss %d level %d' % (stage, level + 1)

            stage += 1
            #last_connect = last_layer
            #last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'L3':
            # Loss layers exist only in the training/testing nets, not in the deploy net.
            weight = [lr_mult_distro[3], 1]
            # print lr_mult_distro[3]
            if deploy == False:
                level = 0
                n.tops['loss_stage%d_L%d' %
                       (stage, level + 1)] = L.Euclidean2Loss(
                           n.tops[last_layer[level]],
                           n.tops[label_name[level]],
                           n.tops[label_name[2]],
                           loss_weight=weight[level])
                print 'loss %d level %d' % (stage, level + 1)
                level = 1
                n.tops['loss_stage%d_L%d' %
                       (stage, level + 1)] = L.EuclideanLoss(
                           n.tops[last_layer[level]],
                           n.tops[label_name[level]],
                           loss_weight=weight[level])
                print 'loss %d level %d' % (stage, level + 1)

            stage += 1
            #last_connect = last_layer
            #last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer[0]],
                    in_place=True,
                    dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            #if not share_point:
            #    share_point = last_layer
            n.tops['concat_stage%d' % stage] = L.Concat(
                n.tops[last_layer[0]],
                n.tops[last_layer[1]],
                n.tops[share_point],
                concat_param=dict(axis=1))

            local_counter = 1
            state = 'fuse'
            last_layer[0] = 'concat_stage%d' % stage
            last_layer[1] = 'concat_stage%d' % stage
            print last_layer
        elif layername[l] == '$':
            share_point = last_layer[0]
            pool_counter += 1
            local_counter = 1
            print 'share'

    # final process
    stage -= 1
    #if stage == 1:
    #    n.silence = L.Silence(n.pool_center_lower, ntop=0)

    if deploy == False:
        return str(n.to_proto())
        # for generating the deploy net
    else:
        # generate the input information header string
        deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input + '"', dim1, dim2, dim3, dim4)
        # assemble the input header with the net layers string.  remove the first placeholder layer from the net string.
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(
            str(n.to_proto()).split('layer {')[2:])
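
Note that the 'L2' and 'L3' branches read lr_mult_distro[3] as a loss weight, so any spec containing them needs a four-element list despite the [1, 1, 1] default. A hypothetical call (all names and values are placeholders, not from the original):

proto = setLayers_twoBranches(
    '/path/to/train.h5', 8,
    ['V', 'B', 'C2', 'L2'], [3, 0, 3, 0], [1, 0, 1, 0], [64, 0, 128, 0],
    ['label_vec', 'label_heat'], dict(num_parts=18),
    deploy=False, batchnorm=0, lr_mult_distro=[1, 1, 1, 1])
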
def get_caffe_layer(node, net, input_dims):
    """Generate caffe layer for corresponding mxnet op.

    Args:
        node (iterable from MxnetParser): Mxnet op summary generated by MxnetParser
        net (caffe.net): Caffe netspec object

    Returns:
        caffe.layers: Equivalent caffe layer
    """
    print(node)
    if node['type'] == 'Convolution':
        assert len(node['inputs']) == 1, \
            'Convolution layers can have only one input'
        conv_params = node['attrs']
        kernel_size = make_list(conv_params['kernel'])[0]
        num_filters = make_list(conv_params['num_filter'])[0]
        if 'stride' in conv_params:
            stride = make_list(conv_params['stride'])[0]
        else:
            stride = 1
        padding = make_list(conv_params['pad'])[0]
        if 'dilate' in conv_params:
            dilation = make_list(conv_params['dilate'])[0]
        else:
            dilation = 1
        convolution_param = {'pad': padding,
                             'kernel_size': kernel_size,
                             'num_output': num_filters,
                             'stride': stride,
                             'dilation': dilation}
        return layers.Convolution(net[node['inputs'][0]],
                                  convolution_param=convolution_param)
    if node['type'] == 'Activation':
        assert len(node['inputs']) == 1, \
            'Activation layers can have only one input'
        assert node['attrs']['act_type'] == 'relu'
        return layers.ReLU(net[node['inputs'][0]])

    if node['type'] == 'Pooling':
        assert len(node['inputs']) == 1, \
            'Pooling layers can have only one input'
        kernel_size = make_list(node['attrs']['kernel'])
        stride = make_list(node['attrs']['stride'])
        pooling_type = node['attrs']['pool_type']
        if 'pad' in node['attrs']:
            padding = make_list(node['attrs']['pad'])
        else:
            padding = [0]
        if pooling_type == 'max':
            pooling = params.Pooling.MAX
        elif pooling_type == 'avg':
            pooling = params.Pooling.AVE
        else:
            raise ValueError('unsupported pool_type: %s' % pooling_type)
        pooling_param = {'pool': pooling, 'pad': padding[0],
                         'kernel_size': kernel_size[0], 'stride': stride[0]}
        return layers.Pooling(net[node['inputs'][0]],
                              pooling_param=pooling_param)

    if node['type'] == 'L2Normalization':
        across_spatial = node['attrs']['mode'] != 'channel'
        channel_shared = False
        scale_filler = {
            'type': "constant",
            'value': constants.NORMALIZATION_FACTOR
        }
        norm_param = {'across_spatial': across_spatial,
                      'scale_filler': scale_filler,
                      'channel_shared': channel_shared}
        return layers.Normalize(net[node['inputs'][0]],
                                norm_param=norm_param)

    if node['type'] == 'BatchNorm':
        bn_param = {
                    'moving_average_fraction': 0.90,
                    'use_global_stats': True,
                    'eps': 1e-5
        }
        return layers.BatchNorm(net[node['inputs'][0]],
                                in_place=True, **bn_param)

    # Note: the Permute layer is implemented only in Wei Liu's SSD branch
    # of Caffe, not in Caffe master.
    if node['type'] == 'transpose':
        order = make_list(node['attrs']['axes'])
        return layers.Permute(net[node['inputs'][0]],
                              permute_param={'order': order})

    if node['type'] == 'Flatten':
        if node['inputs'][0].endswith('anchors'):
            axis = 2
        else:
            axis = 1
        return layers.Flatten(net[node['inputs'][0]],
                              flatten_param={'axis': axis})

    if node['type'] == 'Concat':
        # In the ssd model, always concatenate along last axis,
        # since anchor boxes have an extra dimension in caffe (that includes variance).
        axis = -1
        concat_inputs = [net[inp] for inp in node['inputs']]
        return layers.Concat(*concat_inputs, concat_param={'axis': axis})

    if node['type'] == 'Reshape':
        if node['name'] == 'multibox_anchors':
            reshape_dims = [1, 2, -1]
        else:
            reshape_dims = make_list(node['attrs']['shape'])
        return layers.Reshape(net[node['inputs'][0]],
                              reshape_param={'shape': {'dim': reshape_dims}})

    if node['type'] == '_contrib_MultiBoxPrior':
        priorbox_inputs = [net[inp] for inp in node['inputs']] + [net["data"]]
        sizes = make_list(node["attrs"]["sizes"])
        min_size = sizes[0] * input_dims[0]
        max_size = int(round((sizes[1] * input_dims[0]) ** 2 / min_size))
        aspect_ratio = make_list(node["attrs"]["ratios"])
        steps = make_list(node["attrs"]["steps"])
        param = {'clip': node["attrs"]["clip"] == "true",
                 'flip': False,
                 'min_size': int(round(min_size)),
                 'max_size': int(round(max_size)),
                 'aspect_ratio': aspect_ratio,
                 'variance': [.1, .1, .2, .2],
                 'step': int(round(steps[0] * input_dims[0])),
                 }
        return layers.PriorBox(*priorbox_inputs, prior_box_param=param)

    if node['type'] == '_contrib_MultiBoxDetection':
        multibox_inputs = [net[inp] for inp in node['inputs']]
        bottom_order = [1, 0, 2]
        multibox_inputs = [multibox_inputs[i] for i in bottom_order]
        param = {
            'num_classes': constants.NUM_CLASSES,
            'share_location': True,
            'background_label_id': 0,
            'nms_param': {
                'nms_threshold': float(node['attrs']['nms_threshold']),
                'top_k': int(node['attrs']['nms_topk'])
            },
            'keep_top_k': make_list(node['attrs']['nms_topk'])[0],
            'confidence_threshold': 0.01,
            'code_type': params.PriorBox.CENTER_SIZE,
        }
        return layers.DetectionOutput(*multibox_inputs, detection_output_param=param)

    if node['type'] in ['SoftmaxActivation', 'SoftmaxOutput']:
        if 'mode' not in node['attrs']:
            axis = 1
        elif node['attrs']['mode'] == 'channel':
            axis = 1
        else:
            axis = 0
        # Note: Caffe expects the confidence scores to be flattened before the DetectionOutput layer receives them.
        return layers.Flatten(layers.Permute(layers.Softmax(net[node['inputs'][0]],
                                                            axis=axis),
                                             permute_param={'order': [0, 2, 1]}),
                              flatten_param={'axis': 1})
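
A hypothetical single-op walkthrough (the node contents are invented for illustration; make_list is the module's attribute parser, not shown in this excerpt):

net = caffe.NetSpec()
net['data'] = layers.Input(input_param=dict(shape=dict(dim=[1, 3, 300, 300])))
node = {'type': 'Convolution', 'name': 'conv0', 'inputs': ['data'],
        'attrs': {'kernel': '(3, 3)', 'num_filter': '32', 'pad': '(1, 1)'}}
net['conv0'] = get_caffe_layer(node, net, input_dims=(300, 300))
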
Beispiel #18
0
def pool(inputs, kernel_size=2, stride=2):
    pool = L.Pooling(inputs,
                     kernel_size=kernel_size,
                     stride=stride,
                     pool=P.Pooling.MAX)
    return pool
Beispiel #19
0
def global_pooling_layer(previous, name, mode="avg"):
    """ create a Global Pooling Layer """
    pool = cp.Pooling.AVE if mode == "avg" else cp.Pooling.MAX
    return cl.Pooling(previous, name=name, pool=pool, global_pooling=True)
Beispiel #20
0
def mixed_5b(net, common_bottom_layer):
    # branch 0
    top_layer_branch0 = 'Mixed_5b/Branch_0/Conv2d_1x1'
    conv_bn_layer(net,
                  in_layer=common_bottom_layer,
                  out_layer=top_layer_branch0,
                  use_bn=True,
                  use_relu=True,
                  num_output=96,
                  kernel_size=1,
                  pad=0,
                  stride=1)
    # branch 1
    top_layer_branch1 = 'Mixed_5b/Branch_1/Conv2d_0a_1x1'
    conv_bn_layer(net,
                  in_layer=common_bottom_layer,
                  out_layer=top_layer_branch1,
                  use_bn=True,
                  use_relu=True,
                  num_output=48,
                  kernel_size=1,
                  pad=0,
                  stride=1)

    bottom_layer_branch1 = top_layer_branch1
    top_layer_branch1 = 'Mixed_5b/Branch_1/Conv2d_0b_5x5'
    conv_bn_layer(net,
                  in_layer=bottom_layer_branch1,
                  out_layer=top_layer_branch1,
                  use_bn=True,
                  use_relu=True,
                  num_output=64,
                  kernel_size=5,
                  pad=2,
                  stride=1)
    # branch 2
    top_layer_branch2 = 'Mixed_5b/Branch_2/Conv2d_0a_1x1'
    conv_bn_layer(net,
                  in_layer=common_bottom_layer,
                  out_layer=top_layer_branch2,
                  use_bn=True,
                  use_relu=True,
                  num_output=64,
                  kernel_size=1,
                  pad=0,
                  stride=1)

    bottom_layer_branch2 = top_layer_branch2
    top_layer_branch2 = 'Mixed_5b/Branch_2/Conv2d_0b_3x3'
    conv_bn_layer(net,
                  in_layer=bottom_layer_branch2,
                  out_layer=top_layer_branch2,
                  use_bn=True,
                  use_relu=True,
                  num_output=96,
                  kernel_size=3,
                  pad=1,
                  stride=1)

    bottom_layer_branch2 = top_layer_branch2
    top_layer_branch2 = 'Mixed_5b/Branch_2/Conv2d_0c_3x3'
    conv_bn_layer(net,
                  in_layer=bottom_layer_branch2,
                  out_layer=top_layer_branch2,
                  use_bn=True,
                  use_relu=True,
                  num_output=96,
                  kernel_size=3,
                  pad=1,
                  stride=1)
    # branch 3
    top_layer_branch3 = 'mixed5b_branch3_avepool_0a'
    net[top_layer_branch3] = layers.Pooling(net[common_bottom_layer],
                                            pool=params.Pooling.AVE,
                                            kernel_size=3,
                                            stride=1,
                                            pad=1)

    bottom_layer_branch3 = top_layer_branch3
    top_layer_branch3 = 'Mixed_5b/Branch_3/Conv2d_0b_1x1'
    conv_bn_layer(net,
                  in_layer=bottom_layer_branch3,
                  out_layer=top_layer_branch3,
                  use_bn=True,
                  use_relu=True,
                  num_output=64,
                  kernel_size=1,
                  pad=0,
                  stride=1)

    top_layer = 'mixed5b'
    net[top_layer] = layers.Concat(*[
        net[top_layer_branch0], net[top_layer_branch1], net[top_layer_branch2],
        net[top_layer_branch3]
    ],
                                   axis=1)

    return top_layer
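
conv_bn_layer is used throughout these Inception snippets but is not defined in this excerpt. A minimal sketch of what it presumably does (Convolution, optional BatchNorm + Scale, optional ReLU, registered into net under names derived from out_layer), assuming the same layers alias:

def conv_bn_layer(net, in_layer, out_layer, use_bn, use_relu,
                  num_output, kernel_size, pad, stride):
    # convolution; the bias is usually dropped when a BatchNorm follows
    net[out_layer] = layers.Convolution(net[in_layer],
                                        num_output=num_output,
                                        kernel_size=kernel_size,
                                        pad=pad,
                                        stride=stride,
                                        bias_term=not use_bn)
    top = net[out_layer]
    if use_bn:
        # Caffe splits batch norm into BatchNorm (statistics) + Scale (gamma/beta)
        net[out_layer + '/bn'] = layers.BatchNorm(top, in_place=True)
        net[out_layer + '/scale'] = layers.Scale(net[out_layer + '/bn'],
                                                 in_place=True, bias_term=True)
        top = net[out_layer + '/scale']
    if use_relu:
        net[out_layer + '/relu'] = layers.ReLU(top, in_place=True)
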
Beispiel #21
0
def ResNet(lmdb, batch_size, mean_file, model):
    n = caffe.NetSpec()
    # data layer
    if model == False:
        n.data, n.label = L.Data(batch_size=batch_size,
                                 backend=P.Data.LMDB,
                                 source=lmdb,
                                 include=dict(phase=0),
                                 transform_param=dict(scale=1. / 255,
                                                      mirror=True,
                                                      crop_size=227,
                                                      mean_file=mean_file),
                                 ntop=2)
    else:
        n.data, n.label = L.Data(batch_size=batch_size,
                                 backend=P.Data.LMDB,
                                 source=lmdb,
                                 include=dict(phase=1),
                                 transform_param=dict(scale=1. / 255,
                                                      mirror=True,
                                                      crop_size=227,
                                                      mean_file=mean_file),
                                 ntop=2)

    # convolution layer conv1
    n.conv1 = L.Convolution(
        n.data,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=7,
        stride=2,
        num_output=64,
        pad=3,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0),
        name="conv1/7x7_s2")
    # ReLU layer
    n.relu1 = L.ReLU(n.conv1, in_place=True, name="conv1/relu_7x7")

    # pooling layer
    n.pool1 = L.Pooling(n.conv1,
                        kernel_size=2,
                        stride=2,
                        pool=P.Pooling.MAX,
                        name="pool1/3x3_s2")

    n.conv2 = L.Convolution(
        n.pool1,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=1,
        num_output=64,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type='constant', value=0.2),
        name="conv2/3x3_reduce")
    n.relu2 = L.ReLU(n.conv2, in_place=True, name="conv2/relu_3x3_reduce")
    n.conv2_3x3 = L.Convolution(
        n.conv2,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        kernel_size=3,
        num_output=192,
        pad=1,
        weight_filler=dict(type="xavier"),
        bias_filler=dict(type='constant', value=0.2),
        name="conv2/3x3")
    n.relu2_3x3 = L.ReLU(n.conv2_3x3, in_place=True, name="conv2/relu_3x3")
    n.pool2 = L.Pooling(n.conv2_3x3,
                        kernel_size=2,
                        stride=2,
                        pool=P.Pooling.MAX,
                        name="pool2/3x3_s2")
Beispiel #22
0
def mixed_7a(net, common_bottom_layer):
    # branch 0
    top_layer_branch0 = 'Mixed_7a/Branch_0/Conv2d_0a_1x1'
    conv_bn_layer(net,
                  in_layer=common_bottom_layer,
                  out_layer=top_layer_branch0,
                  use_bn=True,
                  use_relu=True,
                  num_output=256,
                  kernel_size=1,
                  pad=0,
                  stride=1)

    bottom_layer_branch0 = top_layer_branch0
    top_layer_branch0 = 'Mixed_7a/Branch_0/Conv2d_1a_3x3'
    conv_bn_layer(net,
                  in_layer=bottom_layer_branch0,
                  out_layer=top_layer_branch0,
                  use_bn=True,
                  use_relu=True,
                  num_output=384,
                  kernel_size=3,
                  pad=0,
                  stride=2)
    # branch 1
    top_layer_branch1 = 'Mixed_7a/Branch_1/Conv2d_0a_1x1'
    conv_bn_layer(net,
                  in_layer=common_bottom_layer,
                  out_layer=top_layer_branch1,
                  use_bn=True,
                  use_relu=True,
                  num_output=256,
                  kernel_size=1,
                  pad=0,
                  stride=1)

    bottom_layer_branch1 = top_layer_branch1
    top_layer_branch1 = 'Mixed_7a/Branch_1/Conv2d_1a_3x3'
    conv_bn_layer(net,
                  in_layer=bottom_layer_branch1,
                  out_layer=top_layer_branch1,
                  use_bn=True,
                  use_relu=True,
                  num_output=288,
                  kernel_size=3,
                  pad=0,
                  stride=2)

    # branch 2
    top_layer_branch2 = 'Mixed_7a/Branch_2/Conv2d_0a_1x1'
    conv_bn_layer(net,
                  in_layer=common_bottom_layer,
                  out_layer=top_layer_branch2,
                  use_bn=True,
                  use_relu=True,
                  num_output=256,
                  kernel_size=1,
                  pad=0,
                  stride=1)

    bottom_layer_branch2 = top_layer_branch2
    top_layer_branch2 = 'Mixed_7a/Branch_2/Conv2d_0b_3x3'
    conv_bn_layer(net,
                  in_layer=bottom_layer_branch2,
                  out_layer=top_layer_branch2,
                  use_bn=True,
                  use_relu=True,
                  num_output=288,
                  kernel_size=3,
                  pad=1,
                  stride=1)

    bottom_layer_branch2 = top_layer_branch2
    top_layer_branch2 = 'Mixed_7a/Branch_2/Conv2d_1a_3x3'
    conv_bn_layer(net,
                  in_layer=bottom_layer_branch2,
                  out_layer=top_layer_branch2,
                  use_bn=True,
                  use_relu=True,
                  num_output=320,
                  kernel_size=3,
                  pad=0,
                  stride=2)
    # branch 3
    top_layer_branch3 = 'mixed7a_branch3_maxpool_0'
    net[top_layer_branch3] = layers.Pooling(net[common_bottom_layer],
                                            pool=params.Pooling.MAX,
                                            kernel_size=3,
                                            stride=2,
                                            pad=0)

    top_layer = 'mixed7a'
    net[top_layer] = layers.Concat(*[
        net[top_layer_branch0], net[top_layer_branch1], net[top_layer_branch2],
        net[top_layer_branch3]
    ],
                                   axis=1)
    return top_layer
Beispiel #23
0
def VGGNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False,
               dilated=False, nopool=False, dropout=True, freeze_layers=[]):
    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)}

    assert from_layer in net.keys()
    net.conv1_1 = L.Convolution(net[from_layer], num_output=64, pad=1, kernel_size=3, **kwargs)

    net.relu1_1 = L.ReLU(net.conv1_1, in_place=True)
    net.conv1_2 = L.Convolution(net.relu1_1, num_output=64, pad=1, kernel_size=3, **kwargs)
    net.relu1_2 = L.ReLU(net.conv1_2, in_place=True)

    if nopool:
        name = 'conv1_3'
        net[name] = L.Convolution(net.relu1_2, num_output=64, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool1'
        net.pool1 = L.Pooling(net.relu1_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv2_1 = L.Convolution(net[name], num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu2_1 = L.ReLU(net.conv2_1, in_place=True)
    net.conv2_2 = L.Convolution(net.relu2_1, num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu2_2 = L.ReLU(net.conv2_2, in_place=True)

    if nopool:
        name = 'conv2_3'
        net[name] = L.Convolution(net.relu2_2, num_output=128, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool2'
        net[name] = L.Pooling(net.relu2_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv3_1 = L.Convolution(net[name], num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_1 = L.ReLU(net.conv3_1, in_place=True)
    net.conv3_2 = L.Convolution(net.relu3_1, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_2 = L.ReLU(net.conv3_2, in_place=True)
    net.conv3_3 = L.Convolution(net.relu3_2, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_3 = L.ReLU(net.conv3_3, in_place=True)

    if nopool:
        name = 'conv3_4'
        net[name] = L.Convolution(net.relu3_3, num_output=256, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool3'
        net[name] = L.Pooling(net.relu3_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv4_1 = L.Convolution(net[name], num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu4_1 = L.ReLU(net.conv4_1, in_place=True)
    net.conv4_2 = L.Convolution(net.relu4_1, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu4_2 = L.ReLU(net.conv4_2, in_place=True)
    net.conv4_3 = L.Convolution(net.relu4_2, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu4_3 = L.ReLU(net.conv4_3, in_place=True)

    if nopool:
        name = 'conv4_4'
        net[name] = L.Convolution(net.relu4_3, num_output=512, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool4'
        net[name] = L.Pooling(net.relu4_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv5_1 = L.Convolution(net[name], num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu5_1 = L.ReLU(net.conv5_1, in_place=True)
    net.conv5_2 = L.Convolution(net.relu5_1, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu5_2 = L.ReLU(net.conv5_2, in_place=True)
    net.conv5_3 = L.Convolution(net.relu5_2, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu5_3 = L.ReLU(net.conv5_3, in_place=True)

    if need_fc:
        if dilated:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3, num_output=512, pad=1, kernel_size=3, stride=1, **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=1)
        else:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3, num_output=512, pad=1, kernel_size=3, stride=2, **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)

        if fully_conv:
            if dilated:
                if reduced:
                    net.fc6 = L.Convolution(net[name], num_output=1024, pad=6, kernel_size=3, dilation=6, **kwargs)
                else:
                    net.fc6 = L.Convolution(net[name], num_output=4096, pad=6, kernel_size=7, dilation=2, **kwargs)
            else:
                if reduced:
                    net.fc6 = L.Convolution(net[name], num_output=1024, pad=3, kernel_size=3, dilation=3, **kwargs)
                else:
                    net.fc6 = L.Convolution(net[name], num_output=4096, pad=3, kernel_size=7, **kwargs)

            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True)

            if reduced:
                net.fc7 = L.Convolution(net.relu6, num_output=1024, kernel_size=1, **kwargs)
            else:
                net.fc7 = L.Convolution(net.relu6, num_output=4096, kernel_size=1, **kwargs)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True)
        else:
            # Use net[name] so this also works when nopool=True (no 'pool5' top).
            net.fc6 = L.InnerProduct(net[name], num_output=4096)
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True)
            net.fc7 = L.InnerProduct(net.relu6, num_output=4096)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True)

    # Freeze the requested layers by zeroing their learning-rate and decay
    # multipliers, so their weights stay fixed during training.
    kwargs['param'] = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]
    layers = net.keys()
    for freeze_layer in freeze_layers:
        if freeze_layer in layers:
            # NetSpec.update() rewrites a layer's params in place; it exists in
            # recent Caffe net_spec.py (older builds may lack it).
            net.update(freeze_layer, kwargs)

    return net
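# Usage sketch (assumption): this body-builder appears to follow VGGNetBody()
# from the SSD model_libs.py; the function name, keyword arguments, and the
# Input shape below are assumptions for illustration, not confirmed by this
# snippet alone.
net = caffe.NetSpec()
net.data = L.Input(shape=dict(dim=[1, 3, 300, 300]))
VGGNetBody(net, from_layer='data', need_fc=True, fully_conv=True,
           reduced=True, dilated=True, dropout=False,
           freeze_layers=['conv1_1', 'conv1_2'])
print(net.to_proto())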
Example #24
def inception_resnet_v2(net):
    net['data'] = layers.DummyData(num=1, channels=3, height=299, width=299)
    # 149 x 149 x 32
    top_layer = 'Conv2d_1a_3x3'
    conv_bn_layer(net,
                  in_layer='data',
                  out_layer=top_layer,
                  use_bn=True,
                  use_relu=True,
                  num_output=32,
                  kernel_size=3,
                  pad=0,
                  stride=2)
    bottom_layer = top_layer
    # 147 x 147 x 32
    top_layer = 'Conv2d_2a_3x3'
    conv_bn_layer(net,
                  in_layer=bottom_layer,
                  out_layer=top_layer,
                  use_bn=True,
                  use_relu=True,
                  num_output=32,
                  kernel_size=3,
                  pad=0,
                  stride=1)
    bottom_layer = top_layer
    # 147 x 147 x 64
    top_layer = 'Conv2d_2b_3x3'
    conv_bn_layer(net,
                  in_layer=bottom_layer,
                  out_layer=top_layer,
                  use_bn=True,
                  use_relu=True,
                  num_output=64,
                  kernel_size=3,
                  pad=0,
                  stride=1)
    bottom_layer = top_layer
    # 73 x 73 x 64
    top_layer = 'maxpool_3a'
    net[top_layer] = layers.Pooling(net[bottom_layer],
                                    pool=params.Pooling.MAX,
                                    kernel_size=3,
                                    stride=2,
                                    pad=0)
    bottom_layer = top_layer
    # 73 x 73 x 80
    top_layer = 'Conv2d_3b_1x1'
    conv_bn_layer(net,
                  in_layer=bottom_layer,
                  out_layer=top_layer,
                  use_bn=True,
                  use_relu=True,
                  num_output=80,
                  kernel_size=1,
                  pad=0,
                  stride=1)
    bottom_layer = top_layer
    # 71 x 71 x 192
    top_layer = 'Conv2d_4a_3x3'
    conv_bn_layer(net,
                  in_layer=bottom_layer,
                  out_layer=top_layer,
                  use_bn=True,
                  use_relu=True,
                  num_output=192,
                  kernel_size=3,
                  pad=0,
                  stride=1)
    bottom_layer = top_layer
    # 35 x 35 x 192
    top_layer = 'maxpool_5a'
    net[top_layer] = layers.Pooling(net[bottom_layer],
                                    pool=params.Pooling.MAX,
                                    kernel_size=3,
                                    stride=2,
                                    pad=0)
    bottom_layer = top_layer

    # 35 x 35 x 320
    bottom_layer = mixed_5b(net, bottom_layer)
    bottom_layer = inception_block_35(net,
                                      bottom_layer,
                                      10,
                                      0.17,
                                      repeat_name='Repeat')

    # 17 x 17 x 1088
    bottom_layer = mixed_6a(net, bottom_layer)

    bottom_layer = inception_block_17(net,
                                      bottom_layer,
                                      20,
                                      0.10,
                                      repeat_name='Repeat_1')

    bottom_layer = mixed_7a(net, bottom_layer)

    bottom_layer = inception_block_8(net,
                                     bottom_layer,
                                     9,
                                     0.20,
                                     repeat_name='Repeat_2',
                                     apply_last_relu=True)
    bottom_layer = inception_block_8(net,
                                     bottom_layer,
                                     1,
                                     0.20,
                                     repeat_name='',
                                     apply_last_relu=False)

    top_layer = 'Conv2d_7b_1x1'
    conv_bn_layer(net,
                  in_layer=bottom_layer,
                  out_layer=top_layer,
                  use_bn=True,
                  use_relu=True,
                  num_output=1536,
                  kernel_size=1,
                  pad=0,
                  stride=1)

    with open('gg.prototxt', 'w') as f:
        print(net.to_proto(), file=f)
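# Usage sketch: the builder above creates its own DummyData input, fills the
# passed-in NetSpec, and writes 'gg.prototxt' as a side effect; it assumes
# conv_bn_layer and the mixed_*/inception_block_* helpers are defined as used
# above.
net = caffe.NetSpec()
inception_resnet_v2(net)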
def max_pool(data, ks=2, stride=2):
    return L.Pooling(data, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
def global_avg_pool(bottom, kernelSize=3):
    # kernelSize is unused: with global_pooling=True, Caffe pools over the
    # entire spatial extent of the bottom blob.
    return L.Pooling(bottom, pool=P.Pooling.AVE, global_pooling=True)
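# Minimal usage sketch for the two pooling helpers above; the Input top and
# its shape are illustrative placeholders.
net = caffe.NetSpec()
net.data = L.Input(shape=dict(dim=[1, 64, 56, 56]))
net.pool1 = max_pool(net.data)        # 2x2, stride 2 -> 28x28
net.gap = global_avg_pool(net.data)   # averages each channel down to 1x1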
Example #27
def ave_pool(bottom, ks, stride=1):
    return L.Pooling(bottom, pool=P.Pooling.AVE, kernel_size=ks, stride=stride)
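# Usage sketch: a 5x5 average pool with stride 3, as in GoogLeNet-style
# auxiliary classifiers; `net.inception_4a` here is a hypothetical bottom.
# net.aux_pool = ave_pool(net.inception_4a, 5, stride=3)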
Example #28
def inception_bn(bottom, conv_output):
    conv_1x1 = L.Convolution(
        bottom,
        kernel_size=1,
        num_output=conv_output['conv_1x1'],
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant', value=0.2))
    bn_1x1 = L.BatchNorm(conv_1x1, use_global_stats=False)
    relu_1x1 = L.ReLU(bn_1x1, in_place=True)

    conv_3x3_reduce = L.Convolution(
        bottom,
        kernel_size=1,
        num_output=conv_output['conv_3x3_reduce'],
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant', value=0.2))
    bn_3x3_reduce = L.BatchNorm(conv_3x3_reduce, use_global_stats=False)
    relu_3x3_reduce = L.ReLU(bn_3x3_reduce, in_place=True)
    conv_3x3 = L.Convolution(
        bn_3x3_reduce,
        kernel_size=3,
        num_output=conv_output['conv_3x3'],
        pad=1,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant', value=0.2))
    bn_3x3 = L.BatchNorm(conv_3x3, use_global_stats=False)
    relu_3x3 = L.ReLU(bn_3x3, in_place=True)

    conv_5x5_reduce = L.Convolution(
        bottom,
        kernel_size=1,
        num_output=conv_output['conv_5x5_reduce'],
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant', value=0.2))
    bn_5x5_reduce = L.BatchNorm(conv_5x5_reduce, use_global_stats=False)
    relu_5x5_reduce = L.ReLU(bn_5x5_reduce, in_place=True)
    conv_5x5 = L.Convolution(
        bn_5x5_reduce,
        kernel_size=5,
        num_output=conv_output['conv_5x5'],
        pad=2,
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant', value=0.2))
    bn_5x5 = L.BatchNorm(conv_5x5, use_global_stats=False)
    relu_5x5 = L.ReLU(bn_5x5, in_place=True)

    pool = L.Pooling(bottom,
                     kernel_size=3,
                     stride=1,
                     pad=1,
                     pool=P.Pooling.MAX)
    pool_proj = L.Convolution(
        pool,
        kernel_size=1,
        num_output=conv_output['pool_proj'],
        param=[dict(lr_mult=1, decay_mult=1),
               dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant', value=0.2))
    bn_proj = L.BatchNorm(pool_proj, use_global_stats=False)
    relu_pool_proj = L.ReLU(bn_proj, in_place=True)
    concat = L.Concat(bn_1x1, bn_3x3, bn_5x5, bn_proj)

    return conv_1x1, bn_1x1, relu_1x1, conv_3x3_reduce, bn_3x3_reduce, relu_3x3_reduce, conv_3x3, bn_3x3, relu_3x3, \
           conv_5x5_reduce, bn_5x5_reduce, relu_5x5_reduce, conv_5x5, bn_5x5, relu_5x5, pool, pool_proj, bn_proj, \
           relu_pool_proj, concat
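# Usage sketch for inception_bn(): conv_output maps branch names to channel
# counts; the numbers below roughly follow GoogLeNet's inception_3a and are
# illustrative only. Registering the final Concat top is enough for
# to_proto() to serialize the whole branch graph with auto-generated names.
net = caffe.NetSpec()
net.data = L.Input(shape=dict(dim=[1, 192, 28, 28]))
tops = inception_bn(net.data, dict(conv_1x1=64, conv_3x3_reduce=96,
                                   conv_3x3=128, conv_5x5_reduce=16,
                                   conv_5x5=32, pool_proj=32))
net.inception_3a_output = tops[-1]   # the Concat top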
def create_neural_net(input_file, batch_size=50):
    net = caffe.NetSpec()
    net.data, net.label = L.Data(batch_size=batch_size, source=input_file, 
                                  backend = caffe.params.Data.LMDB, ntop=2, 
                                  include=dict(phase=caffe.TEST), name='juniward04')

    ## pre-process: a fixed (lr_mult=0) filter bank using the custom 'dct4'
    ## filler, followed by the custom QuantTruncAbs layer; both come from a
    ## steganalysis fork of Caffe and are not in the mainline build.
    net.conv1 = L.Convolution(net.data, num_output=16, kernel_size=4, stride=1,
                              pad=1, weight_filler=dict(type='dct4'),
                              param=[{'lr_mult': 0, 'decay_mult': 0}],
                              bias_term=False)
    TRUNCABS = caffe_pb2.QuantTruncAbsParameter.TRUNCABS
    net.quanttruncabs = L.QuantTruncAbs(net.conv1, process=TRUNCABS, threshold=8,
                                        in_place=True)

    ## block 1
    [net.conv1_proj, net.bn2, net.scale2, net.conv512_1, net.bn2_1, net.scale2_1,
     net.relu512_1, net.conv512_to_256, net.bn2_2, net.scale2_2, net.res512_to_256,
     net.relu512_to_256] = add_downsampling_block(net.quanttruncabs, 12)
    ## block 2
    [net.conv256_1, net.bn2_3, net.scale2_3, net.relu256_1, net.conv256_2, net.bn2_4, 
     net.scale2_4, net.res256_2, net.relu256_2] = add_skip_block(net.res512_to_256, 24)
    ## block 3
    [net.res256_2_proj, net.bn2_5, net.scale2_5, net.conv256_3, net.bn2_6, net.scale2_6, 
     net.relu256_3, net.conv256_to_128, net.bn2_7, net.scale2_7, net.res256_to_128, 
     net.relu256_to_128] = add_downsampling_block(net.res256_2, 24)
    ## block 4 (disabled)
    ## [net.conv128_1, net.bn2_8, net.scale2_8, net.relu128_1, net.conv128_2, net.bn2_9,
    ##  net.scale2_9, net.res128_2, net.relu128_2] = add_skip_block(net.res256_to_128, 48)
    ## block 5
    [net.res128_2_proj, net.bn2_10, net.scale2_10, net.conv128_3, net.bn2_11, net.scale2_11, 
     net.relu128_3, net.conv128_to_64, net.bn2_12, net.scale2_12, net.res128_to_64, 
     net.relu128_to_64] = add_downsampling_block(net.res256_to_128, 48)
    ## block 6
    [net.conv64_1, net.bn2_13, net.scale2_13, net.relu64_1, net.conv64_2, net.bn2_14, 
     net.scale2_14, net.res64_2, net.relu64_2] = add_skip_block(net.res128_to_64, 96)
    ## block 7
    [net.res64_2_proj, net.bn2_15, net.scale2_15, net.conv64_3, net.bn2_16, net.scale2_16, 
     net.relu64_3, net.conv64_to_32, net.bn2_17, net.scale2_17, net.res64_to_32, 
     net.relu64_to_32] = add_downsampling_block(net.res64_2, 96)
    ## block 8
    [net.conv32_1, net.bn2_18, net.scale2_18, net.relu32_1, net.conv32_2, net.bn2_19, 
     net.scale2_19, net.res32_2, net.relu32_2] = add_skip_block(net.res64_to_32, 192)
    ## block 9
    [net.res32_2_proj, net.bn2_20, net.scale2_20, net.conv32_3, net.bn2_21, net.scale2_21, 
     net.relu32_3, net.conv32_to_16, net.bn2_22, net.scale2_22, net.res32_to_16, 
     net.relu32_to_16] = add_downsampling_block(net.res32_2, 192)
    ## block 10
    [net.conv16_1, net.bn2_23, net.scale2_23, net.relu16_1, net.conv16_2, net.bn2_24, 
     net.scale2_24, net.res16_2, net.relu16_2] = add_skip_block(net.res32_to_16, 384)
    
    ## global pool
    AVE = caffe_pb2.PoolingParameter.AVE
    net.global_pool = L.Pooling(net.res16_2, pool=AVE, kernel_size=8, stride=1)
    
    ## full connecting
    net.fc = L.InnerProduct(net.global_pool, param=[{'lr_mult':1}, {'lr_mult':2}], num_output=2, 
                            weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))
    ## accuracy
    net.accuracy = L.Accuracy(net.fc, net.label, include=dict(phase=caffe.TEST))
    ## loss
    net.loss = L.SoftmaxWithLoss(net.fc, net.label)
    
    return net.to_proto()
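# Usage sketch: serialize the generated test-phase net; the LMDB path is a
# placeholder, and the net requires a Caffe build that includes the custom
# 'dct4' filler and QuantTruncAbs layer used above.
with open('test_net.prototxt', 'w') as f:
    f.write(str(create_neural_net('/path/to/test_lmdb', batch_size=40)))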
Example #30
def VGG19Net_Pre10(net, from_layer="data"):
    kwargs = {
        'param':
        [dict(lr_mult=1, decay_mult=1),
         dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0)
    }

    assert from_layer in net.keys()
    # conv1
    net.conv1_1 = L.Convolution(net[from_layer],
                                num_output=64,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu1_1 = L.ReLU(net.conv1_1, in_place=True)
    net.conv1_2 = L.Convolution(net.relu1_1,
                                num_output=64,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu1_2 = L.ReLU(net.conv1_2, in_place=True)
    # pool1
    net.pool1_stage1 = L.Pooling(net.relu1_2,
                                 pool=P.Pooling.MAX,
                                 kernel_size=2,
                                 stride=2)
    # conv2
    net.conv2_1 = L.Convolution(net.pool1_stage1,
                                num_output=128,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu2_1 = L.ReLU(net.conv2_1, in_place=True)
    net.conv2_2 = L.Convolution(net.relu2_1,
                                num_output=128,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu2_2 = L.ReLU(net.conv2_2, in_place=True)
    # pool2
    net.pool2_stage1 = L.Pooling(net.relu2_2,
                                 pool=P.Pooling.MAX,
                                 kernel_size=2,
                                 stride=2)
    # conv3
    net.conv3_1 = L.Convolution(net.pool2_stage1,
                                num_output=256,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu3_1 = L.ReLU(net.conv3_1, in_place=True)
    net.conv3_2 = L.Convolution(net.relu3_1,
                                num_output=256,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu3_2 = L.ReLU(net.conv3_2, in_place=True)
    net.conv3_3 = L.Convolution(net.relu3_2,
                                num_output=256,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu3_3 = L.ReLU(net.conv3_3, in_place=True)
    net.conv3_4 = L.Convolution(net.relu3_3,
                                num_output=256,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu3_4 = L.ReLU(net.conv3_4, in_place=True)
    # pool3
    net.pool3_stage1 = L.Pooling(net.relu3_4,
                                 pool=P.Pooling.MAX,
                                 kernel_size=2,
                                 stride=2)
    # conv4
    net.conv4_1 = L.Convolution(net.pool3_stage1,
                                num_output=512,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu4_1 = L.ReLU(net.conv4_1, in_place=True)
    net.conv4_2 = L.Convolution(net.relu4_1,
                                num_output=512,
                                pad=1,
                                kernel_size=3,
                                **kwargs)
    net.relu4_2 = L.ReLU(net.conv4_2, in_place=True)

    return net
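# Usage sketch: build the 10-layer VGG-19 front-end on an Input blob; the
# 368x368 shape is an illustrative assumption (typical for pose-estimation
# nets), and the assert on net.keys() requires a Caffe whose NetSpec
# implements keys().
net = caffe.NetSpec()
net.data = L.Input(shape=dict(dim=[1, 3, 368, 368]))
VGG19Net_Pre10(net, from_layer='data')
print(net.to_proto())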