def create_net(phase):
    global train_transform_param
    global test_transform_param
    train_transform_param = {
        'mirror': True,
        'mean_file': Params['mean_file']
    }
    test_transform_param = {
        'mean_file': Params['mean_file']
    }
    if phase == 'train':
        lmdb_file = Params['train_lmdb']
        transform_param = train_transform_param
        batch_size = Params['batch_size_per_device']
    else:
        lmdb_file = Params['test_lmdb']
        transform_param = test_transform_param
        batch_size = Params['test_batch_size']

    net = caffe.NetSpec()
    net.data, net.label = L.Data(batch_size=batch_size,
                                 backend=P.Data.LMDB,
                                 source=lmdb_file,
                                 transform_param=transform_param,
                                 ntop=2)
    # include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),

    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.0001),
        'bias_filler': dict(type='constant')}
    net.conv1 = L.Convolution(net.data, num_output=16, kernel_size=3, **kwargs)
    net.pool1 = L.Pooling(net.conv1, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    net.relu1 = L.ReLU(net.pool1, in_place=True)

    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.005),
        'bias_filler': dict(type='constant')}
    net.fc2 = L.InnerProduct(net.pool1, num_output=16, **kwargs)
    net.relu2 = L.ReLU(net.fc2, in_place=True)
    net.drop2 = L.Dropout(net.fc2, in_place=True,
                          dropout_param=dict(dropout_ratio=0.5))

    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=100), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0)}
    net.fc3 = L.InnerProduct(net.fc2, num_output=2, **kwargs)

    if phase == 'train':
        net.loss = L.SoftmaxWithLoss(net.fc3, net.label)
    elif phase == 'test':
        net.accuracy = L.Accuracy(net.fc3, net.label)
    else:
        net.prob = L.Softmax(net.fc3)

    net_proto = net.to_proto()
    if phase == 'deploy':
        # drop the LMDB data layer and declare a plain input blob instead
        del net_proto.layer[0]
        # del net_proto.layer[-1]
        net_proto.input.extend(['data'])
        net_proto.input_dim.extend([64, 3, 12, 36])
    net_proto.name = '{}_{}'.format(Params['model_name'], phase)
    return net_proto
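# Hedged usage sketch for create_net: assumes the module-level Params dict
# (mean_file, train_lmdb, test_lmdb, batch sizes, model_name) is already
# filled in; the helper name below is illustrative, not from the original.
def write_phase_prototxts(out_dir='.'):
    import os
    for phase in ('train', 'test', 'deploy'):
        proto = create_net(phase)
        with open(os.path.join(out_dir, proto.name + '.prototxt'), 'w') as f:
            f.write(str(proto))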
def mobilenet(split):
    train_data_file = root_str + 'train_lmdb'
    test_data_file = root_str + 'test_lmdb'
    # mean_file = root_str + 'imagenet_mean.binaryproto'
    if split == 'train':
        data, labels = L.Data(source=train_data_file,
                              backend=P.Data.LMDB,
                              ntop=2,
                              batch_size=128,
                              image_data_param=dict(shuffle=True),
                              # include={'phase': caffe.TRAIN},
                              transform_param=dict(
                                  # scale=0.00390625,
                                  crop_size=28,
                                  # mean_file=mean_file,
                                  mirror=True))
    else:
        data, labels = L.Data(source=test_data_file,
                              backend=P.Data.LMDB,
                              ntop=2,
                              batch_size=100,
                              image_data_param=dict(shuffle=True),
                              # include={'phase': caffe.TRAIN},
                              transform_param=dict(
                                  # scale=0.00390625,
                                  crop_size=28,
                                  # mean_file=mean_file,
                                  # mirror=True,
                              ))

    # stem conv followed by depthwise-separable blocks: a 3x3 depthwise conv
    # (group == channels) and then a 1x1 pointwise conv at each step
    if split == 'deploy':
        scale, result = conv_BN_scale_relu(split, bottom="data", nout=32, ks=3, stride=1, pad=1, group=1)
    else:
        scale, result = conv_BN_scale_relu(split, bottom=data, nout=32, ks=3, stride=1, pad=1, group=1)
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=32, ks=3, stride=1, pad=1, group=32)
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=64, ks=1, stride=1, pad=0, group=1)
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=64, ks=3, stride=2, pad=1, group=64)
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=128, ks=1, stride=1, pad=0, group=1)
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=128, ks=3, stride=2, pad=1, group=128)
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=256, ks=1, stride=1, pad=0, group=1)
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=256, ks=3, stride=2, pad=1, group=256)
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=512, ks=1, stride=1, pad=0, group=1)
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=512, ks=3, stride=2, pad=1, group=512)
    scale, result = conv_BN_scale_relu(split, bottom=result, nout=1024, ks=1, stride=1, pad=0, group=1)

    pool = L.Pooling(result, pool=P.Pooling.AVE, global_pooling=True)
    # pool = L.Pooling(result, pool=P.Pooling.AVE, kernel_size=4, stride=1, pad=0)
    IP = L.InnerProduct(pool, num_output=10,
                        weight_filler=dict(type='xavier'),
                        bias_filler=dict(type='constant'))
    if split == 'deploy':
        prob = L.Softmax(IP)
        return to_proto(prob)
    else:
        loss = L.SoftmaxWithLoss(IP, labels)
        if split == 'train':
            return to_proto(loss)
        acc = L.Accuracy(IP, labels)
        return to_proto(acc, loss)
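# Hedged usage sketch for mobilenet: assumes root_str, conv_BN_scale_relu and
# to_proto (from caffe.net_spec) are defined elsewhere in this script; the
# output file names are illustrative.
def write_mobilenet_prototxts():
    for split in ('train', 'test', 'deploy'):
        with open('mobilenet_%s.prototxt' % split, 'w') as f:
            f.write(str(mobilenet(split)))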
def max_pool(bottom, ks=2, stride=2):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
def create_net(lmdb, batch_size, mean_file, model):
    n = caffe.NetSpec()
    # data layer (model=False selects the TRAIN phase, model=True the TEST phase)
    if model == False:
        n.data, n.label = L.Data(batch_size=batch_size,
                                 backend=P.Data.LMDB,
                                 source=lmdb,
                                 include=dict(phase=0),
                                 transform_param=dict(scale=1. / 255,
                                                      mirror=True,
                                                      crop_size=227,
                                                      mean_file=mean_file),
                                 ntop=2)
    if model == True:
        n.data, n.label = L.Data(batch_size=batch_size,
                                 backend=P.Data.LMDB,
                                 source=lmdb,
                                 include=dict(phase=1),
                                 transform_param=dict(scale=1. / 255,
                                                      mirror=True,
                                                      crop_size=227,
                                                      mean_file=mean_file),
                                 ntop=2)
    # convolution layer conv1
    n.conv1 = L.Convolution(
        n.data,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        kernel_size=11,
        stride=4,
        num_output=96,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0))
    # ReLU layer
    n.relu1 = L.ReLU(n.conv1, in_place=True)
    # LRN layer
    n.norm1 = L.LRN(n.conv1, local_size=5, alpha=0.0001, beta=0.75)
    # pooling layer
    n.pool1 = L.Pooling(n.norm1, kernel_size=3, stride=2, pool=P.Pooling.MAX)
    # convolution layer conv2
    n.conv2 = L.Convolution(
        n.pool1,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        kernel_size=5,
        num_output=256,
        pad=2,
        group=2,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0.1))
    # ReLU layer relu2
    n.relu2 = L.ReLU(n.conv2, in_place=True)
    # LRN layer norm2
    n.norm2 = L.LRN(n.conv2, local_size=5, alpha=0.0001, beta=0.75)
    # pooling layer pool2
    n.pool2 = L.Pooling(n.norm2, kernel_size=3, stride=2, pool=P.Pooling.MAX)
    # convolution layer conv3
    n.conv3 = L.Convolution(
        n.pool2,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        kernel_size=3,
        num_output=384,
        pad=1,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0))
    # ReLU layer relu3
    n.relu3 = L.ReLU(n.conv3, in_place=True)
    # convolution layer conv4
    n.conv4 = L.Convolution(
        n.conv3,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        kernel_size=3,
        num_output=384,
        pad=1,
        group=2,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0.1))
    # ReLU layer relu4
    n.relu4 = L.ReLU(n.conv4, in_place=True)
    # convolution layer conv5
    n.conv5 = L.Convolution(
        n.conv4,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        kernel_size=3,
        num_output=256,
        pad=1,
        group=2,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0.1))
    # ReLU layer relu5
    n.relu5 = L.ReLU(n.conv5, in_place=True)
    # pooling layer pool5
    n.pool5 = L.Pooling(n.conv5, kernel_size=3, stride=2, pool=P.Pooling.MAX)
    # fully connected layer fc6
    n.fc6 = L.InnerProduct(
        n.pool5,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        num_output=4096,
        weight_filler=dict(type="gaussian", std=0.005),
        bias_filler=dict(type='constant', value=0.1))
    n.relu6 = L.ReLU(n.fc6, in_place=True)
    # dropout layer drop6 (dropout_ratio is the probability of dropping an activation)
    n.drop6 = L.Dropout(n.fc6, dropout_ratio=0.5, in_place=True)
    # fully connected layer fc7
    n.fc7 = L.InnerProduct(
        n.fc6,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        num_output=4096,
        weight_filler=dict(type="gaussian", std=0.005),
        bias_filler=dict(type='constant', value=0.1))
    # ReLU layer relu7
    n.relu7 = L.ReLU(n.fc7, in_place=True)
    # dropout layer drop7
    n.drop7 = L.Dropout(n.fc7, dropout_ratio=0.5, in_place=True)
    # fully connected layer fc8
    n.fc8 = L.InnerProduct(
        n.fc7,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        num_output=1000,
        weight_filler=dict(type="gaussian", std=0.01),
        bias_filler=dict(type='constant', value=0))
    if model:
        n.acc = L.Accuracy(n.fc8, n.label)
    else:
        n.loss = L.SoftmaxWithLoss(n.fc8, n.label)
    return n.to_proto()
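# Hedged usage sketch for this AlexNet-style create_net: model=False emits the
# TRAIN-phase net with a loss layer, model=True the TEST-phase net with an
# accuracy layer. The LMDB and mean-file paths are placeholders.
def write_alexnet_prototxts():
    with open('train.prototxt', 'w') as f:
        f.write(str(create_net('train_lmdb', 256, 'mean.binaryproto', model=False)))
    with open('val.prototxt', 'w') as f:
        f.write(str(create_net('val_lmdb', 50, 'mean.binaryproto', model=True)))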
def InceptionV3Body(net, from_layer, output_pred=False):
    # scale is fixed to 1, thus we ignore it.
    use_scale = False

    out_layer = 'conv'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=32, kernel_size=3, pad=0, stride=2,
                use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'conv_1'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=32, kernel_size=3, pad=0, stride=1,
                use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'conv_2'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=64, kernel_size=3, pad=1, stride=1,
                use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'pool'
    net[out_layer] = L.Pooling(net[from_layer], pool=P.Pooling.MAX,
                               kernel_size=3, stride=2, pad=0)
    from_layer = out_layer

    out_layer = 'conv_3'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=80, kernel_size=1, pad=0, stride=1,
                use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'conv_4'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=192, kernel_size=3, pad=0, stride=1,
                use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'pool_1'
    net[out_layer] = L.Pooling(net[from_layer], pool=P.Pooling.MAX,
                               kernel_size=3, stride=2, pad=0)
    from_layer = out_layer

    # inceptions with 1x1, 3x3, 5x5 convolutions
    for inception_id in xrange(0, 3):
        if inception_id == 0:
            out_layer = 'mixed'
            tower_2_conv_num_output = 32
        else:
            out_layer = 'mixed_{}'.format(inception_id)
            tower_2_conv_num_output = 64
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=48, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=64, kernel_size=5, pad=2, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=96, kernel_size=3, pad=1, stride=1),
            dict(name='conv_2', num_output=96, kernel_size=3, pad=1, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool', pool=P.Pooling.AVE, kernel_size=3, pad=1, stride=1),
            dict(name='conv', num_output=tower_2_conv_num_output, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    # inceptions with 1x1, 3x3 (in sequence) convolutions
    out_layer = 'mixed_3'
    towers = []
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=384, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}/tower'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=96, kernel_size=3, pad=1, stride=1),
        dict(name='conv_2', num_output=96, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='pool', pool=P.Pooling.MAX, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    out_layer = '{}/join'.format(out_layer)
    net[out_layer] = L.Concat(*towers, axis=1)
    from_layer = out_layer

    # inceptions with 1x1, 7x1, 1x7 convolutions
    for inception_id in xrange(4, 8):
        if inception_id == 4:
            num_output = 128
        elif inception_id == 5 or inception_id == 6:
            num_output = 160
        elif inception_id == 7:
            num_output = 192
        out_layer = 'mixed_{}'.format(inception_id)
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=num_output, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
            dict(name='conv_2', num_output=192, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
        ])
        towers.append(tower)
        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=num_output, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
            dict(name='conv_2', num_output=num_output, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
            dict(name='conv_3', num_output=num_output, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
            dict(name='conv_4', num_output=192, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
        ])
        towers.append(tower)
        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool', pool=P.Pooling.AVE, kernel_size=3, pad=1, stride=1),
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    # inceptions with 1x1, 3x3, 1x7, 7x1 filters
    out_layer = 'mixed_8'
    towers = []
    tower_name = '{}/tower'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=320, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}/tower_1'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=192, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
        dict(name='conv_2', num_output=192, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
        dict(name='conv_3', num_output=192, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='pool', pool=P.Pooling.MAX, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    out_layer = '{}/join'.format(out_layer)
    net[out_layer] = L.Concat(*towers, axis=1)
    from_layer = out_layer

    for inception_id in xrange(9, 11):
        num_output = 384
        num_output2 = 448
        if inception_id == 9:
            pool = P.Pooling.AVE
        else:
            pool = P.Pooling.MAX
        out_layer = 'mixed_{}'.format(inception_id)
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=320, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)

        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output, kernel_size=1, pad=0, stride=1),
        ])
        subtowers = []
        subtower_name = '{}/mixed'.format(tower_name)
        subtower = InceptionTower(net, '{}/conv'.format(tower_name), subtower_name, [
            dict(name='conv', num_output=num_output, kernel_size=[1, 3], pad=[0, 1], stride=[1, 1]),
        ])
        subtowers.append(subtower)
        subtower = InceptionTower(net, '{}/conv'.format(tower_name), subtower_name, [
            dict(name='conv_1', num_output=num_output, kernel_size=[3, 1], pad=[1, 0], stride=[1, 1]),
        ])
        subtowers.append(subtower)
        net[subtower_name] = L.Concat(*subtowers, axis=1)
        towers.append(net[subtower_name])

        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output2, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=num_output, kernel_size=3, pad=1, stride=1),
        ])
        subtowers = []
        subtower_name = '{}/mixed'.format(tower_name)
        subtower = InceptionTower(net, '{}/conv_1'.format(tower_name), subtower_name, [
            dict(name='conv', num_output=num_output, kernel_size=[1, 3], pad=[0, 1], stride=[1, 1]),
        ])
        subtowers.append(subtower)
        subtower = InceptionTower(net, '{}/conv_1'.format(tower_name), subtower_name, [
            dict(name='conv_1', num_output=num_output, kernel_size=[3, 1], pad=[1, 0], stride=[1, 1]),
        ])
        subtowers.append(subtower)
        net[subtower_name] = L.Concat(*subtowers, axis=1)
        towers.append(net[subtower_name])

        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool', pool=pool, kernel_size=3, pad=1, stride=1),
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    if output_pred:
        net.pool_3 = L.Pooling(net[from_layer], pool=P.Pooling.AVE,
                               kernel_size=8, pad=0, stride=1)
        net.softmax = L.InnerProduct(net.pool_3, num_output=1008)
        net.softmax_prob = L.Softmax(net.softmax)

    return net
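# Hedged usage sketch for InceptionV3Body: assumes the ConvBNLayer and
# InceptionTower helpers used above are importable; the 299x299 input shape
# follows the standard Inception v3 setup.
def write_inception_v3_prototxt(path='inception_v3.prototxt'):
    net = caffe.NetSpec()
    net.data = L.Input(input_param=dict(shape=dict(dim=[1, 3, 299, 299])))
    InceptionV3Body(net, 'data', output_pred=True)
    with open(path, 'w') as f:
        f.write(str(net.to_proto()))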
def max_pooling_layer(previous, name, params):
    """ create a max pooling layer """
    return cl.Pooling(
        previous,
        name=name,
        pool=cp.Pooling.MAX,
        kernel_size=int(params["size"]),
        stride=int(params["stride"]))
def ResNet(split):
    data, labels = L.Python(module='readDataLayer',
                            layer='input_layer',
                            ntop=2,
                            param_str=str(
                                dict(split=split,
                                     data_dir=this_dir + '/data/',
                                     train_data_name='train_',
                                     test_data_name='test',
                                     train_batches=128,
                                     test_batches=128,
                                     crop_size_x=33,
                                     crop_size_y=33,
                                     train_pack_nums=9,
                                     test_pack_nums=1)))
    HGG_1, _ = conv_BN_scale_relu(split, data, 64, 3, 1, 0)
    HGG_2, _ = conv_BN_scale_relu(split, HGG_1, 64, 3, 1, 0)
    HGG_3, _ = conv_BN_scale_relu(split, HGG_2, 64, 3, 1, 0)
    HGG_4 = L.Pooling(HGG_3, pool=P.Pooling.MAX, global_pooling=False,
                      stride=2, kernel_size=3)
    HGG_5, _ = conv_BN_scale_relu(split, HGG_4, 128, 3, 1, 0)
    HGG_6, _ = conv_BN_scale_relu(split, HGG_5, 128, 3, 1, 0)
    HGG_7, _ = conv_BN_scale_relu(split, HGG_6, 128, 3, 1, 0)
    HGG_8 = L.Pooling(HGG_7, pool=P.Pooling.MAX, global_pooling=False,
                      stride=2, kernel_size=3)
    HGG_8a = L.Flatten(HGG_8)
    HGG_9 = L.ReLU(HGG_8a)
    HGG_9a = L.InnerProduct(L.Dropout(HGG_9, dropout_ratio=0.1),
                            num_output=256,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'))
    # HGG_9a = L.InnerProduct(HGG_9, num_output=256)
    HGG_10 = L.ReLU(HGG_9a)
    HGG_10a = L.InnerProduct(L.Dropout(HGG_10, dropout_ratio=0.1),
                             num_output=256,
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant'))
    # HGG_10a = L.InnerProduct(HGG_10, num_output=256)
    HGG_11 = L.Dropout(HGG_10a, dropout_ratio=0.1)
    HGG_11a = L.InnerProduct(HGG_11, num_output=5,
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant'))
    acc = L.Accuracy(HGG_11a, labels)
    loss = L.SoftmaxWithLoss(HGG_11a, labels)
    return to_proto(loss, acc)
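# Hedged usage sketch for ResNet (despite the name, the body above is a plain
# VGG-style convolutional stack): assumes this_dir, conv_BN_scale_relu and the
# readDataLayer Python layer are available on the PYTHONPATH.
def write_hgg_prototxts():
    for split in ('train', 'test'):
        with open('hgg_%s.prototxt' % split, 'w') as f:
            f.write(str(ResNet(split)))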
def generate_net(train_lmdb, val_lmdb, train_batch_size, test_batch_size):
    net = caffe.NetSpec()

    # TRAIN-phase data layer; serialize it now, because net.data is
    # overwritten by the TEST-phase data layer below.
    # (scale moved inside transform_param; it was a stray top-level kwarg,
    # which LayerParameter does not accept.)
    net.data, net.label = L.Data(source=train_lmdb,
                                 backend=caffe.params.Data.LMDB,
                                 batch_size=train_batch_size,
                                 ntop=2,
                                 transform_param=dict(
                                     scale=0.017,
                                     crop_size=224,
                                     mean_value=[103.94, 116.78, 123.68]),
                                 include=dict(phase=caffe.TRAIN))
    train_data_layer_str = str(net.to_proto())

    # TEST-phase data layer
    net.data, net.label = L.Data(source=val_lmdb,
                                 backend=caffe.params.Data.LMDB,
                                 batch_size=test_batch_size,
                                 ntop=2,
                                 transform_param=dict(
                                     scale=0.017,
                                     crop_size=224,
                                     mean_value=[103.94, 116.78, 123.68]),
                                 include=dict(phase=caffe.TEST))

    # bone
    net.conv1 = L.Convolution(
        net.data,
        num_output=32,
        kernel_size=3,
        stride=2,
        pad=1,
        weight_filler={"type": "xavier"},
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    net.tops['conv1/bn'] = L.BatchNorm(net.conv1,
                                       param=[
                                           dict(lr_mult=0, decay_mult=0),
                                           dict(lr_mult=0, decay_mult=0),
                                           dict(lr_mult=0, decay_mult=0)
                                       ],
                                       in_place=False)
    net.tops['conv1/scale'] = L.Scale(
        net.tops['conv1/bn'],
        param=[dict(lr_mult=1, decay_mult=0), dict(lr_mult=2, decay_mult=0)],
        scale_param={
            'filler': {'value': 1},
            'bias_term': True,
            'bias_filler': {'value': 0}
        },
        in_place=True)
    net.conv1_relu = L.ReLU6(net.tops['conv1/scale'], in_place=True)

    # inverted-residual bottlenecks: (in_channels, expansion, out_channels, stride)
    bottleneck(net, net.conv1_relu, 'conv2', 32, 1, 16, 1)
    bottleneck(net, net.tops['conv2/1x1_down/scale'], 'conv3_1', 16, 6, 24, 2)
    bottleneck(net, net.tops['conv3_1/1x1_down/scale'], 'conv3_2', 24, 6, 24, 1)
    bottleneck(net, net.tops['conv3_2/add'], 'conv4_1', 24, 6, 32, 2)
    bottleneck(net, net.tops['conv4_1/1x1_down/scale'], 'conv4_2', 32, 6, 32, 1)
    bottleneck(net, net.tops['conv4_2/add'], 'conv4_3', 32, 6, 32, 1)
    bottleneck(net, net.tops['conv4_3/add'], 'conv5_1', 32, 6, 64, 2)
    bottleneck(net, net.tops['conv5_1/1x1_down/scale'], 'conv5_2', 64, 6, 64, 1)
    bottleneck(net, net.tops['conv5_2/add'], 'conv5_3', 64, 6, 64, 1)
    bottleneck(net, net.tops['conv5_3/add'], 'conv5_4', 64, 6, 64, 1)
    bottleneck(net, net.tops['conv5_4/add'], 'conv6_1', 64, 6, 96, 1)
    bottleneck(net, net.tops['conv6_1/1x1_down/scale'], 'conv6_2', 96, 6, 96, 1)
    bottleneck(net, net.tops['conv6_2/add'], 'conv6_3', 96, 6, 96, 1)
    bottleneck(net, net.tops['conv6_3/add'], 'conv7_1', 96, 6, 160, 2)
    bottleneck(net, net.tops['conv7_1/1x1_down/scale'], 'conv7_2', 160, 6, 160, 1)
    bottleneck(net, net.tops['conv7_2/add'], 'conv7_3', 160, 6, 160, 1)
    bottleneck(net, net.tops['conv7_3/add'], 'conv8', 160, 6, 320, 1)

    net.conv9 = L.Convolution(
        net.tops['conv8/1x1_down/scale'],
        num_output=1280,
        kernel_size=1,
        weight_filler={"type": "xavier"},
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    net.tops['conv9/bn'] = L.BatchNorm(net.conv9,
                                       param=[
                                           dict(lr_mult=0, decay_mult=0),
                                           dict(lr_mult=0, decay_mult=0),
                                           dict(lr_mult=0, decay_mult=0)
                                       ],
                                       in_place=False)
    net.tops['conv9/scale'] = L.Scale(
        net.tops['conv9/bn'],
        param=[dict(lr_mult=1, decay_mult=0), dict(lr_mult=2, decay_mult=0)],
        scale_param={
            'filler': {'value': 1},
            'bias_term': True,
            'bias_filler': {'value': 0}
        },
        in_place=True)
    net.conv9_relu = L.ReLU6(net.tops['conv9/scale'], in_place=True)

    # global average pooling
    net.pool10 = L.Pooling(net.conv9_relu,
                           pool=caffe.params.Pooling.AVE,
                           global_pooling=True)

    # 1000-way classifier
    net.conv11 = L.Convolution(
        net.pool10,
        num_output=1000,
        kernel_size=1,
        weight_filler={"type": "gaussian", "mean": 0, "std": 0.01},
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])

    # softmax loss
    net.loss = L.SoftmaxWithLoss(net.conv11, net.label,
                                 include=dict(phase=caffe.TRAIN))
    # accuracy
    net.accuracy = L.Accuracy(net.conv11, net.label,
                              include=dict(phase=caffe.TEST))
    net.accuracy_top5 = L.Accuracy(net.conv11, net.label,
                                   include=dict(phase=caffe.TEST),
                                   accuracy_param=dict(top_k=5))

    return train_data_layer_str + str(net.to_proto())
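# Hedged usage sketch for generate_net: assumes the bottleneck helper used
# above is defined in this script; since the function already concatenates the
# TRAIN data layer string with the rest of the net, the result is a single
# train_val prototxt. LMDB paths and batch sizes are placeholders.
def write_mobilenet_v2_train_val(path='mobilenet_v2_train_val.prototxt'):
    with open(path, 'w') as f:
        f.write(generate_net('train_lmdb', 'val_lmdb',
                             train_batch_size=64, test_batch_size=25))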
def compile_time_operation(self, learning_option, cluster):
    """
    define pooling (max/average pooling) operation for input blob
    """
    # get input
    input_ = self.get_input('input')
    indim = self.get_dimension('input')

    # get attrs
    # required fields
    pool_type = self.get_attr('pool_type', default=None)
    if pool_type is None:
        raise Exception('[DLMDL ERROR]: {0} in {1} layer must be declared.'.format('type', self.name))
    kernel_size = self.get_attr('kernel_size', default=None)
    if kernel_size is None:
        raise Exception('[DLMDL ERROR]: {0} in {1} layer must be declared.'.format('kernel_size', self.name))

    # optional fields
    padding = self.get_attr('padding', default='VALID')
    stride = self.get_attr('stride', default=1)
    engine = self.get_attr('engine', default='DEFAULT')
    global_pooling = self.get_attr('global_pooling', default=False)

    # padding: indim is NCHW, so the spatial dimensions are indim[2] and indim[3]
    # (the original indexed indim[i] here, which picked up the batch and
    # channel dimensions instead of the spatial ones)
    if padding == 'SAME':
        outdim = [np.ceil(float(indim[i + 2]) / float(stride)) for i in xrange(2)]
        outdim.insert(0, indim[0])
        outdim.insert(1, indim[1])
        p = [int(((outdim[i + 2] - 1) * stride + kernel_size[i] - indim[i + 2]) / 2) for i in xrange(2)]
    else:
        outdim = [np.ceil(float(indim[i + 2] - kernel_size[i] + 1) / float(stride)) for i in xrange(2)]
        outdim.insert(0, indim[0])
        outdim.insert(1, indim[1])
        p = [0, 0]

    if engine == 'DEFAULT':
        engine_idx = 0
    elif engine == 'CAFFE':
        engine_idx = 1
    elif engine == 'CUDNN':
        engine_idx = 2
    else:
        # TODO: error handling
        pass

    # pool=0: max pooling, pool=1: average pooling
    if pool_type == 'MAX':
        pool_type_idx = 0
    elif pool_type == 'AVG':
        pool_type_idx = 1
    else:
        # TODO: error handling
        pass

    pool = L.Pooling(input_,
                     name=self.name,
                     pool=pool_type_idx,
                     kernel_h=kernel_size[0],
                     kernel_w=kernel_size[1],
                     stride=stride,
                     pad_h=p[0],
                     pad_w=p[1],
                     engine=engine_idx,
                     global_pooling=global_pooling)
    self.set_output('output', pool)
    self.set_dimension('output', outdim)
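# A standalone sketch of the SAME-padding arithmetic used above, assuming an
# NCHW input of 1x64x28x28, a 3x3 kernel, and stride 2 (the numbers are
# illustrative, not from the original):
import numpy as np

indim = [1, 64, 28, 28]
kernel_size = [3, 3]
stride = 2
# output spatial size: ceil(input / stride) -> [14, 14]
outdim = [int(np.ceil(float(indim[i + 2]) / stride)) for i in range(2)]
# pad so the strided window grid covers the input -> [0, 0] here
pad = [int(((outdim[i] - 1) * stride + kernel_size[i] - indim[i + 2]) / 2)
       for i in range(2)]
print(outdim, pad)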
def resnet18(split, mean, opt):
    n = caffe.NetSpec()

    # config python data layer
    if split == 'train':
        batch_size = opt.train_batch_size
    if split == 'val':
        batch_size = opt.val_batch_size
    if split == 'test':
        batch_size = opt.test_batch_size
    if split == 'train' or split == 'val':
        dataset_name = opt.train_dataset_name
    else:
        dataset_name = opt.test_dataset_name
    pydata_params = dict(split=split,
                         data_dir=opt.data_dir,
                         batch_size=batch_size,
                         mean=mean,
                         dataset=dataset_name,
                         use_HSV=opt.use_HSV,
                         load_size=opt.load_size,
                         crop_size=opt.crop_size)
    n.data, n.label = L.Python(module='faceData_layers',
                               layer='FaceDataLayer',
                               ntop=2,
                               param_str=str(pydata_params))

    # start building the main body of the network.
    # The main differences from the reference ResNet:
    # 1. do not use 4*nout for certain convolution layers
    # 2. do not use bias_term for the convolution layer before the start of the residual blocks
    # 3. do not set the BN layer parameter moving_average_fraction to 0.9 (use the default 0.999)
    # 4. do not specify 'msra' for the weight filler initialization
    n.conv1, n.bn_conv1, n.scale_conv1 = _conv_bn_scale(n.data, 64, bias_term=False, kernel_size=7, pad=3, stride=2)
    n.conv1_relu = L.ReLU(n.scale_conv1, in_place=True)
    n.pool1 = L.Pooling(n.conv1_relu, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    _resnet_block_2stages('2a', n, n.pool1, 64, branch1=True, initial_stride=1)
    _resnet_block_2stages('2b', n, n.res2a_relu, 64)
    _resnet_block_2stages('3a', n, n.res2b_relu, 128, branch1=True)
    _resnet_block_2stages('3b', n, n.res3a_relu, 128)
    _resnet_block_2stages('4a', n, n.res3b_relu, 256, branch1=True)
    _resnet_block_2stages('4b', n, n.res4a_relu, 256)
    _resnet_block_2stages('5a', n, n.res4b_relu, 512, branch1=True)
    _resnet_block_2stages('5b', n, n.res5a_relu, 512)
    n.pool5 = L.Pooling(n.res5b_relu, kernel_size=7, stride=1, pool=P.Pooling.AVE)

    # fully connected classifier
    lr_ratio = 100  # lr multiplier for truncated layers
    n.fc_face1 = L.InnerProduct(n.pool5,
                                num_output=1000,
                                param=[
                                    dict(lr_mult=1 * lr_ratio, decay_mult=1),
                                    dict(lr_mult=2 * lr_ratio, decay_mult=0)
                                ],
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))
    n.fc_face2 = L.InnerProduct(n.fc_face1,
                                num_output=2,
                                param=[
                                    dict(lr_mult=1 * lr_ratio, decay_mult=1),
                                    dict(lr_mult=2 * lr_ratio, decay_mult=0)
                                ],
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))

    # loss and accuracy layers
    n.loss = L.SoftmaxWithLoss(n.fc_face2, n.label)
    n.acc = L.Accuracy(n.fc_face2, n.label)
    return n.to_proto()
def resnet50(split, mean, opt):
    n = caffe.NetSpec()

    # config python data layer
    if split == 'train':
        batch_size = opt.train_batch_size
    if split == 'val':
        batch_size = opt.val_batch_size
    if split == 'test':
        batch_size = opt.test_batch_size
    if split == 'train' or split == 'val':
        dataset_name = opt.train_dataset_name
    else:
        dataset_name = opt.test_dataset_name
    pydata_params = dict(split=split,
                         data_dir=opt.data_dir,
                         batch_size=batch_size,
                         mean=mean,
                         dataset=dataset_name,
                         use_HSV=opt.use_HSV,
                         load_size=opt.load_size,
                         crop_size=opt.crop_size)
    n.data, n.label = L.Python(module='faceData_layers',
                               layer='FaceDataLayer',
                               ntop=2,
                               param_str=str(pydata_params))

    # start building the main body of the network
    n.conv1, n.bn_conv1, n.scale_conv1 = _conv_bn_scale(n.data, 64, bias_term=True, kernel_size=7, pad=3, stride=2)
    n.conv1_relu = L.ReLU(n.scale_conv1)
    n.pool1 = L.Pooling(n.conv1_relu, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    _resnet_block_3stages('2a', n, n.pool1, 64, branch1=True, initial_stride=1)
    _resnet_block_3stages('2b', n, n.res2a_relu, 64)
    _resnet_block_3stages('2c', n, n.res2b_relu, 64)
    _resnet_block_3stages('3a', n, n.res2c_relu, 128, branch1=True)
    _resnet_block_3stages('3b', n, n.res3a_relu, 128)
    _resnet_block_3stages('3c', n, n.res3b_relu, 128)
    _resnet_block_3stages('3d', n, n.res3c_relu, 128)
    _resnet_block_3stages('4a', n, n.res3d_relu, 256, branch1=True)
    _resnet_block_3stages('4b', n, n.res4a_relu, 256)
    _resnet_block_3stages('4c', n, n.res4b_relu, 256)
    _resnet_block_3stages('4d', n, n.res4c_relu, 256)
    _resnet_block_3stages('4e', n, n.res4d_relu, 256)
    _resnet_block_3stages('4f', n, n.res4e_relu, 256)
    _resnet_block_3stages('5a', n, n.res4f_relu, 512, branch1=True)
    _resnet_block_3stages('5b', n, n.res5a_relu, 512)
    _resnet_block_3stages('5c', n, n.res5b_relu, 512)
    n.pool5 = L.Pooling(n.res5c_relu, kernel_size=7, stride=1, pool=P.Pooling.AVE)

    # fully connected classifier
    lr_ratio = 100  # lr multiplier for truncated layers
    n.fc_face1 = L.InnerProduct(n.pool5,
                                num_output=1000,
                                param=[
                                    dict(lr_mult=1 * lr_ratio, decay_mult=1),
                                    dict(lr_mult=2 * lr_ratio, decay_mult=0)
                                ],
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))
    n.fc_face2 = L.InnerProduct(n.fc_face1,
                                num_output=2,
                                param=[
                                    dict(lr_mult=1 * lr_ratio, decay_mult=1),
                                    dict(lr_mult=2 * lr_ratio, decay_mult=0)
                                ],
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))

    # loss and accuracy layers
    n.loss = L.SoftmaxWithLoss(n.fc_face2, n.label)
    n.acc = L.Accuracy(n.fc_face2, n.label)
    return n.to_proto()
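# Hedged usage sketch covering both resnet18 and resnet50: assumes opt is an
# options object with the fields read above and that mean matches what
# FaceDataLayer expects; that Python layer must be on the PYTHONPATH when the
# generated prototxt is actually used.
def write_face_prototxts(mean, opt, builder=resnet50):
    for split in ('train', 'val', 'test'):
        with open('face_%s.prototxt' % split, 'w') as f:
            f.write(str(builder(split, mean, opt)))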
def mfb_coatt(mode, batchsize, T, question_vocab_size, folder):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize, 'folder': folder})
    if mode == 'val':
        n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(
            module='vqa_data_layer', layer='VQADataProviderLayer',
            param_str=mode_str, ntop=5)
    else:
        n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(
            module='vqa_data_layer_kld', layer='VQADataProviderLayer',
            param_str=mode_str, ntop=5)
    n.embed = L.Embed(n.data, input_dim=question_vocab_size, num_output=300,
                      weight_filler=dict(type='xavier'))
    n.embed_tanh = L.TanH(n.embed)
    concat_word_embed = [n.embed_tanh, n.glove]
    n.concat_embed = L.Concat(*concat_word_embed, concat_param={'axis': 2})  # T x N x 600

    # LSTM
    n.lstm1 = L.LSTM(
        n.concat_embed, n.cont,
        recurrent_param=dict(
            num_output=config.LSTM_UNIT_NUM,
            weight_filler=dict(type='xavier')))
    n.lstm1_droped = L.Dropout(n.lstm1,
                               dropout_param={'dropout_ratio': config.LSTM_DROPOUT_RATIO})
    n.lstm1_resh = L.Permute(n.lstm1_droped, permute_param=dict(order=[1, 2, 0]))
    n.lstm1_resh2 = L.Reshape(n.lstm1_resh,
                              reshape_param=dict(shape=dict(dim=[0, 0, 0, 1])))

    '''
    Question Attention
    '''
    n.qatt_conv1 = L.Convolution(n.lstm1_resh2, kernel_size=1, stride=1,
                                 num_output=512, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.qatt_relu = L.ReLU(n.qatt_conv1)
    n.qatt_conv2 = L.Convolution(n.qatt_relu, kernel_size=1, stride=1,
                                 num_output=config.NUM_QUESTION_GLIMPSE, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.qatt_reshape = L.Reshape(
        n.qatt_conv2,
        reshape_param=dict(shape=dict(dim=[
            -1, config.NUM_QUESTION_GLIMPSE, config.MAX_WORDS_IN_QUESTION, 1
        ])))  # N*NUM_QUESTION_GLIMPSE*15
    n.qatt_softmax = L.Softmax(n.qatt_reshape, axis=2)
    qatt_maps = L.Slice(n.qatt_softmax,
                        ntop=config.NUM_QUESTION_GLIMPSE,
                        slice_param={'axis': 1})
    dummy_lstm = L.DummyData(shape=dict(dim=[batchsize, 1]),
                             data_filler=dict(type='constant', value=1),
                             ntop=1)
    qatt_feature_list = []
    for i in xrange(config.NUM_QUESTION_GLIMPSE):
        if config.NUM_QUESTION_GLIMPSE == 1:
            n.__setattr__('qatt_feat%d' % i,
                          L.SoftAttention(n.lstm1_resh2, qatt_maps, dummy_lstm))
        else:
            n.__setattr__('qatt_feat%d' % i,
                          L.SoftAttention(n.lstm1_resh2, qatt_maps[i], dummy_lstm))
        qatt_feature_list.append(n.__getattr__('qatt_feat%d' % i))
    n.qatt_feat_concat = L.Concat(*qatt_feature_list)

    '''
    Image Attention with MFB
    '''
    n.q_feat_resh = L.Reshape(n.qatt_feat_concat,
                              reshape_param=dict(shape=dict(dim=[0, -1, 1, 1])))
    n.i_feat_resh = L.Reshape(
        n.img_feature,
        reshape_param=dict(shape=dict(
            dim=[0, -1, config.IMG_FEAT_WIDTH, config.IMG_FEAT_WIDTH])))
    n.iatt_q_proj = L.InnerProduct(n.q_feat_resh,
                                   num_output=config.JOINT_EMB_SIZE,
                                   weight_filler=dict(type='xavier'))
    n.iatt_q_resh = L.Reshape(
        n.iatt_q_proj,
        reshape_param=dict(shape=dict(dim=[-1, config.JOINT_EMB_SIZE, 1, 1])))
    n.iatt_q_tile1 = L.Tile(n.iatt_q_resh, axis=2, tiles=config.IMG_FEAT_WIDTH)
    n.iatt_q_tile2 = L.Tile(n.iatt_q_tile1, axis=3, tiles=config.IMG_FEAT_WIDTH)
    n.iatt_i_conv = L.Convolution(n.i_feat_resh, kernel_size=1, stride=1,
                                  num_output=config.JOINT_EMB_SIZE, pad=0,
                                  weight_filler=dict(type='xavier'))
    n.iatt_i_resh1 = L.Reshape(
        n.iatt_i_conv,
        reshape_param=dict(shape=dict(dim=[
            -1, config.JOINT_EMB_SIZE, config.IMG_FEAT_WIDTH, config.IMG_FEAT_WIDTH
        ])))
    n.iatt_iq_eltwise = L.Eltwise(n.iatt_q_tile2, n.iatt_i_resh1,
                                  eltwise_param=dict(operation=0))  # 0 = PROD
    n.iatt_iq_droped = L.Dropout(
        n.iatt_iq_eltwise,
        dropout_param={'dropout_ratio': config.MFB_DROPOUT_RATIO})
    # renamed from a second iatt_iq_resh2 so the two Reshape tops keep distinct names
    n.iatt_iq_resh1 = L.Reshape(
        n.iatt_iq_droped,
        reshape_param=dict(shape=dict(dim=[-1, config.JOINT_EMB_SIZE, 196, 1])))
    n.iatt_iq_permute1 = L.Permute(n.iatt_iq_resh1,
                                   permute_param=dict(order=[0, 2, 1, 3]))
    n.iatt_iq_resh2 = L.Reshape(
        n.iatt_iq_permute1,
        reshape_param=dict(shape=dict(dim=[
            -1, config.IMG_FEAT_SIZE, config.MFB_OUT_DIM, config.MFB_FACTOR_NUM
        ])))
    n.iatt_iq_sumpool = L.Pooling(n.iatt_iq_resh2, pool=P.Pooling.SUM,
                                  pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.iatt_iq_permute2 = L.Permute(n.iatt_iq_sumpool,
                                   permute_param=dict(order=[0, 2, 1, 3]))
    n.iatt_iq_sqrt = L.SignedSqrt(n.iatt_iq_permute2)
    n.iatt_iq_l2 = L.L2Normalize(n.iatt_iq_sqrt)

    # 2 conv layers: 1000 -> 512 -> 2
    n.iatt_conv1 = L.Convolution(n.iatt_iq_l2, kernel_size=1, stride=1,
                                 num_output=512, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.iatt_relu = L.ReLU(n.iatt_conv1)
    n.iatt_conv2 = L.Convolution(n.iatt_relu, kernel_size=1, stride=1,
                                 num_output=config.NUM_IMG_GLIMPSE, pad=0,
                                 weight_filler=dict(type='xavier'))
    n.iatt_resh = L.Reshape(
        n.iatt_conv2,
        reshape_param=dict(shape=dict(
            dim=[-1, config.NUM_IMG_GLIMPSE, config.IMG_FEAT_SIZE])))
    n.iatt_softmax = L.Softmax(n.iatt_resh, axis=2)
    n.iatt_softmax_resh = L.Reshape(
        n.iatt_softmax,
        reshape_param=dict(shape=dict(dim=[
            -1, config.NUM_IMG_GLIMPSE, config.IMG_FEAT_WIDTH, config.IMG_FEAT_WIDTH
        ])))
    iatt_maps = L.Slice(n.iatt_softmax_resh,
                        ntop=config.NUM_IMG_GLIMPSE,
                        slice_param={'axis': 1})
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1),
                        ntop=1)
    iatt_feature_list = []
    for i in xrange(config.NUM_IMG_GLIMPSE):
        if config.NUM_IMG_GLIMPSE == 1:
            n.__setattr__('iatt_feat%d' % i,
                          L.SoftAttention(n.i_feat_resh, iatt_maps, dummy))
        else:
            n.__setattr__('iatt_feat%d' % i,
                          L.SoftAttention(n.i_feat_resh, iatt_maps[i], dummy))
        n.__setattr__('iatt_feat%d_resh' % i,
                      L.Reshape(n.__getattr__('iatt_feat%d' % i),
                                reshape_param=dict(shape=dict(dim=[0, -1]))))
        iatt_feature_list.append(n.__getattr__('iatt_feat%d_resh' % i))
    n.iatt_feat_concat = L.Concat(*iatt_feature_list)
    n.iatt_feat_concat_resh = L.Reshape(
        n.iatt_feat_concat,
        reshape_param=dict(shape=dict(dim=[0, -1, 1, 1])))

    '''
    Fine-grained Image-Question MFB fusion
    '''
    n.mfb_q_proj = L.InnerProduct(n.q_feat_resh,
                                  num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_i_proj = L.InnerProduct(n.iatt_feat_concat_resh,
                                  num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_iq_eltwise = L.Eltwise(n.mfb_q_proj, n.mfb_i_proj,
                                 eltwise_param=dict(operation=0))  # 0 = PROD
    n.mfb_iq_drop = L.Dropout(
        n.mfb_iq_eltwise,
        dropout_param={'dropout_ratio': config.MFB_DROPOUT_RATIO})
    n.mfb_iq_resh = L.Reshape(
        n.mfb_iq_drop,
        reshape_param=dict(shape=dict(
            dim=[-1, 1, config.MFB_OUT_DIM, config.MFB_FACTOR_NUM])))
    n.mfb_iq_sumpool = L.Pooling(n.mfb_iq_resh, pool=P.Pooling.SUM,
                                 pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.mfb_out = L.Reshape(n.mfb_iq_sumpool,
                          reshape_param=dict(shape=dict(dim=[-1, config.MFB_OUT_DIM])))
    n.mfb_sign_sqrt = L.SignedSqrt(n.mfb_out)
    n.mfb_l2 = L.L2Normalize(n.mfb_sign_sqrt)

    n.prediction = L.InnerProduct(n.mfb_l2,
                                  num_output=config.NUM_OUTPUT_UNITS,
                                  weight_filler=dict(type='xavier'))
    if mode == 'val':
        n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    else:
        n.loss = L.SoftmaxKLDLoss(n.prediction, n.label)
    return n.to_proto()
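# Hedged usage sketch for mfb_coatt: assumes the config module used above and
# the custom layers it relies on (SoftAttention, SignedSqrt, L2Normalize,
# SoftmaxKLDLoss) are compiled into this Caffe build; the batch size,
# vocabulary size, and folder are placeholders.
def write_mfb_coatt_prototxts(folder='./data'):
    for mode in ('train', 'val'):
        proto = mfb_coatt(mode, batchsize=64, T=config.MAX_WORDS_IN_QUESTION,
                          question_vocab_size=15000, folder=folder)
        with open('mfb_coatt_%s.prototxt' % mode, 'w') as f:
            f.write(str(proto))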
def lenet(lmdb_data, lmdb_label, batch_size, deploy, crop=64, mirror=False):
    """Simple LeNet to predict cdf."""
    data_transforms = dict(scale=1.)
    if crop:
        # will crop images to [crop]x[crop] with random center
        data_transforms['crop_size'] = crop
    if mirror:
        # will randomly flip images
        data_transforms['mirror'] = 1

    n = caffe.NetSpec()
    if deploy:
        input_ = "data"
        dim1 = batch_size
        dim2 = 3  # need to change these manually
        dim3 = 64
        dim4 = 64
        n.data = L.Layer()
    else:
        n.data = L.Data(batch_size=batch_size,
                        backend=P.Data.LMDB,
                        source=lmdb_data,
                        transform_param=data_transforms,
                        ntop=1)
        n.label = L.Data(batch_size=batch_size,
                         backend=P.Data.LMDB,
                         source=lmdb_label,
                         ntop=1)

    # first convolutional layer
    n.conv1 = L.Convolution(n.data, kernel_size=5, num_output=40,
                            weight_filler=dict(type='xavier'))
    n.norm1 = L.BatchNorm(n.conv1)
    n.relu1 = L.ReLU(n.norm1, in_place=True)
    n.pool1 = L.Pooling(n.relu1, kernel_size=2, stride=2, pool=P.Pooling.MAX)

    # second convolutional layer
    n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=40,
                            weight_filler=dict(type='xavier'))
    n.norm2 = L.BatchNorm(n.conv2)
    n.relu2 = L.ReLU(n.norm2, in_place=True)
    n.pool2 = L.Pooling(n.relu2, kernel_size=2, stride=2, pool=P.Pooling.MAX)

    # fully connected layers
    n.drop = L.Dropout(n.pool2, dropout_ratio=0.5)
    n.ip1 = L.InnerProduct(n.drop, num_output=600,
                           weight_filler=dict(type='xavier'))
    n.out = L.Sigmoid(n.ip1)

    if deploy:
        deploy_str = ('input: {}\ninput_dim: {}\n'
                      'input_dim: {}\ninput_dim: {}\n'
                      'input_dim: {}').format('"%s"' % input_, dim1, dim2, dim3, dim4)
        return (deploy_str + '\n' +
                'layer {' + 'layer {'.join(str(n.to_proto()).split('layer {')[2:]))
    else:
        n.loss = L.EuclideanLoss(n.out, n.label)
        return str(n.to_proto())
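# Hedged usage sketch for lenet: the train variant returns a full prototxt
# string with a EuclideanLoss layer, while deploy=True returns the input
# header plus the layers with the placeholder data layer stripped. The LMDB
# paths are illustrative.
def write_lenet_prototxts():
    with open('lenet_train.prototxt', 'w') as f:
        f.write(lenet('data_lmdb', 'label_lmdb', 64, deploy=False))
    with open('lenet_deploy.prototxt', 'w') as f:
        f.write(lenet('data_lmdb', 'label_lmdb', 1, deploy=True))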
def convert_symbol2proto(symbol):
    def looks_like_weight(name):
        """Internal helper to figure out if node should be hidden with `hide_weights`.
        """
        if name.endswith("_weight"):
            return True
        if name.endswith("_bias"):
            return True
        if name.endswith("_beta") or name.endswith("_gamma") or \
                name.endswith("_moving_var") or name.endswith("_moving_mean"):
            return True
        return False

    json_symbol = json.loads(symbol.tojson())
    all_nodes = json_symbol['nodes']
    no_weight_nodes = []
    for node in all_nodes:
        op = node['op']
        name = node['name']
        if op == 'null':
            if looks_like_weight(name):
                continue
        no_weight_nodes.append(node)

    # build next node dict
    next_node = dict()
    for node in no_weight_nodes:
        node_name = node['name']
        for input in node['inputs']:
            last_node_name = all_nodes[input[0]]['name']
            if last_node_name in next_node:
                next_node[last_node_name].append(node_name)
            else:
                next_node[last_node_name] = [node_name]

    supported_op_type = ['null', 'BatchNorm', 'Convolution', 'Activation',
                         'Pooling', 'elemwise_add', 'SliceChannel',
                         'FullyConnected', 'SoftmaxOutput', '_maximum',
                         'add_n', 'Concat']
    top_dict = dict()
    caffe_net = caffe.NetSpec()
    for node in no_weight_nodes:
        if node['op'] == 'null':
            input_param = dict()
            if node['name'] == 'data':
                input_param['shape'] = dict(dim=[1, 3, 160, 160])
            else:
                input_param['shape'] = dict(dim=[1])
            top_data = CL.Input(ntop=1, input_param=input_param)
            top_dict[node['name']] = [top_data]
            setattr(caffe_net, node['name'], top_data)
        elif node['op'].endswith('_copy'):
            pass
        elif node['op'] == 'BatchNorm':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if 'momentum' in attr:
                momentum = float(attr['momentum'])
            else:
                momentum = 0.9
            if 'eps' in attr:
                eps = float(attr['eps'])
            else:
                eps = 0.001
            if NO_INPLACE:
                in_place = False
            bn_top = CL.BatchNorm(top_dict[bottom_node_name][input[1]], ntop=1,
                                  batch_norm_param=dict(use_global_stats=True,
                                                        moving_average_fraction=momentum,
                                                        eps=eps),
                                  in_place=in_place)
            setattr(caffe_net, node['name'], bn_top)
            scale_top = CL.Scale(bn_top, ntop=1,
                                 scale_param=dict(bias_term=True),
                                 in_place=not NO_INPLACE)
            top_dict[node['name']] = [scale_top]
            setattr(caffe_net, node['name'] + '_scale', scale_top)
        elif node['op'] == 'Convolution':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'kernel' in attr:
                kernel_size = eval(attr['kernel'])
                assert kernel_size[0] == kernel_size[1]
                convolution_param['kernel_size'] = kernel_size[0]
            else:
                convolution_param['kernel_size'] = 1
            if 'no_bias' in attr:
                convolution_param['bias_term'] = not eval(attr['no_bias'])
            if 'num_group' in attr:
                convolution_param['group'] = int(attr['num_group'])
            convolution_param['num_output'] = int(attr['num_filter'])
            if 'pad' in attr:
                pad_size = eval(attr['pad'])
                assert pad_size[0] == pad_size[1]
                convolution_param['pad'] = pad_size[0]
            if 'stride' in attr:
                stride_size = eval(attr['stride'])
                assert stride_size[0] == stride_size[1]
                convolution_param['stride'] = stride_size[0]
            conv_top = CL.Convolution(top_dict[bottom_node_name][input[1]], ntop=1,
                                      convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'Activation':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if NO_INPLACE:
                in_place = False
            if attr['act_type'] == 'relu':
                ac_top = CL.ReLU(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            elif attr['act_type'] == 'sigmoid':
                ac_top = CL.Sigmoid(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            elif attr['act_type'] == 'tanh':
                ac_top = CL.TanH(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            top_dict[node['name']] = [ac_top]
            setattr(caffe_net, node['name'], ac_top)
        elif node['op'] == 'Pooling':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            pooling_param = dict()
            if attr['pool_type'] == 'avg':
                pooling_param['pool'] = 1
            elif attr['pool_type'] == 'max':
                pooling_param['pool'] = 0
            else:
                assert False, attr['pool_type']
            if 'global_pool' in attr and eval(attr['global_pool']) is True:
                pooling_param['global_pooling'] = True
            else:
                if 'kernel' in attr:
                    kernel_size = eval(attr['kernel'])
                    assert kernel_size[0] == kernel_size[1]
                    pooling_param['kernel_size'] = kernel_size[0]
                if 'pad' in attr:
                    pad_size = eval(attr['pad'])
                    assert pad_size[0] == pad_size[1]
                    pooling_param['pad'] = pad_size[0]
                if 'stride' in attr:
                    stride_size = eval(attr['stride'])
                    assert stride_size[0] == stride_size[1]
                    pooling_param['stride'] = stride_size[0]
            pool_top = CL.Pooling(top_dict[bottom_node_name][input[1]], ntop=1,
                                  pooling_param=pooling_param)
            top_dict[node['name']] = [pool_top]
            setattr(caffe_net, node['name'], pool_top)
        elif node['op'] == 'elemwise_add' or node['op'] == 'add_n':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            eltwise_param = dict()
            eltwise_param['operation'] = 1
            ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]],
                                     top_dict[bottom_node_name_b][input_b[1]],
                                     ntop=1, eltwise_param=eltwise_param)
            top_dict[node['name']] = [ele_add_top]
            setattr(caffe_net, node['name'], ele_add_top)
        elif node['op'] == '_maximum':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            eltwise_param = dict()
            eltwise_param['operation'] = 2
            ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]],
                                     top_dict[bottom_node_name_b][input_b[1]],
                                     ntop=1, eltwise_param=eltwise_param)
            top_dict[node['name']] = [ele_add_top]
            setattr(caffe_net, node['name'], ele_add_top)
        elif node['op'] == 'SliceChannel':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            slice_param = dict()
            slice_param['slice_dim'] = 1
            slice_num = 2
            slice_outputs = CL.Slice(top_dict[bottom_node_name][input[1]],
                                     ntop=slice_num, slice_param=slice_param)
            top_dict[node['name']] = slice_outputs
            for idx, output in enumerate(slice_outputs):
                setattr(caffe_net, node['name'] + '_' + str(idx), output)
        elif node['op'] == 'FullyConnected':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            inner_product_param = dict()
            inner_product_param['num_output'] = int(attr['num_hidden'])
            fc_top = CL.InnerProduct(top_dict[bottom_node_name][input[1]], ntop=1,
                                     inner_product_param=inner_product_param)
            top_dict[node['name']] = [fc_top]
            setattr(caffe_net, node['name'], fc_top)
        elif node['op'] == 'SoftmaxOutput':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            softmax_loss = CL.SoftmaxWithLoss(top_dict[bottom_node_name_a][input_a[1]],
                                              top_dict[bottom_node_name_b][input_b[1]],
                                              ntop=1)
            top_dict[node['name']] = [softmax_loss]
            setattr(caffe_net, node['name'], softmax_loss)
        elif node['op'] == 'Concat':
            if len(node['inputs']) == 2:
                input_a = node['inputs'][0]
                while True:
                    if all_nodes[input_a[0]]['op'] not in supported_op_type:
                        input_a = all_nodes[input_a[0]]['inputs'][0]
                    else:
                        break
                input_b = node['inputs'][1]
                while True:
                    if all_nodes[input_b[0]]['op'] not in supported_op_type:
                        input_b = all_nodes[input_b[0]]['inputs'][0]
                    else:
                        break
                bottom_node_name_a = all_nodes[input_a[0]]['name']
                bottom_node_name_b = all_nodes[input_b[0]]['name']
                concat_top = CL.Concat(top_dict[bottom_node_name_a][input_a[1]],
                                       top_dict[bottom_node_name_b][input_b[1]],
                                       ntop=1)
                top_dict[node['name']] = [concat_top]
                setattr(caffe_net, node['name'], concat_top)
            elif len(node['inputs']) == 3:
                input_a = node['inputs'][0]
                while True:
                    if all_nodes[input_a[0]]['op'] not in supported_op_type:
                        input_a = all_nodes[input_a[0]]['inputs'][0]
                    else:
                        break
                input_b = node['inputs'][1]
                while True:
                    if all_nodes[input_b[0]]['op'] not in supported_op_type:
                        input_b = all_nodes[input_b[0]]['inputs'][0]
                    else:
                        break
                input_c = node['inputs'][2]
                while True:
                    if all_nodes[input_c[0]]['op'] not in supported_op_type:
                        input_c = all_nodes[input_c[0]]['inputs'][0]
                    else:
                        break
                bottom_node_name_a = all_nodes[input_a[0]]['name']
                bottom_node_name_b = all_nodes[input_b[0]]['name']
                bottom_node_name_c = all_nodes[input_c[0]]['name']
                concat_top = CL.Concat(top_dict[bottom_node_name_a][input_a[1]],
                                       top_dict[bottom_node_name_b][input_b[1]],
                                       top_dict[bottom_node_name_c][input_c[1]],
                                       ntop=1)
                top_dict[node['name']] = [concat_top]
                setattr(caffe_net, node['name'], concat_top)
        else:
            logging.warn('unknown op type = %s' % node['op'])

    return caffe_net.to_proto()
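# Hedged usage sketch for convert_symbol2proto: converts a tiny MXNet symbol
# to a Caffe prototxt. Assumes mxnet is importable and the module-level
# NO_INPLACE flag is defined by the surrounding script; the symbol itself is
# only an illustration.
import mxnet as mx

def save_converted_proto(path='converted.prototxt'):
    data = mx.sym.Variable('data')
    fc = mx.sym.FullyConnected(data=data, num_hidden=10, name='fc1')
    sym = mx.sym.SoftmaxOutput(data=fc, name='softmax')
    with open(path, 'w') as f:
        f.write(str(convert_symbol2proto(sym)))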
def setLayers(data_source, batch_size, layername, kernel, stride, outCH, label_name,
              transform_param_in, deploy=False):
    # it is tricky to produce the deploy prototxt file, as the data input is not from a layer,
    # so we have to create a workaround;
    # producing training and testing prototxt files is pretty straightforward
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)

    # produce data definition for deploy net
    if deploy == False:
        n.data, n.tops['label'] = L.CPMData(data_param=dict(backend=1,
                                                            source=data_source,
                                                            batch_size=batch_size),
                                            transform_param=transform_param_in,
                                            ntop=2)
        n.tops[label_name[1]], n.tops[label_name[0]] = L.Slice(n.label,
                                                               slice_param=dict(axis=1, slice_point=15),
                                                               ntop=2)
    else:
        input = "data"
        dim1 = 1
        dim2 = 4
        dim3 = 368
        dim4 = 368
        # make an empty "data" layer so the next layer accepting input will be able to take
        # the correct blob name "data"; we will later have to remove this layer from the
        # serialization string, since this is just a placeholder
        n.data = L.Layer()

    # something special before everything
    n.image, n.center_map = L.Slice(n.data,
                                    slice_param=dict(axis=1, slice_point=3),
                                    ntop=2)
    n.pool_center_lower = L.Pooling(n.center_map, kernel_size=9, stride=8, pool=P.Pooling.AVE)

    # just follow the layername array: ..CPCPCPCPCCCC....
    last_layer = 'image'
    stage = 1
    conv_counter = 1
    pool_counter = 1
    drop_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0

    for l in range(0, len(layername)):
        if layername[l] == 'C':
            if state == 'image':
                conv_name = 'conv%d_stage%d' % (conv_counter, stage)
            else:
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
            if stage == 1:
                lr_m = 5
            else:
                lr_m = 1
            n.tops[conv_name] = L.Convolution(n.tops[last_layer],
                                              kernel_size=kernel[l],
                                              num_output=outCH[l],
                                              pad=int(math.floor(kernel[l] / 2)),
                                              param=[dict(lr_mult=lr_m, decay_mult=1),
                                                     dict(lr_mult=lr_m * 2, decay_mult=0)],
                                              weight_filler=dict(type='gaussian', std=0.01),
                                              bias_filler=dict(type='constant'))
            last_layer = conv_name
            if layername[l + 1] != 'L':
                if state == 'image':
                    ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer], in_place=True)
                else:
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer], in_place=True)
                last_layer = ReLUname
            conv_counter += 1
        elif layername[l] == 'P':  # Pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(n.tops[last_layer],
                                                                         kernel_size=kernel[l],
                                                                         stride=stride[l],
                                                                         pool=P.Pooling.MAX)
            last_layer = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
        elif layername[l] == 'L':
            # Loss: the n.loss layer is only in the training and testing nets, not in the deploy net.
            if deploy == False:
                if stage == 1:
                    n.tops['loss_stage%d' % stage] = L.EuclideanLoss(n.tops[last_layer],
                                                                     n.tops[label_name[0]])
                else:
                    n.tops['loss_stage%d' % stage] = L.EuclideanLoss(n.tops[last_layer],
                                                                     n.tops[label_name[1]])
            stage += 1
            last_connect = last_layer
            last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            state = 'image'
        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(n.tops[last_layer],
                                                                             in_place=True,
                                                                             dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            n.tops['concat_stage%d' % stage] = L.Concat(n.tops[last_layer],
                                                        n.tops[last_connect],
                                                        n.pool_center_lower,
                                                        concat_param=dict(axis=1))
            conv_counter = 1
            state = 'fuse'
            last_layer = 'concat_stage%d' % stage
        elif layername[l] == '$':
            if not share_point:
                share_point = last_layer
            else:
                last_layer = share_point

    # final process
    stage -= 1
    if stage == 1:
        n.silence = L.Silence(n.pool_center_lower, ntop=0)

    if deploy == False:
        return str(n.to_proto())
    # for generating the deploy net
    else:
        # generate the input information header string
        deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input + '"', dim1, dim2, dim3, dim4)
        # assemble the input header with the net layers string;
        # remove the first placeholder layer from the net string.
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(str(n.to_proto()).split('layer {')[2:])
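# Hedged usage sketch for setLayers: the layername string drives the
# architecture ('C' conv, 'P' pool, 'D' dropout, 'L' per-stage loss, '@'
# concat, '$' share point), with kernel/stride/outCH giving per-layer
# parameters. The tiny one-stage spec below is illustrative only, not the
# real CPM configuration.
def write_cpm_prototxt(data_source, transform_param_in):
    layername = ['C', 'P', 'C', 'L']
    kernel = [9, 3, 1, 0]
    stride = [1, 2, 1, 0]
    outCH = [128, 128, 15, 0]
    label_name = ['label_1st_lower', 'label_lower']
    with open('pose_train_test.prototxt', 'w') as f:
        f.write(setLayers(data_source, 16, layername, kernel, stride, outCH,
                          label_name, transform_param_in, deploy=False))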
def setLayers_twoBranches(data_source, batch_size, layername, kernel, stride, outCH, label_name, transform_param_in, deploy=False, batchnorm=0, lr_mult_distro=[1, 1, 1]): # it is tricky to produce the deploy prototxt file, as the data input is not from a layer, so we have to creat a workaround # producing training and testing prototxt files is pretty straight forward n = caffe.NetSpec() assert len(layername) == len(kernel) assert len(layername) == len(stride) assert len(layername) == len(outCH) num_parts = transform_param['num_parts'] if deploy == False and "lmdb" not in data_source: if (len(label_name) == 1): n.data, n.tops[label_name[0]] = L.HDF5Data(hdf5_data_param=dict( batch_size=batch_size, source=data_source), ntop=2) elif (len(label_name) == 2): n.data, n.tops[label_name[0]], n.tops[label_name[1]] = L.HDF5Data( hdf5_data_param=dict(batch_size=batch_size, source=data_source), ntop=3) # produce data definition for deploy net elif deploy == False: n.data, n.tops['label'] = L.CPMData( data_param=dict(backend=1, source=data_source, batch_size=batch_size), cpm_transform_param=transform_param_in, ntop=2) n.tops[label_name[2]], n.tops[label_name[3]], n.tops[ label_name[4]], n.tops[label_name[5]] = L.Slice( n.label, slice_param=dict( axis=1, slice_point=[38, num_parts + 1, num_parts + 39]), ntop=4) n.tops[label_name[0]] = L.Eltwise(n.tops[label_name[2]], n.tops[label_name[4]], operation=P.Eltwise.PROD) n.tops[label_name[1]] = L.Eltwise(n.tops[label_name[3]], n.tops[label_name[5]], operation=P.Eltwise.PROD) else: input = "data" dim1 = 1 dim2 = 4 dim3 = 368 dim4 = 368 # make an empty "data" layer so the next layer accepting input will be able to take the correct blob name "data", # we will later have to remove this layer from the serialization string, since this is just a placeholder n.data = L.Layer() # something special before everything n.image, n.center_map = L.Slice(n.data, slice_param=dict(axis=1, slice_point=3), ntop=2) n.silence2 = L.Silence(n.center_map, ntop=0) #n.pool_center_lower = L.Pooling(n.center_map, kernel_size=9, stride=8, pool=P.Pooling.AVE) # just follow arrays..CPCPCPCPCCCC.... 
last_layer = ['image', 'image']
stage = 1
conv_counter = 1
pool_counter = 1
drop_counter = 1
local_counter = 1
state = 'image'  # can be 'image' or 'fuse'
share_point = 0

for l in range(0, len(layername)):
    if layername[l] == 'V':  # pretrained VGG layers
        conv_name = 'conv%d_%d' % (pool_counter, local_counter)
        lr_m = lr_mult_distro[0]
        n.tops[conv_name] = L.Convolution(
            n.tops[last_layer[0]],
            kernel_size=kernel[l],
            num_output=outCH[l],
            pad=int(math.floor(kernel[l] / 2)),
            param=[dict(lr_mult=lr_m, decay_mult=1),
                   dict(lr_mult=lr_m * 2, decay_mult=0)],
            weight_filler=dict(type='gaussian', std=0.01),
            bias_filler=dict(type='constant'))
        last_layer[0] = conv_name
        last_layer[1] = conv_name
        print('%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m))
        ReLUname = 'relu%d_%d' % (pool_counter, local_counter)
        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
        local_counter += 1
        print(ReLUname)

    if layername[l] == 'B':
        pool_counter += 1
        local_counter = 1

    if layername[l] == 'C':
        if state == 'image':
            # there is no 'image' state in subsequent stages
            conv_name = 'conv%d_%d_CPM' % (pool_counter, local_counter)
            lr_m = lr_mult_distro[1]  # the same rate is used for stage 1 and later stages
        else:  # fuse
            conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
            lr_m = lr_mult_distro[2]
            conv_counter += 1
        n.tops[conv_name] = L.Convolution(
            n.tops[last_layer[0]],
            kernel_size=kernel[l],
            num_output=outCH[l],
            pad=int(math.floor(kernel[l] / 2)),
            param=[dict(lr_mult=lr_m, decay_mult=1),
                   dict(lr_mult=lr_m * 2, decay_mult=0)],
            weight_filler=dict(type='gaussian', std=0.01),
            bias_filler=dict(type='constant'))
        last_layer[0] = conv_name
        last_layer[1] = conv_name
        print('%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m))

        if layername[l + 1] != 'L':
            if state == 'image':
                if batchnorm == 1:
                    batchnorm_name = 'bn%d_stage%d' % (conv_counter, stage)
                    n.tops[batchnorm_name] = L.BatchNorm(
                        n.tops[last_layer[0]],
                        param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])
                    last_layer[0] = batchnorm_name
                ReLUname = 'relu%d_%d_CPM' % (pool_counter, local_counter)
                n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
            else:
                if batchnorm == 1:
                    batchnorm_name = 'Mbn%d_stage%d' % (conv_counter, stage)
                    n.tops[batchnorm_name] = L.BatchNorm(
                        n.tops[last_layer[0]],
                        param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])
                    last_layer[0] = batchnorm_name
                ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
            print(ReLUname)
        local_counter += 1

    elif layername[l] == 'C2':
        for level in range(0, 2):
            if state == 'image':
                # there is no 'image' state in subsequent stages
                conv_name = 'conv%d_%d_CPM_L%d' % (pool_counter, local_counter, level + 1)
                lr_m = lr_mult_distro[1]  # the same rate is used for stage 1 and later stages
            else:  # fuse
                conv_name = 'Mconv%d_stage%d_L%d' % (conv_counter, stage, level + 1)
                lr_m = lr_mult_distro[2]
            # final prediction branches: 38 channels for level 1, 19 for level 2
            if layername[l + 1] == 'L2' or layername[l + 1] == 'L3':
                outCH[l] = 38 if level == 0 else 19
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[level]],
                kernel_size=kernel[l],
                num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[dict(lr_mult=lr_m, decay_mult=1),
                       dict(lr_mult=lr_m * 2, decay_mult=0)],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[level] = conv_name
            print('%s\tch=%d\t%.1f' % (last_layer[level], outCH[l], lr_m))

            if layername[l + 1] != 'L2' and layername[l + 1] != 'L3':
                if state == 'image':
                    if batchnorm == 1:
                        batchnorm_name = 'bn%d_stage%d_L%d' % (conv_counter, stage, level + 1)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[level]],
                            param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])
                        last_layer[level] = batchnorm_name
                    ReLUname = 'relu%d_%d_CPM_L%d' % (pool_counter, local_counter, level + 1)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]], in_place=True)
                else:
                    if batchnorm == 1:
                        batchnorm_name = 'Mbn%d_stage%d_L%d' % (conv_counter, stage, level + 1)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[level]],
                            param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])
                        last_layer[level] = batchnorm_name
                    ReLUname = 'Mrelu%d_stage%d_L%d' % (conv_counter, stage, level + 1)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]], in_place=True)
                print(ReLUname)
        conv_counter += 1
        local_counter += 1

    elif layername[l] == 'P':  # pooling
        pool_name = 'pool%d_stage%d' % (pool_counter, stage)
        n.tops[pool_name] = L.Pooling(
            n.tops[last_layer[0]],
            kernel_size=kernel[l],
            stride=stride[l],
            pool=P.Pooling.MAX)
        last_layer[0] = pool_name
        pool_counter += 1
        local_counter = 1
        conv_counter += 1
        print(last_layer[0])

    elif layername[l] == 'L':
        # Loss: the loss layers exist only in the train/test nets, not in the deploy net.
        if deploy == False and "lmdb" not in data_source:
            n.tops['map_vec_stage%d' % stage] = L.Flatten(n.tops[last_layer[0]])
            n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                n.tops['map_vec_stage%d' % stage], n.tops[label_name[1]])
        elif deploy == False:
            level = 1
            name = 'weight_stage%d' % stage
            n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                     n.tops[label_name[(level + 2)]],
                                     operation=P.Eltwise.PROD)
            n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                n.tops[name], n.tops[label_name[level]])
        print('loss %d' % stage)
        stage += 1
        conv_counter = 1
        pool_counter = 1
        drop_counter = 1
        local_counter = 1
        state = 'image'

    elif layername[l] == 'L2':
        # Loss: the loss layers exist only in the train/test nets, not in the deploy net.
        weight = [lr_mult_distro[3], 1]
        for level in range(0, 2):
            if deploy == False and "lmdb" not in data_source:
                # the Flatten top and the loss bottom must share the per-level name
                flat_name = 'map_vec_stage%d_L%d' % (stage, level + 1)
                n.tops[flat_name] = L.Flatten(n.tops[last_layer[level]])
                n.tops['loss_stage%d_L%d' % (stage, level + 1)] = L.EuclideanLoss(
                    n.tops[flat_name],
                    n.tops[label_name[level]],
                    loss_weight=weight[level])
            elif deploy == False:
                name = 'weight_stage%d_L%d' % (stage, level + 1)
                n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                         n.tops[label_name[(level + 2)]],
                                         operation=P.Eltwise.PROD)
                n.tops['loss_stage%d_L%d' % (stage, level + 1)] = L.EuclideanLoss(
                    n.tops[name],
                    n.tops[label_name[level]],
                    loss_weight=weight[level])
            print('loss %d level %d' % (stage, level + 1))
        stage += 1
        conv_counter = 1
        pool_counter = 1
        drop_counter = 1
        local_counter = 1
        state = 'image'

    elif layername[l] == 'L3':
        # Loss: the loss layers exist only in the train/test nets, not in the deploy net.
        weight = [lr_mult_distro[3], 1]
        if deploy == False:
            level = 0
            n.tops['loss_stage%d_L%d' % (stage, level + 1)] = L.Euclidean2Loss(
                n.tops[last_layer[level]],
                n.tops[label_name[level]],
                n.tops[label_name[2]],
                loss_weight=weight[level])
            print('loss %d level %d' % (stage, level + 1))
            level = 1
            n.tops['loss_stage%d_L%d' % (stage, level + 1)] = L.EuclideanLoss(
                n.tops[last_layer[level]],
                n.tops[label_name[level]],
                loss_weight=weight[level])
            print('loss %d level %d' % (stage, level + 1))
        stage += 1
        conv_counter = 1
        pool_counter = 1
        drop_counter = 1
        local_counter = 1
        state = 'image'

    elif layername[l] == 'D':
        if deploy == False:
            n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                n.tops[last_layer[0]], in_place=True,
                dropout_param=dict(dropout_ratio=0.5))
            drop_counter += 1

    elif layername[l] == '@':
        n.tops['concat_stage%d' % stage] = L.Concat(
            n.tops[last_layer[0]], n.tops[last_layer[1]], n.tops[share_point],
            concat_param=dict(axis=1))
        local_counter = 1
        state = 'fuse'
        last_layer[0] = 'concat_stage%d' % stage
        last_layer[1] = 'concat_stage%d' % stage
        print(last_layer)

    elif layername[l] == '$':
        share_point = last_layer[0]
        pool_counter += 1
        local_counter = 1
        print('share')

# final process
stage -= 1
if deploy == False:
    return str(n.to_proto())
else:
    # for the deploy net: generate the input information header string
    deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
        '"' + input + '"', dim1, dim2, dim3, dim4)
    # assemble the input header with the net layers string; drop the first
    # placeholder (data) layer from the net string
    return deploy_str + '\n' + 'layer {' + 'layer {'.join(
        str(n.to_proto()).split('layer {')[2:])
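# Hypothetical driver for the stage-string generator above. The enclosing
# function's name and signature are not shown in this section, so 'setLayers'
# and the argument order below are assumptions; only the descriptor tokens
# ('V' VGG conv, 'P' pool, 'C'/'C2' CPM convs, '@' concat, '$' share point,
# 'L'/'L2'/'L3' losses) come from the code above.
#
# layername = ['V', 'V', 'P'] + ['C2'] * 3 + ['L2']
# kernel    = [3, 3, 2, 3, 3, 1, 0]
# outCH     = [64, 64, 0, 128, 128, 19, 0]
# stride    = [1, 1, 2, 1, 1, 1, 0]
# proto_str = setLayers('train.lmdb', layername, kernel, stride, outCH,
#                       label_name, deploy=False, batchnorm=0,
#                       lr_mult_distro=[1, 1, 4, 1])
# with open('pose_train.prototxt', 'w') as f:
#     f.write(proto_str)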
def get_caffe_layer(node, net, input_dims):
    """Generate a caffe layer for the corresponding mxnet op.

    Args:
        node (iterable from MxnetParser): mxnet op summary generated by MxnetParser
        net (caffe.net): caffe NetSpec object
        input_dims: spatial dimensions of the network input

    Returns:
        caffe.layers: equivalent caffe layer
    """
    print(node)

    if node['type'] == 'Convolution':
        assert len(node['inputs']) == 1, 'Convolution layers can have only one input'
        conv_params = node['attrs']
        kernel_size = make_list(conv_params['kernel'])[0]
        num_filters = make_list(conv_params['num_filter'])[0]
        stride = make_list(conv_params['stride'])[0] if 'stride' in conv_params else 1
        padding = make_list(conv_params['pad'])[0]
        dilation = make_list(conv_params['dilate'])[0] if 'dilate' in conv_params else 1
        convolution_param = {'pad': padding,
                             'kernel_size': kernel_size,
                             'num_output': num_filters,
                             'stride': stride,
                             'dilation': dilation}
        return layers.Convolution(net[node['inputs'][0]],
                                  convolution_param=convolution_param)

    if node['type'] == 'Activation':
        assert len(node['inputs']) == 1, 'Activation layers can have only one input'
        assert node['attrs']['act_type'] == 'relu'
        return layers.ReLU(net[node['inputs'][0]])

    if node['type'] == 'Pooling':
        assert len(node['inputs']) == 1, 'Pooling layers can have only one input'
        kernel_size = make_list(node['attrs']['kernel'])
        stride = make_list(node['attrs']['stride'])
        pooling_type = node['attrs']['pool_type']
        padding = make_list(node['attrs']['pad']) if 'pad' in node['attrs'] else [0]
        if pooling_type == 'max':
            pooling = params.Pooling.MAX
        elif pooling_type == 'avg':
            pooling = params.Pooling.AVE
        else:
            raise ValueError('Unsupported pooling type: %s' % pooling_type)
        pooling_param = {'pool': pooling,
                         'pad': padding[0],
                         'kernel_size': kernel_size[0],
                         'stride': stride[0]}
        return layers.Pooling(net[node['inputs'][0]], pooling_param=pooling_param)

    if node['type'] == 'L2Normalization':
        across_spatial = node['attrs']['mode'] != 'channel'
        channel_shared = False
        scale_filler = {'type': "constant",
                        'value': constants.NORMALIZATION_FACTOR}
        norm_param = {'across_spatial': across_spatial,
                      'scale_filler': scale_filler,
                      'channel_shared': channel_shared}
        return layers.Normalize(net[node['inputs'][0]], norm_param=norm_param)

    if node['type'] == 'BatchNorm':
        bn_param = {'moving_average_fraction': 0.90,
                    'use_global_stats': True,
                    'eps': 1e-5}
        return layers.BatchNorm(net[node['inputs'][0]], in_place=True, **bn_param)

    # Note: the Permute layer is implemented only in Wei Liu's ssd branch of
    # caffe, not in caffe master.
    if node['type'] == 'transpose':
        order = make_list(node['attrs']['axes'])
        return layers.Permute(net[node['inputs'][0]], permute_param={'order': order})

    if node['type'] == 'Flatten':
        axis = 2 if node['inputs'][0].endswith('anchors') else 1
        return layers.Flatten(net[node['inputs'][0]], flatten_param={'axis': axis})

    if node['type'] == 'Concat':
        # In the ssd model, always concatenate along the last axis, since anchor
        # boxes have an extra dimension in caffe (that includes variance).
        axis = -1
        concat_inputs = [net[inp] for inp in node['inputs']]
        return layers.Concat(*concat_inputs, concat_param={'axis': axis})

    if node['type'] == 'Reshape':
        if node['name'] == 'multibox_anchors':
            reshape_dims = [1, 2, -1]
        else:
            reshape_dims = make_list(node['attrs']['shape'])
        return layers.Reshape(net[node['inputs'][0]],
                              reshape_param={'shape': {'dim': reshape_dims}})

    if node['type'] == '_contrib_MultiBoxPrior':
        priorbox_inputs = [net[inp] for inp in node['inputs']] + [net["data"]]
        sizes = make_list(node["attrs"]["sizes"])
        min_size = sizes[0] * input_dims[0]
        max_size = int(round((sizes[1] * input_dims[0]) ** 2 / min_size))
        aspect_ratio = make_list(node["attrs"]["ratios"])
        steps = make_list(node["attrs"]["steps"])
        param = {'clip': node["attrs"]["clip"] == "true",
                 'flip': False,
                 'min_size': int(round(min_size)),
                 'max_size': int(round(max_size)),
                 'aspect_ratio': aspect_ratio,
                 'variance': [.1, .1, .2, .2],
                 'step': int(round(steps[0] * input_dims[0]))}
        return layers.PriorBox(*priorbox_inputs, prior_box_param=param)

    if node['type'] == '_contrib_MultiBoxDetection':
        multibox_inputs = [net[inp] for inp in node['inputs']]
        # caffe's DetectionOutput expects (loc, conf, priorbox) bottoms
        bottom_order = [1, 0, 2]
        multibox_inputs = [multibox_inputs[i] for i in bottom_order]
        param = {'num_classes': constants.NUM_CLASSES,
                 'share_location': True,
                 'background_label_id': 0,
                 'nms_param': {'nms_threshold': float(node['attrs']['nms_threshold']),
                               'top_k': int(node['attrs']['nms_topk'])},
                 'keep_top_k': make_list(node['attrs']['nms_topk'])[0],
                 'confidence_threshold': 0.01,
                 'code_type': params.PriorBox.CENTER_SIZE}
        return layers.DetectionOutput(*multibox_inputs, detection_output_param=param)

    if node['type'] in ['SoftmaxActivation', 'SoftmaxOutput']:
        if 'mode' not in node['attrs']:
            axis = 1
        elif node['attrs']['mode'] == 'channel':
            axis = 1
        else:
            axis = 0
        # note: caffe expects confidence scores to be flattened before the
        # detection output layer receives them
        return layers.Flatten(
            layers.Permute(layers.Softmax(net[node['inputs'][0]], axis=axis),
                           permute_param={'order': [0, 2, 1]}),
            flatten_param={'axis': 1})
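# A minimal driver sketch for get_caffe_layer. It assumes MxnetParser yields
# node dicts with 'name', 'type', 'inputs', and 'attrs' keys (the shape the
# function above already relies on); the DummyData placeholder input and the
# name 'convert_symbol' are illustrative assumptions, and `import caffe` plus
# the module's existing `layers` alias are presumed available.
def convert_symbol(parsed_nodes, input_dims=(300, 300)):
    net = caffe.NetSpec()
    # placeholder input blob named 'data', which _contrib_MultiBoxPrior expects
    net['data'] = layers.DummyData(num=1, channels=3,
                                   height=input_dims[0], width=input_dims[1])
    for node in parsed_nodes:
        top = get_caffe_layer(node, net, input_dims)
        if top is not None:
            net[node['name']] = top
    return net.to_proto()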
def pool(inputs, kernel_size=2, stride=2):
    return L.Pooling(inputs, kernel_size=kernel_size, stride=stride,
                     pool=P.Pooling.MAX)
def global_pooling_layer(previous, name, mode="avg"):
    """Create a global pooling layer (average by default, max otherwise)."""
    pool = cp.Pooling.AVE if mode == "avg" else cp.Pooling.MAX
    return cl.Pooling(previous, name=name, pool=pool, global_pooling=True)
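# Usage sketch; 'cl' and 'cp' are presumably this snippet's aliases for
# caffe.layers and caffe.params, e.g. `from caffe import layers as cl,
# params as cp` (the import itself is not shown and is an assumption).
# net.global_pool = global_pooling_layer(net.conv5_3, name='global_pool', mode="avg")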
def mixed_5b(net, common_bottom_layer):
    # branch 0: 1x1
    top_layer_branch0 = 'Mixed_5b/Branch_0/Conv2d_1x1'
    conv_bn_layer(net, in_layer=common_bottom_layer, out_layer=top_layer_branch0,
                  use_bn=True, use_relu=True, num_output=96, kernel_size=1, pad=0, stride=1)

    # branch 1: 1x1 -> 5x5
    top_layer_branch1 = 'Mixed_5b/Branch_1/Conv2d_0a_1x1'
    conv_bn_layer(net, in_layer=common_bottom_layer, out_layer=top_layer_branch1,
                  use_bn=True, use_relu=True, num_output=48, kernel_size=1, pad=0, stride=1)
    bottom_layer_branch1 = top_layer_branch1
    top_layer_branch1 = 'Mixed_5b/Branch_1/Conv2d_0b_5x5'
    conv_bn_layer(net, in_layer=bottom_layer_branch1, out_layer=top_layer_branch1,
                  use_bn=True, use_relu=True, num_output=64, kernel_size=5, pad=2, stride=1)

    # branch 2: 1x1 -> 3x3 -> 3x3
    top_layer_branch2 = 'Mixed_5b/Branch_2/Conv2d_0a_1x1'
    conv_bn_layer(net, in_layer=common_bottom_layer, out_layer=top_layer_branch2,
                  use_bn=True, use_relu=True, num_output=64, kernel_size=1, pad=0, stride=1)
    bottom_layer_branch2 = top_layer_branch2
    top_layer_branch2 = 'Mixed_5b/Branch_2/Conv2d_0b_3x3'
    conv_bn_layer(net, in_layer=bottom_layer_branch2, out_layer=top_layer_branch2,
                  use_bn=True, use_relu=True, num_output=96, kernel_size=3, pad=1, stride=1)
    bottom_layer_branch2 = top_layer_branch2
    top_layer_branch2 = 'Mixed_5b/Branch_2/Conv2d_0c_3x3'
    conv_bn_layer(net, in_layer=bottom_layer_branch2, out_layer=top_layer_branch2,
                  use_bn=True, use_relu=True, num_output=96, kernel_size=3, pad=1, stride=1)

    # branch 3: avg pool -> 1x1
    top_layer_branch3 = 'mixed5b_branch3_avepool_0a'
    net[top_layer_branch3] = layers.Pooling(net[common_bottom_layer],
                                            pool=params.Pooling.AVE,
                                            kernel_size=3, stride=1, pad=1)
    bottom_layer_branch3 = top_layer_branch3
    top_layer_branch3 = 'Mixed_5b/Branch_3/Conv2d_0b_1x1'
    conv_bn_layer(net, in_layer=bottom_layer_branch3, out_layer=top_layer_branch3,
                  use_bn=True, use_relu=True, num_output=64, kernel_size=1, pad=0, stride=1)

    top_layer = 'mixed5b'
    net[top_layer] = layers.Concat(*[net[top_layer_branch0], net[top_layer_branch1],
                                     net[top_layer_branch2], net[top_layer_branch3]],
                                   axis=1)
    return top_layer
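# conv_bn_layer is called throughout these Inception blocks but is not shown
# in this section. Below is a minimal sketch of what it plausibly does
# (Convolution -> BatchNorm -> Scale -> ReLU, all registered in the NetSpec
# under names derived from out_layer); the fillers and the '/bn', '/scale',
# '/relu' name suffixes are assumptions, not the original helper.
def conv_bn_layer(net, in_layer, out_layer, use_bn, use_relu,
                  num_output, kernel_size, pad, stride):
    net[out_layer] = layers.Convolution(net[in_layer], num_output=num_output,
                                        kernel_size=kernel_size, pad=pad,
                                        stride=stride, bias_term=not use_bn,
                                        weight_filler=dict(type='xavier'))
    last = out_layer
    if use_bn:
        net[out_layer + '/bn'] = layers.BatchNorm(net[last], in_place=True)
        net[out_layer + '/scale'] = layers.Scale(net[out_layer + '/bn'],
                                                 bias_term=True, in_place=True)
        last = out_layer + '/scale'
    if use_relu:
        net[out_layer + '/relu'] = layers.ReLU(net[last], in_place=True)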
def ResNet(lmdb, batch_size, mean_file, model):
    n = caffe.NetSpec()
    # data layer: model=False builds the TRAIN phase, model=True the TEST phase
    n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB,
                             source=lmdb,
                             include=dict(phase=1 if model else 0),
                             transform_param=dict(scale=1. / 255, mirror=True,
                                                  crop_size=227,
                                                  mean_file=mean_file),
                             ntop=2)
    # convolution layer conv1
    n.conv1 = L.Convolution(n.data,
                            param=[dict(lr_mult=1, decay_mult=1),
                                   dict(lr_mult=2, decay_mult=0)],
                            kernel_size=7, stride=2, num_output=64, pad=3,
                            weight_filler=dict(type="gaussian", std=0.01),
                            bias_filler=dict(type='constant', value=0),
                            name="conv1/7x7_s2")
    # ReLU layer
    n.relu1 = L.ReLU(n.conv1, in_place=True, name="conv1/relu_7x7")
    # pooling layer (note: the layer name says 3x3 but the kernel is 2x2)
    n.pool1 = L.Pooling(n.conv1, kernel_size=2, stride=2, pool=P.Pooling.MAX,
                        name="pool1/3x3_s2")

    n.conv2 = L.Convolution(n.pool1,
                            param=[dict(lr_mult=1, decay_mult=1),
                                   dict(lr_mult=2, decay_mult=0)],
                            kernel_size=1, num_output=64,
                            weight_filler=dict(type="xavier"),
                            bias_filler=dict(type='constant', value=0.2),
                            name="conv2/3x3_reduce")
    n.relu2 = L.ReLU(n.conv2, in_place=True, name="conv2/relu_3x3_reduce")
    n.conv2_3x3 = L.Convolution(n.conv2,
                                param=[dict(lr_mult=1, decay_mult=1),
                                       dict(lr_mult=2, decay_mult=0)],
                                kernel_size=3, num_output=192, pad=1,
                                weight_filler=dict(type="xavier"),
                                bias_filler=dict(type='constant', value=0.2),
                                name="conv2/3x3")
    # a distinct attribute name keeps both ReLU layers in the generated prototxt
    n.relu2_3x3 = L.ReLU(n.conv2_3x3, in_place=True, name="conv2/relu_3x3")
    n.pool2 = L.Pooling(n.conv2_3x3, kernel_size=2, stride=2,
                        pool=P.Pooling.MAX, name="pool2/3x3_s2")
    # the original snippet ended here without a return; returning the proto
    # makes the builder usable
    return n.to_proto()
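# Usage sketch (paths are placeholders): the boolean 'model' flag selects the
# TRAIN (False) or TEST (True) phase for the Data layer.
# with open('train.prototxt', 'w') as f:
#     f.write(str(ResNet('train_lmdb', 64, 'mean.binaryproto', model=False)))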
def mixed_7a(net, common_bottom_layer):
    # branch 0: 1x1 -> 3x3 stride 2
    top_layer_branch0 = 'Mixed_7a/Branch_0/Conv2d_0a_1x1'
    conv_bn_layer(net, in_layer=common_bottom_layer, out_layer=top_layer_branch0,
                  use_bn=True, use_relu=True, num_output=256, kernel_size=1, pad=0, stride=1)
    bottom_layer_branch0 = top_layer_branch0
    top_layer_branch0 = 'Mixed_7a/Branch_0/Conv2d_1a_3x3'
    conv_bn_layer(net, in_layer=bottom_layer_branch0, out_layer=top_layer_branch0,
                  use_bn=True, use_relu=True, num_output=384, kernel_size=3, pad=0, stride=2)

    # branch 1: 1x1 -> 3x3 stride 2
    top_layer_branch1 = 'Mixed_7a/Branch_1/Conv2d_0a_1x1'
    conv_bn_layer(net, in_layer=common_bottom_layer, out_layer=top_layer_branch1,
                  use_bn=True, use_relu=True, num_output=256, kernel_size=1, pad=0, stride=1)
    bottom_layer_branch1 = top_layer_branch1
    top_layer_branch1 = 'Mixed_7a/Branch_1/Conv2d_1a_3x3'
    conv_bn_layer(net, in_layer=bottom_layer_branch1, out_layer=top_layer_branch1,
                  use_bn=True, use_relu=True, num_output=288, kernel_size=3, pad=0, stride=2)

    # branch 2: 1x1 -> 3x3 -> 3x3 stride 2
    top_layer_branch2 = 'Mixed_7a/Branch_2/Conv2d_0a_1x1'
    conv_bn_layer(net, in_layer=common_bottom_layer, out_layer=top_layer_branch2,
                  use_bn=True, use_relu=True, num_output=256, kernel_size=1, pad=0, stride=1)
    bottom_layer_branch2 = top_layer_branch2
    top_layer_branch2 = 'Mixed_7a/Branch_2/Conv2d_0b_3x3'
    conv_bn_layer(net, in_layer=bottom_layer_branch2, out_layer=top_layer_branch2,
                  use_bn=True, use_relu=True, num_output=288, kernel_size=3, pad=1, stride=1)
    bottom_layer_branch2 = top_layer_branch2
    top_layer_branch2 = 'Mixed_7a/Branch_2/Conv2d_1a_3x3'
    conv_bn_layer(net, in_layer=bottom_layer_branch2, out_layer=top_layer_branch2,
                  use_bn=True, use_relu=True, num_output=320, kernel_size=3, pad=0, stride=2)

    # branch 3: max pool stride 2
    top_layer_branch3 = 'mixed7a_branch3_maxpool_0'
    net[top_layer_branch3] = layers.Pooling(net[common_bottom_layer],
                                            pool=params.Pooling.MAX,
                                            kernel_size=3, stride=2, pad=0)

    top_layer = 'mixed7a'
    net[top_layer] = layers.Concat(*[net[top_layer_branch0], net[top_layer_branch1],
                                     net[top_layer_branch2], net[top_layer_branch3]],
                                   axis=1)
    return top_layer
def VGGNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False,
               dilated=False, nopool=False, dropout=True, freeze_layers=[]):
    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)}

    assert from_layer in net.keys()
    net.conv1_1 = L.Convolution(net[from_layer], num_output=64, pad=1, kernel_size=3, **kwargs)
    net.relu1_1 = L.ReLU(net.conv1_1, in_place=True)
    net.conv1_2 = L.Convolution(net.relu1_1, num_output=64, pad=1, kernel_size=3, **kwargs)
    net.relu1_2 = L.ReLU(net.conv1_2, in_place=True)

    if nopool:
        name = 'conv1_3'
        net[name] = L.Convolution(net.relu1_2, num_output=64, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool1'
        net.pool1 = L.Pooling(net.relu1_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv2_1 = L.Convolution(net[name], num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu2_1 = L.ReLU(net.conv2_1, in_place=True)
    net.conv2_2 = L.Convolution(net.relu2_1, num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu2_2 = L.ReLU(net.conv2_2, in_place=True)

    if nopool:
        name = 'conv2_3'
        net[name] = L.Convolution(net.relu2_2, num_output=128, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool2'
        net[name] = L.Pooling(net.relu2_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv3_1 = L.Convolution(net[name], num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_1 = L.ReLU(net.conv3_1, in_place=True)
    net.conv3_2 = L.Convolution(net.relu3_1, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_2 = L.ReLU(net.conv3_2, in_place=True)
    net.conv3_3 = L.Convolution(net.relu3_2, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_3 = L.ReLU(net.conv3_3, in_place=True)

    if nopool:
        name = 'conv3_4'
        net[name] = L.Convolution(net.relu3_3, num_output=256, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool3'
        net[name] = L.Pooling(net.relu3_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv4_1 = L.Convolution(net[name], num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu4_1 = L.ReLU(net.conv4_1, in_place=True)
    net.conv4_2 = L.Convolution(net.relu4_1, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu4_2 = L.ReLU(net.conv4_2, in_place=True)
    net.conv4_3 = L.Convolution(net.relu4_2, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu4_3 = L.ReLU(net.conv4_3, in_place=True)

    if nopool:
        name = 'conv4_4'
        net[name] = L.Convolution(net.relu4_3, num_output=512, pad=1, kernel_size=3, stride=2, **kwargs)
    else:
        name = 'pool4'
        net[name] = L.Pooling(net.relu4_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    net.conv5_1 = L.Convolution(net[name], num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu5_1 = L.ReLU(net.conv5_1, in_place=True)
    net.conv5_2 = L.Convolution(net.relu5_1, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu5_2 = L.ReLU(net.conv5_2, in_place=True)
    net.conv5_3 = L.Convolution(net.relu5_2, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu5_3 = L.ReLU(net.conv5_3, in_place=True)

    if need_fc:
        if dilated:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3, num_output=512, pad=1, kernel_size=3, stride=1, **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=1)
        else:
            if nopool:
                name = 'conv5_4'
                net[name] = L.Convolution(net.relu5_3, num_output=512, pad=1, kernel_size=3, stride=2, **kwargs)
            else:
                name = 'pool5'
                net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX, kernel_size=2, stride=2)

        if fully_conv:
            if dilated:
                if reduced:
                    net.fc6 = L.Convolution(net[name], num_output=1024, pad=6, kernel_size=3, dilation=6, **kwargs)
                else:
                    net.fc6 = L.Convolution(net[name], num_output=4096, pad=6, kernel_size=7, dilation=2, **kwargs)
            else:
                if reduced:
                    net.fc6 = L.Convolution(net[name], num_output=1024, pad=3, kernel_size=3, dilation=3, **kwargs)
                else:
                    net.fc6 = L.Convolution(net[name], num_output=4096, pad=3, kernel_size=7, **kwargs)
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True)
            if reduced:
                net.fc7 = L.Convolution(net.relu6, num_output=1024, kernel_size=1, **kwargs)
            else:
                net.fc7 = L.Convolution(net.relu6, num_output=4096, kernel_size=1, **kwargs)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True)
        else:
            net.fc6 = L.InnerProduct(net.pool5, num_output=4096)
            net.relu6 = L.ReLU(net.fc6, in_place=True)
            if dropout:
                net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True)
            net.fc7 = L.InnerProduct(net.relu6, num_output=4096)
            net.relu7 = L.ReLU(net.fc7, in_place=True)
            if dropout:
                net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True)

    # Update freeze layers.
    kwargs['param'] = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]
    layers = net.keys()
    for freeze_layer in freeze_layers:
        if freeze_layer in layers:
            net.update(freeze_layer, kwargs)

    return net
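# Usage sketch for the body builder above, in the SSD style; the LMDB path,
# batch size, and frozen-layer list are placeholder assumptions.
# net = caffe.NetSpec()
# net.data, net.label = L.Data(batch_size=32, backend=P.Data.LMDB,
#                              source='train_lmdb', ntop=2)
# VGGNetBody(net, from_layer='data', need_fc=True, fully_conv=True,
#            reduced=True, dilated=True, dropout=False,
#            freeze_layers=['conv1_1', 'conv1_2'])
# with open('vgg_body.prototxt', 'w') as f:
#     f.write(str(net.to_proto()))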
def inception_resnet_v2(net):
    net['data'] = layers.DummyData(num=1, channels=3, height=299, width=299)

    # 149 x 149 x 32
    top_layer = 'Conv2d_1a_3x3'
    conv_bn_layer(net, in_layer='data', out_layer=top_layer, use_bn=True, use_relu=True,
                  num_output=32, kernel_size=3, pad=0, stride=2)
    bottom_layer = top_layer

    # 147 x 147 x 32
    top_layer = 'Conv2d_2a_3x3'
    conv_bn_layer(net, in_layer=bottom_layer, out_layer=top_layer, use_bn=True, use_relu=True,
                  num_output=32, kernel_size=3, pad=0, stride=1)
    bottom_layer = top_layer

    # 147 x 147 x 64 (pad=1, 'SAME' padding, keeps the 147 x 147 size)
    top_layer = 'Conv2d_2b_3x3'
    conv_bn_layer(net, in_layer=bottom_layer, out_layer=top_layer, use_bn=True, use_relu=True,
                  num_output=64, kernel_size=3, pad=1, stride=1)
    bottom_layer = top_layer

    # 73 x 73 x 64
    top_layer = 'maxpool_3a'
    net[top_layer] = layers.Pooling(net[bottom_layer], pool=params.Pooling.MAX,
                                    kernel_size=3, stride=2, pad=0)
    bottom_layer = top_layer

    # 73 x 73 x 80
    top_layer = 'Conv2d_3b_1x1'
    conv_bn_layer(net, in_layer=bottom_layer, out_layer=top_layer, use_bn=True, use_relu=True,
                  num_output=80, kernel_size=1, pad=0, stride=1)
    bottom_layer = top_layer

    # 71 x 71 x 192
    top_layer = 'Conv2d_4a_3x3'
    conv_bn_layer(net, in_layer=bottom_layer, out_layer=top_layer, use_bn=True, use_relu=True,
                  num_output=192, kernel_size=3, pad=0, stride=1)
    bottom_layer = top_layer

    # 35 x 35 x 192
    top_layer = 'maxpool_5a'
    net[top_layer] = layers.Pooling(net[bottom_layer], pool=params.Pooling.MAX,
                                    kernel_size=3, stride=2, pad=0)
    bottom_layer = top_layer

    # 35 x 35 x 320 (Mixed_5b)
    bottom_layer = mixed_5b(net, bottom_layer)
    bottom_layer = inception_block_35(net, bottom_layer, 10, 0.17, repeat_name='Repeat')

    # 17 x 17 x 1088
    bottom_layer = mixed_6a(net, bottom_layer)
    bottom_layer = inception_block_17(net, bottom_layer, 20, 0.10, repeat_name='Repeat_1')

    bottom_layer = mixed_7a(net, bottom_layer)
    bottom_layer = inception_block_8(net, bottom_layer, 9, 0.20,
                                     repeat_name='Repeat_2', apply_last_relu=True)
    bottom_layer = inception_block_8(net, bottom_layer, 1, 0.20,
                                     repeat_name='', apply_last_relu=False)

    top_layer = 'Conv2d_7b_1x1'
    conv_bn_layer(net, in_layer=bottom_layer, out_layer=top_layer, use_bn=True, use_relu=True,
                  num_output=1536, kernel_size=1, pad=0, stride=1)

    with open('gg.prototxt', 'w') as f:
        print(net.to_proto(), file=f)
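# Driving the builder above only requires an empty NetSpec: the function
# registers every layer in place and writes 'gg.prototxt' itself.
# net = caffe.NetSpec()
# inception_resnet_v2(net)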
def max_pool(data, ks=2, stride=2):
    return L.Pooling(data, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
def global_avg_pool(bottom, kernelSize=3):
    # kernelSize is unused: global_pooling already covers the whole input map
    return L.Pooling(bottom, pool=P.Pooling.AVE, global_pooling=True)
def ave_pool(bottom, ks, stride=1):
    return L.Pooling(bottom, pool=P.Pooling.AVE, kernel_size=ks, stride=stride)
def inception_bn(bottom, conv_output):
    conv_1x1 = L.Convolution(bottom, kernel_size=1, num_output=conv_output['conv_1x1'],
                             param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant', value=0.2))
    bn_1x1 = L.BatchNorm(conv_1x1, use_global_stats=False)
    relu_1x1 = L.ReLU(bn_1x1, in_place=True)

    conv_3x3_reduce = L.Convolution(bottom, kernel_size=1, num_output=conv_output['conv_3x3_reduce'],
                                    param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                    weight_filler=dict(type='xavier'),
                                    bias_filler=dict(type='constant', value=0.2))
    bn_3x3_reduce = L.BatchNorm(conv_3x3_reduce, use_global_stats=False)
    relu_3x3_reduce = L.ReLU(bn_3x3_reduce, in_place=True)
    conv_3x3 = L.Convolution(bn_3x3_reduce, kernel_size=3, num_output=conv_output['conv_3x3'], pad=1,
                             param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant', value=0.2))
    bn_3x3 = L.BatchNorm(conv_3x3, use_global_stats=False)
    relu_3x3 = L.ReLU(bn_3x3, in_place=True)

    conv_5x5_reduce = L.Convolution(bottom, kernel_size=1, num_output=conv_output['conv_5x5_reduce'],
                                    param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                    weight_filler=dict(type='xavier'),
                                    bias_filler=dict(type='constant', value=0.2))
    bn_5x5_reduce = L.BatchNorm(conv_5x5_reduce, use_global_stats=False)
    relu_5x5_reduce = L.ReLU(bn_5x5_reduce, in_place=True)
    conv_5x5 = L.Convolution(bn_5x5_reduce, kernel_size=5, num_output=conv_output['conv_5x5'], pad=2,
                             param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant', value=0.2))
    bn_5x5 = L.BatchNorm(conv_5x5, use_global_stats=False)
    relu_5x5 = L.ReLU(bn_5x5, in_place=True)

    pool = L.Pooling(bottom, kernel_size=3, stride=1, pad=1, pool=P.Pooling.MAX)
    pool_proj = L.Convolution(pool, kernel_size=1, num_output=conv_output['pool_proj'],
                              param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                              weight_filler=dict(type='xavier'),
                              bias_filler=dict(type='constant', value=0.2))
    bn_proj = L.BatchNorm(pool_proj, use_global_stats=False)
    relu_pool_proj = L.ReLU(bn_proj, in_place=True)

    # the ReLUs run in place, so the bn_* tops feed Concat already rectified
    concat = L.Concat(bn_1x1, bn_3x3, bn_5x5, bn_proj)
    return conv_1x1, bn_1x1, relu_1x1, conv_3x3_reduce, bn_3x3_reduce, relu_3x3_reduce, \
        conv_3x3, bn_3x3, relu_3x3, conv_5x5_reduce, bn_5x5_reduce, relu_5x5_reduce, \
        conv_5x5, bn_5x5, relu_5x5, pool, pool_proj, bn_proj, relu_pool_proj, concat
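# Usage sketch: all twenty returned tops must be bound to NetSpec attributes so
# they appear in the prototxt; the 'icp1_*' names and the channel dict below
# are placeholder assumptions.
# [net.icp1_1x1, net.icp1_bn1, net.icp1_relu1,
#  net.icp1_3x3r, net.icp1_bn3r, net.icp1_relu3r,
#  net.icp1_3x3, net.icp1_bn3, net.icp1_relu3,
#  net.icp1_5x5r, net.icp1_bn5r, net.icp1_relu5r,
#  net.icp1_5x5, net.icp1_bn5, net.icp1_relu5,
#  net.icp1_pool, net.icp1_proj, net.icp1_bnp, net.icp1_relup,
#  net.icp1_out] = inception_bn(net.pool2,
#                               dict(conv_1x1=64, conv_3x3_reduce=96, conv_3x3=128,
#                                    conv_5x5_reduce=16, conv_5x5=32, pool_proj=32))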
def create_neural_net(input_file, batch_size=50):
    net = caffe.NetSpec()
    net.data, net.label = L.Data(batch_size=batch_size, source=input_file,
                                 backend=caffe.params.Data.LMDB, ntop=2,
                                 include=dict(phase=caffe.TEST), name='juniward04')

    ## pre-process: fixed DCT filter bank, then truncated-absolute quantization
    ## ('dct4' filler and QuantTruncAbs are custom layers, not in caffe master)
    net.conv1 = L.Convolution(net.data, num_output=16, kernel_size=4, stride=1, pad=1,
                              weight_filler=dict(type='dct4'),
                              param=[{'lr_mult': 0, 'decay_mult': 0}], bias_term=False)
    TRUNCABS = caffe_pb2.QuantTruncAbsParameter.TRUNCABS
    net.quanttruncabs = L.QuantTruncAbs(net.conv1, process=TRUNCABS, threshold=8,
                                        in_place=True)

    ## block 1
    [net.conv1_proj, net.bn2, net.scale2, net.conv512_1, net.bn2_1, net.scale2_1,
     net.relu512_1, net.conv512_to_256, net.bn2_2, net.scale2_2, net.res512_to_256,
     net.relu512_to_256] = add_downsampling_block(net.quanttruncabs, 12)
    ## block 2
    [net.conv256_1, net.bn2_3, net.scale2_3, net.relu256_1, net.conv256_2, net.bn2_4,
     net.scale2_4, net.res256_2, net.relu256_2] = add_skip_block(net.res512_to_256, 24)
    ## block 3
    [net.res256_2_proj, net.bn2_5, net.scale2_5, net.conv256_3, net.bn2_6, net.scale2_6,
     net.relu256_3, net.conv256_to_128, net.bn2_7, net.scale2_7, net.res256_to_128,
     net.relu256_to_128] = add_downsampling_block(net.res256_2, 24)
    ## block 4 (disabled)
    ## [net.conv128_1, net.bn2_8, net.scale2_8, net.relu128_1, net.conv128_2, net.bn2_9,
    ##  net.scale2_9, net.res128_2, net.relu128_2] = add_skip_block(net.res256_to_128, 48)
    ## block 5
    [net.res128_2_proj, net.bn2_10, net.scale2_10, net.conv128_3, net.bn2_11, net.scale2_11,
     net.relu128_3, net.conv128_to_64, net.bn2_12, net.scale2_12, net.res128_to_64,
     net.relu128_to_64] = add_downsampling_block(net.res256_to_128, 48)
    ## block 6
    [net.conv64_1, net.bn2_13, net.scale2_13, net.relu64_1, net.conv64_2, net.bn2_14,
     net.scale2_14, net.res64_2, net.relu64_2] = add_skip_block(net.res128_to_64, 96)
    ## block 7
    [net.res64_2_proj, net.bn2_15, net.scale2_15, net.conv64_3, net.bn2_16, net.scale2_16,
     net.relu64_3, net.conv64_to_32, net.bn2_17, net.scale2_17, net.res64_to_32,
     net.relu64_to_32] = add_downsampling_block(net.res64_2, 96)
    ## block 8
    [net.conv32_1, net.bn2_18, net.scale2_18, net.relu32_1, net.conv32_2, net.bn2_19,
     net.scale2_19, net.res32_2, net.relu32_2] = add_skip_block(net.res64_to_32, 192)
    ## block 9
    [net.res32_2_proj, net.bn2_20, net.scale2_20, net.conv32_3, net.bn2_21, net.scale2_21,
     net.relu32_3, net.conv32_to_16, net.bn2_22, net.scale2_22, net.res32_to_16,
     net.relu32_to_16] = add_downsampling_block(net.res32_2, 192)
    ## block 10
    [net.conv16_1, net.bn2_23, net.scale2_23, net.relu16_1, net.conv16_2, net.bn2_24,
     net.scale2_24, net.res16_2, net.relu16_2] = add_skip_block(net.res32_to_16, 384)

    ## global pooling
    AVE = caffe_pb2.PoolingParameter.AVE
    net.global_pool = L.Pooling(net.res16_2, pool=AVE, kernel_size=8, stride=1)
    ## fully connected
    net.fc = L.InnerProduct(net.global_pool, param=[{'lr_mult': 1}, {'lr_mult': 2}],
                            num_output=2, weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'))
    ## accuracy
    net.accuracy = L.Accuracy(net.fc, net.label, include=dict(phase=caffe.TEST))
    ## loss
    net.loss = L.SoftmaxWithLoss(net.fc, net.label)
    return net.to_proto()
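# Usage sketch (the LMDB path is a placeholder). Note that the generated
# prototxt only runs on a caffe build that provides the custom 'dct4' filler
# and the QuantTruncAbs layer.
# with open('steg_test.prototxt', 'w') as f:
#     f.write(str(create_neural_net('test_lmdb', batch_size=40)))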
def VGG19Net_Pre10(net, from_layer="data"):
    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0)}

    assert from_layer in net.keys()
    # conv1
    net.conv1_1 = L.Convolution(net[from_layer], num_output=64, pad=1, kernel_size=3, **kwargs)
    net.relu1_1 = L.ReLU(net.conv1_1, in_place=True)
    net.conv1_2 = L.Convolution(net.relu1_1, num_output=64, pad=1, kernel_size=3, **kwargs)
    net.relu1_2 = L.ReLU(net.conv1_2, in_place=True)
    # pool1
    net.pool1_stage1 = L.Pooling(net.relu1_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)
    # conv2
    net.conv2_1 = L.Convolution(net.pool1_stage1, num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu2_1 = L.ReLU(net.conv2_1, in_place=True)
    net.conv2_2 = L.Convolution(net.relu2_1, num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu2_2 = L.ReLU(net.conv2_2, in_place=True)
    # pool2
    net.pool2_stage1 = L.Pooling(net.relu2_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)
    # conv3
    net.conv3_1 = L.Convolution(net.pool2_stage1, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_1 = L.ReLU(net.conv3_1, in_place=True)
    net.conv3_2 = L.Convolution(net.relu3_1, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_2 = L.ReLU(net.conv3_2, in_place=True)
    net.conv3_3 = L.Convolution(net.relu3_2, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_3 = L.ReLU(net.conv3_3, in_place=True)
    net.conv3_4 = L.Convolution(net.relu3_3, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu3_4 = L.ReLU(net.conv3_4, in_place=True)
    # pool3
    net.pool3_stage1 = L.Pooling(net.relu3_4, pool=P.Pooling.MAX, kernel_size=2, stride=2)
    # conv4 (first two of the four VGG-19 conv4 layers: 10 weight layers total)
    net.conv4_1 = L.Convolution(net.pool3_stage1, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu4_1 = L.ReLU(net.conv4_1, in_place=True)
    net.conv4_2 = L.Convolution(net.relu4_1, num_output=512, pad=1, kernel_size=3, **kwargs)
    net.relu4_2 = L.ReLU(net.conv4_2, in_place=True)
    return net
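# Usage sketch for the 10-layer VGG-19 prefix; the Input shape is a
# placeholder assumption.
# net = caffe.NetSpec()
# net.data = L.Input(shape=dict(dim=[1, 3, 368, 368]))
# VGG19Net_Pre10(net, from_layer='data')
# print(net.to_proto())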