def alexnet_bn_proto(self, batch_size, phase='TRAIN'):
    n = caffe.NetSpec()
    if phase == 'TRAIN':
        source_data = self.train_data
        mirror = True
    else:
        source_data = self.test_data
        mirror = False
    n.data, n.label = L.Data(source=source_data, backend=P.Data.LMDB, batch_size=batch_size,
                             ntop=2, transform_param=dict(crop_size=227,
                                                          mean_value=[104, 117, 123],
                                                          mirror=mirror))

    n.conv1, n.conv1_bn, n.conv1_scale, n.conv1_relu = \
        factorization_conv_bn_scale_relu(n.data, num_output=96, kernel_size=11, stride=4)  # 96x55x55
    n.pool1 = L.Pooling(n.conv1, kernel_size=3, stride=2, pool=P.Pooling.MAX)  # 96x27x27

    n.conv2, n.conv2_bn, n.conv2_scale, n.conv2_relu = \
        factorization_conv_bn_scale_relu(n.pool1, num_output=256, kernel_size=5, pad=2)  # 256x27x27
    n.pool2 = L.Pooling(n.conv2, kernel_size=3, stride=2, pool=P.Pooling.MAX)  # 256x13x13

    n.conv3, n.conv3_bn, n.conv3_scale, n.conv3_relu = \
        factorization_conv_bn_scale_relu(n.pool2, num_output=384, kernel_size=3, pad=1)  # 384x13x13
    n.conv4, n.conv4_bn, n.conv4_scale, n.conv4_relu = \
        factorization_conv_bn_scale_relu(n.conv3, num_output=384, kernel_size=3, pad=1)  # 384x13x13
    n.conv5, n.conv5_bn, n.conv5_scale, n.conv5_relu = \
        factorization_conv_bn_scale_relu(n.conv4, num_output=256, kernel_size=3, pad=1)  # 256x13x13
    n.pool5 = L.Pooling(n.conv5, kernel_size=3, stride=2, pool=P.Pooling.MAX)  # 256x6x6

    n.fc6, n.relu6, n.drop6 = fc_relu_drop(n.pool5, num_output=2048)  # 2048x1x1
    n.fc7, n.relu7, n.drop7 = fc_relu_drop(n.fc6, num_output=2048)  # 2048x1x1
    n.fc8 = L.InnerProduct(n.fc7, num_output=self.classifier_num,
                           param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                           weight_filler=dict(type='gaussian', std=0.01),
                           bias_filler=dict(type='constant', value=0))
    if phase != 'TRAIN':
        n.accuracy_top1 = L.Accuracy(n.fc8, n.label, include=dict(phase=1))
        n.accuracy_top5 = L.Accuracy(n.fc8, n.label, include=dict(phase=1),
                                     accuracy_param=dict(top_k=5))
    n.loss = L.SoftmaxWithLoss(n.fc8, n.label)
    return n.to_proto()
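# A minimal usage sketch (an assumption, not from the source): this method lives
# on a builder class exposing `train_data`, `test_data`, and `classifier_num`.
# The `AlexNetBuilder` name below is hypothetical.
#
#   builder = AlexNetBuilder(...)
#   with open('alexnet_bn_train.prototxt', 'w') as f:
#       f.write(str(builder.alexnet_bn_proto(batch_size=64, phase='TRAIN')))
#   with open('alexnet_bn_test.prototxt', 'w') as f:
#       f.write(str(builder.alexnet_bn_proto(batch_size=50, phase='TEST')))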
def create_neural_net(input_file, batch_size=50):
    net = caffe.NetSpec()
    net.data, net.label = L.Data(batch_size=batch_size, source=input_file,
                                 backend=caffe.params.Data.LMDB, ntop=2,
                                 include=dict(phase=caffe.TEST), name='juniward04')
    ## pre-process
    net.conv1 = L.Convolution(net.data, num_output=16, kernel_size=4, stride=1, pad=1,
                              weight_filler=dict(type='dct4'),
                              param=[{'lr_mult': 0, 'decay_mult': 0}], bias_term=False)
    TRUNCABS = caffe_pb2.QuantTruncAbsParameter.TRUNCABS
    net.quanttruncabs = L.QuantTruncAbs(net.conv1, process=TRUNCABS, threshold=8, in_place=True)
    ## block 1
    [net.conv1_proj, net.bn2, net.scale2, net.conv512_1, net.bn2_1, net.scale2_1, net.relu512_1,
     net.conv512_to_256, net.bn2_2, net.scale2_2, net.res512_to_256,
     net.relu512_to_256] = add_downsampling_block(net.quanttruncabs, 12)
    ## block 2
    [net.conv256_1, net.bn2_3, net.scale2_3, net.relu256_1, net.conv256_2, net.bn2_4,
     net.scale2_4, net.res256_2, net.relu256_2] = add_skip_block(net.res512_to_256, 24)
    ## block 2_1
    [net.conv256_4, net.bn3_1, net.scale3_1, net.relu256_4, net.conv256_5, net.bn3_2,
     net.scale3_2, net.res256_5, net.relu256_5] = add_skip_block(net.res256_2, 24)
    ## block 2_2
    [net.conv256_6, net.bn4_1, net.scale4_1, net.relu256_6, net.conv256_7, net.bn4_2,
     net.scale4_2, net.res256_7, net.relu256_7] = add_skip_block(net.res256_5, 24)
    ## block 2_3
    [net.conv256_8, net.bn5_1, net.scale5_1, net.relu256_8, net.conv256_9, net.bn5_2,
     net.scale5_2, net.res256_9, net.relu256_9] = add_skip_block(net.res256_7, 24)
    ## block 3
    [net.res256_2_proj, net.bn2_5, net.scale2_5, net.conv256_3, net.bn2_6, net.scale2_6,
     net.relu256_3, net.conv256_to_128, net.bn2_7, net.scale2_7, net.res256_to_128,
     net.relu256_to_128] = add_downsampling_block(net.res256_9, 24)
    ## block 4
    [net.conv128_1, net.bn2_8, net.scale2_8, net.relu128_1, net.conv128_2, net.bn2_9,
     net.scale2_9, net.res128_2, net.relu128_2] = add_skip_block(net.res256_to_128, 48)
    ## block 4_1
    [net.conv128_4, net.bn3_3, net.scale3_3, net.relu128_4, net.conv128_5, net.bn3_4,
     net.scale3_4, net.res128_5, net.relu128_5] = add_skip_block(net.res128_2, 48)
    ## block 4_2
    [net.conv128_6, net.bn4_3, net.scale4_3, net.relu128_6, net.conv128_7, net.bn4_4,
     net.scale4_4, net.res128_7, net.relu128_7] = add_skip_block(net.res128_5, 48)
    ## block 4_3
    [net.conv128_8, net.bn5_3, net.scale5_3, net.relu128_8, net.conv128_9, net.bn5_4,
     net.scale5_4, net.res128_9, net.relu128_9] = add_skip_block(net.res128_7, 48)
    ## block 5
    [net.res128_2_proj, net.bn2_10, net.scale2_10, net.conv128_3, net.bn2_11, net.scale2_11,
     net.relu128_3, net.conv128_to_64, net.bn2_12, net.scale2_12, net.res128_to_64,
     net.relu128_to_64] = add_downsampling_block(net.res128_9, 48)
    ## block 6
    [net.conv64_1, net.bn2_13, net.scale2_13, net.relu64_1, net.conv64_2, net.bn2_14,
     net.scale2_14, net.res64_2, net.relu64_2] = add_skip_block(net.res128_to_64, 96)
    ## block 6_1
    [net.conv64_4, net.bn3_5, net.scale3_5, net.relu64_4, net.conv64_5, net.bn3_6,
     net.scale3_6, net.res64_5, net.relu64_5] = add_skip_block(net.res64_2, 96)
    ## block 6_2
    [net.conv64_6, net.bn4_5, net.scale4_5, net.relu64_6, net.conv64_7, net.bn4_6,
     net.scale4_6, net.res64_7, net.relu64_7] = add_skip_block(net.res64_5, 96)
    ## block 6_3
    [net.conv64_8, net.bn5_5, net.scale5_5, net.relu64_8, net.conv64_9, net.bn5_6,
     net.scale5_6, net.res64_9, net.relu64_9] = add_skip_block(net.res64_7, 96)
    ## block 7
    [net.res64_2_proj, net.bn2_15, net.scale2_15, net.conv64_3, net.bn2_16, net.scale2_16,
     net.relu64_3, net.conv64_to_32, net.bn2_17, net.scale2_17, net.res64_to_32,
     net.relu64_to_32] = add_downsampling_block(net.res64_9, 96)
    ## block 8
    [net.conv32_1, net.bn2_18, net.scale2_18, net.relu32_1, net.conv32_2, net.bn2_19,
     net.scale2_19, net.res32_2, net.relu32_2] = add_skip_block(net.res64_to_32, 192)
    ## block 8_1
    [net.conv32_4, net.bn3_7, net.scale3_7, net.relu32_4, net.conv32_5, net.bn3_8,
     net.scale3_8, net.res32_5, net.relu32_5] = add_skip_block(net.res32_2, 192)
    ## block 8_2
    [net.conv32_6, net.bn4_7, net.scale4_7, net.relu32_6, net.conv32_7, net.bn4_8,
     net.scale4_8, net.res32_7, net.relu32_7] = add_skip_block(net.res32_5, 192)
    ## block 8_3
    [net.conv32_8, net.bn5_7, net.scale5_7, net.relu32_8, net.conv32_9, net.bn5_8,
     net.scale5_8, net.res32_9, net.relu32_9] = add_skip_block(net.res32_7, 192)
    ## block 9
    [net.res32_2_proj, net.bn2_20, net.scale2_20, net.conv32_3, net.bn2_21, net.scale2_21,
     net.relu32_3, net.conv32_to_16, net.bn2_22, net.scale2_22, net.res32_to_16,
     net.relu32_to_16] = add_downsampling_block(net.res32_9, 192)
    ## block 10_2
    [net.conv16_5, net.bn4_9, net.scale4_9, net.relu16_5, net.conv16_6, net.bn4_10,
     net.scale4_10, net.res16_6, net.relu16_6] = add_skip_block(net.res32_to_16, 384)
    ## block 10_3
    [net.conv16_7, net.bn5_9, net.scale5_9, net.relu16_7, net.conv16_8, net.bn5_10,
     net.scale5_10, net.res16_8, net.relu16_8] = add_skip_block(net.res16_6, 384)
    ## block 10_1
    [net.conv16_3, net.bn3_9, net.scale3_9, net.relu16_3, net.conv16_4, net.bn3_10,
     net.scale3_10, net.res16_4, net.relu16_4] = add_skip_block(net.res16_8, 384)
    ## block 10
    [net.conv16_1, net.bn2_23, net.scale2_23, net.relu16_1, net.conv16_2, net.bn2_24,
     net.scale2_24, net.res16_2, net.relu16_2] = add_skip_block(net.res16_4, 384)
    ## global pool
    AVE = caffe_pb2.PoolingParameter.AVE
    net.global_pool = L.Pooling(net.res16_2, pool=AVE, kernel_size=8, stride=1)
    ## fully connected
    net.fc = L.InnerProduct(net.global_pool, param=[{'lr_mult': 1}, {'lr_mult': 2}],
                            num_output=2, weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'))
    ## accuracy
    net.accuracy = L.Accuracy(net.fc, net.label, include=dict(phase=caffe.TEST))
    ## loss
    net.loss = L.SoftmaxWithLoss(net.fc, net.label)
    return net.to_proto()
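# A minimal usage sketch (an assumption, not from the source). The custom
# QuantTruncAbs layer and the 'dct4' filler come from a steganalysis fork of
# Caffe, so this only runs against that fork:
#
#   proto = create_neural_net('juniward04_test_lmdb', batch_size=40)
#   with open('test.prototxt', 'w') as f:
#       f.write(str(proto))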
def InceptionV3Body(net, from_layer, output_pred=False):
    # scale is fixed to 1, thus we ignore it.
    use_scale = False

    out_layer = 'conv'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=32, kernel_size=3, pad=0, stride=2, use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'conv_1'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=32, kernel_size=3, pad=0, stride=1, use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'conv_2'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=64, kernel_size=3, pad=1, stride=1, use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'pool'
    net[out_layer] = L.Pooling(net[from_layer], pool=P.Pooling.MAX, kernel_size=3, stride=2, pad=0)
    from_layer = out_layer

    out_layer = 'conv_3'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=80, kernel_size=1, pad=0, stride=1, use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'conv_4'
    ConvBNLayer(net, from_layer, out_layer, use_bn=True, use_relu=True,
                num_output=192, kernel_size=3, pad=0, stride=1, use_scale=use_scale)
    from_layer = out_layer

    out_layer = 'pool_1'
    net[out_layer] = L.Pooling(net[from_layer], pool=P.Pooling.MAX, kernel_size=3, stride=2, pad=0)
    from_layer = out_layer

    # inceptions with 1x1, 3x3, 5x5 convolutions
    for inception_id in xrange(0, 3):
        if inception_id == 0:
            out_layer = 'mixed'
            tower_2_conv_num_output = 32
        else:
            out_layer = 'mixed_{}'.format(inception_id)
            tower_2_conv_num_output = 64
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=48, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=64, kernel_size=5, pad=2, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=96, kernel_size=3, pad=1, stride=1),
            dict(name='conv_2', num_output=96, kernel_size=3, pad=1, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool', pool=P.Pooling.AVE, kernel_size=3, pad=1, stride=1),
            dict(name='conv', num_output=tower_2_conv_num_output, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    # inceptions with 1x1, 3x3 (in sequence) convolutions
    out_layer = 'mixed_3'
    towers = []
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=384, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}/tower'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=64, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=96, kernel_size=3, pad=1, stride=1),
        dict(name='conv_2', num_output=96, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='pool', pool=P.Pooling.MAX, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    out_layer = '{}/join'.format(out_layer)
    net[out_layer] = L.Concat(*towers, axis=1)
    from_layer = out_layer

    # inceptions with 1x1, 7x1, 1x7 convolutions
    for inception_id in xrange(4, 8):
        if inception_id == 4:
            num_output = 128
        elif inception_id == 5 or inception_id == 6:
            num_output = 160
        elif inception_id == 7:
            num_output = 192
        out_layer = 'mixed_{}'.format(inception_id)
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=num_output, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
            dict(name='conv_2', num_output=192, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
        ])
        towers.append(tower)
        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=num_output, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
            dict(name='conv_2', num_output=num_output, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
            dict(name='conv_3', num_output=num_output, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
            dict(name='conv_4', num_output=192, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
        ])
        towers.append(tower)
        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool', pool=P.Pooling.AVE, kernel_size=3, pad=1, stride=1),
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    # inceptions with 1x1, 3x3, 1x7, 7x1 filters
    out_layer = 'mixed_8'
    towers = []
    tower_name = '{}/tower'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=320, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}/tower_1'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        dict(name='conv_1', num_output=192, kernel_size=[1, 7], pad=[0, 3], stride=[1, 1]),
        dict(name='conv_2', num_output=192, kernel_size=[7, 1], pad=[3, 0], stride=[1, 1]),
        dict(name='conv_3', num_output=192, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    tower_name = '{}'.format(out_layer)
    tower = InceptionTower(net, from_layer, tower_name, [
        dict(name='pool', pool=P.Pooling.MAX, kernel_size=3, pad=0, stride=2),
    ])
    towers.append(tower)
    out_layer = '{}/join'.format(out_layer)
    net[out_layer] = L.Concat(*towers, axis=1)
    from_layer = out_layer

    for inception_id in xrange(9, 11):
        num_output = 384
        num_output2 = 448
        if inception_id == 9:
            pool = P.Pooling.AVE
        else:
            pool = P.Pooling.MAX
        out_layer = 'mixed_{}'.format(inception_id)
        towers = []
        tower_name = '{}'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=320, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        tower_name = '{}/tower'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output, kernel_size=1, pad=0, stride=1),
        ])
        subtowers = []
        subtower_name = '{}/mixed'.format(tower_name)
        subtower = InceptionTower(net, '{}/conv'.format(tower_name), subtower_name, [
            dict(name='conv', num_output=num_output, kernel_size=[1, 3], pad=[0, 1], stride=[1, 1]),
        ])
        subtowers.append(subtower)
        subtower = InceptionTower(net, '{}/conv'.format(tower_name), subtower_name, [
            dict(name='conv_1', num_output=num_output, kernel_size=[3, 1], pad=[1, 0], stride=[1, 1]),
        ])
        subtowers.append(subtower)
        net[subtower_name] = L.Concat(*subtowers, axis=1)
        towers.append(net[subtower_name])
        tower_name = '{}/tower_1'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='conv', num_output=num_output2, kernel_size=1, pad=0, stride=1),
            dict(name='conv_1', num_output=num_output, kernel_size=3, pad=1, stride=1),
        ])
        subtowers = []
        subtower_name = '{}/mixed'.format(tower_name)
        subtower = InceptionTower(net, '{}/conv_1'.format(tower_name), subtower_name, [
            dict(name='conv', num_output=num_output, kernel_size=[1, 3], pad=[0, 1], stride=[1, 1]),
        ])
        subtowers.append(subtower)
        subtower = InceptionTower(net, '{}/conv_1'.format(tower_name), subtower_name, [
            dict(name='conv_1', num_output=num_output, kernel_size=[3, 1], pad=[1, 0], stride=[1, 1]),
        ])
        subtowers.append(subtower)
        net[subtower_name] = L.Concat(*subtowers, axis=1)
        towers.append(net[subtower_name])
        tower_name = '{}/tower_2'.format(out_layer)
        tower = InceptionTower(net, from_layer, tower_name, [
            dict(name='pool', pool=pool, kernel_size=3, pad=1, stride=1),
            dict(name='conv', num_output=192, kernel_size=1, pad=0, stride=1),
        ])
        towers.append(tower)
        out_layer = '{}/join'.format(out_layer)
        net[out_layer] = L.Concat(*towers, axis=1)
        from_layer = out_layer

    if output_pred:
        net.pool_3 = L.Pooling(net[from_layer], pool=P.Pooling.AVE, kernel_size=8, pad=0, stride=1)
        net.softmax = L.InnerProduct(net.pool_3, num_output=1008)
        net.softmax_prob = L.Softmax(net.softmax)

    return net
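# A minimal usage sketch (an assumption, not from the source): build a
# deploy-style net by feeding an Input layer into InceptionV3Body and emitting
# the prototxt.
#
#   net = caffe.NetSpec()
#   net.data = L.Input(shape=dict(dim=[1, 3, 299, 299]))
#   InceptionV3Body(net, from_layer='data', output_pred=True)
#   with open('inception_v3_deploy.prototxt', 'w') as f:
#       f.write(str(net.to_proto()))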
def shuffle_net(group, scale_f, input_size, se=False, num_classes=1000, asoft=True):
    # figure out network structure
    group_defs = {
        1: [36, 72, 144],
        2: [50, 100, 200],
        3: [60, 120, 240],
        4: [68, 136, 272],
        8: [96, 192, 384],
    }
    nouts_list = [int(v * scale_f) for v in group_defs[group]]
    nunits_list = [3, 7, 3]
    f_size = 24

    # setup the first couple of layers
    n = caffe.NetSpec()
    net = n.__dict__['tops']
    n.data, n.label = L.ImageData(batch_size=128, source="../data/train.list", root_folder="/",
                                  ntop=2, include=dict(phase=0),
                                  transform_param=dict(crop_size=input_size, mirror=True,
                                                       scale=1 / 128.))  # the data mean
    n.conv1 = L.Convolution(n.data, kernel_size=3, stride=2, num_output=f_size, pad=1,
                            bias_term=False, param=[dict(lr_mult=1, decay_mult=1)],
                            weight_filler=dict(type="msra"))
    n.conv1_bn = L.BatchNorm(n.conv1, param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)],
                             in_place=False)
    n.conv1_scale = L.Scale(n.conv1_bn, scale_param=dict(bias_term=True),
                            param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=1)],
                            in_place=True)
    n.conv1_relu = L.ReLU(n.conv1_scale, in_place=True)
    n.conv1_pool = L.Pooling(n.conv1_relu, stride=2, kernel_size=3)

    # make the convolutional body
    last_size = f_size / 4
    for i, (nout, nunit) in enumerate(zip(nouts_list, nunits_list)):
        s = 'Step' + str(i + 1) + '_reduction_'
        if i == 0:
            standard_unit(n, nout - last_size, s, group, se=se, newdepth=True, is_first=True)
        else:
            standard_unit(n, nout - last_size, s, group, se=se, newdepth=True)
        last_size = nout
        for unit in range(nunit):
            s = 'Step' + str(i + 1) + '_' + str(unit + 1) + '_'
            standard_unit(n, nout, s, group, se=se)

    # add the end layers
    net = n.__dict__['tops']
    bottom = net[list(net.keys())[-1]]  # find the last layer in the netspec
    n.global_pool = L.Pooling(bottom, pooling_param=dict(pool=1, global_pooling=True))
    n.score = L.InnerProduct(n.global_pool, num_output=num_classes, bias_term=False,
                             param=[dict(lr_mult=1, decay_mult=1)],
                             weight_filler=dict(type="msra"))
    n.loss = L.SoftmaxWithLoss(n.score, n.label)
    n.accuracy = L.Accuracy(n.score, n.label)
    return n
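# A minimal usage sketch (an assumption, not from the source): a 1x ShuffleNet
# with group=3 at 224x224 input; the function returns a NetSpec, so the caller
# serializes it.
#
#   spec = shuffle_net(group=3, scale_f=1.0, input_size=224)
#   with open('shufflenet_train.prototxt', 'w') as f:
#       f.write(str(spec.to_proto()))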
def generate(self):
    """Returns a NetSpec specifying CaffeNet, following the original proto text
    specification (./models/bvlc_reference_caffenet/train_val.prototxt)."""
    conf = self
    n = caffe.NetSpec()
    param = LT.learned_param if conf.train else LT.frozen_param
    if self.train:
        n.data = L.Python(top=['rois', 'labels', 'bbox_targets', 'bbox_inside_weights',
                               'bbox_outside_weights'],
                          python_param=dict(module='roi_data_layer.layer',
                                            layer='RoIDataLayer',
                                            param_str="num_classes: " + str(conf.num_classes)))
    else:
        n.data, n.im_info = LT.input()
    conv15_param = LT.learned_param if conf.conv_1_to_5_learn else LT.frozen_param
    LT.conv1_to_5(n, conv15_param)
    if not self.train:
        n.rpn_conv1, n.rpn_relu1, n.rpn_cls_score, n.rpn_bbox_pred = \
            LT.rpn_class_and_bbox_predictors(n, self, param)
        n.rpn_cls_score_reshape = LT.reshape(n.rpn_cls_score, [0, 2, -1, 0])
        n.rpn_cls_prob, n.rpn_cls_prob_reshape, n.rois = LT.roi_proposal(n, self)
    n.roi_pool = L.ROIPooling(bottom=["conv5", "rois"], pooled_w=6, pooled_h=6,
                              spatial_scale=0.0625)
    n.fc6, n.relu6 = LT.fc_relu(n.roi_pool, 4096, param=param)
    n.drop6 = fc7input = L.Dropout(n.relu6, in_place=True, dropout_ratio=0.5, scale_train=False)
    n.fc7, n.relu7 = LT.fc_relu(fc7input, 4096, param=param)
    n.drop7 = layer7 = L.Dropout(n.relu7, in_place=True, dropout_ratio=0.5, scale_train=False)
    weight_filler = LT.WEIGHT_FILLER if conf.train else dict()
    bias_filler = LT.BIAS_FILLER if conf.train else dict()
    n.cls_score = L.InnerProduct(layer7, num_output=conf.num_classes,
                                 weight_filler=weight_filler, bias_filler=bias_filler,
                                 param=LT.learned_param)
    n.bbox_pred = L.InnerProduct(layer7, num_output=conf.num_classes * 4,
                                 weight_filler=weight_filler, bias_filler=bias_filler,
                                 param=LT.learned_param)
    if conf.train:
        n.loss_cls = LT.soft_max_with_loss(["cls_score", "labels"])
        n.loss_bbox = L.SmoothL1Loss(bottom=["bbox_pred", "bbox_targets",
                                             "bbox_inside_weights", "bbox_outside_weights"],
                                     loss_weight=1)
    else:
        n.cls_prob = L.Softmax(n.cls_score, loss_param=dict(ignore_label=-1, normalize=True))
    if self.train:
        n.rpn_conv1, n.rpn_relu1, n.rpn_cls_score, n.rpn_bbox_pred = \
            LT.rpn_class_and_bbox_predictors(n, self, LT.frozen_param)
        n.silence_rpn_cls_score = LT.silence(n.rpn_cls_score)
        n.silence_rpn_bbox_pred = LT.silence(n.rpn_bbox_pred)
    # write the net to a temporary file and return its filename
    return self.save(n)
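# A minimal usage sketch (an assumption, not from the source): `generate` is a
# method on a config-like object exposing `train`, `num_classes`,
# `conv_1_to_5_learn`, and `save`; the `NetConfig` name below is hypothetical.
#
#   conf = NetConfig(train=True, num_classes=21, conv_1_to_5_learn=False)
#   prototxt_path = conf.generate()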
def mfb_baseline(mode, batchsize, T, question_vocab_size, folder):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize, 'folder': folder})
    if mode == 'val':
        n.data, n.cont, n.img_feature, n.label = L.Python(
            module='vqa_data_layer', layer='VQADataProviderLayer',
            param_str=mode_str, ntop=4)
    else:
        n.data, n.cont, n.img_feature, n.label = L.Python(
            module='vqa_data_layer_kld', layer='VQADataProviderLayer',
            param_str=mode_str, ntop=4)
    n.embed = L.Embed(n.data, input_dim=question_vocab_size, num_output=300,
                      weight_filler=dict(type='xavier'))
    n.embed_tanh = L.TanH(n.embed)

    # LSTM
    n.lstm1 = L.LSTM(n.embed_tanh, n.cont,
                     recurrent_param=dict(num_output=config.LSTM_UNIT_NUM,
                                          weight_filler=dict(type='xavier')))
    tops1 = L.Slice(n.lstm1, ntop=config.MAX_WORDS_IN_QUESTION, slice_param={'axis': 0})
    # keep only the last time step; silence the others
    for i in xrange(config.MAX_WORDS_IN_QUESTION - 1):
        n.__setattr__('slice_first' + str(i), tops1[int(i)])
        n.__setattr__('silence_data_first' + str(i), L.Silence(tops1[int(i)], ntop=0))
    n.lstm1_out = tops1[config.MAX_WORDS_IN_QUESTION - 1]
    n.lstm1_reshaped = L.Reshape(n.lstm1_out, reshape_param=dict(shape=dict(dim=[-1, 1024])))
    n.q_feat = L.Dropout(n.lstm1_reshaped,
                         dropout_param={'dropout_ratio': config.LSTM_DROPOUT_RATIO})

    # Coarse Image-Question MFB fusion
    n.mfb_q_proj = L.InnerProduct(n.q_feat, num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_i_proj = L.InnerProduct(n.img_feature, num_output=config.JOINT_EMB_SIZE,
                                  weight_filler=dict(type='xavier'))
    n.mfb_iq_eltwise = L.Eltwise(n.mfb_q_proj, n.mfb_i_proj,
                                 eltwise_param=dict(operation=0))  # 0 = PROD
    n.mfb_iq_drop = L.Dropout(n.mfb_iq_eltwise,
                              dropout_param={'dropout_ratio': config.MFB_DROPOUT_RATIO})
    n.mfb_iq_resh = L.Reshape(n.mfb_iq_drop,
                              reshape_param=dict(shape=dict(
                                  dim=[-1, 1, config.MFB_OUT_DIM, config.MFB_FACTOR_NUM])))
    n.mfb_iq_sumpool = L.Pooling(n.mfb_iq_resh, pool=P.Pooling.SUM,
                                 pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM, kernel_h=1))
    n.mfb_out = L.Reshape(n.mfb_iq_sumpool,
                          reshape_param=dict(shape=dict(dim=[-1, config.MFB_OUT_DIM])))
    n.mfb_sign_sqrt = L.SignedSqrt(n.mfb_out)
    n.mfb_l2 = L.L2Normalize(n.mfb_sign_sqrt)
    n.prediction = L.InnerProduct(n.mfb_l2, num_output=config.NUM_OUTPUT_UNITS,
                                  weight_filler=dict(type='xavier'))
    if mode == 'val':
        n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    else:
        n.loss = L.SoftmaxKLDLoss(n.prediction, n.label)
    return n.to_proto()
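# A minimal usage sketch (an assumption, not from the source): `config` is the
# MFB project's hyper-parameter module, and the SUM pooling / SignedSqrt /
# L2Normalize / SoftmaxKLDLoss layers require that project's Caffe fork.
#
#   proto = mfb_baseline('train', batchsize=200, T=config.MAX_WORDS_IN_QUESTION,
#                        question_vocab_size=20000, folder='./data')
#   with open('mfb_baseline_train.prototxt', 'w') as f:
#       f.write(str(proto))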
def fc_relu(bottom, nout):
    fc = L.InnerProduct(bottom, num_output=nout)
    return fc, L.ReLU(fc, in_place=True)
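# Companion helpers in the same style (a sketch, assuming the call signatures
# used by yolonet() below: conv_relu(bottom, ks, nout, stride, pad) and
# max_pool(bottom, ks, stride)):

def conv_relu(bottom, ks, nout, stride=1, pad=0):
    conv = L.Convolution(bottom, kernel_size=ks, num_output=nout, stride=stride, pad=pad)
    return conv, L.ReLU(conv, in_place=True)

def max_pool(bottom, ks, stride=1):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)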
def qlstm(mode, batchsize, T, question_vocab_size):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize})
    # n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(
    #     module='vqa_data_provider_layer', layer='VQADataProviderLayer',
    #     param_str=mode_str, ntop=5)
    n.data, n.cont, n.img_feature, n.label = L.Python(
        module='vqa_data_provider_layer', layer='VQADataProviderLayer',
        param_str=mode_str, ntop=4)

    # word embedding
    n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=300,
                         weight_filler=dict(type='uniform', min=-0.08, max=0.08))
    # n.embed = L.TanH(n.embed_ba)
    n.embed_scale = L.Scale(n.embed_ba, n.cont, scale_param=dict(axis=0))
    n.embed_scale_resh = L.Reshape(n.embed_scale,
                                   reshape_param=dict(shape=dict(dim=[batchsize, 1, T, -1])))

    # convolution
    n.word_feature_3_1 = L.Convolution(n.embed_scale_resh, kernel_h=1, kernel_w=300, stride=1,
                                       num_output=256, pad_h=0, pad_w=0,
                                       weight_filler=dict(type='xavier'))
    n.word_relu_3_1_r = L.ReLU(n.word_feature_3_1)
    n.word_feature_3_2 = L.Convolution(n.word_relu_3_1_r, kernel_h=3, kernel_w=1, stride=1,
                                       num_output=256, pad_h=1, pad_w=0,
                                       weight_filler=dict(type='xavier'))
    n.word_relu_3_2_r = L.ReLU(n.word_feature_3_2)
    n.word_feature_3 = L.Convolution(n.word_relu_3_2_r, kernel_h=1, kernel_w=1, stride=1,
                                     num_output=1024, pad_h=0, pad_w=0,
                                     weight_filler=dict(type='xavier'))
    n.word_feature_5_1 = L.Convolution(n.embed_scale_resh, kernel_h=1, kernel_w=300, stride=1,
                                       num_output=256, pad_h=0, pad_w=0,
                                       weight_filler=dict(type='xavier'))
    n.word_relu_5_1_r = L.ReLU(n.word_feature_5_1)
    n.word_feature_5_2 = L.Convolution(n.word_relu_5_1_r, kernel_h=5, kernel_w=1, stride=1,
                                       num_output=256, pad_h=2, pad_w=0,
                                       weight_filler=dict(type='xavier'))
    n.word_relu_5_2_r = L.ReLU(n.word_feature_5_2)
    n.word_feature_5 = L.Convolution(n.word_relu_5_2_r, kernel_h=1, kernel_w=1, stride=1,
                                     num_output=1024, pad_h=0, pad_w=0,
                                     weight_filler=dict(type='xavier'))
    n.word_relu_3 = L.ReLU(n.word_feature_3)
    n.word_relu_5 = L.ReLU(n.word_feature_5)
    n.word_vec_3 = L.Pooling(n.word_relu_3, kernel_h=T, kernel_w=1, stride=T, pool=P.Pooling.MAX)
    n.word_vec_5 = L.Pooling(n.word_relu_5, kernel_h=T, kernel_w=1, stride=T, pool=P.Pooling.MAX)
    word_vec = [n.word_vec_3, n.word_vec_5]
    n.concat_vec = L.Concat(*word_vec, concat_param={'axis': 1})  # N x 2*d_w x 1 x 1
    n.concat_vec_dropped = L.Dropout(n.concat_vec, dropout_param={'dropout_ratio': 0.5})

    n.q_emb_tanh_droped_resh_tiled_1 = L.Tile(n.concat_vec_dropped, axis=2, tiles=14)
    n.q_emb_tanh_droped_resh_tiled = L.Tile(n.q_emb_tanh_droped_resh_tiled_1, axis=3, tiles=14)
    n.i_emb_tanh_droped_resh = L.Reshape(n.img_feature,
                                         reshape_param=dict(shape=dict(dim=[-1, 2048, 14, 14])))
    n.blcf = L.CompactBilinear(n.q_emb_tanh_droped_resh_tiled, n.i_emb_tanh_droped_resh,
                               compact_bilinear_param=dict(num_output=16000, sum_pool=False))
    n.blcf_sign_sqrt = L.SignedSqrt(n.blcf)
    n.blcf_sign_sqrt_l2 = L.L2Normalize(n.blcf_sign_sqrt)
    n.blcf_droped = L.Dropout(n.blcf_sign_sqrt_l2, dropout_param={'dropout_ratio': 0.1})

    # multi-channel attention
    n.att_conv1 = L.Convolution(n.blcf_droped, kernel_size=1, stride=1, num_output=512, pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1, num_output=2, pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_reshaped = L.Reshape(n.att_conv2,
                               reshape_param=dict(shape=dict(dim=[-1, 2, 14 * 14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att = L.Reshape(n.att_softmax, reshape_param=dict(shape=dict(dim=[-1, 2, 14, 14])))
    att_maps = L.Slice(n.att, ntop=2, slice_param={'axis': 1})
    n.att_map0 = att_maps[0]
    n.att_map1 = att_maps[1]
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1), ntop=1)
    n.att_feature0 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map0, dummy)
    n.att_feature1 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map1, dummy)
    n.att_feature0_resh = L.Reshape(n.att_feature0,
                                    reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.att_feature1_resh = L.Reshape(n.att_feature1,
                                    reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.att_feature = L.Concat(n.att_feature0_resh, n.att_feature1_resh)

    # merge attention and lstm with compact bilinear pooling
    n.att_feature_resh = L.Reshape(n.att_feature,
                                   reshape_param=dict(shape=dict(dim=[-1, 4096, 1, 1])))
    # n.lstm_12_resh = L.Reshape(n.lstm_12, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.bc_att_lstm = L.CompactBilinear(n.att_feature_resh, n.concat_vec_dropped,
                                      compact_bilinear_param=dict(num_output=16000,
                                                                  sum_pool=False))
    n.bc_sign_sqrt = L.SignedSqrt(n.bc_att_lstm)
    n.bc_sign_sqrt_l2 = L.L2Normalize(n.bc_sign_sqrt)
    n.bc_dropped = L.Dropout(n.bc_sign_sqrt_l2, dropout_param={'dropout_ratio': 0.1})
    n.bc_dropped_resh = L.Reshape(n.bc_dropped,
                                  reshape_param=dict(shape=dict(dim=[-1, 16000])))
    n.prediction = L.InnerProduct(n.bc_dropped_resh, num_output=3000,
                                  weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.prediction, n.label)
    return n.to_proto()
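# A minimal usage sketch (an assumption, not from the source): T is the padded
# question length expected by the data provider (the pooling layers above use
# kernel_h=T), and the CompactBilinear/SoftAttention layers require the MCB
# project's Caffe fork.
#
#   proto = qlstm('train', batchsize=64, T=22, question_vocab_size=20000)
#   with open('qlstm_train.prototxt', 'w') as f:
#       f.write(str(proto))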
def ZFNetBody(net, from_layer, for_training=True):
    net.conv1 = L.Convolution(net[from_layer], kernel_size=k_conv1, stride=s_conv1,
                              num_output=d_conv1, pad=p_conv1, bias_term=True,
                              weight_filler=dict(type='gaussian', std=0.01),
                              bias_filler=dict(type='constant', value=0),
                              param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    net.relu1 = L.ReLU(net.conv1, in_place=True)
    net.pool1 = L.Pooling(net.relu1, pool=P.Pooling.MAX, kernel_size=k_pool1, stride=s_pool1)
    net.norm1 = L.LRN(net.pool1, lrn_param=dict(local_size=local_size_norm1,
                                                alpha=alpha_norm1, beta=beta_norm1))
    net.conv2 = L.Convolution(net.norm1, kernel_size=k_conv2, stride=s_conv2,
                              num_output=d_conv2,  # pad=p_conv2,
                              bias_term=True,
                              weight_filler=dict(type='gaussian', std=0.01),
                              bias_filler=dict(type='constant', value=0),
                              param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    net.relu2 = L.ReLU(net.conv2, in_place=True)
    net.pool2 = L.Pooling(net.relu2, pool=P.Pooling.MAX, kernel_size=k_pool2, stride=s_pool2)
    net.norm2 = L.LRN(net.pool2, lrn_param=dict(local_size=local_size_norm2,
                                                alpha=alpha_norm2, beta=beta_norm2))
    net.conv3 = L.Convolution(net.norm2, kernel_size=k_conv3, stride=s_conv3,
                              num_output=d_conv3, pad=p_conv3, bias_term=True,
                              weight_filler=dict(type='gaussian', std=0.01),
                              bias_filler=dict(type='constant', value=0),
                              param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    net.relu3 = L.ReLU(net.conv3, in_place=True)
    net.conv4 = L.Convolution(net.relu3, kernel_size=k_conv4, stride=s_conv4,
                              num_output=d_conv4, pad=p_conv4, bias_term=True,
                              weight_filler=dict(type='gaussian', std=0.01),
                              bias_filler=dict(type='constant', value=0),
                              param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    net.relu4 = L.ReLU(net.conv4, in_place=True)
    net.conv5 = L.Convolution(net.relu4, kernel_size=k_conv5, stride=s_conv5,
                              num_output=d_conv5, pad=p_conv5, bias_term=True,
                              weight_filler=dict(type='gaussian', std=0.01),
                              bias_filler=dict(type='constant', value=0),
                              param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    net.relu5 = L.ReLU(net.conv5, in_place=True)
    net.pool5 = L.Pooling(net.relu5, pool=P.Pooling.MAX, kernel_size=k_pool5, stride=s_pool5)
    net.fc6 = L.InnerProduct(net.pool5, num_output=k_ip6,
                             weight_filler=dict(type='gaussian', std=0.01),
                             bias_filler=dict(type='constant', value=0),
                             param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    net.relu6 = L.ReLU(net.fc6, in_place=True)
    net.drop6 = L.Dropout(net.relu6, dropout_param=dict(dropout_ratio=r_drop6), in_place=True)
    net.fc7 = L.InnerProduct(net.fc6, num_output=k_ip7,
                             weight_filler=dict(type='gaussian', std=0.01),
                             bias_filler=dict(type='constant', value=0),
                             param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    net.relu7 = L.ReLU(net.fc7, in_place=True)
    net.drop7 = L.Dropout(net.relu7, dropout_param=dict(dropout_ratio=r_drop7), in_place=True)
    net.fc8 = L.InnerProduct(net.fc7, num_output=k_ip8,
                             weight_filler=dict(type='gaussian', std=0.01),
                             bias_filler=dict(type='constant', value=0),
                             param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    if not for_training:
        net.acc = L.Accuracy(net.fc8, net.label,
                             include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    net.loss = L.SoftmaxWithLoss(net.fc8, net.label)
    return net
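# A minimal usage sketch (an assumption, not from the source): the k_*/s_*/d_*/
# p_* names above are module-level hyper-parameters supplied elsewhere in this
# file, so they must be defined before this runs.
#
#   net = caffe.NetSpec()
#   net.data, net.label = L.ImageData(source='train.txt', batch_size=32, ntop=2)
#   ZFNetBody(net, from_layer='data', for_training=True)
#   with open('zfnet_train.prototxt', 'w') as f:
#       f.write(str(net.to_proto()))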
def test_workdir_setup(self):
    solver = bct.CaffeSolver(debug=True)
    solver.write(osp.join(self.workdir, 'solver.prototxt'))
    n = caffe.NetSpec()
    n.data, n.label = L.ImageData(transform_param=dict(crop_size=224, mean_value=128),
                                  source='../static/imlist.txt', batch_size=50, ntop=2)
    net = vgg_core(n, learn=True)
    net.score = L.InnerProduct(net.fc7, num_output=2,
                               param=[dict(lr_mult=5, decay_mult=1),
                                      dict(lr_mult=10, decay_mult=0)])
    net.loss = L.SoftmaxWithLoss(net.score, n.label)
    with open(osp.join(self.workdir, 'trainnet.prototxt'), 'w') as w:
        w.write(str(net.to_proto()))
    with open(osp.join(self.workdir, 'testnet.prototxt'), 'w') as w:
        w.write(str(net.to_proto()))
    caffefile = '/runs/templates/VGG_ILSVRC_16_layers_initial.caffemodel'
    if osp.isfile(caffefile):
        shutil.copyfile(caffefile, osp.join(self.workdir, 'initial.caffemodel'))
    bct.run(self.workdir, nbr_iters=3)
    self.assertTrue(osp.isfile(osp.join(self.workdir, 'train.log')))
    self.assertTrue(osp.isfile(osp.join(self.workdir, 'snapshot_iter_3.caffemodel')))
    caffemodel, iter_ = bct.find_latest_caffemodel(self.workdir)
    self.assertEqual(iter_, 3)
    net = bct.load_model(self.workdir, caffemodel, gpuid=0,
                         net_prototxt='testnet.prototxt', phase=caffe.TEST)
    estlist, scorelist = bct.classify_from_datalayer(net, n_testinstances=3, batch_size=50,
                                                     scorelayer='score')
    self.assertEqual(len(scorelist), 3)
    self.assertEqual(len(estlist), 3)
    self.assertEqual(len(scorelist[0]), 2)
    img = np.asarray(Image.open('../static/bbc.jpg'))[:224, :224, :]
    imglist = [img for _ in range(6)]
    estlist, scorelist = bct.classify_from_imlist(imglist, net, bct.Transformer(), 4)
    self.assertEqual(len(scorelist), 6)
    self.assertEqual(len(estlist), 6)
    self.assertEqual(len(scorelist[0]), 2)
def resnet_layers_proto(self, batch_size, phase='TRAIN', stages=(3, 4, 6, 3)):
    n = caffe.NetSpec()
    if phase == 'TRAIN':
        source_data = self.train_data
        need_mirror = True
    else:
        source_data = self.test_data
        need_mirror = False
    n.data, n.label = L.Data(source=source_data, backend=P.Data.LMDB, batch_size=batch_size,
                             ntop=2, transform_param=dict(crop_size=224,
                                                          mean_value=[128, 128, 128],
                                                          mirror=need_mirror))
    n.conv1, n.conv1_bn, n.conv1_scale, n.conv1_relu = \
        block_conv_bn_scale_relu(n.data, num_output=64, kernel_size=7, stride=2, pad=3)  # 64x112x112
    n.pool1 = L.Pooling(n.conv1, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    residual_num = 0
    for num in xrange(len(stages)):
        for i in xrange(stages[num]):
            residual_num += 1
            if num == 0 and i == 0:
                stage_string = skip_connect_with_dimen_match_no_patch_reduce
                if residual_num == 1:
                    bottom_string = 'n.pool1'
                else:
                    bottom_string = 'n.res%s_eletwise' % str(residual_num - 1)
            elif i == 0 and num > 0:
                stage_string = skip_connect_with_dimen_match
                if residual_num == 1:
                    bottom_string = 'n.pool1'
                else:
                    bottom_string = 'n.res%s_eletwise' % str(residual_num - 1)
            else:
                stage_string = skip_connect_no_dimen_match
                bottom_string = 'n.res%s_eletwise' % str(residual_num - 1)
            exec(stage_string.replace('(stage)', str(residual_num))
                             .replace('(bottom)', bottom_string)
                             .replace('(num)', str(2 ** num * 64)))
    exec('n.pool5 = L.Pooling(bottom_string, kernel_size=7, stride=1, pool=P.Pooling.AVE)'
         .replace('bottom_string', 'n.res%s_eletwise' % str(residual_num)))
    n.classifier = L.InnerProduct(n.pool5, num_output=self.classifier_num,
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)],
                                  weight_filler=dict(type='xavier'),
                                  bias_filler=dict(type='constant', value=0))
    n.loss = L.SoftmaxWithLoss(n.classifier, n.label)
    if phase != 'TRAIN':
        n.accuracy_top1 = L.Accuracy(n.classifier, n.label, include=dict(phase=1))
        n.accuracy_top5 = L.Accuracy(n.classifier, n.label, include=dict(phase=1),
                                     accuracy_param=dict(top_k=5))
    return n.to_proto()
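# A minimal usage sketch (an assumption, not from the source): the default
# stages=(3, 4, 6, 3) layout matches ResNet-34/50 depending on the block
# templates bound to the skip_connect_* strings. The builder name is hypothetical.
#
#   builder = ResNetBuilder(...)
#   with open('resnet_train.prototxt', 'w') as f:
#       f.write(str(builder.resnet_layers_proto(batch_size=32, phase='TRAIN')))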
def generate_layer(blobs, layer, n, net_params):
    """
    Parameters:
        blobs: weights for keras, layer: keras layer,
        n: Caffe NetSpec, net_params: dictionary to store Caffe weights
    """
    if type(layer) == keras.layers.InputLayer:
        name = layer.name
        input_shape = list(layer.batch_input_shape)
        # Keras is NHWC; Caffe is NCHW.
        input_shape = [1, input_shape[3], input_shape[1], input_shape[2]]
        n[name] = L.Input(shape=[dict(dim=input_shape)])
        print(f'generate {name} ok...')
    elif type(layer) == keras.layers.Dense:
        name = layer.name
        config = layer.get_config()
        use_bias = config['use_bias']
        if use_bias is None:
            use_bias = False
        # Keras stores Dense kernels as (in, out); Caffe expects (out, in).
        if use_bias:
            net_params[name] = (np.array(blobs[0]).transpose(1, 0), np.array(blobs[1]))
        else:
            net_params[name] = (np.array(blobs[0]).transpose(1, 0),)
        in_nodes = get_input_nodes(layer)
        n[name] = L.InnerProduct(n[in_nodes[0].name], num_output=layer.units, bias_term=use_bias)
        if layer.activation is not None and layer.activation.__name__ != 'linear':
            name_act = name + "_activation_" + layer.activation.__name__  # get function string
            n[name_act] = apply_activation(layer, n[name])
        print(f'generate {name} ok...')
    elif type(layer) == keras.layers.Flatten:
        raise Exception(f"{layer.name} is not implemented")
    elif type(layer) == keras.layers.Dropout:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.Dropout(n[in_nodes[0].name], dropout_ratio=layer.rate, in_place=True)
        print(f'generate {name} ok...')
    elif type(layer) == keras.layers.Add:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        network_layers = [n[ref.name] for ref in in_nodes]
        n[name] = L.Eltwise(*network_layers, operation=1)  # 1 is SUM
        print(f'generate {name} ok...')
    elif type(layer) == keras.layers.Multiply:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        network_layers = [n[ref.name] for ref in in_nodes]
        n[name] = L.Eltwise(*network_layers, operation=0)  # 0 is PROD
        print(f'generate {name} ok...')
    elif type(layer) == keras.layers.Concatenate:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        network_layers = [n[ref.name] for ref in in_nodes]
        n[name] = L.Concat(*network_layers, axis=1)
        print(f'generate {name} ok...')
    elif type(layer) == keras.layers.Maximum:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        network_layers = [n[ref.name] for ref in in_nodes]  # was `+=`, which mis-built the list
        n[name] = L.Eltwise(*network_layers, operation=2)  # 2 is MAX
        print(f'generate {name} ok...')
    elif type(layer) == keras.layers.Conv2DTranspose:
        '''
        DeconvolutionLayer:
            output = (input - 1) * stride + kernel_size - 2 * pad
            kernel_size: {{2 * factor - factor % 2}}
            stride: {{factor}}
            num_output: {{C}}
            group: {{C}}
            pad: {{ceil((factor - 1) / 2.)}}
        '''
        name = layer.name
        in_nodes = get_input_nodes(layer)
        # Stride
        if layer.strides is None:
            stride = (1, 1)
        else:
            stride = layer.strides
        # if layer.padding == 'same':  # calculate the padding for 'same'
        #     padding = [layer.kernel_size[0] // 2, layer.kernel_size[1] // 2]
        # else:
        #     padding = [0, 0]  # 'valid' means no padding
        config = layer.get_config()
        use_bias = config['use_bias']
        if use_bias is None:
            use_bias = False
        n[name] = L.Deconvolution(n[in_nodes[0].name],
                                  convolution_param=dict(kernel_h=layer.kernel_size[0],
                                                         kernel_w=layer.kernel_size[1],
                                                         stride_h=stride[0],
                                                         stride_w=stride[1],
                                                         num_output=layer.filters,
                                                         pad_h=math.ceil((stride[0] - 1) / 2.),
                                                         pad_w=math.ceil((stride[1] - 1) / 2.),
                                                         bias_term=use_bias))
        blobs[0] = np.array(blobs[0]).transpose(3, 2, 0, 1)
        net_params[name] = blobs
        if layer.activation is not None and layer.activation.__name__ != 'linear':
            name_act = name + "_activation_" + layer.activation.__name__  # get function string
            n[name_act] = apply_activation(layer, n[name])
        print(f'generate {name} ok...')
    elif type(layer) == keras.layers.BatchNormalization:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.BatchNorm(n[in_nodes[0].name], moving_average_fraction=layer.momentum,
                              eps=layer.epsilon, use_global_stats=True)
        variance = np.array(blobs[-1])
        mean = np.array(blobs[-2])
        config = layer.get_config()
        param = dict()
        if config['scale']:
            gamma = np.array(blobs[0])
        else:
            gamma = np.ones(mean.shape, dtype=np.float32)
        if config['center']:
            beta = np.array(blobs[1])
            param['bias_term'] = True
        else:
            beta = np.zeros(mean.shape, dtype=np.float32)
            param['bias_term'] = False
        net_params[name] = (mean, variance, np.array([1.0]))
        # Scale after BatchNorm
        name_scale = name + '_scale'
        n[name_scale] = L.Scale(n[name], in_place=True, scale_param=param)
        net_params[name_scale] = (gamma, beta)
        print(f'generate {name} ok...')
    # TODO needs to be implemented
    elif type(layer) == keras.layers.Conv1D:
        raise Exception(f"{layer.name} is not implemented")
    elif type(layer) == keras.layers.ZeroPadding2D:
        print(f"{layer.name} is passed...")
    elif type(layer) == keras.layers.Conv2D:
        '''
        ConvolutionLayer:
            output = (input + 2 * pad - kernel_size) / stride + 1
            kernel_shape: [out, in, k_size_h, k_size_w]
        '''
        name = layer.name
        # Padding
        if layer.padding == 'same':  # calculate the padding for 'same'
            padding = [layer.kernel_size[0] // 2, layer.kernel_size[1] // 2]
        else:
            padding = [0, 0]  # 'valid' means no padding
        in_nodes = get_input_nodes(layer)
        if type(in_nodes[0]) == keras.layers.ZeroPadding2D:
            in_nodes = get_input_nodes(in_nodes[0])
            padding = [layer.kernel_size[0] // 2, layer.kernel_size[1] // 2]
        if layer.strides is None:
            stride = (1, 1)
        else:
            stride = layer.strides
        # TODO the rest of the arguments, including bias, regularizers, dilation
        config = layer.get_config()
        use_bias = config['use_bias']
        if use_bias is None:
            use_bias = False
        n[name] = L.Convolution(n[in_nodes[0].name], kernel_h=layer.kernel_size[0],
                                kernel_w=layer.kernel_size[1], stride_h=stride[0],
                                stride_w=stride[1], num_output=layer.filters,
                                pad_h=padding[0], pad_w=padding[1], bias_term=use_bias)
        # Keras kernels are (H, W, in, out); Caffe wants (out, in, H, W).
        blobs[0] = np.array(blobs[0]).transpose((3, 2, 0, 1))
        net_params[name] = blobs
        if layer.activation is not None and layer.activation.__name__ != 'linear':
            name_act = name + "_activation_" + layer.activation.__name__  # get function string
            n[name_act] = apply_activation(layer, n[name])
        print(f'generate {name} ok...')
    elif type(layer) == keras.layers.MaxPooling2D or type(layer) == keras.layers.AveragePooling2D:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        if type(layer) == keras.layers.MaxPooling2D:
            pool = P.Pooling.MAX
        else:
            pool = P.Pooling.AVE
        if layer.strides is None:
            stride = (1, 1)
        else:
            stride = layer.strides
        # Padding
        if layer.padding == 'same':  # calculate the padding for 'same'
            padding = [layer.pool_size[0] // 2, layer.pool_size[1] // 2]
        else:
            padding = [0, 0]  # 'valid' means no padding
        n[name] = L.Pooling(n[in_nodes[0].name], kernel_h=layer.pool_size[0],
                            kernel_w=layer.pool_size[1], stride_h=stride[0],
                            stride_w=stride[1], pad_h=padding[0], pad_w=padding[1], pool=pool)
        print(f'generate {name} ok...')
    # Activation (wrapper for activations) and advanced activation layers
    elif type(layer) == keras.layers.Activation:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = apply_activation(layer, n[in_nodes[0].name])  # TODO: assert only 1 input
        print(f'generate {name} ok...')
    # Caffe lacks initializer, regularizer, and constraint params
    elif type(layer) == keras.layers.LeakyReLU:
        # TODO: figure out how to pass the leaky slope
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.PReLU(n[in_nodes[0].name])
        print(f'generate {name} ok...')
    elif type(layer) == keras.layers.PReLU:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.PReLU(n[in_nodes[0].name])
        print(f'generate {name} ok...')
    elif type(layer) == keras.layers.ELU:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.ELU(n[in_nodes[0].name], layer.alpha)
        print(f'generate {name} ok...')
    elif type(layer) == keras.layers.GlobalAveragePooling2D:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        # GlobalAveragePooling2D has no kernel_size/strides attributes, so map it
        # to Caffe's global average pooling instead.
        n[name] = L.Pooling(n[in_nodes[0].name], pool=P.Pooling.AVE, global_pooling=True)
        print(f'generate {name} ok...')
    else:
        raise Exception("Cannot convert model. " + layer.name + " is not supported.")
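# A minimal driver sketch (an assumption, not from the source): walk a Keras
# model's layers in order, emit the Caffe prototxt, then copy the collected
# weights into the instantiated net.
#
#   n = caffe.NetSpec()
#   net_params = {}
#   for layer in keras_model.layers:
#       generate_layer(layer.get_weights(), layer, n, net_params)
#   with open('converted.prototxt', 'w') as f:
#       f.write(str(n.to_proto()))
#   caffe_net = caffe.Net('converted.prototxt', caffe.TEST)
#   for name, params in net_params.items():
#       for i, p in enumerate(params):
#           caffe_net.params[name][i].data[...] = p
#   caffe_net.save('converted.caffemodel')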
def InceptionResNetV2(train_lmdb, test_lmdb, input_size=299, batch_size=256,
                      stages=[0, 5, 10, 5], first_output=32, include_acc=False):
    # NOTE: this code can't emit per-phase include rules, so the three data-layer
    # assignments below overwrite one another and only the last (the TEST-phase
    # MemoryData layer) survives in the generated prototxt.
    data, label = L.Data(source=train_lmdb, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=input_size, mean_value=[104, 117, 123],
                                              mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TRAIN')))
    data, label = L.Data(source=test_lmdb, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=input_size, mean_value=[104, 117, 123],
                                              mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TEST')))
    data, label = L.MemoryData(batch_size=batch_size, height=input_size, width=input_size,
                               channels=3, ntop=2,
                               transform_param=dict(mean_value=[104, 117, 123], mirror=True),
                               include=dict(phase=getattr(caffe_pb2, 'TEST')))
    Inception_ResNet_A_input = stem(bottom=data, conv1_num=32, conv2_num=32, conv3_num=64,
                                    conv4_num=96, conv5_num=64, conv6_num=96, conv7_num=64,
                                    conv8_num=64, conv9_num=64, conv10_num=96, conv11_num=192)
    for i in xrange(stages[1]):
        Inception_ResNet_A_input = Inception_ResNet_A(bottom=Inception_ResNet_A_input,
                                                      bottom_size=384, num1x1=32, num3x3=48,
                                                      num3x3double=64)
    Inception_ResNet_B_input = ReductionA(bottom=Inception_ResNet_A_input, num1x1_k=256,
                                          num3x3_l=256, num3x3_n=384, num3x3_m=384)
    for i in xrange(stages[2]):
        Inception_ResNet_B_input = Inception_ResNet_B(bottom=Inception_ResNet_B_input,
                                                      bottom_size=1152, num1x1=192,
                                                      num1x1double=128, num7x1=160, num1x7=192)
    Inception_ResNet_C_input = ReductionB(bottom=Inception_ResNet_B_input, num1x1=256,
                                          num3x3=384, num3x3double=288, num3x3three=320)
    for i in xrange(stages[3]):
        Inception_ResNet_C_input = Inception_ResNet_C(bottom=Inception_ResNet_C_input,
                                                      bottom_size=2144, num1x1=192,
                                                      num1x3=224, num3x1=256)
    glb_pool = L.Pooling(Inception_ResNet_C_input, pool=P.Pooling.AVE, global_pooling=True)
    dropout = L.Dropout(glb_pool, dropout_ratio=0.2)
    fc = L.InnerProduct(dropout, num_output=1000)
    loss = L.SoftmaxWithLoss(fc, label)
    acc = L.Accuracy(fc, label, include=dict(phase=getattr(caffe_pb2, 'TEST')))
    return to_proto(loss, acc)
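# A minimal usage sketch (an assumption, not from the source); the function
# returns a NetParameter proto directly via to_proto():
#
#   proto = InceptionResNetV2('train_lmdb', 'test_lmdb', batch_size=32)
#   with open('inception_resnet_v2.prototxt', 'w') as f:
#       f.write(str(proto))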
def yolonet():
    # Python data layer
    pydata_params = dict(list_root='/home/zehao/WorkSpace/caffe/examples/yolo/lists')
    pydata_params['split'] = 'train'
    pydata_params['mean'] = (104.00699, 116.66877, 122.67892)
    pydata_params['batch_size'] = 16
    pydata_params['im_shape'] = (448, 448)
    pydata_params['classes'] = 20
    pydata_params['coords'] = 4
    pydata_params['num'] = 2
    pydata_params['side'] = 7
    pylayer = 'VOCLocDataLayerSyncSync'
    data, label = L.Python(module='voc_data_layer', name='DataLayer', layer=pylayer,
                           ntop=2, param_str=str(pydata_params))

    # the net itself
    conv1, relu1 = conv_relu(data, 7, 64, stride=2, pad=3)
    pool1 = max_pool(relu1, 2, stride=2)
    conv2, relu2 = conv_relu(pool1, 3, 192, stride=1, pad=1)
    pool2 = max_pool(relu2, 2, stride=2)
    conv3, relu3 = conv_relu(pool2, 1, 128, stride=1, pad=0)
    conv4, relu4 = conv_relu(relu3, 3, 256, stride=1, pad=1)
    conv5, relu5 = conv_relu(relu4, 1, 256, stride=1, pad=0)
    conv6, relu6 = conv_relu(relu5, 3, 512, stride=1, pad=1)
    pool6 = max_pool(relu6, 2, stride=2)
    conv7, relu7 = conv_relu(pool6, 1, 256, stride=1, pad=0)
    conv8, relu8 = conv_relu(relu7, 3, 512, stride=1, pad=1)
    conv9, relu9 = conv_relu(relu8, 1, 256, stride=1, pad=0)
    conv10, relu10 = conv_relu(relu9, 3, 512, stride=1, pad=1)
    conv11, relu11 = conv_relu(relu10, 1, 256, stride=1, pad=0)
    conv12, relu12 = conv_relu(relu11, 3, 512, stride=1, pad=1)
    conv13, relu13 = conv_relu(relu12, 1, 256, stride=1, pad=0)
    conv14, relu14 = conv_relu(relu13, 3, 512, stride=1, pad=1)
    conv15, relu15 = conv_relu(relu14, 1, 512, stride=1, pad=0)
    conv16, relu16 = conv_relu(relu15, 3, 1024, stride=1, pad=1)
    pool16 = max_pool(relu16, 2, stride=2)
    conv17, relu17 = conv_relu(pool16, 1, 512, stride=1, pad=0)
    conv18, relu18 = conv_relu(relu17, 3, 1024, stride=1, pad=1)
    conv19, relu19 = conv_relu(relu18, 1, 512, stride=1, pad=0)
    conv20, relu20 = conv_relu(relu19, 3, 1024, stride=1, pad=1)
    conv21, relu21 = conv_relu(relu20, 3, 1024, stride=1, pad=1)
    conv22, relu22 = conv_relu(relu21, 3, 1024, stride=2, pad=1)
    conv23, relu23 = conv_relu(relu22, 3, 1024, stride=1, pad=1)
    conv24, relu24 = conv_relu(relu23, 3, 1024, stride=1, pad=1)
    fc25, relu25 = fc_relu(relu24, 4096)
    result = L.InnerProduct(relu25, num_output=1470,
                            weight_filler=dict(type='gaussian', std=0.01),
                            bias_filler=dict(type='constant', value=0))

    # Python loss layer
    pydata_params = dict(classes=20)
    pydata_params['coords'] = 4
    pydata_params['side'] = 7
    pydata_params['num'] = 2
    pydata_params['object_scale'] = 1
    pydata_params['noobject_scale'] = 0.5
    pydata_params['class_scale'] = 1
    pydata_params['coord_scale'] = 5
    pydata_params['sqrt'] = True
    pylayer = 'YoloLossLayer'
    loss = L.Python(result, label, name='YoloLoss', module='yolo_loss_layer', layer=pylayer,
                    ntop=1, param_str=str(pydata_params))
    return to_proto(loss)
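# A minimal usage sketch (an assumption, not from the source): the data-layer
# paths above are hard-coded, so emitting the train prototxt is a one-liner.
#
#   with open('yolo_train.prototxt', 'w') as f:
#       f.write(str(yolonet()))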
def inception_v3_proto(self, batch_size, phase='TRAIN'):
    n = caffe.NetSpec()
    if phase == 'TRAIN':
        source_data = self.train_data
        mirror = True
    else:
        source_data = self.test_data
        mirror = False
    n.data, n.label = L.Data(source=source_data, backend=P.Data.LMDB, batch_size=batch_size,
                             ntop=2, transform_param=dict(crop_size=299,
                                                          mean_value=[104, 117, 123],
                                                          mirror=mirror))

    # stage 1
    n.conv1_3x3_s2, n.conv1_3x3_s2_bn, n.conv1_3x3_relu, n.conv2_3x3_s1, n.conv2_3x3_s1_bn, \
        n.conv2_3x3_relu, n.conv3_3x3_s1, n.conv3_3x3_s1_bn, n.conv3_3x3_relu = \
        conv_bn_stack_3(n.data,
                        dict(num_output=[32, 32, 64], kernel_size=[3, 3, 3], stride=[2, 1, 1],
                             pad=[0, 0, 1], group=[1, 1, 1],
                             weight_type=['xavier', 'xavier', 'xavier'],
                             weight_std=[0.01, 0.01, 0.01],
                             bias_type=['constant', 'constant', 'constant'],
                             bias_value=[0.2, 0.2, 0.2]))
    n.pool1_3x3_s2 = L.Pooling(n.conv3_3x3_s1_bn, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    # stage 2
    n.conv4_3x3_reduce, n.conv4_3x3_reduce_bn, n.conv4_relu_3x3_reduce, n.conv4_3x3, \
        n.conv4_3x3_bn, n.conv4_relu_3x3 = \
        conv_bn_stack_2(n.pool1_3x3_s2,
                        dict(num_output=[80, 192], kernel_size=[1, 3], stride=[1, 1], pad=[0, 0],
                             group=[1, 1], weight_type=['xavier', 'xavier'],
                             weight_std=[0.01, 0.01], bias_type=['constant', 'constant'],
                             bias_value=[0.2, 0.2]))
    n.pool2_3x3_s2 = L.Pooling(n.conv4_3x3_bn, kernel_size=3, stride=2, pool=P.Pooling.MAX)

    # stage 3
    n.inception_3a_1x1, n.inception_3a_1x1_bn, n.inception_3a_relu_1x1, n.inception_3a_5x5_reduce, \
        n.inception_3a_5x5_reduce_bn, n.inception_3a_relu_5x5_reduce, n.inception_3a_5x5, \
        n.inception_3a_5x5_bn, n.inception_3a_relu_5x5, n.inception_3a_3x3_reduce, \
        n.inception_3a_3x3_reduce_bn, n.inception_3a_relu_3x3_reduce, n.inception_3a_3x3_1, \
        n.inception_3a_3x3_1_bn, n.inception_3a_relu_3x3_1, n.inception_3a_3x3_2, \
        n.inception_3a_3x3_2_bn, n.inception_3a_relu_3x3_2, n.inception_3a_pool, \
        n.inception_3a_pool_proj, n.inception_3a_pool_proj_bn, n.inception_3a_relu_pool_proj, \
        n.inception_3a_output = \
        inception_v3_7a(n.pool2_3x3_s2,
                        dict(conv_1x1=64, conv_5x5_reduce=48, conv_5x5=64, conv_3x3_reduce=64,
                             conv_3x3_1=96, conv_3x3_2=96, pool_proj=32))
    n.inception_3b_1x1, n.inception_3b_1x1_bn, n.inception_3b_relu_1x1, n.inception_3b_5x5_reduce, \
        n.inception_3b_5x5_reduce_bn, n.inception_3b_relu_5x5_reduce, n.inception_3b_5x5, \
        n.inception_3b_5x5_bn, n.inception_3b_relu_5x5, n.inception_3b_3x3_reduce, \
        n.inception_3b_3x3_reduce_bn, n.inception_3b_relu_3x3_reduce, n.inception_3b_3x3_1, \
        n.inception_3b_3x3_1_bn, n.inception_3b_relu_3x3_1, n.inception_3b_3x3_2, \
        n.inception_3b_3x3_2_bn, n.inception_3b_relu_3x3_2, n.inception_3b_pool, \
        n.inception_3b_pool_proj, n.inception_3b_pool_proj_bn, n.inception_3b_relu_pool_proj, \
        n.inception_3b_output = \
        inception_v3_7a(n.inception_3a_output,
                        dict(conv_1x1=64, conv_5x5_reduce=48, conv_5x5=64, conv_3x3_reduce=64,
                             conv_3x3_1=96, conv_3x3_2=96, pool_proj=64))
    n.inception_3c_1x1, n.inception_3c_1x1_bn, n.inception_3c_relu_1x1, n.inception_3c_5x5_reduce, \
        n.inception_3c_5x5_reduce_bn, n.inception_3c_relu_5x5_reduce, n.inception_3c_5x5, \
        n.inception_3c_5x5_bn, n.inception_3c_relu_5x5, n.inception_3c_3x3_reduce, \
        n.inception_3c_3x3_reduce_bn, n.inception_3c_relu_3x3_reduce, n.inception_3c_3x3_1, \
        n.inception_3c_3x3_1_bn, n.inception_3c_relu_3x3_1, n.inception_3c_3x3_2, \
        n.inception_3c_3x3_2_bn, n.inception_3c_relu_3x3_2, n.inception_3c_pool, \
        n.inception_3c_pool_proj, n.inception_3c_pool_proj_bn, n.inception_3c_relu_pool_proj, \
        n.inception_3c_output = \
        inception_v3_7a(n.inception_3b_output,
                        dict(conv_1x1=64, conv_5x5_reduce=48, conv_5x5=64, conv_3x3_reduce=64,
                             conv_3x3_1=96, conv_3x3_2=96, pool_proj=64))
    n.inception_3d_3x3_0, n.inception_3d_3x3_0_bn, n.inception_3d_relu_3x3_0, \
        n.inception_3d_3x3_reduce, n.inception_3d_3x3_reduce_bn, n.inception_3d_relu_3x3_reduce, \
        n.inception_3d_3x3_1, n.inception_3d_3x3_1_bn, n.inception_3d_relu_3x3_1, \
        n.inception_3d_3x3_2, n.inception_3d_3x3_2_bn, n.inception_3d_relu_3x3_2, \
        n.inception_3d_pool, n.inception_3d_output = \
        inception_v3_7b(n.inception_3c_output,
                        dict(conv_3x3_0=384, conv_3x3_reduce=64, conv_3x3_1=96, conv_3x3_2=96))

    # stage 4
    n.inception_4a_1x1, n.inception_4a_1x1_bn, n.inception_4a_relu_1x1, n.inception_4a_1x7_reduce, \
        n.inception_4a_1x7_reduce_bn, n.inception_4a_relu_1x7_reduce, n.inception_4a_1x7_0, \
        n.inception_4a_1x7_0_bn, n.inception_4a_relu_1x7_0, n.inception_4a_7x1_0, \
        n.inception_4a_7x1_0_bn, n.inception_4a_relu_7x1_0, n.inception_4a_7x1_reduce, \
        n.inception_4a_7x1_reduce_bn, n.inception_4a_relu_7x1_reduce, n.inception_4a_7x1_1, \
        n.inception_4a_7x1_1_bn, n.inception_4a_relu_7x1_1, n.inception_4a_1x7_1, \
        n.inception_4a_1x7_1_bn, n.inception_4a_relu_1x7_1, n.inception_4a_7x1_2, \
        n.inception_4a_7x1_2_bn, n.inception_4a_relu_7x1_2, n.inception_4a_1x7_2, \
        n.inception_4a_1x7_2_bn, n.inception_4a_relu_1x7_2, n.inception_4a_pool, \
        n.inception_4a_pool_proj, n.inception_4a_pool_proj_bn, n.inception_4a_relu_pool_proj, \
        n.inception_4a_output = \
        inception_v3_7c(n.inception_3d_output,
                        dict(conv_1x1=192, conv_1x7_reduce=128, conv_1x7_0=128, conv_7x1_0=192,
                             conv_7x1_reduce=128, conv_1x7_1=128, conv_7x1_1=128, conv_1x7_2=128,
                             conv_7x1_2=192, pool_proj=192))
    n.inception_4b_1x1, n.inception_4b_1x1_bn, n.inception_4b_relu_1x1, n.inception_4b_1x7_reduce, \
        n.inception_4b_1x7_reduce_bn, n.inception_4b_relu_1x7_reduce, n.inception_4b_1x7_0, \
        n.inception_4b_1x7_0_bn, n.inception_4b_relu_1x7_0, n.inception_4b_7x1_0, \
        n.inception_4b_7x1_0_bn, n.inception_4b_relu_7x1_0, n.inception_4b_7x1_reduce, \
        n.inception_4b_7x1_reduce_bn, n.inception_4b_relu_7x1_reduce, n.inception_4b_7x1_1, \
        n.inception_4b_7x1_1_bn, n.inception_4b_relu_7x1_1, n.inception_4b_1x7_1, \
        n.inception_4b_1x7_1_bn, n.inception_4b_relu_1x7_1, n.inception_4b_7x1_2, \
        n.inception_4b_7x1_2_bn, n.inception_4b_relu_7x1_2, n.inception_4b_1x7_2, \
        n.inception_4b_1x7_2_bn, n.inception_4b_relu_1x7_2, n.inception_4b_pool, \
        n.inception_4b_pool_proj, n.inception_4b_pool_proj_bn, n.inception_4b_relu_pool_proj, \
        n.inception_4b_output = \
        inception_v3_7c(n.inception_4a_output,
                        dict(conv_1x1=192, conv_1x7_reduce=160, conv_1x7_0=160, conv_7x1_0=192,
                             conv_7x1_reduce=160, conv_1x7_1=160, conv_7x1_1=160, conv_1x7_2=160,
                             conv_7x1_2=160, pool_proj=192))
    n.inception_4c_1x1, n.inception_4c_1x1_bn, n.inception_4c_relu_1x1, n.inception_4c_1x7_reduce, \
        n.inception_4c_1x7_reduce_bn, n.inception_4c_relu_1x7_reduce, n.inception_4c_1x7_0, \
        n.inception_4c_1x7_0_bn, n.inception_4c_relu_1x7_0, n.inception_4c_7x1_0, \
        n.inception_4c_7x1_0_bn, n.inception_4c_relu_7x1_0, n.inception_4c_7x1_reduce, \
        n.inception_4c_7x1_reduce_bn, n.inception_4c_relu_7x1_reduce, n.inception_4c_7x1_1, \
        n.inception_4c_7x1_1_bn, n.inception_4c_relu_7x1_1, n.inception_4c_1x7_1, \
        n.inception_4c_1x7_1_bn, n.inception_4c_relu_1x7_1, n.inception_4c_7x1_2, \
        n.inception_4c_7x1_2_bn, n.inception_4c_relu_7x1_2, n.inception_4c_1x7_2, \
        n.inception_4c_1x7_2_bn, n.inception_4c_relu_1x7_2, n.inception_4c_pool, \
        n.inception_4c_pool_proj, n.inception_4c_pool_proj_bn, n.inception_4c_relu_pool_proj, \
        n.inception_4c_output = \
        inception_v3_7c(n.inception_4b_output,
                        dict(conv_1x1=192, conv_1x7_reduce=160, conv_1x7_0=160, conv_7x1_0=192,
                             conv_7x1_reduce=160, conv_1x7_1=160, conv_7x1_1=160, conv_1x7_2=160,
                             conv_7x1_2=160, pool_proj=192))
    n.inception_4d_1x1, n.inception_4d_1x1_bn, n.inception_4d_relu_1x1, n.inception_4d_1x7_reduce, \
        n.inception_4d_1x7_reduce_bn, n.inception_4d_relu_1x7_reduce, n.inception_4d_1x7_0, \
        n.inception_4d_1x7_0_bn, n.inception_4d_relu_1x7_0, n.inception_4d_7x1_0, \
        n.inception_4d_7x1_0_bn, n.inception_4d_relu_7x1_0, n.inception_4d_7x1_reduce, \
        n.inception_4d_7x1_reduce_bn, n.inception_4d_relu_7x1_reduce, n.inception_4d_7x1_1, \
        n.inception_4d_7x1_1_bn, n.inception_4d_relu_7x1_1, n.inception_4d_1x7_1, \
        n.inception_4d_1x7_1_bn, n.inception_4d_relu_1x7_1, n.inception_4d_7x1_2, \
        n.inception_4d_7x1_2_bn, n.inception_4d_relu_7x1_2, n.inception_4d_1x7_2, \
        n.inception_4d_1x7_2_bn, n.inception_4d_relu_1x7_2, n.inception_4d_pool, \
        n.inception_4d_pool_proj, n.inception_4d_pool_proj_bn, n.inception_4d_relu_pool_proj, \
        n.inception_4d_output = \
        inception_v3_7c(n.inception_4c_output,
                        dict(conv_1x1=192, conv_1x7_reduce=192, conv_1x7_0=192, conv_7x1_0=192,
                             conv_7x1_reduce=192, conv_1x7_1=192, conv_7x1_1=192, conv_1x7_2=192,
                             conv_7x1_2=192, pool_proj=192))
    n.inception_4e_3x3_reduce, n.inception_4e_3x3_reduce_bn, n.inception_4e_relu_3x3_reduce, \
        n.inception_4e_3x3_0, n.inception_4e_3x3_0_bn, n.inception_4e_relu_3x3_0, \
        n.inception_4e_1x7_reduce, n.inception_4e_1x7_reduce_bn, n.inception_4e_relu_1x7_reduce, \
        n.inception_4e_1x7, n.inception_4e_1x7_bn, n.inception_4e_relu_1x7, n.inception_4e_7x1, \
        n.inception_4e_7x1_bn, n.inception_4e_relu_7x1, n.inception_4e_3x3_1, \
        n.inception_4e_3x3_1_bn, n.inception_4e_relu_3x3_1, n.inception_4e_pool, \
        n.inception_4e_output = \
        inception_v3_7d(n.inception_4d_output,
                        dict(conv_3x3_reduce=192, conv_3x3_0=320, conv_1x7_reduce=192,
                             conv_1x7=192, conv_7x1=192, conv_3x3_1=192))

    # stage 5
    n.inception_5a_1x1, n.inception_5a_1x1_bn, n.inception_5a_relu_1x1, \
        n.inception_5a_3x3_0_reduce, n.inception_5a_3x3_0_reduce_bn, \
        n.inception_5a_relu_3x3_0_reduce, n.inception_5a_1x3_0, n.inception_5a_1x3_0_bn, \
        n.inception_5a_relu_1x3_0, n.inception_5a_3x1_0, n.inception_5a_3x1_0_bn, \
        n.inception_5a_relu_3x1_0, n.inception_5a_3x3_1_reduce, n.inception_5a_3x3_1_reduce_bn, \
        n.inception_5a_relu_3x3_1_reduce, n.inception_5a_3x3_1, n.inception_5a_3x3_1_bn, \
        n.inception_5a_relu_3x3_1, n.inception_5a_1x3_1, n.inception_5a_1x3_1_bn, \
        n.inception_5a_relu_1x3_1, n.inception_5a_3x1_1, n.inception_5a_3x1_1_bn, \
        n.inception_5a_relu_3x1_1, n.inception_5a_pool, n.inception_5a_pool_proj, \
        n.inception_5a_pool_proj_bn, n.inception_5a_relu_pool_proj, n.inception_5a_output = \
        inception_v3_7e(n.inception_4e_output,
                        dict(conv_1x1=320, conv_3x3_0_reduce=384, conv_1x3_0=384, conv_3x1_0=384,
                             conv_3x3_1_reduce=448, conv_3x3_1=384, conv_1x3_1=384,
                             conv_3x1_1=384, pooling=P.Pooling.AVE, pool_proj=192))
    n.inception_5b_1x1, n.inception_5b_1x1_bn, n.inception_5b_relu_1x1, \
        n.inception_5b_3x3_0_reduce, n.inception_5b_3x3_0_reduce_bn, \
        n.inception_5b_relu_3x3_0_reduce, n.inception_5b_1x3_0, n.inception_5b_1x3_0_bn, \
        n.inception_5b_relu_1x3_0, n.inception_5b_3x1_0, n.inception_5b_3x1_0_bn, \
        n.inception_5b_relu_3x1_0, n.inception_5b_3x3_1_reduce, n.inception_5b_3x3_1_reduce_bn, \
        n.inception_5b_relu_3x3_1_reduce, n.inception_5b_3x3_1, n.inception_5b_3x3_1_bn, \
        n.inception_5b_relu_3x3_1, n.inception_5b_1x3_1, n.inception_5b_1x3_1_bn, \
        n.inception_5b_relu_1x3_1, n.inception_5b_3x1_1, n.inception_5b_3x1_1_bn, \
        n.inception_5b_relu_3x1_1, n.inception_5b_pool, n.inception_5b_pool_proj, \
        n.inception_5b_pool_proj_bn, n.inception_5b_relu_pool_proj, n.inception_5b_output = \
        inception_v3_7e(n.inception_5a_output,
                        dict(conv_1x1=320, conv_3x3_0_reduce=384, conv_1x3_0=384, conv_3x1_0=384,
                             conv_3x3_1_reduce=448, conv_3x3_1=384, conv_1x3_1=384,
                             conv_3x1_1=384, pooling=P.Pooling.MAX, pool_proj=192))
    n.pool3_7x7_s1 = L.Pooling(n.inception_5b_output, kernel_size=7, stride=1,
                               pool=P.Pooling.AVE)
    n.classifier = L.InnerProduct(n.pool3_7x7_s1, num_output=self.classifier_num,
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)],
                                  weight_filler=dict(type='xavier'),
                                  bias_filler=dict(type='constant', value=0))
    n.loss = L.SoftmaxWithLoss(n.classifier, n.label)
    if phase != 'TRAIN':
        n.loss_top1, n.loss_top5 = accuracy_top1_top5(n.classifier, n.label)
    return n.to_proto()
def inference_proto(self, bottom, mult=1., truncate_at=None, deploy=False): ns = self.netspec w_params = dict(lr_mult=mult, decay_mult=mult) b_params = dict(lr_mult=mult, decay_mult=0) conv_opt_params = dict(weight_filler=CONV_W_INIT, bias_filler=CONV_B_INIT, param=[w_params, b_params ]) if not deploy else {} fc_opt_params = dict(weight_filler=FC_W_INIT, bias_filler=FC_B_INIT, param=[w_params, b_params]) if not deploy else {} ns.conv1 = L.Convolution(bottom, num_output=96, kernel_size=11, stride=4, **conv_opt_params) ns.relu1 = L.ReLU(ns.conv1, in_place=True) ns.norm1 = L.LRN(ns.relu1, local_size=5, alpha=0.0001, beta=0.75) ns.pool1 = L.Pooling(ns.norm1, pool=P.Pooling.MAX, kernel_size=3, stride=2) if truncate_at == 'pool1': return ns.pool1 ns.conv2 = L.Convolution(ns.pool1, num_output=256, kernel_size=5, pad=2, group=2, **conv_opt_params) ns.relu2 = L.ReLU(ns.conv2, in_place=True) ns.norm2 = L.LRN(ns.relu2, local_size=5, alpha=0.0001, beta=0.75) ns.pool2 = L.Pooling(ns.norm2, pool=P.Pooling.MAX, kernel_size=3, stride=2) if truncate_at == 'pool2': return ns.pool2 ns.conv3 = L.Convolution(ns.pool2, num_output=384, kernel_size=3, pad=1, **conv_opt_params) ns.relu3 = L.ReLU(ns.conv3, in_place=True) if truncate_at == 'conv3': return ns.relu3 ns.conv4 = L.Convolution(ns.relu3, num_output=384, kernel_size=3, pad=1, group=2, **conv_opt_params) ns.relu4 = L.ReLU(ns.conv4, in_place=True) if truncate_at == 'conv4': return ns.relu4 ns.conv5 = L.Convolution(ns.relu4, num_output=256, kernel_size=3, pad=1, group=2, **conv_opt_params) ns.relu5 = L.ReLU(ns.conv5, in_place=True) ns.pool5 = L.Pooling(ns.relu5, pool=P.Pooling.MAX, kernel_size=3, stride=2) if truncate_at == 'pool5': return ns.pool5 ns.fc6 = L.InnerProduct(ns.pool5, num_output=4096, **fc_opt_params) ns.relu6 = L.ReLU(ns.fc6, in_place=True) ns.drop6 = L.Dropout(ns.relu6, dropout_ratio=0.5, in_place=True) if truncate_at == 'fc6': return ns.drop6 ns.fc7 = L.InnerProduct(ns.drop6, num_output=4096, **fc_opt_params) ns.relu7 = L.ReLU(ns.fc7, in_place=True) ns.drop7 = L.Dropout(ns.relu7, dropout_ratio=0.5, in_place=True) if truncate_at == 'fc7': return ns.drop7 ns.fc8 = L.InnerProduct(ns.fc7, num_output=1000, **fc_opt_params) return ns.fc8
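# Usage sketch for inference_proto() above: truncate the conv stack at pool5
# to get a deploy-time feature extractor. `AlexNetFeatures` is a hypothetical
# stand-in for the owning class, which is assumed to expose a `netspec`
# attribute (a caffe.NetSpec) plus the CONV_*/FC_* filler constants used
# above; the module-level caffe/L imports used throughout this file are
# assumed as well.
def write_pool5_deploy():
    builder = AlexNetFeatures()  # hypothetical owner of inference_proto
    builder.netspec = caffe.NetSpec()
    builder.netspec.data = L.Input(shape=dict(dim=[1, 3, 227, 227]))
    builder.inference_proto(builder.netspec.data, truncate_at='pool5',
                            deploy=True)
    with open('alexnet_pool5_deploy.prototxt', 'w') as f:
        f.write(str(builder.netspec.to_proto()))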
def ZFNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False, dilated=False, dropout=True, need_fc8=False, freeze_layers=[]): kwargs = { 'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], 'weight_filler': dict(type='xavier'), 'bias_filler': dict(type='constant', value=0)} assert from_layer in net.keys() net.conv1 = L.Convolution(net[from_layer], num_output=96, pad=3, kernel_size=7, stride=2, **kwargs) net.relu1 = L.ReLU(net.conv1, in_place=True) net.norm1 = L.LRN(net.relu1, local_size=3, alpha=0.00005, beta=0.75, norm_region=P.LRN.WITHIN_CHANNEL, engine=P.LRN.CAFFE) net.pool1 = L.Pooling(net.norm1, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=2) net.conv2 = L.Convolution(net.pool1, num_output=256, pad=2, kernel_size=5, stride=2, **kwargs) net.relu2 = L.ReLU(net.conv2, in_place=True) net.norm2 = L.LRN(net.relu2, local_size=3, alpha=0.00005, beta=0.75, norm_region=P.LRN.WITHIN_CHANNEL, engine=P.LRN.CAFFE) net.pool2 = L.Pooling(net.norm2, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=2) net.conv3 = L.Convolution(net.pool2, num_output=384, pad=1, kernel_size=3, **kwargs) net.relu3 = L.ReLU(net.conv3, in_place=True) net.conv4 = L.Convolution(net.relu3, num_output=384, pad=1, kernel_size=3, **kwargs) net.relu4 = L.ReLU(net.conv4, in_place=True) net.conv5 = L.Convolution(net.relu4, num_output=256, pad=1, kernel_size=3, **kwargs) net.relu5 = L.ReLU(net.conv5, in_place=True) if need_fc: if dilated: name = 'pool5' net[name] = L.Pooling(net.relu5, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=1) else: name = 'pool5' net[name] = L.Pooling(net.relu5, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=2) if fully_conv: if dilated: if reduced: net.fc6 = L.Convolution(net[name], num_output=1024, pad=5, kernel_size=3, dilation=5, **kwargs) else: net.fc6 = L.Convolution(net[name], num_output=4096, pad=5, kernel_size=6, dilation=2, **kwargs) else: if reduced: net.fc6 = L.Convolution(net[name], num_output=1024, pad=2, kernel_size=3, dilation=2, **kwargs) else: net.fc6 = L.Convolution(net[name], num_output=4096, pad=2, kernel_size=6, **kwargs) net.relu6 = L.ReLU(net.fc6, in_place=True) if dropout: net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True) if reduced: net.fc7 = L.Convolution(net.relu6, num_output=1024, kernel_size=1, **kwargs) else: net.fc7 = L.Convolution(net.relu6, num_output=4096, kernel_size=1, **kwargs) net.relu7 = L.ReLU(net.fc7, in_place=True) if dropout: net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True) else: net.fc6 = L.InnerProduct(net.pool5, num_output=4096) net.relu6 = L.ReLU(net.fc6, in_place=True) if dropout: net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True) net.fc7 = L.InnerProduct(net.relu6, num_output=4096) net.relu7 = L.ReLU(net.fc7, in_place=True) if dropout: net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True) if need_fc8: from_layer = net.keys()[-1] if fully_conv: net.fc8 = L.Convolution(net[from_layer], num_output=1000, kernel_size=1, **kwargs) else: net.fc8 = L.InnerProduct(net[from_layer], num_output=1000) net.prob = L.Softmax(net.fc8) # Update freeze layers. kwargs['param'] = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)] layers = net.keys() for freeze_layer in freeze_layers: if freeze_layer in layers: net.update(freeze_layer, kwargs) return net
def densenet(mode, data_file, bs, nlayer, nclass, first_nout=16,
             growth_rate=16, dropout=0.2):
    net = caffe.NetSpec()

    # data layer ---------------------------------------------------------------
    mirror = True
    shuffle = True
    if mode == 1:  # TEST phase
        mirror = False
        shuffle = False
    transform = dict(scale=0.0078125,
                     mirror=mirror,
                     #crop_size=224,
                     mean_value=[127.5, 127.5, 127.5])
    net.data, net.label = L.Data(#include=dict(phase=mode),
                                 transform_param=transform,
                                 source=data_file,
                                 batch_size=bs,
                                 backend=P.Data.LMDB,
                                 ntop=2)
    # net.data, net.label = L.ImageData(#include=dict(phase=mode),
    #                                   transform_param=transform,
    #                                   source=data_file,
    #                                   batch_size=bs,
    #                                   shuffle=shuffle,
    #                                   #new_height=256,
    #                                   #new_width=256,
    #                                   #is_color=True,
    #                                   ntop=2)

    pre_fmap = 0  # total number of previous feature maps

    # first convolution --------------------------------------------------------
    net.conv_1 = L.Convolution(net.data, num_output=first_nout,
                               kernel_size=7, stride=2, pad=3,
                               weight_filler=dict(type='msra'),
                               bias_filler=dict(type='constant'),
                               param=[dict(lr_mult=1, decay_mult=1),
                                      dict(lr_mult=2, decay_mult=0)])
    net.relu_1 = L.PReLU(net.conv_1, in_place=True)
    net.pool_1 = L.Pooling(net.relu_1, pool=P.Pooling.MAX,
                           kernel_size=3, stride=2)
    pre_layer = net.pool_1
    pre_fmap += first_nout

    # DB + TD ------------------------------------------------------------------
    # major/minor are offset by +1 so block indices start from 1
    for major in xrange(len(nlayer) - 1):
        # DB
        for minor in xrange(nlayer[major]):
            pre_layer = cat_layer(net, mode, major + 1, minor + 1, pre_layer,
                                  growth_rate, dropout)
            pre_fmap += growth_rate
        # TD
        pre_layer = transition_down(net, mode, major + 1, pre_layer,
                                    pre_fmap, dropout)
        pre_fmap = pre_fmap // 2

    # last DB, without TD
    major = len(nlayer)
    for minor in xrange(nlayer[-1]):
        pre_layer = cat_layer(net, mode, major, minor + 1, pre_layer,
                              growth_rate, dropout)
        pre_fmap += growth_rate

    # final layers ---------------------------------------------------------------
    use_global_stats = False
    if mode == 1:  # TEST phase
        use_global_stats = True
    net.bn_final = L.BatchNorm(pre_layer, in_place=False,
                               batch_norm_param=dict(use_global_stats=use_global_stats),
                               param=[dict(lr_mult=0, decay_mult=0),
                                      dict(lr_mult=0, decay_mult=0),
                                      dict(lr_mult=0, decay_mult=0)])
    net.scale_final = L.Scale(net.bn_final, bias_term=True, in_place=True,
                              filler=dict(value=1), bias_filler=dict(value=0))
    net.relu_final = L.PReLU(net.scale_final, in_place=True)
    net.pool_final = L.Pooling(net.relu_final, pool=P.Pooling.AVE,
                               global_pooling=True)
    net.fc_class = L.InnerProduct(net.pool_final, num_output=nclass,
                                  weight_filler=dict(type='xavier'),
                                  bias_filler=dict(type='constant'),
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)])
    net.loss = L.SoftmaxWithLoss(net.fc_class, net.label)
    if mode == 1:
        net.accuracy = L.Accuracy(net.fc_class, net.label)
    return str(net.to_proto())
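# Usage sketch for densenet() above (placeholder paths and a DenseNet-40-style
# three-block layout; mode == 1 means the TEST phase, matching the convention
# inside the function):
def write_densenet_protos():
    train_proto = densenet(mode=0, data_file='train_lmdb', bs=64,
                           nlayer=[12, 12, 12], nclass=10)
    test_proto = densenet(mode=1, data_file='test_lmdb', bs=100,
                          nlayer=[12, 12, 12], nclass=10)
    with open('densenet_train.prototxt', 'w') as f:
        f.write(train_proto)
    with open('densenet_test.prototxt', 'w') as f:
        f.write(test_proto)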
def unpack_item(self, layer, previous_image_size, layer_number, bottom, label=None): if layer.terminate == 1: # Softmax Accuracy/Loss # loss = cl.SoftmaxWithLoss(bottom, label) bottom = cl.InnerProduct(bottom, num_output=self.hp.NUM_CLASSES, weight_filler=dict(type='xavier')) return bottom if layer.layer_type == 'conv': out_depth = layer.filter_depth kernel_size = layer.filter_size stride = layer.stride pad = self.get_pad(kernel_size) bottom = cl.Convolution(bottom, kernel_size=kernel_size, num_output=out_depth, stride=stride, pad=pad, weight_filler=dict(type='xavier')) if self.ssp.batch_norm: bottom = self.add_batchnorm(bottom) return self.add_activate(bottom) if layer.layer_type == 'nin': out_depth = layer.filter_depth bottom = cl.Convolution(bottom, kernel_size=1, num_output=out_depth, weight_filler=dict(type='xavier')) bottom = self.add_activate(bottom) bottom = cl.Convolution(bottom, kernel_size=1, num_output=out_depth, weight_filler=dict(type='xavier')) bottom = self.add_activate(bottom) return bottom if layer.layer_type == 'gap': out_depth = self.hp.NUM_CLASSES bottom = cl.Convolution(bottom, kernel_size=1, num_output=out_depth, weight_filler=dict(type='xavier')) bottom = self.add_activate(bottom) bottom = cl.Pooling(bottom, kernel_size=previous_image_size, pool=P.Pooling.AVE) return bottom if layer.layer_type == 'fc': num_output = layer.fc_size bottom = cl.InnerProduct(bottom, num_output=num_output, weight_filler=dict(type='xavier')) bottom = self.add_activate(bottom) return bottom if layer.layer_type == 'dropout': dropout_ratio = 0.5 * float(layer.filter_depth) / layer.fc_size return cl.Dropout(bottom, dropout_ratio=dropout_ratio) if layer.layer_type == 'pool': kernel_size = layer.filter_size stride = layer.stride if self.ssp.batch_norm: bottom = self.add_batchnorm(bottom) return cl.Pooling(bottom, kernel_size=kernel_size, stride=stride, pool=P.Pooling.MAX)
def addDANStage(self, net, istrain):
    # CONNECTION LAYERS OF PREVIOUS STAGE
    # TRANSFORM ESTIMATION
    net.s1_transform_params = L.Python(
        net.s1_landmarks,
        module="LandmarkTranFormLayer",
        layer="LandmarkTranFormLayer",
        param_str=str(dict(mean_shape=self.initlandmarks.tolist())))
    # IMAGE TRANSFORM
    net.s1_img_output = L.Python(net.s1_input, net.s1_transform_params,
                                 module="AffineTransformLayer",
                                 layer="AffineTransformLayer")
    # LANDMARK TRANSFORM
    net.s1_landmarks_affine = L.Python(net.s1_landmarks,
                                       net.s1_transform_params,
                                       module="LandmarkTransformLayer",
                                       layer="LandmarkTransformLayer")
    # HEATMAP GENERATION
    net.s1_img_heatmap = L.Python(net.s1_landmarks_affine,
                                  module="GetHeatMapLayer",
                                  layer="GetHeatMapLayer")
    # FEATURE GENERATION
    # 56*56 is used instead of 112*112 to reduce the parameter count; the
    # two perform almost identically in the end.
    # Distinct top names so each step keeps its own blob.
    net.s1_img_fc, net.s1_img_fc_relu = fc_relu(net.s1_fc1_batch, 56 * 56)
    net.s1_img_feature = L.Reshape(net.s1_img_fc_relu,
                                   shape=dict(dim=[-1, 1, 56, 56]))
    net.s1_img_feature_up = L.Python(net.s1_img_feature,
                                     module="Upscale2DLayer",
                                     layer="Upscale2DLayer",
                                     param_str=str(dict(scale_factor=2)))
    # CURRENT STAGE
    net.s2_input = L.Concat(net.s1_img_output, net.s1_img_heatmap,
                            net.s1_img_feature_up)
    net.s2_input_batch = L.BatchNorm(net.s2_input)
    net.s2_conv1_1, net.s2_relu1_1 = conv_relu(net.s2_input_batch, 3, 64)
    net.s2_batch1_1 = L.BatchNorm(net.s2_relu1_1)
    net.s2_conv1_2, net.s2_relu1_2 = conv_relu(net.s2_batch1_1, 3, 64)
    net.s2_batch1_2 = L.BatchNorm(net.s2_relu1_2)
    net.s2_pool1 = max_pool(net.s2_batch1_2, 2)
    net.s2_conv2_1, net.s2_relu2_1 = conv_relu(net.s2_pool1, 3, 128)
    net.s2_batch2_1 = L.BatchNorm(net.s2_relu2_1)
    net.s2_conv2_2, net.s2_relu2_2 = conv_relu(net.s2_batch2_1, 3, 128)
    net.s2_batch2_2 = L.BatchNorm(net.s2_relu2_2)
    net.s2_pool2 = max_pool(net.s2_batch2_2)
    net.s2_conv3_1, net.s2_relu3_1 = conv_relu(net.s2_pool2, 3, 256)
    net.s2_batch3_1 = L.BatchNorm(net.s2_relu3_1)
    net.s2_conv3_2, net.s2_relu3_2 = conv_relu(net.s2_batch3_1, 3, 256)
    net.s2_batch3_2 = L.BatchNorm(net.s2_relu3_2)
    net.s2_pool3 = max_pool(net.s2_batch3_2)
    net.s2_conv4_1, net.s2_relu4_1 = conv_relu(net.s2_pool3, 3, 512)
    net.s2_batch4_1 = L.BatchNorm(net.s2_relu4_1)
    net.s2_conv4_2, net.s2_relu4_2 = conv_relu(net.s2_batch4_1, 3, 512)
    net.s2_batch4_2 = L.BatchNorm(net.s2_relu4_2)
    net.s2_pool4 = max_pool(net.s2_batch4_2)
    net.s2_pool4_flatten = L.Flatten(net.s2_pool4)
    if istrain:
        net.s2_fc1_dropout = L.Dropout(net.s2_pool4_flatten,
                                       dropout_ratio=0.5, in_place=True)
        # , include=dict(phase=caffe.TRAIN)
    else:
        net.s2_fc1_dropout = net.s2_pool4_flatten
    net.s2_fc1, net.s2_fc1_relu = fc_relu(net.s2_fc1_dropout, 256)
    net.s2_fc1_batch = L.BatchNorm(net.s2_fc1_relu)
    net.s2_output = L.InnerProduct(net.s2_fc1_batch, num_output=136,
                                   bias_filler=dict(type='constant', value=0))
    net.s2_landmarks = L.Eltwise(net.s2_output, net.s1_landmarks_affine)
    net.s2_landmarks = L.Python(net.s2_landmarks, net.s1_transform_params,
                                module="LandmarkTranFormLayer",
                                layer="LandmarkTranFormLayer")
def MakeNetwork(self, db, batch_size, layers, deploy, act, input_dropout,
                hidden_dropout, L2, filler):
    # Create data layer
    data, label = L.HDF5Data(source=db, batch_size=batch_size, ntop=2)
    # Add hidden layers
    top = data
    if input_dropout != 0:
        top = L.Dropout(top, in_place=True, dropout_ratio=input_dropout)
    for x in range(len(layers)):
        if L2:
            if filler == 1:
                top = L.InnerProduct(top, num_output=layers[x],
                                     weight_filler=dict(type='xavier'),
                                     bias_filler=dict(type='xavier'),
                                     param=[dict(decay_mult=1)])
            elif filler == 2:
                top = L.InnerProduct(top, num_output=layers[x],
                                     weight_filler=dict(type='gaussian', std=0.01),
                                     bias_filler=dict(type='gaussian', std=0.01),
                                     param=[dict(decay_mult=1)])
        else:
            if filler == 1:
                top = L.InnerProduct(top, num_output=layers[x],
                                     weight_filler=dict(type='xavier'),
                                     bias_filler=dict(type='xavier'),
                                     param=[dict(decay_mult=0)])
            elif filler == 2:
                top = L.InnerProduct(top, num_output=layers[x],
                                     weight_filler=dict(type='gaussian', std=0.01),
                                     bias_filler=dict(type='gaussian', std=0.01),
                                     param=[dict(decay_mult=0)])
        if act == 1:
            top = L.ReLU(top, in_place=True)
        elif act == 2:
            top = L.Sigmoid(top, in_place=True)
        elif act == 3:
            top = L.TanH(top, in_place=True)
        else:
            print "Error, invalid activation function choice"
        if hidden_dropout != 0:
            top = L.Dropout(top, in_place=True,
                            dropout_ratio=hidden_dropout)
    # Add output layer
    if filler == 1:
        output = L.InnerProduct(top, num_output=self._numClasses,
                                weight_filler=dict(type='xavier'),
                                bias_filler=dict(type='xavier'))
    elif filler == 2:
        output = L.InnerProduct(top, num_output=self._numClasses,
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='gaussian', std=0.01))
    if not deploy:
        loss = L.SoftmaxWithLoss(output, label)
        return to_proto(loss)
    else:
        prob = L.Softmax(output)
        return to_proto(prob)
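# Usage sketch for MakeNetwork() above (`builder` is a hypothetical instance
# of the surrounding class, which supplies self._numClasses): a three-layer
# MLP with ReLU (act=1), xavier fillers (filler=1), weight decay enabled,
# and 20% hidden dropout. The HDF5 list path is a placeholder.
def write_mlp_proto(builder):
    proto = builder.MakeNetwork('train_data.h5.txt', batch_size=128,
                                layers=[512, 256, 128], deploy=False, act=1,
                                input_dropout=0, hidden_dropout=0.2,
                                L2=True, filler=1)
    with open('mlp_train.prototxt', 'w') as f:
        f.write(str(proto))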
def fc_relu(bottom, nout): fc = L.InnerProduct(bottom, num_output=nout, weight_filler=dict(type='xavier'), bias_filler=dict(type='constant', value=0)) return fc, L.ReLU(fc, in_place=True)
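# The companion helpers conv_relu and max_pool are called throughout this
# file but not shown in this excerpt. Below is a plausible sketch matching
# the conv_relu(bottom, ks, nout) / max_pool(bottom, ks=2) call sites in the
# DAN code above; note that vgg_face() below uses a different
# conv_relu(bottom, nout) variant with a fixed 3x3 kernel.
def conv_relu(bottom, ks, nout, stride=1, pad=1):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride, pad=pad,
                         num_output=nout,
                         weight_filler=dict(type='xavier'),
                         bias_filler=dict(type='constant', value=0))
    return conv, L.ReLU(conv, in_place=True)


def max_pool(bottom, ks=2, stride=2):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks,
                     stride=stride)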
def vgg_face(split, mean, opt): n = caffe.NetSpec() # config python data layer if split == 'train': batch_size = opt.train_batch_size if split == 'val': batch_size = opt.val_batch_size if split == 'test': batch_size = opt.test_batch_size if split == 'train' or split == 'val': dataset_name = opt.train_dataset_name else: dataset_name = opt.test_dataset_name pydata_params = dict(split=split, data_dir=opt.data_dir, batch_size=batch_size, mean=mean, dataset=dataset_name, load_size=opt.load_size, crop_size=opt.crop_size) n.data, n.label = L.Python(module='faceData_layers', layer='FaceDataLayer', ntop=2, param_str=str(pydata_params)) # vgg-face net # conv layers n.conv1_1, n.relu1_1 = conv_relu(n.data, 64) n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) n.pool1 = max_pool(n.relu1_2) n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) n.pool2 = max_pool(n.relu2_2) n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) n.pool3 = max_pool(n.relu3_3) n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) n.pool4 = max_pool(n.relu4_3) n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) n.pool5 = max_pool(n.relu5_3) # drop out and fc layers n.fc6, n.relu6, n.drop6 = fc_relu_dropout(n.pool5, 4096, 0.5) n.fc7, n.relu7, n.drop7 = fc_relu_dropout(n.fc6, 4096, 0.5) lr_ratio = 100 # lr multiplier for truncated layers n.fc8_face = L.InnerProduct(n.fc7, num_output=1024, param=[ dict(lr_mult=1 * lr_ratio, decay_mult=1), dict(lr_mult=2 * lr_ratio, decay_mult=0) ], weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0)) n.fc9_face = L.InnerProduct(n.fc8_face, num_output=2, param=[ dict(lr_mult=1 * lr_ratio, decay_mult=1), dict(lr_mult=2 * lr_ratio, decay_mult=0) ], weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0)) # loss layer n.loss = L.SoftmaxWithLoss(n.fc9_face, n.label) # loss and accuracy layer n.acc = L.Accuracy(n.fc9_face, n.label) return n.to_proto()
def createCNN(self, istrain):
    net = caffe.NetSpec()
    # TRAIN and TEST share the same MemoryData input definition
    net.s1_input, net.label = L.MemoryData(batch_size=self.batchsize,
                                           channels=self.nChannels,
                                           height=self.imageHeight,
                                           width=self.imageWidth, ntop=2)
    # STAGE 1
    net.s1_conv1_1, net.s1_relu1_1 = conv_relu(net.s1_input, 3, 64)
    net.s1_batch1_1 = L.BatchNorm(net.s1_relu1_1)
    net.s1_conv1_2, net.s1_relu1_2 = conv_relu(net.s1_batch1_1, 3, 64)
    net.s1_batch1_2 = L.BatchNorm(net.s1_relu1_2)
    net.s1_pool1 = max_pool(net.s1_batch1_2, 2)
    net.s1_conv2_1, net.s1_relu2_1 = conv_relu(net.s1_pool1, 3, 128)
    net.s1_batch2_1 = L.BatchNorm(net.s1_relu2_1)
    net.s1_conv2_2, net.s1_relu2_2 = conv_relu(net.s1_batch2_1, 3, 128)
    net.s1_batch2_2 = L.BatchNorm(net.s1_relu2_2)
    net.s1_pool2 = max_pool(net.s1_batch2_2)
    net.s1_conv3_1, net.s1_relu3_1 = conv_relu(net.s1_pool2, 3, 256)
    net.s1_batch3_1 = L.BatchNorm(net.s1_relu3_1)
    net.s1_conv3_2, net.s1_relu3_2 = conv_relu(net.s1_batch3_1, 3, 256)
    net.s1_batch3_2 = L.BatchNorm(net.s1_relu3_2)
    net.s1_pool3 = max_pool(net.s1_batch3_2)
    net.s1_conv4_1, net.s1_relu4_1 = conv_relu(net.s1_pool3, 3, 512)
    net.s1_batch4_1 = L.BatchNorm(net.s1_relu4_1)
    net.s1_conv4_2, net.s1_relu4_2 = conv_relu(net.s1_batch4_1, 3, 512)
    net.s1_batch4_2 = L.BatchNorm(net.s1_relu4_2)
    net.s1_pool4 = max_pool(net.s1_batch4_2)
    if istrain:
        net.s1_fc1_dropout = L.Dropout(net.s1_pool4, dropout_ratio=0.5,
                                       in_place=True)
    else:
        net.s1_fc1_dropout = net.s1_pool4
    net.s1_fc1, net.s1_fc1_relu = fc_relu(net.s1_fc1_dropout, 256)
    net.s1_fc1_batch = L.BatchNorm(net.s1_fc1_relu)
    net.s1_output = L.InnerProduct(net.s1_fc1_batch, num_output=136,
                                   bias_filler=dict(type='constant', value=0))
    net.s1_landmarks = L.Python(
        net.s1_output, module="InitLandmark", layer="InitLandmark",
        param_str=str(dict(initlandmarks=self.initLandmarks.tolist())))
    if self.nStages == 2:
        self.addDANStage(net, istrain)
        net.output = net.s2_landmarks
    else:
        net.output = net.s1_landmarks
    net.loss = L.Python(net.output, net.label,
                        module="SumOfSquaredLossLayer",
                        layer="SumOfSquaredLossLayer",
                        loss_weight=1)
    return str(net.to_proto())
def pj_x(mode, batchsize, T, exp_T, question_vocab_size, exp_vocab_size): n = caffe.NetSpec() mode_str = json.dumps({'mode':mode, 'batchsize':batchsize}) n.data, n.cont, n.img_feature, n.label, n.exp, n.exp_out, n.exp_cont_1, n.exp_cont_2 = \ L.Python(module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=8) n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \ weight_filler=dict(type='uniform',min=-0.08,max=0.08), param=fixed_weights) n.embed = L.TanH(n.embed_ba) n.exp_embed_ba = L.Embed(n.exp, input_dim=exp_vocab_size, num_output=300, \ weight_filler=dict(type='uniform', min=-0.08, max=0.08)) n.exp_embed = L.TanH(n.exp_embed_ba) # LSTM1 n.lstm1 = L.LSTM(\ n.embed, n.cont,\ recurrent_param=dict(\ num_output=1024,\ weight_filler=dict(type='uniform',min=-0.08,max=0.08),\ bias_filler=dict(type='constant',value=0)), param=fixed_weights_lstm) tops1 = L.Slice(n.lstm1, ntop=T, slice_param={'axis':0}) for i in range(T-1): n.__setattr__('slice_first'+str(i), tops1[int(i)]) n.__setattr__('silence_data_first'+str(i), L.Silence(tops1[int(i)],ntop=0)) n.lstm1_out = tops1[T-1] n.lstm1_reshaped = L.Reshape(n.lstm1_out,\ reshape_param=dict(\ shape=dict(dim=[-1,1024]))) n.lstm1_reshaped_droped = L.Dropout(n.lstm1_reshaped,dropout_param={'dropout_ratio':0.3}) n.lstm1_droped = L.Dropout(n.lstm1,dropout_param={'dropout_ratio':0.3}) # LSTM2 n.lstm2 = L.LSTM(\ n.lstm1_droped, n.cont,\ recurrent_param=dict(\ num_output=1024,\ weight_filler=dict(type='uniform',min=-0.08,max=0.08),\ bias_filler=dict(type='constant',value=0)), param=fixed_weights_lstm) tops2 = L.Slice(n.lstm2, ntop=T, slice_param={'axis':0}) for i in range(T-1): n.__setattr__('slice_second'+str(i), tops2[int(i)]) n.__setattr__('silence_data_second'+str(i), L.Silence(tops2[int(i)],ntop=0)) n.lstm2_out = tops2[T-1] n.lstm2_reshaped = L.Reshape(n.lstm2_out,\ reshape_param=dict(\ shape=dict(dim=[-1,1024]))) n.lstm2_reshaped_droped = L.Dropout(n.lstm2_reshaped,dropout_param={'dropout_ratio':0.3}) concat_botom = [n.lstm1_reshaped_droped, n.lstm2_reshaped_droped] n.lstm_12 = L.Concat(*concat_botom) # Tile question feature n.q_emb_resh = L.Reshape(n.lstm_12, reshape_param=dict(shape=dict(dim=[-1,2048,1,1]))) n.q_emb_tiled_1 = L.Tile(n.q_emb_resh, axis=2, tiles=14) n.q_emb_resh_tiled = L.Tile(n.q_emb_tiled_1, axis=3, tiles=14) # Embed image feature n.i_emb = L.Convolution(n.img_feature, kernel_size=1, stride=1, num_output=2048, pad=0, weight_filler=dict(type='xavier'), param=fixed_weights) # Eltwise product and normalization n.eltwise = L.Eltwise(n.q_emb_resh_tiled, n.i_emb, eltwise_param={'operation': P.Eltwise.PROD}) n.eltwise_sqrt = L.SignedSqrt(n.eltwise) n.eltwise_l2 = L.L2Normalize(n.eltwise_sqrt) n.eltwise_drop = L.Dropout(n.eltwise_l2, dropout_param={'dropout_ratio': 0.3}) # Attention for VQA n.att_conv1 = L.Convolution(n.eltwise_drop, kernel_size=1, stride=1, num_output=512, pad=0, weight_filler=dict(type='xavier'), param=fixed_weights) n.att_conv1_relu = L.ReLU(n.att_conv1) n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1, num_output=1, pad=0, weight_filler=dict(type='xavier'), param=fixed_weights) n.att_reshaped = L.Reshape(n.att_conv2,reshape_param=dict(shape=dict(dim=[-1,1,14*14]))) n.att_softmax = L.Softmax(n.att_reshaped, axis=2) n.att_map = L.Reshape(n.att_softmax,reshape_param=dict(shape=dict(dim=[-1,1,14,14]))) dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1) n.att_feature = L.SoftAttention(n.img_feature, 
n.att_map, dummy) n.att_feature_resh = L.Reshape(n.att_feature, reshape_param=dict(shape=dict(dim=[-1,2048]))) # eltwise product + normalization again for VQA n.i_emb2 = L.InnerProduct(n.att_feature_resh, num_output=2048, weight_filler=dict(type='xavier'), param=fixed_weights) n.eltwise2 = L.Eltwise(n.lstm_12, n.i_emb2, eltwise_param={'operation': P.Eltwise.PROD}) n.eltwise2_sqrt = L.SignedSqrt(n.eltwise2) n.eltwise2_l2 = L.L2Normalize(n.eltwise2_sqrt) n.eltwise2_drop = L.Dropout(n.eltwise2_l2, dropout_param={'dropout_ratio': 0.3}) n.prediction = L.InnerProduct(n.eltwise2_drop, num_output=3000, weight_filler=dict(type='xavier'), param=fixed_weights) n.loss = L.SoftmaxWithLoss(n.prediction, n.label) # Embed VQA GT answer during training n.exp_emb_ans = L.Embed(n.label, input_dim=3000, num_output=300, \ weight_filler=dict(type='uniform', min=-0.08, max=0.08)) n.exp_emb_ans_tanh = L.TanH(n.exp_emb_ans) n.exp_emb_ans2 = L.InnerProduct(n.exp_emb_ans_tanh, num_output=2048, weight_filler=dict(type='xavier')) # Merge VQA answer and visual+textual feature n.exp_emb_resh = L.Reshape(n.exp_emb_ans2, reshape_param=dict(shape=dict(dim=[-1,2048,1,1]))) n.exp_emb_tiled_1 = L.Tile(n.exp_emb_resh, axis=2, tiles=14) n.exp_emb_tiled = L.Tile(n.exp_emb_tiled_1, axis=3, tiles=14) n.eltwise_emb = L.Convolution(n.eltwise, kernel_size=1, stride=1, num_output=2048, pad=0, weight_filler=dict(type='xavier')) n.exp_eltwise = L.Eltwise(n.eltwise_emb, n.exp_emb_tiled, eltwise_param={'operation': P.Eltwise.PROD}) n.exp_eltwise_sqrt = L.SignedSqrt(n.exp_eltwise) n.exp_eltwise_l2 = L.L2Normalize(n.exp_eltwise_sqrt) n.exp_eltwise_drop = L.Dropout(n.exp_eltwise_l2, dropout_param={'dropout_ratio': 0.3}) # Attention for Explanation n.exp_att_conv1 = L.Convolution(n.exp_eltwise_drop, kernel_size=1, stride=1, num_output=512, pad=0, weight_filler=dict(type='xavier')) n.exp_att_conv1_relu = L.ReLU(n.exp_att_conv1) n.exp_att_conv2 = L.Convolution(n.exp_att_conv1_relu, kernel_size=1, stride=1, num_output=1, pad=0, weight_filler=dict(type='xavier')) n.exp_att_reshaped = L.Reshape(n.exp_att_conv2,reshape_param=dict(shape=dict(dim=[-1,1,14*14]))) n.exp_att_softmax = L.Softmax(n.exp_att_reshaped, axis=2) n.exp_att_map = L.Reshape(n.exp_att_softmax,reshape_param=dict(shape=dict(dim=[-1,1,14,14]))) exp_dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1) n.exp_att_feature_prev = L.SoftAttention(n.img_feature, n.exp_att_map, exp_dummy) n.exp_att_feature_resh = L.Reshape(n.exp_att_feature_prev, reshape_param=dict(shape=dict(dim=[-1, 2048]))) n.exp_att_feature_embed = L.InnerProduct(n.exp_att_feature_resh, num_output=2048, weight_filler=dict(type='xavier')) n.exp_lstm12_embed = L.InnerProduct(n.lstm_12, num_output=2048, weight_filler=dict(type='xavier')) n.exp_eltwise2 = L.Eltwise(n.exp_lstm12_embed, n.exp_att_feature_embed, eltwise_param={'operation': P.Eltwise.PROD}) n.exp_att_feature = L.Eltwise(n.exp_emb_ans2, n.exp_eltwise2, eltwise_param={'operation': P.Eltwise.PROD}) # LSTM1 for Explanation n.exp_lstm1 = L.LSTM(\ n.exp_embed, n.exp_cont_1,\ recurrent_param=dict(\ num_output=2048,\ weight_filler=dict(type='uniform',min=-0.08,max=0.08),\ bias_filler=dict(type='constant',value=0))) n.exp_lstm1_dropped = L.Dropout(n.exp_lstm1,dropout_param={'dropout_ratio':0.3}) # merge with LSTM1 for explanation n.exp_att_resh = L.Reshape(n.exp_att_feature, reshape_param=dict(shape=dict(dim=[1, -1, 2048]))) n.exp_att_tiled = L.Tile(n.exp_att_resh, axis=0, tiles=exp_T) n.exp_eltwise_all = 
L.Eltwise(n.exp_lstm1_dropped, n.exp_att_tiled, eltwise_param={'operation': P.Eltwise.PROD}) n.exp_eltwise_all_sqrt = L.SignedSqrt(n.exp_eltwise_all) n.exp_eltwise_all_l2 = L.L2Normalize(n.exp_eltwise_all_sqrt) n.exp_eltwise_all_drop = L.Dropout(n.exp_eltwise_all_l2, dropout_param={'dropout_ratio': 0.3}) # LSTM2 for Explanation n.exp_lstm2 = L.LSTM(\ n.exp_eltwise_all_drop, n.exp_cont_2,\ recurrent_param=dict(\ num_output=1024,\ weight_filler=dict(type='uniform',min=-0.08,max=0.08),\ bias_filler=dict(type='constant',value=0))) n.exp_lstm2_dropped = L.Dropout(n.exp_lstm2,dropout_param={'dropout_ratio':0.3}) n.exp_prediction = L.InnerProduct(n.exp_lstm2_dropped, num_output=exp_vocab_size, weight_filler=dict(type='xavier'), axis=2) n.exp_loss = L.SoftmaxWithLoss(n.exp_prediction, n.exp_out, loss_param=dict(ignore_label=-1), softmax_param=dict(axis=2)) n.exp_accuracy = L.Accuracy(n.exp_prediction, n.exp_out, axis=2, ignore_label=-1) return n.to_proto()
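# Usage sketch for pj_x() above: emit the train-phase prototxt. The unroll
# lengths T/exp_T and the vocabulary sizes are placeholders, and the
# fixed_weights / fixed_weights_lstm param lists referenced above must be
# defined (as elsewhere in this file) before this runs.
def write_pjx_proto():
    proto = pj_x('train', batchsize=32, T=15, exp_T=24,
                 question_vocab_size=15000, exp_vocab_size=8000)
    with open('pjx_train.prototxt', 'w') as f:
        f.write(str(proto))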
def make_caffenet(self, bottom, return_layer, weight_filler=None,
                  bias_filler=None, learning_param=None):
    # None defaults: these dicts are filled in below, and a mutable default
    # argument would leak entries across calls.
    weight_filler = weight_filler or {}
    bias_filler = bias_filler or {}
    learning_param = learning_param or {}
    default_weight_filler = self.gaussian_filler()
    default_bias_filler = self.gaussian_filler(1)
    default_learning_param = self.learning_params([[1, 1], [2, 0]])
    for layer in ['conv1', 'conv2', 'conv3', 'conv4', 'conv5',
                  'fc6', 'fc7', 'fc8']:
        if layer not in weight_filler.keys():
            weight_filler[layer] = default_weight_filler
        if layer not in bias_filler.keys():
            bias_filler[layer] = default_bias_filler
        if layer not in learning_param.keys():
            learning_param[layer] = default_learning_param

    self.n.tops['conv1'], self.n.tops['relu1'] = self.conv_relu(
        bottom, 11, 96, stride=4,
        weight_filler=weight_filler['conv1'],
        bias_filler=bias_filler['conv1'],
        learning_param=learning_param['conv1'])
    if return_layer in self.n.tops.keys():
        return
    self.n.tops['pool1'] = self.max_pool(self.n.tops['relu1'], 3, stride=2)
    if return_layer in self.n.tops.keys():
        return
    self.n.tops['norm1'] = L.LRN(self.n.tops['pool1'], local_size=5,
                                 alpha=1e-4, beta=0.75)
    if return_layer in self.n.tops.keys():
        return
    self.n.tops['conv2'], self.n.tops['relu2'] = self.conv_relu(
        self.n.tops['norm1'], 5, 256, pad=2, group=2,
        weight_filler=weight_filler['conv2'],
        bias_filler=bias_filler['conv2'],
        learning_param=learning_param['conv2'])
    if return_layer in self.n.tops.keys():
        return
    self.n.tops['pool2'] = self.max_pool(self.n.tops['relu2'], 3, stride=2)
    if return_layer in self.n.tops.keys():
        return
    self.n.tops['norm2'] = L.LRN(self.n.tops['pool2'], local_size=5,
                                 alpha=1e-4, beta=0.75)
    if return_layer in self.n.tops.keys():
        return
    self.n.tops['conv3'], self.n.tops['relu3'] = self.conv_relu(
        self.n.tops['norm2'], 3, 384, pad=1,
        weight_filler=weight_filler['conv3'],
        bias_filler=bias_filler['conv3'],
        learning_param=learning_param['conv3'])
    if return_layer in self.n.tops.keys():
        return
    self.n.tops['conv4'], self.n.tops['relu4'] = self.conv_relu(
        self.n.tops['relu3'], 3, 384, pad=1, group=2,
        weight_filler=weight_filler['conv4'],
        bias_filler=bias_filler['conv4'],
        learning_param=learning_param['conv4'])
    if return_layer in self.n.tops.keys():
        return
    self.n.tops['conv5'], self.n.tops['relu5'] = self.conv_relu(
        self.n.tops['relu4'], 3, 256, pad=1, group=2,
        weight_filler=weight_filler['conv5'],
        bias_filler=bias_filler['conv5'],
        learning_param=learning_param['conv5'])
    if return_layer in self.n.tops.keys():
        return
    self.n.tops['pool5'] = self.max_pool(self.n.tops['relu5'], 3, stride=2)
    if return_layer in self.n.tops.keys():
        return
    self.n.tops['fc6'], self.n.tops['relu6'] = self.fc_relu(
        self.n.tops['pool5'], 4096,
        weight_filler=weight_filler['fc6'],
        bias_filler=bias_filler['fc6'],
        learning_param=learning_param['fc6'])
    if return_layer in self.n.tops.keys():
        return
    self.n.tops['drop6'] = L.Dropout(self.n.tops['relu6'], in_place=True)
    if return_layer in self.n.tops.keys():
        return
    self.n.tops['fc7'], self.n.tops['relu7'] = self.fc_relu(
        self.n.tops['drop6'], 4096,
        weight_filler=weight_filler['fc7'],
        bias_filler=bias_filler['fc7'],
        learning_param=learning_param['fc7'])
    if return_layer in self.n.tops.keys():
        return
    self.n.tops['drop7'] = L.Dropout(self.n.tops['relu7'], in_place=True)
    if return_layer in self.n.tops.keys():
        return
    self.n.tops['fc8'] = L.InnerProduct(self.n.tops['drop7'],
                                        num_output=1000,
                                        weight_filler=weight_filler['fc8'],
                                        bias_filler=bias_filler['fc8'],
                                        param=learning_param['fc8'])
def VGGNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False, dilated=False, nopool=False, dropout=True, freeze_layers=[]): kwargs = { 'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], 'weight_filler': dict(type='xavier'), 'bias_filler': dict(type='constant', value=0) } assert from_layer in net.keys() net.conv1_1 = L.Convolution(net[from_layer], num_output=64, pad=1, kernel_size=3, **kwargs) net.relu1_1 = L.ReLU(net.conv1_1, in_place=True) net.conv1_2 = L.Convolution(net.relu1_1, num_output=64, pad=1, kernel_size=3, **kwargs) net.relu1_2 = L.ReLU(net.conv1_2, in_place=True) if nopool: name = 'conv1_3' net[name] = L.Convolution(net.relu1_2, num_output=64, pad=1, kernel_size=3, stride=2, **kwargs) else: name = 'pool1' net.pool1 = L.Pooling(net.relu1_2, pool=P.Pooling.MAX, kernel_size=2, stride=2) net.conv2_1 = L.Convolution(net[name], num_output=128, pad=1, kernel_size=3, **kwargs) net.relu2_1 = L.ReLU(net.conv2_1, in_place=True) net.conv2_2 = L.Convolution(net.relu2_1, num_output=128, pad=1, kernel_size=3, **kwargs) net.relu2_2 = L.ReLU(net.conv2_2, in_place=True) if nopool: name = 'conv2_3' net[name] = L.Convolution(net.relu2_2, num_output=128, pad=1, kernel_size=3, stride=2, **kwargs) else: name = 'pool2' net[name] = L.Pooling(net.relu2_2, pool=P.Pooling.MAX, kernel_size=2, stride=2) net.conv3_1 = L.Convolution(net[name], num_output=256, pad=1, kernel_size=3, **kwargs) net.relu3_1 = L.ReLU(net.conv3_1, in_place=True) net.conv3_2 = L.Convolution(net.relu3_1, num_output=256, pad=1, kernel_size=3, **kwargs) net.relu3_2 = L.ReLU(net.conv3_2, in_place=True) net.conv3_3 = L.Convolution(net.relu3_2, num_output=256, pad=1, kernel_size=3, **kwargs) net.relu3_3 = L.ReLU(net.conv3_3, in_place=True) if nopool: name = 'conv3_4' net[name] = L.Convolution(net.relu3_3, num_output=256, pad=1, kernel_size=3, stride=2, **kwargs) else: name = 'pool3' net[name] = L.Pooling(net.relu3_3, pool=P.Pooling.MAX, kernel_size=2, stride=2) net.conv4_1 = L.Convolution(net[name], num_output=512, pad=1, kernel_size=3, **kwargs) net.relu4_1 = L.ReLU(net.conv4_1, in_place=True) net.conv4_2 = L.Convolution(net.relu4_1, num_output=512, pad=1, kernel_size=3, **kwargs) net.relu4_2 = L.ReLU(net.conv4_2, in_place=True) net.conv4_3 = L.Convolution(net.relu4_2, num_output=512, pad=1, kernel_size=3, **kwargs) net.relu4_3 = L.ReLU(net.conv4_3, in_place=True) if nopool: name = 'conv4_4' net[name] = L.Convolution(net.relu4_3, num_output=512, pad=1, kernel_size=3, stride=2, **kwargs) else: name = 'pool4' net[name] = L.Pooling(net.relu4_3, pool=P.Pooling.MAX, kernel_size=2, stride=2) net.conv5_1 = L.Convolution(net[name], num_output=512, pad=1, kernel_size=3, **kwargs) net.relu5_1 = L.ReLU(net.conv5_1, in_place=True) net.conv5_2 = L.Convolution(net.relu5_1, num_output=512, pad=1, kernel_size=3, **kwargs) net.relu5_2 = L.ReLU(net.conv5_2, in_place=True) net.conv5_3 = L.Convolution(net.relu5_2, num_output=512, pad=1, kernel_size=3, **kwargs) net.relu5_3 = L.ReLU(net.conv5_3, in_place=True) if need_fc: if dilated: if nopool: name = 'conv5_4' net[name] = L.Convolution(net.relu5_3, num_output=512, pad=1, kernel_size=3, stride=1, **kwargs) else: name = 'pool5' net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX, pad=1, kernel_size=3, stride=1) else: if nopool: name = 'conv5_4' net[name] = L.Convolution(net.relu5_3, num_output=512, pad=1, kernel_size=3, stride=2, **kwargs) else: name = 'pool5' net[name] = L.Pooling(net.relu5_3, pool=P.Pooling.MAX, kernel_size=2, stride=2) if fully_conv: if dilated: if 
reduced: net.fc6 = L.Convolution(net[name], num_output=1024, pad=6, kernel_size=3, dilation=6, **kwargs) else: net.fc6 = L.Convolution(net[name], num_output=4096, pad=6, kernel_size=7, dilation=2, **kwargs) else: if reduced: net.fc6 = L.Convolution(net[name], num_output=1024, pad=3, kernel_size=3, dilation=3, **kwargs) else: net.fc6 = L.Convolution(net[name], num_output=4096, pad=3, kernel_size=7, **kwargs) net.relu6 = L.ReLU(net.fc6, in_place=True) if dropout: net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True) if reduced: net.fc7 = L.Convolution(net.relu6, num_output=1024, kernel_size=1, **kwargs) else: net.fc7 = L.Convolution(net.relu6, num_output=4096, kernel_size=1, **kwargs) net.relu7 = L.ReLU(net.fc7, in_place=True) if dropout: net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True) else: net.fc6 = L.InnerProduct(net.pool5, num_output=4096) net.relu6 = L.ReLU(net.fc6, in_place=True) if dropout: net.drop6 = L.Dropout(net.relu6, dropout_ratio=0.5, in_place=True) net.fc7 = L.InnerProduct(net.relu6, num_output=4096) net.relu7 = L.ReLU(net.fc7, in_place=True) if dropout: net.drop7 = L.Dropout(net.relu7, dropout_ratio=0.5, in_place=True) # Update freeze layers. kwargs['param'] = [ dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0) ] layers = net.keys() for freeze_layer in freeze_layers: if freeze_layer in layers: net.update(freeze_layer, kwargs) return net
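# Usage sketch for VGGNetBody() above. Like ZFNetBody, it extends an
# existing NetSpec in place, so a data layer must already be registered
# under from_layer; net.keys()/net.update() assume the SSD-style NetSpec
# extensions used by these builders. The LMDB path is a placeholder.
def write_vgg_body_proto():
    net = caffe.NetSpec()
    net.data, net.label = L.Data(source='train_lmdb', backend=P.Data.LMDB,
                                 batch_size=32, ntop=2)
    VGGNetBody(net, from_layer='data', need_fc=True, fully_conv=True,
               reduced=True, dilated=True,
               freeze_layers=['conv1_1', 'conv1_2'])
    with open('vgg_body_train.prototxt', 'w') as f:
        f.write(str(net.to_proto()))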
def lstm_unit(self, prefix, x, cont, static=None, h=None, c=None, batch_size=100, timestep=0, lstm_hidden=1000, weight_filler=None, bias_filler=None, weight_lr_mult=1, bias_lr_mult=2, weight_decay_mult=1, bias_decay_mult=0, concat_hidden=True): #assume static is already transformed if not weight_filler: weight_filler = self.uniform_weight_filler(-0.08, 0.08) if not bias_filler: bias_filler = self.constant_filler(0) if not h: h = self.dummy_data_layer([1, batch_size, lstm_hidden], 1) if not c: c = self.dummy_data_layer([1, batch_size, lstm_hidden], 1) gate_dim = self.gate_dim def get_name(name): return '%s_%s' % (prefix, name) def get_param(weight_name, bias_name=None): w = dict(lr_mult=weight_lr_mult, decay_mult=weight_decay_mult, name=get_name(weight_name)) if bias_name is not None: b = dict(lr_mult=bias_lr_mult, decay_mult=bias_decay_mult, name=get_name(bias_name)) return [w, b] return [w] # gate_dim is the dimension of the cell state inputs: # 4 gates (i, f, o, g), each with dimension dim # Add layer to transform all timesteps of x to the hidden state dimension. # x_transform = W_xc * x + b_c cont_reshape = L.Reshape(cont, shape=dict(dim=[1, 1, -1])) x = L.InnerProduct(x, num_output=gate_dim, axis=2, weight_filler=weight_filler, bias_filler=bias_filler, param=get_param('W_xc', 'b_c')) setattr(self.n, get_name('%d_x_transform' % timestep), x) h_conted = L.Eltwise(h, cont_reshape, coeff_blob=True) h = L.InnerProduct(h_conted, num_output=gate_dim, axis=2, bias_term=False, weight_filler=weight_filler, param=get_param('W_hc')) h_name = get_name('%d_h_transform' % timestep) if not hasattr(self.n, h_name): setattr(self.n, h_name, h) gate_input_args = x, h if static is not None: gate_input_args += (static, ) gate_input = L.Eltwise(*gate_input_args) assert cont is not None c, h = L.LSTMUnit(c, gate_input, cont_reshape, ntop=2) return h, c
def create_deploy():
    # The first (data) layer is omitted in the deploy definition.
    # layer 2: convolution
    conv1 = L.Convolution(
        bottom='data', kernel_size=11, stride=4, num_output=96, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0))
    # layer 3: activation (ReLU)
    relu1 = L.ReLU(conv1, in_place=True)
    # layer 4: pooling
    pool1 = L.Pooling(relu1, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    # layer 5: LRN
    norm1 = L.LRN(pool1, local_size=5, alpha=1e-4, beta=0.75)
    # layer 6: convolution
    conv2 = L.Convolution(
        norm1, kernel_size=5, stride=1, num_output=256, pad=2, group=2,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=1))
    # layer 7: activation (ReLU)
    relu2 = L.ReLU(conv2, in_place=True)
    # layer 8: pooling
    pool2 = L.Pooling(relu2, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    # layer 9: LRN
    norm2 = L.LRN(pool2, local_size=5, alpha=1e-4, beta=0.75)
    # layer 10: convolution
    conv3 = L.Convolution(
        norm2, kernel_size=3, stride=1, num_output=384, pad=1,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0))
    # layer 11: activation (ReLU)
    relu3 = L.ReLU(conv3, in_place=True)
    # layer 12: convolution
    conv4 = L.Convolution(
        relu3, kernel_size=3, stride=1, num_output=384, pad=1, group=2,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=1))
    # layer 13: activation (ReLU)
    relu4 = L.ReLU(conv4, in_place=True)
    # layer 14: convolution
    conv5 = L.Convolution(
        relu4, kernel_size=3, stride=1, num_output=256, pad=1, group=2,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=1))
    # layer 15: activation (ReLU)
    relu5 = L.ReLU(conv5, in_place=True)
    # layer 16: pooling
    pool5 = L.Pooling(relu5, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    # layer 17: fully connected
    fc6 = L.InnerProduct(
        pool5, num_output=4096,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.005),
        bias_filler=dict(type='constant', value=1))
    # layer 18: activation (ReLU)
    relu6 = L.ReLU(fc6, in_place=True)
    # layer 19: dropout
    drop6 = L.Dropout(relu6, dropout_ratio=0.5, in_place=True)
    # layer 20: fully connected
    fc7 = L.InnerProduct(
        drop6, num_output=4096,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.005),
        bias_filler=dict(type='constant', value=1))
    # layer 21: activation (ReLU)
    relu7 = L.ReLU(fc7, in_place=True)
    # layer 22: dropout
    drop7 = L.Dropout(relu7, dropout_ratio=0.5)
    # layer 23: fully connected
    fc8 = L.InnerProduct(
        drop7, num_output=1000,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0))
    # No accuracy layer at the end, but there is a Softmax layer.
    prob = L.Softmax(fc8)
    return to_proto(prob)
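# Usage sketch for create_deploy() above: since the data layer is omitted,
# prepend an old-style input declaration so the deploy net can be
# instantiated; the output path is a placeholder.
def write_deploy_proto():
    with open('deploy.prototxt', 'w') as f:
        f.write('input: "data"\n'
                'input_shape { dim: 1 dim: 3 dim: 227 dim: 227 }\n')
        f.write(str(create_deploy()))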
def net(self, params=[]): conv_param = [ dict(lr_mult=0.01, decay_mult=1), # weight_param dict(lr_mult=0.02, decay_mult=0) ] # learned_param fc_param = [ dict(lr_mult=1, decay_mult=1), # weight_param dict(lr_mult=2, decay_mult=0) ] # learned_param wfiller = dict(type='gaussian', std=0.01) wfiller_fc = dict(type='gaussian', std=0.005) bfiller = dict(type='constant', value=0.1) # initialize net and data layer n = caffe.NetSpec() # layer 0 n.data = self.data # layer 1 n.conv1 = L.Convolution(n.data, kernel_size=11, num_output=96, stride=4, pad=0, group=1, param=conv_param, weight_filler=wfiller, bias_filler=bfiller) self.receptiveFieldStride.append(4) if self.last_layer == 'conv1': self.__network_end(n, n.conv1, params) return n.relu1 = L.ReLU(n.conv1, in_place=True) self.receptiveFieldStride.append(1) if self.last_layer == 'relu1': self.__network_end(n, n.relu1, params) return n.norm1 = L.LRN(n.relu1, local_size=5, alpha=1e-4, beta=0.75) self.receptiveFieldStride.append(1) if self.last_layer == 'norm1': self.__network_end(n, n.norm1, params) return n.pool1 = L.Pooling(n.norm1, pool=P.Pooling.MAX, kernel_size=3, stride=2) self.receptiveFieldStride.append(2) if self.last_layer == 'pool1': self.__network_end(n, n.pool1, params) return # layer 2 n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=256, stride=1, pad=2, group=2, param=conv_param, weight_filler=wfiller, bias_filler=bfiller) self.receptiveFieldStride.append(1) if self.last_layer == 'conv2': self.__network_end(n, n.conv2, params) return n.relu2 = L.ReLU(n.conv2, in_place=True) self.receptiveFieldStride.append(1) if self.last_layer == 'relu2': self.__network_end(n, n.relu2, params) return n.norm2 = L.LRN(n.relu2, local_size=5, alpha=1e-4, beta=0.75) self.receptiveFieldStride.append(1) if self.last_layer == 'norm2': self.__network_end(n, n.norm2, params) return n.pool2 = L.Pooling(n.norm2, pool=P.Pooling.MAX, kernel_size=3, stride=2) self.receptiveFieldStride.append(2) if self.last_layer == 'pool2': self.__network_end(n, n.pool2, params) return # layer 3 n.conv3 = L.Convolution(n.pool2, kernel_size=3, num_output=384, stride=1, pad=1, group=1, param=conv_param, weight_filler=wfiller, bias_filler=bfiller) self.receptiveFieldStride.append(1) if self.last_layer == 'conv3': self.__network_end(n, n.conv3, params) return n.relu3 = L.ReLU(n.conv3, in_place=True) self.receptiveFieldStride.append(1) if self.last_layer == 'relu3': self.__network_end(n, n.relu3, params) return # layer 4 n.conv4 = L.Convolution(n.relu3, kernel_size=3, num_output=384, stride=1, pad=1, group=2, param=conv_param, weight_filler=wfiller, bias_filler=bfiller) self.receptiveFieldStride.append(1) if self.last_layer == 'conv4': self.__network_end(n, n.conv4, params) return n.relu4 = L.ReLU(n.conv4, in_place=True) self.receptiveFieldStride.append(1) if self.last_layer == 'relu4': self.__network_end(n, n.relu4, params) return # layer 5 n.conv5 = L.Convolution(n.relu4, kernel_size=3, num_output=256, stride=1, pad=1, group=2, param=conv_param, weight_filler=wfiller, bias_filler=bfiller) self.receptiveFieldStride.append(1) if self.last_layer == 'conv5': self.__network_end(n, n.conv5, params) return n.relu5 = L.ReLU(n.conv5, in_place=True) self.receptiveFieldStride.append(1) if self.last_layer == 'relu5': self.__network_end(n, n.relu5, params) return n.pool5 = L.Pooling(n.relu5, pool=P.Pooling.MAX, kernel_size=3, stride=2) self.receptiveFieldStride.append(2) if self.last_layer == 'pool5': self.__network_end(n, n.pool5, params) return # layer 6 n.fc6 = 
L.InnerProduct(n.pool5, num_output=4096, param=fc_param, weight_filler=wfiller_fc, bias_filler=bfiller) self.receptiveFieldStride.append(1) if self.last_layer == 'fc6': self.__network_end(n, n.fc6, params) return n.relu6 = L.ReLU(n.fc6, in_place=True) self.receptiveFieldStride.append(1) if self.last_layer == 'relu6': self.__network_end(n, n.relu6, params) return # layer 7 n.fc7 = L.InnerProduct(n.relu6, num_output=4096, param=fc_param, weight_filler=wfiller_fc, bias_filler=bfiller) self.receptiveFieldStride.append(1) if self.last_layer == 'fc7': self.__network_end(n, n.fc7, params) return n.relu7 = L.ReLU(n.fc7, in_place=True) self.receptiveFieldStride.append(1) if self.last_layer == 'relu7': self.__network_end(n, n.relu7, params) return # layer 8: always learn fc8 (param=learned_param) n.fc8 = L.InnerProduct(n.relu7, num_output=1000, param=fc_param, weight_filler=wfiller_fc, bias_filler=bfiller) self.receptiveFieldStride.append(1) if self.last_layer == 'fc8': self.__network_end(n, n.fc8, params) return