def bn(input, is_train):
    # Batch normalisation followed by a learnable scale/shift.  At test time the
    # accumulated global statistics are used instead of the per-batch ones.
    if is_train:
        kwargs = {'engine': 3}
    else:
        kwargs = {'engine': 3, 'use_global_stats': True}
    return L.Scale(L.BatchNorm(input, **kwargs), bias_term=True)
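# Illustrative usage sketch (not part of the original code): bn() is a NetSpec
# helper, so it slots in after any layer handle.  The input shape and layer
# names below are assumptions made only for this example.
def _bn_example():
    import caffe
    from caffe import layers as L
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=[dict(dim=[1, 3, 32, 32])]))
    n.conv1 = L.Convolution(n.data, num_output=16, kernel_size=3, pad=1)
    n.bn1 = bn(n.conv1, is_train=True)
    return str(n.to_proto())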
def mynet(batch, steps, loss_type, dep=False, descr=False, part='gen'):
    conv_lr = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=1)]
    bcnv_lr = [dict(lr_mult=1, decay_mult=1)]
    scale_lr = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=1)]
    bn_param = dict(eps=0.001, use_global_stats=False)
    fr_lr = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]
    fr_clr = [dict(lr_mult=0, decay_mult=0)]
    #fr_bn = dict(eps=0.001,use_global_stats=True)
    fr_bn = dict(eps=0.001, use_global_stats=False)

    # Select which part of the network gets non-zero learning rates.
    if part == 'gen':
        gen_conv_lr = conv_lr
        gen_bcnv_lr = bcnv_lr
        gen_scale_lr = scale_lr
        gen_bn_param = bn_param
        dsc_conv_lr = fr_lr
    else:
        gen_conv_lr = fr_lr
        gen_bcnv_lr = fr_clr
        gen_scale_lr = fr_lr
        gen_bn_param = fr_bn
        dsc_conv_lr = conv_lr

    n = caffe.NetSpec()
    sp = dict(bias_term=True, filler=dict(value=1.0))

    if dep:
        n.source = L.Input(input_param=dict(shape=[dict(dim=[1, 1, 64, 64])]))
    else:
        if descr:
            if part == 'gen':
                bs = batch
            else:
                bs = batch // 2
        else:
            bs = batch
        n.data = L.Data(data_param=dict(source="db", batch_size=bs, backend=P.Data.LMDB))
        n.expected, n.source = L.Slice(n.data, slice_param=dict(axis=1, slice_point=1), ntop=2)
        if descr:
            if part != 'gen':
                #n.data_ref = L.Split(n.expected)
                n.data_ref = L.Data(data_param=dict(source="db_ref", batch_size=batch // 2,
                                                    backend=P.Data.LMDB))
                n.label_0 = L.DummyData(shape=[dict(dim=[batch // 2])], data_filler=dict(value=0.0))
                n.label_1 = L.DummyData(shape=[dict(dim=[batch // 2])], data_filler=dict(value=1.0))
                n.label = L.Concat(n.label_0, n.label_1, concat_param=dict(axis=0))
            else:
                n.label = L.DummyData(shape=[dict(dim=[batch])], data_filler=dict(value=1.0))

    n.conv1 = L.Convolution(n.source, convolution_param=conv_param_nb(3, 16), param=gen_bcnv_lr)
    n.bn1 = L.BatchNorm(n.conv1, batch_norm_param=gen_bn_param)
    n.scale1 = L.Scale(n.bn1, scale_param=sp, param=gen_scale_lr)
    n.scale1 = L.ReLU(n.scale1)
    inp = "scale1"

    # Residual steps: two conv/BN/scale/ReLU blocks plus a skip connection each.
    for m in range(steps):
        k = m + 1
        cid1 = "step%d/conv1" % k
        cid2 = "step%d/conv2" % k
        bid1 = "step%d/bn1" % k
        bid2 = "step%d/bn2" % k
        eid = "step%d/elt" % k
        n[cid1] = L.Convolution(n[inp], convolution_param=conv_param_nb(3, 16), param=gen_bcnv_lr)
        n[bid1] = L.BatchNorm(n[cid1], batch_norm_param=gen_bn_param)
        n[bid1] = L.Scale(n[bid1], scale_param=sp, param=gen_scale_lr)
        n[bid1] = L.ReLU(n[bid1])
        n[cid2] = L.Convolution(n[bid1], convolution_param=conv_param_nb(3, 16), param=gen_bcnv_lr)
        n[bid2] = L.BatchNorm(n[cid2], batch_norm_param=gen_bn_param)
        n[bid2] = L.Scale(n[bid2], scale_param=sp, param=gen_scale_lr)
        n[bid2] = L.ReLU(n[bid2])
        n[eid] = L.Eltwise(n[bid2], n[inp])
        inp = eid

    outname = "topconv"
    n[outname] = L.Convolution(n[inp], convolution_param=conv_param(3, 1), param=gen_conv_lr)
    n.generated = L.Sigmoid(n.topconv)

    if not dep:
        lw = 1 if part == 'gen' else 0
        if loss_type == 'euc':
            n.l2_loss = L.EuclideanLoss(n.generated, n.expected, name="loss", loss_weight=lw)
        else:
            n.l2_loss = L.EuclideanLoss(n.generated, n.expected, name="loss", loss_weight=0)
            n.cross_entropy_loss = L.SigmoidCrossEntropyLoss(n.topconv, n.expected, name="loss",
                                                             loss_weight=lw)
        if descr:
            if part != 'gen':
                n.desc_inp = L.Concat(n.generated, n.data_ref, concat_param=dict(axis=0))
                cinp = "desc_inp"
            else:
                cinp = "generated"
            n.d_conv1 = L.Convolution(n[cinp], convolution_param=conv_param(5, 32), param=dsc_conv_lr)
            n.d_pool1 = L.Pooling(n.d_conv1, pooling_param=dict(kernel_size=3, stride=2, pool=P.Pooling.MAX))
            n.d_pool1 = L.ReLU(n.d_pool1)
            n.d_conv2 = L.Convolution(n.d_pool1, convolution_param=conv_param(5, 32), param=dsc_conv_lr)
            n.d_pool2 = L.Pooling(n.d_conv2, pooling_param=dict(kernel_size=3, stride=2, pool=P.Pooling.MAX))
            n.d_pool2 = L.ReLU(n.d_pool2)
            n.d_conv3 = L.Convolution(n.d_pool2, convolution_param=conv_param(5, 64), param=dsc_conv_lr)
            n.d_pool3 = L.Pooling(n.d_conv3, pooling_param=dict(kernel_size=3, stride=2, pool=P.Pooling.MAX))
            n.d_pool3 = L.ReLU(n.d_pool3)
            n.d_conv4 = L.Convolution(n.d_pool3, convolution_param=conv_param(3, 64), param=dsc_conv_lr)
            n.d_pool4 = L.Pooling(n.d_conv4, pooling_param=dict(kernel_size=3, stride=2, pool=P.Pooling.MAX))
            n.d_pool4 = L.ReLU(n.d_pool4)
            n.d_ip1 = L.InnerProduct(n.d_pool4, param=dsc_conv_lr, inner_product_param=ip_param(512))
            n.d_ip1 = L.ReLU(n.d_ip1)
            n.d_ip2 = L.InnerProduct(n.d_ip1, param=dsc_conv_lr, inner_product_param=ip_param(1))
            n.sigmoid_loss = L.SigmoidCrossEntropyLoss(n.d_ip2, n.label, name="loss", loss_weight=100)
            n.score = L.Sigmoid(n.d_ip2)
            n.lbl_flat = L.Reshape(n.label, reshape_param=dict(shape=dict(dim=[-1, 1])))
            n.diff = L.Eltwise(n.score, n.lbl_flat,
                               eltwise_param=dict(coeff=[1.0 / batch, -1.0 / batch]))
            n.error = L.Reduction(n.diff, reduction_param=dict(operation=P.Reduction.ASUM))
            #n.output = L.Split(n[cinp])
            #n.output_labels = L.Split(n.score)
            #n.inputs = n.source
    return n
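# Illustrative usage sketch (not from the original source): mynet() returns a
# NetSpec, so the two training phases can be emitted by switching `part`.  The
# batch size, step count and the 'desc' literal are assumptions; the helpers it
# calls (conv_param_nb, conv_param, ip_param) must be defined elsewhere.
def _write_mynet_prototxts():
    for part in ('gen', 'desc'):
        net = mynet(batch=32, steps=4, loss_type='euc', descr=True, part=part)
        with open('mynet_%s.prototxt' % part, 'w') as f:
            f.write(str(net.to_proto()))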
def create_bnn_cnn_net(num_input_points, height, width, phase=None):
    n = caffe.NetSpec()

    n.input_color = L.Input(shape=[dict(dim=[1, 2, 1, num_input_points])])
    n.in_features = L.Input(shape=[dict(dim=[1, 4, 1, num_input_points])])
    n.out_features = L.Input(shape=[dict(dim=[1, 4, height, width])])
    n.scales = L.Input(shape=[dict(dim=[1, 4, 1, 1])])

    n.flatten_scales = L.Flatten(n.scales, flatten_param=dict(axis=0))
    n.in_scaled_features = L.Scale(n.in_features, n.flatten_scales, scale_param=dict(axis=1))
    n.out_scaled_features = L.Scale(n.out_features, n.flatten_scales, scale_param=dict(axis=1))

    ### Start of BNN
    # BNN - stage - 1
    n.out_color1 = L.Permutohedral(n.input_color, n.in_scaled_features, n.out_scaled_features,
                                   permutohedral_param=dict(num_output=32, group=1, neighborhood_size=0,
                                                            bias_term=True,
                                                            norm_type=P.Permutohedral.AFTER,
                                                            offset_type=P.Permutohedral.NONE),
                                   filter_filler=dict(type='gaussian', std=0.01),
                                   bias_filler=dict(type='constant', value=0.5),
                                   param=[{'lr_mult': 1, 'decay_mult': 1},
                                          {'lr_mult': 2, 'decay_mult': 0}])
    n.bnn_out_relu_1 = L.ReLU(n.out_color1, in_place=True)

    # BNN - stage - 2
    n.out_color2 = L.Permutohedral(n.bnn_out_relu_1, n.out_scaled_features, n.out_scaled_features,
                                   permutohedral_param=dict(num_output=32, group=1, neighborhood_size=0,
                                                            bias_term=True,
                                                            norm_type=P.Permutohedral.AFTER,
                                                            offset_type=P.Permutohedral.NONE),
                                   filter_filler=dict(type='gaussian', std=0.01),
                                   bias_filler=dict(type='constant', value=0),
                                   param=[{'lr_mult': 1, 'decay_mult': 1},
                                          {'lr_mult': 2, 'decay_mult': 0}])
    n.bnn_out_relu_2 = L.ReLU(n.out_color2, in_place=True)

    # BNN - combination
    n.connection_out = L.Concat(n.bnn_out_relu_1, n.bnn_out_relu_2)
    n.out_color_bilateral = L.Convolution(n.connection_out,
                                          convolution_param=dict(num_output=2, kernel_size=1, stride=1,
                                                                 weight_filler=dict(type='gaussian', std=0.01),
                                                                 bias_filler=dict(type='constant', value=0)),
                                          param=[{'lr_mult': 1, 'decay_mult': 1},
                                                 {'lr_mult': 2, 'decay_mult': 0}])
    n.out_color_bilateral_relu = L.ReLU(n.out_color_bilateral, in_place=True)

    ### Start of CNN
    # CNN - Stage 1
    n.out_color_spatial1 = L.Convolution(n.out_color_bilateral_relu,
                                         convolution_param=dict(num_output=32, kernel_size=3, stride=1,
                                                                pad_h=1, pad_w=1,
                                                                weight_filler=dict(type='gaussian', std=0.01),
                                                                bias_filler=dict(type='constant', value=0)),
                                         param=[{'lr_mult': 1, 'decay_mult': 1},
                                                {'lr_mult': 2, 'decay_mult': 0}])
    n.out_color_spatial_relu1 = L.ReLU(n.out_color_spatial1, in_place=True)

    # CNN - Stage 2
    n.out_color_spatial2 = L.Convolution(n.out_color_spatial_relu1,
                                         convolution_param=dict(num_output=32, kernel_size=3, stride=1,
                                                                pad_h=1, pad_w=1,
                                                                weight_filler=dict(type='gaussian', std=0.01),
                                                                bias_filler=dict(type='constant', value=0)),
                                         param=[{'lr_mult': 1, 'decay_mult': 1},
                                                {'lr_mult': 2, 'decay_mult': 0}])
    n.out_color_spatial_relu2 = L.ReLU(n.out_color_spatial2, in_place=True)

    # CNN - Stage 3
    n.out_color_spatial = L.Convolution(n.out_color_spatial_relu2,
                                        convolution_param=dict(num_output=2, kernel_size=3, stride=1,
                                                               pad_h=1, pad_w=1,
                                                               weight_filler=dict(type='gaussian', std=0.01),
                                                               bias_filler=dict(type='constant', value=0)),
                                        param=[{'lr_mult': 1, 'decay_mult': 1},
                                               {'lr_mult': 2, 'decay_mult': 0}])
    n.out_color_spatial_relu = L.ReLU(n.out_color_spatial, in_place=True)

    n.final_connection_out = L.Concat(n.out_color_bilateral_relu, n.out_color_spatial_relu)
    n.out_color_result = L.Convolution(n.final_connection_out,
                                       convolution_param=dict(num_output=2, kernel_size=1, stride=1,
                                                              weight_filler=dict(type='gaussian', std=0.01),
                                                              bias_filler=dict(type='constant', value=0.0)),
                                       param=[{'lr_mult': 1, 'decay_mult': 1},
                                              {'lr_mult': 2, 'decay_mult': 0}])
    return n.to_proto()
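# Illustrative usage sketch (not part of the original code): create_bnn_cnn_net()
# already returns a NetParameter message, so it only needs to be serialised.
# The point count and image size are placeholder assumptions; the Permutohedral
# layer requires a Caffe build that ships it, which is assumed here.
def _write_bnn_cnn_prototxt(path='bnn_cnn_deploy.prototxt'):
    proto = create_bnn_cnn_net(num_input_points=1024, height=240, width=320)
    with open(path, 'w') as f:
        f.write(str(proto))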
def densenet(data_file=None, mode='train', batch_size=64, depth=20,
             first_output=32, growth_rate=32, dropout=0.5):
    if mode == 'train':
        data, label = L.Data(
            source=data_file, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
            image_data_param=dict(shuffle=True),
            transform_param=dict(
                #mean_file="/home/ljf/caffe-master/examples/ljftest_alphabet_DenseNet/imagenet_mean.binaryproto",
                crop_size=28,
                #scale=0.00390625,
                mirror=True))
    if mode == 'test':
        data, label = L.Data(
            source=data_file, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
            #image_data_param=dict(shuffle=True),
            transform_param=dict(
                #mean_file="/home/ljf/caffe-master/examples/ljftest_alphabet_DenseNet/imagenet_mean.binaryproto",
                crop_size=28,
                #scale=0.00390625,
                #mirror=True
            ))

    nchannels = first_output
    if mode == 'deploy':
        model = L.Convolution(bottom="data", kernel_size=3, stride=1, num_output=nchannels, pad=1,
                              bias_term=False, weight_filler=dict(type='msra'),
                              bias_filler=dict(type='constant'))
    else:
        model = L.Convolution(data, kernel_size=3, stride=1, num_output=nchannels, pad=1,
                              bias_term=False, weight_filler=dict(type='msra'),
                              bias_filler=dict(type='constant'))

    #N = (depth-4)/4
    N = 3
    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout)

    N = 3
    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout)

    N = 3
    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout)

    N = 3
    for i in range(N):
        model = add_layer(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout)

    # N=7
    # for i in range(N):
    #     model = add_layer(model, growth_rate, dropout)
    #     nchannels += growth_rate

    model = L.BatchNorm(model, in_place=False,
                        param=[dict(lr_mult=0, decay_mult=0),
                               dict(lr_mult=0, decay_mult=0),
                               dict(lr_mult=0, decay_mult=0)])
    model = L.Scale(model, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    model = L.ReLU(model, in_place=True)
    model = L.Pooling(model, pool=P.Pooling.AVE, global_pooling=True)
    model = L.InnerProduct(model, num_output=10, bias_term=True,
                           weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))

    if mode == 'deploy':
        prob = L.Softmax(model)
        return to_proto(prob)
    else:
        loss = L.SoftmaxWithLoss(model, label)
        if mode == 'train':
            return to_proto(loss)
        accuracy = L.Accuracy(model, label)
        return to_proto(loss, accuracy)
def deconv_BN_scale_relu(bottom, nout, ks=3, stride=1, pad=1, bias_term=True):
    deconv = L.Deconvolution(bottom,
                             convolution_param=dict(num_output=nout, kernel_size=ks, stride=stride,
                                                    pad=pad, bias_term=bias_term,
                                                    weight_filler=dict(type="bilinear")))
    # param=[dict(lr_mult=0)])
    return (deconv,
            L.BatchNorm(deconv, in_place=bias_term),
            L.Scale(deconv, in_place=True, bias_term=bias_term),
            L.ReLU(deconv, in_place=True))
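# A minimal sketch (added, not from the original source) of how a bilinear
# deconvolution like the one above is typically parameterised for an integer
# upsampling factor.  It mirrors the kernel/stride/pad relation used by the
# UpSampling conversions later in this file: kernel = 2*f - f%2, stride = f,
# pad = ceil((f-1)/2).
def _bilinear_upsample_args(factor=2):
    import math
    kernel = 2 * factor - factor % 2
    pad = int(math.ceil((factor - 1) / 2.0))
    return dict(ks=kernel, stride=factor, pad=pad)
# e.g. deconv_BN_scale_relu(bottom, nout=64, **_bilinear_upsample_args(2))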
def convert_symbol2proto(symbol):
    def looks_like_weight(name):
        """Internal helper to figure out if node should be hidden with `hide_weights`.
        """
        if name.endswith("_weight"):
            return True
        if name.endswith("_bias"):
            return True
        if name.endswith("_beta") or name.endswith("_gamma") or name.endswith("_moving_var") or \
                name.endswith("_moving_mean"):
            return True
        return False

    json_symbol = json.loads(symbol.tojson())
    all_nodes = json_symbol['nodes']
    no_weight_nodes = []
    for node in all_nodes:
        op = node['op']
        name = node['name']
        if op == 'null':
            if looks_like_weight(name):
                continue
        no_weight_nodes.append(node)

    # build next node dict
    next_node = dict()
    for node in no_weight_nodes:
        node_name = node['name']
        for input in node['inputs']:
            last_node_name = all_nodes[input[0]]['name']
            if last_node_name in next_node:
                next_node[last_node_name].append(node_name)
            else:
                next_node[last_node_name] = [node_name]

    supported_op_type = ['null', 'BatchNorm', 'Convolution', 'Activation', 'Pooling', 'elemwise_add',
                         'SliceChannel', 'FullyConnected', 'SoftmaxOutput', '_maximum', 'add_n', 'Concat',
                         '_mul_scalar', 'Deconvolution', 'UpSampling']
    top_dict = dict()
    caffe_net = caffe.NetSpec()
    for node in no_weight_nodes:
        if node['op'] == 'null':
            input_param = dict()
            if node['name'] == 'data':
                input_param['shape'] = dict(dim=[1, 3, 160, 160])
            else:
                input_param['shape'] = dict(dim=[1])
            top_data = CL.Input(ntop=1, input_param=input_param)
            top_dict[node['name']] = [top_data]
            setattr(caffe_net, node['name'], top_data)
        elif node['op'].endswith('_copy'):
            pass
        elif node['op'] == 'BatchNorm':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if 'momentum' in attr:
                momentum = float(attr['momentum'])
            else:
                momentum = 0.9
            if 'eps' in attr:
                eps = float(attr['eps'])
            else:
                eps = 0.001
            if NO_INPLACE:
                in_place = False
            bn_top = CL.BatchNorm(top_dict[bottom_node_name][input[1]], ntop=1,
                                  batch_norm_param=dict(use_global_stats=True,
                                                        moving_average_fraction=momentum,
                                                        eps=eps),
                                  in_place=in_place)
            setattr(caffe_net, node['name'], bn_top)
            scale_top = CL.Scale(bn_top, ntop=1, scale_param=dict(bias_term=True), in_place=not NO_INPLACE)
            top_dict[node['name']] = [scale_top]
            setattr(caffe_net, node['name'] + '_scale', scale_top)
        elif node['op'] == 'Convolution':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'kernel' in attr:
                kernel_size = eval(attr['kernel'])
                assert kernel_size[0] == kernel_size[1]
                convolution_param['kernel_size'] = kernel_size[0]
            else:
                convolution_param['kernel_size'] = 1
            if 'no_bias' in attr:
                convolution_param['bias_term'] = not eval(attr['no_bias'])
            if 'num_group' in attr:
                convolution_param['group'] = int(attr['num_group'])
            convolution_param['num_output'] = int(attr['num_filter'])
            if 'pad' in attr:
                pad_size = eval(attr['pad'])
                assert pad_size[0] == pad_size[1]
                convolution_param['pad'] = pad_size[0]
            if 'stride' in attr:
                stride_size = eval(attr['stride'])
                assert stride_size[0] == stride_size[1]
                convolution_param['stride'] = stride_size[0]
            conv_top = CL.Convolution(top_dict[bottom_node_name][input[1]], ntop=1,
                                      convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'Deconvolution':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'kernel' in attr:
                kernel_size = eval(attr['kernel'])
                assert kernel_size[0] == kernel_size[1]
                convolution_param['kernel_size'] = kernel_size[0]
            else:
                convolution_param['kernel_size'] = 1
            if 'no_bias' in attr:
                convolution_param['bias_term'] = not eval(attr['no_bias'])
            else:
                convolution_param['bias_term'] = False
            if 'num_group' in attr:
                convolution_param['group'] = int(attr['num_group'])
            convolution_param['num_output'] = int(attr['num_filter'])
            if 'pad' in attr:
                pad_size = eval(attr['pad'])
                assert pad_size[0] == pad_size[1]
                convolution_param['pad'] = pad_size[0]
            if 'stride' in attr:
                stride_size = eval(attr['stride'])
                assert stride_size[0] == stride_size[1]
                convolution_param['stride'] = stride_size[0]
            conv_top = CL.Deconvolution(top_dict[bottom_node_name][input[1]], ntop=1,
                                        convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'UpSampling':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'scale' in attr:
                kernel_size = 2 * eval(attr['scale']) - eval(attr['scale']) % 2
                convolution_param['kernel_size'] = kernel_size
            else:
                convolution_param['kernel_size'] = 1
            convolution_param['bias_term'] = False
            convolution_param['num_output'] = int(attr['num_filter'])
            convolution_param['group'] = int(attr['num_filter'])
            convolution_param['pad'] = int(math.ceil((eval(attr['scale']) - 1) / 2.))
            convolution_param['stride'] = eval(attr['scale'])
            conv_top = CL.Deconvolution(top_dict[bottom_node_name][input[1]], ntop=1,
                                        convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'Activation':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if NO_INPLACE:
                in_place = False
            if attr['act_type'] == 'relu':
                ac_top = CL.ReLU(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            elif attr['act_type'] == 'sigmoid':
                ac_top = CL.Sigmoid(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            elif attr['act_type'] == 'tanh':
                ac_top = CL.TanH(top_dict[bottom_node_name][input[1]], ntop=1, in_place=in_place)
            top_dict[node['name']] = [ac_top]
            setattr(caffe_net, node['name'], ac_top)
        elif node['op'] == 'Pooling':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            pooling_param = dict()
            if attr['pool_type'] == 'avg':
                pooling_param['pool'] = 1
            elif attr['pool_type'] == 'max':
                pooling_param['pool'] = 0
            else:
                assert False, attr['pool_type']
            if 'global_pool' in attr and eval(attr['global_pool']) is True:
                pooling_param['global_pooling'] = True
            else:
                if 'kernel' in attr:
                    kernel_size = eval(attr['kernel'])
                    assert kernel_size[0] == kernel_size[1]
                    pooling_param['kernel_size'] = kernel_size[0]
                if 'pad' in attr:
                    pad_size = eval(attr['pad'])
                    assert pad_size[0] == pad_size[1]
                    pooling_param['pad'] = pad_size[0]
                if 'stride' in attr:
                    stride_size = eval(attr['stride'])
                    assert stride_size[0] == stride_size[1]
                    pooling_param['stride'] = stride_size[0]
            pool_top = CL.Pooling(top_dict[bottom_node_name][input[1]], ntop=1, pooling_param=pooling_param)
            top_dict[node['name']] = [pool_top]
            setattr(caffe_net, node['name'], pool_top)
        elif node['op'] == 'elemwise_add' or node['op'] == 'add_n':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            eltwise_param = dict()
            eltwise_param['operation'] = 1
            ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]],
                                     top_dict[bottom_node_name_b][input_b[1]],
                                     ntop=1, eltwise_param=eltwise_param)
            top_dict[node['name']] = [ele_add_top]
            setattr(caffe_net, node['name'], ele_add_top)
        elif node['op'] == '_maximum':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            eltwise_param = dict()
            eltwise_param['operation'] = 2
            ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]],
                                     top_dict[bottom_node_name_b][input_b[1]],
                                     ntop=1, eltwise_param=eltwise_param)
            top_dict[node['name']] = [ele_add_top]
            setattr(caffe_net, node['name'], ele_add_top)
        elif node['op'] == '_mul_scalar':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if NO_INPLACE:
                in_place = False
            scale_top = CL.Scale(top_dict[bottom_node_name][input[1]], ntop=1,
                                 scale_param=dict(bias_term=False, filler=dict(value=-1)),
                                 in_place=in_place)
            # scale_top = CL.Power(top_dict[bottom_node_name][input[1]], power=1.0, scale=float(attr['scalar']), shift=0, in_place=in_place)
            top_dict[node['name']] = [scale_top]
            setattr(caffe_net, node['name'], scale_top)
        elif node['op'] == 'SliceChannel':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            slice_param = dict()
            slice_param['slice_dim'] = 1
            slice_num = 2
            slice_outputs = CL.Slice(top_dict[bottom_node_name][input[1]], ntop=slice_num,
                                     slice_param=slice_param)
            top_dict[node['name']] = slice_outputs
            for idx, output in enumerate(slice_outputs):
                setattr(caffe_net, node['name'] + '_' + str(idx), output)
        elif node['op'] == 'FullyConnected':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            inner_product_param = dict()
            inner_product_param['num_output'] = int(attr['num_hidden'])
            fc_top = CL.InnerProduct(top_dict[bottom_node_name][input[1]], ntop=1,
                                     inner_product_param=inner_product_param)
            top_dict[node['name']] = [fc_top]
            setattr(caffe_net, node['name'], fc_top)
        elif node['op'] == 'SoftmaxOutput':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            softmax_loss = CL.SoftmaxWithLoss(top_dict[bottom_node_name_a][input_a[1]],
                                              top_dict[bottom_node_name_b][input_b[1]], ntop=1)
            top_dict[node['name']] = [softmax_loss]
            setattr(caffe_net, node['name'], softmax_loss)
        elif node['op'] == 'Concat':
            if len(node['inputs']) == 2:
                input_a = node['inputs'][0]
                while True:
                    if all_nodes[input_a[0]]['op'] not in supported_op_type:
                        input_a = all_nodes[input_a[0]]['inputs'][0]
                    else:
                        break
                input_b = node['inputs'][1]
                while True:
                    if all_nodes[input_b[0]]['op'] not in supported_op_type:
                        input_b = all_nodes[input_b[0]]['inputs'][0]
                    else:
                        break
                bottom_node_name_a = all_nodes[input_a[0]]['name']
                bottom_node_name_b = all_nodes[input_b[0]]['name']
                concat_top = CL.Concat(top_dict[bottom_node_name_a][input_a[1]],
                                       top_dict[bottom_node_name_b][input_b[1]], ntop=1)
                top_dict[node['name']] = [concat_top]
                setattr(caffe_net, node['name'], concat_top)
            elif len(node['inputs']) == 3:
                input_a = node['inputs'][0]
                while True:
                    if all_nodes[input_a[0]]['op'] not in supported_op_type:
                        input_a = all_nodes[input_a[0]]['inputs'][0]
                    else:
                        break
                input_b = node['inputs'][1]
                while True:
                    if all_nodes[input_b[0]]['op'] not in supported_op_type:
                        input_b = all_nodes[input_b[0]]['inputs'][0]
                    else:
                        break
                input_c = node['inputs'][2]
                while True:
                    if all_nodes[input_c[0]]['op'] not in supported_op_type:
                        input_c = all_nodes[input_c[0]]['inputs'][0]
                    else:
                        break
                bottom_node_name_a = all_nodes[input_a[0]]['name']
                bottom_node_name_b = all_nodes[input_b[0]]['name']
                bottom_node_name_c = all_nodes[input_c[0]]['name']
                concat_top = CL.Concat(top_dict[bottom_node_name_a][input_a[1]],
                                       top_dict[bottom_node_name_b][input_b[1]],
                                       top_dict[bottom_node_name_c][input_c[1]], ntop=1)
                top_dict[node['name']] = [concat_top]
                setattr(caffe_net, node['name'], concat_top)
        else:
            logging.warn('unknown op type = %s' % node['op'])
    return caffe_net.to_proto()
def ConvBNLayer(net, from_layer, out_layer, use_bn, use_relu, num_output, kernel_size, pad, stride,
                dilation=1, use_scale=True, eps=0.001, conv_prefix='', conv_postfix='',
                bn_prefix='', bn_postfix='_bn', scale_prefix='', scale_postfix='_scale',
                bias_prefix='', bias_postfix='_bias'):
    if use_bn:
        # parameters for convolution layer with batchnorm.
        kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1)],
            'weight_filler': dict(type='gaussian', std=0.01),
            'bias_term': False,
        }
        # parameters for batchnorm layer.
        bn_kwargs = {
            'param': [
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0),
                dict(lr_mult=0, decay_mult=0)
            ],
            'eps': eps,
        }
        # parameters for scale bias layer after batchnorm.
        if use_scale:
            sb_kwargs = {
                'bias_term': True,
                'param': [dict(lr_mult=1, decay_mult=0), dict(lr_mult=1, decay_mult=0)],
                'filler': dict(type='constant', value=1.0),
                'bias_filler': dict(type='constant', value=0.0),
            }
        else:
            bias_kwargs = {
                'param': [dict(lr_mult=1, decay_mult=0)],
                'filler': dict(type='constant', value=0.0),
            }
    else:
        kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
            'weight_filler': dict(type='xavier'),
            'bias_filler': dict(type='constant', value=0)
        }

    conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix)
    [kernel_h, kernel_w] = UnpackVariable(kernel_size, 2)
    [pad_h, pad_w] = UnpackVariable(pad, 2)
    [stride_h, stride_w] = UnpackVariable(stride, 2)
    if kernel_h == kernel_w:
        net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
                                       kernel_size=kernel_h, pad=pad_h, stride=stride_h, **kwargs)
    else:
        net[conv_name] = L.Convolution(net[from_layer], num_output=num_output,
                                       kernel_h=kernel_h, kernel_w=kernel_w,
                                       pad_h=pad_h, pad_w=pad_w,
                                       stride_h=stride_h, stride_w=stride_w, **kwargs)
    if dilation > 1:
        net.update(conv_name, {'dilation': dilation})

    if use_bn:
        bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix)
        net[bn_name] = L.BatchNorm(net[conv_name], in_place=True, **bn_kwargs)
        if use_scale:
            sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix)
            net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs)
        else:
            bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix)
            net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs)
    if use_relu:
        relu_name = '{}_relu'.format(conv_name)
        net[relu_name] = L.ReLU(net[conv_name], in_place=True)
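# Illustrative usage sketch (not part of the original code): ConvBNLayer()
# mutates the NetSpec passed to it, so one conv+BN+scale+ReLU block is added as
# shown below.  The input shape and layer names are assumptions, and the
# UnpackVariable helper it calls is assumed to be defined elsewhere.
def _conv_bn_layer_example():
    import caffe
    from caffe import layers as L
    net = caffe.NetSpec()
    net.data = L.Input(shape=[dict(dim=[1, 3, 224, 224])])
    ConvBNLayer(net, 'data', 'conv1', use_bn=True, use_relu=True,
                num_output=64, kernel_size=3, pad=1, stride=2)
    return str(net.to_proto())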
def make_resnet(training_data='cifar10_train', test_data='cifar10_test',
                mean_file='mean.binaryproto', num_res_in_stage=3):
    num_feature_maps = np.array([16, 32, 64])  # feature map size: [32, 16, 8]

    n = caffe.NetSpec()
    # make training data layer
    n.data, n.label = L.Data(source=training_data, backend=P.Data.LMDB, batch_size=128, ntop=2,
                             transform_param=dict(crop_size=32, mean_file=mean_file, mirror=True),
                             image_data_param=dict(shuffle=True), include=dict(phase=0))
    # make test data layer
    n.test_data, n.test_label = L.Data(source=test_data, backend=P.Data.LMDB, batch_size=100, ntop=2,
                                       transform_param=dict(crop_size=32, mean_file=mean_file, mirror=False),
                                       include=dict(phase=1))
    # conv1 should accept both training and test data layers. But this is inconvenient to code in pycaffe.
    # You have to write two conv layers for them. To deal with this, I temporarily ignore the test data layer
    # and let conv1 accept the output of training data layer. Then, after making the whole prototxt, I postprocess
    # the top name of the two data layers, renaming their names to the same.
    n.conv_start = L.Convolution(n.data, kernel_size=3, stride=1, num_output=num_feature_maps[0], pad=1,
                                 param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                 weight_filler=weight_filler, bias_filler=bias_filler)

    # set up a checkpoint so as to know where we get.
    checkpoint = 'n.conv_start'

    # start making blocks.
    # num_feature_maps: the number of feature maps for each stage. Default is [16,32,64],
    #   suggesting the network has three stages.
    # num_res_in_stage: a parameter from the original paper, telling us how many blocks there are in
    #   each stage.
    # stride_proj: control the stride of project path; the first project path uses stride 1, and the rest
    #   use stride 2.
    stride_proj = 1
    for num_map in num_feature_maps:
        num_map = int(num_map)
        for res in list(range(num_res_in_stage)):
            # stage name
            stage = 'map' + str(num_map) + '_' + str(res + 1) + '_'
            # use the projecting block when downsample the feature map
            if np.where(num_feature_maps == num_map)[0] >= 0 and res == 0:
                make_res = 'n.' + stage + 'bn_pre_train,' + \
                           'n.' + stage + 'bn_pre_test,' + \
                           'n.' + stage + 'pre_scale,' + \
                           'n.' + stage + 'pre_relu,' + \
                           'n.' + stage + 'conv1,' + \
                           'n.' + stage + 'bn1_train, ' + \
                           'n.' + stage + 'bn1_test, ' + \
                           'n.' + stage + 'scale1, ' + \
                           'n.' + stage + 'relu1, ' + \
                           'n.' + stage + 'conv2, ' + \
                           'n.' + stage + 'bn2_train, ' + \
                           'n.' + stage + 'bn2_test, ' + \
                           'n.' + stage + 'scale2, ' + \
                           'n.' + stage + 'relu2, ' + \
                           'n.' + stage + 'conv_end, ' + \
                           'n.' + stage + 'eltsum' + \
                           ' = project_block(' + checkpoint + ', base_channels=num_map, stride=' + str(stride_proj) + ', pad=1)'
                exec(make_res)
                if stride_proj == 1:
                    stride_proj += 1
                checkpoint = 'n.' + stage + 'eltsum'  # where we get
                continue

            # most blocks have this shape
            make_res = 'n.' + stage + 'bn_pre_train, ' + \
                       'n.' + stage + 'bn_pre_test, ' + \
                       'n.' + stage + 'pre_scale, ' + \
                       'n.' + stage + 'pre_relu, ' + \
                       'n.' + stage + 'conv1, ' + \
                       'n.' + stage + 'bn1_train, ' + \
                       'n.' + stage + 'bn1_test, ' + \
                       'n.' + stage + 'scale1, ' + \
                       'n.' + stage + 'relu1, ' + \
                       'n.' + stage + 'conv2, ' + \
                       'n.' + stage + 'bn2_train, ' + \
                       'n.' + stage + 'bn2_test, ' + \
                       'n.' + stage + 'scale2, ' + \
                       'n.' + stage + 'relu2, ' + \
                       'n.' + stage + 'conv_end, ' + \
                       'n.' + stage + 'eltsum, ' + \
                       ' = identity_block(' + checkpoint + ', base_channels=num_map, stride=1, pad=1)'
            exec(make_res)
            checkpoint = 'n.' + stage + 'eltsum'  # where we get

    # add the rest layers
    exec('n.BN_train_end = L.BatchNorm(' + checkpoint +
         ', param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), '
         'dict(lr_mult=0, decay_mult=0)], '
         'use_global_stats=False, in_place=False, include=dict(phase=0))')
    exec('n.BN_test_end = L.BatchNorm(' + checkpoint +
         ', param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), '
         'dict(lr_mult=0, decay_mult=0)], '
         'use_global_stats=True, in_place=False, include=dict(phase=1))')
    n.scale_end = L.Scale(n.BN_train_end, scale_param=dict(bias_term=True), in_place=True)
    n.relu_end = L.ReLU(n.scale_end, in_place=True)
    n.pool_global = L.Pooling(n.relu_end, pool=P.Pooling.AVE, global_pooling=True)
    n.score = L.InnerProduct(n.pool_global, num_output=10,
                             param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                             weight_filler=dict(type='gaussian', std=0.01),
                             bias_filler=dict(type='constant', value=0))
    n.loss = L.SoftmaxWithLoss(n.score, n.label)
    n.acc = L.Accuracy(n.score, n.label)
    return n.to_proto()
def conv_batch_relu(net, bottom, name, output, kernel, stride, pad, phase, with_relu=True):
    def conv_params(name):
        conv_kwargs = {
            'param': [{'name': name + '_w', 'lr_mult': 1, 'decay_mult': 1},
                      {'name': name + '_b', 'lr_mult': 2, 'decay_mult': 0}],
            'weight_filler': dict(type='msra'),
            'bias_filler': dict(type='constant', value=0)
        }
        return conv_kwargs

    def bn_params(name, phase):
        bn_kwargs = {
            'use_global_stats': phase == caffe.TEST,
            'in_place': True,
            'param': [{"name": name + '_w', "lr_mult": 0},
                      {"name": name + '_b', "lr_mult": 0},
                      {"name": name + '_t', "lr_mult": 0}]
        }
        return bn_kwargs

    def scale_params(name):
        scale_kwargs = {
            'in_place': True,
            'param': [{'name': name + '_w'}, {'name': name + '_b'}],
            'bias_term': True
        }
        return scale_kwargs

    conv_kwargs = conv_params(name + '_conv')
    bn_kwargs = bn_params(name + '_bn', phase)
    scale_kwargs = scale_params(name + '_scale')

    conv = netset(net, name + '_conv',
                  L.Convolution(bottom, kernel_size=kernel, stride=stride, num_output=output,
                                pad=pad, **conv_kwargs))
    batch = netset(net, name + '_bn', L.BatchNorm(conv, **bn_kwargs))
    scale = netset(net, name + '_scale', L.Scale(batch, **scale_kwargs))
    if with_relu:
        relu = netset(net, name + '_relu', L.ReLU(scale, in_place=True))
        return relu
    else:
        return scale
def densenet(data_file=None, mode='train_test', batch_size=64, depth=40,
             first_output=16, growth_rate=12, dropout=0.2):
    nchannels = first_output
    if mode == 'deploy':
        # deploy.prototxt doesn't need a data layer
        model = L.Convolution(bottom='data', kernel_size=3, stride=1, num_output=nchannels, pad=1,
                              bias_term=False, weight_filler=dict(type='msra'),
                              bias_filler=dict(type='constant'))
    else:
        data, label = L.Data(source=data_file, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                             transform_param=dict(mirror=True, crop_size=32,
                                                  mean_value=[129, 124, 112], scale=1))
        model = L.Convolution(data, kernel_size=3, stride=1, num_output=nchannels, pad=1,
                              bias_term=False, weight_filler=dict(type='msra'),
                              bias_filler=dict(type='constant'))

    # integer division so range() receives an int
    N = (depth - 4) // 3
    for i in range(N):
        model = dense_block(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout)

    for i in range(N):
        model = dense_block(model, growth_rate, dropout)
        nchannels += growth_rate
    model = transition(model, nchannels, dropout)

    for i in range(N):
        model = dense_block(model, growth_rate, dropout)
        nchannels += growth_rate

    model = L.BatchNorm(model, in_place=False)
    model = L.Scale(model, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    model = L.ReLU(model, in_place=True)
    model = L.Pooling(model, pool=P.Pooling.AVE, global_pooling=True)
    model = L.InnerProduct(model, num_output=100, bias_term=True,
                           weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))

    if mode == 'deploy':
        prob = L.Softmax(model)
        return to_proto(prob)
    else:
        loss = L.SoftmaxWithLoss(model, label)
        accuracy = L.Accuracy(model, label)
        return to_proto(loss, accuracy)
def bn_scale_relu(bottom):
    bn = L.BatchNorm(bottom, use_global_stats=False)
    scale = L.Scale(bn, scale_param=dict(bias_term=True), in_place=True)
    relu = L.ReLU(bn, in_place=True)
    return bn, scale, relu
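# Note (added): because Scale and ReLU above are created with in_place=True,
# all three handles end up sharing the BatchNorm output blob, which is why ReLU
# is fed `bn` rather than `scale`.  A minimal sketch of wiring the triple into
# a NetSpec; the name prefix is an illustrative assumption.
def _bn_scale_relu_example(n, bottom, prefix):
    bn_top, scale_top, relu_top = bn_scale_relu(bottom)
    n[prefix + '_bn'] = bn_top
    n[prefix + '_scale'] = scale_top
    n[prefix + '_relu'] = relu_top
    return relu_top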
def _conv_block(net, bottom, name, num_output, use_relu=True, kernel_size=3, stride=1, pad=1,
                bn_prefix='', bn_postfix='/bn', scale_prefix='', scale_postfix='/scale', direction=0):
    # direction 0: square kernel, 1: horizontal (1 x k), 2: vertical (k x 1)
    if direction == 0:
        conv = L.Convolution(bottom, kernel_size=kernel_size, stride=stride, num_output=num_output,
                             pad=pad, bias_term=False, weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant'))
    elif direction == 1:
        conv = L.Convolution(bottom, kernel_w=kernel_size, kernel_h=1, stride=stride,
                             num_output=num_output, pad_w=pad, pad_h=0, bias_term=False,
                             weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))
    elif direction == 2:
        conv = L.Convolution(bottom, kernel_w=1, kernel_h=kernel_size, stride=stride,
                             num_output=num_output, pad_h=pad, pad_w=0, bias_term=False,
                             weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))
    net[name] = conv

    bn_name = '{}{}{}'.format(bn_prefix, name, bn_postfix)
    bn_kwargs = {
        'param': [
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0)
        ],
        'eps': 0.001,
        'moving_average_fraction': 0.999,
    }
    batch_norm = L.BatchNorm(conv, in_place=True, **bn_kwargs)
    net[bn_name] = batch_norm

    scale_kwargs = {
        'param': [
            dict(lr_mult=1, decay_mult=0),
            dict(lr_mult=2, decay_mult=0),
        ],
    }
    scale = L.Scale(batch_norm, bias_term=True, in_place=True, filler=dict(value=1),
                    bias_filler=dict(value=0), **scale_kwargs)
    sb_name = '{}{}{}'.format(scale_prefix, name, scale_postfix)
    net[sb_name] = scale

    if use_relu:
        out_layer = L.ReLU(scale, in_place=True)
        relu_name = '{}/relu'.format(name)
        net[relu_name] = out_layer
    else:
        out_layer = scale
    return out_layer
def compile_time_operation(self, learning_option, cluster):
    ksize = self.get_attr('ksize')
    stride = self.get_attr('stride')
    padding = self.get_attr('padding', self.padding)

    input_ = self.get_input('input')
    indim = self.get_dimension('input')

    # padding
    if padding == 'SAME':
        #print self.name + " : " + str(indim)
        outdim = [np.ceil(float(indim[i]) / float(stride)) for i in xrange(2)]
        p = [int(((outdim[i] - 1) * stride + ksize - indim[i]) / 2) for i in xrange(2)]
    else:
        outdim = [np.ceil(float(indim[i] - ksize + 1) / float(stride)) for i in xrange(2)]
        p = [0, 0]

    # pool=0: max_pool, pool=1: avr_pool
    layer = L.Pooling(input_, name=self.name, pool=1, kernel_size=ksize, stride=stride,
                      pad_h=p[0], pad_w=p[1])

    ### activation
    activation = self.get_attr('activation', self.activation)
    if len(activation) != 0:
        for act in activation:
            # relu
            if act == 'relu':
                layer = L.ReLU(layer, name=self.name + '_relu', in_place=True)
            # batch normalization
            elif act == 'batchnorm':
                use_global_stats = self.get_attr('use_global_stats', self.use_global_stats)
                moving_average_fraction = self.get_attr('moving_average_fraction',
                                                        self.moving_average_fraction)
                epsilon = self.get_attr('epsilon', self.epsilon)
                layer = L.BatchNorm(layer, name=self.name + '_batchnorm',
                                    use_global_stats=use_global_stats,
                                    moving_average_fraction=moving_average_fraction,
                                    eps=epsilon, in_place=True)
                # scale
                if self.get_attr('is_scale', self.is_scale):
                    bias_term = self.get_attr('bias_term', self.bias_term)
                    layer = L.Scale(layer, bias_term=bias_term, in_place=True)

    # TODO: decide whether the output name should match DLMDL
    self.set_output('output', layer)
    self.set_dimension('output', outdim)
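# Worked example (added for clarity, using the formulas above): with
# indim = [13, 13], ksize = 3, stride = 1 and padding == 'SAME',
#   outdim[i] = ceil(13 / 1)                      = 13
#   p[i]      = int(((13 - 1) * 1 + 3 - 13) / 2)  = 1
# so the Pooling layer is emitted with pad_h = pad_w = 1 and a 13x13 output.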
def convert(keras_model, keras_format, caffe_net_file, caffe_params_file):
    caffe_net = caffe.NetSpec()
    net_params = dict()
    outputs = dict()
    shape = ()
    input_str = ''

    # tensorflow 2.0
    if len(caffe_net.tops) == 0 and False:
        input_name = 'data'
        input_shape = [1, keras_model.input.shape[1], keras_model.input.shape[2], keras_model.input.shape[3]]
        input_param = {'shape': {'dim': list(input_shape)}}
        caffe_net[input_name] = L.Input(input_param=input_param)
        input_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input_name + '"', 1, input_shape[1], input_shape[2], input_shape[3])
        top = keras_model.input.name
        outputs[top] = input_name

    for layer in keras_model.layers:
        name = layer.name
        layer_type = type(layer).__name__
        config = layer.get_config()
        blobs = layer.get_weights()
        blobs_num = len(blobs)

        if type(layer.output) == list:
            raise Exception('Layers with multiply outputs are not supported')
        else:
            top = layer.output.name
        if type(layer.input) != list:
            bottom = layer.input.name

        # first we need to create Input layer
        '''
        if layer_type=='InputLayer' or len(caffe_net.tops)==0:
            input_name = 'data'
            caffe_net[input_name] = L.Layer()
            input_shape = config['batch_input_shape']
            input_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
                '"' + input_name + '"', 1, input_shape[3], input_shape[1], input_shape[2])
            outputs[layer.input.name] = input_name
            if layer_type=='InputLayer':
                continue
        '''

        if layer_type == 'InputLayer' and len(caffe_net.tops) == 0:
            name = 'data'
            input_shape = config['batch_input_shape']
            if "first" in keras_format:
                input_shape = [1, input_shape[1], input_shape[2], input_shape[3]]
            else:
                input_shape = [1, input_shape[3], input_shape[1], input_shape[2]]
            input_param = {'shape': {'dim': list(input_shape)}}
            caffe_net[name] = L.Input(input_param=input_param)

        elif layer_type == 'Conv2D' or layer_type == 'Convolution2D':
            strides = config['strides']
            kernel_size = config['kernel_size']
            kwargs = {'num_output': config['filters']}
            if kernel_size[0] == kernel_size[1]:
                kwargs['kernel_size'] = kernel_size[0]
            else:
                kwargs['kernel_h'] = kernel_size[0]
                kwargs['kernel_w'] = kernel_size[1]
            if strides[0] == strides[1]:
                kwargs['stride'] = strides[0]
            else:
                kwargs['stride_h'] = strides[0]
                kwargs['stride_w'] = strides[1]
            if not config['use_bias']:
                kwargs['bias_term'] = False
                #kwargs['param']=[dict(lr_mult=0)]
            else:
                #kwargs['param']=[dict(lr_mult=0), dict(lr_mult=0)]
                pass
            set_padding(config, layer.input_shape, kwargs)
            caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], **kwargs)
            blobs[0] = np.array(blobs[0]).transpose(3, 2, 0, 1)
            net_params[name] = blobs
            if config['activation'] == 'relu':
                name_s = name + 's'
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)
            elif config['activation'] == 'sigmoid':
                name_s = name + 's'
                caffe_net[name_s] = L.Sigmoid(caffe_net[name], in_place=True)
            elif config['activation'] == 'linear':
                #do nothing
                pass
            else:
                raise Exception('Unsupported activation ' + config['activation'])

        elif layer_type == 'Permute':
            # skip the layer
            name = outputs[bottom]

        elif layer_type == 'DepthwiseConv2D':
            strides = config['strides']
            kernel_size = config['kernel_size']
            kwargs = {'num_output': layer.input_shape[3]}
            if kernel_size[0] == kernel_size[1]:
                kwargs['kernel_size'] = kernel_size[0]
            else:
                kwargs['kernel_h'] = kernel_size[0]
                kwargs['kernel_w'] = kernel_size[1]
            if strides[0] == strides[1]:
                kwargs['stride'] = strides[0]
            else:
                kwargs['stride_h'] = strides[0]
                kwargs['stride_w'] = strides[1]
            set_padding(config, layer.input_shape, kwargs)
            kwargs['group'] = layer.input_shape[3]
            kwargs['bias_term'] = False
            caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], **kwargs)
            blob = np.array(blobs[0]).transpose(2, 3, 0, 1)
            blob.shape = (1,) + blob.shape
            net_params[name] = blob
            if config['activation'] == 'relu':
                name_s = name + 's'
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)
            elif config['activation'] == 'sigmoid':
                name_s = name + 's'
                caffe_net[name_s] = L.Sigmoid(caffe_net[name], in_place=True)
            elif config['activation'] == 'linear':
                #do nothing
                pass
            else:
                raise Exception('Unsupported activation ' + config['activation'])

        elif layer_type == 'SeparableConv2D':
            strides = config['strides']
            kernel_size = config['kernel_size']
            kwargs = {'num_output': layer.input_shape[3]}
            if kernel_size[0] == kernel_size[1]:
                kwargs['kernel_size'] = kernel_size[0]
            else:
                kwargs['kernel_h'] = kernel_size[0]
                kwargs['kernel_w'] = kernel_size[1]
            if strides[0] == strides[1]:
                kwargs['stride'] = strides[0]
            else:
                kwargs['stride_h'] = strides[0]
                kwargs['stride_w'] = strides[1]
            set_padding(config, layer.input_shape, kwargs)
            kwargs['group'] = layer.input_shape[3]
            kwargs['bias_term'] = False
            caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], **kwargs)
            blob = np.array(blobs[0]).transpose(2, 3, 0, 1)
            blob.shape = (1,) + blob.shape
            net_params[name] = blob

            name2 = name + '_'
            kwargs = {'num_output': config['filters'], 'kernel_size': 1, 'bias_term': config['use_bias']}
            caffe_net[name2] = L.Convolution(caffe_net[name], **kwargs)
            if config['use_bias'] == True:
                blob2 = []
                blob2.append(np.array(blobs[1]).transpose(3, 2, 0, 1))
                blob2.append(np.array(blobs[2]))
                blob2[0].shape = (1,) + blob2[0].shape
            else:
                blob2 = np.array(blobs[1]).transpose(3, 2, 0, 1)
                blob2.shape = (1,) + blob2.shape
            net_params[name2] = blob2
            name = name2

        elif layer_type == 'BatchNormalization':
            param = dict()
            variance = np.array(blobs[-1])
            mean = np.array(blobs[-2])
            if config['scale']:
                gamma = np.array(blobs[0])
                sparam = [dict(lr_mult=1), dict(lr_mult=1)]
            else:
                gamma = np.ones(mean.shape, dtype=np.float32)
                #sparam=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=1, decay_mult=1)]
                sparam = [dict(lr_mult=0), dict(lr_mult=1)]
                #sparam=[dict(lr_mult=0), dict(lr_mult=0)]
            if config['center']:
                beta = np.array(blobs[-3])
                param['bias_term'] = True
            else:
                beta = np.zeros(mean.shape, dtype=np.float32)
                param['bias_term'] = False
            caffe_net[name] = L.BatchNorm(caffe_net[outputs[bottom]], in_place=True)
            #param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=1), dict(lr_mult=0, decay_mult=0)])
            #param=[dict(lr_mult=1), dict(lr_mult=1), dict(lr_mult=0)])
            net_params[name] = (mean, variance, np.array(1.0))
            name_s = name + 's'
            caffe_net[name_s] = L.Scale(caffe_net[name], in_place=True, param=sparam,
                                        scale_param={'bias_term': config['center']})
            net_params[name_s] = (gamma, beta)

        elif layer_type == 'Dense':
            caffe_net[name] = L.InnerProduct(caffe_net[outputs[bottom]], num_output=config['units'],
                                             weight_filler=dict(type='xavier'))
            if config['use_bias']:
                weight = np.array(blobs[0]).transpose(1, 0)
                if False and type(layer._inbound_nodes[0].inbound_layers[0]).__name__ == 'Flatten':
                    flatten_shape = layer._inbound_nodes[0].inbound_layers[0].input_shape
                    for i in range(weight.shape[0]):
                        weight[i] = np.array(weight[i].reshape(flatten_shape[1], flatten_shape[2],
                                                               flatten_shape[3]).transpose(2, 0, 1).reshape(weight.shape[1]))
                net_params[name] = (weight, np.array(blobs[1]))
            else:
                weight = np.array(blobs[0]).transpose(1, 0)
                net_params[name] = (weight, np.zeros(weight.shape[0], dtype=weight.dtype))
            name_s = name + 's'
            if config['activation'] == 'softmax':
                caffe_net[name_s] = L.Softmax(caffe_net[name], in_place=True)
            elif config['activation'] == 'relu':
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)

        elif layer_type == 'Activation':
            if config['activation'] == 'relu':
                #caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]], in_place=True)
                if len(layer.input.consumers()) > 1:
                    caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]])
                else:
                    caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]], in_place=True)
            elif config['activation'] == 'relu6':
                #TODO
                caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]])
            elif config['activation'] == 'softmax':
                caffe_net[name] = L.Softmax(caffe_net[outputs[bottom]], in_place=True)
            elif config['activation'] == 'sigmoid':
                # name_s = name+'s'
                caffe_net[name] = L.Sigmoid(caffe_net[outputs[bottom]], in_place=True)
            #used to finish the image normalization.
            elif config['activation'] == 'linear':
                name = name + '_linear'
                caffe_net[name] = L.Scale(caffe_net[outputs[bottom]],
                                          filler=dict(type="constant", value=0.003921))
            else:
                raise Exception('Unsupported activation ' + config['activation'])

        elif layer_type == 'Cropping2D':
            shape = layer.output_shape
            ddata = L.DummyData(shape=dict(dim=[1, shape[3], shape[1], shape[2]]))
            layers = []
            layers.append(caffe_net[outputs[bottom]])
            layers.append(ddata)
            #TODO
            caffe_net[name] = L.Crop(*layers)

        elif layer_type == 'Concatenate' or layer_type == 'Merge':
            layers = []
            for i in layer.input:
                layers.append(caffe_net[outputs[i.name]])
            caffe_net[name] = L.Concat(*layers, axis=1)

        elif layer_type == 'Add':
            '''PROD = 0; SUM = 1; MAX = 2;'''
            layers = []
            for i in layer.input:
                layers.append(caffe_net[outputs[i.name]])
            caffe_net[name] = L.Eltwise(*layers, eltwise_param={'operation': 1})

        elif layer_type == 'Flatten':
            caffe_net[name] = L.Flatten(caffe_net[outputs[bottom]])

        elif layer_type == 'Reshape':
            shape = config['target_shape']
            if len(shape) == 3:
                #shape = (layer.input_shape[0], shape[2], shape[0], shape[1])
                shape = (1, shape[2], shape[0], shape[1])
            elif len(shape) == 1:
                #shape = (layer.input_shape[0], 1, 1, shape[0])
                shape = (1, 1, 1, shape[0])
            caffe_net[name] = L.Reshape(caffe_net[outputs[bottom]],
                                        reshape_param={'shape': {'dim': list(shape)}})

        elif layer_type == 'MaxPooling2D' or layer_type == 'AveragePooling2D':
            kwargs = {}
            if layer_type == 'MaxPooling2D':
                kwargs['pool'] = P.Pooling.MAX
            else:
                kwargs['pool'] = P.Pooling.AVE
            pool_size = config['pool_size']
            strides = config['strides']
            if pool_size[0] != pool_size[1]:
                raise Exception('Unsupported pool_size')
            if strides[0] != strides[1]:
                raise Exception('Unsupported strides')
            set_padding(config, layer.input_shape, kwargs)
            caffe_net[name] = L.Pooling(caffe_net[outputs[bottom]], kernel_size=pool_size[0],
                                        stride=strides[0], **kwargs)

        elif layer_type == 'Dropout':
            caffe_net[name] = L.Dropout(caffe_net[outputs[bottom]],
                                        dropout_param=dict(dropout_ratio=config['rate']))

        elif layer_type == 'GlobalAveragePooling2D':
            caffe_net[name] = L.Pooling(caffe_net[outputs[bottom]], pool=P.Pooling.AVE,
                                        pooling_param=dict(global_pooling=True))

        elif layer_type == 'UpSampling2D':
            if config['size'][0] != config['size'][1]:
                raise Exception('Unsupported upsampling factor')
            factor = config['size'][0]
            kernel_size = 2 * factor - factor % 2
            stride = factor
            pad = int(math.ceil((factor - 1) / 2.0))
            channels = layer.input_shape[-1]
            caffe_net[name] = L.Deconvolution(caffe_net[outputs[bottom]],
                                              convolution_param=dict(num_output=channels, group=channels,
                                                                     kernel_size=kernel_size, stride=stride,
                                                                     pad=pad,
                                                                     weight_filler=dict(type='bilinear'),
                                                                     bias_term=False),
                                              param=dict(lr_mult=0, decay_mult=0))

        elif layer_type == 'LeakyReLU':
            caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]], negative_slope=config['alpha'],
                                     in_place=True)

        #TODO
        elif layer_type == 'ZeroPadding2D':
            padding = config['padding']
            #ch = layer.input_shape[3]
            #caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], num_output=ch, kernel_size=1, stride=1, group=ch,
            #                                pad_h=padding[0][0], pad_w=padding[1][0], convolution_param=dict(bias_term = False))
            #params = np.ones((1,ch,1,1))
            #net_params[name] = np.ones((1,ch,1,1,1))
            #net_params[name] = np.ones(layer.output_shape)
            caffe_net[name] = L.Pooling(caffe_net[outputs[bottom]], kernel_size=1, stride=1,
                                        pad_h=padding[0][0] + padding[0][1],
                                        pad_w=padding[1][0] + padding[1][1], pool=P.Pooling.AVE)

        else:
            raise Exception('Unsupported layer type: ' + layer_type)

        outputs[top] = name

    #replace empty layer with input blob
    #net_proto = input_str + '\n' + 'layer {' + 'layer {'.join(str(caffe_net.to_proto()).split('layer {')[2:])
    net_proto = str(caffe_net.to_proto())

    f = open(caffe_net_file, 'w')
    f.write(net_proto)
    f.close()

    caffe_model = caffe.Net(caffe_net_file, caffe.TEST)
    for layer in caffe_model.params.keys():
        print(layer)
        if 'up_sampling2d' in layer:
            continue
        if "activation_linear" in layer:
            continue
        for n in range(0, len(caffe_model.params[layer])):
            caffe_model.params[layer][n].data[...] = net_params[layer][n]
    caffe_model.save(caffe_params_file)
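# Illustrative usage sketch (not part of the original converter): a typical
# invocation with a saved Keras model.  The file paths are hypothetical and the
# 'channels_last' format string is an assumption (the converter only checks for
# the substring "first" to detect channels-first input shapes).
def _convert_example():
    from tensorflow import keras
    keras_model = keras.models.load_model('model.h5')  # hypothetical path
    convert(keras_model, keras_format='channels_last',
            caffe_net_file='model.prototxt',
            caffe_params_file='model.caffemodel')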
def reduction_b(bottom):
    """
    input:1152x17x17
    output:2048x8x8
    :param bottom: bottom layer
    :return: layers
    """
    pool = L.Pooling(bottom, kernel_size=3, stride=2, pool=P.Pooling.MAX)  # 1152x8x8

    conv_3x3_reduce = L.Convolution(bottom, kernel_size=1, num_output=256, stride=1,
                                    param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                    weight_filler=dict(type='xavier'),
                                    bias_filler=dict(type='constant', value=0))  # 256x17x17
    conv_3x3_reduce_bn = L.BatchNorm(conv_3x3_reduce, use_global_stats=False, in_place=True)
    conv_3x3_reduce_scale = L.Scale(conv_3x3_reduce, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_reduce_relu = L.ReLU(conv_3x3_reduce, in_place=True)
    conv_3x3 = L.Convolution(conv_3x3_reduce, kernel_size=3, num_output=384, stride=2,
                             param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant', value=0))  # 384x8x8
    conv_3x3_bn = L.BatchNorm(conv_3x3, use_global_stats=False, in_place=True)
    conv_3x3_scale = L.Scale(conv_3x3, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_relu = L.ReLU(conv_3x3, in_place=True)

    conv_3x3_2_reduce = L.Convolution(bottom, kernel_size=1, num_output=256, stride=1,
                                      param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                      weight_filler=dict(type='xavier'),
                                      bias_filler=dict(type='constant', value=0))  # 256x17x17
    conv_3x3_2_reduce_bn = L.BatchNorm(conv_3x3_2_reduce, use_global_stats=False, in_place=True)
    conv_3x3_2_reduce_scale = L.Scale(conv_3x3_2_reduce, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_2_reduce_relu = L.ReLU(conv_3x3_2_reduce, in_place=True)
    conv_3x3_2 = L.Convolution(conv_3x3_2_reduce, kernel_size=3, num_output=256, stride=2,
                               param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                               weight_filler=dict(type='xavier'),
                               bias_filler=dict(type='constant', value=0))  # 256x8x8
    conv_3x3_2_bn = L.BatchNorm(conv_3x3_2, use_global_stats=False, in_place=True)
    conv_3x3_2_scale = L.Scale(conv_3x3_2, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_2_relu = L.ReLU(conv_3x3_2, in_place=True)

    conv_3x3_3_reduce = L.Convolution(bottom, kernel_size=1, num_output=256, stride=1,
                                      param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                      weight_filler=dict(type='xavier'),
                                      bias_filler=dict(type='constant', value=0))  # 256x17x17
    conv_3x3_3_reduce_bn = L.BatchNorm(conv_3x3_3_reduce, use_global_stats=False, in_place=True)
    conv_3x3_3_reduce_scale = L.Scale(conv_3x3_3_reduce, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_3_reduce_relu = L.ReLU(conv_3x3_3_reduce, in_place=True)
    conv_3x3_3 = L.Convolution(conv_3x3_3_reduce, kernel_size=3, num_output=256, stride=1, pad=1,
                               param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                               weight_filler=dict(type='xavier'),
                               bias_filler=dict(type='constant', value=0))  # 256x17x17
    conv_3x3_3_bn = L.BatchNorm(conv_3x3_3, use_global_stats=False, in_place=True)
    conv_3x3_3_scale = L.Scale(conv_3x3_3, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_3_relu = L.ReLU(conv_3x3_3, in_place=True)
    conv_3x3_4 = L.Convolution(conv_3x3_3, kernel_size=3, num_output=256, stride=2,
                               param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                               weight_filler=dict(type='xavier'),
                               bias_filler=dict(type='constant', value=0))  # 256x8x8
    conv_3x3_4_bn = L.BatchNorm(conv_3x3_4, use_global_stats=False, in_place=True)
    conv_3x3_4_scale = L.Scale(conv_3x3_4, scale_param=dict(bias_term=True), in_place=True)
    conv_3x3_4_relu = L.ReLU(conv_3x3_4, in_place=True)

    concat = L.Concat(pool, conv_3x3, conv_3x3_2, conv_3x3_4)  # 2048x8x8

    return pool, conv_3x3_reduce, conv_3x3_reduce_bn, conv_3x3_reduce_scale, conv_3x3_reduce_relu, conv_3x3, \
        conv_3x3_bn, conv_3x3_scale, conv_3x3_relu, conv_3x3_2_reduce, conv_3x3_2_reduce_bn, \
        conv_3x3_2_reduce_scale, conv_3x3_2_reduce_relu, conv_3x3_2, conv_3x3_2_bn, conv_3x3_2_scale, \
        conv_3x3_2_relu, conv_3x3_3_reduce, conv_3x3_3_reduce_bn, conv_3x3_3_reduce_scale, conv_3x3_3_reduce_relu, \
        conv_3x3_3, conv_3x3_3_bn, conv_3x3_3_scale, conv_3x3_3_relu, conv_3x3_4, conv_3x3_4_bn, conv_3x3_4_scale, \
        conv_3x3_4_relu, concat
def AbdNet(): growth_rate = 16 dropout = 0.2 vgg_nout = 64 N = 5 nchannels = 16 imsize = 256 msra = dict(type='msra') gs_1e_2 = dict(type='gaussian', std=0.01) # n = caffe.NetSpec() data, data2, albedo_diff_gt, albedo_gt = L.Python(ntop=4, \ python_param=dict(\ module='image_layer3_gradient',\ layer='ImageLayer3',\ param_str="{{'data_dir': '/home/albertxavier/dataset/sintel/images/', 'tops': ['data', 'data2', 'albedo_diff_gt', 'albedo_gt'],'seed': 1337,'split': 'train', 'list_file':'train_two_folds_split_scene.txt', 'mean_bgr': (104.00699, 116.66877, 122.67892), 'crop_size':({imsize},{imsize})}}".format(imsize=imsize)\ )\ ) pool1, pool2, pool3, pool4, pool5 = make_VGG(data) # scale 2 model = L.Convolution(data2, kernel_size=4, stride=2, num_output=96, pad=1, bias_term=True, weight_filler=msra, bias_filler=dict(type='constant', value=0)) model = L.BatchNorm(model, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]) model = L.Scale(model, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0)) model = L.ReLU(model, in_place=True) model = L.Pooling(model, pooling_param=dict(pool=P.Pooling.MAX, kernel_size=2, stride=2)) model = L.Dropout(model, dropout_ratio=dropout) # concat VGG vgg1 = upsampleVGG(pool1, upsample = 2/4, dropout=dropout, nout=vgg_nout) vgg2 = upsampleVGG(pool2, upsample = 4/4, dropout=dropout, nout=vgg_nout) vgg3 = upsampleVGG(pool3, upsample = 8/4, dropout=dropout, nout=vgg_nout) vgg4 = upsampleVGG(pool4, upsample = 16/4, dropout=dropout, nout=vgg_nout) vgg5 = upsampleVGG(pool5, upsample = 32/4, dropout=dropout, nout=vgg_nout) model = L.Concat(model, vgg1, vgg2, vgg3, vgg4, vgg5, axis=1) # block 1: dense for i in range(N): model = add_layer(model, growth_rate, dropout) nchannels += growth_rate model = transition(model, nchannels, dropout, weight_filler=msra) # block 2: dense for i in range(N): model = add_layer(model, growth_rate, dropout) nchannels += growth_rate model = transition(model, nchannels, dropout, weight_filler=msra) # block 3: res # nchannels = int(nchannels * 0.6) # for i in range(N): # if i == 0: project = True # else: project = False # model = add_layer(bottom, nchannels, dropout, project=project) block 3: dense for i in range(N): model = add_layer(model, growth_rate, dropout) nchannels += growth_rate model = transition(model, nchannels, dropout, weight_filler=msra) # deep supervision model_deep = L.Convolution(model, kernel_size=1, stride=1, num_output=96, pad=0, bias_term=False, weight_filler=gs_1e_2, param=[dict(lr_mult=1, decay_mult=1)]) model_deep = L.Deconvolution(model_deep, convolution_param=dict(kernel_size=8, stride=4, num_output=3, pad=2, bias_term=True, weight_filler=dict(type='gaussian', std=0.001), bias_filler=dict(type='constant', value=0)), param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) loss_deep = L.Python(\ model_deep, albedo_gt,\ loss_weight=1.0, ntop=1,\ python_param=dict(\ module='l2loss',\ layer='L2LossLayer',\ )\ ) # model = L.Concat(model, model_deep, propagate_down=[True, False]) # block 4 for i in range(N): model = add_layer(model, growth_rate, dropout) nchannels += growth_rate model = transition(model, nchannels, dropout=0., weight_filler=msra) # fuse feature model = L.Convolution(model, kernel_size=1, stride=1, num_output=96, pad=0, bias_term=False, weight_filler=gs_1e_2, bias_filler=dict(type='constant')) # upsample model = L.Deconvolution(model, convolution_param=dict(kernel_size=8, stride=4, num_output=6, pad=2, 
bias_term=True, weight_filler=dict(type='gaussian', std=0.001), bias_filler=dict(type='constant', value=0)), param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)]) # loss loss = L.Python(\ model, albedo_diff_gt,\ loss_weight=1.0, ntop=1,\ python_param=dict(\ module='l2loss-gradient-hist',\ layer='L2LossLayer',\ param_str="{'display': True}"\ )\ ) return to_proto(loss, loss_deep)
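AbdNet() already wires its two losses into to_proto, so turning it into a prototxt is a single write. A minimal usage sketch, assuming the caffe/L/P imports used throughout this file and that make_VGG, upsampleVGG, add_layer and transition are defined alongside it; the output filename is a placeholder:

if __name__ == '__main__':
    # placeholder path, not taken from the original code
    with open('abdnet_train.prototxt', 'w') as f:
        f.write(str(AbdNet()))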
def stem_299x299(bottom): """ input:3x299x299 output:384x35x35 :param bottom: bottom layer :return: layers """ conv1_3x3_s2 = L.Convolution(bottom, kernel_size=3, num_output=32, stride=2, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier', std=0.01), bias_filler=dict(type='constant', value=0.2)) # 32x149x149 conv1_3x3_s2_bn = L.BatchNorm(conv1_3x3_s2, use_global_stats=False, in_place=True) conv1_3x3_s2_scale = L.Scale(conv1_3x3_s2, scale_param=dict(bias_term=True), in_place=True) conv1_3x3_s2_relu = L.ReLU(conv1_3x3_s2, in_place=True) conv2_3x3_s1 = L.Convolution(conv1_3x3_s2, kernel_size=3, num_output=32, stride=1, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier', std=0.01), bias_filler=dict(type='constant', value=0.2)) # 32x147x147 conv2_3x3_s1_bn = L.BatchNorm(conv2_3x3_s1, use_global_stats=False, in_place=True) conv2_3x3_s1_scale = L.Scale(conv2_3x3_s1, scale_param=dict(bias_term=True), in_place=True) conv2_3x3_s1_relu = L.ReLU(conv2_3x3_s1, in_place=True) conv3_3x3_s1 = L.Convolution(conv2_3x3_s1, kernel_size=3, num_output=64, stride=1, pad=1, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier', std=0.01), bias_filler=dict(type='constant', value=0.2)) # 64x147x147 conv3_3x3_s1_bn = L.BatchNorm(conv3_3x3_s1, use_global_stats=False, in_place=True) conv3_3x3_s1_scale = L.Scale(conv3_3x3_s1, scale_param=dict(bias_term=True), in_place=True) conv3_3x3_s1_relu = L.ReLU(conv3_3x3_s1, in_place=True) inception_stem1_3x3_s2 = L.Convolution(conv3_3x3_s1, kernel_size=3, num_output=96, stride=2, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier', std=0.01), bias_filler=dict(type='constant', value=0.2)) # 96x73x73 inception_stem1_3x3_s2_bn = L.BatchNorm(inception_stem1_3x3_s2, use_global_stats=False, in_place=True) inception_stem1_3x3_s2_scale = L.Scale(inception_stem1_3x3_s2, scale_param=dict(bias_term=True), in_place=True) inception_stem1_3x3_s2_relu = L.ReLU(inception_stem1_3x3_s2, in_place=True) inception_stem1_pool = L.Pooling(conv3_3x3_s1, kernel_size=3, stride=2, pool=P.Pooling.MAX) # 64x73x73 inception_stem1 = L.Concat(inception_stem1_3x3_s2, inception_stem1_pool) # 160x73x73 inception_stem2_3x3_reduce = L.Convolution(inception_stem1, kernel_size=1, num_output=64, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier', std=0.01), bias_filler=dict(type='constant', value=0.2)) # 64x73x73 inception_stem2_3x3_reduce_bn = L.BatchNorm(inception_stem2_3x3_reduce, use_global_stats=False, in_place=True) inception_stem2_3x3_reduce_scale = L.Scale(inception_stem2_3x3_reduce, scale_param=dict(bias_term=True), in_place=True) inception_stem2_3x3_reduce_relu = L.ReLU(inception_stem2_3x3_reduce, in_place=True) inception_stem2_3x3 = L.Convolution(inception_stem2_3x3_reduce, kernel_size=3, num_output=96, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier', std=0.01), bias_filler=dict(type='constant', value=0.2)) # 96x71x71 inception_stem2_3x3_bn = L.BatchNorm(inception_stem2_3x3, use_global_stats=False, in_place=True) inception_stem2_3x3_scale = L.Scale(inception_stem2_3x3, scale_param=dict(bias_term=True), in_place=True) inception_stem2_3x3_relu = L.ReLU(inception_stem2_3x3, in_place=True) inception_stem2_7x1_reduce = L.Convolution(inception_stem1, kernel_size=1, num_output=64, param=[dict(lr_mult=1, 
decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier', std=0.01), bias_filler=dict(type='constant', value=0.2)) # 64x73x73 inception_stem2_7x1_reduce_bn = L.BatchNorm(inception_stem2_7x1_reduce, use_global_stats=False, in_place=True) inception_stem2_7x1_reduce_scale = L.Scale(inception_stem2_7x1_reduce, scale_param=dict(bias_term=True), in_place=True) inception_stem2_7x1_reduce_relu = L.ReLU(inception_stem2_7x1_reduce, in_place=True) inception_stem2_7x1 = L.Convolution(inception_stem2_7x1_reduce, kernel_h=7, kernel_w=1, num_output=64, pad_h=3, pad_w=0, stride=1, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier'), bias_filler=dict(type='constant', value=0)) # 64x73x73 inception_stem2_7x1_bn = L.BatchNorm(inception_stem2_7x1, use_global_stats=False, in_place=True) inception_stem2_7x1_scale = L.Scale(inception_stem2_7x1, scale_param=dict(bias_term=True), in_place=True) inception_stem2_7x1_relu = L.ReLU(inception_stem2_7x1, in_place=True) inception_stem2_1x7 = L.Convolution(inception_stem2_7x1, kernel_h=1, kernel_w=7, num_output=64, pad_h=0, pad_w=3, stride=1, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier'), bias_filler=dict(type='constant', value=0)) # 64x73x73 inception_stem2_1x7_bn = L.BatchNorm(inception_stem2_1x7, use_global_stats=False, in_place=True) inception_stem2_1x7_scale = L.Scale(inception_stem2_1x7, scale_param=dict(bias_term=True), in_place=True) inception_stem2_1x7_relu = L.ReLU(inception_stem2_1x7, in_place=True) inception_stem2_3x3_2 = L.Convolution(inception_stem2_1x7, kernel_size=3, num_output=96, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier', std=0.01), bias_filler=dict(type='constant', value=0.2)) # 96x71x71 inception_stem2_3x3_2_bn = L.BatchNorm(inception_stem2_3x3_2, use_global_stats=False, in_place=True) inception_stem2_3x3_2_scale = L.Scale(inception_stem2_3x3_2, scale_param=dict(bias_term=True), in_place=True) inception_stem2_3x3_2_relu = L.ReLU(inception_stem2_3x3_2, in_place=True) inception_stem2 = L.Concat(inception_stem2_3x3, inception_stem2_3x3_2) # 192x71x71 inception_stem3_3x3_s2 = L.Convolution(inception_stem2, kernel_size=3, num_output=192, stride=2, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier', std=0.01), bias_filler=dict(type='constant', value=0.2)) # 192x35x35 inception_stem3_3x3_s2_bn = L.BatchNorm(inception_stem3_3x3_s2, use_global_stats=False, in_place=True) inception_stem3_3x3_s2_scale = L.Scale(inception_stem3_3x3_s2, scale_param=dict(bias_term=True), in_place=True) inception_stem3_3x3_s2_relu = L.ReLU(inception_stem3_3x3_s2, in_place=True) inception_stem3_pool = L.Pooling(inception_stem2, kernel_size=3, stride=2, pool=P.Pooling.MAX) # 192x35x35 inception_stem3 = L.Concat(inception_stem3_3x3_s2, inception_stem3_pool) # 384x35x35 return conv1_3x3_s2, conv1_3x3_s2_bn, conv1_3x3_s2_scale, conv1_3x3_s2_relu, conv2_3x3_s1, conv2_3x3_s1_bn, \ conv2_3x3_s1_scale, conv2_3x3_s1_relu, conv3_3x3_s1, conv3_3x3_s1_bn, conv3_3x3_s1_scale, conv3_3x3_s1_relu, \ inception_stem1_3x3_s2, inception_stem1_3x3_s2_bn, inception_stem1_3x3_s2_scale, inception_stem1_3x3_s2_relu, \ inception_stem1_pool, inception_stem1, inception_stem2_3x3_reduce, inception_stem2_3x3_reduce_bn, \ inception_stem2_3x3_reduce_scale, inception_stem2_3x3_reduce_relu, inception_stem2_3x3, \ inception_stem2_3x3_bn, inception_stem2_3x3_scale, 
inception_stem2_3x3_relu, inception_stem2_7x1_reduce, \ inception_stem2_7x1_reduce_bn, inception_stem2_7x1_reduce_scale, inception_stem2_7x1_reduce_relu, \ inception_stem2_7x1, inception_stem2_7x1_bn, inception_stem2_7x1_scale, inception_stem2_7x1_relu, \ inception_stem2_1x7, inception_stem2_1x7_bn, inception_stem2_1x7_scale, inception_stem2_1x7_relu, \ inception_stem2_3x3_2, inception_stem2_3x3_2_bn, inception_stem2_3x3_2_scale, inception_stem2_3x3_2_relu, \ inception_stem2, inception_stem3_3x3_s2, inception_stem3_3x3_s2_bn, inception_stem3_3x3_s2_scale, \ inception_stem3_3x3_s2_relu, inception_stem3_pool, inception_stem3
def densenet(data_file, mode='train', batch_size=64, depth=40, first_output=16, growth_rate=12, dropout=0.2): data, label = L.Data( source=data_file, backend=P.Data.LMDB, batch_size=batch_size, ntop=2, transform_param=dict( mean_file= "/home/sjxy/densenetcaffe/examples/cifar10/mean.binaryproto")) nchannels = first_output model = L.Convolution(data, kernel_size=3, stride=1, num_output=nchannels, pad=1, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant')) N = (depth - 4) // 3 for i in range(N): model = add_layer(model, growth_rate, dropout) nchannels += growth_rate model = transition(model, nchannels, dropout) for i in range(N): model = add_layer(model, growth_rate, dropout) nchannels += growth_rate model = transition(model, nchannels, dropout) for i in range(N): model = add_layer(model, growth_rate, dropout) nchannels += growth_rate model = L.BatchNorm(model, in_place=False, param=[ dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0) ]) model = L.Scale(model, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0)) model = L.ReLU(model, in_place=True) model = L.Pooling(model, pool=P.Pooling.AVE, global_pooling=True) model = L.InnerProduct(model, num_output=10, bias_term=True, weight_filler=dict(type='xavier'), bias_filler=dict(type='constant')) loss = L.SoftmaxWithLoss(model, label) accuracy = L.Accuracy(model, label) return to_proto(loss, accuracy)
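With the defaults (depth=40, first_output=16, growth_rate=12) the channel bookkeeping in the loops above works out as follows; this is only a hand check of the arithmetic, not additional code to run:

# N = (40 - 4) // 3 = 12 layers per dense block
# after block 1: 16 + 12 * 12 = 160 channels (transition is called with the running nchannels, so the count is unchanged)
# after block 2: 160 + 12 * 12 = 304 channels
# after block 3: 304 + 12 * 12 = 448 channels, then BN + ReLU + global average pooling + 10-way InnerProduct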
def ConvBNLayer(net, from_layer, out_layer, use_bn, use_relu, num_output, kernel_size, pad, stride, dilation=1, use_scale=True, lr_mult=1, conv_prefix='', conv_postfix='', bn_prefix='', bn_postfix='_bn', scale_prefix='', scale_postfix='_scale', bias_prefix='', bias_postfix='_bias', **bn_params): if use_bn: # parameters for convolution layer with batchnorm. kwargs = { 'param': [dict(lr_mult=lr_mult, decay_mult=1)], 'weight_filler': dict(type='gaussian', std=0.01), 'bias_term': False, } eps = bn_params.get('eps', 0.001) moving_average_fraction = bn_params.get('moving_average_fraction', 0.999) use_global_stats = bn_params.get('use_global_stats', False) # parameters for batchnorm layer. bn_kwargs = { 'param': [ dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)], 'eps': eps, 'moving_average_fraction': moving_average_fraction, } bn_lr_mult = lr_mult if use_global_stats: # only specify if use_global_stats is explicitly provided; # otherwise, use_global_stats_ = this->phase_ == TEST; bn_kwargs = { 'param': [ dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)], 'eps': eps, 'use_global_stats': use_global_stats, } # not updating scale/bias parameters bn_lr_mult = 0 # parameters for scale bias layer after batchnorm. if use_scale: sb_kwargs = { 'bias_term': True, 'param': [ dict(lr_mult=bn_lr_mult, decay_mult=0), dict(lr_mult=bn_lr_mult, decay_mult=0)], 'filler': dict(type='constant', value=1.0), 'bias_filler': dict(type='constant', value=0.0), } else: bias_kwargs = { 'param': [dict(lr_mult=bn_lr_mult, decay_mult=0)], 'filler': dict(type='constant', value=0.0), } else: kwargs = { 'param': [ dict(lr_mult=lr_mult, decay_mult=1), dict(lr_mult=2 * lr_mult, decay_mult=0)], 'weight_filler': dict(type='xavier'), 'bias_filler': dict(type='constant', value=0) } conv_name = '{}{}{}'.format(conv_prefix, out_layer, conv_postfix) [kernel_h, kernel_w] = UnpackVariable(kernel_size, 2) [pad_h, pad_w] = UnpackVariable(pad, 2) [stride_h, stride_w] = UnpackVariable(stride, 2) if kernel_h == kernel_w: net[conv_name] = L.Convolution(net[from_layer], num_output=num_output, kernel_size=kernel_h, pad=pad_h, stride=stride_h, **kwargs) else: net[conv_name] = L.Convolution(net[from_layer], num_output=num_output, kernel_h=kernel_h, kernel_w=kernel_w, pad_h=pad_h, pad_w=pad_w, stride_h=stride_h, stride_w=stride_w, **kwargs) if dilation > 1: net.update(conv_name, {'dilation': dilation}) if use_bn: bn_name = '{}{}{}'.format(bn_prefix, out_layer, bn_postfix) net[bn_name] = L.BatchNorm(net[conv_name], in_place=True, **bn_kwargs) if use_scale: sb_name = '{}{}{}'.format(scale_prefix, out_layer, scale_postfix) net[sb_name] = L.Scale(net[bn_name], in_place=True, **sb_kwargs) else: bias_name = '{}{}{}'.format(bias_prefix, out_layer, bias_postfix) net[bias_name] = L.Bias(net[bn_name], in_place=True, **bias_kwargs) if use_relu: relu_name = '{}_relu'.format(conv_name) net[relu_name] = L.ReLU(net[conv_name], in_place=True)
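A hedged usage sketch for ConvBNLayer: it mutates the NetSpec it is given and relies on UnpackVariable from the same file, so the snippet assumes both are importable along with the caffe/L/P imports; the input shape and layer names are illustrative only.

net = caffe.NetSpec()
net.data = L.Input(input_param=dict(shape=[dict(dim=[1, 3, 224, 224])]))
# 3x3/s1 conv + BatchNorm (frozen statistics) + Scale + ReLU on top of 'data'
ConvBNLayer(net, 'data', 'conv1', use_bn=True, use_relu=True,
            num_output=64, kernel_size=3, pad=1, stride=1,
            use_global_stats=True)
print(net.to_proto())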
def qlstm(mode, batchsize, T, question_vocab_size): n = caffe.NetSpec() mode_str = json.dumps({'mode':mode, 'batchsize':batchsize}) # n.data, n.cont, n.img_feature, n.label, n.glove = L.Python(\ # module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=5 ) n.data, n.cont, n.img_feature, n.label = L.Python(\ module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=4 ) # word embedding n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=300, \ weight_filler=dict(type='uniform',min=-0.08,max=0.08)) # n.embed = L.TanH(n.embed_ba) n.embed_scale = L.Scale(n.embed_ba, n.cont, scale_param=dict(dict(axis=0))) n.embed_scale_resh = L.Reshape(n.embed_scale,\ reshape_param=dict(\ shape=dict(dim=[batchsize,1,T,-1]))) # convolution n.word_feature1_1 = L.Convolution(n.embed_scale_resh, kernel_h=1, kernel_w=300, stride=1, num_output=512, pad_h=0, pad_w=0, weight_filler=dict(type='xavier')) # N x C x T x 1 n.word_feature1_3 = L.Convolution(n.embed_scale_resh, kernel_h=3, kernel_w=300, stride=1, num_output=512, pad_h=1, pad_w=0, weight_filler=dict(type='xavier')) n.word_feature1_5 = L.Convolution(n.embed_scale_resh, kernel_h=5, kernel_w=300, stride=1, num_output=512, pad_h=2, pad_w=0, weight_filler=dict(type='xavier')) n.word_feature1_7 = L.Convolution(n.embed_scale_resh, kernel_h=7, kernel_w=300, stride=1, num_output=512, pad_h=3, pad_w=0, weight_filler=dict(type='xavier')) n.word_relu1_1 = L.ReLU(n.word_feature1_1) n.word_relu1_3 = L.ReLU(n.word_feature1_3) n.word_relu1_5 = L.ReLU(n.word_feature1_5) n.word_relu1_7 = L.ReLU(n.word_feature1_7) word_vec1 = [n.word_relu1_1, n.word_relu1_3, n.word_relu1_5, n.word_relu1_7] n.concat_vec1 = L.Concat(*word_vec1, concat_param={'axis': 1}) # N x C' x T x 1 n.word_feature2_1 = L.Convolution(n.concat_vec1, kernel_h=1, kernel_w=1, stride=1, num_output=512, pad_h=0, pad_w=0, weight_filler=dict(type='xavier')) # N x C x T x 1 n.word_feature2_3 = L.Convolution(n.concat_vec1, kernel_h=3, kernel_w=1, stride=1, num_output=512, pad_h=1, pad_w=0, weight_filler=dict(type='xavier')) n.word_feature2_5 = L.Convolution(n.concat_vec1, kernel_h=5, kernel_w=1, stride=1, num_output=512, pad_h=2, pad_w=0, weight_filler=dict(type='xavier')) n.word_feature2_7 = L.Convolution(n.concat_vec1, kernel_h=7, kernel_w=1, stride=1, num_output=512, pad_h=3, pad_w=0, weight_filler=dict(type='xavier')) word_vec2 = [n.word_feature2_1, n.word_feature2_3, n.word_feature2_5, n.word_feature2_7] n.concat_vec2 = L.Concat(*word_vec2, concat_param={'axis': 1}) # N x 4C x T x 1 n.res_1 = L.Eltwise(n.concat_vec1,n.concat_vec2) n.res_1_relu = L.ReLU(n.res_1) n.word_vec_p = L.Pooling(n.res_1_relu, kernel_h=T, kernel_w=1, stride=T, pool=P.Pooling.MAX) # N x C x 1 x 1 n.sentence_vec = L.Dropout(n.word_vec_p, dropout_param={'dropout_ratio':0.5}) n.q_emb_tanh_droped_resh_tiled_1 = L.Tile(n.sentence_vec, axis=2, tiles=14) n.q_emb_tanh_droped_resh_tiled = L.Tile(n.q_emb_tanh_droped_resh_tiled_1, axis=3, tiles=14) n.i_emb_tanh_droped_resh = L.Reshape(n.img_feature,reshape_param=dict(shape=dict(dim=[-1,2048,14,14]))) n.blcf = L.CompactBilinear(n.q_emb_tanh_droped_resh_tiled, n.i_emb_tanh_droped_resh, compact_bilinear_param=dict(num_output=16000,sum_pool=False)) n.blcf_sign_sqrt = L.SignedSqrt(n.blcf) n.blcf_sign_sqrt_l2 = L.L2Normalize(n.blcf_sign_sqrt) n.blcf_droped = L.Dropout(n.blcf_sign_sqrt_l2,dropout_param={'dropout_ratio':0.1}) # multi-channel attention n.att_conv1 = L.Convolution(n.blcf_droped, kernel_size=1, stride=1, 
num_output=512, pad=0, weight_filler=dict(type='xavier')) n.att_conv1_relu = L.ReLU(n.att_conv1) n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1, num_output=2, pad=0, weight_filler=dict(type='xavier')) n.att_reshaped = L.Reshape(n.att_conv2,reshape_param=dict(shape=dict(dim=[-1,2,14*14]))) n.att_softmax = L.Softmax(n.att_reshaped, axis=2) n.att = L.Reshape(n.att_softmax,reshape_param=dict(shape=dict(dim=[-1,2,14,14]))) att_maps = L.Slice(n.att, ntop=2, slice_param={'axis':1}) n.att_map0 = att_maps[0] n.att_map1 = att_maps[1] dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1) n.att_feature0 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map0, dummy) n.att_feature1 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map1, dummy) n.att_feature0_resh = L.Reshape(n.att_feature0, reshape_param=dict(shape=dict(dim=[-1,2048]))) n.att_feature1_resh = L.Reshape(n.att_feature1, reshape_param=dict(shape=dict(dim=[-1,2048]))) n.att_feature = L.Concat(n.att_feature0_resh, n.att_feature1_resh) # merge attention and lstm with compact bilinear pooling n.att_feature_resh = L.Reshape(n.att_feature, reshape_param=dict(shape=dict(dim=[-1,4096,1,1]))) #n.lstm_12_resh = L.Reshape(n.lstm_12, reshape_param=dict(shape=dict(dim=[-1,2048,1,1]))) n.bc_att_lstm = L.CompactBilinear(n.att_feature_resh, n.sentence_vec, compact_bilinear_param=dict(num_output=16000,sum_pool=False)) n.bc_sign_sqrt = L.SignedSqrt(n.bc_att_lstm) n.bc_sign_sqrt_l2 = L.L2Normalize(n.bc_sign_sqrt) n.bc_dropped = L.Dropout(n.bc_sign_sqrt_l2, dropout_param={'dropout_ratio':0.1}) n.bc_dropped_resh = L.Reshape(n.bc_dropped, reshape_param=dict(shape=dict(dim=[-1, 16000]))) n.prediction = L.InnerProduct(n.bc_dropped_resh, num_output=3000, weight_filler=dict(type='xavier')) n.loss = L.SoftmaxWithLoss(n.prediction, n.label) return n.to_proto()
def conv_bn_relu(bottom, kernel_size, num_output, name, deploy, stride=1, pad=0, group=1, conv_bias_term=True): if conv_bias_term: conv = L.Convolution(bottom, kernel_size=kernel_size, stride=stride, num_output=num_output, pad=pad, group=group, weight_filler=dict(type="xavier"), bias_filler=dict(type="constant", value=0), param=[ dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0) ], name="{0}_conv".format(name)) else: conv = L.Convolution(bottom, kernel_size=kernel_size, stride=stride, num_output=num_output, pad=pad, group=group, weight_filler=dict(type="xavier"), bias_term=False, param=[dict(lr_mult=1, decay_mult=1)], name="{0}_conv".format(name)) if deploy: # In our BN layers, the provided mean and variance are strictly computed using # average (not moving average) on a sufficiently large training batch after the training procedure. # The numerical results are very stable (variation of val error < 0.1%). # Using moving average might lead to different results. # from https://github.com/KaimingHe/deep-residual-networks # So set use_global_stats = true in deployment. See also ReNet deployment. batch_norm = L.BatchNorm(conv, in_place=True, batch_norm_param=dict(use_global_stats=True), name="{0}_batch_norm".format(name)) else: # By default, use_global_stats is set to false when the network is in the training # // phase and true when the network is in the testing phase. # from caffe BatchNorm batch_norm = L.BatchNorm(conv, in_place=True, param=[ dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0) ], name="{0}_batch_norm".format(name)) scale = L.Scale(batch_norm, bias_term=True, in_place=True, name="{0}_scale".format(name)) relu = L.ReLU(scale, in_place=True, name="{0}_relu".format(name)) return relu
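The deploy flag above only changes how BatchNorm statistics are handled, so the same helper serves both phases. A sketch under the assumption that this module is importable; the wrapper name and input shape are illustrative:

def build(deploy):
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=[dict(dim=[1, 3, 224, 224])]))
    n.block1 = conv_bn_relu(n.data, kernel_size=7, num_output=64,
                            name='block1', deploy=deploy, stride=2, pad=3)
    return n.to_proto()

train_proto = build(deploy=False)   # BatchNorm accumulates batch statistics
deploy_proto = build(deploy=True)   # BatchNorm applies the stored global statistics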
def lenet(lmdb, batch_size): n = caffe.NetSpec() # Input layer n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb, transform_param=dict(scale=1. / 255), ntop=2) # Residual convolution n.convres = L.Convolution(n.data, kernel_size=5, num_output=12, stride=1, weight_filler=dict(type='xavier')) # No activation for this first layer # Two layers of convolution n.conv1 = L.Convolution(n.convres, kernel_size=7, num_output=64, stride=2, weight_filler=dict(type='xavier')) n.batch_norm1 = L.BatchNorm(n.conv1, in_place=True, param=[ dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0) ]) n.scale1 = L.Scale(n.batch_norm1, bias_term=True, in_place=True) n.relu2 = L.TanH(n.scale1, in_place=True) #n.relu2 = L.ReLU(n.scale1, in_place=True) n.pool1 = L.Pooling(n.relu2, kernel_size=3, stride=2, pool=P.Pooling.MAX) n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=48, stride=1, weight_filler=dict(type='xavier')) n.batch_norm2 = L.BatchNorm(n.conv2, in_place=True, param=[ dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0) ]) n.scale2 = L.Scale(n.batch_norm2, bias_term=True, in_place=True) n.relu3 = L.TanH(n.scale2, in_place=True) #n.relu3 = L.ReLU(n.scale2, in_place=True) n.pool2 = L.Pooling(n.relu3, kernel_size=3, stride=2, pool=P.Pooling.MAX) # Dense classifier n.fc1 = L.InnerProduct(n.pool2, num_output=4096, weight_filler=dict(type='xavier')) n.relu4 = L.ReLU(n.fc1, in_place=True) n.drop1 = L.Dropout(n.relu4, in_place=True) n.fc2 = L.InnerProduct(n.drop1, num_output=4096, weight_filler=dict(type='xavier')) n.relu5 = L.ReLU(n.fc2, in_place=True) n.drop2 = L.Dropout(n.relu5, in_place=True) # Outputs n.score = L.InnerProduct(n.drop2, num_output=2, weight_filler=dict(type='xavier')) n.loss = L.SoftmaxWithLoss(n.score, n.label) return n.to_proto()
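Since lenet() takes the LMDB path and batch size as arguments, generating the train and test definitions is just two calls; the paths below are placeholders:

with open('train.prototxt', 'w') as f:
    f.write(str(lenet('train_lmdb', 64)))
with open('test.prototxt', 'w') as f:
    f.write(str(lenet('test_lmdb', 100)))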
def conv_BN_scale_relu(bottom, nout, ks=3, stride=1, pad=1): conv = L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=nout, pad=pad, bias_term=False, weight_filler=dict(type="msra") ) return conv, L.BatchNorm(conv, in_place=True), L.Scale(conv, in_place=True, bias_term=True), L.ReLU(conv, in_place=True)
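conv_BN_scale_relu returns four tops that all act in place on the same convolution blob, so the next stage can simply consume the last one; a short hedged sketch with illustrative names:

n = caffe.NetSpec()
n.data = L.Input(input_param=dict(shape=[dict(dim=[1, 3, 32, 32])]))
# keep all four tops so BatchNorm/Scale/ReLU get named layers in the prototxt,
# then feed the following block from the in-place ReLU output
n.conv1, n.bn1, n.scale1, n.relu1 = conv_BN_scale_relu(n.data, nout=16)
n.conv2, n.bn2, n.scale2, n.relu2 = conv_BN_scale_relu(n.relu1, nout=32)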
def project_residual(bottom, kernel_size=3, num_out=64, stride=1, pad=0, first=None): # branch 1 if (first == 'both_act'): pre_bn = L.BatchNorm(bottom, in_place=True) pre_scale = L.Scale(pre_bn, scale_param=dict(bias_term=True), in_place=True) pre_relu = L.ReLU(pre_scale, in_place=True) conv_proj = L.Convolution(pre_relu, kernel_size=1, num_output=num_out * 4, stride=stride, pad=0, param=[dict(lr_mult=1, decay_mult=1)], bias_term=False, weight_filler=weight_filler) elif (first != 'pre_act'): pre_bn = L.BatchNorm(bottom, in_place=True) pre_scale = L.Scale(pre_bn, scale_param=dict(bias_term=True), in_place=True) pre_relu = L.ReLU(pre_scale, in_place=True) conv_proj = L.Convolution(bottom, kernel_size=1, num_output=num_out * 4, stride=stride, pad=0, param=[dict(lr_mult=1, decay_mult=1)], bias_term=False, weight_filler=weight_filler) else: pre_relu = bottom pre_bn = bottom pre_scale = bottom pre_relu = bottom conv_proj = L.Convolution(bottom, kernel_size=1, num_output=num_out * 4, stride=stride, pad=0, param=[dict(lr_mult=1, decay_mult=1)], bias_term=False, weight_filler=weight_filler) # branch 2 conv1, bn1, scale1, relu1 = conv_bn_scale_relu(pre_relu, kernel_size=1, num_out=num_out, stride=1, pad=0) conv2, bn2, scale2, relu2 = conv_bn_scale_relu(conv1, kernel_size=3, num_out=num_out, stride=stride, pad=1) conv3 = L.Convolution(relu2, kernel_size=1, num_output=num_out * 4, stride=1, pad=0, param=[dict(lr_mult=1, decay_mult=1)], bias_term=False, weight_filler=weight_filler) eltsum = eltsum_block(conv_proj, conv3) return pre_bn, pre_scale, pre_relu, \ conv_proj, \ conv1, bn1, scale1, relu1, \ conv2, bn2, scale2, relu2, \ conv3, eltsum
def qlstm(mode, batchsize, T, question_vocab_size, embed_size): n = caffe.NetSpec() mode_str = json.dumps({'mode': mode, 'batchsize': batchsize}) n.data, n.cont, n.img_feature, n.label = L.Python(\ module='vqa_data_provider_layer', layer='VQADataProviderLayer', param_str=mode_str, ntop=4 ) # word embedding (static + dynamic) n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=embed_size, \ weight_filler=dict(type='uniform',min=-0.08,max=0.08)) n.embed_scale = L.Scale(n.embed_ba, n.cont, scale_param=dict(dict(axis=0))) # N x T x d_w n.embed_scale_resh = L.Reshape( n.embed_scale, reshape_param=dict(shape=dict(dim=[batchsize, T, embed_size, 1]))) # avg of word embedding n.embed_avg = L.Convolution(n.embed_scale_resh, convolution_param={ 'kernel_size': 1, 'num_output': 1, 'bias_term': False, 'weight_filler': dict(type='constant', value=1) }, param=dict(lr_mult=0, decay_mult=0)) # N x 1 x d_w x 1 n.embed_avg_resh = L.Reshape( n.embed_avg, reshape_param=dict(shape=dict(dim=[batchsize, embed_size, 1, 1]))) n.q_emb_tanh_droped_resh_tiled_1 = L.Tile(n.embed_avg_resh, axis=2, tiles=14) n.q_emb_tanh_droped_resh_tiled = L.Tile(n.q_emb_tanh_droped_resh_tiled_1, axis=3, tiles=14) n.i_emb_tanh_droped_resh = L.Reshape( n.img_feature, reshape_param=dict(shape=dict(dim=[-1, 2048, 14, 14]))) n.blcf = L.CompactBilinear(n.q_emb_tanh_droped_resh_tiled, n.i_emb_tanh_droped_resh, compact_bilinear_param=dict(num_output=16000, sum_pool=False)) n.blcf_sign_sqrt = L.SignedSqrt(n.blcf) n.blcf_sign_sqrt_l2 = L.L2Normalize(n.blcf_sign_sqrt) n.blcf_droped = L.Dropout(n.blcf_sign_sqrt_l2, dropout_param={'dropout_ratio': 0.1}) # multi-channel attention n.att_conv1 = L.Convolution(n.blcf_droped, kernel_size=1, stride=1, num_output=512, pad=0, weight_filler=dict(type='xavier')) n.att_conv1_relu = L.ReLU(n.att_conv1) n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1, num_output=2, pad=0, weight_filler=dict(type='xavier')) n.att_reshaped = L.Reshape( n.att_conv2, reshape_param=dict(shape=dict(dim=[-1, 2, 14 * 14]))) n.att_softmax = L.Softmax(n.att_reshaped, axis=2) n.att = L.Reshape(n.att_softmax, reshape_param=dict(shape=dict(dim=[-1, 2, 14, 14]))) att_maps = L.Slice(n.att, ntop=2, slice_param={'axis': 1}) n.att_map0 = att_maps[0] n.att_map1 = att_maps[1] dummy = L.DummyData(shape=dict(dim=[batchsize, 1]), data_filler=dict(type='constant', value=1), ntop=1) n.att_feature0 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map0, dummy) n.att_feature1 = L.SoftAttention(n.i_emb_tanh_droped_resh, n.att_map1, dummy) n.att_feature0_resh = L.Reshape( n.att_feature0, reshape_param=dict(shape=dict(dim=[-1, 2048]))) n.att_feature1_resh = L.Reshape( n.att_feature1, reshape_param=dict(shape=dict(dim=[-1, 2048]))) n.att_feature = L.Concat(n.att_feature0_resh, n.att_feature1_resh) # merge attention and lstm with compact bilinear pooling n.att_feature_resh = L.Reshape( n.att_feature, reshape_param=dict(shape=dict(dim=[-1, 4096, 1, 1]))) #n.lstm_12_resh = L.Reshape(n.lstm_12, reshape_param=dict(shape=dict(dim=[-1,2048,1,1]))) n.bc_att_lstm = L.CompactBilinear(n.att_feature_resh, n.embed_avg_resh, compact_bilinear_param=dict( num_output=16000, sum_pool=False)) n.bc_sign_sqrt = L.SignedSqrt(n.bc_att_lstm) n.bc_sign_sqrt_l2 = L.L2Normalize(n.bc_sign_sqrt) n.bc_dropped = L.Dropout(n.bc_sign_sqrt_l2, dropout_param={'dropout_ratio': 0.1}) n.bc_dropped_resh = L.Reshape( n.bc_dropped, reshape_param=dict(shape=dict(dim=[-1, 16000]))) n.prediction = L.InnerProduct(n.bc_dropped_resh, num_output=3000, 
weight_filler=dict(type='xavier')) n.loss = L.SoftmaxWithLoss(n.prediction, n.label) return n.to_proto()
def make_resnet(training_data='train_data_path', test_data='test_data_path', mean_file='mean.binaryproto', depth=50): # num_feature_maps = np.array([16, 32, 64]) # feature map size: [32, 16, 8] configs = { 50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3], 200: [3, 24, 36, 3], } block_config = configs[depth] num_feature_maps = [64, 128, 256, 512] n_stage = len(num_feature_maps) n = caffe.NetSpec() # make training data layer n.data, n.label = L.Data(source=training_data, backend=P.Data.LMDB, batch_size=256, ntop=2, transform_param=dict(crop_size=224, mean_file=mean_file, mirror=True), image_data_param=dict(shuffle=True), include=dict(phase=0)) # make test data layer n.test_data, n.test_label = L.Data(source=test_data, backend=P.Data.LMDB, batch_size=100, ntop=2, transform_param=dict( crop_size=224, mean_file=mean_file, mirror=False), include=dict(phase=1)) # conv1 should accept both training and test data layers. But this is inconvenient to code in pycaffe. # You have to write two conv layers for them. To deal with this, I temporarily ignore the test data layer # and let conv1 accept the output of training data layer. Then, after making the whole prototxt, I postprocess # the top name of the two data layers, renaming their names to the same. n.conv = L.Convolution( n.data, kernel_size=7, stride=2, num_output=64, pad=3, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=weight_filler, bias_filler=bias_filler) n.bn = L.BatchNorm(n.conv, in_place=True) n.scale = L.Scale(n.bn, scale_param=dict(bias_term=True), in_place=True) n.relu = L.ReLU(n.scale, in_place=True) n.max_pooling = L.Pooling(n.relu, pool=P.Pooling.MAX, kernel_size=3, stride=2, pad=0) # set up a checkpoint so as to know where we get. checkpoint = 'n.max_pooling' # start making blocks. # num_feature_maps: the number of feature maps for each stage. # suggesting the network has three stages. # nblocks: a parameter from the original paper, telling us how many blocks there are in # each stage. # depth : for i in range(n_stage): num_map = num_feature_maps[i] nblocks = block_config[i] first = None if (i == 0): stride = 1 else: stride = 2 for res in range(nblocks): # stage name stage = 'block' + str(res + 1) + '_stage' + str(i + 1) # use the projecting block when downsample the feature map if res == 0: if (i == 0): first = 'pre_act' else: first = 'both_act' make_res = 'n.' + 'bn_' + stage + '_pre,' + \ 'n.' + 'scale_' + stage + '_pre,' + \ 'n.' + 'relu_' + stage + '_pre,' + \ 'n.' + 'conv_proj_' + stage + '_proj,' + \ 'n.' + 'conv_' + stage + '_a, ' + \ 'n.' + 'bn_' + stage + '_a, ' + \ 'n.' + 'scale_' + stage + '_a, ' + \ 'n.' + 'relu_' + stage + '_a, ' + \ 'n.' + 'conv_' + stage + '_b, ' + \ 'n.' + 'bn_' + stage + '_b, ' + \ 'n.' + 'scale_' + stage + '_b, ' + \ 'n.' + 'relu_' + stage + '_b, ' + \ 'n.' + 'conv_' + stage + '_c, ' + \ 'n.' + 'eltsum_' + stage + \ ' = project_residual(' + checkpoint + ', num_out=num_map, stride=' + str(stride) + ', first=\'' + first + '\')' exec(make_res) checkpoint = 'n.' + 'eltsum_' + stage # where we get continue # most blocks have this shape make_res = 'n.' + 'bn_' + stage + '_pre, ' + \ 'n.' + 'scale_' + stage + '_pre, ' + \ 'n.' + 'relu_' + stage + '_pre, ' + \ 'n.' + 'conv_' + stage + '_a, ' + \ 'n.' + 'bn_' + stage + '_a, ' + \ 'n.' + 'scale_' + stage + '_a, ' + \ 'n.' + 'relu_' + stage + '_a, ' + \ 'n.' + 'conv_' + stage + '_b, ' + \ 'n.' + 'bn_' + stage + '_b, ' + \ 'n.' + 'scale_' + stage + '_b, ' + \ 'n.' + 'relu_' + stage + '_b, ' + \ 'n.' 
+ 'conv_' + stage + '_c, ' + \ 'n.' + 'eltsum_' + stage + \ ' = identity_residual(' + checkpoint + ', num_out=num_map, stride=1)' exec(make_res) checkpoint = 'n.' + 'eltsum_' + stage # where we get # add the bn, relu, ave-pooling layers exec('n.bn_end = L.BatchNorm(' + checkpoint + ', in_place=False)') n.scale_end = L.Scale(n.bn_end, scale_param=dict(bias_term=True), in_place=True) n.relu_end = L.ReLU(n.scale_end, in_place=True) n.pool_global = L.Pooling(n.relu_end, pool=P.Pooling.AVE, global_pooling=True) n.score = L.InnerProduct( n.pool_global, num_output=1000, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='gaussian', std=0.01), bias_filler=dict(type='constant', value=0)) n.loss = L.SoftmaxWithLoss(n.score, n.label) n.acc = L.Accuracy(n.score, n.label) return n.to_proto()
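The comment inside make_resnet notes that the TEST-phase data layer's tops still need to be renamed so both phases feed the same conv1. A hedged sketch of that post-processing step, assuming project_residual, identity_residual and the module-level weight_filler/bias_filler are defined in this file; the paths and the exact substitution strings are illustrative:

proto = str(make_resnet(training_data='train_lmdb', test_data='test_lmdb',
                        mean_file='mean.binaryproto', depth=50))
# crude textual rename so the TEST-phase Data layer also produces "data"/"label"
proto = proto.replace('"test_data"', '"data"').replace('"test_label"', '"label"')
with open('resnet50_trainval.prototxt', 'w') as f:
    f.write(proto)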
def fcn(split): n = caffe.NetSpec() pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892), seed=1337) if split == 'train': pydata_params['sbdd_dir'] = '../../data/sbdd/dataset' pylayer = 'SBDDSegDataLayer' else: pydata_params['voc_dir'] = '../../data/pascal/VOC2011' pylayer = 'VOCSegDataLayer' n.data, n.label = L.Python(module='voc_layers', layer=pylayer, ntop=2, param_str=str(pydata_params)) # the base net n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100) n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) n.pool1 = max_pool(n.relu1_2) n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) n.pool2 = max_pool(n.relu2_2) n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) n.pool3 = max_pool(n.relu3_3) n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) n.pool4 = max_pool(n.relu4_3) n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512) n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512) n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512) n.pool5 = max_pool(n.relu5_3) # fully conv n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0) n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True) n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0) n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True) n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) n.upscore2 = L.Deconvolution(n.score_fr, convolution_param=dict(num_output=21, kernel_size=4, stride=2, bias_term=False), param=[dict(lr_mult=0)]) # scale pool4 skip for compatibility n.scale_pool4 = L.Scale(n.pool4, filler=dict(type='constant', value=0.01), param=[dict(lr_mult=0)]) n.score_pool4 = L.Convolution(n.scale_pool4, num_output=21, kernel_size=1, pad=0, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) n.score_pool4c = crop(n.score_pool4, n.upscore2) n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c, operation=P.Eltwise.SUM) n.upscore_pool4 = L.Deconvolution(n.fuse_pool4, convolution_param=dict(num_output=21, kernel_size=4, stride=2, bias_term=False), param=[dict(lr_mult=0)]) # scale pool3 skip for compatibility n.scale_pool3 = L.Scale(n.pool3, filler=dict(type='constant', value=0.0001), param=[dict(lr_mult=0)]) n.score_pool3 = L.Convolution(n.scale_pool3, num_output=21, kernel_size=1, pad=0, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]) n.score_pool3c = crop(n.score_pool3, n.upscore_pool4) n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c, operation=P.Eltwise.SUM) n.upscore8 = L.Deconvolution(n.fuse_pool3, convolution_param=dict(num_output=21, kernel_size=16, stride=8, bias_term=False), param=[dict(lr_mult=0)]) n.score = crop(n.upscore8, n.data) n.loss = L.SoftmaxWithLoss(n.score, n.label, loss_param=dict(normalize=False, ignore_label=255)) return n.to_proto()
def factorization_inception_resnet_a(bottom): """ input:384x35x35 output:384x35x35 :param bottom: bottom layer :return: layers """ conv_1x1 = L.Convolution(bottom, kernel_size=1, num_output=32, stride=1, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier'), bias_filler=dict(type='constant', value=0)) # 32x35x35 conv_1x1_bn = L.BatchNorm(conv_1x1, use_global_stats=False, in_place=True) conv_1x1_scale = L.Scale(conv_1x1, scale_param=dict(bias_term=True), in_place=True) conv_1x1_relu = L.ReLU(conv_1x1, in_place=True) conv_3x3_reduce = L.Convolution(bottom, kernel_size=1, num_output=32, stride=1, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier'), bias_filler=dict(type='constant', value=0)) # 32x35x35 conv_3x3_reduce_bn = L.BatchNorm(conv_3x3_reduce, use_global_stats=False, in_place=True) conv_3x3_reduce_scale = L.Scale(conv_3x3_reduce, scale_param=dict(bias_term=True), in_place=True) conv_3x3_reduce_relu = L.ReLU(conv_3x3_reduce, in_place=True) conv_3x3 = L.Convolution(conv_3x3_reduce, kernel_size=3, num_output=32, stride=1, pad=1, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier'), bias_filler=dict(type='constant', value=0)) # 32x35x35 conv_3x3_bn = L.BatchNorm(conv_3x3, use_global_stats=False, in_place=True) conv_3x3_scale = L.Scale(conv_3x3, scale_param=dict(bias_term=True), in_place=True) conv_3x3_relu = L.ReLU(conv_3x3, in_place=True) conv_3x3_2_reduce = L.Convolution(bottom, kernel_size=1, num_output=32, stride=1, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier'), bias_filler=dict(type='constant', value=0)) # 32x35x35 conv_3x3_2_reduce_bn = L.BatchNorm(conv_3x3_2_reduce, use_global_stats=False, in_place=True) conv_3x3_2_reduce_scale = L.Scale(conv_3x3_2_reduce, scale_param=dict(bias_term=True), in_place=True) conv_3x3_2_reduce_relu = L.ReLU(conv_3x3_2_reduce, in_place=True) conv_3x3_2 = L.Convolution(conv_3x3_2_reduce, kernel_size=3, num_output=48, stride=1, pad=1, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier'), bias_filler=dict(type='constant', value=0)) # 48x35x35 conv_3x3_2_bn = L.BatchNorm(conv_3x3_2, use_global_stats=False, in_place=True) conv_3x3_2_scale = L.Scale(conv_3x3_2, scale_param=dict(bias_term=True), in_place=True) conv_3x3_2_relu = L.ReLU(conv_3x3_2, in_place=True) conv_3x3_3 = L.Convolution(conv_3x3_2, kernel_size=3, num_output=64, stride=1, pad=1, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier'), bias_filler=dict(type='constant', value=0)) # 64x35x35 conv_3x3_3_bn = L.BatchNorm(conv_3x3_3, use_global_stats=False, in_place=True) conv_3x3_3_scale = L.Scale(conv_3x3_3, scale_param=dict(bias_term=True), in_place=True) conv_3x3_3_relu = L.ReLU(conv_3x3_3, in_place=True) concat = L.Concat(conv_1x1, conv_3x3, conv_3x3_3) # 128x35x35 conv_1x1_2 = L.Convolution(concat, kernel_size=1, num_output=384, stride=1, param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)], weight_filler=dict(type='xavier'), bias_filler=dict(type='constant', value=0)) # 384x35x35 conv_1x1_2_bn = L.BatchNorm(conv_1x1_2, use_global_stats=False, in_place=True) conv_1x1_2_scale = L.Scale(conv_1x1_2, scale_param=dict(bias_term=True), in_place=True) # conv_1x1_2_relu = L.ReLU(conv_1x1_2_scale, in_place=True) # linear activation residual_eltwise = L.Eltwise(bottom, conv_1x1_2, 
eltwise_param=dict(operation=1)) return conv_1x1, conv_1x1_bn, conv_1x1_scale, conv_1x1_relu, conv_3x3_reduce, conv_3x3_reduce_bn, \ conv_3x3_reduce_scale, conv_3x3_reduce_relu, conv_3x3, conv_3x3_bn, conv_3x3_scale, conv_3x3_relu, \ conv_3x3_2_reduce, conv_3x3_2_reduce_bn, conv_3x3_2_reduce_scale, conv_3x3_2_reduce_relu, \ conv_3x3_2, conv_3x3_2_bn, conv_3x3_2_scale, conv_3x3_2_relu, conv_3x3_3, conv_3x3_3_bn, conv_3x3_3_scale, \ conv_3x3_3_relu, concat, conv_1x1_2, conv_1x1_2_bn, conv_1x1_2_scale, residual_eltwise
def Inception_v2(data, cin, co, relu=True, norm=True, is_train=True): ''' An Inception block whose output keeps the same spatial size as the input. @data: input blob to be convolved @cin: number of input channels @co: number of output channels @relu: whether to apply ReLU to the output @norm: whether to apply BatchNorm to the output ''' assert (co % 4 == 0) cos = [co // 4] * 4 # branch 1: 1x1 convolution, stride 1 if is_train: kwargs = {'engine': 3} else: kwargs = {'engine': 3, 'use_global_stats': True} branch1 = L.Convolution(data, kernel_size=1, stride=1, pad=0, num_output=cos[0], weight_filler=dict(type='xavier')) # branch 2: Conv + BN + ReLU + Conv branch2_conv1 = L.Convolution(data, kernel_size=1, stride=1, pad=0, num_output=2 * cos[1], weight_filler=dict(type='xavier')) branch2_norm1 = L.BatchNorm(branch2_conv1, **kwargs) branch2_scale1 = L.Scale(branch2_norm1, bias_term=True) branch2_relu1 = L.ReLU(branch2_scale1, engine=3) branch2 = L.Convolution(branch2_relu1, kernel_size=3, stride=1, pad=1, num_output=cos[1], weight_filler=dict(type='xavier')) # branch 3: Conv(1,1,0) + BN + ReLU + Conv(5,1,2) branch3_conv1 = L.Convolution(data, kernel_size=1, stride=1, pad=0, num_output=2 * cos[2], weight_filler=dict(type='xavier')) branch3_norm1 = L.BatchNorm(branch3_conv1, **kwargs) branch3_scale1 = L.Scale(branch3_norm1, bias_term=True) branch3_relu1 = L.ReLU(branch3_scale1, engine=3) branch3 = L.Convolution(branch3_relu1, kernel_size=5, stride=1, pad=2, num_output=cos[2], weight_filler=dict(type='xavier')) # branch 4: MaxPool + Conv branch4_pool1 = L.Pooling(data, kernel_size=3, stride=1, pad=1, pool=P.Pooling.MAX) branch4 = L.Convolution(branch4_pool1, kernel_size=1, stride=1, pad=0, num_output=cos[3], weight_filler=dict(type='xavier')) # concat branch1, branch2, branch3, branch4 bottom_layers = [branch1, branch2, branch3, branch4] result = L.Concat(*bottom_layers) if norm: result = L.BatchNorm(result, **kwargs) result = L.Scale(result, bias_term=True) if relu: result = L.ReLU(result, engine=3) return result
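Inception_v2 preserves the spatial size and splits co evenly over its four branches, so stacking modules only requires consistent channel counts; a hedged sketch with an illustrative input shape:

n = caffe.NetSpec()
n.data = L.Input(input_param=dict(shape=[dict(dim=[1, 64, 56, 56])]))
# 64 -> 128 channels, then 128 -> 128; height and width stay 56x56
n.incep1 = Inception_v2(n.data, cin=64, co=128)
n.incep2 = Inception_v2(n.incep1, cin=128, co=128)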
def conv_factory(bottom, ks, nout, stride=1, pad=0): conv = L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=nout, pad=pad, bias_term=True, weight_filler=dict(type='msra'), bias_filler=dict(type='constant')) batch_norm = L.BatchNorm(conv, in_place=True, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]) scale = L.Scale(batch_norm, bias_term=True, in_place=True) return scale