def centroid_pos_color_loss(trans_features, computed_spixel_feat, new_spix_indices,
                            num_spixels, l_weight_pos, l_weight_color):
    """Build position/color Euclidean losses against re-pooled superpixel features.

    ``trans_features`` is pooled with ``L.SpixelFeature`` using
    ``new_spix_indices``.  The pooled blob and ``computed_spixel_feat`` are
    each split at index 2 along axis 1, and the matching parts are compared
    with ``L.EuclideanLoss``.

    Returns:
        ``(pos_loss, color_loss)``: the loss on the first parts weighted by
        ``l_weight_pos`` and the loss on the second parts weighted by
        ``l_weight_color``.
    """
    pooling_options = dict(type=P.SpixelFeature.AVGRGB,
                           rgb_scale=1.0,
                           ignore_idx_value=-10,
                           ignore_feature_value=255,
                           max_spixels=int(num_spixels))
    # Gradients flow to the features (first bottom) but not to the indices.
    pooled_feat = L.SpixelFeature(trans_features, new_spix_indices,
                                  spixel_feature_param=pooling_options,
                                  propagate_down=[True, False])

    recon_pos, recon_color = L.Slice(computed_spixel_feat,
                                     slice_param=dict(axis=1, slice_point=2),
                                     ntop=2)
    pooled_pos, pooled_color = L.Slice(pooled_feat,
                                       slice_param=dict(axis=1, slice_point=2),
                                       ntop=2)

    return (L.EuclideanLoss(recon_pos, pooled_pos, loss_weight=l_weight_pos),
            L.EuclideanLoss(recon_color, pooled_color, loss_weight=l_weight_color))
def gru(self, prefix, x, cont, static=None, h=None, batch_size=100, T=0,
        gru_hidden=1000, weight_lr_mult=1, bias_lr_mult=2, weight_decay_mult=1,
        bias_decay_mult=0, weight_filler=None, bias_filler=None):
    """Unroll ``self.gru_unit`` over ``T`` time steps and return the last hidden top.

    Args:
        prefix: Name prefix forwarded to ``gru_unit`` and used for the static top.
        x: Input blob, sliced along axis 0 into ``self.T`` per-step tops.
        cont: Continuation blob, sliced the same way as ``x``.
        static: Optional static input; when given, an InnerProduct + Reshape
            transform of it is registered as ``'<prefix>_x_static'``.
        h: Optional initial hidden state; a dummy data layer of shape
            ``[1, batch_size, gru_hidden]`` is created when it is missing.
        batch_size, gru_hidden: Sizes forwarded to ``gru_unit``.
        T: Number of steps to unroll (``range(T)``).
        weight_lr_mult, bias_lr_mult, weight_decay_mult, bias_decay_mult:
            Learning parameters forwarded to ``gru_unit``.
        weight_filler, bias_filler: Fillers; default to uniform(-0.08, 0.08)
            and constant 0 respectively.

    Returns:
        The hidden-state top produced by the last unrolled step, or the
        initial state when ``T`` is 0.
    """
    if not weight_filler:
        weight_filler = self.uniform_weight_filler(-0.08, 0.08)
    if not bias_filler:
        bias_filler = self.constant_filler(0)
    if not h:
        # Was ``lstm_hidden``, a name that does not exist in this function.
        h = self.dummy_data_layer([1, batch_size, gru_hidden], 1)

    gate_dim = gru_hidden * 3
    if static:
        # assume static NXF blob
        static_transform = L.InnerProduct(static, num_output=gate_dim, axis=2,
                                          weight_filler=weight_filler,
                                          bias_filler=bias_filler)
        # Reshape the projected blob; reshaping ``static`` itself dropped the
        # InnerProduct layer from the graph.
        static_transform = L.Reshape(static_transform,
                                     shape=dict(dim=[1, -1, gate_dim]))
        self.rename_tops(static_transform, '%s_x_static' % prefix)

    # The initial hidden state prepared above is kept (it used to be reset to
    # None here, which made both the ``h`` argument and the dummy layer dead).
    # NOTE(review): slicing uses self.T while the loop uses the T argument;
    # callers presumably pass T == self.T -- confirm.
    x_in = L.Slice(x, ntop=self.T, axis=0)
    cont_in = L.Slice(cont, ntop=self.T, axis=0)

    for t in range(T):
        # NOTE(review): the untransformed ``static`` is forwarded, as before;
        # confirm gru_unit looks the transformed top up by its renamed key.
        h = self.gru_unit(prefix, x_in[t], cont_in[t], static, h,
                          batch_size=batch_size,
                          timestep=t,
                          gru_hidden=gru_hidden,
                          weight_lr_mult=weight_lr_mult,
                          bias_lr_mult=bias_lr_mult,
                          weight_decay_mult=weight_decay_mult,
                          bias_decay_mult=bias_decay_mult,
                          weight_filler=weight_filler,
                          bias_filler=bias_filler)
    return h
def _semantic_regularization(self, xSemPr, xSemLb, semReg):
    """Attach the train-phase semantic loss layers to ``self.netspec``.

    When ``self.semantics == ATTRIBUTES`` a single SigmoidCrossEntropyLoss
    named ``'SCoRe/semLoss'`` is added.  Otherwise the score and label blobs
    are sliced once per key of ``self.constrains``, each slice pair gets a
    SoftmaxWithLoss, and the losses are summed into ``'SCoRe/semLoss'`` by an
    Eltwise layer.

    Args:
        xSemPr: Top holding the semantic scores; must already be in ``ns.tops``.
        xSemLb: Top holding the semantic labels; must already be in ``ns.tops``.
        semReg: Regularisation weight used to scale the losses.
    """
    ns = self.netspec
    if self.semantics == ATTRIBUTES:
        name = 'SCoRe/semLoss'
        ns[name] = L.SigmoidCrossEntropyLoss(
            *[xSemPr, xSemLb],
            name=name,
            loss_weight=semReg / (len(self.constrains) * np.sqrt(2.)) * 10.,
            include=dict(phase=caffe.TRAIN))
        return

    c_keys = list(self.constrains.keys())
    losses = ['SCoRe/semLoss/%s' % key for key in c_keys]
    scores = ['SCoRe/semLoss/%s/scores' % key for key in c_keys]
    labels = ['SCoRe/semLoss/%s/labels' % key for key in c_keys]

    # Slice semantic scores.  ``items()`` works on Python 2 and 3
    # (``iteritems()`` is Python-2-only).
    xSemPr_name = [k for k, v in ns.tops.items() if v == xSemPr][0]
    slice_scores = L.Slice(name='SCoRe/semLoss/slice_scores',
                           bottom=[xSemPr_name],
                           ntop=len(scores),
                           top=scores,
                           in_place=True,
                           slice_point=np.cumsum(self.num_states)[:-1].tolist(),
                           include=dict(phase=caffe.TRAIN))

    # Slice semantic labels: one label column per constraint.  The slice
    # points must be a real list; a Python 3 ``range`` object is not one.
    xSemLb_name = [k for k, v in ns.tops.items() if v == xSemLb][0]
    slice_labels = L.Slice(name='SCoRe/semLoss/slice_labels',
                           bottom=[xSemLb_name],
                           ntop=len(labels),
                           top=labels,
                           in_place=True,
                           slice_point=list(range(1, len(self.constrains))),
                           include=dict(phase=caffe.TRAIN))

    # Add supervision to each slice
    for i, xLoss in enumerate(losses):
        ns[xLoss] = L.SoftmaxWithLoss(
            *[slice_scores[i], slice_labels[i]],
            name=xLoss,
            loss_weight=semReg / len(self.constrains),
            include=dict(phase=caffe.TRAIN))

    # Summarize supervisions for display
    ns['SCoRe/semLoss'] = L.Eltwise(
        *[ns[l] for l in losses],
        name='SCoRe/semLoss',
        operation=P.Eltwise.SUM,
        coeff=[semReg / len(self.constrains)] * len(losses),
        include=dict(phase=caffe.TRAIN))
def add_cnn(n, data, act, batch_size, T, K, num_step, mode='train'):
    """Add the encoder/decoder prediction network to NetSpec ``n`` and return it.

    ``data`` and ``act`` are flattened over axes 1..2 first.

    * ``mode == 'train'``: frames are sliced into ``T`` tops and actions into
      ``T - 1`` tops along axis 1.  The first ``K`` frames seed a sliding input
      window; for each of ``num_step`` steps the window is concatenated,
      encoded, decoded with the matching action, and the prediction replaces
      the oldest window entry.  Frames ``K..T-1`` form the label and a
      Euclidean loss compares them with the predictions.
    * any other mode: a single encode/decode pass over the flattened blobs.
    """
    n.x_flat = L.Flatten(data, axis=1, end_axis=2)
    n.act_flat = L.Flatten(act, axis=1, end_axis=2)

    if mode != 'train':
        encoded = add_conv_enc(n, n.x_flat)
        n.tops['x_hat' + tag(K + 1)] = add_decoder(n, encoded, n.act_flat,
                                                   flatten=False)
        return n

    frames = L.Slice(n.x_flat, axis=1, ntop=T)
    actions = L.Slice(n.act_flat, axis=1, ntop=T - 1)

    context_frames = []
    target_frames = []
    unused_actions = []  # actions before step K-1 feed no decoder; silenced below
    for idx in range(T):
        suffix = tag(idx + 1)
        n.tops['x' + suffix] = frames[idx]
        if idx < K:
            context_frames.append(frames[idx])
        if idx < T - 1:
            n.tops['act' + suffix] = actions[idx]
            if idx < K - 1:
                unused_actions.append(n.tops['act' + suffix])
        if idx >= K:
            target_frames.append(frames[idx])
    n.label = L.Concat(*target_frames, axis=0)

    window = list(context_frames)
    for step in range(num_step):
        step_tag = tag(step + 1) if step > 0 else ''
        action_key = 'act' + tag(step + K)
        pred_key = 'x_hat' + tag(step + K + 1)
        input_key = 'input' + step_tag

        n.tops[input_key] = L.Concat(*window, axis=1)
        encoded = add_conv_enc(n, n.tops[input_key], tag=step_tag)
        n.tops[pred_key] = add_decoder(n, encoded, n.tops[action_key],
                                       flatten=False, tag=step_tag)
        # Slide the window: drop the oldest frame, append the new prediction.
        window.pop(0)
        window.append(n.tops[pred_key])

    predictions = [n.tops['x_hat' + tag(idx + 1)] for idx in range(K, T)]
    n.x_hat = L.Concat(*predictions, axis=0)
    n.silence = L.Silence(*unused_actions, ntop=0)
    n.l2_loss = L.EuclideanLoss(n.x_hat, n.label)
    return n
def data_layer_trimese(net, inputdb, mean_file, batch_size, net_type, height,
                       width, nchannels, slice_points, crop_size=-1):
    """Create the stacked data layer and split its first top into three tops.

    The data layer is built by ``data_layer_stacked``; its first data top is
    sliced along axis 1 at ``slice_points`` by a layer named ``"data_trimese"``.

    Returns:
        ``(slices, label)``: the three sliced tops and the label returned by
        ``data_layer_stacked``.
    """
    stacked, label = data_layer_stacked(net, inputdb, mean_file, batch_size,
                                        net_type, height, width, nchannels,
                                        crop_size=crop_size)
    split_options = dict(axis=1, slice_point=slice_points)
    planes = L.Slice(stacked[0], ntop=3, name="data_trimese",
                     slice_param=split_options)
    return planes, label
def context_supervision_loss(self, distance, lw=1, ind_loss=None):
    """Build the ranking loss used for context supervision.

    Distance is positive; the ground-truth distance should be SMALLER than
    the other distances.  ``distance`` is sliced into 21 tops along axis 1:
    slice 0 is the ground truth and slices 1..20 are the alternatives.  For
    each alternative the hinge ``relu(gt - other + self.margin)`` is built and
    reduced; the reduced losses are summed and scaled by 1/21.

    Args:
        distance: Blob holding the 21 distances along axis 1.
        lw: Loss weight of the returned top.
        ind_loss: Optional blob; when given, each hinge is reshaped to
            ``[self.batch_size, 1]`` and multiplied element-wise by it.

    Returns:
        The loss top.
    """
    num_slices = 21
    parts = L.Slice(distance, ntop=num_slices, axis=1)
    gt_distance = parts[0]
    setattr(self.n, 'gt_slice', gt_distance)

    per_pair_losses = []
    for idx in range(1, num_slices):
        other = parts[idx]
        setattr(self.n, 'context_slice_%d' % idx, other)

        # gt - other + margin, clipped at zero
        negated = L.Power(other, scale=-1)
        difference = L.Eltwise(gt_distance, negated, operation=1)  # 1 = SUM
        shifted = L.Power(difference, shift=self.margin)
        hinge = L.ReLU(shifted, in_place=False)

        if ind_loss:
            hinge = L.Reshape(hinge, shape=dict(dim=[self.batch_size, 1]))
            hinge = L.Eltwise(hinge, ind_loss, operation=0)  # 0 = PROD
        setattr(self.n, 'max_sum_margin_relu_%d' % idx, hinge)

        per_pair_losses.append(L.Reduction(hinge, operation=4))  # 4 = MEAN

    total = L.Eltwise(*per_pair_losses, operation=1)
    return L.Power(total, scale=1 / 21., loss_weight=[lw])
def exmaple_use_of_lstm():
    """Build an example NetSpec that unrolls an LSTM over three time steps.

    Dummy blobs provide the zero-initialised states ``h0``/``c0`` and the
    input ``X``; ``X`` is sliced per time step, ``single_time_step_lstm`` is
    chained ``T`` times (the weights object returned by one step is passed to
    the next), and all hidden states are concatenated into ``H``.

    Returns:
        The populated ``caffe.NetSpec``.
    """
    T = 3  # number of time steps
    B = 10  # batch size
    lstm_output = 500  # dimension of LSTM unit

    # use net spec
    ns = caffe.NetSpec()

    # we need initial values for h and c
    ns.h0 = L.DummyData(name='h0',
                        dummy_data_param={'shape': {'dim': [1, B, lstm_output]},
                                          'data_filler': {'type': 'constant', 'value': 0}})
    ns.c0 = L.DummyData(name='c0',
                        dummy_data_param={'shape': {'dim': [1, B, lstm_output]},
                                          'data_filler': {'type': 'constant', 'value': 0}})

    # simulate input X over T time steps and B sequences (batch size)
    ns.X = L.DummyData(name='X',
                       dummy_data_param={'shape': {'dim': [T, B, 128, 10, 10]}})

    # slice X for T time steps; the slice points must be a real list, which a
    # Python 3 ``range`` object is not
    xt = L.Slice(ns.X, name='slice_X', ntop=T,
                 slice_param={'axis': 0, 'slice_point': list(range(1, T))})

    # unrolling
    h = ns.h0
    c = ns.c0
    lstm_weights = None
    tops = []
    for t in range(T):  # was xrange(): NameError on Python 3
        c, h, lstm_weights = single_time_step_lstm(
            ns, h, c, xt[t], 't' + str(t) + '/', lstm_output, lstm_weights)
        tops.append(h)
        setattr(ns, 'c' + str(t), c)
        setattr(ns, 'h' + str(t), h)

    # concat all LSTM tops (h[t]) to a single layer
    ns.H = L.Concat(*tops, name='concat_h', concat_param={'axis': 0})
    return ns
def concat_slice_net():
    """Return the proto of a toy net chaining DummyData -> Slice -> Concat/Eltwise.

    The dummy blob is sliced into three tops ``a``, ``b``, ``c`` (slice points
    20 and 30, axis 0); ``d`` concatenates ``a`` and ``b`` along axis 0 and
    ``e`` is an Eltwise layer (default operation) over ``a`` and ``c``.
    """
    spec = caffe.NetSpec()
    dummy_options = dict(num=20, channels=50, height=64, width=64,
                         data_filler=dict(type="gaussian"))
    spec.data = L.DummyData(dummy_data_param=dummy_options)

    pieces = L.Slice(spec.data, ntop=3, slice_point=[20, 30], axis=0)
    spec.a, spec.b, spec.c = pieces

    spec.d = L.Concat(spec.a, spec.b, axis=0)
    spec.e = L.Eltwise(spec.a, spec.c)
    return spec.to_proto()
def test_slice(self):
    """Check a Slice layer that splits a [6, 4, 64, 64] input into three tops.

    The split is along axis 1 at points 1 and 3; the net is converted with
    ``_netspec_to_model`` and handed to ``_test_model``.
    """
    spec = caffe.NetSpec()
    spec.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
    outputs = L.Slice(spec.input1, ntop=3, axis=1, slice_point=[1, 3])
    spec.output1, spec.output2, spec.output3 = outputs
    model_args = self._netspec_to_model(spec, 'slice')
    self._test_model(*model_args)
def slice_layer(net, layername, inputlayer, axis, slice_points):
    """Slice ``inputlayer`` into three tops and register them on ``net``.

    The tops are stored as attributes ``<layername>_0``, ``<layername>_1``
    and ``<layername>_2``.

    Returns:
        The tops produced by ``L.Slice``.
    """
    split_options = dict(axis=axis, slice_point=slice_points)
    pieces = L.Slice(inputlayer, ntop=3, name=layername,
                     slice_param=split_options)
    index = 0
    for piece in pieces:
        setattr(net, layername + "_%d" % (index), piece)
        index += 1
    return pieces
def mPoseNet_Decomp_3S_Train(net, data_layer="data", label_layer="label",
                             train=True, **decomp_kwargs):
    """Assemble the three-stage decomposed pose network on ``net`` and return it.

    The label blob is sliced along axis 1 into vec/heat masks and templates
    (plus a ``gt`` top when ``train`` is false); the masked labels are the
    element-wise products mask * template.  The backbone, the multi-scale
    fusion layer ``convf`` and three stages follow.  ``decomp_kwargs`` is
    forwarded to the backbone and to every stage.
    """
    # input
    if train:
        top_names = ("vec_mask", "heat_mask", "vec_temp", "heat_temp")
        split_points = [34, 52, 86]
    else:
        top_names = ("vec_mask", "heat_mask", "vec_temp", "heat_temp", "gt")
        split_points = [34, 52, 86, 104]
    label_parts = L.Slice(net[label_layer], ntop=len(top_names),
                          slice_param=dict(slice_point=split_points, axis=1))
    for top_name, part in zip(top_names, label_parts):
        setattr(net, top_name, part)

    # label
    net.vec_label = L.Eltwise(net.vec_mask, net.vec_temp,
                              eltwise_param=dict(operation=P.Eltwise.PROD))
    net.heat_label = L.Eltwise(net.heat_mask, net.heat_temp,
                               eltwise_param=dict(operation=P.Eltwise.PROD))

    # Darknet19
    net = YoloNetPart_Decomp(net, from_layer=data_layer, use_bn=True,
                             use_layers=5, use_sub_layers=5, final_pool=False,
                             lr=1, decay=1, **decomp_kwargs)

    # concat conv4_3 & conv5_5
    net = UnifiedMultiScaleLayers(net, layers=["conv4_3_c", "conv5_5_c"],
                                  tags=["Ref", "Up"], unifiedlayer="convf",
                                  upsampleMethod="Reorg")

    # Stages
    baselayer = "convf"
    use_3_layers = 5
    use_1_layers = 0
    net = mPose_StageX_decomp_Train(
        net, from_layer=baselayer, out_layer="concat_stage1", stage=1,
        mask_vec="vec_mask", mask_heat="heat_mask",
        label_vec="vec_label", label_heat="heat_label",
        use_3_layers=use_3_layers, use_1_layers=use_1_layers, short_cut=True,
        base_layer=baselayer, lr=4, decay=1, **decomp_kwargs)
    net = mPose_StageX_decomp_Train(
        net, from_layer="concat_stage1", out_layer="concat_stage2", stage=2,
        mask_vec="vec_mask", mask_heat="heat_mask",
        label_vec="vec_label", label_heat="heat_label",
        use_3_layers=use_3_layers, use_1_layers=use_1_layers, short_cut=True,
        base_layer=baselayer, lr=4, decay=1, **decomp_kwargs)
    # The last stage has no short cut and is not given a base layer.
    net = mPose_StageX_decomp_Train(
        net, from_layer="concat_stage2", out_layer="concat_stage3", stage=3,
        mask_vec="vec_mask", mask_heat="heat_mask",
        label_vec="vec_label", label_heat="heat_label",
        use_3_layers=use_3_layers, use_1_layers=use_1_layers, short_cut=False,
        lr=4, decay=1, **decomp_kwargs)
    return net
def net():
    """Return the proto of an Input layer sliced into three tops along axis 0.

    The input shape comes from the module-level ``data_shape``.
    """
    spec = caffe.NetSpec()
    spec.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    # slice_point lists the split points
    pieces = L.Slice(spec.data, slice_param={'axis': 0}, ntop=3,
                     slice_point=[10, 15])
    spec.dout1, spec.dout2, spec.dout3 = pieces
    return spec.to_proto()
def position_color_loss(recon_feat, pixel_features, pos_weight, col_weight):
    """Build separate Euclidean losses for the two parts of the feature blobs.

    Both blobs are split at index 2 along axis 1; the first parts are compared
    with weight ``pos_weight`` and the second parts with ``col_weight``.

    Returns:
        ``(pos_loss, color_loss)``.
    """
    recon_pos, recon_color = L.Slice(recon_feat,
                                     slice_param=dict(axis=1, slice_point=2),
                                     ntop=2)
    pixel_pos, pixel_color = L.Slice(pixel_features,
                                     slice_param=dict(axis=1, slice_point=2),
                                     ntop=2)
    return (L.EuclideanLoss(recon_pos, pixel_pos, loss_weight=pos_weight),
            L.EuclideanLoss(recon_color, pixel_color, loss_weight=col_weight))
def centroid_pos_color_loss2(trans_features, new_spixel_features, num_spixels,
                             l_weight_pos, l_weight_color):
    """Build position/color Euclidean losses between two feature blobs.

    ``new_spixel_features`` and ``trans_features`` are each split at index 2
    along axis 1 and the matching parts are compared.  ``num_spixels`` is
    accepted but not used.

    Returns:
        ``(pos_loss, color_loss)`` weighted by ``l_weight_pos`` and
        ``l_weight_color``.
    """
    spixel_pos, spixel_color = L.Slice(new_spixel_features,
                                       slice_param=dict(axis=1, slice_point=2),
                                       ntop=2)
    trans_pos, trans_color = L.Slice(trans_features,
                                     slice_param=dict(axis=1, slice_point=2),
                                     ntop=2)
    return (L.EuclideanLoss(spixel_pos, trans_pos, loss_weight=l_weight_pos),
            L.EuclideanLoss(spixel_color, trans_color, loss_weight=l_weight_color))
def generate_caffe_prototxt(self, caffe_net, layer):
    """Emit this unit's Caffe layers into ``caffe_net`` and return the output top.

    With ``stride == 1`` the input is split in half along axis 1 and only the
    second half goes through ``self.conv``.  Otherwise the whole input goes
    through both ``self.conv0`` and ``self.conv``.  The two branches are then
    concatenated along axis 1 and passed through ``self.shuffle``.
    """
    if self.stride != 1:
        branch_a = slim.generate_caffe_prototxt(self.conv0, caffe_net, layer)
        branch_b = slim.generate_caffe_prototxt(self.conv, caffe_net, layer)
    else:
        branch_a, branch_b = L.Slice(layer, ntop=2, axis=1,
                                     slice_point=[self.in_channels//2])
        caffe_net[self.g_name + '/slice1'] = branch_a
        caffe_net[self.g_name + '/slice2'] = branch_b
        branch_b = slim.generate_caffe_prototxt(self.conv, caffe_net, branch_b)

    merged = L.Concat(branch_a, branch_b, axis=1)
    caffe_net[self.g_name + '/concat'] = merged
    return slim.generate_caffe_prototxt(self.shuffle, caffe_net, merged)
def RemPoseNet_Train(net, data_layer="data", label_layer="label"):
    """Assemble the three-stage RemPose training network on ``net`` and return it.

    The label blob is sliced along axis 1 into vec/heat masks and templates,
    and the masked labels are the products mask * template.  The base network
    is followed by a fusion of the stage-4 and stage-5 feature layers into
    ``convf`` and by three pose stages.
    """
    # input
    label_parts = L.Slice(net[label_layer], ntop=4,
                          slice_param=dict(slice_point=[34, 52, 86], axis=1))
    net.vec_mask, net.heat_mask, net.vec_temp, net.heat_temp = label_parts

    # label
    net.vec_label = L.Eltwise(net.vec_mask, net.vec_temp,
                              eltwise_param=dict(operation=P.Eltwise.PROD))
    net.heat_label = L.Eltwise(net.heat_mask, net.heat_temp,
                               eltwise_param=dict(operation=P.Eltwise.PROD))

    # BaseNet
    net = RemBaseNet(net, from_layer=data_layer, use_bn=base_use_bn,
                     use_conv6=False, lr=1, decay=1)

    # Stage-5 / Stage-4: use the second-to-last layer when the stage ends
    # with a strided convolution.
    stage5_depth = len(stage5_layers) - 1 if use_stride_conv[4] else len(stage5_layers)
    stage_5 = "{}_{}".format(conv_stage_name[4], stage5_depth)
    stage4_depth = len(stage4_layers) - 1 if use_stride_conv[3] else len(stage4_layers)
    stage_4 = "{}_{}".format(conv_stage_name[3], stage4_depth)

    net = UnifiedMultiScaleLayers(net, layers=[stage_4, stage_5],
                                  tags=["Ref", "Up"], unifiedlayer="convf",
                                  upsampleMethod="Reorg")

    # Stages: (input layer, output layer, stage index, short cut)
    baselayer = "convf"
    stage_lr = 1
    stage_plan = (
        (baselayer, "concat_stage1", 1, True),
        ("concat_stage1", "concat_stage2", 2, True),
        ("concat_stage2", "concat_stage3", 3, False),
    )
    for source, target, index, with_short_cut in stage_plan:
        net = RemPoseStage_Train(net, from_layer=source, out_layer=target,
                                 stage=index,
                                 mask_vec="vec_mask", mask_heat="heat_mask",
                                 label_vec="vec_label", label_heat="heat_label",
                                 short_cut=with_short_cut, base_layer=baselayer,
                                 lr=stage_lr, decay=1)
    return net
def generate_scores(split, config):
    """Return the proto of the language + image scoring network.

    A Python data layer (``TossLayer``) provides the tops.  The word sequence
    is embedded and fed to an LSTM, and only the LSTM's last time step is
    kept.  That feature and the image feature are L2-normalised, concatenated
    with the spatial feature, and scored by a one-hidden-layer MLP trained
    with a sigmoid cross-entropy loss.
    """
    n = caffe.NetSpec()
    batch_size = config.N
    mode_str = str(dict(split=split, batch_size=batch_size))
    data_tops = L.Python(module=config.data_provider, layer='TossLayer',
                         param_str=mode_str, ntop=5)
    n.language, n.cont, n.img_feature, n.spatial, n.label = data_tops

    # embedding
    n.embed = L.Embed(n.language,
                      input_dim=config.vocab_size,
                      num_output=config.embed_dim,
                      weight_filler=dict(type='uniform', min=-0.08, max=0.08))

    # LSTM
    lstm_options = dict(num_output=config.lstm_dim,
                        weight_filler=dict(type='uniform', min=-0.08, max=0.08),
                        bias_filler=dict(type='constant', value=0))
    n.lstm = L.LSTM(n.embed, n.cont, recurrent_param=lstm_options)

    # Keep only the last time step; the earlier ones are named and silenced.
    steps = L.Slice(n.lstm, ntop=config.T, slice_param=dict(axis=0))
    for idx in range(config.T - 1):
        setattr(n, 'slice' + str(idx), steps[idx])
        setattr(n, 'silence' + str(idx), L.Silence(steps[idx], ntop=0))
    n.lstm_out = steps[-1]
    n.lstm_feat = L.Reshape(
        n.lstm_out,
        reshape_param=dict(shape=dict(dim=[-1, config.lstm_dim])))

    # L2 Normalize image and language features
    n.img_l2norm = L.L2Normalize(n.img_feature)
    n.lstm_l2norm = L.L2Normalize(n.lstm_feat)
    n.img_l2norm_resh = L.Reshape(
        n.img_l2norm,
        reshape_param=dict(shape=dict(dim=[-1, config.D_im])))
    n.lstm_l2norm_resh = L.Reshape(
        n.lstm_l2norm,
        reshape_param=dict(shape=dict(dim=[-1, config.D_text])))

    # Concatenate
    n.feat_all = L.Concat(n.lstm_l2norm_resh, n.img_l2norm_resh, n.spatial,
                          concat_param=dict(axis=1))

    # MLP Classifier over concatenated feature
    n.mlp_l1, n.mlp_relu1 = fc_relu(n.feat_all, config.mlp_hidden_dims)
    if not config.mlp_dropout:
        n.scores = fc(n.mlp_relu1, 1)
    else:
        n.mlp_drop1 = L.Dropout(n.mlp_relu1, dropout_ratio=0.5, in_place=True)
        n.scores = fc(n.mlp_drop1, 1)

    # Loss Layer
    n.loss = L.SigmoidCrossEntropyLoss(n.scores, n.label)
    return n.to_proto()
def compile_time_operation(self, learning_option, cluster):
    """Define the split (Caffe Slice) operation for the input blob.

    Reads the ``size_split`` attribute (required: the size of every output
    along ``axis``) and the optional ``axis`` attribute (default 0), creates
    an ``L.Slice`` layer with one top per entry of ``size_split``, and
    registers each top and its dimension as ``output<idx>``.

    Args:
        learning_option: Not used here.
        cluster: Not used here.

    Raises:
        Exception: If ``size_split`` is not declared.
    """
    # get input
    input_ = self.get_input('input')
    indim = self.get_dimension('input')

    # required field
    # WARNING: size_split is only required in Caffe, not TF or MXNet
    size_split = self.get_attr('size_split', default=None)
    if size_split is None:
        raise Exception(
            '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                'size_split', self.name))

    # Caffe expects the cumulative end index of every output except the
    # last one, i.e. exactly ``ntop - 1`` increasing slice points.
    slice_point = []
    offset = 0
    for size in size_split[:-1]:
        offset += size
        slice_point.append(offset)

    # optional field
    axis = self.get_attr('axis', default=0)
    slice_param = {'axis': axis, 'slice_point': slice_point}

    # get output dimension: same as the input except along ``axis``, where
    # output ``i`` has size ``size_split[i]``
    ntop = len(size_split)
    outdim = [
        [size_split[i] if j == axis else dim for j, dim in enumerate(indim)]
        for i in range(ntop)
    ]

    slices = L.Slice(input_, name=self.name, slice_param=slice_param, ntop=ntop)
    if ntop == 1:
        # With a single top NetSpec returns the top itself, not a tuple.
        slices = (slices,)

    # set output
    for idx, val in enumerate(slices):
        self.set_output('output{0}'.format(idx), val)
        self.set_dimension('output{0}'.format(idx), outdim[idx])
def res50_train(mean_value, list_file, is_train, batch_size):
    """Return the prototxt text of the ResNet-50 re-identification training net.

    A ``ReidData`` layer feeds the ResNet-50 body.  The final feature feeds an
    identity classifier (751 outputs) and is also split at ``batch_size``
    along axis 0 into two parts whose element-wise product feeds a 2-way
    classifier trained against the pair label.
    """
    net = caffe.NetSpec()

    # data layer
    transform_options = dict(mirror=True, crop_size=224, mean_value=mean_value)
    reid_options = dict(source=list_file, batch_size=batch_size,
                        new_height=256, new_width=256,
                        pos_fraction=1, neg_fraction=1,
                        pos_limit=1, neg_limit=4,
                        pos_factor=1, neg_factor=1.01)
    net.data, net.label = L.ReidData(transform_param=transform_options,
                                     reid_data_param=reid_options,
                                     ntop=2)

    net, final = res50_body(net, 'data', '', is_train)

    param = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]
    net['score'] = fc_relu(net[final], nout=751, is_train=is_train,
                           has_relu=False, param=param)

    net['label_dif'] = L.PairReidLabel(net['label'], propagate_down=[0], ntop=1)
    pair_parts = L.Slice(net[final],
                         slice_param=dict(axis=0, slice_point=batch_size),
                         ntop=2)
    net['feature_a'], net['feature_b'] = pair_parts
    # Despite its name this top is the element-wise product of the two parts.
    net['euclidean'] = L.Eltwise(net['feature_a'], net['feature_b'],
                                 operation=P.Eltwise.PROD)
    net['score_dif'] = fc_relu(net['euclidean'], nout=2, is_train=is_train,
                               has_relu=False, param=param)

    net['loss'] = L.SoftmaxWithLoss(net['score'], net['label'],
                                    propagate_down=[1, 0], loss_weight=0.5)
    net['loss_dif'] = L.SoftmaxWithLoss(net['score_dif'], net['label_dif'],
                                        propagate_down=[1, 0], loss_weight=1)
    return str(net.to_proto())
def bilinear_interpolation_fixed(net, input, level, num_output=1, pad=1,
                                 kernel_size=4, stride=2):
    """Upsample a two-channel blob with a frozen bilinear Deconvolution.

    ``input`` is split at channel 1 into an x and a y part; both go through
    one Deconvolution layer (bilinear filler, ``lr_mult=0``, no bias) and the
    outputs are concatenated back along axis 1.  The intermediate tops are
    registered on ``net`` under names derived from ``level``.

    Returns:
        ``(net, output)``.
    """
    # NOTE(review): the y deconv name has no underscore before the level,
    # unlike the other three names; kept as is so layer names do not change.
    name_slice_x = 'slice_x_{}'.format(level)
    name_slice_y = 'slice_y_{}'.format(level)
    name_deconv_x = 'slice_conv_x_{}'.format(level)
    name_deconv_y = 'slice_conv_y{}'.format(level)

    part_x, part_y = L.Slice(input, slice_param=dict(axis=1, slice_point=1),
                             ntop=2)
    setattr(net, name_slice_x, part_x)
    setattr(net, name_slice_y, part_y)

    deconv_options = dict(num_output=num_output,
                          pad=pad,
                          kernel_size=kernel_size,
                          stride=stride,
                          weight_filler=dict(type='bilinear'),
                          bias_term=False,
                          engine=2)
    up_x, up_y = L.Deconvolution(part_x, part_y,
                                 param=dict(lr_mult=0, decay_mult=0),
                                 convolution_param=deconv_options,
                                 ntop=2)
    setattr(net, name_deconv_x, up_x)
    setattr(net, name_deconv_y, up_y)

    output = L.Concat(up_x, up_y, concat_param=dict(axis=1))
    return net, output
def _make_module(model_path, n, i_channels, i_size, axis, slice_point):
    """Write a two-output Slice test network and its weights to ``model_path``.

    Creates ``test.prototxt`` (an Input of shape
    ``[n, i_channels, i_size[0], i_size[1]]`` followed by a Slice layer) and
    saves the instantiated net as ``test.caffemodel``.
    """
    spec = caffe.NetSpec()
    input_shape = [n, i_channels, i_size[0], i_size[1]]
    spec.data = L.Input(name="data", input_param={"shape": {"dim": input_shape}})
    # when ntop > 2, it seems that there is a bug for slice in caffe
    spec.s1, spec.s2 = L.Slice(spec.data, name='slice', ntop=2,
                               slice_point=slice_point, axis=axis)

    prototxt_path = os.path.join(model_path, 'test.prototxt')
    with open(prototxt_path, 'w') as handle:
        handle.write(str(spec.to_proto()))

    # The file is closed (and flushed) before Caffe reads it back.
    module = caffe.Net(prototxt_path, caffe.TEST)
    module.save(os.path.join(model_path, 'test.caffemodel'))
def concat_slice_net():
    """Return the proto of a toy net chaining DummyData -> Slice -> Concat/Eltwise."""
    spec = caffe.NetSpec()
    dummy_options = dict(num=20, channels=50, height=64, width=64,
                         data_filler=dict(type="gaussian"))
    spec.data = L.DummyData(dummy_data_param=dummy_options)

    # Split the data layer into the outputs a, b, c.  There is one slice_point
    # fewer than there are outputs: three tops need two slice points.  The
    # first slice_point (20) is the size of top "a" and the second (30) is the
    # combined size of "a" and "b"; "c" takes the remainder.
    # NOTE(review): the original comment derives the sizes 20, 10, 20 from the
    # 50 channels, but the layer is given axis=0 (size 20) -- confirm which
    # axis was intended.
    pieces = L.Slice(spec.data, ntop=3, slice_point=[20, 30], axis=0)
    spec.a, spec.b, spec.c = pieces

    spec.d = L.Concat(spec.a, spec.b, axis=0)
    # Eltwise supports three operations: product, sum and max; sum is the
    # default.
    spec.e = L.Eltwise(spec.a, spec.c)
    return spec.to_proto()
def modeltrain(hdf5s, hdf5t, batch_size):
    """Return the proto of the source/target domain-adaptation training net.

    Source and target batches are read from HDF5, concatenated along dim 0 and
    passed through two shared InnerProduct layers.  The shared feature is
    split back into a source and a target part, each with its own 1-output
    predictor and sigmoid cross-entropy loss.  A domain-classifier branch
    reads the shared feature through a ``GradientScaler`` layer and is trained
    against constant domain labels (0 = source, 1 = target) with loss weight
    0.1.
    """
    def xavier():
        # A fresh filler dict per layer, as in the original.
        return dict(type='xavier')

    n = caffe.NetSpec()

    # Source batch and its domain labels (all zeros).
    n.source_data, n.lp_label = L.HDF5Data(batch_size=batch_size, source=hdf5s,
                                           ntop=2, shuffle=False)
    n.source_domain_labels = L.DummyData(
        data_filler=dict(type='constant', value=0),
        num=batch_size, channels=1, height=1, width=1)

    # Target batch (four tops) and its domain labels (all ones).
    target_tops = L.HDF5Data(batch_size=batch_size, source=hdf5t, ntop=4,
                             shuffle=False)
    (n.target_data, n.lp_target_label, n.bag_target_label,
     n.instance_target_label) = target_tops
    n.target_domain_labels = L.DummyData(
        data_filler=dict(type='constant', value=1),
        num=batch_size, channels=1, height=1, width=1)

    # Stack source on top of target for both the data and the domain labels.
    n.data = L.Concat(n.source_data, n.target_data, concat_dim=0)
    n.dc_label = L.Concat(n.source_domain_labels, n.target_domain_labels,
                          concat_dim=0)

    # Shared feature extractor.
    n.ip1 = L.InnerProduct(n.data, num_output=neuronL1, weight_filler=xavier())
    n.relu1 = L.Sigmoid(n.ip1, in_place=True)
    n.ip2 = L.InnerProduct(n.relu1, num_output=neuronL1-400,
                           weight_filler=xavier())

    # Label predictors on the two parts of the shared feature.
    n.source_feature, n.target_feature = L.Slice(n.ip2, slice_dim=0, ntop=2)
    n.ip3 = L.InnerProduct(n.source_feature, num_output=1,
                           weight_filler=xavier())
    n.ip4 = L.InnerProduct(n.target_feature, num_output=1,
                           weight_filler=xavier())
    n.losslp = L.SigmoidCrossEntropyLoss(n.ip4, n.lp_target_label)
    n.losslps = L.SigmoidCrossEntropyLoss(n.ip3, n.lp_label)

    # Domain classifier behind the gradient scaler.
    n.grl = L.GradientScaler(n.ip2, lower_bound=0.0)
    n.ip11 = L.InnerProduct(n.grl, num_output=300, weight_filler=xavier())
    n.relu11 = L.Sigmoid(n.ip11, in_place=True)
    n.dropout11 = L.Dropout(n.relu11, dropout_ratio=0.5)
    n.ip12 = L.InnerProduct(n.dropout11, num_output=1, weight_filler=xavier())
    n.lossdc = L.SigmoidCrossEntropyLoss(n.ip12, n.dc_label, loss_weight=0.1)

    return n.to_proto()
def language_model_lstm_no_embed(self, sent_bottom, cont_bottom,
                                 text_name='embedding_text', tag=''):
    """Build the LSTM language branch and return its embedded output top.

    The LSTM runs directly on ``sent_bottom`` / ``cont_bottom``.  Its output
    is split along axis 0 at ``sentence_length - 1``: the first part is
    silenced and the second part is reshaped and projected by an InnerProduct
    layer.  The projection is stored on ``self.n`` as ``text_name``.

    Side effects:
        Adds a ``silence_cell_<k>`` top to ``self.n`` and increments
        ``self.silence_count``.
    """
    lstm_lr = self.args.lstm_lr
    embedding_lr = self.args.language_embedding_lr
    hidden_dim = self.language_embedding_dim[0]
    embed_dim = self.language_embedding_dim[1]

    recurrent_options = dict(
        num_output=hidden_dim,
        weight_filler=self.uniform_weight_filler(-0.08, 0.08),
        bias_filler=self.constant_filler(0))
    lstm_params = self.learning_params(
        [[lstm_lr, lstm_lr] for _ in range(3)],
        ['lstm1' + tag, 'lstm2' + tag, 'lstm3' + tag])
    lstm = L.LSTM(sent_bottom, cont_bottom,
                  recurrent_param=recurrent_options,
                  param=lstm_params)

    leading_steps, trailing_steps = L.Slice(
        lstm, slice_point=self.params['sentence_length'] - 1, axis=0, ntop=2)
    silence_key = 'silence_cell_' + str(self.silence_count)
    self.n.tops[silence_key] = L.Silence(leading_steps, ntop=0)
    self.silence_count += 1

    top_lstm = L.Reshape(trailing_steps, shape=dict(dim=[-1, hidden_dim]))
    top_text = L.InnerProduct(
        top_lstm,
        num_output=embed_dim,
        weight_filler=self.uniform_weight_filler(-0.08, .08),
        bias_filler=self.constant_filler(0),
        param=self.learning_params(
            [[embedding_lr, embedding_lr], [embedding_lr * 2, 0]],
            ['lstm_embed1' + tag, 'lstm_embed_1b' + tag]))

    setattr(self.n, text_name, top_text)
    return top_text
def data_layer_trimese(net, inputdb, mean_file, batch_size, net_type, height,
                       width, nchannels, crop_size=-1):
    """Create the stacked data layer and split its first top into three tops.

    The first data top returned by ``data_layer_stacked`` is sliced along
    axis 1 at indices 1 and 2 by a layer named ``"data_trimese"``.

    Returns:
        ``(slices, label)``: the three sliced tops and the label returned by
        ``data_layer_stacked``.
    """
    stacked, label = data_layer_stacked(net, inputdb, mean_file, batch_size,
                                        net_type, height, width, nchannels,
                                        crop_size=crop_size)
    split_options = dict(axis=1, slice_point=[1, 2])
    planes = L.Slice(stacked[0], ntop=3, name="data_trimese",
                     slice_param=split_options)
    return planes, label
def setLayers_twoBranches(data_source, batch_size, layername, kernel, stride, outCH, label_name, transform_param_in, deploy=False, batchnorm=0, lr_mult_distro=[1, 1, 1]):
    """Generate the prototxt text of a two-branch, multi-stage network.

    The network is described by the parallel lists ``layername``, ``kernel``,
    ``stride`` and ``outCH`` (asserted to have equal length).  Each entry of
    ``layername`` is a code handled by the main loop:

    * ``'V'``  - convolution + in-place ReLU named ``conv<p>_<k>``
    * ``'B'``  - advance the pool counter and reset the local counter
    * ``'C'``  - single convolution (plus optional BatchNorm and ReLU)
    * ``'C2'`` - one convolution per branch (levels 1 and 2)
    * ``'P'``  - max pooling
    * ``'L'``, ``'L2'``, ``'L3'`` - loss layers; each ends the current stage
    * ``'D'``  - dropout (non-deploy nets only)
    * ``'@'``  - concatenate both branches with the stored share point
    * ``'$'``  - remember the current layer as the share point

    Args:
        data_source: Path of the data source; a path containing ``"lmdb"``
            selects the CPMData input, otherwise HDF5Data is used.
        batch_size: Batch size of the data layer.
        layername, kernel, stride, outCH: Per-layer code, kernel size, stride
            and output channel count.  ``outCH`` is modified in place by the
            ``'C2'`` branch.
        label_name: List of top names for the label blobs.
        transform_param_in: Passed to CPMData as ``cpm_transform_param``.
        deploy: When true, build the deploy net (no data or loss layers).
        batchnorm: ``1`` inserts BatchNorm layers before the ReLUs.
        lr_mult_distro: Learning-rate multipliers indexed by layer kind.

    Returns:
        The prototxt text.  For the deploy net it is prefixed with an
        ``input``/``input_dim`` header and the placeholder data layer is
        removed.
    """
    # NOTE(review): the default lr_mult_distro has three entries but the 'L2'
    # and 'L3' branches read index 3 -- confirm callers always pass four.
    # it is tricky to produce the deploy prototxt file, as the data input is not from a layer, so we have to creat a workaround
    # producing training and testing prototxt files is pretty straight forward
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)
    # NOTE(review): reads ``transform_param``, which is not a parameter of
    # this function (the parameter is ``transform_param_in``); presumably a
    # module-level global -- confirm.
    num_parts = transform_param['num_parts']
    if deploy == False and "lmdb" not in data_source:
        # training/testing net fed from HDF5: one data top plus one or two label tops
        if (len(label_name) == 1):
            n.data, n.tops[label_name[0]] = L.HDF5Data(hdf5_data_param=dict(
                batch_size=batch_size, source=data_source), ntop=2)
        elif (len(label_name) == 2):
            n.data, n.tops[label_name[0]], n.tops[label_name[1]] = L.HDF5Data(
                hdf5_data_param=dict(batch_size=batch_size, source=data_source), ntop=3)
    # training/testing net fed from LMDB through CPMData
    elif deploy == False:
        n.data, n.tops['label'] = L.CPMData(
            data_param=dict(backend=1, source=data_source, batch_size=batch_size),
            cpm_transform_param=transform_param_in, ntop=2)
        # split the label blob into four tops along axis 1; label_name[0] and
        # label_name[1] are then the products of tops 2*4 and 3*5
        n.tops[label_name[2]], n.tops[label_name[3]], n.tops[
            label_name[4]], n.tops[label_name[5]] = L.Slice(
                n.label, slice_param=dict(
                    axis=1, slice_point=[38, num_parts + 1, num_parts + 39]), ntop=4)
        n.tops[label_name[0]] = L.Eltwise(n.tops[label_name[2]], n.tops[label_name[4]], operation=P.Eltwise.PROD)
        n.tops[label_name[1]] = L.Eltwise(n.tops[label_name[3]], n.tops[label_name[5]], operation=P.Eltwise.PROD)
    else:
        # deploy net: these values form the input header emitted at the end
        input = "data"
        dim1 = 1
        dim2 = 4
        dim3 = 368
        dim4 = 368
        # make an empty "data" layer so the next layer accepting input will be able to take the correct blob name "data",
        # we will later have to remove this layer from the serialization string, since this is just a placeholder
        n.data = L.Layer()
    # something special before everything
    # (channels 0..2 become "image", the rest "center_map", which is silenced)
    n.image, n.center_map = L.Slice(n.data, slice_param=dict(axis=1, slice_point=3), ntop=2)
    n.silence2 = L.Silence(n.center_map, ntop=0)
    #n.pool_center_lower = L.Pooling(n.center_map, kernel_size=9, stride=8, pool=P.Pooling.AVE)
    # just follow arrays..CPCPCPCPCCCC....
    # last_layer holds the name of the most recent top of each of the two branches
    last_layer = ['image', 'image']
    stage = 1
    conv_counter = 1
    pool_counter = 1
    drop_counter = 1
    local_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0
    for l in range(0, len(layername)):
        if layername[l] == 'V':  #pretrained VGG layers
            conv_name = 'conv%d_%d' % (pool_counter, local_counter)
            lr_m = lr_mult_distro[0]
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]], kernel_size=kernel[l], num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print '%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m)
            ReLUname = 'relu%d_%d' % (pool_counter, local_counter)
            n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
            local_counter += 1
            print ReLUname
        if layername[l] == 'B':
            pool_counter += 1
            local_counter = 1
        if layername[l] == 'C':
            if state == 'image':
                #conv_name = 'conv%d_stage%d' % (conv_counter, stage)
                conv_name = 'conv%d_%d_CPM' % (
                    pool_counter, local_counter
                )  # no image state in subsequent stages
                # both branches of this test assign the same multiplier
                if stage == 1:
                    lr_m = lr_mult_distro[1]
                else:
                    lr_m = lr_mult_distro[1]
            else:  # fuse
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
                lr_m = lr_mult_distro[2]
                # NOTE(review): incremented before the Mbn/Mrelu names below are
                # formatted, so they carry the next counter value -- confirm.
                conv_counter += 1
            #if stage == 1:
            #    lr_m = 1
            #else:
            #    lr_m = lr_sub
            n.tops[conv_name] = L.Convolution(
                n.tops[last_layer[0]], kernel_size=kernel[l], num_output=outCH[l],
                pad=int(math.floor(kernel[l] / 2)),
                param=[
                    dict(lr_mult=lr_m, decay_mult=1),
                    dict(lr_mult=lr_m * 2, decay_mult=0)
                ],
                weight_filler=dict(type='gaussian', std=0.01),
                bias_filler=dict(type='constant'))
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            print '%s\tch=%d\t%.1f' % (last_layer[0], outCH[l], lr_m)
            # no BatchNorm/ReLU on a convolution that feeds a loss directly
            if layername[l + 1] != 'L':
                if (state == 'image'):
                    if (batchnorm == 1):
                        batchnorm_name = 'bn%d_stage%d' % (conv_counter, stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[
                                dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)
                            ])
                        #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                        last_layer[0] = batchnorm_name
                    #ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                    ReLUname = 'relu%d_%d_CPM' % (pool_counter, local_counter)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
                else:
                    if (batchnorm == 1):
                        batchnorm_name = 'Mbn%d_stage%d' % (conv_counter, stage)
                        n.tops[batchnorm_name] = L.BatchNorm(
                            n.tops[last_layer[0]],
                            param=[
                                dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)
                            ])
                        #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                        last_layer[0] = batchnorm_name
                    ReLUname = 'Mrelu%d_stage%d' % (conv_counter, stage)
                    n.tops[ReLUname] = L.ReLU(n.tops[last_layer[0]], in_place=True)
                #last_layer = ReLUname
                print ReLUname
            #conv_counter += 1
            local_counter += 1
        elif layername[l] == 'C2':
            # one convolution per branch; level 0 extends last_layer[0], level 1 last_layer[1]
            for level in range(0, 2):
                if state == 'image':
                    #conv_name = 'conv%d_stage%d' % (conv_counter, stage)
                    conv_name = 'conv%d_%d_CPM_L%d' % (
                        pool_counter, local_counter, level + 1
                    )  # no image state in subsequent stages
                    if stage == 1:
                        lr_m = lr_mult_distro[1]
                    else:
                        lr_m = lr_mult_distro[1]
                else:  # fuse
                    conv_name = 'Mconv%d_stage%d_L%d' % (conv_counter, stage, level + 1)
                    lr_m = lr_mult_distro[2]
                    #conv_counter += 1
                #if stage == 1:
                #    lr_m = 1
                #else:
                #    lr_m = lr_sub
                # the convolution right before a two-branch loss gets a fixed
                # channel count: 38 for branch 1 and 19 for branch 2
                # (this overwrites the caller's outCH entry)
                if layername[l + 1] == 'L2' or layername[l + 1] == 'L3':
                    if level == 0:
                        outCH[l] = 38
                    else:
                        outCH[l] = 19
                n.tops[conv_name] = L.Convolution(
                    n.tops[last_layer[level]], kernel_size=kernel[l], num_output=outCH[l],
                    pad=int(math.floor(kernel[l] / 2)),
                    param=[
                        dict(lr_mult=lr_m, decay_mult=1),
                        dict(lr_mult=lr_m * 2, decay_mult=0)
                    ],
                    weight_filler=dict(type='gaussian', std=0.01),
                    bias_filler=dict(type='constant'))
                last_layer[level] = conv_name
                print '%s\tch=%d\t%.1f' % (last_layer[level], outCH[l], lr_m)
                if layername[l + 1] != 'L2' and layername[l + 1] != 'L3':
                    if (state == 'image'):
                        if (batchnorm == 1):
                            batchnorm_name = 'bn%d_stage%d_L%d' % (
                                conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[
                                    dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)
                                ])
                            #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                            last_layer[level] = batchnorm_name
                        #ReLUname = 'relu%d_stage%d' % (conv_counter, stage)
                        ReLUname = 'relu%d_%d_CPM_L%d' % (
                            pool_counter, local_counter, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]], in_place=True)
                    else:
                        if (batchnorm == 1):
                            batchnorm_name = 'Mbn%d_stage%d_L%d' % (
                                conv_counter, stage, level + 1)
                            n.tops[batchnorm_name] = L.BatchNorm(
                                n.tops[last_layer[level]],
                                param=[
                                    dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)
                                ])
                            #scale_filler=dict(type='constant', value=1), shift_filler=dict(type='constant', value=0.001))
                            last_layer[level] = batchnorm_name
                        ReLUname = 'Mrelu%d_stage%d_L%d' % (conv_counter, stage, level + 1)
                        n.tops[ReLUname] = L.ReLU(n.tops[last_layer[level]], in_place=True)
                    print ReLUname
            conv_counter += 1
            local_counter += 1
        elif layername[l] == 'P':  # Pooling
            n.tops['pool%d_stage%d' % (pool_counter, stage)] = L.Pooling(
                n.tops[last_layer[0]], kernel_size=kernel[l], stride=stride[l],
                pool=P.Pooling.MAX)
            last_layer[0] = 'pool%d_stage%d' % (pool_counter, stage)
            pool_counter += 1
            local_counter = 1
            conv_counter += 1
            print last_layer[0]
        elif layername[l] == 'L':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            if deploy == False and "lmdb" not in data_source:
                n.tops['map_vec_stage%d' % stage] = L.Flatten(
                    n.tops[last_layer[0]])
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops['map_vec_stage%d' % stage], n.tops[label_name[1]])
            elif deploy == False:
                level = 1
                name = 'weight_stage%d' % stage
                n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                         n.tops[label_name[(level + 2)]],
                                         operation=P.Eltwise.PROD)
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops[name], n.tops[label_name[level]])
            print 'loss %d' % stage
            # a loss ends the stage: advance it and reset the per-stage counters
            stage += 1
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'
        elif layername[l] == 'L2':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            weight = [lr_mult_distro[3], 1]
            # print lr_mult_distro[3]
            for level in range(0, 2):
                if deploy == False and "lmdb" not in data_source:
                    n.tops['map_vec_stage%d_L%d' % (stage, level + 1)] = L.Flatten(
                        n.tops[last_layer[level]])
                    # NOTE(review): reads 'map_vec_stage%d' although the line
                    # above wrote 'map_vec_stage%d_L%d' -- looks like a
                    # KeyError on this path; confirm.
                    n.tops['loss_stage%d_L%d' % (stage, level + 1)] = L.EuclideanLoss(
                        n.tops['map_vec_stage%d' % stage],
                        n.tops[label_name[level]],
                        loss_weight=weight[level])
                elif deploy == False:
                    name = 'weight_stage%d_L%d' % (stage, level + 1)
                    n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                             n.tops[label_name[(level + 2)]],
                                             operation=P.Eltwise.PROD)
                    n.tops['loss_stage%d_L%d' % (stage, level + 1)] = L.EuclideanLoss(
                        n.tops[name], n.tops[label_name[level]],
                        loss_weight=weight[level])
                print 'loss %d level %d' % (stage, level + 1)
            stage += 1
            #last_connect = last_layer
            #last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'
        elif layername[l] == 'L3':
            # Loss: n.loss layer is only in training and testing nets, but not in deploy net.
            weight = [lr_mult_distro[3], 1]
            # print lr_mult_distro[3]
            if deploy == False:
                # branch 1 uses Euclidean2Loss with label_name[2] as a third bottom
                level = 0
                n.tops['loss_stage%d_L%d' % (stage, level + 1)] = L.Euclidean2Loss(
                    n.tops[last_layer[level]], n.tops[label_name[level]],
                    n.tops[label_name[2]], loss_weight=weight[level])
                print 'loss %d level %d' % (stage, level + 1)
                level = 1
                n.tops['loss_stage%d_L%d' % (stage, level + 1)] = L.EuclideanLoss(
                    n.tops[last_layer[level]], n.tops[label_name[level]],
                    loss_weight=weight[level])
                print 'loss %d level %d' % (stage, level + 1)
            stage += 1
            #last_connect = last_layer
            #last_layer = 'image'
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'
        elif layername[l] == 'D':
            if deploy == False:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer[0]], in_place=True,
                    dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1
        elif layername[l] == '@':
            #if not share_point:
            #    share_point = last_layer
            n.tops['concat_stage%d' % stage] = L.Concat(
                n.tops[last_layer[0]], n.tops[last_layer[1]],
                n.tops[share_point], concat_param=dict(axis=1))
            local_counter = 1
            state = 'fuse'
            last_layer[0] = 'concat_stage%d' % stage
            last_layer[1] = 'concat_stage%d' % stage
            print last_layer
        elif layername[l] == '$':
            share_point = last_layer[0]
            pool_counter += 1
            local_counter = 1
            print 'share'
    # final process
    stage -= 1
    #if stage == 1:
    #    n.silence = L.Silence(n.pool_center_lower, ntop=0)
    if deploy == False:
        return str(n.to_proto())
    # for generating the deploy net
    else:
        # generate the input information header string
        deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
            '"' + input + '"', dim1, dim2, dim3, dim4)
        # assemble the input header with the net layers string. remove the first placeholder layer from the net string.
        return deploy_str + '\n' + 'layer {' + 'layer {'.join(
            str(n.to_proto()).split('layer {')[2:])
def qlstm(mode, batchsize, T, question_vocab_size):
    """Build the two-layer LSTM question net with two-glimpse attention.

    The question is embedded (learned embedding + GloVe input), run through
    two stacked LSTMs, fused with the image feature via compact bilinear
    pooling to predict two attention maps, and the attended image features
    are fused with the question vector again to classify the answer.

    Args:
        mode: value stored under 'mode' in the JSON param string handed to
            the Python data layer.
        batchsize: stored under 'batchsize' in the same param string and used
            as the first dimension of the DummyData blob.
        T: number of tops the LSTM outputs are sliced into along axis 0;
            only the last slice is kept, the others are silenced.
        question_vocab_size: input_dim of the Embed layer.

    Returns:
        The result of NetSpec.to_proto() for the assembled net.
    """
    net = caffe.NetSpec()

    # --- small layer factories -------------------------------------------
    def uniform_filler():
        return dict(type='uniform', min=-0.08, max=0.08)

    def xavier_filler():
        return dict(type='xavier')

    def reshape(bottom, dims):
        return L.Reshape(bottom, reshape_param=dict(shape=dict(dim=dims)))

    def dropout(bottom, ratio):
        return L.Dropout(bottom, dropout_param={'dropout_ratio': ratio})

    def conv1x1(bottom, channels):
        return L.Convolution(bottom, kernel_size=1, stride=1,
                             num_output=channels, pad=0,
                             weight_filler=xavier_filler())

    def lstm(bottom):
        return L.LSTM(
            bottom, net.cont,
            recurrent_param=dict(
                num_output=1024,
                weight_filler=uniform_filler(),
                bias_filler=dict(type='constant', value=0)))

    def compact_bilinear(left, right):
        return L.CompactBilinear(
            left, right,
            compact_bilinear_param=dict(num_output=16000, sum_pool=False))

    def last_step(sequence, slice_prefix, silence_prefix):
        # Slice the sequence into T tops, register and silence the first
        # T - 1 of them, and hand back the final one.
        steps = L.Slice(sequence, ntop=T, slice_param={'axis': 0})
        for t in range(T - 1):
            setattr(net, slice_prefix + str(t), steps[t])
            setattr(net, silence_prefix + str(t),
                    L.Silence(steps[t], ntop=0))
        return steps[T - 1]

    # --- data and word embedding -----------------------------------------
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize})
    net.data, net.cont, net.img_feature, net.label, net.glove = L.Python(
        module='vqa_data_provider_layer',
        layer='VQADataProviderLayer',
        param_str=mode_str,
        ntop=5)

    net.embed_ba = L.Embed(net.data, input_dim=question_vocab_size,
                           num_output=300, weight_filler=uniform_filler())
    net.embed = L.TanH(net.embed_ba)
    word_inputs = [net.embed, net.glove]
    # T x N x 600
    net.concat_embed = L.Concat(*word_inputs, concat_param={'axis': 2})

    # --- LSTM1 -------------------------------------------------------------
    net.lstm1 = lstm(net.concat_embed)
    net.lstm1_out = last_step(net.lstm1, 'slice_first', 'silence_data_first')
    net.lstm1_reshaped = reshape(net.lstm1_out, [-1, 1024])
    net.lstm1_reshaped_droped = dropout(net.lstm1_reshaped, 0.3)
    net.lstm1_droped = dropout(net.lstm1, 0.3)

    # --- LSTM2 -------------------------------------------------------------
    net.lstm2 = lstm(net.lstm1_droped)
    net.lstm2_out = last_step(net.lstm2, 'slice_second',
                              'silence_data_second')
    net.lstm2_reshaped = reshape(net.lstm2_out, [-1, 1024])
    net.lstm2_reshaped_droped = dropout(net.lstm2_reshaped, 0.3)

    question_parts = [net.lstm1_reshaped_droped, net.lstm2_reshaped_droped]
    net.lstm_12 = L.Concat(*question_parts)

    # --- question / image fusion used to predict attention ----------------
    net.q_emb_tanh_droped_resh = reshape(net.lstm_12, [-1, 2048, 1, 1])
    net.q_emb_tanh_droped_resh_tiled_1 = L.Tile(
        net.q_emb_tanh_droped_resh, axis=2, tiles=14)
    net.q_emb_tanh_droped_resh_tiled = L.Tile(
        net.q_emb_tanh_droped_resh_tiled_1, axis=3, tiles=14)
    net.i_emb_tanh_droped_resh = reshape(net.img_feature,
                                         [-1, 2048, 14, 14])
    net.blcf = compact_bilinear(net.q_emb_tanh_droped_resh_tiled,
                                net.i_emb_tanh_droped_resh)
    net.blcf_sign_sqrt = L.SignedSqrt(net.blcf)
    net.blcf_sign_sqrt_l2 = L.L2Normalize(net.blcf_sign_sqrt)
    net.blcf_droped = dropout(net.blcf_sign_sqrt_l2, 0.1)

    # --- multi-channel attention -------------------------------------------
    net.att_conv1 = conv1x1(net.blcf_droped, 512)
    net.att_conv1_relu = L.ReLU(net.att_conv1)
    net.att_conv2 = conv1x1(net.att_conv1_relu, 2)
    net.att_reshaped = reshape(net.att_conv2, [-1, 2, 14 * 14])
    net.att_softmax = L.Softmax(net.att_reshaped, axis=2)
    net.att = reshape(net.att_softmax, [-1, 2, 14, 14])
    att_maps = L.Slice(net.att, ntop=2, slice_param={'axis': 1})
    net.att_map0 = att_maps[0]
    net.att_map1 = att_maps[1]

    # Constant blob passed as the third bottom of both SoftAttention layers.
    ones = L.DummyData(shape=dict(dim=[batchsize, 1]),
                       data_filler=dict(type='constant', value=1),
                       ntop=1)
    net.att_feature0 = L.SoftAttention(net.i_emb_tanh_droped_resh,
                                       net.att_map0, ones)
    net.att_feature1 = L.SoftAttention(net.i_emb_tanh_droped_resh,
                                       net.att_map1, ones)
    net.att_feature0_resh = reshape(net.att_feature0, [-1, 2048])
    net.att_feature1_resh = reshape(net.att_feature1, [-1, 2048])
    net.att_feature = L.Concat(net.att_feature0_resh, net.att_feature1_resh)

    # --- merge attention and lstm with compact bilinear pooling ------------
    net.att_feature_resh = reshape(net.att_feature, [-1, 4096, 1, 1])
    net.lstm_12_resh = reshape(net.lstm_12, [-1, 2048, 1, 1])
    net.bc_att_lstm = compact_bilinear(net.att_feature_resh,
                                       net.lstm_12_resh)
    net.bc_sign_sqrt = L.SignedSqrt(net.bc_att_lstm)
    net.bc_sign_sqrt_l2 = L.L2Normalize(net.bc_sign_sqrt)
    net.bc_dropped = dropout(net.bc_sign_sqrt_l2, 0.1)
    net.bc_dropped_resh = reshape(net.bc_dropped, [-1, 16000])

    # --- classifier --------------------------------------------------------
    net.prediction = L.InnerProduct(net.bc_dropped_resh, num_output=3000,
                                    weight_filler=xavier_filler())
    net.loss = L.SoftmaxWithLoss(net.prediction, net.label)
    return net.to_proto()
def mfb_coatt(mode, batchsize, T, question_vocab_size, folder):
    """Build the MFB co-attention VQA net and return its proto.

    Pipeline: word embedding + GloVe -> LSTM -> question attention ->
    MFB-based image attention -> MFB fusion of attended question and image
    features -> answer classifier.

    Args:
        mode: 'val' selects the 'vqa_data_layer_hdf5' data module and a
            SoftmaxWithLoss; any other value selects
            'vqa_data_layer_kld_hdf5' and SoftmaxKLDLoss.
        batchsize: stored in the data layer's JSON param string and used as
            the first dimension of the DummyData blobs.
        T: accepted but not used by this function.
        question_vocab_size: input_dim of the Embed layer.
        folder: stored under 'folder' in the data layer's param string.

    Returns:
        The result of NetSpec.to_proto() for the assembled net.
    """
    net = caffe.NetSpec()

    # --- small layer factories -------------------------------------------
    def xavier():
        return dict(type='xavier')

    def reshape(bottom, dims):
        return L.Reshape(bottom, reshape_param=dict(shape=dict(dim=dims)))

    def dropout(bottom, ratio):
        return L.Dropout(bottom, dropout_param={'dropout_ratio': ratio})

    def conv1x1(bottom, channels):
        return L.Convolution(bottom, kernel_size=1, stride=1,
                             num_output=channels, pad=0,
                             weight_filler=xavier())

    def project(bottom):
        return L.InnerProduct(bottom, num_output=config.JOINT_EMB_SIZE,
                              weight_filler=xavier())

    def factor_sum_pool(bottom):
        return L.Pooling(bottom, pool=P.Pooling.SUM,
                         pooling_param=dict(kernel_w=config.MFB_FACTOR_NUM,
                                            kernel_h=1))

    def ones_blob():
        return L.DummyData(shape=dict(dim=[batchsize, 1]),
                           data_filler=dict(type='constant', value=1),
                           ntop=1)

    # --- data --------------------------------------------------------------
    mode_str = json.dumps({'mode':mode, 'batchsize':batchsize,'folder':folder})
    if mode == 'val':
        data_module = 'vqa_data_layer_hdf5'
    else:
        data_module = 'vqa_data_layer_kld_hdf5'
    net.data, net.cont, net.img_feature, net.label, net.glove = L.Python(
        module=data_module,
        layer='VQADataProviderLayer',
        param_str=mode_str,
        ntop=5)

    # --- word embedding ----------------------------------------------------
    net.embed = L.Embed(net.data, input_dim=question_vocab_size,
                        num_output=300, weight_filler=xavier())
    net.embed_tanh = L.TanH(net.embed)
    word_inputs = [net.embed_tanh, net.glove]
    # T x N x 600
    net.concat_embed = L.Concat(*word_inputs, concat_param={'axis': 2})

    # --- LSTM --------------------------------------------------------------
    net.lstm1 = L.LSTM(
        net.concat_embed, net.cont,
        recurrent_param=dict(num_output=config.LSTM_UNIT_NUM,
                             weight_filler=xavier()))
    net.lstm1_droped = dropout(net.lstm1, config.LSTM_DROPOUT_RATIO)
    net.lstm1_resh = L.Permute(net.lstm1_droped,
                               permute_param=dict(order=[1, 2, 0]))
    net.lstm1_resh2 = reshape(net.lstm1_resh, [0, 0, 0, 1])

    # --- Question Attention ------------------------------------------------
    net.qatt_conv1 = conv1x1(net.lstm1_resh2, 512)
    net.qatt_relu = L.ReLU(net.qatt_conv1)
    net.qatt_conv2 = conv1x1(net.qatt_relu, config.NUM_QUESTION_GLIMPSE)
    # N*NUM_QUESTION_GLIMPSE*15
    net.qatt_reshape = reshape(
        net.qatt_conv2,
        [-1, config.NUM_QUESTION_GLIMPSE, config.MAX_WORDS_IN_QUESTION, 1])
    net.qatt_softmax = L.Softmax(net.qatt_reshape, axis=2)
    qatt_maps = L.Slice(net.qatt_softmax,
                        ntop=config.NUM_QUESTION_GLIMPSE,
                        slice_param={'axis': 1})
    dummy_lstm = ones_blob()

    question_feats = []
    for idx in range(config.NUM_QUESTION_GLIMPSE):
        # With a single glimpse the Slice result is used directly instead
        # of being indexed.
        if config.NUM_QUESTION_GLIMPSE == 1:
            att_map = qatt_maps
        else:
            att_map = qatt_maps[idx]
        feat = L.SoftAttention(net.lstm1_resh2, att_map, dummy_lstm)
        setattr(net, 'qatt_feat%d' % idx, feat)
        question_feats.append(feat)
    net.qatt_feat_concat = L.Concat(*question_feats)

    # --- Image Attention with MFB ------------------------------------------
    net.q_feat_resh = reshape(net.qatt_feat_concat, [0, -1, 1, 1])
    net.i_feat_resh = reshape(
        net.img_feature,
        [0, -1, config.IMG_FEAT_WIDTH, config.IMG_FEAT_WIDTH])
    net.iatt_q_proj = project(net.q_feat_resh)
    net.iatt_q_resh = reshape(net.iatt_q_proj,
                              [-1, config.JOINT_EMB_SIZE, 1, 1])
    net.iatt_q_tile1 = L.Tile(net.iatt_q_resh, axis=2,
                              tiles=config.IMG_FEAT_WIDTH)
    net.iatt_q_tile2 = L.Tile(net.iatt_q_tile1, axis=3,
                              tiles=config.IMG_FEAT_WIDTH)
    net.iatt_i_conv = conv1x1(net.i_feat_resh, config.JOINT_EMB_SIZE)
    net.iatt_i_resh1 = reshape(
        net.iatt_i_conv,
        [-1, config.JOINT_EMB_SIZE,
         config.IMG_FEAT_WIDTH, config.IMG_FEAT_WIDTH])
    net.iatt_iq_eltwise = L.Eltwise(net.iatt_q_tile2, net.iatt_i_resh1,
                                    eltwise_param=dict(operation=0))
    net.iatt_iq_droped = dropout(net.iatt_iq_eltwise,
                                 config.MFB_DROPOUT_RATIO)
    # NOTE(review): 'iatt_iq_resh2' is assigned twice on purpose-or-by-accident;
    # the second assignment replaces the first entry on the net while the
    # Permute below still consumes the first Reshape. Kept exactly as is so
    # the generated net does not change - confirm before renaming.
    net.iatt_iq_resh2 = reshape(
        net.iatt_iq_droped,
        [-1, config.JOINT_EMB_SIZE, config.IMG_FEAT_SIZE, 1])
    net.iatt_iq_permute1 = L.Permute(net.iatt_iq_resh2,
                                     permute_param=dict(order=[0, 2, 1, 3]))
    net.iatt_iq_resh2 = reshape(
        net.iatt_iq_permute1,
        [-1, config.IMG_FEAT_SIZE,
         config.MFB_OUT_DIM, config.MFB_FACTOR_NUM])
    net.iatt_iq_sumpool = factor_sum_pool(net.iatt_iq_resh2)
    net.iatt_iq_permute2 = L.Permute(net.iatt_iq_sumpool,
                                     permute_param=dict(order=[0, 2, 1, 3]))
    net.iatt_iq_sqrt = L.SignedSqrt(net.iatt_iq_permute2)
    net.iatt_iq_l2 = L.L2Normalize(net.iatt_iq_sqrt)

    ## 2 conv layers 1000 -> 512 -> 2
    net.iatt_conv1 = conv1x1(net.iatt_iq_l2, 512)
    net.iatt_relu = L.ReLU(net.iatt_conv1)
    net.iatt_conv2 = conv1x1(net.iatt_relu, config.NUM_IMG_GLIMPSE)
    net.iatt_resh = reshape(
        net.iatt_conv2,
        [-1, config.NUM_IMG_GLIMPSE, config.IMG_FEAT_SIZE])
    net.iatt_softmax = L.Softmax(net.iatt_resh, axis=2)
    net.iatt_softmax_resh = reshape(
        net.iatt_softmax,
        [-1, config.NUM_IMG_GLIMPSE,
         config.IMG_FEAT_WIDTH, config.IMG_FEAT_WIDTH])
    iatt_maps = L.Slice(net.iatt_softmax_resh,
                        ntop=config.NUM_IMG_GLIMPSE,
                        slice_param={'axis': 1})
    dummy = ones_blob()

    image_feats = []
    for idx in range(config.NUM_IMG_GLIMPSE):
        if config.NUM_IMG_GLIMPSE == 1:
            att_map = iatt_maps
        else:
            att_map = iatt_maps[idx]
        feat = L.SoftAttention(net.i_feat_resh, att_map, dummy)
        setattr(net, 'iatt_feat%d' % idx, feat)
        flat_feat = reshape(feat, [0, -1])
        setattr(net, 'iatt_feat%d_resh' % idx, flat_feat)
        image_feats.append(flat_feat)
    net.iatt_feat_concat = L.Concat(*image_feats)
    net.iatt_feat_concat_resh = reshape(net.iatt_feat_concat, [0, -1, 1, 1])

    # --- Fine-grained Image-Question MFB fusion ----------------------------
    net.mfb_q_proj = project(net.q_feat_resh)
    net.mfb_i_proj = project(net.iatt_feat_concat_resh)
    net.mfb_iq_eltwise = L.Eltwise(net.mfb_q_proj, net.mfb_i_proj,
                                   eltwise_param=dict(operation=0))
    net.mfb_iq_drop = dropout(net.mfb_iq_eltwise, config.MFB_DROPOUT_RATIO)
    net.mfb_iq_resh = reshape(
        net.mfb_iq_drop,
        [-1, 1, config.MFB_OUT_DIM, config.MFB_FACTOR_NUM])
    net.mfb_iq_sumpool = factor_sum_pool(net.mfb_iq_resh)
    net.mfb_out = reshape(net.mfb_iq_sumpool, [-1, config.MFB_OUT_DIM])
    net.mfb_sign_sqrt = L.SignedSqrt(net.mfb_out)
    net.mfb_l2 = L.L2Normalize(net.mfb_sign_sqrt)

    # --- classifier and loss -----------------------------------------------
    net.prediction = L.InnerProduct(net.mfb_l2,
                                    num_output=config.NUM_OUTPUT_UNITS,
                                    weight_filler=xavier())
    if mode == 'val':
        net.loss = L.SoftmaxWithLoss(net.prediction, net.label)
    else:
        net.loss = L.SoftmaxKLDLoss(net.prediction, net.label)
    return net.to_proto()
def gru_unit(self, prefix, x, cont, static=None, h=None, batch_size=100,
             timestep=0, gru_hidden=1000, weight_lr_mult=1, bias_lr_mult=2,
             weight_decay_mult=1, bias_decay_mult=0, concat_hidden=True,
             weight_filler=None, bias_filler=None):
    """Append the layers of one GRU time step and return the new hidden top.

    Parameter names are built from `prefix` only (not from `timestep`), so
    every call with the same prefix refers to the same parameter names.

    Args:
        prefix: string prepended to parameter and top names.
        x: input top for this time step; projected to 3 * gru_hidden outputs.
        cont: top multiplied into the previous hidden state through
            L.Scale(h, cont, axis=0).
        static: optional static input, assumed to be already transformed.
            May be a single top or a list/tuple of tops; it is added to both
            gate sums.
        h: previous hidden state top. When falsy, a dummy data layer of shape
            [1, batch_size, gru_hidden] is created via self.dummy_data_layer.
        batch_size: used only for the shape of the default hidden state.
        timestep: index embedded in the names of the transform tops.
        gru_hidden: size of the hidden state.
        weight_lr_mult, bias_lr_mult, weight_decay_mult, bias_decay_mult:
            multipliers placed in the `param` specs of the learned layers.
        concat_hidden: accepted for interface compatibility; not used here.
        weight_filler: filler for weights; defaults to
            self.uniform_weight_filler(-0.08, 0.08).
        bias_filler: filler for biases; defaults to self.constant_filler(0).

    Returns:
        The top produced by self.sum([(1 - z) * h_prev, z * candidate]).
    """
    #assume static input already transformed
    if not weight_filler:
        weight_filler = self.uniform_weight_filler(-0.08, 0.08)
    if not bias_filler:
        bias_filler = self.constant_filler(0)
    if not h:
        # Fixed: the original referenced the undefined name `lstm_hidden`.
        h = self.dummy_data_layer([1, batch_size, gru_hidden], 1)

    def get_name(name):
        return '%s_%s' % (prefix, name)

    def get_param(weight_name, bias_name=None):
        #TODO: write this in terms of earlier method "init_params"
        w = dict(lr_mult=weight_lr_mult, decay_mult=weight_decay_mult,
                 name=get_name(weight_name))
        if bias_name is not None:
            b = dict(lr_mult=bias_lr_mult, decay_mult=bias_decay_mult,
                     name=get_name(bias_name))
            return [w, b]
        return [w]

    # Fixed: `list += top` tries to iterate a single top. Accept either one
    # top or a list/tuple of tops so both call styles work.
    # NOTE(review): the same static top(s) are added to the 2 * gru_hidden
    # wide gate sum and to the gru_hidden wide candidate sum - confirm the
    # width(s) callers are expected to provide.
    if static:
        if isinstance(static, (list, tuple)):
            static_items = list(static)
        else:
            static_items = [static]
    else:
        static_items = []

    gate_dim = gru_hidden * 3

    #transform x_t
    x = L.InnerProduct(x, num_output=gate_dim, axis=2,
                       weight_filler=weight_filler, bias_filler=bias_filler,
                       param=get_param('W_xc', 'b_c'))
    self.rename_tops(x, get_name('%d_x_transform' % timestep))

    #transform h
    h_conted = L.Scale(h, cont, axis=0)
    h = L.InnerProduct(h_conted, num_output=gru_hidden * 2, axis=2,
                       bias_term=False, weight_filler=weight_filler,
                       param=get_param('W_hc'))
    h_name = get_name('%d_h_transform' % timestep)
    if not hasattr(self.n, h_name):
        setattr(self.n, h_name, h)

    #gru stuff TODO: write GRUUnit in caffe? would make all this much prettier.
    # First 2 * gru_hidden channels feed the z/r gates, the rest the candidate.
    x_transform_z_r, x_transform_hc = L.Slice(
        x, slice_point=gru_hidden * 2, axis=2, ntop=2)

    # Update (z) and reset (r) gates.
    z_r_sum = self.sum([x_transform_z_r, h] + static_items)
    z_r = L.Sigmoid(z_r_sum)
    z, r = L.Slice(z_r, slice_point=gru_hidden, axis=2, ntop=2)

    # Candidate state computed from the reset-gated previous hidden state.
    z_weighted_h = self.prod([r, h_conted])
    z_h_transform = L.InnerProduct(z_weighted_h, num_output=gru_hidden,
                                   axis=2, bias_term=False,
                                   weight_filler=weight_filler,
                                   param=get_param('W_hzc'))
    hc_sum = self.sum([x_transform_hc, z_h_transform] + static_items)
    # Fixed: the original applied TanH to `hc` before `hc` was defined.
    hc = L.TanH(hc_sum)

    # zm1 = 1 - z (Power layer with scale=-1, shift=1).
    zm1 = L.Power(z, scale=-1, shift=1)
    h_h = self.prod([zm1, h_conted])
    h_hc = self.prod([z, hc])
    h = self.sum([h_h, h_hc])
    return h
def va_net_proto(self, batch_size, train=True):
    """Build the valence/arousal network and return it as a proto.

    Layout, in registration order:
      1. Python data layer ('va_datalayer' / 'VADataLayerSync') producing
         data and label.
      2. Two conv-bn-scale-relu stems (the second one is registered as res0).
      3. 8 rPoly2 blocks forming the core (res1 .. res8).
      4. Split into eye / eyebrow / mouth branches, 2 rPoly2 blocks each.
      5. Each branch is split into AU sub-branches built from rPoly3 blocks
         (the Lip sub-branch has one rPoly3 block, a 1-top Split into Lip_c,
         then one more rPoly3 block).
      6. Two Concat layers collect the AU outputs for the Val and Aro heads,
         each followed by two fc-bn-drop blocks, a 10-way fc, a softmax loss
         and an accuracy layer.
      7. When `train` is false, softmax probability layers are added.

    Layer names are generated with getattr/setattr instead of exec'ing
    string-built assignment statements; the names and their order are the
    same as before.

    Args:
        batch_size: batch size forwarded to the Python data layer.
        train: selects the 'train' split and train_mean_file when true,
            otherwise the 'test' split and test_mean_file.

    Returns:
        The result of NetSpec.to_proto() for the assembled net.
    """
    n = caffe.NetSpec()

    # Top-name templates: {0} is the block index, {1} is '' or '_<branch>'.
    rpoly2_tops = ('conv{0}_1{1}', 'relu{0}_1{1}',
                   'conv{0}_2{1}', 'relu{0}_2{1}',
                   'conv{0}_3{1}', 'relu{0}_3{1}',
                   'res{0}{1}')
    # NOTE(review): the third relu is named 'relu{0}{1}' (no '_3'), unlike
    # its siblings. Kept as is because renaming would change the generated
    # net - confirm whether 'relu{0}_3{1}' was intended.
    rpoly3_tops = ('conv{0}_1{1}', 'relu{0}_1{1}',
                   'conv{0}_2{1}', 'relu{0}_2{1}',
                   'conv{0}_3{1}', 'relu{0}{1}',
                   'conv{0}_4{1}', 'relu{0}_4{1}',
                   'res{0}{1}')

    def bind(names, tops):
        # Register `tops` on the net under `names`, in order. Mirrors tuple
        # unpacking: a length mismatch raises ValueError before anything is
        # registered.
        tops = tuple(tops)
        if len(tops) != len(names):
            raise ValueError('expected %d tops starting at %r, got %d'
                             % (len(names), names[0], len(tops)))
        for name, top in zip(names, tops):
            setattr(n, name, top)

    def add_rpoly2(index, suffix):
        # Block `index` consumes the residual output of block `index - 1`.
        source = getattr(n, 'res{0}{1}'.format(index - 1, suffix))
        bind([t.format(index, suffix) for t in rpoly2_tops],
             block_def.rPoly2(source, learn_all=self.learn_all))

    def add_rpoly3(index, suffix):
        source = getattr(n, 'res{0}{1}'.format(index - 1, suffix))
        bind([t.format(index, suffix) for t in rpoly3_tops],
             block_def.rPoly3(source))

    def split(index, source_suffix, branches):
        # Fan res<index><source_suffix> out into res<index>_<branch> tops.
        source = getattr(n, 'res{0}{1}'.format(index, source_suffix))
        bind(['res{0}_{1}'.format(index, branch) for branch in branches],
             L.Split(source, ntop=len(branches)))

    # Data comes from the Python layer; an earlier LMDB-based L.Data setup
    # was disabled in favour of it.
    if train:
        split_name, mean_file = 'train', train_mean_file
    else:
        split_name, mean_file = 'test', test_mean_file
    data_layer_params = dict(batch_size=batch_size,
                             im_shape=[170, 170],
                             split=split_name,
                             data_root=data_root,
                             mean_file=mean_file)
    n.data, n.label = L.Python(module='va_datalayer',
                               layer='VADataLayerSync',
                               ntop=2,
                               param_str=str(data_layer_params))

    n.conv1, n.conv1_bn, n.conv1_scale, n.conv1_relu = \
        block_def.conv_bn_scale_relu(n.data, ks=11, nout=256, stride=4,
                                     pad=0, learn_all=self.learn_all)
    n.res0, n.conv2_bn, n.conv2_scale, n.conv2_relu = \
        block_def.conv_bn_scale_relu(n.conv1, ks=9, nout=128, stride=2,
                                     pad=0, learn_all=self.learn_all)

    n_core = 8
    n_attr = 2
    n_au = 2
    attr_out = n_core + n_attr    # index of the last attribute block
    au_out = attr_out + n_au      # index of the last AU block

    def add_au_branch(branch):
        # n_au rPoly3 blocks for one AU sub-branch.
        for num in range(n_au):
            add_rpoly3(attr_out + num + 1, '_' + branch)

    # 8 rPoly2 blocks for the core layer.
    for num in range(n_core):
        add_rpoly2(num + 1, '')

    # Core layer to the attribute layers (a fourth 'face' branch is
    # currently disabled).
    attribute_branches = ('eye', 'eyebrow', 'mouth')
    split(n_core, '', attribute_branches)

    # 2 rPoly2 blocks per attribute layer: eye, then eyebrow, then mouth.
    for branch in attribute_branches:
        for num in range(n_attr):
            add_rpoly2(n_core + num + 1, '_' + branch)

    ########################################
    # Eye layer to 2 AU layers, 2 rPoly3 blocks each.
    split(attr_out, '_eye', ('AU6_7', 'AU45'))
    add_au_branch('AU6_7')
    add_au_branch('AU45')

    # Eyebrow layer to 3 AU layers, 2 rPoly3 blocks each.
    split(attr_out, '_eyebrow', ('AU1', 'AU2', 'AU4'))
    add_au_branch('AU1')
    add_au_branch('AU2')
    add_au_branch('AU4')

    # Mouth layer to 3 AU layers.
    split(attr_out, '_mouth', ('Chin', 'Lip', 'Mouth_AU'))
    add_au_branch('Chin')

    # Lip: one rPoly3 block, a single-top Split into Lip_c, then one more
    # rPoly3 block on Lip_c (a parallel Lip_u branch is currently disabled).
    add_rpoly3(attr_out + 1, '_Lip')
    lip_name = 'res{0}_Lip'.format(attr_out + 1)
    # With ntop=1 the Split result is assigned as a single top, not unpacked.
    setattr(n, lip_name + '_c', L.Split(getattr(n, lip_name), ntop=1))
    add_rpoly3(attr_out + 2, '_Lip_c')

    add_au_branch('Mouth_AU')

    ########################################
    def au_outputs(branches):
        return [getattr(n, 'res{0}_{1}'.format(au_out, branch))
                for branch in branches]

    # AU6_7, AU45, AU4, Lip_c, Mouth_AU feed the Valence head.
    n.res_Val = L.Concat(
        *au_outputs(('AU6_7', 'AU45', 'AU4', 'Lip_c', 'Mouth_AU')), axis=1)
    # AU45, AU1, AU2, AU4, Chin feed the Arousal head.
    n.res_Aro = L.Concat(
        *au_outputs(('AU45', 'AU1', 'AU2', 'AU4', 'Chin')), axis=1)

    # va labels
    n.Val_label, n.Aro_label = L.Slice(n.label, name='slice', axis=1,
                                       slice_point=[1], ntop=2)

    va_layers = ['Val', 'Aro']
    out = [10, 10]
    for head, num_classes in zip(va_layers, out):
        bind(['fc1_' + head, 'fc1_bn_' + head, 'fc1_drop_' + head],
             block_def.fc_bn_drop(getattr(n, 'res_' + head),
                                  num_output=1024, dropout_ratio=0.5))
        bind(['fc2_' + head, 'fc2_bn_' + head, 'fc2_drop_' + head],
             block_def.fc_bn_drop(getattr(n, 'fc1_' + head),
                                  num_output=1024, dropout_ratio=0.5))
        score = block_def.fc(getattr(n, 'fc2_' + head),
                             num_output=num_classes)
        setattr(n, head + '_score', score)
        label = getattr(n, head + '_label')
        setattr(n, 'loss_' + head, L.SoftmaxWithLoss(score, label))
        setattr(n, 'acc_' + head, L.Accuracy(score, label))

    # Probability outputs are only added to the non-training net.
    if not train:
        n.probs_Val = L.Softmax(n.Val_score)
        n.probs_Aro = L.Softmax(n.Aro_score)

    return n.to_proto()