def create(self, dataLayerParams, phase="train"): n = caffe.NetSpec() n.data, n.label = L.Python(module="NpyDataLayer", layer=self.dataLayer, ntop=2, param_str=str(dataLayerParams)) n.input_conv = L.Convolution(n.data, num_output=16, kernel_size=1, stride=1, pad=1, bias_term=False, param=[dict(lr_mult=1, decay_mult=1)], weight_filler=dict(type="xavier")) n.input_relu = L.ReLU(n.input_conv, in_place=False) for i in range(len(self.stages)): for j in range(self.stages[i]): stageString = self.resnetString bottomString = 'n.input_relu' if (i != 0) or (j != 0): bottomString = 'n.res{}_add'.format( str(sum(self.stages[:i]) + j)) exec( stageString.replace('(bottom)', bottomString).replace( '(output)', str(2**i * 64)).replace( '(n)', str(sum(self.stages[:i]) + j + 1))) exec( 'n.pool_ave = L.Pooling(n.res{}_add, pool=P.Pooling.AVE, global_pooling=True)' .format(str(sum(self.stages)))) n.classifier = L.InnerProduct(n.pool_ave, num_output=self.classCount, param=[ dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0) ], weight_filler=dict(type='xavier'), bias_filler=dict(type='constant', value=0)) if phase == "train": n.loss = L.SoftmaxWithLoss(n.classifier, n.label) elif phase == "test": n.softmax_out = L.Softmax(n.classifier) n.accuracy_top1 = L.Accuracy(n.softmax_out, n.label, accuracy_param=dict(top_k=1, axis=1)) n.accuracy_top5 = L.Accuracy(n.softmax_out, n.label, accuracy_param=dict(top_k=5, axis=1)) else: # deploy n.softmax_out = L.Softmax(n.classifier) n.result = L.ArgMax(n.softmax_out, argmax_param=dict(axis=1)) return n.to_proto()
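# --- Hedged usage sketch (not from the original source) ---
# Assuming create() above belongs to a builder class whose stages,
# dataLayer, resnetString, and classCount are already configured,
# the returned NetParameter can be serialized once per phase.
# `builder` and `data_params` are illustrative names, and the
# data-layer parameters are whatever NpyDataLayer actually expects.
data_params = dict(batch_size=32, phase="train")
for phase in ("train", "test", "deploy"):
    proto = builder.create(data_params, phase=phase)
    with open("resnet_{}.prototxt".format(phase), "w") as f:
        f.write(str(proto))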
def test_argmax2():  # type: () -> caffe.NetSpec
    n = caffe.NetSpec()
    n.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
    n.argmax1 = L.ArgMax(n.input1, axis=-1)
    return n
def net():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.ArgMax(n.data, out_max_val=_out_max_val, top_k=_top_k,
                         axis=_axis)
    return n.to_proto()
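# --- Minimal sketch of the ArgMax parameters exercised in net() above ---
# (shapes and values here are assumptions, not from the source). top_k
# keeps the k largest entries along `axis`; with axis set, out_max_val=True
# makes the layer emit the maximal values themselves instead of their
# indices (per Caffe's argmax layer).
import caffe
from caffe import layers as L

spec = caffe.NetSpec()
spec.data = L.Input(input_param=dict(shape=dict(dim=[2, 10])))
spec.top3 = L.ArgMax(spec.data, out_max_val=False, top_k=3, axis=1)
print(spec.to_proto())  # inspect the generated argmax_param block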
def create_UNet():
    n = caffe.NetSpec()
    n.data = L.Input(include={'phase': caffe.TEST},
                     input_param={'shape': {
                         'dim': [1, 1, SEG_MASK_WIDTH, SEG_MASK_HEIGHT]}})

    # encoder => level 1
    # conv_relu(bottom, kernel_size, stride, nout, pad):
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 3, 1, 64, 0, True)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 3, 1, 64, 0, True)
    n.pool1 = L.Pooling(n.relu1_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    # encoder => level 2
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 3, 1, 128, 0, True)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 3, 1, 128, 0, True)
    n.pool2 = L.Pooling(n.relu2_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    # encoder => level 3
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 3, 1, 256, 0, True)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 3, 1, 256, 0, True)
    n.pool3 = L.Pooling(n.relu3_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    # encoder => level 4
    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 3, 1, 512, 0, True)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 3, 1, 512, 0, True)
    n.pool4 = L.Pooling(n.relu4_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)

    # encoder => level 5
    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 3, 1, 1024, 0, True)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 3, 1, 1024, 0, True)

    # upconv_concat(bottom, ks, stride, nout, pad, crop_offset, cat)
    n.upconv1, n.concat1 = upconv_concat(n.relu5_2, 2, 2, 512, 0, 4, n.relu4_2)

    # decoder => level 1
    n.conv6_1, n.relu6_1 = conv_relu(n.concat1, 3, 1, 512, 0, True)
    n.conv6_2, n.relu6_2 = conv_relu(n.relu6_1, 3, 1, 512, 0, True)
    n.upconv2, n.concat2 = upconv_concat(n.relu6_2, 2, 2, 256, 0, 16, n.relu3_2)

    # decoder => level 2
    n.conv7_1, n.relu7_1 = conv_relu(n.concat2, 3, 1, 256, 0, True)
    n.conv7_2, n.relu7_2 = conv_relu(n.relu7_1, 3, 1, 256, 0, True)
    n.upconv3, n.concat3 = upconv_concat(n.relu7_2, 2, 2, 128, 0, 40, n.relu2_2)

    # decoder => level 3
    n.conv8_1, n.relu8_1 = conv_relu(n.concat3, 3, 1, 128, 0, True)
    n.conv8_2, n.relu8_2 = conv_relu(n.relu8_1, 3, 1, 128, 0, True)
    n.upconv4, n.concat4 = upconv_concat(n.relu8_2, 2, 2, 64, 0, 88, n.relu1_2)

    # decoder => level 4
    n.conv9_1, n.relu9_1 = conv_relu(n.concat4, 3, 1, 64, 0, True)
    n.conv9_2, n.relu9_2 = conv_relu(n.relu9_1, 3, 1, 64, 0, True)

    n.score = L.Convolution(n.relu9_2, kernel_size=1, num_output=3, pad=0)
    # n.labelcrop = L.Crop(n.label, n.score, crop_param={'axis': 2, 'offset': 92})
    # n.loss = L.SoftmaxWithLoss(n.score, n.labelcrop, loss_param={'ignore_label': 3}, propagate_down=[True, False])
    n.argmax = L.ArgMax(n.score, argmax_param={'axis': 1},
                        include=dict(phase=caffe.TEST))
    # n.acc, accuracy_by_class = L.Accuracy(n.score, n.labelcrop, accuracy_param={'axis': 1}, include=dict(phase=caffe.TEST), ntop=2)
    # n.confmat = L.Python(n.argmax, n.labelcrop, python_param={'module': 'python_confmat', 'layer': 'PythonConfMat', 'param_str': '{"test_iter":3780}'}, include=dict(phase=caffe.TEST))
    return n.to_proto()
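# --- Aside (assumption-checking sketch, not from the source) ---
# The crop offsets 4, 16, 40, 88 passed to upconv_concat() above follow
# the usual valid-padding U-Net recurrence: each shallower skip doubles
# the per-side offset of the level below, and the four 3x3 valid convs
# at the deeper level (two encoder, two decoder) trim another 8 px per
# side once doubled by the 2x upsampling.
offset = 4
for level in (1, 2, 3, 4):
    print("decoder level {}: crop offset {}".format(level, offset))
    offset = 2 * offset + 8  # yields 4, 16, 40, 88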
def create(self, dataLayerParams, phase="train"):
    n = caffe.NetSpec()
    n.data, n.label = L.Python(module="VDataLayer", layer=self.dataLayer,
                               ntop=2, param_str=str(dataLayerParams))
    n.input_conv = L.Convolution(n.data, num_output=16, kernel_size=1,
                                 stride=1, pad=1, bias_term=False,
                                 param=[dict(lr_mult=1, decay_mult=1)],
                                 weight_filler=dict(type="xavier"))
    n.input_relu = L.ReLU(n.input_conv, in_place=False)
    for i in range(len(self.stages)):
        for j in range(self.stages[i]):
            stageString = self.resnetString
            bottomString = 'n.input_relu'
            if (i != 0) or (j != 0):
                bottomString = 'n.res{}_add'.format(
                    str(sum(self.stages[:i]) + j))
            exec(
                stageString.replace('(bottom)', bottomString).replace(
                    '(output)', str(2**i * 64)).replace(
                        '(n)', str(sum(self.stages[:i]) + j + 1)))
    exec(
        'n.conv_output = L.Convolution(n.res{}_add, num_output=2, '
        'kernel_size=1, stride=1, pad=1, bias_term=False, '
        'param=[dict(lr_mult=1, decay_mult=1)], '
        'weight_filler=dict(type="xavier"))'.format(str(sum(self.stages))))

    # reshape result and label
    n.flat_output = L.Reshape(n.conv_output,
                              reshape_param={"shape": {"dim": [0, 2, -1]}})
    n.flat_label = L.Reshape(n.label,
                             reshape_param={"shape": {"dim": [0, 1, -1]}})

    if phase == "train":
        n.softmax_out = L.Softmax(n.flat_output)
        n.loss = L.DiceLoss(n.softmax_out, n.flat_label)
    elif phase == "test":
        n.softmax_out = L.Softmax(n.flat_output)
        n.accu = L.DiceLoss(n.softmax_out, n.flat_label)
    else:  # deploy
        n.softmax_out = L.Softmax(n.flat_output)
        n.result = L.ArgMax(n.softmax_out, argmax_param=dict(axis=1))
    return n.to_proto()
def buildExecutableNet(lmdb_images, lmdb_labels, batch_size, phase):
    n = caffe.NetSpec()
    pref = common.layer_prefix

    # LOAD DATA (test nets always run with batch size 1)
    if phase == "Test":
        batch_size = 1
    n.data = L.Data(batch_size=batch_size, backend=P.Data.LMDB,
                    source=lmdb_images,
                    transform_param=dict(mean_file=common.mean_file))
    n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB,
                     source=lmdb_labels)

    # Embed net....
    output = multinet(n.data, n, phase, pref)

    # Upsample to full size (fixed weights upsampling)
    setattr(
        n, pref + "score_temp3",
        L.Deconvolution(output,
                        param=[
                            dict(lr_mult=common.lw, decay_mult=common.lw),
                            dict(lr_mult=common.lb, decay_mult=0)
                        ],
                        convolution_param=dict(
                            kernel_size=16,
                            stride=8,
                            num_output=common.c,
                            pad=4,
                            weight_filler=dict(type='bilinear'))))
    score_temp3 = getattr(n, pref + "score_temp3")
    batch_norm(pref + "score_temp3", n, "temp3", pref, "", phase)

    if phase == "Train":
        n.loss = L.SoftmaxWithLoss(score_temp3, n.label,
                                   loss_param=dict(normalize=False,
                                                   ignore_label=common.ig_lbl))
    if phase == "Test":
        n.score_argmax = L.ArgMax(score_temp3, argmax_param=dict(axis=1))
        n.class_iou = L.IntersectionOverUnion(
            n.score_argmax, n.label,
            parse_iou_param=dict(num_labels=common.c,
                                 ignore_label=common.ig_lbl,
                                 total_im_num=common.testset_size))
    return n.to_proto()
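# --- Hedged sketch (standard Caffe idiom, not taken from this snippet) ---
# The deconvolution above is bilinear-initialized but still learnable (its
# lr_mult values come from common). The widespread fixed-upsampling variant
# freezes it as pure 8x bilinear interpolation, one group per channel;
# num_classes below is an assumed value.
import caffe
from caffe import layers as L

num_classes = 19
spec = caffe.NetSpec()
spec.score = L.Input(input_param=dict(shape=dict(dim=[1, num_classes, 32, 32])))
spec.upsampled = L.Deconvolution(
    spec.score,
    param=[dict(lr_mult=0, decay_mult=0)],  # lr_mult=0: kernel never updates
    convolution_param=dict(kernel_size=16, stride=8, pad=4,
                           num_output=num_classes, group=num_classes,
                           bias_term=False,
                           weight_filler=dict(type='bilinear')))
print(spec.to_proto())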
def compute_final_spixel_labels(pixel_spixel_assoc, spixel_init,
                                num_spixels_h, num_spixels_w):
    # Compute new superpixel indices
    rel_label = L.ArgMax(pixel_spixel_assoc,
                         argmax_param=dict(axis=1),
                         propagate_down=False)
    new_spix_indices = L.RelToAbsIndex(
        rel_label, spixel_init,
        rel_to_abs_index_param=dict(num_spixels_h=int(num_spixels_h),
                                    num_spixels_w=int(num_spixels_w)),
        propagate_down=[False, False])
    return new_spix_indices
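# --- Illustrative check (not from the source) ---
# ArgMax has no defined gradient, so compute_final_spixel_labels() sets
# propagate_down=False to stop backprop into the association tensor; the
# differentiable path of such superpixel pipelines bypasses this branch.
# Shapes here are assumptions.
import caffe
from caffe import layers as L

spec = caffe.NetSpec()
spec.assoc = L.Input(input_param=dict(shape=dict(dim=[1, 9, 32, 32])))
spec.rel_label = L.ArgMax(spec.assoc, argmax_param=dict(axis=1),
                          propagate_down=False)
print(spec.to_proto())  # the layer carries propagate_down: false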
def generator_proto(mode, batchsize, T, exp_T, question_vocab_size,
                    exp_vocab_size, use_gt=True):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize})
    n.data, n.cont, n.img_feature, n.label, n.exp, n.exp_out, \
        n.exp_cont_1, n.exp_cont_2 = L.Python(
            module='vqa_data_provider_layer', layer='VQADataProviderLayer',
            param_str=mode_str, ntop=8)

    n.embed_ba = L.Embed(n.data, input_dim=question_vocab_size, num_output=300,
                         weight_filler=dict(type='uniform', min=-0.08, max=0.08),
                         param=fixed_weights)
    n.embed = L.TanH(n.embed_ba)

    # LSTM1
    n.lstm1 = L.LSTM(n.embed, n.cont,
                     recurrent_param=dict(
                         num_output=1024,
                         weight_filler=dict(type='uniform', min=-0.08, max=0.08),
                         bias_filler=dict(type='constant', value=0)),
                     param=fixed_weights_lstm)
    tops1 = L.Slice(n.lstm1, ntop=T, slice_param={'axis': 0})
    for i in range(T - 1):
        n.__setattr__('slice_first' + str(i), tops1[int(i)])
        n.__setattr__('silence_data_first' + str(i),
                      L.Silence(tops1[int(i)], ntop=0))
    n.lstm1_out = tops1[T - 1]
    n.lstm1_reshaped = L.Reshape(n.lstm1_out,
                                 reshape_param=dict(shape=dict(dim=[-1, 1024])))
    n.lstm1_reshaped_droped = L.Dropout(n.lstm1_reshaped,
                                        dropout_param={'dropout_ratio': 0.3})
    n.lstm1_droped = L.Dropout(n.lstm1, dropout_param={'dropout_ratio': 0.3})

    # LSTM2
    n.lstm2 = L.LSTM(n.lstm1_droped, n.cont,
                     recurrent_param=dict(
                         num_output=1024,
                         weight_filler=dict(type='uniform', min=-0.08, max=0.08),
                         bias_filler=dict(type='constant', value=0)),
                     param=fixed_weights_lstm)
    tops2 = L.Slice(n.lstm2, ntop=T, slice_param={'axis': 0})
    for i in range(T - 1):
        n.__setattr__('slice_second' + str(i), tops2[int(i)])
        n.__setattr__('silence_data_second' + str(i),
                      L.Silence(tops2[int(i)], ntop=0))
    n.lstm2_out = tops2[T - 1]
    n.lstm2_reshaped = L.Reshape(n.lstm2_out,
                                 reshape_param=dict(shape=dict(dim=[-1, 1024])))
    n.lstm2_reshaped_droped = L.Dropout(n.lstm2_reshaped,
                                        dropout_param={'dropout_ratio': 0.3})
    concat_bottom = [n.lstm1_reshaped_droped, n.lstm2_reshaped_droped]
    n.lstm_12 = L.Concat(*concat_bottom)

    # Tile question feature
    n.q_emb_resh = L.Reshape(n.lstm_12,
                             reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.q_emb_tiled_1 = L.Tile(n.q_emb_resh, axis=2, tiles=14)
    n.q_emb_resh_tiled = L.Tile(n.q_emb_tiled_1, axis=3, tiles=14)

    # Embed image feature
    n.i_emb = L.Convolution(n.img_feature, kernel_size=1, stride=1,
                            num_output=2048, pad=0,
                            weight_filler=dict(type='xavier'),
                            param=fixed_weights)

    # Eltwise product and normalization
    n.eltwise = L.Eltwise(n.q_emb_resh_tiled, n.i_emb,
                          eltwise_param={'operation': P.Eltwise.PROD})
    n.eltwise_sqrt = L.SignedSqrt(n.eltwise)
    n.eltwise_l2 = L.L2Normalize(n.eltwise_sqrt)
    n.eltwise_drop = L.Dropout(n.eltwise_l2,
                               dropout_param={'dropout_ratio': 0.3})

    # Attention for VQA
    n.att_conv1 = L.Convolution(n.eltwise_drop, kernel_size=1, stride=1,
                                num_output=512, pad=0,
                                weight_filler=dict(type='xavier'),
                                param=fixed_weights)
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1,
                                num_output=1, pad=0,
                                weight_filler=dict(type='xavier'),
                                param=fixed_weights)
    n.att_reshaped = L.Reshape(
        n.att_conv2, reshape_param=dict(shape=dict(dim=[-1, 1, 14 * 14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att_map = L.Reshape(
        n.att_softmax, reshape_param=dict(shape=dict(dim=[-1, 1, 14, 14])))
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1), ntop=1)
    n.att_feature = L.SoftAttention(n.img_feature, n.att_map, dummy)
    n.att_feature_resh = L.Reshape(
        n.att_feature, reshape_param=dict(shape=dict(dim=[-1, 2048])))

    # Eltwise product + normalization again for VQA
    n.i_emb2 = L.InnerProduct(n.att_feature_resh, num_output=2048,
                              weight_filler=dict(type='xavier'),
                              param=fixed_weights)
    n.eltwise2 = L.Eltwise(n.lstm_12, n.i_emb2,
                           eltwise_param={'operation': P.Eltwise.PROD})
    n.eltwise2_sqrt = L.SignedSqrt(n.eltwise2)
    n.eltwise2_l2 = L.L2Normalize(n.eltwise2_sqrt)
    n.eltwise2_drop = L.Dropout(n.eltwise2_l2,
                                dropout_param={'dropout_ratio': 0.3})
    n.prediction = L.InnerProduct(n.eltwise2_drop, num_output=3000,
                                  weight_filler=dict(type='xavier'),
                                  param=fixed_weights)

    # Take the GT answer, or take the VQA model's logits and embed the
    # predicted answer
    if use_gt:
        n.exp_emb_ans = L.Embed(n.label, input_dim=3000, num_output=300,
                                weight_filler=dict(type='uniform',
                                                   min=-0.08, max=0.08))
    else:
        n.vqa_ans = L.ArgMax(n.prediction, axis=1)
        n.exp_emb_ans = L.Embed(n.vqa_ans, input_dim=3000, num_output=300,
                                weight_filler=dict(type='uniform',
                                                   min=-0.08, max=0.08))
    n.exp_emb_ans_tanh = L.TanH(n.exp_emb_ans)
    n.exp_emb_ans2 = L.InnerProduct(n.exp_emb_ans_tanh, num_output=2048,
                                    weight_filler=dict(type='xavier'))

    # Merge VQA answer and visual+textual feature
    n.exp_emb_resh = L.Reshape(
        n.exp_emb_ans2, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.exp_emb_tiled_1 = L.Tile(n.exp_emb_resh, axis=2, tiles=14)
    n.exp_emb_tiled = L.Tile(n.exp_emb_tiled_1, axis=3, tiles=14)
    # n.exp_eltwise = L.Eltwise(n.eltwise_drop, n.exp_emb_tiled, eltwise_param={'operation': P.Eltwise.PROD})
    n.eltwise_emb = L.Convolution(n.eltwise, kernel_size=1, stride=1,
                                  num_output=2048, pad=0,
                                  weight_filler=dict(type='xavier'))
    n.exp_eltwise = L.Eltwise(n.eltwise_emb, n.exp_emb_tiled,
                              eltwise_param={'operation': P.Eltwise.PROD})
    n.exp_eltwise_sqrt = L.SignedSqrt(n.exp_eltwise)
    n.exp_eltwise_l2 = L.L2Normalize(n.exp_eltwise_sqrt)
    n.exp_eltwise_drop = L.Dropout(n.exp_eltwise_l2,
                                   dropout_param={'dropout_ratio': 0.3})

    # Attention for Explanation
    n.exp_att_conv1 = L.Convolution(n.exp_eltwise_drop, kernel_size=1,
                                    stride=1, num_output=512, pad=0,
                                    weight_filler=dict(type='xavier'))
    n.exp_att_conv1_relu = L.ReLU(n.exp_att_conv1)
    n.exp_att_conv2 = L.Convolution(n.exp_att_conv1_relu, kernel_size=1,
                                    stride=1, num_output=1, pad=0,
                                    weight_filler=dict(type='xavier'))
    n.exp_att_reshaped = L.Reshape(
        n.exp_att_conv2, reshape_param=dict(shape=dict(dim=[-1, 1, 14 * 14])))
    n.exp_att_softmax = L.Softmax(n.exp_att_reshaped, axis=2)
    n.exp_att_map = L.Reshape(
        n.exp_att_softmax, reshape_param=dict(shape=dict(dim=[-1, 1, 14, 14])))
    exp_dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                            data_filler=dict(type='constant', value=1), ntop=1)
    n.exp_att_feature_prev = L.SoftAttention(n.img_feature, n.exp_att_map,
                                             exp_dummy)
    n.exp_att_feature_resh = L.Reshape(
        n.exp_att_feature_prev, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.exp_att_feature_embed = L.InnerProduct(n.exp_att_feature_resh,
                                             num_output=2048,
                                             weight_filler=dict(type='xavier'))
    n.exp_lstm12_embed = L.InnerProduct(n.lstm_12, num_output=2048,
                                        weight_filler=dict(type='xavier'))
    n.exp_eltwise2 = L.Eltwise(n.exp_lstm12_embed, n.exp_att_feature_embed,
                               eltwise_param={'operation': P.Eltwise.PROD})
    n.exp_att_feature = L.Eltwise(n.exp_emb_ans2, n.exp_eltwise2,
                                  eltwise_param={'operation': P.Eltwise.PROD})
    n.silence_exp_att = L.Silence(n.exp_att_feature, ntop=0)
    return n.to_proto()
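# --- Hedged sketch of the use_gt=False pattern above (names illustrative) ---
# ArgMax collapses the 3000-way logits to one answer id per sample, which
# Embed then looks up exactly as it would the ground-truth label.
import caffe
from caffe import layers as L

spec = caffe.NetSpec()
spec.logits = L.Input(input_param=dict(shape=dict(dim=[8, 3000])))
spec.ans_id = L.ArgMax(spec.logits, axis=1)  # one index per sample
spec.ans_emb = L.Embed(spec.ans_id, input_dim=3000, num_output=300,
                       weight_filler=dict(type='uniform', min=-0.08, max=0.08))
print(spec.to_proto())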
def act_proto(mode, batchsize, exp_vocab_size, use_gt=True):
    n = caffe.NetSpec()
    mode_str = json.dumps({'mode': mode, 'batchsize': batchsize})
    n.img_feature, n.label, n.exp, n.exp_out, n.exp_cont_1, n.exp_cont_2 = \
        L.Python(module='activity_data_provider_layer',
                 layer='ActivityDataProviderLayer',
                 param_str=mode_str, ntop=6)

    # Attention
    n.att_conv1 = L.Convolution(n.img_feature, kernel_size=1, stride=1,
                                num_output=512, pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_conv1_relu = L.ReLU(n.att_conv1)
    n.att_conv2 = L.Convolution(n.att_conv1_relu, kernel_size=1, stride=1,
                                num_output=1, pad=0,
                                weight_filler=dict(type='xavier'))
    n.att_reshaped = L.Reshape(
        n.att_conv2, reshape_param=dict(shape=dict(dim=[-1, 1, 14 * 14])))
    n.att_softmax = L.Softmax(n.att_reshaped, axis=2)
    n.att_map = L.Reshape(
        n.att_softmax, reshape_param=dict(shape=dict(dim=[-1, 1, 14, 14])))
    dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                        data_filler=dict(type='constant', value=1), ntop=1)
    n.att_feature = L.SoftAttention(n.img_feature, n.att_map, dummy)
    n.att_feature_resh = L.Reshape(
        n.att_feature, reshape_param=dict(shape=dict(dim=[-1, 2048])))

    # Prediction
    n.prediction = L.InnerProduct(n.att_feature_resh,
                                  num_output=config.NUM_OUTPUT_UNITS,
                                  weight_filler=dict(type='xavier'),
                                  param=fixed_weights)

    # Take the GT answer, or take the model's logits and embed the
    # predicted answer
    if use_gt:
        n.exp_emb_ans = L.Embed(n.label, input_dim=config.NUM_OUTPUT_UNITS,
                                num_output=300,
                                weight_filler=dict(type='uniform',
                                                   min=-0.08, max=0.08))
    else:
        n.vqa_ans = L.ArgMax(n.prediction, axis=1)
        n.exp_emb_ans = L.Embed(n.vqa_ans, input_dim=config.NUM_OUTPUT_UNITS,
                                num_output=300,
                                weight_filler=dict(type='uniform',
                                                   min=-0.08, max=0.08))
    n.exp_emb_ans_tanh = L.TanH(n.exp_emb_ans)
    n.exp_emb_ans2 = L.InnerProduct(n.exp_emb_ans_tanh, num_output=2048,
                                    weight_filler=dict(type='xavier'))

    # Merge activity answer and visual feature
    n.exp_emb_resh = L.Reshape(
        n.exp_emb_ans2, reshape_param=dict(shape=dict(dim=[-1, 2048, 1, 1])))
    n.exp_emb_tiled_1 = L.Tile(n.exp_emb_resh, axis=2, tiles=14)
    n.exp_emb_tiled = L.Tile(n.exp_emb_tiled_1, axis=3, tiles=14)
    n.img_embed = L.Convolution(n.img_feature, kernel_size=1, stride=1,
                                num_output=2048, pad=0,
                                weight_filler=dict(type='xavier'))
    n.exp_eltwise = L.Eltwise(n.img_embed, n.exp_emb_tiled,
                              eltwise_param={'operation': P.Eltwise.PROD})
    n.exp_eltwise_sqrt = L.SignedSqrt(n.exp_eltwise)
    n.exp_eltwise_l2 = L.L2Normalize(n.exp_eltwise_sqrt)
    n.exp_eltwise_drop = L.Dropout(n.exp_eltwise_l2,
                                   dropout_param={'dropout_ratio': 0.3})

    # Attention for Explanation
    n.exp_att_conv1 = L.Convolution(n.exp_eltwise_drop, kernel_size=1,
                                    stride=1, num_output=512, pad=0,
                                    weight_filler=dict(type='xavier'))
    n.exp_att_conv1_relu = L.ReLU(n.exp_att_conv1)
    n.exp_att_conv2 = L.Convolution(n.exp_att_conv1_relu, kernel_size=1,
                                    stride=1, num_output=1, pad=0,
                                    weight_filler=dict(type='xavier'))
    n.exp_att_reshaped = L.Reshape(
        n.exp_att_conv2, reshape_param=dict(shape=dict(dim=[-1, 1, 14 * 14])))
    n.exp_att_softmax = L.Softmax(n.exp_att_reshaped, axis=2)
    n.exp_att_map = L.Reshape(
        n.exp_att_softmax, reshape_param=dict(shape=dict(dim=[-1, 1, 14, 14])))
    exp_dummy = L.DummyData(shape=dict(dim=[batchsize, 1]),
                            data_filler=dict(type='constant', value=1), ntop=1)
    n.exp_att_feature_prev = L.SoftAttention(n.img_feature, n.exp_att_map,
                                             exp_dummy)
    n.exp_att_feature_resh = L.Reshape(
        n.exp_att_feature_prev, reshape_param=dict(shape=dict(dim=[-1, 2048])))
    n.exp_att_feature_embed = L.InnerProduct(n.exp_att_feature_resh,
                                             num_output=2048,
                                             weight_filler=dict(type='xavier'))
    n.exp_att_feature = L.Eltwise(n.exp_emb_ans2, n.exp_att_feature_embed,
                                  eltwise_param={'operation': P.Eltwise.PROD})
    n.silence_exp_att = L.Silence(n.exp_att_feature, ntop=0)
    return n.to_proto()
def test_argmax2(self):
    n = caffe.NetSpec()
    n.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
    n.argmax1 = L.ArgMax(n.input1, axis=-1)
    self._test_model(*self._netspec_to_model(n, 'argmax2'))
def create_network_structure(self):
    self.pad = (self.kernel_size - 1) // 2  # integer division (Python 3 safe)
    self.net_spec = caffe.NetSpec()
    self.net_spec.data = L.Input(
        ntop=1,
        input_param={
            'shape': {
                'dim': [self.batch_size, self.data_channels, self.input_size,
                        self.input_size, self.input_size]
            }
        })
    self.net_spec.target = L.Input(
        ntop=1,
        input_param={
            'shape': {
                'dim': [self.batch_size, self.label_channels, self.input_size,
                        self.input_size, self.input_size]
            }
        },
        exclude={'stage': 'deploy'})

    last_layer = self.net_spec.data
    for i in range(1, self.num_blocks + 1):
        last_layer = self.add_contraction_block(last_layer, i)
    if self.do_dropout:
        last_layer = L.Dropout(last_layer, dropout_ratio=0.4, in_place=True)

    if self.use_batchnorm:
        last_layer = self.add_batchnormscale(
            name='encode_1',
            input=L.ReLU(L.Convolution(
                last_layer, pad=self.pad, kernel_size=self.kernel_size,
                num_output=self.base_n_filters * pow(2, self.num_blocks),
                weight_filler=self.weight_filler), in_place=True))
        last_layer = self.add_batchnormscale(
            name='encode_2',
            input=L.ReLU(L.Convolution(
                last_layer, pad=self.pad, kernel_size=self.kernel_size,
                num_output=self.base_n_filters * pow(2, self.num_blocks),
                weight_filler=self.weight_filler), in_place=True))
    else:
        last_layer = self.add_conv(last_layer, name='encode_1',
                                   filter_mult=self.num_blocks)
        last_layer = self.add_conv(last_layer, name='encode_2',
                                   filter_mult=self.num_blocks)

    for i in range(1, self.num_blocks + 1)[::-1]:
        last_layer = self.add_expansion_block(last_layer, i)

    self.net_spec.seg = L.Convolution(last_layer, pad=0, kernel_size=1,
                                      num_output=self.num_classes,
                                      weight_filler=self.weight_filler)
    self.net_spec.softmax = L.Softmax(self.net_spec.seg)
    self.net_spec.argmax = L.ArgMax(self.net_spec.softmax, axis=1)
    self.net_spec.silence = L.Silence(self.net_spec.argmax, ntop=0,
                                      include={'phase': caffe.TRAIN})
    self.net_spec.target_argmax = L.ArgMax(self.net_spec.target, axis=1,
                                           exclude={'stage': 'deploy'})

    if self.loss_func == 'xent':
        if self.ignore_label is None:
            self.net_spec.loss = L.SoftmaxWithLoss(
                self.net_spec.seg, self.net_spec.target_argmax,
                exclude={'stage': 'deploy'})
            self.net_spec.accuracy = L.Accuracy(
                self.net_spec.seg, self.net_spec.target_argmax,
                exclude={'stage': 'deploy'})
        else:
            self.net_spec.loss = L.SoftmaxWithLoss(
                self.net_spec.seg, self.net_spec.target_argmax,
                exclude={'stage': 'deploy'},
                loss_param={'ignore_label': self.ignore_label})
            self.net_spec.accuracy = L.Accuracy(
                self.net_spec.seg, self.net_spec.target_argmax,
                exclude={'stage': 'deploy'},
                accuracy_param={'ignore_label': self.ignore_label})
    elif self.loss_func == 'dice':
        if self.ignore_label is None:
            self.net_spec.loss = L.Python(
                self.net_spec.softmax, self.net_spec.target, loss_weight=1,
                python_param=dict(module='DiceLoss', layer='DiceLossLayer'),
                exclude={'stage': 'deploy'})
            self.net_spec.accuracy = L.Accuracy(
                self.net_spec.seg, self.net_spec.target_argmax,
                exclude={'stage': 'deploy'})
        else:
            self.net_spec.loss = L.Python(
                self.net_spec.softmax, self.net_spec.target, loss_weight=1,
                python_param=dict(module='DiceLoss', layer='DiceLossLayer',
                                  param_str="{'param1': " +
                                  str(self.ignore_label) + "}"),
                exclude={'stage': 'deploy'})
            self.net_spec.accuracy = L.Accuracy(
                self.net_spec.seg, self.net_spec.target_argmax,
                exclude={'stage': 'deploy'},
                accuracy_param={'ignore_label': self.ignore_label})
    elif self.loss_func == 'both':
        if self.ignore_label is None:
            self.net_spec.xent_loss = L.SoftmaxWithLoss(
                self.net_spec.seg, self.net_spec.target_argmax,
                exclude={'stage': 'deploy'}, loss_weight=10)
            self.net_spec.loss = L.Python(
                self.net_spec.softmax, self.net_spec.target, loss_weight=1,
                python_param=dict(module='DiceLoss', layer='DiceLossLayer'),
                exclude={'stage': 'deploy'})
            self.net_spec.accuracy = L.Accuracy(
                self.net_spec.seg, self.net_spec.target_argmax,
                exclude={'stage': 'deploy'})
        else:
            self.net_spec.xent_loss = L.SoftmaxWithLoss(
                self.net_spec.seg, self.net_spec.target_argmax,
                exclude={'stage': 'deploy'}, loss_weight=10,
                loss_param={'ignore_label': self.ignore_label})
            self.net_spec.loss = L.Python(
                self.net_spec.softmax, self.net_spec.target, loss_weight=1,
                python_param=dict(module='DiceLoss', layer='DiceLossLayer',
                                  param_str="{'param1': " +
                                  str(self.ignore_label) + "}"),
                exclude={'stage': 'deploy'})
            self.net_spec.accuracy = L.Accuracy(
                self.net_spec.seg, self.net_spec.target_argmax,
                exclude={'stage': 'deploy'},
                accuracy_param={'ignore_label': self.ignore_label})
            self.net_spec.dice = L.Python(
                self.net_spec.softmax, self.net_spec.target, loss_weight=1,
                python_param=dict(module='DiceIndex', layer='DiceIndexLayer'),
                exclude={'stage': 'deploy'})
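# --- Hedged usage sketch for the all-in-one net above (file name assumed) ---
# The include/exclude rules select layer subsets at load time: the phase
# picks TRAIN vs TEST layers, and recent BVLC Caffe builds accept a
# `stages` argument so the 'deploy' stage drops the target input and the
# loss/accuracy layers that carry exclude={'stage': 'deploy'}.
import caffe

train_net = caffe.Net('unet3d.prototxt', caffe.TRAIN)
deploy_net = caffe.Net('unet3d.prototxt', caffe.TEST, stages=['deploy'])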
def create_unet_model(batch_size, num_classes, input_size, base_n_filters,
                      output_file):
    kernel_size = 3
    pad = (kernel_size - 1) // 2  # integer division (Python 3 safe)
    do_dropout = True
    weight_filler = dict(type='msra')

    n = caffe.NetSpec()
    n.data = L.Input(ntop=1, input_param={
        'shape': {'dim': [batch_size, 1, input_size, input_size]}})
    n.target = L.Input(ntop=1, input_param={
        'shape': {'dim': [batch_size, 1, input_size, input_size]}},
        exclude={'stage': 'deploy'})

    # contracting path
    n.contr_1_1 = L.BatchNorm(L.ReLU(L.Convolution(
        n.data, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters, weight_filler=weight_filler),
        in_place=True), in_place=True)
    n.contr_1_2 = L.BatchNorm(L.ReLU(L.Convolution(
        n.contr_1_1, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters, weight_filler=weight_filler),
        in_place=True), in_place=True)
    n.pool_1 = L.Pooling(n.contr_1_2, kernel_size=2, stride=2,
                         pool=P.Pooling.MAX)

    n.contr_2_1 = L.BatchNorm(L.ReLU(L.Convolution(
        n.pool_1, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters * 2, weight_filler=weight_filler),
        in_place=True), in_place=True)
    n.contr_2_2 = L.BatchNorm(L.ReLU(L.Convolution(
        n.contr_2_1, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters * 2, weight_filler=weight_filler),
        in_place=True), in_place=True)
    n.pool_2 = L.Pooling(n.contr_2_2, kernel_size=2, stride=2,
                         pool=P.Pooling.MAX)

    n.contr_3_1 = L.BatchNorm(L.ReLU(L.Convolution(
        n.pool_2, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters * 4, weight_filler=weight_filler),
        in_place=True), in_place=True)
    n.contr_3_2 = L.BatchNorm(L.ReLU(L.Convolution(
        n.contr_3_1, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters * 4, weight_filler=weight_filler),
        in_place=True), in_place=True)
    n.pool_3 = L.Pooling(n.contr_3_2, kernel_size=2, stride=2,
                         pool=P.Pooling.MAX)

    n.contr_4_1 = L.BatchNorm(L.ReLU(L.Convolution(
        n.pool_3, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters * 8, weight_filler=weight_filler),
        in_place=True), in_place=True)
    n.contr_4_2 = L.BatchNorm(L.ReLU(L.Convolution(
        n.contr_4_1, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters * 8, weight_filler=weight_filler),
        in_place=True), in_place=True)
    n.pool_4 = L.Pooling(n.contr_4_2, kernel_size=2, stride=2,
                         pool=P.Pooling.MAX)
    if do_dropout:
        n.pool_4 = L.Dropout(n.pool_4, dropout_ratio=0.4, in_place=True)

    # bottleneck
    n.encode_1 = L.BatchNorm(L.ReLU(L.Convolution(
        n.pool_4, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters * 16, weight_filler=weight_filler),
        in_place=True), in_place=True)
    n.encode_2 = L.BatchNorm(L.ReLU(L.Convolution(
        n.encode_1, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters * 16, weight_filler=weight_filler),
        in_place=True), in_place=True)

    # expanding path
    n.upscale_1 = L.Deconvolution(n.encode_2, convolution_param=dict(
        num_output=base_n_filters * 16, kernel_size=2, stride=2))
    n.concat_1 = L.Concat(n.upscale_1, n.contr_4_2, axis=1)
    n.expand_1_1 = L.BatchNorm(L.ReLU(L.Convolution(
        n.concat_1, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters * 8, weight_filler=weight_filler),
        in_place=True), in_place=True)
    n.expand_1_2 = L.BatchNorm(L.ReLU(L.Convolution(
        n.expand_1_1, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters * 8, weight_filler=weight_filler),
        in_place=True), in_place=True)

    n.upscale_2 = L.Deconvolution(n.expand_1_2, convolution_param=dict(
        num_output=base_n_filters * 8, kernel_size=2, stride=2))
    n.concat_2 = L.Concat(n.upscale_2, n.contr_3_2, axis=1)
    n.expand_2_1 = L.BatchNorm(L.ReLU(L.Convolution(
        n.concat_2, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters * 4, weight_filler=weight_filler),
        in_place=True), in_place=True)
    n.expand_2_2 = L.BatchNorm(L.ReLU(L.Convolution(
        n.expand_2_1, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters * 4, weight_filler=weight_filler),
        in_place=True), in_place=True)

    n.upscale_3 = L.Deconvolution(n.expand_2_2, convolution_param=dict(
        num_output=base_n_filters * 4, kernel_size=2, stride=2))
    n.concat_3 = L.Concat(n.upscale_3, n.contr_2_2, axis=1)
    n.expand_3_1 = L.BatchNorm(L.ReLU(L.Convolution(
        n.concat_3, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters * 2, weight_filler=weight_filler),
        in_place=True), in_place=True)
    n.expand_3_2 = L.BatchNorm(L.ReLU(L.Convolution(
        n.expand_3_1, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters * 2, weight_filler=weight_filler),
        in_place=True), in_place=True)

    n.upscale_4 = L.Deconvolution(n.expand_3_2, convolution_param=dict(
        num_output=base_n_filters * 2, kernel_size=2, stride=2))
    n.concat_4 = L.Concat(n.upscale_4, n.contr_1_2, axis=1)
    n.expand_4_1 = L.BatchNorm(L.ReLU(L.Convolution(
        n.concat_4, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters, weight_filler=weight_filler),
        in_place=True), in_place=True)
    n.expand_4_2 = L.BatchNorm(L.ReLU(L.Convolution(
        n.expand_4_1, pad=pad, kernel_size=kernel_size,
        num_output=base_n_filters, weight_filler=weight_filler),
        in_place=True), in_place=True)

    # heads
    n.seg = L.Convolution(n.expand_4_2, pad=0, kernel_size=1,
                          num_output=num_classes, weight_filler=weight_filler)
    n.softmax = L.Softmax(n.seg, include={'phase': caffe.TEST})
    n.argmax = L.ArgMax(n.softmax, axis=1, include={'phase': caffe.TEST})
    n.loss = L.SoftmaxWithLoss(n.seg, n.target, include={'phase': caffe.TRAIN})
    n.accuracy = L.Accuracy(n.seg, n.target, exclude={'stage': 'deploy'})

    if output_file is not None:
        with open(output_file, 'w') as f:
            f.write(str(n.to_proto()))
    return n
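# --- Hypothetical smoke test for create_unet_model() (sizes assumed) ---
# Write the prototxt, load it in TEST phase, and run one forward pass on
# random data; 'target' is still an input because the deploy stage is off.
import caffe
import numpy as np

create_unet_model(batch_size=1, num_classes=2, input_size=256,
                  base_n_filters=16, output_file='unet2d.prototxt')
net = caffe.Net('unet2d.prototxt', caffe.TEST)
net.blobs['data'].data[...] = np.random.rand(1, 1, 256, 256)
net.blobs['target'].data[...] = 0
net.forward()
print(net.blobs['argmax'].data.shape)  # (1, 1, 256, 256) class-index map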