def start(args):
    # data_shape = [args.depth, args.width, args.height]
    input_shape = [132, 132, 132]
    output_shape = [44, 44, 44]

    # Start a network
    net = caffe.NetSpec()

    # Data input layer
    # net.data = L.MemoryData(dim=[1, 1], ntop=1)
    net.data, net.datai = L.MemoryData(dim=[1, 1] + input_shape, ntop=2)

    # Label input layer
    net.label, net.labeli = L.MemoryData(dim=[1, 3] + output_shape, ntop=2,
                                         include=[dict(phase=0)])

    # Components label layer
    net.components, net.componentsi = L.MemoryData(
        dim=[1, 1] + output_shape, ntop=2,
        include=[dict(phase=0, stage='malis')])

    # Scale input layer
    net.scale, net.scalei = L.MemoryData(
        dim=[1, 3] + output_shape, ntop=2,
        include=[dict(phase=0, stage='euclid')])

    # Neighborhood input layer for the MALIS stage
    net.nhood, net.nhoodi = L.MemoryData(
        dim=[1, 1, 3, 3], ntop=2,
        include=[dict(phase=0, stage='malis')])

    # Silence the unneeded data and label integer values
    net.silence1 = L.Silence(net.datai, net.labeli, net.scalei, ntop=0,
                             include=[dict(phase=0, stage='euclid')])
    net.silence2 = L.Silence(net.datai, net.labeli, net.componentsi,
                             net.nhoodi, ntop=0,
                             include=[dict(phase=0, stage='malis')])
    net.silence3 = L.Silence(net.datai, ntop=0, include=[dict(phase=1)])

    return net
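# Hedged usage sketch for start(): the NetSpec it returns can be serialized
# straight to a prototxt file. This assumes the fork-specific MemoryData
# 'dim' parameter used above; the output file name is a placeholder, not
# part of the original code.
def write_start_net(args, filename='unet_malis.prototxt'):
    net = start(args)
    with open(filename, 'w') as f:
        f.write(str(net.to_proto()))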
def resnet(train_lmdb, test_lmdb, batch_size=256, stages=[2, 2, 2, 2],
           input_size=128, first_output=32, include_acc=False):
    # NOTE: this code can't attach include-phase rules to loose tops, so the
    # later assignments overwrite the earlier ones and only the final
    # TEST-phase MemoryData layer ends up in the generated prototxt.
    data, label = L.Data(source=train_lmdb, backend=P.Data.LMDB,
                         batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=227,
                                              mean_value=[104, 117, 123],
                                              mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TRAIN')))
    data, label = L.Data(source=test_lmdb, backend=P.Data.LMDB,
                         batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=227,
                                              mean_value=[104, 117, 123],
                                              mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TEST')))
    data, label = L.MemoryData(batch_size=batch_size, height=input_size,
                               width=input_size, channels=3, ntop=2,
                               transform_param=dict(mean_value=[104, 117, 123],
                                                    mirror=True),
                               include=dict(phase=getattr(caffe_pb2, 'TEST')))

    # the net itself
    relu1 = conv_factory_relu(data, 3, first_output, stride=1, pad=1)
    relu2 = conv_factory_relu(relu1, 3, first_output, stride=1, pad=1)
    residual = max_pool(relu2, 3, stride=2)

    for i in stages[1:]:
        first_output *= 2
        for j in range(i):
            if j == 0:
                # NOTE: i is the block count of this stage; when i == 0 the
                # inner loop never runs, so this branch is unreachable.
                if i == 0:
                    residual = residual_factory_proj(residual, first_output, 1)
                else:
                    residual = residual_factory_proj(residual, first_output, 2)
            else:
                residual = residual_factory1(residual, first_output)

    glb_pool = L.Pooling(residual, pool=P.Pooling.AVE, global_pooling=True)
    fc = L.InnerProduct(glb_pool, num_output=1000)
    loss = L.SoftmaxWithLoss(fc, label)
    acc = L.Accuracy(fc, label, include=dict(phase=getattr(caffe_pb2, 'TEST')))
    return to_proto(loss, acc)
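# Hedged usage sketch: resnet() returns a NetParameter (via to_proto), so it
# can be written directly to a train/test prototxt. The LMDB paths and the
# output file name here are placeholders.
def make_resnet_prototxt():
    proto = resnet('examples/train_lmdb', 'examples/test_lmdb', batch_size=64)
    with open('resnet_train_test.prototxt', 'w') as f:
        f.write(str(proto))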
def stacked_hourglass_network(batch_size, img_size, nfeats, multi, out_dim,
                              include_acc=False):
    data, label = L.MemoryData(batch_size=batch_size, channels=3,
                               height=img_size, width=img_size, ntop=2,
                               include=dict(phase=0))
    # Alternative input layer; left disabled because it would overwrite the
    # MemoryData top that the loss below depends on.
    # data = L.Input()
    conv1 = conv_bn_relu(data, kernel_size=3, num_output=32, stride=2, pad=1)
    r1 = residual_mobile(conv1, num_output=32, multi=2, num_input=32)
    pool1 = L.Pooling(r1, pool=P.Pooling.MAX, stride=2, kernel_size=2)
    r3 = residual_mobile(pool1, num_output=nfeats, multi=multi, num_input=32)

    hg = hourglass_mobile(r3, num_output=nfeats, num_modual=4, multi=multi,
                          num_input=nfeats)
    hgr = residual_mobile(hg, num_output=nfeats, multi=multi, num_input=nfeats)
    ll = conv_bn_relu(hgr, kernel_size=1, num_output=nfeats, stride=1, pad=0)
    out = deconv(ll, num_output=out_dim, kernel_size=4, stride=2, pad=1)
    loss = L.SigmoidCrossEntropyLoss(out, label)

    if include_acc:
        acc = L.Accuracy(out, label, include=dict(phase=1))
        return to_proto(loss, acc)
    else:
        return to_proto(loss)
def create_dilated_net():
    n = caffe.NetSpec()
    n.im, n.label_1 = L.MemoryData(batch_size=1, channels=3, height=244,
                                   width=244, ntop=2)

    n.conv1_1, n.relu1_1 = conv_relu(n.im, 64, pad=100, param_name='conv1_1')
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64, pad=1, param_name='conv1_2')
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128, pad=1, param_name='conv2_1')
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128, pad=1, param_name='conv2_2')
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256, pad=1, param_name='conv3_1')
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256, pad=1, param_name='conv3_2')
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256, pad=1, param_name='conv3_3')
    n.pool3 = max_pool(n.relu3_3)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512, pad=1, param_name='conv4_1')
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512, pad=1, param_name='conv4_2')
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512, pad=1, param_name='conv4_3')

    n.conv5_1, n.relu5_1 = conv_relu(n.relu4_3, 512, pad=2, dilation=2,
                                     param_name='conv5_1')
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512, pad=2, dilation=2,
                                     param_name='conv5_2')
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512, pad=2, dilation=2,
                                     param_name='conv5_3')

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.conv5_3, 4096, ks=7, dilation=4)
    n.fc7, n.relu7 = conv_relu(n.relu6, 4096, ks=1, pad=0)
    n.fc8 = L.Convolution(n.relu7, kernel_size=1, num_output=21)
    return n
def data_layer_stacked(net, inputdb, mean_file, batch_size, net_type,
                       height, width, nchannels, crop_size=-1):
    transform_pars = {"mean_file": mean_file, "mirror": False}
    if crop_size > 0:
        transform_pars["crop_size"] = crop_size
    if net_type in ["train", "test"]:
        net.data, net.label = L.Data(ntop=2, backend=P.Data.LMDB,
                                     source=inputdb, batch_size=batch_size,
                                     transform_param=transform_pars)
    elif net_type == "deploy":
        net.data, net.label = L.MemoryData(ntop=2, batch_size=batch_size,
                                           height=height, width=width,
                                           channels=nchannels)
    return [net.data], net.label
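# Hedged usage sketch for data_layer_stacked(): building the deploy variant,
# which swaps the LMDB-backed Data layer for a MemoryData layer. The mean
# file name and the shapes below are placeholders; inputdb is unused in
# deploy mode.
def make_deploy_data(height=256, width=256, nchannels=3):
    net = caffe.NetSpec()
    tops, label = data_layer_stacked(net, inputdb=None,
                                     mean_file='mean.binaryproto',
                                     batch_size=1, net_type='deploy',
                                     height=height, width=width,
                                     nchannels=nchannels)
    return net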
def defineTestNet(inputShape, layerNeuronNum):
    layerNum = len(layerNeuronNum) - 1
    n = caffe.NetSpec()
    # n.data = L.Input(input_param=dict(shape=inputShape))
    n.data, n.label = L.MemoryData(memory_data_param=dict(
        batch_size=inputShape[0], channels=inputShape[1],
        height=inputShape[2], width=inputShape[3]), ntop=2)

    flatdata_name = 'flatdata'
    n[flatdata_name] = L.Flatten(n.data)

    for l in range(layerNum):
        if l == 0:
            encoder_name_last = flatdata_name
        else:
            encoder_name_last = relu_en_name
        encoder_name = 'encoder' + str(l + 1)
        n[encoder_name] = L.InnerProduct(n[encoder_name_last],
                                         num_output=layerNeuronNum[l + 1])
        relu_en_name = 'relu_en' + str(l + 1)
        n[relu_en_name] = L.ReLU(n[encoder_name], in_place=True)
    return n.to_proto()
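# Usage sketch with assumed shapes: a 28x28 single-channel input flattened to
# 784 features, encoded through 256- and 64-unit layers (layerNeuronNum[0] is
# the input dimension). The output path is a placeholder.
proto = defineTestNet(inputShape=[64, 1, 28, 28], layerNeuronNum=[784, 256, 64])
with open('encoder_test.prototxt', 'w') as f:
    f.write(str(proto))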
def InceptionResNetV1(train_lmdb, test_lmdb, input_size=299, batch_size=256,
                      stages=[0, 5, 10, 5], first_output=32, include_acc=False):
    # NOTE: this code can't attach include-phase rules to loose tops, so only
    # the last data layer defined (the TEST-phase MemoryData layer) survives.
    data, label = L.Data(source=train_lmdb, backend=P.Data.LMDB,
                         batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=input_size,
                                              mean_value=[104, 117, 123],
                                              mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TRAIN')))
    data, label = L.Data(source=test_lmdb, backend=P.Data.LMDB,
                         batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=input_size,
                                              mean_value=[104, 117, 123],
                                              mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TEST')))
    data, label = L.MemoryData(batch_size=batch_size, height=input_size,
                               width=input_size, channels=3, ntop=2,
                               transform_param=dict(mean_value=[104, 117, 123],
                                                    mirror=True),
                               include=dict(phase=getattr(caffe_pb2, 'TEST')))

    Inception_ResNet_A_input = stem(bottom=data, conv1_num=32, conv2_num=32,
                                    conv3_num=64, conv4_num=80, conv5_num=192,
                                    conv6_num=256)
    for i in xrange(stages[1]):
        Inception_ResNet_A_input = Inception_ResNet_A(
            bottom=Inception_ResNet_A_input, bottom_size=256,
            num1x1=32, num3x3=32)
    Inception_ResNet_B_input = ReductionA(bottom=Inception_ResNet_A_input,
                                          num1x1_k=192, num3x3_l=192,
                                          num3x3_n=256, num3x3_m=384)
    for i in xrange(stages[2]):
        Inception_ResNet_B_input = Inception_ResNet_B(
            bottom=Inception_ResNet_B_input, bottom_size=896,
            num1x1=128, num7x1=128, num1x7=128)
    Inception_ResNet_C_input = ReductionB(bottom=Inception_ResNet_B_input,
                                          num1x1=256, num3x3=384,
                                          num3x3double=256)
    for i in xrange(stages[3]):
        Inception_ResNet_C_input = Inception_ResNet_C(
            bottom=Inception_ResNet_C_input, bottom_size=1792,
            num1x1=192, num1x3=192, num3x1=192)

    glb_pool = L.Pooling(Inception_ResNet_C_input, pool=P.Pooling.AVE,
                         global_pooling=True)
    dropout = L.Dropout(glb_pool, dropout_ratio=0.2)
    fc = L.InnerProduct(dropout, num_output=1000)
    loss = L.SoftmaxWithLoss(fc, label)
    acc = L.Accuracy(fc, label, include=dict(phase=getattr(caffe_pb2, 'TEST')))
    return to_proto(loss, acc)
def normnet(train_lmdb, test_lmdb, batch_size=256, stages=[2, 2, 2, 2],
            input_size=32, first_output=32, include_acc=False):
    # NOTE: this code can't attach include-phase rules to loose tops, so only
    # the last data layer defined (the TEST-phase MemoryData layer) survives.
    data, label = L.Data(source=train_lmdb, backend=P.Data.LMDB,
                         batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=227,
                                              mean_value=[104, 117, 123],
                                              mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TRAIN')))
    data, label = L.Data(source=test_lmdb, backend=P.Data.LMDB,
                         batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=227,
                                              mean_value=[104, 117, 123],
                                              mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TEST')))
    data, label = L.MemoryData(batch_size=batch_size, height=input_size,
                               width=input_size, channels=3, ntop=2,
                               transform_param=dict(mean_value=[104, 117, 123],
                                                    mirror=True),
                               include=dict(phase=getattr(caffe_pb2, 'TEST')))

    # the net itself
    conv1 = conv_factory(data, 3, 96, 1, 1)
    in3a = SimpleFactory(conv1, 32, 32)
    in3b = SimpleFactory(in3a, 32, 48)
    in3c = DownsampleFactory(in3b, 80)
    in4a = SimpleFactory(in3c, 112, 48)
    in4b = SimpleFactory(in4a, 96, 64)
    in4c = SimpleFactory(in4b, 80, 80)
    in4d = SimpleFactory(in4c, 48, 96)
    # for i in range(25):
    #     in4d = SimpleFactory(in4d, 48, 96)
    in4e = DownsampleFactory(in4d, 96)
    in5a = SimpleFactory(in4e, 176, 160)
    in5b = SimpleFactory(in5a, 176, 160)
    pool = avg_pool(in5b, 8)
    fc = L.InnerProduct(pool, num_output=10, weight_filler=dict(type='xavier'))
    loss = L.SoftmaxWithLoss(fc, label)
    acc = L.Accuracy(fc, label, include=dict(phase=getattr(caffe_pb2, 'TEST')))
    return to_proto(loss, acc)
def make_netspec():
    # For reference, the only "documentation" about caffe layer parameters
    # seems to be this page:
    # https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto
    # The input_* values below are module-level globals.
    n = caffe.NetSpec()
    n.data, n.label = L.MemoryData(batch_size=input_batch_size,
                                   channels=input_num_channels,
                                   height=input_num_rows,
                                   width=input_num_cols, ntop=2)
    n.con11 = L.Convolution(n.data, num_output=6, kernel_w=5, kernel_h=5,
                            stride_w=1, stride_h=1, pad_w=2, pad_h=2)
    n.relu10 = L.ReLU(n.con11)
    n.max_pool9 = L.Pooling(n.relu10, pool=P.Pooling.MAX, kernel_w=2,
                            kernel_h=2, stride_w=2, stride_h=2,
                            pad_w=0, pad_h=0)
    n.con8 = L.Convolution(n.max_pool9, num_output=16, kernel_w=5, kernel_h=5,
                           stride_w=1, stride_h=1, pad_w=2, pad_h=2)
    n.relu7 = L.ReLU(n.con8)
    n.max_pool6 = L.Pooling(n.relu7, pool=P.Pooling.MAX, kernel_w=2,
                            kernel_h=2, stride_w=2, stride_h=2,
                            pad_w=0, pad_h=0)
    n.fc5 = L.InnerProduct(n.max_pool6, num_output=120, bias_term=True)
    n.relu4 = L.ReLU(n.fc5)
    n.fc3 = L.InnerProduct(n.relu4, num_output=84, bias_term=True)
    n.relu2 = L.ReLU(n.fc3)
    n.fc1 = L.InnerProduct(n.relu2, num_output=10, bias_term=True)
    return n.to_proto()
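# Hedged sketch of driving the MemoryData layer from NumPy via pycaffe's
# Net.set_input_arrays, which expects contiguous float32 arrays (1-D label
# arrays are reshaped to (N, 1, 1, 1) internally). The prototxt path is a
# placeholder and the input_* globals are carried over from make_netspec().
import numpy as np

def run_lenet_once(prototxt='lenet_memory.prototxt'):
    with open(prototxt, 'w') as f:
        f.write(str(make_netspec()))
    net = caffe.Net(prototxt, caffe.TEST)
    data = np.zeros((input_batch_size, input_num_channels,
                     input_num_rows, input_num_cols), dtype=np.float32)
    labels = np.zeros(input_batch_size, dtype=np.float32)
    net.set_input_arrays(data, labels)
    return net.forward()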
def normnet(train_lmdb, test_lmdb, batch_size=256, stages=[2, 2, 2, 2],
            input_size=28, first_output=32, include_acc=False):
    # NOTE: this code can't attach include-phase rules to loose tops, so only
    # the last data layer defined (the TEST-phase MemoryData layer) survives.
    data, label = L.Data(source=train_lmdb, backend=P.Data.LMDB,
                         batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=227,
                                              mean_value=[104, 117, 123],
                                              mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TRAIN')))
    data, label = L.Data(source=test_lmdb, backend=P.Data.LMDB,
                         batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=227,
                                              mean_value=[104, 117, 123],
                                              mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TEST')))
    data, label = L.MemoryData(batch_size=batch_size, height=input_size,
                               width=input_size, channels=3, ntop=2,
                               transform_param=dict(mean_value=[104, 117, 123],
                                                    mirror=True),
                               include=dict(phase=getattr(caffe_pb2, 'TEST')))

    # the net itself
    batch_norm = L.BatchNorm(data, in_place=True,
                             param=[dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0)])
    ip1 = ip_factory(batch_norm, 500)
    for i in range(30):
        ip1 = ip_factory(ip1, 100)
    fc = L.InnerProduct(ip1, num_output=10)
    loss = L.SoftmaxWithLoss(fc, label)
    acc = L.Accuracy(fc, label, include=dict(phase=getattr(caffe_pb2, 'TEST')))
    return to_proto(loss, acc)
def caffenet(batch_size=32, stack=1):
    n = caffe.NetSpec()
    n.data, n.label = L.MemoryData(batch_size=batch_size, channels=3 * stack,
                                   height=227, width=227, ntop=2)
    # the net itself
    n.conv1, n.relu1 = conv_relu(n.data, 11, 96, stride=4)
    n.pool1 = max_pool(n.relu1, 3, stride=2)
    n.norm1 = L.LRN(n.pool1, local_size=5, alpha=1e-4, beta=0.75)
    n.conv2, n.relu2 = conv_relu(n.norm1, 5, 256, pad=2, group=2)
    n.pool2 = max_pool(n.relu2, 3, stride=2)
    n.norm2 = L.LRN(n.pool2, local_size=5, alpha=1e-4, beta=0.75)
    n.conv3, n.relu3 = conv_relu(n.norm2, 3, 384, pad=1)
    n.conv4, n.relu4 = conv_relu(n.relu3, 3, 384, pad=1, group=2)
    n.conv5, n.relu5 = conv_relu(n.relu4, 3, 256, pad=1, group=2)
    n.pool5 = max_pool(n.relu5, 3, stride=2)
    n.fc6, n.relu6 = fc_relu(n.pool5, 4096)
    n.drop6 = L.Dropout(n.relu6, in_place=True)
    n.fc7, n.relu7 = fc_relu(n.drop6, 4096)
    n.drop7 = L.Dropout(n.relu7, in_place=True)
    n.fc8 = L.InnerProduct(n.drop7, num_output=1000)
    return n
def simple_net(split, initialize_fc8=False):
    n = caffe.NetSpec()
    pydata_params = dict(split=split,
                         mean=(104.00699, 116.66877, 122.67892),
                         seed=1337)
    if split == 'train':
        pydata_params['sbdd_dir'] = '../../data/sbdd/dataset'
        pylayer = 'SBDDSegDataLayer'
        n.cur_im, n.masked_im, n.next_im, n.label = L.Python(
            module='voc_layers', layer=pylayer, ntop=4,
            param_str=str(pydata_params))
    elif split == 'val':
        pydata_params['voc_dir'] = '../../data/pascal/VOC2011'
        pylayer = 'VOCSegDataLayer'
        n.cur_im, n.masked_im, n.next_im, n.label = L.Python(
            module='voc_layers', layer=pylayer, ntop=4,
            param_str=str(pydata_params))
    elif split == 'deploy':
        n.cur_im, n.label_1 = L.MemoryData(batch_size=1, channels=3,
                                           height=244, width=244, ntop=2)
        n.masked_im, n.label_2 = L.MemoryData(batch_size=1, channels=3,
                                              height=244, width=244, ntop=2)
        n.next_im, n.label_3 = L.MemoryData(batch_size=1, channels=3,
                                            height=244, width=244, ntop=2)
    else:
        raise Exception

    # current image vgg-net
    n.conv1_1c, n.relu1_1c = conv_relu(n.cur_im, 64, pad=100, param_name='conv1_1')
    n.conv1_2c, n.relu1_2c = conv_relu(n.relu1_1c, 64, pad=1, param_name='conv1_2')
    n.pool1c = max_pool(n.relu1_2c)
    n.conv2_1c, n.relu2_1c = conv_relu(n.pool1c, 128, pad=1, param_name='conv2_1')
    n.conv2_2c, n.relu2_2c = conv_relu(n.relu2_1c, 128, pad=1, param_name='conv2_2')
    n.pool2c = max_pool(n.relu2_2c)
    n.conv3_1c, n.relu3_1c = conv_relu(n.pool2c, 256, pad=1, param_name='conv3_1')
    n.conv3_2c, n.relu3_2c = conv_relu(n.relu3_1c, 256, pad=1, param_name='conv3_2')
    n.conv3_3c, n.relu3_3c = conv_relu(n.relu3_2c, 256, pad=1, param_name='conv3_3')
    n.pool3c = max_pool(n.relu3_3c)
    n.conv4_1c, n.relu4_1c = conv_relu(n.pool3c, 512, pad=1, param_name='conv4_1')
    n.conv4_2c, n.relu4_2c = conv_relu(n.relu4_1c, 512, pad=1, param_name='conv4_2')
    n.conv4_3c, n.relu4_3c = conv_relu(n.relu4_2c, 512, pad=1, param_name='conv4_3')
    n.conv5_1c, n.relu5_1c = conv_relu(n.relu4_3c, 512, pad=2, dilation=2, param_name='conv5_1')
    n.conv5_2c, n.relu5_2c = conv_relu(n.relu5_1c, 512, pad=2, dilation=2, param_name='conv5_2')
    n.conv5_3c, n.relu5_3c = conv_relu(n.relu5_2c, 512, pad=2, dilation=2, param_name='conv5_3')

    # masked image vgg-net
    n.conv1_1m, n.relu1_1m = conv_relu(n.masked_im, 64, pad=100, param_name='conv1_1')
    n.conv1_2m, n.relu1_2m = conv_relu(n.relu1_1m, 64, pad=1, param_name='conv1_2')
    n.pool1m = max_pool(n.relu1_2m)
    n.conv2_1m, n.relu2_1m = conv_relu(n.pool1m, 128, pad=1, param_name='conv2_1')
    n.conv2_2m, n.relu2_2m = conv_relu(n.relu2_1m, 128, pad=1, param_name='conv2_2')
    n.pool2m = max_pool(n.relu2_2m)
    n.conv3_1m, n.relu3_1m = conv_relu(n.pool2m, 256, pad=1, param_name='conv3_1')
    n.conv3_2m, n.relu3_2m = conv_relu(n.relu3_1m, 256, pad=1, param_name='conv3_2')
    n.conv3_3m, n.relu3_3m = conv_relu(n.relu3_2m, 256, pad=1, param_name='conv3_3')
    n.pool3m = max_pool(n.relu3_3m)
    n.conv4_1m, n.relu4_1m = conv_relu(n.pool3m, 512, pad=1, param_name='conv4_1')
    n.conv4_2m, n.relu4_2m = conv_relu(n.relu4_1m, 512, pad=1, param_name='conv4_2')
    n.conv4_3m, n.relu4_3m = conv_relu(n.relu4_2m, 512, pad=1, param_name='conv4_3')
    n.conv5_1m, n.relu5_1m = conv_relu(n.relu4_3m, 512, pad=2, dilation=2, param_name='conv5_1')
    n.conv5_2m, n.relu5_2m = conv_relu(n.relu5_1m, 512, pad=2, dilation=2, param_name='conv5_2')
    n.conv5_3m, n.relu5_3m = conv_relu(n.relu5_2m, 512, pad=2, dilation=2, param_name='conv5_3')

    # next image vgg-net
    n.conv1_1n, n.relu1_1n = conv_relu(n.next_im, 64, pad=100, param_name='conv1_1')
    n.conv1_2n, n.relu1_2n = conv_relu(n.relu1_1n, 64, pad=1, param_name='conv1_2')
    n.pool1n = max_pool(n.relu1_2n)
    n.conv2_1n, n.relu2_1n = conv_relu(n.pool1n, 128, pad=1, param_name='conv2_1')
    n.conv2_2n, n.relu2_2n = conv_relu(n.relu2_1n, 128, pad=1, param_name='conv2_2')
    n.pool2n = max_pool(n.relu2_2n)
    n.conv3_1n, n.relu3_1n = conv_relu(n.pool2n, 256, pad=1, param_name='conv3_1')
    n.conv3_2n, n.relu3_2n = conv_relu(n.relu3_1n, 256, pad=1, param_name='conv3_2')
    n.conv3_3n, n.relu3_3n = conv_relu(n.relu3_2n, 256, pad=1, param_name='conv3_3')
    n.pool3n = max_pool(n.relu3_3n)
    n.conv4_1n, n.relu4_1n = conv_relu(n.pool3n, 512, pad=1, param_name='conv4_1')
    n.conv4_2n, n.relu4_2n = conv_relu(n.relu4_1n, 512, pad=1, param_name='conv4_2')
    n.conv4_3n, n.relu4_3n = conv_relu(n.relu4_2n, 512, pad=1, param_name='conv4_3')
    n.conv5_1n, n.relu5_1n = conv_relu(n.relu4_3n, 512, pad=2, dilation=2, param_name='conv5_1')
    n.conv5_2n, n.relu5_2n = conv_relu(n.relu5_1n, 512, pad=2, dilation=2, param_name='conv5_2')
    n.conv5_3n, n.relu5_3n = conv_relu(n.relu5_2n, 512, pad=2, dilation=2, param_name='conv5_3')

    # concatenation
    n.concat1 = L.Concat(n.relu5_3c, n.relu5_3m, n.relu5_3n)
    # n.concat1 = n.relu5_3n

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.concat1, 4096, ks=7, dilation=4)
    if split == 'train':
        n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
        n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
        n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
        n.fc8 = L.Convolution(n.drop7, kernel_size=1,
                              param=[dict(lr_mult=1, decay_mult=1),
                                     dict(lr_mult=2, decay_mult=0)],
                              num_output=2)
    else:
        n.fc7, n.relu7 = conv_relu(n.relu6, 4096, ks=1, pad=0)
        if initialize_fc8:
            n.fc8 = L.Convolution(n.relu7, kernel_size=1,
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)],
                                  weight_filler=dict(type='xavier'),
                                  num_output=2)
        else:
            n.fc8 = L.Convolution(n.relu7, kernel_size=1,
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)],
                                  num_output=2)

    n.upscore = L.Deconvolution(n.fc8,
                                convolution_param=dict(
                                    kernel_size=16, stride=8, num_output=2,
                                    weight_filler=dict(type='bilinear'),
                                    bias_term=False),
                                param=dict(lr_mult=0, decay_mult=0))
    n.score = crop(n.upscore, n.next_im)

    if split != 'deploy':
        n.loss = L.SoftmaxWithLoss(n.score, n.label,
                                   loss_param=dict(normalize=False,
                                                   ignore_label=255))
    else:
        n.prop = L.Softmax(n.score)
    return n
def data_layer(shape):
    data, label = L.MemoryData(dim=shape, ntop=2)
    return data, label
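# Hedged usage sketch: data_layer() returns loose tops that to_proto() can
# serialize. The 'dim' form of MemoryData is fork-specific (the same
# convention as the other dim-based snippets in this file); the shape below
# is a placeholder.
data, label = data_layer([1, 3, 224, 224])
print(to_proto(data, label))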
from __future__ import print_function
import math

import caffe
from caffe import layers as L
from caffe import params as P
from caffe import to_proto
from caffe import metalayers as ML

# Start a network
net = caffe.NetSpec()
# Data input layer
net.data = L.MemoryData(dim=[1, 1], ntop=1)
# Label input layer
net.aff_label = L.MemoryData(dim=[1, 3], ntop=1, include=[dict(phase=0)])
# Components label layer
net.comp_label = L.MemoryData(dim=[1, 2], ntop=1,
                              include=[dict(phase=0, stage='malis')])
# Scale input layer
net.scale = L.MemoryData(dim=[1, 3], ntop=1,
                         include=[dict(phase=0, stage='euclid')])
# Neighborhood input layer for the MALIS stage
net.nhood = L.MemoryData(dim=[1, 1, 3, 3], ntop=1,
                         include=[dict(phase=0, stage='malis')])

# USK-Net metalayer
net.unet = ML.UNet(net.data, fmap_start=12, depth=3,
                   fmap_inc_rule=lambda fmaps: int(math.ceil(float(fmaps) * 5)),
                   fmap_dec_rule=lambda fmaps: int(math.ceil(float(fmaps) / 5)),
                   downsampling_strategy=[[2, 2, 2], [2, 2, 2], [3, 3, 3]],
                   dropout=0.0,
                   use_deconv_uppath=False,
                   use_stable_upconv=True)
net.aff_out = L.Convolution(net.unet, kernel_size=[1], num_output=3,
                            param=[dict(lr_mult=1), dict(lr_mult=2)],
                            weight_filler=dict(type='msra'),
                            bias_filler=dict(type='constant'))
# Choose output activation functions
net.aff_pred = L.Sigmoid(net.aff_out, ntop=1, in_place=False)
def long_range_unet(name):
    # Start a network
    net = caffe.NetSpec()
    # Data input layer
    net.data = L.MemoryData(dim=[1, 1], ntop=1)

    n_channels = 12  # TODO

    # Label input layer; the second dim entry is presumably the channel count
    net.aff_label = L.MemoryData(dim=[1, n_channels], ntop=1,
                                 include=[dict(phase=0)])
    # Components label layer (purpose unclear)
    net.comp_label = L.MemoryData(dim=[1, 2], ntop=1,
                                  include=[dict(phase=0, stage='malis')])
    # Scale input layer; again the second entry should be the channel count
    net.scale = L.MemoryData(dim=[1, n_channels], ntop=1,
                             include=[dict(phase=0, stage='euclid')])
    # Neighborhood input layer for the MALIS stage; the shape here is a guess
    net.nhood = L.MemoryData(dim=[1, 1, n_channels, 3], ntop=1,
                             include=[dict(phase=0, stage='malis')])

    # USK-Net metalayer
    net.unet = ML.UNet(
        net.data, fmap_start=12, depth=3,
        fmap_inc_rule=lambda fmaps: int(math.ceil(float(fmaps) * 5)),
        fmap_dec_rule=lambda fmaps: int(math.ceil(float(fmaps) / 5)),
        downsampling_strategy=[[1, 3, 3], [1, 3, 3], [1, 3, 3]],
        dropout=0.0,
        use_deconv_uppath=False,
        use_stable_upconv=True)
    net.aff_out = L.Convolution(net.unet, kernel_size=[1],
                                num_output=n_channels,
                                param=[dict(lr_mult=1), dict(lr_mult=2)],
                                weight_filler=dict(type='msra'),
                                bias_filler=dict(type='constant'))
    # Choose output activation functions
    net.aff_pred = L.Sigmoid(net.aff_out, ntop=1, in_place=False)

    # Choose a loss function and wire in the data, label and scale inputs.
    # Only include it during the training phase (phase = 0).
    net.euclid_loss = L.EuclideanLoss(net.aff_pred, net.aff_label, net.scale,
                                      ntop=0, loss_weight=1.0,
                                      include=[dict(phase=0, stage='euclid')])
    net.malis_loss = L.MalisLoss(net.aff_pred, net.aff_label, net.comp_label,
                                 net.nhood, ntop=0, loss_weight=1.0,
                                 include=[dict(phase=0, stage='malis')])

    # Fix the spatial input dimensions. Note that only spatial dimensions get
    # modified; the minibatch size and the channels/feature maps must be set
    # correctly by the user (this code cannot infer the user's intent). If the
    # call does not seem to terminate, the most likely cause is a wrong number
    # of feature maps / channels in either the MemoryData layers or the
    # network output.
    # This function takes as input:
    #   - the network
    #   - a list of other inputs to test (the nhood input is static and not
    #     spatially testable, so it is excluded here)
    #   - a list of the maximal shapes for each input
    #   - a list of spatial dependencies; e.g. [-1, 0] would mean the Y axis
    #     is a free parameter and the X axis must equal the Y axis
    caffe.fix_input_dims(net,
                         [net.data, net.aff_label, net.comp_label, net.scale],
                         max_shapes=[[84, 268, 268], [100, 100, 100],
                                     [100, 100, 100], [100, 100, 100]],
                         shape_coupled=[-1, -1, 1])

    protonet = net.to_proto()
    protonet.name = name
    # Store the network as prototxt
    with open(protonet.name + '.prototxt', 'w') as f:
        print(protonet, file=f)
def simple_net(split, initialize_fc8=False, cur_shape=None, next_shape=None,
               batch_size=1, num_threads=1, max_queue_size=5):
    # Get crop layer parameters
    tmp_net = caffe.NetSpec()
    tmp_net.im, tmp_net.label = L.MemoryData(batch_size=1, channels=3,
                                             height=244, width=244, ntop=2)
    conv_vgg(tmp_net, tmp_net.im, suffix='', last_layer_pad=0,
             first_layer_pad=100)
    tmp_net.fc6, tmp_net.relu6 = conv_relu(tmp_net.conv5_3, 4096, ks=7,
                                           dilation=4)
    tmp_net.fc7, tmp_net.relu7 = conv_relu(tmp_net.relu6, 4096, ks=1, pad=0)
    tmp_net.fc8 = L.Convolution(tmp_net.relu7, kernel_size=1, num_output=2)
    tmp_net.upscore = L.Deconvolution(tmp_net.fc8,
                                      convolution_param=dict(kernel_size=16,
                                                             stride=8,
                                                             num_output=2))
    ax, a, b = coord_map_from_to(tmp_net.upscore, tmp_net.im)
    assert (a == 1).all(), 'scale mismatch on crop (a = {})'.format(a)
    assert (b <= 0).all(), 'cannot crop negative offset (b = {})'.format(b)
    assert (np.round(b) == b).all(), \
        'cannot crop noninteger offset (b = {})'.format(b)

    # Create network
    n = caffe.NetSpec()
    if split == 'train':
        pydata_params = dict(batch_size=batch_size,
                             im_shape=tuple(next_shape),
                             num_threads=num_threads,
                             max_queue_size=max_queue_size)
        n.cur_im, n.masked_im, n.next_im, n.label = L.Python(
            module='coco_transformed_datalayers_prefetch',
            layer='CocoTransformedDataLayerPrefetch', ntop=4,
            param_str=str(pydata_params))
    elif split == 'val':
        pydata_params = dict(batch_size=batch_size,
                             im_shape=tuple(next_shape),
                             num_threads=num_threads,
                             max_queue_size=max_queue_size)
        n.cur_im, n.masked_im, n.next_im, n.label = L.Python(
            module='coco_transformed_datalayers_prefetch',
            layer='CocoTransformedDataLayerPrefetch', ntop=4,
            param_str=str(pydata_params))
    elif split == 'deploy':
        n.cur_im, n.label_1 = L.MemoryData(batch_size=1, channels=3,
                                           height=244, width=244, ntop=2)
        n.masked_im, n.label_2 = L.MemoryData(batch_size=1, channels=3,
                                              height=244, width=244, ntop=2)
        n.next_im, n.label_3 = L.MemoryData(batch_size=1, channels=3,
                                            height=244, width=244, ntop=2)
    else:
        raise Exception

    if cur_shape is None or next_shape is None:
        concat_pad = np.zeros((2,))
    else:
        concat_pad = (next_shape - cur_shape) / 2.0 / 8.0
    if not all(concat_pad == np.round(concat_pad)):
        raise Exception

    conv_vgg(n, n.cur_im, suffix='c', last_layer_pad=concat_pad,
             first_layer_pad=100)
    conv_vgg(n, n.masked_im, suffix='m', last_layer_pad=concat_pad,
             first_layer_pad=100)
    conv_vgg(n, n.next_im, suffix='n', last_layer_pad=0, first_layer_pad=100)

    # concatenation
    n.concat1 = L.Concat(n.relu5_3c, n.relu5_3m, n.relu5_3n)

    # fully conv
    n.fc6, n.relu6 = conv_relu(n.concat1, 4096, ks=7, dilation=4)
    if split == 'train':
        n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
        n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
        n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
        n.fc8 = L.Convolution(n.drop7, kernel_size=1,
                              param=[dict(lr_mult=1, decay_mult=1),
                                     dict(lr_mult=2, decay_mult=0)],
                              num_output=2)
    else:
        n.fc7, n.relu7 = conv_relu(n.relu6, 4096, ks=1, pad=0)
        if initialize_fc8:
            n.fc8 = L.Convolution(n.relu7, kernel_size=1,
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)],
                                  weight_filler=dict(type='gaussian', std=.01),
                                  num_output=2)
        else:
            n.fc8 = L.Convolution(n.relu7, kernel_size=1,
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)],
                                  num_output=2)

    n.upscore = L.Deconvolution(n.fc8,
                                convolution_param=dict(
                                    kernel_size=16, stride=8, num_output=2,
                                    group=2,
                                    weight_filler=dict(type='bilinear'),
                                    bias_term=False),
                                param=dict(lr_mult=0, decay_mult=0))
    n.score = L.Crop(n.upscore, n.next_im,
                     crop_param=dict(
                         axis=ax + 1,  # +1 for the first cropping dim
                         offset=list(-np.round(b).astype(int))))

    if split != 'deploy':
        n.loss = L.SoftmaxWithLoss(n.score, n.label,
                                   loss_param=dict(ignore_label=255))
    else:
        n.prop = L.Softmax(n.score)
    return n
from __future__ import print_function
import math

import caffe
from caffe import layers as L
from caffe import params as P
from caffe import to_proto
from caffe import metalayers as ML

# Start a network
net = caffe.NetSpec()
# Data input layer
net.data = L.MemoryData(dim=[1, 1], ntop=1)
# Label input layer
net.label = L.MemoryData(dim=[1, 1], ntop=1, include=[dict(phase=0)])
# Scale input layer
net.scale = L.MemoryData(dim=[1, 1], ntop=1, include=[dict(phase=0)])

# USK-Net metalayer
net.unet = ML.UNet(net.data, fmap_start=12, depth=3,
                   fmap_inc_rule=lambda fmaps: int(math.ceil(float(fmaps) * 5)),
                   fmap_dec_rule=lambda fmaps: int(math.ceil(float(fmaps) / 5)),
                   downsampling_strategy=[[2, 2, 2], [2, 2, 2], [3, 3, 3]],
                   dropout=0.0,
                   use_deconv_uppath=False,
                   use_stable_upconv=True)
net.out = L.Convolution(net.unet, kernel_size=[1], num_output=1,
                        param=[dict(lr_mult=1), dict(lr_mult=2)],
                        weight_filler=dict(type='msra'),
                        bias_filler=dict(type='constant'))
# Choose output activation functions
net.pred = L.Sigmoid(net.out, ntop=1, in_place=False)

# Choose a loss function and wire in the data, label and scale inputs.
# Only include it during the training phase (phase = 0).
net.euclid_loss = L.EuclideanLoss(net.pred, net.label, net.scale, ntop=0,
                                  loss_weight=1.0, include=[dict(phase=0)])

# Fix the spatial input dimensions. Note that only spatial dimensions get
# modified; the minibatch size and the channels/feature maps must be set
# correctly by the user.
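# Hedged continuation sketch, following the same fix_input_dims pattern as
# long_range_unet() above (the same fork API is assumed); the max_shapes
# values and the network name are placeholders, not the author's values.
caffe.fix_input_dims(net, [net.data, net.label, net.scale],
                     max_shapes=[[84, 268, 268], [100, 100, 100],
                                 [100, 100, 100]],
                     shape_coupled=[-1, -1, 1])
protonet = net.to_proto()
protonet.name = 'usk_net'
with open(protonet.name + '.prototxt', 'w') as f:
    print(protonet, file=f)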
def createCNN(self, istrain):
    net = caffe.NetSpec()
    # The input definition is identical for the train and test phases
    net.s1_input, net.label = L.MemoryData(batch_size=self.batchsize,
                                           channels=self.nChannels,
                                           height=self.imageHeight,
                                           width=self.imageWidth, ntop=2)

    # STAGE 1
    net.s1_conv1_1, net.s1_relu1_1 = conv_relu(net.s1_input, 3, 64)
    net.s1_batch1_1 = L.BatchNorm(net.s1_relu1_1)
    net.s1_conv1_2, net.s1_relu1_2 = conv_relu(net.s1_batch1_1, 3, 64)
    net.s1_batch1_2 = L.BatchNorm(net.s1_relu1_2)
    net.s1_pool1 = max_pool(net.s1_batch1_2, 2)

    net.s1_conv2_1, net.s1_relu2_1 = conv_relu(net.s1_pool1, 3, 128)
    net.s1_batch2_1 = L.BatchNorm(net.s1_relu2_1)
    net.s1_conv2_2, net.s1_relu2_2 = conv_relu(net.s1_batch2_1, 3, 128)
    net.s1_batch2_2 = L.BatchNorm(net.s1_relu2_2)
    net.s1_pool2 = max_pool(net.s1_batch2_2)

    net.s1_conv3_1, net.s1_relu3_1 = conv_relu(net.s1_pool2, 3, 256)
    net.s1_batch3_1 = L.BatchNorm(net.s1_relu3_1)
    net.s1_conv3_2, net.s1_relu3_2 = conv_relu(net.s1_batch3_1, 3, 256)
    net.s1_batch3_2 = L.BatchNorm(net.s1_relu3_2)
    net.s1_pool3 = max_pool(net.s1_batch3_2)

    net.s1_conv4_1, net.s1_relu4_1 = conv_relu(net.s1_pool3, 3, 512)
    net.s1_batch4_1 = L.BatchNorm(net.s1_relu4_1)
    net.s1_conv4_2, net.s1_relu4_2 = conv_relu(net.s1_batch4_1, 3, 512)
    net.s1_batch4_2 = L.BatchNorm(net.s1_relu4_2)
    net.s1_pool4 = max_pool(net.s1_batch4_2)

    # Dropout is only active during training
    if istrain:
        net.s1_fc1_dropout = L.Dropout(net.s1_pool4, dropout_ratio=0.5,
                                       in_place=True)
    else:
        net.s1_fc1_dropout = net.s1_pool4

    net.s1_fc1, net.s1_fc1_relu = fc_relu(net.s1_fc1_dropout, 256)
    net.s1_fc1_batch = L.BatchNorm(net.s1_fc1_relu)
    net.s1_output = L.InnerProduct(net.s1_fc1_batch, num_output=136,
                                   bias_filler=dict(type='constant', value=0))
    net.s1_landmarks = L.Python(
        net.s1_output, module="InitLandmark", layer="InitLandmark",
        param_str=str(dict(initlandmarks=self.initLandmarks.tolist())))

    if self.nStages == 2:
        addDANStage(net)
        net.output = net.s2_landmarks
    else:
        net.output = net.s1_landmarks

    net.loss = L.Python(net.output, net.label,
                        module="SumOfSquaredLossLayer",
                        layer="SumOfSquaredLossLayer", loss_weight=1)
    return str(net.to_proto())
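# Hedged usage sketch: createCNN() already returns the prototxt text, so it
# can be written out directly. 'dan' is a hypothetical instance of the
# surrounding class, with batchsize, nChannels, imageHeight, imageWidth,
# initLandmarks and nStages assumed to be set elsewhere.
with open('dan_train.prototxt', 'w') as f:
    f.write(dan.createCNN(istrain=True))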