def build_network(self, input_var, target_var):
    from lasagne.layers import InputLayer
    from lasagne.layers import DenseLayer
    from lasagne.layers import NonlinearityLayer
    from lasagne.layers import DropoutLayer
    from lasagne.layers import ReshapeLayer
    from lasagne.layers import Pool2DLayer as PoolLayer
    from lasagne.layers import TransposedConv2DLayer as Deconv2DLayer
    from lasagne.nonlinearities import softmax, sigmoid, tanh
    import cPickle as pickle
    try:
        from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer
    except ImportError:
        from lasagne.layers import Conv2DLayer as ConvLayer
        print_warning("Cannot import 'lasagne.layers.dnn.Conv2DDNNLayer' as it "
                      "requires GPU support and a functional cuDNN installation. "
                      "Falling back on slower convolution function "
                      "'lasagne.layers.Conv2DLayer'.")

    batch_size = settings.BATCH_SIZE
    net = {}
    net['input'] = InputLayer((batch_size, 3, 64, 64), input_var=input_var)
    net['conv1'] = ConvLayer(net['input'], 64, 3, stride=1, pad='same')     # 64x64
    net['pool1'] = PoolLayer(net['conv1'], 2)                               # 32x32
    net['conv2'] = ConvLayer(net['pool1'], 64, 3, stride=1, pad='same')     # 32x32
    net['dropout1'] = DropoutLayer(net['conv2'], p=0.5)
    net['conv3'] = ConvLayer(net['dropout1'], 64, 3, stride=1, pad='same')  # 32x32
    net['dropout3'] = DropoutLayer(net['conv3'], p=0.5)
    net['fc1'] = DenseLayer(net['dropout3'], 3 * 32 * 32)
    net['output'] = ReshapeLayer(net['fc1'], (batch_size, 3, 32, 32))

    # Alternative (commented-out) conv/deconv architecture:
    # net['input'] = InputLayer((batch_size, 3, 64, 64), input_var=input_var)
    # net['dropout1'] = DropoutLayer(net['input'], p=0.1)
    # net['conv1'] = ConvLayer(net['dropout1'], 256, 5, stride=2, pad='same')  # 32x32
    # net['dropout2'] = DropoutLayer(net['conv1'], p=0.5)
    # net['conv2'] = ConvLayer(net['dropout2'], 256, 7, stride=1, pad='same')  # 32x32
    # net['dropout3'] = DropoutLayer(net['conv2'], p=0.5)
    # net['deconv1'] = Deconv2DLayer(net['dropout3'], 256, 7, stride=1, crop='same', output_size=32)  # 32x32
    # net['dropout4'] = DropoutLayer(net['deconv1'], p=0.5)
    # net['deconv3'] = Deconv2DLayer(net['dropout4'], 256, 9, stride=1, crop='same', output_size=32)  # 32x32
    # net['dropout5'] = DropoutLayer(net['deconv3'], p=0.5)
    # net['fc1'] = DenseLayer(net['dropout5'], 2048)
    # net['dropout6'] = DropoutLayer(net['fc1'], p=0.5)
    # net['fc2'] = DenseLayer(net['dropout6'], 2048)
    # net['dropout7'] = DropoutLayer(net['fc2'], p=0.5)
    # net['fc3'] = DenseLayer(net['dropout7'], 3 * 32 * 32)
    # net['dropout8'] = DropoutLayer(net['fc3'], p=0.5)
    # net['reshape'] = ReshapeLayer(net['dropout8'], ([0], 3, 32, 32))
    # net['output'] = Deconv2DLayer(net['reshape'], 3, 9, stride=1, crop='same', output_size=32, nonlinearity=sigmoid)

    self.network, self.network_out = net, net['output']
    print("Conv_Deconv network output shape: {}".format(self.network_out.output_shape))

    # self.input_pad, self.input_pad_out = self.build_pad_model(self.network_out)
    # self.target_pad, self.target_pad_out = self.build_pad_model(
    #     InputLayer((batch_size, 3, 32, 32), input_var=target_var))
    self.input_scaled, self.input_scaled_out = self.build_scaled_model(self.network_out)
    self.target_scaled, self.target_scaled_out = self.build_scaled_model(
        InputLayer((batch_size, 3, 32, 32), input_var=target_var))
    print("(Input) scaled network output shape: {}".format(self.input_scaled_out.output_shape))
    print("(Target) scaled network output shape: {}".format(self.target_scaled_out.output_shape))

    self.vgg_scaled_var = T.tensor4('scaled_vars')
    self.vgg_model, self.vgg_model_out = self.build_vgg_model(self.vgg_scaled_var)
    print("VGG model conv1_1 output shape: {}".format(self.vgg_model['conv1_1'].output_shape))
    print("VGG model conv2_1 output shape: {}".format(self.vgg_model['conv2_1'].output_shape))
    print("VGG model conv3_1 output shape: {}".format(self.vgg_model['conv3_1'].output_shape))
def build_model(width=512, height=512, filename=None, n_classes=5,
                batch_size=None, p_conv=0.0):
    """Set up the network structure of the original formulation of
    JeffreyDF's network and optionally load pretrained weights.

    Parameters
    ----------
    width : Optional[int]
        Image width.
    height : Optional[int]
        Image height.
    filename : Optional[str]
        If filename is not None, weights are loaded from filename.
    n_classes : Optional[int]
        Default 5, for transfer learning on the Kaggle DR data.
    batch_size : Optional[int]
        Should only be set if all batches have the same size!
    p_conv : float
        Dropout applied to the conv. layers; turned off (0.0) by default.

    Returns
    -------
    dict
        One lasagne layer per key.

    Notes
    -----
    Reference: Jeffrey De Fauw, 2015:
    http://jeffreydf.github.io/diabetic-retinopathy-detection/

    Download pretrained weights from:
    https://github.com/JeffreyDF/kaggle_diabetic_retinopathy/blob/master/dumps/2015_07_17_123003_PARAMSDUMP.pkl

    The original net has leaky rectifier units.
    """
    net = OrderedDict()

    # All conv layers share the same padding, initialization and leaky
    # rectifier; only the stride of the first layer differs.
    conv_args = dict(pad='same', untie_biases=True,
                     nonlinearity=LeakyRectify(leakiness=0.5),
                     W=lasagne.init.Orthogonal(1.0),
                     b=lasagne.init.Constant(0.1))

    net['0'] = InputLayer((batch_size, 3, width, height), name='images')
    net['1'] = ConvLayer(net['0'], 32, 7, stride=(2, 2), **conv_args)
    net['1d'] = DropoutLayer(net['1'], p=p_conv)
    net['2'] = MaxPool2DLayer(net['1d'], 3, stride=(2, 2))
    net['3'] = ConvLayer(net['2'], 32, 3, stride=(1, 1), **conv_args)
    net['3d'] = DropoutLayer(net['3'], p=p_conv)
    net['4'] = ConvLayer(net['3d'], 32, 3, stride=(1, 1), **conv_args)
    net['4d'] = DropoutLayer(net['4'], p=p_conv)
    net['5'] = MaxPool2DLayer(net['4d'], 3, stride=(2, 2))
    net['6'] = ConvLayer(net['5'], 64, 3, stride=(1, 1), **conv_args)
    net['6d'] = DropoutLayer(net['6'], p=p_conv)
    net['7'] = ConvLayer(net['6d'], 64, 3, stride=(1, 1), **conv_args)
    net['7d'] = DropoutLayer(net['7'], p=p_conv)
    net['8'] = MaxPool2DLayer(net['7d'], 3, stride=(2, 2))
    net['9'] = ConvLayer(net['8'], 128, 3, stride=(1, 1), **conv_args)
    net['9d'] = DropoutLayer(net['9'], p=p_conv)
    net['10'] = ConvLayer(net['9d'], 128, 3, stride=(1, 1), **conv_args)
    net['10d'] = DropoutLayer(net['10'], p=p_conv)
    net['11'] = ConvLayer(net['10d'], 128, 3, stride=(1, 1), **conv_args)
    net['11d'] = DropoutLayer(net['11'], p=p_conv)
    net['12'] = ConvLayer(net['11d'], 128, 3, stride=(1, 1), **conv_args)
    net['12d'] = DropoutLayer(net['12'], p=p_conv)
    net['13'] = MaxPool2DLayer(net['12d'], 3, stride=(2, 2))
    net['14'] = ConvLayer(net['13'], 256, 3, stride=(1, 1), **conv_args)
    net['14d'] = DropoutLayer(net['14'], p=p_conv)
    net['15'] = ConvLayer(net['14d'], 256, 3, stride=(1, 1), **conv_args)
    net['15d'] = DropoutLayer(net['15'], p=p_conv)
    net['16'] = ConvLayer(net['15d'], 256, 3, stride=(1, 1), **conv_args)
    net['16d'] = DropoutLayer(net['16'], p=p_conv)
    net['17'] = ConvLayer(net['16d'], 256, 3, stride=(1, 1), **conv_args)
    net['17d'] = DropoutLayer(net['17'], p=p_conv)
    net['18'] = MaxPool2DLayer(net['17d'], 3, stride=(2, 2),
                               name='coarse_last_pool')
    net['19'] = DropoutLayer(net['18'], p=0.5)
    net['20'] = DenseLayer(net['19'], num_units=1024, nonlinearity=None,
                           W=lasagne.init.Orthogonal(1.0),
                           b=lasagne.init.Constant(0.1), name='first_fc_0')
    net['21'] = FeaturePoolLayer(net['20'], 2)
    net['22'] = InputLayer((batch_size, 2), name='imgdim')
    net['23'] = ConcatLayer([net['21'], net['22']])
    # Combine representations of both eyes.
    net['24'] = ReshapeLayer(net['23'], (-1, net['23'].output_shape[1] * 2))
    net['25'] = DropoutLayer(net['24'], p=0.5)
    net['26'] = DenseLayer(net['25'], num_units=1024, nonlinearity=None,
                           W=lasagne.init.Orthogonal(1.0),
                           b=lasagne.init.Constant(0.1),
                           name='combine_repr_fc')
    net['27'] = FeaturePoolLayer(net['26'], 2)
    net['28'] = DropoutLayer(net['27'], p=0.5)
    net['29'] = DenseLayer(net['28'], num_units=n_classes * 2,
                           nonlinearity=None,
                           W=lasagne.init.Orthogonal(1.0),
                           b=lasagne.init.Constant(0.1))
    # Reshape back to the number of desired classes.
    net['30'] = ReshapeLayer(net['29'], (-1, n_classes))
    net['31'] = NonlinearityLayer(net['30'], nonlinearity=softmax)

    if filename is not None:
        # Pickled parameter dumps must be opened in binary mode.
        with open(filename, 'rb') as f:
            weights = pickle.load(f)
        set_all_param_values(net['31'], weights)
    return net
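

# Hypothetical usage of build_model above: build at the pretrained 512x512
# resolution, load the parameter dump named in the docstring, and compile a
# prediction function. Both input layers ('0' images and '22' image
# dimensions) must be fed; the dump filename is taken from the docstring and
# everything else here is an illustrative assumption.
def _example_predict():
    import theano
    import theano.tensor as T
    import lasagne
    net = build_model(width=512, height=512,
                      filename='2015_07_17_123003_PARAMSDUMP.pkl')
    images = T.tensor4('images')
    imgdim = T.matrix('imgdim')
    prob = lasagne.layers.get_output(net['31'],
                                     {net['0']: images, net['22']: imgdim},
                                     deterministic=True)
    return theano.function([images, imgdim], prob)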
def build_cnn(input_var=None, n=5):
    # Create a residual learning building block with two stacked 3x3 conv
    # layers, as in the paper.
    def residual_block(l, increase_dim=False, projection=False):
        input_num_filters = l.output_shape[1]
        if increase_dim:
            first_stride = (2, 2)
            out_num_filters = input_num_filters * 2
        else:
            first_stride = (1, 1)
            out_num_filters = input_num_filters

        stack_1 = batch_norm(
            ConvLayer(l, num_filters=out_num_filters, filter_size=(3, 3),
                      stride=first_stride, nonlinearity=rectify, pad='same',
                      W=lasagne.init.HeNormal(gain='relu'),
                      flip_filters=False))
        stack_2 = batch_norm(
            ConvLayer(stack_1, num_filters=out_num_filters,
                      filter_size=(3, 3), stride=(1, 1), nonlinearity=None,
                      pad='same', W=lasagne.init.HeNormal(gain='relu'),
                      flip_filters=False))

        # Add shortcut connections.
        if increase_dim:
            if projection:
                # Projection shortcut, as option B in the paper.
                projection = batch_norm(
                    ConvLayer(l, num_filters=out_num_filters,
                              filter_size=(1, 1), stride=(2, 2),
                              nonlinearity=None, pad='same', b=None,
                              flip_filters=False))
                block = NonlinearityLayer(
                    ElemwiseSumLayer([stack_2, projection]),
                    nonlinearity=rectify)
            else:
                # Identity shortcut, as option A in the paper.
                identity = ExpressionLayer(
                    l, lambda X: X[:, :, ::2, ::2],
                    lambda s: (s[0], s[1], s[2] // 2, s[3] // 2))
                padding = PadLayer(identity, [out_num_filters // 4, 0, 0],
                                   batch_ndim=1)
                block = NonlinearityLayer(
                    ElemwiseSumLayer([stack_2, padding]),
                    nonlinearity=rectify)
        else:
            block = NonlinearityLayer(ElemwiseSumLayer([stack_2, l]),
                                      nonlinearity=rectify)
        return block

    # Building the network.
    l_in = InputLayer(shape=(BATCH_SIZE, 4, 128, 128), input_var=input_var)

    # First layer; output is 16 x 128 x 128.
    l = batch_norm(
        ConvLayer(l_in, num_filters=16, filter_size=(3, 3), stride=(1, 1),
                  nonlinearity=rectify, pad='same',
                  W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))

    # First stack of residual blocks; output is 16 x 128 x 128.
    for _ in range(n):
        l = residual_block(l)

    # Second stack of residual blocks; output is 32 x 64 x 64.
    l = residual_block(l, increase_dim=True, projection=True)
    for _ in range(1, n):
        l = residual_block(l)

    # Third stack of residual blocks; output is 64 x 32 x 32.
    l = residual_block(l, increase_dim=True, projection=True)
    for _ in range(1, n):
        l = residual_block(l)

    # Fourth stack of residual blocks; output is 128 x 16 x 16.
    l = residual_block(l, increase_dim=True, projection=True)
    for _ in range(1, n):
        l = residual_block(l)

    # Global average pooling.
    l = GlobalPoolLayer(l)

    # Fully connected layer.
    network = DenseLayer(l, num_units=2, W=lasagne.init.HeNormal(),
                         nonlinearity=softmax)
    return network
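

# With one initial convolution, four stacks of n residual blocks (two 3x3
# convolutions each) and the final dense layer, build_cnn above has
# 8*n + 2 weighted layers. A quick sanity-check sketch (assumes the
# BATCH_SIZE global used by build_cnn is defined):
def _example_count_params():
    import theano.tensor as T
    import lasagne
    input_var = T.tensor4('inputs')
    network = build_cnn(input_var, n=5)  # 8*5 + 2 = 42 weighted layers
    print('trainable parameters: {}'.format(
        lasagne.layers.count_params(network, trainable=True)))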
def build_net():
    net = OrderedDict()
    net['input'] = InputLayer((BATCH_SIZE, 1, 128, 128))
    net['conv_1_1'] = batch_norm(
        ConvLayer(net['input'], 10, 5, pad='same', stride=1,
                  nonlinearity=lasagne.nonlinearities.elu))
    net['conv_1_2'] = batch_norm(
        ConvLayer(net['conv_1_1'], 10, 3, pad='same', stride=2,
                  nonlinearity=lasagne.nonlinearities.elu))
    net['conv_2_1'] = batch_norm(
        ConvLayer(net['conv_1_2'], 20, 3, pad='same', stride=1,
                  nonlinearity=lasagne.nonlinearities.elu))
    net['conv_2_2'] = batch_norm(
        ConvLayer(net['conv_2_1'], 20, 3, pad='same', stride=2,
                  nonlinearity=lasagne.nonlinearities.elu))
    net['conv_3_1'] = batch_norm(
        ConvLayer(net['conv_2_2'], 40, 3, pad='same', stride=1,
                  nonlinearity=lasagne.nonlinearities.elu))
    net['conv_3_2'] = batch_norm(
        ConvLayer(net['conv_3_1'], 40, 3, pad='same', stride=2,
                  nonlinearity=lasagne.nonlinearities.elu))
    net['conv_4_1'] = batch_norm(
        ConvLayer(net['conv_3_2'], 80, 3, pad='same', stride=1,
                  nonlinearity=lasagne.nonlinearities.elu))
    net['conv_4_2'] = batch_norm(
        ConvLayer(net['conv_4_1'], 80, 3, pad='same', stride=2,
                  nonlinearity=lasagne.nonlinearities.elu))
    net['conv_5_1'] = batch_norm(
        ConvLayer(net['conv_4_2'], 160, 3, pad='same', stride=1,
                  nonlinearity=lasagne.nonlinearities.elu))
    net['conv_5_2'] = batch_norm(
        ConvLayer(net['conv_5_1'], 160, 3, pad='same', stride=2,
                  nonlinearity=lasagne.nonlinearities.elu))
    net['globalPool'] = GlobalPoolLayer(net['conv_5_2'])
    net['fc'] = batch_norm(
        DenseLayer(net['globalPool'], 200,
                   nonlinearity=lasagne.nonlinearities.elu))
    net['prob'] = batch_norm(
        DenseLayer(net['fc'], 2,
                   nonlinearity=lasagne.nonlinearities.softmax))
    return net
def build_model():
    net = {}
    net['input'] = InputLayer((None, 3, 224, 224))
    net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1)
    net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1)
    net['pool1'] = PoolLayer(net['conv1_2'], 2)
    net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1)
    net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad=1)
    net['pool2'] = PoolLayer(net['conv2_2'], 2)
    net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1)
    net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad=1)
    net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad=1)
    net['pool3'] = PoolLayer(net['conv3_3'], 2)
    net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1)
    net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad=1)
    net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad=1)
    net['pool4'] = PoolLayer(net['conv4_3'], 2)
    net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1)
    net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad=1)
    net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad=1)
    net['pool5'] = PoolLayer(net['conv5_3'], 2)
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
    net['drop6'] = DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = DenseLayer(net['drop6'], num_units=4096)
    net['drop7'] = DropoutLayer(net['fc7'], p=0.5)
    net['fc8'] = DenseLayer(net['drop7'], num_units=1000, nonlinearity=None)
    net['prob'] = NonlinearityLayer(net['fc8'], softmax)
    return net
def predict(self, input, hidden_state, Ws, bs):
    npx = self.npx  # image size
    filter_size = self.dynamic_filter_size[0]
    f = 0

    ###############################
    #  filter-generating network  #
    ###############################
    ## rgb to gray
    # output = ConvLayer(input, num_filters=1, filter_size=(1, 1),
    #                    stride=(1, 1), pad='same', W=Ws[f], b=bs[f],
    #                    nonlinearity=None)
    # Ws[f] = output.W; bs[f] = output.b; f += 1

    ## encoder
    output = ConvLayer(input, num_filters=32, filter_size=(3, 3),
                       stride=(1, 1), pad='same', W=Ws[f], b=bs[f],
                       nonlinearity=leaky_rectify)
    Ws[f] = output.W; bs[f] = output.b; f += 1
    output = ConvLayer(output, num_filters=32, filter_size=(3, 3),
                       stride=(2, 2), pad='same', W=Ws[f], b=bs[f],
                       nonlinearity=leaky_rectify)
    Ws[f] = output.W; bs[f] = output.b; f += 1
    output = ConvLayer(output, num_filters=64, filter_size=(3, 3),
                       stride=(1, 1), pad='same', W=Ws[f], b=bs[f],
                       nonlinearity=leaky_rectify)
    Ws[f] = output.W; bs[f] = output.b; f += 1
    output = ConvLayer(output, num_filters=64, filter_size=(3, 3),
                       stride=(1, 1), pad='same', W=Ws[f], b=bs[f],
                       nonlinearity=leaky_rectify)
    Ws[f] = output.W; bs[f] = output.b; f += 1

    ## mid
    output = ConvLayer(output, num_filters=128, filter_size=(3, 3),
                       stride=(1, 1), pad='same', W=Ws[f], b=bs[f],
                       nonlinearity=leaky_rectify, untie_biases=True)
    Ws[f] = output.W; bs[f] = output.b; f += 1
    # hidden = ConvLayer(hidden_state, num_filters=128, filter_size=(3, 3),
    #                    stride=(1, 1), pad='same', W=Ws[f], b=bs[f],
    #                    nonlinearity=leaky_rectify)
    # Ws[f] = hidden.W; bs[f] = hidden.b; f += 1
    # hidden = ConvLayer(hidden, num_filters=128, filter_size=(3, 3),
    #                    stride=(1, 1), pad='same', W=Ws[f], b=bs[f],
    #                    nonlinearity=leaky_rectify)
    # Ws[f] = hidden.W; bs[f] = hidden.b; f += 1
    # output = ElemwiseSumLayer([output, hidden])
    # hidden_state = output

    ## decoder
    output = ConvLayer(output, num_filters=64, filter_size=(3, 3),
                       stride=(1, 1), pad='same', W=Ws[f], b=bs[f],
                       nonlinearity=leaky_rectify)
    Ws[f] = output.W; bs[f] = output.b; f += 1
    output = ConvLayer(output, num_filters=64, filter_size=(3, 3),
                       stride=(1, 1), pad='same', W=Ws[f], b=bs[f],
                       nonlinearity=leaky_rectify)
    Ws[f] = output.W; bs[f] = output.b; f += 1
    output = Upscale2DLayer(output, scale_factor=2)
    output = ConvLayer(output, num_filters=64, filter_size=(3, 3),
                       stride=(1, 1), pad='same', W=Ws[f], b=bs[f],
                       nonlinearity=leaky_rectify)
    Ws[f] = output.W; bs[f] = output.b; f += 1
    output = ConvLayer(output, num_filters=64, filter_size=(3, 3),
                       stride=(1, 1), pad='same', W=Ws[f], b=bs[f],
                       nonlinearity=leaky_rectify)
    Ws[f] = output.W; bs[f] = output.b; f += 1
    output = ConvLayer(output, num_filters=128, filter_size=(1, 1),
                       stride=(1, 1), pad='same', W=Ws[f], b=bs[f],
                       nonlinearity=leaky_rectify)
    Ws[f] = output.W; bs[f] = output.b; f += 1

    ## filter-generating layers
    output = ConvLayer(output, num_filters=filter_size + 1,
                       filter_size=(1, 1), stride=(1, 1), pad=(0, 0),
                       W=Ws[f], b=bs[f], nonlinearity=identity)
    Ws[f] = output.W; bs[f] = output.b; f += 1
    output = ConvLayer(output, num_filters=1, filter_size=(1, 1),
                       stride=(1, 1), pad='same', W=Ws[f], b=bs[f],
                       nonlinearity=identity)
    Ws[f] = output.W; bs[f] = output.b; f += 1

    return output, hidden_state
def build_model_vgg16():
    net = {}
    net['input'] = lasagne.layers.InputLayer((None, 3, 224, 224))
    net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1, flip_filters=False)
    net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False)
    net['pool1'] = lasagne.layers.MaxPool2DLayer(net['conv1_2'], 2)
    net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1, flip_filters=False)
    net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False)
    net['pool2'] = lasagne.layers.MaxPool2DLayer(net['conv2_2'], 2)
    net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False)
    net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False)
    net['pool3'] = lasagne.layers.MaxPool2DLayer(net['conv3_3'], 2)
    net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1, flip_filters=False)
    net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False)
    net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False)
    net['pool4'] = lasagne.layers.MaxPool2DLayer(net['conv4_3'], 2)
    net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1, flip_filters=False)
    net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False)
    net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False)
    net['pool5'] = lasagne.layers.MaxPool2DLayer(net['conv5_3'], 2)
    net['fc6'] = lasagne.layers.DenseLayer(net['pool5'], num_units=4096)
    net['fc6_dropout'] = lasagne.layers.DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = lasagne.layers.DenseLayer(net['fc6_dropout'], num_units=4096)
    net['fc7_dropout'] = lasagne.layers.DropoutLayer(net['fc7'], p=0.5)
    net['fc8'] = lasagne.layers.DenseLayer(net['fc7_dropout'], num_units=1000,
                                           nonlinearity=None)
    net['prob'] = lasagne.layers.NonlinearityLayer(
        net['fc8'], lasagne.nonlinearities.softmax)
    net['output_layer'] = net['prob']
    return net
def build_UNet(inputVar=None, nonlinearity=lasagne.nonlinearities.elu,
               input_dim=(128, 128), base_n_filters=64, do_dropout=False):
    net = OrderedDict()
    pad = "same"
    # Note: a Theano variable cannot be used in a boolean context, so test
    # against None instead of `if not inputVar`.
    if inputVar is None:
        net['input'] = InputLayer((None, 3, input_dim[0], input_dim[1]))
    else:
        net['input'] = InputLayer((None, 3, input_dim[0], input_dim[1]),
                                  inputVar)
    net['contr_1_1'] = batch_norm(
        ConvLayer(net['input'], base_n_filters, 3, nonlinearity=nonlinearity,
                  pad=pad, W=HeNormal(gain="relu")))
    net['contr_1_2'] = batch_norm(
        ConvLayer(net['contr_1_1'], base_n_filters, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['pool1'] = Pool2DLayer(net['contr_1_2'], 2)
    net['contr_2_1'] = batch_norm(
        ConvLayer(net['pool1'], base_n_filters * 2, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['contr_2_2'] = batch_norm(
        ConvLayer(net['contr_2_1'], base_n_filters * 2, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['pool2'] = Pool2DLayer(net['contr_2_2'], 2)
    net['contr_3_1'] = batch_norm(
        ConvLayer(net['pool2'], base_n_filters * 4, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['contr_3_2'] = batch_norm(
        ConvLayer(net['contr_3_1'], base_n_filters * 4, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['pool3'] = Pool2DLayer(net['contr_3_2'], 2)
    net['contr_4_1'] = batch_norm(
        ConvLayer(net['pool3'], base_n_filters * 8, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['contr_4_2'] = batch_norm(
        ConvLayer(net['contr_4_1'], base_n_filters * 8, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    l = net['pool4'] = Pool2DLayer(net['contr_4_2'], 2)

    # The paper does not really describe where and how dropout is added.
    # Feel free to try more options.
    if do_dropout:
        l = DropoutLayer(l, p=0.4)

    net['encode_1'] = batch_norm(
        ConvLayer(l, base_n_filters * 16, 3, nonlinearity=nonlinearity,
                  pad=pad, W=HeNormal(gain="relu")))
    net['encode_2'] = batch_norm(
        ConvLayer(net['encode_1'], base_n_filters * 16, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['upscale1'] = batch_norm(
        Deconv2DLayer(net['encode_2'], base_n_filters * 16, 2, 2,
                      crop="valid", nonlinearity=nonlinearity,
                      W=HeNormal(gain="relu")))
    net['concat1'] = ConcatLayer([net['upscale1'], net['contr_4_2']],
                                 cropping=(None, None, "center", "center"))
    net['expand_1_1'] = batch_norm(
        ConvLayer(net['concat1'], base_n_filters * 8, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['expand_1_2'] = batch_norm(
        ConvLayer(net['expand_1_1'], base_n_filters * 8, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['upscale2'] = batch_norm(
        Deconv2DLayer(net['expand_1_2'], base_n_filters * 8, 2, 2,
                      crop="valid", nonlinearity=nonlinearity,
                      W=HeNormal(gain="relu")))
    net['concat2'] = ConcatLayer([net['upscale2'], net['contr_3_2']],
                                 cropping=(None, None, "center", "center"))
    net['expand_2_1'] = batch_norm(
        ConvLayer(net['concat2'], base_n_filters * 4, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['expand_2_2'] = batch_norm(
        ConvLayer(net['expand_2_1'], base_n_filters * 4, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['upscale3'] = batch_norm(
        Deconv2DLayer(net['expand_2_2'], base_n_filters * 4, 2, 2,
                      crop="valid", nonlinearity=nonlinearity,
                      W=HeNormal(gain="relu")))
    net['concat3'] = ConcatLayer([net['upscale3'], net['contr_2_2']],
                                 cropping=(None, None, "center", "center"))
    net['expand_3_1'] = batch_norm(
        ConvLayer(net['concat3'], base_n_filters * 2, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['expand_3_2'] = batch_norm(
        ConvLayer(net['expand_3_1'], base_n_filters * 2, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['upscale4'] = batch_norm(
        Deconv2DLayer(net['expand_3_2'], base_n_filters * 2, 2, 2,
                      crop="valid", nonlinearity=nonlinearity,
                      W=HeNormal(gain="relu")))
    net['concat4'] = ConcatLayer([net['upscale4'], net['contr_1_2']],
                                 cropping=(None, None, "center", "center"))
    net['expand_4_1'] = batch_norm(
        ConvLayer(net['concat4'], base_n_filters, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['expand_4_2'] = batch_norm(
        ConvLayer(net['expand_4_1'], base_n_filters, 3,
                  nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu")))
    net['output'] = ConvLayer(net['expand_4_2'], 3, 1, nonlinearity=None)
    return net
def build_model(input_width, input_height, output_dim, batch_size=BATCH_SIZE):
    l_in = lasagne.layers.InputLayer(
        shape=(batch_size, NUM_CHANNELS, input_width, input_height))
    l_conv1 = ConvLayer(l_in, num_filters=32, filter_size=(3, 3),
                        nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
                        W=lasagne.init.Orthogonal())
    l_conv1b = ConvLayer(l_conv1, num_filters=32, filter_size=(3, 3), pad=0,
                         nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
                         W=lasagne.init.Orthogonal())
    l_conv1c = ConvLayer(l_conv1b, num_filters=32, filter_size=(3, 3), pad=0,
                         nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
                         W=lasagne.init.Orthogonal())
    l_pool1 = MaxPoolLayer(l_conv1c, pool_size=(3, 3), stride=(2, 2))
    l_dropout1 = lasagne.layers.DropoutLayer(l_pool1, p=0.25)
    l_conv2 = ConvLayer(l_dropout1, num_filters=64, filter_size=(3, 3), pad=0,
                        nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
                        W=lasagne.init.Orthogonal())
    l_conv2b = ConvLayer(l_conv2, num_filters=64, filter_size=(3, 3), pad=0,
                         nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
                         W=lasagne.init.Orthogonal())
    l_conv2c = ConvLayer(l_conv2b, num_filters=64, filter_size=(3, 3), pad=0,
                         nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
                         W=lasagne.init.Orthogonal())
    l_pool2 = MaxPoolLayer(l_conv2c, pool_size=(2, 2), stride=(2, 2))
    l_dropout2 = lasagne.layers.DropoutLayer(l_pool2, p=0.25)
    l_conv3 = ConvLayer(l_dropout2, num_filters=128, filter_size=(3, 3), pad=0,
                        nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
                        W=lasagne.init.Orthogonal())
    l_conv3b = ConvLayer(l_conv3, num_filters=128, filter_size=(3, 3), pad=0,
                         nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
                         W=lasagne.init.Orthogonal())
    l_conv3c = ConvLayer(l_conv3b, num_filters=128, filter_size=(3, 3), pad=0,
                         nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
                         W=lasagne.init.Orthogonal())
    l_pool3 = lasagne.layers.GlobalPoolLayer(l_conv3c, pool_function=T.max)
    l_dropout3 = lasagne.layers.DropoutLayer(l_pool3, p=0.5)
    l_out = lasagne.layers.DenseLayer(
        l_dropout3, num_units=output_dim,
        nonlinearity=lasagne.nonlinearities.softmax,
        W=lasagne.init.Orthogonal())
    return l_out
def build_model_vgg16(input_shape, verbose):
    '''
    See Lasagne Modelzoo:
    https://github.com/Lasagne/Recipes/blob/master/modelzoo/vgg16.py
    '''
    if verbose:
        print 'VGG16 (from lasagne model zoo)'
    net = {}
    net['input'] = InputLayer(input_shape)
    net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1, flip_filters=False)
    net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False)
    net['pool1'] = PoolLayer(net['conv1_2'], 2)
    net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1, flip_filters=False)
    net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False)
    net['pool2'] = PoolLayer(net['conv2_2'], 2)
    net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False)
    net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False)
    net['pool3'] = PoolLayer(net['conv3_3'], 2)
    net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1, flip_filters=False)
    net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False)
    net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False)
    net['pool4'] = PoolLayer(net['conv4_3'], 2)
    net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1, flip_filters=False)
    net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False)
    net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False)
    net['pool5'] = PoolLayer(net['conv5_3'], 2)
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
    net['fc6_dropout'] = DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = DenseLayer(net['fc6_dropout'], num_units=4096)
    net['fc7_dropout'] = DropoutLayer(net['fc7'], p=0.5)
    net['fc8'] = DenseLayer(net['fc7_dropout'], num_units=1000,
                            nonlinearity=None)
    net['prob'] = NonlinearityLayer(net['fc8'], softmax)
    # for layer in net.values():
    #     print str(lasagne.layers.get_output_shape(layer))
    return net
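

# Sketch: load the model-zoo weights into the VGG16 net built above. The
# 'vgg16.pkl' file with a 'param values' list is the same format this codebase
# uses for detection fine-tuning elsewhere; the path itself is an assumption.
def _example_load_vgg16():
    import cPickle as pickle
    net = build_model_vgg16(input_shape=(None, 3, 224, 224), verbose=True)
    with open('vgg16.pkl', 'rb') as f:
        params = pickle.load(f)['param values']
    lasagne.layers.set_all_param_values(net['prob'], params)
    return net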
def build_model_vgg_cnn_s(input_shape, verbose):
    '''
    See Lasagne Modelzoo:
    https://github.com/Lasagne/Recipes/blob/master/modelzoo/vgg_cnn_s.py
    '''
    if verbose:
        print 'VGG_cnn_s (from lasagne model zoo)'
    net = {}
    net['input'] = InputLayer(input_shape)
    net['conv1'] = ConvLayer(net['input'], num_filters=96, filter_size=7,
                             stride=2, flip_filters=False)
    # caffe has alpha = alpha * pool_size
    net['norm1'] = LRNLayer(net['conv1'], alpha=0.0001)
    net['pool1'] = PoolLayer(net['norm1'], pool_size=3, stride=3,
                             ignore_border=False)
    net['conv2'] = ConvLayer(net['pool1'], num_filters=256, filter_size=5,
                             flip_filters=False)
    net['pool2'] = PoolLayer(net['conv2'], pool_size=2, stride=2,
                             ignore_border=False)
    net['conv3'] = ConvLayer(net['pool2'], num_filters=512, filter_size=3,
                             pad=1, flip_filters=False)
    net['conv4'] = ConvLayer(net['conv3'], num_filters=512, filter_size=3,
                             pad=1, flip_filters=False)
    net['conv5'] = ConvLayer(net['conv4'], num_filters=512, filter_size=3,
                             pad=1, flip_filters=False)
    net['pool5'] = PoolLayer(net['conv5'], pool_size=3, stride=3,
                             ignore_border=False)
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
    net['drop6'] = DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = DenseLayer(net['drop6'], num_units=4096)
    net['drop7'] = DropoutLayer(net['fc7'], p=0.5)
    net['fc8'] = DenseLayer(net['drop7'], num_units=1000,
                            nonlinearity=lasagne.nonlinearities.softmax)
    if verbose:
        for layer in net.values():
            print str(lasagne.layers.get_output_shape(layer))
    return net
def buildNetwork(CFG, params, vocab):  # {{{
    """TODO document me"""
    # Use params to update CFG
    CFG = get_CFG(CFG, params)

    # -----------------------------------------------------------
    # Setting up the image embedding.
    # -----------------------------------------------------------
    l_input_sentence = InputLayer((CFG['BATCH_SIZE'], 1),
                                  name='l_input_sentence')  # input (1 word)
    l_sentence_embedding = lasagne.layers.EmbeddingLayer(
        l_input_sentence, input_size=len(vocab),
        output_size=CFG['EMBEDDING_SIZE'], name='l_sentence_embedding')

    # Setting up the CNN in case of fine-tuning.
    if CFG['CNN_FINE_TUNE']:
        cnn, l_input_cnn, l_input_img = build_CNN(CFG)
        if CFG['CNN_MODEL'] == "vgg":
            vgg16, resnet50 = cnn, None
        elif CFG["CNN_MODEL"] == "resnet":
            vgg16, resnet50 = None, cnn
        if CFG['START_NORMALIZED'] == 1:
            l_input_cnn = ExpressionLayer(
                l_input_cnn,
                lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8),
                output_shape='auto')
        elif CFG['START_NORMALIZED'] == 2:
            l_input_cnn = ExpressionLayer(
                l_input_cnn,
                lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8),
                output_shape='auto')
    else:
        l_input_cnn = InputLayer((CFG['BATCH_SIZE'], CFG['CNN_FEATURE_SIZE']),
                                 name='l_input_cnn')
    l_cnn_embedding = DenseLayer(l_input_cnn, num_units=CFG['EMBEDDING_SIZE'],
                                 nonlinearity=lasagne.nonlinearities.identity,
                                 name='l_cnn_embedding')
    l_cnn_embedding2 = ReshapeLayer(l_cnn_embedding, ([0], 1, [1]),
                                    name='l_cnn_embedding2')
    l_rnn_input = InputLayer((CFG['BATCH_SIZE'], 1, CFG['EMBEDDING_SIZE']),
                             name='l_rnn_input')
    l_dropout_input = DropoutLayer(l_rnn_input, p=0.5, name='l_dropout_input')

    l_input_reg = None
    l_out_reg = None
    l_decoder = None
    l_region_feedback = None
    l_region = None
    l_input_img2 = None
    l_boxes = None
    l_conv = None
    l_loc = None
    l_loc1 = None
    l_input_loc = None
    l_sel_region2 = None
    l_weighted_region_prev = None
    l_weighted_region = None

    input_shape = (CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE'])
    if CFG['MODE'] == 'normal':  # {{{1
        l_cell_input = InputLayer(input_shape, name='l_cell_input')
        l_prev_gru = InputLayer(input_shape, name="l_prev_gru")
        l_gru = GRUMemoryLayer(CFG['EMBEDDING_SIZE'], l_cell_input,
                               l_prev_gru, name='l_gru')
        l_dropout_output = DropoutLayer(l_gru, p=0.5, name='l_dropout_output')
        # The decoder is a fully connected layer with one output unit for
        # each word in the vocabulary.
        l_decoder = DenseLayer(l_dropout_output, num_units=len(vocab),
                               nonlinearity=lasagne.nonlinearities.softmax,
                               name='l_decoder')
        l_out = ReshapeLayer(l_decoder, ([0], 1, [1]), name='l_out')
        # }}}
    elif CFG['MODE'] == 'tensor':
        l_cell_input = InputLayer(input_shape, name='l_cell_input')
        l_prev_gru = InputLayer(input_shape, name="l_prev_gru")
        l_gru = GRUMemoryLayer(CFG['EMBEDDING_SIZE'], l_cell_input,
                               l_prev_gru, name='l_gru')
        l_dropout_output = DropoutLayer(l_gru, p=0.5, name='l_dropout_output')
        l_dropout_output = ReshapeLayer(l_dropout_output, ([0], 1, [1]),
                                        name='l_dropout_output')
        # TODO put me back
        if CFG['CNN_FINE_TUNE']:
            (l_input_regions, _input_regions, l_out_reg, l_input_img2,
             l_boxes, l_conv) = build_finetune_proposals(CFG, vgg16, resnet50)
        else:
            l_input_regions = InputLayer(
                (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']),
                name='l_input_regions')
        # TODO a block.
        # l_decoder = build_decoderLayer(l_dropout_output, l_input_regions, vocab, CFG)
        if CFG.has_key('DISSECT') and CFG['DISSECT'] != 'No':
            if CFG['DISSECT'] == 'wr':
                l_decoder = TensorProdFactLayer(
                    (l_dropout_output, l_input_regions),
                    dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
                    dim_w=len(vocab), nonlinearity=softmax,
                    name='l_tensor', W_hr='skip', b_hr='skip')
            elif CFG['DISSECT'] == 'rs':
                l_decoder = TensorProdFactLayer(
                    (l_dropout_output, l_input_regions),
                    dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
                    dim_w=len(vocab), nonlinearity=softmax,
                    name='l_tensor', W_rw='skip', b_rw='skip')
        else:
            l_decoder = TensorProdFactLayer(
                (l_dropout_output, l_input_regions),
                dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
                dim_w=len(vocab), nonlinearity=softmax, name='l_tensor')
        l_out = ExpressionLayer(l_decoder, lambda X: X.sum(2),
                                output_shape='auto',
                                name='l_out')  # sum over regions
    elif CFG['MODE'] == 'transformer':  # {{{2
        print(bcolors.OKGREEN + "Transformer mode." + bcolors.ENDC)
        from TProd3 import TensorProdFactLayer, WeightedSumLayer, SubsampleLayer
        from agentnet.memory import GRUMemoryLayer

        # define a cell
        l_cell_input = InputLayer((CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE']),
                                  name='l_cell_input')
        l_prev_gru = InputLayer((CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE']),
                                name="l_prev_gru")
        if CFG['TRANS_FEEDBACK']:
            l_weighted_region_prev = InputLayer(
                (CFG['BATCH_SIZE'], CFG['REGION_SIZE']),
                name="l_weighted_region_prev")
            if CFG['FEEDBACK'] == 2:
                l_cell_concat = lasagne.layers.ConcatLayer(
                    [l_cell_input, l_weighted_region_prev], axis=1,
                    name='l_cell_concat')
            else:
                print("Are you sure you don't want to use feedback=2? "
                      "I think you should. Change your mind, then come to "
                      "see me again.")
        else:
            l_cell_concat = l_cell_input
        l_gru = GRUMemoryLayer(CFG['EMBEDDING_SIZE'], l_cell_concat,
                               l_prev_gru, name='l_gru')
        l_dropout_output = DropoutLayer(l_gru, p=CFG['RNN_DROPOUT'],
                                        name='l_dropout_output')
        l_dropout_output = ReshapeLayer(l_dropout_output, ([0], 1, [1]),
                                        name='l_dropout_output')

        if CFG['TRANS_USE_PRETRAINED']:
            l_out_reg = vgg16['conv5_2']
            # l_out_reg2 = vgg16['conv5_3']
        else:
            l_out_reg = vgg16['conv5_3']
        l_input_reg = InputLayer(
            (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], 14, 14),
            name='l_input_reg')
        l_input_regions = l_input_reg
        if not CFG['TRANS_USE_PRETRAINED']:
            if CFG['CONV_NORMALIZED'] == 1:
                l_input_regions = ExpressionLayer(
                    l_input_regions,
                    lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8),
                    output_shape='auto')
            elif CFG['CONV_NORMALIZED'] == 2:
                l_input_regions = ExpressionLayer(
                    l_input_regions,
                    lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8),
                    output_shape='auto')
            else:
                l_input_regions = ExpressionLayer(
                    l_input_regions, lambda X: X * 0.01, output_shape='auto')
        factor = 2.0
        W = lasagne.init.Constant(0.0)
        b = lasagne.init.Constant(0.0)
        if CFG['TRANS_MULTIPLE_BOXES']:
            num_prop, l_loc = build_loc_net(
                CFG, l_gru, l_input_regions, 1, (14, 14), (3, 3),
                CFG['TRANS_STRIDE'], CFG['TRANS_ZOOM'], W, b, name='')
            if CFG['TRANS_ADD_BIG_PROPOSALS']:
                num_prop_big, l_loc_big = build_loc_net(
                    CFG, l_gru, l_input_regions, 1, (14, 14), (3, 3),
                    CFG['TRANS_STRIDE'], CFG['TRANS_ZOOM'] * 2, W, b,
                    name='_big')
                l_loc = ConcatLayer((l_loc, l_loc_big), axis=0)
                num_prop += num_prop_big
            l_sel_region2 = MultiTransformerLayer(
                l_input_regions, l_loc, kernel_size=(3, 3),
                zero_padding=CFG['TRANS_ZEROPAD'])  # 3x3
            if CFG['TRANS_USE_PRETRAINED']:
                Wvgg = vgg16['conv5_3'].W.reshape(
                    (CFG['REGION_SIZE'], CFG['REGION_SIZE'] * 3 * 3)).swapaxes(0, 1)
                bvgg = vgg16['conv5_3'].b
                l_sel_region = DenseLayer(l_sel_region2,
                                          num_units=CFG['REGION_SIZE'],
                                          name='l_sel_region', W=Wvgg, b=bvgg)
                if CFG['CONV_NORMALIZED'] == 1:
                    l_sel_region = ExpressionLayer(
                        l_sel_region,
                        lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8),
                        output_shape='auto')
                elif CFG['CONV_NORMALIZED'] == 2:
                    l_sel_region = ExpressionLayer(
                        l_sel_region,
                        lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8),
                        output_shape='auto')
            else:
                # Feeds the transformer output l_sel_region2 (l_sel_region is
                # not yet defined at this point).
                l_sel_region = DenseLayer(l_sel_region2,
                                          num_units=CFG['REGION_SIZE'],
                                          name='l_sel_region')
            l_sel_region = ReshapeLayer(
                l_sel_region,
                (CFG['BATCH_SIZE'], num_prop, CFG['REGION_SIZE']))
            l_sel_region = DimshuffleLayer(l_sel_region, (0, 2, 1))
            l_sel_region = ReshapeLayer(
                l_sel_region,
                (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], num_prop))
        else:
            b = np.zeros((2, 3), dtype='float32')
            b[0, 0] = 2
            b[1, 1] = 2
            b = b.flatten()
            W = lasagne.init.Constant(0.0)
            l_input_loc = l_gru
            if CFG['TRANS_USE_STATE']:
                l_input_im = ConvLayer(l_input_regions, num_filters=512,
                                       filter_size=(3, 3), pad='same',
                                       name='l_reduce_im1')
                l_input_im = lasagne.layers.MaxPool2DLayer(l_input_im, (2, 2))
                l_input_im = ConvLayer(l_input_im, num_filters=512,
                                       filter_size=(3, 3), pad='same',
                                       name='l_reduce_im2')
                l_input_im = lasagne.layers.MaxPool2DLayer(l_input_im, (2, 2))
                l_input_im = ReshapeLayer(l_input_im, (CFG['BATCH_SIZE'], 512))
                l_input_loc = ConcatLayer((l_gru, l_input_im))
            l_loc1 = DenseLayer(l_input_loc, num_units=256, name='l_loc1')
            l_loc = DenseLayer(l_loc1, num_units=6, W=W, b=b,
                               nonlinearity=None, name='l_loc2')
            l_sel_region = TransformerLayer(l_input_regions, l_loc,
                                            downsample_factor=2)
            l_sel_region = DenseLayer(l_sel_region,
                                      num_units=CFG['REGION_SIZE'],
                                      name='l_sel_region')
            l_sel_region = ReshapeLayer(
                l_sel_region, (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], 1))
        l_decoder = TensorProdFactLayer(
            (l_dropout_output, l_sel_region), dim_h=CFG['EMBEDDING_SIZE'],
            dim_r=CFG['REGION_SIZE'], dim_w=len(vocab),
            W=lasagne.init.Normal(std=0.001, mean=0.0),
            nonlinearity=lasagne.nonlinearities.softmax, name='l_tensor')
        if CFG['TRANS_FEEDBACK']:
            l_region = ExpressionLayer(l_decoder, lambda X: X.sum(3),
                                       output_shape='auto',
                                       name='l_region')  # sum over words
            l_weighted_region = WeightedSumLayer([l_sel_region, l_region],
                                                 name='l_weighted_region')
        l_out = ExpressionLayer(l_decoder, lambda X: X.sum(2),
                                output_shape='auto',
                                name='l_out')  # sum over regions
        # }}}
    elif CFG['MODE'] == 'tensor-feedback':  # {{{2
        # define a cell
        l_cell_input = InputLayer((CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE']),
                                  name='l_cell_input')
        l_region_feedback = InputLayer(
            (CFG['BATCH_SIZE'], CFG['NUM_REGIONS']),
            name='l_region_feedback')
        l_cell_concat = lasagne.layers.ConcatLayer(
            [l_cell_input, l_region_feedback], axis=1, name='l_cell_concat')
        from agentnet.memory import GRUMemoryLayer
        l_prev_gru = InputLayer((CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE']),
                                name="l_prev_gru")
        l_gru = GRUMemoryLayer(CFG['EMBEDDING_SIZE'], l_cell_concat,
                               l_prev_gru, name='l_gru')
        l_dropout_output = DropoutLayer(l_gru, p=0.5, name='l_dropout_output')
        l_dropout_output = ReshapeLayer(l_dropout_output, ([0], 1, [1]),
                                        name='l_dropout_output')
        from TProd3 import TensorProdFactLayer
        l_input_regions = InputLayer(
            (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']),
            name='l_input_regions')
        l_tensor = TensorProdFactLayer(
            (l_dropout_output, l_input_regions),
            dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
            dim_w=len(vocab), nonlinearity=lasagne.nonlinearities.softmax,
            name='l_tensor')
        l_region = ExpressionLayer(l_tensor, lambda X: X.sum(3),
                                   output_shape='auto',
                                   name='l_region')  # sum over words
        l_region = ReshapeLayer(l_region, ([0], [2]), name='l_region')
        l_out = ExpressionLayer(l_tensor, lambda X: X.sum(2),
                                output_shape='auto',
                                name='l_out')  # sum over regions
        # }}}
    elif CFG['MODE'] == 'tensor-feedback2':  # {{{2
        l_feedback = InputLayer((CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE']),
                                name='l_feedback')
        l_prev_gru = InputLayer((CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE']),
                                name="l_prev_gru")
        from TProd3 import TensorProdFactLayer, WeightedSumLayer
        if CFG['PROPOSALS'] == 3:
            # use images at different resolution but without fully connected layers
            import CNN
            vgg16_det = CNN.build_model_RCNN(
                CFG['NUM_REGIONS'], CFG['IM_SIZE'] * 1.5, pool_dims=3,
                dropout_value=CFG['RNN_DROPOUT'])
            print "Loading pretrained VGG16 parameters for detection"
            l_input_img2 = vgg16_det['input']
            l_conv = vgg16_det['conv5_3']
            l_boxes = vgg16_det['boxes']
            l_input_regions = vgg16_det['reshape']
            if CFG['CONV_NORMALIZED'] == 1:
                l_input_regions = ExpressionLayer(
                    l_input_regions,
                    lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8),
                    output_shape='auto')
            elif CFG['CONV_NORMALIZED'] == 2:
                l_input_regions = ExpressionLayer(
                    l_input_regions,
                    lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8),
                    output_shape='auto')
            else:
                l_input_regions = ExpressionLayer(
                    l_input_regions, lambda X: X * 0.01, output_shape='auto')
            l_cnn_embedding2 = DenseLayer(l_input_regions,
                                          num_units=CFG['REGION_SIZE'],
                                          name='l_cnn_proposals')
            l_input_regions = ReshapeLayer(
                l_cnn_embedding2,
                (CFG['BATCH_SIZE'], CFG['NUM_REGIONS'], CFG['REGION_SIZE'], 1))
            l_input_regions = lasagne.layers.DimshuffleLayer(l_input_regions,
                                                             (0, 2, 1, 3))
            l_out_reg = l_input_regions
            l_input_reg = InputLayer(
                (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS'], 1),
                name='l_input_reg')
            l_input_regions = ReshapeLayer(
                l_input_reg,
                (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']),
                name='l_input_regions')
        elif CFG['PROPOSALS'] == 4:
            # use images at different resolution but without fully connected layers
            if CFG['CNN_MODEL'] == 'vgg':
                import CNN
                vgg16_det = CNN.build_model_RCNN(
                    CFG['NUM_REGIONS'], int(CFG['IM_SIZE'] * 1.5),
                    pool_dims=1, dropout_value=CFG['RNN_DROPOUT'])
                print "Loading pretrained VGG16 parameters for detection"
                model_param_values = pickle.load(open('vgg16.pkl'))['param values']
                lasagne.layers.set_all_param_values(vgg16_det['conv5_3'],
                                                    model_param_values[:-6])
                l_input_img2 = vgg16_det['input']
                l_conv = vgg16_det['conv5_3']
                l_boxes = vgg16_det['boxes']
                l_input_regions = vgg16_det['crop']
                l_input_regions = ReshapeLayer(
                    l_input_regions,
                    (CFG['BATCH_SIZE'] * CFG['NUM_REGIONS'],
                     CFG['REGION_SIZE']))
            else:
                resnet50_det = resnet_CNN.build_model_RCNN(
                    CFG['NUM_REGIONS'], im_size=CFG['IM_SIZE'] * 1.5,
                    pool_dims=1, dropout_value=CFG['RNN_DROPOUT'])
                print "Loading pretrained resnet50 parameters for detection"
                # You can use this format to store other things for best effort
                model_param_values = pickle.load(open('resnet50.pkl'))['param values']
                from save_layers import add_names_layers_and_params
                add_names_layers_and_params(resnet50_det)
                # lasagne.layers.set_all_param_values(resnet50['prob'], model_param_values)
                set_param_dict(resnet50_det['pool5'], model_param_values,
                               prefix='', show_layers=False, relax=False)
                l_input_img2 = resnet50_det['input']
                l_conv = resnet50_det['res4f_relu']
                l_boxes = resnet50_det['boxes']
                l_input_regions = resnet50_det['crop']
                l_input_regions = ReshapeLayer(
                    l_input_regions,
                    (CFG['BATCH_SIZE'] * CFG['NUM_REGIONS'],
                     CFG['REGION_SIZE']))
            if CFG['CONV_NORMALIZED'] == 1:
                l_input_regions = ExpressionLayer(
                    l_input_regions,
                    lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8),
                    output_shape='auto')
            elif CFG['CONV_NORMALIZED'] == 2:
                l_input_regions = ExpressionLayer(
                    l_input_regions,
                    lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8),
                    output_shape='auto')
            else:
                _input_regions = ExpressionLayer(
                    l_input_regions, lambda X: X * 0.01, output_shape='auto')
            l_input_regions = ReshapeLayer(
                l_input_regions,
                (CFG['BATCH_SIZE'], CFG['NUM_REGIONS'], CFG['REGION_SIZE'], 1))
            l_input_regions = lasagne.layers.DimshuffleLayer(l_input_regions,
                                                             (0, 2, 1, 3))
            l_out_reg = l_input_regions
            l_input_reg = InputLayer(
                (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS'], 1),
                name='l_input_reg')
            l_input_regions = ReshapeLayer(
                l_input_reg,
                (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']),
                name='l_input_regions')
        else:
            if CFG['CNN_MODEL'] == 'vgg':
                l_out_reg = vgg16['conv5_3']
            elif CFG['CNN_MODEL'] == 'resnet':
                l_out_reg = resnet50['res4f_relu']
            else:
                print(bcolors.FAIL + "Unrecognized network" + bcolors.ENDC)
            l_input_reg = InputLayer(
                (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], 14, 14),
                name='l_input_reg')
            if CFG['CONV_REDUCED'] > 1:
                # added a scaling factor of 100 to avoid exploding gradients
                l_input_regions = ExpressionLayer(
                    l_input_reg,
                    lambda X: X[:, :, ::CFG['CONV_REDUCED'], ::CFG['CONV_REDUCED']],
                    output_shape='auto')
            else:
                l_input_regions = l_input_reg
            if CFG['CONV_NORMALIZED'] == 1:
                l_input_regions = ExpressionLayer(
                    l_input_regions,
                    lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8),
                    output_shape='auto')
            elif CFG['CONV_NORMALIZED'] == 2:
                l_input_regions = ExpressionLayer(
                    l_input_regions,
                    lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8),
                    output_shape='auto')
            else:
                l_input_regions = ExpressionLayer(
                    l_input_regions, lambda X: X * 0.01, output_shape='auto')
            if CFG['TENSOR_ADD_CONV']:
                l_input_regions = ConvLayer(l_input_regions,
                                            num_filters=CFG['REGION_SIZE'],
                                            filter_size=(3, 3), pad='same',
                                            name='l_add_con')
            l_input_regions = ReshapeLayer(
                l_input_regions,
                (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']))
        if CFG['TENSOR_TIED']:
            l_region_feedback = InputLayer(
                (CFG['BATCH_SIZE'], CFG['NUM_REGIONS']),
                name='l_region_feedback')
            l_region_feedback2 = ReshapeLayer(l_region_feedback,
                                              ([0], 1, [1]),
                                              name='l_region_feedback2')
        else:
            l_shp2 = ReshapeLayer(
                l_prev_gru, (CFG['BATCH_SIZE'], 1, CFG['EMBEDDING_SIZE']))
            l_shp2 = DropoutLayer(l_shp2, p=CFG['RNN_DROPOUT'], name='l_shp2')
            l_tensor2 = TensorProdFactLayer(
                (l_shp2, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'],
                dim_r=CFG['REGION_SIZE'], dim_w=len(vocab),
                nonlinearity=lasagne.nonlinearities.softmax, name='l_tensor2')
            l_region_feedback = ExpressionLayer(
                l_tensor2, lambda X: T.sum(X, 3), output_shape='auto',
                name='l_region')  # sum over words
            l_region_feedback2 = ReshapeLayer(
                l_region_feedback, (CFG['BATCH_SIZE'], 1, CFG['NUM_REGIONS']))
        l_weighted_region = WeightedSumLayer(
            [l_input_regions, l_region_feedback2], name='l_weighted_region')
        # define a cell
        l_cell_input = InputLayer((CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE']),
                                  name='l_cell_input')
        if CFG['FEEDBACK'] == 0:  # no feedback
            l_cell_concat = l_cell_input
        elif CFG['FEEDBACK'] == 1:
            l_region2 = ReshapeLayer(l_region_feedback2, ([0], [2]))
            l_cell_concat = lasagne.layers.ConcatLayer(
                [l_cell_input, l_region2], axis=1, name='l_cell_concat')
        elif CFG['FEEDBACK'] == 2:
            l_cell_concat = lasagne.layers.ConcatLayer(
                [l_cell_input, l_weighted_region], axis=1,
                name='l_cell_concat')
        elif CFG['FEEDBACK'] == 3:
            l_region2 = ReshapeLayer(l_region_feedback2, ([0], [2]))
            l_cell_concat = lasagne.layers.ConcatLayer(
                [l_cell_input, l_weighted_region, l_region2], axis=1,
                name='l_cell_concat')
        elif CFG['FEEDBACK'] == 4:
            # See RNNTraining.py for comments on this.
            from TProd3 import WeightedImageLayer
            l_weighted_image = WeightedImageLayer(
                [l_input_regions, l_region_feedback2],
                name='l_weighted_image')
            if CFG['IMGFEEDBACK_MECHANISM'] == 'highres':
                l_weighted_image_reshaped = ReshapeLayer(
                    l_weighted_image, ([0], [1], 14, 14),
                    name='l_weighted_image_reshaped')
                l_weighted_image_conv_reduced = lasagne.layers.MaxPool2DLayer(
                    l_weighted_image_reshaped, (2, 2),
                    name='l_weighted_image_conv_reduced')
                l_feedback_co1 = lasagne.layers.Conv2DLayer(
                    incoming=l_weighted_image_conv_reduced, num_filters=512,
                    filter_size=(3, 3), pad='same', name='l_feedback_co1')
            else:
                l_weighted_image_reshaped = ReshapeLayer(
                    l_weighted_image, ([0], [1], 7, 7),
                    name='l_weighted_image_reshaped')
                l_feedback_co1 = lasagne.layers.Conv2DLayer(
                    incoming=l_weighted_image_reshaped, num_filters=512,
                    filter_size=(3, 3), pad='same', name='l_feedback_co1')
            l_feedback_po1 = lasagne.layers.MaxPool2DLayer(
                l_feedback_co1, (2, 2), name='l_feedback_po1')
            l_feedback_co2 = lasagne.layers.Conv2DLayer(
                incoming=l_feedback_po1, num_filters=512, filter_size=(3, 3),
                pad='same', name='l_feedback_co2')
            l_feedback_po2 = lasagne.layers.MaxPool2DLayer(
                l_feedback_co2, (2, 2), name='l_feedback_po2')
            l_feedback_po2_reshaped = ReshapeLayer(
                l_feedback_po2, ([0], [1]), name='l_feedback_po2_reshaped')
            l_cell_concat = lasagne.layers.ConcatLayer(
                [l_cell_input, l_feedback_po2_reshaped], axis=1,
                name='l_cell_concat')
        from agentnet.memory import GRUMemoryLayer
        l_gru = GRUMemoryLayer(CFG['EMBEDDING_SIZE'], l_cell_concat,
                               l_prev_gru, name='l_gru')
        l_dropout_output = DropoutLayer(l_gru, p=0.5, name='l_dropout_output')
        l_shp1 = ReshapeLayer(l_dropout_output, ([0], 1, [1]), name='l_shp1')
        if CFG.has_key('DISSECT') and CFG['DISSECT'] != 'No':
            import pdb
            pdb.set_trace()  # XXX BREAKPOINT
            if CFG['DISSECT'] == 'wr':
                l_decoder = TensorProdFactLayer(
                    (l_shp1, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'],
                    dim_r=CFG['REGION_SIZE'], dim_w=len(vocab),
                    nonlinearity=softmax, name='l_tensor',
                    W_hr='skip', b_hr='skip')
            elif CFG['DISSECT'] == 'rs':
                import pdb
                pdb.set_trace()  # XXX BREAKPOINT
                l_decoder = TensorProdFactLayer(
                    (l_shp1, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'],
                    dim_r=CFG['REGION_SIZE'], dim_w=len(vocab),
                    nonlinearity=softmax, name='l_tensor',
                    W_rw='skip', b_rw='skip')
        else:
            if CFG.has_key('DENSITY_TEMPERING') and CFG['DENSITY_TEMPERING']:
                print("TEMPERING")
                l_gamma = DenseLayer(l_shp1, num_units=1, name='l_gamma')
                l_gamma_shp = ReshapeLayer(l_gamma, ([0], [1], 1, 1))
                from TProd3 import TensorTemperatureLayer
                l_decoder = TensorTemperatureLayer(
                    (l_shp1, l_input_regions, l_gamma_shp),
                    dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
                    dim_w=len(vocab),
                    nonlinearity=lasagne.nonlinearities.softmax,
                    name='l_tensor')
            else:
                l_decoder = TensorProdFactLayer(
                    (l_shp1, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'],
                    dim_r=CFG['REGION_SIZE'], dim_w=len(vocab),
                    nonlinearity=softmax, name='l_tensor')
        if CFG['TENSOR_COND_WORD']:
            from RNNTraining import get_Regions_cond_words
            l_region = ExpressionLayer(l_decoder, get_Regions_cond_words,
                                       output_shape='auto', name='l_region')
        else:
            l_region = ExpressionLayer(l_decoder, lambda X: X.sum(3),
                                       output_shape='auto',
                                       name='l_region')  # sum over words
        l_region = ReshapeLayer(l_region, ([0], [2]), name='l_region')
        l_out = ExpressionLayer(l_decoder, lambda X: X.sum(2),
                                output_shape='auto',
                                name='l_out')  # sum over regions
        # }}}
    elif CFG['MODE'] == 'tensor-reducedw':  # {{{2
        from TProd3 import TensorProdFactLayer
        # input:  [h (batch, dim_h), r (num_batch, dim_r, num_r)]
        # output: [h[0], r[2], dim_w]
        l_input_regions = InputLayer(
            (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']),
            name='l_input_regions')
        if CFG.has_key('TENSOR_RECTIFY') and CFG['TENSOR_RECTIFY']:
            l_tensor = TensorProdFactLayer(
                (l_dropout_output, l_input_regions),
                dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
                dim_w=CFG['EMBEDDING_WORDS'],
                nonlinearity=lasagne.nonlinearities.rectify, name='l_tensor')
        else:
            l_tensor = TensorProdFactLayer(
                (l_dropout_output, l_input_regions),
                dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
                dim_w=CFG['EMBEDDING_WORDS'],
                nonlinearity=lasagne.nonlinearities.identity, name='l_tensor')
        # softmax does not accept non-flat layers, so flatten -> softmax -> reshape
        l_flatten = ReshapeLayer(
            l_tensor,
            (CFG['BATCH_SIZE'] * 1 * CFG['NUM_REGIONS'],
             CFG['EMBEDDING_WORDS']),
            name='l_flatten')
        l_words = DenseLayer(l_flatten, num_units=len(vocab),
                             nonlinearity=lasagne.nonlinearities.identity,
                             name='l_words')
        l_reshape = ReshapeLayer(
            l_words,
            (CFG['BATCH_SIZE'] * 1, CFG['NUM_REGIONS'] * len(vocab)),
            name='l_reshape')
        l_softmax = lasagne.layers.NonlinearityLayer(
            l_reshape, nonlinearity=lasagne.nonlinearities.softmax,
            name='l_softmax')
        l_reshape1 = ReshapeLayer(
            l_softmax,
            (CFG['BATCH_SIZE'], 1, CFG['NUM_REGIONS'], len(vocab)),
            name='l_reshape1')
        l_out = ExpressionLayer(l_reshape1, lambda X: X.sum(2),
                                output_shape='auto',
                                name='l_out')  # sum over regions
        # }}}
    elif CFG['MODE'] == 'tensor-removedWrw':
        from TProd3 import TensorProdFact2Layer
        # input:  [h (batch, dim_h), r (num_batch, dim_r, num_r)]
        # output: [h[0], r[2], dim_w]
        l_input_regions = InputLayer(
            (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']),
            name='l_input_regions')
        l_decoder = TensorProdFact2Layer(
            (l_dropout_output, l_input_regions),
            dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
            dim_w=len(vocab), nonlinearity=lasagne.nonlinearities.softmax,
            name='l_decoder')
        l_out = ExpressionLayer(l_decoder, lambda X: X.sum(2),
                                output_shape='auto',
                                name='l_out')  # sum over regions

    net_dictionnary = {
        'loc1': l_loc1, 'input_loc': l_input_loc,
        'sel_region2': l_sel_region2, 'loc': l_loc, 'conv': l_conv,
        'prev': l_prev_gru, 'input': l_cell_input, 'gru': l_gru,
        'sent': l_input_sentence, 'img': l_input_img, 'img2': l_input_img2,
        'reg_feedback2': l_region_feedback, 'reg_feedback': l_region,
        'reg': l_input_reg, 'out_reg': l_out_reg, 'out': l_out,
        'cnn': l_cnn_embedding, 'sent_emb': l_sentence_embedding,
        'decoder': l_decoder, 'boxes': l_boxes,
        'weighted_regions_prev': l_weighted_region_prev,
        'weighted_regions': l_weighted_region,
    }
    return net_dictionnary
def build_finetune_proposals(CFG, vgg16, resnet50): # {{{ # use images at different resolution but without fully connected layers assert((vgg16 is None) or (resnet50 is None)), "Only one cnn can be used" _input_regions, l_out_reg, l_input_img2, l_boxes, l_input_regions, l_conv = 6 * [None] if CFG['PROPOSALS'] == 3: vgg16_det = CNN.build_model_RCNN( CFG['NUM_REGIONS'], CFG['IM_SIZE'] * 1.5, pool_dims=3, dropout_value=CFG['RNN_DROPOUT']) print "Loading pretrained VGG16 parameters for detection" l_conv = vgg16_det['conv5_3'] l_input_img2 = vgg16_det['input'] l_boxes = vgg16_det['boxes'] l_input_regions = vgg16_det['reshape'] if CFG['CONV_NORMALIZED'] == 1: l_input_regions = ExpressionLayer(l_input_regions, lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8), output_shape='auto') elif CFG['CONV_NORMALIZED'] == 2: l_input_regions = ExpressionLayer(l_input_regions, lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8), output_shape='auto') else: _input_regions = ExpressionLayer(l_input_regions, lambda X: X * 0.01, output_shape='auto') l_cnn_embedding2 = DenseLayer(l_input_regions, num_units=CFG['REGION_SIZE'], name='l_cnn_proposals') l_input_regions = ReshapeLayer( l_cnn_embedding2, (CFG['BATCH_SIZE'], CFG['NUM_REGIONS'], CFG['REGION_SIZE'], 1)) l_input_regions = lasagne.layers.DimshuffleLayer(l_input_regions, (0, 2, 1, 3)) l_out_reg = l_input_regions l_input_reg = InputLayer((CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS'], 1), name='l_input_reg') l_input_regions = ReshapeLayer(l_input_reg, (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']), name='l_input_regions') # use images at different resolution but without fully connected layers elif CFG['PROPOSALS'] == 4: vgg16_det = CNN.build_model_RCNN(CFG['NUM_REGIONS'], int(CFG['IM_SIZE'] * 1.5), pool_dims=1, dropout_value=CFG['RNN_DROPOUT']) print "Loading pretrained VGG16 parameters for detection" model_param_values = pickle.load(open('vgg16.pkl'))['param values'] lasagne.layers.set_all_param_values(vgg16_det['conv5_3'], model_param_values[:-6]) l_input_img2 = vgg16_det['input'] l_conv = vgg16_det['conv5_3'] l_boxes = vgg16_det['boxes'] l_input_regions = vgg16_det['crop'] l_input_regions = ReshapeLayer(l_input_regions, (CFG['BATCH_SIZE'] * CFG['NUM_REGIONS'], CFG['REGION_SIZE'])) if CFG['CONV_NORMALIZED'] == 1: l_input_regions = ExpressionLayer(l_input_regions, lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8), output_shape='auto') elif CFG['CONV_NORMALIZED'] == 2: l_input_regions = ExpressionLayer(l_input_regions, lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8), output_shape='auto') else: _input_regions = ExpressionLayer(l_input_regions, lambda X: X * 0.01, output_shape='auto') l_input_regions = ReshapeLayer( l_input_regions, (CFG['BATCH_SIZE'], CFG['NUM_REGIONS'], CFG['REGION_SIZE'], 1)) l_input_regions = lasagne.layers.DimshuffleLayer( l_input_regions, (0, 2, 1, 3)) l_out_reg = l_input_regions l_input_reg = InputLayer((CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS'], 1), name='l_input_reg') l_input_regions = ReshapeLayer( l_input_reg, (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']), name='l_input_regions') # use images at different resolution but without fully connected layers elif CFG['PROPOSALS'] == 5: vgg16_det = CNN.build_model_RCNN(CFG['NUM_REGIONS'], int( CFG['IM_SIZE'] * 1.5), pool_dims=1, dropout_value=CFG['RNN_DROPOUT']) print "Loading pretrained VGG16 parameters for detection" model_param_values = pickle.load(open('vgg16.pkl'))[ 'param values'] 
    lasagne.layers.set_all_param_values(vgg16_det['conv5_3'], model_param_values[:-6])
    l_input_img2 = vgg16_det['input']
    l_conv = vgg16_det['conv5_3']
    l_input_regions = vgg16_det['conv5_3']
    if CFG['CONV_REDUCED'] > 1:
        l_input_regions = ExpressionLayer(l_input_regions, lambda X: X[:, :, ::CFG['CONV_REDUCED'], ::CFG['CONV_REDUCED']], output_shape='auto')
    if CFG['CONV_NORMALIZED'] == 1:
        l_input_regions = ExpressionLayer(l_input_regions, lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8), output_shape='auto')
    elif CFG['CONV_NORMALIZED'] == 2:
        l_input_regions = ExpressionLayer(l_input_regions, lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8), output_shape='auto')
    else:
        _input_regions = ExpressionLayer(l_input_regions, lambda X: X * 0.01, output_shape='auto')
    l_out_reg = l_input_regions
    l_input_reg = InputLayer((CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS'], 1), name='l_input_reg')
    l_input_regions = ReshapeLayer(l_input_reg, (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']), name='l_input_regions')
else:
    if CFG['CNN_MODEL'] == 'vgg':
        l_out_reg = vgg16['conv5_3']
    elif CFG['CNN_MODEL'] == 'resnet':
        l_out_reg = resnet50['res4f_relu']
    else:
        print(bcolors.FAIL + "Unrecognized network" + bcolors.ENDC)
    l_input_reg = InputLayer((CFG['BATCH_SIZE'], CFG['REGION_SIZE'], 14, 14), name='l_input_reg')
    if CFG['CONV_REDUCED']:
        l_input_regions = ExpressionLayer(l_input_reg, lambda X: X[:, :, ::CFG['CONV_REDUCED'], ::CFG['CONV_REDUCED']], output_shape='auto')
    else:
        l_input_regions = l_input_reg
    if CFG['CONV_NORMALIZED'] == 1:
        l_input_regions = ExpressionLayer(l_input_regions, lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8), output_shape='auto')
    elif CFG['CONV_NORMALIZED'] == 2:
        l_input_regions = ExpressionLayer(l_input_regions, lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8), output_shape='auto')
    else:
        # scale by 0.01 (a factor of 100) to avoid exploding gradients
        l_input_regions = ExpressionLayer(l_input_regions, lambda X: X * 0.01, output_shape='auto')
    if CFG['TENSOR_ADD_CONV']:
        l_input_regions = ConvLayer(l_input_regions, num_filters=CFG['REGION_SIZE'], filter_size=(3, 3), pad='same', name='l_add_con')
    l_input_regions = ReshapeLayer(l_input_regions, (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS'], 1))
return l_input_regions, _input_regions, l_out_reg, l_input_img2, l_boxes, l_conv
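# Example (a toy NumPy check, not from the original code): both CONV_NORMALIZED
# modes above normalize each spatial position across the channel axis (axis=1);
# mode 1 gives unit L1 mass (for non-negative ReLU features), mode 2 unit L2 norm.
import numpy as np

X = np.random.rand(2, 512, 14, 14).astype('float32')           # non-negative features
l1 = X / (X.sum(axis=1, keepdims=True) + 1e-8)                  # CONV_NORMALIZED == 1
l2 = X / np.sqrt((X ** 2).sum(axis=1, keepdims=True) + 1e-8)    # CONV_NORMALIZED == 2
assert np.allclose(l1.sum(axis=1), 1.0, atol=1e-3)
assert np.allclose(np.sqrt((l2 ** 2).sum(axis=1)), 1.0, atol=1e-3)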
def build_model(input_var, nOutput): net = {} net['input'] = InputLayer((None, 3, 32, 32), input_var=input_var) net['conv1'] = ConvLayer(net['input'], num_filters=192, filter_size=5, pad=2, flip_filters=False, W=lasagne.init.GlorotUniform()) net['cccp1'] = ConvLayer(net['conv1'], num_filters=160, filter_size=1, flip_filters=False) net['cccp2'] = ConvLayer(net['cccp1'], num_filters=96, filter_size=1, flip_filters=False) net['pool1'] = PoolLayer(net['cccp2'], pool_size=3, stride=2, mode='max', ignore_border=False) net['drop3'] = DropoutLayer(net['pool1'], p=0.5) net['conv2'] = ConvLayer(net['drop3'], num_filters=192, filter_size=5, pad=2, flip_filters=False) net['cccp3'] = ConvLayer(net['conv2'], num_filters=192, filter_size=1, flip_filters=False) net['cccp4'] = ConvLayer(net['cccp3'], num_filters=192, filter_size=1, flip_filters=False) net['pool2'] = PoolLayer(net['cccp4'], pool_size=3, stride=2, mode='average_exc_pad', ignore_border=False) net['drop6'] = DropoutLayer(net['pool2'], p=0.5) net['conv3'] = ConvLayer(net['drop6'], num_filters=192, filter_size=3, pad=1, flip_filters=False) net['cccp5'] = ConvLayer(net['conv3'], num_filters=192, filter_size=1, flip_filters=False) net['cccp6'] = ConvLayer(net['cccp5'], num_filters=nOutput, filter_size=1, flip_filters=False) net['pool3'] = PoolLayer(net['cccp6'], pool_size=8, mode='average_exc_pad', ignore_border=False) net['output'] = FlattenLayer(net['pool3']) return net
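# Example usage (a sketch): compile a deterministic classifier from the
# Network-in-Network model above; deterministic=True disables the dropout layers
# at inference time.
import theano
import theano.tensor as T
import lasagne

X = T.tensor4('X')
net = build_model(X, nOutput=10)
scores = lasagne.layers.get_output(net['output'], deterministic=True)
predict_fn = theano.function([X], T.argmax(scores, axis=1))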
def build_network(input_var, nb_filter=16, \ input_size=(None, 3, tools.INP_PSIZE, tools.INP_PSIZE), \ debug_connections=True): net = OrderedDict() # Input, standardization last = net['input'] = InputLayer(input_size, input_var=input_var) last = net['norm'] = ExpressionLayer(last, lambda x: normalize(x)) # load feature encoder feats = get_features(last) net['features_s8_1'] = feats["conv4_4"] net['features_s8_2'] = feats["conv4_1"] net['features_s4'] = feats["conv3_3"] # Pretrained Encoder as before last = net["conv1_1"] = ConvLayer(last, nb_filter, 1, pad=0, flip_filters=False, nonlinearity=linear) last = net["bn1_1"] = layers.NonUpdateBatchNormLayer(last) last = net["relu1_1"] = NonlinearityLayer(last, nonlinearity=rectify) last = net["conv1_2"] = ConvLayer(last, nb_filter, 1, pad=0, flip_filters=False, nonlinearity=linear) last = net["bn1_2"] = layers.NonUpdateBatchNormLayer(last) last = net["relu1_2"] = NonlinearityLayer(last, nonlinearity=rectify) # feature aggregation at multiple scales last = net["bn1"] = layers.NonUpdateBatchNormLayer(last, beta=None, gamma=None) last = fan1 = fan_module_improved(last, net, "s8_1", net['features_s8_1'], nb_filter=nb_filter, scale=8) last = net["bn2"] = layers.NonUpdateBatchNormLayer(last, beta=None, gamma=None) last = fan2 = fan_module_improved(last, net, "s8_2", net['features_s8_2'], nb_filter=nb_filter, scale=8) last = net["bn3"] = layers.NonUpdateBatchNormLayer(last, beta=None, gamma=None) last = fan3 = fan_module_improved(last, net, "s4", net['features_s4'], nb_filter=nb_filter, scale=4) last = net["bn4"] = layers.FixedBatchNormLayer(last) # Decoder as before last = net["deconv1_2"] = transpose(last, net["conv1_2"], nonlinearity=None) last = net["deconv1_1"] = transpose(last, net["conv1_1"], nonlinearity=None) def debug_connection(l): l = layers.FixedBatchNormLayer(l, beta=net['bn4'].beta, gamma=net['bn4'].gamma, mean=net['bn4'].mean, inv_std=net['bn4'].inv_std) l = transpose(l, net["conv1_2"], nonlinearity=None, b=net['deconv1_2'].b) l = transpose(l, net["conv1_1"], nonlinearity=None, b=net['deconv1_1'].b) return l debug = [] if debug_connections: debug = [debug_connection(l) for l in [fan1, fan2, fan3]] else: debug = [net["relu1_2"], fan1, fan2, fan3, net["bn4"]] # features and resulting representations debug.append(net["s8_1/addition"]) debug.append(net["s8_1/input_gate"]) debug.append(net["s8_2/addition"]) debug.append(net["s8_2/input_gate"]) debug.append(net["s4/addition"]) debug.append(net["s4/input_gate"]) return last, net, debug
def build_model(input, prefix, lastClassNum=200, dropoutratio=0.4, classification=False): net = {} net[prefix + 'input'] = input net[prefix + 'conv1/7x7_s2'] = ConvLayer(net[prefix + 'input'], 64, 7, stride=2, pad=3, flip_filters=False, name=prefix + 'conv1/7x7_s2') net[prefix + 'pool1/3x3_s2'] = PoolLayer(net[prefix + 'conv1/7x7_s2'], pool_size=3, stride=2, ignore_border=False, name=prefix + 'pool1/3x3_s2') net[prefix + 'pool1/norm1'] = LRNLayer(net[prefix + 'pool1/3x3_s2'], alpha=0.00002, k=1, name=prefix + 'pool1/norm1') net[prefix + 'conv2/3x3_reduce'] = ConvLayer(net[prefix + 'pool1/norm1'], 64, 1, flip_filters=False, name=prefix + 'conv2/3x3_reduce') net[prefix + 'conv2/3x3'] = ConvLayer(net[prefix + 'conv2/3x3_reduce'], 192, 3, pad=1, flip_filters=False, name=prefix + 'conv2/3x3') net[prefix + 'conv2/norm2'] = LRNLayer(net[prefix + 'conv2/3x3'], alpha=0.00002, k=1, name=prefix + 'conv2/norm2') net[prefix + 'pool2/3x3_s2'] = PoolLayer(net[prefix + 'conv2/norm2'], pool_size=3, stride=2, ignore_border=False, name=prefix + 'pool2/3x3_s2') net.update( build_inception_module('inception_3a', prefix, net[prefix + 'pool2/3x3_s2'], [32, 64, 96, 128, 16, 32]), ) net.update( build_inception_module('inception_3b', prefix, net[prefix + 'inception_3a/output'], [64, 128, 128, 192, 32, 96])) net[prefix + 'pool3/3x3_s2'] = PoolLayer(net[prefix + 'inception_3b/output'], pool_size=3, stride=2, ignore_border=False, name=prefix + 'pool3/3x3_s2') net.update( build_inception_module('inception_4a', prefix, net[prefix + 'pool3/3x3_s2'], [64, 192, 96, 208, 16, 48])) net.update( build_inception_module('inception_4b', prefix, net[prefix + 'inception_4a/output'], [64, 160, 112, 224, 24, 64])) net.update( build_inception_module('inception_4c', prefix, net[prefix + 'inception_4b/output'], [64, 128, 128, 256, 24, 64])) net.update( build_inception_module('inception_4d', prefix, net[prefix + 'inception_4c/output'], [64, 112, 144, 288, 32, 64])) net.update( build_inception_module('inception_4e', prefix, net[prefix + 'inception_4d/output'], [128, 256, 160, 320, 32, 128])) net[prefix + 'pool4/3x3_s2'] = PoolLayer(net[prefix + 'inception_4e/output'], pool_size=3, stride=2, ignore_border=False, name=prefix + 'pool4/3x3_s2') net.update( build_inception_module('inception_5a', prefix, net[prefix + 'pool4/3x3_s2'], [128, 256, 160, 320, 32, 128])) net.update( build_inception_module('inception_5b', prefix, net[prefix + 'inception_5a/output'], [128, 384, 192, 384, 48, 128])) net[prefix + 'pool5/7x7_s1'] = GlobalPoolLayer( net[prefix + 'inception_5b/output'], name=prefix + 'pool5/7x7_s1') net[prefix + 'dropout'] = lasagne.layers.DropoutLayer( net[prefix + 'pool5/7x7_s1'], p=dropoutratio, name=prefix + 'dropout') if classification == True: net[prefix + 'loss3/classifier'] = DenseLayer(net[prefix + 'dropout'], num_units=lastClassNum, nonlinearity=linear, name=prefix + 'loss3/classifier') net[prefix + 'prob'] = NonlinearityLayer(net[prefix + 'loss3/classifier'], nonlinearity=softmax, name=prefix + 'prob') return net
def vgg16(input_var=None, image_size=256): from lasagne.layers import InputLayer from lasagne.layers import DenseLayer from lasagne.layers import NonlinearityLayer from lasagne.layers import DropoutLayer from lasagne.layers import Pool2DLayer as PoolLayer from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer from lasagne.nonlinearities import softmax net = {} net['input'] = InputLayer((None, 4, image_size, image_size), input_var=input_var) net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1, flip_filters=False) net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False) net['pool1'] = PoolLayer(net['conv1_2'], 2) net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1, flip_filters=False) net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False) net['pool2'] = PoolLayer(net['conv2_2'], 2) net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1, flip_filters=False) net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False) net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False) net['pool3'] = PoolLayer(net['conv3_3'], 2) net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1, flip_filters=False) net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False) net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False) net['pool4'] = PoolLayer(net['conv4_3'], 2) net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1, flip_filters=False) net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False) net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False) net['pool5'] = PoolLayer(net['conv5_3'], 2) net['fc6'] = DenseLayer(net['pool5'], num_units=4096) net['fc6_dropout'] = DropoutLayer(net['fc6'], p=0.5) net['fc7'] = DenseLayer(net['fc6_dropout'], num_units=4096) net['fc7_dropout'] = DropoutLayer(net['fc7'], p=0.5) net['fc8'] = DenseLayer(net['fc7_dropout'], num_units=1000, nonlinearity=lasagne.nonlinearities.sigmoid) return net['fc8']
def predict(self, input, hidden_state, Ws, bs): npx = self.npx # image size filter_size = self.dynamic_filter_size[0] f = 0 ############################### # filter-generating network # ############################### ## rgb to gray # output = ConvLayer(input, num_filters=1, filter_size=(1,1), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=None); Ws[f] = output.W; bs[f] = output.b; f = f+1 ## encoder output = ConvLayer(input, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify) Ws[f] = output.W bs[f] = output.b f = f + 1 output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(2, 2), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify) Ws[f] = output.W bs[f] = output.b f = f + 1 output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify) Ws[f] = output.W bs[f] = output.b f = f + 1 output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify) Ws[f] = output.W bs[f] = output.b f = f + 1 ## mid output = ConvLayer(output, num_filters=128, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify, untie_biases=True) Ws[f] = output.W bs[f] = output.b f = f + 1 # hidden = ConvLayer(hidden_state, num_filters=128, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = hidden.W; bs[f] = hidden.b; f = f+1 # hidden = ConvLayer(hidden, num_filters=128, filter_size=(3, 3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = hidden.W; bs[f] = hidden.b; f = f+1 # output = ElemwiseSumLayer([output, hidden]) # hidden_state = output ## decoder output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify) Ws[f] = output.W bs[f] = output.b f = f + 1 output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify) Ws[f] = output.W bs[f] = output.b f = f + 1 output = Upscale2DLayer(output, scale_factor=2) output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify) Ws[f] = output.W bs[f] = output.b f = f + 1 output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify) Ws[f] = output.W bs[f] = output.b f = f + 1 output = ConvLayer(output, num_filters=128, filter_size=(1, 1), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify) Ws[f] = output.W bs[f] = output.b f = f + 1 ## filter-generating layers output = ConvLayer(output, num_filters=filter_size + 1, filter_size=(1, 1), stride=(1, 1), pad=(0, 0), W=Ws[f], b=bs[f], nonlinearity=identity) Ws[f] = output.W bs[f] = output.b f = f + 1 filters = SliceLayer(output, indices=slice(0, filter_size), axis=1) filters_biases = SliceLayer(output, indices=slice(filter_size, filter_size + 1), axis=1) # filters = FeaturePoolLayer(filters, pool_size=9*9, pool_function=theano.tensor.nnet.softmax) filters = DimshuffleLayer(filters, (0, 2, 3, 1)) filters = ReshapeLayer(filters, shape=(-1, filter_size)) filters = NonlinearityLayer(filters, nonlinearity=softmax) filters = ReshapeLayer(filters, shape=(-1, npx, npx, filter_size)) filters = DimshuffleLayer(filters, (0, 3, 1, 2)) ######################### # transformer network # 
######################### ## get inputs # output = SliceLayer(input, indices=slice(self.nInputs-1, self.nInputs), axis=1) # select the last (most recent) frame from the inputs ## add a bias output = ConcatLayer([input, filters_biases]) output = FeaturePoolLayer(output, pool_size=2, pool_function=theano.tensor.sum) ## dynamic convolution output_dynconv = DynamicFilterLayer([output, filters], filter_size=(1, filter_size, 1), pad=(1 // 2, filter_size // 2)) # ######################## # # refinement network # # ######################## # output = ConcatLayer([output_dynconv, input]) # output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same', W=HeUniform(), b=Constant(0.0), nonlinearity=rectify) # output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same', W=HeUniform(), b=Constant(0.0), nonlinearity=rectify) # output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same', W=HeUniform(), b=Constant(0.0), nonlinearity=rectify) # output = ConvLayer(output, num_filters=1, filter_size=(3, 3), stride=(1, 1), pad='same', W=HeUniform(), b=Constant(0.0), nonlinearity=rectify) # output = ElemwiseSumLayer([output_dynconv, output]) # this is a residual connection output = output_dynconv return output, hidden_state, filters
def build_net(input_dim=572, no_channels=3, seg_entities=2): """Implementation of 'U-Net: Convolutional Networks for Biomedical Image Segmentation', https://arxiv.org/pdf/1505.04597.pdf :param input_dim: x and y dimensions of 3D input :param no_channels: z dimension of 3D input :param seg_entities: number of classes to segment, i.e. number of categories per pixel for the softmax function """ nonlin = rectify pad = 'valid' net = OrderedDict() net['input'] = InputLayer((None, no_channels, input_dim, input_dim)) net['encode/conv1_1'] = ConvLayer(net['input'], 64, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['encode/conv1_2'] = ConvLayer(net['encode/conv1_1'], 64, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['encode/pool1'] = PoolLayer(net['encode/conv1_2'], 2) net['encode/conv2_1'] = ConvLayer(net['encode/pool1'], 128, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['encode/conv2_2'] = ConvLayer(net['encode/conv2_1'], 128, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['encode/pool2'] = PoolLayer(net['encode/conv2_2'], 2) net['encode/conv3_1'] = ConvLayer(net['encode/pool2'], 256, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['encode/conv3_2'] = ConvLayer(net['encode/conv3_1'], 256, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['encode/pool3'] = PoolLayer(net['encode/conv3_2'], 2) net['encode/conv4_1'] = ConvLayer(net['encode/pool3'], 512, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['encode/conv4_2'] = ConvLayer(net['encode/conv4_1'], 512, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['encode/pool4'] = PoolLayer(net['encode/conv4_2'], 2) net['encode/conv5_1'] = ConvLayer(net['encode/pool4'], 1024, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['encode/conv5_2'] = ConvLayer(net['encode/conv5_1'], 1024, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['decode/up_conv1'] = TransposedConv2DLayer(net['encode/conv5_2'], 512, 2, stride=2, crop='valid', nonlinearity=None) net['decode/concat_c4_u1'] = ConcatLayer([net['encode/conv4_2'], net['decode/up_conv1']], axis=1, cropping=(None, None, 'center', 'center')) net['decode/conv1_1'] = ConvLayer(net['decode/concat_c4_u1'], 512, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['decode/conv1_2'] = ConvLayer(net['decode/conv1_1'], 512, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['decode/up_conv2'] = TransposedConv2DLayer(net['decode/conv1_2'], 256, 2, stride=2, crop='valid', nonlinearity=None) net['decode/concat_c3_u2'] = ConcatLayer([net['encode/conv3_2'], net['decode/up_conv2']], axis=1, cropping=(None, None, 'center', 'center')) net['decode/conv2_1'] = ConvLayer(net['decode/concat_c3_u2'], 256, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['decode/conv2_2'] = ConvLayer(net['decode/conv2_1'], 256, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['decode/up_conv3'] = TransposedConv2DLayer(net['decode/conv2_2'], 128, 2, stride=2, crop='valid', nonlinearity=None) net['decode/concat_c2_u3'] = ConcatLayer([net['encode/conv2_2'], net['decode/up_conv3']], axis=1, cropping=(None, None, 'center', 'center')) net['decode/conv3_1'] = ConvLayer(net['decode/concat_c2_u3'], 128, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['decode/conv3_2'] = ConvLayer(net['decode/conv3_1'], 128, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['decode/up_conv4'] = TransposedConv2DLayer(net['decode/conv3_2'], 64, 2, stride=2, crop='valid', 
nonlinearity=None) net['decode/concat_c1_u4'] = ConcatLayer([net['encode/conv1_2'], net['decode/up_conv4']], axis=1, cropping=(None, None, 'center', 'center')) net['decode/conv4_1'] = ConvLayer(net['decode/concat_c1_u4'], 128, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['decode/conv4_2'] = ConvLayer(net['decode/conv4_1'], 128, 3, pad=pad, nonlinearity=nonlin, W=HeNormal(gain="relu")) net['seg_map'] = ConvLayer(net['decode/conv4_2'], seg_entities, 1, pad=pad, nonlinearity=None, W=HeNormal(gain="relu")) net['seg_map/dimshuffle'] = DimshuffleLayer(net['seg_map'], (1, 0, 2, 3)) net['seg_map/reshape'] = ReshapeLayer(net['seg_map/dimshuffle'], (seg_entities, -1)) net['seg_map/flattened'] = DimshuffleLayer(net['seg_map/reshape'], (1, 0)) net['out'] = NonlinearityLayer(net['seg_map/flattened'], nonlinearity=softmax) return net
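# Worked size arithmetic (a sketch): with pad='valid', every 3x3 convolution trims
# 2 pixels and every 2x2 pool halves the size, so the segmentation map is smaller
# than the input -- 572 in, 388 out, matching the U-Net paper.
def unet_output_size(d=572):
    for _ in range(4):      # encoder stages: two 3x3 valid convs (-4), then 2x2 pool (/2)
        d = (d - 4) // 2
    d -= 4                  # bottleneck: two more valid convs
    for _ in range(4):      # decoder stages: 2x2 up-conv (x2), then two valid convs (-4)
        d = d * 2 - 4
    return d

assert unet_output_size(572) == 388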
def build_model(): l0 = InputLayer(data_sizes["sunny"]) l1a = ConvLayer(l0, num_filters=32, filter_size=(3, 3), W=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.1)) l1b = ConvLayer(l1a, num_filters=32, filter_size=(3, 3), W=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.1)) l1c = ConvLayer(l1b, num_filters=32, filter_size=(3, 3), W=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.1)) l1 = MaxPoolLayer(l1c, pool_size=(2, 2)) l2a = ConvLayer(l1, num_filters=32, filter_size=(3, 3), W=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.1)) l2b = ConvLayer(l2a, num_filters=32, filter_size=(3, 3), W=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.1)) l2c = ConvLayer(l2b, num_filters=32, filter_size=(3, 3), W=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.1)) l3 = MaxPoolLayer(l2c, pool_size=(3, 3)) l4a = ConvLayer(l3, num_filters=32, filter_size=(4, 4), W=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.1)) l4b = ConvLayer(l4a, num_filters=32, filter_size=(3, 3), W=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.1)) l4c = ConvLayer(l4b, num_filters=32, filter_size=(3, 3), W=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.1)) l5a = ConvLayer(l4c, num_filters=256, filter_size=(1, 1), W=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.1)) l5b = ConvLayer(l5a, num_filters=256, filter_size=(1, 1), W=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.1)) l5c = ConvLayer(l5b, num_filters=1, filter_size=(1, 1), W=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.1), nonlinearity=lasagne.nonlinearities.sigmoid) l_final = lasagne.layers.FlattenLayer(l5c, outdim=2) return { "inputs": { "sunny": l0 }, "outputs": { "segmentation": l_final, "top": l_final } }
def build_vgg_cnn(input_var, name_pretrained_model): # load pretrained model print 'load pretrained model: ', name_pretrained_model model = pickle.load(open('models/' + name_pretrained_model, 'rb')) # MEAN_IMAGE = np.array([np.full((224, 224), model['mean value'][0]), np.full((224, 224), model['mean value'][1]), np.full((224, 224), model['mean value'][2])]) # Set pretrained model values print 'set pretrained model values of', name_pretrained_model pretrained_model = build_model_vgg16() lasagne.layers.set_all_param_values(pretrained_model['output_layer'], model['param values']) #pretrained layers from vgg16 conv1_1 = pretrained_model['conv1_1'] conv1_2 = pretrained_model['conv1_2'] conv2_1 = pretrained_model['conv2_1'] conv2_2 = pretrained_model['conv2_2'] # pretrained layers network = lasagne.layers.InputLayer(shape=(None, 3, 48, 48), input_var=input_var) network = ConvLayer(network, 64, 3, pad=1, flip_filters=False, W=conv1_1.W.get_value(), b=conv1_1.b.get_value()) network = ConvLayer(network, 64, 3, pad=1, flip_filters=False, W=conv1_2.W.get_value(), b=conv1_2.b.get_value()) network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2)) network = ConvLayer(network, 128, 3, pad=1, flip_filters=False, W=conv2_1.W.get_value(), b=conv2_1.b.get_value()) network = ConvLayer(network, 128, 3, pad=1, flip_filters=False, W=conv2_2.W.get_value(), b=conv2_2.b.get_value()) network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2)) # new layers network = lasagne.layers.batch_norm( ConvLayer(network, num_filters=32, filter_size=(5, 5), nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False, W=lasagne.init.GlorotUniform())) network = lasagne.layers.DenseLayer( lasagne.layers.dropout(network, p=.5), num_units=256, nonlinearity=lasagne.nonlinearities.rectify) network = lasagne.layers.DenseLayer( lasagne.layers.dropout(network, p=.5), num_units=7, nonlinearity=lasagne.nonlinearities.softmax) return network
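# Design note with a sketch (toy layers, not from the original code): passing
# W=conv1_1.W.get_value() as above COPIES the pretrained values into an
# independent parameter, so fine-tuning leaves the source model untouched.
# Passing the shared variable itself would instead TIE the two parameters.
from lasagne.layers import InputLayer as _InputLayer, Conv2DLayer as _Conv2DLayer

_l_in = _InputLayer((None, 3, 48, 48))
_pre = _Conv2DLayer(_l_in, 64, 3, pad=1)
_copied = _Conv2DLayer(_l_in, 64, 3, pad=1, W=_pre.W.get_value(), b=_pre.b.get_value())
_shared = _Conv2DLayer(_l_in, 64, 3, pad=1, W=_pre.W, b=_pre.b)  # updates affect both layers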
def build_model(x):
    net = {}
    net['input'] = InputLayer((None, 3, None, None), x)
    net['conv1/7x7_s2'] = ConvLayer(net['input'], 64, 7, stride=2, pad=3, flip_filters=False)
    net['pool1/3x3_s2'] = PoolLayer(net['conv1/7x7_s2'], pool_size=3, stride=2, ignore_border=False)
    net['pool1/norm1'] = LRNLayer(net['pool1/3x3_s2'], alpha=0.00002, k=1)
    net['conv2/3x3_reduce'] = ConvLayer(net['pool1/norm1'], 64, 1, flip_filters=False)
    net['conv2/3x3'] = ConvLayer(net['conv2/3x3_reduce'], 192, 3, pad=1, flip_filters=False)
    net['conv2/norm2'] = LRNLayer(net['conv2/3x3'], alpha=0.00002, k=1)
    net['pool2/3x3_s2'] = PoolLayer(net['conv2/norm2'], pool_size=3, stride=2)
    net.update(build_inception_module('inception_3a', net['pool2/3x3_s2'], [32, 64, 96, 128, 16, 32]))
    net.update(build_inception_module('inception_3b', net['inception_3a/output'], [64, 128, 128, 192, 32, 96]))
    net['pool3/3x3_s2'] = PoolLayer(net['inception_3b/output'], pool_size=3, stride=2)
    net.update(build_inception_module('inception_4a', net['pool3/3x3_s2'], [64, 192, 96, 208, 16, 48]))
    net.update(build_inception_module('inception_4b', net['inception_4a/output'], [64, 160, 112, 224, 24, 64]))
    net.update(build_inception_module('inception_4c', net['inception_4b/output'], [64, 128, 128, 256, 24, 64]))
    net.update(build_inception_module('inception_4d', net['inception_4c/output'], [64, 112, 144, 288, 32, 64]))
    net.update(build_inception_module('inception_4e', net['inception_4d/output'], [128, 256, 160, 320, 32, 128]))
    net['pool4/3x3_s2'] = PoolLayer(net['inception_4e/output'], pool_size=3, stride=2)
    net.update(build_inception_module('inception_5a', net['pool4/3x3_s2'], [128, 256, 160, 320, 32, 128]))
    net.update(build_inception_module('inception_5b', net['inception_5a/output'], [128, 384, 192, 384, 48, 128]))
    net['pool5/7x7_s1'] = GlobalPoolLayer(net['inception_5b/output'])
    net['loss3/classifier'] = DenseLayer(net['pool5/7x7_s1'], num_units=1000, nonlinearity=linear)
    net['prob'] = NonlinearityLayer(net['loss3/classifier'], nonlinearity=softmax)
    # load the pickled weights once, in binary mode
    with open('blvc_googlenet.pkl', 'rb') as f:
        vals = pickle.load(f)
    lasagne.layers.set_all_param_values(net['prob'], [v.astype(np.float32) for v in vals['param values']])
    return net, vals['synset words']
def build_model(): net = {} net['input'] = InputLayer((None, 3, None, None)) net['conv1/7x7_s2'] = ConvLayer(net['input'], 64, 7, stride=2, pad=3, flip_filters=False) net['pool1/3x3_s2'] = PoolLayer(net['conv1/7x7_s2'], pool_size=3, stride=2, ignore_border=False) net['pool1/norm1'] = LRNLayer(net['pool1/3x3_s2'], alpha=0.00002, k=1) net['conv2/3x3_reduce'] = ConvLayer(net['pool1/norm1'], 64, 1, flip_filters=False) net['conv2/3x3'] = ConvLayer(net['conv2/3x3_reduce'], 192, 3, pad=1, flip_filters=False) net['conv2/norm2'] = LRNLayer(net['conv2/3x3'], alpha=0.00002, k=1) net['pool2/3x3_s2'] = PoolLayer(net['conv2/norm2'], pool_size=3, stride=2, ignore_border=False) net.update( build_inception_module('inception_3a', net['pool2/3x3_s2'], [32, 64, 96, 128, 16, 32])) net.update( build_inception_module('inception_3b', net['inception_3a/output'], [64, 128, 128, 192, 32, 96])) net['pool3/3x3_s2'] = PoolLayer(net['inception_3b/output'], pool_size=3, stride=2, ignore_border=False) net.update( build_inception_module('inception_4a', net['pool3/3x3_s2'], [64, 192, 96, 208, 16, 48])) net.update( build_inception_module('inception_4b', net['inception_4a/output'], [64, 160, 112, 224, 24, 64])) net.update( build_inception_module('inception_4c', net['inception_4b/output'], [64, 128, 128, 256, 24, 64])) net.update( build_inception_module('inception_4d', net['inception_4c/output'], [64, 112, 144, 288, 32, 64])) net.update( build_inception_module('inception_4e', net['inception_4d/output'], [128, 256, 160, 320, 32, 128])) net['pool4/3x3_s2'] = PoolLayer(net['inception_4e/output'], pool_size=3, stride=2, ignore_border=False) net.update( build_inception_module('inception_5a', net['pool4/3x3_s2'], [128, 256, 160, 320, 32, 128])) net.update( build_inception_module('inception_5b', net['inception_5a/output'], [128, 384, 192, 384, 48, 128])) net['pool5/7x7_s1'] = GlobalPoolLayer(net['inception_5b/output']) net['loss3/classifier'] = DenseLayer(net['pool5/7x7_s1'], num_units=1000, nonlinearity=linear) net['prob'] = NonlinearityLayer(net['loss3/classifier'], nonlinearity=softmax) return net
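# build_inception_module is called above but not shown in this section; the sketch
# below is consistent with the call sites and with the standard Lasagne
# blvc_googlenet recipe (an assumption, not necessarily the author's exact code).
# The filter list is ordered [pool_proj, 1x1, 3x3_reduce, 3x3, 5x5_reduce, 5x5].
from lasagne.layers import ConcatLayer, MaxPool2DLayer
from lasagne.layers import Conv2DLayer as ConvLayer  # or the cuDNN variant used above

def build_inception_module(name, input_layer, nfilters):
    net = {}
    net['pool'] = MaxPool2DLayer(input_layer, pool_size=3, stride=1, pad=1)
    net['pool_proj'] = ConvLayer(net['pool'], nfilters[0], 1, flip_filters=False)
    net['1x1'] = ConvLayer(input_layer, nfilters[1], 1, flip_filters=False)
    net['3x3_reduce'] = ConvLayer(input_layer, nfilters[2], 1, flip_filters=False)
    net['3x3'] = ConvLayer(net['3x3_reduce'], nfilters[3], 3, pad=1, flip_filters=False)
    net['5x5_reduce'] = ConvLayer(input_layer, nfilters[4], 1, flip_filters=False)
    net['5x5'] = ConvLayer(net['5x5_reduce'], nfilters[5], 5, pad=2, flip_filters=False)
    net['output'] = ConcatLayer([net['1x1'], net['3x3'], net['5x5'], net['pool_proj']])
    return {name + '/' + k: v for k, v in net.items()}   # e.g. 'inception_3a/output'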
def __init__(self, height, width): self.net = {} self.net['input'] = InputLayer((1, 3, height, width)) self.net['conv1_1'] = ConvLayer(self.net['input'], 64, 3, pad=1, flip_filters=False) self.net['conv1_2'] = ConvLayer(self.net['conv1_1'], 64, 3, pad=1, flip_filters=False) self.net['pool1'] = Pool2DLayer(self.net['conv1_2'], 2, mode='average_exc_pad') self.net['conv2_1'] = ConvLayer(self.net['pool1'], 128, 3, pad=1, flip_filters=False) self.net['conv2_2'] = ConvLayer(self.net['conv2_1'], 128, 3, pad=1, flip_filters=False) self.net['pool2'] = Pool2DLayer(self.net['conv2_2'], 2, mode='average_exc_pad') self.net['conv3_1'] = ConvLayer(self.net['pool2'], 256, 3, pad=1, flip_filters=False) self.net['conv3_2'] = ConvLayer(self.net['conv3_1'], 256, 3, pad=1, flip_filters=False) self.net['conv3_3'] = ConvLayer(self.net['conv3_2'], 256, 3, pad=1, flip_filters=False) self.net['conv3_4'] = ConvLayer(self.net['conv3_3'], 256, 3, pad=1, flip_filters=False) self.net['pool3'] = Pool2DLayer(self.net['conv3_4'], 2, mode='average_exc_pad') self.net['conv4_1'] = ConvLayer(self.net['pool3'], 512, 3, pad=1, flip_filters=False) self.net['conv4_2'] = ConvLayer(self.net['conv4_1'], 512, 3, pad=1, flip_filters=False) self.net['conv4_3'] = ConvLayer(self.net['conv4_2'], 512, 3, pad=1, flip_filters=False) self.net['conv4_4'] = ConvLayer(self.net['conv4_3'], 512, 3, pad=1, flip_filters=False) self.net['pool4'] = Pool2DLayer(self.net['conv4_4'], 2, mode='average_exc_pad') self.net['conv5_1'] = ConvLayer(self.net['pool4'], 512, 3, pad=1, flip_filters=False) self.net['conv5_2'] = ConvLayer(self.net['conv5_1'], 512, 3, pad=1, flip_filters=False) self.net['conv5_3'] = ConvLayer(self.net['conv5_2'], 512, 3, pad=1, flip_filters=False) self.net['conv5_4'] = ConvLayer(self.net['conv5_3'], 512, 3, pad=1, flip_filters=False) self.net['pool5'] = Pool2DLayer(self.net['conv5_4'], 2, mode='average_exc_pad')
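# Example usage (a sketch; the surrounding class name is not shown here, so
# StyleNet below is hypothetical): this average-pooled VGG-19 is typically used
# as a fixed feature extractor, reading activations from several depths at once.
import theano
import theano.tensor as T
import lasagne

model = StyleNet(height=384, width=512)   # hypothetical class name
x = T.tensor4('x')
layers = [model.net[k] for k in ('conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv4_2')]
features = lasagne.layers.get_output(layers, x)
feature_fn = theano.function([x], features)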
def build_vgg_model(prefix,inputLayer=net_input,classificationFlag=False,stnFlag=False,dropout_ratio = 0.5): net = {} net[prefix + 'input'] = inputLayer net[prefix + 'conv1_1'] = ConvLayer( net[prefix + 'input'], 64, 3, pad=1, flip_filters=False, name=prefix + 'conv1_1') net[prefix + 'conv1_2'] = ConvLayer( net[prefix + 'conv1_1'], 64, 3, pad=1, flip_filters=False, name=prefix + 'conv1_2') net[prefix + 'pool1'] = PoolLayer(net[prefix + 'conv1_2'], 2, name=prefix + 'pool1') net[prefix + 'conv2_1'] = ConvLayer( net[prefix + 'pool1'], 128, 3, pad=1, flip_filters=False, name=prefix + 'conv2_1') net[prefix + 'conv2_2'] = ConvLayer( net[prefix + 'conv2_1'], 128, 3, pad=1, flip_filters=False, name=prefix + 'conv2_2') net[prefix + 'pool2'] = PoolLayer(net[prefix + 'conv2_2'], 2, name=prefix + 'pool2') net[prefix + 'conv3_1'] = ConvLayer( net[prefix + 'pool2'], 256, 3, pad=1, flip_filters=False ,name=prefix + 'conv3_1') net[prefix + 'conv3_2'] = ConvLayer( net[prefix + 'conv3_1'], 256, 3, pad=1, flip_filters=False, name=prefix + 'conv3_2') net[prefix + 'conv3_3'] = ConvLayer( net[prefix + 'conv3_2'], 256, 3, pad=1, flip_filters=False , name=prefix + 'conv3_3') net[prefix + 'pool3'] = PoolLayer(net[prefix +'conv3_3'], 2, name=prefix + 'pool3') net[prefix +'conv4_1'] = ConvLayer( net[prefix +'pool3'], 512, 3, pad=1, flip_filters=False ,name=prefix +'conv4_1') net[prefix +'conv4_2'] = ConvLayer( net[prefix +'conv4_1'], 512, 3, pad=1, flip_filters=False, name=prefix +'conv4_2') net[prefix +'conv4_3'] = ConvLayer( net[prefix +'conv4_2'], 512, 3, pad=1, flip_filters=False, name=prefix +'conv4_3') net[prefix +'pool4'] = PoolLayer(net[prefix +'conv4_3'], 2 ,name=prefix +'pool4') net[prefix +'conv5_1'] = ConvLayer( net[prefix +'pool4'], 512, 3, pad=1, flip_filters=False, name=prefix +'conv5_1') net[prefix +'conv5_2'] = ConvLayer( net[prefix +'conv5_1'], 512, 3, pad=1, flip_filters=False, name=prefix +'conv5_2') net[prefix +'conv5_3'] = ConvLayer( net[prefix +'conv5_2'], 512, 3, pad=1, flip_filters=False ,name=prefix +'conv5_3') net[prefix +'pool5'] = PoolLayer(net[prefix +'conv5_3'], 2,name=prefix +'pool5') if stnFlag == False: net[prefix +'fc6'] = DenseLayer(net[prefix +'pool5'], num_units=4096, name=prefix +'fc6') net[prefix +'fc6_dropout'] = DropoutLayer(net[prefix +'fc6'], p=dropout_ratio, name=prefix +'fc6_dropout') net[prefix +'fc7'] = DenseLayer(net[prefix +'fc6_dropout'], num_units=4096, name=prefix +'fc7') net[prefix +'fc7_dropout'] = DropoutLayer(net[prefix +'fc7'], p=dropout_ratio, name=prefix +'fc7_dropout') if classificationFlag == True: net[prefix +'fc8'] = DenseLayer( net[prefix +'fc7_dropout'], num_units=67, nonlinearity=None, name=prefix +'fc8') net[prefix +'prob'] = NonlinearityLayer(net[prefix +'fc8'], softmax, name=prefix +'prob') return net
def build(inputHeight, inputWidth, input_var, do_dropout=False):
    net = {'input': InputLayer((None, 3, inputHeight, inputWidth), input_var=input_var)}
    print "Input: {}".format(net['input'].output_shape[1:])
    net['bgr'] = RGBtoBGRLayer(net['input'])
    net['contr_1_1'] = batch_norm(ConvLayer(net['bgr'], 64, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "contr_1_1: {}".format(net['contr_1_1'].output_shape[1:])
    net['contr_1_2'] = batch_norm(ConvLayer(net['contr_1_1'], 64, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "contr_1_2: {}".format(net['contr_1_2'].output_shape[1:])
    net['pool1'] = Pool2DLayer(net['contr_1_2'], 2)
    print "pool1: {}".format(net['pool1'].output_shape[1:])
    net['contr_2_1'] = batch_norm(ConvLayer(net['pool1'], 128, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "contr_2_1: {}".format(net['contr_2_1'].output_shape[1:])
    net['contr_2_2'] = batch_norm(ConvLayer(net['contr_2_1'], 128, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "contr_2_2: {}".format(net['contr_2_2'].output_shape[1:])
    net['pool2'] = Pool2DLayer(net['contr_2_2'], 2)
    print "pool2: {}".format(net['pool2'].output_shape[1:])
    net['contr_3_1'] = batch_norm(ConvLayer(net['pool2'], 256, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "contr_3_1: {}".format(net['contr_3_1'].output_shape[1:])
    net['contr_3_2'] = batch_norm(ConvLayer(net['contr_3_1'], 256, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "contr_3_2: {}".format(net['contr_3_2'].output_shape[1:])
    net['pool3'] = Pool2DLayer(net['contr_3_2'], 2)
    print "pool3: {}".format(net['pool3'].output_shape[1:])
    net['contr_4_1'] = batch_norm(ConvLayer(net['pool3'], 512, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "contr_4_1: {}".format(net['contr_4_1'].output_shape[1:])
    net['contr_4_2'] = batch_norm(ConvLayer(net['contr_4_1'], 512, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "contr_4_2: {}".format(net['contr_4_2'].output_shape[1:])
    l = net['pool4'] = Pool2DLayer(net['contr_4_2'], 2)
    print "pool4: {}".format(net['pool4'].output_shape[1:])
    # The paper does not really describe where and how dropout is added;
    # feel free to try more options.
    if do_dropout:
        l = DropoutLayer(l, p=0.4)
    net['encode_1'] = batch_norm(ConvLayer(l, 1024, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "encode_1: {}".format(net['encode_1'].output_shape[1:])
    net['encode_2'] = batch_norm(ConvLayer(net['encode_1'], 1024, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "encode_2: {}".format(net['encode_2'].output_shape[1:])
    net['upscale1'] = batch_norm(Deconv2DLayer(net['encode_2'], 512, 2, 2, crop="valid", W=GlorotNormal(gain="relu")))
    print "upscale1: {}".format(net['upscale1'].output_shape[1:])
    net['concat1'] = ConcatLayer([net['upscale1'], net['contr_4_2']], cropping=(None, None, "center", "center"))
    print "concat1: {}".format(net['concat1'].output_shape[1:])
    net['expand_1_1'] = batch_norm(ConvLayer(net['concat1'], 512, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "expand_1_1: {}".format(net['expand_1_1'].output_shape[1:])
    net['expand_1_2'] = batch_norm(ConvLayer(net['expand_1_1'], 512, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "expand_1_2: {}".format(net['expand_1_2'].output_shape[1:])
    net['upscale2'] = batch_norm(Deconv2DLayer(net['expand_1_2'], 256, 2, 2, crop="valid", W=GlorotNormal(gain="relu")))
    print "upscale2: {}".format(net['upscale2'].output_shape[1:])
    net['concat2'] = ConcatLayer([net['upscale2'], net['contr_3_2']], cropping=(None, None, "center", "center"))
    print "concat2: {}".format(net['concat2'].output_shape[1:])
    net['expand_2_1'] = batch_norm(ConvLayer(net['concat2'], 256, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "expand_2_1: {}".format(net['expand_2_1'].output_shape[1:])
    net['expand_2_2'] = batch_norm(ConvLayer(net['expand_2_1'], 256, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "expand_2_2: {}".format(net['expand_2_2'].output_shape[1:])
    net['upscale3'] = batch_norm(Deconv2DLayer(net['expand_2_2'], 128, 2, 2, crop="valid", W=GlorotNormal(gain="relu")))
    print "upscale3: {}".format(net['upscale3'].output_shape[1:])
    net['concat3'] = ConcatLayer([net['upscale3'], net['contr_2_2']], cropping=(None, None, "center", "center"))
    print "concat3: {}".format(net['concat3'].output_shape[1:])
    net['expand_3_1'] = batch_norm(ConvLayer(net['concat3'], 128, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "expand_3_1: {}".format(net['expand_3_1'].output_shape[1:])
    net['expand_3_2'] = batch_norm(ConvLayer(net['expand_3_1'], 128, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "expand_3_2: {}".format(net['expand_3_2'].output_shape[1:])
    net['upscale4'] = batch_norm(Deconv2DLayer(net['expand_3_2'], 64, 2, 2, crop="valid", W=GlorotNormal(gain="relu")))
    print "upscale4: {}".format(net['upscale4'].output_shape[1:])
    net['concat4'] = ConcatLayer([net['upscale4'], net['contr_1_2']], cropping=(None, None, "center", "center"))
    print "concat4: {}".format(net['concat4'].output_shape[1:])
    net['expand_4_1'] = batch_norm(ConvLayer(net['concat4'], 64, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "expand_4_1: {}".format(net['expand_4_1'].output_shape[1:])
    net['expand_4_2'] = batch_norm(ConvLayer(net['expand_4_1'], 64, 3, pad='same', W=GlorotNormal(gain="relu")))
    print "expand_4_2: {}".format(net['expand_4_2'].output_shape[1:])
    net['output'] = ConvLayer(net['expand_4_2'], 1, 1, nonlinearity=sigmoid)
    print "output: {}".format(net['output'].output_shape[1:])
    # Alternative multi-class head (flatten + softmax), kept for reference:
    # net['dimshuffle'] = DimshuffleLayer(net['output_segmentation'], (1, 0, 2, 3))
    # net['reshapeSeg'] = ReshapeLayer(net['dimshuffle'], (2, -1))
    # net['dimshuffle2'] = DimshuffleLayer(net['reshapeSeg'], (1, 0))
    # net['output_flattened'] = NonlinearityLayer(net['dimshuffle2'], nonlinearity=lasagne.nonlinearities.softmax)
    return net
def initialize_network(width): # initialize network - VGG19 style net = {} net['input'] = InputLayer((1, 3, width, width)) net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1) net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1) net['pool1'] = PoolLayer(net['conv1_2'], 2, mode='average_exc_pad') net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1) net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad=1) net['pool2'] = PoolLayer(net['conv2_2'], 2, mode='average_exc_pad') net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1) net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad=1) net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad=1) net['conv3_4'] = ConvLayer(net['conv3_3'], 256, 3, pad=1) net['pool3'] = PoolLayer(net['conv3_4'], 2, mode='average_exc_pad') net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1) net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad=1) net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad=1) net['conv4_4'] = ConvLayer(net['conv4_3'], 512, 3, pad=1) net['pool4'] = PoolLayer(net['conv4_4'], 2, mode='average_exc_pad') net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1) net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad=1) net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad=1) net['conv5_4'] = ConvLayer(net['conv5_3'], 512, 3, pad=1) net['pool5'] = PoolLayer(net['conv5_4'], 2, mode='average_exc_pad') return net
def build_vgg_model():
    net = {}
    net['input'] = InputLayer((None, 3, 224, 224))
    net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1, flip_filters=False)
    net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False)
    net['pool1'] = PoolLayer(net['conv1_2'], 2)
    net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1, flip_filters=False)
    net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False)
    net['pool2'] = PoolLayer(net['conv2_2'], 2)
    net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False)
    net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_4'] = ConvLayer(net['conv3_3'], 256, 3, pad=1, flip_filters=False)
    net['pool3'] = PoolLayer(net['conv3_4'], 2)
    net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1, flip_filters=False)
    net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False)
    net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False)
    net['conv4_4'] = ConvLayer(net['conv4_3'], 512, 3, pad=1, flip_filters=False)
    net['pool4'] = PoolLayer(net['conv4_4'], 2)
    net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1, flip_filters=False)
    net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False)
    net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False)
    net['conv5_4'] = ConvLayer(net['conv5_3'], 512, 3, pad=1, flip_filters=False)
    net['pool5'] = PoolLayer(net['conv5_4'], 2)
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
    net['fc6_dropout'] = DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = DenseLayer(net['fc6_dropout'], num_units=4096)
    net['fc7_dropout'] = DropoutLayer(net['fc7'], p=0.5)
    net['fc8'] = DenseLayer(net['fc7_dropout'], num_units=1000, nonlinearity=None)
    net['prob'] = NonlinearityLayer(net['fc8'], softmax)
    # Freeze the pretrained weights: remove the 'trainable' tag from every layer
    # that actually has W and b parameters (input, pool, dropout and
    # nonlinearity layers have none).
    for layer in net.itervalues():
        if hasattr(layer, 'W') and hasattr(layer, 'b'):
            layer.params[layer.W].remove("trainable")
            layer.params[layer.b].remove("trainable")
    return net
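# Quick check (a sketch): after the freeze above, no parameter carries the
# 'trainable' tag, so an optimizer built from get_all_params(..., trainable=True)
# would receive an empty list, while the parameters themselves remain reachable.
import lasagne

net = build_vgg_model()
assert lasagne.layers.get_all_params(net['prob'], trainable=True) == []
all_params = lasagne.layers.get_all_params(net['prob'])
print len(all_params)   # 38 arrays: (16 conv + 3 fc layers) x (W, b)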
def build_test_model(): T_net = {} T_net['input'] = InputLayer((None, 4, 224, 224)) #slice the input to get image and feat map part T_net['input_map']=SliceLayer(T_net['input'],indices=slice(3,4),axis=1) T_net['map112']=PoolLayer(T_net['input_map'],2) T_net['map56']=PoolLayer(T_net['map112'],2) T_net['map28']=PoolLayer(T_net['map56'],2) T_net_buff56=[T_net['map56'] for i in range(256)] T_net['map56x256']=concat(T_net_buff56) T_net_buff28=[T_net['map28'] for i in range(512)] T_net['map28x512']=concat(T_net_buff28) T_net['input_im']=SliceLayer(T_net['input'],indices=slice(0,3),axis=1) T_net['conv1_1'] = ConvLayer( T_net['input_im'], 64, 3, pad=1, flip_filters=False) T_net['conv1_2'] = ConvLayer( T_net['conv1_1'], 64, 3, pad=1, flip_filters=False) T_net['pool1'] = PoolLayer(T_net['conv1_2'], 2) T_net['conv2_1'] = ConvLayer( T_net['pool1'], 128, 3, pad=1, flip_filters=False) T_net['conv2_2'] = ConvLayer( T_net['conv2_1'], 128, 3, pad=1, flip_filters=False) T_net['pool2'] = PoolLayer(T_net['conv2_2'], 2) T_net['conv3_1'] = ConvLayer( T_net['pool2'], 256, 3, pad=1, flip_filters=False) T_net['conv3_2'] = ConvLayer( T_net['conv3_1'], 256, 3, pad=1, flip_filters=False) T_net['conv3_3'] = ConvLayer( T_net['conv3_2'], 256, 3, pad=1, flip_filters=False) T_net['conv3_4'] = ConvLayer( T_net['conv3_3'], 256, 3, pad=1, flip_filters=False) T_net['conv3_map']=ElemwiseMergeLayer([T_net['conv3_1'],T_net['map56x256']],merge_function=T.mul) T_net['conv3_all']=ElemwiseSumLayer([T_net['conv3_4'],T_net['conv3_map']]) T_net['pool3'] = PoolLayer(T_net['conv3_all'], 2) T_net['conv4_1'] = ConvLayer( T_net['pool3'], 512, 3, pad=1, flip_filters=False) T_net['conv4_2'] = ConvLayer( T_net['conv4_1'], 512, 3, pad=1, flip_filters=False) T_net['conv4_3'] = ConvLayer( T_net['conv4_2'], 512, 3, pad=1, flip_filters=False) T_net['conv4_4'] = ConvLayer( T_net['conv4_3'], 512, 3, pad=1, flip_filters=False) T_net['conv4_map']=ElemwiseMergeLayer([T_net['conv4_1'],T_net['map28x512']],merge_function=T.mul) T_net['conv4_all']=ElemwiseSumLayer([T_net['conv4_4'],T_net['conv4_map']]) T_net['pool4'] = PoolLayer(T_net['conv4_all'], 2) T_net['conv5_1'] = ConvLayer( T_net['pool4'], 512, 3, pad=1, flip_filters=False) T_net['conv5_2'] = ConvLayer( T_net['conv5_1'], 512, 3, pad=1, flip_filters=False) T_net['conv5_3'] = ConvLayer( T_net['conv5_2'], 512, 3, pad=1, flip_filters=False) T_net['conv5_4'] = ConvLayer( T_net['conv5_3'], 512, 3, pad=1, flip_filters=False) T_net['pool5'] = PoolLayer(T_net['conv5_4'], 2) T_net['fc6'] = DenseLayer(T_net['pool5'], num_units=4096) T_net['fc6_dropout'] = DropoutLayer(T_net['fc6'], p=0.) T_net['fc7'] = DenseLayer(T_net['fc6_dropout'], num_units=4096) T_net['fc7_dropout'] = DropoutLayer(T_net['fc7'], p=0.5) T_net['fc8'] = DenseLayer(T_net['fc7_dropout'], num_units=1000, nonlinearity=None) T_net['prob'] = NonlinearityLayer(T_net['fc8'], softmax) # T_net['pos_fc_layer']=DenseLayer(T_net['fc6_dropout'],num_units=2048) # T_net['pos_drop']=DropoutLayer(T_net['pos_fc_layer'],p=0.) # T_net['pred_pos_layer']=DenseLayer(T_net['pos_drop'],num_units=40,nonlinearity=sigmoid) #AU detection part T_net['au_fc_layer']=DenseLayer(T_net['fc6_dropout'],num_units=2048) T_net['au_drop']=DropoutLayer(T_net['au_fc_layer'],p=0.) T_net['output_layer']=DenseLayer(T_net['au_drop'],num_units=12,nonlinearity=sigmoid) # T_net['final']=concat([T_net['pred_pos_layer'],T_net['output_layer']]) return T_net
def build_vgg_model(self, input_var): from lasagne.layers import InputLayer from lasagne.layers import DenseLayer from lasagne.layers import NonlinearityLayer from lasagne.layers import DropoutLayer from lasagne.layers import Pool2DLayer as PoolLayer from lasagne.layers import TransposedConv2DLayer as Deconv2DLayer from lasagne.nonlinearities import softmax, sigmoid, tanh import cPickle as pickle try: from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer except ImportError as e: from lasagne.layers import Conv2DLayer as ConvLayer print_warning("Cannot import 'lasagne.layers.dnn.Conv2DDNNLayer' as it requires GPU support and a functional cuDNN installation. Falling back on slower convolution function 'lasagne.layers.Conv2DLayer'.") print_info("Building VGG-16 model...") net = {} net['input'] = InputLayer(shape = (None, 3, 224, 224), input_var = input_var, name = 'vgg_input') net['conv1_1'] = ConvLayer( net['input'], 64, 3, pad=1, flip_filters=False) net['conv1_2'] = ConvLayer( net['conv1_1'], 64, 3, pad=1, flip_filters=False) net['pool1'] = PoolLayer(net['conv1_2'], 2) net['conv2_1'] = ConvLayer( net['pool1'], 128, 3, pad=1, flip_filters=False) net['conv2_2'] = ConvLayer( net['conv2_1'], 128, 3, pad=1, flip_filters=False) net['pool2'] = PoolLayer(net['conv2_2'], 2) net['conv3_1'] = ConvLayer( net['pool2'], 256, 3, pad=1, flip_filters=False) net['conv3_2'] = ConvLayer( net['conv3_1'], 256, 3, pad=1, flip_filters=False) net['conv3_3'] = ConvLayer( net['conv3_2'], 256, 3, pad=1, flip_filters=False) net['pool3'] = PoolLayer(net['conv3_3'], 2) net['conv4_1'] = ConvLayer( net['pool3'], 512, 3, pad=1, flip_filters=False) net['conv4_2'] = ConvLayer( net['conv4_1'], 512, 3, pad=1, flip_filters=False) net['conv4_3'] = ConvLayer( net['conv4_2'], 512, 3, pad=1, flip_filters=False) net['pool4'] = PoolLayer(net['conv4_3'], 2) net['conv5_1'] = ConvLayer( net['pool4'], 512, 3, pad=1, flip_filters=False) net['conv5_2'] = ConvLayer( net['conv5_1'], 512, 3, pad=1, flip_filters=False) net['conv5_3'] = ConvLayer( net['conv5_2'], 512, 3, pad=1, flip_filters=False) net['pool5'] = PoolLayer(net['conv5_3'], 2) net['fc6'] = DenseLayer(net['pool5'], num_units=4096) net['fc6_dropout'] = DropoutLayer(net['fc6'], p=0.5) net['fc7'] = DenseLayer(net['fc6_dropout'], num_units=4096) net['fc7_dropout'] = DropoutLayer(net['fc7'], p=0.5) net['fc8'] = DenseLayer( net['fc7_dropout'], num_units=1000, nonlinearity=None) net['prob'] = NonlinearityLayer(net['fc8'], softmax) net_output = net['prob'] print_info("Loading VGG16 pre-trained weights from file 'vgg16.pkl'...") with open('vgg16.pkl', 'rb') as f: params = pickle.load(f) #net_output.initialize_layers() lasagne.layers.set_all_param_values(net['prob'], params['param values']) print_info("Alright, pre-trained VGG16 model is ready!") return net, net['prob']
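# Variant (a sketch, assuming the standard vgg16.pkl layout and the `net` dict
# returned above): when only the convolutional features are needed, load just the
# first 26 arrays (13 conv layers x (W, b)). set_all_param_values collects every
# layer below the one it is given, so net['pool5'] matches exactly those 26 arrays.
import cPickle as pickle
import lasagne

with open('vgg16.pkl', 'rb') as f:
    params = pickle.load(f)
lasagne.layers.set_all_param_values(net['pool5'], params['param values'][:26])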