def build_cnn(input_var, input_shape):
    """Assemble a small VGG-like classifier with an auxiliary mid-level head.

    Parameters
    ----------
    input_var : Theano symbolic variable fed to the input layer.
    input_shape : (height, width) of the 3-channel input images.

    Returns
    -------
    dict mapping layer names to Lasagne layers. "out" is the main softmax
    head; "out_mid" is a second softmax head branching off "pool4".

    NOTE(review): ``n_classes`` is read from the enclosing module scope —
    confirm it is defined before this function is called.
    """
    relu = lasagne.nonlinearities.rectify
    softmax = lasagne.nonlinearities.softmax

    net = {}
    net["in"] = InputLayer(
        shape=(None, 3, input_shape[0], input_shape[1]), input_var=input_var)

    # Stage 1: two strided 3x3 convolutions, then pooling.
    net["conv1"] = Conv2DLayer(net["in"], num_filters=64,
                               filter_size=(3, 3), stride=2,
                               nonlinearity=relu)
    net["conv2"] = Conv2DLayer(net["conv1"], num_filters=64,
                               filter_size=(3, 3), stride=2,
                               nonlinearity=relu)
    net["pool2"] = Pool2DLayer(net["conv2"], pool_size=(2, 2))

    # Stage 2: 128-filter convolutions.
    net["conv3"] = Conv2DLayer(net["pool2"], num_filters=128,
                               filter_size=(3, 3), nonlinearity=relu)
    net["conv4"] = Conv2DLayer(net["conv3"], num_filters=128,
                               filter_size=(3, 3), nonlinearity=relu)
    net["pool4"] = Pool2DLayer(net["conv4"], pool_size=(2, 2))

    # Stage 3: 256-filter convolutions.
    net["conv5"] = Conv2DLayer(net["pool4"], num_filters=256,
                               filter_size=(3, 3), nonlinearity=relu)
    net["conv6"] = Conv2DLayer(net["conv5"], num_filters=256,
                               filter_size=(3, 3), nonlinearity=relu)
    net["pool6"] = Pool2DLayer(net["conv6"], pool_size=(2, 2))

    # Main head: two dropout-regularised dense layers plus softmax output.
    net["fc7"] = DenseLayer(net["pool6"], 4096, nonlinearity=relu)
    net["fc7_drop"] = lasagne.layers.DropoutLayer(net["fc7"], p=0.5)
    net["fc8"] = DenseLayer(net["fc7_drop"], 4096, nonlinearity=relu)
    net["fc8_drop"] = lasagne.layers.DropoutLayer(net["fc8"], p=0.5)
    net["out"] = DenseLayer(net["fc8_drop"], n_classes, nonlinearity=softmax)

    # Auxiliary head branching off the mid-level features at "pool4".
    net["fc_mid"] = DenseLayer(net["pool4"], 4096, nonlinearity=relu)
    net["out_mid"] = DenseLayer(net["fc_mid"], n_classes, nonlinearity=softmax)
    return net
def inceptionB(input_layer, nfilt):
    """Grid-reduction inception block.

    Corresponds to a modified version of figure 10 in the Inception paper.
    ``nfilt`` holds one filter-count tuple per convolutional branch.
    """
    # Branch 1: single strided 3x3 convolution.
    branch1 = bn_conv(input_layer, num_filters=nfilt[0][0], filter_size=3,
                      stride=2)

    # Branch 2: 1x1 bottleneck, a padded 3x3, then a strided 3x3.
    branch2 = bn_conv(input_layer, num_filters=nfilt[1][0], filter_size=1)
    branch2 = bn_conv(branch2, num_filters=nfilt[1][1], filter_size=3, pad=1)
    branch2 = bn_conv(branch2, num_filters=nfilt[1][2], filter_size=3,
                      stride=2)

    # Branch 3: strided pooling (Pool2DLayer's default mode).
    branch3 = Pool2DLayer(input_layer, pool_size=3, stride=2)

    return lasagne.layers.ConcatLayer([branch1, branch2, branch3])
def inceptionA(input_layer, nfilt):
    """Inception-A block.

    Corresponds to a modified version of figure 5 in the Inception paper.
    ``nfilt`` holds one filter-count tuple per branch.
    """
    # Branch 1: plain 1x1 convolution.
    branch1 = bn_conv(input_layer, num_filters=nfilt[0][0], filter_size=1)

    # Branch 2: 1x1 bottleneck followed by a 5x5 convolution.
    branch2 = bn_conv(input_layer, num_filters=nfilt[1][0], filter_size=1)
    branch2 = bn_conv(branch2, num_filters=nfilt[1][1], filter_size=5, pad=2)

    # Branch 3: 1x1 bottleneck followed by two 3x3 convolutions.
    branch3 = bn_conv(input_layer, num_filters=nfilt[2][0], filter_size=1)
    branch3 = bn_conv(branch3, num_filters=nfilt[2][1], filter_size=3, pad=1)
    branch3 = bn_conv(branch3, num_filters=nfilt[2][2], filter_size=3, pad=1)

    # Branch 4: average pooling followed by a 1x1 projection.
    branch4 = Pool2DLayer(input_layer, pool_size=3, stride=1, pad=1,
                          mode='average_exc_pad')
    branch4 = bn_conv(branch4, num_filters=nfilt[3][0], filter_size=1)

    return lasagne.layers.ConcatLayer([branch1, branch2, branch3, branch4])
def inceptionE(input_layer, nfilt, pool_mode):
    """Inception-E block with expanded-filter-bank branches.

    Corresponds to figure 7 in the Inception paper. Branches 2 and 3 each
    split into parallel 1x3 and 3x1 convolutions whose outputs are
    concatenated separately. ``pool_mode`` selects the pooling used in the
    pooling branch.
    """
    # Branch 1: plain 1x1 convolution.
    branch1 = bn_conv(input_layer, num_filters=nfilt[0][0], filter_size=1)

    # Branch 2: 1x1 bottleneck splitting into parallel 1x3 and 3x1 convs.
    bottleneck2 = bn_conv(input_layer, num_filters=nfilt[1][0], filter_size=1)
    branch2_h = bn_conv(bottleneck2, num_filters=nfilt[1][1],
                        filter_size=(1, 3), pad=(0, 1))
    branch2_v = bn_conv(bottleneck2, num_filters=nfilt[1][2],
                        filter_size=(3, 1), pad=(1, 0))

    # Branch 3: 1x1 then 3x3, splitting into parallel 1x3 and 3x1 convs.
    bottleneck3 = bn_conv(input_layer, num_filters=nfilt[2][0], filter_size=1)
    bottleneck3 = bn_conv(bottleneck3, num_filters=nfilt[2][1],
                          filter_size=3, pad=1)
    branch3_h = bn_conv(bottleneck3, num_filters=nfilt[2][2],
                        filter_size=(1, 3), pad=(0, 1))
    branch3_v = bn_conv(bottleneck3, num_filters=nfilt[2][3],
                        filter_size=(3, 1), pad=(1, 0))

    # Branch 4: pooling (mode chosen by caller) plus a 1x1 projection.
    branch4 = Pool2DLayer(input_layer, pool_size=3, stride=1, pad=1,
                          mode=pool_mode)
    branch4 = bn_conv(branch4, num_filters=nfilt[3][0], filter_size=1)

    return lasagne.layers.ConcatLayer(
        [branch1, branch2_h, branch2_v, branch3_h, branch3_v, branch4])
def inceptionC(input_layer, nfilt):
    """Inception-C block with factorized 7x7 convolutions.

    Corresponds to figure 6 in the Inception paper. The 7x7 convolutions
    are factorized into alternating 1x7 and 7x1 convolutions.
    """
    # Branch 1: plain 1x1 convolution.
    branch1 = bn_conv(input_layer, num_filters=nfilt[0][0], filter_size=1)

    # Branch 2: 1x1 bottleneck, then a factorized 7x7 (1x7 followed by 7x1).
    branch2 = bn_conv(input_layer, num_filters=nfilt[1][0], filter_size=1)
    branch2 = bn_conv(branch2, num_filters=nfilt[1][1], filter_size=(1, 7),
                      pad=(0, 3))
    branch2 = bn_conv(branch2, num_filters=nfilt[1][2], filter_size=(7, 1),
                      pad=(3, 0))

    # Branch 3: 1x1 bottleneck, then two factorized 7x7 convolutions
    # (alternating 7x1 and 1x7).
    branch3 = bn_conv(input_layer, num_filters=nfilt[2][0], filter_size=1)
    branch3 = bn_conv(branch3, num_filters=nfilt[2][1], filter_size=(7, 1),
                      pad=(3, 0))
    branch3 = bn_conv(branch3, num_filters=nfilt[2][2], filter_size=(1, 7),
                      pad=(0, 3))
    branch3 = bn_conv(branch3, num_filters=nfilt[2][3], filter_size=(7, 1),
                      pad=(3, 0))
    branch3 = bn_conv(branch3, num_filters=nfilt[2][4], filter_size=(1, 7),
                      pad=(0, 3))

    # Branch 4: average pooling followed by a 1x1 projection.
    branch4 = Pool2DLayer(input_layer, pool_size=3, stride=1, pad=1,
                          mode='average_exc_pad')
    branch4 = bn_conv(branch4, num_filters=nfilt[3][0], filter_size=1)

    return lasagne.layers.ConcatLayer([branch1, branch2, branch3, branch4])
def load_vgg(params_filename):
    """Build the VGG-16 convolutional trunk and load pretrained weights.

    The network accepts variable spatial input sizes (input shape
    ``(None, 3, None, None)``) and pools with ``average_inc_pad`` instead
    of max pooling.  The fully connected layers of the original VGG-16 are
    intentionally omitted: DenseLayers break when connected to a net with
    variable input shapes on the last two dimensions, and they are not
    needed for the style transfer.

    Parameters
    ----------
    params_filename : path to a pickle file mapping layer names to
        ``[W, b]`` ndarray pairs.

    Returns
    -------
    dict mapping layer names ("in", "conv1_1", ..., "pool5") to Lasagne
    layers; the input variable is the shared variable ``X`` on "in".
    """
    cnn = {}
    X = theano.shared(np.zeros((1, 3, 1, 1), theano.config.floatX), name="X")
    cnn["in"] = InputLayer(shape=(None, 3, None, None), input_var=X)

    # VGG-16 trunk: (name, num_filters) for 3x3 conv layers;
    # (name, None) marks a 2x2/stride-2 average-pooling layer.
    arch = [
        ("conv1_1", 64), ("conv1_2", 64), ("pool1", None),
        ("conv2_1", 128), ("conv2_2", 128), ("pool2", None),
        ("conv3_1", 256), ("conv3_2", 256), ("conv3_3", 256), ("pool3", None),
        ("conv4_1", 512), ("conv4_2", 512), ("conv4_3", 512), ("pool4", None),
        ("conv5_1", 512), ("conv5_2", 512), ("conv5_3", 512), ("pool5", None),
    ]
    prev = "in"
    for name, num_filters in arch:
        if num_filters is None:
            cnn[name] = Pool2DLayer(cnn[prev], pool_size=(2, 2), stride=2,
                                    mode="average_inc_pad")
        else:
            cnn[name] = Conv2DLayer(
                cnn[prev], num_filters=num_filters, pad=1, filter_size=(3, 3),
                nonlinearity=lasagne.nonlinearities.rectify)
        prev = name

    # Fix: close the parameter file deterministically (the original left
    # the file handle open).
    with open(params_filename, "rb") as f:
        # encoding="bytes" is required to read Python 2 pickles under
        # Python 3.  NOTE(review): with that option the unpickled dict may
        # use bytes keys, in which case the str layer names below would
        # never match — verify against the actual parameter file.
        params = pickle.load(f, encoding="bytes")

    for layer in cnn:
        if layer not in params:
            continue
        W, b = params[layer][0], params[layer][1]
        if W.ndim == 4:
            # Convolutional weights already match Lasagne's layout.
            cnn[layer].W.set_value(W.astype(theano.config.floatX))
        else:
            # Dense weights are stored transposed relative to Lasagne's
            # layout (currently dead: all dense layers are omitted above).
            assert W.ndim == 2
            cnn[layer].W.set_value(W.T.astype(theano.config.floatX))
        cnn[layer].b.set_value(b.astype(theano.config.floatX))
    return cnn
def __init__(self, weights=None, augmentation=False):
    """Build the Inception-v3 feature network up to global average pooling.

    Constructs the layer dict in ``self.net`` and exposes the global-pooled
    feature layer as ``self.out_layer``.  If ``weights`` is given, loads
    pretrained parameters into the network, dropping the final classifier
    parameters that are not present in this truncated architecture.

    Parameters:
        weights: optional path/handle understood by
            ``self._get_weights_from_file`` (behavior defined by the base
            class — confirm there).
        augmentation: forwarded to the base class constructor.
    """
    super(InceptionV3, self).__init__(weights, augmentation)

    def bn_conv(input_layer, **kwargs):
        # Convolution followed by batch normalization; no explicit extra
        # nonlinearity is added here.
        l = ConvLayer(input_layer, **kwargs)
        l = lasagne.layers.BatchNormLayer(l, epsilon=0.001)
        return l

    def inceptionA(input_layer, nfilt):
        # Corresponds to a modified version of figure 5 in the paper.
        # Branches: 1x1 | 1x1->5x5 | 1x1->3x3->3x3 | avg-pool->1x1.
        l1 = bn_conv(input_layer, num_filters=nfilt[0][0], filter_size=1)
        l2 = bn_conv(input_layer, num_filters=nfilt[1][0], filter_size=1)
        l2 = bn_conv(l2, num_filters=nfilt[1][1], filter_size=5, pad=2)
        l3 = bn_conv(input_layer, num_filters=nfilt[2][0], filter_size=1)
        l3 = bn_conv(l3, num_filters=nfilt[2][1], filter_size=3, pad=1)
        l3 = bn_conv(l3, num_filters=nfilt[2][2], filter_size=3, pad=1)
        l4 = Pool2DLayer(
            input_layer, pool_size=3, stride=1, pad=1, mode='average_exc_pad')
        l4 = bn_conv(l4, num_filters=nfilt[3][0], filter_size=1)
        return lasagne.layers.ConcatLayer([l1, l2, l3, l4])

    def inceptionB(input_layer, nfilt):
        # Corresponds to a modified version of figure 10 in the paper.
        # Grid reduction: strided 3x3 | 1x1->3x3->strided 3x3 | strided pool.
        l1 = bn_conv(input_layer, num_filters=nfilt[0][0], filter_size=3,
                     stride=2)
        l2 = bn_conv(input_layer, num_filters=nfilt[1][0], filter_size=1)
        l2 = bn_conv(l2, num_filters=nfilt[1][1], filter_size=3, pad=1)
        l2 = bn_conv(l2, num_filters=nfilt[1][2], filter_size=3, stride=2)
        l3 = Pool2DLayer(input_layer, pool_size=3, stride=2)
        return lasagne.layers.ConcatLayer([l1, l2, l3])

    def inceptionC(input_layer, nfilt):
        # Corresponds to figure 6 in the paper: 7x7 convolutions factorized
        # into alternating 1x7 and 7x1 convolutions.
        l1 = bn_conv(input_layer, num_filters=nfilt[0][0], filter_size=1)
        l2 = bn_conv(input_layer, num_filters=nfilt[1][0], filter_size=1)
        l2 = bn_conv(l2, num_filters=nfilt[1][1], filter_size=(1, 7),
                     pad=(0, 3))
        l2 = bn_conv(l2, num_filters=nfilt[1][2], filter_size=(7, 1),
                     pad=(3, 0))
        l3 = bn_conv(input_layer, num_filters=nfilt[2][0], filter_size=1)
        l3 = bn_conv(l3, num_filters=nfilt[2][1], filter_size=(7, 1),
                     pad=(3, 0))
        l3 = bn_conv(l3, num_filters=nfilt[2][2], filter_size=(1, 7),
                     pad=(0, 3))
        l3 = bn_conv(l3, num_filters=nfilt[2][3], filter_size=(7, 1),
                     pad=(3, 0))
        l3 = bn_conv(l3, num_filters=nfilt[2][4], filter_size=(1, 7),
                     pad=(0, 3))
        l4 = Pool2DLayer(
            input_layer, pool_size=3, stride=1, pad=1, mode='average_exc_pad')
        l4 = bn_conv(l4, num_filters=nfilt[3][0], filter_size=1)
        return lasagne.layers.ConcatLayer([l1, l2, l3, l4])

    def inceptionD(input_layer, nfilt):
        # Corresponds to a modified version of figure 10 in the paper.
        # Grid reduction with a factorized-7x7 branch.
        l1 = bn_conv(input_layer, num_filters=nfilt[0][0], filter_size=1)
        l1 = bn_conv(l1, num_filters=nfilt[0][1], filter_size=3, stride=2)
        l2 = bn_conv(input_layer, num_filters=nfilt[1][0], filter_size=1)
        l2 = bn_conv(l2, num_filters=nfilt[1][1], filter_size=(1, 7),
                     pad=(0, 3))
        l2 = bn_conv(l2, num_filters=nfilt[1][2], filter_size=(7, 1),
                     pad=(3, 0))
        l2 = bn_conv(l2, num_filters=nfilt[1][3], filter_size=3, stride=2)
        l3 = Pool2DLayer(input_layer, pool_size=3, stride=2)
        return lasagne.layers.ConcatLayer([l1, l2, l3])

    def inceptionE(input_layer, nfilt, pool_mode):
        # Corresponds to figure 7 in the paper: branches 2 and 3 fan out
        # into parallel 1x3 and 3x1 convolutions that are concatenated.
        l1 = bn_conv(input_layer, num_filters=nfilt[0][0], filter_size=1)
        l2 = bn_conv(input_layer, num_filters=nfilt[1][0], filter_size=1)
        l2a = bn_conv(l2, num_filters=nfilt[1][1], filter_size=(1, 3),
                      pad=(0, 1))
        l2b = bn_conv(l2, num_filters=nfilt[1][2], filter_size=(3, 1),
                      pad=(1, 0))
        l3 = bn_conv(input_layer, num_filters=nfilt[2][0], filter_size=1)
        l3 = bn_conv(l3, num_filters=nfilt[2][1], filter_size=3, pad=1)
        l3a = bn_conv(l3, num_filters=nfilt[2][2], filter_size=(1, 3),
                      pad=(0, 1))
        l3b = bn_conv(l3, num_filters=nfilt[2][3], filter_size=(3, 1),
                      pad=(1, 0))
        l4 = Pool2DLayer(
            input_layer, pool_size=3, stride=1, pad=1, mode=pool_mode)
        l4 = bn_conv(l4, num_filters=nfilt[3][0], filter_size=1)
        return lasagne.layers.ConcatLayer([l1, l2a, l2b, l3a, l3b, l4])

    # --- Network assembly (fixed 299x299 RGB input, as in Inception-v3) ---
    net = {}
    net['input'] = lasagne.layers.InputLayer((None, 3, 299, 299))

    # Stem: strided/plain convolutions interleaved with max pooling.
    net['conv'] = bn_conv(net['input'],
                          num_filters=32, filter_size=3, stride=2)
    net['conv_1'] = bn_conv(net['conv'], num_filters=32, filter_size=3)
    net['conv_2'] = bn_conv(net['conv_1'],
                            num_filters=64, filter_size=3, pad=1)
    net['pool'] = Pool2DLayer(net['conv_2'], pool_size=3, stride=2,
                              mode='max')
    net['conv_3'] = bn_conv(net['pool'], num_filters=80, filter_size=1)
    net['conv_4'] = bn_conv(net['conv_3'], num_filters=192, filter_size=3)
    net['pool_1'] = Pool2DLayer(net['conv_4'], pool_size=3, stride=2,
                                mode='max')

    # Three Inception-A blocks.
    net['mixed/join'] = inceptionA(
        net['pool_1'], nfilt=((64,), (48, 64), (64, 96, 96), (32,)))
    net['mixed_1/join'] = inceptionA(
        net['mixed/join'], nfilt=((64,), (48, 64), (64, 96, 96), (64,)))
    net['mixed_2/join'] = inceptionA(
        net['mixed_1/join'], nfilt=((64,), (48, 64), (64, 96, 96), (64,)))

    # Grid reduction (Inception-B).
    net['mixed_3/join'] = inceptionB(
        net['mixed_2/join'], nfilt=((384,), (64, 96, 96)))

    # Four Inception-C blocks with factorized 7x7 convolutions.
    net['mixed_4/join'] = inceptionC(
        net['mixed_3/join'],
        nfilt=((192,), (128, 128, 192), (128, 128, 128, 128, 192), (192,)))
    net['mixed_5/join'] = inceptionC(
        net['mixed_4/join'],
        nfilt=((192,), (160, 160, 192), (160, 160, 160, 160, 192), (192,)))
    net['mixed_6/join'] = inceptionC(
        net['mixed_5/join'],
        nfilt=((192,), (160, 160, 192), (160, 160, 160, 160, 192), (192,)))
    net['mixed_7/join'] = inceptionC(
        net['mixed_6/join'],
        nfilt=((192,), (192, 192, 192), (192, 192, 192, 192, 192), (192,)))

    # Second grid reduction (Inception-D).
    net['mixed_8/join'] = inceptionD(
        net['mixed_7/join'], nfilt=((192, 320), (192, 192, 192, 192)))

    # Two Inception-E blocks; note the differing pooling modes.
    net['mixed_9/join'] = inceptionE(
        net['mixed_8/join'],
        nfilt=((320,), (384, 384, 384), (448, 384, 384, 384), (192,)),
        pool_mode='average_exc_pad')
    net['mixed_10/join'] = inceptionE(
        net['mixed_9/join'],
        nfilt=((320,), (384, 384, 384), (448, 384, 384, 384), (192,)),
        pool_mode='max')

    # Global pooling yields the feature vector used as the model output.
    net['pool3'] = lasagne.layers.GlobalPoolLayer(net['mixed_10/join'])
    self.net = net
    self.out_layer = net['pool3']

    if self.weights is not None:
        init_weights = self._get_weights_from_file(self.weights,
                                                   'param values')
        # Since we have chopped off the last two layers of the network
        # (loss3/classifier and prob), we won't need those parameters.
        init_weights = init_weights[:-2]
        lasagne.layers.set_all_param_values(self.out_layer, init_weights)