def build_model():
    #################
    # Regular model #
    #################
    input_size = data_sizes["sliced:data:singleslice:difference:middle"]

    l0 = InputLayer(input_size)
    # add channel layer
    # l0r = reshape(l0, (-1, 1, ) + input_size[1:])

    # (batch, channel, time, x, y)
    l = ConvolutionOver2DAxisLayer(
        l0, num_filters=40, filter_size=(5, 5), axis=(2, 3), channel=1,
        W=lasagne.init.Orthogonal(),
        b=lasagne.init.Constant(0.1),
        nonlinearity=lasagne.nonlinearities.identity)
    l = BatchNormLayer(l, gamma=None)
    l = lasagne.layers.NonlinearityLayer(
        l, nonlinearity=lasagne.nonlinearities.rectify)
    l = MaxPoolOver2DAxisLayer(l, pool_size=(2, 2), axis=(2, 3), stride=(2, 2))

    l = ConvolutionOver2DAxisLayer(
        l, num_filters=40, filter_size=(3, 3), axis=(2, 3), channel=1,
        W=lasagne.init.Orthogonal(),
        b=lasagne.init.Constant(0.1),
        nonlinearity=lasagne.nonlinearities.identity)
    l = BatchNormLayer(l, gamma=None)
    l = lasagne.layers.NonlinearityLayer(
        l, nonlinearity=lasagne.nonlinearities.rectify)
    l = MaxPoolOver2DAxisLayer(l, pool_size=(2, 2), axis=(2, 3), stride=(2, 2))

    l_systole = lasagne.layers.DenseLayer(
        lasagne.layers.DropoutLayer(l),
        num_units=600,
        nonlinearity=lasagne.nonlinearities.softmax)

    l_diastole = lasagne.layers.DenseLayer(
        lasagne.layers.DropoutLayer(l),
        num_units=600,
        nonlinearity=lasagne.nonlinearities.softmax)

    return {
        "inputs": {
            "sliced:data:singleslice:difference": l0
        },
        "outputs": {
            "systole:onehot": l_systole,
            "diastole:onehot": l_diastole,
        }
    }
def skel_encoder(l_in, tconv_sz, filter_dilation, num_tc_filters, dropout):
    warmup = 16

    l1 = lasagne.layers.DenseLayer(
        l_in, num_units=480,
        num_leading_axes=2,
        nonlinearity=None, b=None)
    l1 = BatchNormLayer(l1, axes=(0, 1))
    l1 = NonlinearityLayer(l1, leaky_rectify)

    d1 = DropoutLayer(l1, p=dropout)

    l2 = lasagne.layers.DenseLayer(
        d1, num_units=480,
        num_leading_axes=2,
        nonlinearity=None, b=None)
    l2 = BatchNormLayer(l2, axes=(0, 1))
    l2 = NonlinearityLayer(l2, leaky_rectify)

    d2 = DropoutLayer(l2, p=dropout)

    l3 = TemporalConv(d2, num_filters=num_tc_filters, filter_size=tconv_sz,
                      filter_dilation=filter_dilation, pad='same',
                      conv_type='regular',
                      nonlinearity=None, b=None)
    l3 = BatchNormLayer(l3, axes=(0, 1))
    l3 = NonlinearityLayer(l3, leaky_rectify)

    return {
        'l_out': l3,
        'warmup': warmup
    }
def _pre_residual_(self, model, num_filters=None, dim_inc=False):
    """Residual block for pre-resnet."""
    num_filters *= self.width
    num_filters, first_stride, out_filters = get_dimensions(
        model, num_filters, dim_inc, self.bottleneck)

    residual = self.nonlinearity(BatchNormLayer(model))
    residual = self.convolution(residual, num_filters, stride=first_stride,
                                filter_size=self.block_config[0])

    if self.bottleneck:
        for filter_size in self.block_config[1:-1]:
            residual = self.nonlinearity(BatchNormLayer(residual))
            if self.dropout > 0:
                residual = DropoutLayer(residual, self.dropout)
            residual = self.convolution(residual, num_filters,
                                        filter_size=filter_size)
        residual = self.nonlinearity(BatchNormLayer(residual))
        if self.dropout > 0:
            residual = DropoutLayer(residual, self.dropout)
        residual = self.convolution(residual, out_filters,
                                    filter_size=self.block_config[-1])
    else:
        for filter_size in self.block_config[1:]:
            residual = self.nonlinearity(BatchNormLayer(residual))
            if self.dropout > 0:
                residual = DropoutLayer(residual, self.dropout)
            residual = self.convolution(residual, num_filters,
                                        filter_size=filter_size)

    return residual
def build_res_rnn_network(rnnmodel):
    net = {}
    net['input'] = InputLayer((batch_size, seq_len, feature_size))
    net['rnn0'] = DimshuffleLayer(net['input'], (1, 0, 2))
    for l in range(1, num_layers + 1):
        hidini = 0
        if l == num_layers:
            hidini = U_lowbound
        # flatten time into the batch axis for the per-step dense projection
        net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)],
                                              (batch_size * seq_len, -1))
        net['rnn%d' % (l - 1)] = DenseLayer(net['rnn%d' % (l - 1)], hidden_units,
                                            W=ini_W,
                                            b=Uniform(range=(0, args.ini_b)),
                                            nonlinearity=None)  # W=Uniform(ini_rernn_in_to_hid),
        # back to (time, batch, units) for batch norm and the recurrent layer
        net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)],
                                              (seq_len, batch_size, -1))
        net['rnn%d' % l] = net['rnn%d' % (l - 1)]
        if not args.use_bn_afterrnn:
            net['rnn%d' % l] = BatchNormLayer(net['rnn%d' % l], axes=(0, 1),
                                              beta=Uniform(range=(0, args.ini_b)))
        net['rnn%d' % l] = rnnmodel(net['rnn%d' % l], hidden_units,
                                    W_hid_to_hid=Uniform(range=(hidini, U_bound)),
                                    nonlinearity=act, only_return_final=False,
                                    grad_clipping=args.gradclipvalue)
        if args.use_bn_afterrnn:
            net['rnn%d' % l] = BatchNormLayer(net['rnn%d' % l], axes=(0, 1))
        if l == num_layers:
            # keep only the last time step of the top recurrent layer
            net['rnn%d' % num_layers] = lasagne.layers.SliceLayer(
                net['rnn%d' % num_layers], indices=-1, axis=0)
    net['out'] = DenseLayer(net['rnn%d' % num_layers], outputclass,
                            nonlinearity=softmax)
    return net
def build_baseline2_feats(input_var, nb_filter=96):
    """Slightly more complex model. Transform x to a feature space first."""
    net = OrderedDict()

    # Input, standardization
    last = net['input'] = InputLayer(
        (None, 3, tools.INP_PSIZE, tools.INP_PSIZE), input_var=input_var)
    last = net['norm'] = ExpressionLayer(last, lambda x: normalize(x))

    # Pretrained Encoder as before
    last = net["conv1_1"] = ConvLayer(last, nb_filter, 1, pad=0,
                                      flip_filters=False, nonlinearity=linear)
    last = net["bn1_1"] = BatchNormLayer(last)
    last = net["relu1_1"] = NonlinearityLayer(last, nonlinearity=rectify)

    last = net["conv1_2"] = ConvLayer(last, nb_filter, 1, pad=0,
                                      flip_filters=False, nonlinearity=linear)
    last = net["bn1_2"] = BatchNormLayer(last)
    last = net["relu1_2"] = NonlinearityLayer(last, nonlinearity=rectify)

    # Modified Middle Part
    last = net["middle"] = ConvLayer(last, nb_filter, 1, nonlinearity=linear)

    # Decoder as before
    last = net["deconv1_2"] = TransposedConv2DLayer(
        last, net["conv1_2"].input_shape[1], net["conv1_2"].filter_size,
        stride=net["conv1_2"].stride, crop=net["conv1_2"].pad,
        W=net["conv1_2"].W, flip_filters=not net["conv1_2"].flip_filters,
        nonlinearity=None)
    last = net["deconv1_1"] = TransposedConv2DLayer(
        last, net["conv1_1"].input_shape[1], net["conv1_1"].filter_size,
        stride=net["conv1_1"].stride, crop=net["conv1_1"].pad,
        W=net["conv1_1"].W, flip_filters=not net["conv1_1"].flip_filters,
        nonlinearity=None)

    last = net["bn"] = BatchNormLayer(last, beta=nn.init.Constant(128.),
                                      gamma=nn.init.Constant(25.))

    return last, net
def build_indrnn_network(X_sym):
    net = {}
    net['input0'] = InputLayer((batch_size, seq_len, indim, 3), X_sym)
    net['input'] = ReshapeLayer(net['input0'], (batch_size, seq_len, indim * 3))
    net['rnn0'] = DimshuffleLayer(net['input'], (1, 0, 2))
    for l in range(1, num_layers + 1):
        hidini = 0
        if l == num_layers:
            hidini = U_lowbound
        # flatten time into the batch axis for the per-step dense projection
        net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)],
                                              (batch_size * seq_len, -1))
        net['rnn%d' % (l - 1)] = DenseLayer(net['rnn%d' % (l - 1)], hidden_units,
                                            W=ini_W,
                                            b=lasagne.init.Constant(args.ini_b),
                                            nonlinearity=None)
        # back to (time, batch, units) for dropout, batch norm and the IndRNN layer
        net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)],
                                              (seq_len, batch_size, -1))
        if args.conv_drop:
            net['rnn%d' % (l - 1)] = DropoutLayer(net['rnn%d' % (l - 1)],
                                                  p=droprate, shared_axes=(0,))
        net['rnn%d' % l] = net['rnn%d' % (l - 1)]
        if not args.use_bn_afterrnn:
            net['rnn%d' % l] = BatchNormLayer(net['rnn%d' % l],
                                              beta=lasagne.init.Constant(args.ini_b),
                                              axes=(0, 1))
        net['rnn%d' % l] = rnnmodel(net['rnn%d' % l], hidden_units,
                                    W_hid_to_hid=Uniform(range=(hidini, U_bound)),
                                    nonlinearity=act, only_return_final=False,
                                    grad_clipping=gradclipvalue)
        if args.use_bn_afterrnn:
            net['rnn%d' % l] = BatchNormLayer(net['rnn%d' % l], axes=(0, 1))
        if args.use_dropout and l % args.drop_layers == 0:
            net['rnn%d' % l] = DropoutLayer(net['rnn%d' % l], p=droprate,
                                            shared_axes=(0,))
    # keep only the last time step of the top recurrent layer
    net['rnn%d' % num_layers] = lasagne.layers.SliceLayer(
        net['rnn%d' % num_layers], indices=-1, axis=0)
    net['out'] = DenseLayer(net['rnn%d' % num_layers], outputclass,
                            nonlinearity=softmax)
    return net
def build_discrim_net(batch_size, n_feats, input_var_sup, n_hidden_t_enc,
                      n_hidden_s, embedding, disc_nonlinearity, n_targets,
                      batchnorm=False, input_dropout=1.0):
    # Supervised network
    discrim_net = InputLayer((None, n_feats), input_var_sup)

    if input_dropout < 1.0:
        discrim_net = DropoutLayer(discrim_net, p=input_dropout)

    """
    # Code for convolutional encoder
    discrim_net = ReshapeLayer(discrim_net, (batch_size, 1, n_feats))
    discrim_net = Conv1DLayer(discrim_net, 8, 9, pad='same')
    discrim_net = ReshapeLayer(discrim_net, (batch_size, 8 * n_feats))
    """

    discrim_net = DenseLayer(discrim_net, num_units=n_hidden_t_enc[-1],
                             W=embedding, nonlinearity=rectify)
    hidden_rep = discrim_net

    # Supervised hidden layers
    for hid in n_hidden_s:
        if batchnorm:
            discrim_net = BatchNormLayer(discrim_net)
        discrim_net = DropoutLayer(discrim_net)
        # discrim_net = BatchNormLayer(discrim_net)
        discrim_net = DenseLayer(discrim_net, num_units=hid)

    # Predicting labels
    assert disc_nonlinearity in ["sigmoid", "linear", "rectify", "softmax",
                                 "softmax_hierarchy"]
    if batchnorm:
        discrim_net = BatchNormLayer(discrim_net)
    discrim_net = DropoutLayer(discrim_net)
    if n_targets == 2 and disc_nonlinearity == 'sigmoid':
        n_targets = 1
    if disc_nonlinearity != "softmax_hierarchy":
        discrim_net = DenseLayer(discrim_net, num_units=n_targets,
                                 nonlinearity=eval(disc_nonlinearity))
    else:
        cont_labels = create_1000_genomes_continent_labels()
        hierarch_softmax_1000_genomes = HierarchicalSoftmax(cont_labels)
        discrim_net_e = DenseLayer(discrim_net, num_units=n_targets,
                                   nonlinearity=hierarch_softmax_1000_genomes)
        discrim_net_c = DenseLayer(discrim_net, num_units=len(cont_labels),
                                   nonlinearity=softmax)
        discrim_net = HierarchicalMergeSoftmaxLayer(
            [discrim_net_e, discrim_net_c], cont_labels)

    return discrim_net, hidden_rep
def GraphConvCell(input, adjustment_matrix, num_units, nonlinearity=rectify):
    graph_conv = GraphConv(input, adjustment_matrix)
    # bn1 = BatchNormLayer(graph_conv)
    conc = ConcatLayer([input, graph_conv], axis=1)
    dense = DenseLayer(conc, num_units, nonlinearity=identity)
    bn2 = BatchNormLayer(dense)
    return NonlinearityLayer(bn2, nonlinearity=nonlinearity)
def residual_block(l, increase_dim=False, projection=True, first=False, filters=16):
    if increase_dim:
        first_stride = (2, 2)
    else:
        first_stride = (1, 1)

    if first:
        bn_pre_relu = l
    else:
        bn_pre_conv = BatchNormLayer(l)
        bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

    conv_1 = batch_norm(ConvLayer(bn_pre_relu, num_filters=filters,
                                  filter_size=(3, 3), stride=first_stride,
                                  nonlinearity=rectify, pad='same',
                                  W=HeNormal(gain='relu')))
    dropout = DropoutLayer(conv_1, p=0.3)
    conv_2 = ConvLayer(dropout, num_filters=filters, filter_size=(3, 3),
                       stride=(1, 1), nonlinearity=None, pad='same',
                       W=HeNormal(gain='relu'))

    if increase_dim:
        projection = ConvLayer(l, num_filters=filters, filter_size=(1, 1),
                               stride=(2, 2), nonlinearity=None, pad='same',
                               b=None)
        block = ElemwiseSumLayer([conv_2, projection])
    elif first:
        projection = ConvLayer(l, num_filters=filters, filter_size=(1, 1),
                               stride=(1, 1), nonlinearity=None, pad='same',
                               b=None)
        block = ElemwiseSumLayer([conv_2, projection])
    else:
        block = ElemwiseSumLayer([conv_2, l])

    return block
def bgr_encoder(l_in, tconv_sz, filter_dilation, num_tc_filters, dropout):
    warmup = 16

    batch_size, max_time, _, *crop_size = l_in.output_shape
    crop_size = tuple(crop_size)

    # stack pairs of small images into one batch of images
    l_r1 = ReshapeLayer(l_in, (-1, 1) + crop_size)

    # process through (siamese) CNN
    l_cnnout = wide_resnet(l_r1, d=16, k=1)

    # Concatenate feature vectors from the pairs
    feat_shape = np.asscalar(np.prod(l_cnnout.output_shape[1:]))
    l_feats = ReshapeLayer(l_cnnout, (batch_size, max_time, 2 * feat_shape))

    if dropout > 0:
        l_feats = DropoutLayer(l_feats, p=dropout)

    l_out = TemporalConv(l_feats, num_filters=num_tc_filters,
                         filter_size=tconv_sz,
                         filter_dilation=filter_dilation, pad='same',
                         b=None, nonlinearity=None)
    l_out = BatchNormLayer(l_out, axes=(0, 1))
    l_out = NonlinearityLayer(l_out, leaky_rectify)

    return {
        'l_out': l_out,
        'warmup': warmup
    }
def transition(self, args, layers, dropout, name_prefix):
    # a transition 1x1 convolution followed by avg-pooling
    self.affine_relu_conv(args, layers, channels=layers[-1].output_shape[1],
                          filter_size=1, dropout=dropout,
                          name_prefix=name_prefix)
    layers.append(Pool2DLayer(layers[-1], 2, mode='average_inc_pad',
                              name=name_prefix + '_pool'))
    # TODO: treat initialization as hyperparameter, but don't regularize parameters?
    layers.append(BatchNormLayer(layers[-1], name=name_prefix + '_bn',
                                 beta=None, gamma=None))
    # TODO: add Gaussian noise
    if args.addActivationNoise:
        layers.append(GaussianNoiseLayer(
            layers[-1], name=name_prefix + '_Gn',
            sigma=init.Constant(args.invSigmoidActivationNoiseMagnitude),
            shared_axes='auto'))
        self.params_noise.append(layers[-1].sigma)
    # self.add_params_to_self(args, layers[-1])  # no parameters, beta=gamma=None
    return layers[-1]
def makeNeuralNet(input_var=None):
    net = {}
    net['input'] = InputLayer(shape=(None, 3, 224, 224), input_var=input_var)
    net['bnorm'] = BatchNormLayer(net['input'])
    net['conv1'] = ConvLayer(net['bnorm'], num_filters=96, filter_size=5,
                             stride=2)  # 96*112*112
    net['norm1'] = NormLayer(net['conv1'],
                             alpha=0.0001)  # caffe has alpha = alpha * pool_size
    net['pool1'] = PoolLayer(net['norm1'], pool_size=3, stride=3,
                             ignore_border=False)  # 96*37...approx
    net['conv2'] = ConvLayer(net['pool1'], num_filters=256, filter_size=5, pad=1)
    net['pool2'] = PoolLayer(net['conv2'], pool_size=2, stride=2,
                             ignore_border=False)
    net['fc6'] = DenseLayer(net['pool2'], num_units=1024)
    net['drop6'] = DropoutLayer(net['fc6'], p=0.2)
    net['_fc7'] = DenseLayer(net['drop6'], num_units=256)
    net['_drop7'] = DropoutLayer(net['_fc7'], p=0.2)
    net['_fc8out'] = DenseLayer(net['_drop7'], num_units=1,
                                nonlinearity=lasagne.nonlinearities.sigmoid)
    output_layer_driver = net['_fc8out']
    return output_layer_driver, net
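# Minimal usage sketch for makeNeuralNet (not part of the original source):
# it compiles a deterministic forward pass with standard Lasagne/Theano calls.
# The names below (predict_fn, images) are illustrative only, and the snippet
# assumes the layer aliases used by makeNeuralNet are importable in this module.
import numpy as np
import theano
import theano.tensor as T
import lasagne


def _demo_makeNeuralNet():
    # Build the network on a symbolic 4D input batch.
    input_var = T.tensor4('inputs')
    output_layer, net = makeNeuralNet(input_var)

    # Deterministic forward pass: dropout disabled, batch norm in inference mode.
    prediction = lasagne.layers.get_output(output_layer, deterministic=True)
    predict_fn = theano.function([input_var], prediction)

    # Dummy batch of four 224x224 RGB images.
    images = np.random.rand(4, 3, 224, 224).astype(theano.config.floatX)
    print(predict_fn(images))  # sigmoid scores, shape (4, 1)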
def build_simple_block(incoming_layer, names,
                       num_filters, filter_size, stride, pad,
                       use_bias=False, nonlin=rectify):
    """Creates stacked Lasagne layers ConvLayer -> BN -> (ReLU)

    Parameters
    ----------
    incoming_layer : instance of Lasagne layer
        Parent layer
    names : list of string
        Names of the layers in block
    num_filters : int
        Number of filters in convolution layer
    filter_size : int
        Size of filters in convolution layer
    stride : int
        Stride of convolution layer
    pad : int
        Padding of convolution layer
    use_bias : bool
        Whether to use bias in convolution layer
    nonlin : function
        Nonlinearity type of Nonlinearity layer

    Returns
    -------
    tuple: (net, last_layer_name)
        net : dict
            Dictionary with stacked layers
        last_layer_name : string
            Last layer name
    """
    net = []
    names = list(names)
    net.append((
        names[0],
        ConvLayer(incoming_layer, num_filters, filter_size, stride=stride,
                  pad=pad, flip_filters=False, nonlinearity=None) if use_bias
        else ConvLayer(incoming_layer, num_filters, filter_size, stride=stride,
                       pad=pad, b=None, flip_filters=False, nonlinearity=None)
    ))

    net.append((names[1], BatchNormLayer(net[-1][1])))
    if nonlin is not None:
        net.append((names[2], NonlinearityLayer(net[-1][1], nonlinearity=nonlin)))

    return dict(net), net[-1][0]
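# Brief usage sketch for build_simple_block (not part of the original source):
# a ResNet-style 7x7/2 conv -> BN -> ReLU stem. The layer names passed in are
# illustrative; the snippet assumes build_simple_block and its Lasagne imports
# are in scope.
from lasagne.layers import InputLayer


def _demo_build_simple_block():
    l_in = InputLayer((None, 3, 224, 224))
    block, last_name = build_simple_block(
        l_in, ['conv1', 'bn_conv1', 'conv1_relu'],
        num_filters=64, filter_size=7, stride=2, pad=3)
    print(last_name)                       # 'conv1_relu'
    print(block[last_name].output_shape)   # (None, 64, 112, 112)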
def tiramisu_layer(net, no_f_base, f_size_base, dropout):
    net = BatchNormLayer(net)
    net = NonlinearityLayer(net, nonlinearity=lasagne.nonlinearities.rectify)
    net = Conv2DLayer(net, no_f_base, f_size_base, pad="same",
                      W=lasagne.init.HeUniform(gain="relu"), b=None,
                      flip_filters=False)
    if dropout:
        net = DropoutLayer(net, dropout)
    return net
def build_simple_block(incoming_layer, names,
                       num_filters, filter_size, stride, pad,
                       use_bias=False, nonlin=rectify):
    net = []
    net.append((
        names[0],
        ConvLayer(incoming_layer, num_filters, filter_size, stride=stride,
                  pad=pad, flip_filters=False, nonlinearity=None) if use_bias
        else ConvLayer(incoming_layer, num_filters, filter_size, stride=stride,
                       pad=pad, b=None, flip_filters=False, nonlinearity=None)
    ))

    net.append((names[1], BatchNormLayer(net[-1][1])))
    if nonlin is not None:
        net.append((names[2], NonlinearityLayer(net[-1][1], nonlinearity=nonlin)))

    return OrderedDict(net), net[-1][0]
def cifar_model(cls, n=9, incoming=None, classes=10, **kwargs):
    model = incoming or InputLayer(shape=(None, 3, 32, 32))
    builder = cls(model, **kwargs)

    # first layer, output is 16 x 32 x 32
    builder.model = builder.convolution(model, 16, init_gain=1.0)

    # first stack of residual blocks, output is 16 x 32 x 32
    for _ in range(n):
        builder.add_residual_block(16)

    # second stack of residual blocks, output is 32 x 16 x 16
    builder.add_residual_block(32, dim_inc=True)
    for _ in range(1, n):
        builder.add_residual_block(32)

    # third stack of residual blocks, output is 64 x 8 x 8
    builder.add_residual_block(64, dim_inc=True)
    for _ in range(1, n):
        builder.add_residual_block(64)

    model = builder.nonlinearity(BatchNormLayer(builder.model))

    # average pooling
    model = GlobalPoolLayer(model)

    # fully connected layer
    model = DenseLayer(model, num_units=classes, W=HeNormal(gain='relu'),
                       nonlinearity=softmax)

    return model
def residual_block(l, increase_dim=False, first=False, filters=16):
    if increase_dim:
        first_stride = (2, 2)
    else:
        first_stride = (1, 1)

    if first:
        # hacky solution to keep layers correct
        bn_pre_relu = l
    else:
        # contains the BN -> ReLU portion, steps 1 to 2
        bn_pre_conv = BatchNormLayer(l)
        bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

    # contains the weight -> BN -> ReLU portion, steps 3 to 5
    conv_1 = batch_norm(
        ConvLayer(bn_pre_relu, num_filters=filters, filter_size=(3, 3),
                  stride=first_stride, nonlinearity=rectify, pad='same',
                  W=HeNormal(gain='relu')))

    dropout = DropoutLayer(conv_1, p=0.3)

    # contains the last weight portion, step 6
    conv_2 = ConvLayer(dropout, num_filters=filters, filter_size=(3, 3),
                       stride=(1, 1), nonlinearity=None, pad='same',
                       W=HeNormal(gain='relu'))

    # add shortcut connections
    if increase_dim:
        # projection shortcut, as option B in paper
        projection = ConvLayer(l, num_filters=filters, filter_size=(1, 1),
                               stride=(2, 2), nonlinearity=None, pad='same',
                               b=None)
        block = ElemwiseSumLayer([conv_2, projection])
    elif first:
        # projection shortcut, as option B in paper
        projection = ConvLayer(l, num_filters=filters, filter_size=(1, 1),
                               stride=(1, 1), nonlinearity=None, pad='same',
                               b=None)
        block = ElemwiseSumLayer([conv_2, projection])
    else:
        block = ElemwiseSumLayer([conv_2, l])

    return block
def norm_lrelu_upscale_conv_norm_lrelu(l_in, feat_out):
    if do_norm:
        l_in = BatchNormLayer(l_in, axes=axes)
    l = NonlinearityLayer(l_in, nonlin)
    l = Upscale3DLayer(l, 2)
    l = Conv3DLayer(l, feat_out, 3, 1, 'same', nonlinearity=linear,
                    W=HeNormal(gain='relu'))
    if do_norm:
        l = BatchNormLayer(l, axes=axes)
    l = NonlinearityLayer(l, nonlin)
    return l
def dense_block(self, args, layers, num_layers, growth_rate, dropout,
                name_prefix):
    # concatenated 3x3 convolutions
    for n in range(num_layers):
        network = layers[-1]
        conv = self.affine_relu_conv(args, layers, channels=growth_rate,
                                     filter_size=3, dropout=dropout,
                                     name_prefix=name_prefix + '_l%02d' % (n + 1))
        # TODO: treat initialization as hyperparameter, but don't regularize parameters?
        conv = BatchNormLayer(conv, name=name_prefix + '_l%02dbn' % (n + 1),
                              beta=None, gamma=None)
        # TODO: add Gaussian noise?
        layers.append(conv)  # redundant?
        if args.addActivationNoise:
            conv = GaussianNoiseLayer(
                layers[-1], name=name_prefix + '_l%02dGn' % (n + 1),
                sigma=init.Constant(args.invSigmoidActivationNoiseMagnitude),
                shared_axes='auto')
            self.params_noise.append(conv.sigma)
            layers.append(conv)
        # self.add_params_to_self(args, conv)  # no parameters, beta=gamma=None
        layers.append(ConcatLayer([network, conv], axis=1,
                                  name=name_prefix + '_l%02d_join' % (n + 1)))
    return layers[-1]
def sub_block(net, no_f_base, f_size_base, dropout, give_name, do_relu=True):
    net = BatchNormLayer(net, name=give_name + "_bnorm")
    if do_relu:
        net = NonlinearityLayer(net, nonlinearity=lasagne.nonlinearities.rectify,
                                name=give_name + "_relu")
    net = Conv2DLayer(net, no_f_base, f_size_base, pad="same",
                      W=lasagne.init.HeNormal(gain="relu"), b=None,
                      name=give_name + "_conv")
    if dropout:
        net = DropoutLayer(net, dropout)
    return net
def tiramisu_transistion_down(net, drop_p, pooltype="average_inc_pad"):
    net = BatchNormLayer(net)
    net = NonlinearityLayer(net, nonlinearity=lasagne.nonlinearities.rectify)
    net = Conv2DLayer(net, net.output_shape[1], 1, pad="same",
                      W=lasagne.init.HeUniform(gain="relu"), b=None,
                      flip_filters=False)
    if drop_p:
        net = DropoutLayer(net, drop_p)
    net = Pool2DLayer(net, 2)
    return net
def BN_ReLU_Conv(inputs, n_filters, filter_size=3, dropout_p=0.2):
    l = NonlinearityLayer(BatchNormLayer(inputs))
    l = Conv2DLayer(l, n_filters, filter_size, pad='same',
                    W=HeUniform(gain='relu'), nonlinearity=linear,
                    flip_filters=False)
    if dropout_p != 0.0:
        l = DropoutLayer(l, dropout_p)
    return l
def conv_bn_relu(net, incoming_layer, depth, num_filters, filter_size,
                 pad='same'):
    net['conv' + str(depth)] = ConvLayer(net[incoming_layer],
                                         num_filters=num_filters,
                                         filter_size=filter_size,
                                         pad=pad,
                                         nonlinearity=None)
    net['bn' + str(depth)] = BatchNormLayer(net['conv' + str(depth)])
    net['relu' + str(depth)] = NonlinearityLayer(net['bn' + str(depth)],
                                                 nonlinearity=rectify)
    incoming_layer = 'relu' + str(depth)

    return incoming_layer
def ResNet_FullPre_Wide(input_shape=(None, 3, PIXELS, PIXELS), input_var=None,
                        n_classes=10, n=6, k=4):
    """
    Adapted from https://github.com/Lasagne/Recipes/tree/master/papers/deep_residual_learning.
    Tweaked to be consistent with 'Identity Mappings in Deep Residual Networks',
    Kaiming He et al. 2016 (https://arxiv.org/abs/1603.05027)
    and 'Wide Residual Networks', Sergey Zagoruyko, Nikos Komodakis 2016
    (http://arxiv.org/pdf/1605.07146v1.pdf)

    Depth = 6n + 2
    """
    n_filters = {0: 16, 1: 16 * k, 2: 32 * k, 3: 64 * k}

    # Building the network
    l_in = InputLayer(shape=input_shape, input_var=input_var)

    # first layer, output is 16 x 64 x 64
    l = batch_norm(ConvLayer(l_in, num_filters=n_filters[0], filter_size=(3, 3),
                             stride=(1, 1), nonlinearity=rectify, pad='same',
                             W=he_norm))

    # first stack of residual blocks, output is 32 x 64 x 64
    l = residual_wide_block(l, first=True, filters=n_filters[1])
    for _ in range(1, n):
        l = residual_wide_block(l, filters=n_filters[1])

    # second stack of residual blocks, output is 64 x 32 x 32
    l = residual_wide_block(l, increase_dim=True, filters=n_filters[2])
    for _ in range(1, (n + 2)):
        l = residual_wide_block(l, filters=n_filters[2])

    # third stack of residual blocks, output is 128 x 16 x 16
    l = residual_wide_block(l, increase_dim=True, filters=n_filters[3])
    for _ in range(1, (n + 2)):
        l = residual_wide_block(l, filters=n_filters[3])

    bn_post_conv = BatchNormLayer(l)
    bn_post_relu = NonlinearityLayer(bn_post_conv, rectify)

    # average pooling
    avg_pool = GlobalPoolLayer(bn_post_relu)

    # fully connected layer
    network = DenseLayer(avg_pool, num_units=n_classes, W=HeNormal(),
                         nonlinearity=softmax)

    return network
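# Minimal usage sketch for ResNet_FullPre_Wide (not part of the original source);
# it only inspects the built graph. Assumes PIXELS, he_norm, batch_norm and
# residual_wide_block are defined elsewhere in this module, as the function expects.
import lasagne


def _demo_resnet_fullpre_wide():
    network = ResNet_FullPre_Wide(n=6, k=4)  # wide pre-activation ResNet, 10 classes
    print(lasagne.layers.get_output_shape(network))              # (None, 10)
    print(lasagne.layers.count_params(network, trainable=True))  # trainable parameter count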
def indiv_block(incoming, num_filt):
    '''
    Returns the conv+concat+bn block network
    '''
    # Default non-linearity of lasagne's Conv2DLayer is rectify.
    conv_a = Conv2DLayer(incoming, num_filters=num_filt, filter_size=(3, 3),
                         pad='same', W=lasagne.init.GlorotUniform())
    conv_b = Conv2DLayer(conv_a, num_filters=num_filt, filter_size=(3, 3),
                         pad='same', W=lasagne.init.GlorotUniform())
    conv_concat = ConcatLayer([conv_a, conv_b])
    incoming = BatchNormLayer(conv_concat)
    return incoming
def _residual_block_(self, l, increase_dim=False, projection=True, first=False):
    input_num_filters = l.output_shape[1]

    if increase_dim:
        first_stride = (2, 2)
        out_num_filters = input_num_filters * 2
    else:
        first_stride = (1, 1)
        out_num_filters = input_num_filters

    if first:
        # hacky solution to keep layers correct
        bn_pre_relu = l
    else:
        # contains the BN -> ReLU portion, steps 1 to 2
        bn_pre_conv = BatchNormLayer(l)
        bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

    # contains the weight -> BN -> ReLU portion, steps 3 to 5
    conv_1 = batch_norm(
        ConvLayer(bn_pre_relu, num_filters=out_num_filters, filter_size=(3, 3),
                  stride=first_stride, nonlinearity=rectify, pad='same',
                  W=he_norm))

    # contains the last weight portion, step 6
    conv_2 = ConvLayer(conv_1, num_filters=out_num_filters, filter_size=(3, 3),
                       stride=(1, 1), nonlinearity=None, pad='same', W=he_norm)

    # add shortcut connections
    if increase_dim:
        # projection shortcut, as option B in paper
        projection = ConvLayer(bn_pre_relu, num_filters=out_num_filters,
                               filter_size=(1, 1), stride=(2, 2),
                               nonlinearity=None, pad='same', b=None)
        block = ElemwiseSumLayer([conv_2, projection])
    else:
        block = ElemwiseSumLayer([conv_2, l])

    return block
def residual_block(l, increase_dim=False, projection=True, first=False):
    """
    Create a residual learning building block with two stacked 3x3 conv layers
    as in the paper 'Identity Mappings in Deep Residual Networks',
    Kaiming He et al. 2016 (https://arxiv.org/abs/1603.05027)
    """
    input_num_filters = l.output_shape[1]

    if increase_dim:
        first_stride = (2, 2)
        out_num_filters = input_num_filters * 2
    else:
        first_stride = (1, 1)
        out_num_filters = input_num_filters

    if first:
        # hacky solution to keep layers correct
        bn_pre_relu = l
    else:
        # contains the BN -> ReLU portion, steps 1 to 2
        bn_pre_conv = BatchNormLayer(l)
        bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

    # contains the weight -> BN -> ReLU portion, steps 3 to 5
    conv_1 = batch_norm(
        ConvLayer(bn_pre_relu, num_filters=out_num_filters, filter_size=(3, 3),
                  stride=first_stride, nonlinearity=rectify, pad='same',
                  W=he_norm))

    # contains the last weight portion, step 6
    conv_2 = ConvLayer(conv_1, num_filters=out_num_filters, filter_size=(3, 3),
                       stride=(1, 1), nonlinearity=None, pad='same', W=he_norm)

    # add shortcut connections
    if increase_dim:
        # projection shortcut, as option B in paper
        projection = ConvLayer(l, num_filters=out_num_filters, filter_size=(1, 1),
                               stride=(2, 2), nonlinearity=None, pad='same',
                               b=None)
        block = ElemwiseSumLayer([conv_2, projection])
    else:
        block = ElemwiseSumLayer([conv_2, l])

    return block
def build_network():
    """Build LeNet"""
    input_var = t.tensor4('inputs')
    target = t.matrix('targets')

    cnn = InputLayer((None, 1, 28, 28), input_var)
    cnn = Conv2DLayer(cnn, 16, 5, 2)
    cnn = BatchNormLayer(cnn)
    cnn = NonlinearityLayer(cnn, lasagne.nonlinearities.rectify)
    cnn = Conv2DLayer(cnn, 32, (3, 3), (1, 1))
    cnn = BatchNormLayer(cnn)
    cnn = NonlinearityLayer(cnn, lasagne.nonlinearities.rectify)
    cnn = MaxPool2DLayer(cnn, (2, 2))
    cnn = DropoutLayer(cnn, 0.1)
    cnn = DenseLayer(cnn, 800)
    cnn = BatchNormLayer(cnn)
    cnn = NonlinearityLayer(cnn, lasagne.nonlinearities.rectify)
    cnn = DropoutLayer(cnn, 0.1)
    cnn = DenseLayer(cnn, 10, nonlinearity=lasagne.nonlinearities.softmax)

    train_out = lasagne.layers.get_output(cnn, deterministic=False)
    train_loss = lasagne.objectives.categorical_crossentropy(train_out, target)
    train_loss = lasagne.objectives.aggregate(train_loss, mode='mean')
    parameters = lasagne.layers.get_all_params(cnn, trainable=True)
    updates = lasagne.updates.adam(train_loss, parameters)
    train_fn = theano.function([input_var, target], train_loss, updates=updates)

    test_out = lasagne.layers.get_output(cnn, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_out, target)
    test_loss = lasagne.objectives.aggregate(test_loss, mode='mean')
    err = t.mean(t.neq(t.argmax(test_out, 1), t.argmax(target, 1)),
                 dtype=theano.config.floatX)
    val_fn = theano.function([input_var, target], [test_loss, err])

    return {'model': cnn, 'train_fn': train_fn, 'val_fn': val_fn}
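# Minimal training-loop sketch for the dictionary returned by build_network
# (not part of the original source). It uses randomly generated stand-in data;
# X_train, y_train and the batch size are placeholders, not the original pipeline.
import numpy as np
import theano


def _demo_build_network():
    net = build_network()

    # Stand-in data: 256 random 28x28 "images" with one-hot labels.
    rng = np.random.RandomState(0)
    X_train = rng.rand(256, 1, 28, 28).astype(theano.config.floatX)
    y_train = np.eye(10, dtype=theano.config.floatX)[rng.randint(0, 10, size=256)]

    batch_size = 32
    for epoch in range(2):
        losses = []
        for start in range(0, len(X_train), batch_size):
            xb = X_train[start:start + batch_size]
            yb = y_train[start:start + batch_size]
            losses.append(net['train_fn'](xb, yb))
        val_loss, val_err = net['val_fn'](X_train, y_train)
        print('epoch %d: train loss %.4f, val loss %.4f, val err %.4f'
              % (epoch, np.mean(losses), val_loss, val_err))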
def ResNet_BottleNeck_FullPreActivation(input_shape=(None, 3, PIXELS, PIXELS),
                                        input_var=None, n_classes=10, n=18):
    '''
    Adapted from https://github.com/Lasagne/Recipes/tree/master/papers/deep_residual_learning.
    Tweaked to be consistent with 'Identity Mappings in Deep Residual Networks',
    Kaiming He et al. 2016 (https://arxiv.org/abs/1603.05027).

    Judging from https://github.com/KaimingHe/resnet-1k-layers/blob/master/resnet-pre-act.lua,
    the number of filters goes 16 -> 64 -> 128 -> 256.

    Formula to figure out depth: 9n + 2
    '''
    # Building the network
    l_in = InputLayer(shape=input_shape, input_var=input_var)

    # first layer, output is 16x16x16
    l = batch_norm(ConvLayer(l_in, num_filters=16, filter_size=(3, 3),
                             stride=(1, 1), nonlinearity=rectify, pad='same',
                             W=he_norm))

    # first stack of residual blocks, output is 64x16x16
    l = residual_bottleneck_block(l, first=True)
    for _ in range(1, n):
        l = residual_bottleneck_block(l)

    # second stack of residual blocks, output is 128x8x8
    l = residual_bottleneck_block(l, increase_dim=True)
    for _ in range(1, n):
        l = residual_bottleneck_block(l)

    # third stack of residual blocks, output is 256x4x4
    l = residual_bottleneck_block(l, increase_dim=True)
    for _ in range(1, n):
        l = residual_bottleneck_block(l)

    bn_post_conv = BatchNormLayer(l)
    bn_post_relu = NonlinearityLayer(bn_post_conv, rectify)

    # average pooling
    avg_pool = GlobalPoolLayer(bn_post_relu)

    # fully connected layer
    network = DenseLayer(avg_pool, num_units=n_classes, W=HeNormal(),
                         nonlinearity=softmax)

    return network
def norm_lrelu_conv(l_in, feat_out, stride=1, filter_size=3):
    if do_norm:
        l_in = BatchNormLayer(l_in, axes=axes)
    l = NonlinearityLayer(l_in, nonlin)
    return Conv3DLayer(l, feat_out, filter_size, stride, 'same',
                       nonlinearity=linear, W=HeNormal(gain='relu'))