def deeper(network, cropsz, batchsz):
    # 1st. Data size 117 -> 111 -> 55
    network = Conv2DLayer(network, 64, (7, 7), stride=1)
    network = BatchNormLayer(network, nonlinearity=rectify)
    network = MaxPool2DLayer(network, (3, 3), stride=2)
    # 2nd. Data size 55 -> 27
    network = Conv2DLayer(network, 112, (5, 5), stride=1, pad='same')
    network = BatchNormLayer(network, nonlinearity=rectify)
    network = MaxPool2DLayer(network, (3, 3), stride=2)
    # 3rd. Data size 27 -> 13
    network = Conv2DLayer(network, 192, (3, 3), stride=1, pad='same')
    network = BatchNormLayer(network, nonlinearity=rectify)
    network = MaxPool2DLayer(network, (3, 3), stride=2)
    # 4th. Data size 13 -> 11 -> 5
    network = Conv2DLayer(network, 320, (3, 3), stride=1)
    network = BatchNormLayer(network, nonlinearity=rectify)
    network = MaxPool2DLayer(network, (3, 3), stride=2)
    # 5th. Data size 5 -> 3
    network = Conv2DLayer(network, 512, (3, 3), stride=1)
    # network = DropoutLayer(network)
    network = BatchNormLayer(network, nonlinearity=rectify)
    # 6th. Data size 3 -> 1
    network = lasagne.layers.DenseLayer(network, 512)
    network = DropoutLayer(network)
    # network = BatchNormLayer(network, nonlinearity=rectify)
    return network
def test_undefined_shape(self, BatchNormLayer):
    # should work:
    BatchNormLayer((64, 2, None), axes=(0, 2))
    # should not work:
    with pytest.raises(ValueError) as exc:
        BatchNormLayer((64, None, 3), axes=(0, 2))
    assert 'needs specified input sizes' in exc.value.args[0]
def test_batch_norm_tag(self, BatchNormLayer):
    input_shape = (20, 30, 40)
    layer = BatchNormLayer(input_shape)
    assert len(layer.get_params()) == 4
    stat_params = layer.get_params(batch_norm_stat=True)
    assert len(stat_params) == 2
    param_names = [p.name for p in stat_params]
    assert "mean" in param_names
    assert "inv_std" in param_names
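# A minimal usage sketch (not part of the test suite above): in standard
# Lasagne the running statistics of a BatchNormLayer are registered with
# trainable=False, so they can be excluded from the parameters that receive
# gradient updates even without a dedicated tag. The small network below is
# hypothetical and only illustrates the parameter filtering.
import lasagne
from lasagne.layers import InputLayer, DenseLayer, batch_norm

l_in = InputLayer((None, 100))
l_hid = batch_norm(DenseLayer(l_in, num_units=50))

all_params = lasagne.layers.get_all_params(l_hid)                    # includes mean, inv_std
train_params = lasagne.layers.get_all_params(l_hid, trainable=True)  # excludes them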
def test_get_output_for(self, BatchNormLayer, deterministic, use_averages,
                        update_averages):
    input_shape = (20, 30, 40)

    # random input tensor, beta, gamma, mean, inv_std and alpha
    input = (np.random.randn(*input_shape).astype(theano.config.floatX) +
             np.random.randn(1, 30, 1).astype(theano.config.floatX))
    beta = np.random.randn(30).astype(theano.config.floatX)
    gamma = np.random.randn(30).astype(theano.config.floatX)
    mean = np.random.randn(30).astype(theano.config.floatX)
    inv_std = np.random.rand(30).astype(theano.config.floatX)
    alpha = np.random.rand()

    # create layer (with default axes: normalize over all but second axis)
    layer = BatchNormLayer(input_shape, beta=beta, gamma=gamma, mean=mean,
                           inv_std=inv_std, alpha=alpha)

    # call get_output_for()
    kwargs = {'deterministic': deterministic}
    if use_averages is not None:
        kwargs['batch_norm_use_averages'] = use_averages
    else:
        use_averages = deterministic
    if update_averages is not None:
        kwargs['batch_norm_update_averages'] = update_averages
    else:
        update_averages = not deterministic
    result = layer.get_output_for(theano.tensor.constant(input),
                                  **kwargs).eval()

    # compute expected results and expected updated parameters
    input_mean = input.mean(axis=(0, 2))
    input_inv_std = 1 / np.sqrt(input.var(axis=(0, 2)) + layer.epsilon)
    if use_averages:
        use_mean, use_inv_std = mean, inv_std
    else:
        use_mean, use_inv_std = input_mean, input_inv_std
    bcast = (np.newaxis, slice(None), np.newaxis)
    exp_result = (input - use_mean[bcast]) * use_inv_std[bcast]
    exp_result = exp_result * gamma[bcast] + beta[bcast]
    if update_averages:
        new_mean = (1 - alpha) * mean + alpha * input_mean
        new_inv_std = (1 - alpha) * inv_std + alpha * input_inv_std
    else:
        new_mean, new_inv_std = mean, inv_std

    # compare expected results to actual results
    tol = {'atol': 1e-5, 'rtol': 1e-6}
    assert np.allclose(layer.mean.get_value(), new_mean, **tol)
    assert np.allclose(layer.inv_std.get_value(), new_inv_std, **tol)
    assert np.allclose(result, exp_result, **tol)
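# A minimal sketch (the `network` variable is hypothetical) of the
# train/inference split the test above exercises: with deterministic=False
# the mini-batch statistics are used and the stored averages are updated
# with factor alpha as a side effect; with deterministic=True the stored
# mean/inv_std are used and left untouched.
import lasagne

train_out = lasagne.layers.get_output(network, deterministic=False)
test_out = lasagne.layers.get_output(network, deterministic=True)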
def gooey_gadget(network_in, conv_add, stride):
    network_c = Conv2DLayer(network_in, conv_add / 2, (1, 1),
                            W=HeUniform('relu'))
    network_c = prelu(network_c)
    network_c = BatchNormLayer(network_c)
    network_c = Conv2DLayer(network_c, conv_add, (3, 3), stride=stride,
                            W=HeUniform('relu'))
    network_c = prelu(network_c)
    network_c = BatchNormLayer(network_c)
    network_p = MaxPool2DLayer(network_in, (3, 3), stride=stride)
    return ConcatLayer((network_c, network_p))
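# A hypothetical usage sketch of gooey_gadget: the convolutional path and the
# max-pooling path shrink the spatial size identically, and the concatenation
# output carries in_channels + conv_add feature maps. The shapes below are
# illustrative only.
from lasagne.layers import InputLayer, get_output_shape

l_in = InputLayer((None, 96, 27, 27))
l_out = gooey_gadget(l_in, 128, 2)   # 96 + 128 = 224 channels
print(get_output_shape(l_out))       # (None, 224, 13, 13)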
def test_skip_linear_transform(self, BatchNormLayer):
    input_shape = (20, 30, 40)

    # random input tensor, beta, gamma
    input = (np.random.randn(*input_shape).astype(theano.config.floatX) +
             np.random.randn(1, 30, 1).astype(theano.config.floatX))
    beta = np.random.randn(30).astype(theano.config.floatX)
    gamma = np.random.randn(30).astype(theano.config.floatX)

    # create layers without beta or gamma
    layer1 = BatchNormLayer(input_shape, beta=None, gamma=gamma)
    layer2 = BatchNormLayer(input_shape, beta=beta, gamma=None)

    # check that one parameter is missing
    assert len(layer1.get_params()) == 3
    assert len(layer2.get_params()) == 3

    # call get_output_for()
    result1 = layer1.get_output_for(theano.tensor.constant(input),
                                    deterministic=False).eval()
    result2 = layer2.get_output_for(theano.tensor.constant(input),
                                    deterministic=False).eval()

    # compute expected results and expected updated parameters
    mean = input.mean(axis=(0, 2))
    std = np.sqrt(input.var(axis=(0, 2)) + layer1.epsilon)
    exp_result = (input - mean[None, :, None]) / std[None, :, None]
    exp_result1 = exp_result * gamma[None, :, None]  # no beta
    exp_result2 = exp_result + beta[None, :, None]  # no gamma

    # compare expected results to actual results
    tol = {'atol': 1e-5, 'rtol': 1e-6}
    assert np.allclose(result1, exp_result1, **tol)
    assert np.allclose(result2, exp_result2, **tol)
def test_init(self, BatchNormLayer, init_unique):
    input_shape = (2, 3, 4)
    # default: normalize over all but second axis
    beta = BatchNormLayer(input_shape, beta=init_unique).beta
    assert np.allclose(beta.get_value(), init_unique((3,)))
    # normalize over first axis only
    beta = BatchNormLayer(input_shape, beta=init_unique, axes=0).beta
    assert np.allclose(beta.get_value(), init_unique((3, 4)))
    # normalize over second and third axis
    beta = BatchNormLayer(input_shape, beta=init_unique, axes=(1, 2)).beta
    assert np.allclose(beta.get_value(), init_unique((2,)))
def build_simple_block(incoming_layer, names, num_filters, filter_size,
                       stride, pad, use_bias=False, nonlin=rectify):
    """
    Creates the stacked Lasagne layers ConvLayer -> BN -> (ReLU) for ResNet-50.

    Input:
        incoming_layer : instance of Lasagne layer
            Parent layer
        names : list of string
            Names of the layers in the block
        num_filters : int
            Number of filters in the convolution layer
        filter_size : int
            Size of the filters in the convolution layer
        stride : int
            Stride of the convolution layer
        pad : int
            Padding of the convolution layer
        use_bias : bool
            Whether to use a bias in the convolution layer
        nonlin : function
            Nonlinearity type of the Nonlinearity layer

    Output:
        tuple: (net, last_layer_name)
            net : dict
                Dictionary with the stacked layers
            last_layer_name : string
                Name of the last layer
    """
    net = []
    net.append((
        names[0],
        ConvLayer(incoming_layer, num_filters, filter_size, stride, pad,
                  flip_filters=False, nonlinearity=None) if use_bias
        else ConvLayer(incoming_layer, num_filters, filter_size, stride, pad,
                       b=None, flip_filters=False, nonlinearity=None)
    ))
    net.append((names[1], BatchNormLayer(net[-1][1])))
    if nonlin is not None:
        net.append((names[2],
                    NonlinearityLayer(net[-1][1], nonlinearity=nonlin)))

    return dict(net), net[-1][0]
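# A hypothetical usage sketch of build_simple_block for a ResNet-50-style
# stem; the layer names and hyperparameters below are illustrative and not
# part of the function above.
from lasagne.layers import InputLayer

net = {'input': InputLayer((None, 3, 224, 224))}
sub_net, parent_layer_name = build_simple_block(
    net['input'], ['conv1', 'bn_conv1', 'conv1_relu'],
    64, 7, 2, 3, use_bias=True)
net.update(sub_net)   # net now also holds 'conv1', 'bn_conv1', 'conv1_relu'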
def build_dec_layer(incoming, z_l, name, transform, specs, l,
                    combinator_type, layer2stats=None, last=False):
    dirty_net = OrderedDict()
    if l > 0:
        # transformation layer: dense, deconv, unpool
        lname = 'dec_{}_{}'.format(
            l, transform if 'pool' in transform else 'affine')
        if transform in ['pool', 'unpool']:
            W = None
        else:
            W = lasagne.init.GlorotUniform()
        dirty_net[lname] = get_transform_layer(incoming, name + '_' + lname,
                                               transform, specs, W)
        layer2bn = dirty_net.values()[-1]
    else:
        layer2bn = incoming

    # batch normalization ... u_l
    ul_name = 'dec_batchn_u_{}'.format(l)
    bn_broadcast_cond = layer2bn.output_shape[1] == 1
    if len(layer2bn.output_shape) == 4 and bn_broadcast_cond:
        ax = (0, 1, 2, 3)
    elif len(layer2bn.output_shape) == 2 and bn_broadcast_cond:
        ax = (0, 1)
    else:
        ax = 'auto'
    dirty_net[ul_name] = BatchNormLayer(layer2bn, axes=ax, alpha=1.,
                                        beta=None, gamma=None,
                                        name=name + '_' + ul_name)

    # denoised latent \hat{z}_{L-i}
    comb_name = 'dec_combinator_{}'.format(l)
    dirty_net[comb_name] = CombinatorLayer(z_l, dirty_net.values()[-1],
                                           combinator_type=combinator_type,
                                           name=name + '_' + comb_name)

    if not last:
        # batch-normalized latent \hat{z}_{L-i}^{BN}
        layer2norm = dirty_net[comb_name]
        bname = 'dec_batchn_z_{}'.format(l)
        dirty_net[bname] = SharedNormLayer(layer2stats, layer2norm,
                                           name=name + '_' + bname)

    return dirty_net
def base(network, cropsz, batchsz):
    # 1st
    network = Conv2DLayer(network, 64, (8, 8), stride=2, nonlinearity=rectify)
    network = BatchNormLayer(network, nonlinearity=rectify)
    network = MaxPool2DLayer(network, (3, 3), stride=2)
    # 2nd
    network = Conv2DLayer(network, 96, (5, 5), stride=1, pad='same')
    network = BatchNormLayer(network, nonlinearity=rectify)
    network = MaxPool2DLayer(network, (3, 3), stride=2)
    # 3rd
    network = Conv2DLayer(network, 128, (3, 3), stride=1, pad='same')
    network = BatchNormLayer(network, nonlinearity=rectify)
    network = MaxPool2DLayer(network, (3, 3), stride=2)
    # 4th
    network = lasagne.layers.DenseLayer(network, 512)
    network = BatchNormLayer(network, nonlinearity=rectify)
    return network
def choosy(network, cropsz, batchsz):
    # 1st. Data size 117 -> 111 -> 55
    network = Conv2DLayer(network, 64, (7, 7), stride=1, W=HeUniform('relu'))
    network = prelu(network)
    network = BatchNormLayer(network)
    network = MaxPool2DLayer(network, (3, 3), stride=2)
    # 2nd. Data size 55 -> 27
    network = Conv2DLayer(network, 112, (5, 5), stride=1, pad='same',
                          W=HeUniform('relu'))
    network = prelu(network)
    network = BatchNormLayer(network)
    network = MaxPool2DLayer(network, (3, 3), stride=2)
    # 3rd. Data size 27 -> 13
    network = Conv2DLayer(network, 192, (3, 3), stride=1, pad='same',
                          W=HeUniform('relu'))
    network = prelu(network)
    network = BatchNormLayer(network)
    network = MaxPool2DLayer(network, (3, 3), stride=2)
    # 4th. Data size 13 -> 11 -> 5
    network = Conv2DLayer(network, 320, (3, 3), stride=1, W=HeUniform('relu'))
    network = prelu(network)
    network = BatchNormLayer(network)
    network = MaxPool2DLayer(network, (3, 3), stride=2)
    # 5th. Data size 5 -> 3
    network = Conv2DLayer(network, 512, (3, 3), nonlinearity=None)
    network = prelu(network)
    network = BatchNormLayer(network)
    # 6th. Data size 3 -> 1
    network = lasagne.layers.DenseLayer(network, 512, nonlinearity=None)
    network = DropoutLayer(network)
    network = FeaturePoolLayer(network, 2)
    return network
def gooey(network, cropsz, batchsz):
    # 1st. Data size 117 -> 111 -> 55
    # 117*117*32 = 438048
    network = Conv2DLayer(network, 32, (3, 3), stride=1, W=HeUniform('relu'))
    network = prelu(network)
    network = BatchNormLayer(network)
    # 115*115*32 = 423200
    network = Conv2DLayer(network, 32, (3, 3), stride=1, W=HeUniform('relu'))
    network = prelu(network)
    network = BatchNormLayer(network)
    # 55*55*40 = 121000
    network = Conv2DLayer(network, 40, (3, 3), stride=1, W=HeUniform('relu'))
    network = prelu(network)
    network = BatchNormLayer(network)
    network = MaxPool2DLayer(network, (3, 3), stride=2)
    # 2nd. Data size 55 -> 27
    # 27*27*96 = 69984
    network = Conv2DLayer(network, 96, (3, 3), stride=2, W=HeUniform('relu'))
    network = prelu(network)
    network = BatchNormLayer(network)
    # 3rd. Data size 27 -> 13, 96 + 128 channels
    # 13*13*224 = 37856
    network = gooey_gadget(network, 128, 2)  # 96 + 128 = 224 channels
    # 4th. Data size 13 -> 11 -> 5
    # 11*11*192 = 23232
    network = Conv2DLayer(network, 192, (3, 3), W=HeUniform('relu'))
    network = prelu(network)
    network = BatchNormLayer(network)
    # 5*5*416 = 10400
    network = gooey_gadget(network, 224, 2)  # 192 + 224 = 416 channels
    # 5th. Data size 5 -> 3
    # 3*3*672 = 6048
    network = gooey_gadget(network, 256, 1)  # 416 + 256 = 672 channels
    # 6th. Data size 3 -> 1, 672 + 512 channels
    # 1*1*1184 = 1184
    network = gooey_gadget(network, 512, 1)  # 672 + 512 = 1184 channels
    return network
def test_init(self, BatchNormLayer, init_unique):
    input_shape = (2, 3, 4)
    # default: normalize over all but second axis
    beta = BatchNormLayer(input_shape, beta=init_unique).beta
    assert np.allclose(beta.get_value(), init_unique((3,)))
    # normalize over first axis only
    beta = BatchNormLayer(input_shape, beta=init_unique, axes=0).beta
    assert np.allclose(beta.get_value(), init_unique((3, 4)))
    # normalize over second and third axis
    try:
        beta = BatchNormLayer(input_shape, beta=init_unique,
                              axes=(1, 2)).beta
        assert np.allclose(beta.get_value(), init_unique((2,)))
    except ValueError as exc:
        assert "BatchNormDNNLayer only supports" in exc.args[0]
def cslim(network, cropsz, batchsz):
    # 1st
    network = Conv2DLayer(network, 64, (5, 5), stride=2, W=HeUniform('relu'))
    network = prelu(network)
    network = BatchNormLayer(network)
    network = MaxPool2DLayer(network, (5, 5), stride=2)
    # 2nd
    network = Conv2DLayer(network, 96, (5, 5), stride=1, pad='same',
                          W=HeUniform('relu'))
    network = prelu(network)
    network = BatchNormLayer(network)
    network = MaxPool2DLayer(network, (5, 5), stride=2)
    # 3rd
    network = Conv2DLayer(network, 128, (3, 3), stride=1, pad='same',
                          W=HeUniform('relu'))
    network = prelu(network)
    network = BatchNormLayer(network)
    network = MaxPool2DLayer(network, (3, 3), stride=2)
    # 4th
    network = Conv2DLayer(network, 128, (3, 3), stride=1, pad='same',
                          W=HeUniform('relu'))
    network = prelu(network)
    network = DropoutLayer(network)
    network = BatchNormLayer(network)
    network = MaxPool2DLayer(network, (3, 3), stride=2)
    # 5th
    network = lasagne.layers.DenseLayer(network, 512, nonlinearity=None)
    network = DropoutLayer(network)
    network = FeaturePoolLayer(network, 2)
    return network
def test_init(self, BatchNormLayer, init_unique):
    input_shape = (2, 3, 4)
    # default: normalize over all but second axis
    beta = BatchNormLayer(input_shape, beta=init_unique).beta
    assert np.allclose(beta.get_value(), init_unique((3,)))
    # normalize over first axis only
    beta = BatchNormLayer(input_shape, beta=init_unique, axes=0).beta
    assert np.allclose(beta.get_value(), init_unique((3, 4)))
    # normalize over second and third axis
    beta = BatchNormLayer(input_shape, beta=init_unique, axes=(1, 2)).beta
    assert np.allclose(beta.get_value(), init_unique((2,)))
def build_representation(img_size=[64, 64], nchannels=3, ndf=64,
                         vis_filter_size=5, filters_size=5,
                         global_pool=True, strides=[2, 2, 2, 2]):
    print 'cnn'
    # if img_size[0] % 32 is not 0 or img_size[1] != img_size[0]:
    #     # The image must be square and a multiple of 32
    #     raise 1
    depth = len(strides)
    w_sizes = [filters_size] * depth
    w_sizes[0] = vis_filter_size

    X = InputLayer((None, nchannels, img_size[0], img_size[1]))
    ishape = lasagne.layers.get_output_shape(X)
    # print ishape
    wf = 1
    h = X
    for i, s in enumerate(strides):
        wf *= s
        filter_size = w_sizes[i]
        x1 = Conv2DLayer(h, num_filters=wf * ndf, filter_size=filter_size,
                         stride=s, pad='same', b=None, nonlinearity=None,
                         name='cnn_l%d_Conv' % i)
        x2 = BatchNormLayer(x1, name='cnn_l%d_BN' % i)
        h = NonlinearityLayer(x2, nonlinearity=lrelu)
        ishape = lasagne.layers.get_output_shape(x1)
        # print ishape

    if global_pool:
        h = GlobalPoolLayer(h, pool_function=T.max, name='cnn_last_code')
    else:
        h = FlattenLayer(h, name='cnn_last_code')
    return h
def test_init(self, BatchNormLayer, init_unique):
    input_shape = (2, 3, 4)
    # default: normalize over all but second axis
    beta = BatchNormLayer(input_shape, beta=init_unique).beta
    assert np.allclose(beta.get_value(), init_unique((3,)))
    # normalize over first axis only
    beta = BatchNormLayer(input_shape, beta=init_unique, axes=0).beta
    assert np.allclose(beta.get_value(), init_unique((3, 4)))
    # normalize over second and third axis
    try:
        beta = BatchNormLayer(
            input_shape, beta=init_unique, axes=(1, 2)).beta
        assert np.allclose(beta.get_value(), init_unique((2,)))
    except ValueError as exc:
        assert "BatchNormDNNLayer only supports" in exc.args[0]
def conv_layer(incoming, num_filters):
    tmp = Conv2DLayer(incoming, num_filters, 3, pad='valid')
    tmp = BatchNormLayer(tmp)
    if dropout:
        tmp = DropoutLayer(tmp, 0.3)
    return NonlinearityLayer(tmp)
def build_enc_layer(incoming, name, transform, specs, activation, i,
                    p_drop_hidden, shared_net):
    net = OrderedDict()
    lname = 'enc_{}_{}'.format(
        i, transform if 'pool' in transform else 'affine')
    nbatchn_lname = 'enc_batchn_{}_norm'.format(i)
    noise_lname = 'enc_noise_{}'.format(i)
    lbatchn_lname = 'enc_batchn_{}_learn'.format(i)

    if shared_net is None:
        # affine pars
        W = lasagne.init.GlorotUniform()
        # batchnorm pars
        beta = lasagne.init.Constant(0)
        gamma = None if activation == rectify else lasagne.init.Constant(1)
    else:
        # batchnorm pars
        beta = shared_net[lbatchn_lname + '_beta'].get_params()[0]
        gamma = None if activation == rectify else \
            shared_net[lbatchn_lname + '_gamma'].get_params()[0]
        if not isinstance(shared_net[lname], (pool, unpool)):
            # affine weights
            W = shared_net[lname].get_params()[0]
        else:
            W = None

    # affine (conv/dense/deconv) or (un)pooling transformation: $W \hat{h}$
    net[lname] = get_transform_layer(incoming, name + '_' + lname,
                                     transform, specs, W)

    # 1. batchnormalize without learning -> goes to combinator layer
    layer2bn = net.values()[-1]
    l_name = '{}_{}'.format(name, nbatchn_lname)
    bn_broadcast_cond = layer2bn.output_shape[1] == 1
    if len(layer2bn.output_shape) == 4 and bn_broadcast_cond:
        ax = (0, 1, 2, 3)
    elif len(layer2bn.output_shape) == 2 and bn_broadcast_cond:
        ax = (0, 1)
    else:
        ax = 'auto'
    net[nbatchn_lname] = BatchNormLayer(layer2bn, axes=ax, alpha=0.1,
                                        beta=None, gamma=None, name=l_name)

    if shared_net is None:
        # for dirty encoder -> add noise
        net[noise_lname] = GaussianNoiseLayer(net.values()[-1],
                                              sigma=p_drop_hidden,
                                              name='{}_{}'.format(
                                                  name, noise_lname))

    # 2. scaling & offsetting batchnormalization + noise
    l_name = '{}_{}'.format(name, lbatchn_lname)
    # offset by beta
    net[lbatchn_lname + '_beta'] = BiasLayer(net.values()[-1], b=beta,
                                             name=l_name + '_beta')
    if gamma is not None:
        # if not rectify, scale by gamma
        net[lbatchn_lname + '_gamma'] = ScaleLayer(net.values()[-1],
                                                   scales=gamma,
                                                   name=l_name + '_gamma')
    return net