def test_unsupported_settings(self, DummyInputLayer):
    from lasagne.layers import DilatedConv2DLayer
    input_layer = DummyInputLayer((10, 20, 30, 40))
    for pad in 'same', 'full', 1:
        with pytest.raises(NotImplementedError) as exc:
            DilatedConv2DLayer(input_layer, 2, 3, pad=pad)
        assert "requires pad=0" in exc.value.args[0]
    with pytest.raises(NotImplementedError) as exc:
        DilatedConv2DLayer(input_layer, 2, 3, flip_filters=True)
    assert "requires flip_filters=False" in exc.value.args[0]
def conv_2d_layer(cls, cur_layer, name, num_filters, filter_size,
                  dilation=1, pad=1):
    if dilation == 1:
        cur_layer = Conv2DLayer(cur_layer, num_filters=num_filters,
                                filter_size=filter_size, pad=pad,
                                flip_filters=False, name=name)
    else:
        if pad == 0:
            pass
        elif pad >= 1:
            cur_layer = PadLayer(cur_layer, width=pad * dilation,
                                 name='{}_pad'.format(name))
        else:
            raise ValueError(
                'Only padding of 0 or >= 1 supported, not {}'.format(pad))
        cur_layer = DilatedConv2DLayer(cur_layer, num_filters=num_filters,
                                       filter_size=filter_size,
                                       flip_filters=False,
                                       dilation=dilation, name=name)
    return cur_layer, dilation
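# Hedged usage sketch (passing None for the unused `cls` argument and assuming
# the lasagne layer imports used above are in scope): with dilation=2 and
# pad=1, the helper pads by 1 * 2 = 2 pixels per side before the dilated
# convolution, so a 3x3 kernel keeps the spatial size, mimicking pad='same'.
import lasagne
from lasagne.layers import InputLayer

l_in = InputLayer((None, 3, 64, 64))
l_conv, dil = conv_2d_layer(None, l_in, 'dil_conv', num_filters=16,
                            filter_size=3, dilation=2, pad=1)
print(lasagne.layers.get_output_shape(l_conv))  # expected (None, 16, 64, 64)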
def build_model(height, width):
    net = OrderedDict()
    net['input'] = InputLayer((None, 3, height, width), name='input')
    net['conv1'] = ConvLayer(net['input'], num_filters=32, filter_size=7,
                             pad='same', name='conv1')
    net['conv2'] = ConvLayer(net['conv1'], num_filters=32, filter_size=5,
                             pad='same', name='conv2')
    net['conv3'] = ConvLayer(net['conv2'], num_filters=64, filter_size=3,
                             pad='same', name='conv3')
    net['conv4'] = ConvLayer(net['conv3'], num_filters=64, filter_size=3,
                             pad='same', name='conv4')
    net['pad5'] = PadLayer(net['conv4'], width=1, val=0, name='pad5')
    net['conv_dil5'] = DilatedConv2DLayer(net['pad5'], num_filters=64,
                                          filter_size=3, dilation=(1, 1),
                                          name='conv_dil5')
    net['pad6'] = PadLayer(net['conv_dil5'], width=2, val=0, name='pad6')
    net['conv_dil6'] = DilatedConv2DLayer(net['pad6'], num_filters=64,
                                          filter_size=3, dilation=(2, 2),
                                          name='conv_dil6')
    net['pad7'] = PadLayer(net['conv_dil6'], width=4, val=0, name='pad7')
    net['conv_dil7'] = DilatedConv2DLayer(net['pad7'], num_filters=64,
                                          filter_size=3, dilation=(4, 4),
                                          name='conv_dil7')
    net['pad8'] = PadLayer(net['conv_dil7'], width=8, val=0, name='pad8')
    net['conv_dil8'] = DilatedConv2DLayer(net['pad8'], num_filters=64,
                                          filter_size=3, dilation=(8, 8),
                                          name='conv_dil8')
    net['pad9'] = PadLayer(net['conv_dil8'], width=16, val=0, name='pad9')
    net['conv_dil9'] = DilatedConv2DLayer(net['pad9'], num_filters=64,
                                          filter_size=3, dilation=(16, 16),
                                          name='conv_dil9')
    net['pad10'] = PadLayer(net['conv_dil9'], width=1, val=0, name='pad10')
    net['l_out'] = DilatedConv2DLayer(net['pad10'], num_filters=2,
                                      filter_size=3, dilation=(1, 1),
                                      name='l_out')
    for layer in lasagne.layers.get_all_layers(net['l_out']):
        print(layer.name, layer.output_shape)
    print("output shape", net['l_out'].output_shape)
    net['l_in'] = net['input']
    return net
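# Hedged usage sketch: because each PadLayer width equals the following
# dilation factor, every 3x3 dilated convolution keeps the resolution
# (H + 2*d - d*(3 - 1) = H), so the output map matches the input size with
# num_filters=2 channels.
net = build_model(128, 128)
assert net['l_out'].output_shape == (None, 2, 128, 128)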
def test_defaults(self, DummyInputLayer, input, kernel, output, kwargs):
    from lasagne.layers import DilatedConv2DLayer
    b, c, h, w = input.shape
    input_layer = DummyInputLayer((b, c, h, w))
    layer = DilatedConv2DLayer(input_layer,
                               num_filters=kernel.shape[0],
                               filter_size=kernel.shape[2:],
                               W=kernel.transpose(1, 0, 2, 3),
                               **kwargs)
    actual = layer.get_output_for(theano.shared(input)).eval()
    assert actual.shape == output.shape
    assert actual.shape == layer.output_shape
    assert np.allclose(actual, output)
def test_with_nones(self, DummyInputLayer, input, kernel, output, kwargs):
    if kwargs.get('untie_biases', False):
        pytest.skip()
    from lasagne.layers import DilatedConv2DLayer
    b, c, h, w = input.shape
    input_layer = DummyInputLayer((None, c, None, None))
    layer = DilatedConv2DLayer(input_layer,
                               num_filters=kernel.shape[0],
                               filter_size=kernel.shape[2:],
                               W=kernel.transpose(1, 0, 2, 3),
                               **kwargs)
    assert layer.output_shape == (None, output.shape[1], None, None)
    actual = layer.get_output_for(input).eval()
    assert actual.shape == output.shape
    assert np.allclose(actual, output)
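# Hedged illustration of the W layout the tests above rely on:
# DilatedConv2DLayer stores W as (num_input_channels, num_filters, rows,
# cols), i.e. the transpose(1, 0, 2, 3) of Conv2DLayer's (num_filters,
# num_input_channels, rows, cols), and always uses unflipped filters. With
# the default dilation=(1, 1), pad=0 and flip_filters=False the two layers
# should therefore agree (a sketch, not part of the test suite):
import numpy as np
import theano
import lasagne
from lasagne.layers import InputLayer, Conv2DLayer, DilatedConv2DLayer

x = np.random.randn(2, 3, 8, 8).astype('float32')
W = np.random.randn(4, 3, 3, 3).astype('float32')
l_in = InputLayer((2, 3, 8, 8))
l_conv = Conv2DLayer(l_in, 4, 3, W=W, b=None, nonlinearity=None,
                     flip_filters=False)
l_dil = DilatedConv2DLayer(l_in, 4, 3, W=W.transpose(1, 0, 2, 3), b=None,
                           nonlinearity=None)
out_conv = lasagne.layers.get_output(l_conv, theano.shared(x)).eval()
out_dil = lasagne.layers.get_output(l_dil, theano.shared(x)).eval()
assert np.allclose(out_conv, out_dil, atol=1e-4)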
def network(image, p):
    input_image = InputLayer(input_var=image, shape=(None, 128, 256, 3))
    input_image = DimshuffleLayer(input_image, pattern=(0, 3, 1, 2))
    conv1 = batch_norm(Conv2DLayer(input_image, num_filters=16,
                                   filter_size=(3, 3), stride=(1, 1),
                                   nonlinearity=rectify, pad='same'))
    conv1 = batch_norm(Conv2DLayer(conv1, num_filters=16, filter_size=(3, 3),
                                   stride=(1, 1), nonlinearity=rectify,
                                   pad='same'))
    conv1 = DropoutLayer(conv1, p=p)
    conv1 = ConcatLayer([input_image, conv1], axis=1)
    conv2 = batch_norm(Conv2DLayer(conv1, num_filters=32, filter_size=(3, 3),
                                   stride=(1, 1), nonlinearity=rectify,
                                   pad='same'))
    conv2 = batch_norm(Conv2DLayer(conv2, num_filters=32, filter_size=(3, 3),
                                   stride=(1, 1), nonlinearity=rectify,
                                   pad='same'))
    conv2 = DropoutLayer(conv2, p=p)
    conv2 = batch_norm(ConcatLayer([conv2, conv1], axis=1))
    atr1 = DilatedConv2DLayer(PadLayer(conv2, width=1), num_filters=16,
                              filter_size=(3, 3), dilation=(1, 1), pad=0,
                              nonlinearity=rectify)
    atr2 = DilatedConv2DLayer(PadLayer(conv2, width=2), num_filters=16,
                              filter_size=(3, 3), dilation=(2, 2), pad=0,
                              nonlinearity=rectify)
    atr4 = DilatedConv2DLayer(PadLayer(conv2, width=4), num_filters=16,
                              filter_size=(3, 3), dilation=(4, 4), pad=0,
                              nonlinearity=rectify)
    atr8 = DilatedConv2DLayer(PadLayer(conv2, width=8), num_filters=16,
                              filter_size=(3, 3), dilation=(8, 8), pad=0,
                              nonlinearity=rectify)
    sumblock = ConcatLayer([conv2, atr1, atr2, atr4, atr8], axis=1)
    crp = MaxPool2DLayer(PadLayer(sumblock, width=1), pool_size=(3, 3),
                         stride=(1, 1), ignore_border=False)
    crp = batch_norm(Conv2DLayer(crp, num_filters=115, filter_size=(3, 3),
                                 stride=(1, 1), nonlinearity=rectify,
                                 pad='same'))
    sumblock = ElemwiseSumLayer([sumblock, crp])
    ground = batch_norm(Conv2DLayer(sumblock, num_filters=1,
                                    filter_size=(3, 3), stride=(1, 1),
                                    nonlinearity=output_layer_nonlinearity,
                                    pad='same'))
    ground = ReshapeLayer(ground, shape=([0], 128, 256))
    return ground
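# Hedged usage sketch (the variable name and dropout rate are assumptions, and
# `output_layer_nonlinearity` must already be defined in the snippet's module):
# the network expects NHWC batches of 128x256 RGB images and internally
# dimshuffles them to NCHW before the convolutions.
import theano.tensor as T
import lasagne

image_var = T.tensor4('image')           # (batch, 128, 256, 3)
net_out = network(image_var, p=0.2)
prediction = lasagne.layers.get_output(net_out, deterministic=True)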
def model_to_fcn(output_layers, allow_unlink=False):
    """
    Converts a Lasagne CNN model for fixed-size spectrogram excerpts into a
    fully-convolutional network that can handle spectrograms of arbitrary
    length (but at least the fixed length the original CNN was designed for),
    producing the same results as if applying it to every possible excerpt
    of the spectrogram in sequence. This is done by replacing convolutional
    and pooling layers with dilated versions if they appear after temporal
    max-pooling in the original model, and the first dense layer with a
    convolutional layer. If `allow_unlink` is False, the converted model
    will share all parameters with the original model. Otherwise, some
    parameters may be unshared for improved performance.
    """
    converted = {}
    dilations = {}
    for layer in lasagne.layers.get_all_layers(output_layers):
        if isinstance(layer, InputLayer):
            # Input layer: Just set third dimension to be of arbitrary size
            converted[layer] = InputLayer(
                layer.shape[:2] + (None,) + layer.shape[3:],
                layer.input_var)
            dilations[layer] = 1

        elif isinstance(layer, Conv2DLayer):
            # Conv2DLayer: Make dilated if needed
            kwargs = dict(incoming=converted[layer.input_layer],
                          num_filters=layer.num_filters,
                          filter_size=layer.filter_size,
                          nonlinearity=layer.nonlinearity,
                          b=layer.b)
            dilation = dilations[layer.input_layer]
            if dilation == 1:
                converted[layer] = Conv2DLayer(W=layer.W, **kwargs)
            else:
                W = layer.W.get_value() if allow_unlink else layer.W
                converted[layer] = DilatedConv2DLayer(
                    W=W.transpose(1, 0, 2, 3)[:, :, ::-1, ::-1],
                    dilation=(dilation, 1), **kwargs)
            dilations[layer] = dilation

        elif isinstance(layer, MaxPool2DLayer):
            # MaxPool2DLayer: Make dilated if needed, increase dilation factor
            kwargs = dict(incoming=converted[layer.input_layer],
                          pool_size=layer.pool_size,
                          stride=(1, layer.stride[1]))
            dilation = dilations[layer.input_layer]
            if dilation == 1:
                converted[layer] = MaxPool2DLayer(**kwargs)
            else:
                converted[layer] = TimeDilatedMaxPool2DLayer(
                    dilation=(dilation, 1), **kwargs)
            dilations[layer] = dilation * layer.stride[0]

        elif isinstance(layer, DenseLayer):
            # DenseLayer: Turn into Conv2DLayer/DilatedConv2DLayer if needed,
            # reset dilation factor
            dilation = dilations[layer.input_layer]
            if (dilation == 1 and
                    (getattr(layer, 'num_leading_axes', 1) == -1 or
                     len(layer.input_shape) == 2)):
                # we can retain it as a DenseLayer
                converted[layer] = DenseLayer(
                    converted[layer.input_layer],
                    num_units=layer.num_units,
                    W=layer.W, b=layer.b,
                    nonlinearity=layer.nonlinearity,
                    num_leading_axes=layer.num_leading_axes)
            else:
                if len(layer.input_shape) == 4:
                    blocklen = (int(np.prod(layer.input_shape[1:])) //
                                layer.input_shape[1] //
                                layer.input_shape[-1])
                elif len(layer.input_shape) == 3:
                    blocklen = (int(np.prod(layer.input_shape[1:])) //
                                layer.input_shape[1])
                else:
                    blocklen = 1
                W = layer.W.get_value() if allow_unlink else layer.W
                W = W.T.reshape(
                    (layer.num_units, layer.input_shape[1], blocklen,
                     layer.input_shape[-1])).transpose(1, 0, 2, 3)
                converted[layer] = DilatedConv2DLayer(
                    converted[layer.input_layer],
                    num_filters=layer.num_units,
                    filter_size=(blocklen, layer.input_shape[-1]),
                    W=W, b=layer.b, dilation=(dilation, 1),
                    nonlinearity=None)
                converted[layer] = lasagne.layers.DimshuffleLayer(
                    converted[layer], (0, 2, 1, 3))
                converted[layer] = lasagne.layers.ReshapeLayer(
                    converted[layer], (-1, [2], [3]))
                converted[layer] = lasagne.layers.FlattenLayer(
                    converted[layer])
                converted[layer] = lasagne.layers.NonlinearityLayer(
                    converted[layer], layer.nonlinearity)
            dilations[layer] = 1

        elif not isinstance(layer, MergeLayer):
            # all other layers: deepcopy the layer
            # - set up a memo dictionary so the cloned layer will be linked to
            #   the converted part of the network, not to a new clone of it
            memo = {id(layer.input_layer): converted[layer.input_layer]}
            # - in addition, share all parameters with the existing layer
            memo.update((id(p), p) for p in layer.params.keys())
            # - perform the copy
            clone = deepcopy(layer, memo)
            # update the input shape of the cloned layer
            clone.input_shape = converted[layer.input_layer].output_shape
            # use the cloned layer, keep the dilation factor
            converted[layer] = clone
            dilations[layer] = dilations[layer.input_layer]

        else:
            raise ValueError("don't know how to convert %r" % layer)

    # Return list of converted output layers, or single converted output layer
    try:
        return [converted[layer] for layer in output_layers]
    except TypeError:
        return converted[output_layers]
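# Hedged usage sketch (`cnn_output_layer` is a hypothetical output layer of a
# fixed-size Lasagne CNN, not defined above): the converted network shares or
# copies the original weights and accepts spectrograms of arbitrary length
# along the third (time) axis.
import theano.tensor as T
import lasagne

fcn_output = model_to_fcn(cnn_output_layer, allow_unlink=False)
long_input = T.tensor4('spectrogram')    # (batch, channels, time, bins)
predictions = lasagne.layers.get_output(fcn_output, long_input,
                                        deterministic=True)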
def buildDAE_contextmod(input_concat_h_vars, input_mask_var, n_classes,
                        path_weights='/Tmp/romerosa/itinf/models/',
                        model_name='dae_model.npz',
                        trainable=False, load_weights=False,
                        out_nonlin=linear, concat_h=['input'], noise=0.1):
    '''
    Build context module

    Parameters
    ----------
    input_concat_h_vars: list of theano tensors, variables to concatenate
    input_mask_var: theano tensor, input to context module
    n_classes: int, number of classes
    path_weights: string, path to weights directory
    trainable: bool, whether the model is trainable (freeze parameters or not)
    load_weights: bool, whether to load pretrained weights
    out_nonlin: output nonlinearity
    concat_h: list of strings, names of layers we want to concatenate
    noise: float, noise
    '''

    # context module does not reduce the image resolution
    assert all([el in ['input'] for el in concat_h])
    net = {}
    pos = 0

    # Contracting path
    net['input'] = InputLayer((None, n_classes, None, None), input_mask_var)

    # Noise
    if noise > 0:
        # net['noisy_input'] = GaussianNoiseLayerSoftmax(net['input'],
        #                                                sigma=noise)
        net['noisy_input'] = GaussianNoiseLayer(net['input'], sigma=noise)
        in_next = 'noisy_input'
    else:
        in_next = 'input'

    pos, out = model_helpers.concatenate(net, in_next, concat_h,
                                         input_concat_h_vars, pos, 3)

    class IdentityInit(Initializer):
        """We adapt the same initialization method as in the paper"""
        def sample(self, shape):
            n_filters, n_filters2, filter_size, filter_size2 = shape
            assert ((n_filters == n_filters2) &
                    (filter_size == filter_size2))
            assert (filter_size % 2 == 1)
            W = np.zeros(shape, dtype='float32')
            for i in range(n_filters):
                W[i, i, filter_size // 2, filter_size // 2] = 1.
            return W

    net['conv1'] = Conv2DLayer(net[out], n_classes, 3, pad='same',
                               nonlinearity=rectify, flip_filters=False)
    net['pad1'] = PadLayer(net['conv1'], width=32, val=0, batch_ndim=2)
    net['dilconv1'] = DilatedConv2DLayer(net['pad1'], n_classes, 3, 1,
                                         W=IdentityInit(),
                                         nonlinearity=rectify)
    net['dilconv2'] = DilatedConv2DLayer(net['dilconv1'], n_classes, 3, 2,
                                         W=IdentityInit(),
                                         nonlinearity=rectify)
    net['dilconv3'] = DilatedConv2DLayer(net['dilconv2'], n_classes, 3, 4,
                                         W=IdentityInit(),
                                         nonlinearity=rectify)
    net['dilconv4'] = DilatedConv2DLayer(net['dilconv3'], n_classes, 3, 8,
                                         W=IdentityInit(),
                                         nonlinearity=rectify)
    net['dilconv5'] = DilatedConv2DLayer(net['dilconv4'], n_classes, 3, 16,
                                         W=IdentityInit(),
                                         nonlinearity=rectify)
    net['dilconv6'] = DilatedConv2DLayer(net['dilconv5'], n_classes, 3, 1,
                                         W=IdentityInit(),
                                         nonlinearity=rectify)
    net['dilconv7'] = DilatedConv2DLayer(net['dilconv6'], n_classes, 1, 1,
                                         W=IdentityInit(),
                                         nonlinearity=linear)

    # Final dimshuffle, reshape and softmax
    net['final_dimshuffle'] = DimshuffleLayer(net['dilconv7'], (0, 2, 3, 1))
    laySize = lasagne.layers.get_output(net['final_dimshuffle']).shape
    net['final_reshape'] = ReshapeLayer(net['final_dimshuffle'],
                                        (T.prod(laySize[0:3]), laySize[3]))
    net['probs'] = NonlinearityLayer(net['final_reshape'],
                                     nonlinearity=out_nonlin)

    # Go back to 4D
    net['probs_reshape'] = ReshapeLayer(
        net['probs'], (laySize[0], laySize[1], laySize[2], n_classes))
    net['probs_dimshuffle'] = DimshuffleLayer(net['probs_reshape'],
                                              (0, 3, 1, 2))
    # print('Input to last layer: ', net['probs_dimshuffle'].input_shape)
    print(net.keys())

    # Load weights
    if load_weights:
        with np.load(os.path.join(path_weights, model_name)) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(net['probs_dimshuffle'],
                                            param_values)

    # Do not train
    if not trainable:
        model_helpers.freezeParameters(net['probs_dimshuffle'], single=False)

    return net['probs_dimshuffle']
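# Side note (hedged sketch, values are illustrative): IdentityInit above
# follows identity initialization for the dilated context module. Rebuilding
# the same kernel at module level shows why: each output channel starts with
# a single 1 at the kernel center of the matching input channel, so every
# freshly initialized dilated layer initially copies its input through.
import numpy as np

n_classes, filter_size = 4, 3
W0 = np.zeros((n_classes, n_classes, filter_size, filter_size),
              dtype='float32')
for i in range(n_classes):
    W0[i, i, filter_size // 2, filter_size // 2] = 1.
assert W0.sum() == n_classes  # one pass-through tap per class channel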