def build_cnn(input_var=None, n=5):
    # create a residual learning building block with two stacked 3x3 conv layers as in paper
    def residual_block(l, increase_dim=False, projection=False):
        input_num_filters = l.output_shape[1]
        if increase_dim:
            first_stride = (2, 2)
            out_num_filters = input_num_filters * 2
        else:
            first_stride = (1, 1)
            out_num_filters = input_num_filters

        stack_1 = batch_norm(ConvLayer(l, num_filters=out_num_filters, filter_size=(3, 3),
                                       stride=first_stride, nonlinearity=rectify, pad='same',
                                       W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))
        stack_2 = batch_norm(ConvLayer(stack_1, num_filters=out_num_filters, filter_size=(3, 3),
                                       stride=(1, 1), nonlinearity=None, pad='same',
                                       W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))

        # add shortcut connections
        if increase_dim:
            if projection:
                # projection shortcut, as option B in paper
                projection = batch_norm(ConvLayer(l, num_filters=out_num_filters, filter_size=(1, 1),
                                                  stride=(2, 2), nonlinearity=None, pad='same',
                                                  b=None, flip_filters=False))
                block = NonlinearityLayer(ElemwiseSumLayer([stack_2, projection]), nonlinearity=rectify)
            else:
                # identity shortcut, as option A in paper
                identity = ExpressionLayer(l, lambda X: X[:, :, ::2, ::2],
                                           lambda s: (s[0], s[1], s[2] // 2, s[3] // 2))
                padding = PadLayer(identity, [out_num_filters // 4, 0, 0], batch_ndim=1)
                block = NonlinearityLayer(ElemwiseSumLayer([stack_2, padding]), nonlinearity=rectify)
        else:
            block = NonlinearityLayer(ElemwiseSumLayer([stack_2, l]), nonlinearity=rectify)

        return block

    # Building the network
    l_in = InputLayer(shape=(None, 3, 32, 32), input_var=input_var)

    # first layer, output is 16 x 32 x 32
    l = batch_norm(ConvLayer(l_in, num_filters=16, filter_size=(3, 3), stride=(1, 1),
                             nonlinearity=rectify, pad='same',
                             W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))

    # first stack of residual blocks, output is 16 x 32 x 32
    for _ in range(n - 3):
        l = residual_block(l)
    feanet = MaxPool2DLayer(l, pool_size=4)
    feanet = FlattenLayer(feanet)

    # second stack of residual blocks, output is 32 x 16 x 16
    l = residual_block(l, increase_dim=True)
    for _ in range(1, n):
        l = residual_block(l)

    # third stack of residual blocks, output is 64 x 8 x 8
    l = residual_block(l, increase_dim=True)
    for _ in range(1, n):
        l = residual_block(l)

    # average pooling
    l = GlobalPoolLayer(l)

    # fully connected layer
    network = DenseLayer(l, num_units=10, W=lasagne.init.HeNormal(), nonlinearity=softmax)

    return network, feanet
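# Usage sketch for build_cnn (assumptions: `T` is theano.tensor, `theano` and
# `lasagne` are imported, and the layer aliases used above are in scope):
input_var = T.tensor4('inputs')
network, feanet = build_cnn(input_var, n=5)
prediction = lasagne.layers.get_output(network, deterministic=True)
predict_fn = theano.function([input_var], prediction)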
def build_model(in_shape=INPUT_SHAPE):
    """ Compile net architecture """
    nonlin = elu
    net1 = lasagne.layers.InputLayer(shape=(None, in_shape[0], in_shape[1], in_shape[2]),
                                     name='Input')

    # number of filters in the first layer;
    # doubled in each encoder block (and halved again in the decoder)
    nf0 = 8

    # --- encoder ---
    net1 = conv_bn(net1, num_filters=nf0, filter_size=3, nonlinearity=nonlin, pad='same')
    net1 = conv_bn(net1, num_filters=nf0, filter_size=3, nonlinearity=nonlin, pad='same')
    p1 = net1
    net1 = MaxPool2DLayer(net1, pool_size=2, stride=2)
    net1 = conv_bn(net1, num_filters=2 * nf0, filter_size=3, nonlinearity=nonlin, pad='same')
    net1 = conv_bn(net1, num_filters=2 * nf0, filter_size=3, nonlinearity=nonlin, pad='same')
    p2 = net1
    net1 = MaxPool2DLayer(net1, pool_size=2, stride=2)
    net1 = conv_bn(net1, num_filters=4 * nf0, filter_size=3, nonlinearity=nonlin, pad='same')
    net1 = conv_bn(net1, num_filters=4 * nf0, filter_size=3, nonlinearity=nonlin, pad='same')
    p3 = net1
    net1 = MaxPool2DLayer(net1, pool_size=2, stride=2)
    net1 = conv_bn(net1, num_filters=8 * nf0, filter_size=3, nonlinearity=nonlin, pad='same')
    net1 = conv_bn(net1, num_filters=8 * nf0, filter_size=3, nonlinearity=nonlin, pad='same')

    # --- decoder ---
    net1 = TransposedConv2DLayer(net1, num_filters=4 * nf0, filter_size=2, stride=2)
    net1 = batch_norm(net1)
    net1 = ElemwiseSumLayer((p3, net1))
    net1 = batch_norm(net1)
    net1 = conv_bn(net1, num_filters=4 * nf0, filter_size=3, nonlinearity=nonlin, pad='same')
    net1 = conv_bn(net1, num_filters=4 * nf0, filter_size=3, nonlinearity=nonlin, pad='same')
    net1 = DropoutLayer(net1, p=0.2)

    net1 = TransposedConv2DLayer(net1, num_filters=2 * nf0, filter_size=2, stride=2)
    net1 = batch_norm(net1)
    net1 = ElemwiseSumLayer((p2, net1))
    net1 = batch_norm(net1)
    net1 = conv_bn(net1, num_filters=2 * nf0, filter_size=3, nonlinearity=nonlin, pad='same')
    net1 = conv_bn(net1, num_filters=2 * nf0, filter_size=3, nonlinearity=nonlin, pad='same')
    net1 = DropoutLayer(net1, p=0.1)

    net1 = TransposedConv2DLayer(net1, num_filters=nf0, filter_size=2, stride=2)
    net1 = batch_norm(net1)
    net1 = ElemwiseSumLayer((p1, net1))
    net1 = batch_norm(net1)
    net1 = conv_bn(net1, num_filters=nf0, filter_size=3, nonlinearity=nonlin, pad='same')
    net1 = conv_bn(net1, num_filters=nf0, filter_size=3, nonlinearity=nonlin, pad='same')

    net1 = Conv2DLayer(net1, num_filters=1, filter_size=1, nonlinearity=sigmoid, pad='same')

    return net1
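# NOTE: `conv_bn` is not defined in this snippet. A minimal sketch of what it
# presumably does (convolution followed by batch normalization); the weight
# initializer is an assumption. The `model()` function further below reuses the
# same helper, calling it positionally as conv_bn(net, num_filters, nonlin).
def conv_bn(incoming, num_filters, nonlinearity=elu, filter_size=3, pad='same'):
    layer = Conv2DLayer(incoming, num_filters=num_filters, filter_size=filter_size,
                        pad=pad, nonlinearity=nonlinearity,
                        W=lasagne.init.HeNormal(gain='relu'))
    return batch_norm(layer)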
def build_densenet(input_var, input_shape=(None, 3, 224, 224),
                   num_filters_init=64, growth_rate=32, dropout=0.2,
                   num_classes=1000, stages=(6, 12, 24, 16)):
    if input_shape[2] % (2 ** len(stages)) != 0:
        raise ValueError("input_shape[2] must be a multiple of {}.".format(2 ** len(stages)))
    if input_shape[3] % (2 ** len(stages)) != 0:
        raise ValueError("input_shape[3] must be a multiple of {}.".format(2 ** len(stages)))

    # Input should be (BATCH_SIZE, NUM_CHANNELS, WIDTH, HEIGHT)
    # NUM_CHANNELS is usually 3 (R,G,B) and for the ImageNet example the width and height are 224
    network = InputLayer(input_shape, input_var)

    # Apply 2D convolutions with a 7x7 filter (pad by 3 on each side)
    # Because of the 2x2 stride the shape of the last two dimensions will be half the size of the input (112x112)
    network = Conv2DLayer(network, num_filters=num_filters_init, filter_size=(7, 7),
                          stride=(2, 2), pad=(3, 3), W=HeNormal(gain='relu'), b=None,
                          flip_filters=False, nonlinearity=None)

    # Batch normalize
    network = BatchNormLayer(network, beta=None, gamma=None)

    # If dropout is enabled, apply after every convolutional and dense layer
    if dropout > 0:
        network = DropoutLayer(network, p=dropout)

    # Apply ReLU
    network = NonlinearityLayer(network, nonlinearity=rectify)

    # Keep the maximum value of a 3x3 pool with a 2x2 stride
    # This operation again divides the size of the last two dimensions by two (56x56)
    network = MaxPool2DLayer(network, pool_size=(3, 3), stride=(2, 2), pad=(1, 1))

    # Add dense blocks
    for i, num_layers in enumerate(stages):
        # Except for the first block, we add a transition layer before the
        # dense block that halves the number of filters, width and height
        if i > 0:
            network = add_transition(network, math.floor(network.output_shape[1] / 2), dropout)
        network = build_block(network, num_layers, growth_rate, dropout)

    # Apply global pooling and add a fully connected layer with softmax function
    network = ScaleLayer(network)
    network = BiasLayer(network)
    network = NonlinearityLayer(network, nonlinearity=rectify)
    network = GlobalPoolLayer(network)
    network = DenseLayer(network, num_units=num_classes, W=HeNormal(gain=1), nonlinearity=softmax)

    return network
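# NOTE: `build_block` and `add_transition` are helpers not shown in this
# snippet. Minimal sketches along the lines of the DenseNet paper
# (BN -> ReLU -> 3x3 conv with concatenation; transitions compress with a
# 1x1 conv and 2x2 average pooling); the exact layer options are assumptions.
from lasagne.layers import ConcatLayer, Pool2DLayer

def build_block(network, num_layers, growth_rate, dropout):
    for _ in range(num_layers):
        conv = BatchNormLayer(network)
        conv = NonlinearityLayer(conv, nonlinearity=rectify)
        conv = Conv2DLayer(conv, num_filters=growth_rate, filter_size=(3, 3),
                           pad=(1, 1), W=HeNormal(gain='relu'), b=None,
                           flip_filters=False, nonlinearity=None)
        if dropout > 0:
            conv = DropoutLayer(conv, p=dropout)
        # each layer sees the concatenation of all previous feature maps
        network = ConcatLayer([network, conv], axis=1)
    return network

def add_transition(network, num_filters, dropout):
    network = BatchNormLayer(network)
    network = NonlinearityLayer(network, nonlinearity=rectify)
    network = Conv2DLayer(network, num_filters=num_filters, filter_size=(1, 1),
                          W=HeNormal(gain='relu'), b=None, nonlinearity=None)
    if dropout > 0:
        network = DropoutLayer(network, p=dropout)
    # 2x2 average pooling halves width and height
    return Pool2DLayer(network, pool_size=(2, 2), stride=(2, 2),
                       mode='average_inc_pad')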
def build_critic(input_var=None, cond_var=None, n_conds=0, arch=0,
                 with_BatchNorm=True, loss_type='wgan'):
    from lasagne.layers import (
        InputLayer, Conv2DLayer, DenseLayer, MaxPool2DLayer, concat,
        dropout, flatten)
    from lasagne.nonlinearities import rectify, LeakyRectify
    from lasagne.init import GlorotUniform  # Normal

    lrelu = LeakyRectify(0.2)
    layer = InputLayer(shape=(None, 1, 128, 128), input_var=input_var, name='d_in_data')

    # init = Normal(0.02, 0.0)
    init = GlorotUniform()

    # Theano variables cannot be used in a boolean context, so test for None
    if cond_var is not None:
        # class: from data or from generator input
        layer_cond = InputLayer(shape=(None, n_conds), input_var=cond_var,
                                name='d_in_condition')
        layer_cond = BatchNorm(DenseLayer(
            layer_cond, 1024, W=init, b=None, nonlinearity=lrelu), with_BatchNorm)

    if arch == 'dcgan':
        # DCGAN inspired
        layer = BatchNorm(Conv2DLayer(
            layer, 32, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 64, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 512, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
    elif arch == 'cont-enc':
        # convolution layers
        layer = BatchNorm(Conv2DLayer(
            layer, 64, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 64, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 512, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm)
    elif arch == 'mnist':
        # Jan Schlüter's MNIST discriminator
        # convolution layers
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 5, stride=2, pad='same', W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 5, stride=2, pad='same', W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 5, stride=2, pad='same', W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
        # layer = BatchNorm(Conv2DLayer(
        #     layer, 128, 5, stride=2, pad='same', W=init, b=None,
        #     nonlinearity=lrelu), with_BatchNorm)
        # fully-connected layer
        # layer = BatchNorm(DenseLayer(
        #     layer, 1024, W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
    elif arch == 'lsgan':
        layer = batch_norm(Conv2DLayer(layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu))
        layer = batch_norm(Conv2DLayer(layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu))
        layer = batch_norm(Conv2DLayer(layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu))
    elif arch == 'crepe':
        # CREPE
        # form words from sequence of characters
        layer = BatchNorm(Conv2DLayer(
            layer, 1024, (128, 7), W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
        layer = MaxPool2DLayer(layer, (1, 3))
        # temporal convolution, 7-gram
        layer = BatchNorm(Conv2DLayer(
            layer, 512, (1, 7), W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
        layer = MaxPool2DLayer(layer, (1, 3))
        # temporal convolution, 3-gram
        layer = BatchNorm(Conv2DLayer(
            layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
        layer = flatten(layer)
        # fully-connected layers
        layer = dropout(DenseLayer(layer, 1024, W=init, b=None, nonlinearity=rectify))
        layer = dropout(DenseLayer(layer, 1024, W=init, b=None, nonlinearity=rectify))
    else:
        raise Exception("Model architecture {} is not supported".format(arch))

    # output layer (linear and without bias)
    if cond_var is not None:
        layer = DenseLayer(layer, 1024, nonlinearity=lrelu, b=None)
        layer = concat([layer, layer_cond])
    layer = DenseLayer(layer, 1, b=None, nonlinearity=None)
    print("Critic output:", layer.output_shape)
    return layer
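# NOTE: `BatchNorm(layer, flag)` above is a project-specific wrapper, not the
# stock `lasagne.layers.batch_norm`. A plausible minimal sketch: apply batch
# normalization only when the flag is set.
from lasagne.layers import batch_norm

def BatchNorm(layer, with_BatchNorm=True):
    return batch_norm(layer) if with_BatchNorm else layer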
def buildnet(weight_file, z_hid=50):
    conv_num_filters = 16
    filter_size = 3
    pool_size = 2
    pad_in = 'valid'
    pad_out = 'full'
    input_var = T.tensor4('inputs')
    # target_var = T.matrix('targets')
    encode_hid = 1000
    decode_hid = encode_hid
    ii1 = 45
    ii2 = 36
    dense_upper_mid_size = conv_num_filters * (ii1 - 2) * (ii2 - 2) * 2
    relu_shift = 10

    input_layer = InputLayer(shape=(None, 27, 32, 30), input_var=input_var)
    conv1 = Conv2DLayer(input_layer, num_filters=conv_num_filters, filter_size=filter_size, pad=pad_in)
    conv2 = Conv2DLayer(conv1, num_filters=conv_num_filters, filter_size=filter_size, pad=pad_in)
    pool1 = MaxPool2DLayer(conv2, pool_size=pool_size)
    conv3 = Conv2DLayer(pool1, num_filters=2 * conv_num_filters, filter_size=filter_size, pad=pad_in)
    pool2 = MaxPool2DLayer(conv3, pool_size=pool_size)
    reshape1 = ReshapeLayer(pool2, shape=([0], -1))

    encode_h_layer = DenseLayer(reshape1, num_units=encode_hid, nonlinearity=None)
    mu_layer = DenseLayer(encode_h_layer, num_units=z_hid, nonlinearity=None)
    # shifted ReLU keeps log_sigma bounded from below at -relu_shift
    log_sigma_layer = DenseLayer(
        encode_h_layer, num_units=z_hid,
        nonlinearity=lambda a: T.nnet.relu(a + relu_shift) - relu_shift)
    q_layer = Q_Layer([mu_layer, log_sigma_layer])

    decode_h_layer = DenseLayer(q_layer, num_units=decode_hid, nonlinearity=tanh)
    decode_h_layer_second = DenseLayer(decode_h_layer, num_units=dense_upper_mid_size,
                                       nonlinearity=None)
    reshape2 = ReshapeLayer(decode_h_layer_second,
                            shape=([0], 2 * conv_num_filters, (ii1 - 2), (ii2 - 2)))
    upscale1 = Upscale2DLayer(reshape2, scale_factor=pool_size)
    deconv1 = Conv2DLayer(upscale1, num_filters=conv_num_filters, filter_size=filter_size, pad=pad_out)
    upscale2 = Upscale2DLayer(deconv1, scale_factor=pool_size)
    deconv2 = Conv2DLayer(upscale2, num_filters=conv_num_filters, filter_size=filter_size, pad=pad_out)
    deconv3 = Conv2DLayer(deconv2, num_filters=1, filter_size=filter_size, pad=pad_out,
                          nonlinearity=sigmoid)
    network = ReshapeLayer(deconv3, shape=([0], -1))

    with open(weight_file, 'rb') as f:
        updated_param_values = pickle.load(f)
    lasagne.layers.set_all_param_values(network, updated_param_values)

    encoded_mu = lasagne.layers.get_output(mu_layer)
    ae_encode_mu = theano.function([input_var], encoded_mu)
    encoded_log_sigma = lasagne.layers.get_output(log_sigma_layer)
    ae_encode_log_sigma = theano.function([input_var], encoded_log_sigma)

    x = theano.tensor.matrix()
    mu = theano.tensor.matrix()
    log_sigma = theano.tensor.matrix()
    noise_adjust = theano.function([x, mu, log_sigma], x * T.exp(log_sigma) + mu)

    noise_var = T.matrix()
    gen = get_output(network, {q_layer: noise_var})
    gen_from_noise = theano.function([noise_var], gen)

    def gen_model_from_enc(noise_input, n_steps, gen_from_noise, noise_adjust,
                           ae_encode_mu, ae_encode_log_sigma, threshold=False):
        generated_i = gen_from_noise(noise_input)
        generated = generated_i.reshape(-1, 27, 32, 30)
        for ii in range(0, n_steps):
            mu = ae_encode_mu(generated)
            log_sigma = ae_encode_log_sigma(generated)
            noise_adj = noise_adjust(noise_input, mu, log_sigma)
            generated = gen_from_noise(noise_adj)
            generated = generated.reshape(-1, 27, 32, 30)
        if threshold:
            generated[generated < 0.5] = 0
            generated[generated >= 0.5] = 1
        X_gen = generated
        return X_gen

    return ae_encode_mu, ae_encode_log_sigma, noise_adjust, gen_from_noise, gen_model_from_enc
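# NOTE: `Q_Layer` is a custom layer not shown in this snippet. A minimal
# sketch of the usual VAE reparameterization trick it presumably implements
# (z = mu + eps * exp(log_sigma), eps ~ N(0, 1)); the seed and exact details
# are assumptions.
from lasagne.layers import MergeLayer
from theano.sandbox.rng_mrg import MRG_RandomStreams

class Q_Layer(MergeLayer):
    def __init__(self, incomings, **kwargs):
        super(Q_Layer, self).__init__(incomings, **kwargs)
        self._srng = MRG_RandomStreams(seed=42)

    def get_output_shape_for(self, input_shapes):
        # same shape as the mu input
        return input_shapes[0]

    def get_output_for(self, inputs, **kwargs):
        mu, log_sigma = inputs
        eps = self._srng.normal(mu.shape)
        return mu + eps * T.exp(log_sigma)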
def build_fcn(input_var, inner_size):
    l_in = InputLayer(shape=(None, 1) + inner_size, input_var=input_var)

    # stage 1
    conv1_1 = batch_norm(Conv2DLayer(l_in, num_filters=32, filter_size=(5, 5),
                                     nonlinearity=rectify, W=HeNormal(), pad=2))
    conv1_2 = batch_norm(Conv2DLayer(conv1_1, num_filters=32, filter_size=(5, 5),
                                     nonlinearity=rectify, W=HeNormal(), pad=2))
    conv1_3 = batch_norm(Conv2DLayer(conv1_2, num_filters=32, filter_size=(5, 5),
                                     nonlinearity=rectify, W=HeNormal(), pad=2))
    pool1 = MaxPool2DLayer(conv1_3, pool_size=(2, 2))

    # stage 2
    conv2_1 = batch_norm(Conv2DLayer(pool1, num_filters=64, filter_size=(3, 3),
                                     nonlinearity=rectify, W=HeNormal(), pad=1))
    conv2_2 = batch_norm(Conv2DLayer(conv2_1, num_filters=64, filter_size=(3, 3),
                                     nonlinearity=rectify, W=HeNormal(), pad=1))
    conv2_3 = batch_norm(Conv2DLayer(conv2_2, num_filters=64, filter_size=(3, 3),
                                     nonlinearity=rectify, W=HeNormal(), pad=1))
    pool2 = MaxPool2DLayer(conv2_3, pool_size=(2, 2))

    # stage 3
    conv3_1 = batch_norm(Conv2DLayer(pool2, num_filters=64, filter_size=(3, 3),
                                     nonlinearity=rectify, W=HeNormal(), pad=1))
    conv3_2 = batch_norm(Conv2DLayer(conv3_1, num_filters=64, filter_size=(3, 3),
                                     nonlinearity=rectify, W=HeNormal(), pad=1))
    pool3 = MaxPool2DLayer(conv3_2, pool_size=(2, 2))

    # stage 4
    conv4_1 = batch_norm(Conv2DLayer(pool3, num_filters=128, filter_size=(3, 3),
                                     nonlinearity=rectify, W=HeNormal(), pad=1))
    conv4_2 = batch_norm(Conv2DLayer(conv4_1, num_filters=128, filter_size=(3, 3),
                                     nonlinearity=rectify, W=HeNormal(), pad=1))
    pool4 = MaxPool2DLayer(conv4_2, pool_size=(2, 2))

    # top-down stage 0
    up4 = Upscale2DLayer(pool4, (2, 2))
    up4_conv = batch_norm(Conv2DLayer(up4, num_filters=2, filter_size=(1, 1),
                                      nonlinearity=my_softmax, W=HeNormal()))
    pool3_conv = batch_norm(Conv2DLayer(pool3, num_filters=2, filter_size=(1, 1),
                                        nonlinearity=my_softmax, W=HeNormal()))
    concat4 = ElemwiseSumLayer([up4_conv, pool3_conv])

    # top-down stage 1
    up3 = Upscale2DLayer(concat4, (2, 2))
    pool2_conv = batch_norm(Conv2DLayer(pool2, num_filters=2, filter_size=(1, 1),
                                        nonlinearity=my_softmax, W=HeNormal()))
    concat3 = ElemwiseSumLayer([up3, pool2_conv])

    # top-down stage 2
    pool1_conv = batch_norm(Conv2DLayer(pool1, num_filters=2, filter_size=(1, 1),
                                        nonlinearity=my_softmax, W=HeNormal()))
    up2 = Upscale2DLayer(concat3, (2, 2))
    concat2 = ElemwiseSumLayer([up2, pool1_conv])

    pred = averageLayer(Upscale2DLayer(concat2, (2, 2)))
    sli = SliceLayer(pred, indices=slice(0, 1), axis=1)
    area = GlobalPoolLayer(sli)

    return pred, sli, area
def architecture(input_var, input_shape, cfg):
    layer = InputLayer(input_shape, input_var)

    # filterbank, if any
    if cfg['filterbank'] == 'mel':
        import audio
        filterbank = audio.create_mel_filterbank(cfg['sample_rate'], cfg['frame_len'],
                                                 cfg['mel_bands'], cfg['mel_min'],
                                                 cfg['mel_max'])
        filterbank = filterbank[:input_shape[3]].astype(theano.config.floatX)
        layer = DenseLayer(layer, num_units=cfg['mel_bands'], num_leading_axes=-1,
                           W=T.constant(filterbank), b=None, nonlinearity=None)
    elif cfg['filterbank'] == 'mel_learn':
        layer = MelBankLayer(layer, cfg['sample_rate'], cfg['frame_len'],
                             cfg['mel_bands'], cfg['mel_min'], cfg['mel_max'])
    elif cfg['filterbank'] != 'none':
        raise ValueError("Unknown filterbank=%s" % cfg['filterbank'])

    # magnitude transformation, if any
    if cfg['magscale'] == 'log':
        layer = ExpressionLayer(layer, lambda x: T.log(T.maximum(1e-7, x)))
    elif cfg['magscale'] == 'log1p':
        layer = ExpressionLayer(layer, T.log1p)
    elif cfg['magscale'].startswith('log1p_learn'):
        # learnable log(1 + 10^a * x), with given initial a (or default 0)
        a = float(cfg['magscale'][len('log1p_learn'):] or 0)
        a = T.exp(theano.shared(lasagne.utils.floatX(a)))
        layer = lasagne.layers.ScaleLayer(layer, scales=a, shared_axes=(0, 1, 2, 3))
        layer = ExpressionLayer(layer, T.log1p)
    elif cfg['magscale'].startswith('pow_learn'):
        # learnable x^sigmoid(a), with given initial a (or default 0)
        a = float(cfg['magscale'][len('pow_learn'):] or 0)
        a = T.nnet.sigmoid(theano.shared(lasagne.utils.floatX(a)))
        layer = PowLayer(layer, exponent=a)
    elif cfg['magscale'] == 'pcen':
        layer = PCENLayer(layer)
        if cfg.get('pcen_fix_alpha'):
            layer.params[layer.log_alpha].remove("trainable")
    elif cfg['magscale'] == 'loudness_only':
        # cut away half a block length on the left and right
        layer = lasagne.layers.SliceLayer(
            layer, slice(cfg['blocklen'] // 2, -(cfg['blocklen'] // 2)), axis=2)
        # average over the frequencies and channels
        layer = lasagne.layers.ExpressionLayer(
            layer, lambda X: X.mean(axis=(1, 3), keepdims=True),
            lambda shp: (shp[0], 1, shp[2], 1))
    elif cfg['magscale'] != 'none':
        raise ValueError("Unknown magscale=%s" % cfg['magscale'])

    # temporal difference, if any
    if cfg['arch.timediff']:
        layer = TimeDiffLayer(layer, delta=cfg['arch.timediff'])

    # standardization per frequency band
    if cfg.get('input_norm', 'batch') == 'batch':
        layer = batch_norm_vanilla(layer, axes=(0, 2), beta=None, gamma=None)
    elif cfg['input_norm'] == 'instance':
        layer = lasagne.layers.StandardizationLayer(layer, axes=2)
    elif cfg['input_norm'] == 'none':
        pass
    else:
        raise ValueError("Unknown input_norm=%s" % cfg['input_norm'])

    # convolutional neural network
    kwargs = dict(nonlinearity=lasagne.nonlinearities.leaky_rectify,
                  W=lasagne.init.Orthogonal())
    maybe_batch_norm = batch_norm if cfg['arch.batch_norm'] else lambda x: x
    if cfg['arch.convdrop'] == 'independent':
        maybe_dropout = lambda x: dropout(x, 0.1)
    elif cfg['arch.convdrop'] == 'channels':
        maybe_dropout = lambda x: dropout(x, 0.1, shared_axes=(2, 3))
    elif cfg['arch.convdrop'] == 'bands':
        maybe_dropout = lambda x: dropout(x, 0.1, shared_axes=(1, 2))
    elif cfg['arch.convdrop'] == 'none':
        maybe_dropout = lambda x: x
    else:
        raise ValueError("Unknown arch.convdrop=%s" % cfg['arch.convdrop'])
    if cfg['arch'] == 'dense:16':
        layer = DenseLayer(layer, 16, **kwargs)
        layer = DenseLayer(layer, 1, nonlinearity=lasagne.nonlinearities.sigmoid,
                           W=lasagne.init.Orthogonal())
        return layer
    convmore = cfg['arch.convmore']
    layer = Conv2DLayer(layer, int(64 * convmore), 3, **kwargs)
    if cfg.get('arch.firstconv_zeromean', False) == 'params':
        layer.W = layer.W - T.mean(layer.W, axis=(2, 3), keepdims=True)
    layer = maybe_batch_norm(layer)
    layer = maybe_dropout(layer)
    layer = Conv2DLayer(layer, int(32 * convmore), 3, **kwargs)
    layer = maybe_batch_norm(layer)
    layer = MaxPool2DLayer(layer, 3)
    layer = maybe_dropout(layer)
    layer = Conv2DLayer(layer, int(128 * convmore), 3, **kwargs)
    layer = maybe_batch_norm(layer)
    layer = maybe_dropout(layer)
    layer = Conv2DLayer(layer, int(64 * convmore), 3, **kwargs)
    layer = maybe_batch_norm(layer)
    if cfg['arch'] == 'ismir2015':
        layer = MaxPool2DLayer(layer, 3)
    elif cfg['arch'] == 'ismir2016':
        layer = maybe_dropout(layer)
        layer = Conv2DLayer(layer, int(128 * convmore),
                            (3, layer.output_shape[3] - 3), **kwargs)
        layer = maybe_batch_norm(layer)
        layer = MaxPool2DLayer(layer, (1, 4))
    else:
        raise ValueError('Unknown arch=%s' % cfg['arch'])
    layer = DenseLayer(dropout(layer, 0.5), 256, **kwargs)
    layer = maybe_batch_norm(layer)
    layer = DenseLayer(dropout(layer, 0.5), 64, **kwargs)
    layer = maybe_batch_norm(layer)
    layer = DenseLayer(dropout(layer, 0.5), 1,
                       nonlinearity=lasagne.nonlinearities.sigmoid,
                       W=lasagne.init.Orthogonal())
    return layer
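# Example configuration for `architecture` (all values are illustrative
# assumptions, not taken from the original experiments; 'none' for the
# filterbank avoids the optional `audio` module dependency):
cfg_example = {
    'filterbank': 'none',
    'magscale': 'log1p',
    'arch.timediff': 0,
    'input_norm': 'batch',
    'arch': 'ismir2016',
    'arch.batch_norm': True,
    'arch.convdrop': 'none',
    'arch.convmore': 1.0,
}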
# Model implementation of FaceNet
# This is a ZFNet-based model with 1x1 convolutions
# Link to paper: https://arxiv.org/pdf/1503.03832.pdf
import lasagne
import theano
from lasagne.layers import (Conv2DLayer, MaxPool2DLayer, BatchNormLayer,
                            DenseLayer, InputLayer, Deconv2DLayer,
                            NonlinearityLayer, get_output, get_output_shape,
                            batch_norm, NINLayer, GlobalPoolLayer,
                            LocalResponseNormalization2DLayer, DropoutLayer,
                            ReshapeLayer)


def build_facenet(input_var):
    # Lasagne's Conv2DLayer expects channels first: (batch, channels, height, width)
    net = InputLayer((None, 3, 220, 220), input_var=input_var)
    net = Conv2DLayer(net, 64, 7, stride=2, pad=2)
    net = MaxPool2DLayer(net, 2)
    net = LocalResponseNormalization2DLayer(net)
    net = Conv2DLayer(net, 64, 1, stride=1)
    net = LocalResponseNormalization2DLayer(Conv2DLayer(net, 192, 3, stride=1, pad=1))
    net = MaxPool2DLayer(net, 2)
    net = Conv2DLayer(net, 192, 1, stride=1)
    net = Conv2DLayer(net, 384, 3, stride=1, pad=1)
    net = MaxPool2DLayer(net, 2)
    net = Conv2DLayer(net, 384, 1, stride=1)
    net = Conv2DLayer(net, 256, 3, stride=1, pad=1)
    net = Conv2DLayer(net, 256, 1, stride=1)
    net = Conv2DLayer(net, 256, 3, stride=1, pad=1)
    net = Conv2DLayer(net, 256, 1, stride=1)
    net = Conv2DLayer(net, 256, 3, stride=1, pad=1)
    net = MaxPool2DLayer(net, 2)
    net = DropoutLayer(DenseLayer(net, 4096), p=0.2)
    net = DropoutLayer(DenseLayer(net, 4096), p=0.2)
    return net
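# NOTE: the snippet above stops before the embedding head. In the FaceNet
# paper the network ends in a 128-D embedding that is L2-normalized; a hedged
# sketch of that missing piece (the helper name and the epsilon are
# assumptions, not part of the original code):
from lasagne.layers import ExpressionLayer
import theano.tensor as T

def add_embedding_head(net, embedding_dim=128):
    net = DenseLayer(net, embedding_dim, nonlinearity=None)
    # L2-normalize each row so embeddings lie on the unit hypersphere
    return ExpressionLayer(
        net, lambda x: x / T.sqrt(T.sum(x ** 2, axis=1, keepdims=True) + 1e-8))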
def simple_network3(input_size, output_size):
    network = InputLayer(shape=((None, ) + input_size), name='inputLayer')
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=16, filter_size=(3, 3),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.HeNormal(gain='relu'))
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=16, filter_size=(3, 3),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.HeNormal(gain='relu'))
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=16, filter_size=(7, 7), stride=2,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.HeNormal(gain='relu'))
    network = DropoutLayer(network, p=0.5)
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(3, 3),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.HeNormal(gain='relu'))
    network = MaxPool2DLayer(network, 2)
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(3, 3),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.HeNormal(gain='relu'))
    network = MaxPool2DLayer(network, 2)
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=64, filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.HeNormal(gain='relu'))
    print("---------------")
    print(lasagne.layers.get_output_shape(network))
    network = lasagne.layers.DenseLayer(
        network, num_units=128,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.HeNormal(gain='relu'))
    network = lasagne.layers.DenseLayer(network, num_units=output_size,
                                        nonlinearity=None,
                                        b=lasagne.init.Constant(.1))
    return network
def model(show_model):
    """ Compile net architecture """

    # --- input layers ---
    l_view1 = lasagne.layers.InputLayer(shape=(None, INPUT_SHAPE_1[0],
                                               INPUT_SHAPE_1[1] // 2,
                                               INPUT_SHAPE_1[2] // 2))
    l_view2 = lasagne.layers.InputLayer(shape=(None, INPUT_SHAPE_2[0],
                                               INPUT_SHAPE_2[1],
                                               INPUT_SHAPE_2[2]))
    net1 = l_view1
    net2 = l_view2

    # --- feed forward part view 1 ---
    num_filters_1 = 24
    net1 = conv_bn(net1, num_filters_1, nonlin)
    net1 = conv_bn(net1, num_filters_1, nonlin)
    net1 = MaxPool2DLayer(net1, pool_size=2)
    net1 = conv_bn(net1, 2 * num_filters_1, nonlin)
    net1 = conv_bn(net1, 2 * num_filters_1, nonlin)
    net1 = MaxPool2DLayer(net1, pool_size=2)
    net1 = conv_bn(net1, 4 * num_filters_1, nonlin)
    net1 = conv_bn(net1, 4 * num_filters_1, nonlin)
    net1 = MaxPool2DLayer(net1, pool_size=2)
    net1 = conv_bn(net1, 4 * num_filters_1, nonlin)
    net1 = conv_bn(net1, 4 * num_filters_1, nonlin)
    net1 = MaxPool2DLayer(net1, pool_size=2)
    net1 = Conv2DLayer(net1, num_filters=dim_latent, filter_size=1, pad=0,
                       W=init(), nonlinearity=identity)
    net1 = batch_norm(net1)
    net1 = lasagne.layers.GlobalPoolLayer(net1)
    l_v1latent = lasagne.layers.FlattenLayer(net1, name='Flatten')

    # --- feed forward part view 2 ---
    num_filters_2 = num_filters_1
    net2 = conv_bn(net2, num_filters_2, nonlin)
    net2 = conv_bn(net2, num_filters_2, nonlin)
    net2 = MaxPool2DLayer(net2, pool_size=2)
    net2 = conv_bn(net2, 2 * num_filters_2, nonlin)
    net2 = conv_bn(net2, 2 * num_filters_2, nonlin)
    net2 = MaxPool2DLayer(net2, pool_size=2)
    net2 = conv_bn(net2, 4 * num_filters_2, nonlin)
    net2 = conv_bn(net2, 4 * num_filters_2, nonlin)
    net2 = MaxPool2DLayer(net2, pool_size=2)
    net2 = conv_bn(net2, 4 * num_filters_2, nonlin)
    net2 = conv_bn(net2, 4 * num_filters_2, nonlin)
    net2 = MaxPool2DLayer(net2, pool_size=2)
    net2 = Conv2DLayer(net2, num_filters=dim_latent, filter_size=1, pad=0,
                       W=init(), nonlinearity=identity)
    net2 = batch_norm(net2)
    net2 = lasagne.layers.GlobalPoolLayer(net2)
    l_v2latent = lasagne.layers.FlattenLayer(net2, name='Flatten')

    # --- multi modality part ---
    # merge modalities by cca projection or learned embedding layer
    if use_ccal:
        net = CCALayer([l_v1latent, l_v2latent], r1, r2, rT,
                       alpha=alpha, wl=weight_tno)
    else:
        net = LearnedCCALayer([l_v1latent, l_v2latent], U=init(), V=init(),
                              alpha=alpha)

    # split modalities again
    l_v1latent = SliceLayer(net, slice(0, dim_latent), axis=1)
    l_v2latent = SliceLayer(net, slice(dim_latent, 2 * dim_latent), axis=1)

    # normalize (per row) output to length 1.0
    l_v1latent = LengthNormLayer(l_v1latent)
    l_v2latent = LengthNormLayer(l_v2latent)

    # --- print architectures ---
    if show_model:
        print_architecture(l_v1latent)
        print_architecture(l_v2latent)

    return l_view1, l_view2, l_v1latent, l_v2latent
def build_cnn(input_var=None, W_init=None):
    # Create a CNN of three convolution groups, each followed by max-pooling,
    # keeping a handle on every convolution's weights and group outputs.
    weights = []  # Keeps the weights for all layers
    layers = dict()
    count = 0

    if W_init is None:
        W_init = [lasagne.init.GlorotUniform()] * 7

    # Input layer, as usual:
    network = InputLayer(shape=(None, 3, imSize, imSize), input_var=input_var)

    # CNN Group 1
    network = Conv2DLayer(network, num_filters=32, filter_size=(3, 3),
                          W=W_init[count], pad='same')
    count += 1
    weights.append(network.W)
    network = Conv2DLayer(network, num_filters=32, filter_size=(3, 3),
                          W=W_init[count], pad='same')
    count += 1
    weights.append(network.W)
    network = Conv2DLayer(network, num_filters=32, filter_size=(3, 3),
                          W=W_init[count], pad='same')
    count += 1
    weights.append(network.W)
    network = Conv2DLayer(network, num_filters=32, filter_size=(3, 3),
                          W=W_init[count], pad='same')
    count += 1
    weights.append(network.W)
    network = MaxPool2DLayer(network, pool_size=(2, 2))
    layers['conv1_out'] = network

    # CNN Group 2
    network = Conv2DLayer(network, num_filters=64, filter_size=(3, 3),
                          W=W_init[count], pad='same')
    count += 1
    weights.append(network.W)
    network = Conv2DLayer(network, num_filters=64, filter_size=(3, 3),
                          W=W_init[count], pad='same')
    count += 1
    weights.append(network.W)
    # network = Conv2DLayer(network, num_filters=64, filter_size=(3, 3),
    #                       W=W_init[count], pad='same')
    # count += 1
    # weights.append(network.W)
    network = MaxPool2DLayer(network, pool_size=(2, 2))
    layers['conv2_out'] = network

    # CNN Group 3
    network = Conv2DLayer(network, num_filters=128, filter_size=(3, 3),
                          W=W_init[count], pad='same')
    count += 1
    weights.append(network.W)
    # network = Conv2DLayer(network, num_filters=128, filter_size=(3, 3),
    #                       W=W_init[count], pad='same')
    # count += 1
    # weights.append(network.W)
    # network = Conv2DLayer(network, num_filters=128, filter_size=(3, 3),
    #                       W=W_init[count], pad='same')
    # count += 1
    # weights.append(network.W)
    network = MaxPool2DLayer(network, pool_size=(2, 2))
    layers['conv3_out'] = network

    return network, weights
def build_network():
    conv_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'filter_size': (3, 3),
        'stride': (1, 1),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    nin_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    dense_defs = {
        'W': lasagne.init.HeNormal(1.0),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.softmax
    }

    wn_defs = {'momentum': .999}

    net = InputLayer(name='input', shape=(None, 3, 32, 32))
    net = GaussianNoiseLayer(net, name='noise', sigma=.15)
    net = WN(Conv2DLayer(net, name='conv1a', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv1b', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv1c', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer(net, name='pool1', pool_size=(2, 2))
    net = DropoutLayer(net, name='drop1', p=.5)
    net = WN(Conv2DLayer(net, name='conv2a', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv2b', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv2c', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer(net, name='pool2', pool_size=(2, 2))
    net = DropoutLayer(net, name='drop2', p=.5)
    net = WN(Conv2DLayer(net, name='conv3a', num_filters=512, pad=0, **conv_defs), **wn_defs)
    net = WN(NINLayer(net, name='conv3b', num_units=256, **nin_defs), **wn_defs)
    net = WN(NINLayer(net, name='conv3c', num_units=128, **nin_defs), **wn_defs)
    net = GlobalPoolLayer(net, name='pool3')
    net = WN(DenseLayer(net, name='dense', num_units=10, **dense_defs), **wn_defs)

    return net
batchs, tsteps, fdim = network['input'].input_var.shape
network['reshape'] = ReshapeLayer(network['input'], (batchs, 1, 382, 40))
# print lasagne.layers.get_output(network['reshape']).eval({X: x_test}).shape

network['conv1'] = batch_norm(ConvLayer(
    network['reshape'], 256, (9, 9), pad='same', flip_filters=False))
# print lasagne.layers.get_output(network['conv1']).eval({X: x_test}).shape

network['pool1'] = MaxPool2DLayer(network['conv1'], (1, 3))
# print lasagne.layers.get_output(network['pool1']).eval({X: x_test}).shape

network['conv2'] = batch_norm(ConvLayer(
    network['pool1'], 256, (3, 4), pad=(1, 0), flip_filters=False))
# print lasagne.layers.get_output(network['conv2']).eval({X: x_test}).shape

network['reshape1'] = ReshapeLayer(network['conv2'], (batchs * tsteps, [1], [3]))
# print lasagne.layers.get_output(network['reshape1']).eval({X: x_test}).shape
input_var.tag.test_value = X_train
target_var.tag.test_value = y_train

# setting up theano - use None to indicate that dimension may change
coarse_input = InputLayer((minibatchsize, inchan, width, height), input_var=input_var)

# choose number of filters and filter size
coarse_conv1 = Conv2DLayer(coarse_input, num_filters=32, filter_size=(5, 5),
                           nonlinearity=rectify, W=GlorotUniform(), pad=(2, 2))
coarse_pool1 = MaxPool2DLayer(coarse_conv1, pool_size=(2, 2))
coarse_conv2 = Conv2DLayer(coarse_pool1, num_filters=64, filter_size=(3, 3),
                           nonlinearity=rectify, W=GlorotUniform(), pad=(1, 1))
coarse_pool2 = MaxPool2DLayer(coarse_conv2, pool_size=(2, 2))
coarse_conv3 = Conv2DLayer(coarse_pool2, num_filters=128, filter_size=(3, 3),
                           nonlinearity=rectify, W=GlorotUniform(),
                           # the original call is truncated here; the closing
                           # pad is assumed from the 3x3/pad-1 pattern above
                           pad=(1, 1))
def model_to_fcn(output_layers, allow_unlink=False):
    """
    Converts a Lasagne CNN model for fixed-size spectrogram excerpts into a
    fully-convolutional network that can handle spectrograms of arbitrary
    length (but at least the fixed length the original CNN was designed for),
    producing the same results as if applying it to every possible excerpt of
    the spectrogram in sequence. This is done by replacing convolutional and
    pooling layers with dilated versions if they appear after temporal
    max-pooling in the original model, and the first dense layer with a
    convolutional layer. If `allow_unlink` is False, the converted model will
    share all parameters with the original model. Otherwise, some parameters
    may be unshared for improved performance.
    """
    converted = {}
    dilations = {}
    for layer in lasagne.layers.get_all_layers(output_layers):
        if isinstance(layer, InputLayer):
            # Input layer: Just set third dimension to be of arbitrary size
            converted[layer] = InputLayer(
                layer.shape[:2] + (None, ) + layer.shape[3:], layer.input_var)
            dilations[layer] = 1

        elif isinstance(layer, Conv2DLayer):
            # Conv2DLayer: Make dilated if needed
            kwargs = dict(incoming=converted[layer.input_layer],
                          num_filters=layer.num_filters,
                          filter_size=layer.filter_size,
                          nonlinearity=layer.nonlinearity,
                          b=layer.b)
            dilation = dilations[layer.input_layer]
            if dilation == 1:
                converted[layer] = Conv2DLayer(W=layer.W, **kwargs)
            else:
                W = layer.W.get_value() if allow_unlink else layer.W
                converted[layer] = DilatedConv2DLayer(
                    W=W.transpose(1, 0, 2, 3)[:, :, ::-1, ::-1],
                    dilation=(dilation, 1), **kwargs)
            dilations[layer] = dilation

        elif isinstance(layer, MaxPool2DLayer):
            # MaxPool2DLayer: Make dilated if needed, increase dilation factor
            kwargs = dict(incoming=converted[layer.input_layer],
                          pool_size=layer.pool_size,
                          stride=(1, layer.stride[1]))
            dilation = dilations[layer.input_layer]
            if dilation == 1:
                converted[layer] = MaxPool2DLayer(**kwargs)
            else:
                converted[layer] = TimeDilatedMaxPool2DLayer(
                    dilation=(dilation, 1), **kwargs)
            dilations[layer] = dilation * layer.stride[0]

        elif isinstance(layer, DenseLayer):
            # DenseLayer: Turn into Conv2DLayer/DilatedConv2DLayer if needed,
            # reset dilation factor
            dilation = dilations[layer.input_layer]
            if (dilation == 1 and
                    (getattr(layer, 'num_leading_axes', 1) == -1 or
                     len(layer.input_shape) == 2)):
                # we can retain it as a DenseLayer
                converted[layer] = DenseLayer(
                    converted[layer.input_layer],
                    num_units=layer.num_units,
                    W=layer.W, b=layer.b,
                    nonlinearity=layer.nonlinearity,
                    num_leading_axes=layer.num_leading_axes)
            else:
                if len(layer.input_shape) == 4:
                    blocklen = (int(np.prod(layer.input_shape[1:]))
                                // layer.input_shape[1] // layer.input_shape[-1])
                elif len(layer.input_shape) == 3:
                    blocklen = int(np.prod(layer.input_shape[1:])) // layer.input_shape[1]
                else:
                    blocklen = 1
                W = layer.W.get_value() if allow_unlink else layer.W
                W = W.T.reshape((layer.num_units, layer.input_shape[1],
                                 blocklen, layer.input_shape[-1])).transpose(1, 0, 2, 3)
                converted[layer] = DilatedConv2DLayer(
                    converted[layer.input_layer],
                    num_filters=layer.num_units,
                    filter_size=(blocklen, layer.input_shape[-1]),
                    W=W, b=layer.b, dilation=(dilation, 1),
                    nonlinearity=None)
                converted[layer] = lasagne.layers.DimshuffleLayer(
                    converted[layer], (0, 2, 1, 3))
                converted[layer] = lasagne.layers.ReshapeLayer(
                    converted[layer], (-1, [2], [3]))
                converted[layer] = lasagne.layers.FlattenLayer(converted[layer])
                converted[layer] = lasagne.layers.NonlinearityLayer(
                    converted[layer], layer.nonlinearity)
            dilations[layer] = 1

        elif not isinstance(layer, MergeLayer):
            # all other layers: deepcopy the layer
            # - set up a memo dictionary so the cloned layer will be linked to
            #   the converted part of the network, not to a new clone of it
            memo = {id(layer.input_layer): converted[layer.input_layer]}
            # - in addition, share all parameters with the existing layer
            memo.update((id(p), p) for p in layer.params.keys())
            # - perform the copy
            clone = deepcopy(layer, memo)
            # update the input shape of the cloned layer
            clone.input_shape = converted[layer.input_layer].output_shape
            # use the cloned layer, keep the dilation factor
            converted[layer] = clone
            dilations[layer] = dilations[layer.input_layer]

        else:
            raise ValueError("don't know how to convert %r" % layer)

    # Return list of converted output layers, or single converted output layer
    try:
        return [converted[layer] for layer in output_layers]
    except TypeError:
        return converted[output_layers]
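# Usage sketch for `model_to_fcn` (assumptions: `network` is an excerpt-level
# model built elsewhere and `input_var` is its Theano input variable):
# fcn = model_to_fcn(network, allow_unlink=True)
# outputs = lasagne.layers.get_output(fcn, deterministic=True)
# process_fn = theano.function([input_var], outputs)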
random_state = np.random.RandomState(1999)

# Add batchsize, channel dim
X_train = face(gray=True)[None, None].astype('float32')
X_train = X_train / 255.
y_train = 2 * X_train

chan = X_train.shape[1]
width = X_train.shape[2]
height = X_train.shape[3]

input_var = tensor.tensor4('X')
target_var = tensor.tensor4('y')

l_input = InputLayer((None, chan, width, height), input_var=input_var)
l_conv1 = Conv2DLayer(l_input, num_filters=32, filter_size=(3, 3),
                      nonlinearity=rectify, W=GlorotUniform())
l_pool1 = MaxPool2DLayer(l_conv1, pool_size=(2, 2))
l_conv2 = Conv2DLayer(l_pool1, num_filters=32, filter_size=(1, 1),
                      nonlinearity=rectify, W=GlorotUniform())
l_depool1 = Unpool2DLayer(l_pool1, (2, 2))
# Lasagne's transposed-convolution layer is named TransposedConv2DLayer
l_deconv1 = TransposedConv2DLayer(l_depool1, num_filters=chan,
                                  filter_size=(3, 3), W=GlorotUniform(),
                                  nonlinearity=linear)
l_out = l_deconv1

prediction = get_output(l_out)
train_loss = squared_error(prediction, target_var)
train_loss = train_loss.mean()

valid_prediction = get_output(l_out, deterministic=True)
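# A minimal continuation sketch (assumptions: `lasagne` and `theano` are
# imported under those names; the choice of update rule is arbitrary):
params = lasagne.layers.get_all_params(l_out, trainable=True)
updates = lasagne.updates.adam(train_loss, params, learning_rate=1e-3)
train_fn = theano.function([input_var, target_var], train_loss, updates=updates)
valid_fn = theano.function([input_var], valid_prediction)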
def get_model(input_var, target_var, multiply_var):
    # input layer with unspecified batch size
    layer_both_0 = InputLayer(shape=(None, 30, 64, 64), input_var=input_var)

    # Z-score?

    # Convolution, batch normalisation and activation, twice; then a padded
    # max-pooling layer followed by dropout
    layer_both_1 = batch_norm(Conv2DLayer(layer_both_0, 64, (3, 3), pad='same',
                                          nonlinearity=leaky_rectify))
    layer_both_2 = batch_norm(Conv2DLayer(layer_both_1, 64, (3, 3), pad='same',
                                          nonlinearity=leaky_rectify))
    layer_both_3 = MaxPool2DLayer(layer_both_2, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_both_4 = DropoutLayer(layer_both_3, p=0.25)

    layer_both_5 = batch_norm(Conv2DLayer(layer_both_4, 128, (3, 3), pad='same',
                                          nonlinearity=leaky_rectify))
    layer_both_6 = batch_norm(Conv2DLayer(layer_both_5, 128, (3, 3), pad='same',
                                          nonlinearity=leaky_rectify))
    layer_both_7 = MaxPool2DLayer(layer_both_6, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_both_8 = DropoutLayer(layer_both_7, p=0.25)

    layer_both_9 = batch_norm(Conv2DLayer(layer_both_8, 256, (3, 3), pad='same',
                                          nonlinearity=leaky_rectify))
    layer_both_10 = batch_norm(Conv2DLayer(layer_both_9, 256, (3, 3), pad='same',
                                           nonlinearity=leaky_rectify))
    layer_both_11 = batch_norm(Conv2DLayer(layer_both_10, 256, (3, 3), pad='same',
                                           nonlinearity=leaky_rectify))
    layer_both_12 = MaxPool2DLayer(layer_both_11, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_both_13 = DropoutLayer(layer_both_12, p=0.25)

    # Flatten
    layer_flatten = FlattenLayer(layer_both_13)

    # Prediction
    layer_hidden = DenseLayer(layer_flatten, 500, nonlinearity=sigmoid)
    layer_prediction = DenseLayer(layer_hidden, 2, nonlinearity=linear)

    # Loss
    prediction = get_output(layer_prediction) / multiply_var
    loss = squared_error(prediction, target_var)
    loss = loss.mean()

    # Updates: Stochastic Gradient Descent (SGD) with Nesterov momentum
    params = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = get_output(layer_prediction, deterministic=True) / multiply_var
    test_loss = squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    # CRPS estimate
    crps = T.abs_(test_prediction - target_var).mean() / 600

    return test_prediction, crps, loss, params
# (the opening of this call was lost in the original; the layer name `l_input`
# and the 64 filters are reconstructed to match l_conv1_2 below)
l_conv1_1 = Conv2DLayer(l_input, num_filters=64,
                        filter_size=(3, 3), stride=1, pad=1, flip_filters=False,
                        nonlinearity=rectify, W=lasagne.init.Normal(0.01))
l_conv1_2 = Conv2DLayer(l_conv1_1, num_filters=64, filter_size=(3, 3), stride=1,
                        pad=1, flip_filters=False, nonlinearity=rectify,
                        W=lasagne.init.Normal(0.01))
# Other arguments: Convolution type (full, same, or valid) and stride
l_pool1 = MaxPool2DLayer(l_conv1_2, pool_size=(2, 2), stride=2, pad=0)
l_conv2_1 = Conv2DLayer(l_pool1, num_filters=128, filter_size=(3, 3), stride=1,
                        pad=1, flip_filters=False, nonlinearity=rectify,
                        W=lasagne.init.Normal(0.01))
l_conv2_2 = Conv2DLayer(l_conv2_1, num_filters=128, filter_size=(3, 3), stride=1,
                        pad=1, flip_filters=False,
                        # the original call is truncated here; completed to
                        # match the pattern of the preceding layers
                        nonlinearity=rectify, W=lasagne.init.Normal(0.01))
def build(myNet, idxSiam, verbose=True):
    INITIALIZATION_GAIN = 1.0

    # -------------------------------------------------------------------------
    # input layer (2d cropped patch): myNet.layers[idxSiam]['ori-input']

    # -------------------------------------------------------------------------
    # 3x Convolution and Max Pooling layers

    # --------------
    # Conv 0
    if idxSiam == 0:
        W_init = HeNormal(gain=INITIALIZATION_GAIN)
        # W_init = Constant(0.0)
        b_init = Constant(0.0)
    else:
        W_init = myNet.layers[0]['ori-c0'].W
        b_init = myNet.layers[0]['ori-c0'].b
    myNet.layers[idxSiam]['ori-c0'] = Conv2DLayer(
        myNet.layers[idxSiam]['ori-input'], num_filters=10, filter_size=5,
        W=W_init, b=b_init, nonlinearity=None, flip_filters=False, name='ori-c0')
    # Activation 0
    myNet.layers[idxSiam]['ori-c0a'] = NonlinearityLayer(
        myNet.layers[idxSiam]['ori-c0'], nonlinearity=relu, name='ori-c0a')
    # Pool 0
    myNet.layers[idxSiam]['ori-c0p'] = MaxPool2DLayer(
        myNet.layers[idxSiam]['ori-c0a'], pool_size=2, name='ori-c0p')

    # --------------
    # Conv 1
    if idxSiam == 0:
        W_init = HeNormal(gain=INITIALIZATION_GAIN)
        # W_init = Constant(0.0)
        b_init = Constant(0.0)
    else:
        W_init = myNet.layers[0]['ori-c1'].W
        b_init = myNet.layers[0]['ori-c1'].b
    myNet.layers[idxSiam]['ori-c1'] = Conv2DLayer(
        myNet.layers[idxSiam]['ori-c0p'], num_filters=20, filter_size=5,
        W=W_init, b=b_init, nonlinearity=None, flip_filters=False, name='ori-c1')
    # Activation 1
    myNet.layers[idxSiam]['ori-c1a'] = NonlinearityLayer(
        myNet.layers[idxSiam]['ori-c1'], nonlinearity=relu, name='ori-c1a')
    # Pool 1
    myNet.layers[idxSiam]['ori-c1p'] = MaxPool2DLayer(
        myNet.layers[idxSiam]['ori-c1a'], pool_size=2, name='ori-c1p')

    # --------------
    # Conv 2
    if idxSiam == 0:
        W_init = HeNormal(gain=INITIALIZATION_GAIN)
        # W_init = Constant(0.0)
        b_init = Constant(0.0)
    else:
        W_init = myNet.layers[0]['ori-c2'].W
        b_init = myNet.layers[0]['ori-c2'].b
    myNet.layers[idxSiam]['ori-c2'] = Conv2DLayer(
        myNet.layers[idxSiam]['ori-c1p'], num_filters=50, filter_size=3,
        W=W_init, b=b_init, nonlinearity=None, flip_filters=False, name='ori-c2')
    # Activation 2
    myNet.layers[idxSiam]['ori-c2a'] = NonlinearityLayer(
        myNet.layers[idxSiam]['ori-c2'], nonlinearity=relu, name='ori-c2a')
    # Pool 2
    myNet.layers[idxSiam]['ori-c2p'] = MaxPool2DLayer(
        myNet.layers[idxSiam]['ori-c2a'], pool_size=2, name='ori-c2p')

    # -------------------------------------------------------------------------
    # Fully Connected Layers

    # --------------
    # FC 3
    nu = 100
    ns = 4
    nm = 4
    if idxSiam == 0:
        W_init = HeNormal(gain=INITIALIZATION_GAIN)
        # W_init = Constant(0.0)
        b_init = Constant(0.0)
    else:
        W_init = myNet.layers[0]['ori-f3'].W
        b_init = myNet.layers[0]['ori-f3'].b
    myNet.layers[idxSiam]['ori-f3'] = DenseLayer(
        myNet.layers[idxSiam]['ori-c2a'], num_units=nu * ns * nm,
        W=W_init, b=b_init, nonlinearity=None, name='ori-f3')
    # Activation 3
    myNet.layers[idxSiam]['ori-f3a'] = GHHFeaturePoolLayer(
        myNet.layers[idxSiam]['ori-f3'], num_in_sum=ns, num_in_max=nm,
        max_strength=myNet.config.max_strength, name='ori-f3a')
    # Dropout 3
    myNet.layers[idxSiam]['ori-f3d'] = DropoutLayer(
        myNet.layers[idxSiam]['ori-f3a'], p=0.3, name='ori-f3d')

    # --------------
    # FC 4
    nu = 2
    ns = 4
    nm = 4
    if idxSiam == 0:
        W_init = HeNormal(gain=INITIALIZATION_GAIN)
        # W_init = Constant(0.0)
        b_init = Constant(0.0)
    else:
        W_init = myNet.layers[0]['ori-f4'].W
        b_init = myNet.layers[0]['ori-f4'].b
    myNet.layers[idxSiam]['ori-f4'] = DenseLayer(
        myNet.layers[idxSiam]['ori-f3d'], num_units=nu * ns * nm,
        W=W_init, b=b_init, nonlinearity=None, name='ori-f4')
    # Activation 4
    myNet.layers[idxSiam]['ori-f4a'] = GHHFeaturePoolLayer(
        myNet.layers[idxSiam]['ori-f4'], num_in_sum=ns, num_in_max=nm,
        max_strength=myNet.config.max_strength, name='ori-f4a')

    # -------------------------------------------------------------------------
    # Arctan2 Layer
    myNet.layers[idxSiam]['ori-output'] = ExpressionLayer(
        myNet.layers[idxSiam]['ori-f4a'],
        lambda x: CT.custom_arctan2(x[:, 0], x[:, 1]).flatten().dimshuffle(0, 'x'),
        output_shape=(myNet.config.batch_size, 1), name='ori-output')
process1 = subprocess.check_call(command1.split())

network = {}
X = T.tensor3(name='features', dtype='float32')

print("Building network ...")
network['input'] = InputLayer(shape=(None, 382, 40), input_var=X)
batchs, _, _ = network['input'].input_var.shape

# pre-activation
network['reshape'] = ReshapeLayer(network['input'], (batchs, 1, 382, 40))
network['conv1_1'] = batch_norm(ConvLayer(network['reshape'], 64, (3, 3), pad=1,
                                          flip_filters=False, W=HeNormal('relu')))
network['conv1_2'] = batch_norm(ConvLayer(network['conv1_1'], 64, (3, 3), pad=1,
                                          flip_filters=False, W=HeNormal('relu')))
network['pool1'] = MaxPool2DLayer(network['conv1_2'], 2)
network['conv2_1'] = batch_norm(ConvLayer(network['pool1'], 128, (3, 3), pad=1,
                                          flip_filters=False, W=HeNormal('relu')))
network['conv2_2'] = batch_norm(ConvLayer(network['conv2_1'], 128, (3, 3), pad=1,
                                          flip_filters=False, W=HeNormal('relu')))
network['pool2'] = MaxPool2DLayer(network['conv2_2'], 2)
network['conv3_1'] = batch_norm(ConvLayer(network['pool2'], 256, (3, 3), pad=1,
                                          flip_filters=False, W=HeNormal('relu')))
network['conv3_2'] = batch_norm(ConvLayer(network['conv3_1'], 256, (3, 3), pad=1,
                                          flip_filters=False, W=HeNormal('relu')))
network['conv3_3'] = batch_norm(ConvLayer(network['conv3_2'], 256, (3, 3), pad=1,
                                          flip_filters=False, W=HeNormal('relu')))
network['pool3'] = MaxPool2DLayer(network['conv3_3'], 2)
network['conv4_1'] = batch_norm(ConvLayer(network['pool3'], 512, (3, 3), pad=1,
                                          flip_filters=False, W=HeNormal('relu')))
network['conv4_2'] = batch_norm(ConvLayer(network['conv4_1'], 512, (3, 3), pad=1,
                                          flip_filters=False, W=HeNormal('relu')))
network['conv4_3'] = batch_norm(ConvLayer(network['conv4_2'], 512, (3, 3), pad=1,
                                          flip_filters=False, W=HeNormal('relu')))
network['pool4'] = MaxPool2DLayer(network['conv4_3'], 2)
def __init__(self, size):
    net = {}
    net['input'] = InputLayer((None, 3, *size))
    net['conv1_1'] = Conv2DLayer(net['input'], 64, 3, pad=1, flip_filters=False)
    net['conv1_2'] = Conv2DLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False)
    net['pool1'] = MaxPool2DLayer(net['conv1_2'], 2)
    net['conv2_1'] = Conv2DLayer(net['pool1'], 128, 3, pad=1, flip_filters=False)
    net['conv2_2'] = Conv2DLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False)
    net['pool2'] = MaxPool2DLayer(net['conv2_2'], 2)
    net['conv3_1'] = Conv2DLayer(net['pool2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_2'] = Conv2DLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False)
    net['conv3_3'] = Conv2DLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_4'] = Conv2DLayer(net['conv3_3'], 256, 3, pad=1, flip_filters=False)
    net['pool3'] = MaxPool2DLayer(net['conv3_4'], 2)
    net['conv4_1'] = Conv2DLayer(net['pool3'], 512, 3, pad=1, flip_filters=False)
    net['conv4_2'] = Conv2DLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False)
    net['conv4_3'] = Conv2DLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False)
    net['conv4_4'] = Conv2DLayer(net['conv4_3'], 512, 3, pad=1, flip_filters=False)
    net['pool4'] = MaxPool2DLayer(net['conv4_4'], 2)
    net['conv5_1'] = Conv2DLayer(net['pool4'], 512, 3, pad=1, flip_filters=False)
    net['conv5_2'] = Conv2DLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False)
    net['conv5_3'] = Conv2DLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False)
    net['conv5_4'] = Conv2DLayer(net['conv5_3'], 512, 3, pad=1, flip_filters=False)
    net['pool5'] = MaxPool2DLayer(net['conv5_4'], 2)
    self.net = net
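# Usage sketch: the layout above matches VGG-19, so pre-trained weights can be
# loaded (the class name, file name, and pickle layout are assumptions based
# on the Lasagne Recipes model zoo format):
# model = VGG19Features(size=(224, 224))
# with open('vgg19_normalized.pkl', 'rb') as f:
#     values = pickle.load(f, encoding='latin1')['param values']
# lasagne.layers.set_all_param_values(model.net['pool5'], values)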
def get_model():
    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.matrix('targets')

    # input layer with unspecified batch size
    layer_0 = InputLayer(shape=(None, 30, 64, 64), input_var=input_var)

    # Z-score?

    # Convolution, batch normalisation and activation, twice; then a padded
    # max-pooling layer followed by dropout
    layer_1 = batch_norm(Conv2DLayer(layer_0, 64, (3, 3), pad='same',
                                     nonlinearity=leaky_rectify))
    layer_2 = batch_norm(Conv2DLayer(layer_1, 64, (3, 3), pad='valid',
                                     nonlinearity=leaky_rectify))
    layer_3 = MaxPool2DLayer(layer_2, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_4 = DropoutLayer(layer_3, p=0.25)

    layer_5 = batch_norm(Conv2DLayer(layer_4, 96, (3, 3), pad='same',
                                     nonlinearity=leaky_rectify))
    layer_6 = batch_norm(Conv2DLayer(layer_5, 96, (3, 3), pad='valid',
                                     nonlinearity=leaky_rectify))
    layer_7 = MaxPool2DLayer(layer_6, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_8 = DropoutLayer(layer_7, p=0.25)

    layer_9 = batch_norm(Conv2DLayer(layer_8, 128, (3, 3), pad='same',
                                     nonlinearity=leaky_rectify))
    layer_10 = batch_norm(Conv2DLayer(layer_9, 128, (3, 3), pad='valid',
                                      nonlinearity=leaky_rectify))
    layer_11 = MaxPool2DLayer(layer_10, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_12 = DropoutLayer(layer_11, p=0.25)

    # Last layers
    layer_13 = FlattenLayer(layer_12)
    layer_14 = DenseLayer(layer_13, 1024, nonlinearity=leaky_rectify)
    layer_15 = DropoutLayer(layer_14, p=0.5)
    layer_16 = DenseLayer(layer_15, 600, nonlinearity=softmax)

    # Loss
    prediction = get_output(layer_16)
    loss = squared_error(prediction, target_var)
    loss = loss.mean() + regularize_layer_params(layer_14, l2)

    # Updates: Stochastic Gradient Descent (SGD) with Nesterov momentum
    params = get_all_params(layer_16, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = get_output(layer_16, deterministic=True)
    test_loss = squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss:
    val_fn = theano.function([input_var, target_var], test_loss)

    # Compile a third function computing the prediction:
    predict_fn = theano.function([input_var], test_prediction)

    return [layer_16, train_fn, val_fn, predict_fn]
def network(image, p):
    input_image = InputLayer(input_var=image, shape=(None, 128, 256, 3))
    input_image = DimshuffleLayer(input_image, pattern=(0, 3, 1, 2))

    conv1 = batch_norm(Conv2DLayer(input_image, num_filters=16, filter_size=(3, 3),
                                   stride=(1, 1), nonlinearity=rectify, pad='same'))
    conv1 = batch_norm(Conv2DLayer(conv1, num_filters=16, filter_size=(3, 3),
                                   stride=(1, 1), nonlinearity=rectify, pad='same'))
    conv1 = DropoutLayer(conv1, p=p)
    conv1 = ConcatLayer([input_image, conv1], axis=1)

    conv2 = batch_norm(Conv2DLayer(conv1, num_filters=32, filter_size=(3, 3),
                                   stride=(1, 1), nonlinearity=rectify, pad='same'))
    conv2 = batch_norm(Conv2DLayer(conv2, num_filters=32, filter_size=(3, 3),
                                   stride=(1, 1), nonlinearity=rectify, pad='same'))
    conv2 = DropoutLayer(conv2, p=p)
    conv2 = batch_norm(ConcatLayer([conv2, conv1], axis=1))

    # parallel dilated (atrous) convolutions at rates 1, 2, 4 and 8
    atr1 = DilatedConv2DLayer(PadLayer(conv2, width=1), num_filters=16,
                              filter_size=(3, 3), dilation=(1, 1), pad=0,
                              nonlinearity=rectify)
    atr2 = DilatedConv2DLayer(PadLayer(conv2, width=2), num_filters=16,
                              filter_size=(3, 3), dilation=(2, 2), pad=0,
                              nonlinearity=rectify)
    atr4 = DilatedConv2DLayer(PadLayer(conv2, width=4), num_filters=16,
                              filter_size=(3, 3), dilation=(4, 4), pad=0,
                              nonlinearity=rectify)
    atr8 = DilatedConv2DLayer(PadLayer(conv2, width=8), num_filters=16,
                              filter_size=(3, 3), dilation=(8, 8), pad=0,
                              nonlinearity=rectify)
    sumblock = ConcatLayer([conv2, atr1, atr2, atr4, atr8], axis=1)

    crp = MaxPool2DLayer(PadLayer(sumblock, width=1), pool_size=(3, 3),
                         stride=(1, 1), ignore_border=False)
    crp = batch_norm(Conv2DLayer(crp, num_filters=115, filter_size=(3, 3),
                                 stride=(1, 1), nonlinearity=rectify, pad='same'))
    sumblock = ElemwiseSumLayer([sumblock, crp])

    ground = batch_norm(Conv2DLayer(sumblock, num_filters=1, filter_size=(3, 3),
                                    stride=(1, 1),
                                    nonlinearity=output_layer_nonlinearity,
                                    pad='same'))
    ground = ReshapeLayer(ground, shape=([0], 128, 256))
    return ground
def build_model(x=None, layer='fc8', shape=(None, 3, 227, 227), up_scale=4):
    net = {'data': InputLayer(shape=shape, input_var=x)}
    net['data_s'] = Upscale2DLayer(net['data'], up_scale)
    net['conv1'] = Conv2DLayer(net['data_s'], num_filters=96, filter_size=(11, 11),
                               stride=4, nonlinearity=lasagne.nonlinearities.rectify)
    # string comparison with `is` is unreliable; use `==` throughout
    if layer == 'conv1':
        return net

    # pool1
    net['pool1'] = MaxPool2DLayer(net['conv1'], pool_size=(3, 3), stride=2)
    # norm1
    net['norm1'] = LocalResponseNormalization2DLayer(net['pool1'], n=5,
                                                     alpha=0.0001 / 5.0, beta=0.75, k=1)

    # conv2
    # before conv2 split the data
    net['conv2_data1'] = SliceLayer(net['norm1'], indices=slice(0, 48), axis=1)
    net['conv2_data2'] = SliceLayer(net['norm1'], indices=slice(48, 96), axis=1)
    # now do the convolutions
    net['conv2_part1'] = Conv2DLayer(net['conv2_data1'], num_filters=128,
                                     filter_size=(5, 5), pad=2)
    net['conv2_part2'] = Conv2DLayer(net['conv2_data2'], num_filters=128,
                                     filter_size=(5, 5), pad=2)
    # now combine
    net['conv2'] = concat((net['conv2_part1'], net['conv2_part2']), axis=1)
    if layer == 'conv2':
        return net

    # pool2
    net['pool2'] = MaxPool2DLayer(net['conv2'], pool_size=(3, 3), stride=2)
    # norm2
    net['norm2'] = LocalResponseNormalization2DLayer(net['pool2'], n=5,
                                                     alpha=0.0001 / 5.0, beta=0.75, k=1)
    # conv3, no group
    net['conv3'] = Conv2DLayer(net['norm2'], num_filters=384, filter_size=(3, 3), pad=1)
    if layer == 'conv3':
        return net

    # conv4
    net['conv4_data1'] = SliceLayer(net['conv3'], indices=slice(0, 192), axis=1)
    net['conv4_data2'] = SliceLayer(net['conv3'], indices=slice(192, 384), axis=1)
    net['conv4_part1'] = Conv2DLayer(net['conv4_data1'], num_filters=192,
                                     filter_size=(3, 3), pad=1)
    net['conv4_part2'] = Conv2DLayer(net['conv4_data2'], num_filters=192,
                                     filter_size=(3, 3), pad=1)
    net['conv4'] = concat((net['conv4_part1'], net['conv4_part2']), axis=1)
    if layer == 'conv4':
        return net

    # conv5, group 2
    net['conv5_data1'] = SliceLayer(net['conv4'], indices=slice(0, 192), axis=1)
    net['conv5_data2'] = SliceLayer(net['conv4'], indices=slice(192, 384), axis=1)
    net['conv5_part1'] = Conv2DLayer(net['conv5_data1'], num_filters=128,
                                     filter_size=(3, 3), pad=1)
    net['conv5_part2'] = Conv2DLayer(net['conv5_data2'], num_filters=128,
                                     filter_size=(3, 3), pad=1)
    net['conv5'] = concat((net['conv5_part1'], net['conv5_part2']), axis=1)
    if layer == 'conv5':
        return net

    # pool5
    net['pool5'] = MaxPool2DLayer(net['conv5'], pool_size=(3, 3), stride=2)

    # fc6
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096,
                            nonlinearity=lasagne.nonlinearities.rectify)
    if layer == 'fc6':
        return net

    # fc7
    net['fc7'] = DenseLayer(net['fc6'], num_units=4096,
                            nonlinearity=lasagne.nonlinearities.rectify)
    if layer == 'fc7':
        return net

    # fc8
    net['fc8'] = DenseLayer(net['fc7'], num_units=1000,
                            nonlinearity=lasagne.nonlinearities.softmax)
    if layer == 'fc8':
        # st()
        return net
def build_model():
    net = {}
    net['data'] = InputLayer(shape=(None, 3, 227, 227))

    # conv1
    net['conv1'] = Conv2DLayer(net['data'], num_filters=96, filter_size=(11, 11),
                               stride=4, nonlinearity=lasagne.nonlinearities.rectify)
    # pool1
    net['pool1'] = MaxPool2DLayer(net['conv1'], pool_size=(3, 3), stride=2)
    # norm1
    net['norm1'] = LocalResponseNormalization2DLayer(net['pool1'], n=5,
                                                     alpha=0.0001 / 5.0, beta=0.75, k=1)

    # conv2
    # The caffe reference model uses a parameter called group.
    # This parameter splits input to the convolutional layer.
    # The first half of the filters operate on the first half
    # of the input from the previous layer. Similarly, the
    # second half operate on the second half of the input.
    #
    # Lasagne does not have this group parameter, but we can
    # do it ourselves.
    #
    # see https://github.com/BVLC/caffe/issues/778
    # also see https://code.google.com/p/cuda-convnet/wiki/LayerParams

    # before conv2 split the data
    net['conv2_data1'] = SliceLayer(net['norm1'], indices=slice(0, 48), axis=1)
    net['conv2_data2'] = SliceLayer(net['norm1'], indices=slice(48, 96), axis=1)
    # now do the convolutions
    net['conv2_part1'] = Conv2DLayer(net['conv2_data1'], num_filters=128,
                                     filter_size=(5, 5), pad=2)
    net['conv2_part2'] = Conv2DLayer(net['conv2_data2'], num_filters=128,
                                     filter_size=(5, 5), pad=2)
    # now combine
    net['conv2'] = concat((net['conv2_part1'], net['conv2_part2']), axis=1)

    # pool2
    net['pool2'] = MaxPool2DLayer(net['conv2'], pool_size=(3, 3), stride=2)
    # norm2
    net['norm2'] = LocalResponseNormalization2DLayer(net['pool2'], n=5,
                                                     alpha=0.0001 / 5.0, beta=0.75, k=1)
    # conv3, no group
    net['conv3'] = Conv2DLayer(net['norm2'], num_filters=384, filter_size=(3, 3), pad=1)

    # conv4, group = 2
    net['conv4_data1'] = SliceLayer(net['conv3'], indices=slice(0, 192), axis=1)
    net['conv4_data2'] = SliceLayer(net['conv3'], indices=slice(192, 384), axis=1)
    net['conv4_part1'] = Conv2DLayer(net['conv4_data1'], num_filters=192,
                                     filter_size=(3, 3), pad=1)
    net['conv4_part2'] = Conv2DLayer(net['conv4_data2'], num_filters=192,
                                     filter_size=(3, 3), pad=1)
    net['conv4'] = concat((net['conv4_part1'], net['conv4_part2']), axis=1)

    # conv5, group = 2
    net['conv5_data1'] = SliceLayer(net['conv4'], indices=slice(0, 192), axis=1)
    net['conv5_data2'] = SliceLayer(net['conv4'], indices=slice(192, 384), axis=1)
    net['conv5_part1'] = Conv2DLayer(net['conv5_data1'], num_filters=128,
                                     filter_size=(3, 3), pad=1)
    net['conv5_part2'] = Conv2DLayer(net['conv5_data2'], num_filters=128,
                                     filter_size=(3, 3), pad=1)
    net['conv5'] = concat((net['conv5_part1'], net['conv5_part2']), axis=1)

    # pool5
    net['pool5'] = MaxPool2DLayer(net['conv5'], pool_size=(3, 3), stride=2)

    # fc6
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096,
                            nonlinearity=lasagne.nonlinearities.rectify)
    # fc7
    net['fc7'] = DenseLayer(net['fc6'], num_units=4096,
                            nonlinearity=lasagne.nonlinearities.rectify)
    # fc8
    net['fc8'] = DenseLayer(net['fc7'], num_units=1183,
                            nonlinearity=lasagne.nonlinearities.softmax)

    params = np.load('alex_start_net.npy', encoding='latin1')
    lasagne.layers.set_all_param_values(net['fc8'], params)

    return net
def network_discriminator(self, input, network_weights=None):
    layers = []

    if isinstance(input, lasagne.layers.Layer):
        layers.append(input)
        # First convolution
        layers.append(conv_layer(input, n_filters=self.n_filters, stride=1,
                                 name='discriminator/encoder/conv%d' % len(layers),
                                 network_weights=network_weights))
    else:
        # Input layer
        layers.append(InputLayer(shape=(None, 3, self.input_size, self.input_size),
                                 input_var=input, name='discriminator/encoder/input'))
        # First convolution
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters, stride=1,
                                 name='discriminator/encoder/conv%d' % len(layers),
                                 network_weights=network_weights))

    # Convolutional blocks (encoder); block i uses self.n_filters * i filters
    n_blocks = int(np.log2(self.input_size / 8)) + 1  # end up with an 8x8 output
    for i_block in range(1, n_blocks + 1):
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters * i_block, stride=1,
                                 name='discriminator/encoder/conv%d' % len(layers),
                                 network_weights=network_weights))
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters * i_block, stride=1,
                                 name='discriminator/encoder/conv%d' % len(layers),
                                 network_weights=network_weights))
        if i_block != n_blocks:
            # alternative: downsample with a strided convolution instead
            # layers.append(conv_layer(layers[-1], n_filters=self.n_filters * (i_block + 1), stride=2,
            #                          name='discriminator/encoder/conv%d' % len(layers),
            #                          network_weights=network_weights))
            layers.append(MaxPool2DLayer(layers[-1], pool_size=2, stride=2,
                                         name='discriminator/encoder/pooling%d' % len(layers)))
        # else:
        #     layers.append(conv_layer(layers[-1], n_filters=self.n_filters * i_block, stride=1,
        #                              name='discriminator/encoder/conv%d' % len(layers),
        #                              network_weights=network_weights))

    # Dense layer (linear output)
    layers.append(dense_layer(layers[-1], n_units=self.hidden_size,
                              name='discriminator/encoder/dense%d' % len(layers),
                              network_weights=network_weights))

    # Dense layer up (from h to n_filters * 8 * 8)
    layers.append(dense_layer(layers[-1], n_units=(8 * 8 * self.n_filters),
                              name='discriminator/decoder/dense%d' % len(layers),
                              network_weights=network_weights))
    layers.append(ReshapeLayer(layers[-1], (-1, self.n_filters, 8, 8),
                               name='discriminator/decoder/reshape%d' % len(layers)))

    # Convolutional blocks (decoder)
    for i_block in range(1, n_blocks + 1):
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters, stride=1,
                                 name='discriminator/decoder/conv%d' % len(layers),
                                 network_weights=network_weights))
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters, stride=1,
                                 name='discriminator/decoder/conv%d' % len(layers),
                                 network_weights=network_weights))
        if i_block != n_blocks:
            layers.append(Upscale2DLayer(layers[-1], scale_factor=2,
                                         name='discriminator/decoder/upsample%d' % len(layers)))

    # Final layer (sigmoid output, so reconstructions lie in [0, 1];
    # input images should be scaled to the same range)
    layers.append(conv_layer(layers[-1], n_filters=3, stride=1,
                             name='discriminator/decoder/output',
                             network_weights=network_weights, nonlinearity=sigmoid))

    # Network in dictionary form
    network = {layer.name: layer for layer in layers}

    return network
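This discriminator is itself an encoder-decoder that reconstructs its 3-channel input, so it is typically scored by reconstruction error rather than a real/fake logit (as in energy-based or BEGAN-style GANs). A hedged sketch of that scoring, assuming an instance `model` of the surrounding class and an illustrative input tensor:

import theano.tensor as T
import lasagne

images = T.tensor4('images')  # assumed scaled to [0, 1] to match the sigmoid output
disc = model.network_discriminator(images)
reconstruction = lasagne.layers.get_output(disc['discriminator/decoder/output'])
recon_err = T.mean(lasagne.objectives.squared_error(reconstruction, images))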
def build_stereo_cnn(input_var=None):
    conv_num_filters1 = 16
    conv_num_filters2 = 32
    conv_num_filters3 = 64
    conv_num_filters4 = 128
    filter_size1 = 7
    filter_size2 = 5
    filter_size3 = 3
    filter_size4 = 3
    pool_size = 2
    scale_factor = 2
    pad_in = 'valid'
    pad_out = 'full'

    # Input layer, as usual
    # (the spatial input shape is taken from the global X_train)
    network = InputLayer(shape=(None, 2, X_train.shape[2], X_train.shape[3]),
                         input_var=input_var, name="input_layer")

    # --- encoder: conv ('valid' padding) + max-pooling shrink the maps ---
    network = batch_norm(Conv2DLayer(
        network, num_filters=conv_num_filters1,
        filter_size=(filter_size1, filter_size1), pad=pad_in,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(), name="conv1"))
    network = MaxPool2DLayer(network, pool_size=(pool_size, pool_size), name="pool1")

    network = batch_norm(Conv2DLayer(
        network, num_filters=conv_num_filters2,
        filter_size=(filter_size2, filter_size2), pad=pad_in,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(), name="conv2"))
    network = MaxPool2DLayer(network, pool_size=(pool_size, pool_size), name="pool2")

    network = batch_norm(Conv2DLayer(
        network, num_filters=conv_num_filters3,
        filter_size=(filter_size3, filter_size3), pad=pad_in,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(), name="conv3"))
    network = MaxPool2DLayer(network, pool_size=(pool_size, pool_size), name="pool3")

    network = batch_norm(Conv2DLayer(
        network, num_filters=conv_num_filters4,
        filter_size=(filter_size4, filter_size4), pad=pad_in,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(), name="conv4"))

    # --- decoder: conv ('full' padding) + upscaling grow the maps back ---
    network = batch_norm(Conv2DLayer(
        network, num_filters=32,
        filter_size=(filter_size4, filter_size4), pad=pad_out,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(), name="deconv1"))
    network = Upscale2DLayer(network, scale_factor=(pool_size, pool_size), name="upscale1")

    network = batch_norm(Conv2DLayer(
        network, num_filters=16,
        filter_size=(filter_size3, filter_size3), pad=pad_out,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(), name="deconv2"))
    network = Upscale2DLayer(network, scale_factor=(pool_size, pool_size), name="upscale2")

    network = batch_norm(Conv2DLayer(
        network, num_filters=8,
        filter_size=(filter_size2, filter_size2), pad=pad_out,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(), name="deconv3"))
    network = Upscale2DLayer(network, scale_factor=(pool_size, pool_size), name="upscale3")

    network = batch_norm(Conv2DLayer(
        network, num_filters=1,
        filter_size=(filter_size1, filter_size1), pad=pad_out,
        nonlinearity=lasagne.nonlinearities.sigmoid,
        W=lasagne.init.GlorotUniform(), name="deconv4"))

    return network
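A short compile sketch for this stereo encoder-decoder (illustrative names; X_train must already be in scope, since the builder reads the input shape from it):

import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
network = build_stereo_cnn(input_var)

output = lasagne.layers.get_output(network, deterministic=True)
predict = theano.function([input_var], output)  # (batch, 1, H', W') sigmoid map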
def build(myNet, idxSiam, verbose=True):

    # Load model
    fn = '%s/%s' % (myNet.config.descriptor_export_folder,
                    myNet.config.descriptor_model)
    model_dict = dt.loadh5(fn)

    # Load training mean/std if we have the normalization setup
    if myNet.config.bNormalizeInput:
        kwang_mean = np.cast[floatX](myNet.config.mean_x)
        kwang_std = np.cast[floatX](myNet.config.std_x)
    # else, simply divide by 255
    else:
        kwang_mean = np.cast[floatX](0.0)
        kwang_std = np.cast[floatX](255.0)

    if 'patch-mean' in model_dict.keys():
        desc_mean_x = np.cast[floatX](model_dict['patch-mean'][0])
        desc_std_x = np.cast[floatX](model_dict['patch-std'][0])
    else:
        print('Warning: no mean/std in the model file')
        desc_mean_x = kwang_mean
        desc_std_x = kwang_std

    # Layer indices
    indices = model_dict['layers']

    if verbose and idxSiam == 0:
        print('*** Loading descriptor "%s" ***' % fn)
        print('Number of elements: %d' % indices.size)

    # Add another layer that transforms the original input into the
    # normalization the descriptor was trained with
    curr_name = 'desc-re-normalize'
    curr_input = myNet.config.descriptor_input
    myNet.layers[idxSiam][curr_name] = ExpressionLayer(
        myNet.layers[idxSiam][curr_input],
        lambda x: (x * kwang_std + kwang_mean - desc_mean_x) / desc_std_x,
        name=curr_name)
    curr_input = curr_name

    # Loop over layers
    for i in six.moves.xrange(indices.size):

        if indices[i] == 1:
            if verbose and idxSiam == 0:
                print('%d -> SpatialConvolution' % i)
            curr_name = 'desc-%d-conv' % (i + 1)
            # read actual values for siamese 0
            w = model_dict['l-%d-weights' % (i + 1)].astype(floatX)
            b = model_dict['l-%d-bias' % (i + 1)].astype(floatX)
            # filters are assumed to be square
            num_filters, num_input_channels, filter_size, _ = w.shape
            # assert num_input_channels == myNet.config.nDescInputChannels
            # assert filter_size == myNet.config.nDescInputSize
            if verbose and idxSiam == 0:
                print(' Number of filters: %d' % num_filters)
                print(' Filter size: %d' % filter_size)
            # Manually create shared variables for siamese 0 and reuse
            # them in the other branches, so all branches share weights
            if idxSiam == 0:
                w = theano.shared(w, name=curr_name + '.W')
                b = theano.shared(b, name=curr_name + '.b')
            else:
                w = myNet.layers[0][curr_name].W
                b = myNet.layers[0][curr_name].b
            myNet.layers[idxSiam][curr_name] = Conv2DLayer(
                myNet.layers[idxSiam][curr_input],
                num_filters, filter_size,
                W=w, b=b,
                nonlinearity=None,  # no activation
                flip_filters=False,
                name=curr_name)

        elif indices[i] == 2:
            if verbose and idxSiam == 0:
                print('%d -> Linear' % i)
            raise RuntimeError('Layer type %d TODO' % i)

        elif indices[i] == 3:
            if verbose and idxSiam == 0:
                print('%d -> SpatialMaxPooling' % i)
            curr_name = 'desc-%d-maxpool' % (i + 1)
            kw = model_dict['l-%d-kw' % (i + 1)].astype(np.int32)[0]
            kh = model_dict['l-%d-kh' % (i + 1)].astype(np.int32)[0]
            if verbose and idxSiam == 0:
                print(' Region size: %dx%d' % (kw, kh))
            assert kw == kh
            kw = int(kw)
            myNet.layers[idxSiam][curr_name] = MaxPool2DLayer(
                myNet.layers[idxSiam][curr_input],
                pool_size=kw, stride=None,
                name=curr_name)

        elif indices[i] == 4:
            if verbose and idxSiam == 0:
                print('%d -> SpatialLPPooling' % i)
            curr_name = 'desc-%d-lppool' % (i + 1)
            kw = model_dict['l-%d-kw' % (i + 1)].astype(np.int32)[0]
            kh = model_dict['l-%d-kh' % (i + 1)].astype(np.int32)[0]
            if verbose and idxSiam == 0:
                print(' Region size: %dx%d' % (kw, kh))
            assert kw == kh
            kw = int(kw)
            myNet.layers[idxSiam][curr_name] = LPPool2DLayer(
                myNet.layers[idxSiam][curr_input],
                pnorm=2, pool_size=kw, stride=None,
                name=curr_name)

        elif indices[i] == 5:
            if verbose and idxSiam == 0:
                print('%d -> Tanh' % i)
            curr_name = 'desc-%d-tanh' % (i + 1)
            myNet.layers[idxSiam][curr_name] = NonlinearityLayer(
                myNet.layers[idxSiam][curr_input],
                nonlinearity=nonlinearities.tanh,
                name=curr_name)

        elif indices[i] == 6:
            if verbose and idxSiam == 0:
                print('%d -> ReLU' % i)
            curr_name = 'desc-%d-relu' % (i + 1)
            myNet.layers[idxSiam][curr_name] = NonlinearityLayer(
                myNet.layers[idxSiam][curr_input],
                nonlinearity=nonlinearities.rectify,
                name=curr_name)

        elif indices[i] == 7:
            if verbose and idxSiam == 0:
                print('%d -> SpatialSubtractiveNormalization' % i)
            curr_name = 'desc-%d-subt-norm' % (i + 1)
            kernel = model_dict['l-%d-filter' % (i + 1)].astype(floatX)
            w_kernel, h_kernel = kernel.shape
            if verbose and idxSiam == 0:
                print(' Kernel size: %dx%d' % (w_kernel, h_kernel))
            myNet.layers[idxSiam][curr_name] = SubtractiveNormalization2DLayer(
                myNet.layers[idxSiam][curr_input],
                kernel=kernel,
                name=curr_name)

        else:
            raise RuntimeError('Layer type %d: unknown' % i)

        # Input to the next layer
        curr_input = curr_name

    # Flatten output and rename
    myNet.layers[idxSiam]['desc-output'] = FlattenLayer(
        myNet.layers[idxSiam][curr_input], outdim=2)
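For reference, the integer codes in model_dict['layers'] correspond to the original Torch layer types that the loop above dispatches on; this table just summarizes those branches:

# Torch layer-type codes handled (or rejected) by build() above
TORCH_LAYER_TYPES = {
    1: 'SpatialConvolution',
    2: 'Linear',                           # not implemented, raises RuntimeError
    3: 'SpatialMaxPooling',
    4: 'SpatialLPPooling',                 # p-norm pooling with pnorm=2
    5: 'Tanh',
    6: 'ReLU',
    7: 'SpatialSubtractiveNormalization',
}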
def create_model(incoming, options):
    conv_num_filters1 = 100
    conv_num_filters2 = 150
    conv_num_filters3 = 200
    filter_size1 = 5
    filter_size2 = 5
    filter_size3 = 3
    pool_size = 2
    encode_size = options['BOTTLENECK']
    dense_mid_size = options['DENSE']
    pad_in = 'valid'
    pad_out = 'full'
    scaled_tanh = create_scaled_tanh()

    # --- encoder ---
    conv2d1 = Conv2DLayer(incoming, num_filters=conv_num_filters1, filter_size=filter_size1,
                          pad=pad_in, name='conv2d1', nonlinearity=scaled_tanh)
    maxpool2d2 = MaxPool2DLayer(conv2d1, pool_size=pool_size, name='maxpool2d2')
    conv2d3 = Conv2DLayer(maxpool2d2, num_filters=conv_num_filters2, filter_size=filter_size2,
                          pad=pad_in, name='conv2d3', nonlinearity=scaled_tanh)
    maxpool2d4 = MaxPool2DLayer(conv2d3, pool_size=pool_size, name='maxpool2d4', pad=(1, 0))
    conv2d5 = Conv2DLayer(maxpool2d4, num_filters=conv_num_filters3, filter_size=filter_size3,
                          pad=pad_in, name='conv2d5', nonlinearity=scaled_tanh)
    reshape6 = ReshapeLayer(conv2d5, shape=([0], -1), name='reshape6')  # 3000
    reshape6_output = reshape6.output_shape[1]
    dense7 = DenseLayer(reshape6, num_units=dense_mid_size, name='dense7',
                        nonlinearity=scaled_tanh)
    bottleneck = DenseLayer(dense7, num_units=encode_size, name='bottleneck',
                            nonlinearity=linear)

    # --- decoder (weights tied to the encoder via transposes) ---
    dense8 = DenseLayer(bottleneck, num_units=dense_mid_size, W=bottleneck.W.T,
                        name='dense8', nonlinearity=linear)
    dense9 = DenseLayer(dense8, num_units=reshape6_output, W=dense7.W.T,
                        nonlinearity=scaled_tanh, name='dense9')
    reshape10 = ReshapeLayer(dense9, shape=([0], conv_num_filters3, 3, 5),
                             name='reshape10')  # 200 x 3 x 5
    deconv2d11 = Deconv2DLayer(reshape10, conv2d5.input_shape[1], conv2d5.filter_size,
                               stride=conv2d5.stride, W=conv2d5.W,
                               flip_filters=not conv2d5.flip_filters,
                               name='deconv2d11', nonlinearity=scaled_tanh)
    upscale2d12 = Upscale2DLayer(deconv2d11, scale_factor=pool_size, name='upscale2d12')
    deconv2d13 = Deconv2DLayer(upscale2d12, conv2d3.input_shape[1], conv2d3.filter_size,
                               stride=conv2d3.stride, W=conv2d3.W,
                               flip_filters=not conv2d3.flip_filters,
                               name='deconv2d13', nonlinearity=scaled_tanh)
    upscale2d14 = Upscale2DLayer(deconv2d13, scale_factor=pool_size, name='upscale2d14')
    deconv2d15 = Deconv2DLayer(upscale2d14, conv2d1.input_shape[1], conv2d1.filter_size,
                               stride=conv2d1.stride, crop=(1, 0), W=conv2d1.W,
                               flip_filters=not conv2d1.flip_filters,
                               name='deconv2d15', nonlinearity=scaled_tanh)
    reshape16 = ReshapeLayer(deconv2d15, ([0], -1), name='reshape16')
    return reshape16, bottleneck
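A hedged usage sketch for this tied-weight autoencoder. The option values are illustrative; the input spatial size of 28x40 is inferred by working the 'valid' convolutions and poolings backwards from the 200 x 3 x 5 tensor that reshape10 expects, and the single input channel is an assumption:

import theano
import theano.tensor as T
import lasagne
from lasagne.layers import InputLayer

input_var = T.tensor4('inputs')
incoming = InputLayer(shape=(None, 1, 28, 40), input_var=input_var)
options = {'BOTTLENECK': 64, 'DENSE': 512}  # illustrative values

reconstruction, bottleneck = create_model(incoming, options)
encode = theano.function(
    [input_var], lasagne.layers.get_output(bottleneck, deterministic=True))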
def network(image):
    input_image = InputLayer(input_var=image, shape=(None, 1, 120, 160))
    conv1 = Conv2DLayer(input_image, num_filters=32, filter_size=(5, 5),
                        stride=(2, 2), nonlinearity=rectify, pad='same')
    pool1 = MaxPool2DLayer(conv1, pool_size=(3, 3), stride=(2, 2), pad=1)

    conv2 = batch_norm(Conv2DLayer(pool1, num_filters=32, filter_size=(3, 3),
                                   stride=(2, 2), nonlinearity=rectify, pad='same'))
    conv2 = batch_norm(Conv2DLayer(conv2, num_filters=32, filter_size=(3, 3),
                                   stride=(1, 1), nonlinearity=rectify, pad='same'))
    downsample1 = Conv2DLayer(pool1, num_filters=32, filter_size=(1, 1),
                              stride=(2, 2), nonlinearity=rectify, pad='same')
    input3 = ElemwiseSumLayer([downsample1, conv2])

    conv3 = batch_norm(Conv2DLayer(input3, num_filters=64, filter_size=(3, 3),
                                   stride=(2, 2), nonlinearity=rectify, pad='same'))
    conv3 = batch_norm(Conv2DLayer(conv3, num_filters=64, filter_size=(3, 3),
                                   stride=(1, 1), nonlinearity=rectify, pad='same'))
    downsample2 = Conv2DLayer(input3, num_filters=64, filter_size=(1, 1),
                              stride=(2, 2), nonlinearity=rectify, pad='same')
    input4 = ElemwiseSumLayer([downsample2, conv3])

    conv4 = batch_norm(Conv2DLayer(input4, num_filters=128, filter_size=(3, 3),
                                   stride=(2, 2), nonlinearity=rectify, pad='same'))
    conv4 = batch_norm(Conv2DLayer(conv4, num_filters=128, filter_size=(3, 3),
                                   stride=(1, 1), nonlinearity=rectify, pad='same'))
    downsample3 = Conv2DLayer(input4, num_filters=128, filter_size=(1, 1),
                              stride=(2, 2), nonlinearity=rectify, pad='same')
    input5 = ElemwiseSumLayer([downsample3, conv4])

    flatten = DropoutLayer(FlattenLayer(input5), 0.5)
    prob_out = DenseLayer(flatten, num_units=1, nonlinearity=sigmoid)
    turn_angle = DenseLayer(flatten, num_units=1, nonlinearity=tanh)
    return prob_out, turn_angle
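Finally, a sketch of compiling the two heads of this network in one pass (get_output accepts a list of layers and returns matching expressions; names are illustrative):

import theano
import theano.tensor as T
import lasagne

image = T.tensor4('image')
prob_out, turn_angle = network(image)

prob, angle = lasagne.layers.get_output([prob_out, turn_angle],
                                        deterministic=True)
forward = theano.function([image], [prob, angle])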