def _forward(self):
    net = {}
    net['input'] = layers.InputLayer(shape=(None, 1, 28, 28), input_var=self.X)
    net['conv1'] = layers.Conv2DLayer(net['input'], 32, (3, 3), W=init.Orthogonal(), pad=1)
    net['pool1'] = layers.MaxPool2DLayer(net['conv1'], (2, 2), stride=(2, 2))
    net['conv2'] = layers.Conv2DLayer(net['pool1'], 64, (3, 3), W=init.Orthogonal(), pad=1)
    net['pool2'] = layers.MaxPool2DLayer(net['conv2'], (2, 2), stride=(2, 2))
    net['conv3'] = layers.Conv2DLayer(net['pool2'], 128, (3, 3), W=init.Orthogonal(), pad=1)
    net['conv4'] = layers.Conv2DLayer(net['conv3'], 128, (3, 3), W=init.Orthogonal(), pad=1)
    net['pool3'] = layers.MaxPool2DLayer(net['conv4'], (2, 2), stride=(2, 2))
    net['flatten'] = layers.FlattenLayer(net['pool3'])
    net['out'] = layers.DenseLayer(net['flatten'], 10, b=None,
                                   nonlinearity=nonlinearities.softmax)
    return net
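# Hedged usage sketch (not part of the original source): assuming `self.X` is
# a theano.tensor.tensor4 and `net` is the dict returned by _forward() above,
# a prediction function could be compiled roughly like this:
#
#   import theano
#   probs = layers.get_output(net['out'], deterministic=True)
#   predict_fn = theano.function([self.X], probs)
#   y_hat = predict_fn(x_batch).argmax(axis=1)  # x_batch: (N, 1, 28, 28) float32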
def getNet6():
    inputLayer = layers.InputLayer(shape=(None, 1, imageShape[0], imageShape[1]))  # 120x120
    conv1Layer = layers.Conv2DLayer(inputLayer, num_filters=32, filter_size=(3,3), pad=(1,1), W=HeNormal('relu'), nonlinearity=rectify)  # 120x120
    conv2Layer = layers.Conv2DLayer(conv1Layer, num_filters=32, filter_size=(3,3), pad=(1,1), W=HeNormal('relu'), nonlinearity=rectify)  # 120x120
    pool1Layer = layers.MaxPool2DLayer(conv2Layer, pool_size=(2,2))  # 60x60
    conv3Layer = layers.Conv2DLayer(pool1Layer, num_filters=64, filter_size=(3,3), pad=(1,1), W=HeNormal('relu'), nonlinearity=rectify)  # 60x60
    conv4Layer = layers.Conv2DLayer(conv3Layer, num_filters=64, filter_size=(3,3), pad=(1,1), W=HeNormal('relu'), nonlinearity=rectify)  # 60x60
    conv5Layer = layers.Conv2DLayer(conv4Layer, num_filters=64, filter_size=(3,3), pad=(1,1), W=HeNormal('relu'), nonlinearity=rectify)  # 60x60
    pool2Layer = layers.MaxPool2DLayer(conv5Layer, pool_size=(2,2))  # 30x30
    conv6Layer = layers.Conv2DLayer(pool2Layer, num_filters=128, filter_size=(3,3), pad=(1,1), W=HeNormal('relu'), nonlinearity=rectify)  # 30x30
    conv7Layer = layers.Conv2DLayer(conv6Layer, num_filters=128, filter_size=(3,3), pad=(1,1), W=HeNormal('relu'), nonlinearity=rectify)  # 30x30
    conv8Layer = layers.Conv2DLayer(conv7Layer, num_filters=128, filter_size=(3,3), pad=(1,1), W=HeNormal('relu'), nonlinearity=rectify)  # 30x30
    pool3Layer = layers.MaxPool2DLayer(conv8Layer, pool_size=(2,2))  # 15x15
    conv9Layer = layers.Conv2DLayer(pool3Layer, num_filters=256, filter_size=(4,4), W=HeNormal('relu'), nonlinearity=rectify)  # 12x12
    flattenLayer = layers.FlattenLayer(conv9Layer)
    hidden1Layer = layers.DenseLayer(flattenLayer, num_units=1024, W=HeNormal('relu'), nonlinearity=rectify)
    dropout1Layer = layers.DropoutLayer(hidden1Layer, p=0.5)
    hidden2Layer = layers.DenseLayer(dropout1Layer, num_units=512, W=HeNormal('relu'), nonlinearity=rectify)
    dropout2Layer = layers.DropoutLayer(hidden2Layer, p=0.5)
    hidden3Layer = layers.DenseLayer(dropout2Layer, num_units=256, W=HeNormal('relu'), nonlinearity=rectify)
    dropout3Layer = layers.DropoutLayer(hidden3Layer, p=0.5)
    hidden4Layer = layers.DenseLayer(dropout3Layer, num_units=128, W=HeNormal('relu'), nonlinearity=rectify)
    outputLayer = layers.DenseLayer(hidden4Layer, num_units=10, W=HeNormal('relu'), nonlinearity=softmax)
    return outputLayer
def encoder(z_dim=100, input_var=None, num_units=512, vae=True):
    encoder = []
    lrelu = lasagne.nonlinearities.LeakyRectify(0.2)
    encoder.append(ll.InputLayer(shape=(None, 3, 80, 160), input_var=input_var))
    encoder.append(
        ll.Conv2DLayer(encoder[-1], num_filters=num_units / 8,
                       filter_size=(5, 5), stride=2, pad=2, nonlinearity=lrelu))
    encoder.append(
        ll.batch_norm(
            ll.Conv2DLayer(encoder[-1], num_filters=num_units / 4,
                           filter_size=(5, 5), stride=2, pad=2,
                           nonlinearity=lrelu)))
    encoder.append(
        ll.batch_norm(
            ll.Conv2DLayer(encoder[-1], num_filters=num_units / 2,
                           filter_size=(5, 5), stride=2, pad=2,
                           nonlinearity=lrelu)))
    encoder.append(
        ll.batch_norm(
            ll.Conv2DLayer(encoder[-1], num_filters=num_units,
                           filter_size=(5, 5), stride=2, pad=2,
                           nonlinearity=lrelu)))
    encoder.append(ll.FlattenLayer(encoder[-1]))
    if vae:
        enc_mu = ll.DenseLayer(encoder[-1], num_units=z_dim, nonlinearity=None)
        enc_logsigma = ll.DenseLayer(encoder[-1], num_units=z_dim, nonlinearity=None)
        l_z = GaussianSampleLayer(enc_mu, enc_logsigma, name='Z layer')
        encoder += [enc_mu, enc_logsigma, l_z]
    for layer in encoder:
        print layer.output_shape
    print ""
    return encoder
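# Note (assumption; GaussianSampleLayer is a custom layer not shown here): in
# a VAE encoder like this one, the sampling layer typically implements the
# reparameterization trick, z = mu + exp(logsigma) * eps with eps ~ N(0, I),
# so that gradients can flow back through enc_mu and enc_logsigma.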
def discriminator_3D(input_var=None, num_units=512, seq_length=4):
    discriminator = []
    lrelu = lasagne.nonlinearities.LeakyRectify(0.2)
    discriminator.append(
        ll.InputLayer(shape=(None, seq_length, 3, 80, 160), input_var=input_var))
    # The Lasagne documentation requires the shape
    # (batch_size, num_input_channels, input_depth, input_rows, input_columns),
    # so we need to change the dimension ordering.
    discriminator.append(ll.DimshuffleLayer(discriminator[-1], (0, 2, 1, 3, 4)))
    discriminator.append(
        ll.Conv3DLayer(discriminator[-1], num_filters=num_units / 8,
                       filter_size=5, stride=2, pad=2, nonlinearity=lrelu))
    discriminator.append(
        ll.batch_norm(
            ll.Conv3DLayer(discriminator[-1], num_filters=num_units / 4,
                           filter_size=5, stride=2, pad=2, nonlinearity=lrelu)))
    discriminator.append(
        ll.batch_norm(
            ll.Conv3DLayer(discriminator[-1], num_filters=num_units / 2,
                           filter_size=5, stride=2, pad=2, nonlinearity=lrelu)))
    discriminator.append(
        ll.batch_norm(
            ll.Conv3DLayer(discriminator[-1], num_filters=num_units,
                           filter_size=5, stride=2, pad=2, nonlinearity=lrelu)))
    discriminator.append(ll.FlattenLayer(discriminator[-1]))
    discriminator.append(
        ll.DenseLayer(discriminator[-1], num_units=1, nonlinearity=None))
    for layer in discriminator:
        print layer.output_shape
    print ""
    return discriminator
def build_auto_encoder_mnist_cnn(input_var=None):
    """ Generate an auto-encoder cnn using the Lasagne library """
    # Build encoder part
    network = lyr.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
    network = lyr.Conv2DLayer(network, 64, (5, 5), W=lasagne.init.Normal())
    network = lyr.MaxPool2DLayer(network, (2, 2))
    network = lyr.Conv2DLayer(network, 128, (5, 5), W=lasagne.init.Normal())
    network = lyr.MaxPool2DLayer(network, (2, 2))
    network = lyr.FlattenLayer(network)
    network = lyr.DenseLayer(network, 2048, W=lasagne.init.Normal())
    network = lyr.ReshapeLayer(network, (input_var.shape[0], 2048, 1, 1))

    # Build decoder part
    network = lyr.TransposedConv2DLayer(network, 128, (5, 5), W=lasagne.init.Normal())
    network = lyr.Upscale2DLayer(network, (2, 2))
    network = lyr.TransposedConv2DLayer(network, 64, (4, 4), W=lasagne.init.Normal())
    network = lyr.Upscale2DLayer(network, (2, 2))
    network = lyr.TransposedConv2DLayer(network, 1, (3, 3), W=lasagne.init.Normal(),
                                        nonlinearity=None)
    return network
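# Hedged training sketch (not from the original source; the learning rate and
# the squared-error objective are assumptions). The decoder output comes back
# to (N, 1, 28, 28), so the input itself can serve as the reconstruction target:
#
#   recon = lyr.get_output(network)
#   loss = lasagne.objectives.squared_error(recon, input_var).mean()
#   params = lyr.get_all_params(network, trainable=True)
#   updates = lasagne.updates.adam(loss, params, learning_rate=1e-3)
#   train_fn = theano.function([input_var], loss, updates=updates)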
def get_dense_xy(layer, deterministic=True):
    # Returns the flattened input x and the pre-bias/pre-nonlinearity output y
    # of a dense layer.
    x = L.get_output(L.FlattenLayer(layer.input_layer),
                     deterministic=deterministic)  # (N, D)
    w = layer.W  # (D, O)
    y = T.dot(x, w)  # (N, O)
    if layer.b is not None:
        y += T.shape_padaxis(layer.b, axis=0)
    return x, y
def init_discriminator(self, first_layer, input_var=None):
    """
    Initialize the DCGAN discriminator network using lasagne
    Returns the network
    """
    lrelu = nonlinearities.LeakyRectify(0.2)
    layers = []
    l_in = lyr.InputLayer((None, 3, 64, 64), input_var)
    layers.append(l_in)

    l_1 = lyr.Conv2DLayer(incoming=l_in, num_filters=first_layer,
                          filter_size=5, stride=2, pad=2, nonlinearity=lrelu)
    layers.append(l_1)
    l_2 = lyr.batch_norm(
        lyr.Conv2DLayer(incoming=l_1, num_filters=first_layer * 2,
                        filter_size=5, stride=2, pad=2, nonlinearity=lrelu))
    layers.append(l_2)
    l_3 = lyr.batch_norm(
        lyr.Conv2DLayer(incoming=l_2, num_filters=first_layer * 4,
                        filter_size=5, stride=2, pad=2, nonlinearity=lrelu))
    layers.append(l_3)
    l_4 = lyr.batch_norm(
        lyr.Conv2DLayer(incoming=l_3, num_filters=first_layer * 8,
                        filter_size=5, stride=2, pad=2, nonlinearity=lrelu))
    l_4 = lyr.FlattenLayer(l_4)
    layers.append(l_4)

    l_out = lyr.DenseLayer(incoming=l_4, num_units=1,
                           nonlinearity=nonlinearities.sigmoid)
    layers.append(l_out)

    if self.verbose:
        for i, layer in enumerate(layers):
            print 'discriminator layer %s output shape:' % i, layer.output_shape

    return l_out
def output_block(net, config, non_lin, verbose=True):
    """
    """
    # output setting
    out_acts = []
    for out_act in config.hyper_parameters.out_act:
        exec('from lasagne.nonlinearities import {}'.format(out_act))
        out_acts.append(eval(out_act))
    n_outs = config.hyper_parameters.n_out

    # Global Average Pooling
    last_conv_block_name = next(reversed(net))
    net['gap'] = L.GlobalPoolLayer(net[last_conv_block_name], name='gap')
    net['gap.bn'] = L.BatchNormLayer(net['gap'], name='gap.bn')
    n_features = net['gap.bn'].output_shape[-1]

    # feature layer
    net['fc'] = L.dropout(
        L.batch_norm(
            L.DenseLayer(net['gap.bn'], num_units=n_features,
                         nonlinearity=non_lin, name='fc')),
        name='fc.bn.do')

    # output (prediction)
    # Check whether the model is for MTL or STL.
    # The target is passed as a list regardless of whether it's MTL or STL
    # (the configuration checker verifies it).
    targets = config.target
    out_layer_names = []
    for target, n_out, out_act in zip(targets, n_outs, out_acts):
        out_layer_names.append('out.{}'.format(target))
        if target == 'self':
            net[out_layer_names[-1]], inputs = build_siamese(net['fc'])
        else:
            net[out_layer_names[-1]] = L.DenseLayer(net['fc'], num_units=n_out,
                                                    nonlinearity=out_act,
                                                    name=out_layer_names[-1])
            inputs = [net['input'].input_var]

    # make a concatenation layer just for save/load purposes
    net['IO'] = L.ConcatLayer(
        [L.FlattenLayer(net[target_layer_name])
         if target == 'self' else net[target_layer_name]
         for target_layer_name in out_layer_names],
        name='IO')

    if verbose:
        print(net['gap.bn'].output_shape)
        print(net['fc'].output_shape)
        for target in targets:
            print(net['out.{}'.format(target)].output_shape)

    return net, inputs
def discriminator(input_var=None, num_units=512):
    discriminator = []
    lrelu = lasagne.nonlinearities.LeakyRectify(0.2)
    discriminator.append(
        ll.InputLayer(shape=(None, 3, 80, 160), input_var=input_var))
    discriminator.append(
        ll.Conv2DLayer(discriminator[-1], num_filters=num_units / 8,
                       filter_size=(5, 5), stride=2, pad=2, nonlinearity=lrelu))
    discriminator.append(
        ll.batch_norm(
            ll.Conv2DLayer(discriminator[-1], num_filters=num_units / 4,
                           filter_size=(5, 5), stride=2, pad=2,
                           nonlinearity=lrelu)))
    discriminator.append(
        ll.batch_norm(
            ll.Conv2DLayer(discriminator[-1], num_filters=num_units / 2,
                           filter_size=(5, 5), stride=2, pad=2,
                           nonlinearity=lrelu)))
    discriminator.append(
        ll.batch_norm(
            ll.Conv2DLayer(discriminator[-1], num_filters=num_units,
                           filter_size=(5, 5), stride=2, pad=2,
                           nonlinearity=lrelu)))
    discriminator.append(ll.FlattenLayer(discriminator[-1]))
    discriminator.append(
        ll.DenseLayer(discriminator[-1], num_units=1, nonlinearity=None))
    for layer in discriminator:
        print layer.output_shape
    print ""
    return discriminator
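# Note (assumption, not stated by the source): the final DenseLayer is linear
# (nonlinearity=None), so the returned scores are unbounded. A hedged sketch of
# the two usual ways to consume them:
#
#   d_score = ll.get_output(discriminator[-1])        # (N, 1) raw scores
#   # ... used directly as a WGAN-style critic value, or
#   d_prob = lasagne.nonlinearities.sigmoid(d_score)  # standard GAN variant
#   loss = lasagne.objectives.binary_crossentropy(d_prob, y).mean()  # y hypothetical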
def _forward(self):
    net = {}
    net['input'] = layers.InputLayer(shape=(None, 1, 28, 28), input_var=self.X)
    net['conv'] = layers.Conv2DLayer(net['input'], 10, (5, 5), W=init.Orthogonal())
    net['pool'] = layers.MaxPool2DLayer(net['conv'], (3, 3), stride=(1, 1), pad=(1, 1))
    net['flatten'] = layers.FlattenLayer(net['pool'])
    net['out'] = layers.DenseLayer(net['flatten'], 10, b=None,
                                   nonlinearity=nonlinearities.softmax)
    return net
def __init__(self, input_layer,
             filter_sizes=((4, 4), (4, 4)),
             num_filters=(16, 16),
             strides=((2, 2), (2, 2)),
             hidden_act=nonlinearities.rectify):
    out_layer = input_layer
    for i, (filter_size, num_filter, stride) in enumerate(
            zip(filter_sizes, num_filters, strides)):
        out_layer = L.Conv2DLayer(out_layer,
                                  num_filters=num_filter,
                                  filter_size=filter_size,
                                  stride=stride,
                                  pad='full',
                                  nonlinearity=hidden_act)
    out_layer = L.FlattenLayer(out_layer)
    self.out_layer = out_layer
    self.output = L.get_output(self.out_layer)
def critic(z_dim=100, input_var=None, num_units=64, batch_size=64):
    encoder = []
    lrelu = lasagne.nonlinearities.LeakyRectify(0.2)
    encoder.append(
        ll.InputLayer(shape=(batch_size, 3, 64, 64), input_var=input_var))
    # no bn
    encoder.append(ll.Conv2DLayer(encoder[-1], num_filters=num_units,
                                  filter_size=(5, 5), stride=2, pad=2,
                                  nonlinearity=lrelu))
    # no bn
    encoder.append(ll.Conv2DLayer(encoder[-1], num_filters=num_units * 2,
                                  filter_size=(5, 5), stride=2, pad=2,
                                  nonlinearity=lrelu))
    # no bn
    encoder.append(ll.Conv2DLayer(encoder[-1], num_filters=num_units * 4,
                                  filter_size=(5, 5), stride=2, pad=2,
                                  nonlinearity=lrelu))
    # no bn
    encoder.append(ll.Conv2DLayer(encoder[-1], num_filters=num_units * 8,
                                  filter_size=(5, 5), stride=2, pad=2,
                                  nonlinearity=lrelu))
    encoder.append(ll.FlattenLayer(encoder[-1]))
    encoder.append(ll.DenseLayer(encoder[-1], num_units=1, nonlinearity=None))
    for layer in encoder:
        print layer.output_shape
    print ""
    return encoder
def cls_net(_incoming):
    _drop1 = L.DropoutLayer(_incoming, p=0.2, rescale=True)
    _conv1 = batch_norm(
        conv(_drop1, num_filters=64, filter_size=7, stride=3, pad=0,
             W=I.Normal(0.02), b=None, nonlinearity=NL.rectify))
    _drop2 = L.DropoutLayer(_conv1, p=0.2, rescale=True)
    _conv2 = batch_norm(
        conv(_drop2, num_filters=128, filter_size=3, stride=1, pad=0,
             W=I.Normal(0.02), b=None, nonlinearity=NL.rectify))
    _pool2 = L.MaxPool2DLayer(_conv2, pool_size=2)
    _fc1 = batch_norm(
        L.DenseLayer(L.FlattenLayer(_pool2, outdim=2), 256,
                     W=I.Normal(0.02), b=None, nonlinearity=NL.rectify))
    _fc2 = L.DenseLayer(_fc1, ny, W=I.Normal(0.02), b=None,
                        nonlinearity=NL.sigmoid)
    _aux = [
        tanh(_conv1),
        tanh(_conv2),
        tanh(L.DimshuffleLayer(_fc1, (0, 1, 'x', 'x'))),
        L.DimshuffleLayer(_fc2, (0, 1, 'x', 'x'))
    ]
    return _aux, _fc2
def _setup_model(self, num_features, num_rows):
    if self.fit_intercept:
        b = lasagne.init.Constant(0.)
    else:
        b = None

    X_sym = T.matrix()
    y_sym = T.ivector()
    bag_labels = T.ivector()

    input_layer = layers.InputLayer(shape=(num_rows, num_features),
                                    input_var=X_sym)
    if self.hidden_units <= 1:
        instance_log_odds = layers.DenseLayer(
            input_layer, num_units=1,
            W=lasagne.init.Constant(0.), b=b,
            nonlinearity=lasagne.nonlinearities.linear)
    else:
        instance_log_odds = layers.DenseLayer(
            input_layer, num_units=self.hidden_units,
            W=lasagne.init.GlorotUniform(1.0), b=b,
            nonlinearity=lasagne.nonlinearities.linear)
        instance_log_odds = layers.FeaturePoolLayer(
            instance_log_odds, pool_size=self.hidden_units,
            pool_function=T.max)
    instance_log_odds = layers.FlattenLayer(instance_log_odds, outdim=1)

    instance_log_odds_output = layers.get_output(instance_log_odds, X_sym)
    instance_probs_output = T.nnet.sigmoid(instance_log_odds_output)
    self.all_params = layers.get_all_params(instance_log_odds, trainable=True)

    bag_mapper = T.transpose(
        T.extra_ops.to_one_hot(bag_labels, T.max(bag_labels) + 1))
    # if previous layers were probabilities:
    # bag_probs = 1 - T.exp(T.dot(bag_mapper, T.log(1 - instance_probs_output)))
    # if previous layers were log odds:
    bag_probs = 1 - T.exp(
        T.dot(bag_mapper, -T.nnet.softplus(instance_log_odds_output)))

    if self.C is None:
        regularization = 0
    else:
        # I scale the penalty by num_rows since the likelihood
        # term is the average over instances, instead of the sum
        # (like sklearn). This is to make the learning rate not
        # depend on the dataset (or minibatch) size, but it means
        # we have to know the minibatch size here in order for C
        # to be the same as for sklearn.
        #
        # Note: this applies the same regularization to all
        # "regularizable" parameters in the whole network
        # (everything but the bias terms). I need to think more
        # about whether this makes sense for the deep networks,
        # though it's probably a reasonable starting point.
        regularization = (
            1.0 / self.C / num_rows *
            lasagne.regularization.regularize_network_params(
                instance_log_odds, self.penalty))

    # This chunk is a bit repetitive and could be simplified:
    bag_loss = T.mean(lasagne.objectives.binary_crossentropy(
        bag_probs, y_sym)) + regularization
    self.f_train_bag = theano.function(
        [X_sym, y_sym, bag_labels], [bag_loss],
        updates=self.updater(bag_loss, self.all_params,
                             learning_rate=self.learning_rate))

    nobag_loss = T.mean(lasagne.objectives.binary_crossentropy(
        instance_probs_output, y_sym)) + regularization
    self.f_train_nobag = theano.function(
        [X_sym, y_sym], [nobag_loss],
        updates=self.updater(nobag_loss, self.all_params,
                             learning_rate=self.learning_rate))

    self.f_bag_logprobs = theano.function([X_sym, bag_labels],
                                          T.log(bag_probs))
    self.f_logprobs = theano.function([X_sym], T.log(instance_probs_output))
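# Hedged call sketch (shapes and dtypes are assumptions inferred from the
# symbolic variables above): X is float32 with shape (num_rows, num_features),
# bag_ids is an int32 vector mapping each row to a bag index, and y_bag is an
# int32 vector with one binary label per bag.
#
#   (loss,) = model.f_train_bag(X, y_bag, bag_ids)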
def mask_loss(loss, mask):
    return loss * lo(LL.FlattenLayer(mask, 1))
def __init__(self, n_inputs, n_outputs, n_components=1, n_filters=[],
             n_hiddens=[10, 10], n_rnn=None, impute_missing=True, seed=None,
             svi=True):
    """Initialize a mixture density network with custom layers

    Parameters
    ----------
    n_inputs : int or tuple of ints or list of ints
        Dimensionality of input
    n_outputs : int
        Dimensionality of output
    n_components : int
        Number of components of the mixture density
    n_filters : list of ints
        Number of filters per convolutional layer
    n_hiddens : list of ints
        Number of hidden units per fully connected layer
    n_rnn : None or int
        Number of RNN units
    impute_missing : bool
        If set to True, learns replacement value for NaNs, otherwise those
        inputs are set to zero
    seed : int or None
        If provided, random number generator will be seeded
    svi : bool
        Whether to use SVI version or not
    """
    self.impute_missing = impute_missing
    self.n_components = n_components
    self.n_filters = n_filters
    self.n_hiddens = n_hiddens
    self.n_outputs = n_outputs
    self.svi = svi

    self.iws = tt.vector('iws', dtype=dtype)

    if n_rnn is None:
        self.n_rnn = 0
    else:
        self.n_rnn = n_rnn
    if self.n_rnn > 0 and len(self.n_filters) > 0:
        raise NotImplementedError

    self.seed = seed
    if seed is not None:
        self.rng = np.random.RandomState(seed=seed)
    else:
        self.rng = np.random.RandomState()
    lasagne.random.set_rng(self.rng)

    # cast n_inputs to tuple
    if type(n_inputs) is int:
        self.n_inputs = (n_inputs, )
    elif type(n_inputs) is list:
        self.n_inputs = tuple(n_inputs)
    elif type(n_inputs) is tuple:
        self.n_inputs = n_inputs
    else:
        raise ValueError('n_inputs type not supported')

    # compose layers
    self.layer = collections.OrderedDict()

    # stats : input placeholder, (batch, *self.n_inputs)
    if len(self.n_inputs) + 1 == 2:
        self.stats = tt.matrix('stats', dtype=dtype)
    elif len(self.n_inputs) + 1 == 3:
        self.stats = tt.tensor3('stats', dtype=dtype)
    elif len(self.n_inputs) + 1 == 4:
        self.stats = tt.tensor4('stats', dtype=dtype)
    else:
        raise NotImplementedError

    # input layer
    self.layer['input'] = ll.InputLayer((None, *self.n_inputs),
                                        input_var=self.stats)

    # learn replacement values
    if self.impute_missing:
        self.layer['missing'] = dl.ImputeMissingLayer(
            last(self.layer), n_inputs=self.n_inputs)
    else:
        self.layer['missing'] = dl.ReplaceMissingLayer(
            last(self.layer), n_inputs=self.n_inputs)

    # recurrent neural net
    # expects shape (batch, sequence_length, num_inputs)
    if self.n_rnn > 0:
        if len(self.n_inputs) == 1:
            rs = (-1, *self.n_inputs, 1)
            self.layer['rnn_reshape'] = ll.ReshapeLayer(last(self.layer), rs)
        self.layer['rnn'] = ll.GRULayer(last(self.layer), n_rnn,
                                        only_return_final=True)

    # convolutional layers
    # expects shape (batch, num_input_channels, input_rows, input_columns)
    if len(self.n_filters) > 0:
        # reshape
        if len(self.n_inputs) == 1:
            raise NotImplementedError
        elif len(self.n_inputs) == 2:
            rs = (-1, 1, *self.n_inputs)
        else:
            rs = None
        if rs is not None:
            self.layer['conv_reshape'] = ll.ReshapeLayer(last(self.layer), rs)

        # add layers
        for l in range(len(n_filters)):
            self.layer['conv_' + str(l + 1)] = ll.Conv2DLayer(
                name='c' + str(l + 1),
                incoming=last(self.layer),
                num_filters=n_filters[l],
                filter_size=3,
                stride=(2, 2),
                pad=0,
                untie_biases=False,
                W=lasagne.init.GlorotUniform(),
                b=lasagne.init.Constant(0.),
                nonlinearity=lnl.rectify,
                flip_filters=True,
                convolution=tt.nnet.conv2d)

    # flatten
    self.layer['flatten'] = ll.FlattenLayer(incoming=last(self.layer),
                                            outdim=2)

    # hidden layers
    for l in range(len(n_hiddens)):
        self.layer['hidden_' + str(l + 1)] = dl.FullyConnectedLayer(
            last(self.layer), n_units=n_hiddens[l], svi=svi,
            name='h' + str(l + 1))

    last_hidden = last(self.layer)

    # mixture layers
    self.layer['mixture_weights'] = dl.MixtureWeightsLayer(
        last_hidden, n_units=n_components, actfun=lnl.softmax, svi=svi,
        name='weights')
    self.layer['mixture_means'] = dl.MixtureMeansLayer(
        last_hidden, n_components=n_components, n_dim=n_outputs, svi=svi,
        name='means')
    self.layer['mixture_precisions'] = dl.MixturePrecisionsLayer(
        last_hidden, n_components=n_components, n_dim=n_outputs, svi=svi,
        name='precisions')
    last_mog = [
        self.layer['mixture_weights'],
        self.layer['mixture_means'],
        self.layer['mixture_precisions']
    ]

    # output placeholder
    self.params = tt.matrix('params', dtype=dtype)  # (batch, self.n_outputs)

    # mixture parameters
    # a : weights, matrix with shape (batch, n_components)
    # ms : means, list of len n_components with (batch, n_dim)
    # Us : precision factors, n_components list with (batch, n_dim, n_dim)
    # ldetUs : log determinants of precisions, n_comp list with (batch, )
    self.a, self.ms, precision_out = ll.get_output(last_mog,
                                                   deterministic=False)
    self.Us = precision_out['Us']
    self.ldetUs = precision_out['ldetUs']

    self.comps = {
        **{'a': self.a},
        **{'m' + str(i): self.ms[i] for i in range(self.n_components)},
        **{'U' + str(i): self.Us[i] for i in range(self.n_components)}
    }

    # log probability of y given the mixture distribution
    # lprobs_comps : log probs per component, list of len n_components with (batch, )
    # lprobs : log probs of mixture, (batch, )
    self.lprobs_comps = [
        -0.5 * tt.sum(tt.sum(
            (self.params - m).dimshuffle([0, 'x', 1]) * U, axis=2)**2,
            axis=1) + ldetU
        for m, U, ldetU in zip(self.ms, self.Us, self.ldetUs)
    ]
    self.lprobs = (
        MyLogSumExp(tt.stack(self.lprobs_comps, axis=1) + tt.log(self.a),
                    axis=1)
        - (0.5 * self.n_outputs * np.log(2 * np.pi))).squeeze()

    # the quantities from above again, but with deterministic=True
    # --- in the svi case, this will disable injection of randomness;
    # the mean of weights is used instead
    self.da, self.dms, dprecision_out = ll.get_output(last_mog,
                                                      deterministic=True)
    self.dUs = dprecision_out['Us']
    self.dldetUs = dprecision_out['ldetUs']

    self.dcomps = {
        **{'a': self.da},
        **{'m' + str(i): self.dms[i] for i in range(self.n_components)},
        **{'U' + str(i): self.dUs[i] for i in range(self.n_components)}
    }

    self.dlprobs_comps = [
        -0.5 * tt.sum(tt.sum(
            (self.params - m).dimshuffle([0, 'x', 1]) * U, axis=2)**2,
            axis=1) + ldetU
        for m, U, ldetU in zip(self.dms, self.dUs, self.dldetUs)
    ]
    self.dlprobs = (
        MyLogSumExp(tt.stack(self.dlprobs_comps, axis=1) + tt.log(self.da),
                    axis=1)
        - (0.5 * self.n_outputs * np.log(2 * np.pi))).squeeze()

    # parameters of network
    self.aps = ll.get_all_params(last_mog)           # all parameters
    self.mps = ll.get_all_params(last_mog, mp=True)  # means
    self.sps = ll.get_all_params(last_mog, sp=True)  # log stds

    # weight and bias parameter sets as separate lists
    self.mps_wp = ll.get_all_params(last_mog, mp=True, wp=True)
    self.sps_wp = ll.get_all_params(last_mog, sp=True, wp=True)
    self.mps_bp = ll.get_all_params(last_mog, mp=True, bp=True)
    self.sps_bp = ll.get_all_params(last_mog, sp=True, bp=True)

    # theano functions
    self.compile_funs()
def build_rnn_net(input_var=None, input_width=None, input_dim=None,
                  nin_units=80, h_num_units=[64, 64], h_grad_clip=1.0,
                  output_width=1):
    """
    A stacked bidirectional RNN network for regression, alternating
    with dense layers and merging of the two directions, followed by
    a feature mean pooling in the time direction, with a linear
    dim-reduction layer at the start. Dropout is added for better
    generalization.

    Args:
        input_var (theano 3-tensor): minibatch of input sequence vectors
        input_width (int): length of input sequences
        nin_units (list): number of NIN features
        h_num_units (int list): no. of units in hidden layer in each stack,
                                from bottom to top
        h_grad_clip (float): gradient clipping maximum value
        output_width (int): size of output layer (e.g. =1 for 1D regression)

    Returns:
        output layer (Lasagne layer object)
    """
    # Non-linearity hyperparameter
    leaky_ratio = 0.3
    nonlin = lasagne.nonlinearities.LeakyRectify(leakiness=leaky_ratio)

    # Input layer
    l_in = LL.InputLayer(shape=(None, input_width, input_dim),
                         input_var=input_var)
    batchsize = l_in.input_var.shape[0]

    # NIN layer
    # l_in_1 = LL.NINLayer(l_in, num_units=nin_units,
    #                      nonlinearity=lasagne.nonlinearities.linear)
    l_in_1 = l_in
    # l_in_d = LL.DropoutLayer(l_in, p=0.8)
    # Do not use dropout on the input for now, since the first RNN layer is 256;
    # currently, we do not drop features.

    # RNN layers
    # dropout in the first two (of three) or three (of five) layers
    counter = -1
    drop_ends = 2
    for h in h_num_units:
        counter += 1
        # Forward layers
        l_forward_0 = LL.RecurrentLayer(
            l_in_1,
            nonlinearity=nonlin,
            num_units=h,
            W_in_to_hid=lasagne.init.Normal(0.01, 0),
            # W_in_to_hid=lasagne.init.He(initializer, math.sqrt(2/(1+0.15**2))),
            W_hid_to_hid=lasagne.init.Orthogonal(
                math.sqrt(2 / (1 + leaky_ratio**2))),
            backwards=False,
            learn_init=True,
            grad_clipping=h_grad_clip,
            # gradient_steps=20,
            unroll_scan=True,
            precompute_input=True)

        l_forward_0a = LL.ReshapeLayer(l_forward_0, (-1, h))
        if counter < drop_ends and counter % 2 != 0:
            l_forward_0a = LL.DropoutLayer(l_forward_0a, p=0.2)
        l_forward_0b = LL.DenseLayer(l_forward_0a, num_units=h,
                                     nonlinearity=nonlin)
        l_forward_0c = LL.ReshapeLayer(l_forward_0b,
                                       (batchsize, input_width, h))
        l_forward_out = l_forward_0c

        # Backward layers
        l_backward_0 = LL.RecurrentLayer(
            l_in_1,
            nonlinearity=nonlin,
            num_units=h,
            W_in_to_hid=lasagne.init.Normal(0.01, 0),
            W_hid_to_hid=lasagne.init.Orthogonal(
                math.sqrt(2 / (1 + leaky_ratio**2))),
            backwards=True,
            learn_init=True,
            grad_clipping=h_grad_clip,
            unroll_scan=True,
            precompute_input=True)

        l_backward_0a = LL.ReshapeLayer(l_backward_0, (-1, h))
        if counter < drop_ends and counter % 2 == 0:
            l_backward_0a = LL.DropoutLayer(l_backward_0a, p=0.2)
        l_backward_0b = LL.DenseLayer(l_backward_0a, num_units=h,
                                      nonlinearity=nonlin)
        l_backward_0c = LL.ReshapeLayer(l_backward_0b,
                                        (batchsize, input_width, h))
        l_backward_out = l_backward_0c

        l_in_1 = LL.ElemwiseSumLayer([l_forward_out, l_backward_out])

    # Output layers
    network_0a = LL.DenseLayer(l_in_1, num_units=1, num_leading_axes=2,
                               nonlinearity=nonlin)
    output_net = LL.FlattenLayer(network_0a, outdim=2)

    return output_net
def _initialize_network(self, img_input_shape, misc_len, output_size,
                        img_input, misc_input=None, **kwargs):
    input_layers = []
    inputs = [img_input]
    # weights_init = lasagne.init.GlorotUniform("relu")
    weights_init = lasagne.init.HeNormal("relu")

    network = ls.InputLayer(shape=img_input_shape, input_var=img_input)
    input_layers.append(network)
    network = ls.Conv2DLayer(network, num_filters=32, filter_size=8,
                             nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(0.1), stride=4)
    network = ls.Conv2DLayer(network, num_filters=64, filter_size=4,
                             nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(0.1), stride=2)
    network = ls.Conv2DLayer(network, num_filters=64, filter_size=3,
                             nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(0.1), stride=1)
    network = ls.FlattenLayer(network)

    if self.misc_state_included:
        layers_for_merge = []

        health_inputs = 4
        units_per_health_input = 100
        for i in range(health_inputs):
            oh_input = lasagne.utils.one_hot(misc_input[:, i] - 1,
                                             units_per_health_input)
            health_input_layer = ls.InputLayer(
                shape=(None, units_per_health_input), input_var=oh_input)
            inputs.append(oh_input)
            input_layers.append(health_input_layer)
            layers_for_merge.append(health_input_layer)

        time_inputs = 4
        # TODO set this somewhere else cause it depends on skiprate and timeout ....
        units_pertime_input = 525
        for i in range(health_inputs, health_inputs + time_inputs):
            oh_input = lasagne.utils.one_hot(misc_input[:, i] - 1,
                                             units_pertime_input)
            time_input_layer = ls.InputLayer(
                shape=(None, units_pertime_input), input_var=oh_input)
            inputs.append(oh_input)
            input_layers.append(time_input_layer)
            layers_for_merge.append(time_input_layer)

        other_misc_input = misc_input[:, health_inputs + time_inputs:]
        other_misc_shape = (None, misc_len - health_inputs - time_inputs)
        other_misc_input_layer = ls.InputLayer(shape=other_misc_shape,
                                               input_var=other_misc_input)
        input_layers.append(other_misc_input_layer)
        layers_for_merge.append(other_misc_input_layer)
        inputs.append(other_misc_input)

        layers_for_merge.append(network)
        network = ls.ConcatLayer(layers_for_merge)

    network = ls.DenseLayer(network, 512, nonlinearity=rectify,
                            W=weights_init, b=lasagne.init.Constant(0.1))
    network = ls.DenseLayer(network, output_size, nonlinearity=None,
                            b=lasagne.init.Constant(.1))
    return network, input_layers, inputs
def _initialize_network(self, img_input_shape, misc_len, output_size,
                        img_input, misc_input=None, **kwargs):
    input_layers = []
    inputs = [img_input]
    # weights_init = lasagne.init.GlorotUniform("relu")
    weights_init = lasagne.init.HeNormal("relu")

    network = ls.InputLayer(shape=img_input_shape, input_var=img_input)
    input_layers.append(network)
    network = ls.Conv2DLayer(network, num_filters=32, filter_size=8,
                             nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(0.1), stride=4)
    network = ls.Conv2DLayer(network, num_filters=64, filter_size=4,
                             nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(0.1), stride=2)
    network = ls.Conv2DLayer(network, num_filters=64, filter_size=3,
                             nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(0.1), stride=1)
    network = ls.FlattenLayer(network)

    if self.misc_state_included:
        health_inputs = 4
        units_per_health_input = 100
        layers_for_merge = []
        for i in range(health_inputs):
            health_input_layer = ls.InputLayer(
                shape=(None, 1), input_var=misc_input[:, i:i + 1])
            health_layer = ls.DenseLayer(health_input_layer,
                                         units_per_health_input,
                                         nonlinearity=rectify,
                                         W=weights_init,
                                         b=lasagne.init.Constant(0.1))
            health_layer = ls.DenseLayer(health_layer,
                                         units_per_health_input,
                                         nonlinearity=rectify,
                                         W=weights_init,
                                         b=lasagne.init.Constant(0.1))
            inputs.append(misc_input[:, i:i + 1])
            input_layers.append(health_input_layer)
            layers_for_merge.append(health_layer)

        misc_input_layer = ls.InputLayer(
            shape=(None, misc_len - health_inputs),
            input_var=misc_input[:, health_inputs:])
        input_layers.append(misc_input_layer)
        layers_for_merge.append(misc_input_layer)
        inputs.append(misc_input[:, health_inputs:])

        layers_for_merge.append(network)
        network = ls.ConcatLayer(layers_for_merge)

    network = ls.DenseLayer(network, 512, nonlinearity=rectify,
                            W=weights_init, b=lasagne.init.Constant(0.1))
    network = ls.DenseLayer(network, output_size, nonlinearity=None,
                            b=lasagne.init.Constant(.1))
    return network, input_layers, inputs
def _initialize_network(self, img_input_shape, misc_len, output_size,
                        img_input, misc_input=None, **kwargs):
    input_layers = []
    inputs = [img_input]
    # weights_init = lasagne.init.GlorotUniform("relu")
    weights_init = lasagne.init.HeNormal("relu")

    network = ls.InputLayer(shape=img_input_shape, input_var=img_input)
    input_layers.append(network)
    network = ls.Conv2DLayer(network, num_filters=32, filter_size=8,
                             nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(.1), stride=4)
    network = ls.Conv2DLayer(network, num_filters=64, filter_size=4,
                             nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(.1), stride=2)
    network = ls.Conv2DLayer(network, num_filters=64, filter_size=3,
                             nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(.1), stride=1)

    if self.misc_state_included:
        inputs.append(misc_input)
        network = ls.FlattenLayer(network)
        misc_input_layer = ls.InputLayer(shape=(None, misc_len),
                                         input_var=misc_input)
        input_layers.append(misc_input_layer)
        if "additional_misc_layer" in kwargs:
            misc_input_layer = ls.DenseLayer(
                misc_input_layer, int(kwargs["additional_misc_layer"]),
                nonlinearity=rectify, W=weights_init,
                b=lasagne.init.Constant(0.1))
        network = ls.ConcatLayer([network, misc_input_layer])

    # Dueling heads start here.
    advantages_branch = ls.DenseLayer(network, 256, nonlinearity=rectify,
                                      W=weights_init,
                                      b=lasagne.init.Constant(.1))
    advantages_branch = ls.DenseLayer(advantages_branch, output_size,
                                      nonlinearity=None,
                                      b=lasagne.init.Constant(.1))

    state_value_branch = ls.DenseLayer(network, 256, nonlinearity=rectify,
                                       W=weights_init,
                                       b=lasagne.init.Constant(.1))
    state_value_branch = ls.DenseLayer(state_value_branch, 1,
                                       nonlinearity=None,
                                       b=lasagne.init.Constant(.1))

    network = DuellingMergeLayer([advantages_branch, state_value_branch])
    return network, input_layers, inputs
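# Note (assumption; DuellingMergeLayer is a custom layer defined elsewhere in
# this codebase): dueling DQN heads are usually combined as
# Q(s, a) = V(s) + A(s, a) - mean_a A(s, a), which is presumably what the
# merge layer computes from the advantage and state-value branches above.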
def makeDiscriminator(self, aNBatch, aX, aXShape, aY, aYSize):
    # (D1)
    yb = aY.dimshuffle(0, 1, 'x', 'x')
    # (D2)
    layer_X = ll.InputLayer(shape=aXShape, input_var=aX, name='lX')
    layer_Y = ll.InputLayer(shape=(aNBatch, aYSize), input_var=aY, name='lY')
    dis = self.conv_cond_concat(layer_X, yb, aYSize)
    # (D3), (D4)
    if self.IS_DIS_BIN:
        dis = binary_net_ex.Conv2DLayer(dis,
                                        num_filters=NUM_DIS_FILTERS,
                                        filter_size=(5, 5),
                                        stride=(2, 2),
                                        nonlinearity=ln.LeakyRectify(0.2),  # TODO
                                        pad=2,
                                        binary=True,
                                        stochastic=IS_STOCHASTIC,
                                        H=H,
                                        W_LR_scale=W_LR_scale)
    else:
        dis = ll.Conv2DLayer(dis,
                             num_filters=NUM_DIS_FILTERS,
                             filter_size=(5, 5),
                             stride=(2, 2),
                             nonlinearity=ln.LeakyRectify(0.2),
                             pad=2)
    print 'D4:', dis.output_shape  # (128, 64, 14, 14)
    # (D5)
    dis = self.conv_cond_concat(dis, yb, aYSize)
    # (D6)
    if self.IS_DIS_BIN:
        dis = binary_net_ex.Conv2DLayer(dis,
                                        num_filters=NUM_DIS_FILTERS * 2,
                                        filter_size=(5, 5),
                                        stride=(2, 2),
                                        nonlinearity=None,
                                        pad=2,
                                        binary=True,
                                        stochastic=IS_STOCHASTIC,
                                        H=H,
                                        W_LR_scale=W_LR_scale)
    else:
        dis = ll.Conv2DLayer(dis,
                             num_filters=NUM_DIS_FILTERS * 2,
                             filter_size=(5, 5),
                             stride=(2, 2),
                             nonlinearity=None,
                             pad=2)
    print 'D6:', dis.output_shape  # (128, 128, 7, 7)
    dis = ll.BatchNormLayer(dis, epsilon=EPSILON, alpha=ALPHA)
    dis = ll.NonlinearityLayer(dis, nonlinearity=ln.LeakyRectify(0.2))  # TODO
    # (D7)
    dis = ll.FlattenLayer(dis, outdim=2)
    print 'D7:', dis.output_shape  # (128, 6272)
    # (D8)
    dis = ll.ConcatLayer([dis, layer_Y], axis=1)
    # (D9)
    if self.IS_DIS_BIN:
        dis = binary_net_ex.DenseLayer(dis,
                                       num_units=NUM_DIS_FC_UNITS,
                                       binary=True,
                                       stochastic=IS_STOCHASTIC,
                                       H=H,
                                       W_LR_scale=W_LR_scale,
                                       b=None,  # no bias
                                       nonlinearity=None)
    else:
        dis = ll.DenseLayer(dis, num_units=NUM_DIS_FC_UNITS)
    dis = ll.BatchNormLayer(dis, epsilon=EPSILON, alpha=ALPHA)
    dis = ll.NonlinearityLayer(dis, nonlinearity=ln.LeakyRectify(0.2))  # TODO
    # (D10)
    dis = ll.ConcatLayer([dis, layer_Y], axis=1)
    # (D11) OUTPUT layer
    dis = ll.DenseLayer(dis, num_units=1, nonlinearity=ln.sigmoid)
    print 'D11:', dis.output_shape  # (128, 1)
    self.dis = dis
    return dis, layer_X, layer_Y
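# Note (assumption; conv_cond_concat is a helper defined elsewhere in this
# class): in conditional GANs such a helper typically broadcasts the label
# tensor yb to the spatial size of the feature maps and concatenates it along
# the channel axis, so every convolutional stage sees the condition.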
def __init__(self, incomings, vocab_size, emb_size,
             A=lasagne.init.Normal(std=0.1),
             C=lasagne.init.Normal(std=0.1),
             AT=lasagne.init.Normal(std=0.1),
             CT=lasagne.init.Normal(std=0.1),
             nonlin=lasagne.nonlinearities.softmax,
             RN=0., **kwargs):
    super(MemoryLayer, self).__init__(incomings, **kwargs)

    self.vocab_size, self.emb_size = vocab_size, emb_size
    self.nonlin = nonlin
    self.RN = RN
    # self.A, self.C, self.AT, self.CT = A, C, AT, CT

    batch_size, c_count, c_length = self.input_shapes[0]
    _, q_count, _ = self.input_shapes[2]

    self.l_c_in = LL.InputLayer(shape=(batch_size, c_count, c_length))
    self.l_c_in_pe = LL.InputLayer(shape=(batch_size, c_count, c_length,
                                          self.emb_size))
    self.l_u_in = LL.InputLayer(shape=(batch_size, q_count, self.emb_size))

    self.l_c_A_enc = EncodingFullLayer((self.l_c_in, self.l_c_in_pe),
                                       self.vocab_size, self.emb_size, A, AT)
    self.l_c_C_enc = EncodingFullLayer((self.l_c_in, self.l_c_in_pe),
                                       self.vocab_size, self.emb_size, C, CT)
    self.A, self.C = self.l_c_A_enc.W, self.l_c_C_enc.W
    self.AT, self.CT = self.l_c_A_enc.WT, self.l_c_C_enc.WT

    if len(incomings) == 4:  # if the probabilities over sentences are also given
        self.l_in_ac_prob = LL.InputLayer(shape=(batch_size, c_count, emb_size))
        self.l_c_A_enc_ = LL.ElemwiseMergeLayer(
            (self.l_c_A_enc, self.l_in_ac_prob), merge_function=T.mul)
        self.l_c_C_enc_ = LL.ElemwiseMergeLayer(
            (self.l_c_C_enc, self.l_in_ac_prob), merge_function=T.mul)

    self.l_u_in_tr = LL.DimshuffleLayer(self.l_u_in, pattern=(0, 2, 1))
    if len(incomings) == 4:
        self.l_p = BatchedDotLayer((self.l_c_A_enc_, self.l_u_in_tr))
    else:
        self.l_p = BatchedDotLayer((self.l_c_A_enc, self.l_u_in_tr))

    if self.l_p.output_shape[2] == 1:
        self.l_p = LL.FlattenLayer(self.l_p, outdim=2)
        # self.l_p = LL.DimshuffleLayer(self.l_p, (0, 1))

    if self.nonlin == 'MaxOut':
        raise NotImplementedError
    self.l_p = LL.NonlinearityLayer(self.l_p, nonlinearity=nonlin)
    self.l_p = LL.DimshuffleLayer(self.l_p, (0, 1, 'x'))
    # self.l_p = LL.ReshapeLayer(self.l_p, self.l_p.output_shape + (1,))
    self.l_p = LL.ExpressionLayer(self.l_p, lambda X: X.repeat(emb_size, 2),
                                  output_shape='auto')
    # self.l_p = RepeatDimLayer(self.l_p, emb_size, axis=2)
    if len(incomings) == 4:
        self.l_pc = LL.ElemwiseMergeLayer((self.l_p, self.l_c_C_enc_),
                                          merge_function=T.mul)
    else:
        self.l_pc = LL.ElemwiseMergeLayer((self.l_p, self.l_c_C_enc),
                                          merge_function=T.mul)
    self.l_o = LL.ExpressionLayer(self.l_pc, lambda X: X.sum(1),
                                  output_shape='auto')
    # self.l_o = SumLayer(self.l_pc, axis=1)
    self.l_o = LL.DimshuffleLayer(self.l_o, pattern=(0, 'x', 1))
    self.l_o_u = LL.ElemwiseMergeLayer((self.l_o, self.l_u_in),
                                       merge_function=T.add)

    params = LL.helper.get_all_params(self.l_o_u, trainable=True)
    values = LL.helper.get_all_param_values(self.l_o_u, trainable=True)
    for p, v in zip(params, values):
        self.add_param(p, v.shape, name=p.name)
def build_1Dregression_v1(input_var=None, input_width=None, nin_units=12,
                          h_num_units=[64, 64], h_grad_clip=1.0,
                          output_width=1):
    """
    A stacked bidirectional RNN network for regression, alternating
    with dense layers and merging of the two directions, followed by
    a feature mean pooling in the time direction, with a linear
    dim-reduction layer at the start.

    Args:
        input_var (theano 3-tensor): minibatch of input sequence vectors
        input_width (int): length of input sequences
        nin_units (list): number of NIN features
        h_num_units (int list): no. of units in hidden layer in each stack,
                                from bottom to top
        h_grad_clip (float): gradient clipping maximum value
        output_width (int): size of output layer (e.g. =1 for 1D regression)

    Returns:
        output layer (Lasagne layer object)
    """
    # Non-linearity hyperparameter
    nonlin = lasagne.nonlinearities.LeakyRectify(leakiness=0.15)

    # Input layer
    l_in = LL.InputLayer(shape=(None, 22, input_width), input_var=input_var)
    batchsize = l_in.input_var.shape[0]

    # NIN layer
    l_in = LL.NINLayer(l_in, num_units=nin_units,
                       nonlinearity=lasagne.nonlinearities.linear)
    l_in_1 = LL.DimshuffleLayer(l_in, (0, 2, 1))

    # RNN layers
    for h in h_num_units:
        # Forward layers
        l_forward_0 = LL.RecurrentLayer(l_in_1,
                                        nonlinearity=nonlin,
                                        num_units=h,
                                        backwards=False,
                                        learn_init=True,
                                        grad_clipping=h_grad_clip,
                                        unroll_scan=True,
                                        precompute_input=True)
        l_forward_0a = LL.ReshapeLayer(l_forward_0, (-1, h))
        l_forward_0b = LL.DenseLayer(l_forward_0a, num_units=h,
                                     nonlinearity=nonlin)
        l_forward_0c = LL.ReshapeLayer(l_forward_0b,
                                       (batchsize, input_width, h))

        # Backward layers
        l_backward_0 = LL.RecurrentLayer(l_in_1,
                                         nonlinearity=nonlin,
                                         num_units=h,
                                         backwards=True,
                                         learn_init=True,
                                         grad_clipping=h_grad_clip,
                                         unroll_scan=True,
                                         precompute_input=True)
        l_backward_0a = LL.ReshapeLayer(l_backward_0, (-1, h))
        l_backward_0b = LL.DenseLayer(l_backward_0a, num_units=h,
                                      nonlinearity=nonlin)
        l_backward_0c = LL.ReshapeLayer(l_backward_0b,
                                        (batchsize, input_width, h))

        l_in_1 = LL.ElemwiseSumLayer([l_forward_0c, l_backward_0c])

    # Output layers
    network_0a = LL.ReshapeLayer(l_in_1, (-1, h_num_units[-1]))
    network_0b = LL.DenseLayer(network_0a, num_units=output_width,
                               nonlinearity=nonlin)
    network_0c = LL.ReshapeLayer(network_0b,
                                 (batchsize, input_width, output_width))

    output_net_1 = LL.FlattenLayer(network_0c, outdim=2)
    output_net_2 = LL.FeaturePoolLayer(output_net_1,
                                       pool_size=input_width,
                                       pool_function=T.mean)

    return output_net_2
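# Hedged training sketch (not from the original source; the target variable,
# objective, optimizer, and input_width value are assumptions):
#
#   input_var = T.tensor3('inputs')
#   target_var = T.matrix('targets')
#   net = build_1Dregression_v1(input_var=input_var, input_width=100)
#   prediction = LL.get_output(net)
#   loss = lasagne.objectives.squared_error(prediction, target_var).mean()
#   params = LL.get_all_params(net, trainable=True)
#   updates = lasagne.updates.adam(loss, params)
#   train_fn = theano.function([input_var, target_var], loss, updates=updates)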
def build(self):
    """"""
    # output setting
    out_acts = []
    for out_act in self.config.hyper_parameters.out_act:
        # exec('from lasagne.nonlinearities import {}'.format(out_act))
        out_acts.append(eval(out_act))
    n_outs = self.config.hyper_parameters.n_out
    targets = self.config.target

    # apply size multiplier
    self.n_filters = map(lambda x: x * self.m, self.n_filters)

    # network dict & set input block
    self.net, self.variables['sigma'] = input_block(self.net, self.config,
                                                    melspec=True)

    if self.branch_at == 'fc':
        # shares all layers except the output layer
        for i in range(len(self.n_convs)):
            name = 'conv{:d}'.format(i + 1)
            self.net = conv_block(self.net, self.n_convs[i],
                                  self.n_filters[i], self.filter_sizes[i],
                                  self.strides[i], self.pool_sizes[i],
                                  self.non_lin, self.batch_norm, name,
                                  self.net.keys()[-1], self.verbose)

        # GAP
        self.net['gap'] = L.batch_norm(
            L.GlobalPoolLayer(self.net[next(reversed(self.net))], name='gap'))
        self.net['fc'] = L.dropout(
            L.batch_norm(
                L.DenseLayer(self.net['gap'],
                             num_units=self.net['gap'].output_shape[-1],
                             nonlinearity=self.non_lin, name='fc')),
            name='fc.bn.do')

        # Out block
        out_layer_names = []
        for target, n_out, out_act in zip(targets, n_outs, out_acts):
            out_layer_names.append('{}.out'.format(target))
            if target == 'self':
                self.net[out_layer_names[-1]], inputs = \
                    build_siamese(self.net['fc'])
            else:
                self.net[out_layer_names[-1]] = L.DenseLayer(
                    self.net['fc'], num_units=n_out, nonlinearity=out_act,
                    name=out_layer_names[-1])
                inputs = [self.net['input'].input_var]
            self.variables['{}.inputs'.format(target)] = inputs
    else:
        # shares the lower part of the network and branches out

        # shared conv blocks
        for i in range(self.branch_at - 1):
            name = 'conv{:d}'.format(i + 1)
            self.net = conv_block(self.net, self.n_convs[i],
                                  self.n_filters[i], self.filter_sizes[i],
                                  self.strides[i], self.pool_sizes[i],
                                  self.non_lin, self.batch_norm, name,
                                  self.net.keys()[-1], self.verbose)
        branch_point = self.net.keys()[-1]

        # branch out to each target
        out_layer_names = []
        for target, n_out, out_act in zip(targets, n_outs, out_acts):
            # first conv_block for each branch
            j = self.branch_at - 1  # branch_point_ix
            name = '{}.conv{:d}'.format(target, j + 1)
            self.net = conv_block(self.net, self.n_convs[j],
                                  self.n_filters[j], self.filter_sizes[j],
                                  self.strides[j], self.pool_sizes[j],
                                  self.non_lin, self.batch_norm, name,
                                  branch_point, self.verbose)

            for i in range(self.branch_at, len(self.n_convs)):
                name = '{}.conv{:d}'.format(target, i + 1)
                self.net = conv_block(self.net, self.n_convs[i],
                                      self.n_filters[i], self.filter_sizes[i],
                                      self.strides[i], self.pool_sizes[i],
                                      self.non_lin, self.batch_norm, name,
                                      self.net.keys()[-1], self.verbose)

            # GAP
            gap_name = '{}.gap'.format(target)
            self.net[gap_name] = L.batch_norm(
                L.GlobalPoolLayer(self.net[next(reversed(self.net))],
                                  name=gap_name))

            # FC
            fc_name = '{}.fc'.format(target)
            self.net[fc_name] = L.dropout(
                L.batch_norm(
                    L.DenseLayer(self.net[gap_name],
                                 num_units=self.net[gap_name].output_shape[-1],
                                 nonlinearity=self.non_lin, name=fc_name)),
                name=fc_name)

            # OUT
            out_layer_names.append('{}.out'.format(target))
            if target == 'self':
                self.net[out_layer_names[-1]], inputs = \
                    build_siamese(self.net[fc_name])
            else:
                self.net[out_layer_names[-1]] = L.DenseLayer(
                    self.net[fc_name], num_units=n_out,
                    nonlinearity=out_act, name=out_layer_names[-1])
                inputs = [self.net['input'].input_var]
            self.variables['{}.inputs'.format(target)] = inputs

    # make a concatenation layer just for save/load purposes
    self.net['IO'] = L.ConcatLayer(
        [L.FlattenLayer(self.net[target_layer_name])
         if target == 'self' else self.net[target_layer_name]
         for target_layer_name in out_layer_names],
        name='IO')

    if self.verbose:
        for target in targets:
            print(self.net['{}.out'.format(target)].output_shape)

    return self.net, self.variables
def flatten(_in, **kwargs):
    return L.FlattenLayer(_in, **kwargs)
def __init__(self, n_inputs=None, n_outputs=None, input_shape=None,
             n_bypass=0, density='mog', n_hiddens=(10, 10),
             impute_missing=True, seed=None, n_filters=(), filter_sizes=3,
             pool_sizes=2, n_rnn=0, **density_opts):
    """Initialize a mixture density network with custom layers

    Parameters
    ----------
    n_inputs : int
        Total input dimensionality (data/summary stats)
    n_outputs : int
        Dimensionality of output (simulator parameters)
    input_shape : tuple
        Size to which data are reshaped before CNN or RNN
    n_bypass : int
        Number of elements at end of input which bypass CNN or RNN
    density : string
        Type of density conditioned on the network; can be 'mog' or 'maf'
    n_components : int
        Number of components of the mixture density
    n_filters : list of ints
        Number of filters per convolutional layer
    n_hiddens : list of ints
        Number of hidden units per fully connected layer
    n_rnn : None or int
        Number of RNN units
    impute_missing : bool
        If set to True, learns replacement value for NaNs, otherwise those
        inputs are set to zero
    seed : int or None
        If provided, random number generator will be seeded
    density_opts : dict
        Options for the density estimator
    """
    if n_rnn > 0 and len(n_filters) > 0:
        raise NotImplementedError
    assert isint(n_inputs) and isint(n_outputs) \
        and n_inputs > 0 and n_outputs > 0

    self.density = density.lower()
    self.impute_missing = impute_missing
    self.n_hiddens = list(n_hiddens)
    self.n_outputs, self.n_inputs = n_outputs, n_inputs
    self.n_bypass = n_bypass
    self.n_rnn = n_rnn
    self.n_filters, self.filter_sizes, self.pool_sizes, n_cnn = \
        list(n_filters), filter_sizes, pool_sizes, len(n_filters)
    if type(self.filter_sizes) is int:
        self.filter_sizes = [self.filter_sizes for _ in range(n_cnn)]
    else:
        assert len(self.filter_sizes) >= n_cnn
    if type(self.pool_sizes) is int:
        self.pool_sizes = [self.pool_sizes for _ in range(n_cnn)]
    else:
        assert len(self.pool_sizes) >= n_cnn

    self.iws = tt.vector('iws', dtype=dtype)

    self.seed = seed
    if seed is not None:
        self.rng = np.random.RandomState(seed=seed)
    else:
        self.rng = np.random.RandomState()
    lasagne.random.set_rng(self.rng)

    self.input_shape = (n_inputs,) if input_shape is None else input_shape
    assert np.prod(self.input_shape) + self.n_bypass == self.n_inputs
    assert 1 <= len(self.input_shape) <= 3

    # params: output placeholder (batch, self.n_outputs)
    self.params = tensorN(2, name='params', dtype=dtype)
    # stats : input placeholder, (batch, self.n_inputs)
    self.stats = tensorN(2, name='stats', dtype=dtype)

    # compose layers
    self.layer = collections.OrderedDict()

    # input layer, None indicates batch size not fixed at compile time
    self.layer['input'] = ll.InputLayer((None, self.n_inputs),
                                        input_var=self.stats)

    # learn replacement values
    if self.impute_missing:
        self.layer['missing'] = \
            dl.ImputeMissingLayer(last(self.layer),
                                  n_inputs=(self.n_inputs,))
    else:
        self.layer['missing'] = \
            dl.ReplaceMissingLayer(last(self.layer),
                                   n_inputs=(self.n_inputs,))

    if self.n_bypass > 0 and (self.n_rnn > 0 or n_cnn > 0):
        last_layer = last(self.layer)
        bypass_slice = slice(self.n_inputs - self.n_bypass, self.n_inputs)
        direct_slice = slice(0, self.n_inputs - self.n_bypass)
        self.layer['bypass'] = ll.SliceLayer(last_layer, bypass_slice)
        self.layer['direct'] = ll.SliceLayer(last_layer, direct_slice)

    # reshape inputs prior to RNN or CNN step
    if self.n_rnn > 0 or n_cnn > 0:
        if len(n_filters) > 0 and len(self.input_shape) == 2:
            # 1 channel
            rs = (-1, 1, *self.input_shape)
        else:
            if self.n_rnn > 0:
                assert len(self.input_shape) == 2  # time, dim
            else:
                assert len(self.input_shape) == 3  # channel, row, col
            rs = (-1, *self.input_shape)
        # last layer is 'missing' or 'direct'
        self.layer['reshape'] = ll.ReshapeLayer(last(self.layer), rs)

    # recurrent neural net, input: (batch, sequence_length, num_inputs)
    if self.n_rnn > 0:
        self.layer['rnn'] = ll.GRULayer(last(self.layer), n_rnn,
                                        only_return_final=True)

    # convolutional net, input: (batch, channels, rows, columns)
    if n_cnn > 0:
        for l in range(n_cnn):
            # add layers
            if self.pool_sizes[l] == 1:
                padding = (self.filter_sizes[l] - 1) // 2
            else:
                padding = 0
            self.layer['conv_' + str(l + 1)] = ll.Conv2DLayer(
                name='c' + str(l + 1),
                incoming=last(self.layer),
                num_filters=self.n_filters[l],
                filter_size=self.filter_sizes[l],
                stride=(1, 1),
                pad=padding,
                untie_biases=False,
                W=lasagne.init.GlorotUniform(),
                b=lasagne.init.Constant(0.),
                nonlinearity=lnl.rectify,
                flip_filters=True,
                convolution=tt.nnet.conv2d)
            if self.pool_sizes[l] > 1:
                self.layer['pool_' + str(l + 1)] = ll.MaxPool2DLayer(
                    name='p' + str(l + 1),
                    incoming=last(self.layer),
                    pool_size=self.pool_sizes[l],
                    stride=None,
                    ignore_border=True)

        # flatten
        self.layer['flatten'] = ll.FlattenLayer(incoming=last(self.layer),
                                                outdim=2)

    # incorporate bypass inputs
    if self.n_bypass > 0 and (self.n_rnn > 0 or n_cnn > 0):
        self.layer['bypass_merge'] = lasagne.layers.ConcatLayer(
            [self.layer['bypass'], last(self.layer)], axis=1)

    if self.density == 'mog':
        self.init_mdn(**density_opts)
    elif self.density == 'maf':
        self.init_maf(**density_opts)
    else:
        raise NotImplementedError

    self.compile_funs()  # theano functions
def cnn_autoencoder(input_var=None):
    """
    Build the network using Lasagne library
    """

    ##################
    # Network config #
    ##################

    input_channels = 3
    weight_init = lasagne.init.Normal()

    # encoder
    conv1_nb_filt = 32
    conv1_sz_filt = (9, 9)
    conv1_sz_padd = 2  # conv1 output size = (60, 60)
    pool1_sz = (2, 2)  # pool1 output size = (30, 30)

    conv2_nb_filt = 64
    conv2_sz_filt = (7, 7)
    conv2_sz_padd = 0  # conv2 output size = (24, 24)
    pool2_sz = (4, 4)  # pool2 output size = (6, 6)

    conv3_nb_filt = 128
    conv3_sz_filt = (5, 5)
    conv3_sz_padd = 0  # conv3 output size = (2, 2)
    pool3_sz = (2, 2)  # pool3 output size = (128, 1, 1)

    dens1_nb_unit = 256  # dense1 output (vector 256)
    dens2_nb_unit = 256  # dense2 output (vector 256)
    rshp_sz = 1  # reshape output (256, 1, 1)

    # decoder
    tconv1_nb_filt = 64
    tconv1_sz_filt = (5, 5)
    tconv1_sz_strd = (1, 1)  # tconv1 output size = (5, 5)
    upsamp1_sz = (2, 2)  # upsamp1 output size = (10, 10)

    tconv2_nb_filt = 32
    tconv2_sz_filt = (4, 4)
    tconv2_sz_strd = (1, 1)  # tconv2 output size = (13, 13)
    upsamp2_sz = (2, 2)  # upsamp2 output size = (26, 26)

    tconv3_nb_filt = 32
    tconv3_sz_filt = (5, 5)
    tconv3_sz_strd = (1, 1)  # tconv3 output size = (30, 30)

    tconv4_nb_filt = 3
    tconv4_sz_filt = (3, 3)
    tconv4_sz_strd = (1, 1)  # tconv4 output size = (32, 32)
    # final output = (3 channels, 32 x 32)

    #####################
    # Build the network #
    #####################

    # Add input layer
    network = lyr.InputLayer(shape=(None, input_channels, 64, 64),
                             input_var=input_var)

    # Add convolution and pooling layers (encoder)
    network = lyr.Conv2DLayer(incoming=network, num_filters=conv1_nb_filt,
                              filter_size=conv1_sz_filt, pad=conv1_sz_padd,
                              W=weight_init)
    network = lyr.MaxPool2DLayer(incoming=network, pool_size=pool1_sz)
    network = lyr.Conv2DLayer(incoming=network, num_filters=conv2_nb_filt,
                              filter_size=conv2_sz_filt, pad=conv2_sz_padd,
                              W=weight_init)
    network = lyr.MaxPool2DLayer(incoming=network, pool_size=pool2_sz)
    network = lyr.Conv2DLayer(incoming=network, num_filters=conv3_nb_filt,
                              filter_size=conv3_sz_filt, pad=conv3_sz_padd,
                              W=weight_init)
    network = lyr.MaxPool2DLayer(incoming=network, pool_size=pool3_sz)

    network = lyr.FlattenLayer(network)

    # Add dense layers
    network = lyr.DenseLayer(network, dens1_nb_unit, W=weight_init)
    network = lyr.DenseLayer(network, dens2_nb_unit, W=weight_init)
    network = lyr.ReshapeLayer(network,
                               (input_var.shape[0],
                                dens2_nb_unit / (rshp_sz**2),
                                rshp_sz, rshp_sz))

    # Add transposed convolution and upsampling layers (decoder)
    network = lyr.TransposedConv2DLayer(incoming=network,
                                        num_filters=tconv1_nb_filt,
                                        filter_size=tconv1_sz_filt,
                                        stride=tconv1_sz_strd, W=weight_init)
    network = lyr.Upscale2DLayer(incoming=network, scale_factor=upsamp1_sz)
    network = lyr.TransposedConv2DLayer(incoming=network,
                                        num_filters=tconv2_nb_filt,
                                        filter_size=tconv2_sz_filt,
                                        stride=tconv2_sz_strd, W=weight_init)
    network = lyr.Upscale2DLayer(incoming=network, scale_factor=upsamp2_sz)
    network = lyr.TransposedConv2DLayer(incoming=network,
                                        num_filters=tconv3_nb_filt,
                                        filter_size=tconv3_sz_filt,
                                        stride=tconv3_sz_strd, W=weight_init)
    network = lyr.TransposedConv2DLayer(
        incoming=network, num_filters=tconv4_nb_filt,
        filter_size=tconv4_sz_filt, stride=tconv4_sz_strd, W=weight_init,
        nonlinearity=lasagne.nonlinearities.sigmoid)

    return network
inf_layers.append(ll.batch_norm(
    dnn.Conv2DDNNLayer(inf_layers[-1], 128, (4, 4), stride=2, pad=1,
                       W=Normal(0.05), nonlinearity=nn.lrelu, name='inf-11'),
    name='inf-12'))
inf_layers.append(ll.batch_norm(
    dnn.Conv2DDNNLayer(inf_layers[-1], 256, (4, 4), stride=2, pad=1,
                       W=Normal(0.05), nonlinearity=nn.lrelu, name='inf-21'),
    name='inf-22'))
inf_layers.append(ll.batch_norm(
    dnn.Conv2DDNNLayer(inf_layers[-1], 512, (4, 4), stride=2, pad=1,
                       W=Normal(0.05), nonlinearity=nn.lrelu, name='inf-31'),
    name='inf-32'))
inf_layers.append(ll.DenseLayer(inf_layers[-1], num_units=n_z, W=Normal(0.05),
                                nonlinearity=None, name='inf-4'))

# discriminator xz
disxz_in_x = ll.InputLayer(shape=(None, in_channels) + dim_input)
disxz_in_z = ll.InputLayer(shape=(None, n_z))

disxz_z_layers = [disxz_in_z]
disxz_z_layers.append(ll.DenseLayer(disxz_z_layers[-1], num_units=512,
                                    W=Normal(0.05), nonlinearity=nn.lrelu,
                                    name='disxz-0'))
disxz_z_layers.append(ll.DenseLayer(disxz_z_layers[-1], num_units=512,
                                    W=Normal(0.05), nonlinearity=nn.lrelu,
                                    name='disxz-1'))

disxz_x_layers = [disxz_in_x]
disxz_x_layers.append(dnn.Conv2DDNNLayer(disxz_x_layers[-1], 128, (5, 5),
                                         stride=2, pad='same', W=Normal(0.05),
                                         nonlinearity=nn.lrelu,
                                         name='disxz-2'))
disxz_x_layers.append(ll.batch_norm(
    dnn.Conv2DDNNLayer(disxz_x_layers[-1], 256, (5, 5), stride=2, pad='same',
                       W=Normal(0.05), nonlinearity=nn.lrelu, name='disxz-31'),
    name='disxz-32'))
disxz_x_layers.append(ll.batch_norm(
    dnn.Conv2DDNNLayer(disxz_x_layers[-1], 512, (5, 5), stride=2, pad='same',
                       W=Normal(0.05), nonlinearity=nn.lrelu, name='disxz-41'),
    name='disxz-42'))

disxz_layers = [ll.ConcatLayer([ll.FlattenLayer(disxz_x_layers[-1]),
                                disxz_z_layers[-1]], name='disxz-5')]
disxz_layers.append(ll.DenseLayer(disxz_layers[-1], num_units=1024,
                                  W=Normal(0.05), nonlinearity=nn.lrelu,
                                  name='disxz-6'))
disxz_layers.append(ll.DenseLayer(disxz_layers[-1], num_units=1,
                                  W=Normal(0.05), nonlinearity=ln.sigmoid,
                                  name='disxz-7'))

'''
objectives
'''

# zca
whitener = ZCA(x=x_unlabelled)
sym_x_l_zca = whitener.apply(sym_x_l)
sym_x_eval_zca = whitener.apply(sym_x_eval)
sym_x_u_zca = whitener.apply(sym_x_u)
sym_x_u_rep_zca = whitener.apply(sym_x_u_rep)
sym_x_u_d_zca = whitener.apply(sym_x_u_d)

# init
dropout = lambda p=0.1, rescale=True: lambda incoming: \
    layers.DropoutLayer(incoming, p=p, rescale=rescale) if p is not None else incoming

batch_norm = lambda axes='auto': lambda incoming: \
    layers.BatchNormLayer(incoming, axes=axes)


class Select(object):
    def __getitem__(self, item):
        return lambda incomings: incomings[item]


select = Select()
take = select

nonlinearity = lambda f=None: lambda incoming: layers.NonlinearityLayer(
    incoming, (nonlinearities.LeakyRectify(0.05) if f is None else f))

elementwise = lambda f=T.add: lambda incomings: layers.ElemwiseMergeLayer(
    incomings, f)

elementwise_sum = lambda: lambda incomings: layers.ElemwiseMergeLayer(
    incomings, T.add)

elementwise_mean = lambda: lambda incomings: \
    nonlinearity(f=lambda x: x / len(incomings))(
        layers.ElemwiseMergeLayer(incomings, T.add))

flatten = lambda outdim=2: lambda incoming: layers.FlattenLayer(
    incoming, outdim=outdim)

feature_pool = lambda pool_size=4, axis=1, f=T.max: lambda incoming: \
    layers.FeaturePoolLayer(incoming, pool_size=pool_size, axis=axis,
                            pool_function=f)
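# Hedged composition example (not from the original source): the combinators
# above return layer-constructing closures, so they chain like this.
#
#   net = layers.InputLayer((None, 3, 32, 32))
#   net = layers.Conv2DLayer(net, num_filters=64, filter_size=3)
#   net = batch_norm()(net)
#   net = nonlinearity()(net)  # defaults to LeakyRectify(0.05)
#   net = flatten()(net)       # FlattenLayer with outdim=2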