def test_glorot_uniform_c01b_4d_only():
    from lasagne.init import GlorotUniform

    with pytest.raises(RuntimeError):
        GlorotUniform(c01b=True).sample((100,))

    with pytest.raises(RuntimeError):
        GlorotUniform(c01b=True).sample((100, 100))

    with pytest.raises(RuntimeError):
        GlorotUniform(c01b=True).sample((100, 100, 100))
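# Illustrative sketch (not from the original test file): with c01b=True the shape is
# interpreted in c01b order, i.e. (channels, rows, cols, batches), so only 4D shapes
# are accepted and the fans come from the first and last dimensions.
import numpy as np
from lasagne.init import GlorotUniform

sample = GlorotUniform(c01b=True).sample((32, 3, 3, 16))
print(sample.shape)          # (32, 3, 3, 16)
print(np.abs(sample).max())  # stays below the Glorot bound sqrt(6 / ((32 + 16) * 3 * 3))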
def create_network(available_actions_num): # Creates the input variables s1 = tensor.tensor4("States") a = tensor.vector("Actions", dtype="int32") q2 = tensor.vector("Next State best Q-Value") r = tensor.vector("Rewards") nonterminal = tensor.vector("Nonterminal", dtype="int8") # Creates the input layer of the network. dqn = InputLayer(shape=[None, 1, downsampled_y, downsampled_x], input_var=s1) # Adds 3 convolutional layers, each followed by a max pooling layer. dqn = Conv2DLayer(dqn, num_filters=32, filter_size=[8, 8], nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1)) dqn = MaxPool2DLayer(dqn, pool_size=[2, 2]) dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[4, 4], nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1)) dqn = MaxPool2DLayer(dqn, pool_size=[2, 2]) dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[3, 3], nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1)) dqn = MaxPool2DLayer(dqn, pool_size=[2, 2]) # Adds a single fully connected layer. dqn = DenseLayer(dqn, num_units=512, nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1)) # Adds a single fully connected layer which is the output layer. # (no nonlinearity as it is for approximating an arbitrary real function) dqn = DenseLayer(dqn, num_units=available_actions_num, nonlinearity=None) # Theano stuff q = get_output(dqn) # Only q for the chosen actions is updated more or less according to following formula: # target Q(s,a,t) = r + gamma * max Q(s2,_,t+1) target_q = tensor.set_subtensor(q[tensor.arange(q.shape[0]), a], r + discount_factor * nonterminal * q2) loss = squared_error(q, target_q).mean() # Updates the parameters according to the computed gradient using rmsprop. params = get_all_params(dqn, trainable=True) updates = rmsprop(loss, params, learning_rate) # Compiles theano functions print "Compiling the network ..." function_learn = theano.function([s1, q2, a, r, nonterminal], loss, updates=updates, name="learn_fn") function_get_q_values = theano.function([s1], q, name="eval_fn") function_get_best_action = theano.function([s1], tensor.argmax(q), name="test_fn") print "Network compiled." # Returns Theano objects for the net and functions. # We wouldn't need the net anymore but it is nice to save your model. return dqn, function_learn, function_get_q_values, function_get_best_action
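# A minimal driver sketch (not part of the original script); it assumes the
# module-level names downsampled_x, downsampled_y, discount_factor and
# learning_rate used inside create_network() are defined elsewhere.
import numpy as np

net, learn, get_q_values, get_best_action = create_network(available_actions_num=8)

state = np.zeros((1, 1, downsampled_y, downsampled_x), dtype=np.float32)
best_action = get_best_action(state)  # index of the action with the highest Q-value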
def init_weights(self, shape, weightType=None, typeLayer=None, caffeLayerName=None):
    if weightType == 'Xavier' and typeLayer is None:
        W = GlorotUniform()
        weights = W.sample(shape)
        if self.mode == "Train":
            if self.caffeModelName is not None and caffeLayerName is not None:
                caffeWeights = self.loadTheseWeights(self.caffeModelName, caffeLayerName)
                print caffeWeights.shape
                print weights.shape
        print 'returning Xavier weights'
        return theano.shared(self.floatX(weights), borrow=True)
    return theano.shared(self.floatX(np.random.randn(*shape) * 0.01), borrow=True)
def build_bottleneck_layer(input_size, encode_size, sigma=0.3):
    W = theano.shared(GlorotUniform().sample(shape=(input_size, encode_size)))
    layers = [
        (InputLayer, {'shape': (None, input_size)}),
        (GaussianNoiseLayer, {'name': 'corrupt', 'sigma': sigma}),
        (DenseLayer, {'name': 'encoder', 'num_units': encode_size,
                      'nonlinearity': linear, 'W': W}),
        (DenseLayer, {'name': 'decoder', 'num_units': input_size,
                      'nonlinearity': linear, 'W': W.T}),
    ]
    return W, layers
def build_encoder_layers(input_size, encode_size, sigma=0.5):
    """
    Builds an autoencoder with a Gaussian noise layer.

    :param input_size: input size
    :param encode_size: encoded size
    :param sigma: Gaussian noise standard deviation
    :return: weights of the encoder layer, denoising autoencoder layers
    """
    W = theano.shared(GlorotUniform().sample(shape=(input_size, encode_size)))
    layers = [
        (InputLayer, {'shape': (None, input_size)}),
        (GaussianNoiseLayer, {'name': 'corrupt', 'sigma': sigma}),
        (DenseLayer, {'name': 'encoder', 'num_units': encode_size,
                      'nonlinearity': sigmoid, 'W': W}),
        (DenseLayer, {'name': 'decoder', 'num_units': input_size,
                      'nonlinearity': linear, 'W': W.T}),
    ]
    return W, layers
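# Quick illustration (hypothetical, not from the original module): the encoder and
# decoder above are tied to the same shared variable, the decoder simply seeing it
# transposed, so a single weight matrix is learned for both directions.
import theano
from lasagne.init import GlorotUniform

W = theano.shared(GlorotUniform().sample(shape=(784, 32)))
print(W.get_value().shape)  # (784, 32), used by the encoder
print(W.T.eval().shape)     # (32, 784), the symbolic transpose used by the decoder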
def test_glorot_uniform_gain():
    from lasagne.init import GlorotUniform

    sample = GlorotUniform(gain=10.0).sample((150, 450))
    assert -1.0 <= sample.min() < -0.9
    assert 0.9 < sample.max() <= 1.0

    sample = GlorotUniform(gain='relu').sample((100, 100))
    assert -0.01 < sample.mean() < 0.01
    assert 0.132 < sample.std() < 0.152
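# For reference (illustrative, not from the test suite): the asserted ranges follow
# from the Glorot fan arithmetic, W ~ U(-a, a) with a = gain * sqrt(6 / (fan_in + fan_out)),
# hence std = gain * sqrt(2 / (fan_in + fan_out)).
import numpy as np

def glorot_uniform_bound(fan_in, fan_out, gain=1.0):
    return gain * np.sqrt(6.0 / (fan_in + fan_out))

print(glorot_uniform_bound(150, 450, gain=10.0))   # ~1.0, the bound asserted above
print(np.sqrt(2) * np.sqrt(2.0 / (100 + 100)))     # ~0.141, the 'relu' std asserted above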
def create_rnn(input_vars, num_inputs, hidden_layer_size, num_outputs):
    network = InputLayer((None, None, num_inputs), input_vars)
    batch_size_theano, seqlen, _ = network.input_var.shape

    network = GaussianNoiseLayer(network, sigma=0.05)
    for i in range(1):
        network = RecurrentLayer(network, hidden_layer_size,
                                 W_hid_to_hid=GlorotUniform(),
                                 W_in_to_hid=GlorotUniform(),
                                 b=Constant(1.0),
                                 nonlinearity=leaky_rectify,
                                 learn_init=True)
    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    network = ReshapeLayer(network, (batch_size_theano, seqlen, num_outputs))
    return network
def convert_initialization(component, nonlinearity="sigmoid"):
    # component = init_dic[component_key]
    assert len(component) == 2
    if component[0] == "uniform":
        return Uniform(component[1])
    elif component[0] == "glorotnormal":
        if nonlinearity in ["linear", "sigmoid", "tanh"]:
            return GlorotNormal(1.)
        else:
            return GlorotNormal("relu")
    elif component[0] == "glorotuniform":
        if nonlinearity in ["linear", "sigmoid", "tanh"]:
            return GlorotUniform(1.)
        else:
            return GlorotUniform("relu")
    elif component[0] == "normal":
        return Normal(*component[1])
    else:
        raise NotImplementedError()
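# Example calls (illustrative): the first element of `component` selects the scheme,
# the second carries its parameter(s); the nonlinearity decides the Glorot gain.
print(convert_initialization(("uniform", 0.05)))                    # Uniform range 0.05
print(convert_initialization(("glorotuniform", None), "rectify"))   # GlorotUniform("relu")
print(convert_initialization(("normal", (0.01, 0.0))))              # Normal(*args)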
def create_rnn(input_vars, num_inputs, depth, hidden_layer_size, num_outputs):
    # network = InputLayer((None, None, num_inputs), input_vars)
    network = lasagne.layers.InputLayer(shape=(None, 1, 1, num_inputs),
                                        input_var=input_vars)
    batch_size_theano, _, _, seqlen = network.input_var.shape

    network = GaussianNoiseLayer(network, sigma=0.05)
    for i in range(depth):
        network = RecurrentLayer(network, hidden_layer_size,
                                 W_hid_to_hid=GlorotUniform(),
                                 W_in_to_hid=GlorotUniform(),
                                 b=Constant(1.0),
                                 nonlinearity=lasagne.nonlinearities.tanh,
                                 learn_init=True)
    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    return network
def getNet1(): inputLayer = layers.InputLayer(shape=(None, 1, imageShape[0], imageShape[1])) conv1Layer = layers.Conv2DLayer(inputLayer, num_filters=32, filter_size=(3, 3), W=GlorotNormal(0.8), nonlinearity=rectify) pool1Layer = layers.MaxPool2DLayer(conv1Layer, pool_size=(2, 2)) dropout1Layer = layers.DropoutLayer(pool1Layer, p=0.5) conv2Layer = layers.Conv2DLayer(dropout1Layer, num_filters=64, filter_size=(4, 3), W=GlorotUniform(1.0), nonlinearity=rectify) pool2Layer = layers.MaxPool2DLayer(conv2Layer, pool_size=(2, 2)) dropout2Layer = layers.DropoutLayer(pool2Layer, p=0.5) conv3Layer = layers.Conv2DLayer(dropout2Layer, num_filters=128, filter_size=(3, 3), W=GlorotUniform(1.0), nonlinearity=rectify) pool3Layer = layers.MaxPool2DLayer(conv3Layer, pool_size=(2, 2)) dropout3Layer = layers.DropoutLayer(pool3Layer, p=0.5) conv4Layer = layers.Conv2DLayer(dropout3Layer, num_filters=256, filter_size=(3, 2), W=GlorotNormal(0.8), nonlinearity=rectify) hidden1Layer = layers.DenseLayer(conv4Layer, num_units=1024, W=GlorotUniform(1.0), nonlinearity=rectify) hidden2Layer = layers.DenseLayer(hidden1Layer, num_units=512, W=GlorotUniform(1.0), nonlinearity=rectify) #hidden3Layer = layers.DenseLayer(hidden2Layer, num_units=256, nonlinearity=tanh) outputLayer = layers.DenseLayer(hidden2Layer, num_units=10, nonlinearity=softmax) return outputLayer
def _create_network(available_actions_num, input_shape, visual_input_var, n_variables, variables_input_var): dqn = InputLayer(shape=[None, input_shape.frames, input_shape.y, input_shape.x], input_var=visual_input_var) dqn = Conv2DLayer(dqn, num_filters=32, filter_size=[8, 8], stride=[4, 4], nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1)) dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[4, 4], stride=[2, 2], nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1)) dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[3, 3], nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1)) if n_variables > 0: variables_layer = InputLayer(shape=[None, n_variables], input_var=variables_input_var) dqn = ConcatLayer((flatten(dqn), variables_layer)) dqn = DenseLayer(dqn, num_units=512, nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1)) dqn = DenseLayer(dqn, num_units=available_actions_num, nonlinearity=None) return dqn
def addConvModule(nnet, num_filters, filter_size, pad='valid', W_init=None, bias=True, use_maxpool=True, pool_size=(2, 2), use_batch_norm=False, dropout=False, p_dropout=0.5, upscale=False, stride=(1, 1)): """ add a convolutional module (convolutional layer + (leaky) ReLU + MaxPool) to the network """ if W_init is None: W = GlorotUniform( gain=(2 / (1 + 0.01**2) )**0.5) # gain adjusted for leaky ReLU with alpha=0.01 else: W = W_init if bias is True: b = Constant(0.) else: b = None # build module if dropout: nnet.addDropoutLayer(p=p_dropout) nnet.addConvLayer(use_batch_norm=use_batch_norm, num_filters=num_filters, filter_size=filter_size, pad=pad, W=W, b=b, stride=stride) if Cfg.leaky_relu: nnet.addLeakyReLU() else: nnet.addReLU() if upscale: nnet.addUpscale(scale_factor=pool_size) elif use_maxpool: nnet.addMaxPool(pool_size=pool_size)
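# Side note (illustrative): the default gain above is the usual variance correction
# for leaky ReLU, gain = sqrt(2 / (1 + alpha**2)) with alpha = 0.01, which is almost
# exactly the sqrt(2) gain Lasagne uses for plain ReLU.
alpha = 0.01
gain = (2.0 / (1 + alpha ** 2)) ** 0.5
print(gain)  # ~1.41414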
def create_th(image_shape, output_dim, layers_conf): from lasagne.init import GlorotUniform, Constant from lasagne.layers import Conv2DLayer, InputLayer, DenseLayer, get_output, \ get_all_params, set_all_param_values from lasagne.nonlinearities import rectify from lasagne.objectives import squared_error from lasagne.updates import rmsprop x = th.tensor.tensor4("input") t = th.tensor.matrix("target") net = InputLayer(shape=[None, 1, image_shape[0], image_shape[1]], input_var=x) for num_filters, kernel_size, stride in layers_conf[:-1]: net = Conv2DLayer(net, num_filters=num_filters, filter_size=[kernel_size, kernel_size], nonlinearity=rectify, W=GlorotUniform(), b=Constant(.1), stride=stride) net = DenseLayer(net, num_units=layers_conf[-1], nonlinearity=rectify, W=GlorotUniform(), b=Constant(.1)) net = DenseLayer(net, num_units=output_dim, nonlinearity=None) q = get_output(net) loss = squared_error(q, t).mean() params = get_all_params(net, trainable=True) updates = rmsprop(loss, params, learning_rate) backprop = th.function([x, t], loss, updates=updates, name="bprop") fwd_pass = th.function([x], q, name="fwd") return fwd_pass, backprop
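# Hypothetical call (not from the original source): three conv layers given as
# (num_filters, kernel_size, stride) tuples, followed by a 128-unit dense layer;
# the module-level learning_rate used inside create_th() is assumed to exist.
layers_conf = [(32, 8, 4), (64, 4, 2), (64, 3, 1), 128]
fwd_pass, backprop = create_th(image_shape=(84, 84), output_dim=4,
                               layers_conf=layers_conf)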
def build(bs, num_out): conv = { 'filter_size': (5, 5), 'stride': (1, 1), 'pad': 2, 'num_filters': 192, 'W': GlorotUniform(gain='relu'), 'nonlinearity': identity, # for LeNet } pool = {'pool_size': (2, 2), 'stride': (2, 2)} drop = {'p': 0.5} l_in = InputLayer((None, 3, 32, 32), name='in') l_conv1 = Conv2DLayer(l_in, name='conv1', **conv) l_drop1 = DropoutLayer(l_conv1, name='drop1', **drop) l_pool1 = Pool2DLayer(l_drop1, name='pool1', **pool) l_conv2 = Conv2DLayer(l_pool1, name='conv2', **conv) l_drop2 = DropoutLayer(l_conv2, name='drop2', **drop) l_pool2 = Pool2DLayer(l_drop2, name='pool2', **pool) l_dense3 = DenseLayer(l_pool2, name='dense3', num_units=1000, W=GlorotUniform(gain='relu'), nonlinearity=rectify) l_drop3 = DropoutLayer(l_dense3, name='drop3', **drop) l_dense4 = DenseLayer(l_drop3, name='out', num_units=num_out, W=GlorotUniform(), nonlinearity=softmax) return l_dense4
def __init__(self, incoming, num_capsule, dim_vector, num_routing=3, W=GlorotUniform(), b=Constant(0), **kwargs): super(CapsLayer, self).__init__(incoming, **kwargs) self.num_capsule = num_capsule self.dim_vector = dim_vector self.num_routing = num_routing self.input_num_caps = self.input_shape[1] self.input_dim_vector = self.input_shape[2] self.W = self.add_param(W, (self.input_num_caps, self.num_capsule, self.input_dim_vector, self.dim_vector), name="W") self.b = self.add_param( b, (1, self.input_num_caps, self.num_capsule, 1, 1), name="b", trainable=False)
def init_net(self, feature_count, class_count=NCLASSES, verbosity=VERBOSITY >= 2): """ Initialize the network (needs to be done when data is available in order to set dimensions). """ if VERBOSITY >= 1: print 'initializing network {0:s} {1:d}x{2:d}x{3:d}'.format( self.name, self.dense1_size or 0, self.dense2_size or 0, self.dense3_size or 0) if VERBOSITY >= 2: print 'parameters: ' + ', '.join( '{0:s} = {1:}'.format(k, v) for k, v in self.get_params(deep=False).items()) self.feature_count = feature_count self.class_count = class_count """ Create the layers and their settings. """ self.layers = [ ('input', InputLayer), ] self.params = { 'dense1_num_units': self.dense1_size, 'dense1_nonlinearity': nonlinearities[self.dense1_nonlinearity], 'dense1_W': initializers[self.dense1_init], 'dense1_b': Constant(0.), } if self.dropout0_rate: self.layers += [('dropout0', DropoutLayer)] self.params['dropout0_p'] = self.dropout0_rate self.layers += [ ('dense1', DenseLayer), ] if self.dropout1_rate: self.layers += [('dropout1', DropoutLayer)] self.params['dropout1_p'] = self.dropout1_rate if self.dense2_size: self.layers += [('dense2', DenseLayer)] self.params.update({ 'dense2_num_units': self.dense2_size, 'dense2_nonlinearity': nonlinearities[self.dense2_nonlinearity], 'dense2_W': initializers[self.dense2_init], 'dense2_b': Constant(0.), }) else: assert not self.dense3_size, 'There cannot be a third dense layer without a second one' if self.dropout2_rate: assert self.dense2_size is not None, 'There cannot be a second dropout layer without a second dense layer.' self.layers += [('dropout2', DropoutLayer)] self.params['dropout2_p'] = self.dropout2_rate if self.dense3_size: self.layers += [('dense3', DenseLayer)] self.params.update({ 'dense3_num_units': self.dense3_size, 'dense3_nonlinearity': nonlinearities[self.dense3_nonlinearity], 'dense3_W': initializers[self.dense3_init], 'dense3_b': Constant(0.), }) if self.dropout3_rate: assert self.dense2_size is not None, 'There cannot be a third dropout layer without a third dense layer.' self.layers += [('dropout3', DropoutLayer)] self.params['dropout3_p'] = self.dropout2_rate self.layers += [('output', DenseLayer)] self.params.update({ 'output_nonlinearity': nonlinearities[self.output_nonlinearity], 'output_W': GlorotUniform(), 'output_b': Constant(0.), }) """ Create meta parameters and special handlers. 
""" if VERBOSITY >= 3: print 'learning rate: {0:.6f} -> {1:.6f}'.format( abs(self.learning_rate), abs(self.learning_rate) / float(self.learning_rate_scaling)) print 'momentum: {0:.6f} -> {1:.6f}'.format( abs(self.momentum), 1 - ((1 - abs(self.momentum)) / float(self.momentum_scaling))) self.step_handlers = [ LinearVariable('update_learning_rate', start=abs(self.learning_rate), stop=abs(self.learning_rate) / float(self.learning_rate_scaling)), LinearVariable( 'update_momentum', start=abs(self.momentum), stop=1 - ((1 - abs(self.momentum)) / float(self.momentum_scaling))), StopNaN(), ] self.end_handlers = [ SnapshotEndSaver(base_name=self.name), TrainProgressPlotter(base_name=self.name), ] snapshot_name = 'nn_' + params_name(self.params, prefix=self.name)[0] if self.save_snapshots_stepsize: self.step_handlers += [ SnapshotStepSaver(every=self.save_snapshots_stepsize, base_name=snapshot_name), ] if self.auto_stopping: self.step_handlers += [ StopWhenOverfitting(loss_fraction=0.9, base_name=snapshot_name), StopAfterMinimum(patience=40, base_name=self.name), ] weight_decay = shared(float32(abs(self.weight_decay)), 'weight_decay') if self.adaptive_weight_decay: self.step_handlers += [ AdaptiveWeightDecay(weight_decay), ] if self.epoch_steps: self.step_handlers += [ BreakEveryN(self.epoch_steps), ] """ Create the actual nolearn network with information from __init__. """ self.net = NeuralNet( layers=self.layers, objective=partial(WeightDecayObjective, weight_decay=weight_decay), input_shape=(None, feature_count), output_num_units=class_count, update=nesterov_momentum, # todo: make parameter update_learning_rate=shared(float32(self.learning_rate)), update_momentum=shared(float(self.weight_decay)), on_epoch_finished=self.step_handlers, on_training_finished=self.end_handlers, regression=False, max_epochs=self.max_epochs, verbose=verbosity, batch_iterator_train=BatchIterator(batch_size=self.batch_size), batch_iterator_test=BatchIterator(batch_size=self.batch_size), eval_size=0.1, #custom_score = ('custom_loss', categorical_crossentropy), **self.params) self.net.parent = self self.net.initialize() return self.net
print 'set random seed to {0} while loading NNet'.format(SEED) nonlinearities = { 'tanh': tanh, 'sigmoid': sigmoid, 'rectify': rectify, 'leaky2': LeakyRectify(leakiness=0.02), 'leaky20': LeakyRectify(leakiness=0.2), 'softmax': softmax, } initializers = { 'orthogonal': Orthogonal(), 'sparse': Sparse(), 'glorot_normal': GlorotNormal(), 'glorot_uniform': GlorotUniform(), 'he_normal': HeNormal(), 'he_uniform': HeUniform(), } class NNet(BaseEstimator, ClassifierMixin): def __init__( self, name='nameless_net', # used for saving, so maybe make it unique dense1_size=60, dense1_nonlinearity='tanh', dense1_init='orthogonal', dense2_size=None, dense2_nonlinearity=None, # inherits dense1 dense2_init=None, # inherits dense1
def test_glorot_uniform_receptive_field():
    from lasagne.init import GlorotUniform

    sample = GlorotUniform().sample((150, 150, 2))
    assert -0.10 <= sample.min() < -0.09
    assert 0.09 < sample.max() <= 0.10
def build_critic(input_var=None, cond_var=None, n_conds=0, arch=0, with_BatchNorm=True, loss_type='wgan'): from lasagne.layers import ( InputLayer, Conv2DLayer, DenseLayer, MaxPool2DLayer, concat, dropout, flatten) from lasagne.nonlinearities import rectify, LeakyRectify from lasagne.init import GlorotUniform # Normal lrelu = LeakyRectify(0.2) layer = InputLayer( shape=(None, 1, 128, 128), input_var=input_var, name='d_in_data') # init = Normal(0.02, 0.0) init = GlorotUniform() if cond_var: # class: from data or from generator input layer_cond = InputLayer( shape=(None, n_conds), input_var=cond_var, name='d_in_condition') layer_cond = BatchNorm(DenseLayer( layer_cond, 1024, W=init, b=None, nonlinearity=lrelu), with_BatchNorm) if arch == 'dcgan': # DCGAN inspired layer = BatchNorm(Conv2DLayer( layer, 32, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 64, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 128, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 256, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 512, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm) elif arch == 'cont-enc': # convolution layers layer = BatchNorm(Conv2DLayer( layer, 64, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 64, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 128, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 256, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 512, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm) elif arch == 'mnist': # Jan Schluechter's MNIST discriminator # convolution layers layer = BatchNorm(Conv2DLayer( layer, 128, 5, stride=2, pad='same', W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 128, 5, stride=2, pad='same', W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 128, 5, stride=2, pad='same', W=init, b=None, nonlinearity=lrelu), with_BatchNorm) # layer = BatchNorm(Conv2DLayer( # layer, 128, 5, stride=2, pad='same', W=init, b=None, # nonlinearity=lrelu), with_BatchNorm) # fully-connected layer # layer = BatchNorm(DenseLayer( # layer, 1024, W=init, b=None, nonlinearity=lrelu), with_BatchNorm) elif arch == 'lsgan': layer = batch_norm(Conv2DLayer( layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu)) layer = batch_norm(Conv2DLayer( layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu)) layer = batch_norm(Conv2DLayer( layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu)) elif arch == 'crepe': # CREPE # form words from sequence of characters layer = BatchNorm(Conv2DLayer( layer, 1024, (128, 7), W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = MaxPool2DLayer(layer, (1, 3)) # temporal convolution, 7-gram layer = BatchNorm(Conv2DLayer( layer, 512, (1, 7), W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = MaxPool2DLayer(layer, (1, 3)) # temporal convolution, 3-gram layer = BatchNorm(Conv2DLayer( layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu), with_BatchNorm) 
layer = BatchNorm(Conv2DLayer( layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = flatten(layer) # fully-connected layers layer = dropout(DenseLayer( layer, 1024, W=init, b=None, nonlinearity=rectify)) layer = dropout(DenseLayer( layer, 1024, W=init, b=None, nonlinearity=rectify)) else: raise Exception("Model architecture {} is not supported".format(arch)) # output layer (linear and without bias) if cond_var is not None: layer = DenseLayer(layer, 1024, nonlinearity=lrelu, b=None) layer = concat([layer, layer_cond]) layer = DenseLayer(layer, 1, b=None, nonlinearity=None) print("Critic output:", layer.output_shape) return layer
def test_glorot_uniform():
    from lasagne.init import GlorotUniform

    sample = GlorotUniform().sample((150, 450))
    assert -0.1 <= sample.min() < -0.09
    assert 0.09 < sample.max() <= 0.1
def getNet2(): inputLayer = layers.InputLayer(shape=(None, 1, imageShape[0], imageShape[1])) loc1Layer = layers.Conv2DLayer(inputLayer, num_filters=32, filter_size=(3, 3), W=GlorotUniform('relu'), nonlinearity=rectify) loc2Layer = layers.MaxPool2DLayer(loc1Layer, pool_size=(2, 2)) loc3Layer = layers.Conv2DLayer(loc2Layer, num_filters=64, filter_size=(4, 3), W=GlorotUniform('relu'), nonlinearity=rectify) loc4Layer = layers.MaxPool2DLayer(loc3Layer, pool_size=(2, 2)) loc5Layer = layers.Conv2DLayer(loc4Layer, num_filters=128, filter_size=(3, 3), W=GlorotUniform('relu'), nonlinearity=rectify) loc6Layer = layers.MaxPool2DLayer(loc5Layer, pool_size=(2, 2)) loc7Layer = layers.Conv2DLayer(loc6Layer, num_filters=256, filter_size=(3, 2), W=GlorotUniform('relu'), nonlinearity=rectify) #loc7Layer = layers.DenseLayer(loc5Layer, num_units=1024, nonlinearity=rectify) loc8Layer = layers.DenseLayer(loc7Layer, num_units=256, W=GlorotUniform('relu'), nonlinearity=rectify) loc9Layer = layers.DenseLayer(loc8Layer, num_units=128, W=GlorotUniform('relu'), nonlinearity=tanh) loc10Layer = layers.DenseLayer(loc9Layer, num_units=64, W=GlorotUniform('relu'), nonlinearity=tanh) #loc11Layer = layers.DenseLayer(loc10Layer, num_units=32, nonlinearity=tanh) #loc12Layer = layers.DenseLayer(loc11Layer, num_units=16, nonlinearity=tanh) locOutLayer = layers.DenseLayer(loc10Layer, num_units=6, W=GlorotUniform(1.0), nonlinearity=identity) transformLayer = layers.TransformerLayer(inputLayer, locOutLayer, downsample_factor=1.0) conv1Layer = layers.Conv2DLayer(inputLayer, num_filters=32, filter_size=(3, 3), W=GlorotNormal('relu'), nonlinearity=rectify) pool1Layer = layers.MaxPool2DLayer(conv1Layer, pool_size=(2, 2)) dropout1Layer = layers.DropoutLayer(pool1Layer, p=0.5) conv2Layer = layers.Conv2DLayer(dropout1Layer, num_filters=64, filter_size=(4, 3), W=GlorotUniform('relu'), nonlinearity=rectify) pool2Layer = layers.MaxPool2DLayer(conv2Layer, pool_size=(2, 2)) dropout2Layer = layers.DropoutLayer(pool2Layer, p=0.5) conv3Layer = layers.Conv2DLayer(dropout2Layer, num_filters=128, filter_size=(3, 3), W=GlorotUniform('relu'), nonlinearity=rectify) pool3Layer = layers.MaxPool2DLayer(conv3Layer, pool_size=(2, 2)) dropout3Layer = layers.DropoutLayer(pool3Layer, p=0.5) conv4Layer = layers.Conv2DLayer(dropout3Layer, num_filters=256, filter_size=(3, 2), W=GlorotNormal('relu'), nonlinearity=rectify) hidden1Layer = layers.DenseLayer(conv4Layer, num_units=1024, W=GlorotUniform('relu'), nonlinearity=rectify) hidden2Layer = layers.DenseLayer(hidden1Layer, num_units=512, W=GlorotUniform('relu'), nonlinearity=rectify) #hidden3Layer = layers.DenseLayer(hidden2Layer, num_units=256, nonlinearity=tanh) outputLayer = layers.DenseLayer(hidden2Layer, num_units=10, W=GlorotUniform('relu'), nonlinearity=softmax) return outputLayer
def test_glorot_uniform_gain():
    from lasagne.init import GlorotUniform

    sample = GlorotUniform(gain=10.0).sample((150, 450))
    assert -1.0 <= sample.min() < -0.9
    assert 0.9 < sample.max() <= 1.0
def get_W(network, layer_name):
    if (network is not None) and (layer_name in network):
        W = network[layer_name].W
    else:
        W = GlorotUniform()  # default value in Lasagne
    return W
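# Illustrative use (pretrained_net is a hypothetical dict of named layers): fall back
# to the Lasagne default initializer when the requested layer is missing.
W_conv1 = get_W(pretrained_net, 'conv1')  # reuses pretrained_net['conv1'].W if present
W_fresh = get_W(None, 'conv1')            # GlorotUniform() fallback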
def createCNN(self): net = {} net['input'] = lasagne.layers.InputLayer(shape=(None, self.nChannels, self.imageHeight, self.imageWidth), input_var=self.data) print("Input shape: {0}".format(net['input'].output_shape)) #STAGE 1 net['s1_conv1_1'] = batch_norm( Conv2DLayer(net['input'], 64, 3, pad='same', W=GlorotUniform('relu'))) net['s1_conv1_2'] = batch_norm( Conv2DLayer(net['s1_conv1_1'], 64, 3, pad='same', W=GlorotUniform('relu'))) net['s1_pool1'] = lasagne.layers.Pool2DLayer(net['s1_conv1_2'], 2) net['s1_conv2_1'] = batch_norm( Conv2DLayer(net['s1_pool1'], 128, 3, pad=1, W=GlorotUniform('relu'))) net['s1_conv2_2'] = batch_norm( Conv2DLayer(net['s1_conv2_1'], 128, 3, pad=1, W=GlorotUniform('relu'))) net['s1_pool2'] = lasagne.layers.Pool2DLayer(net['s1_conv2_2'], 2) net['s1_conv3_1'] = batch_norm( Conv2DLayer(net['s1_pool2'], 256, 3, pad=1, W=GlorotUniform('relu'))) net['s1_conv3_2'] = batch_norm( Conv2DLayer(net['s1_conv3_1'], 256, 3, pad=1, W=GlorotUniform('relu'))) net['s1_pool3'] = lasagne.layers.Pool2DLayer(net['s1_conv3_2'], 2) net['s1_conv4_1'] = batch_norm( Conv2DLayer(net['s1_pool3'], 512, 3, pad=1, W=GlorotUniform('relu'))) net['s1_conv4_2'] = batch_norm( Conv2DLayer(net['s1_conv4_1'], 512, 3, pad=1, W=GlorotUniform('relu'))) net['s1_pool4'] = lasagne.layers.Pool2DLayer(net['s1_conv4_2'], 2) net['s1_fc1_dropout'] = lasagne.layers.DropoutLayer(net['s1_pool4'], p=0.5) net['s1_fc1'] = batch_norm( lasagne.layers.DenseLayer(net['s1_fc1_dropout'], num_units=256, W=GlorotUniform('relu'))) net['s1_output'] = lasagne.layers.DenseLayer(net['s1_fc1'], num_units=136, nonlinearity=None) net['s1_landmarks_Org1'] = LandmarkDeformLayer1_Org1( net['s1_output'], self.initLandmarks, self.n_T) net['s1_landmarks_Org2'] = LandmarkDeformLayer1_Org2( net['s1_output'], self.initLandmarks, self.n_T) net['s1_landmarks_Org3'] = LandmarkDeformLayer1_Org3( net['s1_output'], self.initLandmarks, self.n_T) net['s1_landmarks_Org4'] = LandmarkDeformLayer1_Org4( net['s1_output'], self.initLandmarks, self.n_T) net['s1_landmarks_Org5'] = LandmarkDeformLayer1_Org5( net['s1_output'], self.initLandmarks, self.n_T) net['s1_landmarks_Org6'] = LandmarkDeformLayer1_Org6( net['s1_output'], self.initLandmarks, self.n_T) net['s1_landmarks_Org7'] = LandmarkDeformLayer1_Org7( net['s1_output'], self.initLandmarks, self.n_T) net['s1_landmarks_Org8'] = LandmarkDeformLayer1_Org8( net['s1_output'], self.initLandmarks, self.n_T) net['s1_landmarks_Org9'] = LandmarkDeformLayer1_Org9( net['s1_output'], self.initLandmarks, self.n_T) net['s1_landmarks_Org10'] = LandmarkDeformLayer1_Org10( net['s1_output'], self.initLandmarks, self.n_T) net['s1_landmarks_Org11'] = LandmarkDeformLayer1_Org11( net['s1_output'], self.initLandmarks, self.n_T) net['s1_landmarks_Org12'] = LandmarkDeformLayer1_Org12( net['s1_output'], self.initLandmarks, self.n_T) net['s1_landmarks'] = LandmarkConvergeLayer( net['s1_landmarks_Org1'], net['s1_landmarks_Org2'], net['s1_landmarks_Org3'], net['s1_landmarks_Org4'], net['s1_landmarks_Org5'], net['s1_landmarks_Org6'], net['s1_landmarks_Org7'], net['s1_landmarks_Org8'], net['s1_landmarks_Org9'], net['s1_landmarks_Org10'], net['s1_landmarks_Org11'], net['s1_landmarks_Org12']) for i in range(1, self.nStages): self.addDANStage(i + 1, net) net['output'] = net['s' + str(self.nStages) + '_landmarks'] return net
random_state = np.random.RandomState(1999)

# Add batchsize, channel dim
X_train = face(gray=True)[None, None].astype('float32')
X_train = X_train / 255.
y_train = 2 * X_train

chan = X_train.shape[1]
width = X_train.shape[2]
height = X_train.shape[3]

input_var = tensor.tensor4('X')
target_var = tensor.tensor4('y')

l_input = InputLayer((None, chan, width, height), input_var=input_var)
l_conv1 = Conv2DLayer(l_input, num_filters=32, filter_size=(3, 3),
                      nonlinearity=rectify, W=GlorotUniform())
l_pool1 = MaxPool2DLayer(l_conv1, pool_size=(2, 2))
l_conv2 = Conv2DLayer(l_pool1, num_filters=32, filter_size=(1, 1),
                      nonlinearity=rectify, W=GlorotUniform())
l_depool1 = Unpool2DLayer(l_pool1, (2, 2))
l_deconv1 = TransposeConv2DLayer(l_depool1, num_filters=chan,
                                 filter_size=(3, 3), W=GlorotUniform(),
                                 nonlinearity=linear)
l_out = l_deconv1

prediction = get_output(l_out)
train_loss = squared_error(prediction, target_var)
train_loss = train_loss.mean()
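# A minimal continuation sketch (assumed, not from the original script): compile a
# training function for the mean squared reconstruction loss defined above.
import theano
from lasagne.layers import get_all_params
from lasagne.updates import nesterov_momentum

params = get_all_params(l_out, trainable=True)
updates = nesterov_momentum(train_loss, params, learning_rate=0.01, momentum=0.9)
train_fn = theano.function([input_var, target_var], train_loss, updates=updates)

for epoch in range(10):
    print(train_fn(X_train, y_train))  # loss should fall as the net learns y = 2 * X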
def test_glorot_uniform_c01b():
    from lasagne.init import GlorotUniform

    sample = GlorotUniform(c01b=True).sample((75, 2, 2, 75))
    assert -0.1 <= sample.min() < -0.09
    assert 0.09 < sample.max() <= 0.1
def create_architecture(self, input_shape, dense_dim=1024, dout=10, dropout=0.5, input_var_=None, output_var_=None, enc_weights=None): print('[ConvNet: create_architecture] dense_dim:', dense_dim) if input_var_ is not None: self.X_ = input_var_ if output_var_ is not None: self.Y_ = output_var_ self.dropout = dropout (c, d1, d2) = input_shape self.lin = InputLayer((None, c, d1, d2), self.X_) self.lconv1 = Conv2DLayerFast(self.lin, 100, (5, 5), pad=(2, 2), W=GlorotUniform(), nonlinearity=rectify) self.lpool1 = MaxPool2DLayerFast(self.lconv1, (2, 2)) self.lconv2 = Conv2DLayerFast(self.lpool1, 150, (5, 5), pad=(2, 2), W=GlorotUniform(), nonlinearity=rectify) self.lpool2 = MaxPool2DLayerFast(self.lconv2, (2, 2)) self.lconv3 = Conv2DLayerFast(self.lpool2, 200, (3, 3), W=GlorotUniform(), nonlinearity=rectify) self.lconv3_flat = FlattenLayer(self.lconv3) self.ldense1 = DenseLayer(self.lconv3_flat, dense_dim, W=GlorotUniform(), nonlinearity=rectify) self.ldense1_drop = self.ldense1 if dropout > 0: self.ldense1_drop = DropoutLayer(self.ldense1, p=dropout) self.ldense2 = DenseLayer(self.ldense1_drop, dense_dim, W=GlorotUniform(), nonlinearity=rectify) self.ldense2_drop = self.ldense2 if dropout > 0: self.ldense2_drop = DropoutLayer(self.ldense2_drop, p=dropout) self.model_ = DenseLayer(self.ldense2_drop, dout, W=GlorotUniform(), nonlinearity=softmax) self.enc_weights = enc_weights if enc_weights is not None: lasagne.layers.set_all_param_values(self.model_, enc_weights)
def build(input_height, input_width, concat_var): """ Build the discriminator, all weights initialized from scratch :param input_width: :param input_height: :param concat_var: Theano symbolic tensor variable :return: Dictionary that contains the discriminator """ net = { 'input': InputLayer((None, 4, input_height, input_width), input_var=concat_var) } print "Input: {}".format(net['input'].output_shape[1:]) net['merge'] = batch_norm( ConvLayer(net['input'], 3, 1, pad=0, W=GlorotUniform(gain="relu"), flip_filters=False)) print "merge: {}".format(net['merge'].output_shape[1:]) net['conv1'] = batch_norm( ConvLayer(net['merge'], 32, 3, pad=1, W=GlorotUniform(gain="relu"))) print "conv1: {}".format(net['conv1'].output_shape[1:]) net['pool1'] = PoolLayer(net['conv1'], 4) print "pool1: {}".format(net['pool1'].output_shape[1:]) net['conv2_1'] = batch_norm( ConvLayer(net['pool1'], 64, 3, pad=1, W=GlorotUniform(gain="relu"))) print "conv2_1: {}".format(net['conv2_1'].output_shape[1:]) net['conv2_2'] = batch_norm( ConvLayer(net['conv2_1'], 64, 3, pad=1, W=GlorotUniform(gain="relu"))) print "conv2_2: {}".format(net['conv2_2'].output_shape[1:]) net['pool2'] = PoolLayer(net['conv2_2'], 2) print "pool2: {}".format(net['pool2'].output_shape[1:]) net['conv3_1'] = batch_norm( ConvLayer(net['pool2'], 64, 3, pad=1, W=GlorotUniform(gain="relu"))) print "conv3_1: {}".format(net['conv3_1'].output_shape[1:]) net['conv3_2'] = batch_norm( ConvLayer(net['conv3_1'], 64, 3, pad=1, W=GlorotUniform(gain="relu"))) print "conv3_2: {}".format(net['conv3_2'].output_shape[1:]) net['pool3'] = PoolLayer(net['conv3_2'], 2) print "pool3: {}".format(net['pool3'].output_shape[1:]) net['fc4'] = batch_norm( DenseLayer(net['pool3'], num_units=100, W=GlorotUniform(gain="relu"))) print "fc4: {}".format(net['fc4'].output_shape[1:]) net['fc5'] = batch_norm( DenseLayer(net['fc4'], num_units=2, W=GlorotUniform(gain="relu"))) print "fc5: {}".format(net['fc5'].output_shape[1:]) net['prob'] = batch_norm( DenseLayer(net['fc5'], num_units=1, W=GlorotUniform(gain=1.0), nonlinearity=sigmoid)) print "prob: {}".format(net['prob'].output_shape[1:]) return net
X_train = min_max(X_train) X_test = min_max(X_test) X_tgt_test = min_max(X_tgt_test) [n, c, d1, d2] = X_train.shape ###### CONVNET ###### print('Create ConvNet....') Xnet_ = T.ftensor4('x') Ynet_ = T.ivector('y') lnet_in = InputLayer((None, c, d1, d2), Xnet_) lnet_conv1 = Conv2DLayerFast(lnet_in, 100, (5, 5), pad=(2, 2), W=GlorotUniform(), nonlinearity=rectify) lnet_pool1 = MaxPool2DLayerFast(lnet_conv1, (2, 2)) lnet_conv2 = Conv2DLayerFast(lnet_pool1, 150, (5, 5), pad=(2, 2), W=GlorotUniform(), nonlinearity=rectify) lnet_pool2 = MaxPool2DLayerFast(lnet_conv2, (2, 2)) lnet_conv3 = Conv2DLayerFast(lnet_pool2, 200, (3, 3), W=GlorotUniform(), nonlinearity=rectify) lnet_conv3_flat = FlattenLayer(lnet_conv3)
def multi_task_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats, lambda_val=0.5 * 1e-4): print("Building multi task model with 1D Convolution") vocab_size = wordEmbeddings.shape[1] wordDim = wordEmbeddings.shape[0] kw = 2 num_filters = seqlen - kw + 1 stride = 1 filter_size = wordDim pool_size = num_filters input = InputLayer((None, seqlen, num_feats), input_var=input_var) batchsize, _, _ = input.input_var.shape #span emb1 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape1 = ReshapeLayer(emb1, (batchsize, seqlen, num_feats * wordDim)) conv1d_1 = DimshuffleLayer( Conv1DLayer(reshape1, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1)) maxpool_1 = MaxPool1DLayer(conv1d_1, pool_size=pool_size) hid_1 = DenseLayer(maxpool_1, num_units=args.hiddenDim, nonlinearity=sigmoid) network_1 = DenseLayer(hid_1, num_units=2, nonlinearity=softmax) """ #DocTimeRel emb2 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape2 = ReshapeLayer(emb2, (batchsize, seqlen, num_feats*wordDim)) conv1d_2 = DimshuffleLayer(Conv1DLayer(reshape2, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh,W=GlorotUniform()), (0,2,1)) maxpool_2 = MaxPool1DLayer(conv1d_2, pool_size=pool_size) hid_2 = DenseLayer(maxpool_2, num_units=args.hiddenDim, nonlinearity=sigmoid) network_2 = DenseLayer(hid_2, num_units=5, nonlinearity=softmax) """ #Type emb3 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape3 = ReshapeLayer(emb3, (batchsize, seqlen, num_feats * wordDim)) conv1d_3 = DimshuffleLayer( Conv1DLayer(reshape3, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1)) maxpool_3 = MaxPool1DLayer(conv1d_3, pool_size=pool_size) hid_3 = DenseLayer(maxpool_3, num_units=args.hiddenDim, nonlinearity=sigmoid) network_3 = DenseLayer(hid_3, num_units=4, nonlinearity=softmax) #Degree emb4 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape4 = ReshapeLayer(emb4, (batchsize, seqlen, num_feats * wordDim)) conv1d_4 = DimshuffleLayer( Conv1DLayer(reshape4, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1)) maxpool_4 = MaxPool1DLayer(conv1d_4, pool_size=pool_size) hid_4 = DenseLayer(maxpool_4, num_units=args.hiddenDim, nonlinearity=sigmoid) network_4 = DenseLayer(hid_4, num_units=4, nonlinearity=softmax) #Polarity emb5 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape5 = ReshapeLayer(emb5, (batchsize, seqlen, num_feats * wordDim)) conv1d_5 = DimshuffleLayer( Conv1DLayer(reshape5, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1)) maxpool_5 = MaxPool1DLayer(conv1d_5, pool_size=pool_size) hid_5 = DenseLayer(maxpool_5, num_units=args.hiddenDim, nonlinearity=sigmoid) network_5 = DenseLayer(hid_5, num_units=3, nonlinearity=softmax) #ContextualModality emb6 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape6 = ReshapeLayer(emb6, (batchsize, seqlen, num_feats * wordDim)) conv1d_6 = DimshuffleLayer( Conv1DLayer(reshape6, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1)) maxpool_6 = MaxPool1DLayer(conv1d_6, pool_size=pool_size) hid_6 = DenseLayer(maxpool_6, num_units=args.hiddenDim, 
nonlinearity=sigmoid) network_6 = DenseLayer(hid_6, num_units=5, nonlinearity=softmax) """ #ContextualAspect emb7 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape7 = ReshapeLayer(emb7, (batchsize, seqlen, num_feats*wordDim)) conv1d_7 = DimshuffleLayer(Conv1DLayer(reshape7, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh,W=GlorotUniform()), (0,2,1)) maxpool_7 = MaxPool1DLayer(conv1d_7, pool_size=pool_size) hid_7 = DenseLayer(maxpool_7, num_units=args.hiddenDim, nonlinearity=sigmoid) network_7 = DenseLayer(hid_7, num_units=4, nonlinearity=softmax) """ """ #Permanence emb8 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape8 = ReshapeLayer(emb8, (batchsize, seqlen, num_feats*wordDim)) conv1d_8 = DimshuffleLayer(Conv1DLayer(reshape8, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh,W=GlorotUniform()), (0,2,1)) maxpool_8 = MaxPool1DLayer(conv1d_8, pool_size=pool_size) hid_8 = DenseLayer(maxpool_8, num_units=args.hiddenDim, nonlinearity=sigmoid) network_8 = DenseLayer(hid_8, num_units=4, nonlinearity=softmax) """ # Is this important? """ network_1_out, network_2_out, network_3_out, network_4_out, \ network_5_out, network_6_out, network_7_out, network_8_out = \ get_output([network_1, network_2, network_3, network_4, network_5, network_6, network_7, network_8]) """ network_1_out = get_output(network_1) network_3_out = get_output(network_3) network_4_out = get_output(network_4) network_5_out = get_output(network_5) network_6_out = get_output(network_6) loss_1 = T.mean(binary_crossentropy( network_1_out, target_var)) + regularize_layer_params_weighted( { emb1: lambda_val, conv1d_1: lambda_val, hid_1: lambda_val, network_1: lambda_val }, l2) updates_1 = adagrad(loss_1, get_all_params(network_1, trainable=True), learning_rate=args.step) train_fn_1 = theano.function([input_var, target_var], loss_1, updates=updates_1, allow_input_downcast=True) val_acc_1 = T.mean( binary_accuracy(get_output(network_1, deterministic=True), target_var)) val_fn_1 = theano.function([input_var, target_var], val_acc_1, allow_input_downcast=True) """ loss_2 = T.mean(categorical_crossentropy(network_2_out,target_var)) + regularize_layer_params_weighted({emb2:lambda_val, conv1d_2:lambda_val, hid_2:lambda_val, network_2:lambda_val} , l2) updates_2 = adagrad(loss_2, get_all_params(network_2, trainable=True), learning_rate=args.step) train_fn_2 = theano.function([input_var, target_var], loss_2, updates=updates_2, allow_input_downcast=True) val_acc_2 = T.mean(categorical_accuracy(get_output(network_2, deterministic=True), target_var)) val_fn_2 = theano.function([input_var, target_var], val_acc_2, allow_input_downcast=True) """ loss_3 = T.mean(categorical_crossentropy( network_3_out, target_var)) + regularize_layer_params_weighted( { emb3: lambda_val, conv1d_3: lambda_val, hid_3: lambda_val, network_3: lambda_val }, l2) updates_3 = adagrad(loss_3, get_all_params(network_3, trainable=True), learning_rate=args.step) train_fn_3 = theano.function([input_var, target_var], loss_3, updates=updates_3, allow_input_downcast=True) val_acc_3 = T.mean( categorical_accuracy(get_output(network_3, deterministic=True), target_var)) val_fn_3 = theano.function([input_var, target_var], val_acc_3, allow_input_downcast=True) loss_4 = T.mean(categorical_crossentropy( network_4_out, target_var)) + regularize_layer_params_weighted( { emb4: lambda_val, conv1d_4: lambda_val, hid_4: lambda_val, network_4: lambda_val }, 
l2) updates_4 = adagrad(loss_4, get_all_params(network_4, trainable=True), learning_rate=args.step) train_fn_4 = theano.function([input_var, target_var], loss_4, updates=updates_4, allow_input_downcast=True) val_acc_4 = T.mean( categorical_accuracy(get_output(network_4, deterministic=True), target_var)) val_fn_4 = theano.function([input_var, target_var], val_acc_4, allow_input_downcast=True) loss_5 = T.mean(categorical_crossentropy( network_5_out, target_var)) + regularize_layer_params_weighted( { emb5: lambda_val, conv1d_5: lambda_val, hid_5: lambda_val, network_5: lambda_val }, l2) updates_5 = adagrad(loss_5, get_all_params(network_5, trainable=True), learning_rate=args.step) train_fn_5 = theano.function([input_var, target_var], loss_5, updates=updates_5, allow_input_downcast=True) val_acc_5 = T.mean( categorical_accuracy(get_output(network_5, deterministic=True), target_var)) val_fn_5 = theano.function([input_var, target_var], val_acc_5, allow_input_downcast=True) loss_6 = T.mean(categorical_crossentropy( network_6_out, target_var)) + regularize_layer_params_weighted( { emb6: lambda_val, conv1d_6: lambda_val, hid_6: lambda_val, network_6: lambda_val }, l2) updates_6 = adagrad(loss_6, get_all_params(network_6, trainable=True), learning_rate=args.step) train_fn_6 = theano.function([input_var, target_var], loss_6, updates=updates_6, allow_input_downcast=True) val_acc_6 = T.mean( categorical_accuracy(get_output(network_6, deterministic=True), target_var)) val_fn_6 = theano.function([input_var, target_var], val_acc_6, allow_input_downcast=True) """ loss_7 = T.mean(categorical_crossentropy(network_7_out,target_var)) + regularize_layer_params_weighted({emb7:lambda_val, conv1d_7:lambda_val, hid_7:lambda_val, network_7:lambda_val} , l2) updates_7 = adagrad(loss_7, get_all_params(network_7, trainable=True), learning_rate=args.step) train_fn_7 = theano.function([input_var, target_var], loss_7, updates=updates_7, allow_input_downcast=True) val_acc_7 = T.mean(categorical_accuracy(get_output(network_7, deterministic=True), target_var)) val_fn_7 = theano.function([input_var, target_var], val_acc_7, allow_input_downcast=True) loss_8 = T.mean(categorical_crossentropy(network_8_out,target_var)) + regularize_layer_params_weighted({emb8:lambda_val, conv1d_8:lambda_val, hid_8:lambda_val, network_8:lambda_val} , l2) updates_8 = adagrad(loss_8, get_all_params(network_8, trainable=True), learning_rate=args.step) train_fn_8 = theano.function([input_var, target_var], loss_8, updates=updates_8, allow_input_downcast=True) val_acc_8 = T.mean(categorical_accuracy(get_output(network_8, deterministic=True), target_var)) val_fn_8 = theano.function([input_var, target_var], val_acc_8, allow_input_downcast=True) """ """ return train_fn_1, val_fn_1, network_1, train_fn_2, val_fn_2, network_2, train_fn_3, val_fn_3, \ network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \ train_fn_6, val_fn_6, network_6, train_fn_7, val_fn_7, network_7, train_fn_8, val_fn_8, network_8 """ return train_fn_1, val_fn_1, network_1, train_fn_3, val_fn_3, \ network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \ train_fn_6, val_fn_6, network_6
def createCNN(self): net = {} net['input'] = lasagne.layers.InputLayer(shape=(None, self.nChannels, self.imageHeight, self.imageWidth), input_var=self.data) print("Input shape: {0}".format(net['input'].output_shape)) #STAGE 1 net['s1_conv1_1'] = batch_norm( Conv2DLayer(net['input'], 64, 3, pad='same', W=GlorotUniform('relu'))) net['s1_conv1_2'] = batch_norm( Conv2DLayer(net['s1_conv1_1'], 64, 3, pad='same', W=GlorotUniform('relu'))) net['s1_pool1'] = lasagne.layers.Pool2DLayer(net['s1_conv1_2'], 2) net['s1_conv2_1'] = batch_norm( Conv2DLayer(net['s1_pool1'], 128, 3, pad=1, W=GlorotUniform('relu'))) net['s1_conv2_2'] = batch_norm( Conv2DLayer(net['s1_conv2_1'], 128, 3, pad=1, W=GlorotUniform('relu'))) net['s1_pool2'] = lasagne.layers.Pool2DLayer(net['s1_conv2_2'], 2) net['s1_conv3_1'] = batch_norm( Conv2DLayer(net['s1_pool2'], 256, 3, pad=1, W=GlorotUniform('relu'))) net['s1_conv3_2'] = batch_norm( Conv2DLayer(net['s1_conv3_1'], 256, 3, pad=1, W=GlorotUniform('relu'))) net['s1_pool3'] = lasagne.layers.Pool2DLayer(net['s1_conv3_2'], 2) net['s1_conv4_1'] = batch_norm( Conv2DLayer(net['s1_pool3'], 512, 3, pad=1, W=GlorotUniform('relu'))) net['s1_conv4_2'] = batch_norm( Conv2DLayer(net['s1_conv4_1'], 512, 3, pad=1, W=GlorotUniform('relu'))) net['s1_pool4'] = lasagne.layers.Pool2DLayer(net['s1_conv4_2'], 2) net['s1_fc1_dropout'] = lasagne.layers.DropoutLayer(net['s1_pool4'], p=0.5) net['s1_fc1'] = batch_norm( lasagne.layers.DenseLayer(net['s1_fc1_dropout'], num_units=256, W=GlorotUniform('relu'))) net['s1_output'] = lasagne.layers.DenseLayer(net['s1_fc1'], num_units=136, nonlinearity=None) net['s1_landmarks'] = LandmarkInitLayer(net['s1_output'], self.initLandmarks) if self.confidenceLayer: net['s1_confidence'] = lasagne.layers.DenseLayer( net['s1_fc1'], num_units=2, W=GlorotUniform('relu'), nonlinearity=lasagne.nonlinearities.softmax) for i in range(1, self.nStages): self.addDANStage(i + 1, net) net['output'] = net['s' + str(self.nStages) + '_landmarks'] if self.confidenceLayer: net['output'] = lasagne.layers.ConcatLayer( [net['output'], net['s1_confidence']]) return net
# ('dense2', DenseLayer), # ('dropout2', DropoutLayer), ('output', DenseLayer) ] #0.686160 # inDrop=0.2, den0=1000, den0drop=.6, den1=1000, den1drop=0.6 from theano import tensor as T np.random.seed(5) net0 = NeuralNet( layers=layers0, input_shape=(None, num_features), inputDropout0_p=0.5, dense0_num_units=80, dense0_W=GlorotUniform(), dense0_b=Constant(1.0), dense0_nonlinearity=rectify, dropout0_p=0.2, # noise0_sigma=2, dense1_num_units=80, dense1_W=GlorotUniform(), dense1_b=Constant(1.0), dense1_nonlinearity=rectify, dropout1_p=0.2, # dense2_num_units=50, # dense2_W=GlorotUniform(), # dense2_nonlinearity=rectify, # dense2_b = Constant(1.0), # dropout2_p=0.2, output_num_units=1,
def addDANStage(self, stageIdx, net): prevStage = 's' + str(stageIdx - 1) curStage = 's' + str(stageIdx) #CONNNECTION LAYERS OF PREVIOUS STAGE net[prevStage + '_transform_params'] = TransformParamsLayer( net[prevStage + '_landmarks'], self.initLandmarks) net[prevStage + '_img_output'] = AffineTransformLayer( net['input'], net[prevStage + '_transform_params']) net[prevStage + '_landmarks_affine'] = LandmarkTransformLayer( net[prevStage + '_landmarks'], net[prevStage + '_transform_params']) net[prevStage + '_img_landmarks'] = LandmarkImageLayer( net[prevStage + '_landmarks_affine'], (self.imageHeight, self.imageWidth), self.landmarkPatchSize) net[prevStage + '_img_feature'] = lasagne.layers.DenseLayer( net[prevStage + '_fc1'], num_units=56 * 56, W=GlorotUniform('relu')) net[prevStage + '_img_feature'] = lasagne.layers.ReshapeLayer( net[prevStage + '_img_feature'], (-1, 1, 56, 56)) net[prevStage + '_img_feature'] = lasagne.layers.Upscale2DLayer( net[prevStage + '_img_feature'], 2) #CURRENT STAGE net[curStage + '_input'] = batch_norm( lasagne.layers.ConcatLayer([ net[prevStage + '_img_output'], net[prevStage + '_img_landmarks'], net[prevStage + '_img_feature'] ], 1)) net[curStage + '_conv1_1'] = batch_norm( Conv2DLayer(net[curStage + '_input'], 64, 3, pad='same', W=GlorotUniform('relu'))) net[curStage + '_conv1_2'] = batch_norm( Conv2DLayer(net[curStage + '_conv1_1'], 64, 3, pad='same', W=GlorotUniform('relu'))) net[curStage + '_pool1'] = lasagne.layers.Pool2DLayer( net[curStage + '_conv1_2'], 2) net[curStage + '_conv2_1'] = batch_norm( Conv2DLayer(net[curStage + '_pool1'], 128, 3, pad=1, W=GlorotUniform('relu'))) net[curStage + '_conv2_2'] = batch_norm( Conv2DLayer(net[curStage + '_conv2_1'], 128, 3, pad=1, W=GlorotUniform('relu'))) net[curStage + '_pool2'] = lasagne.layers.Pool2DLayer( net[curStage + '_conv2_2'], 2) net[curStage + '_conv3_1'] = batch_norm( Conv2DLayer(net[curStage + '_pool2'], 256, 3, pad=1, W=GlorotUniform('relu'))) net[curStage + '_conv3_2'] = batch_norm( Conv2DLayer(net[curStage + '_conv3_1'], 256, 3, pad=1, W=GlorotUniform('relu'))) net[curStage + '_pool3'] = lasagne.layers.Pool2DLayer( net[curStage + '_conv3_2'], 2) net[curStage + '_conv4_1'] = batch_norm( Conv2DLayer(net[curStage + '_pool3'], 512, 3, pad=1, W=GlorotUniform('relu'))) net[curStage + '_conv4_2'] = batch_norm( Conv2DLayer(net[curStage + '_conv4_1'], 512, 3, pad=1, W=GlorotUniform('relu'))) net[curStage + '_pool4'] = lasagne.layers.Pool2DLayer( net[curStage + '_conv4_2'], 2) net[curStage + '_pool4'] = lasagne.layers.FlattenLayer(net[curStage + '_pool4']) net[curStage + '_fc1_dropout'] = lasagne.layers.DropoutLayer( net[curStage + '_pool4'], p=0.5) net[curStage + '_fc1'] = batch_norm( lasagne.layers.DenseLayer(net[curStage + '_fc1_dropout'], num_units=256, W=GlorotUniform('relu'))) net[curStage + '_output'] = lasagne.layers.DenseLayer( net[curStage + '_fc1'], num_units=136, nonlinearity=None) net[curStage + '_landmarks'] = lasagne.layers.ElemwiseSumLayer( [net[prevStage + '_landmarks_affine'], net[curStage + '_output']]) net[curStage + '_landmarks'] = LandmarkTransformLayer( net[curStage + '_landmarks'], net[prevStage + '_transform_params'], True)
def create_architecture(self, input_shape, dense_dim=1024, input_var_=None, output_var_=None, convnet_=None, is_enc_fixed=False): print('[ConvAE: create_architecture]') if input_var_ is not None: self.X_ = input_var_ if output_var_ is not None: self.Y_ = output_var_ (c, d1, d2) = input_shape self.lin = InputLayer((None, c, d1, d2), self.X_) if convnet_ is not None: self.lconv1 = Conv2DLayerFast(self.lin, 100, (5, 5), pad=(2, 2), W=convnet_.lconv1.W, nonlinearity=rectify) else: self.lconv1 = Conv2DLayerFast(self.lin, 100, (5, 5), pad=(2, 2), W=GlorotUniform(), nonlinearity=rectify) self.lpool1 = MaxPool2DLayerFast(self.lconv1, (2, 2)) if convnet_ is not None: self.lconv2 = Conv2DLayerFast(self.lpool1, 150, (5, 5), pad=(2, 2), W=convnet_.lconv2.W, nonlinearity=rectify) else: self.lconv2 = Conv2DLayerFast(self.lpool1, 150, (5, 5), pad=(2, 2), W=GlorotUniform(), nonlinearity=rectify) self.lpool2 = MaxPool2DLayerFast(self.lconv2, (2, 2)) if convnet_ is not None: self.lconv3 = Conv2DLayerFast(self.lpool2, 200, (3, 3), W=convnet_.lconv3.W, nonlinearity=rectify) else: self.lconv3 = Conv2DLayerFast(self.lpool2, 200, (3, 3), W=GlorotUniform(), nonlinearity=rectify) [nd, nf, dc1, dc2] = get_output_shape(self.lconv3) self.lconv3_flat = FlattenLayer(self.lconv3) [_, dflat] = get_output_shape(self.lconv3_flat) if convnet_ is not None: self.ldense1 = DenseLayer(self.lconv3_flat, dense_dim, W=convnet_.ldense1.W, nonlinearity=rectify) else: self.ldense1 = DenseLayer(self.lconv3_flat, dense_dim, W=GlorotUniform(), nonlinearity=rectify) if convnet_ is not None: self.ldense2 = DenseLayer(self.ldense1, dense_dim, W=convnet_.ldense2.W, nonlinearity=rectify) else: self.ldense2 = DenseLayer(self.ldense1, dense_dim, W=GlorotUniform(), nonlinearity=rectify) self.ldense3 = DenseLayer(self.ldense2, dflat, W=GlorotUniform(), nonlinearity=rectify) self.ldense3_reshape = ReshapeLayer(self.ldense3, ([0], nf, dc1, -1)) # lae_conv3 self.ldeconv1 = Conv2DLayerFast(self.ldense3_reshape, 150, (3, 3), pad=(2, 2), W=GlorotUniform(), nonlinearity=rectify) self.lunpool1 = Upscale2DLayer(self.ldeconv1, (2, 2)) self.ldeconv2 = Conv2DLayerFast(self.lunpool1, 100, (5, 5), pad=(2, 2), W=GlorotUniform(), nonlinearity=rectify) self.lunpool2 = Upscale2DLayer(self.ldeconv2, (2, 2)) self.model_ = Conv2DLayerFast(self.lunpool2, 1, (5, 5), pad=(2, 2), W=GlorotUniform(), nonlinearity=linear) self.is_enc_fixed = is_enc_fixed
def __init__(self, incomings, hid_state_size, max_sentence, Wb=GlorotUniform(), W1=GlorotUniform(), W2=GlorotUniform(), b1=Constant(0.), b2=Constant(0, ), resetgate=GRU_Gate(), updategate=GRU_Gate(), hid_update=GRU_Gate(nonlinearity=nonlin.tanh), n_pass=2, time_embedding=False, T_=Normal(), **kwargs): super(EpMemModule, self).__init__(incomings, **kwargs) # Create parameters for computing gate self.Wb = self.add_param(Wb, (1, hid_state_size), name="Wb") self.W1 = self.add_param(W2, (1, 9), name="W1") self.W2 = self.add_param(W1, (hid_state_size, 1), name="W2") self.b1 = self.add_param(b2, (hid_state_size, ), name="b1", regularizable=False) self.b2 = self.add_param(b1, (1, ), name="b2", regularizable=False) self.max_sentence = max_sentence # sentence masking # sentence_mask_mat[i] = [1111 ... (i times) ... 11110000 ... (n-i times) ... 000] smat = np.zeros((max_sentence, max_sentence), dtype=theano.config.floatX) for i in xrange(smat.shape[0]): for j in xrange(smat.shape[1]): smat[i, j] = (0 if j - i > 0 else 1) self.sentence_mask_mat = theano.shared(smat, name="sentence_mask_mat", borrow=True) self.hid_state_size = hid_state_size # The lines below is modified from lasagne's GRU input_shape = self.input_shapes[0] num_inputs = np.prod(input_shape[2:]) self.resetgate = resetgate self.updategate = updategate self.hid_update = hid_update def add_gate(gate, gate_name): return (self.add_param(gate.W_in, (num_inputs, hid_state_size), name="W_in_to_{}".format(gate_name)), self.add_param(gate.W_hid, (hid_state_size, hid_state_size), name="W_hid_to_{}".format(gate_name)), self.add_param(gate.b, (hid_state_size, ), name="b_{}".format(gate_name), regularizable=False), gate.nonlinearity) # Add in all parameters from gates (self.W_in_to_updategate, self.W_hid_to_updategate, self.b_updategate, self.nonlinearity_updategate) = add_gate(updategate, 'updategate') (self.W_in_to_resetgate, self.W_hid_to_resetgate, self.b_resetgate, self.nonlinearity_resetgate) = add_gate(resetgate, 'resetgate') (self.W_in_to_hid_update, self.W_hid_to_hid_update, self.b_hid_update, self.nonlinearity_hid) = add_gate(hid_update, 'hid_update') self.n_pass = n_pass # We use time embedding proposed in End-to-end MemNN(Facebook) self.time_embedding = time_embedding if time_embedding: self.T_ = self.add_param(T_, (int(max_sentence * 1.2), hid_state_size), name='Time_Embedding', regularizable=False)