l0s = cc_layers.ShuffleBC01ToC01BLayer(l0r)

l1a = cc_layers.CudaConvnetConv2DLayer(l0s, n_filters=32, filter_size=6, weights_std=0.01, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l1 = cc_layers.CudaConvnetPooling2DLayer(l1a, pool_size=2)

l2a = cc_layers.CudaConvnetConv2DLayer(l1, n_filters=64, filter_size=5, weights_std=0.01, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l2 = cc_layers.CudaConvnetPooling2DLayer(l2a, pool_size=2)

l3a = cc_layers.CudaConvnetConv2DLayer(l2, n_filters=128, filter_size=3, weights_std=0.01, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l3b = cc_layers.CudaConvnetConv2DLayer(l3a, n_filters=128, filter_size=3, pad=0, weights_std=0.1, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l3 = cc_layers.CudaConvnetPooling2DLayer(l3b, pool_size=2)

l3s = cc_layers.ShuffleC01BToBC01Layer(l3)
l3f = layers.FlattenLayer(l3s)

l4a = layers.DenseLayer(l3f, n_outputs=512, weights_std=0.01, init_bias_value=0.1, dropout=0.5, nonlinearity=layers.identity)
l4 = layers.FeatureMaxPoolingLayer(l4a, pool_size=2, feature_dim=1, implementation='reshape')

j4 = layers.MultiRotMergeLayer(l4, num_views=4)  # merge convolutional parts

l5a = layers.DenseLayer(j4, n_outputs=4096, weights_std=0.001, init_bias_value=0.01, dropout=0.5, nonlinearity=layers.identity)
l5 = layers.FeatureMaxPoolingLayer(l5a, pool_size=2, feature_dim=1, implementation='reshape')

l6a = layers.DenseLayer(l5, n_outputs=37, weights_std=0.01, init_bias_value=0.1, dropout=0.5, nonlinearity=layers.identity)

# l6 = layers.OutputLayer(l5, error_measure='mse')
l6 = custom.OptimisedDivGalaxyOutputLayer(l6a)  # incorporates the constraints on the output (probabilities sum to one, weighting, etc.)

xs_shared = [theano.shared(np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
             for _ in xrange(num_input_representations)]
l1a = cc_layers.CudaConvnetConv2DLayer(l0s, n_filters=32, filter_size=6, weights_std=0.01, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l1 = cc_layers.CudaConvnetPooling2DLayer(l1a, pool_size=2)

l2a = cc_layers.CudaConvnetConv2DLayer(l1, n_filters=64, filter_size=5, weights_std=0.01, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l2 = cc_layers.CudaConvnetPooling2DLayer(l2a, pool_size=2)

l3a = cc_layers.CudaConvnetConv2DLayer(l2, n_filters=128, filter_size=3, weights_std=0.01, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l3b = cc_layers.CudaConvnetConv2DLayer(l3a, n_filters=192, filter_size=3, pad=0, weights_std=0.1, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l3 = cc_layers.CudaConvnetPooling2DLayer(l3b, pool_size=2)

l3s = cc_layers.ShuffleC01BToBC01Layer(l3)

j3 = layers.MultiRotMergeLayer(l3s, num_views=4)  # merge convolutional parts

l4 = layers.DenseLayer(j3, n_outputs=4096, weights_std=0.001, init_bias_value=0.01, dropout=0.5)
# l4a = layers.DenseLayer(j3, n_outputs=4096, weights_std=0.001, init_bias_value=0.01, dropout=0.5, nonlinearity=layers.identity)
# l4 = layers.FeatureMaxPoolingLayer(l4a, pool_size=2, feature_dim=1, implementation='reshape')

# l5 = layers.DenseLayer(l4, n_outputs=37, weights_std=0.01, init_bias_value=0.0, dropout=0.5, nonlinearity=custom.clip_01)  # nonlinearity=layers.identity
l5 = layers.DenseLayer(l4, n_outputs=37, weights_std=0.01, init_bias_value=0.1, dropout=0.5, nonlinearity=layers.identity)

# l6 = layers.OutputLayer(l5, error_measure='mse')
l6 = custom.OptimisedDivGalaxyOutputLayer(l5)  # incorporates the constraints on the output (probabilities sum to one, weighting, etc.)

xs_shared = [theano.shared(np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
             for _ in range(num_input_representations)]

idx = T.lscalar('idx')
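# Hedged sketch (not part of the original file): how `xs_shared` and `idx` are
# typically consumed, namely by slicing a mini-batch out of each shared input
# inside the `givens` of a compiled Theano function. The names `input_vars`,
# `train_loss`, `updates`, and `batch_size` below are assumptions for
# illustration only, not definitions from this configuration.
batch_size = 128  # assumed
givens = {
    input_var: x_shared[idx * batch_size:(idx + 1) * batch_size]
    for input_var, x_shared in zip(input_vars, xs_shared)
}
train = theano.function([idx], train_loss, givens=givens, updates=updates)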
def __init__(self, RecognitionParams, Input, rng, n_samples=1):
    '''
    h = Q_phi(z|x), where phi are parameters, z is our latent class, and x are data
    '''
    super().__init__(Input, rng, n_samples)

    self.n_units = RecognitionParams['rnn_units']
    self.n_convfeatures = RecognitionParams['n_features']

    self.conv_back = RecognitionParams['network']
    conv_cell = RecognitionParams['network']
    conv_cell = ll.DimshuffleLayer(conv_cell, (1, 0, 2))
    self.conv_cell = ll.get_output(conv_cell, inputs=self.Input)

    inp_cell = RecognitionParams['input']
    inp_cell = ll.DimshuffleLayer(inp_cell, (1, 0, 'x'))
    self.inp_cell = ll.get_output(inp_cell, inputs=self.Input)

    inp_back = RecognitionParams['input']
    inp_back = ll.DimshuffleLayer(inp_back, (0, 1, 'x'))
    inp_back = ll.ConcatLayer([self.conv_back, inp_back], axis=2)

    cell_inp = ll.InputLayer((None, self.n_convfeatures + self.n_units + 1 + 1 + 1))
    self.cell = rec.GRUCell(cell_inp, self.n_units, grad_clipping=100.)
    self.p_out = ll.DenseLayer((None, self.n_units + self.n_convfeatures), 1,
                               nonlinearity=lasagne.nonlinearities.sigmoid,
                               b=lasagne.init.Constant(-3.))

    hid_0 = T.zeros([self.Input.shape[0], self.n_units])
    samp_0 = T.zeros([self.Input.shape[0], 1])

    self.back_nn = rec.GRULayer(inp_back, self.n_units, backwards=True)
    self.back_nn = ll.DimshuffleLayer(self.back_nn, (1, 0, 2))
    self.backward = ll.get_output(self.back_nn, inputs=self.Input)

    def sampleStep(conv_cell, inp_cell, back, hid_tm1, samp_tm1, prob_tm1):
        cell_in = T.concatenate([conv_cell, inp_cell, back, samp_tm1, prob_tm1], axis=1)
        rnn_t = self.cell.get_output_for({'input': cell_in, 'output': hid_tm1})
        prob_in = T.concatenate([conv_cell, rnn_t['output']], axis=1)
        prob_t = self.p_out.get_output_for(prob_in)
        samp_t = srng.binomial(prob_t.shape, n=1, p=prob_t,
                               dtype=theano.config.floatX)
        return rnn_t['output'], samp_t, prob_t

    ((rnn_temp, s_t, p_t), updates) = theano.scan(
        fn=sampleStep,
        sequences=[self.conv_cell, self.inp_cell, self.backward],
        # outputs_info=[T.unbroadcast(hid_0, 1), T.unbroadcast(samp_0, 1), T.unbroadcast(samp_0, 1)])
        outputs_info=[hid_0, samp_0, samp_0])

    for k, v in updates.items():
        k.default_update = v

    self.recfunc = theano.function([self.Input], outputs=p_t[:, :, 0].T, updates=updates)
    self.samplefunc = theano.function([self.Input], outputs=s_t[:, :, 0].T, updates=updates)
    self.dualfunc = theano.function([self.Input],
                                    outputs=[p_t[:, :, 0].T, s_t[:, :, 0].T],
                                    updates=updates)
    self.detfunc = self.recfunc
def test_DenseLayer_forward(self):
    input_ = np.random.random((5, 4))
    layer = layers.DenseLayer(3, activation_func=af.Sigmoid())
    rv = layer.forward(input_)
    assert rv.shape == (5, 3)
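# Hedged companion check (a sketch, not from the original suite): it reuses the
# same assumed DenseLayer/forward API as the test above and only verifies that
# the batch dimension is preserved for a batch of size one.
def test_DenseLayer_forward_single_example(self):
    input_ = np.random.random((1, 4))
    layer = layers.DenseLayer(3, activation_func=af.Sigmoid())
    rv = layer.forward(input_)
    assert rv.shape == (1, 3)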
def __init__(self, num_actions, phi_length, width, height, discount=.9,
             learning_rate=.01, batch_size=32, approximator='none'):
    self._batch_size = batch_size
    self._num_input_features = phi_length
    self._phi_length = phi_length
    self._img_width = width
    self._img_height = height
    self._discount = discount
    self.num_actions = num_actions
    self.learning_rate = learning_rate
    self.scale_input_by = 255.0

    # CONSTRUCT THE LAYERS
    self.q_layers = []
    self.q_layers.append(layers.Input2DLayer(self._batch_size,
                                             self._num_input_features,
                                             self._img_height,
                                             self._img_width,
                                             self.scale_input_by))

    if approximator == 'cuda_conv':
        self.q_layers.append(cc_layers.ShuffleBC01ToC01BLayer(self.q_layers[-1]))
        self.q_layers.append(cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                                              n_filters=16,
                                                              filter_size=8,
                                                              stride=4,
                                                              weights_std=.01,
                                                              init_bias_value=0.1))
        self.q_layers.append(cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                                              n_filters=32,
                                                              filter_size=4,
                                                              stride=2,
                                                              weights_std=.01,
                                                              init_bias_value=0.1))
        self.q_layers.append(cc_layers.ShuffleC01BToBC01Layer(self.q_layers[-1]))

    elif approximator == 'conv':
        self.q_layers.append(layers.StridedConv2DLayer(self.q_layers[-1],
                                                       n_filters=16,
                                                       filter_width=8,
                                                       filter_height=8,
                                                       stride_x=4,
                                                       stride_y=4,
                                                       weights_std=.01,
                                                       init_bias_value=0.01))
        self.q_layers.append(layers.StridedConv2DLayer(self.q_layers[-1],
                                                       n_filters=32,
                                                       filter_width=4,
                                                       filter_height=4,
                                                       stride_x=2,
                                                       stride_y=2,
                                                       weights_std=.01,
                                                       init_bias_value=0.01))

    if approximator == 'cuda_conv' or approximator == 'conv':
        self.q_layers.append(layers.DenseLayer(self.q_layers[-1],
                                               n_outputs=256,
                                               weights_std=0.01,
                                               init_bias_value=0.1,
                                               dropout=0,
                                               nonlinearity=layers.rectify))
        self.q_layers.append(layers.DenseLayer(self.q_layers[-1],
                                               n_outputs=num_actions,
                                               weights_std=0.01,
                                               init_bias_value=0.1,
                                               dropout=0,
                                               nonlinearity=layers.identity))

    if approximator == 'none':
        self.q_layers.append(layers.DenseLayerNoBias(self.q_layers[-1],
                                                     n_outputs=num_actions,
                                                     weights_std=0.00,
                                                     dropout=0,
                                                     nonlinearity=layers.identity))

    self.q_layers.append(layers.OutputLayer(self.q_layers[-1]))

    for i in range(len(self.q_layers) - 1):
        print(self.q_layers[i].get_output_shape())

    # Now create a network (using the same weights)
    # for next state q values
    self.next_layers = copy_layers(self.q_layers)
    self.next_layers[0] = layers.Input2DLayer(self._batch_size,
                                              self._num_input_features,
                                              self._img_width,
                                              self._img_height,
                                              self.scale_input_by)
    self.next_layers[1].input_layer = self.next_layers[0]

    self.rewards = T.col()
    self.actions = T.icol()

    # Build the loss function ...
    q_vals = self.q_layers[-1].predictions()
    next_q_vals = self.next_layers[-1].predictions()
    next_maxes = T.max(next_q_vals, axis=1, keepdims=True)
    target = self.rewards + discount * next_maxes
    target = theano.gradient.consider_constant(target)
    diff = target - q_vals

    # Zero out all entries for actions that were not chosen...
    mask = build_mask(T.zeros_like(diff), self.actions, 1.0)
    diff_masked = diff * mask
    error = T.mean(diff_masked ** 2)
    self._loss = error * diff_masked.shape[1]

    # needed below by the update rule
    self._parameters = layers.all_parameters(self.q_layers[-1])

    self._idx = T.lscalar('idx')

    # CREATE VARIABLES FOR INPUT AND OUTPUT
    self.states_shared = theano.shared(
        np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
    self.states_shared_next = theano.shared(
        np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
    self.rewards_shared = theano.shared(
        np.zeros((1, 1), dtype=theano.config.floatX),
        broadcastable=(False, True))
    self.actions_shared = theano.shared(
        np.zeros((1, 1), dtype='int32'),
        broadcastable=(False, True))

    self._givens = {
        self.q_layers[0].input_var:
            self.states_shared[self._idx * self._batch_size:
                               (self._idx + 1) * self._batch_size, :, :, :],
        self.next_layers[0].input_var:
            self.states_shared_next[self._idx * self._batch_size:
                                    (self._idx + 1) * self._batch_size, :, :, :],
        self.rewards:
            self.rewards_shared[self._idx * self._batch_size:
                                (self._idx + 1) * self._batch_size, :],
        self.actions:
            self.actions_shared[self._idx * self._batch_size:
                                (self._idx + 1) * self._batch_size, :]
    }

    self._updates = layers.gen_updates_rmsprop_and_nesterov_momentum(
        self._loss, self._parameters, learning_rate=self.learning_rate,
        rho=0.9, momentum=0.9, epsilon=1e-6)

    self._train = theano.function([self._idx], self._loss,
                                  givens=self._givens,
                                  updates=self._updates)
    self._compute_loss = theano.function([self._idx], self._loss,
                                         givens=self._givens)
    self._compute_q_vals = theano.function([self.q_layers[0].input_var],
                                           self.q_layers[-1].predictions(),
                                           on_unused_input='ignore')
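# Hedged usage sketch (not in the original class): one way to drive the
# compiled `_train` function, by loading a single mini-batch of transitions
# into the shared buffers and running one gradient step. The argument arrays
# and their shapes are assumptions based on the shared variables above.
def train_one_batch(self, states, next_states, rewards, actions):
    # states, next_states: (batch_size, phi_length, height, width), floatX
    # rewards: (batch_size, 1), floatX; actions: (batch_size, 1), int32
    self.states_shared.set_value(states)
    self.states_shared_next.set_value(next_states)
    self.rewards_shared.set_value(rewards)
    self.actions_shared.set_value(actions)
    return self._train(0)  # idx = 0: the batch that was just loaded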
def build(self):
    """build the model. This method should be called after self.add_data.
    """
    x_sym = sparse.csr_matrix('x', dtype='float32')
    self.x_sym = x_sym
    y_sym = T.imatrix('y')
    gx_sym = sparse.csr_matrix('gx', dtype='float32')
    gy_sym = T.ivector('gy')
    gz_sym = T.vector('gz')

    l_x_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]),
                                       input_var=x_sym)
    l_gx_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]),
                                        input_var=gx_sym)
    l_gy_in = lasagne.layers.InputLayer(shape=(None, ), input_var=gy_sym)

    l_x_1 = layers.SparseLayer(l_x_in, self.y.shape[1],
                               nonlinearity=lasagne.nonlinearities.softmax)
    l_x_2 = layers.SparseLayer(l_x_in, self.embedding_size)
    W = l_x_2.W
    l_x_2 = layers.DenseLayer(l_x_2, self.y.shape[1],
                              nonlinearity=lasagne.nonlinearities.softmax)
    if self.use_feature:
        l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis=1)
        l_x = layers.DenseLayer(l_x, self.y.shape[1],
                                nonlinearity=lasagne.nonlinearities.softmax)
    else:
        l_x = l_x_2

    l_gx = layers.SparseLayer(l_gx_in, self.embedding_size, W=W)
    if self.neg_samp > 0:
        l_gy = lasagne.layers.EmbeddingLayer(l_gy_in,
                                             input_size=self.num_ver,
                                             output_size=self.embedding_size)
        l_gx = lasagne.layers.ElemwiseMergeLayer([l_gx, l_gy], T.mul)
        pgy_sym = lasagne.layers.get_output(l_gx)
        g_loss = -T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis=1) * gz_sym)).sum()
    else:
        l_gx = lasagne.layers.DenseLayer(l_gx, self.num_ver,
                                         nonlinearity=lasagne.nonlinearities.softmax)
        pgy_sym = lasagne.layers.get_output(l_gx)
        g_loss = lasagne.objectives.categorical_crossentropy(pgy_sym, gy_sym).sum()

    self.l = [l_x, l_gx]

    py_sym = lasagne.layers.get_output(l_x)
    loss = lasagne.objectives.categorical_crossentropy(py_sym, y_sym).mean()
    if self.layer_loss and self.use_feature:
        hid_sym = lasagne.layers.get_output(l_x_1)
        loss += lasagne.objectives.categorical_crossentropy(hid_sym, y_sym).mean()
        emd_sym = lasagne.layers.get_output(l_x_2)
        loss += lasagne.objectives.categorical_crossentropy(emd_sym, y_sym).mean()

    params = [l_x_1.W, l_x_1.b, l_x_2.W, l_x_2.b, l_x.W, l_x.b] \
        if self.use_feature else [l_x.W, l_x.b]
    if self.update_emb:
        params = lasagne.layers.get_all_params(l_x)
    updates = lasagne.updates.sgd(loss, params, learning_rate=self.learning_rate)
    self.train_fn = theano.function([x_sym, y_sym], loss, updates=updates)

    g_params = lasagne.layers.get_all_params(l_gx)
    g_updates = lasagne.updates.sgd(g_loss, g_params,
                                    learning_rate=self.g_learning_rate)
    self.g_fn = theano.function([gx_sym, gy_sym, gz_sym], g_loss,
                                updates=g_updates, on_unused_input='ignore')

    self.test_fn = theano.function([x_sym], py_sym)
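# Hedged sketch (not part of the original build method): one way the compiled
# functions above are typically interleaved during training. The batch
# generators `next_graph_batch` and `next_label_batch` are hypothetical names,
# not methods defined in this file.
def step_train(self):
    gx, gy, gz = self.next_graph_batch()   # context pairs: features, indices, +/-1 labels
    g_loss = self.g_fn(gx, gy, gz)         # update the graph-embedding branch
    x, y = self.next_label_batch()         # labelled features and one-hot targets
    loss = self.train_fn(x, y)             # update the supervised branch
    return loss, g_loss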
def build(self):
    """build the model. This method should be called after self.add_data.
    """
    x_sym = sparse.csr_matrix('x', dtype='float32')
    y_sym = T.imatrix('y')  # imatrix: matrix of int32 type
    g_sym = T.imatrix('g')
    gy_sym = T.vector('gy')
    ind_sym = T.ivector('ind')

    l_x_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]),
                                       input_var=x_sym)
    l_g_in = lasagne.layers.InputLayer(shape=(None, 2), input_var=g_sym)
    l_ind_in = lasagne.layers.InputLayer(shape=(None, ), input_var=ind_sym)
    l_gy_in = lasagne.layers.InputLayer(shape=(None, ), input_var=gy_sym)

    num_ver = max(self.graph.keys()) + 1
    l_emb_in = lasagne.layers.SliceLayer(l_g_in, indices=0, axis=1)
    l_emb_in = lasagne.layers.EmbeddingLayer(l_emb_in, input_size=num_ver,
                                             output_size=self.embedding_size)
    l_emb_out = lasagne.layers.SliceLayer(l_g_in, indices=1, axis=1)
    if self.neg_samp > 0:
        l_emb_out = lasagne.layers.EmbeddingLayer(l_emb_out, input_size=num_ver,
                                                  output_size=self.embedding_size)

    l_emd_f = lasagne.layers.EmbeddingLayer(l_ind_in, input_size=num_ver,
                                            output_size=self.embedding_size,
                                            W=l_emb_in.W)
    l_x_hid = layers.SparseLayer(l_x_in, self.y.shape[1],
                                 nonlinearity=lasagne.nonlinearities.softmax)

    if self.use_feature:
        l_emd_f = layers.DenseLayer(l_emd_f, self.y.shape[1],
                                    nonlinearity=lasagne.nonlinearities.softmax)
        l_y = lasagne.layers.ConcatLayer([l_x_hid, l_emd_f], axis=1)
        l_y = layers.DenseLayer(l_y, self.y.shape[1],
                                nonlinearity=lasagne.nonlinearities.softmax)
    else:
        l_y = layers.DenseLayer(l_emd_f, self.y.shape[1],
                                nonlinearity=lasagne.nonlinearities.softmax)

    py_sym = lasagne.layers.get_output(l_y)
    loss = lasagne.objectives.categorical_crossentropy(py_sym, y_sym).mean()
    if self.layer_loss and self.use_feature:
        hid_sym = lasagne.layers.get_output(l_x_hid)
        loss += lasagne.objectives.categorical_crossentropy(hid_sym, y_sym).mean()
        emd_sym = lasagne.layers.get_output(l_emd_f)
        loss += lasagne.objectives.categorical_crossentropy(emd_sym, y_sym).mean()

    if self.neg_samp == 0:
        l_gy = layers.DenseLayer(l_emb_in, num_ver,
                                 nonlinearity=lasagne.nonlinearities.softmax)
        pgy_sym = lasagne.layers.get_output(l_gy)
        g_loss = lasagne.objectives.categorical_crossentropy(
            pgy_sym, lasagne.layers.get_output(l_emb_out)).sum()
    else:
        l_gy = lasagne.layers.ElemwiseMergeLayer([l_emb_in, l_emb_out], T.mul)
        pgy_sym = lasagne.layers.get_output(l_gy)
        g_loss = -T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis=1) * gy_sym)).sum()

    params = [l_emd_f.W, l_emd_f.b, l_x_hid.W, l_x_hid.b, l_y.W, l_y.b] \
        if self.use_feature else [l_y.W, l_y.b]
    if self.update_emb:
        params = lasagne.layers.get_all_params(l_y)
    updates = lasagne.updates.sgd(loss, params, learning_rate=self.learning_rate)
    self.train_fn = theano.function([x_sym, y_sym, ind_sym], loss,
                                    updates=updates, on_unused_input='ignore')
    self.test_fn = theano.function([x_sym, ind_sym], py_sym,
                                   on_unused_input='ignore')

    self.l = [l_gy, l_y]

    g_params = lasagne.layers.get_all_params(l_gy, trainable=True)
    g_updates = lasagne.updates.sgd(g_loss, g_params,
                                    learning_rate=self.g_learning_rate)
    # iteration to update parameters of graph embedding branch
    self.g_fn = theano.function([g_sym, gy_sym], g_loss, updates=g_updates,
                                on_unused_input='ignore')
from mnist_data import y_test_reformatted as y_test

# ===================================================================================

nn = NN.NN(loss_func=SoftMaxCrossEntropyLoss())

convLayer = layers.ConvLayer(
    n_channels=9,
    kernel_size=9,
    weight_init='glorot',
    activation_func=Tanh(),
    flatten=True,
    dropout=0.8,
)
# pool = layers.PoolingLayer(pool_size=7, flatten=True)
dense = layers.DenseLayer(10, Linear(), weight_initialisation='glorot')

nn.add_layer(convLayer)
# nn.add_layer(pool)
nn.add_layer(dense)

optimizer = optimizers.MomentumSGD(learning_rate=0.01, momentum=0.90)
trainer = NN.Trainer(nn, optimizer)

# statistics
batch_size = 50
bar = utils.ProgressBar(len(X), batch_size)
trainer.add_batch_callback(bar)
# loss_history = utils.LossHistory(nn, avg_over=50)
def build(self):
    """build the model. This method should be called after self.add_data.
    """
    # x_sym = sparse.csr_matrix('x', dtype='float32')
    x_sym = T.matrix('x', dtype='float32')
    self.x_sym = x_sym
    y_sym = T.imatrix('y')
    # gx_sym = sparse.csr_matrix('gx', dtype='float32')
    gx_sym = T.matrix('gx', dtype='float32')
    gy_sym = T.ivector('gy')
    gz_sym = T.vector('gz')

    l_x_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]),
                                       input_var=x_sym)
    l_gx_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]),
                                        input_var=gx_sym)
    l_gy_in = lasagne.layers.InputLayer(shape=(None, ), input_var=gy_sym)

    l_x_in1 = lasagne.layers.dropout(l_x_in, p=0.01)
    l_x_1 = layers.DenseLayer(l_x_in1, self.y.shape[1],
                              nonlinearity=lasagne.nonlinearities.softmax)
    l_x_2 = layers.DenseLayer(l_x_in, self.embedding_size)
    W = l_x_2.W
    if self.use_feature:
        l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis=1)
        l_x = layers.DenseLayer(l_x, self.y.shape[1],
                                nonlinearity=lasagne.nonlinearities.softmax)
    else:
        l_x = l_x_2
    l_x_2 = layers.DenseLayer(l_x_2, self.y.shape[1],
                              nonlinearity=lasagne.nonlinearities.softmax)

    l_gx = layers.DenseLayer(l_gx_in, self.embedding_size, W=W)

    HYPOTHETICALLY = {l_gx: (200, self.embedding_size)}
    print("Layer Shape")
    LIN = get_output_shape(l_gx, HYPOTHETICALLY)
    print(HYPOTHETICALLY)
    print(LIN)
    print("graph...")

    if self.neg_samp > 0:
        l_gy = lasagne.layers.EmbeddingLayer(l_gy_in,
                                             input_size=self.num_ver,
                                             output_size=self.embedding_size)
        l_gx = lasagne.layers.ElemwiseMergeLayer([l_gx, l_gy], T.mul)
        pgy_sym = lasagne.layers.get_output(l_gx)
        g_loss = -T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis=1) * gz_sym)).sum()
    else:
        l_gx = lasagne.layers.DenseLayer(l_gx, self.num_ver,
                                         nonlinearity=lasagne.nonlinearities.softmax)
        pgy_sym = lasagne.layers.get_output(l_gx)
        g_loss = lasagne.objectives.categorical_crossentropy(pgy_sym, gy_sym).sum()

    self.l = [l_x, l_gx]

    py_sym = lasagne.layers.get_output(l_x)
    loss = lasagne.objectives.categorical_crossentropy(py_sym, y_sym).mean()
    if self.layer_loss and self.use_feature:
        hid_sym = lasagne.layers.get_output(l_x_1)
        loss += lasagne.objectives.categorical_crossentropy(hid_sym, y_sym).mean()
        emd_sym = lasagne.layers.get_output(l_x_2)
        loss += lasagne.objectives.categorical_crossentropy(emd_sym, y_sym).mean()

    params = [l_x_1.W, l_x_1.b, l_x_2.W, l_x_2.b, l_x.W, l_x.b] \
        if self.use_feature else [l_x.W, l_x.b]
    if self.update_emb:
        params = lasagne.layers.get_all_params(l_x)
    # updates = lasagne.updates.adadelta(loss, params, learning_rate=self.learning_rate)
    # self.train_fn = theano.function([x_sym, y_sym], loss, updates=updates)

    g_params = lasagne.layers.get_all_params(l_gx)
    g_updates = lasagne.updates.adadelta(g_loss, g_params,
                                         learning_rate=self.g_learning_rate)
    self.g_fn = theano.function([gx_sym, gy_sym, gz_sym], g_loss,
                                updates=g_updates, on_unused_input='ignore')
    self.test_fn = theano.function([x_sym], py_sym)

    # source_network = lasagne.layers.DenseLayer(l_x_in, num_units=nb_classes, nonlinearity=lasagne.nonlinearities.softmax)
    domain_network = lasagne.layers.DenseLayer(
        l_x_in, num_units=self.nb_classes,
        nonlinearity=lasagne.nonlinearities.softmax)
    # source_prediction = lasagne.layers.get_output(source_network)
    domain_prediction = lasagne.layers.get_output(domain_network)
    # source_loss = T.mean(lasagne.objectives.categorical_crossentropy(source_prediction, train_y_var))
    # source_params = lasagne.layers.get_all_params(source_network, trainable=True)

    domain_y_var = T.imatrix('domain_label')
    domain_loss = T.mean(lasagne.objectives.categorical_crossentropy(
        domain_prediction, domain_y_var))
    domain_params = lasagne.layers.get_all_params(domain_network, trainable=True)
    common = set(params) & set(domain_params)

    # lambda_val = 1 - 1e-8
    val = 1e-8
    self.lambda_val = theano.shared(lasagne.utils.floatX(val))

    updates = lasagne.updates.adagrad(loss - (val * domain_loss), params,
                                      learning_rate=0.1)
    # updates1 = lasagne.updates.adadelta(source_loss - (lambda_val * domain_loss), source_params, learning_rate=1.0)
    # update blue and green part
    updates2 = lasagne.updates.adagrad(domain_loss,
                                       list(set(domain_params) - common),
                                       learning_rate=1.0)
    updates3 = lasagne.updates.adagrad(-(val * domain_loss), list(common),
                                       learning_rate=1.0)
    updates.update(updates2)
    updates2.update(updates3)

    # domain_y_var = T.imatrix('domain_label')
    self.train_fn = theano.function([x_sym, y_sym, domain_y_var], loss,
                                    updates=updates)
    # train1 = theano.function([l_x_in.input_var, y_sym, domain_y_var], loss, updates=updates1)
    self.train2 = theano.function([l_x_in.input_var, domain_y_var], domain_loss,
                                  updates=updates2)
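# Hedged sketch (not in the original build method): the shared variable
# self.lambda_val defined above is the natural place to store an annealed
# adversarial weight. A common DANN-style ramp is shown below; the schedule
# form and the `progress` argument (training fraction in [0, 1]) are
# assumptions for illustration only.
def set_lambda(self, progress):
    new_val = 2.0 / (1.0 + np.exp(-10.0 * progress)) - 1.0
    self.lambda_val.set_value(lasagne.utils.floatX(new_val))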