l0s = cc_layers.ShuffleBC01ToC01BLayer(l0r) 

l1a = cc_layers.CudaConvnetConv2DLayer(l0s, n_filters=32, filter_size=6, weights_std=0.01, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l1 = cc_layers.CudaConvnetPooling2DLayer(l1a, pool_size=2)

l2a = cc_layers.CudaConvnetConv2DLayer(l1, n_filters=64, filter_size=5, weights_std=0.01, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l2 = cc_layers.CudaConvnetPooling2DLayer(l2a, pool_size=2)

l3a = cc_layers.CudaConvnetConv2DLayer(l2, n_filters=128, filter_size=3, weights_std=0.01, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l3b = cc_layers.CudaConvnetConv2DLayer(l3a, n_filters=128, filter_size=3, pad=0, weights_std=0.1, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l3 = cc_layers.CudaConvnetPooling2DLayer(l3b, pool_size=2)

l3s = cc_layers.ShuffleC01BToBC01Layer(l3)
l3f = layers.FlattenLayer(l3s)

l4a = layers.DenseLayer(l3f, n_outputs=512, weights_std=0.01, init_bias_value=0.1, dropout=0.5, nonlinearity=layers.identity)
l4 = layers.FeatureMaxPoolingLayer(l4a, pool_size=2, feature_dim=1, implementation='reshape')

j4 = layers.MultiRotMergeLayer(l4, num_views=4)  # merge convolutional parts

l5a = layers.DenseLayer(j4, n_outputs=4096, weights_std=0.001, init_bias_value=0.01, dropout=0.5, nonlinearity=layers.identity)
l5 = layers.FeatureMaxPoolingLayer(l5a, pool_size=2, feature_dim=1, implementation='reshape')

l6a = layers.DenseLayer(l5, n_outputs=37, weights_std=0.01, init_bias_value=0.1, dropout=0.5, nonlinearity=layers.identity)

# l6 = layers.OutputLayer(l5, error_measure='mse')
l6 = custom.OptimisedDivGalaxyOutputLayer(l6a) # this incorporates the constraints on the output (probabilities sum to one, weighting, etc.)



xs_shared = [theano.shared(np.zeros((1,1,1,1), dtype=theano.config.floatX)) for _ in range(num_input_representations)]
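
# Note: FeatureMaxPoolingLayer(..., pool_size=2, feature_dim=1, implementation='reshape')
# above acts as a maxout-style non-linearity: each pair of dense-layer features is
# reduced to its maximum. A minimal NumPy sketch of that reshape-and-max idea
# (illustrative only, not the library implementation):
import numpy as np

def feature_max_pool(acts, pool_size=2):
    """Maxout over the feature axis: (batch, n) -> (batch, n // pool_size)."""
    batch, n = acts.shape
    assert n % pool_size == 0
    # group consecutive features and keep the maximum of each group
    return acts.reshape(batch, n // pool_size, pool_size).max(axis=2)

# e.g. the 512-unit l4a above becomes 256 maxout units in l4
x = np.random.randn(8, 512)
print(feature_max_pool(x).shape)  # (8, 256)
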
Example 2
l1a = cc_layers.CudaConvnetConv2DLayer(l0s, n_filters=32, filter_size=6, weights_std=0.01, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l1 = cc_layers.CudaConvnetPooling2DLayer(l1a, pool_size=2)

l2a = cc_layers.CudaConvnetConv2DLayer(l1, n_filters=64, filter_size=5, weights_std=0.01, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l2 = cc_layers.CudaConvnetPooling2DLayer(l2a, pool_size=2)

l3a = cc_layers.CudaConvnetConv2DLayer(l2, n_filters=128, filter_size=3, weights_std=0.01, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l3b = cc_layers.CudaConvnetConv2DLayer(l3a, n_filters=192, filter_size=3, pad=0, weights_std=0.1, init_bias_value=0.1, dropout=0.0, partial_sum=1, untie_biases=True)
l3 = cc_layers.CudaConvnetPooling2DLayer(l3b, pool_size=2)

l3s = cc_layers.ShuffleC01BToBC01Layer(l3)

j3 = layers.MultiRotMergeLayer(l3s, num_views=4)  # merge convolutional parts

l4 = layers.DenseLayer(j3, n_outputs=4096, weights_std=0.001, init_bias_value=0.01, dropout=0.5)

# l4a = layers.DenseLayer(j3, n_outputs=4096, weights_std=0.001, init_bias_value=0.01, dropout=0.5, nonlinearity=layers.identity)
# l4 = layers.FeatureMaxPoolingLayer(l4a, pool_size=2, feature_dim=1, implementation='reshape')

# l5 = layers.DenseLayer(l4, n_outputs=37, weights_std=0.01, init_bias_value=0.0, dropout=0.5, nonlinearity=custom.clip_01) #  nonlinearity=layers.identity)
l5 = layers.DenseLayer(l4, n_outputs=37, weights_std=0.01, init_bias_value=0.1, dropout=0.5, nonlinearity=layers.identity)

# l6 = layers.OutputLayer(l5, error_measure='mse')
l6 = custom.OptimisedDivGalaxyOutputLayer(l5) # this incorporates the constraints on the output (probabilities sum to one, weighting, etc.)



xs_shared = [theano.shared(np.zeros((1,1,1,1), dtype=theano.config.floatX)) for _ in range(num_input_representations)]

idx = T.lscalar('idx')
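
# The shared arrays above are usually indexed with `idx` through `givens`, so each call
# to a compiled function sees exactly one minibatch (the same pattern appears in the
# Q-learner example further down). A minimal, self-contained sketch of that pattern;
# batch_size, the stand-in loss and the variable names here are illustrative:
import numpy as np
import theano
import theano.tensor as T

batch_size = 32  # illustrative value
idx = T.lscalar('idx')
x = T.matrix('x')
x_shared = theano.shared(np.zeros((1, 1), dtype=theano.config.floatX))

# stand-in objective: just the mean of the current minibatch
loss = T.mean(x)

compute_loss = theano.function(
    [idx], loss,
    givens={x: x_shared[idx * batch_size:(idx + 1) * batch_size]})

# load the data once, then iterate over minibatches by index only
x_shared.set_value(np.random.randn(128, 10).astype(theano.config.floatX))
for i in range(128 // batch_size):
    compute_loss(i)
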
Example 3
    def __init__(self, RecognitionParams, Input, rng, n_samples=1):
        '''
        h = Q_phi(z|x), where phi are parameters, z is our latent class, and x are data
        '''
        super().__init__(Input, rng, n_samples)
        self.n_units = RecognitionParams['rnn_units']
        self.n_convfeatures = RecognitionParams['n_features']

        self.conv_back = RecognitionParams['network']

        conv_cell = RecognitionParams['network']
        conv_cell = ll.DimshuffleLayer(conv_cell, (1, 0, 2))
        self.conv_cell = ll.get_output(conv_cell, inputs=self.Input)

        inp_cell = RecognitionParams['input']
        inp_cell = ll.DimshuffleLayer(inp_cell, (1, 0, 'x'))
        self.inp_cell = ll.get_output(inp_cell, inputs=self.Input)

        inp_back = RecognitionParams['input']
        inp_back = ll.DimshuffleLayer(inp_back, (0, 1, 'x'))
        inp_back = ll.ConcatLayer([self.conv_back, inp_back], axis=2)

        cell_inp = ll.InputLayer(
            (None, self.n_convfeatures + self.n_units + 1 + 1 + 1))
        self.cell = rec.GRUCell(cell_inp, self.n_units, grad_clipping=100.)
        self.p_out = ll.DenseLayer((None, self.n_units + self.n_convfeatures),
                                   1,
                                   nonlinearity=lasagne.nonlinearities.sigmoid,
                                   b=lasagne.init.Constant(-3.))

        hid_0 = T.zeros([self.Input.shape[0], self.n_units])
        samp_0 = T.zeros([self.Input.shape[0], 1])

        self.back_nn = rec.GRULayer(inp_back, self.n_units, backwards=True)
        self.back_nn = ll.DimshuffleLayer(self.back_nn, (1, 0, 2))
        self.backward = ll.get_output(self.back_nn, inputs=self.Input)

        def sampleStep(conv_cell, inp_cell, back, hid_tm1, samp_tm1, prob_tm1):

            cell_in = T.concatenate(
                [conv_cell, inp_cell, back, samp_tm1, prob_tm1], axis=1)
            rnn_t = self.cell.get_output_for({
                'input': cell_in,
                'output': hid_tm1
            })
            prob_in = T.concatenate([conv_cell, rnn_t['output']], axis=1)
            prob_t = self.p_out.get_output_for(prob_in)
            samp_t = srng.binomial(prob_t.shape,
                                   n=1,
                                   p=prob_t,
                                   dtype=theano.config.floatX)

            return rnn_t['output'], samp_t, prob_t

        ((rnn_temp, s_t, p_t), updates) = \
            theano.scan(fn=sampleStep,
                        sequences=[self.conv_cell, self.inp_cell, self.backward],
                        # outputs_info=[T.unbroadcast(hid_0, 1), T.unbroadcast(samp_0, 1), T.unbroadcast(samp_0, 1)])
                        outputs_info=[hid_0, samp_0, samp_0])

        for k, v in updates.items():
            k.default_update = v

        self.recfunc = theano.function([self.Input],
                                       outputs=p_t[:, :, 0].T,
                                       updates=updates)
        self.samplefunc = theano.function([self.Input],
                                          outputs=s_t[:, :, 0].T,
                                          updates=updates)
        self.dualfunc = theano.function(
            [self.Input],
            outputs=[p_t[:, :, 0].T, s_t[:, :, 0].T],
            updates=updates)
        self.detfunc = self.recfunc
Example 4
    def test_DenseLayer_forward(self):
        input_ = np.random.random((5, 4))
        layer = layers.DenseLayer(3, activation_func=af.Sigmoid())
        rv = layer.forward(input_)
        assert rv.shape == (5, 3)
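
# A minimal sketch of the kind of layer this test exercises: a dense layer that sizes
# its weights lazily from the first input, so a (5, 4) batch with 3 output units yields
# a (5, 3) result. This is an illustrative stand-in, not the actual layers.DenseLayer.
import numpy as np

class TinyDenseLayer:
    def __init__(self, n_out, activation_func=None):
        self.n_out = n_out
        self.activation_func = activation_func
        self.W = None
        self.b = None

    def forward(self, x):
        if self.W is None:  # lazily create weights from the first input's width
            self.W = np.random.randn(x.shape[1], self.n_out) * 0.01
            self.b = np.zeros(self.n_out)
        z = x @ self.W + self.b
        return self.activation_func(z) if self.activation_func else z

# mirrors the assertion in the test above
out = TinyDenseLayer(3).forward(np.random.random((5, 4)))
assert out.shape == (5, 3)
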
Example 5
    def __init__(self,
                 num_actions,
                 phi_length,
                 width,
                 height,
                 discount=.9,
                 learning_rate=.01,
                 batch_size=32,
                 approximator='none'):
        self._batch_size = batch_size
        self._num_input_features = phi_length
        self._phi_length = phi_length
        self._img_width = width
        self._img_height = height
        self._discount = discount
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.scale_input_by = 255.0

        # CONSTRUCT THE LAYERS
        self.q_layers = []
        self.q_layers.append(
            layers.Input2DLayer(self._batch_size, self._num_input_features,
                                self._img_height, self._img_width,
                                self.scale_input_by))

        if approximator == 'cuda_conv':
            self.q_layers.append(
                cc_layers.ShuffleBC01ToC01BLayer(self.q_layers[-1]))
            self.q_layers.append(
                cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                                 n_filters=16,
                                                 filter_size=8,
                                                 stride=4,
                                                 weights_std=.01,
                                                 init_bias_value=0.1))
            self.q_layers.append(
                cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                                 n_filters=32,
                                                 filter_size=4,
                                                 stride=2,
                                                 weights_std=.01,
                                                 init_bias_value=0.1))
            self.q_layers.append(
                cc_layers.ShuffleC01BToBC01Layer(self.q_layers[-1]))

        elif approximator == 'conv':
            self.q_layers.append(
                layers.StridedConv2DLayer(self.q_layers[-1],
                                          n_filters=16,
                                          filter_width=8,
                                          filter_height=8,
                                          stride_x=4,
                                          stride_y=4,
                                          weights_std=.01,
                                          init_bias_value=0.01))

            self.q_layers.append(
                layers.StridedConv2DLayer(self.q_layers[-1],
                                          n_filters=32,
                                          filter_width=4,
                                          filter_height=4,
                                          stride_x=2,
                                          stride_y=2,
                                          weights_std=.01,
                                          init_bias_value=0.01))
        if approximator == 'cuda_conv' or approximator == 'conv':

            self.q_layers.append(
                layers.DenseLayer(self.q_layers[-1],
                                  n_outputs=256,
                                  weights_std=0.01,
                                  init_bias_value=0.1,
                                  dropout=0,
                                  nonlinearity=layers.rectify))

            self.q_layers.append(
                layers.DenseLayer(self.q_layers[-1],
                                  n_outputs=num_actions,
                                  weights_std=0.01,
                                  init_bias_value=0.1,
                                  dropout=0,
                                  nonlinearity=layers.identity))

        if approximator == 'none':
            self.q_layers.append(\
                layers.DenseLayerNoBias(self.q_layers[-1],
                                        n_outputs=num_actions,
                                        weights_std=0.00,
                                        dropout=0,
                                        nonlinearity=layers.identity))

        self.q_layers.append(layers.OutputLayer(self.q_layers[-1]))

        for i in range(len(self.q_layers) - 1):
            print(self.q_layers[i].get_output_shape())

        # Now create a network (using the same weights)
        # for next state q values
        self.next_layers = copy_layers(self.q_layers)
        self.next_layers[0] = layers.Input2DLayer(self._batch_size,
                                                  self._num_input_features,
                                                  self._img_width,
                                                  self._img_height,
                                                  self.scale_input_by)
        self.next_layers[1].input_layer = self.next_layers[0]

        self.rewards = T.col()
        self.actions = T.icol()

        # Build the loss function ...
        q_vals = self.q_layers[-1].predictions()
        next_q_vals = self.next_layers[-1].predictions()
        next_maxes = T.max(next_q_vals, axis=1, keepdims=True)
        target = self.rewards + discount * next_maxes
        target = theano.gradient.consider_constant(target)
        diff = target - q_vals
        # Zero out all entries for actions that were not chosen...
        mask = build_mask(T.zeros_like(diff), self.actions, 1.0)
        diff_masked = diff * mask
        error = T.mean(diff_masked**2)
        self._loss = error * diff_masked.shape[1]  #

        self._parameters = layers.all_parameters(self.q_layers[-1])

        self._idx = T.lscalar('idx')

        # CREATE VARIABLES FOR INPUT AND OUTPUT
        self.states_shared = theano.shared(
            np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
        self.states_shared_next = theano.shared(
            np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
        self.rewards_shared = theano.shared(np.zeros(
            (1, 1), dtype=theano.config.floatX),
                                            broadcastable=(False, True))
        self.actions_shared = theano.shared(np.zeros((1, 1), dtype='int32'),
                                            broadcastable=(False, True))

        self._givens = \
            {self.q_layers[0].input_var:
             self.states_shared[self._idx*self._batch_size:
                                (self._idx+1)*self._batch_size, :, :, :],
             self.next_layers[0].input_var:
             self.states_shared_next[self._idx*self._batch_size:
                                     (self._idx+1)*self._batch_size, :, :, :],

             self.rewards:
             self.rewards_shared[self._idx*self._batch_size:
                                 (self._idx+1)*self._batch_size, :],
             self.actions:
             self.actions_shared[self._idx*self._batch_size:
                                 (self._idx+1)*self._batch_size, :]
             }

        self._updates = layers.gen_updates_rmsprop_and_nesterov_momentum(\
            self._loss, self._parameters, learning_rate=self.learning_rate,
            rho=0.9, momentum=0.9, epsilon=1e-6)

        self._train = theano.function([self._idx],
                                      self._loss,
                                      givens=self._givens,
                                      updates=self._updates)
        self._compute_loss = theano.function([self._idx],
                                             self._loss,
                                             givens=self._givens)
        self._compute_q_vals = \
            theano.function([self.q_layers[0].input_var],
                            self.q_layers[-1].predictions(),
                            on_unused_input='ignore')
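
# The loss built above is the standard Q-learning TD error: target = r + discount * max_a' Q(s', a'),
# with the error zeroed out for every action that was not actually taken (build_mask).
# A small NumPy sketch of that target/masking step; the batch values are made up:
import numpy as np

discount = 0.9
q_vals      = np.array([[1.0, 2.0], [0.5, 0.2]])  # Q(s, a) for a batch of two states
next_q_vals = np.array([[1.5, 0.0], [0.3, 0.9]])  # Q(s', a')
rewards     = np.array([[1.0], [0.0]])
actions     = np.array([1, 0])                    # index of the chosen action per row

target = rewards + discount * next_q_vals.max(axis=1, keepdims=True)
diff = target - q_vals

mask = np.zeros_like(diff)
mask[np.arange(len(actions)), actions] = 1.0      # keep only the chosen actions
diff_masked = diff * mask

loss = (diff_masked ** 2).mean() * diff_masked.shape[1]
print(loss)
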
Example 6
    def build(self):
        """build the model. This method should be called after self.add_data.
        """
        x_sym = sparse.csr_matrix('x', dtype='float32')
        self.x_sym = x_sym
        y_sym = T.imatrix('y')
        gx_sym = sparse.csr_matrix('gx', dtype='float32')
        gy_sym = T.ivector('gy')
        gz_sym = T.vector('gz')

        l_x_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]),
                                           input_var=x_sym)
        l_gx_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]),
                                            input_var=gx_sym)
        l_gy_in = lasagne.layers.InputLayer(shape=(None, ), input_var=gy_sym)

        l_x_1 = layers.SparseLayer(l_x_in,
                                   self.y.shape[1],
                                   nonlinearity=lasagne.nonlinearities.softmax)
        l_x_2 = layers.SparseLayer(l_x_in, self.embedding_size)
        W = l_x_2.W
        l_x_2 = layers.DenseLayer(l_x_2,
                                  self.y.shape[1],
                                  nonlinearity=lasagne.nonlinearities.softmax)
        if self.use_feature:
            l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis=1)
            l_x = layers.DenseLayer(
                l_x,
                self.y.shape[1],
                nonlinearity=lasagne.nonlinearities.softmax)
        else:
            l_x = l_x_2

        l_gx = layers.SparseLayer(l_gx_in, self.embedding_size, W=W)
        if self.neg_samp > 0:
            l_gy = lasagne.layers.EmbeddingLayer(
                l_gy_in,
                input_size=self.num_ver,
                output_size=self.embedding_size)
            l_gx = lasagne.layers.ElemwiseMergeLayer([l_gx, l_gy], T.mul)
            pgy_sym = lasagne.layers.get_output(l_gx)
            g_loss = -T.log(T.nnet.sigmoid(
                T.sum(pgy_sym, axis=1) * gz_sym)).sum()
        else:
            l_gx = lasagne.layers.DenseLayer(
                l_gx,
                self.num_ver,
                nonlinearity=lasagne.nonlinearities.softmax)
            pgy_sym = lasagne.layers.get_output(l_gx)
            g_loss = lasagne.objectives.categorical_crossentropy(
                pgy_sym, gy_sym).sum()

        self.l = [l_x, l_gx]

        py_sym = lasagne.layers.get_output(l_x)
        loss = lasagne.objectives.categorical_crossentropy(py_sym,
                                                           y_sym).mean()
        if self.layer_loss and self.use_feature:
            hid_sym = lasagne.layers.get_output(l_x_1)
            loss += lasagne.objectives.categorical_crossentropy(
                hid_sym, y_sym).mean()
            emd_sym = lasagne.layers.get_output(l_x_2)
            loss += lasagne.objectives.categorical_crossentropy(
                emd_sym, y_sym).mean()

        params = [l_x_1.W, l_x_1.b, l_x_2.W, l_x_2.b, l_x.W, l_x.b
                  ] if self.use_feature else [l_x.W, l_x.b]
        if self.update_emb:
            params = lasagne.layers.get_all_params(l_x)
        updates = lasagne.updates.sgd(loss,
                                      params,
                                      learning_rate=self.learning_rate)
        self.train_fn = theano.function([x_sym, y_sym], loss, updates=updates)

        g_params = lasagne.layers.get_all_params(l_gx)
        g_updates = lasagne.updates.sgd(g_loss,
                                        g_params,
                                        learning_rate=self.g_learning_rate)
        self.g_fn = theano.function([gx_sym, gy_sym, gz_sym],
                                    g_loss,
                                    updates=g_updates,
                                    on_unused_input='ignore')

        self.test_fn = theano.function([x_sym], py_sym)
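
# With neg_samp > 0, the graph branch above optimises a skip-gram-style negative-sampling
# objective: -log sigmoid(<emb_center, emb_context> * gz), where gz is +1 for an observed
# edge and -1 for a sampled negative. A NumPy sketch of that loss; the values are illustrative:
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

emb_center  = np.random.randn(4, 8)    # embeddings produced by l_gx
emb_context = np.random.randn(4, 8)    # embeddings looked up by l_gy
gz = np.array([1.0, 1.0, -1.0, -1.0])  # +1 = real edge, -1 = negative sample

scores = (emb_center * emb_context).sum(axis=1)  # elementwise merge + sum, as in the model
g_loss = -np.log(sigmoid(scores * gz)).sum()
print(g_loss)
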
Example 7
    def build(self):
        """build the model. This method should be called after self.add_data.
        """
        x_sym = sparse.csr_matrix('x', dtype='float32')
        # imatrix: matrix of int32 type
        y_sym = T.imatrix('y')
        g_sym = T.imatrix('g')
        gy_sym = T.vector('gy')
        ind_sym = T.ivector('ind')

        l_x_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]),
                                           input_var=x_sym)
        l_g_in = lasagne.layers.InputLayer(shape=(None, 2), input_var=g_sym)
        l_ind_in = lasagne.layers.InputLayer(shape=(None, ), input_var=ind_sym)
        l_gy_in = lasagne.layers.InputLayer(shape=(None, ), input_var=gy_sym)

        num_ver = max(self.graph.keys()) + 1
        l_emb_in = lasagne.layers.SliceLayer(l_g_in, indices=0, axis=1)
        l_emb_in = lasagne.layers.EmbeddingLayer(
            l_emb_in, input_size=num_ver, output_size=self.embedding_size)
        l_emb_out = lasagne.layers.SliceLayer(l_g_in, indices=1, axis=1)
        if self.neg_samp > 0:
            l_emb_out = lasagne.layers.EmbeddingLayer(
                l_emb_out, input_size=num_ver, output_size=self.embedding_size)

        l_emd_f = lasagne.layers.EmbeddingLayer(
            l_ind_in,
            input_size=num_ver,
            output_size=self.embedding_size,
            W=l_emb_in.W)
        l_x_hid = layers.SparseLayer(
            l_x_in,
            self.y.shape[1],
            nonlinearity=lasagne.nonlinearities.softmax)

        if self.use_feature:
            l_emd_f = layers.DenseLayer(
                l_emd_f,
                self.y.shape[1],
                nonlinearity=lasagne.nonlinearities.softmax)
            l_y = lasagne.layers.ConcatLayer([l_x_hid, l_emd_f], axis=1)
            l_y = layers.DenseLayer(
                l_y,
                self.y.shape[1],
                nonlinearity=lasagne.nonlinearities.softmax)
        else:
            l_y = layers.DenseLayer(
                l_emd_f,
                self.y.shape[1],
                nonlinearity=lasagne.nonlinearities.softmax)

        py_sym = lasagne.layers.get_output(l_y)
        loss = lasagne.objectives.categorical_crossentropy(py_sym,
                                                           y_sym).mean()
        if self.layer_loss and self.use_feature:
            hid_sym = lasagne.layers.get_output(l_x_hid)
            loss += lasagne.objectives.categorical_crossentropy(
                hid_sym, y_sym).mean()
            emd_sym = lasagne.layers.get_output(l_emd_f)
            loss += lasagne.objectives.categorical_crossentropy(
                emd_sym, y_sym).mean()

        if self.neg_samp == 0:
            l_gy = layers.DenseLayer(
                l_emb_in, num_ver, nonlinearity=lasagne.nonlinearities.softmax)
            pgy_sym = lasagne.layers.get_output(l_gy)
            g_loss = lasagne.objectives.categorical_crossentropy(
                pgy_sym, lasagne.layers.get_output(l_emb_out)).sum()
        else:
            l_gy = lasagne.layers.ElemwiseMergeLayer([l_emb_in, l_emb_out],
                                                     T.mul)
            pgy_sym = lasagne.layers.get_output(l_gy)
            g_loss = -T.log(T.nnet.sigmoid(
                T.sum(pgy_sym, axis=1) * gy_sym)).sum()

        params = [l_emd_f.W, l_emd_f.b, l_x_hid.W, l_x_hid.b, l_y.W, l_y.b
                  ] if self.use_feature else [l_y.W, l_y.b]
        if self.update_emb:
            params = lasagne.layers.get_all_params(l_y)
        updates = lasagne.updates.sgd(loss,
                                      params,
                                      learning_rate=self.learning_rate)

        self.train_fn = theano.function([x_sym, y_sym, ind_sym],
                                        loss,
                                        updates=updates,
                                        on_unused_input='ignore')
        self.test_fn = theano.function([x_sym, ind_sym],
                                       py_sym,
                                       on_unused_input='ignore')
        self.l = [l_gy, l_y]

        g_params = lasagne.layers.get_all_params(l_gy, trainable=True)
        g_updates = lasagne.updates.sgd(g_loss,
                                        g_params,
                                        learning_rate=self.g_learning_rate)

        # iteration to update parameters of graph embedding branch
        self.g_fn = theano.function([g_sym, gy_sym],
                                    g_loss,
                                    updates=g_updates,
                                    on_unused_input='ignore')

Example 8

from mnist_data import y_test_reformatted as y_test

# ===================================================================================

nn = NN.NN(loss_func=SoftMaxCrossEntropyLoss())

convLayer = layers.ConvLayer(
    n_channels=9,
    kernel_size=9,
    weight_init='glorot',
    activation_func=Tanh(),
    flatten=True,
    dropout=0.8,
)
# pool = layers.PoolingLayer(pool_size=7, flatten=True)
dense = layers.DenseLayer(10, Linear(), weight_initialisation='glorot')

nn.add_layer(convLayer)
# nn.add_layer(pool)
nn.add_layer(dense)

optimizer = optimizers.MomentumSGD(learning_rate=0.01, momentum=0.90)
trainer = NN.Trainer(nn, optimizer)

# statistic
batch_size = 50

bar = utils.ProgressBar(len(X), batch_size)
trainer.add_batch_callback(bar)

# loss_history = utils.LossHistory(nn, avg_over=50)

Example 9

    def build(self):
        """build the model. This method should be called after self.add_data.
        """
        #        x_sym = sparse.csr_matrix('x', dtype = 'float32')
        x_sym = T.matrix('x', dtype='float32')
        self.x_sym = x_sym
        y_sym = T.imatrix('y')
        #        gx_sym = sparse.csr_matrix('gx', dtype = 'float32')
        gx_sym = T.matrix('gx', dtype='float32')
        gy_sym = T.ivector('gy')
        gz_sym = T.vector('gz')

        l_x_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]),
                                           input_var=x_sym)
        l_gx_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]),
                                            input_var=gx_sym)
        l_gy_in = lasagne.layers.InputLayer(shape=(None, ), input_var=gy_sym)

        l_x_in1 = lasagne.layers.dropout(l_x_in, p=0.01)
        l_x_1 = layers.DenseLayer(l_x_in1,
                                  self.y.shape[1],
                                  nonlinearity=lasagne.nonlinearities.softmax)
        l_x_2 = layers.DenseLayer(l_x_in, self.embedding_size)

        W = l_x_2.W

        if self.use_feature:
            l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis=1)
            l_x = layers.DenseLayer(
                l_x,
                self.y.shape[1],
                nonlinearity=lasagne.nonlinearities.softmax)
        else:
            l_x = l_x_2

        l_x_2 = layers.DenseLayer(l_x_2,
                                  self.y.shape[1],
                                  nonlinearity=lasagne.nonlinearities.softmax)
        l_gx = layers.DenseLayer(l_gx_in, self.embedding_size, W=W)
        HYPOTHETICALLY = {l_gx: (200, self.embedding_size)}
        print("Layer Shape")
        LIN = get_output_shape(l_gx, HYPOTHETICALLY)
        print(HYPOTHETICALLY)
        print(LIN)
        print("graph...")

        if self.neg_samp > 0:
            l_gy = lasagne.layers.EmbeddingLayer(
                l_gy_in,
                input_size=self.num_ver,
                output_size=self.embedding_size)
            l_gx = lasagne.layers.ElemwiseMergeLayer([l_gx, l_gy], T.mul)
            pgy_sym = lasagne.layers.get_output(l_gx)
            g_loss = -T.log(T.nnet.sigmoid(
                T.sum(pgy_sym, axis=1) * gz_sym)).sum()
        else:
            l_gx = lasagne.layers.DenseLayer(
                l_gx,
                self.num_ver,
                nonlinearity=lasagne.nonlinearities.softmax)
            pgy_sym = lasagne.layers.get_output(l_gx)
            g_loss = lasagne.objectives.categorical_crossentropy(
                pgy_sym, gy_sym).sum()

        self.l = [l_x, l_gx]

        py_sym = lasagne.layers.get_output(l_x)
        loss = lasagne.objectives.categorical_crossentropy(py_sym,
                                                           y_sym).mean()
        if self.layer_loss and self.use_feature:
            hid_sym = lasagne.layers.get_output(l_x_1)
            loss += lasagne.objectives.categorical_crossentropy(
                hid_sym, y_sym).mean()
            emd_sym = lasagne.layers.get_output(l_x_2)
            loss += lasagne.objectives.categorical_crossentropy(
                emd_sym, y_sym).mean()

        params = [l_x_1.W, l_x_1.b, l_x_2.W, l_x_2.b, l_x.W, l_x.b
                  ] if self.use_feature else [l_x.W, l_x.b]
        if self.update_emb:
            params = lasagne.layers.get_all_params(l_x)
#        updates = lasagne.updates.adadelta(loss, params, learning_rate = self.learning_rate)
#        self.train_fn = theano.function([x_sym, y_sym], loss, updates = updates)

        g_params = lasagne.layers.get_all_params(l_gx)
        g_updates = lasagne.updates.adadelta(
            g_loss, g_params, learning_rate=self.g_learning_rate)
        self.g_fn = theano.function([gx_sym, gy_sym, gz_sym],
                                    g_loss,
                                    updates=g_updates,
                                    on_unused_input='ignore')

        self.test_fn = theano.function([x_sym], py_sym)

        #source_network = lasagne.layers.DenseLayer(l_x_in,num_units=nb_classes,nonlinearity=lasagne.nonlinearities.softmax)
        domain_network = lasagne.layers.DenseLayer(
            l_x_in,
            num_units=self.nb_classes,
            nonlinearity=lasagne.nonlinearities.softmax)

        #source_prediction = lasagne.layers.get_output(source_network)
        domain_prediction = lasagne.layers.get_output(domain_network)

        #source_loss = T.mean(lasagne.objectives.categorical_crossentropy(source_prediction, train_y_var))
        #source_params = lasagne.layers.get_all_params(source_network, trainable=True)
        #domain_prediction = lasagne.layers.get_output(domain_network)
        domain_y_var = T.imatrix('domain_label')
        domain_loss = T.mean(
            lasagne.objectives.categorical_crossentropy(
                domain_prediction, domain_y_var))
        domain_params = lasagne.layers.get_all_params(domain_network,
                                                      trainable=True)

        common = set(params) & set(domain_params)

        #lambda_val =  1-1e-8
        val = 1e-8
        self.lambda_val = theano.shared(lasagne.utils.floatX(val))
        # main branch: minimise the label loss, minus a small (val-weighted)
        # penalty that pushes against the domain classifier
        updates = lasagne.updates.adagrad(loss - (val * domain_loss),
                                          params,
                                          learning_rate=0.1)

        #updates1 = lasagne.updates.adadelta(source_loss - (lambda_val * domain_loss), source_params, learning_rate=1.0)  # update blue and green part
        # domain classifier: its private parameters minimise the domain loss
        updates2 = lasagne.updates.adagrad(domain_loss,
                                           list(set(domain_params) - common),
                                           learning_rate=1.0)
        # parameters shared with the main branch (if any) are pushed to *increase*
        # the domain loss, i.e. updated adversarially
        updates3 = lasagne.updates.adagrad(-(val * domain_loss),
                                           list(common),
                                           learning_rate=1.0)
        updates.update(updates2)
        updates2.update(updates3)

        #domain_y_var = T.imatrix('domain_label')
        self.train_fn = theano.function([x_sym, y_sym, domain_y_var],
                                        loss,
                                        updates=updates)
        #train1 = theano.function([l_x_in.input_var, y_sym, domain_y_var], loss, updates=updates1)
        self.train2 = theano.function([l_x_in.input_var, domain_y_var],
                                      domain_loss,
                                      updates=updates2)