Example #1
    def __init__(self,
                 inp_dim,
                 hid_dim=50,
                 initialization='glorot_normal',
                 optimization='adadelta'):

        self.dim = hid_dim
        self.inp_dim = inp_dim
        initializer = nn_utils.get_initialization_function(initialization)
        optimizer = nn_utils.get_optimization_function(optimization)

        # Forming the input layer of the answer module
        q_sent_hid = T.vector("Question root node hidden state")
        ans_sent_hid = T.vector("Answer root node hidden state")
        ans_node_hid = T.vector("Answer word node hidden state")
        ans_parent_hid = T.vector("Answer word's parent hidden state")
        answer = T.scalar("Answer Probability")

        # Forming the processing layer
        self.W_q = initializer(shape=(self.inp_dim, self.dim))
        self.W_ans_sent = initializer(shape=(self.inp_dim, self.dim))
        self.W_ans_node = initializer(shape=(self.inp_dim, self.dim))
        self.W_ans_parent = initializer(shape=(self.inp_dim, self.dim))
        self.b_inp = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, ))
        self.b_hid = nn_utils.constant_param(value=0.0, shape=())

        self.params = [
            self.W_q, self.W_ans_sent, self.W_ans_node, self.W_ans_parent,
            self.b_inp, self.W_hid, self.b_hid
        ]

        # Forming the output layer
        prediction = self.compute(q_sent_hid, ans_sent_hid, ans_node_hid,
                                  ans_parent_hid)

        # Forming the updates and loss layer
        loss = T.nnet.binary_crossentropy(prediction, answer)
        self.updates = optimizer(loss, self.params)

        self.train = theano.function(
            [q_sent_hid, ans_sent_hid, ans_node_hid, ans_parent_hid, answer],
            [],
            updates=self.updates)

        self.predict = theano.function(
            [q_sent_hid, ans_sent_hid, ans_node_hid, ans_parent_hid],
            prediction)

        self.get_loss = theano.function(
            [q_sent_hid, ans_sent_hid, ans_node_hid, ans_parent_hid, answer],
            loss)
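The constructor above relies on a compute method that is not shown in this example. A minimal sketch, assuming a single tanh hidden layer over the four projected tree-node states followed by a sigmoid output (the real method may differ, e.g. in its choice of nonlinearity):

    # Hedged sketch of compute, consistent with the parameter shapes above.
    def compute(self, q_sent_hid, ans_sent_hid, ans_node_hid, ans_parent_hid):
        # Project the four (inp_dim,) hidden states into one (dim,) hidden layer.
        hid = T.tanh(T.dot(q_sent_hid, self.W_q) +
                     T.dot(ans_sent_hid, self.W_ans_sent) +
                     T.dot(ans_node_hid, self.W_ans_node) +
                     T.dot(ans_parent_hid, self.W_ans_parent) + self.b_inp)
        # Squash to a scalar probability that this node answers the question.
        return T.nnet.sigmoid(T.dot(hid, self.W_hid) + self.b_hid)

With a scalar prediction like this, T.nnet.binary_crossentropy(prediction, answer) yields the scalar loss that the optimizer differentiates.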
Example #2
    def __init__(self, **kwargs):

        self.dim = kwargs['dim']
        self.word_vector_size = kwargs['word_vector_size']

        self.input_var = T.matrix('input_var')
        self.q_var = T.matrix('question_var')
        self.input_mask_var = T.ivector('input_mask_var')

        self.W_inp_res_in = nn_utils.normal_param(
            std=0.1, shape=(self.dim, self.word_vector_size))
        self.W_inp_res_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_inp_res = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_inp_upd_in = nn_utils.normal_param(
            std=0.1, shape=(self.dim, self.word_vector_size))
        self.W_inp_upd_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_inp_upd = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_inp_hid_in = nn_utils.normal_param(
            std=0.1, shape=(self.dim, self.word_vector_size))
        self.W_inp_hid_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_inp_hid = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        inp_c_history, _ = theano.scan(fn=self.input_gru_step,
                                       sequences=self.input_var,
                                       outputs_info=T.zeros_like(
                                           self.b_inp_hid))

        self.inp_c = inp_c_history.take(self.input_mask_var, axis=0)

        self.q_q, _ = theano.scan(fn=self.input_gru_step,
                                  sequences=self.q_var,
                                  outputs_info=T.zeros_like(self.b_inp_hid))

        self.q_q = self.q_q[-1]

        self.memory = [self.q_q.copy()]

        super().__init__(**kwargs)

        self.params += [
            self.W_inp_res_in, self.W_inp_res_hid, self.b_inp_res,
            self.W_inp_upd_in, self.W_inp_upd_hid, self.b_inp_upd,
            self.W_inp_hid_in, self.W_inp_hid_hid, self.b_inp_hid
        ]

        updates = lasagne.updates.adadelta(self.loss, self.params)

        input_list = [
            self.input_var, self.q_var, self.answer_var, self.input_mask_var
        ]
        output_list = [self.inp_c, self.q_q]

        self.generate_functions(input_list, output_list)
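Both theano.scan calls assume an input_gru_step method with signature (x_t, prev_h) that is not listed here. A sketch of a conventional GRU step consistent with the parameter shapes above (the actual implementation may differ):

    # Hedged sketch of input_gru_step: a standard GRU cell.
    def input_gru_step(self, x, prev_h):
        r = T.nnet.sigmoid(T.dot(self.W_inp_res_in, x) +
                           T.dot(self.W_inp_res_hid, prev_h) + self.b_inp_res)  # reset gate
        z = T.nnet.sigmoid(T.dot(self.W_inp_upd_in, x) +
                           T.dot(self.W_inp_upd_hid, prev_h) + self.b_inp_upd)  # update gate
        h_tilde = T.tanh(T.dot(self.W_inp_hid_in, x) +
                         r * T.dot(self.W_inp_hid_hid, prev_h) + self.b_inp_hid)
        return z * prev_h + (1 - z) * h_tilde  # gate between old and candidate state

inp_c_history then holds one (dim,) hidden state per input word, from which input_mask_var picks out the sentence-final states, and q_q keeps only the final state of the question scan.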
Example #3
    def __init__(self, word_vector_size, dim, dep_tags_size, visualise=False):
        super().__init__(word_vector_size, dim, visualise)
        self.W_dep = nn_utils.normal_param(std=0.1,
                                           shape=(self.dim, dep_tags_size))
        depTags1 = T.lvector('dep_tags1')
        depTags2 = T.lvector('dep_tags2')
        self.hid_state1, _ = theano.scan(
            fn=self.computation_syntactic,
            sequences=[self.sent1, depTags1],
            outputs_info=[T.zeros_like(self.b_inp_hid)])
        self.hid1 = self.hid_state1[-1]
        self.hid_state2, _ = theano.scan(
            fn=self.computation_syntactic,
            sequences=[self.sent2, depTags2],
            outputs_info=[T.zeros_like(self.b_inp_hid)])
        self.hid2 = self.hid_state2[-1]
        self.params.append(self.W_dep)
        self.predict = theano.function([self.sent1, depTags1], self.hid_state1)
        self.generate_function()
        self.get_similarity = theano.function(
            [self.sent1, self.sent2, depTags1, depTags2], [self.score])
        self.train = theano.function(
            [self.sent1, self.sent2, self.similarity_score, depTags1, depTags2],
            [], updates=self.updates)
        self.dep_tags = utils.load_dep_tags()
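The scans assume a computation_syntactic step taking (word_vector, dep_tag, prev_hidden). A speculative sketch in which the candidate state is biased by the column of W_dep for the word's dependency tag (the real step is not shown and may combine the tag differently); the W_inp_* and U_inp_* parameters come from the base class in Example #4:

    # Speculative sketch of computation_syntactic: a GRU step with a
    # per-dependency-tag bias drawn from W_dep.
    def computation_syntactic(self, x, dep_tag, prev_h):
        r = T.nnet.sigmoid(T.dot(self.W_inp_res_in, x) +
                           T.dot(self.U_inp_res_hid, prev_h) + self.b_inp_res)
        z = T.nnet.sigmoid(T.dot(self.W_inp_upd_in, x) +
                           T.dot(self.U_inp_upd_hid, prev_h) + self.b_inp_upd)
        h_tilde = T.tanh(T.dot(self.W_inp_hid_in, x) +
                         r * T.dot(self.U_inp_hid_hid, prev_h) +
                         self.W_dep[:, dep_tag] + self.b_inp_hid)
        return z * prev_h + (1 - z) * h_tilde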
Example #4
    def __init__(self, word_vector_size, dim, visualise=False):

        self.visualise = visualise

        self.dim = dim  # dimensions of the GRU's hidden state
        self.word_vector_size = word_vector_size
        self.W_inp_res_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.word_vector_size))
        self.U_inp_res_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.b_inp_res = nn_utils.constant_param(value=0.0, shape=(self.dim,))

        self.W_inp_upd_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.word_vector_size))
        self.U_inp_upd_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.b_inp_upd = nn_utils.constant_param(value=0.0, shape=(self.dim,))

        self.W_inp_hid_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.word_vector_size))
        self.U_inp_hid_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.b_inp_hid = nn_utils.constant_param(value=0.0, shape=(self.dim,))


        self.similarity_score = T.dscalar('score')
        self.sent1 = T.dmatrix('sent1')
        self.sent2 = T.dmatrix('sent2')
        self.params = [
            self.W_inp_res_in, self.U_inp_res_hid, self.b_inp_res,
            self.W_inp_upd_in, self.U_inp_upd_hid, self.b_inp_upd,
            self.W_inp_hid_in, self.U_inp_hid_hid, self.b_inp_hid
        ]

        # Slots filled in later by subclasses and generate_function().
        self.hid1 = None
        self.hid2 = None
        self.hid_state1 = None
        self.hid_state2 = None
        self.train = None
        self.get_similarity = None
        self.updates = None
        self.score = None
        self.predict = None
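The trailing None assignments mark the slots that subclasses (such as Example #3) and the generate_function helper are expected to fill in. generate_function itself is not shown anywhere in these examples; a speculative sketch, assuming a Gaussian-style similarity between the two final hidden states and the adadelta updates used elsewhere in these examples:

    # Speculative sketch of generate_function; the real scoring rule is unknown.
    def generate_function(self):
        # Similarity in (0, 1]: identical sentence embeddings score 1.
        self.score = T.exp(-T.sum((self.hid1 - self.hid2) ** 2))
        loss = (self.score - self.similarity_score) ** 2
        self.updates = lasagne.updates.adadelta(loss, self.params)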
Example #5
    def __init__(self, babi_train_raw, babi_test_raw, word2vec,
                 word_vector_size, dim, mode, answer_module, input_mask_mode,
                 memory_hops, l2, normalize_attention, answer_vec, debug,
                 **kwargs):

        self.vocab = {}
        self.ivocab = {}

        self.debug = debug

        self.word2vec = word2vec
        self.word_vector_size = word_vector_size
        self.dim = dim
        self.mode = mode
        self.answer_module = answer_module
        self.input_mask_mode = input_mask_mode
        self.memory_hops = memory_hops
        self.l2 = l2
        self.normalize_attention = normalize_attention
        self.answer_vec = answer_vec

        if self.mode != 'deploy':
            print("==> not used params in DMN class:", kwargs.keys())

        self.train_input, self.train_q, self.train_answer, self.train_input_mask = self._process_input(
            babi_train_raw)
        self.test_input, self.test_q, self.test_answer, self.test_input_mask = self._process_input(
            babi_test_raw)
        self.vocab_size = len(self.vocab)

        if self.debug:
            print('Input:', np.array(self.train_input).shape)
            print('Quest:', np.array(self.train_q).shape)
            print('Answer:', np.array(self.train_answer).shape)
            print('Mask:', np.array(self.train_input_mask))
            sys.exit(0)

        # if self.mode == 'deploy':
        #     self.input_var = T.tensor3('input_var')
        #     self.q_var = T.tensor3('question_var')
        #     self.input_mask_var = T.ivector('input_mask_var')

        # else:
        if self.answer_vec == 'word2vec':
            self.answer_var = T.vector('answer_var')
        else:
            self.answer_var = T.iscalar('answer_var')

        if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
            self.answer_size = self.vocab_size
        elif self.answer_vec == 'word2vec':
            self.answer_size = self.word_vector_size
        else:
            raise Exception("Invalid answer_vec type")

        if self.mode != 'deploy': print("==> building input module")

        if self.mode != 'deploy':
            print("==> creating parameters for memory module")
        self.W_mem_res_in = nn_utils.normal_param(std=0.1,
                                                  shape=(self.dim, self.dim))
        self.W_mem_res_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_mem_res = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_mem_upd_in = nn_utils.normal_param(std=0.1,
                                                  shape=(self.dim, self.dim))
        self.W_mem_upd_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_mem_upd = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_mem_hid_in = nn_utils.normal_param(std=0.1,
                                                  shape=(self.dim, self.dim))
        self.W_mem_hid_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_mem_hid = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_b = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.W_1 = nn_utils.normal_param(std=0.1,
                                         shape=(self.dim, 7 * self.dim + 2))
        self.W_2 = nn_utils.normal_param(std=0.1, shape=(1, self.dim))
        self.b_1 = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
        self.b_2 = nn_utils.constant_param(value=0.0, shape=(1, ))

        if self.mode != 'deploy':
            print(
                "==> building episodic memory module (fixed number of steps: %d)"
                % self.memory_hops)
        for iter in range(1, self.memory_hops + 1):
            current_episode = self.new_episode(self.memory[iter - 1])
            self.memory.append(
                self.GRU_update(self.memory[iter - 1], current_episode,
                                self.W_mem_res_in, self.W_mem_res_hid,
                                self.b_mem_res, self.W_mem_upd_in,
                                self.W_mem_upd_hid, self.b_mem_upd,
                                self.W_mem_hid_in, self.W_mem_hid_hid,
                                self.b_mem_hid))

        self.last_mem = self.memory[-1]

        if self.mode != 'deploy': print("==> building answer module")

        self.W_a = nn_utils.normal_param(std=0.1,
                                         shape=(self.answer_size, self.dim))

        if self.answer_module == 'feedforward':
            self.prediction = nn_utils.softmax(T.dot(self.W_a, self.last_mem))

        elif self.answer_module == 'recurrent':
            self.W_ans_res_in = nn_utils.normal_param(
                std=0.1, shape=(self.dim, self.dim + self.answer_size))
            self.W_ans_res_hid = nn_utils.normal_param(std=0.1,
                                                       shape=(self.dim,
                                                              self.dim))
            self.b_ans_res = nn_utils.constant_param(value=0.0,
                                                     shape=(self.dim, ))

            self.W_ans_upd_in = nn_utils.normal_param(
                std=0.1, shape=(self.dim, self.dim + self.answer_size))
            self.W_ans_upd_hid = nn_utils.normal_param(std=0.1,
                                                       shape=(self.dim,
                                                              self.dim))
            self.b_ans_upd = nn_utils.constant_param(value=0.0,
                                                     shape=(self.dim, ))

            self.W_ans_hid_in = nn_utils.normal_param(
                std=0.1, shape=(self.dim, self.dim + self.answer_size))
            self.W_ans_hid_hid = nn_utils.normal_param(std=0.1,
                                                       shape=(self.dim,
                                                              self.dim))
            self.b_ans_hid = nn_utils.constant_param(value=0.0,
                                                     shape=(self.dim, ))

            def answer_step(prev_a, prev_y):
                a = self.GRU_update(prev_a, T.concatenate([prev_y, self.q_q]),
                                    self.W_ans_res_in, self.W_ans_res_hid,
                                    self.b_ans_res, self.W_ans_upd_in,
                                    self.W_ans_upd_hid, self.b_ans_upd,
                                    self.W_ans_hid_in, self.W_ans_hid_hid,
                                    self.b_ans_hid)
                y = T.dot(self.W_a, a)
                if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
                    y = nn_utils.softmax(y)
                return [a, y]

            # TODO: add conditional ending
            dummy = theano.shared(np.zeros((self.answer_size, ), dtype=floatX))
            results, updates = theano.scan(
                fn=answer_step,
                outputs_info=[self.last_mem,
                              T.zeros_like(dummy)],
                n_steps=1)
            self.prediction = results[1][-1]

        else:
            raise Exception("invalid answer_module")

        if self.mode != 'deploy': print("==> collecting all parameters")
        self.params = [
            self.W_mem_res_in, self.W_mem_res_hid, self.b_mem_res,
            self.W_mem_upd_in, self.W_mem_upd_hid, self.b_mem_upd,
            self.W_mem_hid_in, self.W_mem_hid_hid, self.b_mem_hid, self.W_b,
            self.W_1, self.W_2, self.b_1, self.b_2, self.W_a
        ]

        if self.answer_module == 'recurrent':
            self.params = self.params + [
                self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res,
                self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
                self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid
            ]

        if self.mode != 'deploy':
            print("==> building loss layer and computing updates")
        if debug:
            print('Prediction dim:', self.prediction.dimshuffle('x', 0).ndim)
            print('Answer dim:', self.answer_var.ndim)
        if self.answer_vec == 'word2vec':
            self.loss_ce = nn_utils.cosine_proximity_loss(
                self.prediction.dimshuffle('x', 0),
                T.stack([self.answer_var]))[0][0]
        else:
            self.loss_ce = T.nnet.categorical_crossentropy(
                self.prediction.dimshuffle('x', 0),
                T.stack([self.answer_var]))[0]
        if self.l2 > 0:
            self.loss_l2 = self.l2 * nn_utils.l2_reg(self.params)
        else:
            self.loss_l2 = 0

        self.loss = self.loss_ce + self.loss_l2

        if debug: print(self.loss.ndim)
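Examples #5 and #6 both delegate to a GRU_update helper that is not listed. A sketch whose signature is inferred from the call sites above (previous state, input, then reset/update/candidate weight triples):

    # Hedged sketch of GRU_update, inferred from how it is called above.
    def GRU_update(self, h, x, W_res_in, W_res_hid, b_res,
                   W_upd_in, W_upd_hid, b_upd,
                   W_hid_in, W_hid_hid, b_hid):
        r = T.nnet.sigmoid(T.dot(W_res_in, x) + T.dot(W_res_hid, h) + b_res)
        z = T.nnet.sigmoid(T.dot(W_upd_in, x) + T.dot(W_upd_hid, h) + b_upd)
        h_tilde = T.tanh(T.dot(W_hid_in, x) + r * T.dot(W_hid_hid, h) + b_hid)
        return z * h + (1 - z) * h_tilde

Each memory hop therefore treats the new episode as the "input" to a one-step GRU over the previous memory state.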
Example #6
    def __init__(self,
                 babi_train_raw,
                 babi_test_raw,
                 word2vec,
                 word_vector_size,
                 dim,
                 mode,
                 answer_module,
                 input_mask_mode,
                 memory_hops,
                 l2,
                 normalize_attention,
                 answer_vec,
                 debug,
                 sentEmbdLoadState,
                 sentEmbdType="basic",
                 **kwargs):
        self.vocab = {}
        self.ivocab = {}
        self.debug = debug

        self.word2vec = word2vec
        self.word_vector_size = word_vector_size
        self.dim = dim
        self.mode = mode
        self.answer_module = answer_module
        self.input_mask_mode = input_mask_mode
        self.memory_hops = memory_hops
        self.l2 = l2
        self.normalize_attention = normalize_attention
        self.answer_vec = answer_vec
        self.sentEmbdType = sentEmbdType
        if self.mode != 'deploy':
            self.train_input, self.train_q, self.train_answer, self.train_input_mask = self._process_input(
                babi_train_raw)
            self.test_input, self.test_q, self.test_answer, self.test_input_mask = self._process_input(
                babi_test_raw)
            self.vocab_size = len(self.vocab)
            print(self.vocab_size)
        else:
            self.train_input, self.train_q, self.train_answer, self.train_input_mask = self._process_input(
                babi_train_raw)
            self.vocab_size = len(self.vocab)
            print(self.vocab_size)
            # print(self.train_input.shape)
            # print(self.train_q.shape)
            # print(self.train_input_mask.shape)

        #Setting up pre-trained Sentence Embedder for question and input module:
        if self.mode != 'deploy':
            print("==> Setting up pre-trained Sentence Embedder")
        if self.sentEmbdType == "basic":
            self.sent_embd = SentEmbd.SentEmbd_basic(self.word_vector_size,
                                                     self.dim)
        else:
            dep_tags = utils.load_dep_tags()
            self.sent_embd = SentEmbd.SentEmbd_syntactic(
                50, self.dim, len(dep_tags))  # TODO: Dependency Tags
        self.sent_embd.load_params(sentEmbdLoadState)

        self.input_var = T.matrix('input_var')
        self.q_var = T.vector('question_var')
        if self.answer_vec == 'word2vec':
            self.answer_var = T.vector('answer_var')
        else:
            self.answer_var = T.iscalar('answer_var')
        self.input_mask_var = T.ivector('input_mask_var')

        if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
            self.answer_size = self.vocab_size
        elif self.answer_vec == 'word2vec':
            self.answer_size = self.word_vector_size
        else:
            raise Exception("Invalid answer_vec type")

        #Setting up Untrained Memory module
        if self.mode != 'deploy':
            print("==> Creating parameters for memory module")
        self.W_mem_res_in = nn_utils.normal_param(std=0.1,
                                                  shape=(self.dim, self.dim))
        self.W_mem_res_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_mem_res = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_mem_upd_in = nn_utils.normal_param(std=0.1,
                                                  shape=(self.dim, self.dim))
        self.W_mem_upd_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_mem_upd = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_mem_hid_in = nn_utils.normal_param(std=0.1,
                                                  shape=(self.dim, self.dim))
        self.W_mem_hid_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_mem_hid = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_b = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.W_1 = nn_utils.normal_param(std=0.1,
                                         shape=(self.dim, 7 * self.dim + 2))
        self.W_2 = nn_utils.normal_param(std=0.1, shape=(1, self.dim))
        self.b_1 = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
        self.b_2 = nn_utils.constant_param(value=0.0, shape=(1, ))

        if self.mode != 'deploy':
            print(
                "==> Building episodic memory module (fixed number of steps: %d)"
                % self.memory_hops)
        memory = [self.q_var.copy()]
        for iter in range(1, self.memory_hops + 1):
            current_episode = self.new_episode(memory[iter - 1])
            memory.append(
                self.GRU_update(memory[iter - 1], current_episode,
                                self.W_mem_res_in, self.W_mem_res_hid,
                                self.b_mem_res, self.W_mem_upd_in,
                                self.W_mem_upd_hid, self.b_mem_upd,
                                self.W_mem_hid_in, self.W_mem_hid_hid,
                                self.b_mem_hid))

        last_mem = memory[-1]

        if self.mode != 'deploy': print("==> Building answer module")

        self.W_a = nn_utils.normal_param(std=0.1,
                                         shape=(self.answer_size, self.dim))

        if self.answer_module == 'feedforward':
            self.prediction = nn_utils.softmax(T.dot(self.W_a, last_mem))
        # elif self.answer_module == 'recurrent':
        #     self.W_ans_res_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim + self.answer_size))
        #     self.W_ans_res_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        #     self.b_ans_res = nn_utils.constant_param(value=0.0, shape=(self.dim,))

        #     self.W_ans_upd_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim + self.answer_size))
        #     self.W_ans_upd_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        #     self.b_ans_upd = nn_utils.constant_param(value=0.0, shape=(self.dim,))

        #     self.W_ans_hid_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim + self.answer_size))
        #     self.W_ans_hid_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        #     self.b_ans_hid = nn_utils.constant_param(value=0.0, shape=(self.dim,))

        #     def answer_step(prev_a, prev_y):
        #         a = self.GRU_update(prev_a, T.concatenate([prev_y, self.q_q]),
        #                           self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res,
        #                           self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
        #                           self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid)
        #         y = T.dot(self.W_a, a)
        #         if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
        #             y = nn_utils.softmax(y)
        #         return [a, y]

        #     # TODO: add conditional ending
        #     dummy = theano.shared(np.zeros((self.answer_size, ), dtype=floatX))
        #     results, updates = theano.scan(fn=answer_step,
        #         outputs_info=[last_mem, T.zeros_like(dummy)],
        #         n_steps=1)
        #     self.prediction = results[1][-1]

        else:
            raise Exception("invalid answer_module")

        if self.mode != 'deploy':
            print("==> Collecting all parameters to be trained")
        self.params = [
            self.W_mem_res_in, self.W_mem_res_hid, self.b_mem_res,
            self.W_mem_upd_in, self.W_mem_upd_hid, self.b_mem_upd,
            self.W_mem_hid_in, self.W_mem_hid_hid, self.b_mem_hid, self.W_b,
            self.W_1, self.W_2, self.b_1, self.b_2, self.W_a
        ]

        # if self.answer_module == 'recurrent':
        #     self.params = self.params + [self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res,
        #                       self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
        #                       self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid]

        if self.mode != 'deploy':
            print("==> Building loss layer and computing updates")
        if debug:
            print('Prediction dim:', self.prediction.dimshuffle('x', 0).ndim)
            print('Answer dim:', self.answer_var.ndim)
        if self.answer_vec == 'word2vec':
            self.loss_ce = nn_utils.cosine_proximity_loss(
                self.prediction.dimshuffle('x', 0),
                T.stack([self.answer_var]))[0][0]
        else:
            self.loss_ce = T.nnet.categorical_crossentropy(
                self.prediction.dimshuffle('x', 0),
                T.stack([self.answer_var]))[0]
        if self.l2 > 0:
            self.loss_l2 = self.l2 * nn_utils.l2_reg(self.params)
        else:
            self.loss_l2 = 0

        self.loss = self.loss_ce + self.loss_l2

        if debug: print(self.loss.ndim)
        # if self.debug: print(self.loss.eval({self.input_var:self.train_input,self.q_var:self.train_q,self.answer_var:self.train_answer,self.input_mask_var:self.train_input_mask}))
        updates = lasagne.updates.adadelta(self.loss, self.params)

        if self.mode == 'deploy':
            self.deploy_fn = theano.function(
                inputs=[self.input_var, self.q_var], outputs=[self.prediction])

        else:
            if self.mode == 'train':
                print("==> compiling train_fn")
                self.train_fn = theano.function(
                    inputs=[self.input_var, self.q_var, self.answer_var],
                    outputs=[self.prediction, self.loss],
                    updates=updates)

            print("==> compiling test_fn")
            self.test_fn = theano.function(
                inputs=[self.input_var, self.q_var, self.answer_var],
                outputs=[
                    self.prediction, self.loss, self.input_var, self.q_var,
                    last_mem
                ])

            if self.mode == 'train':
                print("==> computing gradients (for debugging)")
                gradient = T.grad(self.loss, self.params)
                self.get_gradient_fn = theano.function(
                    inputs=[self.input_var, self.q_var, self.answer_var],
                    outputs=gradient)
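Neither DMN variant lists new_episode, but the shape of W_1, (dim, 7 * dim + 2), matches the attention feature vector used in standard DMN implementations: seven dim-sized interaction terms between a candidate fact c, the current memory m, and the question q, plus two bilinear scalars through W_b. A sketch of the per-fact gate under that assumption:

    # Hedged sketch of the attention gate implied by W_1, W_2, b_1, b_2 and W_b.
    def new_attention_step(self, ct, mem, q):
        cWq = T.stack([T.dot(T.dot(ct, self.W_b), q)])    # bilinear fact/question term
        cWm = T.stack([T.dot(T.dot(ct, self.W_b), mem)])  # bilinear fact/memory term
        z = T.concatenate([ct, mem, q, ct * q, ct * mem,
                           T.abs_(ct - q), T.abs_(ct - mem),
                           cWq, cWm])                      # length 7*dim + 2
        l_1 = T.tanh(T.dot(self.W_1, z) + self.b_1)
        return T.nnet.sigmoid(T.dot(self.W_2, l_1) + self.b_2)[0]  # scalar gate in (0, 1)

new_episode would then evaluate this gate for each fact representation and combine the gated facts (for example with a soft attention sum or an attention-modulated GRU) into the episode vector passed to GRU_update.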