Example 1
    def answer_step(prev_a, prev_y):
        # One GRU step of the answer module: update the answer state from the
        # previous output concatenated with the question representation.
        a = self.GRU_update(prev_a, T.concatenate([prev_y, self.q_q]),
                            self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res,
                            self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
                            self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid)
        y = T.dot(self.W_a, a)
        if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
            y = nn_utils.softmax(y)
        return [a, y]
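GRU_update itself is not shown in these excerpts. A minimal sketch of what it presumably computes, assuming the standard GRU gating used in the DMN paper (reset/update gates plus a candidate hidden state; the res/upd/hid weight triples map onto those three blocks); the actual method in this codebase may differ in details:

    import theano.tensor as T

    # Sketch only: a method of the DMN class, not the repo's actual implementation.
    def GRU_update(self, h, x,
                   W_res_in, W_res_hid, b_res,
                   W_upd_in, W_upd_hid, b_upd,
                   W_hid_in, W_hid_hid, b_hid):
        z = T.nnet.sigmoid(T.dot(W_upd_in, x) + T.dot(W_upd_hid, h) + b_upd)    # update gate
        r = T.nnet.sigmoid(T.dot(W_res_in, x) + T.dot(W_res_hid, h) + b_res)    # reset gate
        h_tilde = T.tanh(T.dot(W_hid_in, x) + r * T.dot(W_hid_hid, h) + b_hid)  # candidate state
        return z * h + (1 - z) * h_tilde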
Example 2
    def new_episode(self, mem):
        g, g_updates = theano.scan(fn=self.new_attention_step,
                                   sequences=self.inp_c,
                                   non_sequences=[mem, self.q_q],
                                   outputs_info=T.zeros_like(self.inp_c[0][0]))

        if self.normalize_attention:
            g = nn_utils.softmax(g)

        e, e_updates = theano.scan(fn=self.new_episode_step,
                                   sequences=[self.inp_c, g],
                                   outputs_info=T.zeros_like(self.inp_c[0]))

        return e[-1]
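new_attention_step and new_episode_step are not shown either. Given the W_1 shape of (dim, 7 * dim + 2) declared in Examples 3 and 4, the attention gate is presumably the standard DMN two-layer scoring over features built from the fact c, the memory m and the question q. A sketch under that assumption:

    # Sketch only: assumes the standard DMN attention features; the repo's actual
    # new_attention_step may differ.
    def new_attention_step(self, ct, prev_g, mem, q_q):
        # Two bilinear terms account for the "+ 2" in W_1's (dim, 7*dim + 2) shape.
        cWq = T.stack([T.dot(T.dot(ct, self.W_b), q_q)])
        cWm = T.stack([T.dot(T.dot(ct, self.W_b), mem)])
        # Seven dim-sized feature blocks plus the two scalars above.
        z = T.concatenate([ct, mem, q_q, ct * q_q, ct * mem,
                           T.abs_(ct - q_q), T.abs_(ct - mem), cWq, cWm])
        l_1 = T.tanh(T.dot(self.W_1, z) + self.b_1)
        G = T.nnet.sigmoid(T.dot(self.W_2, l_1) + self.b_2)[0]
        return G

    # new_episode_step is typically a gate-weighted GRU step:
    #     h = g * GRU(c_t, prev_h) + (1 - g) * prev_h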
Example 3
    def __init__(self, babi_train_raw, babi_test_raw, word2vec,
                 word_vector_size, dim, mode, answer_module, input_mask_mode,
                 memory_hops, l2, normalize_attention, answer_vec, debug,
                 **kwargs):

        self.vocab = {}
        self.ivocab = {}

        self.debug = debug

        self.word2vec = word2vec
        self.word_vector_size = word_vector_size
        self.dim = dim
        self.mode = mode
        self.answer_module = answer_module
        self.input_mask_mode = input_mask_mode
        self.memory_hops = memory_hops
        self.l2 = l2
        self.normalize_attention = normalize_attention
        self.answer_vec = answer_vec

        if self.mode != 'deploy':
            print("==> not used params in DMN class:", kwargs.keys())

        self.train_input, self.train_q, self.train_answer, self.train_input_mask = self._process_input(
            babi_train_raw)
        self.test_input, self.test_q, self.test_answer, self.test_input_mask = self._process_input(
            babi_test_raw)
        self.vocab_size = len(self.vocab)

        if self.debug:
            print('Input:', np.array(self.train_input).shape)
            print('Quest:', np.array(self.train_q).shape)
            print('Answer:', np.array(self.train_answer).shape)
            print('Mask:', np.array(self.train_input_mask))
            sys.exit(0)

        # if self.mode == 'deploy':
        #     self.input_var = T.tensor3('input_var')
        #     self.q_var = T.tensor3('question_var')
        #     self.input_mask_var = T.ivector('input_mask_var')

        # else:
        if self.answer_vec == 'word2vec':
            self.answer_var = T.vector('answer_var')
        else:
            self.answer_var = T.iscalar('answer_var')

        if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
            self.answer_size = self.vocab_size
        elif self.answer_vec == 'word2vec':
            self.answer_size = self.word_vector_size
        else:
            raise Exception("Invalid answer_vec type")

        if self.mode != 'deploy': print("==> building input module")
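        # NOTE: the input-module construction is omitted from this excerpt; it is
        # assumed to define self.inp_c (the fact representations), self.q_q (the
        # question representation) and to seed self.memory with the question,
        # e.g. self.memory = [self.q_q.copy()].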

        if self.mode != 'deploy':
            print("==> creating parameters for memory module")
        self.W_mem_res_in = nn_utils.normal_param(std=0.1,
                                                  shape=(self.dim, self.dim))
        self.W_mem_res_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_mem_res = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_mem_upd_in = nn_utils.normal_param(std=0.1,
                                                  shape=(self.dim, self.dim))
        self.W_mem_upd_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_mem_upd = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_mem_hid_in = nn_utils.normal_param(std=0.1,
                                                  shape=(self.dim, self.dim))
        self.W_mem_hid_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_mem_hid = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_b = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.W_1 = nn_utils.normal_param(std=0.1,
                                         shape=(self.dim, 7 * self.dim + 2))
        self.W_2 = nn_utils.normal_param(std=0.1, shape=(1, self.dim))
        self.b_1 = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
        self.b_2 = nn_utils.constant_param(value=0.0, shape=(1, ))

        if self.mode != 'deploy':
            print(
                "==> building episodic memory module (fixed number of steps: %d)"
                % self.memory_hops)
        for hop in range(1, self.memory_hops + 1):
            current_episode = self.new_episode(self.memory[hop - 1])
            self.memory.append(
                self.GRU_update(self.memory[hop - 1], current_episode,
                                self.W_mem_res_in, self.W_mem_res_hid,
                                self.b_mem_res, self.W_mem_upd_in,
                                self.W_mem_upd_hid, self.b_mem_upd,
                                self.W_mem_hid_in, self.W_mem_hid_hid,
                                self.b_mem_hid))

        self.last_mem = self.memory[-1]

        if self.mode != 'deploy': print("==> building answer module")

        self.W_a = nn_utils.normal_param(std=0.1,
                                         shape=(self.answer_size, self.dim))

        if self.answer_module == 'feedforward':
            self.prediction = nn_utils.softmax(T.dot(self.W_a, self.last_mem))

        elif self.answer_module == 'recurrent':
            self.W_ans_res_in = nn_utils.normal_param(
                std=0.1, shape=(self.dim, self.dim + self.answer_size))
            self.W_ans_res_hid = nn_utils.normal_param(std=0.1,
                                                       shape=(self.dim,
                                                              self.dim))
            self.b_ans_res = nn_utils.constant_param(value=0.0,
                                                     shape=(self.dim, ))

            self.W_ans_upd_in = nn_utils.normal_param(
                std=0.1, shape=(self.dim, self.dim + self.answer_size))
            self.W_ans_upd_hid = nn_utils.normal_param(std=0.1,
                                                       shape=(self.dim,
                                                              self.dim))
            self.b_ans_upd = nn_utils.constant_param(value=0.0,
                                                     shape=(self.dim, ))

            self.W_ans_hid_in = nn_utils.normal_param(
                std=0.1, shape=(self.dim, self.dim + self.answer_size))
            self.W_ans_hid_hid = nn_utils.normal_param(std=0.1,
                                                       shape=(self.dim,
                                                              self.dim))
            self.b_ans_hid = nn_utils.constant_param(value=0.0,
                                                     shape=(self.dim, ))

            def answer_step(prev_a, prev_y):
                a = self.GRU_update(prev_a, T.concatenate([prev_y, self.q_q]),
                                    self.W_ans_res_in, self.W_ans_res_hid,
                                    self.b_ans_res, self.W_ans_upd_in,
                                    self.W_ans_upd_hid, self.b_ans_upd,
                                    self.W_ans_hid_in, self.W_ans_hid_hid,
                                    self.b_ans_hid)
                y = T.dot(self.W_a, a)
                if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
                    y = nn_utils.softmax(y)
                return [a, y]

            # TODO: add conditional ending
            dummy = theano.shared(np.zeros((self.answer_size, ), dtype=floatX))
            results, updates = theano.scan(
                fn=answer_step,
                outputs_info=[self.last_mem,
                              T.zeros_like(dummy)],
                n_steps=1)
            self.prediction = results[1][-1]

        else:
            raise Exception("invalid answer_module")

        if self.mode != 'deploy': print("==> collecting all parameters")
        self.params = [
            self.W_mem_res_in, self.W_mem_res_hid, self.b_mem_res,
            self.W_mem_upd_in, self.W_mem_upd_hid, self.b_mem_upd,
            self.W_mem_hid_in, self.W_mem_hid_hid, self.b_mem_hid, self.W_b,
            self.W_1, self.W_2, self.b_1, self.b_2, self.W_a
        ]

        if self.answer_module == 'recurrent':
            self.params = self.params + [
                self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res,
                self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
                self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid
            ]

        if self.mode != 'deploy':
            print("==> building loss layer and computing updates")
        if debug:
            print('Prediction dim:', self.prediction.dimshuffle('x', 0).ndim)
            print('Answer dim:', self.answer_var.ndim)
        if self.answer_vec == 'word2vec':
            self.loss_ce = nn_utils.cosine_proximity_loss(
                self.prediction.dimshuffle('x', 0),
                T.stack([self.answer_var]))[0][0]
        else:
            self.loss_ce = T.nnet.categorical_crossentropy(
                self.prediction.dimshuffle('x', 0),
                T.stack([self.answer_var]))[0]
        if self.l2 > 0:
            self.loss_l2 = self.l2 * nn_utils.l2_reg(self.params)
        else:
            self.loss_l2 = 0

        self.loss = self.loss_ce + self.loss_l2

        if debug: print(self.loss.ndim)
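The nn_utils helpers used throughout (normal_param, constant_param, softmax, l2_reg, cosine_proximity_loss) are not included in these excerpts. A rough sketch of plausible implementations, consistent with how they are called above and assuming Theano shared variables and floatX; the actual module may differ:

    import numpy as np
    import theano
    import theano.tensor as T

    floatX = theano.config.floatX

    def normal_param(std, shape):
        # Gaussian-initialised trainable parameter.
        return theano.shared(np.random.normal(0.0, std, shape).astype(floatX))

    def constant_param(value, shape):
        # Constant-initialised trainable parameter (used for biases).
        return theano.shared(np.full(shape, value, dtype=floatX))

    def softmax(x):
        # Numerically stable softmax over a 1-d tensor.
        e = T.exp(x - x.max())
        return e / e.sum()

    def l2_reg(params):
        # Sum of squared weights over all trainable parameters.
        return sum(T.sum(p ** 2) for p in params)

    def cosine_proximity_loss(pred, target):
        # One minus row-wise cosine similarity; the caller indexes [0][0] for a scalar.
        pred_n = pred / T.sqrt(T.sum(pred ** 2, axis=1, keepdims=True) + 1e-8)
        target_n = target / T.sqrt(T.sum(target ** 2, axis=1, keepdims=True) + 1e-8)
        return 1.0 - T.sum(pred_n * target_n, axis=1, keepdims=True)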
Example 4
    def __init__(self,
                 babi_train_raw,
                 babi_test_raw,
                 word2vec,
                 word_vector_size,
                 dim,
                 mode,
                 answer_module,
                 input_mask_mode,
                 memory_hops,
                 l2,
                 normalize_attention,
                 answer_vec,
                 debug,
                 sentEmbdLoadState,
                 sentEmbdType="basic",
                 **kwargs):
        self.vocab = {}
        self.ivocab = {}
        self.debug = debug

        self.word2vec = word2vec
        self.word_vector_size = word_vector_size
        self.dim = dim
        self.mode = mode
        self.answer_module = answer_module
        self.input_mask_mode = input_mask_mode
        self.memory_hops = memory_hops
        self.l2 = l2
        self.normalize_attention = normalize_attention
        self.answer_vec = answer_vec
        self.sentEmbdType = sentEmbdType
        if self.mode != 'deploy':
            self.train_input, self.train_q, self.train_answer, self.train_input_mask = self._process_input(
                babi_train_raw)
            self.test_input, self.test_q, self.test_answer, self.test_input_mask = self._process_input(
                babi_test_raw)
            self.vocab_size = len(self.vocab)
            print(self.vocab_size)
        else:  # deploy mode: the training data is still processed to build the vocabulary
            self.train_input, self.train_q, self.train_answer, self.train_input_mask = self._process_input(
                babi_train_raw)
            self.vocab_size = len(self.vocab)
            print(self.vocab_size)
            # print(self.train_input.shape)
            # print(self.train_q.shape)
            # print(self.train_input_mask.shape)

        # Set up the pre-trained sentence embedder for the question and input modules:
        if self.mode != 'deploy':
            print("==> Setting up pre-trained Sentence Embedder")
        if self.sentEmbdType == "basic":
            self.sent_embd = SentEmbd.SentEmbd_basic(self.word_vector_size,
                                                     self.dim)
        else:
            dep_tags = utils.load_dep_tags()
            self.sent_embd = SentEmbd.SentEmbd_syntactic(
                50, self.dim, len(dep_tags))  # hidden size assumed = self.dim; TODO: Dependency Tags
        self.sent_embd.load_params(sentEmbdLoadState)

        self.input_var = T.matrix('input_var')
        self.q_var = T.vector('question_var')
        if self.answer_vec == 'word2vec':
            self.answer_var = T.vector('answer_var')
        else:
            self.answer_var = T.iscalar('answer_var')
        self.input_mask_var = T.ivector('input_mask_var')

        if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
            self.answer_size = self.vocab_size
        elif self.answer_vec == 'word2vec':
            self.answer_size = self.word_vector_size
        else:
            raise Exception("Invalid answer_vec type")

        #Setting up Untrained Memory module
        if self.mode != 'deploy':
            print("==> Creating parameters for memory module")
        self.W_mem_res_in = nn_utils.normal_param(std=0.1,
                                                  shape=(self.dim, self.dim))
        self.W_mem_res_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_mem_res = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_mem_upd_in = nn_utils.normal_param(std=0.1,
                                                  shape=(self.dim, self.dim))
        self.W_mem_upd_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_mem_upd = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_mem_hid_in = nn_utils.normal_param(std=0.1,
                                                  shape=(self.dim, self.dim))
        self.W_mem_hid_hid = nn_utils.normal_param(std=0.1,
                                                   shape=(self.dim, self.dim))
        self.b_mem_hid = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        self.W_b = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.W_1 = nn_utils.normal_param(std=0.1,
                                         shape=(self.dim, 7 * self.dim + 2))
        self.W_2 = nn_utils.normal_param(std=0.1, shape=(1, self.dim))
        self.b_1 = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
        self.b_2 = nn_utils.constant_param(value=0.0, shape=(1, ))

        if self.mode != 'deploy':
            print(
                "==> Building episodic memory module (fixed number of steps: %d)"
                % self.memory_hops)
        memory = [self.q_var.copy()]
        for hop in range(1, self.memory_hops + 1):
            current_episode = self.new_episode(memory[hop - 1])
            memory.append(
                self.GRU_update(memory[hop - 1], current_episode,
                                self.W_mem_res_in, self.W_mem_res_hid,
                                self.b_mem_res, self.W_mem_upd_in,
                                self.W_mem_upd_hid, self.b_mem_upd,
                                self.W_mem_hid_in, self.W_mem_hid_hid,
                                self.b_mem_hid))

        last_mem = memory[-1]

        if self.mode != 'deploy': print("==> Building answer module")

        self.W_a = nn_utils.normal_param(std=0.1,
                                         shape=(self.answer_size, self.dim))

        if self.answer_module == 'feedforward':
            self.prediction = nn_utils.softmax(T.dot(self.W_a, last_mem))
        # elif self.answer_module == 'recurrent':
        #     self.W_ans_res_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim + self.answer_size))
        #     self.W_ans_res_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        #     self.b_ans_res = nn_utils.constant_param(value=0.0, shape=(self.dim,))

        #     self.W_ans_upd_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim + self.answer_size))
        #     self.W_ans_upd_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        #     self.b_ans_upd = nn_utils.constant_param(value=0.0, shape=(self.dim,))

        #     self.W_ans_hid_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim + self.answer_size))
        #     self.W_ans_hid_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        #     self.b_ans_hid = nn_utils.constant_param(value=0.0, shape=(self.dim,))

        #     def answer_step(prev_a, prev_y):
        #         a = self.GRU_update(prev_a, T.concatenate([prev_y, self.q_q]),
        #                           self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res,
        #                           self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
        #                           self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid)
        #         y = T.dot(self.W_a, a)
        #         if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
        #             y = nn_utils.softmax(y)
        #         return [a, y]

        #     # TODO: add conditional ending
        #     dummy = theano.shared(np.zeros((self.answer_size, ), dtype=floatX))
        #     results, updates = theano.scan(fn=answer_step,
        #         outputs_info=[last_mem, T.zeros_like(dummy)],
        #         n_steps=1)
        #     self.prediction = results[1][-1]

        else:
            raise Exception("invalid answer_module")

        if self.mode != 'deploy':
            print("==> Collecting all parameters to be trained")
        self.params = [
            self.W_mem_res_in, self.W_mem_res_hid, self.b_mem_res,
            self.W_mem_upd_in, self.W_mem_upd_hid, self.b_mem_upd,
            self.W_mem_hid_in, self.W_mem_hid_hid, self.b_mem_hid, self.W_b,
            self.W_1, self.W_2, self.b_1, self.b_2, self.W_a
        ]

        # if self.answer_module == 'recurrent':
        #     self.params = self.params + [self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res,
        #                       self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
        #                       self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid]

        if self.mode != 'deploy':
            print("==> Building loss layer and computing updates")
        if debug:
            print('Prediction dim:', self.prediction.dimshuffle('x', 0).ndim)
            print('Answer dim:', self.answer_var.ndim)
        if self.answer_vec == 'word2vec':
            self.loss_ce = nn_utils.cosine_proximity_loss(
                self.prediction.dimshuffle('x', 0),
                T.stack([self.answer_var]))[0][0]
        else:
            self.loss_ce = T.nnet.categorical_crossentropy(
                self.prediction.dimshuffle('x', 0),
                T.stack([self.answer_var]))[0]
        if self.l2 > 0:
            self.loss_l2 = self.l2 * nn_utils.l2_reg(self.params)
        else:
            self.loss_l2 = 0

        self.loss = self.loss_ce + self.loss_l2

        if debug: print(self.loss.ndim)
        # if self.debug: print(self.loss.eval({self.input_var:self.train_input,self.q_var:self.train_q,self.answer_var:self.train_answer,self.input_mask_var:self.train_input_mask}))
        updates = lasagne.updates.adadelta(self.loss, self.params)

        if self.mode == 'deploy':
            self.deploy_fn = theano.function(
                inputs=[self.input_var, self.q_var], outputs=[self.prediction])

        else:
            if self.mode == 'train':
                print("==> compiling train_fn")
                self.train_fn = theano.function(
                    inputs=[self.input_var, self.q_var, self.answer_var],
                    outputs=[self.prediction, self.loss],
                    updates=updates)

            print("==> compiling test_fn")
            self.test_fn = theano.function(
                inputs=[self.input_var, self.q_var, self.answer_var],
                outputs=[
                    self.prediction, self.loss, self.input_var, self.q_var,
                    last_mem
                ])

            if self.mode == 'train':
                print("==> computing gradients (for debugging)")
                gradient = T.grad(self.loss, self.params)
                self.get_gradient_fn = theano.function(
                    inputs=[self.input_var, self.q_var, self.answer_var],
                    outputs=gradient)
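A hypothetical usage sketch of the compiled functions: the deploy function maps a matrix of sentence embeddings and a question vector to a distribution over the vocabulary, which ivocab can turn back into a word when answer_vec is 'index' or 'one_hot'. Names such as dmn, input_matrix and question_vector are illustrative, not from the source:

    import numpy as np

    # dmn is a hypothetical instance of the class whose __init__ is shown above.
    if dmn.mode == 'deploy':
        prediction, = dmn.deploy_fn(input_matrix, question_vector)
        answer_word = dmn.ivocab[int(np.argmax(prediction))]
    else:
        # One pass over the training data, one sample at a time (no batching here).
        for inp, q, ans in zip(dmn.train_input, dmn.train_q, dmn.train_answer):
            prediction, loss = dmn.train_fn(inp, q, ans)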