def answer_step(prev_a, prev_y):
    """One step of the recurrent answer module.

    Concatenates the previous answer output with the question encoding,
    advances the answer GRU by one step, and projects the new hidden
    state into answer space (softmaxed for discrete answer encodings).

    Returns [new_hidden_state, answer_output].
    """
    gru_input = T.concatenate([prev_y, self.q_q])
    next_a = self.GRU_update(prev_a, gru_input,
                             self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res,
                             self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
                             self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid)
    projected = T.dot(self.W_a, next_a)
    # Discrete answer encodings are normalised into a probability distribution.
    if self.answer_vec in ('one_hot', 'index'):
        projected = nn_utils.softmax(projected)
    return [next_a, projected]
def new_episode(self, mem):
    """Compute one episode vector by attending over the input facts.

    Scans an attention gate for every fact given the current memory and
    the question encoding, optionally softmax-normalises the gates, then
    scans the gated episode GRU over the facts and returns its final
    hidden state.
    """
    gates, _ = theano.scan(
        fn=self.new_attention_step,
        sequences=self.inp_c,
        non_sequences=[mem, self.q_q],
        outputs_info=T.zeros_like(self.inp_c[0][0]))
    if self.normalize_attention:
        gates = nn_utils.softmax(gates)
    episode_states, _ = theano.scan(
        fn=self.new_episode_step,
        sequences=[self.inp_c, gates],
        outputs_info=T.zeros_like(self.inp_c[0]))
    # Only the final hidden state summarises the episode.
    return episode_states[-1]
def __init__(self, babi_train_raw, babi_test_raw, word2vec, word_vector_size,
             dim, mode, answer_module, input_mask_mode, memory_hops, l2,
             normalize_attention, answer_vec, debug, **kwargs):
    """Build the full DMN computation graph (Theano).

    Processes the raw bAbI train/test data, creates the memory- and
    answer-module parameters, unrolls the episodic memory for a fixed
    number of hops, and assembles the loss expression.

    Parameters:
        babi_train_raw / babi_test_raw: raw bAbI samples fed to _process_input.
        word2vec: word-embedding lookup used by the input processing.
        word_vector_size: dimensionality of a single word vector.
        dim: hidden dimensionality of the GRUs / memory.
        mode: run mode; 'deploy' suppresses progress printing.
        answer_module: 'feedforward' or 'recurrent'.
        input_mask_mode: how input masks are derived (passed through).
        memory_hops: fixed number of episodic-memory passes.
        l2: L2 regularisation weight (0 disables).
        normalize_attention: whether episode gates are softmax-normalised.
        answer_vec: 'one_hot', 'index', or 'word2vec'; selects answer encoding.
        debug: print tensor dims and exit early / print extra info.

    Raises:
        Exception: for an invalid answer_vec or answer_module value.
    """
    # Vocabulary maps are filled as a side effect of _process_input.
    self.vocab = {}
    self.ivocab = {}
    self.debug = debug
    self.word2vec = word2vec
    self.word_vector_size = word_vector_size
    self.dim = dim
    self.mode = mode
    self.answer_module = answer_module
    self.input_mask_mode = input_mask_mode
    self.memory_hops = memory_hops
    self.l2 = l2
    self.normalize_attention = normalize_attention
    self.answer_vec = answer_vec

    if self.mode != 'deploy':
        print("==> not used params in DMN class:", kwargs.keys())

    # Vectorise raw data; also populates self.vocab / self.ivocab.
    self.train_input, self.train_q, self.train_answer, self.train_input_mask = self._process_input(
        babi_train_raw)
    self.test_input, self.test_q, self.test_answer, self.test_input_mask = self._process_input(
        babi_test_raw)
    self.vocab_size = len(self.vocab)

    if self.debug:
        # Early-exit sanity check of the processed data shapes.
        print('Input:', np.array(self.train_input).shape)
        print('Quest:', np.array(self.train_q).shape)
        print('Answer:', np.array(self.train_answer).shape)
        print('Mask:', np.array(self.train_input_mask))
        sys.exit(0)

    # if self.mode == 'deploy':
    #     self.input_var = T.tensor3('input_var')
    #     self.q_var = T.tensor3('question_var')
    #     self.input_mask_var = T.ivector('input_mask_var')
    # else:
    # Symbolic target: a real vector for word2vec answers, an int index otherwise.
    if self.answer_vec == 'word2vec':
        self.answer_var = T.vector('answer_var')
    else:
        self.answer_var = T.iscalar('answer_var')

    # Output dimensionality of the answer projection.
    if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
        self.answer_size = self.vocab_size
    elif self.answer_vec == 'word2vec':
        self.answer_size = self.word_vector_size
    else:
        raise Exception("Invalid answer_vec type")

    if self.mode != 'deploy': print("==> building input module")
    # NOTE(review): no input-module construction is visible here — self.q_q,
    # self.inp_c and self.memory are used below but never initialised in this
    # span; presumably the input-module code was removed or lives elsewhere.
    # Verify before running.

    if self.mode != 'deploy': print("==> creating parameters for memory module")
    # Memory GRU parameters: reset / update / hidden candidate gates.
    self.W_mem_res_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_mem_res_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.b_mem_res = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    self.W_mem_upd_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_mem_upd_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.b_mem_upd = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    self.W_mem_hid_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_mem_hid_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.b_mem_hid = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    # Attention-network parameters (W_1 consumes the 7*dim+2 feature vector).
    self.W_b = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_1 = nn_utils.normal_param(std=0.1, shape=(self.dim, 7 * self.dim + 2))
    self.W_2 = nn_utils.normal_param(std=0.1, shape=(1, self.dim))
    self.b_1 = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    self.b_2 = nn_utils.constant_param(value=0.0, shape=(1, ))

    if self.mode != 'deploy':
        print(
            "==> building episodic memory module (fixed number of steps: %d)"
            % self.memory_hops)
    # Unroll the fixed number of memory hops; each hop attends over the
    # facts and updates the memory with the memory GRU.
    # NOTE(review): self.memory is appended to but never initialised above —
    # confirm it is created (e.g. [self.q_q.copy()]) before this loop.
    for iter in range(1, self.memory_hops + 1):
        current_episode = self.new_episode(self.memory[iter - 1])
        self.memory.append(
            self.GRU_update(self.memory[iter - 1], current_episode,
                            self.W_mem_res_in, self.W_mem_res_hid,
                            self.b_mem_res, self.W_mem_upd_in,
                            self.W_mem_upd_hid, self.b_mem_upd,
                            self.W_mem_hid_in, self.W_mem_hid_hid,
                            self.b_mem_hid))
    self.last_mem = self.memory[-1]

    if self.mode != 'deploy': print("==> building answer module")
    self.W_a = nn_utils.normal_param(std=0.1, shape=(self.answer_size, self.dim))

    if self.answer_module == 'feedforward':
        # Single linear projection of the final memory, softmaxed.
        self.prediction = nn_utils.softmax(T.dot(self.W_a, self.last_mem))
    elif self.answer_module == 'recurrent':
        # Answer GRU parameters; input is [previous answer ; question],
        # hence dim + answer_size input width.
        self.W_ans_res_in = nn_utils.normal_param(
            std=0.1, shape=(self.dim, self.dim + self.answer_size))
        self.W_ans_res_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.b_ans_res = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
        self.W_ans_upd_in = nn_utils.normal_param(
            std=0.1, shape=(self.dim, self.dim + self.answer_size))
        self.W_ans_upd_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.b_ans_upd = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
        self.W_ans_hid_in = nn_utils.normal_param(
            std=0.1, shape=(self.dim, self.dim + self.answer_size))
        self.W_ans_hid_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.b_ans_hid = nn_utils.constant_param(value=0.0, shape=(self.dim, ))

        def answer_step(prev_a, prev_y):
            # One answer-GRU step: consume previous output + question,
            # produce new state and (softmaxed, if discrete) output.
            a = self.GRU_update(prev_a, T.concatenate([prev_y, self.q_q]),
                                self.W_ans_res_in, self.W_ans_res_hid,
                                self.b_ans_res, self.W_ans_upd_in,
                                self.W_ans_upd_hid, self.b_ans_upd,
                                self.W_ans_hid_in, self.W_ans_hid_hid,
                                self.b_ans_hid)
            y = T.dot(self.W_a, a)
            if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
                y = nn_utils.softmax(y)
            return [a, y]

        # TODO: add conditional ending
        # Zero vector used only to shape the initial answer output.
        dummy = theano.shared(np.zeros((self.answer_size, ), dtype=floatX))
        # n_steps=1: the answer GRU is run for a single step.
        results, updates = theano.scan(
            fn=answer_step,
            outputs_info=[self.last_mem, T.zeros_like(dummy)],
            n_steps=1)
        self.prediction = results[1][-1]
    else:
        raise Exception("invalid answer_module")

    if self.mode != 'deploy': print("==> collecting all parameters")
    # Everything trainable; answer-GRU weights appended when used.
    self.params = [
        self.W_mem_res_in, self.W_mem_res_hid, self.b_mem_res,
        self.W_mem_upd_in, self.W_mem_upd_hid, self.b_mem_upd,
        self.W_mem_hid_in, self.W_mem_hid_hid, self.b_mem_hid, self.W_b,
        self.W_1, self.W_2, self.b_1, self.b_2, self.W_a
    ]
    if self.answer_module == 'recurrent':
        self.params = self.params + [
            self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res,
            self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
            self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid
        ]

    if self.mode != 'deploy': print("==> building loss layer and computing updates")
    if debug:
        print('Prediction dim:', self.prediction.dimshuffle('x', 0).ndim)
        print('Answer dim:', self.answer_var.ndim)
    # Loss: cosine proximity for continuous (word2vec) targets,
    # categorical cross-entropy for discrete targets; prediction is
    # reshaped to a 1-sample batch to match.
    if self.answer_vec == 'word2vec':
        self.loss_ce = nn_utils.cosine_proximity_loss(
            self.prediction.dimshuffle('x', 0),
            T.stack([self.answer_var]))[0][0]
    else:
        self.loss_ce = T.nnet.categorical_crossentropy(
            self.prediction.dimshuffle('x', 0),
            T.stack([self.answer_var]))[0]
    if self.l2 > 0:
        self.loss_l2 = self.l2 * nn_utils.l2_reg(self.params)
    else:
        self.loss_l2 = 0
    self.loss = self.loss_ce + self.loss_l2
    if debug:
        print(self.loss.ndim)
def __init__(self, babi_train_raw, babi_test_raw, word2vec, word_vector_size,
             dim, mode, answer_module, input_mask_mode, memory_hops, l2,
             normalize_attention, answer_vec, debug, sentEmbdLoadState,
             sentEmbdType="basic", **kwargs):
    """Build a DMN variant that uses a pre-trained sentence embedder.

    Loads a SentEmbd model for the input/question encoding, creates the
    memory-module parameters, unrolls the episodic memory over the
    question variable, assembles the loss, and compiles the Theano
    functions appropriate to the run mode (deploy / train / test).

    Parameters:
        babi_train_raw / babi_test_raw: raw bAbI samples fed to _process_input.
        word2vec: word-embedding lookup used by the input processing.
        word_vector_size: dimensionality of a single word vector.
        dim: hidden dimensionality of the GRUs / memory.
        mode: 'deploy', 'train', or test-like; controls which functions compile.
        answer_module: only 'feedforward' is active (recurrent is commented out).
        input_mask_mode: how input masks are derived (passed through).
        memory_hops: fixed number of episodic-memory passes.
        l2: L2 regularisation weight (0 disables).
        normalize_attention: whether episode gates are softmax-normalised.
        answer_vec: 'one_hot', 'index', or 'word2vec'; selects answer encoding.
        debug: print extra diagnostic info.
        sentEmbdLoadState: saved-parameter state loaded into the sentence embedder.
        sentEmbdType: "basic" (default) or syntactic embedder variant.

    Raises:
        Exception: for an invalid answer_vec or answer_module value.
    """
    # Vocabulary maps are filled as a side effect of _process_input.
    self.vocab = {}
    self.ivocab = {}
    self.debug = debug
    self.word2vec = word2vec
    self.word_vector_size = word_vector_size
    self.dim = dim
    self.mode = mode
    self.answer_module = answer_module
    self.input_mask_mode = input_mask_mode
    self.memory_hops = memory_hops
    self.l2 = l2
    self.normalize_attention = normalize_attention
    self.answer_vec = answer_vec
    self.sentEmbdType = sentEmbdType

    # Deploy mode only needs the train-side data (for the vocabulary);
    # other modes also process the test set.
    if (self.mode != 'deploy'):
        self.train_input, self.train_q, self.train_answer, self.train_input_mask = self._process_input(
            babi_train_raw)
        self.test_input, self.test_q, self.test_answer, self.test_input_mask = self._process_input(
            babi_test_raw)
        self.vocab_size = len(self.vocab)
        print(self.vocab_size)
    elif self.mode == 'deploy':
        self.train_input, self.train_q, self.train_answer, self.train_input_mask = self._process_input(
            babi_train_raw)
        self.vocab_size = len(self.vocab)
        print(self.vocab_size)
        # print(self.train_input.shape)
        # print(self.train_q.shape)
        # print(self.train_input_mask.shape)

    #Setting up pre-trained Sentence Embedder for question and input module:
    if self.mode != 'deploy': print("==> Setting up pre-trained Sentence Embedder")
    if self.sentEmbdType == "basic":
        self.sent_embd = SentEmbd.SentEmbd_basic(self.word_vector_size,
                                                 self.dim)
    else:
        # NOTE(review): load_dep_tags is assigned, not called — missing
        # parentheses? And hid_dim is not defined in this scope (probably
        # self.dim was intended). This branch will raise NameError as-is.
        dep_tags = utils.load_dep_tags
        self.sent_embd = SentEmbd.SentEmbd_syntactic(
            50, hid_dim, len(dep_tags))  #TODO: Dependency Tags
    self.sent_embd.load_params(sentEmbdLoadState)

    # Symbolic inputs: a matrix of fact encodings and a question vector.
    self.input_var = T.matrix('input_var')
    self.q_var = T.vector('question_var')
    if self.answer_vec == 'word2vec':
        self.answer_var = T.vector('answer_var')
    else:
        self.answer_var = T.iscalar('answer_var')
    self.input_mask_var = T.ivector('input_mask_var')

    # Output dimensionality of the answer projection.
    if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
        self.answer_size = self.vocab_size
    elif self.answer_vec == 'word2vec':
        self.answer_size = self.word_vector_size
    else:
        raise Exception("Invalid answer_vec type")

    #Setting up Untrained Memory module
    if self.mode != 'deploy': print("==> Creating parameters for memory module")
    # Memory GRU parameters: reset / update / hidden candidate gates.
    self.W_mem_res_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_mem_res_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.b_mem_res = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    self.W_mem_upd_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_mem_upd_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.b_mem_upd = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    self.W_mem_hid_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_mem_hid_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.b_mem_hid = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    # Attention-network parameters (W_1 consumes the 7*dim+2 feature vector).
    self.W_b = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    self.W_1 = nn_utils.normal_param(std=0.1, shape=(self.dim, 7 * self.dim + 2))
    self.W_2 = nn_utils.normal_param(std=0.1, shape=(1, self.dim))
    self.b_1 = nn_utils.constant_param(value=0.0, shape=(self.dim, ))
    self.b_2 = nn_utils.constant_param(value=0.0, shape=(1, ))

    if self.mode != 'deploy':
        print(
            "==> Building episodic memory module (fixed number of steps: %d)"
            % self.memory_hops)
    # Memory starts from the question encoding; each hop attends over the
    # facts and updates the memory with the memory GRU.
    memory = [self.q_var.copy()]
    for iter in range(1, self.memory_hops + 1):
        current_episode = self.new_episode(memory[iter - 1])
        memory.append(
            self.GRU_update(memory[iter - 1], current_episode,
                            self.W_mem_res_in, self.W_mem_res_hid,
                            self.b_mem_res, self.W_mem_upd_in,
                            self.W_mem_upd_hid, self.b_mem_upd,
                            self.W_mem_hid_in, self.W_mem_hid_hid,
                            self.b_mem_hid))
    last_mem = memory[-1]

    if self.mode != 'deploy': print("==> Building answer module")
    self.W_a = nn_utils.normal_param(std=0.1, shape=(self.answer_size, self.dim))

    if self.answer_module == 'feedforward':
        # Single linear projection of the final memory, softmaxed.
        self.prediction = nn_utils.softmax(T.dot(self.W_a, last_mem))
    # elif self.answer_module == 'recurrent':
    #     self.W_ans_res_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim + self.answer_size))
    #     self.W_ans_res_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    #     self.b_ans_res = nn_utils.constant_param(value=0.0, shape=(self.dim,))
    #     self.W_ans_upd_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim + self.answer_size))
    #     self.W_ans_upd_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    #     self.b_ans_upd = nn_utils.constant_param(value=0.0, shape=(self.dim,))
    #     self.W_ans_hid_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim + self.answer_size))
    #     self.W_ans_hid_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
    #     self.b_ans_hid = nn_utils.constant_param(value=0.0, shape=(self.dim,))
    #     def answer_step(prev_a, prev_y):
    #         a = self.GRU_update(prev_a, T.concatenate([prev_y, self.q_q]),
    #                             self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res,
    #                             self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
    #                             self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid)
    #         y = T.dot(self.W_a, a)
    #         if self.answer_vec == 'one_hot' or self.answer_vec == 'index':
    #             y = nn_utils.softmax(y)
    #         return [a, y]
    #     # TODO: add conditional ending
    #     dummy = theano.shared(np.zeros((self.answer_size, ), dtype=floatX))
    #     results, updates = theano.scan(fn=answer_step,
    #                                    outputs_info=[last_mem, T.zeros_like(dummy)],
    #                                    n_steps=1)
    #     self.prediction = results[1][-1]
    else:
        raise Exception("invalid answer_module")

    if self.mode != 'deploy': print("==> Collecting all parameters to be trained")
    # Everything trainable (recurrent answer weights are commented out above).
    self.params = [
        self.W_mem_res_in, self.W_mem_res_hid, self.b_mem_res,
        self.W_mem_upd_in, self.W_mem_upd_hid, self.b_mem_upd,
        self.W_mem_hid_in, self.W_mem_hid_hid, self.b_mem_hid, self.W_b,
        self.W_1, self.W_2, self.b_1, self.b_2, self.W_a
    ]
    # if self.answer_module == 'recurrent':
    #     self.params = self.params + [self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res,
    #                                  self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
    #                                  self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid]

    if self.mode != 'deploy': print("==> Building loss layer and computing updates")
    if debug:
        print('Prediction dim:', self.prediction.dimshuffle('x', 0).ndim)
        print('Answer dim:', self.answer_var.ndim)
    # Loss: cosine proximity for continuous (word2vec) targets,
    # categorical cross-entropy for discrete targets; prediction is
    # reshaped to a 1-sample batch to match.
    if self.answer_vec == 'word2vec':
        self.loss_ce = nn_utils.cosine_proximity_loss(
            self.prediction.dimshuffle('x', 0),
            T.stack([self.answer_var]))[0][0]
    else:
        self.loss_ce = T.nnet.categorical_crossentropy(
            self.prediction.dimshuffle('x', 0),
            T.stack([self.answer_var]))[0]
    if self.l2 > 0:
        self.loss_l2 = self.l2 * nn_utils.l2_reg(self.params)
    else:
        self.loss_l2 = 0
    self.loss = self.loss_ce + self.loss_l2
    if debug:
        print(self.loss.ndim)
    # if self.debug: print(self.loss.eval({self.input_var:self.train_input,self.q_var:self.train_q,self.answer_var:self.train_answer,self.input_mask_var:self.train_input_mask}))

    # Adadelta updates over all trainable parameters.
    updates = lasagne.updates.adadelta(self.loss, self.params)

    if self.mode == 'deploy':
        # Deploy: forward pass only, no loss or updates.
        self.deploy_fn = theano.function(
            inputs=[self.input_var, self.q_var],
            outputs=[self.prediction])
    else:
        if self.mode == 'train':
            print("==> compiling train_fn")
            self.train_fn = theano.function(
                inputs=[self.input_var, self.q_var, self.answer_var],
                outputs=[self.prediction, self.loss],
                updates=updates)
        # test_fn is compiled for every non-deploy mode; it also exposes
        # the inputs and final memory for inspection.
        print("==> compiling test_fn")
        self.test_fn = theano.function(
            inputs=[self.input_var, self.q_var, self.answer_var],
            outputs=[
                self.prediction, self.loss, self.input_var, self.q_var,
                last_mem
            ])
        if self.mode == 'train':
            print("==> computing gradients (for debugging)")
            gradient = T.grad(self.loss, self.params)
            self.get_gradient_fn = theano.function(
                inputs=[self.input_var, self.q_var, self.answer_var],
                outputs=gradient)