def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128, input_params=None):
    """
    Assemble the symbolic graph: embeddings -> encoder -> decoder -> softmax.

    :param n_words: stored as ``options['n_words']``; used as the softmax
        layer's ``n_out``.
    :param word_embedding_dim: last dimension of the embedding tensors fed
        to the encoder and decoder layers.
    :param hidden_status_dim: hidden size handed to the encoder, decoder and
        softmax layers.
    :param input_params: passed unchanged to ``self.set_parameters``.
    """
    options = {
        'n_words': n_words,
        'word_embedding_dim': word_embedding_dim,
        'hidden_status_dim': hidden_status_dim,
        # Learning rate for sgd (not used for adadelta and rmsprop).
        'learning_rate': 0.0001,
        # sgd, adadelta and rmsprop are available; sgd is very hard to use
        # and not recommended (probably needs momentum and a decaying
        # learning rate).
        'optimizer': self.rmsprop,
    }
    self.options = options

    # Global parameters first, then the Theano parameters built from them.
    global_params = self.init_global_params(options)
    self.tparams = self.init_tparams(global_params)

    # construct network
    theano.config.compute_test_value = 'off'
    embedding_dim = options['word_embedding_dim']
    hidden_dim = options['hidden_status_dim']

    self.question = tensor.matrix('question', dtype='int64')
    self.question_mask = tensor.matrix('question_mask',
                                       dtype=config.globalFloatType())
    question_shape = [self.question.shape[0], self.question.shape[1], embedding_dim]
    question_rows = self.tparams['Wemb_e'][self.question.flatten()]
    question_embedding = question_rows.reshape(question_shape)

    # 1. encoder layer
    self.encoder_layer = EncoderLayer(word_embedding_dim=embedding_dim,
                                      hidden_status_dim=hidden_dim,
                                      tparams=self.tparams)
    encoder_inputs = (question_embedding, self.question_mask)
    self.encoder_hidden_status = self.encoder_layer.get_output(inputs=encoder_inputs)

    # 2. decoder layer
    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_mask = tensor.matrix('answer_mask',
                                     dtype=config.globalFloatType())
    answer_shape = [self.answer.shape[0], self.answer.shape[1], embedding_dim]
    answer_rows = self.tparams['Wemb_d'][self.answer.flatten()]
    answer_embedding = answer_rows.reshape(answer_shape)
    self.decoder_layer = DecoderLayer_Seq2Seq(word_embedding_dim=embedding_dim,
                                              hidden_status_dim=hidden_dim,
                                              tparams=self.tparams)
    decoder_inputs = (answer_embedding, self.answer_mask, self.encoder_hidden_status)
    self.decoder_hidden_status = self.decoder_layer.get_output(inputs=decoder_inputs)

    # 3. softmax layer
    self.softmax_layer = SoftmaxLayer(n_in=hidden_dim,
                                      n_out=options["n_words"],
                                      tparams=self.tparams)
    # Decoder states with the final index dropped are scored against the
    # answer with its first index dropped.
    flat_rows = (self.answer.shape[0] - 1) * self.answer.shape[1]
    self.softmax_input = self.decoder_hidden_status[:-1, :, :].reshape(
        [flat_rows, hidden_dim])
    targets = self.answer[1:, :].flatten()
    likelihood = self.softmax_layer.likelihood(input_data=self.softmax_input,
                                               y=targets)

    # get evaluation and cost
    negative_log_likelihood = -tensor.log(likelihood)
    evaluation = self.get_evaluation(negative_log_likelihood,
                                     self.answer_mask[1:, :],
                                     self.answer.shape[0],
                                     self.answer.shape[1])
    self.zae, self.sae, self.bae, self.likihood_vector = evaluation
    self.cost = self.zae

    # Converts params from a list to Theano variables.
    self.set_parameters(input_params)
def update_function(index):
    """
    Run ``batch_repeat`` gradient/update steps on mini-batch ``index``.

    The batch is fetched from ``cr.get_train_set`` for the range
    ``[index * batch_size, (index + 1) * batch_size]``.  The shorter of the
    two answer sets is zero-padded along axis 0 so both have the same number
    of rows, then the question is duplicated and the two answer sets are
    joined along axis 1 before being passed to ``f_grad_shared``.

    :param index: mini-batch index.
    :returns: the cost from the last ``f_grad_shared`` call, or ``None`` when
        ``batch_repeat`` is not positive (previously an UnboundLocalError).
    """
    def _pad_rows(array, n_rows, dtype):
        """Append all-zero rows to ``array`` until it has ``n_rows`` rows."""
        padding = numpy.zeros((n_rows - array.shape[0], array.shape[1]), dtype=dtype)
        return numpy.concatenate([array, padding], axis=0)

    (answer_up, answer_up_mask), (question, question_mask), \
        (answer_down, answer_down_mask), _, _ = \
        cr.get_train_set([index * batch_size, (index + 1) * batch_size])

    # alignment
    if answer_up.shape[0] > answer_down.shape[0]:
        answer_down = _pad_rows(answer_down, answer_up.shape[0], 'int64')
        answer_down_mask = _pad_rows(answer_down_mask, answer_up_mask.shape[0],
                                     config.globalFloatType())
    elif answer_up.shape[0] < answer_down.shape[0]:
        answer_up = _pad_rows(answer_up, answer_down.shape[0], 'int64')
        answer_up_mask = _pad_rows(answer_up_mask, answer_down_mask.shape[0],
                                   config.globalFloatType())

    # These arrays do not change between repeats, so build them once.
    batch_inputs = (
        numpy.concatenate([question, question], axis=1),
        numpy.concatenate([question_mask, question_mask], axis=1),
        numpy.concatenate([answer_up, answer_down], axis=1),
        numpy.concatenate([answer_up_mask, answer_down_mask], axis=1),
    )

    cost = None
    # range() is available on both Python 2 and Python 3.
    for _ in range(batch_repeat):
        cost = f_grad_shared(*batch_inputs)
        f_update(self.options["learning_rate"])
    return cost
def get_validing_function(self, cr):
    """
    Compile a zero-argument Theano function returning ``[self.cost]`` on the
    validation set.

    The data comes from ``cr.get_valid_set()``.  The shorter of the two
    answer sets is zero-padded along axis 0; the question is then duplicated
    and the answer sets joined along axis 1, and the results are bound to the
    model's symbolic inputs through ``givens``.

    :param cr: object providing ``get_valid_set()``.
    :returns: the compiled function (named ``valid_function``).
    """
    def _grow(block, row_count, dtype):
        """Return ``block`` extended with zero rows up to ``row_count`` rows."""
        extra = numpy.zeros((row_count - block.shape[0], block.shape[1]), dtype=dtype)
        return numpy.concatenate([block, extra], axis=0)

    up_pair, question_pair, down_pair, _, _ = cr.get_valid_set()
    answer_up, answer_up_mask = up_pair
    question, question_mask = question_pair
    answer_down, answer_down_mask = down_pair

    # alignment
    up_rows = answer_up.shape[0]
    down_rows = answer_down.shape[0]
    if up_rows > down_rows:
        answer_down = _grow(answer_down, up_rows, 'int64')
        answer_down_mask = _grow(answer_down_mask, answer_up_mask.shape[0],
                                 config.globalFloatType())
    elif up_rows < down_rows:
        answer_up = _grow(answer_up, down_rows, 'int64')
        answer_up_mask = _grow(answer_up_mask, answer_down_mask.shape[0],
                               config.globalFloatType())

    substitutions = {
        self.question: numpy.concatenate([question, question], axis=1),
        self.question_mask: numpy.concatenate([question_mask, question_mask], axis=1),
        self.answer: numpy.concatenate([answer_up, answer_down], axis=1),
        self.answer_mask: numpy.concatenate([answer_up_mask, answer_down_mask], axis=1),
    }
    return theano.function(inputs=[],
                           outputs=[self.cost],
                           givens=substitutions,
                           name='valid_function')
def init_global_params(self, options, word_embedings):
    """
    Create the global (non-recurrent) parameters as numpy arrays.

    Three arrays are drawn with ``numpy.random.rand`` and scaled by 0.1:
      * ``Wemb_e`` -- shape (n_words, word_embedding_dim)
      * ``P_M``    -- shape (hidden_status_dim,) * 3
      * ``P_N``    -- shape (2 * hidden_status_dim, hidden_status_dim)

    :param options: dict providing 'n_words', 'word_embedding_dim' and
        'hidden_status_dim'.
    :param word_embedings: not used by the current implementation.
    :returns: OrderedDict mapping parameter name to array.
    """
    def _scaled_uniform(*shape):
        """Draw from numpy's global RNG, scale by 0.1, cast to the float type."""
        return (0.1 * numpy.random.rand(*shape)).astype(config.globalFloatType())

    params = OrderedDict()
    # The draw order (Wemb_e, P_M, P_N) is kept so the global RNG stream is
    # consumed exactly as before.
    params['Wemb_e'] = _scaled_uniform(options['n_words'],
                                       options['word_embedding_dim'])
    params['P_M'] = _scaled_uniform(options['hidden_status_dim'],
                                    options['hidden_status_dim'],
                                    options['hidden_status_dim'])
    params['P_N'] = _scaled_uniform(2 * options['hidden_status_dim'],
                                    options['hidden_status_dim'])
    return params
def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128, input_params=None):
    """
    Build the encoder/decoder/softmax graph and the masked cost.

    ``self.cost`` is the mask-weighted sum of the softmax layer's
    ``negative_log_likelihood`` output, negated and divided by the mask sum.
    ``self.output_error_vector`` is the same quantity computed per column
    (summed over axis 0).

    :param n_words: stored as ``options['n_words']``; used as the softmax
        layer's ``n_out``.
    :param word_embedding_dim: last dimension of the embedding tensors.
    :param hidden_status_dim: hidden size handed to every layer.
    :param input_params: passed unchanged to ``self._set_parameters``.
    """
    self.options = options = dict([
        ('n_words', n_words),
        ('word_embedding_dim', word_embedding_dim),
        ('hidden_status_dim', hidden_status_dim),
        # Learning rate for sgd (not used for adadelta and rmsprop).
        ('learning_rate', 0.0001),
        # sgd, adadelta and rmsprop are available; sgd is very hard to use
        # and not recommended (probably needs momentum and a decaying
        # learning rate).
        ('optimizer', self.rmsprop),
    ])

    # Global parameters, then the Theano parameters built from them.
    self.tparams = self.init_tparams(self.init_global_params(options))

    # construct network
    theano.config.compute_test_value = 'off'
    emb_width = options['word_embedding_dim']
    state_width = options['hidden_status_dim']

    self.question = tensor.matrix('question', dtype='int64')
    self.question_mask = tensor.matrix('question_mask',
                                       dtype=config.globalFloatType())
    question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
        [self.question.shape[0], self.question.shape[1], emb_width])

    # 1. encoder layer
    self.encoder_layer = EncoderLayer(word_embedding_dim=emb_width,
                                      hidden_status_dim=state_width,
                                      tparams=self.tparams)
    self.encoder_hidden_status = self.encoder_layer.getOutput(
        inputs=(question_embedding, self.question_mask))

    # 2. decoder layer
    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_mask = tensor.matrix('answer_mask',
                                     dtype=config.globalFloatType())
    answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
        [self.answer.shape[0], self.answer.shape[1], emb_width])
    self.decoder_layer = DecoderLayer_Seq2Seq(word_embedding_dim=emb_width,
                                              hidden_status_dim=state_width,
                                              tparams=self.tparams)
    self.decoder_hidden_status = self.decoder_layer.getOutput(
        inputs=(answer_embedding, self.answer_mask, self.encoder_hidden_status))

    # 3. softmax layer
    self.softmax_layer = SoftmaxLayer(n_in=state_width,
                                      n_out=options["n_words"],
                                      tparams=self.tparams)
    kept_rows = self.answer.shape[0] - 1
    n_columns = self.answer.shape[1]
    self.softmax_input = self.decoder_hidden_status[:-1, :, :].reshape(
        [kept_rows * n_columns, state_width])
    error_vector = self.softmax_layer.negative_log_likelihood(
        self.softmax_input, y=self.answer[1:, :].flatten())

    # Only positions with a non-zero mask contribute to the cost.
    target_mask = self.answer_mask[1:, :]
    weighted_total = tensor.dot(error_vector, target_mask.flatten())
    self.cost = -1.0 * weighted_total / target_mask.sum()

    masked_errors = error_vector.reshape([kept_rows, n_columns]) * target_mask
    self.output_error_vector = -masked_errors.sum(axis=0) / target_mask.sum(axis=0)

    # Converts params from a list to Theano variables.
    self._set_parameters(input_params)
def get_mask_data_topic(batch):
    """
    Pack variable-length sequences into a zero-padded matrix plus a mask.

    Sequence ``i`` is written into column ``i``; the matrix has as many rows
    as the longest sequence.  Both outputs use ``config.globalFloatType()``.

    :param batch: sequence of sequences.
    :returns: ``(data, mask)``; ``mask`` holds 1. wherever ``data`` was
        filled from a sequence and 0. elsewhere.
    """
    seq_lengths = [len(seq) for seq in batch]
    shape = (numpy.max(seq_lengths), len(batch))
    data = numpy.zeros(shape).astype(config.globalFloatType())
    mask = numpy.zeros(shape).astype(config.globalFloatType())
    for column, (seq, length) in enumerate(zip(batch, seq_lengths)):
        data[:length, column] = seq
        mask[:length, column] = 1.
    return data, mask
def init_global_params(self, options):
    """
    Create the two embedding tables as numpy arrays.

    ``Wemb_e`` and ``Wemb_d`` each have shape (n_words, word_embedding_dim),
    are drawn with ``numpy.random.rand`` and scaled by 0.01.

    :param options: dict providing 'n_words' and 'word_embedding_dim'.
    :returns: OrderedDict mapping parameter name to array.
    """
    params = OrderedDict()
    # One independent draw per table, in the order Wemb_e then Wemb_d.
    for key in ('Wemb_e', 'Wemb_d'):
        draw = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params[key] = (0.01 * draw).astype(config.globalFloatType())
    return params
def init_global_params(self, options):
    """
    Create the two embedding tables as numpy arrays.

    ``Wemb_e`` and ``Wemb_d`` each have shape (n_words, word_embedding_dim),
    are drawn with ``numpy.random.rand`` and scaled by 0.1.

    :param options: dict providing 'n_words' and 'word_embedding_dim'.
    :returns: OrderedDict mapping parameter name to array.
    """
    def _new_table():
        """Draw one (n_words, word_embedding_dim) table scaled by 0.1."""
        draw = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        return (0.1 * draw).astype(config.globalFloatType())

    params = OrderedDict()
    params['Wemb_e'] = _new_table()
    params['Wemb_d'] = _new_table()
    return params
def __init__(self, word_embedding_dim, hidden_status_dim, encoder_hidden_dim, tparams=None, prefix='Attention'):
    """
    Create the attention parameters and register them in ``tparams``.

    Three arrays are built:
      * ``W``  -- ``uniform_random_weight`` of shape
        (hidden_status_dim, hidden_status_dim) with bound 0.01;
      * ``U``  -- ``ortho_weight(hidden_status_dim)`` repeated
        ``encoder_hidden_dim // hidden_status_dim`` times along axis 0;
      * ``va`` -- zeros of length hidden_status_dim.

    :param word_embedding_dim: accepted for interface compatibility; not
        used by this initialiser.
    :param hidden_status_dim: size used for all three parameters.
    :param encoder_hidden_dim: determines how many blocks make up ``U``.
    :param tparams: mapping that receives the shared variables under the
        names ``self._p(prefix, 'W' / 'U' / 'va')``.  When None, nothing is
        stored and a notice is printed.
    :param prefix: name prefix for the shared variables.
    """
    self.hidden_status_dim = hidden_status_dim
    self.params = tparams
    self.prefix = prefix

    W_bound = 0.01
    W = uniform_random_weight(size=(hidden_status_dim, hidden_status_dim),
                              bound=W_bound)

    # Floor division gives the same block count on Python 2 and Python 3.
    n_blocks = int(encoder_hidden_dim // hidden_status_dim)
    # NOTE(review): the list repeats ONE ortho_weight() result, so every
    # block of U starts out identical -- confirm this is intended.
    U = numpy.concatenate([ortho_weight(hidden_status_dim)] * n_blocks, axis=0)
    va = numpy.zeros((hidden_status_dim,), dtype=globalFloatType())

    if tparams is None:
        # Parenthesised so the statement is valid on Python 2 and Python 3.
        print(' tparams is None')
        return

    for suffix, value in (('W', W), ('U', U), ('va', va)):
        name = self._p(prefix, suffix)
        tparams[name] = theano.shared(value, name=name)
def __init__(self, base_dim, refer_dim, tparams, prefix="maxout"):
    """
    Create the maxout layer's shared variables and register them.

    :param base_dim: number of rows of ``W_o``.
    :param refer_dim: number of columns of ``W_o`` and rows of ``W_t``;
        ``W_t`` has ``2 * refer_dim`` columns and ``b`` has
        ``2 * refer_dim`` entries.
    :param tparams: mapping that receives ``W_t``, ``W_o`` and ``b`` under
        the names ``self._p(prefix, ...)``.  When None, nothing is
        registered and a notice is printed; the variables stay available as
        attributes.
    :param prefix: name prefix for the shared variables.
    """
    self.W_t = theano.shared(
        value=util.uniform_random_weight((refer_dim, 2 * refer_dim), 0.1),
        name=self._p(prefix, 'W_t'),
        borrow=True)
    self.W_o = theano.shared(
        value=util.uniform_random_weight((base_dim, refer_dim), 0.1),
        name=self._p(prefix, 'W_o'),
        borrow=True)
    # The bias starts as a vector of 2 * refer_dim zeros.
    self.b = theano.shared(
        value=numpy.zeros((2 * refer_dim,), dtype=config.globalFloatType()),
        name=self._p(prefix, 'b'),
        borrow=True)

    # parameters of the model
    self.params = [self.W_t, self.W_o, self.b]

    if tparams is not None:
        tparams[self._p(prefix, 'W_t')] = self.W_t
        tparams[self._p(prefix, 'W_o')] = self.W_o
        tparams[self._p(prefix, 'b')] = self.b
    else:
        # Parenthesised so the statement is valid on Python 2 and Python 3.
        print(" tparams is None")
def __init__(self, word_embedding_dim, hidden_status_dim, encoder_hidden_dim, tparams=None, prefix='Attention'):
    """
    Create the attention parameters and register them in ``tparams``.

    Three arrays are built:
      * ``W``  -- ``uniform_random_weight`` of shape
        (hidden_status_dim, hidden_status_dim) with bound
        ``sqrt(6 / hidden_status_dim)``;
      * ``U``  -- ``ortho_weight(hidden_status_dim)`` repeated
        ``encoder_hidden_dim // hidden_status_dim`` times along axis 0;
      * ``va`` -- zeros of length hidden_status_dim.

    :param word_embedding_dim: accepted for interface compatibility; not
        used by this initialiser.
    :param hidden_status_dim: size used for all three parameters.
    :param encoder_hidden_dim: determines how many blocks make up ``U``.
    :param tparams: mapping that receives the shared variables under the
        names ``self._p(prefix, 'W' / 'U' / 'va')``.  When None, nothing is
        stored and a notice is printed.
    :param prefix: name prefix for the shared variables.
    """
    self.hidden_status_dim = hidden_status_dim
    self.params = tparams
    self.prefix = prefix

    W_bound = numpy.sqrt(6. / (hidden_status_dim))
    W = uniform_random_weight(size=(hidden_status_dim, hidden_status_dim),
                              bound=W_bound)

    # Floor division gives the same block count on Python 2 and Python 3.
    n_blocks = int(encoder_hidden_dim // hidden_status_dim)
    # NOTE(review): the list repeats ONE ortho_weight() result, so every
    # block of U starts out identical -- confirm this is intended.
    U = numpy.concatenate([ortho_weight(hidden_status_dim)] * n_blocks, axis=0)
    va = numpy.zeros((hidden_status_dim,), dtype=globalFloatType())

    if tparams is None:
        # Parenthesised so the statement is valid on Python 2 and Python 3.
        print(' tparams is None')
        return

    for suffix, value in (('W', W), ('U', U), ('va', va)):
        name = self._p(prefix, suffix)
        tparams[name] = theano.shared(value, name=name)
def uniform_random_weight(size, bound, dtype=config.globalFloatType(), seed=123):
    """
    Draw an array uniformly from ``[-|bound|, |bound|)``.

    A fresh ``numpy.random.RandomState(seed)`` is created on every call, so
    with the default seed two calls with the same ``size`` and ``bound``
    return identical arrays.  Pass a different ``seed`` (or ``None`` to let
    numpy seed itself) to get distinct draws.

    :param size: output shape, forwarded to ``RandomState.uniform``.
    :param bound: half-width of the sampling interval; its sign is ignored.
    :param dtype: dtype of the returned array.
    :param seed: seed for the per-call generator; 123 reproduces the
        previously hard-coded behaviour.
    :returns: numpy array of the requested shape and dtype.
    """
    bound = abs(bound)
    rng = numpy.random.RandomState(seed)
    return numpy.asarray(rng.uniform(low=-bound, high=bound, size=size),
                         dtype=dtype)
def uniform_random_weight(size, bound, dtype=config.globalFloatType(), seed=123):
    """
    Draw an array uniformly from ``[-|bound|, |bound|)``.

    A fresh ``numpy.random.RandomState(seed)`` is created on every call, so
    with the default seed two calls with the same ``size`` and ``bound``
    return identical arrays.  Pass a different ``seed`` (or ``None`` to let
    numpy seed itself) to get distinct draws.

    :param size: output shape, forwarded to ``RandomState.uniform``.
    :param bound: half-width of the sampling interval; its sign is ignored.
    :param dtype: dtype of the returned array.
    :param seed: seed for the per-call generator; 123 reproduces the
        previously hard-coded behaviour.
    :returns: numpy array of the requested shape and dtype.
    """
    bound = abs(bound)
    rng = numpy.random.RandomState(seed)
    return numpy.asarray(rng.uniform(low=-bound, high=bound, size=size),
                         dtype=dtype)
def init_global_params(self, options):
    """
    Create the word and topic embedding tables as numpy arrays.

    All tables are drawn with ``numpy.random.rand`` and scaled by 0.1:
      * ``Wemb_e``, ``Wemb_d`` -- shape (n_words, word_embedding_dim)
      * ``Temb``               -- shape (n_topics, topic_embedding_dim)

    :param options: dict providing 'n_words', 'word_embedding_dim',
        'n_topics' and 'topic_embedding_dim'.
    :returns: OrderedDict mapping parameter name to array.
    """
    # (name, key of the row count, key of the column count), in draw order.
    layout = (
        ('Wemb_e', 'n_words', 'word_embedding_dim'),
        ('Wemb_d', 'n_words', 'word_embedding_dim'),
        ('Temb', 'n_topics', 'topic_embedding_dim'),
    )
    params = OrderedDict()
    for name, rows_key, cols_key in layout:
        draw = numpy.random.rand(options[rows_key], options[cols_key])
        params[name] = (0.1 * draw).astype(config.globalFloatType())
    return params
def __init__(self, n_in, n_out, tparams, prefix="softmax"):
    """
    Create the softmax layer's weight and bias as zero-initialised shared
    variables and register them in ``tparams``.

    :type n_in: int
    :param n_in: number of input units (rows of ``W``).

    :type n_out: int
    :param n_out: number of output units (columns of ``W``, length of
        ``b``).

    :param tparams: mapping that receives ``W`` and ``b`` under the names
        ``self._p(prefix, 'W')`` and ``self._p(prefix, 'b')``.  When None,
        nothing is registered and a notice is printed.

    :param prefix: name prefix for the shared variables.
    """
    float_type = config.globalFloatType()

    # The weights W start as an (n_in, n_out) matrix of zeros.
    self.W = theano.shared(value=numpy.zeros((n_in, n_out), dtype=float_type),
                           name=self._p(prefix, 'W'),
                           borrow=True)
    # The biases b start as a vector of n_out zeros.
    self.b = theano.shared(value=numpy.zeros((n_out,), dtype=float_type),
                           name=self._p(prefix, 'b'),
                           borrow=True)

    # parameters of the model
    self.params = [self.W, self.b]

    if tparams is not None:
        tparams[self._p(prefix, 'W')] = self.W
        tparams[self._p(prefix, 'b')] = self.b
    else:
        # Parenthesised so the statement is valid on Python 2 and Python 3.
        print(" tparams is None")
def get_mask_data(batch):
    """
    Pack variable-length sequences into a zero-padded int64 matrix plus a
    float mask.

    Sequence ``i`` is written into column ``i``; the matrix has as many rows
    as the longest sequence.

    :param batch: sequence of sequences.
    :returns: ``(data, mask)``; ``data`` is int64, ``mask`` uses
        ``config.globalFloatType()`` and holds 1. wherever ``data`` was
        filled from a sequence.
    """
    n_columns = len(batch)
    sizes = [len(item) for item in batch]
    n_rows = numpy.max(sizes)

    data = numpy.zeros((n_rows, n_columns)).astype('int64')
    mask = numpy.zeros((n_rows, n_columns)).astype(config.globalFloatType())

    column = 0
    for item in batch:
        filled = sizes[column]
        data[:filled, column] = item
        mask[:filled, column] = 1.
        column += 1
    return data, mask
def get_testing_function(self, cr):
    """
    Compile a zero-argument Theano function returning
    ``[self.zae, self.sae, self.bae]`` on the test set.

    The data comes from ``cr.get_test_set()``.  The shorter of the two
    answer sets is zero-padded along axis 0; the question is then duplicated
    and the answer sets joined along axis 1, and the results are bound to the
    model's symbolic inputs through ``givens``.

    :param cr: object providing ``get_test_set()``.
    :returns: ``(test_function, None)``; the second slot is always None.
    """
    def _extend(block, row_count, dtype):
        """Return ``block`` extended with zero rows up to ``row_count`` rows."""
        tail = numpy.zeros((row_count - block.shape[0], block.shape[1]), dtype=dtype)
        return numpy.concatenate([block, tail], axis=0)

    up_pair, question_pair, down_pair, _, _ = cr.get_test_set()
    answer_up, answer_up_mask = up_pair
    question, question_mask = question_pair
    answer_down, answer_down_mask = down_pair

    # alignment
    if answer_up.shape[0] > answer_down.shape[0]:
        answer_down = _extend(answer_down, answer_up.shape[0], 'int64')
        answer_down_mask = _extend(answer_down_mask, answer_up_mask.shape[0],
                                   config.globalFloatType())
    elif answer_up.shape[0] < answer_down.shape[0]:
        answer_up = _extend(answer_up, answer_down.shape[0], 'int64')
        answer_up_mask = _extend(answer_up_mask, answer_down_mask.shape[0],
                                 config.globalFloatType())

    bound_inputs = {
        self.question: numpy.concatenate([question, question], axis=1),
        self.question_mask: numpy.concatenate([question_mask, question_mask], axis=1),
        self.answer: numpy.concatenate([answer_up, answer_down], axis=1),
        self.answer_mask: numpy.concatenate([answer_up_mask, answer_down_mask], axis=1),
    }
    test_function = theano.function(inputs=[],
                                    outputs=[self.zae, self.sae, self.bae],
                                    givens=bound_inputs,
                                    name='test_function')
    return test_function, None
def get_mask(self, question, answer, max_length=30):
    """
    Pack token sequences into zero-padded int64 matrices plus float masks.

    Each sequence fills one ROW (sequence index first, position second).
    Question sequences are truncated to ``max_length`` and the question
    matrix always has ``max_length`` columns; the answer matrix is as wide
    as the longest answer.

    :param question: sequence of token sequences (must support slicing).
    :param answer: sequence of token sequences.
    :param max_length: width of the question matrix; defaults to 30, the
        value that used to be hard-coded.
    :returns: ``(question, question_mask, answer, answer_mask)``; the masks
        use ``config.globalFloatType()`` and hold 1.0 at every filled
        position.
    """
    float_type = config.globalFloatType()
    n_questions = len(question)
    n_answers = len(answer)
    # max() rejects an empty sequence, so an empty answer batch gets width 0
    # instead of raising ValueError.
    answer_width = max([len(s) for s in answer]) if n_answers else 0

    question_data = numpy.zeros((n_questions, max_length), dtype='int64')
    question_mask = numpy.zeros((n_questions, max_length), dtype=float_type)
    answer_data = numpy.zeros((n_answers, answer_width), dtype='int64')
    answer_mask = numpy.zeros((n_answers, answer_width), dtype=float_type)

    for row, sentence in enumerate(question):
        tokens = sentence[0:max_length]
        width = len(tokens)
        if width:
            question_data[row, :width] = tokens
            question_mask[row, :width] = 1.0

    for row, sentence in enumerate(answer):
        width = len(sentence)
        if width:
            answer_data[row, :width] = sentence
            answer_mask[row, :width] = 1.0

    return question_data, question_mask, answer_data, answer_mask
def __init__(self, n_in, n_out, tparams, prefix="softmax"):
    """
    Create the softmax layer's weight and bias as zero-initialised shared
    variables and register them in ``tparams``.

    :type n_in: int
    :param n_in: number of input units (rows of ``W``).

    :type n_out: int
    :param n_out: number of output units (columns of ``W``, length of
        ``b``).

    :param tparams: mapping that receives ``W`` and ``b`` under the names
        ``self._p(prefix, 'W')`` and ``self._p(prefix, 'b')``; it is also
        kept as ``self.params``.  When None, nothing is registered and a
        notice is printed.

    :param prefix: name prefix for the shared variables; kept as
        ``self.prefix``.
    """
    float_type = config.globalFloatType()

    # The weights W start as an (n_in, n_out) matrix of zeros.
    self.W = theano.shared(value=numpy.zeros((n_in, n_out), dtype=float_type),
                           name=self._p(prefix, 'W'),
                           borrow=True)
    # The biases b start as a vector of n_out zeros.
    self.b = theano.shared(value=numpy.zeros((n_out,), dtype=float_type),
                           name=self._p(prefix, 'b'),
                           borrow=True)

    # Unlike W and b, the parameter store is the shared tparams mapping.
    self.params = tparams
    self.prefix = prefix

    if tparams is not None:
        tparams[self._p(prefix, 'W')] = self.W
        tparams[self._p(prefix, 'b')] = self.b
    else:
        # Parenthesised so the statement is valid on Python 2 and Python 3.
        print(" tparams is None")
def __init__(self, base_dim, refer_dim, tparams, prefix="maxout"):
    """
    Create the maxout layer's shared variables and register them.

    :param base_dim: number of rows of ``W_o``.
    :param refer_dim: number of columns of ``W_o`` and rows of ``W_t``;
        ``W_t`` has ``2 * refer_dim`` columns and ``b`` has
        ``2 * refer_dim`` entries.
    :param tparams: mapping that receives ``W_t``, ``W_o`` and ``b`` under
        the names ``self._p(prefix, ...)``.  When None, nothing is
        registered and a notice is printed; the variables stay available as
        attributes.
    :param prefix: name prefix for the shared variables.
    """
    self.W_t = theano.shared(
        value=util.uniform_random_weight((refer_dim, 2 * refer_dim), 0.01),
        name=self._p(prefix, 'W_t'),
        borrow=True)
    self.W_o = theano.shared(
        value=util.uniform_random_weight((base_dim, refer_dim), 0.01),
        name=self._p(prefix, 'W_o'),
        borrow=True)
    # The bias starts as a vector of 2 * refer_dim zeros.
    self.b = theano.shared(
        value=numpy.zeros((2 * refer_dim,), dtype=config.globalFloatType()),
        name=self._p(prefix, 'b'),
        borrow=True)

    # parameters of the model
    self.params = [self.W_t, self.W_o, self.b]

    if tparams is not None:
        tparams[self._p(prefix, 'W_t')] = self.W_t
        tparams[self._p(prefix, 'W_o')] = self.W_o
        tparams[self._p(prefix, 'b')] = self.b
    else:
        # Parenthesised so the statement is valid on Python 2 and Python 3.
        print(" tparams is None")
def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128, n_topics=2, topic_embedding_dim=5, input_params=None, word_embedings=None):
    """
    Build the symbolic graph that classifies a (reference, answer, context)
    triple.

    The reference, the answer and the flattened context are all embedded
    with ``Wemb_e`` and passed through one encoder layer (prefix
    'Encoder').  The final slice (index -1) of the context encoding is
    reshaped and passed through a second encoder (prefix 'Encoder2').  The
    final reference and answer states are each combined with the processed
    context through ``P_M``, concatenated, projected with ``P_N`` and fed to
    a softmax layer with 3 outputs whose targets are ``topic + 1``.

    :param n_words: stored as ``options['n_words']``.
    :param word_embedding_dim: last dimension of the embedding tensors.
    :param hidden_status_dim: hidden size of both encoders and input size of
        the softmax layer.
    :param n_topics: stored as ``options['n_topics']``.
    :param topic_embedding_dim: stored as ``options['topic_embedding_dim']``.
    :param input_params: passed unchanged to ``self._set_parameters``.
    :param word_embedings: passed unchanged to ``self.init_global_params``.
    """
    options = {
        'n_words': n_words,
        'word_embedding_dim': word_embedding_dim,
        'hidden_status_dim': hidden_status_dim,
        'n_topics': n_topics,
        'topic_embedding_dim': topic_embedding_dim,
        # Learning rate for sgd (not used for adadelta and rmsprop).
        'learning_rate': 0.0001,
        # sgd, adadelta and rmsprop are available; sgd is very hard to use
        # and not recommended (probably needs momentum and a decaying
        # learning rate).
        'optimizer': self.adadelta,
    }
    self.options = options

    # Global parameters, then the Theano parameters built from them.
    global_params = self.init_global_params(options, word_embedings)
    self.tparams = self.init_tparams(global_params)

    # construct network
    theano.config.compute_test_value = 'off'
    embedding_dim = options['word_embedding_dim']
    hidden_dim = options['hidden_status_dim']

    def _embed(indices, out_shape):
        """Look up ``Wemb_e`` rows for ``indices`` and reshape the result."""
        return self.tparams['Wemb_e'][indices.flatten()].reshape(out_shape)

    # Symbolic inputs.
    self.reference = tensor.matrix('reference', dtype='int64')
    self.reference_mask = tensor.matrix('reference_mask',
                                        dtype=config.globalFloatType())
    self.topic = tensor.matrix('topic', dtype=config.globalFloatType())
    self.context = tensor.tensor3('context', dtype='int64')
    self.context_mask = tensor.tensor3('context_mask',
                                       dtype=config.globalFloatType())
    self.context_mask2 = tensor.matrix('context_mask2',
                                       dtype=config.globalFloatType())

    self.reference_embedding = _embed(
        self.reference,
        [self.reference.shape[0], self.reference.shape[1], embedding_dim])

    # 1. encoder layer, reused for the reference, the answer and the context
    shared_encoder = EncoderLayer(word_embedding_dim=embedding_dim,
                                  hidden_status_dim=hidden_dim,
                                  tparams=self.tparams,
                                  prefix='Encoder')
    self.encoder_layer_reference = shared_encoder
    self.encoder_hidden_status_reference = shared_encoder.getOutput(
        inputs=(self.reference_embedding, self.reference_mask))

    # 2. answer encoding
    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_mask = tensor.matrix('answer_mask',
                                     dtype=config.globalFloatType())
    self.answer_embedding = _embed(
        self.answer,
        [self.answer.shape[0], self.answer.shape[1], embedding_dim])
    self.encoder_hidden_status_answer = shared_encoder.getOutput(
        inputs=(self.answer_embedding, self.answer_mask))

    # 3. context encoding: axes 1 and 2 of the context are merged before the
    # first pass and split again for the second encoder.
    merged_axes = self.context.shape[1] * self.context.shape[2]
    self.context_emdedding = _embed(
        self.context, [self.context.shape[0], merged_axes, embedding_dim])
    flat_context_mask = self.context_mask.flatten().reshape(
        [self.context.shape[0], merged_axes])
    self.encoder_hidden_status_context1 = shared_encoder.getOutput(
        inputs=(self.context_emdedding, flat_context_mask))

    self.encoder_layer_context2 = EncoderLayer(word_embedding_dim=hidden_dim,
                                               hidden_status_dim=hidden_dim,
                                               tparams=self.tparams,
                                               prefix='Encoder2')
    second_pass_input = self.encoder_hidden_status_context1[-1, :, :].reshape(
        [self.context.shape[1], self.context.shape[2], hidden_dim])
    self.encoder_hidden_status_context2 = self.encoder_layer_context2.getOutput(
        inputs=(second_pass_input, self.context_mask2))
    self.context_processed = self.encoder_hidden_status_context2[-1]

    # 4. combine reference/answer with the context through P_M and P_N
    reference_projection = tensor.dot(self.encoder_hidden_status_reference[-1],
                                      self.tparams['P_M'])
    self.rmc = tensor.batched_dot(reference_projection, self.context_processed)
    answer_projection = tensor.dot(self.encoder_hidden_status_answer[-1],
                                   self.tparams['P_M'])
    self.amc = tensor.batched_dot(answer_projection, self.context_processed)
    joined = tensor.concatenate([self.rmc, self.amc], axis=1)
    self.softmax_input = tensor.dot(joined, self.tparams['P_N'])

    # 5. softmax layer with 3 outputs; targets are topic + 1 cast to int64
    self.softmax_layer = SoftmaxLayer(n_in=hidden_dim,
                                      n_out=3,
                                      tparams=self.tparams)
    flat_topic = self.topic.flatten()
    targets = tensor.cast(flat_topic + tensor.ones_like(self.topic.flatten()),
                          'int64')
    self.output_vector = self.softmax_layer.negative_log_likelihood(
        self.softmax_input, targets)
    self.cost = -1.0 * self.output_vector.sum() / self.context.shape[2]
    self.output_error_vector = self.cost

    # Converts params from a list to Theano variables.
    self._set_parameters(input_params)
def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128, input_params=None):
    """Build the symbolic graph: two encoders, an AttentionDecoderLayer and a maxout output.

    The question is encoded once in the given order and once reversed along
    axis 0; both hidden sequences are concatenated on the feature axis and
    handed to the decoder together with the question mask.

    Args:
        n_words: stored in ``self.options['n_words']``.
        word_embedding_dim: last-axis size used when reshaping the rows
            looked up in ``Wemb_e`` / ``Wemb_d``.
        hidden_status_dim: hidden size passed to the encoder/decoder layers.
        input_params: forwarded unchanged to ``self._set_parameters``.
    """
    options = {
        'n_words': n_words,
        'word_embedding_dim': word_embedding_dim,
        'hidden_status_dim': hidden_status_dim,
        # Learning rate for sgd (not used for adadelta and rmsprop).
        'learning_rate': 0.0001,
        # sgd, adadelta and rmsprop are available; sgd is hard to use here.
        'optimizer': self.rmsprop,
    }
    self.options = options

    # Global parameters first, then their Theano counterparts.
    self.tparams = self.init_tparams(self.init_global_params(options))

    emb_dim = options['word_embedding_dim']
    hid_dim = options['hidden_status_dim']

    theano.config.compute_test_value = 'off'

    # Question inputs and their embedding lookup.
    self.question = tensor.matrix('question', dtype='int64')
    self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
    question_rows = self.tparams['Wemb_e'][self.question.flatten()]
    self.question_embedding = question_rows.reshape(
        [self.question.shape[0], self.question.shape[1], emb_dim])

    # 1. Forward encoder.
    self.forward_encoder_layer = EncoderLayer(
        word_embedding_dim=emb_dim,
        hidden_status_dim=hid_dim,
        tparams=self.tparams,
        prefix='forward_Encoder')
    self.forward_encoder_hidden_status = self.forward_encoder_layer.getOutput(
        inputs=(self.question_embedding, self.question_mask))

    # 2. Backward encoder: fed the inputs reversed along axis 0; its output is
    #    reversed again before being joined with the forward states.
    self.backward_encoder_layer = EncoderLayer(
        word_embedding_dim=emb_dim,
        hidden_status_dim=hid_dim,
        tparams=self.tparams,
        prefix='backward_Encoder')
    reversed_inputs = (self.question_embedding[::-1, :, :], self.question_mask[::-1, :])
    self.backward_encoder_hidden_status = self.backward_encoder_layer.getOutput(
        inputs=reversed_inputs)
    self.encoder_hidden_status = tensor.concatenate(
        [self.forward_encoder_hidden_status,
         self.backward_encoder_hidden_status[::-1, :, :]],
        axis=2)

    # 3. Decoder.
    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
    answer_rows = self.tparams['Wemb_d'][self.answer.flatten()]
    self.answer_embedding = answer_rows.reshape(
        [self.answer.shape[0], self.answer.shape[1], emb_dim])
    self.decoder_layer = AttentionDecoderLayer(
        word_embedding_dim=emb_dim + 2 * hid_dim,
        hidden_status_dim=hid_dim,
        encoder_hidden_dim=2 * hid_dim,
        tparams=self.tparams,
        prefix='Decoder')
    decoder_inputs = (self.answer_embedding, self.answer_mask,
                      self.encoder_hidden_status, self.question_mask)
    self.decoder_hidden_status, self.context = self.decoder_layer.getOutput(
        inputs=decoder_inputs)

    # 4. Maxout layer: every input drops its last step along axis 0 and is
    #    flattened to one row per (step, column) pair.
    self.maxout_layer = MaxoutLayer(
        base_dim=emb_dim,
        refer_dim=3 * hid_dim + emb_dim,
        tparams=self.tparams,
        prefix="maxout")
    n_rows = (self.answer.shape[0] - 1) * self.answer.shape[1]
    hidden_part = self.decoder_hidden_status[:-1, :, :].reshape([n_rows, hid_dim])
    context_part = self.context[:-1, :, :].reshape([n_rows, 2 * hid_dim])
    embedding_part = self.answer_embedding[:-1, :, :].reshape([n_rows, emb_dim])
    self.maxout_input = tensor.concatenate(
        [hidden_part, context_part, embedding_part], axis=1)

    word_likelihood = self.maxout_layer.likelihood(
        base_data=self.tparams['Wemb_d'],
        refer_data=self.maxout_input,
        y=self.answer[1:, :].flatten())
    neg_log_likelihood = -tensor.log(word_likelihood)
    target_mask = self.answer_mask[1:, :]

    # Cost: masked negative log-likelihood divided by question.shape[1].
    self.cost = tensor.dot(neg_log_likelihood, target_mask.flatten()) / self.question.shape[1]

    # Masked negative log-likelihood summed over axis 0 (one value per column).
    per_step = neg_log_likelihood.reshape(
        [self.answer.shape[0] - 1, self.answer.shape[1]])
    self.likihood_vector = tensor.sum(per_step * target_mask, axis=0)

    self._set_parameters(input_params)  # params from list to TensorVariable
def numpy_floatX(data):
    """Return ``data`` as a numpy array with dtype ``config.globalFloatType()``."""
    target_dtype = config.globalFloatType()
    return numpy.asarray(data, dtype=target_dtype)
def __init__(self, n_words, word_embedding_dim, hidden_status_dim, style_number, style_dim, input_params=None):
    """Build the symbolic graph: two encoders, an AttentionDecoderLayer, a StyleLayer and a maxout output.

    The style vector produced by ``StyleLayer`` is repeated for every decoding
    step and appended to the maxout input.

    Args:
        n_words: stored in ``self.options['n_words']``.
        word_embedding_dim: last-axis size of the embedding lookups.
        hidden_status_dim: hidden size passed to the encoder/decoder layers.
        style_number: first positional argument of ``StyleLayer``.
        style_dim: second positional argument of ``StyleLayer``; also the
            last-axis size of the style block appended to the maxout input.
        input_params: forwarded unchanged to ``self._set_parameters``.
    """
    options = {
        'n_words': n_words,
        'word_embedding_dim': word_embedding_dim,
        'hidden_status_dim': hidden_status_dim,
        'style_number': style_number,
        'style_dim': style_dim,
        # Learning rate for sgd (not used for adadelta and rmsprop).
        'learning_rate': 0.0001,
        # sgd, adadelta and rmsprop are available; sgd is hard to use here.
        'optimizer': self.rmsprop,
    }
    self.options = options

    # Global parameters first, then their Theano counterparts.
    self.tparams = self.init_tparams(self.init_global_params(options))

    emb_dim = options['word_embedding_dim']
    hid_dim = options['hidden_status_dim']

    theano.config.compute_test_value = 'off'

    # Question inputs and their embedding lookup.
    self.question = tensor.matrix('question', dtype='int64')
    self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
    question_rows = self.tparams['Wemb_e'][self.question.flatten()]
    self.question_embedding = question_rows.reshape(
        [self.question.shape[0], self.question.shape[1], emb_dim])

    # 1. Forward encoder.
    self.forward_encoder_layer = EncoderLayer(
        word_embedding_dim=emb_dim,
        hidden_status_dim=hid_dim,
        tparams=self.tparams,
        prefix='forward_Encoder')
    self.forward_encoder_hidden_status = self.forward_encoder_layer.getOutput(
        inputs=(self.question_embedding, self.question_mask))

    # 2. Backward encoder over the inputs reversed along axis 0.
    self.backward_encoder_layer = EncoderLayer(
        word_embedding_dim=emb_dim,
        hidden_status_dim=hid_dim,
        tparams=self.tparams,
        prefix='backward_Encoder')
    reversed_inputs = (self.question_embedding[::-1, :, :], self.question_mask[::-1, :])
    self.backward_encoder_hidden_status = self.backward_encoder_layer.getOutput(
        inputs=reversed_inputs)
    self.encoder_hidden_status = tensor.concatenate(
        [self.forward_encoder_hidden_status,
         self.backward_encoder_hidden_status[::-1, :, :]],
        axis=2)

    # 3. Decoder.
    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
    answer_rows = self.tparams['Wemb_d'][self.answer.flatten()]
    self.answer_embedding = answer_rows.reshape(
        [self.answer.shape[0], self.answer.shape[1], emb_dim])
    self.decoder_layer = AttentionDecoderLayer(
        word_embedding_dim=emb_dim + 2 * hid_dim,
        hidden_status_dim=hid_dim,
        encoder_hidden_dim=2 * hid_dim,
        tparams=self.tparams,
        prefix='Decoder')
    decoder_inputs = (self.answer_embedding, self.answer_mask,
                      self.encoder_hidden_status, self.question_mask)
    self.decoder_hidden_status, self.context = self.decoder_layer.getOutput(
        inputs=decoder_inputs)

    # 4. Style layer, fed three blocks of width hidden_status_dim.
    self.style_layer = StyleLayer(style_number, style_dim, 3 * hidden_status_dim, self.tparams, 'style')
    n_columns = self.answer.shape[1]
    forward_last = self.forward_encoder_hidden_status[-1, :, :].reshape([n_columns, hid_dim])
    backward_last = self.backward_encoder_hidden_status[-1, :, :].reshape([n_columns, hid_dim])
    # NOTE(review): the block the original names "answer" style input is again
    # the backward encoder state, identical to the block above. Confirm
    # whether an answer-side state was intended; behaviour is kept as found.
    answer_side = self.backward_encoder_hidden_status[-1, :, :].reshape([n_columns, hid_dim])
    pair_embedding = tensor.concatenate([forward_last, backward_last, answer_side], axis=1)
    self.style = self.style_layer.getOutput(pair_embedding)

    # 5. Maxout layer: decoder state, context, previous word embedding and
    #    the style vector, one row per (step, column) pair.
    style_width = options['style_dim']
    self.maxout_layer = MaxoutLayer(
        base_dim=emb_dim,
        refer_dim=3 * hid_dim + emb_dim + style_width,
        tparams=self.tparams,
        prefix="maxout")
    n_steps = self.answer.shape[0] - 1
    n_rows = (self.answer.shape[0] - 1) * self.answer.shape[1]
    hidden_part = self.decoder_hidden_status[:-1, :, :].reshape([n_rows, hid_dim])
    context_part = self.context[:-1, :, :].reshape([n_rows, 2 * hid_dim])
    embedding_part = self.answer_embedding[:-1, :, :].reshape([n_rows, emb_dim])
    # Repeat the style vector along a new leading axis, once per step.
    tiled_style = tensor.alloc(self.style.dimshuffle('x', 0, 1),
                               n_steps, self.answer.shape[1], style_width)
    style_part = tiled_style.reshape([n_rows, style_width])
    self.maxout_input = tensor.concatenate(
        [hidden_part, context_part, embedding_part, style_part], axis=1)

    raw_error = self.maxout_layer.negative_log_likelihood(
        self.tparams['Wemb_d'],
        self.maxout_input,
        y=self.answer[1:, :].flatten())
    target_mask = self.answer_mask[1:, :]

    # Cost: sign-flipped masked sum divided by the mask total.
    self.cost = -1.0 * tensor.dot(raw_error, target_mask.flatten()) / target_mask.sum()

    # Per-column error: masked sum over axis 0 divided by the column's mask sum.
    masked_error = raw_error.reshape([self.answer.shape[0] - 1, self.answer.shape[1]]) * target_mask
    self.output_error_vector = -masked_error.sum(axis=0) / target_mask.sum(axis=0)

    self._set_parameters(input_params)  # params from list to TensorVariable
def ortho_weight(ndim):
    """Return the left singular vectors of a random ``ndim`` x ``ndim`` matrix.

    The matrix is drawn with ``numpy.random.randn`` and the result is cast to
    ``config.globalFloatType()``.
    """
    random_square = numpy.random.randn(ndim, ndim)
    left_vectors = numpy.linalg.svd(random_square)[0]
    return left_vectors.astype(config.globalFloatType())
def __init__(self, n_words, word_embedding_dim, n_style, style_embedding_dim, hidden_status_dim, input_params=None):
    """Build the symbolic graph with one decoder/maxout pair per style.

    A style distribution ``self.style_prob`` is computed from the last encoder
    state. For every style index a separate ``DecoderLayer_Cho`` and
    ``MaxoutLayer`` are created (parameter prefixes ``decoder<i>`` and
    ``decoder_maxout<i>``); their per-column sentence likelihoods are weighted
    by ``self.style_prob`` and summed to form the cost.

    Args:
        n_words: stored in ``self.options['n_words']``; also the last-axis
            size the predicted probabilities are reshaped to.
        word_embedding_dim: last-axis size of the embedding lookups.
        n_style: number of loop iterations, i.e. decoder/maxout pairs built.
        style_embedding_dim: last-axis size of the rows taken from ``Semb``.
        hidden_status_dim: hidden size passed to the encoder/decoder layers.
        input_params: forwarded unchanged to ``self.set_parameters``.
    """
    self.options = options = {
        'n_words': n_words,
        'word_embedding_dim': word_embedding_dim,
        'n_style': n_style,
        'style_embedding_dim': style_embedding_dim,
        'hidden_status_dim': hidden_status_dim,
        'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
        'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop available, sgd very hard to use, not recommended (probably need momentum and decaying learning rate).
        'tiny_float': 1e-20  # added before log() and to the mixture below to keep them away from zero
        }
    # global parameters.
    params = self.init_global_params(options)
    # Theano parameters,
    self.tparams = self.init_tparams(params)
    # Used for dropout.
    # self.use_noise = theano.shared(numpy_floatX(0.))

    # construct network
    theano.config.compute_test_value = 'off'
    self.question = tensor.matrix('question', dtype='int64')
    self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
    # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
    # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
    self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
        [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
    # 1. encoder layer
    self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                      hidden_status_dim=options['hidden_status_dim'],
                                      tparams=self.tparams)
    self.encoder_hidden_status = self.encoder_layer.get_output(inputs=(self.question_embedding, self.question_mask))

    # 2. style layer: scores the rows of Semb against the last encoder state
    self.style_layer = MaxoutBiasedLayer(base_dim=options['style_embedding_dim'],
                                         refer_dim=options["hidden_status_dim"],
                                         tparams=self.tparams,
                                         prefix="style")
    self.style_prob = self.style_layer.probability(base_data=self.tparams['Semb'],
                                                   refer_data=self.encoder_hidden_status[-1, :, :])
    # (samples,n_style)

    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
        [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
    self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())

    # extend the data
    n_question_time_stpes = self.question.shape[0]
    n_answer_time_stpes = self.answer.shape[0]
    n_samples = self.question.shape[1]
    # Placeholders only; all three are overwritten after the loop.
    self.cost = 0
    self.zi_error = 0
    self.st_error = 0
    self.predict_probability_list = []
    log_likelihood_vector_list = []
    for style_index in range(self.options['n_style']):
        decoder_layer = DecoderLayer_Cho(word_embedding_dim=options['word_embedding_dim'] + \
                                         options['hidden_status_dim'] + options['style_embedding_dim'],
                                         hidden_status_dim=options['hidden_status_dim'],
                                         tparams=self.tparams, prefix='decoder' + str(style_index))
        # Repeat this style's embedding row for every question step and sample,
        # then append it to the encoder states on the feature axis.
        style_embedding = tensor.alloc(self.tparams['Semb'][style_index],
                                       n_question_time_stpes, n_samples, self.options['style_embedding_dim'])
        encoder_hidden_status = tensor.concatenate([self.encoder_hidden_status, style_embedding], axis=2)
        # 3. decoder layer
        decoder_hidden_status = decoder_layer.get_output(inputs=[self.answer_embedding, self.answer_mask, encoder_hidden_status])
        # 4. maxout layer: training input, one row per (answer step, sample),
        #    with the last answer step dropped from every block.
        maxout_input = tensor.concatenate([decoder_hidden_status[:-1, :, :].
                                           reshape([(n_answer_time_stpes - 1) * n_samples,
                                                    options['hidden_status_dim']]),
                                           tensor.alloc(encoder_hidden_status[-1, :, :],
                                                        n_answer_time_stpes - 1,
                                                        n_samples,
                                                        options['style_embedding_dim'] + \
                                                        options['hidden_status_dim']).
                                           reshape([(n_answer_time_stpes - 1) * n_samples,
                                                    options['hidden_status_dim'] + \
                                                    options['style_embedding_dim']]),
                                           self.answer_embedding[:-1, :, :].
                                           reshape([(n_answer_time_stpes - 1) * n_samples,
                                                    options['word_embedding_dim']])],
                                          axis=1)
        # Per-sample mask sum minus one, used below to pick one answer
        # embedding row per sample.
        # NOTE(review): this is the last unmasked position only if each mask
        # column is a run of ones followed by zeros - confirm with the caller.
        a_index = tensor.cast(self.answer_mask, 'int64').sum(axis=0) - 1
        # Prediction input: last decoder state, last style-augmented encoder
        # state and the embedding selected by a_index.
        predict_maxout_input = tensor.concatenate([decoder_hidden_status[-1, :, :].
                                                   reshape([n_samples, options['hidden_status_dim']]),
                                                   encoder_hidden_status[-1, :, :].
                                                   reshape([n_samples, options['hidden_status_dim'] + \
                                                            options['style_embedding_dim']]),
                                                   self.answer_embedding[a_index, tensor.arange(a_index.shape[0]), :].
                                                   reshape([n_samples, options['word_embedding_dim']])],
                                                  axis=1)
        maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                   refer_dim=2 * options["hidden_status_dim"] + \
                                   options['word_embedding_dim'] + options['style_embedding_dim'],
                                   tparams=self.tparams, prefix='decoder_maxout' + str(style_index))
        likelihood_vector = \
            maxout_layer.likelihood(base_data=self.tparams['Wemb_d'],
                                    refer_data=maxout_input,
                                    y=self.answer[1:, :].flatten())
        pred_word, predict_probability0 = \
            maxout_layer.get_output(base_data=self.tparams['Wemb_d'],
                                    refer_data=predict_maxout_input)
        # carefully check
        likelihood_vector = likelihood_vector.reshape(
            [n_answer_time_stpes - 1, n_samples])
        # Masked log-likelihood summed over answer steps: one value per sample,
        # kept as a column so the styles can be concatenated on axis 1.
        log_likelihood_vector0 = tensor.log(likelihood_vector + self.options['tiny_float']) * self.answer_mask[1:, :]
        log_likelihood_vector0 = log_likelihood_vector0.sum(axis=0)
        log_likelihood_vector_list.append(log_likelihood_vector0.dimshuffle(0, 'x'))
        predict_probability0 = predict_probability0.reshape(
            [n_samples, options['n_words']])
        self.predict_probability_list.append(predict_probability0)
    # options['n_style'], n_answer_time_stpes - 1, n_samples
    # Transform the multiplication into add.
    log_likelihood_vector = tensor.concatenate(log_likelihood_vector_list, axis=1)
    # Weight each style's sentence likelihood by the style probability and sum
    # over the style axis.
    sentence_probability = log_likelihood_vector.exp() * self.style_prob
    sentence_probability = sentence_probability + self.options['tiny_float']
    sentence_probability = tensor.sum(sentence_probability, axis=1)
    negative_log_sentence_probability = -tensor.log(sentence_probability)
    self.likelihood_vector = negative_log_sentence_probability
    # cost: total negative log-probability divided by the total mask sum
    self.cost = negative_log_sentence_probability.sum() / self.answer_mask[1:, :].sum()
    # zi_error: per-sample value divided by that sample's mask sum, then averaged
    self.zi_error = negative_log_sentence_probability / tensor.sum(self.answer_mask[1:, :], 0)
    self.zi_error = self.zi_error.mean()
    # st_error: plain mean over samples
    self.st_error = negative_log_sentence_probability.mean()
    # pred_word is whatever the last loop iteration produced; the name is
    # unbound here if n_style is 0.
    self.pred_word = pred_word
    self.set_parameters(input_params)  # params from list to TensorVariable
def __init__(self, n_words, word_embedding_dim, hidden_status_dim, input_params):
    """Build the symbolic graph: GRU encoder, DecoderLayer_Cho decoder and a maxout output.

    Args:
        n_words: stored in ``self.options['n_words']``.
        word_embedding_dim: last-axis size of the embedding lookups.
        hidden_status_dim: hidden size passed to the encoder/decoder layers.
        input_params: forwarded unchanged to ``self.set_parameters``.
    """
    options = {
        'n_words': n_words,
        'word_embedding_dim': word_embedding_dim,
        'hidden_status_dim': hidden_status_dim,
        'learning_rate': 0.0001
    }
    self.options = options

    # Global parameters first, then their Theano counterparts.
    self.tparams = self.init_tparams(self.init_global_params(options))

    emb_dim = options['word_embedding_dim']
    hid_dim = options['hidden_status_dim']

    # Symbolic inputs and embedding lookups.
    self.question = tensor.matrix('question', dtype='int64')
    self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
    question_rows = self.tparams['Wemb_e'][self.question.flatten()]
    self.question_embedding = question_rows.reshape(
        [self.question.shape[0], self.question.shape[1], emb_dim])

    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
    answer_rows = self.tparams['Wemb_d'][self.answer.flatten()]
    self.answer_embedding = answer_rows.reshape(
        [self.answer.shape[0], self.answer.shape[1], emb_dim])

    # Debug aid (disabled): set theano.config.compute_test_value and attach
    # small tag.test_value arrays to question/answer and their masks.

    # Layers are created first, in the order encoder, decoder, maxout.
    self.encoder_layer = EncoderLayer(
        word_embedding_dim=emb_dim,
        hidden_status_dim=hid_dim,
        tparams=self.tparams,
        node_type=GRUNode)
    self.decoder_layer = DecoderLayer_Cho(
        word_embedding_dim=emb_dim + hid_dim,
        hidden_status_dim=hid_dim,
        tparams=self.tparams,
        node_type=GRUNode)
    self.maxout_layer = MaxoutLayer(
        base_dim=emb_dim,
        refer_dim=2 * hid_dim + emb_dim,
        tparams=self.tparams,
        prefix='maxout')

    # Then they are wired together in the same order.
    self.encoder_hidden_status = self.encoder_layer.get_output(
        inputs=(self.question_embedding, self.question_mask))
    self.decoder_hidden_status = self.decoder_layer.get_output(
        inputs=(self.answer_embedding, self.answer_mask, self.encoder_hidden_status))

    # Maxout input: decoder state, the last encoder state repeated for every
    # step, and the answer embedding, each without the final step.
    n_steps = self.answer.shape[0] - 1
    n_rows = (self.answer.shape[0] - 1) * self.answer.shape[1]
    hidden_part = self.decoder_hidden_status[:-1, :, :].reshape([n_rows, hid_dim])
    repeated_encoder = tensor.alloc(self.encoder_hidden_status[-1, :, :],
                                    n_steps, self.answer.shape[1], hid_dim)
    encoder_part = repeated_encoder.reshape([n_rows, hid_dim])
    embedding_part = self.answer_embedding[:-1, :, :].reshape([n_rows, emb_dim])
    self.maxout_input = tensor.concatenate(
        [hidden_part, encoder_part, embedding_part], axis=1)

    word_likelihood = self.maxout_layer.likelihood(
        base_data=self.tparams['Wemb_d'],
        refer_data=self.maxout_input,
        y=self.answer[1:, :].flatten())

    # Evaluation and cost.
    neg_log_likelihood = -tensor.log(word_likelihood)
    target_mask = self.answer_mask[1:, :]
    self.cost = tensor.dot(neg_log_likelihood.flatten(), target_mask.flatten()) \
        / target_mask.sum()
    per_step = neg_log_likelihood.reshape([target_mask.shape[0], target_mask.shape[1]])
    self.likihood_vector = tensor.sum(per_step * target_mask, axis=0) \
        / tensor.sum(target_mask, axis=0)

    self.set_parameters(input_params)  # params from list to TensorVariable
def ortho_weight(ndim):
    """Build an ``ndim`` x ``ndim`` matrix from the SVD of random normal draws.

    Only the first SVD factor is kept; it is returned with dtype
    ``config.globalFloatType()``.
    """
    samples = numpy.random.randn(ndim, ndim)
    first_factor, _, _ = numpy.linalg.svd(samples)
    float_type = config.globalFloatType()
    return first_factor.astype(float_type)
def __init__(self, n_words, word_embedding_dim, n_style, style_embedding_dim, hidden_status_dim, input_params=None):
    """Build the symbolic graph with a single decoder run over samples tiled across styles.

    Instead of one decoder per style, the answer, its mask, its embedding and
    the last encoder state are repeated ``n_style`` times along the sample
    axis (``extent_data_size = n_samples * n_style``) and each copy is paired
    with one row of ``Semb``. The per-style sentence log-likelihoods are then
    combined with ``log(self.style_prob)``.

    Args:
        n_words: stored in ``self.options['n_words']``; also the last-axis
            size the predicted probabilities are reshaped to.
        word_embedding_dim: last-axis size of the embedding lookups.
        n_style: number of copies of each sample.
        style_embedding_dim: last-axis size used for the ``Semb`` rows.
        hidden_status_dim: hidden size passed to the encoder/decoder layers.
        input_params: forwarded unchanged to ``self.set_parameters``.
    """
    self.options = options = {
        'n_words': n_words,
        'word_embedding_dim': word_embedding_dim,
        'n_style': n_style,
        'style_embedding_dim': style_embedding_dim,
        'hidden_status_dim': hidden_status_dim,
        'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
        'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop available, sgd very hard to use, not recommended (probably need momentum and decaying learning rate).
        }
    # global parameters.
    params = self.init_global_params(options)
    # Theano parameters,
    self.tparams = self.init_tparams(params)
    # Used for dropout.
    # self.use_noise = theano.shared(numpy_floatX(0.))

    # construct network
    theano.config.compute_test_value = 'off'
    self.question = tensor.matrix('question', dtype='int64')
    self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
    # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
    # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
    self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
        [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
    # 1. encoder layer
    self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                      hidden_status_dim=options['hidden_status_dim'],
                                      tparams=self.tparams)
    self.encoder_hidden_status = self.encoder_layer.get_output(inputs=(self.question_embedding, self.question_mask))

    # 2. style layer: scores the rows of Semb against the last encoder state
    self.style_layer = MaxoutLayer(base_dim=options['style_embedding_dim'],
                                   refer_dim=options["hidden_status_dim"],
                                   tparams=self.tparams,
                                   prefix="style")
    self.style_prob = self.style_layer.probability(base_data=self.tparams['Semb'],
                                                   refer_data=self.encoder_hidden_status[-1, :, :])
    # (samples,n_style)

    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
    self.decoder_layer = DecoderLayer_Cho(word_embedding_dim=options['word_embedding_dim'] + \
                                          options['hidden_status_dim'] + options['style_embedding_dim'],
                                          hidden_status_dim=options['hidden_status_dim'],
                                          tparams=self.tparams)
    self.maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                    refer_dim=2 * options["hidden_status_dim"] + \
                                    options['word_embedding_dim'] + options['style_embedding_dim'],
                                    tparams=self.tparams,
                                    prefix="maxout")
    self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
        [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])

    # extend the data
    n_question_time_stpes = self.question.shape[0]
    n_answer_time_stpes = self.answer.shape[0]
    n_samples = self.question.shape[1]
    extent_data_size = n_samples * self.options['n_style']
    # hidden status with style
    # Each tensor below is repeated n_style times on a new leading axis, the
    # style axis is moved next to the sample axis, and the two are merged, so
    # the merged axis is ordered style-major (all samples of style 0 first).
    answer_mask = tensor.alloc(self.answer_mask, \
                               self.options['n_style'], \
                               n_answer_time_stpes, \
                               n_samples)\
                        .dimshuffle([1, 0, 2])\
                        .reshape([n_answer_time_stpes, extent_data_size])
    answer = tensor.alloc(self.answer, \
                          self.options['n_style'], \
                          n_answer_time_stpes, \
                          n_samples)\
                   .dimshuffle([1, 0, 2])\
                   .reshape([n_answer_time_stpes, extent_data_size])
    answer_embedding = tensor.alloc(self.answer_embedding, \
                                    self.options['n_style'], \
                                    n_answer_time_stpes, \
                                    n_samples, \
                                    options['word_embedding_dim'])\
                             .dimshuffle(1, 0, 2, 3)\
                             .reshape([n_answer_time_stpes, extent_data_size, options['word_embedding_dim']])
    # Only the last encoder state is kept, as a single leading step.
    encoder_hidden_status = tensor.alloc(self.encoder_hidden_status[-1], \
                                         self.options['n_style'], \
                                         n_samples, \
                                         options['hidden_status_dim'])\
                                  .reshape([1, extent_data_size, options['hidden_status_dim']])
    # Semb is repeated per sample, then transposed so that it follows the same
    # style-major ordering as the tensors above.
    style_embeddings = tensor.alloc(self.tparams['Semb'], \
                                    n_samples, \
                                    self.options['n_style'], \
                                    self.options['style_embedding_dim'])\
                             .dimshuffle(1, 0, 2)\
                             .reshape([1, extent_data_size, self.options['style_embedding_dim']])
    encoder_hidden_status = tensor.concatenate([encoder_hidden_status, style_embeddings], axis=2)
    # 3. decoder layer
    decoder_hidden_status = self.decoder_layer.get_output(inputs=[answer_embedding, answer_mask, encoder_hidden_status])
    # 4. maxout layer: training input, one row per (answer step, tiled sample),
    #    with the last answer step dropped from every block.
    self.maxout_input = tensor.concatenate([decoder_hidden_status[:-1, :, :].
                                            reshape([(n_answer_time_stpes - 1) * extent_data_size,
                                                     options['hidden_status_dim']]),
                                            tensor.alloc(encoder_hidden_status[-1, :, :],
                                                         n_answer_time_stpes - 1,
                                                         extent_data_size,
                                                         options['style_embedding_dim'] + \
                                                         options['hidden_status_dim']).
                                            reshape([(n_answer_time_stpes - 1) * extent_data_size,
                                                     options['hidden_status_dim'] + \
                                                     options['style_embedding_dim']]),
                                            answer_embedding[:-1, :, :].
                                            reshape([(n_answer_time_stpes - 1) * extent_data_size,
                                                     options['word_embedding_dim']])],
                                           axis=1)
    # Prediction input, built from the last step of each tensor.
    predict_maxout_input = tensor.concatenate([decoder_hidden_status[-1, :, :].
                                               reshape([extent_data_size, options['hidden_status_dim']]),
                                               tensor.alloc(encoder_hidden_status[-1, :, :],
                                                            1, extent_data_size,
                                                            options['style_embedding_dim'] + \
                                                            options['hidden_status_dim']).
                                               reshape([extent_data_size, options['hidden_status_dim'] + \
                                                        options['style_embedding_dim']]),
                                               answer_embedding[-1, :, :].
                                               reshape([extent_data_size, options['word_embedding_dim']])],
                                              axis=1)
    likelihood_vector = \
        self.maxout_layer.likelihood(base_data=self.tparams['Wemb_d'],
                                     refer_data=self.maxout_input,
                                     y=answer[1:, :].flatten())
    pred_word, predict_probability = \
        self.maxout_layer.get_output(base_data=self.tparams['Wemb_d'],
                                     refer_data=predict_maxout_input)
    # carefully check
    # Split the merged axis back into (style, sample).
    likelihood_vector = likelihood_vector.reshape(
        [n_answer_time_stpes - 1, options['n_style'], n_samples])
    predict_probability = predict_probability.reshape(
        [options['n_style'], n_samples, options['n_words']])
    self.predict_probability = predict_probability
    pred_word = pred_word.reshape(
        [options['n_style'], n_samples])
    self.pred_word = pred_word
    # options['n_style'], n_answer_time_stpes - 1, n_samples
    # Transform the multiplication into add.
    # The mask is broadcast over the style axis, then steps are summed out.
    log_likelihood_vector = tensor.log(likelihood_vector) * self.answer_mask[1:, :].dimshuffle(0, 'x', 1)
    log_likelihood_vector = log_likelihood_vector.sum(axis=0)
    self.log_likelihood_vector_on_diff_styles = log_likelihood_vector
    # Add the log style probability, transposed to put styles on axis 0.
    sentence_probability = log_likelihood_vector + tensor.transpose(self.style_prob.log())
    # NOTE(review): both helpers are defined outside this block; presumably
    # they combine the per-style log terms over axis 0 - confirm there.
    sentence_probability = self.remove_min_neg_log_prob(sentence_probability)
    sentence_probability = self.total_probability_precise(sentence_probability, 0, options['n_style'])
    negative_log_sentence_probability = -sentence_probability
    self.likelihood_vector = negative_log_sentence_probability
    # cost: total divided by the total mask sum
    self.cost = negative_log_sentence_probability.sum() / self.answer_mask[1:, :].sum()
    # zi_error: per-sample value divided by that sample's mask sum, then averaged
    self.zi_error = negative_log_sentence_probability / tensor.sum(self.answer_mask[1:, :], 0)
    self.zi_error = self.zi_error.mean()
    # st_error: plain mean
    self.st_error = negative_log_sentence_probability.mean()
    self.set_parameters(input_params)  # params from list to TensorVariable
def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128, n_topics=2, topic_embedding_dim=5, input_params=None):
    """Build the symbolic graph: question and style encoders, a topic classifier and a maxout decoder.

    The last states of the question encoder and the style encoder are joined
    into a single-step context for ``DecoderLayer_Cho``. A two-way softmax on
    the last question state is trained against ``self.topic``, and the
    per-word error of the decoder is weighted by ``self.topic``.

    Args:
        n_words: stored in ``self.options['n_words']``.
        word_embedding_dim: last-axis size of the embedding lookups.
        hidden_status_dim: hidden size passed to the encoder/decoder layers.
        n_topics: stored in ``self.options['n_topics']`` (the softmax layer
            below is built with a fixed ``n_out=2``).
        topic_embedding_dim: stored in ``self.options['topic_embedding_dim']``.
        input_params: forwarded unchanged to ``self._set_parameters``.
    """
    self.options = options = {
        'n_words': n_words,
        'word_embedding_dim': word_embedding_dim,
        'hidden_status_dim': hidden_status_dim,
        'n_topics': n_topics,
        'topic_embedding_dim': topic_embedding_dim,
        'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
        'optimizer': self.adadelta,  # sgd, adadelta and rmsprop available; sgd is hard to use here
    }
    # Global parameters, then their Theano counterparts.
    params = self.init_global_params(options)
    self.tparams = self.init_tparams(params)

    # construct network
    theano.config.compute_test_value = 'off'
    self.question = tensor.matrix('question', dtype='int64')
    self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
    self.style = tensor.matrix('style', dtype='int64')
    self.style_mask = tensor.matrix('style_mask', dtype=config.globalFloatType())
    # Question, style and answer all look their embeddings up in Wemb_e.
    self.style_embedding = self.tparams['Wemb_e'][self.style.flatten()].reshape([
        self.style.shape[0], self.style.shape[1], options['word_embedding_dim']
    ])
    self.topic = tensor.matrix('topic', dtype=config.globalFloatType())
    self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape([
        self.question.shape[0], self.question.shape[1], options['word_embedding_dim']
    ])

    # 1. encoder layers: one for the question, one for the style input
    self.encoder_layer = EncoderLayer(
        word_embedding_dim=options['word_embedding_dim'],
        hidden_status_dim=options['hidden_status_dim'],
        tparams=self.tparams)
    self.encoder_hidden_status1 = self.encoder_layer.getOutput(
        inputs=(self.question_embedding, self.question_mask))
    self.encoder_layer_style = EncoderLayer(
        word_embedding_dim=options['word_embedding_dim'],
        hidden_status_dim=options['hidden_status_dim'],
        tparams=self.tparams,
        prefix='encoder_layer_style')
    self.encoder_hidden_status_style = self.encoder_layer_style.getOutput(
        inputs=(self.style_embedding, self.style_mask))
    # Join the two last states on the feature axis and give the result a
    # leading axis of length one.
    self.encoder_hidden_status = tensor.concatenate(
        [
            self.encoder_hidden_status1[-1],
            self.encoder_hidden_status_style[-1]
        ],
        axis=1).reshape([
            1, self.encoder_hidden_status_style.shape[1],
            2 * self.encoder_hidden_status_style.shape[2]
        ])

    # 2. decoder layer
    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
    self.answer_embedding = self.tparams['Wemb_e'][self.answer.flatten()].reshape([
        self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']
    ])
    self.decoder_layer = DecoderLayer_Cho(
        word_embedding_dim=options['word_embedding_dim'] + 2 * options['hidden_status_dim'],
        hidden_status_dim=options['hidden_status_dim'],
        tparams=self.tparams)
    self.decoder_hidden_status = self.decoder_layer.getOutput(
        inputs=(self.answer_embedding, self.answer_mask, self.encoder_hidden_status))

    # 2.5 softmax layer: classify the topic from the last question state
    self.softmax_layer = SoftmaxLayer(n_in=options["hidden_status_dim"],
                                      n_out=2,
                                      tparams=self.tparams)
    self.softmax_input = self.encoder_hidden_status1[-1]
    class_error_vector = self.softmax_layer.negative_log_likelihood(
        self.softmax_input, tensor.cast(self.topic.flatten(), 'int64'))
    self.class_cost = class_error_vector.sum() / self.question.shape[1]

    # 3. maxout layer: decoder state, the joined encoder state repeated for
    #    every step, and the answer embedding, each without the final step.
    self.maxout_layer = MaxoutLayer(
        base_dim=options['word_embedding_dim'],
        refer_dim=3 * options["hidden_status_dim"] + options['word_embedding_dim'],
        tparams=self.tparams,
        prefix="maxout")
    self.maxout_input = tensor.concatenate([
        self.decoder_hidden_status[:-1, :, :].reshape(
            [(self.answer.shape[0] - 1) * self.answer.shape[1],
             options['hidden_status_dim']]),
        tensor.alloc(self.encoder_hidden_status[-1, :, :],
                     self.answer.shape[0] - 1, self.answer.shape[1],
                     2 * options['hidden_status_dim']).reshape([
                         (self.answer.shape[0] - 1) * self.answer.shape[1],
                         2 * options['hidden_status_dim']
                     ]),
        self.answer_embedding[:-1, :, :].reshape([
            (self.answer.shape[0] - 1) * self.answer.shape[1],
            options['word_embedding_dim']
        ])
    ], axis=1)
    output_error_vector = self.maxout_layer.negative_log_likelihood(
        self.tparams['Wemb_e'],
        self.maxout_input,
        y=self.answer[1:, :].flatten())

    # Repeat the flattened topic values for every answer step so they line up
    # with the flattened mask.
    self.topic_matrix = tensor.alloc(self.topic.flatten(),
                                     self.answer.shape[0] - 1,
                                     self.answer.shape[1]).flatten()
    self.topic_matrix_change = self.topic_matrix
    m = self.answer_mask[1:, :]
    # Cost: sign-flipped, mask- and topic-weighted sum divided by the mask
    # total, minus the topic classification term.
    self.cost = -1.0 * tensor.dot(
        output_error_vector, m.flatten() * self.topic_matrix_change) / m.sum()
    self.cost = self.cost - self.class_cost

    # Per-column error. The reduction must run on the reshaped, masked matrix:
    # the original summed the flat, unmasked local vector instead, which threw
    # away the two statements above and gave every column the batch total.
    self.output_error_vector = output_error_vector.reshape(
        [self.answer.shape[0] - 1, self.answer.shape[1]])
    self.output_error_vector = self.output_error_vector * m
    self.output_error_vector = -self.output_error_vector.sum(axis=0) / m.sum(axis=0)

    self._set_parameters(input_params)  # params from list to TensorVariable
def numpy_floatX(data):
    """Wrap ``data`` with ``numpy.asarray`` using the dtype from ``config.globalFloatType()``."""
    float_type = config.globalFloatType()
    converted = numpy.asarray(data, dtype=float_type)
    return converted