def __init__(self, n_words=1000, n_embedding=100, lr=0.01, margin=0.1, momentum=0.9, word_to_id=None):
    """Build the memory-network + LSTM model and compile its Theano functions.

    Parameters
    ----------
    n_words : int
        Vocabulary size.
    n_embedding : int
        Embedding dimensionality; shared by the word embedding and the
        LSTM hidden state (``n_lstm_embed`` and ``word_embed`` are aliases).
    lr : float
        Default learning rate passed to the compiled training function.
    margin : float
        Margin stored for the ranking cost (used by ``calc_cost``).
    momentum : float
        Momentum term (stored; presumably consumed by ``get_updates`` —
        TODO confirm against that helper).
    word_to_id : dict or None
        Mapping word -> integer id. ``None`` is treated as an empty
        vocabulary mapping.
    """
    self.n_embedding = n_embedding
    self.n_lstm_embed = n_embedding
    self.word_embed = n_embedding
    self.lr = lr
    self.momentum = momentum
    self.margin = margin
    self.n_words = n_words
    self.n_D = 3 * self.n_words + 3
    # BUG FIX: the original dereferenced word_to_id unconditionally, so
    # constructing with the declared default (word_to_id=None) raised
    # AttributeError. Fall back to an empty mapping instead.
    if word_to_id is None:
        word_to_id = {}
    self.word_to_id = word_to_id
    # dict.items() works on both Python 2 and 3; iteritems() is Py2-only.
    self.id_to_word = dict((v, k) for k, v in word_to_id.items())

    # --- Symbolic inputs -------------------------------------------------
    # Question
    x = T.vector('x')
    phi_x = T.vector('phi_x')

    # True statements
    phi_f1_1 = T.vector('phi_f1_1')
    phi_f2_1 = T.vector('phi_f2_1')

    # False statements
    phi_f1_2 = T.vector('phi_f1_2')
    phi_f2_2 = T.vector('phi_f2_2')

    # Supporting memories
    m0 = T.vector('m0')
    m1 = T.vector('m1')
    phi_m0 = T.vector('phi_m0')
    phi_m1 = T.vector('phi_m1')

    # True word (one-hot / distribution target for the LSTM output)
    r = T.vector('r')

    # Word sequence (integer ids)
    words = T.ivector('words')

    # --- Parameters ------------------------------------------------------
    # Scoring function for memory selection
    self.U_O = init_shared_normal(n_embedding, self.n_D, 0.01)

    # Word embeddings: L feeds the LSTM input, Lprime scores its output
    self.L = glorot_uniform((self.n_words, self.word_embed))
    self.Lprime = glorot_uniform((self.n_words, self.n_lstm_embed))

    # LSTM gate parameters: input (i), forget (f), cell (c), output (o).
    # W_* act on the input embedding, U_* on the recurrent state.
    self.W_i = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_i = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_i = shared_zeros((self.n_lstm_embed))

    self.W_f = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_f = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_f = shared_zeros((self.n_lstm_embed))

    self.W_c = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_c = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_c = shared_zeros((self.n_lstm_embed))

    self.W_o = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_o = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_o = shared_zeros((self.n_lstm_embed))

    # --- Costs -----------------------------------------------------------
    mem_cost = self.calc_cost(phi_x, phi_f1_1, phi_f1_2, phi_f2_1, phi_f2_2, phi_m0)

    lstm_output = self.lstm_cost(words)
    self.predict_function_r = theano.function(
        inputs=[words], outputs=lstm_output, allow_input_downcast=True)

    # Cross-entropy between the target word distribution r and the LSTM output
    lstm_cost = -T.sum(T.mul(r, T.log(lstm_output)))

    cost = mem_cost + lstm_cost

    params = [
        self.U_O,
        self.W_i, self.U_i, self.b_i,
        self.W_f, self.U_f, self.b_f,
        self.W_c, self.U_c, self.b_c,
        self.W_o, self.U_o, self.b_o,
        self.L, self.Lprime,
    ]

    grads = T.grad(cost, params)

    # Parameter updates
    updates = self.get_updates(params, grads, method='adagrad')

    l_rate = T.scalar('l_rate')

    # Theano functions
    self.train_function = theano.function(
        inputs=[
            phi_x, phi_f1_1, phi_f1_2, phi_f2_1, phi_f2_2, phi_m0,
            r, words,
            theano.Param(l_rate, default=self.lr)
        ],
        outputs=cost,
        updates=updates,
        on_unused_input='warn',
        allow_input_downcast=True,
    )
    #mode='FAST_COMPILE')
    #mode='DebugMode')
    #mode=theano.compile.MonitorMode(pre_func=inspect_inputs,post_func=inspect_outputs))

    # Candidate statement for prediction
    phi_f = T.vector('phi_f')
    score_o = self.calc_score_o(phi_x, phi_f)
    self.predict_function_o = theano.function(inputs=[phi_x, phi_f], outputs=score_o)
def orthogonal_init(shape, dim_ordering='tf', name=None):
    """Keras-style initializer shim.

    Delegates directly to ``orthogonal`` with the same shape, name and
    dim_ordering; exists only to match the initializer-function signature.
    """
    weights = orthogonal(shape, dim_ordering=dim_ordering, name=name)
    return weights
def __init__(self, n_words=1000, n_embedding=100, lr=0.01, margin=0.1, momentum=0.9, word_to_id=None):
    """Build the memory-network + LSTM model and compile its Theano functions.

    Parameters
    ----------
    n_words : int
        Vocabulary size.
    n_embedding : int
        Embedding dimensionality; shared by the word embedding and the
        LSTM hidden state (``n_lstm_embed`` and ``word_embed`` are aliases).
    lr : float
        Default learning rate passed to the compiled training function.
    margin : float
        Margin stored for the ranking cost (used by ``calc_cost``).
    momentum : float
        Momentum term (stored; presumably consumed by ``get_updates`` —
        TODO confirm against that helper).
    word_to_id : dict or None
        Mapping word -> integer id. ``None`` is treated as an empty
        vocabulary mapping.
    """
    self.n_embedding = n_embedding
    self.n_lstm_embed = n_embedding
    self.word_embed = n_embedding
    self.lr = lr
    self.momentum = momentum
    self.margin = margin
    self.n_words = n_words
    self.n_D = 3 * self.n_words + 3
    # BUG FIX: the original dereferenced word_to_id unconditionally, so
    # constructing with the declared default (word_to_id=None) raised
    # AttributeError. Fall back to an empty mapping instead.
    if word_to_id is None:
        word_to_id = {}
    self.word_to_id = word_to_id
    # dict.items() works on both Python 2 and 3; iteritems() is Py2-only.
    self.id_to_word = dict((v, k) for k, v in word_to_id.items())

    # --- Symbolic inputs -------------------------------------------------
    # Question
    x = T.vector('x')
    phi_x = T.vector('phi_x')

    # True statements
    phi_f1_1 = T.vector('phi_f1_1')
    phi_f2_1 = T.vector('phi_f2_1')

    # False statements
    phi_f1_2 = T.vector('phi_f1_2')
    phi_f2_2 = T.vector('phi_f2_2')

    # Supporting memories
    m0 = T.vector('m0')
    m1 = T.vector('m1')
    phi_m0 = T.vector('phi_m0')
    phi_m1 = T.vector('phi_m1')

    # True word (one-hot / distribution target for the LSTM output)
    r = T.vector('r')

    # Word sequence (integer ids)
    words = T.ivector('words')

    # --- Parameters ------------------------------------------------------
    # Scoring function for memory selection
    self.U_O = init_shared_normal(n_embedding, self.n_D, 0.01)

    # Word embeddings: L feeds the LSTM input, Lprime scores its output
    self.L = glorot_uniform((self.n_words, self.word_embed))
    self.Lprime = glorot_uniform((self.n_words, self.n_lstm_embed))

    # LSTM gate parameters: input (i), forget (f), cell (c), output (o).
    # W_* act on the input embedding, U_* on the recurrent state.
    self.W_i = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_i = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_i = shared_zeros((self.n_lstm_embed))

    self.W_f = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_f = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_f = shared_zeros((self.n_lstm_embed))

    self.W_c = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_c = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_c = shared_zeros((self.n_lstm_embed))

    self.W_o = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_o = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_o = shared_zeros((self.n_lstm_embed))

    # --- Costs -----------------------------------------------------------
    mem_cost = self.calc_cost(phi_x, phi_f1_1, phi_f1_2, phi_f2_1, phi_f2_2, phi_m0)

    lstm_output = self.lstm_cost(words)
    self.predict_function_r = theano.function(
        inputs=[words], outputs=lstm_output, allow_input_downcast=True)

    # Cross-entropy between the target word distribution r and the LSTM output
    lstm_cost = -T.sum(T.mul(r, T.log(lstm_output)))

    cost = mem_cost + lstm_cost

    params = [
        self.U_O,
        self.W_i, self.U_i, self.b_i,
        self.W_f, self.U_f, self.b_f,
        self.W_c, self.U_c, self.b_c,
        self.W_o, self.U_o, self.b_o,
        self.L, self.Lprime,
    ]

    grads = T.grad(cost, params)

    # Parameter updates
    updates = self.get_updates(params, grads, method='adagrad')

    l_rate = T.scalar('l_rate')

    # Theano functions
    self.train_function = theano.function(
        inputs=[
            phi_x, phi_f1_1, phi_f1_2, phi_f2_1, phi_f2_2, phi_m0,
            r, words,
            theano.Param(l_rate, default=self.lr)
        ],
        outputs=cost,
        updates=updates,
        on_unused_input='warn',
        allow_input_downcast=True,
    )
    #mode='FAST_COMPILE')
    #mode='DebugMode')
    #mode=theano.compile.MonitorMode(pre_func=inspect_inputs,post_func=inspect_outputs))

    # Candidate statement for prediction
    phi_f = T.vector('phi_f')
    score_o = self.calc_score_o(phi_x, phi_f)
    self.predict_function_o = theano.function(inputs=[phi_x, phi_f], outputs=score_o)