def build_model(self):
    """Build the text-representation LSTM and the action/object scorers.

    Creates the ``self.inputs`` and ``self.true_action`` placeholders, embeds
    the input token ids, unrolls an LSTM over ``self.seq_length`` steps,
    mean-pools the per-step outputs over time and feeds the pooled vector to
    two bias-free linear layers stored in ``self.pred_action`` and
    ``self.object_``.
    """
    # Representation Generator
    self.inputs = tf.placeholder(tf.int32, [self.batch_size, self.seq_length])

    embed = tf.get_variable("embed", [self.vocab_size, self.embed_dim])
    word_embeds = tf.nn.embedding_lookup(embed, self.inputs)

    self.cell = rnn_cell.BasicLSTMCell(self.rnn_size)
    self.stacked_cell = rnn_cell.MultiRNNCell([self.cell] * self.layer_depth)

    # Split along the time axis and squeeze only that axis, so a batch of
    # size 1 keeps its batch dimension.
    step_inputs = [
        tf.squeeze(embed_t, [1])
        for embed_t in tf.split(1, self.seq_length, word_embeds)
    ]

    # NOTE(review): the single cell is unrolled here; self.stacked_cell is
    # built above but never used -- confirm that this is intended.
    outputs, _ = rnn.rnn(self.cell, step_inputs, dtype=tf.float32)

    # tf.pack stacks the per-step outputs along a new leading (time) axis, so
    # pooling over time means reducing axis 0; reducing axis 1 would average
    # over the batch instead.
    output_embed = tf.pack(outputs)
    mean_pool = tf.nn.relu(tf.reduce_mean(output_embed, 0))

    self.num_action = 4
    self.object_size = 4

    # Action scorer. no bias in paper
    # The names are passed as `scope=` so each scorer gets its own variables;
    # given positionally they land in the bias_start slot of linear().
    self.pred_action = rnn_cell.linear(
        mean_pool, self.num_action, 0, scope="action")
    self.object_ = rnn_cell.linear(
        mean_pool, self.object_size, 0, scope="object")

    self.true_action = tf.placeholder(
        tf.int32, [self.batch_size, self.num_action])
def attention(input_t, output_t_minus_1, time):
    """Mix an attention summary of the encoder outputs into the step input.

    Scores are ``sum(attention_V * tanh(WxH + VxS))`` over the last axis, are
    exponentiated, multiplied by ``self.mask`` and normalised over axis 1. The
    weighted sum of ``encoder_outputs`` is then combined with ``input_t``
    through a tanh-activated linear layer of width ``self.embedding_size``.

    ``attention_V``, ``WxH`` and ``encoder_outputs`` are read from the
    enclosing scope; ``time`` is accepted but not used.
    """
    with tf.variable_scope('attention'):
        # NOTE(review): both linear() calls below run in this one scope
        # without distinct `scope` names -- confirm they do not collide.
        state_proj = rnn_cell.linear(
            output_t_minus_1, self.attention_judge_size, True)
        # batch_size x 1 x 1 x attention
        VxS = tf.reshape(state_proj, [-1, 1, 1, self.attention_judge_size])

        scores = tf.reduce_sum(attention_V * tf.tanh(WxH + VxS), [3])
        # batch_size x source_len x 1
        unnormalised = tf.exp(scores)
        unnormalised = unnormalised * tf.expand_dims(self.mask, -1)
        normaliser = tf.reduce_sum(unnormalised, [1], keep_dims=True)
        attention_weight = unnormalised / normaliser

        attention_t = tf.reduce_sum(encoder_outputs * attention_weight, [1])
        merged = rnn_cell.linear(
            [attention_t, input_t], self.embedding_size, True)
        feed_in_t = tf.tanh(merged)
        return feed_in_t
def __call__(self, inputs, state, scope=None):
    """Run one gated recurrent unit (GRU) step and return (new_h, new_h)."""
    cell_scope = scope or type(self).__name__  # "GRUCell"
    with vs.variable_scope(cell_scope):
        with vs.variable_scope("Gates"):
            # Reset and update gates come from a single linear map; the bias
            # starts at 1.0 so the cell initially neither resets nor updates.
            gate_inputs = rnn_cell.linear(
                [inputs, state], 2 * self._num_units, True, 1.0)
            reset_pre, update_pre = array_ops.split(1, 2, gate_inputs)
            r = tf.sigmoid(reset_pre)
            u = tf.sigmoid(update_pre)
        with vs.variable_scope("Candidate"):
            candidate_pre = rnn_cell.linear(
                [inputs, r * state], self._num_units, True)
            c = self._activation(candidate_pre)
        # Interpolate between the previous state and the candidate.
        new_h = u * state + (1 - u) * c
    return new_h, new_h
def __call__(self, inputs, state, con):
    """Compute a soft window over ``con`` and the updated window position.

    A linear layer maps ``inputs`` to three equal slices (a, b, k). Their
    exponentials give mixture weights, widths and a positive increment that
    is added to ``state``. Returns ``(wt, ko)``: the window-weighted
    combination of ``con`` and the new position state.
    """
    with tf.variable_scope(type(self).__name__):
        # batch_size x 3(cluster_size)
        params = rnn_cell.linear(inputs, 3 * self._cluster_size, True)
        a, b, k = tf.split(1, 3, params)
        ao = tf.exp(a)
        bo = tf.exp(b)
        ko = state + tf.exp(k)

        # One weight per position index; each entry is [batch_size x 1]
        # because the cluster axis is summed with keep_dims=True.
        phi = [
            tf.reduce_sum(
                ao * tf.exp(-bo * tf.square(ko - position)), 1, keep_dims=True)
            for position in range(self._con_size)
        ]

        # tf.concat(1, phi)        -> [batch_size x con_size]
        # tf.expand_dims(%, 1)     -> [batch_size x 1 x con_size]
        # tf.batch_matmul(%, con)  -> [batch_size x 1 x depth of con]
        # tf.squeeze(%, [1])       -> [batch_size x depth of con]
        window = tf.expand_dims(tf.concat(1, phi), 1)
        wt = tf.squeeze(tf.batch_matmul(window, con), [1])
        return wt, ko
def highway(input_, size, layer_size=1, bias=-2, f=tf.nn.relu):
    """Highway Network (cf. http://arxiv.org/abs/1505.00387).

    For each layer with input y:

        t = sigmoid(Wy + b)
        z = t * g(Wy + b) + (1 - t) * y

    where g is nonlinearity, t is transform gate, and (1 - t) is carry gate.

    Args:
        input_: tensor fed to the first layer.
        size: output size passed to every linear map.
        layer_size: number of stacked highway layers; 0 returns ``input_``.
        bias: scalar added to the transform-gate pre-activation.
        f: nonlinearity g applied to the candidate output.

    Returns:
        The output of the last highway layer.
    """
    layer_input = input_
    for idx in range(layer_size):
        candidate = f(
            rnn_cell.linear(layer_input, size, 0, scope='output_lin_%d' % idx))
        # The gate and the carry path use this layer's own input, so stacked
        # layers chain properly instead of all carrying the original input_.
        transform_gate = tf.sigmoid(
            rnn_cell.linear(
                layer_input, size, 0, scope='transform_lin_%d' % idx) + bias)
        carry_gate = 1. - transform_gate
        layer_input = transform_gate * candidate + carry_gate * layer_input
    return layer_input
def __call__(self, inputs, scope=None):
    """Apply the linear projection and the optional non-linearity.

    :param inputs: list of 2D Tensors with shape [batch_size x self.from_size]
    :return: list of 2D Tensors with shape [batch_size x self.to_size]
    """
    with vs.variable_scope(scope or "Projector"):
        projected = linear(inputs, self.to_size, self.bias)
        activation = self.non_linearity
        if activation is None:
            return projected
        return activation(projected)
def highway(input_, size, layer_size=1, bias=-2, f=tf.nn.relu):
    """Highway Network (cf. http://arxiv.org/abs/1505.00387).

    For each layer with input y:

        t = sigmoid(Wy + b)
        z = t * g(Wy + b) + (1 - t) * y

    where g is nonlinearity, t is transform gate, and (1 - t) is carry gate.

    Args:
        input_: tensor fed to the first layer.
        size: output size passed to every linear map.
        layer_size: number of stacked highway layers; 0 returns ``input_``.
        bias: scalar added to the transform-gate pre-activation.
        f: nonlinearity g applied to the candidate output.

    Returns:
        The output of the last highway layer.
    """
    layer_input = input_
    for idx in range(layer_size):
        candidate = f(
            rnn_cell.linear(layer_input, size, 0, scope='output_lin_%d' % idx))
        # The gate and the carry path use this layer's own input, so stacked
        # layers chain properly instead of all carrying the original input_.
        transform_gate = tf.sigmoid(
            rnn_cell.linear(
                layer_input, size, 0, scope='transform_lin_%d' % idx) + bias)
        carry_gate = 1. - transform_gate
        layer_input = transform_gate * candidate + carry_gate * layer_input
    return layer_input
def __call__(self, inputs, state, scope=None):
    """Run one LSTM step on a state that packs ``c`` and ``h`` along axis 1.

    Returns ``(new_h, new_state)`` where ``new_state`` is ``new_c`` and
    ``new_h`` concatenated along axis 1.
    """
    cell_scope = scope or type(self).__name__  # "BasicLSTMCell"
    with vs.variable_scope(cell_scope):
        c, h = array_ops.split(1, 2, state)
        # All four gate pre-activations come from one multiply for efficiency.
        gates = rnn_cell.linear([inputs, h], 4 * self._num_units, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(1, 4, gates)

        retained = c * tf.sigmoid(f + self._forget_bias)
        written = tf.sigmoid(i) * self._activation(j)
        new_c = retained + written
        new_h = self._activation(new_c) * tf.sigmoid(o)

        new_state = array_ops.concat(1, [new_c, new_h])
        return new_h, new_state
def __call__(self, inputs, state, scope=None):
    """Run one LSTM step whose cell state is kept on the instance.

    ``state`` is used as the previous hidden value ``h``; the cell state lives
    in ``self.c`` and is created as zeros on the first call, then overwritten
    on every call. The gates see ``inputs``, ``h`` and ``self.c``. The hidden
    output is passed through an extra affine layer (``softmax_w`` /
    ``softmax_b``) and that projection is returned as both the output and the
    next state.

    NOTE(review): ``scope`` is accepted but ignored; the variable scope is
    always "BasicLSTMCell".
    """
    with tf.variable_scope("BasicLSTMCell"):
        h = state
        # Identity check: `== None` on a tensor relies on how the tensor type
        # implements equality, `is None` does not.
        if self.c is None:
            self.c = tf.reshape(tf.zeros_like(h), [-1, self._num_units])

        concat = linear([inputs, h, self.c], 4 * self._num_units, True)
        # i = input gate, j = new input, f = forget gate, o = output gate
        i, j, f, o = tf.split(1, 4, concat)

        self.c = (self.c * tf.sigmoid(f + self._forget_bias)
                  + tf.sigmoid(i) * tf.tanh(j))
        new_h = tf.tanh(self.c) * tf.sigmoid(o)

        softmax_w = tf.get_variable(
            "softmax_w", [self._num_units, self._num_units])
        softmax_b = tf.get_variable("softmax_b", [self._num_units])
        new_y = tf.nn.xw_plus_b(new_h, softmax_w, softmax_b)
        return new_y, new_y
def __call__(self, inputs, state, scope=None):
    """Run one LSTM step whose cell state is kept on the instance.

    ``state`` is used as the previous hidden value ``h``; the cell state lives
    in ``self.c`` and is created as zeros on the first call, then overwritten
    on every call. The gates see ``inputs``, ``h`` and ``self.c``. The hidden
    output is passed through an extra affine layer (``softmax_w`` /
    ``softmax_b``) and that projection is returned as both the output and the
    next state.

    NOTE(review): ``scope`` is accepted but ignored; the variable scope is
    always "BasicLSTMCell".
    """
    with tf.variable_scope("BasicLSTMCell"):
        h = state
        # Identity check: `== None` on a tensor relies on how the tensor type
        # implements equality, `is None` does not.
        if self.c is None:
            self.c = tf.reshape(tf.zeros_like(h), [-1, self._num_units])

        concat = linear([inputs, h, self.c], 4 * self._num_units, True)
        # i = input gate, j = new input, f = forget gate, o = output gate
        i, j, f, o = tf.split(1, 4, concat)

        self.c = (self.c * tf.sigmoid(f + self._forget_bias)
                  + tf.sigmoid(i) * tf.tanh(j))
        new_h = tf.tanh(self.c) * tf.sigmoid(o)

        softmax_w = tf.get_variable(
            "softmax_w", [self._num_units, self._num_units])
        softmax_b = tf.get_variable("softmax_b", [self._num_units])
        new_y = tf.nn.xw_plus_b(new_h, softmax_w, softmax_b)
        return new_y, new_y
def __call__(self, inputs, state, con):
    """Compute a soft window over ``con`` and the updated window position.

    A linear layer maps ``inputs`` to three equal slices (a, b, k). Their
    exponentials give mixture weights, widths and a positive increment that
    is added to ``state``. Returns ``(wt, ko)``: the window-weighted
    combination of ``con`` and the new position state.
    """
    with tf.variable_scope(type(self).__name__):
        # batch_size x 3(cluster_size)
        params = rnn_cell.linear(inputs, 3 * self._cluster_size, True)
        a, b, k = tf.split(1, 3, params)
        ao = tf.exp(a)
        bo = tf.exp(b)
        ko = state + tf.exp(k)

        # One weight per position index; each entry is [batch_size x 1]
        # because the cluster axis is summed with keep_dims=True.
        phi = [
            tf.reduce_sum(
                ao * tf.exp(-bo * tf.square(ko - position)), 1, keep_dims=True)
            for position in range(self._con_size)
        ]

        # tf.concat(1, phi)        -> [batch_size x con_size]
        # tf.expand_dims(%, 1)     -> [batch_size x 1 x con_size]
        # tf.batch_matmul(%, con)  -> [batch_size x 1 x depth of con]
        # tf.squeeze(%, [1])       -> [batch_size x depth of con]
        window = tf.expand_dims(tf.concat(1, phi), 1)
        wt = tf.squeeze(tf.batch_matmul(window, con), [1])
        return wt, ko
def prepare_model(self):
    """Build the per-step input encoder, the stacked LSTM and the loss.

    For every time step the word is encoded either from its characters
    (embedding + TDNN, optionally concatenated with a word embedding) or from
    a word embedding alone, optionally batch-normalised, and passed through
    highway layers. The resulting sequence feeds a stacked LSTM whose outputs
    are projected to the vocabulary and scored with softmax cross entropy.

    Sets ``self.char_inputs``, ``self.word_inputs``, ``self.true_outputs``
    (placeholders), ``self.cnn_outputs``, ``self.cell``, ``self.stacked_cell``,
    ``self.lstm_outputs`` and ``self.loss``, and registers "loss" and
    "perplexity" scalar summaries.
    """
    with tf.variable_scope("LSTMTDNN"):
        self.char_inputs = []
        self.word_inputs = []
        self.cnn_outputs = []

        if self.use_char:
            char_W = tf.get_variable(
                "char_embed", [self.char_vocab_size, self.char_embed_dim])
        # The word table is needed for the word-only model and also for the
        # combined char+word model; creating it only in the `else` branch left
        # word_W undefined in the combined case.
        if not self.use_char or self.use_word:
            word_W = tf.get_variable(
                "word_embed", [self.word_vocab_size, self.word_embed_dim])

        with tf.variable_scope("CNN") as scope:
            self.char_inputs = tf.placeholder(
                tf.int32,
                [self.batch_size, self.seq_length, self.max_word_length])
            self.word_inputs = tf.placeholder(
                tf.int32, [self.batch_size, self.seq_length])

            char_indices = tf.split(1, self.seq_length, self.char_inputs)
            word_indices = tf.split(
                1, self.seq_length, tf.expand_dims(self.word_inputs, -1))

            for idx in range(self.seq_length):
                char_index = tf.reshape(
                    char_indices[idx], [-1, self.max_word_length])
                word_index = tf.reshape(word_indices[idx], [-1, 1])

                # Every step after the first shares the variables of step 0.
                if idx != 0:
                    scope.reuse_variables()

                if self.use_char:
                    # [batch_size x word_max_length, char_embed]
                    char_embed = tf.nn.embedding_lookup(char_W, char_index)
                    char_cnn = TDNN(char_embed, self.char_embed_dim,
                                    self.feature_maps, self.kernels)

                    if self.use_word:
                        # The lookup on [batch, 1] ids yields [batch, 1, dim];
                        # drop the singleton axis before concatenating.
                        # NOTE(review): assumes char_cnn.output is rank 2
                        # ([batch, features]) -- confirm against TDNN.
                        word_embed = tf.squeeze(
                            tf.nn.embedding_lookup(word_W, word_index), [1])
                        # tf.concat takes the tensors as one list argument.
                        cnn_output = tf.concat(
                            1, [char_cnn.output, word_embed])
                    else:
                        cnn_output = char_cnn.output
                else:
                    # Squeeze only axis 1 so a batch of size 1 survives.
                    cnn_output = tf.squeeze(
                        tf.nn.embedding_lookup(word_W, word_index), [1])

                if self.use_batch_norm:
                    bn = batch_norm()
                    norm_output = bn(
                        tf.expand_dims(tf.expand_dims(cnn_output, 1), 1))
                    # Remove exactly the two axes inserted above.
                    # NOTE(review): assumes batch_norm preserves the input
                    # shape -- confirm.
                    cnn_output = tf.squeeze(norm_output, [1, 2])

                # Testing the `highway` function object itself is always
                # true; the layer count is the meaningful switch (zero layers
                # leaves the input unchanged).
                if self.highway_layers > 0:
                    cnn_output = highway(cnn_output,
                                         cnn_output.get_shape()[1],
                                         self.highway_layers, 0)

                self.cnn_outputs.append(cnn_output)

        with tf.variable_scope("LSTM") as scope:
            self.cell = rnn_cell.BasicLSTMCell(self.rnn_size)
            self.stacked_cell = rnn_cell.MultiRNNCell(
                [self.cell] * self.layer_depth)

            outputs, _ = rnn.rnn(
                self.stacked_cell, self.cnn_outputs, dtype=tf.float32)

            self.lstm_outputs = []
            self.true_outputs = tf.placeholder(
                tf.float32,
                [self.batch_size, self.seq_length, self.word_vocab_size])

            loss = 0
            true_outputs = tf.split(1, self.seq_length, self.true_outputs)

            for idx, (top_h, true_output) in enumerate(
                    zip(outputs, true_outputs)):
                if self.dropout_prob > 0:
                    # tf.nn.dropout expects the probability of KEEPING a unit.
                    top_h = tf.nn.dropout(top_h, 1.0 - self.dropout_prob)

                if self.hsm > 0:
                    # NOTE(review): in this branch the raw LSTM output is used
                    # as logits below, exactly as before -- confirm intent.
                    self.lstm_outputs.append(top_h)
                    logits = top_h
                else:
                    if idx != 0:
                        scope.reuse_variables()
                    proj = rnn_cell.linear(top_h, self.word_vocab_size, 0)
                    log_softmax = tf.log(tf.nn.softmax(proj))
                    self.lstm_outputs.append(log_softmax)
                    # Feed the raw projection to the loss: softmax of
                    # log-softmax equals softmax, but log(softmax) can hit
                    # log(0) and poison the loss with inf/NaN.
                    logits = proj

                loss += tf.nn.softmax_cross_entropy_with_logits(
                    logits, tf.squeeze(true_output, [1]))

            self.loss = tf.reduce_mean(loss) / self.seq_length

            tf.scalar_summary("loss", self.loss)
            tf.scalar_summary("perplexity", tf.exp(self.loss))
def __call__(self, inputs, state, scope=None):
    """Most basic RNN step: output = new_state = activation(W * input + U * state + B)."""
    cell_scope = scope or type(self).__name__  # "BasicRNNCell"
    with vs.variable_scope(cell_scope):
        pre_activation = rnn_cell.linear(
            [inputs, state], self._num_units, True)
        output = self._activation(pre_activation)
    # The same tensor is both the step output and the next state.
    return output, output
def build_model(self):
    """Build the attention encoder-decoder sense classifier graph.

    ``self.arg1`` / ``self.arg2`` hold four integer feature ids per token
    (looked up in the NER, lemma, word and POS tables, in that order; a table
    is skipped when its embed size is not positive). ``arg1`` is encoded by a
    forward and a backward LSTM, ``arg2`` is decoded with attention over the
    encoder outputs, and the length-normalised sums of decoder and encoder
    outputs are classified into ``self.data_loader.sense_vocab_size`` classes.

    Sets the placeholders ``self.sense``, ``self.arg1``, ``self.arg2``,
    ``self.arg1_len``, ``self.arg2_len``, ``self.keep_prob`` and
    ``self.mask``, plus ``self.output``, ``self.accuracy``, ``self.loss``,
    ``self.optimizer`` and ``self.train_op``.
    """
    with tf.variable_scope('RNNTEST'):
        self.sense = tf.placeholder(tf.int32, [None])
        self.arg1 = tf.placeholder(tf.int32, [None, None, 4])
        self.arg2 = tf.placeholder(tf.int32, [None, None, 4])
        self.arg1_len = tf.placeholder(tf.int32, [None])
        self.arg2_len = tf.placeholder(tf.int32, [None])
        self.keep_prob = tf.placeholder(tf.float32)

        # One [batch, len, 1] slice per feature kind.
        arg1_list = tf.split(2, 4, self.arg1)
        arg2_list = tf.split(2, 4, self.arg2)

        with tf.device('/cpu:0'):
            NER_W = tf.get_variable(
                'NER_embed',
                [self.data_loader.NER_vocab_size, self.NER_embed_size]
            ) if self.NER_embed_size > 0 else None
            lemma_W = tf.get_variable(
                'lemma_embed',
                [self.data_loader.lemma_vocab_size, self.lemma_embed_size]
            ) if self.lemma_embed_size > 0 else None
            if self.use_pre_trained_embedding:
                word_W = tf.get_variable(
                    'word_embed',
                    initializer=tf.convert_to_tensor(
                        self.data_loader.pre_trained_word_embeddings,
                        dtype=tf.float32)
                ) if self.word_embed_size > 0 else None
            else:
                word_W = tf.get_variable(
                    'word_embed',
                    shape=[self.data_loader.word_vocab_size,
                           self.word_embed_size]
                ) if self.word_embed_size > 0 else None
            POS_W = tf.get_variable(
                'POS_embed',
                [self.data_loader.POS_vocab_size, self.POS_embed_size]
            ) if self.POS_embed_size > 0 else None

            arg1_embed_list = []
            arg2_embed_list = []
            for idx, W in enumerate([NER_W, lemma_W, word_W, POS_W]):
                if W is not None:
                    arg1_embed_list.append(tf.nn.embedding_lookup(
                        W, tf.squeeze(arg1_list[idx], [2])))
                    arg2_embed_list.append(tf.nn.embedding_lookup(
                        W, tf.squeeze(arg2_list[idx], [2])))

        arg1 = tf.nn.dropout(tf.concat(2, arg1_embed_list), self.keep_prob)
        arg2 = tf.nn.dropout(tf.concat(2, arg2_embed_list), self.keep_prob)

        encoder_lstm_unit = rnn_cell.BasicLSTMCell(self.encoder_size)
        decoder_lstm_unit = rnn_cell.BasicLSTMCell(self.decoder_size)

        arg1_len64 = tf.cast(self.arg1_len, tf.int64)

        with tf.variable_scope('forward_encoder'):
            forward_encoder_outputs, forward_encoder_state = rnn.dynamic_rnn(
                encoder_lstm_unit, arg1, self.arg1_len, dtype=tf.float32)
        with tf.variable_scope('backward_encoder'):
            # The backward pass gets the sequence lengths too. Without them
            # the RNN keeps stepping over the padding that follows each
            # reversed sequence, which corrupts the final state and leaves
            # non-zero outputs in padded positions that are later summed.
            backward_encoder_outputs, backward_encoder_state = rnn.dynamic_rnn(
                encoder_lstm_unit,
                tf.reverse_sequence(arg1, arg1_len64, 1),
                self.arg1_len,
                dtype=tf.float32)

        encoder_outputs = tf.concat(2, [
            forward_encoder_outputs,
            tf.reverse_sequence(backward_encoder_outputs, arg1_len64, 1)
        ])
        encoder_state = tf.concat(
            1, [forward_encoder_state, backward_encoder_state])

        # batch_size x source_len x 1 x source_depth(2*encoder_size)
        source = tf.expand_dims(encoder_outputs, 2)
        attention_W = tf.get_variable(
            'attention_W',
            [1, 1, 2 * self.encoder_size, self.attention_judge_size])
        attention_V = tf.get_variable(
            'attention_V', [self.attention_judge_size])
        # A 1x1 convolution projects every source position at once.
        # batch_size x source_len x 1 x attention
        WxH = tf.nn.conv2d(source, attention_W, [1, 1, 1, 1], 'SAME')
        self.mask = tf.placeholder(tf.float32, [None, None])

        def attention(input_t, output_t_minus_1, time):
            """Mix an attention summary of the encoder into the step input."""
            with tf.variable_scope('attention'):
                # NOTE(review): both linear() calls below share this scope
                # without distinct `scope` names -- confirm no collision.
                # batch_size x 1 x 1 x attention
                VxS = tf.reshape(
                    rnn_cell.linear(output_t_minus_1,
                                    self.attention_judge_size, True),
                    [-1, 1, 1, self.attention_judge_size])
                # batch_size x source_len x 1
                _exp = tf.exp(
                    tf.reduce_sum(attention_V * tf.tanh(WxH + VxS), [3]))
                # Zero out masked positions before normalising.
                _exp = _exp * tf.expand_dims(self.mask, -1)
                attention_weight = _exp / tf.reduce_sum(
                    _exp, [1], keep_dims=True)
                attention_t = tf.reduce_sum(
                    encoder_outputs * attention_weight, [1])
                feed_in_t = tf.tanh(
                    rnn_cell.linear([attention_t, input_t],
                                    self.embedding_size, True))
                return feed_in_t

        with tf.variable_scope('decoder'):
            decoder_outputs, decoder_state = dynamic_rnn_decoder(
                arg2,
                decoder_lstm_unit,
                initial_state=encoder_state,
                sequence_length=self.arg2_len,
                loop_function=attention)

        # Sums over time divided by the true lengths of each argument.
        judge = tf.concat(1, [
            tf.reduce_sum(decoder_outputs, [1]) /
            tf.expand_dims(tf.cast(self.arg2_len, tf.float32), -1),
            tf.reduce_sum(encoder_outputs, [1]) /
            tf.expand_dims(tf.cast(self.arg1_len, tf.float32), -1)
        ])
        unscaled_log_distribution = rnn_cell.linear(
            judge, self.data_loader.sense_vocab_size, True)
        self.output = tf.cast(
            tf.argmax(unscaled_log_distribution, 1), tf.int32)
        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self.output, self.sense), tf.float32))

        #max-margin method
        #self._MM = tf.placeholder(tf.int32,[None])
        #margin = tf.sub(tf.reduce_max(unscaled_log_distribution,[1]),tf.gather(tf.reshape(unscaled_log_distribution,[-1]),self._MM))
        #self.loss = tf.reduce_mean(margin)

        #maximum likelihood method
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                unscaled_log_distribution, self.sense))
        self.optimizer = tf.train.AdagradOptimizer(self.lr)
        self.train_op = self.optimizer.minimize(self.loss)
def prepare_model(self):
    """Build the per-step input encoder, the stacked LSTM and the loss.

    For every time step the word is encoded either from its characters
    (embedding + TDNN, optionally concatenated with a word embedding) or from
    a word embedding alone, optionally batch-normalised, and passed through
    highway layers. The resulting sequence feeds a stacked LSTM whose outputs
    are projected to the vocabulary and scored with softmax cross entropy.

    Sets ``self.char_inputs``, ``self.word_inputs``, ``self.true_outputs``
    (placeholders), ``self.cnn_outputs``, ``self.cell``, ``self.stacked_cell``,
    ``self.lstm_outputs`` and ``self.loss``, and registers "loss" and
    "perplexity" scalar summaries.
    """
    with tf.variable_scope("LSTMTDNN"):
        self.char_inputs = []
        self.word_inputs = []
        self.cnn_outputs = []

        if self.use_char:
            char_W = tf.get_variable(
                "char_embed", [self.char_vocab_size, self.char_embed_dim])
        # The word table is needed for the word-only model and also for the
        # combined char+word model; creating it only in the `else` branch left
        # word_W undefined in the combined case.
        if not self.use_char or self.use_word:
            word_W = tf.get_variable(
                "word_embed", [self.word_vocab_size, self.word_embed_dim])

        with tf.variable_scope("CNN") as scope:
            self.char_inputs = tf.placeholder(
                tf.int32,
                [self.batch_size, self.seq_length, self.max_word_length])
            self.word_inputs = tf.placeholder(
                tf.int32, [self.batch_size, self.seq_length])

            char_indices = tf.split(1, self.seq_length, self.char_inputs)
            word_indices = tf.split(
                1, self.seq_length, tf.expand_dims(self.word_inputs, -1))

            for idx in range(self.seq_length):
                char_index = tf.reshape(
                    char_indices[idx], [-1, self.max_word_length])
                word_index = tf.reshape(word_indices[idx], [-1, 1])

                # Every step after the first shares the variables of step 0.
                if idx != 0:
                    scope.reuse_variables()

                if self.use_char:
                    # [batch_size x word_max_length, char_embed]
                    char_embed = tf.nn.embedding_lookup(char_W, char_index)
                    char_cnn = TDNN(char_embed, self.char_embed_dim,
                                    self.feature_maps, self.kernels)

                    if self.use_word:
                        # The lookup on [batch, 1] ids yields [batch, 1, dim];
                        # drop the singleton axis before concatenating.
                        # NOTE(review): assumes char_cnn.output is rank 2
                        # ([batch, features]) -- confirm against TDNN.
                        word_embed = tf.squeeze(
                            tf.nn.embedding_lookup(word_W, word_index), [1])
                        # tf.concat takes the tensors as one list argument.
                        cnn_output = tf.concat(
                            1, [char_cnn.output, word_embed])
                    else:
                        cnn_output = char_cnn.output
                else:
                    # Squeeze only axis 1 so a batch of size 1 survives.
                    cnn_output = tf.squeeze(
                        tf.nn.embedding_lookup(word_W, word_index), [1])

                if self.use_batch_norm:
                    bn = batch_norm()
                    norm_output = bn(
                        tf.expand_dims(tf.expand_dims(cnn_output, 1), 1))
                    # Remove exactly the two axes inserted above.
                    # NOTE(review): assumes batch_norm preserves the input
                    # shape -- confirm.
                    cnn_output = tf.squeeze(norm_output, [1, 2])

                # Testing the `highway` function object itself is always
                # true; the layer count is the meaningful switch (zero layers
                # leaves the input unchanged).
                if self.highway_layers > 0:
                    cnn_output = highway(cnn_output,
                                         cnn_output.get_shape()[1],
                                         self.highway_layers, 0)

                self.cnn_outputs.append(cnn_output)

        with tf.variable_scope("LSTM") as scope:
            self.cell = rnn_cell.BasicLSTMCell(self.rnn_size)
            self.stacked_cell = rnn_cell.MultiRNNCell(
                [self.cell] * self.layer_depth)

            outputs, _ = rnn.rnn(
                self.stacked_cell, self.cnn_outputs, dtype=tf.float32)

            self.lstm_outputs = []
            self.true_outputs = tf.placeholder(
                tf.float32,
                [self.batch_size, self.seq_length, self.word_vocab_size])

            loss = 0
            true_outputs = tf.split(1, self.seq_length, self.true_outputs)

            for idx, (top_h, true_output) in enumerate(
                    zip(outputs, true_outputs)):
                if self.dropout_prob > 0:
                    # tf.nn.dropout expects the probability of KEEPING a unit.
                    top_h = tf.nn.dropout(top_h, 1.0 - self.dropout_prob)

                if self.hsm > 0:
                    # NOTE(review): in this branch the raw LSTM output is used
                    # as logits below, exactly as before -- confirm intent.
                    self.lstm_outputs.append(top_h)
                    logits = top_h
                else:
                    if idx != 0:
                        scope.reuse_variables()
                    proj = rnn_cell.linear(top_h, self.word_vocab_size, 0)
                    log_softmax = tf.log(tf.nn.softmax(proj))
                    self.lstm_outputs.append(log_softmax)
                    # Feed the raw projection to the loss: softmax of
                    # log-softmax equals softmax, but log(softmax) can hit
                    # log(0) and poison the loss with inf/NaN.
                    logits = proj

                loss += tf.nn.softmax_cross_entropy_with_logits(
                    logits, tf.squeeze(true_output, [1]))

            self.loss = tf.reduce_mean(loss) / self.seq_length

            tf.scalar_summary("loss", self.loss)
            tf.scalar_summary("perplexity", tf.exp(self.loss))
def __call__(self, inputs, state, scope=None):
    """Basic RNN step with a ReLU: output = new_state = relu(W * input + U * state + B).

    NOTE(review): ``tf.nn.relu`` is applied directly; ``self._activation`` is
    not consulted here -- confirm that this is intended.
    """
    cell_scope = scope or type(self).__name__  # "BasicRNNCell"
    with vs.variable_scope(cell_scope):
        pre_activation = rnn_cell.linear(
            [inputs, state], self._num_units, True)
        output = tf.nn.relu(pre_activation)
    # The same tensor is both the step output and the next state.
    return output, output
def build_model(self):
    """Build the attention encoder-decoder sense classifier graph.

    ``self.arg1`` / ``self.arg2`` hold four integer feature ids per token
    (looked up in the NER, lemma, word and POS tables, in that order; a table
    is skipped when its embed size is not positive). ``arg1`` is encoded by a
    forward and a backward LSTM, ``arg2`` is decoded with attention over the
    encoder outputs, and the length-normalised sums of decoder and encoder
    outputs are classified into ``self.data_loader.sense_vocab_size`` classes.

    Sets the placeholders ``self.sense``, ``self.arg1``, ``self.arg2``,
    ``self.arg1_len``, ``self.arg2_len``, ``self.keep_prob`` and
    ``self.mask``, plus ``self.output``, ``self.accuracy``, ``self.loss``,
    ``self.optimizer`` and ``self.train_op``.
    """
    with tf.variable_scope('RNNTEST'):
        self.sense = tf.placeholder(tf.int32, [None])
        self.arg1 = tf.placeholder(tf.int32, [None, None, 4])
        self.arg2 = tf.placeholder(tf.int32, [None, None, 4])
        self.arg1_len = tf.placeholder(tf.int32, [None])
        self.arg2_len = tf.placeholder(tf.int32, [None])
        self.keep_prob = tf.placeholder(tf.float32)

        # One [batch, len, 1] slice per feature kind.
        arg1_list = tf.split(2, 4, self.arg1)
        arg2_list = tf.split(2, 4, self.arg2)

        with tf.device('/cpu:0'):
            NER_W = tf.get_variable(
                'NER_embed',
                [self.data_loader.NER_vocab_size, self.NER_embed_size]
            ) if self.NER_embed_size > 0 else None
            lemma_W = tf.get_variable(
                'lemma_embed',
                [self.data_loader.lemma_vocab_size, self.lemma_embed_size]
            ) if self.lemma_embed_size > 0 else None
            if self.use_pre_trained_embedding:
                word_W = tf.get_variable(
                    'word_embed',
                    initializer=tf.convert_to_tensor(
                        self.data_loader.pre_trained_word_embeddings,
                        dtype=tf.float32)
                ) if self.word_embed_size > 0 else None
            else:
                word_W = tf.get_variable(
                    'word_embed',
                    shape=[self.data_loader.word_vocab_size,
                           self.word_embed_size]
                ) if self.word_embed_size > 0 else None
            POS_W = tf.get_variable(
                'POS_embed',
                [self.data_loader.POS_vocab_size, self.POS_embed_size]
            ) if self.POS_embed_size > 0 else None

            arg1_embed_list = []
            arg2_embed_list = []
            for idx, W in enumerate([NER_W, lemma_W, word_W, POS_W]):
                if W is not None:
                    arg1_embed_list.append(tf.nn.embedding_lookup(
                        W, tf.squeeze(arg1_list[idx], [2])))
                    arg2_embed_list.append(tf.nn.embedding_lookup(
                        W, tf.squeeze(arg2_list[idx], [2])))

        arg1 = tf.nn.dropout(tf.concat(2, arg1_embed_list), self.keep_prob)
        arg2 = tf.nn.dropout(tf.concat(2, arg2_embed_list), self.keep_prob)

        encoder_lstm_unit = rnn_cell.BasicLSTMCell(self.encoder_size)
        decoder_lstm_unit = rnn_cell.BasicLSTMCell(self.decoder_size)

        arg1_len64 = tf.cast(self.arg1_len, tf.int64)

        with tf.variable_scope('forward_encoder'):
            forward_encoder_outputs, forward_encoder_state = rnn.dynamic_rnn(
                encoder_lstm_unit, arg1, self.arg1_len, dtype=tf.float32)
        with tf.variable_scope('backward_encoder'):
            # The backward pass gets the sequence lengths too. Without them
            # the RNN keeps stepping over the padding that follows each
            # reversed sequence, which corrupts the final state and leaves
            # non-zero outputs in padded positions that are later summed.
            backward_encoder_outputs, backward_encoder_state = rnn.dynamic_rnn(
                encoder_lstm_unit,
                tf.reverse_sequence(arg1, arg1_len64, 1),
                self.arg1_len,
                dtype=tf.float32)

        encoder_outputs = tf.concat(2, [
            forward_encoder_outputs,
            tf.reverse_sequence(backward_encoder_outputs, arg1_len64, 1)
        ])
        encoder_state = tf.concat(
            1, [forward_encoder_state, backward_encoder_state])

        # batch_size x source_len x 1 x source_depth(2*encoder_size)
        source = tf.expand_dims(encoder_outputs, 2)
        attention_W = tf.get_variable(
            'attention_W',
            [1, 1, 2 * self.encoder_size, self.attention_judge_size])
        attention_V = tf.get_variable(
            'attention_V', [self.attention_judge_size])
        # A 1x1 convolution projects every source position at once.
        # batch_size x source_len x 1 x attention
        WxH = tf.nn.conv2d(source, attention_W, [1, 1, 1, 1], 'SAME')
        self.mask = tf.placeholder(tf.float32, [None, None])

        def attention(input_t, output_t_minus_1, time):
            """Mix an attention summary of the encoder into the step input."""
            with tf.variable_scope('attention'):
                # NOTE(review): both linear() calls below share this scope
                # without distinct `scope` names -- confirm no collision.
                # batch_size x 1 x 1 x attention
                VxS = tf.reshape(
                    rnn_cell.linear(output_t_minus_1,
                                    self.attention_judge_size, True),
                    [-1, 1, 1, self.attention_judge_size])
                # batch_size x source_len x 1
                _exp = tf.exp(
                    tf.reduce_sum(attention_V * tf.tanh(WxH + VxS), [3]))
                # Zero out masked positions before normalising.
                _exp = _exp * tf.expand_dims(self.mask, -1)
                attention_weight = _exp / tf.reduce_sum(
                    _exp, [1], keep_dims=True)
                attention_t = tf.reduce_sum(
                    encoder_outputs * attention_weight, [1])
                feed_in_t = tf.tanh(
                    rnn_cell.linear([attention_t, input_t],
                                    self.embedding_size, True))
                return feed_in_t

        with tf.variable_scope('decoder'):
            decoder_outputs, decoder_state = dynamic_rnn_decoder(
                arg2,
                decoder_lstm_unit,
                initial_state=encoder_state,
                sequence_length=self.arg2_len,
                loop_function=attention)

        # Sums over time divided by the true lengths of each argument.
        judge = tf.concat(1, [
            tf.reduce_sum(decoder_outputs, [1]) /
            tf.expand_dims(tf.cast(self.arg2_len, tf.float32), -1),
            tf.reduce_sum(encoder_outputs, [1]) /
            tf.expand_dims(tf.cast(self.arg1_len, tf.float32), -1)
        ])
        unscaled_log_distribution = rnn_cell.linear(
            judge, self.data_loader.sense_vocab_size, True)
        self.output = tf.cast(
            tf.argmax(unscaled_log_distribution, 1), tf.int32)
        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self.output, self.sense), tf.float32))

        #max-margin method
        #self._MM = tf.placeholder(tf.int32,[None])
        #margin = tf.sub(tf.reduce_max(unscaled_log_distribution,[1]),tf.gather(tf.reshape(unscaled_log_distribution,[-1]),self._MM))
        #self.loss = tf.reduce_mean(margin)

        #maximum likelihood method
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                unscaled_log_distribution, self.sense))
        self.optimizer = tf.train.AdagradOptimizer(self.lr)
        self.train_op = self.optimizer.minimize(self.loss)