def __init__(self, hidden_size=10, vocab_size=42791, embedding_size=50, embedding_matrix=None):
    """Build the sentence-selection graph (bi-GRU encoders + bilinear attention).

    Every sentence and the question are encoded with separate bidirectional
    GRUs; a bilinear attention between the question state and the sentence
    states yields one weight per sentence, which is trained with a negative
    log-likelihood loss against ``self.labels``.

    Args:
        hidden_size: State size of each GRU direction; the concatenated
            bidirectional representation has ``hidden_size * 2`` features.
        vocab_size: Number of rows of the embedding table when it is
            randomly initialised.
        embedding_size: Width of the embedding table; also the GRU input size.
        embedding_matrix: Optional pre-trained embedding matrix. When given
            it is used as the initial value of ``W_embeddings`` instead of a
            random table.
    """
    # Placeholders
    # ==================================================
    # (batch_size * max_sentence_count x max_sentence_length)
    self.sentences = tf.placeholder(tf.int32, [None, None], name="sentences")
    self.questions = tf.placeholder(tf.int32, [None, None], name="questions")
    self.labels = tf.placeholder(tf.int32, [None, ], name="labels")

    sentences = self.sentences
    questions = self.questions
    labels = self.labels
    batch_size = tf.shape(questions)[0]

    # Ids below zero are treated as padding: they are masked out and mapped
    # to row 0 so that the embedding lookup stays in range.
    # (batch_size * max_sentence_count x max_sentence_length)
    sentence_mask = tf.cast(sentences >= 0, tf.int32)
    self.sentence_mask = sentence_mask
    masked_sentences = tf.mul(sentences, sentence_mask)

    max_sent_per_doc = tf.cast(tf.shape(sentence_mask)[0] / batch_size, tf.int32)
    self.max_sent_per_doc = max_sent_per_doc

    # Input Preparation
    # ==================================================
    with tf.variable_scope("embeddings"):
        # `is None` rather than `== None`: comparing a NumPy matrix with
        # `==` is element-wise and cannot be used as an `if` condition.
        if embedding_matrix is None:
            self.W_embeddings = tf.get_variable(
                shape=[vocab_size, embedding_size],
                initializer=tf.random_uniform_initializer(-0.01, 0.01),
                name="W_embeddings", dtype=tf.float32)
        else:
            # Option to use pre-trained embeddings.
            self.W_embeddings = tf.Variable(
                embedding_matrix, name="W_embeddings", dtype=tf.float32)

    # SENTENCES
    # A sentence counts as present when at least one of its tokens is valid.
    # (batch_size x max_sent_per_doc)
    batch_mask = tf.reshape(tf.reduce_max(sentence_mask, 1), [batch_size, -1])
    # (batch_size * max_sent_per_doc x max_sentence_length x embedding_size)
    sentence_embeddings = tf.gather(self.W_embeddings, masked_sentences)

    # QUESTION
    # NOTE(review): the question mask uses `> 0` while the sentence mask uses
    # `>= 0`, so token id 0 is masked out of questions only. Kept as in the
    # original; confirm whether id 0 is a real word in the vocabulary.
    # (batch_size x max_question_length)
    question_mask = tf.cast(questions > 0, tf.int32)
    self.question_mask = question_mask
    masked_question = tf.mul(question_mask, questions)
    self.masked_question = masked_question

    # (batch_size x max_question_length x embedding_size)
    question_embeddings = tf.gather(self.W_embeddings, masked_question)
    self.question_embeddings = question_embeddings
    question_mask_float = tf.expand_dims(tf.cast(question_mask, tf.float32), -1)
    masked_question_embeddings = tf.mul(question_embeddings, question_mask_float)
    self.masked_question_embeddings = masked_question_embeddings

    # Sentence Representation
    # ==================================================
    with tf.variable_scope("sentence-representation"):
        self.forward_cell_d = GRUCell(state_size=hidden_size, input_size=embedding_size,
                                      scope="GRU-Forward-D")
        self.backward_cell_d = GRUCell(state_size=hidden_size, input_size=embedding_size,
                                       scope="GRU-Backward-D")
        self.hidden_states_d, last_state_d = bidirectional_rnn(
            self.forward_cell_d, self.backward_cell_d,
            sentence_embeddings, tf.cast(sentence_mask, tf.float32),
            concatenate=True)

        # Regroup the per-sentence states by document.
        doc_sentences = tf.reshape(last_state_d, [batch_size, -1, hidden_size * 2])
        # Attribute name kept from the earlier CBOW baseline; it now holds
        # the bidirectional GRU sentence states.
        self.cbow_sentences = doc_sentences

    # Query Representation
    # ==================================================
    with tf.variable_scope("query-representation"):
        self.forward_cell_q = GRUCell(state_size=hidden_size, input_size=embedding_size,
                                      scope="GRU-Forward-Q")
        self.backward_cell_q = GRUCell(state_size=hidden_size, input_size=embedding_size,
                                       scope="GRU-Backward-Q")
        self.hidden_states_q, last_state_q = bidirectional_rnn(
            self.forward_cell_q, self.backward_cell_q,
            question_embeddings, tf.cast(question_mask, tf.float32),
            concatenate=True)
        # Attribute name kept from the earlier CBOW baseline.
        self.question_cbow = last_state_q

    # Similarity Scoring
    # ==================================================
    # Bilinear attention of the question over the sentences of its document
    # (see https://arxiv.org/pdf/1605.07427v1.pdf for the dot-product /
    # cosine-similarity variants this replaced).
    with tf.variable_scope("similarity-scoring"):
        attention = BilinearFunction(attending_size=hidden_size * 2,
                                     attended_size=hidden_size * 2)
        alpha_weights, attend_result = attention(
            self.question_cbow, attended=doc_sentences,
            time_mask=tf.cast(batch_mask, tf.float32))
        # (batch_size x max_sent_per_doc)
        probabilities = alpha_weights
        self.probabilities = probabilities

    # Loss
    # ==================================================
    with tf.variable_scope("prediction"):
        one_hot_labels = tf.one_hot(labels, max_sent_per_doc, dtype=tf.float32)
        # Weight assigned to the labelled sentence of every example.
        likelihoods = tf.reduce_sum(tf.mul(probabilities, one_hot_labels), 1)
        # Small constant keeps the log finite when a likelihood is zero.
        log_likelihoods = tf.log(likelihoods + 1e-20)
        # Summed (not averaged) negative log-likelihood over the batch.
        self.loss = tf.mul(tf.reduce_sum(log_likelihoods), -1)

        predicted = tf.cast(tf.argmax(probabilities, 1), tf.int32)
        correct_vector = tf.cast(tf.equal(labels, predicted), tf.float32,
                                 name="correct_vector")
        self.accuracy = tf.reduce_mean(correct_vector)
masked_question = tf.mul(question_mask, questions) # easy baseline: cbow # (batch_size x max_question_length x embedding_size) question_embeddings = tf.gather(W_embeddings, masked_question) question_mask_float = tf.expand_dims(tf.cast(question_mask, tf.float32), -1) masked_question_embeddings = tf.mul(question_embeddings, question_mask_float) # (batch_size x embedding_size) question_cbow = tf.reduce_mean(masked_question_embeddings, 1) # can use RNN representation as well************************************* # ATTENTION/SIMILARITY SCORING -------------------------------------------------- # Using simple dot product/cosine similiarity as of now (https://arxiv.org/pdf/1605.07427v1.pdf) attention = BilinearFunction(attending_size=embedding_size, attended_size=embedding_size) alpha_weights, attend_result = attention(attending=question_cbow, attended=doc_sentences, \ time_mask=tf.cast(batch_mask, tf.float32)) # (batch_size x max_sent_per_doc) dot_prod = tf.squeeze( tf.batch_matmul(doc_sentences, tf.expand_dims(question_cbow, -1)), [-1]) # (batch_size x max_sent_per_doc) sentence_norm = tf.sqrt(tf.reduce_sum(tf.mul(doc_sentences, doc_sentences), -1)) # (batch_size) question_norm = tf.sqrt(tf.reduce_sum(tf.mul(question_cbow, question_cbow), 1)) denom = tf.mul(sentence_norm, tf.expand_dims(question_norm, -1)) + 1e-30 # (batch_size x max_sent_per_doc) - scalars between -1 and +1
def __init__(self, hidden_size=128, vocab_size=42791, embedding_size=50, embedding_matrix=None,
             embedding_trainable=False, sentence_rep_str="cbow", question_rep_str="cbow"):
    """Build the decoder-attention sentence-selection graph.

    The question is read by a GRU decoder that attends, at every question
    word, over all word embeddings of the flattened document. The attention
    mass is pooled per sentence and normalised per example to give
    ``self.probabilities``.

    Args:
        hidden_size: State size of the decoder GRU and attending size of the
            bilinear attention.
        vocab_size: Number of rows of the embedding table when it is
            randomly initialised.
        embedding_size: Width of the embedding table.
        embedding_matrix: Optional pre-trained embedding matrix used as the
            initial value of ``W_embeddings``.
        embedding_trainable: Whether a pre-trained ``embedding_matrix`` is
            trainable. Not applied to the randomly initialised table.
        sentence_rep_str: Not used by this constructor; accepted so the
            signature matches the sibling models.
        question_rep_str: Not used by this constructor; accepted so the
            signature matches the sibling models.
    """
    tf.set_random_seed(1234)

    # Placeholders
    # ==================================================
    # (batch_size * max_sentence_count x max_sentence_length)
    self.sentences = tf.placeholder(tf.int32, [None, None], name="sentences")
    self.questions = tf.placeholder(tf.int32, [None, None], name="questions")
    self.labels = tf.placeholder(tf.int32, [None, ], name="labels")

    # Input Preparation (Mask Creation)
    # ==================================================
    with tf.variable_scope("masks"):
        # Ids below zero are padding; masked ids are mapped to row 0 so the
        # embedding lookup stays in range.
        # (batch_size * max_sentence_count x max_sentence_length)
        sentence_mask = tf.cast(self.sentences >= 0, tf.int32)
        masked_sentences = tf.mul(self.sentences, sentence_mask)

        batch_size = tf.shape(self.questions)[0]
        # (batch_size x max_sent_per_doc): 1 where a sentence has any token.
        batch_mask = tf.reshape(tf.reduce_max(sentence_mask, 1), [batch_size, -1])
        # Number of non-padding sentences of every document.
        answer_counts = tf.cast(tf.reduce_sum(batch_mask, 1), tf.float32)

        # (batch_size x max_question_length)
        question_mask = tf.cast(self.questions >= 0, tf.int32)
        masked_question = tf.mul(question_mask, self.questions)
        question_mask_float = tf.expand_dims(tf.cast(question_mask, tf.float32), -1)

        max_sent_per_doc = tf.cast(tf.shape(sentence_mask)[0] / batch_size, tf.int32)
        max_sent_len = tf.shape(self.sentences)[1]
        max_ques_len = tf.shape(self.questions)[1]

        # Flatten every document to one token axis. The reshape is row-major,
        # so the axis is sentence-major: index = sentence * max_sent_len + word.
        # (batch_size x max_sent_per_doc * max_sent_len)
        document_mask = tf.cast(
            tf.reshape(sentence_mask, [batch_size, max_sent_per_doc * max_sent_len]),
            tf.float32)

    # Embeddings
    # ==================================================
    with tf.variable_scope("embeddings"):
        if embedding_matrix is None:
            self.W_embeddings = tf.get_variable(
                shape=[vocab_size, embedding_size],
                initializer=tf.random_uniform_initializer(-0.01, 0.01),
                name="W_embeddings", dtype=tf.float32)
        else:
            # Option to use pre-trained embeddings.
            self.W_embeddings = tf.Variable(
                embedding_matrix, name="W_embeddings", dtype=tf.float32,
                trainable=embedding_trainable)

        masked_documents = tf.reshape(
            masked_sentences, [batch_size, max_sent_per_doc * max_sent_len])
        # (batch_size x max_sent_per_doc * max_sent_len x embedding_size)
        # Padding positions are not zeroed here; they are excluded through
        # `attended_mask` below. (The previous masked copy multiplied a
        # rank-3 tensor by the rank-2 mask without expanding it and was
        # never consumed, so it has been removed.)
        document_embeddings = tf.gather(self.W_embeddings, masked_documents)

        # (batch_size x max_question_length x embedding_size)
        question_embeddings = tf.gather(self.W_embeddings, masked_question)
        masked_question_embeddings = tf.mul(question_embeddings, question_mask_float)

    # Query Representation (RNN decoder with attention over the document)
    # ==================================================
    with tf.variable_scope("query-representation"):
        # Mean over the padded question axis of the masked embeddings.
        # (batch_size x embedding_size)
        question_cbow = tf.reduce_mean(masked_question_embeddings, 1)

        self.decoder_cell = GRUCell(state_size=hidden_size, input_size=embedding_size * 2,
                                    scope="GRU_decoder")
        self.bilinearf = BilinearFunction(attending_size=hidden_size,
                                          attended_size=embedding_size)

        # NOTE(review): `start_state` has embedding_size features while the
        # cell state has hidden_size; these only agree when the two sizes are
        # equal -- confirm against rnn_decoder_attention (original comment
        # suggested starting from zeros).
        hidden_states_decoder, last_state_decoder, alpha_weights_time = \
            rnn_decoder_attention(cell=self.decoder_cell,
                                  start_state=question_cbow,
                                  inputs=question_embeddings,
                                  inputs_mask=tf.cast(question_mask, tf.float32),
                                  attentionf=self.bilinearf,
                                  attended=document_embeddings,
                                  attended_mask=tf.cast(document_mask, tf.float32))

        # Assumes alpha_weights_time is (batch x context_time x max_ques_len),
        # as stated by the original comment -- TODO confirm.
        # context_time is sentence-major (see document_mask), so the sentence
        # axis must come before the word axis when un-flattening; the previous
        # [max_sent_len, max_sent_per_doc] order pooled weights across the
        # wrong sentences.
        alpha_weights_time_prime = tf.reshape(
            alpha_weights_time,
            [batch_size, max_sent_per_doc, max_sent_len, max_ques_len])

        # Pool the word-level attention of every sentence.
        # (batch_size x max_sent_per_doc x max_ques_len)
        alpha_weights_per_qword = tf.reduce_sum(alpha_weights_time_prime, 2)
        # (batch_size x max_sent_per_doc)
        sent_score = tf.reduce_sum(alpha_weights_per_qword, 2)

        # Normalise per example so that every row sums to one. Dividing by
        # the sum over the whole batch (as before) made the values depend on
        # the other examples in the batch.
        self.probabilities = tf.div(
            sent_score, tf.expand_dims(tf.reduce_sum(sent_score, 1), -1))

    # Loss
    # ==================================================
    with tf.variable_scope("prediction"):
        one_hot_labels = tf.one_hot(self.labels, max_sent_per_doc, dtype=tf.float32)
        likelihoods = tf.reduce_sum(tf.mul(self.probabilities, one_hot_labels), 1)
        # Scaled by the number of non-padding sentences of the document.
        likelihoods = tf.div(likelihoods, answer_counts)
        # Small constant keeps the log finite when a likelihood is zero.
        log_likelihoods = tf.log(likelihoods + 1e-20)
        # Mean negative log-likelihood over the batch.
        self.loss = tf.div(tf.mul(tf.reduce_sum(log_likelihoods), -1),
                           tf.cast(batch_size, tf.float32))

        predicted = tf.cast(tf.argmax(self.probabilities, 1), tf.int32)
        correct_vector = tf.cast(tf.equal(self.labels, predicted), tf.float32,
                                 name="correct_vector")
        self.accuracy = tf.reduce_mean(correct_vector)
forward_cell_q = GRUCell(state_size, input_size, scope="GRU-Forward-Q") backward_cell_q = GRUCell(state_size, input_size, scope="GRU-Backward-Q") # hidden_states_d, last_state_d = rnn(forward_cell_d, document_embedding, seq_lens_d) # hidden_states_q, last_state_q = rnn(forward_cell_q, question_embedding, seq_lens_q) hidden_states_d, last_state_d = bidirectional_rnn(forward_cell_d, backward_cell_d, \ document_embedding, seq_lens_d, concatenate=True) hidden_states_q, last_state_q = bidirectional_rnn(forward_cell_q, backward_cell_q, \ question_embedding, seq_lens_q, concatenate=True) with tf.variable_scope("attention"): time_mask = tf.sequence_mask(seq_lens_d, dtype=tf.float32) # Attention Layer attention = BilinearFunction(attending_size=state_size * 2, attended_size=state_size * 2) alpha_weights, attend_result = attention(attending=last_state_q, attended=hidden_states_d, \ seq_lens=seq_lens_d) with tf.variable_scope("prediction"): W_predict = tf.get_variable(name="predict_weight", shape=[state_size*2, max_entities], \ initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1)) b_predict = tf.get_variable(name="predict_bias", shape=[max_entities], initializer=tf.random_normal_initializer( mean=0.0, stddev=0.1)) # Dimensions (batch_size x state_size*2) prediction_probs_unnormalized = tf.matmul(attend_result, W_predict) + b_predict # Custom Softmax b/c need to use time_mask --------------------
def __init__(self, max_entities, hidden_size=128, vocab_size=50000, embedding_dim=100, batch_size=32):
    """Build the attentive-reader graph that predicts an entity id.

    Document and question are encoded with bidirectional GRUs, the question
    state attends over the document states with a bilinear function, and the
    attended vector is projected to ``max_entities`` logits. A masked softmax
    restricts the prediction to the first ``input_m`` entities per example.

    Args:
        max_entities: Size of the output layer (number of candidate entities).
        hidden_size: State size of each GRU direction.
        vocab_size: Number of rows of the embedding table.
        embedding_dim: Width of the embedding table; also the GRU input size.
        batch_size: Not used by this constructor; the batch dimension of all
            placeholders is dynamic.
    """
    self.max_entities = max_entities
    tf.set_random_seed(1234)

    # Placeholders
    # The shared `None` dimensions are expected to agree (batch size).
    self.input_d = tf.placeholder(tf.int32, [None, None], name="input_d")
    self.input_q = tf.placeholder(tf.int32, [None, None], name="input_q")
    self.input_a = tf.placeholder(tf.int32, [None, ], name="input_a")
    self.input_m = tf.placeholder(tf.int32, [None, ], name="input_m")
    # The placeholders below belong to the experimental windowed-CBOW path,
    # which is disabled; they are not consumed by the graph built here.
    self.cbow_mask = tf.placeholder(tf.float32, [None, None, None], name="cbow_mask")
    self.window_size = tf.placeholder(tf.int32, name="window_size")
    self.num_docs = tf.placeholder(tf.int32, name="num_docs")
    self.max_length = tf.placeholder(tf.int32, name="max_length")

    # Sequence lengths = number of non-negative ids per row (negative ids
    # are padding).
    seq_lens_d = tf.reduce_sum(tf.cast(self.input_d >= 0, tf.int32), 1)
    seq_lens_q = tf.reduce_sum(tf.cast(self.input_q >= 0, tf.int32), 1)

    # Build each boolean mask once and cast it to the dtypes needed below.
    bool_mask_d = tf.sequence_mask(seq_lens_d)
    bool_mask_q = tf.sequence_mask(seq_lens_q)
    # (batch_size x max_entities): 1 for the first `input_m` entities.
    mask_m = tf.cast(tf.sequence_mask(self.input_m, maxlen=max_entities), dtype=tf.float32)

    # Masked ids are mapped to row 0 so the embedding lookup stays in range.
    masked_d = tf.mul(self.input_d, tf.cast(bool_mask_d, tf.int32))
    masked_q = tf.mul(self.input_q, tf.cast(bool_mask_q, tf.int32))
    one_hot_a = tf.one_hot(self.input_a, self.max_entities)

    # Building Graph (Network Layers)
    # ==================================================
    with tf.device('/cpu:0'), tf.variable_scope("embedding"):
        W_embeddings = tf.get_variable(
            shape=[vocab_size, embedding_dim],
            initializer=tf.random_uniform_initializer(-0.01, 0.01),
            name="W_embeddings")
        # TODO: add an option to use pre-trained embeddings.

        # Dimensions: batch x max_length x embedding_dim
        document_embedding = tf.gather(W_embeddings, masked_d)
        question_embedding = tf.gather(W_embeddings, masked_q)

    with tf.variable_scope("bidirection_rnn"):
        # The document mask previously referenced the undefined name
        # `seq_lens`, which raised NameError while building the graph.
        mask_d = tf.cast(bool_mask_d, tf.float32)
        mask_q = tf.cast(bool_mask_q, tf.float32)

        # Bidirectional RNNs for Document and Question
        forward_cell_d = GRUCell(state_size=hidden_size, input_size=embedding_dim,
                                 scope="GRU-Forward-D")
        backward_cell_d = GRUCell(state_size=hidden_size, input_size=embedding_dim,
                                  scope="GRU-Backward-D")
        forward_cell_q = GRUCell(state_size=hidden_size, input_size=embedding_dim,
                                 scope="GRU-Forward-Q")
        backward_cell_q = GRUCell(state_size=hidden_size, input_size=embedding_dim,
                                  scope="GRU-Backward-Q")

        hidden_states_d, last_state_d = bidirectional_rnn(
            forward_cell_d, backward_cell_d, document_embedding, mask_d,
            concatenate=True)
        hidden_states_q, last_state_q = bidirectional_rnn(
            forward_cell_q, backward_cell_q, question_embedding, mask_q,
            concatenate=True)

    with tf.variable_scope("attention"):
        # Question state attends over all document states.
        attention = BilinearFunction(attending_size=hidden_size * 2,
                                     attended_size=hidden_size * 2)
        self.alpha_weights, self.attend_result = attention(
            attending=last_state_q, attended=hidden_states_d, time_mask=mask_d)

    with tf.variable_scope("prediction"):
        W_predict = tf.get_variable(
            name="predict_weight", shape=[hidden_size * 2, self.max_entities],
            initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))
        b_predict = tf.get_variable(
            name="predict_bias", shape=[self.max_entities],
            initializer=tf.constant_initializer(0.0))

        # Dimensions (batch_size x max_entities); logits of entities beyond
        # `input_m` are zeroed.
        predict_probs = (tf.matmul(self.attend_result, W_predict) + b_predict) * mask_m

        # Custom softmax because the entity mask must be applied.
        # Subtracting the row maximum is the usual stabilisation:
        #   e_x = exp(x - x.max(axis=1)); out = e_x / e_x.sum(axis=1)
        row_max = tf.expand_dims(tf.reduce_max(predict_probs, 1), -1)
        numerator = tf.exp(tf.sub(predict_probs, row_max)) * mask_m
        denom = tf.reduce_sum(numerator, 1)

        # Dimensions (batch x max_entities)
        predict_probs_normalized = tf.div(numerator, tf.expand_dims(denom, 1))

        likelihoods = tf.reduce_sum(tf.mul(predict_probs_normalized, one_hot_a), 1)
        # Small constant keeps the log finite when a likelihood is zero.
        log_likelihoods = tf.log(likelihoods + 1e-20)

        # Negative log-likelihood loss, summed over the batch.
        self.loss = tf.mul(tf.reduce_sum(log_likelihoods), -1)

        correct_vector = tf.cast(
            tf.equal(tf.argmax(one_hot_a, 1), tf.argmax(predict_probs_normalized, 1)),
            tf.float32, name="correct_vector")
        self.accuracy = tf.reduce_mean(correct_vector)
def __init__(self, hidden_size=128, vocab_size=65011, embedding_size=50, embedding_matrix=None,
             embedding_trainable=False, sentence_rep_str="cbow", question_rep_str="cbow"):
    """Build the configurable sentence-selection graph.

    Sentences and the question are each represented either by the mean of
    their word embeddings ("cbow") or by a bidirectional GRU ("rnn"). A
    bilinear attention of the question over the sentences of its document
    gives ``self.probabilities``.

    Args:
        hidden_size: State size of each GRU direction (used by "rnn").
        vocab_size: Number of rows of the embedding table when it is
            randomly initialised.
        embedding_size: Width of the embedding table.
        embedding_matrix: Optional pre-trained embedding matrix used as the
            initial value of ``W_embeddings``.
        embedding_trainable: Whether a pre-trained ``embedding_matrix`` is
            trainable. Not applied to the randomly initialised table.
        sentence_rep_str: "cbow" or "rnn"; selects the sentence encoder.
        question_rep_str: "cbow" or "rnn"; selects the question encoder.

    Raises:
        ValueError: If ``sentence_rep_str`` or ``question_rep_str`` is not
            one of the supported values.
    """
    tf.set_random_seed(1234)

    # Placeholders
    # ==================================================
    # (batch_size * max_sentence_count x max_sentence_length)
    self.sentences = tf.placeholder(tf.int32, [None, None], name="sentences")
    self.questions = tf.placeholder(tf.int32, [None, None], name="questions")
    self.labels = tf.placeholder(tf.int32, [None, ], name="labels")

    # Feature sizes of the two attention inputs, derived from the chosen
    # representations.
    if sentence_rep_str == "cbow":
        attended_size = embedding_size
    elif sentence_rep_str == "rnn":
        attended_size = hidden_size * 2
    else:
        raise ValueError("Invalid `sentence_rep_str` argument; choose 'cbow' or 'rnn'.")

    if question_rep_str == "cbow":
        attending_size = embedding_size
    elif question_rep_str == "rnn":
        attending_size = hidden_size * 2
    else:
        # The message used to list 'rnn-attention', which is not handled by
        # this constructor and would itself end up here.
        raise ValueError("Invalid `question_rep_str` argument; choose 'cbow' or 'rnn'.")

    # Input Preparation (Mask Creation)
    # ==================================================
    with tf.variable_scope("masks"):
        # Ids below zero are padding; masked ids are mapped to row 0 so the
        # embedding lookup stays in range.
        # (batch_size * max_sentence_count x max_sentence_length)
        sentence_mask = tf.cast(self.sentences >= 0, tf.int32)
        masked_sentences = tf.mul(self.sentences, sentence_mask)

        batch_size = tf.shape(self.questions)[0]

        # (batch_size x max_sent_per_doc): 1 where a sentence has any token.
        batch_mask = tf.reshape(tf.reduce_max(sentence_mask, 1), [batch_size, -1])
        # Number of non-padding sentences of every document.
        answer_counts = tf.cast(tf.reduce_sum(batch_mask, 1), tf.float32)

        # (batch_size x max_question_length)
        question_mask = tf.cast(self.questions >= 0, tf.int32)
        masked_question = tf.mul(question_mask, self.questions)
        question_mask_float = tf.expand_dims(tf.cast(question_mask, tf.float32), -1)

        max_sent_per_doc = tf.cast(tf.shape(sentence_mask)[0] / batch_size, tf.int32)

    # Embeddings
    # ==================================================
    with tf.variable_scope("embeddings"):
        if embedding_matrix is None:
            self.W_embeddings = tf.get_variable(
                shape=[vocab_size, embedding_size],
                initializer=tf.random_uniform_initializer(-0.01, 0.01),
                name="W_embeddings", dtype=tf.float32)
        else:
            # Option to use pre-trained embeddings.
            self.W_embeddings = tf.Variable(
                embedding_matrix, name="W_embeddings", dtype=tf.float32,
                trainable=embedding_trainable)

        # (batch_size * max_sent_per_doc x max_sentence_length x embedding_size)
        sentence_embeddings = tf.gather(self.W_embeddings, masked_sentences)
        masked_sentence_embeddings = tf.mul(
            sentence_embeddings,
            tf.cast(tf.expand_dims(sentence_mask, -1), tf.float32))

        # (batch_size x max_question_length x embedding_size)
        question_embeddings = tf.gather(self.W_embeddings, masked_question)
        masked_question_embeddings = tf.mul(question_embeddings, question_mask_float)

    # Sentence Representation (CBOW or RNN)
    # ==================================================
    with tf.variable_scope("sentence-representation"):
        if sentence_rep_str == "cbow":
            # Mean over the padded word axis of the masked embeddings.
            # (batch_size * max_sentence_count x embedding_size)
            cbow_sentences = tf.reduce_mean(masked_sentence_embeddings, 1)
            # Regroup by document:
            # (batch_size x max_sent_per_doc x embedding_size)
            doc_sentences = tf.reshape(cbow_sentences, [batch_size, -1, embedding_size])
            self.sentence_representation = cbow_sentences
        elif sentence_rep_str == "rnn":
            self.forward_cell_d = GRUCell(state_size=hidden_size, input_size=embedding_size,
                                          scope="GRU-Forward-D")
            self.backward_cell_d = GRUCell(state_size=hidden_size, input_size=embedding_size,
                                           scope="GRU-Backward-D")
            self.hidden_states_d, last_state_d = bidirectional_rnn(
                self.forward_cell_d, self.backward_cell_d,
                sentence_embeddings, tf.cast(sentence_mask, tf.float32),
                concatenate=True)
            # Regroup by document:
            # (batch_size x max_sent_per_doc x hidden_size * 2)
            doc_sentences = tf.reshape(last_state_d, [batch_size, -1, hidden_size * 2])
            self.sentence_representation = last_state_d

    # Query Representation (CBOW or RNN)
    # ==================================================
    with tf.variable_scope("query-representation"):
        if question_rep_str == "cbow":
            # (batch_size x embedding_size)
            question_cbow = tf.reduce_mean(masked_question_embeddings, 1)
            self.question_representation = question_cbow
        elif question_rep_str == "rnn":
            self.forward_cell_q = GRUCell(state_size=hidden_size, input_size=embedding_size,
                                          scope="GRU-Forward-Q")
            self.backward_cell_q = GRUCell(state_size=hidden_size, input_size=embedding_size,
                                           scope="GRU-Backward-Q")
            self.hidden_states_q, last_state_q = bidirectional_rnn(
                self.forward_cell_q, self.backward_cell_q,
                question_embeddings, tf.cast(question_mask, tf.float32),
                concatenate=True)
            self.question_representation = last_state_q

    # Similarity Scoring
    # ==================================================
    # Bilinear attention of the question over the sentences
    # (related reading: https://arxiv.org/pdf/1605.07427v1.pdf).
    with tf.variable_scope("similarity-scoring"):
        attention = BilinearFunction(attending_size=attending_size,
                                     attended_size=attended_size)
        alpha_weights, attend_result = attention(
            self.question_representation, attended=doc_sentences,
            time_mask=tf.cast(batch_mask, tf.float32))
        # (batch_size x max_sent_per_doc)
        self.probabilities = alpha_weights

    # Loss
    # ==================================================
    with tf.variable_scope("prediction"):
        one_hot_labels = tf.one_hot(self.labels, max_sent_per_doc, dtype=tf.float32)
        likelihoods = tf.reduce_sum(tf.mul(self.probabilities, one_hot_labels), 1)
        # Scaled by the number of non-padding sentences of the document.
        likelihoods = tf.div(likelihoods, answer_counts)
        # Small constant keeps the log finite when a likelihood is zero.
        log_likelihoods = tf.log(likelihoods + 1e-20)
        # Mean negative log-likelihood over the batch.
        self.loss = tf.div(tf.mul(tf.reduce_sum(log_likelihoods), -1),
                           tf.cast(batch_size, tf.float32))

        # Index of the highest-weighted sentence (int64, as returned by argmax).
        self.predict_labels = tf.argmax(self.probabilities, 1)
        self.correct_vector = tf.cast(
            tf.equal(self.labels, tf.cast(self.predict_labels, tf.int32)),
            tf.float64, name="correct_vector")
        self.accuracy = tf.reduce_mean(self.correct_vector)