def _scoring_f(self):
    with tf.device("/cpu:0"):
        E_subjs = tf.get_variable("E_s", [len(self._kb.get_symbols(1)), self._size])
        E_objs = tf.get_variable("E_o", [len(self._kb.get_symbols(2)), self._size])
        E_rels_s = tf.get_variable("E_r_s", [len(self._kb.get_symbols(0)), self._size])
        E_rels_o = tf.get_variable("E_r_o", [len(self._kb.get_symbols(0)), self._size])

        self.e_subj = tf.tanh(tf.nn.embedding_lookup(E_subjs, self._subj_input))
        self.e_obj = tf.tanh(tf.nn.embedding_lookup(E_objs, self._obj_input))
        self.e_rel_s = tf.tanh(tf.nn.embedding_lookup(E_rels_s, self._rel_input))
        self.e_rel_o = tf.tanh(tf.nn.embedding_lookup(E_rels_o, self._rel_input))

        # score = rel_s . subj + rel_o . obj (separate relation embeddings for the
        # subject and object arguments)
        score = tf_util.batch_dot(self.e_rel_s, self.e_subj) + \
                tf_util.batch_dot(self.e_rel_o, self.e_obj)
        return score

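# Note: tf_util.batch_dot is used throughout these scoring functions but is not
# shown in this section. A minimal sketch of what it is assumed to compute (a
# row-wise dot product of two [batch, size] tensors, returning one score per row),
# written against the same TF 0.x API and `tf` import used by the code above:
def batch_dot(a, b):
    # a, b: [batch_size, size] -> [batch_size]
    return tf.reduce_sum(a * b, reduction_indices=[1], keep_dims=False)
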
def _scoring_f(self):
    with tf.device("/cpu:0"):
        E_rels = tf.get_variable("E_r", [len(self._kb.get_symbols(0)), self._size])
        E_tup_rels = tf.get_variable("E_tup_r",
                                     [2 * self._num_relations + 1, self._size])  # rels + inv rels + default rel

        # learned softmax temperature ('blur'): sigmoid of a trainable scalar
        blur_factor = tf.get_variable("blur", shape=[1],
                                      initializer=tf.constant_initializer(0.0))
        blur_factor = tf.sigmoid(blur_factor)

        # duplicate rels to fit with observations
        e_rel = tf.gather(tf.tanh(tf.nn.embedding_lookup(E_rels, self._rel_input)),
                          self._gather_rels_input)
        e_tup_rels = tf.tanh(tf.nn.embedding_lookup(E_tup_rels, self._sparse_values_input))

        scores_flat = tf_util.batch_dot(e_rel, e_tup_rels)
        # for softmax set empty cells to something very small, so weight becomes practically zero
        scores = tf.sparse_to_dense(self._sparse_indices_input, self._shape_input,
                                    scores_flat, default_value=-1e-3)
        softmax = tf.nn.softmax(scores * blur_factor)
        weighted_scores = tf.reduce_sum(scores * softmax, reduction_indices=[1], keep_dims=False)
        return weighted_scores

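# Illustration (not part of the original code): the aggregation above pools the
# per-observation match scores of each query with a softmax over those same
# scores, i.e. a self-weighted average per row. The same pattern in NumPy on
# made-up toy values:
import numpy as np
scores = np.array([[2.0, 0.5, -1e-3],      # row = one query; -1e-3 marks an empty cell
                   [1.0, -1e-3, -1e-3]])
blur = 1.0 / (1.0 + np.exp(-0.0))          # sigmoid of the 'blur' variable, 0.5 at initialization
e = np.exp(scores * blur - np.max(scores * blur, axis=1, keepdims=True))
softmax = e / e.sum(axis=1, keepdims=True)
weighted_scores = np.sum(scores * softmax, axis=1)   # one aggregated score per query
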
def _scoring_f(self):
    with tf.device("/cpu:0"):
        E_rels = tf.get_variable("E_r", [len(self._kb.get_symbols(0)), self._size])
        E_tups = tf.get_variable("E_t", [len(self.__tuple_lookup), self._size])

        self.e_rel = tf.tanh(tf.nn.embedding_lookup(E_rels, self._rel_input))
        self.e_tup = tf.tanh(tf.nn.embedding_lookup(E_tups, self._tuple_input))

        # matrix-factorization style score: rel . tuple, with one embedding per
        # (subject, object) tuple
        return tf_util.batch_dot(self.e_rel, self.e_tup)

def _scoring_f(self):
    with tf.device("/cpu:0"):
        E_rels = tf.get_variable("E_r", [len(self._kb.get_symbols(0)), self._size])
        E_tup_rels = tf.get_variable("E_tup_r",
                                     [2 * self._num_relations + 1, self._size])  # rels + inv rels + default rel

        self.e_rel = tf.tanh(tf.nn.embedding_lookup(E_rels, self._rel_input))

        # weighted sum of tuple rel embeddings
        sparse_tensor = tf.SparseTensor(self._sparse_indices_input,
                                        self._sparse_values_input,
                                        self._shape_input)
        # mean embedding (embedding_lookup_sparse with sp_weights=None averages the looked-up rows)
        self.e_tuple_rels = tf.tanh(tf.nn.embedding_lookup_sparse(E_tup_rels, sparse_tensor, None))

        return tf_util.batch_dot(self.e_rel, self.e_tuple_rels)

def _scoring_f(self):
    with tf.device("/cpu:0"):
        E_subjs = tf.get_variable("E_s", [len(self._kb.get_symbols(1)), self._size])
        E_objs = tf.get_variable("E_o", [len(self._kb.get_symbols(2)), self._size])
        E_rels = tf.get_variable("E_r", [len(self._kb.get_symbols(0)), self._size])

        self.e_subj = tf.tanh(tf.nn.embedding_lookup(E_subjs, self._subj_input))
        self.e_obj = tf.tanh(tf.nn.embedding_lookup(E_objs, self._obj_input))
        self.e_rel = tf.sigmoid(tf.nn.embedding_lookup(E_rels, self._rel_input))

        # DistMult-style score: rel . (subj * obj)
        s_o_prod = self.e_obj * self.e_subj
        score = tf_util.batch_dot(self.e_rel, s_o_prod)
        return score

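# Illustration (not part of the original code): with batch_dot assumed to be a
# row-wise dot product, the score above is the DistMult-style trilinear form
#   score_b = sum_k e_rel[b, k] * e_subj[b, k] * e_obj[b, k]
# A tiny NumPy check of that equivalence on made-up values:
import numpy as np
e_rel = np.array([[0.5, 1.0]])
e_subj = np.array([[1.0, 2.0]])
e_obj = np.array([[3.0, 4.0]])
assert np.allclose(np.sum(e_rel * (e_subj * e_obj), axis=1),           # batch_dot(rel, subj * obj)
                   np.einsum("bk,bk,bk->b", e_rel, e_subj, e_obj))     # trilinear form
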
def _scoring_f(self):
    with tf.device("/cpu:0"):
        E_rels = tf.get_variable("E_r", [len(self._kb.get_symbols(0)), self._size])
        E_tup_rels = tf.get_variable("E_tup_r",
                                     [2 * self._num_relations + 1, self._size])  # rels + inv rels + default rel

        # duplicate rels to fit with observations
        e_rel = tf.gather(tf.tanh(tf.nn.embedding_lookup(E_rels, self._rel_input)),
                          self._gather_rels_input)
        e_tup_rels = tf.tanh(tf.nn.embedding_lookup(E_tup_rels, self._sparse_values_input))

        scores_flat = tf_util.batch_dot(e_rel, e_tup_rels)
        # for softmax set empty cells to something very small, so weight becomes practically zero
        scores = tf.sparse_to_dense(self._sparse_indices_input, self._shape_input,
                                    scores_flat, default_value=-1e-3)
        softmax = tf.nn.softmax(scores)
        weighted_scores = tf.reduce_sum(scores * softmax, reduction_indices=[1], keep_dims=False)
        return weighted_scores

def _retrieve_answer(self, query):
    """
    Retrieves the answer based on the specified query. Implements consecutive
    updates to the query and the answer.
    :return: answer; if num_hops is 0, returns the query itself
    """
    query, supp_queries = tf.dynamic_partition(query, self._query_partition, 2)
    with tf.variable_scope("support"):
        num_queries = tf.shape(query)[0]
        with tf.device("/cpu:0"):
            _, supp_answer_output_ids = tf.dynamic_partition(self._answer_input,
                                                             self._query_partition, 2)
            _, supp_answer_input_ids = tf.dynamic_partition(self._answer_word_input,
                                                            self._query_partition, 2)
            supp_answers = tf.nn.embedding_lookup(self.output_embedding, supp_answer_output_ids)
            aligned_supp_answers = tf.gather(supp_answers, self._support_ids)  # and with respective answers

            if self._max_hops > 1:  # used in multihop
                answer_words = tf.nn.embedding_lookup(self.input_embedding, supp_answer_input_ids)
                aligned_answers_input = tf.gather(answer_words, self._support_ids)

        self.support_scores = []
        query_as_answer = tf.contrib.layers.fully_connected(query, self._size,
                                                            activation_fn=None,
                                                            weights_initializer=None,
                                                            biases_initializer=None,
                                                            scope="query_to_answer")
        query_as_answer = query_as_answer * tf.sigmoid(
            tf.get_variable("query_as_answer_gate", tuple(),
                            initializer=tf.constant_initializer(0.0)))
        current_answer = query_as_answer
        current_query = query

        aligned_support = tf.gather(supp_queries, self._support_ids)  # align supp_queries with queries
        collab_support = tf.gather(query, self._collab_support_ids)   # align supp_queries with queries
        aligned_support = tf.concat(0, [aligned_support, collab_support])
        query_ids = tf.concat(0, [self._query_ids, self._collab_query_ids])

        self.answer_weights = []

        for i in range(self._max_hops):
            if i > 0:
                tf.get_variable_scope().reuse_variables()
            collab_queries = tf.gather(current_query, self._collab_query_ids)  # align supp_queries with queries
            aligned_queries = tf.gather(current_query, self._query_ids)        # align queries
            aligned_queries = tf.concat(0, [aligned_queries, collab_queries])

            with tf.variable_scope("support_scores"):
                scores = tf_util.batch_dot(aligned_queries, aligned_support)
                self.support_scores.append(scores)
                score_max = tf.gather(tf.segment_max(scores, query_ids), query_ids)
                e_scores = tf.exp(scores - score_max)
                norm = tf.unsorted_segment_sum(e_scores, query_ids, num_queries) + 0.00001  # for zero norms
                norm = tf.expand_dims(norm, 1)
                e_scores = tf.expand_dims(e_scores, 1)

            with tf.variable_scope("support_answers"):
                aligned_supp_answers_with_collab = tf.concat(0, [aligned_supp_answers, collab_queries])
                weighted_supp_answers = tf.unsorted_segment_sum(e_scores * aligned_supp_answers_with_collab,
                                                                query_ids, num_queries) / norm

            with tf.variable_scope("support_queries"):
                weighted_supp_queries = tf.unsorted_segment_sum(e_scores * aligned_support,
                                                                query_ids, num_queries) / norm

            with tf.variable_scope("answer_accumulation"):
                answer_p_max = tf.reduce_max(tf.nn.softmax(self._score_candidates(weighted_supp_answers)),
                                             [1], keep_dims=True)
                answer_weight = tf.contrib.layers.fully_connected(
                    tf.concat(1, [query_as_answer * weighted_supp_answers,
                                  weighted_supp_queries * current_query,
                                  answer_p_max]),
                    1,
                    activation_fn=tf.nn.sigmoid,
                    weights_initializer=tf.constant_initializer(0.0),
                    biases_initializer=tf.constant_initializer(0.0),
                    scope="answer_weight")

                new_answer = answer_weight * weighted_supp_answers + current_answer
                # this condition allows for setting a varying number of hops
                current_answer = tf.cond(tf.greater(self.num_hops, i),
                                         lambda: new_answer,
                                         lambda: current_answer)

            self.answer_weights.append(answer_weight)

            if i < self._max_hops - 1:
                with tf.variable_scope("query_update"):
                    # prepare subsequent query
                    aligned_answers_input_with_collab = tf.concat(0, [aligned_answers_input, collab_queries])
                    weighted_answer_words = tf.unsorted_segment_sum(
                        e_scores * aligned_answers_input_with_collab,
                        query_ids, num_queries) / norm
                    c = tf.contrib.layers.fully_connected(
                        tf.concat(1, [current_query, weighted_supp_queries, weighted_answer_words]),
                        self._size,
                        activation_fn=tf.tanh,
                        scope="update_candidate",
                        weights_initializer=None,
                        biases_initializer=None)
                    gate = tf.contrib.layers.fully_connected(
                        tf.concat(1, [current_query, weighted_supp_queries]),
                        self._size,
                        activation_fn=tf.sigmoid,
                        weights_initializer=None,
                        scope="update_gate",
                        biases_initializer=tf.constant_initializer(1))
                    current_query = gate * current_query + (1 - gate) * c

    return current_answer

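# Illustration (not part of the original code): the attention over supporting
# evidence above is a segment-wise softmax -- scores belonging to the same query
# (grouped by query_ids) are normalized together via segment_max /
# unsorted_segment_sum. The same pattern in NumPy on made-up toy values:
import numpy as np
scores = np.array([1.0, 2.0, 0.5])   # one score per supporting fact
query_ids = np.array([0, 0, 1])      # which query each fact supports
num_queries = 2
seg_max = np.array([scores[query_ids == q].max() for q in range(num_queries)])
e_scores = np.exp(scores - seg_max[query_ids])   # subtract per-query max for numerical stability
norm = np.array([e_scores[query_ids == q].sum() for q in range(num_queries)]) + 1e-5
weights = e_scores / norm[query_ids]             # attention weight per supporting fact
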
def __init__(self, size, batch_size, vocab_size, answer_vocab_size, max_length,
             is_train=True, learning_rate=1e-2, composition="GRU", max_hops=0,
             devices=None, keep_prob=1.0):
    """
    :param size: size of hidden states
    :param batch_size: initial batch_size (adapts automatically)
    :param vocab_size: size of input vocabulary (vocabulary of contexts)
    :param answer_vocab_size: size of answer (candidate) vocabulary
    :param max_length: maximum length of an individual context
    :param is_train:
    :param learning_rate:
    :param composition: "GRU", "LSTM" and "BiGRU" are possible
    :param max_hops: maximum number of hops; can be lowered at run time by assigning a
                     smaller value to the variable (self.)num_hops, which is initialized with max_hops
    :param devices: defaults to ["/cpu:0"], but can be a list of up to 3 devices;
                    the model is automatically partitioned over these devices
    :param keep_prob: 1.0 - dropout rate, applied to the input embeddings
    """
    self._vocab_size = vocab_size
    self._max_length = max_length
    self._size = size
    self._batch_size = batch_size
    self._is_train = is_train
    self._composition = composition
    self._max_hops = max_hops
    self._device0 = devices[0] if devices is not None else "/cpu:0"
    self._device1 = devices[1 % len(devices)] if devices is not None else "/cpu:0"
    self._device2 = devices[2 % len(devices)] if devices is not None else "/cpu:0"
    self._init = tf.random_normal_initializer(0.0, 0.1)

    with tf.device(self._device0):
        with tf.variable_scope(self.name(), initializer=tf.contrib.layers.xavier_initializer()):
            self._init_inputs()
            self.keep_prob = tf.get_variable("keep_prob", [],
                                             initializer=tf.constant_initializer(keep_prob))
            with tf.device("/cpu:0"):
                # embeddings
                self.output_embedding = tf.get_variable("E_candidate", [answer_vocab_size, self._size],
                                                        initializer=self._init)
                self.input_embedding = tf.get_variable("E_words", [vocab_size, self._size],
                                                       initializer=self._init)
                answer, _ = tf.dynamic_partition(self._answer_input, self._query_partition, 2)
                lookup_individual = tf.nn.embedding_lookup(self.output_embedding, answer)
                cands, _ = tf.dynamic_partition(self._answer_candidates, self._query_partition, 2)
                self.candidate_lookup = tf.nn.embedding_lookup(self.output_embedding, cands)

            self.num_hops = tf.Variable(self._max_hops, trainable=False, name="num_queries")
            self.query = self._comp_f()
            answer = self._retrieve_answer(self.query)
            self.score = tf_util.batch_dot(lookup_individual, answer)
            self.scores_with_negs = self._score_candidates(answer)

            if is_train:
                self.learning_rate = tf.Variable(float(learning_rate), trainable=False, name="lr")
                self.global_step = tf.Variable(0, trainable=False, name="step")
                self.opt = tf.train.AdamOptimizer(self.learning_rate)
                current_batch_size = tf.gather(tf.shape(self.scores_with_negs), [0])
                loss = math_ops.reduce_sum(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        self.scores_with_negs,
                        tf.tile(tf.constant([0], tf.int64), current_batch_size)))
                train_params = tf.trainable_variables()
                self.training_weight = tf.Variable(1.0, trainable=False, name="training_weight")
                self._loss = loss / math_ops.cast(current_batch_size, tf.float32)
                self._grads = tf.gradients(self._loss, train_params, self.training_weight,
                                           colocate_gradients_with_ops=True)
                if len(train_params) > 0:
                    grads, _ = tf.clip_by_global_norm(self._grads, 5.0)
                    self._update = self.opt.apply_gradients(zip(grads, train_params),
                                                            global_step=self.global_step)
                else:
                    self._update = tf.assign_add(self.global_step, 1)

    self.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)

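# Hedged usage sketch (the class name QAModel is an assumption -- this section only
# shows the constructor and two methods): building the model and initializing its
# variables with the TF 0.x API used above.
#
#     model = QAModel(size=128, batch_size=32, vocab_size=50000,
#                     answer_vocab_size=10000, max_length=100,
#                     is_train=True, learning_rate=1e-2, composition="GRU",
#                     max_hops=3, devices=["/gpu:0"], keep_prob=0.9)
#     sess = tf.Session()
#     sess.run(tf.initialize_all_variables())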