def inference(self, subj, rel, obj):
    """Score a (subject, relation, object) triple.

    The three inputs are concatenated along axis 1, passed through a
    dropout-regularised hidden layer and then projected to a single unit
    with a ``tanh`` activation.  The result is shifted and scaled by
    ``x / 2 + 0.5``, which maps the ``tanh`` range [-1, 1] onto [0, 1].

    Args:
        subj: Subject representation.
        rel: Relation representation.
        obj: Object representation.

    Returns:
        The rescaled score tensor (one unit per row of the input).
    """
    with tf.variable_scope('ffnn1'):
        hidden = tf.nn.dropout(
            tf.concat([subj, rel, obj], axis=1),
            self.keep_prob,
        )
        hidden = tf.nn.dropout(
            linear(hidden,
                   output_size=self.ffnn_size,
                   activation=self.activation),
            self.keep_prob,
        )

    with tf.variable_scope('ffnn2'):
        # tanh yields values in [-1, 1]; halve and shift so the score can be
        # read as a true/false confidence in [0, 1].
        squashed = linear(hidden, output_size=1, activation=tf.nn.tanh)
        rescaled = squashed / 2 + 0.5
    return rescaled
def calc_q_values(self, state):
    """Map a state representation to one Q-value per vocabulary entry.

    ``state`` goes through ``self.num_ff_layers`` hidden layers (each
    followed by dropout) and a final output layer of width
    ``self.vocab_size.card``.  Everything is created under the
    ``'Inference'`` variable scope with ``tf.AUTO_REUSE``, so repeated calls
    share the same variables.

    Args:
        state: Input tensor fed to the first hidden layer.

    Returns:
        The output of the final ``linear`` layer.
    """
    with tf.variable_scope('Inference', reuse=tf.AUTO_REUSE):
        hidden = state
        # Hidden layers are numbered from 1: Forward1, Forward2, ...
        for layer_no in range(1, self.num_ff_layers + 1):
            with tf.variable_scope('Forward%d' % layer_no) as layer_scope:
                projected = linear(
                    hidden,
                    output_size=self.hidden_size,
                    activation=self.hidden_activation,
                    scope=layer_scope,
                )
                hidden = tf.nn.dropout(projected, keep_prob=self.keep_prob)

        with tf.variable_scope('Output') as output_scope:
            q_values = linear(
                hidden,
                output_size=self.vocab_size.card,
                activation=self.output_activation,
                scope=output_scope,
            )
    return q_values
def encode(self, board_repls, num_ffnn_layers):
    """Encode board representations with a CNN followed by ReLU layers.

    The input is run through ``cnn`` with filter sizes 3, 4, 5 and 10, then
    through ``num_ffnn_layers`` ReLU-activated ``linear`` layers.  Dropout
    with ``self.keep_prob`` is applied after the CNN and after every layer.

    Args:
        board_repls: Board representation tensor given to ``cnn``.
        num_ffnn_layers: Number of feed-forward layers stacked on the CNN.

    Returns:
        The encoded representation after the last dropout.
    """
    with tf.variable_scope('Encode'):
        encoded = tf.nn.dropout(
            cnn(board_repls, filter_sizes=[3, 4, 5, 10]),
            self.keep_prob,
        )
        for _layer in range(num_ffnn_layers):
            encoded = tf.nn.dropout(
                linear(encoded, activation=tf.nn.relu),
                self.keep_prob,
            )
    return encoded
def predict_relation(self, query_emb, mention_emb, mention_scores, is_query_subjective):
    '''Compute relation logits between one query and every mention.

    The query embedding is tiled to pair it with each mention, the pair is
    passed through `self.ffnn_depth` feed-forward layers, and an output
    projection (`self.rel_w` plus a bias) yields one logit per real relation.
    A constant-zero "no relation" logit is prepended, and `mention_scores`
    is added to every real-relation logit.

    Args:
    - query_emb: documented by the original author as [emb]; it is tiled
        with a rank-2 `multiples`, so presumably [1, emb] -- TODO confirm.
    - mention_emb: [n_mentions, emb]
    - mention_scores: per-mention scores; expanded on axis 1 and tiled across
        the relation axis, so presumably [n_mentions] -- TODO confirm.
    - is_query_subjective: A boolean. If true, the pair is built as
        (query, mention), i.e. the query is the subject of the triple
        (query, rel, mention); otherwise as (mention, query) for
        (mention, rel, query).

    Returns:
      Unnormalised logits with `self.vocab.rel.size` columns (column 0 is
      "no relation"). No softmax is applied in this function.

    Side effects:
      Calls `reuse_variables()` on the current variable scope before
      returning, so that a later call (e.g. with the opposite
      `is_query_subjective`) reuses the same network variables.

    NOTE(review): this method was reconstructed from a whitespace-collapsed
    source. The nesting of the variable scopes and the position of the
    `reuse_variables()` call are a best guess -- verify against existing
    checkpoints / the original file before relying on variable names.
    '''
    with tf.variable_scope('pair_emb'):
        n_mentions = shape(mention_emb, -2)
        # Repeat the query once per mention so it can be paired row-wise.
        query_emb = tf.tile(query_emb, [n_mentions, 1]) # [n_mentions, emb]
        # The order of concatenation encodes which side is the subject.
        if is_query_subjective:
            pair_emb = tf.concat([query_emb, mention_emb], -1) # [n_mentions, 2 * emb]
        else:
            pair_emb = tf.concat([mention_emb, query_emb], -1) # [n_mentions, 2 * emb]
        for i in range(self.ffnn_depth):
            with tf.variable_scope('Forward%d' % i):
                pair_emb = linear(pair_emb, output_size=self.ffnn_size, activation=self.activation)
                pair_emb = tf.nn.dropout(pair_emb, keep_prob=self.keep_prob)
        with tf.variable_scope('Output'):
            # The projection matrix lives on the instance (self.rel_w); only
            # the bias is created here. The bias has one entry per relation
            # except "no relation".
            w = self.rel_w
            b = tf.get_variable('biases', [self.vocab.rel.size - 1])
            x = pair_emb
            logits = tf.nn.xw_plus_b(x, w, b)
        # "No relation" gets a fixed logit of zero in column 0.
        no_relation = tf.zeros([shape(mention_scores, 0), 1], tf.float32)
        logits = tf.concat([no_relation, logits], axis=-1)
        # type A: leave the "no relation" column untouched and add the
        # mention score to every real-relation column.
        mention_unconfidence_penalty = tf.concat([
            no_relation,
            tf.tile(tf.expand_dims(mention_scores, 1), [1, self.vocab.rel.size - 1])
        ], axis=-1)
        # type B (disabled alternative): subtract the mention score from the
        # "no relation" column and leave the real-relation columns untouched.
        # mention_unconfidence_penalty = tf.concat([
        #     -tf.expand_dims(mention_scores, 1),
        #     #tf.tile(tf.expand_dims(mention_scores, 1), [1, shape(logits, 1)-1])
        #     tf.zeros([shape(logits, 0), self.vocab.rel.size-1], dtype=tf.float32)
        # ], axis=-1)
    # Switch the current scope to reuse mode so the next call shares variables.
    tf.get_variable_scope().reuse_variables()
    return logits + mention_unconfidence_penalty
def inference(self, mention_repls):
    """Produce category logits from per-context mention representations.

    The representations are summed over axis 1 and divided by
    ``self.num_contexts`` (expanded on axis 1), giving a mean over the real
    contexts of each entity.  The mean is then projected to
    ``self.vocab.category.size`` outputs.

    Args:
        mention_repls: Mention representations with the contexts on axis 1.

    Returns:
        Unnormalised logits; no softmax is applied here.
    """
    # Aggregate all context representations of an entity.
    summed = tf.reduce_sum(mention_repls, axis=1)  # [batch_size, output_size]

    # Divide by the actual number of contexts rather than the padded length:
    # some of the sentences fed to the placeholders can be dummies.
    averaged = summed / tf.expand_dims(self.num_contexts, axis=1)

    # Deliberately no tf.nn.softmax: the loss
    # (tf.nn.sparse_softmax_cross_entropy_with_logits) applies softmax
    # internally and expects raw logits.
    return linear(averaged, self.vocab.category.size)
def __init__(self, sess, config, encoder, tasks):
    """Build the task-discriminator graph and its adversarial/difference losses.

    Each task's ``adv_outputs`` is collected, concatenated along axis 0,
    passed through ``flip_gradient`` and classified into task ids by a
    linear layer.  ``self.outputs`` holds the softmax over tasks and
    ``self.loss`` the weighted sum of the adversarial loss and the
    difference loss.

    Args:
        sess: Forwarded to the parent constructor and kept as ``self.sess``.
        config: Forwarded to the parent constructor; ``adv_weight`` and
            ``diff_weight`` are read from it to weight the two loss terms.
        encoder: Kept as ``self.encoder``.
        tasks: Sequence of task objects exposing ``encoder`` and
            ``adv_outputs``; a task's index in the sequence is its label.
    """
    super().__init__(sess, config)
    self.sess = sess
    self.encoder = encoder

    adv_outputs = []
    task_ids = []
    # Accumulated over all tasks. Previously this was overwritten on every
    # iteration, so only the last task's term reached the final loss.
    l_diff = 0.0
    for i, t in enumerate(tasks):
        print('adv_outputs', t, t.adv_outputs)
        if isinstance(t.encoder, MultiEncoderWrapper):
            # Split the encoders' representations into the task-shared and
            # the task-private halves along the feature axis.
            assert len(t.adv_outputs.get_shape()
                       ) == 3  # [*, max_sentence_length, hidden_size]
            shared_repls, private_repls = tf.split(t.adv_outputs, 2, axis=2)

            # Take the average of the representations across all time steps.
            shared_repls = tf.reduce_mean(shared_repls, axis=1)
            private_repls = tf.reduce_mean(private_repls, axis=1)

            # This follows the paper, but is it really necessary to also
            # push apart vectors that were read from different sentences?
            similarities = tf.matmul(tf.transpose(shared_repls), private_repls)
            l_diff += squared_frobenius_norm(similarities)
        else:
            # No private half to compare against: this task contributes
            # nothing to the difference loss.
            shared_repls = t.adv_outputs

        # One task-id label per row of this task's shared representation.
        task_id = tf.tile([i], [shape(shared_repls, 0)])
        adv_outputs.append(shared_repls)
        task_ids.append(task_id)

    adv_outputs = flip_gradient(tf.concat(adv_outputs, axis=0))
    task_ids = tf.concat(task_ids, axis=0)
    task_ids = tf.one_hot(task_ids, len(tasks))

    # Keep the raw logits separate from the softmax output:
    # softmax_cross_entropy_with_logits applies softmax internally, so
    # feeding it already-normalised probabilities gives an incorrect loss.
    logits = linear(adv_outputs, len(tasks))
    self.outputs = tf.nn.softmax(logits)
    l_adv = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=task_ids)
    l_adv = tf.reduce_sum(l_adv)
    self.loss = config.adv_weight * l_adv + config.diff_weight * l_diff
def inference(self, config, board_repls):
    """Project board representations onto two output logits.

    Args:
        config: Not used by this method.
        board_repls: Board representation tensor to classify.

    Returns:
        The output of a ``linear`` layer of width 2 with no activation.
    """
    return linear(board_repls, output_size=2, activation=None)