def __init__(self, config):
    # Decode module for the renaming task: forces "rename": True on the
    # decoder config and optionally wires in a soft memory-mask
    # encoder/decoder pair.
    super().__init__()
    self.decoder = Decoder.build({**config["decoder"], "rename": True})
    self.soft_mem_mask = config["decoder"]["mem_mask"] == "soft"
    if self.soft_mem_mask:
        self.mem_encoder = Encoder.build(config["mem_encoder"])
        self.mem_decoder = Decoder.build(config["mem_decoder"])
        self.decoder.mem_encoder = self.mem_encoder
        self.decoder.mem_decoder = self.mem_decoder
    self.beam_size = config["test"]["beam_size"]
def __init__(self, config):
    # Decode module for the retyping task: records whether the configured
    # decoder predicts subtypes, and optionally wires in the same soft
    # memory-mask encoder/decoder pair as the renaming module.
    super().__init__()
    self.decoder = Decoder.build({**config["decoder"]})
    self.subtype = config["decoder"]["type"] in ["XfmrSubtypeDecoder"]
    self.soft_mem_mask = config["decoder"]["mem_mask"] == "soft"
    if self.soft_mem_mask:
        self.mem_encoder = Encoder.build(config["mem_encoder"])
        self.mem_decoder = Decoder.build(config["mem_decoder"])
        self.decoder.mem_encoder = self.mem_encoder
        self.decoder.mem_decoder = self.mem_decoder
    self.beam_size = config["test"]["beam_size"]
def __init__(self, config, config_load=None):
    # Top-level model: builds the shared encoder, then either a single
    # interleaved decode module or separate retyping/renaming modules,
    # depending on the data flags.
    super().__init__()
    if config_load is not None:
        config = config_load
    self.encoder = Encoder.build(config["encoder"])
    self.retype = config["data"].get("retype", False)
    self.rename = config["data"].get("rename", False)
    self.interleave = config["data"].get("interleave", False)
    if self.interleave:
        self.interleave_module = InterleaveDecodeModule(config)
    else:
        if self.retype:
            self.retyping_module = RetypingDecodeModule(config)
        if self.rename:
            self.renaming_module = RenamingDecodeModule(config)
    self.config = config
    self.vocab = Vocab.load(config["data"]["vocab_file"])
    self._preprocess()
    self.soft_mem_mask = config["decoder"]["mem_mask"] == "soft"
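
# A minimal sketch of the config dict the constructors above expect,
# inferred only from the keys they read. The concrete values below
# (type names, sizes, the vocab path) are illustrative assumptions,
# not the project's actual defaults.
EXAMPLE_CONFIG = {
    "encoder": {"type": "XfmrSequentialEncoder"},   # hypothetical type name
    "decoder": {"type": "XfmrDecoder", "mem_mask": "soft"},
    "mem_encoder": {"type": "XfmrMemEncoder"},      # used only for soft mem_mask
    "mem_decoder": {"type": "SimpleMemDecoder"},    # used only for soft mem_mask
    "data": {
        "vocab_file": "data/vocab.bpe10000",        # hypothetical path
        "retype": True,
        "rename": True,
        "interleave": False,
    },
    "test": {"beam_size": 5},
}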
import tensorflow as tf

# Assumed local helpers: the source references these without showing
# their imports, so the module paths here are illustrative.
# from encoder import Encoder
# from attention_gru_cell import AttentionGRUCell
# from utils import _add_gradient_noise


class DMN_PLUS(object):

    def __init__(self, embedding_input, input_mask, embedding_question,
                 vocab_size, max_mask_length, params):
        self.embedding_input = embedding_input
        self.input_mask = input_mask
        self.embedding_question = embedding_question
        self.vocab_size = vocab_size
        self.params = params
        self.encoder = Encoder(encoder_type=params.model.encoder_type,
                               num_layers=params.model.num_layers,
                               cell_type=params.model.cell_type,
                               num_units=params.model.num_units,
                               dropout=params.model.dropout)
        self.max_mask_length = max_mask_length
        self.output = self.inference()

    def get_predictions(self, output):
        preds = tf.nn.softmax(output)
        pred = tf.argmax(preds, 1)
        return pred

    def add_loss_op(self, output, labels):
        loss = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output,
                                                           labels=labels))
        # L2 regularization on non-bias variables. The original added a bare
        # scalar (`loss += learning_rate`) here, which is a no-op for the
        # gradients; an L2 penalty is the evident intent, so this assumes an
        # `l2` coefficient under params.train.
        for v in tf.trainable_variables():
            if 'bias' not in v.name.lower():
                loss += self.params.train.l2 * tf.nn.l2_loss(v)
        tf.summary.scalar('loss', loss)
        return loss

    def add_training_op(self, loss):
        opt = tf.train.AdamOptimizer(
            learning_rate=self.params.train.learning_rate)
        gvs = opt.compute_gradients(loss)
        if self.params.model.cap_grads:
            gvs = [(tf.clip_by_norm(grad, self.params.model.max_grad_val), var)
                   for grad, var in gvs]
        if self.params.model.noisy_grads:
            gvs = [(_add_gradient_noise(grad), var) for grad, var in gvs]
        train_op = opt.apply_gradients(gvs)
        return train_op

    def get_question_representation(self):
        # Question length = number of non-PAD positions, detected by
        # reducing over the embedding dimension.
        question_length = tf.reduce_sum(
            tf.to_int32(
                tf.not_equal(tf.reduce_max(self.embedding_question, axis=2),
                             self.params.data.PAD_ID)),
            axis=1)
        _, question = self.encoder.build(self.embedding_question,
                                         question_length,
                                         scope="encoder")
        return question[0]

    def get_input_representation(self):
        self.input_length = tf.reduce_max(self.input_mask, axis=1)
        input_encoder_outputs, _ = self.encoder.build(self.embedding_input,
                                                      self.input_length,
                                                      encoder_type="UNI",
                                                      scope="encoder")
        with tf.variable_scope("facts"):
            batch_size = tf.shape(self.input_mask)[0]
            max_mask_length = tf.shape(self.input_mask)[1]
            input_mask = self.input_mask

            def get_encoded_fact(i):
                # Gather the encoder outputs at each index listed in
                # input_mask[i], then zero-pad up to max_mask_length facts.
                mask_lengths = tf.reduce_sum(
                    tf.to_int32(tf.not_equal(input_mask[i],
                                             self.params.data.PAD_ID)),
                    axis=0)
                input_mask_temp = tf.boolean_mask(
                    input_mask[i],
                    tf.sequence_mask(mask_lengths, max_mask_length))
                encoded_facts = tf.gather_nd(
                    input_encoder_outputs[i],
                    tf.reshape(input_mask_temp, [-1, 1]))
                padding = tf.zeros(
                    tf.stack([max_mask_length - mask_lengths,
                              self.params.model.num_units]))
                return tf.concat([encoded_facts, padding], 0)

            facts_stacked = tf.map_fn(get_encoded_fact,
                                      tf.range(start=0, limit=batch_size),
                                      dtype=tf.float32)
            facts = tf.unstack(tf.transpose(facts_stacked, [1, 0, 2]),
                               num=self.max_mask_length)
        return facts

    def build_input_module(self):
        facts = self.get_input_representation()
        question = self.get_question_representation()
        return facts, question

    def get_attention(self, q_vec, prev_memory, fact_vec, reuse):
        """Attention score for one fact, from the DMN+ interaction features
        between the fact, the question, and the previous memory."""
        with tf.variable_scope("attention", reuse=reuse):
            features = [fact_vec * q_vec,
                        fact_vec * prev_memory,
                        tf.abs(fact_vec - q_vec),
                        tf.abs(fact_vec - prev_memory)]
            feature_vec = tf.concat(features, 1)
            attention = tf.contrib.layers.fully_connected(
                feature_vec,
                self.params.model.embed_dim,
                activation_fn=tf.nn.tanh,
                reuse=reuse,
                scope="fc1")
            attention = tf.contrib.layers.fully_connected(attention,
                                                          1,
                                                          activation_fn=None,
                                                          reuse=reuse,
                                                          scope="fc2")
        return attention

    def generate_episode(self, memory, q_vec, fact_vecs, hop_index):
        """Generate an episode by applying attention to the current fact
        vectors through a modified (attention-gated) GRU."""
        attentions = [
            tf.squeeze(self.get_attention(q_vec, memory, fv,
                                          bool(hop_index) or bool(i)),
                       axis=1)
            for i, fv in enumerate(fact_vecs)
        ]
        attentions = tf.transpose(tf.stack(attentions))
        self.attentions.append(attentions)
        attentions = tf.nn.softmax(attentions)
        attentions = tf.expand_dims(attentions, axis=-1)

        reuse = hop_index > 0

        # Concatenate fact vectors and attentions as input to the AttGRU.
        tmp = tf.transpose(tf.stack(fact_vecs), [1, 0, 2])
        gru_inputs = tf.concat([tmp, attentions], 2)
        with tf.variable_scope('attention_gru', reuse=reuse):
            _, episode = tf.nn.dynamic_rnn(
                AttentionGRUCell(self.params.model.num_units),
                gru_inputs,
                dtype=tf.float32,  # was np.float32 in the original
                sequence_length=self.input_length)
        return episode

    def add_answer_module(self, rnn_output, q_vec):
        """Linear softmax answer module."""
        if self.params.model.dropout:
            rnn_output = tf.nn.dropout(rnn_output, self.params.model.dropout)
        output = tf.layers.dense(tf.concat([rnn_output, q_vec], 1),
                                 self.vocab_size,
                                 activation=None)
        return output

    def inference(self):
        """Performs inference on the DMN model."""
        # Input fusion module.
        with tf.variable_scope(
                "input",
                initializer=tf.contrib.layers.xavier_initializer(),
                reuse=tf.AUTO_REUSE):
            print('==> get input representation')
            fact_vecs = self.get_input_representation()

        with tf.variable_scope(
                "question",
                initializer=tf.contrib.layers.xavier_initializer(),
                reuse=tf.AUTO_REUSE):
            print('==> get question representation')
            q_vec = self.get_question_representation()

        # Keep track of attentions for possible strong supervision.
        self.attentions = []

        # Memory module.
        with tf.variable_scope(
                "memory",
                initializer=tf.contrib.layers.xavier_initializer(),
                reuse=tf.AUTO_REUSE):
            print('==> build episodic memory')
            # Generate num_hops episodes.
            prev_memory = q_vec
            for i in range(self.params.model.num_hops):
                print('==> generating episode', i)
                episode = self.generate_episode(prev_memory, q_vec,
                                                fact_vecs, i)
                # Untied weights for the memory update at each hop.
                with tf.variable_scope("hop_%d" % i):
                    prev_memory = tf.layers.dense(
                        tf.concat([prev_memory, episode, q_vec], 1),
                        self.params.model.num_units,
                        activation=tf.nn.relu)
            output = prev_memory

        # Pass the memory module output through the linear answer module.
        with tf.variable_scope(
                "answer",
                initializer=tf.contrib.layers.xavier_initializer(),
                reuse=tf.AUTO_REUSE):
            output = self.add_answer_module(output, q_vec)

        return output