def __init__(self, name, t_init, t_target, t_last, t_last_hidden, t_vecs):
    """Build the teacher-forced and single-step graphs of a GRU decoder.

    Two sub-graphs that share the same variables are created under the
    variable scope ``name``:

    * a full pass over ``t_target`` producing a sequence loss and
      per-sequence scores, and
    * a one-step pass that feeds ``t_last`` and ``t_last_hidden`` through
      the same cell and produces logits for the following token.

    When ``t_init`` has static rank 3 the inputs are treated as carrying an
    extra "multi" axis after the batch axis: they are flattened to
    ``batch * multi`` rows for the RNN and the outputs are reshaped back.

    Args:
        name: Variable scope under which the projection and GRU weights
            are created.
        t_init: Initial decoder state; static rank must be 2 or 3.
        t_target: Token ids to decode (rank 3 in the multi case, since
            three dimensions are unpacked from its shape).
        t_last: Previous token id(s) for the one-step pass.
        t_last_hidden: Hidden state to advance in the one-step pass.
        t_vecs: Embedding table handed to ``_embed_dict``; its first
            static dimension is taken as the vocabulary size.
    """
    # Expose the caller's tensors as given; only the locals are reshaped.
    self.t_init = t_init
    self.t_last = t_last
    self.t_last_hidden = t_last_hidden

    init_rank = len(t_init.get_shape())
    assert init_rank in (2, 3)
    is_multi = init_rank == 3

    gru = tf.contrib.rnn.GRUCell(N_HIDDEN)

    if is_multi:
        # Collapse (batch, multi, ...) into (batch * multi, ...) so the RNN
        # sees an ordinary batch. Training and step inputs are sized
        # independently of each other.
        t_target_dims = tf.shape(t_target)
        t_n_batch = t_target_dims[0]
        t_n_multi = t_target_dims[1]
        t_n_toks = t_target_dims[2]
        t_init = tf.reshape(t_init, (t_n_batch * t_n_multi, N_HIDDEN))
        t_target = tf.reshape(t_target, (t_n_batch * t_n_multi, t_n_toks))

        t_last_dims = tf.shape(t_last)
        t_n_batch_d = t_last_dims[0]
        t_n_multi_d = t_last_dims[1]
        t_last = tf.reshape(t_last, (t_n_batch_d * t_n_multi_d,))
        t_last_hidden = tf.reshape(
            t_last_hidden, (t_n_batch_d * t_n_multi_d, N_HIDDEN))

    t_emb_target = _embed_dict(t_target, t_vecs)
    t_emb_last = _embed_dict(t_last, t_vecs)
    n_vocab = t_vecs.get_shape()[0].value

    with tf.variable_scope(name) as scope:
        w_out = tf.get_variable(
            "w",
            shape=(N_HIDDEN, n_vocab),
            initializer=tf.uniform_unit_scaling_initializer(factor=1.43))
        b_out = tf.get_variable(
            "b",
            shape=(n_vocab,),
            initializer=tf.constant_initializer(0))

        # Teacher-forced pass: the output at position i is scored against
        # the token at position i + 1, hence the one-step shift below.
        t_dec_state, _ = tf.nn.dynamic_rnn(
            gru, t_emb_target, initial_state=t_init, scope=scope)
        t_pred = tf.einsum("ijk,kl->ijl", t_dec_state, w_out) + b_out
        t_shifted_labels = t_target[:, 1:]
        t_shifted_logits = t_pred[:, :-1]
        t_dec_err = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=t_shifted_labels, logits=t_shifted_logits)
        # No length mask is applied here: every position contributes.
        t_dec_loss = tf.reduce_mean(tf.reduce_sum(t_dec_err, axis=1))
        t_scores = -tf.reduce_sum(t_dec_err, axis=1)

        # Single-step pass reusing the weights created just above.
        scope.reuse_variables()
        t_next_hidden, _ = gru(t_emb_last, t_last_hidden)
        t_next_pred = tf.einsum("ij,jk->ik", t_next_hidden, w_out) + b_out

    if is_multi:
        # Restore the (batch, multi, ...) layout on everything exposed.
        t_next_hidden = tf.reshape(
            t_next_hidden, (t_n_batch_d, t_n_multi_d, N_HIDDEN))
        t_next_pred = tf.reshape(
            t_next_pred, (t_n_batch_d, t_n_multi_d, n_vocab))
        t_scores = tf.reshape(t_scores, (t_n_batch, t_n_multi))

    self.t_scores = t_scores
    self.t_loss = t_dec_loss
    self.t_next_hidden = t_next_hidden
    self.t_next_pred = t_next_pred
    self.multi = is_multi
    self.random = None
def _encode(name, t_input, t_len, t_vecs, t_init=None, reuse=False):
    """Run a GRU over embedded token sequences and return its final state.

    ``t_input`` may have static rank 2 or rank 3. Rank-3 input carries an
    extra "multi" axis after the batch axis; it is flattened to
    ``batch * multi`` rows for the RNN and the resulting state is reshaped
    back to ``(batch, multi, N_HIDDEN)``.

    Args:
        name: Variable scope in which the GRU weights live.
        t_input: Token ids, static rank 2 or 3.
        t_len: Sequence lengths, passed to ``tf.nn.dynamic_rnn`` as
            ``sequence_length`` (flattened alongside ``t_input`` in the
            multi case).
        t_vecs: Embedding table handed to ``_embed_dict``.
        t_init: Optional initial state. In the multi case it is tiled
            along a new axis 1 so that each of the ``multi`` entries of a
            batch row starts from the same state.
        reuse: Forwarded to ``tf.variable_scope``.

    Returns:
        The final RNN state tensor.
    """
    input_rank = len(t_input.get_shape())
    assert input_rank in (2, 3)
    is_multi = input_rank == 3

    gru = tf.contrib.rnn.GRUCell(N_HIDDEN)

    if is_multi:
        t_dims = tf.shape(t_input)
        t_n_batch = t_dims[0]
        t_n_multi = t_dims[1]
        t_n_toks = t_dims[2]
        t_input = tf.reshape(t_input, (t_n_batch * t_n_multi, t_n_toks))
        t_len = tf.reshape(t_len, (t_n_batch * t_n_multi,))
        if t_init is not None:
            # One initial state per batch row -> one per (row, multi) pair.
            t_init = tf.tile(tf.expand_dims(t_init, 1), (1, t_n_multi, 1))
            t_init = tf.reshape(t_init, (t_n_batch * t_n_multi, N_HIDDEN))

    t_embed = _embed_dict(t_input, t_vecs)

    with tf.variable_scope(name, reuse=reuse):
        # Only the final state is kept; per-step outputs are discarded.
        _, t_encode = tf.nn.dynamic_rnn(
            gru,
            t_embed,
            sequence_length=t_len,
            dtype=tf.float32,
            initial_state=t_init)

    if is_multi:
        t_encode = tf.reshape(t_encode, (t_n_batch, t_n_multi, N_HIDDEN))

    return t_encode
def __init__(self, task):
    """Build the policy graph, its losses and training ops, and a session.

    The constructor:

    * creates feed placeholders for states, actions, rewards, hints and
      task ids, plus the two inputs of the hypothesis decoder's step pass;
    * builds a ``Decoder`` over the hint tokens;
    * picks a "concept" vector per example (mean hint embedding when
      ``FLAGS.infer_hyp`` is set, otherwise a learned task embedding) and
      maps it linearly to a per-example ``(N_HIDDEN, n_actions)`` matrix
      that turns state features into action scores;
    * defines a policy-gradient loss with a learned baseline and an
      entropy bonus, and a supervised ("dagger") loss on the chosen
      action;
    * creates Adam training ops, opens a ``tf.Session``, initializes all
      variables, creates a ``Saver`` and optionally restores from
      ``FLAGS.restore``.

    Args:
        task: Object providing ``n_features``, ``n_actions``, ``n_tasks``
            and ``vocab``.
    """
    self.task = task

    # ---- feed placeholders ------------------------------------------------
    self.t_state = tf.placeholder(tf.float32, (None, task.n_features))
    self.t_action = tf.placeholder(tf.int32, (None,))
    self.t_reward = tf.placeholder(tf.float32, (None,))
    self.t_hint = tf.placeholder(tf.int32, (None, None))
    # Not consumed by any op built in this constructor.
    self.t_hint_len = tf.placeholder(tf.int32, (None,))
    self.t_task = tf.placeholder(tf.int32, (None,))
    self.t_last_hyp = tf.placeholder(tf.int32, (None,), "last_hyp")
    self.t_last_hyp_hidden = tf.placeholder(
        tf.float32, (None, N_DEC_HIDDEN), "last_hyp_hidden")

    # ---- hypothesis decoder -----------------------------------------------
    # A single learned start state, repeated once per example in the batch.
    t_hyp_init = tf.get_variable(
        "hyp_init",
        shape=(1, N_DEC_HIDDEN),
        initializer=tf.uniform_unit_scaling_initializer())
    self.t_n_batch = tf.shape(self.t_state)[0]
    t_hyp_tile = tf.tile(t_hyp_init, (self.t_n_batch, 1))

    t_hint_vecs = tf.get_variable(
        "hint_vec",
        (len(task.vocab), N_EMBED),
        initializer=tf.uniform_unit_scaling_initializer())
    # Averages over axis 1 of the embedded hint (presumably the token axis;
    # depends on what _embed_dict returns). Lengths are not used here.
    t_hint_embedded = _embed_dict(self.t_hint, t_hint_vecs)
    t_hint_repr = tf.reduce_mean(t_hint_embedded, axis=1)

    self.hyp_decoder = Decoder(
        "decode_hyp",
        t_hyp_tile,
        self.t_hint,
        self.t_last_hyp,
        self.t_last_hyp_hidden,
        t_hint_vecs)

    # ---- concept representation -------------------------------------------
    t_task_vecs = tf.get_variable(
        "task_vec",
        (task.n_tasks, N_EMBED),
        initializer=tf.uniform_unit_scaling_initializer())
    t_task_repr = _embed_dict(self.t_task, t_task_vecs)

    self.t_concept = t_hint_repr if FLAGS.infer_hyp else t_task_repr

    # ---- policy -----------------------------------------------------------
    with tf.variable_scope("features"):
        t_features = _mlp(
            self.t_state,
            (N_HIDDEN, N_HIDDEN),
            (tf.nn.tanh, tf.nn.tanh))

    with tf.variable_scope("param"):
        t_concept_param = _linear(self.t_concept, N_HIDDEN * task.n_actions)
    # One (N_HIDDEN, n_actions) scoring matrix per example.
    t_concept_mat = tf.reshape(
        t_concept_param, (-1, N_HIDDEN, task.n_actions))

    self.t_score = tf.einsum("ij,ijk->ik", t_features, t_concept_mat)
    self.t_logprob = tf.nn.log_softmax(self.t_score)
    t_prob = tf.nn.softmax(self.t_score)
    t_entropy = -tf.reduce_mean(
        tf.reduce_sum(t_prob * self.t_logprob, axis=1))

    with tf.variable_scope("baseline"):
        # The baseline reads the features but does not train them.
        # NOTE(review): tf.squeeze without an axis drops every size-1
        # dimension, which would include the batch axis for a one-example
        # batch -- confirm that is acceptable for callers.
        t_baseline = tf.squeeze(_linear(tf.stop_gradient(t_features), 1))

    # ---- losses -----------------------------------------------------------
    t_chosen_logprob = -tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.t_score, labels=self.t_action)
    # Advantage uses a gradient-stopped baseline so the surrogate term only
    # updates the policy; the baseline is fit by its own squared error.
    t_advantage = self.t_reward - tf.stop_gradient(t_baseline)
    t_loss_surrogate = -tf.reduce_mean(t_chosen_logprob * t_advantage)
    t_baseline_err = tf.reduce_mean((t_baseline - self.t_reward)**2)

    self.t_rl_loss = t_loss_surrogate + t_baseline_err - 0.001 * t_entropy
    self.t_dagger_loss = -tf.reduce_mean(t_chosen_logprob)

    if FLAGS.concept_prior is not None:
        def mean_sq_norm(x):
            """Mean over rows of the squared L2 norm along axis 1."""
            return tf.reduce_mean(tf.reduce_sum(tf.square(x), axis=1))

        # Quadratic penalty on the concept vector, scaled by 1 / prior.
        self.t_rl_loss = (
            self.t_rl_loss
            + mean_sq_norm(self.t_concept) / FLAGS.concept_prior)
        self.t_dagger_loss = (
            self.t_dagger_loss
            + mean_sq_norm(self.t_concept) / FLAGS.concept_prior)

    if FLAGS.predict_hyp:
        # Add the decoder's loss to the combined and supervised objectives;
        # t_rl_loss itself stays decoder-free.
        self.t_loss = self.t_rl_loss + self.hyp_decoder.t_loss
        self.t_dagger_loss = self.t_dagger_loss + self.hyp_decoder.t_loss
    else:
        self.t_loss = self.t_rl_loss

    # ---- training ops and session -----------------------------------------
    optimizer = tf.train.AdamOptimizer(0.001)
    self.o_train = optimizer.minimize(self.t_loss)
    self.o_rl_train = optimizer.minimize(self.t_rl_loss)
    self.o_dagger_train = optimizer.minimize(self.t_dagger_loss)

    self.session = tf.Session()
    self.session.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver()
    if FLAGS.restore is not None:
        self.restore(FLAGS.restore)