Example #1
    def __init__(self, name, t_init, t_target, t_last, t_last_hidden, t_vecs):
        self.t_init = t_init
        self.t_last = t_last
        self.t_last_hidden = t_last_hidden

        # Rank-3 inputs mean each batch item carries several candidate
        # sequences ("multi" mode); rank-2 is the ordinary single-sequence case.
        multi = len(t_init.get_shape()) == 3

        assert multi or len(t_init.get_shape()) == 2
        cell = tf.contrib.rnn.GRUCell(N_HIDDEN)
        if multi:
            # Flatten the (batch, multi) axes so one RNN pass covers every
            # candidate sequence.
            t_shape = tf.shape(t_target)
            t_n_batch, t_n_multi, t_n_toks = t_shape[0], t_shape[1], t_shape[2]
            t_init = tf.reshape(t_init, (t_n_batch*t_n_multi, N_HIDDEN))
            t_target = tf.reshape(t_target, (t_n_batch*t_n_multi, t_n_toks))

            # Flatten the single-step decoding inputs the same way.
            t_shape = tf.shape(t_last)
            t_n_batch_d, t_n_multi_d = t_shape[0], t_shape[1]
            t_last = tf.reshape(t_last, (t_n_batch_d*t_n_multi_d,))
            t_last_hidden = tf.reshape(t_last_hidden, (t_n_batch_d*t_n_multi_d, N_HIDDEN))

        t_emb_target = _embed_dict(t_target, t_vecs)
        t_emb_last = _embed_dict(t_last, t_vecs)
        n_vocab = t_vecs.get_shape()[0].value

        with tf.variable_scope(name) as scope:
            # Projection from GRU state to vocabulary logits.
            v_proj = tf.get_variable("w",
                    shape=(N_HIDDEN, n_vocab),
                    initializer=tf.uniform_unit_scaling_initializer(factor=1.43))
            b_proj = tf.get_variable("b",
                    shape=(n_vocab,),
                    initializer=tf.constant_initializer(0))

            # Teacher-forced pass over the full target sequence; logits at
            # step t are scored against the gold token at step t+1.
            t_dec_state, _ = tf.nn.dynamic_rnn(
                    cell, t_emb_target, initial_state=t_init, scope=scope)
            t_pred = tf.einsum("ijk,kl->ijl", t_dec_state, v_proj) + b_proj
            t_dec_err = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=t_target[:, 1:], logits=t_pred[:, :-1])
            t_dec_loss = tf.reduce_mean(tf.reduce_sum(t_dec_err, axis=1))
            # Negative sequence cross-entropy, i.e. a log-probability score
            # for ranking candidate sequences.
            t_scores = -tf.reduce_sum(t_dec_err, axis=1)

            # Reuse the same weights to advance decoding one token at a time.
            scope.reuse_variables()

            t_next_hidden, _ = cell(t_emb_last, t_last_hidden)
            t_next_pred = tf.einsum("ij,jk->ik", t_next_hidden, v_proj) + b_proj

        if multi:
            # Restore the (batch, multi) leading axes.
            t_next_hidden = tf.reshape(t_next_hidden, (t_n_batch_d, t_n_multi_d, N_HIDDEN))
            t_next_pred = tf.reshape(t_next_pred, (t_n_batch_d, t_n_multi_d, n_vocab))
            t_scores = tf.reshape(t_scores, (t_n_batch, t_n_multi))

        self.t_scores = t_scores
        self.t_loss = t_dec_loss
        self.t_next_hidden = t_next_hidden
        self.t_next_pred = t_next_pred
        self.multi = multi
        self.random = None
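This constructor evidently belongs to a class named Decoder: Example #3 below instantiates it as Decoder("decode_hyp", ...). A minimal instantiation sketch, assuming TF1 graph mode, the N_HIDDEN/N_EMBED constants, and the _embed_dict helper used above; the placeholder shapes and vocabulary size here are assumptions, not the original code:

# Hedged sketch: assumes "class Decoder:" wraps the __init__ above and that
# N_HIDDEN, N_EMBED, and _embed_dict are already in scope.
t_init = tf.placeholder(tf.float32, (None, N_HIDDEN))        # initial state
t_target = tf.placeholder(tf.int32, (None, None))            # gold token ids
t_last = tf.placeholder(tf.int32, (None,))                   # last emitted token
t_last_hidden = tf.placeholder(tf.float32, (None, N_HIDDEN))
t_vecs = tf.get_variable("vecs", (1000, N_EMBED))            # hypothetical 1000-word vocab
dec = Decoder("decode", t_init, t_target, t_last, t_last_hidden, t_vecs)
# dec.t_loss trains by teacher forcing; dec.t_next_pred / dec.t_next_hidden
# advance sampling one token at a time.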
Example #2
def _encode(name, t_input, t_len, t_vecs, t_init=None, reuse=False):
    # Encode token sequences with a GRU. Rank-3 input means several
    # sequences per batch item ("multi" mode); rank-2 is a plain batch.
    multi = len(t_input.get_shape()) == 3
    assert multi or len(t_input.get_shape()) == 2
    cell = tf.contrib.rnn.GRUCell(N_HIDDEN)
    if multi:
        # Flatten (batch, multi) so a single dynamic_rnn call covers all rows.
        t_shape = tf.shape(t_input)
        t_n_batch, t_n_multi, t_n_toks = t_shape[0], t_shape[1], t_shape[2]
        t_input = tf.reshape(t_input, (t_n_batch*t_n_multi, t_n_toks))
        t_len = tf.reshape(t_len, (t_n_batch*t_n_multi,))
        if t_init is not None:
            # There is one initial state per batch item; copy it across the
            # multi axis before flattening.
            t_init = tf.tile(tf.expand_dims(t_init, 1), (1, t_n_multi, 1))
            t_init = tf.reshape(t_init, (t_n_batch*t_n_multi, N_HIDDEN))
    t_embed = _embed_dict(t_input, t_vecs)
    with tf.variable_scope(name, reuse=reuse):
        # The final GRU state serves as the sequence encoding.
        _, t_encode = tf.nn.dynamic_rnn(
                cell, t_embed, t_len, dtype=tf.float32, initial_state=t_init)
    if multi:
        t_encode = tf.reshape(t_encode, (t_n_batch, t_n_multi, N_HIDDEN))
    return t_encode
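The _embed_dict helper these snippets call is not shown. A plausible stand-in definition plus a call to _encode, offered purely as a sketch; the lookup body, the constants, and the vocabulary size are all assumptions:

import tensorflow as tf

N_HIDDEN, N_EMBED = 64, 64      # assumed constants

def _embed_dict(t_ids, t_vecs):
    # Assumed behavior: ordinary embedding lookup of integer token ids.
    return tf.nn.embedding_lookup(t_vecs, t_ids)

t_hint = tf.placeholder(tf.int32, (None, None))        # (batch, tokens)
t_hint_len = tf.placeholder(tf.int32, (None,))         # true row lengths
t_vecs = tf.get_variable("hint_vec", (100, N_EMBED))   # hypothetical 100-word vocab
t_enc = _encode("encode_hint", t_hint, t_hint_len, t_vecs)   # -> (batch, N_HIDDEN)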
Example #3
    def __init__(self, task):
        self.task = task

        # One batch of experience: observations, chosen actions, rewards,
        # tokenized hints (with lengths), and task ids.
        self.t_state = tf.placeholder(tf.float32, (None, task.n_features))
        self.t_action = tf.placeholder(tf.int32, (None, ))
        self.t_reward = tf.placeholder(tf.float32, (None, ))
        self.t_hint = tf.placeholder(tf.int32, (None, None))
        self.t_hint_len = tf.placeholder(tf.int32, (None, ))
        self.t_task = tf.placeholder(tf.int32, (None, ))

        # Inputs for stepping the hypothesis decoder one token at a time.
        self.t_last_hyp = tf.placeholder(tf.int32, (None, ), "last_hyp")
        self.t_last_hyp_hidden = tf.placeholder(tf.float32,
                                                (None, N_DEC_HIDDEN),
                                                "last_hyp_hidden")
        # Learned initial decoder state, tiled across the batch.
        t_hyp_init = tf.get_variable(
            "hyp_init",
            shape=(1, N_DEC_HIDDEN),
            initializer=tf.uniform_unit_scaling_initializer())
        self.t_n_batch = tf.shape(self.t_state)[0]
        t_hyp_tile = tf.tile(t_hyp_init, (self.t_n_batch, 1))

        # Hint (language) representation: mean of the hint word embeddings.
        t_hint_vecs = tf.get_variable(
            "hint_vec", (len(task.vocab), N_EMBED),
            initializer=tf.uniform_unit_scaling_initializer())
        t_hint_repr = tf.reduce_mean(_embed_dict(self.t_hint, t_hint_vecs),
                                     axis=1)
        # Decoder that reconstructs the hint from the learned initial state
        # (see Example #1).
        self.hyp_decoder = Decoder("decode_hyp", t_hyp_tile, self.t_hint,
                                   self.t_last_hyp, self.t_last_hyp_hidden,
                                   t_hint_vecs)

        # Task representation: a plain per-task embedding.
        t_task_vecs = tf.get_variable(
            "task_vec", (task.n_tasks, N_EMBED),
            initializer=tf.uniform_unit_scaling_initializer())
        t_task_repr = _embed_dict(self.t_task, t_task_vecs)

        # The "concept" conditioning the policy is either the hint encoding
        # (when inferring hypotheses) or the task embedding.
        if FLAGS.infer_hyp:
            self.t_concept = t_hint_repr
        else:
            self.t_concept = t_task_repr

        with tf.variable_scope("features"):
            t_features = _mlp(self.t_state, (N_HIDDEN, N_HIDDEN),
                              (tf.nn.tanh, tf.nn.tanh))
        with tf.variable_scope("param"):
            t_concept_param = _linear(self.t_concept,
                                      N_HIDDEN * task.n_actions)
            t_concept_mat = tf.reshape(t_concept_param,
                                       (-1, N_HIDDEN, task.n_actions))
        self.t_score = tf.einsum("ij,ijk->ik", t_features, t_concept_mat)

        self.t_logprob = tf.nn.log_softmax(self.t_score)
        t_prob = tf.nn.softmax(self.t_score)
        # Policy entropy, used below as an exploration bonus.
        t_entropy = -tf.reduce_mean(
            tf.reduce_sum(t_prob * self.t_logprob, axis=1))

        # State-value baseline for variance reduction; gradients do not flow
        # back into the feature network.
        with tf.variable_scope("baseline"):
            t_baseline = tf.squeeze(_linear(tf.stop_gradient(t_features), 1))

        # REINFORCE surrogate: log-probability of the chosen action weighted
        # by the baseline-subtracted reward.
        t_chosen_logprob = -tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.t_score, labels=self.t_action)
        t_loss_surrogate = -tf.reduce_mean(
            t_chosen_logprob * (self.t_reward - tf.stop_gradient(t_baseline)))
        t_baseline_err = tf.reduce_mean((t_baseline - self.t_reward)**2)

        self.t_rl_loss = t_loss_surrogate + t_baseline_err - 0.001 * t_entropy
        # DAgger-style imitation loss: maximize likelihood of demonstrated
        # actions.
        self.t_dagger_loss = -tf.reduce_mean(t_chosen_logprob)

        if FLAGS.concept_prior is not None:

            # Gaussian prior on the concept vector, i.e. an L2 penalty.
            def normal(x):
                return tf.reduce_mean(tf.reduce_sum(tf.square(x), axis=1))

            self.t_rl_loss += normal(self.t_concept) / FLAGS.concept_prior
            self.t_dagger_loss += normal(self.t_concept) / FLAGS.concept_prior

        # Optionally train the hypothesis decoder jointly with the policy.
        if FLAGS.predict_hyp:
            self.t_loss = self.t_rl_loss + self.hyp_decoder.t_loss
            self.t_dagger_loss = self.t_dagger_loss + self.hyp_decoder.t_loss
        else:
            self.t_loss = self.t_rl_loss

        optimizer = tf.train.AdamOptimizer(0.001)
        self.o_train = optimizer.minimize(self.t_loss)
        self.o_rl_train = optimizer.minimize(self.t_rl_loss)
        self.o_dagger_train = optimizer.minimize(self.t_dagger_loss)

        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()
        if FLAGS.restore is not None:
            self.restore(FLAGS.restore)
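A hedged sketch of a training step on this graph, assuming the constructor above belongs to a class named Model and that the numpy batch arrays (states, actions, and so on) are prepared elsewhere; all of those names are hypothetical:

model = Model(task)
feed = {
    model.t_state: states,                  # (batch, n_features) floats
    model.t_action: actions,                # (batch,) ints
    model.t_reward: rewards,                # (batch,) floats
    model.t_hint: hints,                    # (batch, max_hint_len) ints
    model.t_hint_len: hint_lens,
    model.t_task: task_ids,
    model.t_last_hyp: last_hyps,            # consumed by the hypothesis decoder
    model.t_last_hyp_hidden: last_hidden,
}
loss, _ = model.session.run([model.t_loss, model.o_train], feed)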