Example 1
    def sample_action(self, policy_parameters):
        """ Constructs a symbolic operation for stochastically sampling from the policy
            distribution

            arguments:
                policy_parameters
                    if discrete: logits of a categorical distribution over actions 
                        sy_logits_na: (batch_size, self.ac_dim)
                    if continuous: (mean, log_std) of a Gaussian distribution over actions
                        sy_mean: (batch_size, self.ac_dim)
                        sy_logstd: (self.ac_dim,)

            returns:
                sy_sampled_ac: 
                    if discrete: (batch_size,)
                    if continuous: (batch_size, self.ac_dim)

            Hint: for the continuous case, use the reparameterization trick:
                 The output from a Gaussian distribution with mean 'mu' and std 'sigma' is
        
                      mu + sigma * z,         z ~ N(0, I)
        
                 This reduces the problem to just sampling z. (Hint: use tf.random_normal!)
        """
        if self.discrete:
            sy_logits_na = policy_parameters
            sy_sampled_ac = tf.squeeze(tf.multinomial(sy_logits_na,
                                                      num_samples=1),
                                       axis=1)
        else:
            sy_mean, sy_logstd = policy_parameters
            sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(
                tf.shape(sy_mean), 0, 1)
        return sy_sampled_ac
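
tf.multinomial is deprecated in TF 2.x in favor of tf.random.categorical (Example 16's comments note this). A minimal sketch of the same two branches against the TF 2 API, assuming eager execution and the shapes documented above:

import tensorflow as tf  # TF 2.x

def sample_action_tf2(policy_parameters, discrete):
    if discrete:
        logits_na = policy_parameters  # (batch_size, ac_dim)
        # tf.random.categorical is the TF 2 replacement for tf.multinomial.
        return tf.squeeze(tf.random.categorical(logits_na, num_samples=1), axis=1)
    mean, logstd = policy_parameters      # (batch_size, ac_dim), (ac_dim,)
    z = tf.random.normal(tf.shape(mean))  # z ~ N(0, I)
    return mean + tf.exp(logstd) * z      # reparameterization trick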
Example 2
 def multinomial_squeeze(self, logits, temperature=1.0):
   """multinomial sampling from logits."""
   logits_shape = utils.shape_list(logits)
   reshaped_logits = (tf.reshape(logits, [-1, logits_shape[-1]]) / temperature)
   choices = tf.multinomial(reshaped_logits, 1)
   choices = tf.reshape(choices, logits_shape[:-1])
   return tf.to_int32(choices)
Example 3
 def build_action_sampling(self):
     if self.discrete:
         logits_na = self.parameters
         self.sample_ac = tf.squeeze(tf.multinomial(logits_na, num_samples=1), axis=1)
     else:
         mean, logstd = self.parameters
         self.sample_ac = mean + tf.exp(logstd) * tf.random_normal(tf.shape(mean), 0, 1)
Example 4
    def _build_networks(self):
        # Define input placeholders
        self.s = tf.placeholder(tf.float32,
                                shape=[None] + self.state_dim,
                                name='state')
        self.a = tf.placeholder(tf.int32, shape=(None, ), name='action')
        self.s_next = tf.placeholder(tf.float32,
                                     shape=[None] + self.state_dim,
                                     name='next_state')
        self.r = tf.placeholder(tf.float32, shape=(None, ), name='reward')
        self.done = tf.placeholder(tf.float32,
                                   shape=(None, ),
                                   name='done_flag')

        # Actor: action probabilities
        self.actor = dense_nn(self.s,
                              self.layer_sizes + [self.act_size],
                              name='actor')
        self.sampled_actions = tf.squeeze(tf.multinomial(self.actor, 1))
        self.actor_proba = tf.nn.softmax(self.actor)
        self.actor_vars = self.scope_vars('actor')

        # Critic: action value (V value)
        self.critic = dense_nn(self.s, self.layer_sizes + [1], name='critic')
        self.critic_next = dense_nn(self.s_next,
                                    self.layer_sizes + [1],
                                    name='critic',
                                    reuse=True)
        self.critic_vars = self.scope_vars('critic')

        # TD target
        self.td_target = self.r + self.gamma * tf.squeeze(
            self.critic_next) * (1.0 - self.done)
        self.td_error = self.td_target - tf.squeeze(self.critic)
Example 5
    def generate_string(self, initial_logits, initial_state, sequence_length):
        """Builds sub-graph to generate a string, sampled from the model.

        Args:
          initial_logits: Starting logits to sample from.
          initial_state: Starting state for the RNN core.
          sequence_length: Number of characters to sample.

        Returns:
          A Tensor of characters, with dimensions `[sequence_length, batch_size,
          output_size]`.
        """

        current_logits = initial_logits
        current_state = initial_state

        generated_letters = []
        for _ in range(sequence_length):
            # Sample a character index from distribution.
            char_index = tf.squeeze(tf.multinomial(current_logits, 1))
            char_one_hot = tf.one_hot(char_index, self._output_size, 1.0, 0.0)
            generated_letters.append(char_one_hot)

            # Feed character back into the deep_lstm.
            gen_out_seq, current_state = self._core(
                tf.nn.relu(self._embed_module(char_one_hot)), current_state)
            current_logits = self._output_module(gen_out_seq)

        generated_string = tf.stack(generated_letters)

        return generated_string
Example 6
 def _g_recurrence_2(i, x_t, h_tm1, given_num, gen_x):
     h_t = self.g_recurrent_unit(x_t, h_tm1)  # hidden_memory_tuple
     o_t = self.g_output_unit(h_t)  # batch x vocab , logits not prob
     log_prob = tf.log(tf.nn.softmax(o_t))
     next_token = tf.cast(tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
     x_tp1 = tf.nn.embedding_lookup(self.g_embeddings, next_token)  # batch x emb_dim
     gen_x = gen_x.write(i, next_token)  # indices, batch_size
     return i + 1, x_tp1, h_t, given_num, gen_x
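
A note on the tf.log(tf.nn.softmax(o_t)) idiom above: tf.multinomial treats its input as unnormalized log-probabilities, and log-softmax only shifts each row by a constant (its log-partition), so passing o_t directly samples from the same distribution. A small sketch of the equivalence:

import tensorflow as tf  # TF 1.x

logits = tf.constant([[2.0, 0.5, -1.0]])
# Both draws come from the same categorical distribution:
a = tf.multinomial(logits, num_samples=1)
b = tf.multinomial(tf.log(tf.nn.softmax(logits)), num_samples=1)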
Example 7
 def provide_one_hot_labels(self, batch_size):
   """Provides one hot labels."""
   pitch_counts = self.get_pitch_counts()
   pitches = sorted(pitch_counts.keys())
   counts = [pitch_counts[p] for p in pitches]
   indices = tf.reshape(
       tf.multinomial(tf.log([tf.to_float(counts)]), batch_size), [batch_size])
   one_hot_labels = tf.one_hot(indices, depth=len(pitches))
   return one_hot_labels
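
Here tf.log turns raw counts into logits, so pitch i is sampled with probability counts[i] / sum(counts). A tiny sketch of the idea with hypothetical counts:

import tensorflow as tf  # TF 1.x

counts = [10., 30., 60.]  # hypothetical pitch counts
# P(i) = counts[i] / sum(counts): index 2 is drawn about 60% of the time.
indices = tf.multinomial(tf.log([counts]), num_samples=5)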
Example 8
def multinomial_sample(x, vocab_size, temperature):
    """Multinomial sampling from a n-dimensional tensor."""
    if temperature > 0:
        samples = tf.multinomial(
            tf.reshape(x, [-1, vocab_size]) / temperature, 1)
    else:
        samples = tf.argmax(x, axis=-1)
    reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
    return tf.to_int32(reshaped_samples)
Example 9
 def sample_from_logits(logits):
   with tf.control_dependencies([tf.assert_greater(temperature, 0.0)]):
     logits = tf.identity(logits)
   reshaped_logits = (
       tf.reshape(logits, [-1, tf.shape(logits)[-1]]) / temperature)
   choices = tf.multinomial(reshaped_logits, 1)
   choices = tf.reshape(choices,
                        tf.shape(logits)[:logits.get_shape().ndims - 1])
   return choices
Example 10
 def _g_recurrence(i, x_t, h_tm1, gen_o, gen_x):
     h_t = self.g_recurrent_unit(x_t, h_tm1)  # hidden_memory_tuple
     o_t = self.g_output_unit(h_t)  # batch x vocab , logits not prob
     log_prob = tf.log(tf.nn.softmax(o_t))
     next_token = tf.cast(tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
     x_tp1 = tf.nn.embedding_lookup(self.g_embeddings, next_token)  # batch x emb_dim
     gen_o = gen_o.write(i, tf.reduce_sum(tf.multiply(tf.one_hot(next_token, self.num_emb, 1.0, 0.0),
                                                      tf.nn.softmax(o_t)), 1))  # [batch_size] , prob
     gen_x = gen_x.write(i, next_token)  # indices, batch_size
     return i + 1, x_tp1, h_t, gen_o, gen_x
Example 11
 def false_fn():
     """add mutations."""
     mask = tf.cast(
         tf.multinomial(tf.log([[1 - mutation_rate, mutation_rate]]),
                        seq_len), tf.int32)[0]
     possible_mutations = tf.random_uniform([seq_len],
                                            minval=1,
                                            maxval=4,
                                            dtype=tf.int32)
     x_new = tf.mod(x + mask * possible_mutations, 4)
     return x_new
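
Here tf.multinomial doubles as a Bernoulli sampler: the two logits log(1 - mutation_rate) and log(mutation_rate), drawn seq_len times, give a 0/1 mask in which each position mutates with probability mutation_rate. A sketch with hypothetical values:

import tensorflow as tf  # TF 1.x

mutation_rate, seq_len = 0.1, 8
mask = tf.multinomial(tf.log([[1 - mutation_rate, mutation_rate]]), seq_len)
# mask[0] is a length-8 vector of 0s and 1s; each entry is 1 with probability ~0.1.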
Example 12
 def body(past, prev, output):
     next_outputs = step(hparams, prev, past=past)
      logits = next_outputs['logits'][:, -1, :] / tf.to_float(temperature)
     logits = top_k_logits(logits, k=top_k)
     logits = top_p_logits(logits, p=top_p)
     samples = tf.multinomial(logits, num_samples=1, output_dtype=tf.int32)
     return [
         next_outputs['presents'] if past is None else tf.concat([past, next_outputs['presents']], axis=-2),
         samples,
         tf.concat([output, samples], axis=1)
     ]
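
top_k_logits and top_p_logits are defined elsewhere in that codebase. A minimal sketch of a top-k filter of the usual form, assuming a [batch, vocab] logits tensor: everything below the k-th largest logit is pushed to ~ -inf so tf.multinomial can never sample it.

import tensorflow as tf  # TF 1.x

def top_k_logits(logits, k):
    """Keep only the k largest logits per row; k == 0 disables filtering."""
    if k == 0:
        return logits
    values, _ = tf.nn.top_k(logits, k=k)
    min_values = values[:, -1, tf.newaxis]  # k-th largest logit per row
    return tf.where(logits < min_values,
                    tf.ones_like(logits) * -1e10,
                    logits)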
Example 13
 def body(past, prev, output):
     next_outputs = step(hparams, prev[:, tf.newaxis], past=past)
     logits = next_outputs['logits'][:, -1, :] / \
         tf.to_float(temperature)
     logits = top_k_logits(logits, k=top_k)
     samples = tf.multinomial(
         logits, num_samples=1, output_dtype=tf.int32)
     return [
         tf.concat([past, next_outputs['presents']], axis=-2),
         tf.squeeze(samples, axis=[1]),
         tf.concat([output, samples], axis=1),
     ]
Example 14
 def _sample_n(n):
   """Sample vector of categoricals."""
   if logits.shape.ndims == 2:
     logits_2d = logits
   else:
     logits_2d = tf.reshape(logits, [-1, event_size])
   sample_dtype = tf.int64 if logits.dtype.size > 4 else tf.int32
   draws = tf.multinomial(
       logits_2d, n, seed=seed, output_dtype=sample_dtype)
   draws = tf.reshape(
       tf.transpose(draws),
       tf.concat([[n], batch_shape_tensor], 0))
   return tf.cast(draws, dtype)
Example 15
  def _head(self, core_output):
    """Build the head of the agent: linear policy and value function."""
    policy_logits = snt.Linear(
        self._num_actions, name='policy_logits')(
            core_output)
    baseline = tf.squeeze(snt.Linear(1, name='baseline')(core_output), axis=-1)

    # Sample an action from the policy.
    new_action = tf.multinomial(
        policy_logits, num_samples=1, output_dtype=tf.int32)
    new_action = tf.squeeze(new_action, 1, name='new_action')

    return AgentOutput(new_action, policy_logits, baseline)
Example 16
    def sample_action(self, policy_parameters):
        """
        Constructs a symbolic operation for stochastically sampling from the
        policy distribution

        arguments:
            policy_parameters
                if discrete: logits of a categorical distribution over actions
                    sy_logits_na: (batch_size, self.ac_dim)
                if continuous: (mean, log_std) of a Gaussian distribution over actions
                    sy_mean: (batch_size, self.ac_dim)
                    sy_logstd: (self.ac_dim,)

        returns:
            sy_sampled_ac:
                if discrete: (batch_size,)
                if continuous: (batch_size, self.ac_dim)

        Hint: for the continuous case, use the reparameterization trick:
             The output from a Gaussian distribution with mean 'mu' and std 'sigma' is

                  mu + sigma * z,         z ~ N(0, I)

             This reduces the problem to just sampling z.
        """
        if self.discrete:
            sy_logits_na = policy_parameters
            # ------------------------------------------------------------------
            # START OF YOUR CODE
            # ------------------------------------------------------------------
            # draw a sample from sy_logits_na.  tf.multinomial deprecated in tf2.
            # the tf2 equivalent is tf.random.categorical.
            sy_sampled_ac = tf.reshape(tf.multinomial(sy_logits_na, 1), [-1])
            # ------------------------------------------------------------------
            # END OF YOUR CODE
            # ------------------------------------------------------------------
        else:
            sy_mean, sy_logstd = policy_parameters
            # ------------------------------------------------------------------
            # START OF YOUR CODE
            # ------------------------------------------------------------------
            # sampling from z using random_normal.
            # mean is sy_mean, stdev is exp(sy_logstd)
            sy_sampled_ac = tf.random_normal(shape=tf.shape(sy_mean),
                                             mean=sy_mean,
                                             stddev=tf.exp(sy_logstd))
            # ------------------------------------------------------------------
            # END OF YOUR CODE
            # ------------------------------------------------------------------
        return sy_sampled_ac
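
Unlike Example 1, this version passes the mean and stddev to tf.random_normal directly. The two forms draw from the same Gaussian, since tf.random_normal(shape, mean=mu, stddev=sigma) is distributed as mu + sigma * N(0, I). A two-line sketch:

import tensorflow as tf  # TF 1.x

mu = tf.constant([[0.5, -1.0]])
sigma = tf.constant([2.0, 0.1])
a = mu + sigma * tf.random_normal(tf.shape(mu))            # Example 1's form
b = tf.random_normal(tf.shape(mu), mean=mu, stddev=sigma)  # this example's form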
Example 17
    def __init__(self, name: str, env):
        """
        :param name: string
        :param env: gym env
        """
        ob_space = env.observation_space
        act_space = env.action_space

        with tf.variable_scope(name):
            self.obs = tf.placeholder(dtype=tf.float32,
                                      shape=[None] + list(ob_space.shape),
                                      name='obs')
            # Actor (Policy): Given a state (or observation)
            # obtain the distribution of actions
            with tf.variable_scope('policy_net'):
                layer_1 = tf.layers.dense(inputs=self.obs,
                                          units=20,
                                          activation=tf.tanh)
                layer_2 = tf.layers.dense(inputs=layer_1,
                                          units=20,
                                          activation=tf.tanh)
                layer_3 = tf.layers.dense(inputs=layer_2,
                                          units=act_space.n,
                                          activation=tf.tanh)
                self.act_probs = tf.layers.dense(inputs=layer_3,
                                                 units=act_space.n,
                                                 activation=tf.nn.softmax)

            # Critic
            with tf.variable_scope('value_net'):
                layer_1 = tf.layers.dense(inputs=self.obs,
                                          units=20,
                                          activation=tf.tanh)
                layer_2 = tf.layers.dense(inputs=layer_1,
                                          units=20,
                                          activation=tf.tanh)
                self.v_preds = tf.layers.dense(inputs=layer_2,
                                               units=1,
                                               activation=None)

            self.act_stochastic = tf.multinomial(tf.log(self.act_probs),
                                                 num_samples=1)
            self.act_stochastic = tf.reshape(self.act_stochastic, shape=[-1])

            self.act_deterministic = tf.argmax(self.act_probs, axis=1)

            # helper attributes
            self.scope = tf.get_variable_scope().name
Example 18
    def nearest_neighbor(self, x, means):
        """Find the nearest element in means to elements in x.

        Args:
            x: Batch of encoder continuous latent states sliced/projected into
               shape [-1, num_blocks, block_dim].
            means: Embedding means of shape.

        Returns:
            Tensor with nearest element in mean encoded in one-hot notation.
        """
        x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keep_dims=True)
        means_norm_sq = tf.reduce_sum(tf.square(means),
                                      axis=-1,
                                      keep_dims=True)
        scalar_prod = tf.matmul(tf.transpose(x, perm=[1, 0, 2]),
                                tf.transpose(means, perm=[0, 2, 1]))
        scalar_prod = tf.transpose(scalar_prod, perm=[1, 0, 2])
        dist = x_norm_sq + tf.transpose(means_norm_sq,
                                        perm=[2, 0, 1]) - 2 * scalar_prod

        if self.hparams.soft_em:
            nearest_idx = tf.stack([
                tf.multinomial(-dist[:, i, :],
                               num_samples=self.hparams.num_samples)
                for i in range(self.hparams.num_blocks)
            ],
                                   axis=1)
            nearest_hot = tf.one_hot(nearest_idx,
                                     depth=self.hparams.block_v_size)
            nearest_hot = tf.reduce_mean(nearest_hot, axis=-2)
        else:
            if self.hparams.random_top_k > 1:
                _, top_k_idx = tf.nn.top_k(-dist, k=self.hparams.random_top_k)
                nearest_idx = tf.gather(
                    top_k_idx,
                    tf.random_uniform([1],
                                      minval=0,
                                      maxval=self.hparams.random_top_k - 1,
                                      dtype=tf.int32),
                    axis=-1)
            else:
                if self.hparams.use_scales:
                    dist /= tf.reshape(self.hparams.scales,
                                       [1, 1, self.hparams.moe_num_experts])
                nearest_idx = tf.argmax(-dist, axis=-1)
            nearest_hot = tf.one_hot(nearest_idx, self.hparams.block_v_size)
        return nearest_hot
Example 19
    def _preprocess(self, features):
        """Preprocesses features for multilingual translation."""
        seqs, tags = {}, {}

        if self._hparams.mode == tf.estimator.ModeKeys.TRAIN:
            seqs["src"] = features["inputs"]
            seqs["tgt"] = features["targets"]
            seqs["aux"] = None
            tags["src"] = features["input_tags"]
            tags["tgt"] = features["target_tags"]
            tags["aux"] = None

            # Construct a tensor of auxiliary tags.
            batch_size = common_layers.shape_list(features["all_tags"])[0]
            num_all_tags = common_layers.shape_list(features["all_tags"])[1]
            # <float32> [num_all_tags, 1, emb_dim].
            all_tags = features["all_tags"][0]  # batch elements are identical.
            # <int32> [batch_size].
            aux_tag_index = tf.multinomial(tf.ones([1, num_all_tags]),
                                           batch_size,
                                           output_dtype=tf.int32)[0]
            # <float32> [batch_size, 1, 1, emb_dim].
            tags["aux"] = tf.expand_dims(tf.gather(all_tags, aux_tag_index), 1)

            from_domains = ["src", "src", "tgt"]
            to_domains = ["tgt", "aux", "aux"]
        else:
            seqs["src"] = features["inputs"]
            seqs["tgt"] = features["targets"]
            tags["src"] = None
            tags["tgt"] = features["target_tags"]

            # Expand target tags to beam width, if necessary.
            if self._hparams.mode == tf.estimator.ModeKeys.PREDICT:
                tags["tgt"] = tf.tile(tags["tgt"],
                                      [self._hparams.beam_width, 1, 1, 1])

            from_domains = ["src"]
            to_domains = ["tgt"]

        # Construct inputs and targets.
        inputs, targets = {}, {}
        for fd, td in zip(from_domains, to_domains):
            key = "%s>%s" % (fd, td)
            inputs[key], targets[key] = self._build_inputs_and_targets(
                seqs[fd], tags[fd], seqs[td], tags[td])

        return inputs, targets
Example 20
def mlp_categorical_policy(x, a, hidden_sizes, activation, output_activation,
                           action_space):
    act_dim = action_space.n
    logits = mlp(x, list(hidden_sizes) + [act_dim], activation, None)
    logp_all = tf.nn.log_softmax(logits)
    pi = tf.squeeze(tf.multinomial(logits, 1), axis=1)
    logp = tf.reduce_sum(tf.one_hot(a, depth=act_dim) * logp_all, axis=1)
    logp_pi = tf.reduce_sum(tf.one_hot(pi, depth=act_dim) * logp_all, axis=1)

    old_logp_all = placeholder(act_dim)
    d_kl = categorical_kl(logp_all, old_logp_all)
    ent = categorical_entropy(logp_all)

    pi_info = {'logp_all': logp_all}
    pi_info_phs = {'logp_all': old_logp_all}

    return pi, logp, logp_pi, pi_info, pi_info_phs, d_kl, ent
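
The one-hot/reduce_sum pattern above is a row-wise gather: it selects logp_all[i, a[i]] for each batch row. A sketch of the same selection done explicitly with tf.gather_nd:

import tensorflow as tf  # TF 1.x

logp_all = tf.log([[0.2, 0.8], [0.5, 0.5]])
a = tf.constant([1, 0])
logp = tf.reduce_sum(tf.one_hot(a, depth=2) * logp_all, axis=1)
# Equivalent gather-based form:
idx = tf.stack([tf.range(tf.shape(a)[0]), a], axis=-1)
logp_gathered = tf.gather_nd(logp_all, idx)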
Example 21
def categorical_policy(x, a, config, action_space):
    act_dim = action_space.n
    config["output_size"] = act_dim
    logits = make_network(x, config)
    logp_all = tf.nn.log_softmax(logits)
    pi = tf.squeeze(tf.multinomial(logits, 1), axis=1)
    logp = tf.reduce_sum(tf.one_hot(a, depth=act_dim) * logp_all, axis=1)
    logp_pi = tf.reduce_sum(tf.one_hot(pi, depth=act_dim) * logp_all, axis=1)

    old_logp_all = placeholder(act_dim)
    d_kl = categorical_kl(logp_all, old_logp_all)
    ent = categorical_entropy(logp_all)

    pi_info = {"logp_all": logp_all}
    pi_info_phs = {"logp_all": old_logp_all}

    return pi, logp, logp_pi, pi_info, pi_info_phs, d_kl, ent
Example 22
    def _build_sampler(self):
        """Build the sampler ops and the log_prob ops."""

        arc_seq = []
        sample_log_probs = []
        all_h = []

        # sampler ops
        inputs = self.g_emb
        prev_c = [
            tf.zeros([1, self.lstm_size], dtype=tf.float32)
            for _ in range(self.lstm_num_layers)
        ]
        prev_h = [
            tf.zeros([1, self.lstm_size], dtype=tf.float32)
            for _ in range(self.lstm_num_layers)
        ]
        for layer_id in range(self.num_layers):
            for branch_id in range(self.num_branches):
                next_c, next_h = stack_lstm(inputs, prev_c, prev_h,
                                            self.w_lstm)
                all_h.append(tf.stop_gradient(next_h[-1]))

                logits = tf.matmul(next_h[-1], self.w_soft)
                if self.temperature is not None:
                    logits /= self.temperature
                if self.tanh_constant is not None:
                    logits = self.tanh_constant * tf.tanh(logits)

                config_id = tf.multinomial(logits, 1)
                config_id = tf.to_int32(config_id)
                config_id = tf.reshape(config_id, [1])
                arc_seq.append(config_id)
                log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=config_id)
                sample_log_probs.append(log_prob)

                inputs = tf.nn.embedding_lookup(self.w_emb, config_id)
        arc_seq = tf.concat(arc_seq, axis=0)
        self.sample_arc = arc_seq

        self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
        self.ppl = tf.exp(
            tf.reduce_sum(self.sample_log_probs) /
            tf.to_float(self.num_layers * self.num_branches))
        self.all_h = all_h
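
Note the trick at the end of the loop: sparse_softmax_cross_entropy_with_logits with the sampled index as the label returns -log softmax(logits)[config_id], i.e. the negative log-probability of the sampled branch, which is exactly what a REINFORCE-style update needs. A minimal sketch:

import tensorflow as tf  # TF 1.x

logits = tf.constant([[1.0, 2.0, 0.5]])
config_id = tf.reshape(tf.to_int32(tf.multinomial(logits, 1)), [1])
# neg_log_p == -log softmax(logits)[0, config_id[0]]
neg_log_p = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=logits, labels=config_id)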
Example 23
    def _head(self, policy_input, heading, xy, target_xy):
        """Build the head of the agent: linear policy and value function, and pass
    the auxiliary outputs through.
    """

        # Linear policy and value function.
        policy_logits = snt.Linear(self._num_actions,
                                   name='policy_logits')(policy_input)
        baseline = tf.squeeze(snt.Linear(1, name='baseline')(policy_input),
                              axis=-1)

        # Sample an action from the policy.
        new_action = tf.multinomial(policy_logits,
                                    num_samples=1,
                                    output_dtype=tf.int32)
        new_action = tf.squeeze(new_action, 1, name='new_action')

        return AgentOutput(new_action, policy_logits, baseline, heading, xy,
                           target_xy)
Example 24
def vq_nearest_neighbor(x, hparams):
    """Find the nearest element in means to elements in x."""
    bottleneck_size = 2**hparams.bottleneck_bits
    means = hparams.means
    x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
    means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
    scalar_prod = tf.matmul(x, means, transpose_b=True)
    dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
    if hparams.bottleneck_kind == "em":
        x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
        x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
        x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
    else:
        x_means_idx = tf.argmax(-dist, axis=-1)
        x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
    x_means = tf.matmul(x_means_hot, means)
    e_loss = tf.reduce_mean(tf.squared_difference(x,
                                                  tf.stop_gradient(x_means)))
    return x_means_hot, e_loss
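
In the "em" branch, tf.multinomial(-dist, ...) draws codebook indices with probability softmax(-dist), so closer means are sampled more often, and averaging the one-hot draws yields a soft assignment. A sketch of that branch in isolation:

import tensorflow as tf  # TF 1.x

dist = tf.constant([[0.1, 2.0, 5.0]])
idx = tf.multinomial(-dist, num_samples=4)               # P(i) = softmax(-dist)[i]
soft = tf.reduce_mean(tf.one_hot(idx, depth=3), axis=1)  # soft one-hot assignment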
Example 25
def multinomial_sample(x, vocab_size=None, sampling_method="random",
                       temperature=1.0):
  """Multinomial sampling from a n-dimensional tensor.

  Args:
    x: Tensor of shape [..., vocab_size]. Parameterizes logits of multinomial.
    vocab_size: Number of classes in multinomial distribution.
    sampling_method: String, "random" or otherwise deterministic.
    temperature: Positive float.

  Returns:
    Tensor of shape [...].
  """
  vocab_size = vocab_size or common_layers.shape_list(x)[-1]
  if sampling_method == "random" and temperature > 0.0:
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return reshaped_samples
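
Temperature rescales the logits before sampling: values below 1 sharpen the distribution toward the argmax, values above 1 flatten it toward uniform, and temperature 0 falls through to the deterministic argmax branch. A small sketch:

import tensorflow as tf  # TF 1.x

logits = tf.constant([[1.0, 2.0, 3.0]])
cold = tf.multinomial(logits / 0.5, num_samples=10)  # sharper: mostly index 2
warm = tf.multinomial(logits / 2.0, num_samples=10)  # flatter: more diverse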
Example 26
    def _build(self, inputs):
        (shared_inputs, extra_policy_inputs) = inputs
        policy_in = tf.concat([shared_inputs, extra_policy_inputs], axis=1)

        policy = snt.nets.MLP(output_sizes=self._policy_layers,
                              activation=self._activation,
                              name='policy_mlp')(policy_in)

        # Sample an action from the policy logits.
        action = tf.multinomial(policy, num_samples=1, output_dtype=tf.int32)
        action = tf.squeeze(action, 1)  # [B, 1] -> [B]

        if self._policy_clip_abs_value > 0:
            policy = snt.clip_gradient(
                net=policy,
                clip_value_min=-self._policy_clip_abs_value,
                clip_value_max=self._policy_clip_abs_value)

        baseline_in = tf.concat(
            [shared_inputs, tf.stop_gradient(policy)], axis=1)
        baseline = snt.nets.MLP(self._baseline_layers,
                                activation=self._activation,
                                name='baseline_mlp')(baseline_in)
        baseline = tf.squeeze(baseline, axis=-1)  # [B, 1] -> [B]

        if self._policy_clip_abs_value > 0:
            baseline = snt.clip_gradient(
                net=baseline,
                clip_value_min=-self._policy_clip_abs_value,
                clip_value_max=self._policy_clip_abs_value)

        outputs = PolicyOutputs(policy=policy,
                                action=action,
                                baseline=baseline)

        return outputs
Example 27
    n_max_steps = 1000
    n_episode = 100
    gamma = 0.95

    initializer = tf.variance_scaling_initializer()
    X = tf.placeholder(tf.float32, shape=[None, n_inputs])
    y = tf.placeholder(tf.float32, shape=[None, n_outputs])

    hidden = tf.layers.dense(X,
                             n_hidden,
                             activation=tf.nn.elu,
                             kernel_initializer=initializer)
    logits = tf.layers.dense(hidden, n_outputs, kernel_initializer=initializer)
    outputs = tf.nn.sigmoid(logits)
    p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])
    action = tf.multinomial(tf.log(p_left_and_right), num_samples=1)

    # The sampled action becomes its own target, so cross_entropy's gradient is
    # the gradient of log p(action); note this rebinds the placeholder y above.
    y = 1. - tf.to_float(action)
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y,
                                                            logits=logits)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    grads_and_vars = optimizer.compute_gradients(cross_entropy)
    gradients = [grad for grad, variable in grads_and_vars]
    gradient_placeholders = []
    grads_and_vars_feed = []
    for grad, variable in grads_and_vars:
        gradient_placeholder = tf.placeholder(tf.float32,
                                              shape=grad.get_shape())
        gradient_placeholders.append(gradient_placeholder)
        grads_and_vars_feed.append((gradient_placeholder, variable))
    training_op = optimizer.apply_gradients(grads_and_vars_feed)
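
This is the policy-gradient CartPole setup from Hands-On Machine Learning: the per-step gradients computed above are later re-fed through the gradient placeholders, scaled by the discounted returns. A hedged sketch of how that feed typically looks; all_gradients, all_rewards, and sess are assumed to come from the surrounding rollout loop:

import numpy as np

feed_dict = {}
for var_index, gradient_placeholder in enumerate(gradient_placeholders):
    # Average each variable's gradient over all steps of all episodes,
    # weighting every step by its (normalized) discounted return.
    mean_gradients = np.mean(
        [reward * all_gradients[game_index][step][var_index]
         for game_index, rewards in enumerate(all_rewards)
         for step, reward in enumerate(rewards)],
        axis=0)
    feed_dict[gradient_placeholder] = mean_gradients
sess.run(training_op, feed_dict=feed_dict)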
Example 28
    def make_data_tensor(self, train=True):
        if train:
            folders = self.metatrain_character_folders
            # number of tasks, not number of meta-iterations. (divide by metabatch size to measure)
            num_total_batches = 200000
            if FLAGS.task_type == "ne":
                print("Inside ne train")
                folders = self.metatrain_character_folders[:50] # 10 classification tasks
                num_total_batches = 5000
        else:
            folders = self.metaval_character_folders
            num_total_batches = 600
            if FLAGS.task_type == "ne":
                print("inside ne val")
                if FLAGS.test_set:
                    folders = self.metaval_character_folders[:15]
                else:
                    folders = self.metaval_character_folders[:15] # 3 classification tasks
                num_total_batches = 60

        # print("folders", folders)
        # make list of files
        print('Generating filenames')

        if FLAGS.task_setting == 'ne' and train:
            # all_filenames = []
            # random.shuffle(folders)
            # for i in range(len(folders)/self.num_classes):
            #     #sampled_character_folders = random.sample(folders, self.num_classes)
            #     #random.shuffle(sampled_character_folders)
            #     sampled_character_folders = folders[i*self.num_classes:(i+1)*self.num_classes]
            #     labels_and_images = get_images(sampled_character_folders, range(self.num_classes), nb_samples=self.num_samples_per_class, shuffle=False)
            #     # make sure the above isn't randomized order
            #     labels = [li[0] for li in labels_and_images]
            #     filenames = [li[1] for li in labels_and_images]
            #     all_filenames.extend(filenames)
            all_filenames = []
            print("len folders", len(folders))
            random.shuffle(folders)
            task_folders_new = []
            for i in range(int(len(folders)/self.num_classes)):
                # sampled_character_folders = random.sample(folders, self.num_classes)
                # random.shuffle(sampled_character_folders)
                sampled_character_folders = folders[i*self.num_classes:(i+1)*self.num_classes]
                task_folders_temp = itertools.permutations(sampled_character_folders)
                task_folders_new.extend(task_folders_temp)
            # print("task_folders_new", task_folders_new)
            print("len of task_folders_new", len(task_folders_new))
            random.shuffle(task_folders_new)
            for i in range(len(task_folders_new)):
                sampled_character_folders = task_folders_new[i]
                # print("scf", sampled_character_folders)
                labels_and_images = get_images(sampled_character_folders, range(self.num_classes), nb_samples=self.num_samples_per_class, shuffle=False)
                # make sure the above isn't randomized order
                labels = [li[0] for li in labels_and_images]
                filenames = [li[1] for li in labels_and_images]
                all_filenames.extend(filenames)
        else:
            all_filenames = []
            for _ in range(num_total_batches):
                sampled_character_folders = random.sample(folders, self.num_classes)
                random.shuffle(sampled_character_folders)
                labels_and_images = get_images(sampled_character_folders, range(self.num_classes), nb_samples=self.num_samples_per_class, shuffle=False)
                # make sure the above isn't randomized order
                labels = [li[0] for li in labels_and_images]
                filenames = [li[1] for li in labels_and_images]
                all_filenames.extend(filenames)

        # make queue for tensorflow to read from
        filename_queue = tf.train.string_input_producer(tf.convert_to_tensor(all_filenames), shuffle=False)
        print('Generating image processing ops')
        image_reader = tf.WholeFileReader()
        _, image_file = image_reader.read(filename_queue)
        if FLAGS.datasource == 'miniimagenet':
            image = tf.image.decode_jpeg(image_file, channels=3)
            image.set_shape((self.img_size[0],self.img_size[1],3))
            image = tf.reshape(image, [self.dim_input])
            image = tf.cast(image, tf.float32) / 255.0
        else:
            image = tf.image.decode_png(image_file)
            image.set_shape((self.img_size[0],self.img_size[1],1))
            image = tf.reshape(image, [self.dim_input])
            image = tf.cast(image, tf.float32) / 255.0
            image = 1.0 - image  # invert
        num_preprocess_threads = 1 # TODO - enable this to be set to >1
        min_queue_examples = 256
        examples_per_batch = self.num_classes * self.num_samples_per_class
        batch_image_size = self.batch_size  * examples_per_batch
        print('Batching images')
        print("batch_image_size", batch_image_size)
        images = tf.train.batch(
                [image],
                batch_size = batch_image_size,
                num_threads=num_preprocess_threads,
                capacity=min_queue_examples + 3 * batch_image_size,
                )
        print("len images", images.shape)
        all_image_batches, all_label_batches = [], []
        print('Manipulating image data to be right shape')
        for i in range(self.batch_size):
            image_batch = images[i*examples_per_batch:(i+1)*examples_per_batch]

            if FLAGS.datasource == 'omniglot':
                # omniglot augments the dataset by rotating digits to create new classes
                # get rotation per class (e.g. 0,1,2,0,0 if there are 5 classes)
                rotations = tf.multinomial(tf.log([[1., 1., 1., 1.]]), self.num_classes)
            # print("labels", labels)
            label_batch = tf.convert_to_tensor(labels)
            new_list, new_label_list = [], []
            # shuffles the data within a batch, class labels remain fixed
            for k in range(self.num_samples_per_class):
                class_idxs = tf.range(0, self.num_classes)
                class_idxs = tf.random_shuffle(class_idxs)

                true_idxs = class_idxs * self.num_samples_per_class + k
                new_list.append(tf.gather(image_batch,true_idxs))
                if FLAGS.datasource == 'omniglot':  # and FLAGS.train:
                    new_list[-1] = tf.stack([tf.reshape(tf.image.rot90(
                        tf.reshape(new_list[-1][ind], [self.img_size[0], self.img_size[1],1]),
                        k=tf.cast(rotations[0, class_idxs[ind]], tf.int32)), (self.dim_input,))
                        for ind in range(self.num_classes)])
                new_label_list.append(tf.gather(label_batch, true_idxs))
            new_list = tf.concat(new_list, 0)  # has shape [self.num_classes*self.num_samples_per_class, self.dim_input]
            new_label_list = tf.concat(new_label_list, 0)
            all_image_batches.append(new_list)
            all_label_batches.append(new_label_list)
        all_image_batches = tf.stack(all_image_batches)
        all_label_batches = tf.stack(all_label_batches)
        print("all_image_batches", all_image_batches)
        all_label_batches = tf.one_hot(all_label_batches, self.num_classes)
        return all_image_batches, all_label_batches
Example 29
    def _create_network(self, view_space, feature_space):
        input_view = tf.placeholder(tf.float32, (None, ) + view_space)
        input_feature = tf.placeholder(tf.float32, (None, ) + feature_space)
        action = tf.placeholder(tf.int32, [None])

        reward = tf.placeholder(tf.float32, [None])

        hidden_size = [256]

        # fully connected
        flatten_view = tf.reshape(
            input_view,
            [-1, np.prod([v.value for v in input_view.shape[1:]])])
        h_view = tf.layers.dense(flatten_view,
                                 units=hidden_size[0],
                                 activation=tf.nn.relu)

        h_emb = tf.layers.dense(input_feature,
                                units=hidden_size[0],
                                activation=tf.nn.relu)

        dense = tf.concat([h_view, h_emb], axis=1)
        dense = tf.layers.dense(dense,
                                units=hidden_size[0] * 2,
                                activation=tf.nn.relu)

        policy = tf.layers.dense(dense / 0.1,
                                 units=self.num_actions,
                                 activation=tf.nn.softmax)
        policy = tf.clip_by_value(policy, 1e-10, 1 - 1e-10)

        self.calc_action = tf.multinomial(tf.log(policy), 1)

        value = tf.layers.dense(dense, units=1)
        value = tf.reshape(value, (-1, ))

        action_mask = tf.one_hot(action, self.num_actions)
        advantage = tf.stop_gradient(reward - value)

        log_policy = tf.log(policy + 1e-6)
        log_prob = tf.reduce_sum(log_policy * action_mask, axis=1)

        pg_loss = -tf.reduce_mean(advantage * log_prob)
        vf_loss = self.value_coef * tf.reduce_mean(tf.square(reward - value))
        neg_entropy = self.ent_coef * tf.reduce_mean(
            tf.reduce_sum(policy * log_policy, axis=1))
        total_loss = pg_loss + vf_loss + neg_entropy

        # train op (clip gradient)
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        gradients, variables = zip(*optimizer.compute_gradients(total_loss))
        gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        self.train_op = optimizer.apply_gradients(zip(gradients, variables))

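        # NOTE: the unclipped optimizer below overwrites the clipped
        # train_op built above; only one of the two takes effect.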
        train_op = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(total_loss)

        self.input_view = input_view
        self.input_feature = input_feature
        self.action = action
        self.reward = reward

        self.policy, self.value = policy, value
        self.train_op = train_op
        self.pg_loss, self.vf_loss, self.reg_loss = pg_loss, vf_loss, neg_entropy
        self.total_loss = total_loss
Example 30
#initial_means = tf.placeholder_with_default(
#    tf.constant([[-3,1],
#                 [-3,-3],
#                 [-1,3],
#                 [-1,-1],
#                 [3,3],
#                 [1,1],
#                 [1,-3],
#                 [3,1]],dtype='float64'),
#    shape=[COMPONENTS, DIMENSIONS]
#)

initial_means = tf.placeholder_with_default(tf.gather(
    input,
    tf.squeeze(tf.multinomial(tf.ones([1, tf.shape(input)[0]]), COMPONENTS))),
                                            shape=[COMPONENTS, DIMENSIONS])
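# Note: tf.multinomial samples with replacement, so the rows gathered above
# for the initial means may contain duplicates.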
initial_covariances = tf.placeholder_with_default(
    tf.cast(tf.ones([COMPONENTS, DIMENSIONS]), tf.float64) * avg_dim_variance,
    shape=[COMPONENTS, DIMENSIONS])
initial_weights = tf.placeholder_with_default(tf.cast(
    tf.constant(1.0 / COMPONENTS, shape=[COMPONENTS]), tf.float64),
                                              shape=[COMPONENTS])

# trainable variables: component means, covariances, and weights

means = tf.Variable(tf.constant(
    [[-3, 1], [-3, -3], [-1, 3], [-1, -1], [3, 3], [1, 1], [1, -3], [3, 1]],
    dtype='float64'),
                    dtype=tf.float64)
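# NOTE: the data-driven initialization below immediately overrides the
# hard-coded means above.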
means = tf.Variable(initial_means, dtype=tf.float64)