def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
             normalize=False, use_recurrent=False):
    LearningModel.__init__(self, m_size, normalize, use_recurrent, brain)
    num_streams = 1
    hidden_streams = self.create_new_obs(num_streams, h_size, n_layers)
    hidden = hidden_streams[0]
    self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
    hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
    if self.use_recurrent:
        self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32,
                                        name='recurrent_in')
        hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in)
        self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
    self.policy = tf.layers.dense(hidden_reg, self.a_size, activation=None, use_bias=False,
                                  kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

    if brain.vector_action_space_type == "discrete":
        self.action_probs = tf.nn.softmax(self.policy)
        self.sample_action_float = tf.multinomial(self.policy, 1)
        self.sample_action_float = tf.identity(self.sample_action_float, name="action")
        self.sample_action = tf.cast(self.sample_action_float, tf.int32)
        self.true_action = tf.placeholder(shape=[None], dtype=tf.int32, name="teacher_action")
        self.action_oh = tf.one_hot(self.true_action, self.a_size)
        self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
        self.action_percent = tf.reduce_mean(tf.cast(
            tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action),
            tf.float32))
    else:
        self.sample_action = tf.identity(self.policy, name="action")
        self.true_action = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32,
                                          name="teacher_action")
        self.loss = tf.reduce_sum(tf.squared_difference(self.true_action, self.sample_action))

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    self.update = optimizer.minimize(self.loss)
def _g_recurrence_2(i, x_t, gen_x, h_tm1, h_tm1_manager, last_goal, real_goal):
    # with tf.device('/cpu:0'):
    cur_sen = tf.cond(
        i > 0,
        lambda: tf.split(
            tf.concat([tf.transpose(gen_x.stack(), perm=[1, 0]), self.padding_array], 1),
            [self.sequence_length, i - 1], 1)[0],
        lambda: self.padding_array)
    with tf.variable_scope(self.scope):
        feature = self.FeatureExtractor_unit(cur_sen, self.drop_out)
    h_t_Worker = self.g_worker_recurrent_unit(x_t, h_tm1)  # hidden_memory_tuple
    o_t_Worker = self.g_worker_output_unit(h_t_Worker)  # batch x vocab, logits not prob
    o_t_Worker = tf.reshape(o_t_Worker, [self.batch_size, self.num_vocabulary, self.goal_size])

    h_t_manager = self.g_manager_recurrent_unit(feature, h_tm1_manager)
    sub_goal = self.g_manager_output_unit(h_t_manager)
    sub_goal = tf.nn.l2_normalize(sub_goal, 1)
    real_sub_goal = tf.add(last_goal, sub_goal)

    w_g = tf.matmul(real_goal, self.g_change)  # batch x goal_size
    w_g = tf.nn.l2_normalize(w_g, 1)
    w_g = tf.expand_dims(w_g, 2)  # batch x goal_size x 1

    x_logits = tf.matmul(o_t_Worker, w_g)
    x_logits = tf.squeeze(x_logits)

    log_prob = tf.log(tf.nn.softmax(x_logits))
    next_token = tf.cast(tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
    x_tp1 = tf.nn.embedding_lookup(self.g_embeddings, next_token)  # batch x emb_dim
    with tf.control_dependencies([cur_sen]):
        gen_x = gen_x.write(i - 1, next_token)  # indices, batch_size
    return i + 1, x_tp1, gen_x, h_t_Worker, h_t_manager, \
        tf.cond(((i) % self.step_size) > 0, lambda: real_sub_goal,
                lambda: tf.constant(0.0, shape=[self.batch_size, self.goal_out_size])), \
        tf.cond(((i) % self.step_size) > 0, lambda: real_goal, lambda: real_sub_goal)
def testSmallEntropy(self):
    with self.test_session(use_gpu=self.use_gpu):
        # A logit value of -10 corresponds to a probability of ~5e-5.
        logits = tf.constant([[-10., 10., -10.], [-10., -10., 10.]])
        num_samples = 1000
        samples = tf.multinomial(logits, num_samples).eval()
        self.assertAllEqual([[1] * num_samples, [2] * num_samples], samples)
def body(i, prev_base_state, prev_high_states, prev_y, prev_emb, y_array):
    state1 = decoder.grustep1.forward(prev_base_state, prev_emb)
    att_ctx = decoder.attstep.forward(state1)
    base_state = decoder.grustep2.forward(state1, att_ctx)
    if decoder.high_gru_stack is None:
        output = base_state
        high_states = []
    else:
        if decoder.high_gru_stack.context_state_size == 0:
            output, high_states = decoder.high_gru_stack.forward_single(
                prev_high_states, base_state)
        else:
            output, high_states = decoder.high_gru_stack.forward_single(
                prev_high_states, base_state, context=att_ctx)
    logits = decoder.predictor.get_logits(prev_emb, output, att_ctx, multi_step=False)
    new_y = tf.multinomial(logits, num_samples=1)
    new_y = tf.cast(new_y, dtype=tf.int32)
    new_y = tf.squeeze(new_y, axis=1)
    new_y = tf.where(tf.equal(prev_y, tf.constant(0, dtype=tf.int32)),
                     tf.zeros_like(new_y), new_y)
    y_array = y_array.write(index=i, value=new_y)
    new_emb = decoder.y_emb_layer.forward(new_y, factor=0)
    return i + 1, base_state, high_states, new_y, new_emb, y_array
def testEmpty(self):
    classes = 5
    with self.test_session(use_gpu=self.use_gpu):
        for batch in 0, 3:
            for samples in 0, 7:
                x = tf.multinomial(tf.zeros([batch, classes]), samples).eval()
                self.assertEqual(x.shape, (batch, samples))
def loop_function(prev, _):
    prev = tf.nn.xw_plus_b(prev, output_projection[0], output_projection[1])
    prev_symbol = tf.cast(
        tf.reshape(tf.multinomial(prev, 1), [FLAGS.batch_size * FLAGS.max_dec_sen_num]),
        tf.int32)
    emb_prev = tf.nn.embedding_lookup(embedding, prev_symbol)
    return emb_prev
def UpdateProbs(self, inp):
    """Update probabilities of each particle based on the 2D matrix inp,
    which is a 2D perspective projection of the scene."""
    projection, onscreen = self.project()
    filtered_projection = tf.to_int64(tf.select(onscreen, projection, tf.zeros_like(projection)))
    per_state_probabilities = tf.gather_nd(inp, filtered_projection)

    filtered_probabilities = tf.select(onscreen, per_state_probabilities,
                                       tf.zeros_like(per_state_probabilities))

    new_state_indicies = tf.squeeze(tf.multinomial(
        tf.expand_dims(tf.log(filtered_probabilities), 0), self.particles / 10 * 9))
    new_state = tf.gather(self.state, new_state_indicies)

    # Add momentum
    new_state = tf.concat(1, [new_state[:, 0:3] + new_state[:, 3:6], new_state[:, 3:10]])

    # Add in particles for the "just come onscreen" case.
    new_state = tf.concat(0, [new_state,
                              tf.random_normal([self.particles / 10, 10]) * self.initial_std + self.initial_bias])

    new_state = new_state + tf.random_normal([self.particles, 10]) * self.update_std
    # Todo: permute state by adding noise.

    return self.state.assign(new_state)
def call(self, inputs):
    """Calculates logits and action.

    Args:
      inputs: Observations from a step in the cart-pole environment, of shape
        `(batch_size, input_size)`

    Returns:
      logits: the logits output by the output layer. This can be viewed as the
        likelihood values of choosing the left (0) action. Shape:
        `(batch_size, 1)`.
      actions: randomly selected actions ({0, 1}) based on the logits. Shape:
        `(batch_size, 1)`.
    """
    hidden = self._hidden_layer(inputs)
    logits = self._output_layer(hidden)

    left_prob = tf.nn.sigmoid(logits)
    action_probs = tf.concat([left_prob, 1.0 - left_prob], 1)

    self._grad_fn = eager.implicit_gradients(
        self._get_cross_entropy_and_save_actions)

    actions = tf.multinomial(tf.log(action_probs), 1)
    return logits, actions
def multinomial_squeeze(logits, temperature=1.0):
    logits_shape = common_layers.shape_list(logits)
    reshaped_logits = (
        tf.reshape(logits, [-1, logits_shape[-1]]) / temperature)
    choices = tf.multinomial(reshaped_logits, 1)
    choices = tf.reshape(choices, logits_shape[:-1])
    return choices
def generate_string(self, initial_logits, initial_state, sequence_length):
    """Builds sub-graph to generate a string, sampled from the model.

    Args:
      initial_logits: Starting logits to sample from.
      initial_state: Starting state for the RNN core.
      sequence_length: Number of characters to sample.

    Returns:
      A Tensor of characters, with dimensions `[sequence_length, batch_size,
      output_size]`.
    """
    current_logits = initial_logits
    current_state = initial_state

    generated_letters = []
    for _ in range(sequence_length):
        # Sample a character index from distribution.
        char_index = tf.squeeze(tf.multinomial(current_logits, 1))
        char_one_hot = tf.one_hot(char_index, self._output_size, 1.0, 0.0)
        generated_letters.append(char_one_hot)

        # Feed character back into the deep_lstm.
        gen_out_seq, current_state = self._core(
            tf.nn.relu(self._embed_module(char_one_hot)),
            current_state)
        current_logits = self._output_module(gen_out_seq)

    generated_string = tf.stack(generated_letters)
    return generated_string
def create_dc_actor_critic(self, h_size, num_layers):
    num_streams = 1
    hidden_streams = self.create_new_obs(num_streams, h_size, num_layers)
    hidden = hidden_streams[0]

    if self.use_recurrent:
        tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
        self.prev_action = tf.placeholder(shape=[None], dtype=tf.int32, name='prev_action')
        self.prev_action_oh = c_layers.one_hot_encoding(self.prev_action, self.a_size)
        hidden = tf.concat([hidden, self.prev_action_oh], axis=1)

        self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32,
                                        name='recurrent_in')
        hidden, self.memory_out = self.create_recurrent_encoder(hidden, self.memory_in)
        self.memory_out = tf.identity(self.memory_out, name='recurrent_out')

    self.policy = tf.layers.dense(hidden, self.a_size, activation=None, use_bias=False,
                                  kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

    self.all_probs = tf.nn.softmax(self.policy, name="action_probs")
    self.output = tf.multinomial(self.policy, 1)
    self.output = tf.identity(self.output, name="action")

    self.value = tf.layers.dense(hidden, 1, activation=None)
    self.value = tf.identity(self.value, name="value_estimate")

    self.entropy = -tf.reduce_sum(self.all_probs * tf.log(self.all_probs + 1e-10), axis=1)

    self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
    self.selected_actions = c_layers.one_hot_encoding(self.action_holder, self.a_size)

    self.all_old_probs = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32,
                                        name='old_probabilities')

    # We reshape these tensors to [batch x 1] in order to be of the same rank as
    # continuous control probabilities.
    self.probs = tf.expand_dims(tf.reduce_sum(self.all_probs * self.selected_actions, axis=1), 1)
    self.old_probs = tf.expand_dims(tf.reduce_sum(self.all_old_probs * self.selected_actions, axis=1), 1)
def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
    with tf.name_scope(name, "simple_decoder_fn_inference",
                       [time, cell_state, cell_input, cell_output, context_state]):
        if cell_input is not None:
            raise ValueError("Expected cell_input to be None, but saw: %s" % cell_input)
        if cell_output is None:
            # invariant that this is time == 0
            next_input_id = tf.ones([batch_size], dtype=dtype) * (start_of_sequence_id)
            done = tf.zeros([batch_size], dtype=tf.bool)
            cell_state = encoder_state
            cell_output = tf.zeros([cell_size], dtype=tf.float32)
        else:
            softmax_output = output_fn(cell_output)
            if sample:
                next_input_id = tf.squeeze(tf.multinomial(softmax_output, 1), 1)
            else:
                next_input_id = tf.argmax(softmax_output, 1)
            next_input_id = tf.cast(next_input_id, dtype=dtype)
            done = tf.equal(next_input_id, end_of_sequence_id)

        next_input = tf.gather(embeddings, next_input_id)

        # if time > maxlen, return all true vector
        done = tf.cond(
            tf.greater(time, maximum_length),
            lambda: tf.ones([batch_size], dtype=tf.bool),
            lambda: done)
        return (done, cell_state, next_input, next_input_id, context_state)
def __init__(self, q_values, observations, num_actions, stochastic, eps,
             softmax, softmax_temp):
    if softmax:
        action_dist = Categorical(q_values / softmax_temp)
        self.action = action_dist.sample()
        self.action_prob = action_dist.sampled_action_prob()
        return

    deterministic_actions = tf.argmax(q_values, axis=1)
    batch_size = tf.shape(observations)[0]

    # Special case masked out actions (q_value ~= -inf) so that we don't
    # even consider them for exploration.
    random_valid_action_logits = tf.where(
        tf.equal(q_values, tf.float32.min),
        tf.ones_like(q_values) * tf.float32.min,
        tf.ones_like(q_values))
    random_actions = tf.squeeze(
        tf.multinomial(random_valid_action_logits, 1), axis=1)

    chose_random = tf.random_uniform(
        tf.stack([batch_size]), minval=0, maxval=1, dtype=tf.float32) < eps
    stochastic_actions = tf.where(chose_random, random_actions,
                                  deterministic_actions)

    self.action = tf.cond(stochastic, lambda: stochastic_actions,
                          lambda: deterministic_actions)
    self.action_prob = None
def _sample_single(args):
    logits, n_draw = args[0], args[1]  # [K], []
    x = tf.multinomial(logits[tf.newaxis, ...], n_draw, seed)  # [1, n*n_draw]
    x = tf.reshape(x, shape=[n, -1])  # [n, n_draw]
    x = tf.reduce_sum(tf.one_hot(x, depth=k), axis=-2)  # [n, k]
    return x
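
# A minimal standalone sketch (not from the snippet's codebase; assumes TF 1.x where
# tf.multinomial exists, and hard-codes K=3 in place of the snippet's `k`/`n`). It shows
# the counting trick used in _sample_single above: one-hot encoding categorical draws
# and summing over the sample axis yields per-class counts, i.e. a multinomial draw.
import tensorflow as tf

logits = tf.constant([[0.0, 1.0, 2.0]])                    # [1, K]
n_draw = 1000
ids = tf.multinomial(logits, n_draw)                       # [1, n_draw] class indices
counts = tf.reduce_sum(tf.one_hot(ids, depth=3), axis=1)   # [1, K] counts per class

with tf.Session() as sess:
    print(sess.run(counts))                                # e.g. [[ 90. 245. 665.]]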
def sample(self, projected_output):
    """Return integer ID tensor representing the sampled word.

    Args:
        projected_output: Tensor [1, 1, state_size], representing a single
            decoder timestep output.
    """
    # TODO: We really need a tf.control_dependencies check here (for rank).
    with tf.name_scope('decoder_sampler', values=[projected_output]):
        # Protect against extra size-1 dimensions; grab the 1D tensor
        # of size state_size.
        logits = tf.squeeze(projected_output)
        if self.temperature < 0.02:
            return tf.argmax(logits, axis=0)

        # Convert logits to probability distribution.
        probabilities = tf.div(logits, self.temperature)
        projected_output = tf.div(
            tf.exp(probabilities),
            tf.reduce_sum(tf.exp(probabilities), axis=-1))

        # Sample 1 time from the probability distribution.
        sample_ID = tf.squeeze(
            tf.multinomial(tf.expand_dims(probabilities, 0), 1))
    return sample_ID
def testNegativeMinLogits(self):
    tf.set_random_seed(78844)
    with self.test_session(use_gpu=self.use_gpu):
        logits = tf.constant([[np.finfo(np.float32).min] * 1023 + [0]])
        num_samples = 1000
        samples = tf.multinomial(logits, num_samples).eval()
        self.assertAllEqual([[1023] * num_samples], samples)
def build_generator(self):
    """
    Generator for generating captions.
    Supports sample-max or sampling from the distribution.
    No beam search here; beam search is in the decoder.
    """
    # Variables for the sample setting
    self.sample_max = tf.Variable(True, trainable=False, name="sample_max")
    self.sample_temperature = tf.Variable(1.0, trainable=False, name="temperature")

    self.generator = []
    with tf.variable_scope("rnnlm"):
        flattened_ctx = tf.reshape(self.context, [self.batch_size, 196, 512])
        ctx_mean = tf.reduce_mean(flattened_ctx, 1)

        tf.get_variable_scope().reuse_variables()

        initial_state = utils.get_initial_state(ctx_mean, self.cell.state_size)

        # Projected context.
        # This is used in the attention module; do this outside the loop to reduce
        # redundant computations.
        # with tf.variable_scope("attention"):
        if self.att_hid_size == 0:
            pctx = slim.fully_connected(flattened_ctx, 1, activation_fn=None,
                                        scope='ctx_att')  # (batch) * 196 * 1
        else:
            pctx = slim.fully_connected(flattened_ctx, self.att_hid_size, activation_fn=None,
                                        scope='ctx_att')  # (batch) * 196 * att_hid_size

        rnn_input = tf.nn.embedding_lookup(self.Wemb, tf.zeros([self.batch_size], tf.int32))

        prev_h = utils.last_hidden_vec(initial_state)

        self.g_alphas = []
        outputs = []
        state = initial_state
        for ind in range(MAX_STEPS):
            with tf.variable_scope("attention"):
                alpha = self.get_alpha(prev_h, pctx)
                self.g_alphas.append(alpha)
                weighted_context = tf.reduce_sum(flattened_ctx * tf.expand_dims(alpha, 2), 1)

            output, state = self.cell(tf.concat(axis=1, values=[weighted_context, rnn_input]), state)
            outputs.append(output)
            prev_h = output

            # Get the input of the next timestep
            prev_logit = slim.fully_connected(prev_h, self.vocab_size + 1,
                                              activation_fn=None, scope='logit')
            prev_symbol = tf.stop_gradient(tf.cond(
                self.sample_max,
                # pick the word with largest probability as the input of the next time step
                lambda: tf.argmax(prev_logit, 1),
                # sample from the distribution
                lambda: tf.squeeze(
                    tf.multinomial(tf.nn.log_softmax(prev_logit) / self.sample_temperature, 1), 1)))
            self.generator.append(prev_symbol)
            rnn_input = tf.nn.embedding_lookup(self.Wemb, prev_symbol)

        self.g_output = output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, self.rnn_size])
        # outputs[1:], because we don't calculate loss on time 0.
        self.g_logits = logits = slim.fully_connected(output, self.vocab_size + 1,
                                                      activation_fn=None, scope='logit')
        self.g_probs = probs = tf.reshape(tf.nn.softmax(logits),
                                          [self.batch_size, MAX_STEPS, self.vocab_size + 1])

    self.generator = tf.transpose(tf.reshape(tf.concat(axis=0, values=self.generator), [MAX_STEPS, -1]))
def st_sampler(logits):
    """straight-through stochastic sampler"""
    flat_samples = tf.reshape(tf.multinomial(tf.reshape(logits, [-1, len(charmap)]), 1), [-1])
    onehot = tf.reshape(tf.one_hot(flat_samples, len(charmap)), tf.shape(logits))
    residual = onehot - logits
    onehot = logits + tf.stop_gradient(residual)
    return onehot
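
# A hedged sketch of the straight-through trick used in st_sampler above (assumes TF 1.x;
# a toy 4-symbol vocabulary stands in for `charmap`): the forward value is the sampled
# one-hot, but because the residual is wrapped in tf.stop_gradient, gradients reach
# `logits` as if the op were the identity.
import tensorflow as tf

logits = tf.random_normal([2, 4])                           # [batch, vocab]
samples = tf.multinomial(logits, 1)[:, 0]                   # [batch] sampled ids
onehot = tf.one_hot(samples, 4)                             # hard forward value
st_out = logits + tf.stop_gradient(onehot - logits)         # straight-through estimator
grads = tf.gradients(tf.reduce_sum(st_out), logits)[0]      # all ones: identity backward

with tf.Session() as sess:
    print(sess.run([st_out, grads]))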
def multinomial_sample(x, vocab_size, temperature):
    """Multinomial sampling from an n-dimensional tensor."""
    if temperature > 0:
        samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
    else:
        samples = tf.argmax(x, axis=-1)
    reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
    return tf.to_int32(reshaped_samples)
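
# A small usage sketch of the temperature convention shared by multinomial_sample and
# several other snippets here (assumes TF 1.x; toy logits): dividing logits by a
# temperature < 1 sharpens the distribution toward the argmax, > 1 flattens it, and
# temperature == 0 falls back to greedy argmax decoding.
import tensorflow as tf

logits = tf.constant([[1.0, 2.0, 4.0]])
cold = tf.multinomial(logits / 0.2, num_samples=5)    # almost always class 2
hot = tf.multinomial(logits / 5.0, num_samples=5)     # close to uniform
greedy = tf.argmax(logits, axis=-1)                   # the temperature == 0 branch

with tf.Session() as sess:
    print(sess.run([cold, hot, greedy]))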
def _g_recurrence_2(i, x_t, h_tm1, given_num, gen_x):
    h_t = self.g_recurrent_unit(x_t, h_tm1)  # hidden_memory_tuple
    o_t = self.g_output_unit(h_t)  # batch x vocab, logits not prob
    log_prob = tf.log(tf.nn.softmax(o_t))
    next_token = tf.cast(tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
    x_tp1 = tf.nn.embedding_lookup(self.g_embeddings, next_token)  # batch x emb_dim
    gen_x = gen_x.write(i, next_token)  # indices, batch_size
    return i + 1, x_tp1, h_t, given_num, gen_x
def sample_from_logits(logits):
    with tf.control_dependencies([tf.assert_greater(temperature, 0.0)]):
        logits = tf.identity(logits)
    reshaped_logits = (
        tf.reshape(logits, [-1, tf.shape(logits)[-1]]) / temperature)
    choices = tf.multinomial(reshaped_logits, 1)
    choices = tf.reshape(choices,
                         tf.shape(logits)[:logits.get_shape().ndims - 1])
    return choices
def provide_one_hot_labels(self, batch_size):
    """Provides one hot labels."""
    pitch_counts = self.get_pitch_counts()
    pitches = sorted(pitch_counts.keys())
    counts = [pitch_counts[p] for p in pitches]
    indices = tf.reshape(
        tf.multinomial(tf.log([tf.to_float(counts)]), batch_size), [batch_size])
    one_hot_labels = tf.one_hot(indices, depth=len(pitches))
    return one_hot_labels
def loop(prev, i):
    with tf.variable_scope(rnnlm_scope):
        prev = slim.fully_connected(prev, self.vocab_size + 1, activation_fn=None, scope='logit')
        prev_symbol = tf.stop_gradient(tf.cond(
            self.sample_max,
            # pick the word with largest probability as the input of the next time step
            lambda: tf.argmax(prev, 1),
            # sample from the distribution
            lambda: tf.squeeze(
                tf.multinomial(tf.nn.log_softmax(prev) / self.sample_temperature, 1), 1)))
        self.generator.append(prev_symbol)
        return tf.nn.embedding_lookup(self.Wemb, prev_symbol)
def _pretrain_recurrence(i, x_t, h_tm1, g_predictions):
    h_t = self.g_recurrent_unit(x_t, h_tm1)
    o_t = self.g_output_unit(h_t)
    log_prob = tf.log(tf.nn.softmax(o_t))
    next_token = tf.cast(tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
    g_predictions = g_predictions.write(i, tf.nn.softmax(o_t))  # batch x vocab_size
    x_tp1 = tf.cond(tf.less(tf.constant(random.random()), self.curriculum_rate),
                    lambda: ta_emb_x.read(i),
                    lambda: tf.nn.embedding_lookup(self.g_embeddings, next_token))
    return i + 1, x_tp1, h_t, g_predictions
def testLargeLogits(self):
    for neg in [True, False]:
        with self.test_session(use_gpu=self.use_gpu):
            logits = np.array([[1000.] * 5])
            if neg:
                logits *= -1
            samples = tf.multinomial(logits, 10).eval()
        # Sampled classes should be in-range.
        self.assertTrue((samples >= 0).all())
        self.assertTrue((samples < 5).all())
def create_model(seq, temp, vocab, hidden=HIDDEN_SIZE):
    seq = tf.one_hot(seq, len(vocab))
    output, in_state, out_state = create_rnn(seq, hidden)
    # fully_connected is syntactic sugar for tf.matmul(w, output) + b
    # it will create w and b for us
    logits = tf.contrib.layers.fully_connected(output, len(vocab), None)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits[:, :-1], seq[:, 1:]))
    # sample the next word from Maxwell-Boltzmann Distribution with temperature temp
    sample = tf.multinomial(tf.exp(logits[:, -1] / temp), 1)[:, 0]
    return loss, sample, in_state, out_state
def _g_recurrence_temperature(i, x_t, h_tm1, gen_o_temp, gen_x_temp, alpha):
    h_t = self.g_recurrent_unit(x_t, h_tm1)  # hidden_memory_tuple
    o_t = self.g_output_unit(h_t) / alpha  # batch x vocab, logits not prob
    log_prob = tf.log(tf.nn.softmax(o_t))
    next_token = tf.cast(tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
    x_tp1 = tf.nn.embedding_lookup(self.g_embeddings, next_token)  # batch x emb_dim
    gen_o_temp = gen_o_temp.write(i, tf.reduce_sum(
        tf.multiply(tf.one_hot(next_token, self.num_vocabulary, 1.0, 0.0),
                    tf.nn.softmax(o_t)), 1))  # [batch_size], prob
    gen_x_temp = gen_x_temp.write(i, next_token)  # indices, batch_size
    return i + 1, x_tp1, h_t, gen_o_temp, gen_x_temp, alpha
def body(past, prev, output):
    next_outputs = step(hparams, prev[:, tf.newaxis], past=past)
    logits = next_outputs['logits'][:, -1, :] / tf.to_float(temperature)
    logits = top_k_logits(logits, k=top_k)
    samples = tf.multinomial(logits, num_samples=1, output_dtype=tf.int32)
    return [
        tf.concat([past, next_outputs['presents']], axis=-2),
        tf.squeeze(samples, axis=[1]),
        tf.concat([output, samples], axis=1),
    ]
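
# The top_k_logits helper used above is not shown here, so this is an assumed, hedged
# equivalent (TF 1.x; the name top_k_filter is hypothetical): every logit outside the k
# largest per row is pushed to a large negative value, so tf.multinomial effectively
# samples only from the k most likely tokens.
import tensorflow as tf

def top_k_filter(logits, k):
    values, _ = tf.nn.top_k(logits, k=k)                # [batch, k], sorted descending
    min_kept = values[:, -1, tf.newaxis]                # k-th largest logit per row
    return tf.where(logits < min_kept,
                    tf.ones_like(logits) * -1e10,       # effectively zero probability
                    logits)

logits = tf.constant([[1.0, 3.0, 2.0, 0.5]])
samples = tf.multinomial(top_k_filter(logits, k=2), num_samples=8)

with tf.Session() as sess:
    print(sess.run(samples))                            # only indices 1 and 2 appear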
def create_model(self):
    seq = tf.one_hot(self.seq, len(self.vocab))
    self.create_rnn(seq)
    self.logits = tf.layers.dense(self.output, len(self.vocab), None)
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits[:, :-1],
                                                   labels=seq[:, 1:])
    self.loss = tf.reduce_sum(loss)
    # sample the next character from Maxwell-Boltzmann Distribution
    # with temperature temp. It works equally well without tf.exp
    self.sample = tf.multinomial(tf.exp(self.logits[:, -1] / self.temp), 1)[:, 0]
    self.opt = tf.train.AdamOptimizer(self.lr).minimize(self.loss, global_step=self.gstep)
def __init__(self, dim):
    self._dim = dim
    weights_var = tf.placeholder(
        dtype=tf.float32,
        shape=(None, dim),
        name="weights"
    )
    self._f_sample = tensor_utils.compile_function(
        inputs=[weights_var],
        outputs=tf.multinomial(weights_var, num_samples=1)[:, 0],
    )
def create_variables(self):
    with tf.name_scope("model_inputs"):
        self.states = tf.placeholder(dtype=tf.float32, shape=(None, self.state_dim), name="states")

    # rollout action based on current policy
    with tf.name_scope("predict_actions"):
        # initialize actor-critic network
        with tf.variable_scope("actor_network"):
            self.policy_outputs = self.actor_network(self.states)
        with tf.variable_scope("critic_network"):
            self.value_outputs = self.critic_network(self.states)

        # predict actions from policy network
        self.action_scores = tf.identity(self.policy_outputs, name="action_scores")
        self.predicted_actions = tf.multinomial(self.action_scores, 1)

    # get variable list
    actor_network_variables = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope="actor_network")
    critic_network_variables = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope="critic_network")

    # compute loss and gradients
    with tf.name_scope("compute_pg_gradients"):
        # gradients for selecting action from policy network
        self.taken_actions = tf.placeholder(dtype=tf.int32, shape=(None,), name="taken_actions")
        self.discounted_rewards = tf.placeholder(dtype=tf.float32, shape=(None,),
                                                 name="discounted_rewards")

        with tf.variable_scope("actor_network", reuse=True):
            self.logprobs = self.actor_network(self.states)
        with tf.variable_scope("critic_network", reuse=True):
            self.estimated_values = self.critic_network(self.states)

        # compute policy loss and regularization loss
        self.cross_entropy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.logprobs, labels=self.taken_actions)
        self.pg_loss = tf.reduce_mean(self.cross_entropy_loss)
        self.actor_reg_loss = tf.reduce_sum(
            [tf.reduce_sum(tf.square(x)) for x in actor_network_variables])
        self.actor_loss = self.pg_loss + self.reg_param * self.actor_reg_loss

        # compute actor gradients
        self.actor_gradients = self.optimizer.compute_gradients(
            self.actor_loss, actor_network_variables)
        # compute advantages A(s) = R - V(s)
        self.advantages = tf.reduce_sum(self.discounted_rewards - self.estimated_values)
        # compute policy gradients
        for i, (grad, var) in enumerate(self.actor_gradients):
            if grad is not None:
                self.actor_gradients[i] = (grad * self.advantages, var)

        # compute critic gradients
        self.mean_square_loss = tf.reduce_mean(
            tf.square(self.discounted_rewards - self.estimated_values))
        self.critic_reg_loss = tf.reduce_sum([
            tf.reduce_sum(tf.square(x)) for x in critic_network_variables
        ])
        self.critic_loss = self.mean_square_loss + self.reg_param * self.critic_reg_loss
        self.critic_gradients = self.optimizer.compute_gradients(
            self.critic_loss, critic_network_variables)

        # collect all gradients
        self.gradients = self.actor_gradients + self.critic_gradients

        # gradients clipping by norm
        for i, (grad, var) in enumerate(self.gradients):
            if grad is not None:
                self.gradients[i] = (tf.clip_by_norm(grad, self.max_gradient), var)

        # summarize gradients
        for grad, var in self.gradients:
            tf.summary.histogram(var.name, var)
            if grad is not None:
                tf.summary.histogram(var.name + '/gradients', grad)

        # emit summaries
        tf.summary.histogram("estimated_values", self.estimated_values)
        tf.summary.scalar("actor_loss", self.actor_loss)
        tf.summary.scalar("critic_loss", self.critic_loss)
        tf.summary.scalar("reg_loss", self.actor_reg_loss + self.critic_reg_loss)

    # training update
    with tf.name_scope("train_actor_critic"):
        # apply gradients to update actor network
        self.train_op = self.optimizer.apply_gradients(self.gradients)

    self.summarize = tf.summary.merge_all()
    self.no_op = tf.no_op()
def build_policy_network_op(self, scope="policy_network"):
    """
    Build the policy network, construct the tensorflow operation to sample
    actions from the policy network outputs, and compute the log probabilities
    of the actions taken (for computing the loss later). These operations are
    stored in self.sampled_action and self.logprob. Must handle both settings
    of self.discrete.

    Args:
        scope: the scope of the neural network

    TODO:
    Discrete case:
        action_logits: the logits for each action
            HINT: use build_mlp, check self.config for layer_size and n_layers
        self.sampled_action: sample from these logits
            HINT: use tf.multinomial + tf.squeeze
        self.logprob: compute the log probabilities of the taken actions
            HINT: 1. tf.nn.sparse_softmax_cross_entropy_with_logits computes
                     the *negative* log probabilities of labels, given logits.
                  2. taken actions are different than sampled actions!

    Continuous case:
        To build a policy in a continuous action space domain, we will have the
        model output the means of each action dimension, and then sample from
        a multivariate normal distribution with these means and trainable
        standard deviation.

        That is, the action a_t ~ N( mu(o_t), sigma)
        where mu(o_t) is the network that outputs the means for each action
        dimension, and sigma is a trainable variable for the standard
        deviations. N here is a multivariate gaussian distribution with the
        given parameters.

        action_means: the predicted means for each action dimension.
            HINT: use build_mlp, check self.config for layer_size and n_layers
        log_std: a trainable variable for the log standard deviations.
            HINT: think about why we use log std as the trainable variable instead of std
            HINT: use tf.get_variable
            HINT: The shape of this should match the shape of action dimension
        self.sampled_action: sample from the gaussian distribution as described above
            HINT: use tf.random_normal
            HINT: use re-parametrization to obtain N(mu, sigma) from N(0, 1)
        self.logprob: the log probabilities of the taken actions
            HINT: use tf.contrib.distributions.MultivariateNormalDiag
    """
    #######################################################
    #########   YOUR CODE HERE - 8-12 lines.   ############
    if self.discrete:
        mlp_out = build_mlp(self.observation_placeholder, self.action_dim, scope,
                            self.config.n_layers, self.config.layer_size,
                            self.config.activation)
        self.sampled_action = tf.squeeze(tf.multinomial(mlp_out, num_samples=1), 1)
        self.logprob = -tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.action_placeholder, logits=mlp_out)
    else:
        action_means = build_mlp(self.observation_placeholder, self.action_dim, scope,
                                 self.config.n_layers, self.config.layer_size)
        log_std = tf.get_variable("log_std", shape=[1, self.action_dim], trainable=True)
        self.sampled_action = tf.random_normal(tf.shape(action_means),
                                               mean=action_means,
                                               stddev=tf.math.exp(log_std))
        mvn = tf.contrib.distributions.MultivariateNormalDiag(
            action_means, tf.math.exp(log_std))
        self.logprob = mvn.log_prob(self.action_placeholder)
def _body(layer_id, inputs, prev_c, prev_h, anchors, anchors_w_1, arc_seq,
          entropy, log_prob):
    indices = tf.range(0, layer_id, dtype=tf.int32)
    start_id = 4 * (layer_id - 2)
    prev_layers = []
    for i in range(2):  # index_1, index_2
        next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
        prev_c, prev_h = next_c, next_h
        query = anchors_w_1.gather(indices)
        query = tf.reshape(query, [layer_id, self.lstm_size])
        query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2))
        query = tf.matmul(query, self.v_attn)
        logits = tf.reshape(query, [1, layer_id])
        if self.temperature is not None:
            logits /= self.temperature
        if self.tanh_constant is not None:
            logits = self.tanh_constant * tf.tanh(logits)
        index = tf.multinomial(logits, 1)
        index = tf.to_int32(index)
        index = tf.reshape(index, [1])
        arc_seq = arc_seq.write(start_id + 2 * i, index)
        curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=index)
        log_prob += curr_log_prob
        curr_ent = tf.stop_gradient(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=tf.nn.softmax(logits)))
        entropy += curr_ent
        prev_layers.append(anchors.read(tf.reduce_sum(index)))
        inputs = prev_layers[-1]

    for i in range(2):  # op_1, op_2
        next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
        prev_c, prev_h = next_c, next_h
        logits = tf.matmul(next_h[-1], self.w_soft) + self.b_soft
        if self.temperature is not None:
            logits /= self.temperature
        if self.tanh_constant is not None:
            op_tanh = self.tanh_constant / self.op_tanh_reduce
            logits = op_tanh * tf.tanh(logits)
        if use_bias:
            logits += self.b_soft_no_learn
        op_id = tf.multinomial(logits, 1)
        op_id = tf.to_int32(op_id)
        op_id = tf.reshape(op_id, [1])
        arc_seq = arc_seq.write(start_id + 2 * i + 1, op_id)
        curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=op_id)
        log_prob += curr_log_prob
        curr_ent = tf.stop_gradient(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=tf.nn.softmax(logits)))
        entropy += curr_ent
        inputs = tf.nn.embedding_lookup(self.w_emb, op_id)

    next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
    anchors = anchors.write(layer_id, next_h[-1])
    anchors_w_1 = anchors_w_1.write(layer_id, tf.matmul(next_h[-1], self.w_attn_1))
    inputs = self.g_emb

    return (layer_id + 1, inputs, next_c, next_h, anchors, anchors_w_1,
            arc_seq, entropy, log_prob)
def __init__(self, state_shape, n_actions, n_hidden, summary=True):
    super(ActorCriticNetworkDiscreteCNNRNN, self).__init__()
    self.state_shape = state_shape
    self.n_actions = n_actions
    self.n_hidden = n_hidden
    self.summary = summary

    self.states = tf.placeholder(tf.float32, [None] + state_shape, name="states")
    self.adv = tf.placeholder(tf.float32, name="advantage")
    self.actions_taken = tf.placeholder(tf.float32, name="actions_taken")
    self.r = tf.placeholder(tf.float32, [None], name="r")

    x = self.states
    # Convolution layers
    for i in range(4):
        x = tf.nn.elu(conv2d(x, 32, "l{}".format(i + 1), [3, 3], [2, 2]))

    # Flatten
    reshape = tf.expand_dims(flatten(x), [0])

    lstm_size = 256
    self.enc_cell = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    lstm_state_size = self.enc_cell.state_size
    c_init = np.zeros((1, lstm_state_size.c), np.float32)
    h_init = np.zeros((1, lstm_state_size.h), np.float32)
    self.state_init = [c_init, h_init]
    self.rnn_state_in = self.enc_cell.zero_state(1, tf.float32)
    tf.add_to_collection("rnn_state_in_c", self.rnn_state_in.c)
    tf.add_to_collection("rnn_state_in_h", self.rnn_state_in.h)
    L3, self.rnn_state_out = tf.nn.dynamic_rnn(
        cell=self.enc_cell,
        inputs=reshape,
        initial_state=self.rnn_state_in,
        dtype=tf.float32)
    tf.add_to_collection("rnn_state_out_c", self.rnn_state_out.c)
    tf.add_to_collection("rnn_state_out_h", self.rnn_state_out.h)
    L3 = tf.reshape(L3, [-1, lstm_size])

    # Fully connected for Actor
    self.logits = linear(L3, n_actions, "actionlogits", normalized_columns_initializer(0.01))

    self.value = tf.reshape(
        linear(L3, 1, "value", normalized_columns_initializer(1.0)), [-1])

    self.probs = tf.nn.softmax(self.logits)

    self.action = tf.squeeze(tf.multinomial(
        self.logits - tf.reduce_max(self.logits, [1], keep_dims=True), 1),
        [1], name="action")
    self.action = tf.one_hot(self.action, n_actions)[0, :]

    log_probs = tf.nn.log_softmax(self.logits)
    self.actor_loss = -tf.reduce_sum(
        tf.reduce_sum(log_probs * self.actions_taken, [1]) * self.adv)

    self.critic_loss = 0.5 * tf.reduce_sum(tf.square(self.value - self.r))

    self.entropy = -tf.reduce_sum(self.probs * log_probs)

    self.loss = self.actor_loss + 0.5 * self.critic_loss - self.entropy * 0.01

    self.vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  tf.get_variable_scope().name)
def sampling_typeless_SNIS_rs(p, parten, nodes_nbrs, nbr_segment, edge_features,
                              num_sample, n_node_type, edge_type_array):
    unique_nbrs = tf.unique_with_counts(nbr_segment)
    num_nbrs = tf.size(unique_nbrs.y)
    q = tf.gather(
        tf.ones(num_nbrs) / tf.cast(num_nbrs, dtype=tf.float32),
        unique_nbrs.idx)
    samples = tf.unique(
        tf.cast(tf.multinomial(tf.log([q]), num_sample)[0], tf.int32)).y
    infos = tf.sparse_to_dense(
        tf.reshape(tf.contrib.framework.sort(samples), [-1, 1]),
        output_shape=tf.shape(unique_nbrs.idx),
        sparse_values=tf.ones_like(samples, dtype=tf.int32))
    partitions = tf.gather(infos, unique_nbrs.idx)
    samples_to_gather = tf.dynamic_partition(
        tf.range(tf.size(partitions), dtype=tf.int32), partitions, 2)[1]
    sampled_p = tf.gather(p, samples_to_gather)
    sampled_q = tf.gather(tf.gather(q, unique_nbrs.idx), samples_to_gather)
    sampled_parten = tf.gather(parten, samples_to_gather)
    sampled_nbrs = tf.gather(nodes_nbrs, samples_to_gather)
    nbrset = tf.dynamic_partition(sampled_nbrs, sampled_parten, n_node_type)
    segset = tf.dynamic_partition(tf.gather(nbr_segment, samples_to_gather),
                                  sampled_parten, n_node_type)
    edge_f_set = []
    feature_ids = tf.dynamic_partition(
        tf.gather(tf.range(tf.size(nbr_segment)), samples_to_gather),
        sampled_parten, n_node_type)
    for i in range(n_node_type):
        edge_f_set.append(tf.gather(edge_features, feature_ids[i]))
    sampled_ps = tf.dynamic_partition(sampled_p, sampled_parten, n_node_type)
    sampled_qs = tf.dynamic_partition(sampled_q, sampled_parten, n_node_type)
    condition2 = [
        tf.reduce_all(tf.math.greater(tf.size(nbrset[i]), 0))
        for i in range(n_node_type)
    ]
    all_weight = []
    for i in range(n_node_type):
        weights = tf.cond(
            condition2[i],
            false_fn=lambda: [tf.zeros(0)],
            true_fn=lambda: calculate_pq_SNIS(segset[i], nbrset[i], edge_f_set[i],
                                              sampled_ps[i], sampled_qs[i]))
        all_weight.append(weights)
        print(edge_type_array[i])
        edge_type_array[i] = tf.cond(
            condition2[i],
            true_fn=lambda: edge_type_array[i],
            false_fn=lambda: -tf.ones(tf.size(edge_type_array[i]), tf.int32))
    num_sampled_edges = tf.size(samples_to_gather)
    num_sampled_nbrs = tf.size(samples)
    return [
        all_weight, num_sampled_edges, num_sampled_nbrs, nbrset, segset,
        edge_f_set, edge_type_array
    ]
def __init__(self, env):
    self.env = env
    num_actions = self.env.action_space.n

    # we have three place holders we'll use...
    # observations; used either during rollout to sample some actions, or
    # during training when combined with actions_taken and advantages.
    shape_with_batch = [None] + list(self.env.observation_space.shape)
    self.observations = tf.placeholder(shape=shape_with_batch, dtype=tf.float32)
    # the actions we took during rollout
    self.actions = tf.placeholder(tf.int32, name='actions')
    # the advantages we got from taken 'action_taken' in 'observation'
    self.advantages = tf.placeholder(tf.float32, name='advantages')

    # our model is a very simple MLP
    with tf.variable_scope("model"):
        # stack of hidden layers on flattened input; (batch,2,2,7) -> (batch,28)
        flat_input_state = slim.flatten(self.observations, scope='flat')
        final_hidden = self.hidden_layers_starting_at(flat_input_state, opts.hidden_layers)
        logits = slim.fully_connected(inputs=final_hidden,
                                      num_outputs=num_actions,
                                      activation_fn=None)

    # in the eval case just pick arg max
    self.action_argmax = tf.argmax(logits, 1)

    # for rollouts we need an op that samples actions from this
    # model to give a stochastic action.
    sample_action = tf.multinomial(logits, num_samples=1)
    self.sampled_action_op = tf.reshape(sample_action, shape=[])

    # we are trying to maximise the product of two components...
    # 1) the log_p of "good" actions.
    # 2) the advantage term based on the rewards from actions.

    # first we need the log_p values for each observation for the actions we specifically
    # took by sampling... we first run a log_softmax over the action logits to get
    # probabilities.
    log_softmax = tf.nn.log_softmax(logits)
    self.debug_softmax = tf.exp(log_softmax)

    # we then use a mask to only select the elements of the softmaxs that correspond
    # to the actions we actually took. we could also do this by complex indexing and a
    # gather but i always think this is more natural. the "cost" of dealing with the
    # mostly zero one hot, as opposed to doing a gather on sparse indexes, isn't a big
    # deal when the number of observations is >> number of actions.
    action_mask = tf.one_hot(indices=self.actions, depth=num_actions)
    action_log_prob = tf.reduce_sum(log_softmax * action_mask, reduction_indices=1)

    # the (element wise) product of these action log_p's with the total reward of the
    # episode represents the quantity we want to maximise. we standardise the advantage
    # values so roughly 1/2 +ve / -ve as a variance control.
    action_mul_advantages = tf.mul(action_log_prob, util.standardise(self.advantages))
    self.loss = -tf.reduce_sum(action_mul_advantages)  # recall: we are maximising.

    with tf.variable_scope("optimiser"):
        # dynamically create optimiser based on opts
        optimiser = util.construct_optimiser(opts)
        # calc gradients
        gradients = optimiser.compute_gradients(self.loss)
        # potentially clip and wrap with debugging tf.Print
        gradients = util.clip_and_debug_gradients(gradients, opts)
        # apply
        self.train_op = optimiser.apply_gradients(gradients)
def __init__(self, lr, brain, h_size, epsilon, beta, max_step, normalize, num_layers):
    """
    Creates Discrete Control Actor-Critic model.
    :param brain: State-space size
    :param h_size: Hidden layer size
    """
    super(DiscreteControlModel, self).__init__()
    self._create_global_steps()
    self._create_reward_encoder()
    self.normalize = normalize

    hidden_state, hidden_visual, hidden = None, None, None
    if brain.number_observations > 0:
        height_size, width_size = brain.camera_resolutions[0]['height'], \
                                  brain.camera_resolutions[0]['width']
        bw = brain.camera_resolutions[0]['blackAndWhite']
        hidden_visual = self._create_visual_encoder(
            height_size, width_size, bw, h_size, 1, tf.nn.elu, num_layers)[0]
    if brain.state_space_size > 0:
        s_size = brain.state_space_size
        if brain.state_space_type == "continuous":
            hidden_state = self._create_continuous_state_encoder(
                s_size, h_size, 1, tf.nn.elu, num_layers)[0]
        else:
            hidden_state = self._create_discrete_state_encoder(
                s_size, h_size, 1, tf.nn.elu, num_layers)[0]

    if hidden_visual is None and hidden_state is None:
        raise Exception("No valid network configuration possible. "
                        "There are no states or observations in this brain")
    elif hidden_visual is not None and hidden_state is None:
        hidden = hidden_visual
    elif hidden_visual is None and hidden_state is not None:
        hidden = hidden_state
    elif hidden_visual is not None and hidden_state is not None:
        hidden = tf.concat([hidden_visual, hidden_state], axis=1)

    a_size = brain.action_space_size

    self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
    self.policy = tf.layers.dense(
        hidden, a_size, activation=None, use_bias=False,
        kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
    self.probs = tf.nn.softmax(self.policy, name="action_probs")
    self.output = tf.multinomial(self.policy, 1)
    self.output_max = tf.argmax(self.probs, name='action_max', axis=1)
    self.output = tf.identity(self.output, name="action")
    self.value = tf.layers.dense(
        hidden, 1, activation=None, use_bias=False,
        kernel_initializer=c_layers.variance_scaling_initializer(factor=1.0))
    self.value = tf.identity(self.value, name="value_estimate")
    self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs + 1e-10), axis=1)

    self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
    self.selected_actions = c_layers.one_hot_encoding(self.action_holder, a_size)
    self.old_probs = tf.placeholder(shape=[None, a_size], dtype=tf.float32,
                                    name='old_probabilities')
    self.responsible_probs = tf.reduce_sum(self.probs * self.selected_actions, axis=1)
    self.old_responsible_probs = tf.reduce_sum(self.old_probs * self.selected_actions, axis=1)

    self._create_ppo_optimizer(self.responsible_probs, self.old_responsible_probs,
                               self.value, self.entropy, beta, epsilon, lr, max_step)
def body(*args) -> LoopState:
    loop_state = LoopState(*args)
    histories = loop_state.histories
    feedables = loop_state.feedables
    step = feedables.step

    decoded_symbols_ta = histories.decoded_symbols.write(
        step, feedables.input_symbol)

    # shape (time, batch)
    decoded_symbols = decoded_symbols_ta.stack()
    decoded_symbols.set_shape([None, None])
    decoded_symbols_in_batch = tf.transpose(decoded_symbols)

    # mask (time, batch)
    mask = histories.input_mask.stack()
    mask.set_shape([None, None])

    with tf.variable_scope(self._variable_scope, reuse=tf.AUTO_REUSE):
        # shape (batch, time, dimension)
        embedded_inputs = self.embed_inputs(decoded_symbols_in_batch)

        last_layer = self.layer(self.depth, embedded_inputs,
                                tf.transpose(mask))

        # (batch, state_size)
        output_state = last_layer.temporal_states[:, -1, :]

        # See train_logits definition
        logits = tf.matmul(output_state, self.decoding_w)
        logits += self.decoding_b

        if sample:
            next_symbols = tf.multinomial(logits, num_samples=1)
        else:
            next_symbols = tf.to_int32(tf.argmax(logits, axis=1))

        int_unfinished_mask = tf.to_int32(
            tf.logical_not(loop_state.feedables.finished))

        # Note this works only when PAD_TOKEN_INDEX is 0. Otherwise
        # this would have to be rewritten.
        assert PAD_TOKEN_INDEX == 0
        next_symbols = next_symbols * int_unfinished_mask

        has_just_finished = tf.equal(next_symbols, END_TOKEN_INDEX)
        has_finished = tf.logical_or(feedables.finished, has_just_finished)
        not_finished = tf.logical_not(has_finished)

    new_feedables = DecoderFeedables(
        step=step + 1,
        finished=has_finished,
        input_symbol=next_symbols,
        prev_logits=logits)

    # TransformerHistories is a type and should be callable
    # pylint: disable=not-callable
    new_histories = TransformerHistories(
        logits=histories.logits.write(step, logits),
        decoder_outputs=histories.decoder_outputs.write(step, output_state),
        mask=histories.mask.write(step, not_finished),
        outputs=histories.outputs.write(step, next_symbols),
        # transformer-specific:
        # TODO handle attention histories correctly
        decoded_symbols=decoded_symbols_ta,
        self_attention_histories=histories.self_attention_histories,
        inter_attention_histories=histories.inter_attention_histories,
        input_mask=histories.input_mask.write(step + 1, tf.to_float(not_finished)))
    # pylint: enable=not-callable

    new_loop_state = LoopState(
        histories=new_histories,
        constants=[],
        feedables=new_feedables)

    return new_loop_state
def generator(self, name="generator", reuse=False):
    '''
    Caption sampler: sample words following the probability distribution.
    '''
    random_uniform_init = tf.random_uniform_initializer(minval=-0.1, maxval=0.1)
    with tf.variable_scope(name):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        with tf.device("/cpu:0"), tf.variable_scope("word"):
            # name: "generator/word"
            word_emb_W = tf.get_variable("word_emb_W", [self.vocab_size, self.G_hidden_size],
                                         tf.float32, random_uniform_init)
        with tf.variable_scope("image_feat"):
            # name: "generator/image_feat"
            image_feat_W = tf.get_variable("image_feat_W", [self.image_feat_dim, self.G_hidden_size],
                                           tf.float32, random_uniform_init)
            image_feat_b = tf.get_variable("image_feat_b", [self.G_hidden_size],
                                           tf.float32, random_uniform_init)
        with tf.variable_scope("output"):
            # name: "generator/output"
            output_W = tf.get_variable("output_W", [self.G_hidden_size, self.vocab_size],
                                       tf.float32, random_uniform_init)
            output_b = tf.get_variable("output_b", [self.vocab_size],
                                       tf.float32, random_uniform_init)
        with tf.variable_scope("lstm_encoder"):
            if self.rnn_cell == 'lstm':
                encoder = tf.nn.rnn_cell.LSTMCell(self.G_hidden_size, state_is_tuple=True)
            elif self.rnn_cell == 'gru':
                encoder = tf.nn.rnn_cell.GRUCell(self.G_hidden_size)
        with tf.variable_scope("lstm_decoder"):  # WON'T BE CREATED HERE
            if self.rnn_cell == 'lstm':
                decoder = tf.nn.rnn_cell.LSTMCell(self.G_hidden_size, state_is_tuple=True)
            elif self.rnn_cell == 'gru':
                decoder = tf.nn.rnn_cell.GRUCell(self.G_hidden_size)

        # ============================= encoder =============================
        state = encoder.zero_state(self.batch_size, tf.float32)
        with tf.variable_scope("image_feat") as scope:
            image_feat = self.batch_norm(self.image_feat[:, :], mode='train', name='')
        image_feat_emb = tf.matmul(image_feat, image_feat_W) + image_feat_b  # B,H
        lstm_input = image_feat_emb
        with tf.variable_scope("lstm_encoder") as scope:
            _, state = encoder(lstm_input, state)
        encoder_state = state

        # ============================= decoder =============================
        start_token = tf.constant(self.START, tf.int32, [self.batch_size])
        mask = tf.constant(True, "bool", [self.batch_size])
        sample_words = []
        state = encoder_state
        for j in range(self.lstm_steps):
            with tf.device("/cpu:0"):
                if j == 0:
                    decoder_input = tf.nn.embedding_lookup(word_emb_W, start_token)
                else:
                    decoder_input = tf.nn.embedding_lookup(word_emb_W, sample_word)
            with tf.variable_scope("lstm"):
                if not j == 0:
                    tf.get_variable_scope().reuse_variables()
                output, state = decoder(decoder_input, state)

            logits = tf.matmul(output, output_W) + output_b
            # add 1e-8 to prevent log(0)
            log_probs = tf.log(tf.clip_by_value(tf.nn.softmax(logits), 1e-20, 1.0))  # B,Vocab_size
            # sample once from the multinomial distribution (Monte Carlo sampling)
            sample_word = tf.reshape(tf.multinomial(log_probs, 1), [self.batch_size])  # 1 means sample once
            sample_words.append(sample_word)

        return sample_words
def categorical_sample(logits, d):
    value = tf.squeeze(
        tf.multinomial(logits - tf.reduce_max(logits, [1], keep_dims=True), 1), [1])
    return tf.one_hot(value, d)
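
# Brief sketch of why categorical_sample subtracts the per-row max before sampling
# (assumes TF 1.x; toy logits): softmax is invariant to adding a constant per row, so
# the sampling distribution is unchanged, while the shifted logits keep exp() of the
# values in a safe range -- a common guard against overflow with very large logits.
import tensorflow as tf

logits = tf.constant([[1000.0, 1001.0, 999.0]])
shifted = logits - tf.reduce_max(logits, [1], keep_dims=True)

with tf.Session() as sess:
    # Identical probabilities before and after the shift; samples drawn from shifted logits.
    print(sess.run([tf.nn.softmax(logits), tf.nn.softmax(shifted)]))
    print(sess.run(tf.multinomial(shifted, 5)))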
def __init__(self, n_inputs, n_outputs, **params):
    super(TDM_agent, self).__init__(**params)

    self.n_inputs = n_inputs
    self.n_outputs = n_outputs

    self.lr = params['agent_params'].pop('lr', 1e-3)
    self.discount = params['agent_params'].pop('discount', 1e-3)
    self.tau = params['agent_params'].pop('tau', 1e-3)
    self.max_td = params['agent_params'].pop('max_td', 0)
    self.soft_learning = params['agent_params'].pop('soft_learning', False)
    self.reward_scale = params['agent_params'].pop('reward_scale', 1.0)
    self.double = params['agent_params'].pop('double', False)
    self.reward_scale = params['agent_params'].pop('reward_scale', 1.0)
    self.huber_loss = params['agent_params'].pop('huber_loss', True)
    self.clip_gradients = params['agent_params'].pop('clip_gradients', False)
    self.train_steps_per_t = params['agent_params'].pop('train_steps_per_t', 1)
    self.q_train_steps_per_t = params['agent_params'].pop('q_train_steps_per_t', 1)
    self.extra_q_train_steps_per_t = self.q_train_steps_per_t - self.train_steps_per_t
    assert self.extra_q_train_steps_per_t >= 0
    self.multi_step = params['agent_params'].pop('multi_step', False)
    if self.multi_step:
        self.discount = self.discount ** self.multi_step

    assert not (self.soft_learning and self.double)

    self._init_placeholders()

    ### QNET
    self.qnet = Qnet(self.obs, self.n_outputs, params['network_spec'], scope='qnet')
    self.model_Q_params = self.qnet.get_params_internal()
    self.model_Q_outputs = self.qnet.outputs

    ### FNET
    self.fnet = Qnet([self.obs, self.actions, self.scaled_tds], self.n_inputs,
                     params['network_spec'], scope='fnet')
    self.model_F_params = self.fnet.get_params_internal()
    self.model_F_outputs = self.fnet.outputs

    ### RNET
    self.rnet = Qnet([self.obs, self.actions, self.scaled_tds], 1,
                     params['network_spec'], scope='rnet')
    self.model_R_params = self.rnet.get_params_internal()
    self.model_R_outputs = self.rnet.outputs

    ### ENET
    if self.soft_learning:
        self.model_Q_predict_action_from_next_obs = tf.stop_gradient(
            tf.one_hot(
                tf.multinomial(self.qnet.make_network(inputs=self.next_obs), 1)[:, 0],
                self.qnet.output_size))
    else:
        self.model_Q_predict_action_from_next_obs = tf.stop_gradient(
            tf.one_hot(
                tf.argmax(self.qnet.make_network(inputs=self.next_obs), axis=1),
                self.qnet.output_size))

    # Duplicate the Fnet with different variables for the target network
    self.tfnet = Qnet([self.next_obs, self.model_Q_predict_action_from_next_obs,
                       self.scaled_next_tds],
                      self.n_inputs, params['network_spec'], scope='tfnet')
    self.target_F_outputs = self.tfnet.outputs
    self.target_F_params = self.tfnet.get_params_internal()
    self.target_F_from_obs = self.tfnet.make_network(
        inputs=[self.obs, self.actions, self.scaled_tds])

    # Duplicate the Rnet with different variables for the target network
    self.trnet = Qnet([self.next_obs, self.model_Q_predict_action_from_next_obs,
                       self.scaled_next_tds],
                      1, params['network_spec'], scope='trnet')
    self.target_R_outputs = self.trnet.outputs
    self.target_R_params = self.trnet.get_params_internal()
    self.target_R_from_obs = self.trnet.make_network(
        inputs=[self.obs, self.actions, self.scaled_tds])

    # Duplicate the Qnet with different variables for the target network
    self.tqnet = Qnet(tf.add(self.next_obs, self.td_is_not_zero * self.target_F_outputs),
                      self.n_outputs, params['network_spec'], scope='tqnet')
    self.target_Q_outputs = self.tqnet.outputs
    self.target_Q_params = self.tqnet.get_params_internal()

    if self.soft_learning:
        # For soft learning:
        # V = sum(p(s,a) * (q(s,a) - log(p(s,a)))
        #   = sum(exp(q)/z * (q - log(exp(q)/z)))
        #   = sum(p * (log(z)))
        #   = log(z)
        self.partition_function = tf.reduce_mean(self.target_Q_outputs, axis=1) + tf.log(
            tf.reduce_sum(
                tf.exp(self.target_Q_outputs - tf.reduce_mean(
                    self.target_Q_outputs, axis=1, keepdims=True)),
                axis=1))
        self.target_V = self.partition_function

        params['policy_params']['action_choice'] = params['policy_params'].get(
            'action_choice', 'Boltzmann')
        assert params['policy_params']['action_choice'] == 'Boltzmann'  # Softmax on outputs

    self.policy = Policy_Discrete_for_Qnet(self.qnet, **params['policy_params'])

    self.rb = Replay_Buffer(self.n_inputs,
                            self.n_outputs,
                            discrete_action=True,
                            multi_step=self.multi_step,
                            **params['replay_buffer_params'])

    self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)

    self.train_ops = []
    self._init_training_ops()

    self.target_Q_update = uf.update_target_network(
        self.model_Q_params,
        self.target_Q_params,
        tau=self.tau,
        update_op_control_dependencies=self.q_train_op)
    self.target_R_update = uf.update_target_network(
        self.model_R_params,
        self.target_R_params,
        tau=self.tau,
        update_op_control_dependencies=self.train_ops)
    self.target_F_update = uf.update_target_network(
        self.model_F_params,
        self.target_F_params,
        tau=self.tau,
        update_op_control_dependencies=self.train_ops)
    self.train_ops.append([self.target_R_update, self.target_F_update])

    self.q_train_ops = tf.group(self.q_train_op, self.target_Q_update)
    self.train_ops = tf.group(self.train_ops, self.q_train_ops)

    self.loss_ops = [self.R_Loss, self.F_Loss, self.Q_Loss]

    self._finish_agent_setup()
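
# A hedged sketch (assumes TF 1.x; toy Q-values) of the soft value V = log Z computed
# in the __init__ above: shifting the Q-values by any per-row constant before
# exponentiating leaves log-sum-exp unchanged (log sum exp(q) = c + log sum exp(q - c)),
# so the mean-shifted form used by the agent is a numerically safer version of the
# naive one.
import tensorflow as tf

q = tf.constant([[10.0, 12.0, 11.0]])
naive = tf.log(tf.reduce_sum(tf.exp(q), axis=1))
shift = tf.reduce_mean(q, axis=1, keepdims=True)
stable = tf.squeeze(shift, axis=1) + tf.log(tf.reduce_sum(tf.exp(q - shift), axis=1))

with tf.Session() as sess:
    print(sess.run([naive, stable]))   # both ~12.408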
hidden2 = tf.layers.dense(hidden1, n_hidden, activation=tf.nn.relu,
                          kernel_initializer=intializer)
output_layer = tf.layers.dense(hidden2, out, activation=tf.nn.sigmoid,
                               kernel_initializer=intializer)

# Left, Right Probb
# Concatenate tensors along one direction
probb = tf.concat(values=[output_layer, 1 - output_layer], axis=1)

# Final Action "0" or "1"
action = tf.multinomial(probb, num_samples=1)

init = tf.global_variables_initializer()

# For 50 episodes of game, take 500 time steps and declare the game as Done.
n_steps = 500
episodes = 50
avg_steps = []
env = gym.make('CartPole-v1')

with tf.Session() as sess:
    sess.run(init)
    for i in range(episodes):
        # Reset Environment
        obs = env.reset()
def build_policy_network_op(self, scope="policy_network"):
    """
    Build the policy network, construct the tensorflow operation to sample
    actions from the policy network outputs, and compute the log probabilities
    of the taken actions (for computing the loss later). These operations are
    stored in self.sampled_action and self.logprob. Must handle both settings
    of self.discrete.

    TODO:
    Discrete case:
        logits: the logits for each action
            HINT: use build_mlp
        self.sampled_action: sample from these logits
            HINT: use tf.multinomial + tf.squeeze
        self.logprob: compute the log probabilities of the taken actions
            HINT: 1. tf.nn.sparse_softmax_cross_entropy_with_logits computes
                     the *negative* log probabilities of labels, given logits.
                  2. taken actions are different than sampled actions!

    Continuous case:
        To build a policy in a continuous action space domain, we will have the
        model output the means of each action dimension, and then sample from
        a multivariate normal distribution with these means and trainable
        standard deviation.

        That is, the action a_t ~ N( mu(o_t), sigma)
        where mu(o_t) is the network that outputs the means for each action
        dimension, and sigma is a trainable variable for the standard
        deviations. N here is a multivariate gaussian distribution with the
        given parameters.

        action_means: the predicted means for each action dimension.
            HINT: use build_mlp
        log_std: a trainable variable for the log standard deviations.
            --> think about why we use log std as the trainable variable instead of std
        self.sampled_action: sample from the gaussian distribution as described above
            HINT: use tf.random_normal
        self.logprob: the log probabilities of the taken actions
            HINT: use tf.contrib.distributions.MultivariateNormalDiag
    """
    #######################################################
    #########   YOUR CODE HERE - 5-10 lines.   ############

    if self.discrete:
        action_logits = build_mlp(
            self.observation_placeholder,
            self.action_dim,
            "discrete",
        )  # TODO
        self.sampled_action = tf.reshape(tf.multinomial(action_logits, 1), [-1])  # TODO
        self.logprob = -tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.action_placeholder, logits=action_logits)  # TODO
    else:
        action_means = build_mlp(
            self.observation_placeholder,
            self.action_dim,
            "continuous",
        )  # TODO
        log_std = tf.get_variable("std", [self.action_dim], dtype=tf.float32)  # TODO
        self.sampled_action = tf.random_normal(
            shape=tf.shape(action_means),
            mean=action_means,
            stddev=tf.exp(log_std))  # TODO
        self.logprob = tf.contrib.distributions.MultivariateNormalDiag(
            loc=action_means,
            scale_diag=tf.exp(log_std)).log_prob(self.action_placeholder)  # TODO
def ssd_random_sample_patch(image, labels, bboxes,
                            ratio_list=[0.1, 0.3, 0.5, 0.7, 0.9, 1.], name=None):
    '''ssd_random_sample_patch.

    Select one min_iou, sample _width and _height from [0-width] and [0-height],
    check that the aspect ratio is between 0.5-2., select a left-top point from
    (width - _width, height - _height), check that this bbox has a min_iou with
    all ground-truth bboxes, and keep the ground-truth boxes whose center is in
    this sampled patch; if none, try again.
    '''
    def sample_width_height(width, height):
        with tf.name_scope('sample_width_height'):
            index = 0
            max_attempt = 10
            sampled_width, sampled_height = width, height

            def condition(index, sampled_width, sampled_height, width, height):
                return tf.logical_or(
                    tf.logical_and(
                        tf.logical_or(
                            tf.greater(sampled_width, sampled_height * 2),
                            tf.greater(sampled_height, sampled_width * 2)),
                        tf.less(index, max_attempt)),
                    tf.less(index, 1))

            def body(index, sampled_width, sampled_height, width, height):
                sampled_width = tf.random_uniform(
                    [1], minval=0.3, maxval=0.999, dtype=tf.float32)[0] * width
                sampled_height = tf.random_uniform(
                    [1], minval=0.3, maxval=0.999, dtype=tf.float32)[0] * height
                return index + 1, sampled_width, sampled_height, width, height

            [index, sampled_width, sampled_height, _, _] = tf.while_loop(
                condition, body,
                [index, sampled_width, sampled_height, width, height],
                parallel_iterations=4, back_prop=False, swap_memory=True)

            return tf.cast(sampled_width, tf.int32), tf.cast(sampled_height, tf.int32)

    def jaccard_with_anchors(roi, bboxes):
        with tf.name_scope('jaccard_with_anchors'):
            int_ymin = tf.maximum(roi[0], bboxes[:, 0])
            int_xmin = tf.maximum(roi[1], bboxes[:, 1])
            int_ymax = tf.minimum(roi[2], bboxes[:, 2])
            int_xmax = tf.minimum(roi[3], bboxes[:, 3])
            h = tf.maximum(int_ymax - int_ymin + 1., 0.)
            w = tf.maximum(int_xmax - int_xmin + 1., 0.)
            inter_vol = h * w
            union_vol = (roi[3] - roi[1] + 1.) * (roi[2] - roi[0] + 1.) + (
                (bboxes[:, 2] - bboxes[:, 0] + 1.) * (bboxes[:, 3] - bboxes[:, 1] + 1.) - inter_vol)
            jaccard = tf.div(inter_vol, union_vol)
            return jaccard

    def areas(bboxes):
        with tf.name_scope('bboxes_areas'):
            vol = (bboxes[:, 3] - bboxes[:, 1] + 1.) * (bboxes[:, 2] - bboxes[:, 0] + 1.)
            return vol

    def check_roi_center(width, height, labels, bboxes):
        with tf.name_scope('check_roi_center'):
            index = 0
            max_attempt = 20
            float_width = tf.to_float(width)
            float_height = tf.to_float(height)
            roi = [0., 0., float_height - 1., float_width - 1.]
mask = tf.cast(tf.zeros_like(labels, dtype=tf.uint8), tf.bool) center_x, center_y = (bboxes[:, 1] + bboxes[:, 3]) / 2, ( bboxes[:, 0] + bboxes[:, 2]) / 2 def condition(index, roi, mask): return tf.logical_or( tf.logical_and( tf.reduce_sum(tf.to_int32(mask)) < 1, tf.less(index, max_attempt)), tf.less(index, 1)) def body(index, roi, mask): sampled_width, sampled_height = sample_width_height( float_width, float_height) x = tf.random_uniform([], minval=0, maxval=width - sampled_width, dtype=tf.int32) y = tf.random_uniform([], minval=0, maxval=height - sampled_height, dtype=tf.int32) roi = [ tf.to_float(y), tf.to_float(x), tf.to_float(y + sampled_height), tf.to_float(x + sampled_width) ] mask_min = tf.logical_and(tf.greater(center_y, roi[0]), tf.greater(center_x, roi[1])) mask_max = tf.logical_and(tf.less(center_y, roi[2]), tf.less(center_x, roi[3])) mask = tf.logical_and(mask_min, mask_max) return index + 1, roi, mask [index, roi, mask] = tf.while_loop(condition, body, [index, roi, mask], parallel_iterations=10, back_prop=False, swap_memory=True) mask_labels = tf.boolean_mask(labels, mask) mask_bboxes = tf.boolean_mask(bboxes, mask) return roi, mask_labels, mask_bboxes def check_roi_overlap(width, height, labels, bboxes, min_iou): with tf.name_scope('check_roi_overlap'): index = 0 max_attempt = 50 float_width = tf.to_float(width) float_height = tf.to_float(height) roi = [0., 0., float_height - 1., float_width - 1.] mask_labels = labels mask_bboxes = bboxes def condition(index, roi, mask_labels, mask_bboxes): return tf.logical_or( tf.logical_or( tf.logical_and( tf.reduce_sum( tf.to_int32( jaccard_with_anchors( roi, mask_bboxes) < min_iou)) > 0, tf.less(index, max_attempt)), tf.less(index, 1)), tf.less(tf.shape(mask_labels)[0], 1)) def body(index, roi, mask_labels, mask_bboxes): roi, mask_labels, mask_bboxes = check_roi_center( width, height, labels, bboxes) return index + 1, roi, mask_labels, mask_bboxes [index, roi, mask_labels, mask_bboxes ] = tf.while_loop(condition, body, [index, roi, mask_labels, mask_bboxes], parallel_iterations=16, back_prop=False, swap_memory=True) return tf.cond( tf.greater(tf.shape(mask_labels)[0], 0), lambda: (tf.to_int32([ roi[0], roi[1], roi[2] - roi[0] + 1., roi[3] - roi[1] + 1. ]), mask_labels, mask_bboxes), lambda: (tf.to_int32( [0., 0., float_height, float_width]), labels, bboxes)) def sample_patch(image, labels, bboxes, min_iou): with tf.name_scope('sample_patch'): height, width, depth = _ImageDimensions(image, rank=3) roi_slice_range, mask_labels, mask_bboxes = check_roi_overlap( width, height, labels, bboxes, min_iou) # Add offset. 
offset = tf.cast( tf.stack([ roi_slice_range[0], roi_slice_range[1], roi_slice_range[0], roi_slice_range[1] ]), mask_bboxes.dtype) mask_bboxes = mask_bboxes - offset cliped_ymin = tf.maximum(0., mask_bboxes[:, 0]) cliped_xmin = tf.maximum(0., mask_bboxes[:, 1]) cliped_ymax = tf.minimum( tf.to_float(roi_slice_range[2]) - 1., mask_bboxes[:, 2]) cliped_xmax = tf.minimum( tf.to_float(roi_slice_range[3]) - 1., mask_bboxes[:, 3]) mask_bboxes = tf.stack( [cliped_ymin, cliped_xmin, cliped_ymax, cliped_xmax], axis=-1) return tf.cond( tf.logical_or(tf.less(roi_slice_range[2], 1), tf.less(roi_slice_range[3], 1)), lambda: (image, labels, bboxes), lambda: (tf.slice(image, [roi_slice_range[0], roi_slice_range[1], 0], [ roi_slice_range[2], roi_slice_range[3], -1 ]), mask_labels, mask_bboxes)) with tf.name_scope('ssd_random_sample_patch'): image = tf.convert_to_tensor(image, name='image') min_iou_list = tf.convert_to_tensor(ratio_list) samples_min_iou = tf.multinomial( tf.log([[1. / len(ratio_list)] * len(ratio_list)]), 1) sampled_min_iou = min_iou_list[tf.cast(samples_min_iou[0][0], tf.int32)] return tf.cond( tf.less(sampled_min_iou, 1.), lambda: sample_patch(image, labels, bboxes, sampled_min_iou), lambda: (image, labels, bboxes))
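A standalone sketch (assuming TensorFlow 1.x) of the ratio-selection trick used at the end of ssd_random_sample_patch: feeding uniform log-probabilities to tf.multinomial picks one min_iou threshold per call, and a sampled value of 1. falls through to the branch that keeps the original image.

import tensorflow as tf

ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1.]
min_iou_list = tf.convert_to_tensor(ratio_list)
# One draw from a uniform categorical over the six thresholds.
sample_idx = tf.multinomial(tf.log([[1. / len(ratio_list)] * len(ratio_list)]), 1)
sampled_min_iou = min_iou_list[tf.cast(sample_idx[0][0], tf.int32)]

with tf.Session() as sess:
    print(sess.run(sampled_min_iou))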
def train_PG(exp_name='', env_name='CartPole-v0', n_iter=100, gamma=1.0, min_timesteps_per_batch=1000, max_path_length=None, learning_rate=5e-3, reward_to_go=True, animate=True, logdir=None, normalize_advantages=True, nn_baseline=False, seed=0, # network arguments n_layers=1, size=32 ): start = time.time() # Configure output directory for logging logz.configure_output_dir(logdir) # Log experimental parameters args = inspect.getargspec(train_PG)[0] locals_ = locals() params = {k: locals_[k] if k in locals_ else None for k in args} logz.save_params(params) # Set random seeds tf.set_random_seed(seed) np.random.seed(seed) # Make the gym environment env = gym.make(env_name) # Is this env continuous, or discrete? discrete = isinstance(env.action_space, gym.spaces.Discrete) # Maximum length for episodes max_path_length = max_path_length or env.spec.max_episode_steps #========================================================================================# # Notes on notation: # # Symbolic variables have the prefix sy_, to distinguish them from the numerical values # that are computed later in the function # # Prefixes and suffixes: # ob - observation # ac - action # _no - this tensor should have shape (batch size /n/, observation dim) # _na - this tensor should have shape (batch size /n/, action dim) # _n - this tensor should have shape (batch size /n/) # # Note: batch size /n/ is defined at runtime, and until then, the shape for that axis # is None #========================================================================================# # Observation and action sizes ob_dim = env.observation_space.shape[0] ac_dim = env.action_space.n if discrete else env.action_space.shape[0] #========================================================================================# # ----------SECTION 4---------- # Placeholders # # Need these for batch observations / actions / advantages in policy gradient loss function. #========================================================================================# sy_ob_no = tf.placeholder(shape=[None, ob_dim], name="ob", dtype=tf.float32) if discrete: sy_ac_na = tf.placeholder(shape=[None], name="ac", dtype=tf.int32) else: sy_ac_na = tf.placeholder(shape=[None, ac_dim], name="ac", dtype=tf.float32) # Define a placeholder for advantages sy_adv_n = tf.placeholder(shape=[None], name='adv', dtype=tf.float32) #========================================================================================# # ----------SECTION 4---------- # Networks # # Make symbolic operations for # 1. Policy network outputs which describe the policy distribution. # a. For the discrete case, just logits for each action. # # b. For the continuous case, the mean / log std of a Gaussian distribution over # actions. # # Hint: use the 'build_mlp' function you defined in utilities. # # Note: these ops should be functions of the placeholder 'sy_ob_no' # # 2. Producing samples stochastically from the policy distribution. # a. For the discrete case, an op that takes in logits and produces actions. # # Should have shape [None] # # b. For the continuous case, use the reparameterization trick: # The output from a Gaussian distribution with mean 'mu' and std 'sigma' is # # mu + sigma * z, z ~ N(0, I) # # This reduces the problem to just sampling z. (Hint: use tf.random_normal!) # # Should have shape [None, ac_dim] # # Note: these ops should be functions of the policy network output ops. # # 3. Computing the log probability of a set of actions that were actually taken, # according to the policy. 
# # Note: these ops should be functions of the placeholder 'sy_ac_na', and the # policy network output ops. # #========================================================================================# if discrete: # YOUR_CODE_HERE sy_logits_na = build_mlp(sy_ob_no, ac_dim, 'policy', n_layers=n_layers, size=size) sy_sampled_ac = tf.squeeze(tf.multinomial(sy_logits_na, 1), axis=1) sy_logprob_n = -tf.nn.sparse_softmax_cross_entropy_with_logits(labels=sy_ac_na, logits=sy_logits_na) else: # YOUR_CODE_HERE sy_mean = TODO sy_logstd = TODO # logstd should just be a trainable variable, not a network output. sy_sampled_ac = TODO sy_logprob_n = TODO # Hint: Use the log probability under a multivariate gaussian. #========================================================================================# # ----------SECTION 4---------- # Loss Function and Training Operation #========================================================================================# loss = -tf.reduce_mean(sy_logprob_n * sy_adv_n) # Loss function that we'll differentiate to get the policy gradient. update_op = tf.train.AdamOptimizer(learning_rate).minimize(loss) #========================================================================================# # ----------SECTION 5---------- # Optional Baseline #========================================================================================# if nn_baseline: baseline_prediction = tf.squeeze(build_mlp( sy_ob_no, 1, "nn_baseline", n_layers=n_layers, size=size)) # Define placeholders for targets, a loss function and an update op for fitting a # neural network baseline. These will be used to fit the neural network baseline. # YOUR_CODE_HERE # Basic baseline regression setup; the rescaling described in the hints below is not applied here sy_b_n = tf.placeholder(shape=[None], name='b', dtype=tf.float32) b_loss = tf.reduce_mean(tf.losses.mean_squared_error(labels=sy_b_n, predictions=baseline_prediction)) baseline_update_op = tf.train.AdamOptimizer(learning_rate).minimize(b_loss) #========================================================================================# # Tensorflow Engineering: Config, Session, Variable initialization #========================================================================================# tf_config = tf.ConfigProto(inter_op_parallelism_threads=1, intra_op_parallelism_threads=1) sess = tf.Session(config=tf_config) sess.__enter__() # equivalent to `with sess:` tf.global_variables_initializer().run() #pylint: disable=E1101 #========================================================================================# # Training Loop #========================================================================================# total_timesteps = 0 for itr in range(n_iter): print("********** Iteration %i ************"%itr) # Collect paths until we have enough timesteps timesteps_this_batch = 0 paths = [] while True: ob = env.reset() obs, acs, rewards = [], [], [] animate_this_episode=(len(paths)==0 and (itr % 10 == 0) and animate) steps = 0 while True: if animate_this_episode: env.render() time.sleep(0.05) obs.append(ob) ac = sess.run(sy_sampled_ac, feed_dict={sy_ob_no : ob[None]}) ac = ac[0] acs.append(ac) ob, rew, done, _ = env.step(ac) rewards.append(rew) steps += 1 if done or steps > max_path_length: break path = {"observation" : np.array(obs), "reward" : np.array(rewards), "action" : np.array(acs)} paths.append(path) timesteps_this_batch += pathlength(path) if timesteps_this_batch > min_timesteps_per_batch: break total_timesteps += timesteps_this_batch # Build arrays for observation, action for the policy gradient update by concatenating #
across paths ob_no = np.concatenate([path["observation"] for path in paths]) ac_na = np.concatenate([path["action"] for path in paths]) #====================================================================================# # ----------SECTION 4---------- # Computing Q-values # # Your code should construct numpy arrays for Q-values which will be used to compute # advantages (which will in turn be fed to the placeholder you defined above). # # Recall that the expression for the policy gradient PG is # # PG = E_{tau} [sum_{t=0}^T grad log pi(a_t|s_t) * (Q_t - b_t )] # # where # # tau=(s_0, a_0, ...) is a trajectory, # Q_t is the Q-value at time t, Q^{pi}(s_t, a_t), # and b_t is a baseline which may depend on s_t. # # You will write code for two cases, controlled by the flag 'reward_to_go': # # Case 1: trajectory-based PG # # (reward_to_go = False) # # Instead of Q^{pi}(s_t, a_t), we use the total discounted reward summed over the # entire trajectory (regardless of which time step the Q-value should be for). # # For this case, the policy gradient estimator is # # E_{tau} [sum_{t=0}^T grad log pi(a_t|s_t) * Ret(tau)] # # where # # Ret(tau) = sum_{t'=0}^T gamma^t' r_{t'}. # # Thus, you should compute # # Q_t = Ret(tau) # # Case 2: reward-to-go PG # # (reward_to_go = True) # # Here, you estimate Q^{pi}(s_t, a_t) by the discounted sum of rewards starting # from time step t. Thus, you should compute # # Q_t = sum_{t'=t}^T gamma^(t'-t) * r_{t'} # # # Store the Q-values for all timesteps and all trajectories in a variable 'q_n', # like the 'ob_no' and 'ac_na' above. # #====================================================================================# # YOUR_CODE_HERE if reward_to_go == False: # Q_t = Ret(tau): discount, sum over the whole trajectory, broadcast to every timestep q_n = np.concatenate([ np.sum(path['reward'] * gamma ** np.arange(len(path['reward']))) * np.ones_like(path['reward']) for path in paths ]) else: # discounted reward-to-go: reverse ([::-1] is a stride of -1, same as np.flip), cumulative-sum, # reverse back, then divide by gamma^t so the discount restarts at each t (assumes gamma > 0) q_n = np.concatenate([ np.cumsum((path['reward'] * gamma ** np.arange(len(path['reward'])))[::-1])[::-1] / gamma ** np.arange(len(path['reward'])) for path in paths ]) #====================================================================================# # ----------SECTION 5---------- # Computing Baselines #====================================================================================# if nn_baseline: # If nn_baseline is True, use your neural network to predict reward-to-go # at each timestep for each trajectory, and save the result in a variable 'b_n' # like 'ob_no', 'ac_na', and 'q_n'. # # Hint #bl1: rescale the output from the nn_baseline to match the statistics # (mean and std) of the current or previous batch of Q-values. (Goes with Hint # #bl2 below.) b_n = sess.run(baseline_prediction, feed_dict={sy_ob_no: ob_no}) adv_n = q_n - b_n else: adv_n = q_n.copy() #====================================================================================# # ----------SECTION 4---------- # Advantage Normalization #====================================================================================# if normalize_advantages: # On the next line, implement a trick which is known empirically to reduce variance # in policy gradient methods: normalize adv_n to have mean zero and std=1.
# YOUR_CODE_HERE pass #====================================================================================# # ----------SECTION 5---------- # Optimizing Neural Network Baseline #====================================================================================# if nn_baseline: # ----------SECTION 5---------- # If a neural network baseline is used, set up the targets and the inputs for the # baseline. # # Fit it to the current batch in order to use for the next iteration. Use the # baseline_update_op you defined earlier. # # Hint #bl2: Instead of trying to target raw Q-values directly, rescale the # targets to have mean zero and std=1. (Goes with Hint #bl1 above.) # YOUR_CODE_HERE pass #====================================================================================# # ----------SECTION 4---------- # Performing the Policy Update #====================================================================================# # Call the update operation necessary to perform the policy gradient update based on # the current batch of rollouts. # # For debug purposes, you may wish to save the value of the loss function before # and after an update, and then log them below. # YOUR_CODE_HERE # Log diagnostics returns = [path["reward"].sum() for path in paths] ep_lengths = [pathlength(path) for path in paths] logz.log_tabular("Time", time.time() - start) logz.log_tabular("Iteration", itr) logz.log_tabular("AverageReturn", np.mean(returns)) logz.log_tabular("StdReturn", np.std(returns)) logz.log_tabular("MaxReturn", np.max(returns)) logz.log_tabular("MinReturn", np.min(returns)) logz.log_tabular("EpLenMean", np.mean(ep_lengths)) logz.log_tabular("EpLenStd", np.std(ep_lengths)) logz.log_tabular("TimestepsThisBatch", timesteps_this_batch) logz.log_tabular("TimestepsSoFar", total_timesteps) logz.dump_tabular() logz.pickle_tf_vars()
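The continuous-action branch of train_PG above is left as TODO placeholders; the following is a hedged, self-contained sketch of the reparameterization trick described in the comments (a = mu(o) + exp(logstd) * z with z ~ N(0, I)) together with the Gaussian log-probability of the taken actions. The dimensions and the single tanh layer are illustrative stand-ins for build_mlp, not the assignment's required implementation.

import tensorflow as tf

ob_dim, ac_dim = 8, 2
sy_ob_no = tf.placeholder(tf.float32, [None, ob_dim])
sy_ac_na = tf.placeholder(tf.float32, [None, ac_dim])

hidden = tf.layers.dense(sy_ob_no, 32, activation=tf.tanh)            # stand-in for build_mlp
sy_mean = tf.layers.dense(hidden, ac_dim, activation=None)            # mu(o_t)
sy_logstd = tf.get_variable("logstd", shape=[ac_dim], dtype=tf.float32)

sy_z = tf.random_normal(tf.shape(sy_mean))                             # z ~ N(0, I)
sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * sy_z                     # [None, ac_dim]
sy_logprob_n = tf.contrib.distributions.MultivariateNormalDiag(
    loc=sy_mean, scale_diag=tf.exp(sy_logstd)).log_prob(sy_ac_na)      # [None]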
def generate_corruptions_for_fit(X, entities_list=None, eta=1, corrupt_side='s+o', entities_size=0, rnd=None): """Generate corruptions for training. Creates corrupted triples for each statement in an array of statements, as described by :cite:`trouillon2016complex`. .. note:: Collisions are not checked, as this will be computationally expensive :cite:`trouillon2016complex`. That means that some corruptions *may* result in being positive statements (i.e. *unfiltered* settings). .. note:: When processing large knowledge graphs, it may be useful to generate corruptions only using entities from a single batch. This also brings the benefit of creating more meaningful negatives, as entities used to corrupt are sourced locally. The function can be configured to generate corruptions *only* using the entities from the current batch. You can enable such behaviour by setting ``entities_size==-1``. In such case, if ``entities_list=None`` all entities from the *current batch* will be used to generate corruptions. Parameters ---------- X : Tensor, shape [n, 3] An array of positive triples that will be used to create corruptions. entities_list : list List of entities to be used for generating corruptions. (default: None). If ``entities_list=None``, all entities will be used to generate corruptions (default behaviour). eta : int The number of corruptions per triple that must be generated. corrupt_side: string Specifies which side of the triple to corrupt: - 's': corrupt only subject. - 'o': corrupt only object. - 's+o': corrupt both subject and object. entities_size: int Size of entities to be used while generating corruptions. It assumes entity ids start from 0 and are continuous. (default: 0). When processing large knowledge graphs, it may be useful to generate corruptions only using entities from a single batch. This also brings the benefit of creating more meaningful negatives, as entities used to corrupt are sourced locally. The function can be configured to generate corruptions *only* using the entities from the current batch. You can enable such behaviour by setting ``entities_size==-1``. In such case, if ``entities_list=None`` all entities from the *current batch* will be used to generate corruptions. rnd: numpy.random.RandomState A random number generator. Returns ------- out : Tensor, shape [n * eta, 3] An array of corruptions for a list of positive triples x.
For each row in X the corresponding corruption indexes can be found at [index+i*n for i in range(eta)] """ logger.debug('Generating corruptions for fit.') if corrupt_side not in ['s+o', 's', 'o']: msg = 'Invalid argument value {} for corruption side passed for evaluation.'.format( corrupt_side) logger.error(msg) raise ValueError(msg) dataset = tf.reshape(tf.tile(tf.reshape(X, [-1]), [eta]), [tf.shape(X)[0] * eta, 3]) if corrupt_side == 's+o': keep_subj_mask = tf.tile( tf.cast( tf.random_uniform([tf.shape(X)[0]], 0, 2, dtype=tf.int32, seed=rnd), tf.bool), [eta]) else: keep_subj_mask = tf.cast(tf.ones(tf.shape(X)[0] * eta, tf.int32), tf.bool) if corrupt_side == 's': keep_subj_mask = tf.logical_not(keep_subj_mask) keep_obj_mask = tf.logical_not(keep_subj_mask) keep_subj_mask = tf.cast(keep_subj_mask, tf.int32) keep_obj_mask = tf.cast(keep_obj_mask, tf.int32) logger.debug('Created corruption masks.') if entities_size != 0: replacements = tf.random_uniform([tf.shape(dataset)[0]], 0, entities_size, dtype=tf.int32, seed=rnd) else: if entities_list is None: # use entities in the batch entities_list, _ = tf.unique( tf.squeeze( tf.concat([ tf.slice(X, [0, 0], [tf.shape(X)[0], 1]), tf.slice(X, [0, 2], [tf.shape(X)[0], 1]) ], 0))) random_indices = tf.squeeze( tf.multinomial(tf.expand_dims(tf.zeros(tf.shape(entities_list)[0]), 0), num_samples=tf.shape(dataset)[0], seed=rnd)) replacements = tf.gather(entities_list, random_indices) subjects = tf.math.add(tf.math.multiply(keep_subj_mask, dataset[:, 0]), tf.math.multiply(keep_obj_mask, replacements)) logger.debug('Created corrupted subjects.') relationships = dataset[:, 1] logger.debug('Retained relationships.') objects = tf.math.add(tf.math.multiply(keep_obj_mask, dataset[:, 2]), tf.math.multiply(keep_subj_mask, replacements)) logger.debug('Created corrupted objects.') out = tf.transpose(tf.stack([subjects, relationships, objects])) logger.debug('Returning corruptions for fit.') return out
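A small sketch (assuming TensorFlow 1.x) of the uniform-sampling trick used in the batch-entities path above: a single row of zero logits makes tf.multinomial draw indices uniformly, and those indices are then gathered from the entity list to form the replacements. The entity ids below are illustrative.

import tensorflow as tf

entities_list = tf.constant([11, 42, 7, 99], dtype=tf.int32)            # illustrative entity ids
num_needed = 6
zero_logits = tf.expand_dims(tf.zeros(tf.shape(entities_list)[0]), 0)   # [1, n_entities]
random_indices = tf.squeeze(tf.multinomial(zero_logits, num_samples=num_needed))
replacements = tf.gather(entities_list, random_indices)

with tf.Session() as sess:
    print(sess.run(replacements))   # e.g. [42  7  7 11 99 42]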
def train_mode(self, vocab, encoder_dim, encoder_states, encoder_features, passage_word_idx, passage_mask, init_state, decoder_inputs, answer_batch, loss_weights, mode_gen='ce_train'): ''' encoder_dim: int-valued encoder_states: [batch_size, passage_len, encoder_dim]. passage_word_idx: [batch_size, passage_len] int32 passage_mask: [batch_size, passage_len] 0/1 init_state: Tuple of [batch_size, gen_hidden_size] decoder_inputs: [batch_size, max_dec_steps]. answer_batch: [batch_size, max_dec_steps] ''' options = self.options input_shape = tf.shape(encoder_states) batch_size = input_shape[0] passage_len = input_shape[1] # map decoder inputs to word embeddings decoder_inputs = tf.unstack(decoder_inputs, axis=1) # max_enc_steps * [batch_size] answer_batch_unstack = tf.unstack(answer_batch, axis=1) # initialize all the variables state_t_1 = init_state context_t_1 = tf.zeros([batch_size, encoder_dim]) coverage_t_1 = None # store variables from each time-step coverages = [] attn_dists = [] p_gens = [] vocab_scores = [] sampled_words = [] self.encoder_features = encoder_features with variable_scope.variable_scope("attention_decoder"): # Get the weight vectors v and W_c (W_c is for coverage) v = variable_scope.get_variable("v", [options.attention_vec_size]) v = tf.expand_dims(tf.expand_dims(v, axis=0), axis=0) w_c = None if options.use_coverage: with variable_scope.variable_scope("coverage"): w_c = variable_scope.get_variable( "w_c", [options.attention_vec_size]) w_c = tf.expand_dims(tf.expand_dims(w_c, axis=0), axis=0) # For each step, dec_input => lstm_output => vocab_score wordidx_t = decoder_inputs[0] # [batch_size] int32 for i in range(options.max_answer_len): if mode_gen in ( 'ce_train', 'loss', ): wordidx_t = decoder_inputs[ i] # the wordidx_t must from decoder_inputs for phrase model word_t = self.embedding_lookup(wordidx_t) if i > 0: variable_scope.get_variable_scope().reuse_variables() (state_t, context_t, coverage_t, attn_dist_t, p_gen_t, output_t) = self.one_step_decoder( state_t_1, context_t_1, coverage_t_1, word_t, encoder_states, self.encoder_features, passage_word_idx, passage_mask, v, w_c, vocab) coverages.append(coverage_t) attn_dists.append(attn_dist_t) p_gens.append(p_gen_t) vocab_scores.append(output_t) # The vocabulary distributions. 
state_t_1 = state_t context_t_1 = context_t coverage_t_1 = coverage_t if mode_gen == 'greedy': wordidx_t = tf.argmax(output_t, 1) # [batch_size] wordidx_t = tf.reshape(wordidx_t, [-1]) # [batch_size] elif mode_gen == 'sample': log_score_t = tf.log(output_t) # [batch_size, vsize] wordidx_t = tf.multinomial(log_score_t, 1) # [batch_size, 1] wordidx_t = tf.reshape(wordidx_t, [-1]) # [batch_size] elif mode_gen in ( 'ce_train', 'loss', ): wordidx_t = answer_batch_unstack[i] else: assert False, 'unknown generating mode %s' % mode_gen sampled_words.append(wordidx_t) if len(sampled_words) != 0: sampled_words = tf.stack(sampled_words, axis=1) # [batch_size, max_dec_steps] vocab_scores = tf.stack(vocab_scores, axis=1) # [batch_size, max_dec_steps, vocab] # calculating loss self._loss = None if mode_gen in ( 'ce_train', 'loss', ): xent = CE_loss(vocab_scores, answer_batch, loss_weights) # [batch_size] if mode_gen == 'loss': xent *= self.placeholders.reward # multiply with rewards self._loss = tf.reduce_mean(xent) # Calculate coverage loss from the attention distributions if options.use_coverage: with tf.variable_scope('coverage_loss'): self._coverage_loss = _coverage_loss( attn_dists, loss_weights) self._loss = self._loss + options.cov_loss_wt * self._coverage_loss # accuracy is calculated only under 'ce_train', where true answer is given if mode_gen == 'ce_train': accuracy = _mask_and_accuracy(vocab_scores, answer_batch, loss_weights) return accuracy, self._loss, sampled_words else: return None, self._loss, sampled_words
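A short sketch (assuming TensorFlow 1.x) of the two generation branches above for a single step: 'greedy' takes the argmax of the vocabulary distribution, while 'sample' draws from it by passing log-probabilities to tf.multinomial; both end up as a flat [batch_size] vector of word indices. The probabilities here are illustrative.

import tensorflow as tf

output_t = tf.constant([[0.7, 0.2, 0.1],
                        [0.05, 0.05, 0.9]])                              # [batch_size, vsize] probs
greedy_wordidx = tf.reshape(tf.argmax(output_t, 1), [-1])                # [batch_size]
sampled_wordidx = tf.reshape(tf.multinomial(tf.log(output_t), 1), [-1])  # [batch_size]

with tf.Session() as sess:
    print(sess.run([greedy_wordidx, sampled_wordidx]))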
def create_selection_weights(name, type_, shape, inv_t=1, initializer=tf.zeros_initializer(), regularizer=None, names=None): """Create a SelectionWeights tuple. Args: name: Name for the underlying variable containing the unnormalized weights. type_: "softmax" or "sigmoid" or ("softmax_topk", k) where k is an int. shape: Shape for the variable. inv_t: Inverse of the temperature to use in normalization. initializer: Initializer for the variable, passed to `tf.get_variable`. regularizer: Regularizer for the variable. A callable which accepts `tempered_var` and `normalized`. names: Name of each selection. Returns: The created SelectionWeights tuple. Raises: ValueError: if type_ is not in the supported range. """ var = tf.get_variable(name, shape, initializer=initializer) if callable(inv_t): inv_t = inv_t(var) if inv_t == 1: tempered_var = var else: tempered_var = var * inv_t if type_ == "softmax": weights = tf.nn.softmax(tempered_var) elif type_ == "sigmoid": weights = tf.nn.sigmoid(tempered_var) elif isinstance(type_, (list, tuple)) and type_[0] == "softmax_topk": assert len(shape) == 1 # TODO(rshin): Change this to select without replacement? selection = tf.multinomial(tf.expand_dims(var, axis=0), 4) selection = tf.squeeze(selection, axis=0) # [k] selected classes. to_run = tf.one_hot(selection, shape[0]) # [k x nmodules] one-hot. # [nmodules], 0=not run, 1=run. to_run = tf.minimum(tf.reduce_sum(to_run, axis=0), 1) weights = tf.nn.softmax(tempered_var - 1e9 * (1.0 - to_run)) else: raise ValueError("Unknown type: %s" % type_) if regularizer is not None: loss = regularizer(tempered_var, weights) if loss is not None: tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, loss) if names is not None: tf.get_collection_ref("selection_weight_names/" + var.name).extend( names.flatten() if isinstance(names, np.ndarray) else names) tf.add_to_collection("selection_weight_names_tensor/" + var.name, tf.constant(names)) return SelectionWeights(var=var, tempered_var=tempered_var, inv_t=inv_t, normalized=weights)
def __init__(self, lr, o_size_h, o_size_w, a_size, h_size, epsilon, beta, max_step): """ Creates Discrete Control Actor-Critic model for use with visual observations (images). :param o_size_h: Observation height. :param o_size_w: Observation width. :param a_size: Action-space size. :param h_size: Hidden layer size. """ self.observation_in = tf.placeholder( shape=[None, o_size_h, o_size_w, 1], dtype=tf.float32, name='observation_0') self.conv1 = tf.layers.conv2d(self.observation_in, 32, kernel_size=[3, 3], strides=[2, 2], use_bias=False, activation=tf.nn.elu) self.conv2 = tf.layers.conv2d(self.conv1, 64, kernel_size=[3, 3], strides=[2, 2], use_bias=False, activation=tf.nn.elu) self.batch_size = tf.placeholder(shape=None, dtype=tf.int32) hidden = tf.layers.dense(c_layers.flatten(self.conv2), h_size, use_bias=False, activation=tf.nn.elu) self.policy = tf.layers.dense( hidden, a_size, activation=None, use_bias=False, kernel_initializer=c_layers.variance_scaling_initializer( factor=0.1)) self.probs = tf.nn.softmax(self.policy) self.action = tf.multinomial(self.policy, 1) self.output = tf.identity(self.action, name='action') self.value = tf.layers.dense(hidden, 1, activation=None, use_bias=False) self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs + 1e-10), axis=1) self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32) self.selected_actions = c_layers.one_hot_encoding( self.action_holder, a_size) self.old_probs = tf.placeholder(shape=[None, a_size], dtype=tf.float32, name='old_probabilities') self.responsible_probs = tf.reduce_sum(self.probs * self.selected_actions, axis=1) self.old_responsible_probs = tf.reduce_sum(self.old_probs * self.selected_actions, axis=1) PPOModel.__init__(self, self.responsible_probs, self.old_responsible_probs, self.value, self.entropy, beta, epsilon, lr, max_step)
def select_sample_input(): current_logits = logits.read(ts) decoder_input = tf.to_int32(tf.multinomial(current_logits, 1)) decoder_input = tf.stop_gradient(decoder_input) return tf.squeeze(decoder_input, [1])
def make_data_tensor(self, train=True): if train: folders = self.metatrain_character_folders # number of tasks, not number of meta-iterations. (divide by metabatch size to measure) num_total_batches = 200000 else: folders = self.metaval_character_folders num_total_batches = 600 # make list of files print('Generating filenames') all_filenames = [] for _ in range(num_total_batches): sampled_character_folders = random.sample(folders, self.num_classes) random.shuffle(sampled_character_folders) labels_and_images = get_images( sampled_character_folders, range(self.num_classes), nb_samples=self.num_samples_per_class, shuffle=False) # make sure the above isn't randomized order labels = [li[0] for li in labels_and_images] filenames = [li[1] for li in labels_and_images] all_filenames.extend(filenames) # make queue for tensorflow to read from filename_queue = tf.train.string_input_producer( tf.convert_to_tensor(all_filenames), shuffle=False) print('Generating image processing ops') image_reader = tf.WholeFileReader() _, image_file = image_reader.read(filename_queue) if FLAGS.datasource == 'miniimagenet': image = tf.image.decode_jpeg(image_file, channels=3) image.set_shape((self.img_size[0], self.img_size[1], 3)) image = tf.reshape(image, [self.dim_input]) image = tf.cast(image, tf.float32) / 255.0 else: image = tf.image.decode_png(image_file) image.set_shape((self.img_size[0], self.img_size[1], 1)) image = tf.reshape(image, [self.dim_input]) image = tf.cast(image, tf.float32) / 255.0 image = 1.0 - image # invert num_preprocess_threads = 1 # TODO - enable this to be set to >1 min_queue_examples = 256 examples_per_batch = self.num_classes * self.num_samples_per_class batch_image_size = self.batch_size * examples_per_batch print('Batching images') images = tf.train.batch( [image], batch_size=batch_image_size, num_threads=num_preprocess_threads, capacity=min_queue_examples + 3 * batch_image_size, ) all_image_batches, all_label_batches = [], [] print('Manipulating image data to be right shape') for i in range(self.batch_size): image_batch = images[i * examples_per_batch:(i + 1) * examples_per_batch] if FLAGS.datasource == 'omniglot': # omniglot augments the dataset by rotating digits to create new classes # get rotation per class (e.g. 0,1,2,0,0 if there are 5 classes) rotations = tf.multinomial(tf.log([[1., 1., 1., 1.]]), self.num_classes) label_batch = tf.convert_to_tensor(labels) new_list, new_label_list = [], [] for k in range(self.num_samples_per_class): class_idxs = tf.range(0, self.num_classes) class_idxs = tf.random_shuffle(class_idxs) true_idxs = class_idxs * self.num_samples_per_class + k new_list.append(tf.gather(image_batch, true_idxs)) if FLAGS.datasource == 'omniglot': # and FLAGS.train: new_list[-1] = tf.stack([ tf.reshape( tf.image.rot90(tf.reshape( new_list[-1][ind], [self.img_size[0], self.img_size[1], 1]), k=tf.cast( rotations[0, class_idxs[ind]], tf.int32)), (self.dim_input, )) for ind in range(self.num_classes) ]) new_label_list.append(tf.gather(label_batch, true_idxs)) new_list = tf.concat( new_list, 0 ) # has shape [self.num_classes*self.num_samples_per_class, self.dim_input] new_label_list = tf.concat(new_label_list, 0) all_image_batches.append(new_list) all_label_batches.append(new_label_list) all_image_batches = tf.stack(all_image_batches) all_label_batches = tf.stack(all_label_batches) all_label_batches = tf.one_hot(all_label_batches, self.num_classes) return all_image_batches, all_label_batches
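A compact sketch (assuming TensorFlow 1.x) of the Omniglot rotation augmentation above: one of four 90-degree rotations is drawn per class from uniform logits and applied with tf.image.rot90, whose k argument accepts a tensor. The image below is a random stand-in for one example.

import tensorflow as tf

num_classes = 5
# [1, num_classes] int64 tensor of rotation counts, each in {0, 1, 2, 3}.
rotations = tf.multinomial(tf.log([[1., 1., 1., 1.]]), num_classes)
image = tf.random_uniform([28, 28, 1])                        # stand-in for one example
rotated = tf.image.rot90(image, k=tf.cast(rotations[0, 0], tf.int32))

with tf.Session() as sess:
    print(sess.run(rotations))
    print(sess.run(rotated).shape)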
def __init__( self ): self.num_layers = 4 self.num_branches = 6 self.lstm_size = 32 self.num_blocks_per_branch = 6 self.l2_reg = 1e-4 self.lstm_weight = [] for layer_id in range(self.num_layers): with tf.variable_scope("layer_{}".format(layer_id)): w = tf.get_variable("w", [2 * self.lstm_size, 4 * self.lstm_size]) self.lstm_weight.append(w) self.num_configs = (2 ** self.num_blocks_per_branch) - 1 with tf.variable_scope("embedding"): self.embed_graph = tf.get_variable("embed_graph", [1, self.lstm_size]) self.embed_weight = tf.get_variable("weight", [ self.num_blocks_per_branch, self.lstm_size ]) with tf.variable_scope("softmax"): self.softmax_weight = tf.get_variable("weight", [ self.lstm_size, self.num_blocks_per_branch ]) with tf.variable_scope("critic"): self.critic_weight = tf.get_variable("weight", [self.lstm_size, 1]) arc_seq = [] sample_log_probs = [] all_h = [] inputs = self.embed_graph prev_channel = [ tf.zeros([1, self.lstm_size], dtype=tf.float32) for _ in range(self.num_layers) ] prev_height = [ tf.zeros([1, self.lstm_size], dtype=tf.float32) for _ in range(self.num_layers) ] for layer_id in range(self.num_layers): for branch_id in range(self.num_branches): next_channel, next_height = stack_lstm( inputs, prev_channel, prev_height, self.lstm_weight ) prev_channel, prev_height = next_channel, next_height all_h.append(tf.stop_gradient(next_height[-1])) logits = tf.matmul(next_height[-1], self.softmax_weight) logits = 1.10 * tf.tanh(logits) config_id = tf.multinomial(logits, 1) config_id = tf.to_int32(config_id) config_id = tf.reshape(config_id, [1]) arc_seq.append(config_id) log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=logits, labels=config_id ) sample_log_probs.append(log_prob) inputs = tf.nn.embedding_lookup(self.embed_weight, config_id) self.sample_arc = tf.concat(arc_seq, axis=0) self.sample_log_probs = tf.concat(sample_log_probs, axis=0) self.ppl = tf.exp( tf.reduce_sum(self.sample_log_probs) / tf.to_float(self.num_layers * self.num_branches) ) self.all_h = all_h
def multinomial_sample(x, vocab_size, temperature): """Multinomial sampling from a n-dimensional tensor.""" samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1) reshaped_samples = tf.reshape(samples, tf.shape(x)[:-1]) return tf.to_int32(reshaped_samples)
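A tiny sketch (assuming TensorFlow 1.x) of the temperature division above: low temperatures sharpen the distribution toward its argmax, while high temperatures flatten it toward uniform sampling. The logits are illustrative.

import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.0]])
cold = tf.multinomial(logits / 0.1, 10)    # almost always index 0
hot = tf.multinomial(logits / 10.0, 10)    # close to uniform over the three indices

with tf.Session() as sess:
    print(sess.run([cold, hot]))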
def loop_fn(time, cell_output, cell_state, loop_state): if cell_output is None: # time == 0 next_cell_state = encoder_states next_input = tf.tile(go_embedding, to_T([N, 1])) else: # time > 0 next_cell_state = cell_state # compute the attention map over the input sequence # a_raw has shape [T, N, 1] att_raw = tf.reduce_sum( tf.tanh(tf.nn.xw_plus_b(cell_output, W_a, b_a) + self.encoder_h_transformed) * v, axis=2, keep_dims=True) # softmax along the first dimension (T) over not finished examples # att has shape [T, N, 1] att = tf.nn.softmax(att_raw, dim=0)*self.seq_not_finished att = att / tf.reduce_sum(att + 1e-10, axis=0, keep_dims=True) # d has shape [N, lstm_dim] d2 = tf.reduce_sum(att*self.encoder_outputs, axis=0) # token_scores has shape [N, num_vocab] token_scores = tf.nn.xw_plus_b( tf.concat([cell_output, d2], axis=1), W_y, b_y) decoding_state = loop_state[2] # token_validity has shape [N, num_vocab] token_validity = _get_valid_tokens(decoding_state, self.W, self.b) token_validity.set_shape([None, self.decoder_num_vocab]) if use_gt_layout is not None: # when there's ground-truth layout, do not re-normalize prob # and treat all tokens as valid token_validity = tf.logical_or(token_validity, use_gt_layout) validity_mult = tf.cast(token_validity, tf.float32) # predict the next token (behavior depending on parameters) if sampling: token_scores_valid = token_scores - (1-validity_mult) * 50 # TODO:debug sampled_token = tf.cast(tf.reshape( tf.multinomial(token_scores_valid/self.temperature, 1), [-1]), tf.int32) # make sure that the predictions are ALWAYS valid # (it can be invalid with very small prob) # If not, just fall back to min cases # pred_mask has shape [N, num_vocab] sampled_mask = tf.equal(mask_range, tf.reshape(sampled_token, [-1, 1])) is_sampled_valid = tf.reduce_any( tf.logical_and(sampled_mask, token_validity), axis=1) # Fall back to max score (no sampling) min_score = tf.reduce_min(token_scores) token_scores_valid = tf.where(token_validity, token_scores, tf.ones_like(token_scores)*(min_score-1)) max_score_token = tf.cast(tf.argmax(token_scores_valid, 1), tf.int32) predicted_token = tf.where(is_sampled_valid, sampled_token, max_score_token) else: min_score = tf.reduce_min(token_scores) token_scores_valid = tf.where(token_validity, token_scores, tf.ones_like(token_scores)*(min_score-1)) # predicted_token has shape [N] predicted_token = tf.cast(tf.argmax(token_scores_valid, 1), tf.int32) if use_gt_layout is not None: predicted_token = (gt_layout_batch[time-1] * gt_layout_mult + predicted_token * pred_layout_mult) # a robust version of softmax # all_token_probs has shape [N, num_vocab] all_token_probs = tf.nn.softmax(token_scores) * validity_mult # tf.check_numerics(all_token_probs, 'NaN/Inf before div') all_token_probs = all_token_probs / tf.reduce_sum(all_token_probs + 1e-10, axis=1, keep_dims=True) # tf.check_numerics(all_token_probs, 'NaN/Inf after div') # mask has shape [N, num_vocab] mask = tf.equal(mask_range, tf.reshape(predicted_token, [-1, 1])) # token_prob has shape [N], the probability of the predicted token # although token_prob is not needed for predicting the next token # it is needed in output (for policy gradient training) # [N, num_vocab] token_prob = tf.reduce_sum(all_token_probs * tf.cast(mask, tf.float32), axis=1) # tf.assert_positive(token_prob) neg_entropy = tf.reduce_sum( all_token_probs * tf.log(all_token_probs + (1-validity_mult) + 1e-10), axis=1) # update states updated_decoding_state = _update_decoding_state( decoding_state, predicted_token, self.P) # 
the prediction is from the cell output of the last step # timestep (t-1), feed it as input into timestep t next_input = tf.nn.embedding_lookup(embedding_mat, predicted_token) elements_finished = tf.greater_equal(time, T_max) # loop_state is a 5-tuple, representing # 1) the predicted_tokens # 2) the prob of predicted_tokens # 3) the decoding state (used for validity) # 4) the negative entropy of policy (accumulated across timesteps) # 5) the attention if loop_state is None: # time == 0 # Write the predicted token into the output predicted_token_array = tf.TensorArray(dtype=tf.int32, size=T_max, infer_shape=False) token_prob_array = tf.TensorArray(dtype=tf.float32, size=T_max, infer_shape=False) init_decoding_state = tf.tile(to_T([[0, 0, T_max]], dtype=tf.int32), to_T([N, 1])) att_array = tf.TensorArray(dtype=tf.float32, size=T_max, infer_shape=False) next_loop_state = (predicted_token_array, token_prob_array, init_decoding_state, tf.zeros(to_T([N]), dtype=tf.float32), att_array) else: # time > 0 t_write = time-1 next_loop_state = (loop_state[0].write(t_write, predicted_token), loop_state[1].write(t_write, token_prob), updated_decoding_state, loop_state[3] + neg_entropy, loop_state[4].write(t_write, att)) return (elements_finished, next_input, next_cell_state, cell_output, next_loop_state)
def random_category(self, size, dtype): prior = tf.ones(tf.stack((tf.shape(self.gan.inputs.x)[0], size)))*1./size dist = tf.log(prior + TINY) sample=tf.multinomial(dist, num_samples=1)[:, 0] return tf.one_hot(sample, size, dtype=dtype)
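A self-contained sketch (assuming TensorFlow 1.x) of the same pattern with a fixed batch size instead of one derived from the GAN inputs: sample one class index per row from a uniform prior and convert it to a one-hot code.

import tensorflow as tf

TINY = 1e-12
size, batch = 4, 3
prior = tf.ones([batch, size]) * 1. / size
sample = tf.multinomial(tf.log(prior + TINY), num_samples=1)[:, 0]   # [batch]
one_hot = tf.one_hot(sample, size, dtype=tf.float32)                 # [batch, size]

with tf.Session() as sess:
    print(sess.run(one_hot))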
def inner_loop(i, alive_seq): logit = symbols_to_logits_fn(alive_seq)[0] new_samples = tf.multinomial(logit, 1) new_samples = tf.to_int32(new_samples) alive_seq = tf.concat([alive_seq, new_samples], 1) return (i + 1, alive_seq)
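A self-contained sketch (assuming TensorFlow 1.x) of driving a body like inner_loop with tf.while_loop; toy_symbols_to_logits_fn is a hypothetical stand-in for the real callback, which is not shown here, and simply returns uniform logits.

import tensorflow as tf

batch_size, vocab_size, max_len = 2, 5, 4

def toy_symbols_to_logits_fn(seq):
    # A real model would condition on the prefix `seq`; here the logits are uniform.
    return tf.zeros([batch_size, vocab_size])

def inner_loop(i, alive_seq):
    logit = toy_symbols_to_logits_fn(alive_seq)
    new_samples = tf.to_int32(tf.multinomial(logit, 1))    # [batch_size, 1]
    alive_seq = tf.concat([alive_seq, new_samples], 1)
    return i + 1, alive_seq

initial_seq = tf.zeros([batch_size, 1], dtype=tf.int32)    # a start-token id of 0
_, sampled = tf.while_loop(
    lambda i, seq: i < max_len,
    inner_loop,
    [tf.constant(0), initial_seq],
    shape_invariants=[tf.TensorShape([]), tf.TensorShape([batch_size, None])])

with tf.Session() as sess:
    print(sess.run(sampled))   # [batch_size, 1 + max_len] token ids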
def _build_sampler(self): """Build the sampler ops and the log_prob ops.""" arc_seq = [] sample_log_probs = [] sample_entropy = [] all_h = [] all_h_w = [] # sampler ops inputs = self.g_emb prev_c, prev_h = [], [] for _ in range(self.lstm_num_layers): prev_c.append(tf.zeros([1, self.lstm_size], dtype=tf.float32)) prev_h.append(tf.zeros([1, self.lstm_size], dtype=tf.float32)) # used = tf.zeros([self.rhn_depth, 2], dtype=tf.int32) for layer_id in range(self.rhn_depth): next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm) prev_c, prev_h = next_c, next_h all_h.append(next_h[-1]) all_h_w.append(tf.matmul(next_h[-1], self.attn_w_1)) if layer_id > 0: query = tf.matmul(next_h[-1], self.attn_w_2) query = query + tf.concat(all_h_w[:-1], axis=0) query = tf.tanh(query) logits = tf.matmul(query, self.attn_v) logits = tf.reshape(logits, [1, layer_id]) if self.temperature is not None: logits /= self.temperature if self.tanh_constant is not None: logits = self.tanh_constant * tf.tanh(logits) diff = tf.to_float(layer_id - tf.range(0, layer_id)) ** 2 logits -= tf.reshape(diff, [1, layer_id]) / 6.0 skip_index = tf.multinomial(logits, 1) skip_index = tf.to_int32(skip_index) skip_index = tf.reshape(skip_index, [1]) arc_seq.append(skip_index) log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=logits, labels=skip_index) sample_log_probs.append(log_prob) entropy = log_prob * tf.exp(-log_prob) sample_entropy.append(tf.stop_gradient(entropy)) inputs = tf.nn.embedding_lookup( tf.concat(all_h[:-1], axis=0), skip_index) inputs /= (0.1 + tf.to_float(layer_id - skip_index)) else: inputs = self.g_emb next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm) prev_c, prev_h = next_c, next_h logits = tf.matmul(next_h[-1], self.w_soft) if self.temperature is not None: logits /= self.temperature if self.tanh_constant is not None: logits = self.tanh_constant * tf.tanh(logits) func = tf.multinomial(logits, 1) func = tf.to_int32(func) func = tf.reshape(func, [1]) arc_seq.append(func) log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=logits, labels=func) sample_log_probs.append(log_prob) entropy = log_prob * tf.exp(-log_prob) sample_entropy.append(tf.stop_gradient(entropy)) inputs = tf.nn.embedding_lookup(self.w_emb, func) arc_seq = tf.concat(arc_seq, axis=0) self.sample_arc = arc_seq self.sample_log_probs = tf.concat(sample_log_probs, axis=0) self.ppl = tf.exp(tf.reduce_mean(self.sample_log_probs)) sample_entropy = tf.concat(sample_entropy, axis=0) self.sample_entropy = tf.reduce_sum(sample_entropy) self.all_h = all_h
def image_augmentations(image, data_augmentations, model_input_image_size, label=None): """Coordinating image augmentations for both image and heatmap.""" im_size = [int(x) for x in image.get_shape()] im_size_check = np.any( np.less_equal(model_input_image_size[:2], im_size[:2])) if data_augmentations is not None: # Pixel/image-level augmentations if 'singleton' in data_augmentations: image = tf.expand_dims(image, axis=-1) print 'Adding singleton dimension to image.' if 'singleton_label' in data_augmentations: label = tf.expand_dims(label, axis=-1) print 'Adding singleton dimension to label.' if 'bsds_crop' in data_augmentations and im_size_check: assert len(image.get_shape()) == 3, '4D not implemented yet.' # intermediate_size = [171, 256, 3] # intermediate_size = [256, 384, 3] intermediate_size = [324, 484, 3] image = tf.image.resize_image_with_crop_or_pad( image, intermediate_size[0], intermediate_size[1]) label = tf.image.resize_image_with_crop_or_pad( label, intermediate_size[0], intermediate_size[1]) print 'Applying BSDS crop.' if 'uint8_rescale' in data_augmentations: image = tf.cast(image, tf.float32) / 255. print 'Applying uint8 rescale to the image.' if 'uint8_rescale_label' in data_augmentations: label = tf.cast(label, tf.float32) / 255. print 'Applying uint8 rescale to the label.' if 'uint8_rescale_-1_1' in data_augmentations: image = 2 * (tf.cast(image, tf.float32) / 255.) - 1 print 'Applying uint8 rescale.' if 'image_to_bgr' in data_augmentations: image = tf.stack([image[:, :, 2], image[:, :, 1], image[:, :, 0]], axis=-1) if 'pascal_normalize' in data_augmentations: image = image - [123.68, 116.78, 103.94] if 'random_contrast' in data_augmentations: assert len(image.get_shape()) == 3, '4D not implemented yet.' image = tf.image.random_contrast(image, lower=0.2, upper=1.8) print 'Applying random contrast.' if 'random_brightness' in data_augmentations: assert len(image.get_shape()) == 3, '4D not implemented yet.' image = tf.image.random_brightness(image, max_delta=63.) print 'Applying random brightness.' if 'grayscale' in data_augmentations and im_size_check: # image = tf.image.rgb_to_grayscale(image) image = tf.expand_dims(image[:, :, 0], axis=-1) # ABOVE INSTEAD? print 'Converting to grayscale.' # Affine augmentations if 'rotate' in data_augmentations and im_size_check: max_theta = 22. angle_rad = (max_theta / 180.) * math.pi angles = tf.random_uniform([], -angle_rad, angle_rad) transform = tf.contrib.image.angles_to_projective_transforms( angles, im_size[0], im_size[1]) image = tf.contrib.image.transform( image, tf.contrib.image.compose_transforms(transform), interpolation='BILINEAR') # or 'NEAREST' print 'Applying random rotate.' if 'rotate_image_label' in data_augmentations and im_size_check: max_theta = 30. angle_rad = (max_theta / 180.) * math.pi angles = tf.random_uniform([], -angle_rad, angle_rad) transform = tf.contrib.image.angles_to_projective_transforms( angles, im_size[0], im_size[1]) image = tf.contrib.image.transform( image, tf.contrib.image.compose_transforms(transform), interpolation='BILINEAR') # or 'NEAREST' label = tf.contrib.image.transform( label, tf.contrib.image.compose_transforms(transform), interpolation='BILINEAR') # or 'NEAREST' print 'Applying random rotate.' 
if 'random_scale_crop_image_label' in data_augmentations\ and im_size_check: scale_choices = tf.convert_to_tensor([1., 1.02, 1.04, 1.06, 1.08]) samples = tf.multinomial(tf.log([tf.ones_like(scale_choices)]), 1) image_shape = image.get_shape().as_list() scale = scale_choices[tf.cast(samples[0][0], tf.int32)] scale_tf = tf.cast( tf.round( np.asarray(model_input_image_size[:2]).astype(np.float32) * scale), tf.int32) combined = tf.concat([image, label], axis=-1) combo_shape = combined.get_shape().as_list() combined_crop = tf.random_crop( combined, tf.concat([scale_tf, [combo_shape[-1]]], 0)) combined_resize = tf.squeeze(tf.image.resize_bicubic( tf.expand_dims(combined_crop, axis=0), model_input_image_size[:2], align_corners=True), axis=0) image = combined_resize[:, :, :image_shape[-1]] label = combined_resize[:, :, image_shape[-1]:] image.set_shape(model_input_image_size) label.set_shape(model_input_image_size[:2] + [combo_shape[-1] - model_input_image_size[-1]]) if 'rc_res' in data_augmentations and im_size_check: image = random_crop(image, model_input_image_size) if len(model_input_image_size) > 2: model_input_image_size = model_input_image_size[:2] ms = [x // 2 for x in model_input_image_size] image = resize_image_label(im=image, model_input_image_size=ms, f='bicubic') print 'Applying random crop and resize.' if 'cc_res' in data_augmentations and im_size_check: image = center_crop(image, model_input_image_size) if len(model_input_image_size) > 2: model_input_image_size = model_input_image_size[:2] ms = [x // 2 for x in model_input_image_size] image = resize_image_label(im=image, model_input_image_size=ms, f='bicubic') print 'Applying center crop and resize.' if 'random_crop' in data_augmentations and im_size_check: image = random_crop(image, model_input_image_size) print 'Applying random crop.' if 'center_crop' in data_augmentations and im_size_check: image = center_crop(image, model_input_image_size) print 'Applying center crop.' if 'random_crop_image_label' in data_augmentations and im_size_check: assert len(image.get_shape()) == 3, '4D not implemented yet.' image, label = crop_image_label(image=image, label=label, size=model_input_image_size, crop='random') if 'center_crop_image_label' in data_augmentations and im_size_check: assert len(image.get_shape()) == 3, '4D not implemented yet.' image, label = crop_image_label(image=image, label=label, size=model_input_image_size, crop='center') if 'resize' in data_augmentations and im_size_check: if len(model_input_image_size) > 2: model_input_image_size = model_input_image_size[:2] image = resize_image_label( im=image, model_input_image_size=model_input_image_size, f='bicubic') print 'Applying area resize.' if 'jk_resize' in data_augmentations and im_size_check: if len(model_input_image_size) > 2: model_input_image_size = model_input_image_size[:2] image = tf.image.resize_image_with_crop_or_pad( image, model_input_image_size[0], model_input_image_size[1]) print 'Applying area resize.' if 'resize_and_crop' in data_augmentations and im_size_check: model_input_image_size_1 = np.asarray( model_input_image_size[:2]) + 28 image = resize_image_label( im=image, model_input_image_size=model_input_image_size_1, f='area') image = center_crop(image, model_input_image_size) print 'Applying area resize.' if 'resize_nn' in data_augmentations and im_size_check: assert len(image.get_shape()) == 3, '4D not implemented yet.' 
if len(model_input_image_size) > 2: model_input_image_size = model_input_image_size[:2] image = resize_image_label( im=image, model_input_image_size=model_input_image_size, f='nearest') print 'Applying nearest resize.' if 'resize_image_label' in data_augmentations and im_size_check: assert len(image.get_shape()) == 3, '4D not implemented yet.' if len(model_input_image_size) > 2: model_input_image_size = model_input_image_size[:2] image = resize_image_label( im=image, model_input_image_size=model_input_image_size, f='bicubic') label = resize_image_label( im=label, model_input_image_size=model_input_image_size, f='bicubic') print 'Applying bilinear resize.' elif 'resize_nn_image_label' in data_augmentations and im_size_check: assert len(image.get_shape()) == 3, '4D not implemented yet.' if len(model_input_image_size) > 2: model_input_image_size = model_input_image_size[:2] image = resize_image_label( im=image, model_input_image_size=model_input_image_size, f='nearest') label = resize_image_label( im=label, model_input_image_size=model_input_image_size, f='nearest') print 'Applying nearest resize.' else: pass if 'left_right' in data_augmentations: image = image_flip(image, direction='left_right') print 'Applying random flip left-right.' if 'up_down' in data_augmentations: image = image_flip(image, direction='up_down') print 'Applying random flip up-down.' if 'lr_flip_image_label' in data_augmentations: assert len(image.get_shape()) == 3, '4D not implemented yet.' image, label = lr_flip_image_label(image, label) if 'ud_flip_image_label' in data_augmentations: assert len(image.get_shape()) == 3, '4D not implemented yet.' image, label = ud_flip_image_label(image, label) if 'gaussian_noise' in data_augmentations: im_shape = image.get_shape().as_list() assert len(im_shape) == 3, '4D not implemented yet.' sigma = 1. / 10. mu = 0. image = image + tf.random_normal(im_shape, mean=mu, stddev=sigma) print 'Applying gaussian noise.' if 'gaussian_noise_small' in data_augmentations: im_shape = image.get_shape().as_list() assert len(im_shape) == 3, '4D not implemented yet.' sigma = 1. / 20. mu = 0. image = image + tf.random_normal(im_shape, mean=mu, stddev=sigma) print 'Applying gaussian noise.' if 'calculate_rate_time_crop' in data_augmentations: im_shape = image.get_shape().as_list() minval = im_shape[0] // 3 time_crop = tf.random_uniform([], minval=minval, maxval=im_shape[0], dtype=tf.int32) # For now always pull from the beginning indices = tf.range(0, time_crop, dtype=tf.int32) selected_image = tf.gather(image, indices) padded_image = tf.zeros([im_shape[0] - time_crop] + im_shape[1:], dtype=selected_image.dtype) # Randomly concatenate pad to front or back image = tf.cond(pred=tf.greater( tf.random_uniform([], minval=0, maxval=1, dtype=tf.float32), 0.5), true_fn=lambda: tf.concat( [selected_image, padded_image], axis=0), false_fn=lambda: tf.concat( [padded_image, selected_image], axis=0)) image.set_shape(im_shape) # Convert label to rate label = label / im_shape[0] if 'calculate_rate' in data_augmentations: label = label / image.get_shape().as_list()[0] print 'Applying rate transformation.' if 'threshold' in data_augmentations: image = tf.cast(tf.greater(image, 0.1), tf.float32) print 'Applying threshold.' if 'nonzero_label' in data_augmentations: label = tf.cast(tf.greater(label, 0.2), tf.float32) print 'Applying threshold.' if 'zero_one' in data_augmentations: image = tf.minimum(tf.maximum(image, 0.), 1.) print 'Applying threshold.' 
if 'timestep_duplication' in data_augmentations: image = tf.stack([image for iid in range(7)]) print 'Applying timestep duplication.' if 'per_image_standardization' in data_augmentations: image = tf.image.per_image_standardization(image) print 'Applying per-image zscore.' if 'flip_polarity' in data_augmentations: image = tf.abs(image - 1.) if 'NCHW' in data_augmentations: image = tf.transpose(image, (2, 0, 1)) else: assert len(image.get_shape()) == 3, '4D not implemented yet.' image = tf.image.resize_image_with_crop_or_pad( image, model_input_image_size[0], model_input_image_size[1]) return image, label
n_outputs = 1 learning_rate = 0.01 initializer = tf.variance_scaling_initializer() X = tf.placeholder(tf.float32, shape=[None, n_inputs]) hidden = tf.layers.dense(X, n_hidden, activation=tf.nn.elu, kernel_initializer=initializer) logits = tf.layers.dense(hidden, n_outputs) outputs = tf.nn.sigmoid(logits) # probability of action 0 (left) p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs]) action = tf.multinomial(tf.log(p_left_and_right), num_samples=1) y = 1. - tf.to_float(action) cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits) optimizer = tf.train.AdamOptimizer(learning_rate) grads_and_vars = optimizer.compute_gradients(cross_entropy) gradients = [grad for grad, variable in grads_and_vars] gradient_placeholders = [] grads_and_vars_feed = [] for grad, variable in grads_and_vars: gradient_placeholder = tf.placeholder(tf.float32, shape=grad.get_shape()) gradient_placeholders.append(gradient_placeholder) grads_and_vars_feed.append((gradient_placeholder, variable)) training_op = optimizer.apply_gradients(grads_and_vars_feed)
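A tiny check (assuming TensorFlow 1.x) of why the snippet above wraps the probabilities in tf.log before sampling: tf.multinomial interprets its input as unnormalized log-probabilities. Passing the raw probabilities would still run, but it would effectively sample from softmax(p), a much flatter distribution than intended.

import tensorflow as tf

p_left = tf.constant([[0.9]])
p_left_and_right = tf.concat(axis=1, values=[p_left, 1 - p_left])     # [[0.9, 0.1]]
correct = tf.multinomial(tf.log(p_left_and_right), num_samples=20)    # ~90% action 0
skewed = tf.multinomial(p_left_and_right, num_samples=20)             # only ~69% action 0

with tf.Session() as sess:
    print(sess.run([correct, skewed]))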
def ptb_producer(doc, que, ans, batch_size, vocab=100, name=None, config=None): """Iterate on the raw PTB data. This chunks up raw_data into batches of examples and returns Tensors that are drawn from these batches. Args: raw_data: one of the raw data outputs from ptb_raw_data. batch_size: int, the batch size. num_steps: int, the number of unrolls. name: the name of this operation (optional). Returns: A pair of Tensors, each shaped [batch_size, num_steps]. The second element of the tuple is the same data time-shifted to the right by one. Raises: tf.errors.InvalidArgumentError: if batch_size or num_steps are too high. """ #print(ans) with tf.name_scope(name, "PTBProducer", [doc, que, ans]): doc_len = len(doc) vocab = config.vocab_size vans = [] for e in ans: van = [0] * vocab van[e] = 1 vans.append(van) #print(doc) d = len(doc[0]) q = len(que[0]) #print(d) epoch_size = doc_len // batch_size #print(epoch_size) #print(ans) doc = tf.convert_to_tensor(doc, name="documents", dtype=tf.int32) que = tf.convert_to_tensor(que, name="questions", dtype=tf.int32) vans = tf.convert_to_tensor(vans, name="vanswers", dtype=tf.int32) ans = tf.convert_to_tensor(ans, name="answers", dtype=tf.int32) #data_len = len(documents) # batch_len = len(documents[0]) assertion = tf.assert_positive( epoch_size, message="epoch_size == 0, decrease batch_size or num_steps") with tf.control_dependencies([assertion]): epoch_size = tf.identity(epoch_size, name="epoch_size") i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue() #elems = tf.convert_to_tensor([1,2,3,5]) batch_prob = [] #for batch_number in range(batch_size): batch_prob.append([10.] * doc_len) samples = tf.multinomial(tf.log(batch_prob), batch_size) # note log-prob print(tf.get_variable_scope().reuse == False) x = [] y = [] z = [] zz = [] for batch_number in range(batch_size): x.append(doc[tf.cast(samples[0][batch_number], tf.int32)]) y.append(que[tf.cast(samples[0][batch_number], tf.int32)]) z.append(vans[tf.cast(samples[0][batch_number], tf.int32)]) zz.append(ans[tf.cast(samples[0][batch_number], tf.int32)]) x = tf.convert_to_tensor(x, name="documents", dtype=tf.int32) y = tf.convert_to_tensor(y, name="questions", dtype=tf.int32) z = tf.convert_to_tensor(z, name="vanswers", dtype=tf.int32) zz = tf.convert_to_tensor(zz, name="answers", dtype=tf.int32) ''' x= tf.slice(doc,[i*batch_size,0],[batch_size,d]) y= tf.slice(que,[i*batch_size,0],[batch_size,q]) z= tf.slice(vans,[i*batch_size,0],[batch_size,vocab]) zz=tf.slice(ans,[i*batch_size],[batch_size]) ''' #print(i) #print(epoch_size) return x, y, z, zz, epoch_size