def sample_action(self, policy_parameters):
    """
    Constructs a symbolic operation for stochastically sampling from
    the policy distribution.

    arguments:
        policy_parameters
            if discrete: logits of a categorical distribution over actions
                sy_logits_na: (batch_size, self.ac_dim)
            if continuous: (mean, log_std) of a Gaussian distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (self.ac_dim,)

    returns:
        sy_sampled_ac:
            if discrete: (batch_size,)
            if continuous: (batch_size, self.ac_dim)

    Hint: for the continuous case, use the reparameterization trick:
        The output from a Gaussian distribution with mean 'mu' and std 'sigma' is
            mu + sigma * z, z ~ N(0, I)
        This reduces the problem to just sampling z. (Hint: use tf.random_normal!)
    """
    if self.discrete:
        sy_logits_na = policy_parameters
        sy_sampled_ac = tf.squeeze(
            tf.multinomial(sy_logits_na, num_samples=1), axis=1)
    else:
        sy_mean, sy_logstd = policy_parameters
        sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(
            tf.shape(sy_mean), 0, 1)
    return sy_sampled_ac
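# A minimal self-contained sketch of the two branches above (assumed toy
# shapes and TF1-style session code; not part of the original homework file).
import tensorflow as tf

batch_size, ac_dim = 4, 3
sy_logits_na = tf.zeros([batch_size, ac_dim])  # uniform categorical logits
# Discrete: one draw per row of logits -> shape (batch_size,).
discrete_ac = tf.squeeze(tf.multinomial(sy_logits_na, num_samples=1), axis=1)

sy_mean = tf.zeros([batch_size, ac_dim])
sy_logstd = tf.zeros([ac_dim])  # std = exp(0) = 1
# Continuous: reparameterization trick, mu + sigma * z with z ~ N(0, I).
continuous_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean))

with tf.Session() as sess:
    print(sess.run(discrete_ac).shape)    # (4,)
    print(sess.run(continuous_ac).shape)  # (4, 3)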
def multinomial_squeeze(self, logits, temperature=1.0):
    """Multinomial sampling from logits."""
    logits_shape = utils.shape_list(logits)
    reshaped_logits = (
        tf.reshape(logits, [-1, logits_shape[-1]]) / temperature)
    choices = tf.multinomial(reshaped_logits, 1)
    choices = tf.reshape(choices, logits_shape[:-1])
    return tf.to_int32(choices)
def build_action_sampling(self):
    if self.discrete:
        logits_na = self.parameters
        self.sample_ac = tf.squeeze(
            tf.multinomial(logits_na, num_samples=1), axis=1)
    else:
        mean, logstd = self.parameters
        self.sample_ac = mean + tf.exp(logstd) * tf.random_normal(
            tf.shape(mean), 0, 1)
def _build_networks(self):
    # Define input placeholders
    self.s = tf.placeholder(tf.float32, shape=[None] + self.state_dim, name='state')
    self.a = tf.placeholder(tf.int32, shape=(None,), name='action')
    self.s_next = tf.placeholder(tf.float32, shape=[None] + self.state_dim,
                                 name='next_state')
    self.r = tf.placeholder(tf.float32, shape=(None,), name='reward')
    self.done = tf.placeholder(tf.float32, shape=(None,), name='done_flag')

    # Actor: action probabilities
    self.actor = dense_nn(self.s, self.layer_sizes + [self.act_size], name='actor')
    self.sampled_actions = tf.squeeze(tf.multinomial(self.actor, 1))
    self.actor_proba = tf.nn.softmax(self.actor)
    self.actor_vars = self.scope_vars('actor')

    # Critic: state value (V value)
    self.critic = dense_nn(self.s, self.layer_sizes + [1], name='critic')
    self.critic_next = dense_nn(self.s_next, self.layer_sizes + [1],
                                name='critic', reuse=True)
    self.critic_vars = self.scope_vars('critic')

    # TD target
    self.td_target = self.r + self.gamma * tf.squeeze(self.critic_next) * (1.0 - self.done)
    self.td_error = self.td_target - tf.squeeze(self.critic)
def generate_string(self, initial_logits, initial_state, sequence_length):
    """Builds sub-graph to generate a string, sampled from the model.

    Args:
        initial_logits: Starting logits to sample from.
        initial_state: Starting state for the RNN core.
        sequence_length: Number of characters to sample.

    Returns:
        A Tensor of characters, with dimensions `[sequence_length, batch_size,
        output_size]`.
    """
    current_logits = initial_logits
    current_state = initial_state

    generated_letters = []
    for _ in range(sequence_length):
        # Sample a character index from distribution.
        char_index = tf.squeeze(tf.multinomial(current_logits, 1))
        char_one_hot = tf.one_hot(char_index, self._output_size, 1.0, 0.0)
        generated_letters.append(char_one_hot)

        # Feed character back into the deep_lstm.
        gen_out_seq, current_state = self._core(
            tf.nn.relu(self._embed_module(char_one_hot)), current_state)
        current_logits = self._output_module(gen_out_seq)

    generated_string = tf.stack(generated_letters)
    return generated_string
def _g_recurrence_2(i, x_t, h_tm1, given_num, gen_x):
    h_t = self.g_recurrent_unit(x_t, h_tm1)  # hidden_memory_tuple
    o_t = self.g_output_unit(h_t)  # batch x vocab, logits not prob
    log_prob = tf.log(tf.nn.softmax(o_t))
    next_token = tf.cast(
        tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
    x_tp1 = tf.nn.embedding_lookup(self.g_embeddings, next_token)  # batch x emb_dim
    gen_x = gen_x.write(i, next_token)  # indices, batch_size
    return i + 1, x_tp1, h_t, given_num, gen_x
def provide_one_hot_labels(self, batch_size):
    """Provides one hot labels."""
    pitch_counts = self.get_pitch_counts()
    pitches = sorted(pitch_counts.keys())
    counts = [pitch_counts[p] for p in pitches]
    indices = tf.reshape(
        tf.multinomial(tf.log([tf.to_float(counts)]), batch_size), [batch_size])
    one_hot_labels = tf.one_hot(indices, depth=len(pitches))
    return one_hot_labels
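# Quick check of the trick above (toy counts, not from the original source):
# feeding log(counts) as logits to tf.multinomial draws labels with
# probability proportional to the raw counts, since softmax(log c) = c / sum(c).
counts = [10., 30., 60.]
draws = tf.multinomial(tf.log([counts]), 10000)  # ~10% / 30% / 60% of draws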
def multinomial_sample(x, vocab_size, temperature):
    """Multinomial sampling from an n-dimensional tensor."""
    if temperature > 0:
        samples = tf.multinomial(
            tf.reshape(x, [-1, vocab_size]) / temperature, 1)
    else:
        samples = tf.argmax(x, axis=-1)
    reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
    return tf.to_int32(reshaped_samples)
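# Usage sketch for multinomial_sample (assumed toy logits; relies on the same
# tensor2tensor common_layers helper used above; not part of the original
# source). Dividing logits by the temperature sharpens (<1) or flattens (>1)
# the distribution; temperature == 0 falls through to greedy argmax.
logits = tf.constant([[2.0, 1.0, 0.1]])
greedy = multinomial_sample(logits, vocab_size=3, temperature=0)     # argmax
sampled = multinomial_sample(logits, vocab_size=3, temperature=1.0)  # stochastic
flat = multinomial_sample(logits, vocab_size=3, temperature=10.0)    # near-uniform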
def sample_from_logits(logits, temperature):
    # `temperature` was undefined in the snippet (likely a closure variable in
    # its original context); it is made an explicit argument here so the
    # function is self-contained. It must be positive.
    with tf.control_dependencies([tf.assert_greater(temperature, 0.0)]):
        logits = tf.identity(logits)
    reshaped_logits = (
        tf.reshape(logits, [-1, tf.shape(logits)[-1]]) / temperature)
    choices = tf.multinomial(reshaped_logits, 1)
    choices = tf.reshape(choices,
                         tf.shape(logits)[:logits.get_shape().ndims - 1])
    return choices
def _g_recurrence(i, x_t, h_tm1, gen_o, gen_x):
    h_t = self.g_recurrent_unit(x_t, h_tm1)  # hidden_memory_tuple
    o_t = self.g_output_unit(h_t)  # batch x vocab, logits not prob
    log_prob = tf.log(tf.nn.softmax(o_t))
    next_token = tf.cast(
        tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
    x_tp1 = tf.nn.embedding_lookup(self.g_embeddings, next_token)  # batch x emb_dim
    gen_o = gen_o.write(i, tf.reduce_sum(
        tf.multiply(tf.one_hot(next_token, self.num_emb, 1.0, 0.0),
                    tf.nn.softmax(o_t)), 1))  # [batch_size], prob
    gen_x = gen_x.write(i, next_token)  # indices, batch_size
    return i + 1, x_tp1, h_t, gen_o, gen_x
def false_fn():
    """Add mutations."""
    mask = tf.cast(
        tf.multinomial(tf.log([[1 - mutation_rate, mutation_rate]]), seq_len),
        tf.int32)[0]
    possible_mutations = tf.random_uniform(
        [seq_len], minval=1, maxval=4, dtype=tf.int32)
    x_new = tf.mod(x + mask * possible_mutations, 4)
    return x_new
def body(past, prev, output):
    next_outputs = step(hparams, prev, past=past)
    logits = next_outputs['logits'][:, -1, :] / tf.to_float(temperature)
    logits = top_k_logits(logits, k=top_k)
    logits = top_p_logits(logits, p=top_p)
    samples = tf.multinomial(logits, num_samples=1, output_dtype=tf.int32)
    return [
        next_outputs['presents'] if past is None
        else tf.concat([past, next_outputs['presents']], axis=-2),
        samples,
        tf.concat([output, samples], axis=1),
    ]
def body(past, prev, output):
    next_outputs = step(hparams, prev[:, tf.newaxis], past=past)
    logits = next_outputs['logits'][:, -1, :] / tf.to_float(temperature)
    logits = top_k_logits(logits, k=top_k)
    samples = tf.multinomial(logits, num_samples=1, output_dtype=tf.int32)
    return [
        tf.concat([past, next_outputs['presents']], axis=-2),
        tf.squeeze(samples, axis=[1]),
        tf.concat([output, samples], axis=1),
    ]
def _sample_n(n):
    """Sample vector of categoricals."""
    if logits.shape.ndims == 2:
        logits_2d = logits
    else:
        logits_2d = tf.reshape(logits, [-1, event_size])
    sample_dtype = tf.int64 if logits.dtype.size > 4 else tf.int32
    draws = tf.multinomial(logits_2d, n, seed=seed, output_dtype=sample_dtype)
    draws = tf.reshape(
        tf.transpose(draws), tf.concat([[n], batch_shape_tensor], 0))
    return tf.cast(draws, dtype)
def _head(self, core_output):
    """Build the head of the agent: linear policy and value function."""
    policy_logits = snt.Linear(
        self._num_actions, name='policy_logits')(core_output)
    baseline = tf.squeeze(snt.Linear(1, name='baseline')(core_output), axis=-1)

    # Sample an action from the policy.
    new_action = tf.multinomial(
        policy_logits, num_samples=1, output_dtype=tf.int32)
    new_action = tf.squeeze(new_action, 1, name='new_action')

    return AgentOutput(new_action, policy_logits, baseline)
def sample_action(self, policy_parameters):
    """
    Constructs a symbolic operation for stochastically sampling from
    the policy distribution.

    arguments:
        policy_parameters
            if discrete: logits of a categorical distribution over actions
                sy_logits_na: (batch_size, self.ac_dim)
            if continuous: (mean, log_std) of a Gaussian distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (self.ac_dim,)

    returns:
        sy_sampled_ac:
            if discrete: (batch_size,)
            if continuous: (batch_size, self.ac_dim)

    Hint: for the continuous case, use the reparameterization trick:
        The output from a Gaussian distribution with mean 'mu' and std 'sigma' is
            mu + sigma * z, z ~ N(0, I)
        This reduces the problem to just sampling z.
    """
    if self.discrete:
        sy_logits_na = policy_parameters
        # ------------------------------------------------------------------
        # START OF YOUR CODE
        # ------------------------------------------------------------------
        # Draw a sample from sy_logits_na. tf.multinomial is deprecated in
        # TF2; the TF2 equivalent is tf.random.categorical.
        sy_sampled_ac = tf.reshape(tf.multinomial(sy_logits_na, 1), [-1])
        # ------------------------------------------------------------------
        # END OF YOUR CODE
        # ------------------------------------------------------------------
    else:
        sy_mean, sy_logstd = policy_parameters
        # ------------------------------------------------------------------
        # START OF YOUR CODE
        # ------------------------------------------------------------------
        # Sample directly from a Gaussian with mean sy_mean and
        # stddev exp(sy_logstd) using tf.random_normal.
        sy_sampled_ac = tf.random_normal(shape=tf.shape(sy_mean),
                                         mean=sy_mean,
                                         stddev=tf.exp(sy_logstd))
        # ------------------------------------------------------------------
        # END OF YOUR CODE
        # ------------------------------------------------------------------
    return sy_sampled_ac
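# Migration sketch (not part of the original homework file): tf.multinomial
# was deprecated in favor of tf.random.categorical, which takes the same
# (logits, num_samples) arguments, so the discrete branch above can be
# written either way.
logits = tf.zeros([4, 3])  # assumed toy batch of logits
a_v1 = tf.reshape(tf.multinomial(logits, 1), [-1])
a_v2 = tf.reshape(tf.random.categorical(logits, 1), [-1])  # TF2-compatible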
def __init__(self, name: str, env):
    """
    :param name: string
    :param env: gym env
    """
    ob_space = env.observation_space
    act_space = env.action_space

    with tf.variable_scope(name):
        self.obs = tf.placeholder(dtype=tf.float32,
                                  shape=[None] + list(ob_space.shape),
                                  name='obs')

        # Actor (policy): given a state (or observation),
        # produce a distribution over actions.
        with tf.variable_scope('policy_net'):
            layer_1 = tf.layers.dense(inputs=self.obs, units=20, activation=tf.tanh)
            layer_2 = tf.layers.dense(inputs=layer_1, units=20, activation=tf.tanh)
            layer_3 = tf.layers.dense(inputs=layer_2, units=act_space.n,
                                      activation=tf.tanh)
            self.act_probs = tf.layers.dense(inputs=layer_3, units=act_space.n,
                                             activation=tf.nn.softmax)

        # Critic
        with tf.variable_scope('value_net'):
            layer_1 = tf.layers.dense(inputs=self.obs, units=20, activation=tf.tanh)
            layer_2 = tf.layers.dense(inputs=layer_1, units=20, activation=tf.tanh)
            self.v_preds = tf.layers.dense(inputs=layer_2, units=1, activation=None)

        self.act_stochastic = tf.multinomial(tf.log(self.act_probs), num_samples=1)
        self.act_stochastic = tf.reshape(self.act_stochastic, shape=[-1])

        self.act_deterministic = tf.argmax(self.act_probs, axis=1)

        # Helpers
        self.scope = tf.get_variable_scope().name
def nearest_neighbor(self, x, means):
    """Find the nearest element in means to elements in x.

    Args:
        x: Batch of encoder continuous latent states sliced/projected into
            shape [-1, num_blocks, block_dim].
        means: Embedding means of shape.

    Returns:
        Tensor with nearest element in mean encoded in one-hot notation.
    """
    x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keep_dims=True)
    means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keep_dims=True)
    scalar_prod = tf.matmul(
        tf.transpose(x, perm=[1, 0, 2]), tf.transpose(means, perm=[0, 2, 1]))
    scalar_prod = tf.transpose(scalar_prod, perm=[1, 0, 2])
    dist = x_norm_sq + tf.transpose(means_norm_sq, perm=[2, 0, 1]) - 2 * scalar_prod

    if self.hparams.soft_em:
        nearest_idx = tf.stack([
            tf.multinomial(-dist[:, i, :], num_samples=self.hparams.num_samples)
            for i in range(self.hparams.num_blocks)
        ], axis=1)
        nearest_hot = tf.one_hot(nearest_idx, depth=self.hparams.block_v_size)
        nearest_hot = tf.reduce_mean(nearest_hot, axis=-2)
    else:
        if self.hparams.random_top_k > 1:
            _, top_k_idx = tf.nn.top_k(-dist, k=self.hparams.random_top_k)
            nearest_idx = tf.gather(
                top_k_idx,
                tf.random_uniform([1], minval=0,
                                  maxval=self.hparams.random_top_k - 1,
                                  dtype=tf.int32),
                axis=-1)
        else:
            if self.hparams.use_scales:
                dist /= tf.reshape(self.hparams.scales,
                                   [1, 1, self.hparams.moe_num_experts])
            nearest_idx = tf.argmax(-dist, axis=-1)
        nearest_hot = tf.one_hot(nearest_idx, self.hparams.block_v_size)
    return nearest_hot
def _preprocess(self, features):
    """Preprocesses features for multilingual translation."""
    seqs, tags = {}, {}

    if self._hparams.mode == tf.estimator.ModeKeys.TRAIN:
        seqs["src"] = features["inputs"]
        seqs["tgt"] = features["targets"]
        seqs["aux"] = None
        tags["src"] = features["input_tags"]
        tags["tgt"] = features["target_tags"]
        tags["aux"] = None

        # Construct a tensor of auxiliary tags.
        batch_size = common_layers.shape_list(features["all_tags"])[0]
        num_all_tags = common_layers.shape_list(features["all_tags"])[1]
        # <float32> [num_all_tags, 1, emb_dim].
        all_tags = features["all_tags"][0]  # batch elements are identical.
        # <int32> [batch_size].
        aux_tag_index = tf.multinomial(
            tf.ones([1, num_all_tags]), batch_size, output_dtype=tf.int32)[0]
        # <float32> [batch_size, 1, 1, emb_dim].
        tags["aux"] = tf.expand_dims(tf.gather(all_tags, aux_tag_index), 1)

        from_domains = ["src", "src", "tgt"]
        to_domains = ["tgt", "aux", "aux"]
    else:
        seqs["src"] = features["inputs"]
        seqs["tgt"] = features["targets"]
        tags["src"] = None
        tags["tgt"] = features["target_tags"]

        # Expand target tags to beam width, if necessary.
        if self._hparams.mode == tf.estimator.ModeKeys.PREDICT:
            tags["tgt"] = tf.tile(tags["tgt"], [self._hparams.beam_width, 1, 1, 1])

        from_domains = ["src"]
        to_domains = ["tgt"]

    # Construct inputs and targets.
    inputs, targets = {}, {}
    for fd, td in zip(from_domains, to_domains):
        key = "%s>%s" % (fd, td)
        inputs[key], targets[key] = self._build_inputs_and_targets(
            seqs[fd], tags[fd], seqs[td], tags[td])

    return inputs, targets
def mlp_categorical_policy(x, a, hidden_sizes, activation, output_activation,
                           action_space):
    act_dim = action_space.n
    logits = mlp(x, list(hidden_sizes) + [act_dim], activation, None)
    logp_all = tf.nn.log_softmax(logits)
    pi = tf.squeeze(tf.multinomial(logits, 1), axis=1)
    logp = tf.reduce_sum(tf.one_hot(a, depth=act_dim) * logp_all, axis=1)
    logp_pi = tf.reduce_sum(tf.one_hot(pi, depth=act_dim) * logp_all, axis=1)

    old_logp_all = placeholder(act_dim)
    d_kl = categorical_kl(logp_all, old_logp_all)
    ent = categorical_entropy(logp_all)

    pi_info = {'logp_all': logp_all}
    pi_info_phs = {'logp_all': old_logp_all}

    return pi, logp, logp_pi, pi_info, pi_info_phs, d_kl, ent
def categorical_policy(x, a, config, action_space):
    act_dim = action_space.n
    config["output_size"] = act_dim
    logits = make_network(x, config)
    logp_all = tf.nn.log_softmax(logits)
    pi = tf.squeeze(tf.multinomial(logits, 1), axis=1)
    logp = tf.reduce_sum(tf.one_hot(a, depth=act_dim) * logp_all, axis=1)
    logp_pi = tf.reduce_sum(tf.one_hot(pi, depth=act_dim) * logp_all, axis=1)

    old_logp_all = placeholder(act_dim)
    d_kl = categorical_kl(logp_all, old_logp_all)
    ent = categorical_entropy(logp_all)

    pi_info = {"logp_all": logp_all}
    pi_info_phs = {"logp_all": old_logp_all}

    return pi, logp, logp_pi, pi_info, pi_info_phs, d_kl, ent
def _build_sampler(self):
    """Build the sampler ops and the log_prob ops."""
    arc_seq = []
    sample_log_probs = []
    all_h = []

    # sampler ops
    inputs = self.g_emb
    prev_c = [tf.zeros([1, self.lstm_size], dtype=tf.float32)
              for _ in range(self.lstm_num_layers)]
    prev_h = [tf.zeros([1, self.lstm_size], dtype=tf.float32)
              for _ in range(self.lstm_num_layers)]

    for layer_id in range(self.num_layers):
        for branch_id in range(self.num_branches):
            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            # Carry the LSTM state forward (the snippet computed next_c/next_h
            # but never used them, so the controller state never advanced).
            prev_c, prev_h = next_c, next_h
            all_h.append(tf.stop_gradient(next_h[-1]))

            logits = tf.matmul(next_h[-1], self.w_soft)
            if self.temperature is not None:
                logits /= self.temperature
            if self.tanh_constant is not None:
                logits = self.tanh_constant * tf.tanh(logits)

            config_id = tf.multinomial(logits, 1)
            config_id = tf.to_int32(config_id)
            config_id = tf.reshape(config_id, [1])
            arc_seq.append(config_id)
            log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=config_id)
            sample_log_probs.append(log_prob)

            inputs = tf.nn.embedding_lookup(self.w_emb, config_id)

    arc_seq = tf.concat(arc_seq, axis=0)
    self.sample_arc = arc_seq

    self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
    self.ppl = tf.exp(tf.reduce_sum(self.sample_log_probs) /
                      tf.to_float(self.num_layers * self.num_branches))
    self.all_h = all_h
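# Note (toy sketch, assumed values; not from the original source):
# sparse_softmax_cross_entropy_with_logits(logits, labels=sampled_id) equals
# -log softmax(logits)[sampled_id], so appending it to sample_log_probs above
# accumulates the negative log-likelihood of the sampled architecture.
logits = tf.constant([[1.0, 2.0, 0.5]])
config_id = tf.constant([1])
nll = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                     labels=config_id)
check = -tf.log(tf.nn.softmax(logits))[0, 1]  # equals nll[0]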
def _head(self, policy_input, heading, xy, target_xy):
    """Build the head of the agent: linear policy and value function, and pass
    the auxiliary outputs through.
    """
    # Linear policy and value function.
    policy_logits = snt.Linear(self._num_actions,
                               name='policy_logits')(policy_input)
    baseline = tf.squeeze(snt.Linear(1, name='baseline')(policy_input), axis=-1)

    # Sample an action from the policy.
    new_action = tf.multinomial(policy_logits, num_samples=1,
                                output_dtype=tf.int32)
    new_action = tf.squeeze(new_action, 1, name='new_action')

    return AgentOutput(new_action, policy_logits, baseline, heading, xy, target_xy)
def vq_nearest_neighbor(x, hparams):
    """Find the nearest element in means to elements in x."""
    bottleneck_size = 2**hparams.bottleneck_bits
    means = hparams.means
    x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
    means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
    scalar_prod = tf.matmul(x, means, transpose_b=True)
    dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
    if hparams.bottleneck_kind == "em":
        x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
        x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
        x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
    else:
        x_means_idx = tf.argmax(-dist, axis=-1)
        x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
    x_means = tf.matmul(x_means_hot, means)
    e_loss = tf.reduce_mean(tf.squared_difference(x, tf.stop_gradient(x_means)))
    return x_means_hot, e_loss
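# Sketch of the "em" branch's sampling trick (toy sizes, assumed values; not
# from the original source). Passing -dist as logits to tf.multinomial samples
# codebook entries with probability softmax(-distance), so closer means are
# drawn far more often.
x = tf.constant([[0.0, 0.0]])                   # one latent, dim 2
means = tf.constant([[0.1, 0.0], [5.0, 5.0]])   # entry 0 is much closer
dist = (tf.reduce_sum(tf.square(x), -1, keepdims=True)
        + tf.transpose(tf.reduce_sum(tf.square(means), -1, keepdims=True))
        - 2 * tf.matmul(x, means, transpose_b=True))
draws = tf.multinomial(-dist, num_samples=1000)  # almost always index 0

with tf.Session() as sess:
    print(sess.run(tf.reduce_mean(tf.to_float(tf.equal(draws, 0)))))  # ~1.0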
def multinomial_sample(x, vocab_size=None, sampling_method="random", temperature=1.0): """Multinomial sampling from a n-dimensional tensor. Args: x: Tensor of shape [..., vocab_size]. Parameterizes logits of multinomial. vocab_size: Number of classes in multinomial distribution. sampling_method: String, "random" or otherwise deterministic. temperature: Positive float. Returns: Tensor of shape [...]. """ vocab_size = vocab_size or common_layers.shape_list(x)[-1] if sampling_method == "random" and temperature > 0.0: samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1) else: samples = tf.argmax(x, axis=-1) reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1]) return reshaped_samples
def _build(self, inputs):
    (shared_inputs, extra_policy_inputs) = inputs
    policy_in = tf.concat([shared_inputs, extra_policy_inputs], axis=1)

    policy = snt.nets.MLP(output_sizes=self._policy_layers,
                          activation=self._activation,
                          name='policy_mlp')(policy_in)

    # Sample an action from the policy logits.
    action = tf.multinomial(policy, num_samples=1, output_dtype=tf.int32)
    action = tf.squeeze(action, 1)  # [B, 1] -> [B]

    if self._policy_clip_abs_value > 0:
        policy = snt.clip_gradient(
            net=policy,
            clip_value_min=-self._policy_clip_abs_value,
            clip_value_max=self._policy_clip_abs_value)

    baseline_in = tf.concat([shared_inputs, tf.stop_gradient(policy)], axis=1)
    baseline = snt.nets.MLP(self._baseline_layers,
                            activation=self._activation,
                            name='baseline_mlp')(baseline_in)
    baseline = tf.squeeze(baseline, axis=-1)  # [B, 1] -> [B]

    if self._policy_clip_abs_value > 0:
        baseline = snt.clip_gradient(
            net=baseline,
            clip_value_min=-self._policy_clip_abs_value,
            clip_value_max=self._policy_clip_abs_value)

    outputs = PolicyOutputs(policy=policy, action=action, baseline=baseline)
    return outputs
n_max_steps = 1000
n_episode = 100
gamma = 0.95

# Network hyperparameters (assumed CartPole-style defaults; the snippet
# referenced these names without defining them).
n_inputs = 4
n_hidden = 4
n_outputs = 1
learning_rate = 0.01

initializer = tf.variance_scaling_initializer()

X = tf.placeholder(tf.float32, shape=[None, n_inputs])
hidden = tf.layers.dense(X, n_hidden, activation=tf.nn.elu,
                         kernel_initializer=initializer)
logits = tf.layers.dense(hidden, n_outputs, kernel_initializer=initializer)
outputs = tf.nn.sigmoid(logits)

p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])
action = tf.multinomial(tf.log(p_left_and_right), num_samples=1)
# Target: act as if the sampled action were the correct one.
y = 1. - tf.to_float(action)

cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)
optimizer = tf.train.AdamOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(cross_entropy)
gradients = [grad for grad, variable in grads_and_vars]

gradient_placeholders = []
grads_and_vars_feed = []
for grad, variable in grads_and_vars:
    gradient_placeholder = tf.placeholder(tf.float32, shape=grad.get_shape())
    gradient_placeholders.append(gradient_placeholder)
    grads_and_vars_feed.append((gradient_placeholder, variable))
training_op = optimizer.apply_gradients(grads_and_vars_feed)
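# Usage sketch for the placeholder-gradient pattern above (assumptions: numpy
# as np, and per-step (return, gradient) rollouts collected elsewhere; none of
# this appears in the original snippet). Each step's gradients are weighted by
# its discounted return, averaged, and fed back through gradient_placeholders.
import numpy as np

def apply_policy_gradients(sess, all_step_gradients, all_step_returns):
    """all_step_gradients: per-step lists of per-variable gradient arrays.
    all_step_returns: per-step scalar discounted returns."""
    feed_dict = {}
    for var_index, grad_placeholder in enumerate(gradient_placeholders):
        weighted = [ret * step_grads[var_index]
                    for step_grads, ret in zip(all_step_gradients,
                                               all_step_returns)]
        feed_dict[grad_placeholder] = np.mean(weighted, axis=0)
    sess.run(training_op, feed_dict=feed_dict)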
def make_data_tensor(self, train=True):
    if train:
        folders = self.metatrain_character_folders
        # number of tasks, not number of meta-iterations.
        # (divide by metabatch size to measure)
        num_total_batches = 200000
        if FLAGS.task_type == "ne":
            print("Inside ne train")
            folders = self.metatrain_character_folders[:50]
            # 10 classification tasks
            num_total_batches = 5000
    else:
        folders = self.metaval_character_folders
        num_total_batches = 600
        if FLAGS.task_type == "ne":
            print("inside ne val")
            if FLAGS.test_set:
                folders = self.metaval_character_folders[:15]
            else:
                folders = self.metaval_character_folders[:15]
            # 3 classification tasks
            num_total_batches = 60

    # make list of files
    print('Generating filenames')
    if FLAGS.task_setting == 'ne' and train:
        all_filenames = []
        print("len folders", len(folders))
        random.shuffle(folders)
        task_folders_new = []
        for i in range(int(len(folders) / self.num_classes)):
            sampled_character_folders = folders[i * self.num_classes:
                                                (i + 1) * self.num_classes]
            task_folders_temp = itertools.permutations(sampled_character_folders)
            task_folders_new.extend(task_folders_temp)
        print("len of task_folders_new", len(task_folders_new))
        random.shuffle(task_folders_new)
        for i in range(len(task_folders_new)):
            sampled_character_folders = task_folders_new[i]
            labels_and_images = get_images(sampled_character_folders,
                                           range(self.num_classes),
                                           nb_samples=self.num_samples_per_class,
                                           shuffle=False)
            # make sure the above isn't randomized order
            labels = [li[0] for li in labels_and_images]
            filenames = [li[1] for li in labels_and_images]
            all_filenames.extend(filenames)
    else:
        all_filenames = []
        for _ in range(num_total_batches):
            sampled_character_folders = random.sample(folders, self.num_classes)
            random.shuffle(sampled_character_folders)
            labels_and_images = get_images(sampled_character_folders,
                                           range(self.num_classes),
                                           nb_samples=self.num_samples_per_class,
                                           shuffle=False)
            # make sure the above isn't randomized order
            labels = [li[0] for li in labels_and_images]
            filenames = [li[1] for li in labels_and_images]
            all_filenames.extend(filenames)

    # make queue for tensorflow to read from
    filename_queue = tf.train.string_input_producer(
        tf.convert_to_tensor(all_filenames), shuffle=False)
    print('Generating image processing ops')
    image_reader = tf.WholeFileReader()
    _, image_file = image_reader.read(filename_queue)
    if FLAGS.datasource == 'miniimagenet':
        image = tf.image.decode_jpeg(image_file, channels=3)
        image.set_shape((self.img_size[0], self.img_size[1], 3))
        image = tf.reshape(image, [self.dim_input])
        image = tf.cast(image, tf.float32) / 255.0
    else:
        image = tf.image.decode_png(image_file)
        image.set_shape((self.img_size[0], self.img_size[1], 1))
        image = tf.reshape(image, [self.dim_input])
        image = tf.cast(image, tf.float32) / 255.0
        image = 1.0 - image  # invert

    num_preprocess_threads = 1  # TODO - enable this to be set to >1
    min_queue_examples = 256
    examples_per_batch = self.num_classes * self.num_samples_per_class
    batch_image_size = self.batch_size * examples_per_batch
    print('Batching images')
    print("batch_image_size", batch_image_size)
    images = tf.train.batch(
        [image],
        batch_size=batch_image_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + 3 * batch_image_size,
    )
    print("len images", images.shape)

    all_image_batches, all_label_batches = [], []
    print('Manipulating image data to be right shape')
    for i in range(self.batch_size):
        image_batch = images[i * examples_per_batch:(i + 1) * examples_per_batch]
        if FLAGS.datasource == 'omniglot':
            # omniglot augments the dataset by rotating digits to create new
            # classes. get rotation per class (e.g. 0,1,2,0,0 if there are
            # 5 classes)
            rotations = tf.multinomial(tf.log([[1., 1., 1., 1.]]),
                                       self.num_classes)
        label_batch = tf.convert_to_tensor(labels)
        new_list, new_label_list = [], []
        # shuffles the data within a batch, class labels remain fixed
        for k in range(self.num_samples_per_class):
            class_idxs = tf.range(0, self.num_classes)
            class_idxs = tf.random_shuffle(class_idxs)
            true_idxs = class_idxs * self.num_samples_per_class + k
            new_list.append(tf.gather(image_batch, true_idxs))
            if FLAGS.datasource == 'omniglot':  # and FLAGS.train:
                new_list[-1] = tf.stack([
                    tf.reshape(
                        tf.image.rot90(
                            tf.reshape(new_list[-1][ind],
                                       [self.img_size[0], self.img_size[1], 1]),
                            k=tf.cast(rotations[0, class_idxs[ind]], tf.int32)),
                        (self.dim_input,))
                    for ind in range(self.num_classes)
                ])
            new_label_list.append(tf.gather(label_batch, true_idxs))
        # has shape [self.num_classes * self.num_samples_per_class, self.dim_input]
        new_list = tf.concat(new_list, 0)
        new_label_list = tf.concat(new_label_list, 0)
        all_image_batches.append(new_list)
        all_label_batches.append(new_label_list)
    all_image_batches = tf.stack(all_image_batches)
    all_label_batches = tf.stack(all_label_batches)
    print("all_image_batches", all_image_batches)
    all_label_batches = tf.one_hot(all_label_batches, self.num_classes)
    return all_image_batches, all_label_batches
def _create_network(self, view_space, feature_space):
    input_view = tf.placeholder(tf.float32, (None,) + view_space)
    input_feature = tf.placeholder(tf.float32, (None,) + feature_space)
    action = tf.placeholder(tf.int32, [None])
    reward = tf.placeholder(tf.float32, [None])

    hidden_size = [256]

    # fully connected
    flatten_view = tf.reshape(
        input_view, [-1, np.prod([v.value for v in input_view.shape[1:]])])
    h_view = tf.layers.dense(flatten_view, units=hidden_size[0],
                             activation=tf.nn.relu)
    h_emb = tf.layers.dense(input_feature, units=hidden_size[0],
                            activation=tf.nn.relu)

    dense = tf.concat([h_view, h_emb], axis=1)
    dense = tf.layers.dense(dense, units=hidden_size[0] * 2,
                            activation=tf.nn.relu)

    policy = tf.layers.dense(dense / 0.1, units=self.num_actions,
                             activation=tf.nn.softmax)
    policy = tf.clip_by_value(policy, 1e-10, 1 - 1e-10)

    self.calc_action = tf.multinomial(tf.log(policy), 1)

    value = tf.layers.dense(dense, units=1)
    value = tf.reshape(value, (-1,))

    action_mask = tf.one_hot(action, self.num_actions)
    advantage = tf.stop_gradient(reward - value)

    log_policy = tf.log(policy + 1e-6)
    log_prob = tf.reduce_sum(log_policy * action_mask, axis=1)

    pg_loss = -tf.reduce_mean(advantage * log_prob)
    vf_loss = self.value_coef * tf.reduce_mean(tf.square(reward - value))
    neg_entropy = self.ent_coef * tf.reduce_mean(
        tf.reduce_sum(policy * log_policy, axis=1))
    total_loss = pg_loss + vf_loss + neg_entropy

    # train op (clip gradient). The snippet built a second, unclipped
    # AdamOptimizer.minimize() op that overwrote this one; only the clipped
    # op is kept.
    optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    gradients, variables = zip(*optimizer.compute_gradients(total_loss))
    gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
    train_op = optimizer.apply_gradients(zip(gradients, variables))

    self.input_view = input_view
    self.input_feature = input_feature
    self.action = action
    self.reward = reward

    self.policy, self.value = policy, value
    self.train_op = train_op
    self.pg_loss, self.vf_loss, self.reg_loss = pg_loss, vf_loss, neg_entropy
    self.total_loss = total_loss
# initial_means = tf.placeholder_with_default(
#     tf.constant([[-3, 1],
#                  [-3, -3],
#                  [-1, 3],
#                  [-1, -1],
#                  [3, 3],
#                  [1, 1],
#                  [1, -3],
#                  [3, 1]], dtype='float64'),
#     shape=[COMPONENTS, DIMENSIONS]
# )

# Initialize means by drawing COMPONENTS random points from the input data.
initial_means = tf.placeholder_with_default(
    tf.gather(input,
              tf.squeeze(tf.multinomial(tf.ones([1, tf.shape(input)[0]]),
                                        COMPONENTS))),
    shape=[COMPONENTS, DIMENSIONS])

initial_covariances = tf.placeholder_with_default(
    tf.cast(tf.ones([COMPONENTS, DIMENSIONS]), tf.float64) * avg_dim_variance,
    shape=[COMPONENTS, DIMENSIONS])

initial_weights = tf.placeholder_with_default(
    tf.cast(tf.constant(1.0 / COMPONENTS, shape=[COMPONENTS]), tf.float64),
    shape=[COMPONENTS])

# trainable variables: component means, covariances, and weights
# (the snippet also built a hard-coded means Variable that was immediately
# overwritten; only the data-initialized one is kept)
means = tf.Variable(initial_means, dtype=tf.float64)
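# Aside (toy sketch, not from the original source): uniform logits of shape
# [1, N] fed to tf.multinomial draw indices in [0, N) with replacement, so the
# gather above seeds each component mean with a random data point (possibly
# with duplicates).
N = 5
idx = tf.squeeze(tf.multinomial(tf.ones([1, N]), 3))  # 3 indices in [0, N)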