def _update_step(self, observ, action, old_mean, old_logstd, reward, advantage, length):
    """Build one combined gradient step for the value and the policy loss.

    The gradients of both losses are computed separately with
    `self._optimizer`, concatenated, and applied by a single
    `apply_gradients` op.

    Args:
      observ: Sequences of observations.
      action: Sequences of actions.
      old_mean: Sequences of action means of the behavioral policy.
      old_logstd: Sequences of action log stddevs of the behavioral policy.
      reward: Sequences of reward.
      advantage: Sequences of advantages.
      length: Batch of sequence lengths.

    Returns:
      List of value loss, policy loss, and summary tensor, each created
      under a control dependency on the update op.
    """
    value_loss, value_summary = self._value_loss(observ, reward, length)
    network = self._network(observ, length)
    policy_loss, policy_summary = self._policy_loss(
        network.mean, network.logstd, old_mean, old_logstd, action,
        advantage, length)

    value_pairs = self._optimizer.compute_gradients(value_loss)
    value_gradients, value_variables = zip(*value_pairs)
    policy_pairs = self._optimizer.compute_gradients(policy_loss)
    policy_gradients, policy_variables = zip(*policy_pairs)

    # zip(*...) yields tuples, so `+` concatenates the two sets.
    optimize = self._optimizer.apply_gradients(zip(
        value_gradients + policy_gradients,
        value_variables + policy_variables))

    value_norm_summary = tf.summary.scalar(
        'value_gradient_norm', tf.global_norm(value_gradients))
    policy_norm_summary = tf.summary.scalar(
        'policy_gradient_norm', tf.global_norm(policy_gradients))
    value_grad_summary = utility.gradient_summaries(
        zip(value_gradients, value_variables), dict(value=r'.*'))
    policy_grad_summary = utility.gradient_summaries(
        zip(policy_gradients, policy_variables), dict(policy=r'.*'))
    summary = tf.summary.merge([
        value_summary,
        policy_summary,
        value_norm_summary,
        policy_norm_summary,
        value_grad_summary,
        policy_grad_summary,
    ])

    # Fetching any returned tensor forces the update op to run first.
    with tf.control_dependencies([optimize]):
        return [
            tf.identity(value_loss),
            tf.identity(policy_loss),
            tf.identity(summary),
        ]
def clip_by_global_norm_summary(t_list, clip_norm, norm_name, variables):
    """Clip `t_list` by global norm and create scalar summaries of the norms.

    Thin wrapper around `tf.clip_by_global_norm` that also records, for
    every entry, its norm before and after clipping, plus the overall
    global norm.

    Args:
      t_list: Tensors to clip (paired positionally with `variables`).
      clip_norm: Clipping threshold passed to `tf.clip_by_global_norm`.
      norm_name: Summary tag used for the overall global norm.
      variables: Variables whose names are used to tag per-entry summaries.

    Returns:
      Tuple `(clipped_t_list, tf_norm, summary_ops)`.
    """
    def _tag(prefix, variable):
        # Summary tags derive from the variable name with ':' replaced.
        return prefix + variable.name.replace(":", "_")

    # A one-element global_norm is used so IndexedSlices and dense tensors
    # go through the same code path.
    pre_clip_norms = [tf.global_norm([t]) for t in t_list]
    summary_ops = [
        tf.summary.scalar(_tag('norm_pre_clip/', var), norm)
        for norm, var in zip(pre_clip_norms, variables)
    ]

    clipped_t_list, tf_norm = tf.clip_by_global_norm(t_list, clip_norm)

    post_clip_norms = [tf.global_norm([t]) for t in clipped_t_list]
    summary_ops.extend(
        tf.summary.scalar(_tag('norm_post_clip/', var), norm)
        for norm, var in zip(post_clip_norms, variables))

    summary_ops.append(tf.summary.scalar(norm_name, tf_norm))
    return clipped_t_list, tf_norm, summary_ops
def setup_loss_critic(critic):
    """Attach a squared-error Q loss and its clipped update op to `critic`.

    Starts from `critic.outputs` and adds the following attributes to
    `critic`: `target_qt` (placeholder), `q_loss`, `gradient_norm`,
    `param_norm` and `updates`.

    Args:
      critic: Model object exposing `outputs`, `vocab_size`,
        `learning_rate` and `global_step`.
    """
    scope_initializer = tf.uniform_unit_scaling_initializer(1.0)
    with tf.variable_scope("rl", initializer=scope_initializer):
        # Target Q values; the first two dimensions are left unspecified.
        critic.target_qt = tf.placeholder(
            tf.float32,
            shape=[None, None, critic.vocab_size],
            name="q_action_score")

        # Mean squared error between the critic outputs and the targets.
        squared_error = tf.square(critic.outputs - critic.target_qt)
        critic.q_loss = tf.reduce_mean(squared_error)

        optimizer_factory = nlc_model.get_optimizer(FLAGS.optimizer)
        opt = optimizer_factory(critic.learning_rate)

        params = tf.trainable_variables()
        gradients = tf.gradients(critic.q_loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(
            gradients, FLAGS.max_gradient_norm)

        # The reported norm is that of the unclipped gradients.
        critic.gradient_norm = tf.global_norm(gradients)
        critic.param_norm = tf.global_norm(params)
        critic.updates = opt.apply_gradients(
            zip(clipped_gradients, params), global_step=critic.global_step)
def create_variables_for_optimization(self):
    """Build the masked policy loss, the clipped gradients and the train op.

    Sets `mask`, `pl_loss`, `entropy`, `loss`, `trainable_variables`,
    `gradients`, `clipped_gradients`, `train_op`, `grad_norm` and
    `var_norm` on `self` (plus `one_hot_actions` for the "l2" loss).

    Raises:
      ValueError: If `self.loss_function` is neither "cross_entropy"
        nor "l2".
    """
    with tf.name_scope("optimization"):
        with tf.name_scope("masker"):
            # Flattened float mask built from the sequence lengths.
            self.mask = tf.sequence_mask(self.seq_len, self.num_step)
            self.mask = tf.reshape(tf.cast(self.mask, tf.float32), (-1,))

        if self.loss_function == "cross_entropy":
            self.pl_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.logit, labels=self.actions_flatten)
        elif self.loss_function == "l2":
            self.one_hot_actions = tf.one_hot(
                self.actions_flatten, self.num_actions)
            self.pl_loss = tf.reduce_mean(
                (self.probs - self.one_hot_actions) ** 2, axis=1)
        else:
            raise ValueError("loss function type is not defined")

        # Mask the per-step loss, then weight it by the returns.
        self.pl_loss = tf.multiply(self.pl_loss, self.mask)
        self.pl_loss = tf.reduce_mean(
            tf.multiply(self.pl_loss, self.returns_flatten))

        self.entropy = tf.multiply(self.entropy, self.mask)
        self.entropy = tf.reduce_mean(self.entropy)

        self.loss = self.pl_loss - self.entropy_bonus * self.entropy

        # NOTE(review): despite the attribute name this reads the
        # GLOBAL_VARIABLES collection, not TRAINABLE_VARIABLES - confirm
        # that is intended.
        self.trainable_variables = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope="policy_network")
        self.gradients = self.optimizer.compute_gradients(
            self.loss, var_list=self.trainable_variables)

        # compute_gradients yields None for variables the loss does not
        # depend on; tf.clip_by_norm cannot take None, so such entries are
        # passed through unchanged and left for apply_gradients to skip.
        self.clipped_gradients = [
            (grad if grad is None
             else tf.clip_by_norm(grad, self.max_gradient), var)
            for grad, var in self.gradients
        ]
        self.train_op = self.optimizer.apply_gradients(
            self.clipped_gradients, self.global_step)

        self.grad_norm = tf.global_norm(
            [grad for grad, _ in self.gradients])
        self.var_norm = tf.global_norm(self.trainable_variables)
def setup_actor_update(actor):
    """Attach a critic-weighted cross-entropy loss and update op to `actor`.

    Adds `critic_output` (placeholder), `rl_losses`, `rl_gradient_norm`,
    `rl_param_norm` and `rl_updates` to `actor`.

    Args:
      actor: Model object exposing `vocab_size`, `size`, `decoder_output`,
        `target_tokens`, `target_mask`, `learning_rate` and `global_step`.
    """
    with tf.variable_scope("rl"):
        # Fed with the critic's output over all actions.
        actor.critic_output = tf.placeholder(
            tf.float32, [None, None, actor.vocab_size], name='critic_output')

        optimizer_factory = nlc_model.get_optimizer(FLAGS.optimizer)
        opt = optimizer_factory(actor.learning_rate)

        # NOTE(review): params is collected before the "Loss" scope below is
        # built, so any variables created there would not be updated -
        # confirm this is intended.
        params = tf.trainable_variables()

        with tf.variable_scope("Loss"):
            decoder_shape = tf.shape(actor.decoder_output)
            num_steps, batch_size = decoder_shape[0], decoder_shape[1]

            flat_decoder_output = tf.reshape(
                actor.decoder_output, [-1, actor.size])
            logits2d = rnn_cell._linear(
                flat_decoder_output, actor.vocab_size, True, 1.0)

            # Flatten the critic output the same way and scale the actor's
            # logits with it element-wise.
            flat_critic_output = tf.reshape(
                actor.critic_output, [-1, actor.vocab_size])
            rl_logits2d = logits2d * flat_critic_output

            # Drop the first time step of targets and mask, then pad one
            # step at the end so the flattened length is unchanged.
            targets_no_go = tf.slice(actor.target_tokens, [1, 0], [-1, -1])
            masks_no_go = tf.slice(actor.target_mask, [1, 0], [-1, -1])
            labels1d = tf.reshape(
                tf.pad(targets_no_go, [[0, 1], [0, 0]]), [-1])
            mask1d = tf.reshape(
                tf.pad(masks_no_go, [[0, 1], [0, 0]]), [-1])

            losses1d = tf.nn.sparse_softmax_cross_entropy_with_logits(rl_logits2d, labels1d) * tf.to_float(mask1d)
            losses2d = tf.reshape(losses1d, tf.pack([num_steps, batch_size]))
            actor.rl_losses = tf.reduce_sum(losses2d) / tf.to_float(batch_size)

        gradients = tf.gradients(actor.rl_losses, params)
        clipped_gradients, _ = tf.clip_by_global_norm(
            gradients, FLAGS.max_gradient_norm)

        # The reported norm is that of the clipped gradients.
        actor.rl_gradient_norm = tf.global_norm(clipped_gradients)
        actor.rl_param_norm = tf.global_norm(params)
        actor.rl_updates = opt.apply_gradients(
            zip(clipped_gradients, params), global_step=actor.global_step)
def __init__(self, vocab_size, label_size, size, num_layers, batch_size, learning_rate, learning_rate_decay_factor, dropout, embedding, src_steps, tgt_steps, mode='sq2sq', max_gradient_norm=5.0, forward_only=False):
    """Build the model graph and, unless `forward_only`, its training op.

    Creates the placeholders, calls `setup_embeddings`, `setup_encoder`,
    `setup_decoder` and `setup_label_loss`, and finally creates a saver.

    Args:
      vocab_size: Stored as `self.vocab_size`.
      label_size: Width of the float label placeholder.
      size: Width of the per-layer state placeholders.
      num_layers: Number of state placeholders created per state list.
      batch_size: Stored as `self.batch_size`.
      learning_rate: Initial value of the learning-rate variable.
      learning_rate_decay_factor: Factor applied by
        `self.learning_rate_decay_op`.
      dropout: Dropout rate; `self.keep_prob` is `1.0 - dropout`.
      embedding: Stored as `self.embedding`.
      src_steps: Second dimension of the source token placeholder.
      tgt_steps: Second dimension of the target token placeholder.
      mode: Only 'sq2sq' is implemented.
      max_gradient_norm: Threshold for global-norm gradient clipping.
      forward_only: If true, no optimizer or update op is created.

    Raises:
      NotImplementedError: If `mode` is not 'sq2sq'.
    """
    self.size = size
    self.mode = mode
    self.vocab_size = vocab_size
    self.label_size = label_size
    self.embedding = embedding
    self.src_steps = src_steps
    self.tgt_steps = tgt_steps
    self.batch_size = batch_size
    self.num_layers = num_layers
    self.keep_prob = 1.0 - dropout

    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    decayed_rate = self.learning_rate * learning_rate_decay_factor
    self.learning_rate_decay_op = self.learning_rate.assign(decayed_rate)
    self.global_step = tf.Variable(0, trainable=False)

    self.source_tokens = tf.placeholder(
        tf.int32, shape=[None, self.src_steps], name='srcInput')
    self.target_tokens = tf.placeholder(
        tf.int32, shape=[None, self.tgt_steps], name='targetInput')
    self.label_placeholder = tf.placeholder(
        tf.float32, shape=[None, self.label_size])

    self.decoder_state_input, self.decoder_state_output = [], []
    self.tgt_encoder_state_input, self.tgt_encoder_state_output = [], []
    # One state placeholder per layer for each of the two input lists.
    for _ in xrange(num_layers):
        self.decoder_state_input.append(
            tf.placeholder(tf.float32, shape=[None, size]))
        self.tgt_encoder_state_input.append(
            tf.placeholder(tf.float32, shape=[None, size]))

    self.setup_embeddings()
    self.setup_encoder()
    self.setup_decoder()
    if mode != 'sq2sq':
        raise NotImplementedError
    self.setup_label_loss()

    params = tf.trainable_variables()
    if not forward_only:
        opt = tf.train.AdamOptimizer(self.learning_rate)
        gradients = tf.gradients(self.losses, params)
        clipped_gradients, _ = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        # The reported norm is that of the clipped gradients.
        self.gradient_norm = tf.global_norm(clipped_gradients)
        self.param_norm = tf.global_norm(params)
        self.updates = opt.apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.all_variables())
def initialize(self):
    """Create the optional summaries, the session and initialise variables.

    When `self.summarize` is true, scalar summaries for the batch-averaged
    policy loss and the gradient/variable global norms are registered and
    merged into `self.summary_op`. A session on graph `self.g` is then
    created, the variables are wrapped in
    `ray.experimental.TensorFlowVariables`, and the global initializer runs.
    """
    if self.summarize:
        batch_size = tf.to_float(tf.shape(self.x)[0])
        tf.summary.scalar("model/policy_loss", self.pi_loss / batch_size)
        tf.summary.scalar("model/grad_gnorm", tf.global_norm(self.grads))
        tf.summary.scalar("model/var_gnorm", tf.global_norm(self.var_list))
        self.summary_op = tf.summary.merge_all()

    # TODO(rliaw): Can consider exposing these parameters
    session_config = tf.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=2,
        gpu_options=tf.GPUOptions(allow_growth=True))
    self.sess = tf.Session(graph=self.g, config=session_config)
    self.variables = ray.experimental.TensorFlowVariables(
        self.loss, self.sess)
    self.sess.run(tf.global_variables_initializer())
def optim(loss, **kwargs):
    r"""Applies gradients to variables.

    Args:
      loss: A 0-D `Tensor` containing the value to minimize.
      kwargs:
        optim: A name for optimizer. 'Adam' selects `tf.train.AdamOptimizer`
          (the historical misspelling 'Adm' is still accepted); any other
          value, including the default 'MaxProp', falls back to
          `tf.train.GradientDescentOptimizer`.
        lr: A Python Scalar (optional). Learning rate. Default is .001.
        beta1: A Python Scalar (optional). Default is .9.
        beta2: A Python Scalar (optional). Default is .99.
        category: A string or string list. Specifies the variables that
          should be trained (optional). Only if the name of a trainable
          variable starts with `category`, its value is updated.
          Default is '', which means all trainable variables are updated.

    Returns:
      Tuple `(train_op, global_step)` where `train_op` applies the
      gradients and increments the newly created `global_step` variable.
    """
    opt = Opt(kwargs)

    # default training options
    opt += Opt(optim='MaxProp', lr=0.001, beta1=0.9, beta2=0.99, category='')

    # select optimizer ('Adm' kept for callers relying on the old spelling)
    if opt.optim in ('Adam', 'Adm'):
        optimizer = tf.train.AdamOptimizer(
            learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    else:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=opt.lr)

    # get trainable variables
    trainable = tf.trainable_variables()
    if isinstance(opt.category, (tuple, list)):
        var_list = []
        for cat in opt.category:
            var_list.extend(t for t in trainable if t.name.startswith(cat))
    else:
        var_list = [t for t in trainable if t.name.startswith(opt.category)]

    # calc gradient
    gradient = optimizer.compute_gradients(loss, var_list=var_list)

    # add summary; compute_gradients returns (gradient, variable) pairs, so
    # unpack each pair rather than summarising the pair itself.
    excluded = ('mean', 'variance', 'beta', 'gamma')
    for g, v in gradient:
        if g is None:
            # no gradient flows to this variable; nothing to summarise
            continue
        # exclude batch normal statistics
        if any(token in v.name for token in excluded):
            continue
        name = ''.join(v.name.split(':')[:-1])
        try:
            tf.summary.scalar(name + '/grad', tf.global_norm([g]))
            tf.summary.histogram(name + '/grad-h', g)
        except (TypeError, ValueError) as err:
            # summaries are best effort and must not break graph building
            tf.logging.warning(
                'Skipping gradient summary for %s: %s', v.name, err)

    global_step = tf.Variable(0, name='global_step', trainable=False)

    # gradient update op
    train_op = optimizer.apply_gradients(gradient, global_step=global_step)
    return train_op, global_step
def _update_network(self, trainer):
    """Build the losses and the op applying local gradients globally.

    Creates the `actions`, `target_v` and `advantages` placeholders, the
    value, entropy and policy terms, and `self.apply_grads`, which applies
    the clipped gradients of the variables in `self.scope` to the
    variables in the 'global' scope.

    Args:
      trainer: Optimizer whose `apply_gradients` builds the update op.
    """
    # Guards tf.log against probabilities that reach exactly zero, which
    # would otherwise yield -inf / NaN in the entropy and policy loss.
    log_eps = 1e-30

    self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
    self.actions_onehot = tf.one_hot(
        self.actions, self.a_dim, dtype=tf.float32)
    self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
    self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)

    # Probability the policy assigns to the action that was taken.
    self.outputs = tf.reduce_sum(
        self.policy * self.actions_onehot, [1])

    # loss
    self.value_loss = 0.5 * tf.reduce_sum(tf.square(
        self.target_v - tf.reshape(self.value, [-1])))
    # higher entropy -> lower loss -> encourage exploration
    self.entropy = -tf.reduce_sum(
        self.policy * tf.log(self.policy + log_eps))
    self.policy_loss = -tf.reduce_sum(
        tf.log(self.outputs + log_eps) * self.advantages)
    self.loss = 0.5 * self.value_loss \
        + self.policy_loss - 0.01 * self.entropy

    # local gradients
    local_vars = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
    self.gradients = tf.gradients(self.loss, local_vars)
    self.var_norms = tf.global_norm(local_vars)

    # grads[i] * clip_norm / max(global_norm, clip_norm)
    grads, self.grad_norms = tf.clip_by_global_norm(self.gradients, 40.0)

    # apply gradients to global network; local and global variables are
    # paired positionally.
    global_vars = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
    self.apply_grads = trainer.apply_gradients(zip(grads, global_vars))
def _update_policy_step(self, observ, action, old_mean, old_logstd, advantage, length):
    """Build one gradient step on the policy loss.

    Args:
      observ: Sequences of observations.
      action: Sequences of actions.
      old_mean: Sequences of action means of the behavioral policy.
      old_logstd: Sequences of action log stddevs of the behavioral policy.
      advantage: Sequences of advantages.
      length: Batch of sequence lengths.

    Returns:
      List of loss tensor and summary tensor, both created under a control
      dependency on the update op.
    """
    network = self._network(observ, length)
    loss, loss_summary = self._policy_loss(
        network.mean, network.logstd, old_mean, old_logstd, action,
        advantage, length)

    grads_and_vars = self._policy_optimizer.compute_gradients(loss)
    gradients, variables = zip(*grads_and_vars)
    optimize = self._policy_optimizer.apply_gradients(
        zip(gradients, variables))

    norm_summary = tf.summary.scalar(
        'gradient_norm', tf.global_norm(gradients))
    gradient_summary = utility.gradient_summaries(
        zip(gradients, variables), dict(policy=r'.*'))
    merged_summary = tf.summary.merge(
        [loss_summary, norm_summary, gradient_summary])

    # Fetching either output forces the update op to run first.
    with tf.control_dependencies([optimize]):
        return [tf.identity(loss), tf.identity(merged_summary)]
def _update_step(self, sequence):
    """Build one gradient step on the combined loss.

    The sequences must be a dict containing the keys `length` and
    `sequence`, where the latter is a tuple containing observations,
    actions, parameters of the behavioral policy, rewards, and advantages.

    Args:
      sequence: Sequences of episodes or chunks of episodes.

    Returns:
      List of value loss, policy loss, and summary tensor, each created
      under a control dependency on the update op.
    """
    length = sequence['length']
    observ, action, old_policy_params, reward, advantage = (
        sequence['sequence'])
    old_policy = self._policy_type(**old_policy_params)

    value_loss, value_summary = self._value_loss(observ, reward, length)
    network = self._network(observ, length)
    policy_loss, policy_summary = self._policy_loss(
        old_policy, network.policy, action, advantage, length)

    # An optional extra loss provided by the network is added when present.
    loss = policy_loss + value_loss + network.get('loss', 0)
    grads_and_vars = self._optimizer.compute_gradients(loss)
    gradients, variables = zip(*grads_and_vars)
    optimize = self._optimizer.apply_gradients(zip(gradients, variables))

    network_loss_summary = tf.summary.histogram(
        'network_loss', network.get('loss', 0))
    norm_summary = tf.summary.scalar(
        'gradient_norm', tf.global_norm(gradients))
    gradient_summary = utility.gradient_summaries(zip(gradients, variables))
    summary = tf.summary.merge([
        value_summary,
        policy_summary,
        network_loss_summary,
        norm_summary,
        gradient_summary,
    ])

    with tf.control_dependencies([optimize]):
        return [
            tf.identity(value_loss),
            tf.identity(policy_loss),
            tf.identity(summary),
        ]
def _summarize_vars_and_grads(grads_and_vars):
    """Log every variable and add summaries for it and for its gradient.

    For each variable the mean, stddev, max, min and a histogram are
    summarised. When a gradient is present, its histogram and norm are
    added too; otherwise the missing gradient is logged.

    Args:
      grads_and_vars: Iterable of (gradient, variable) pairs.
    """
    tf.logging.info('Trainable variables:')
    tf.logging.info('-' * 60)
    for grad, var in grads_and_vars:
        tf.logging.info(var)
        # All tags for this variable share the op-name prefix.
        prefix = var.op.name + '_'

        # Variable summary
        mean = tf.reduce_mean(var)
        tf.summary.scalar(prefix + 'mean', mean)
        with tf.name_scope(prefix + 'stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar(prefix + 'stddev', stddev)
        tf.summary.scalar(prefix + 'max', tf.reduce_max(var))
        tf.summary.scalar(prefix + 'min', tf.reduce_min(var))
        tf.summary.histogram(prefix + 'histogram', var)

        # Gradient summary
        if grad is None:
            tf.logging.info('Var %s has no gradient', var.op.name)
            continue
        # For IndexedSlices only the values tensor is summarised.
        grad_values = (
            grad.values if isinstance(grad, tf.IndexedSlices) else grad)
        tf.summary.histogram(prefix + 'gradient', grad_values)
        tf.summary.scalar(
            prefix + 'gradient_norm', tf.global_norm([grad_values]))
def _add_gradients_summaries(grads_and_vars):
    """Add histogram summaries for gradients and their norms.

    Variables without a gradient are logged and skipped.

    Args:
      grads_and_vars: A list of gradient to variable pairs (tuples).

    Returns:
      The _list_ of the added summaries for grads_and_vars.
    """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is None:
            tf.logging.info('Var %s has no gradient', var.op.name)
            continue
        # For IndexedSlices only the values tensor is summarised.
        grad_values = (
            grad.values if isinstance(grad, tf.IndexedSlices) else grad)
        tag_prefix = var.op.name
        summaries.append(
            tf.histogram_summary(tag_prefix + ':gradient', grad_values))
        summaries.append(
            tf.histogram_summary(tag_prefix + ':gradient_norm',
                                 tf.global_norm([grad_values])))
    return summaries
def get_train_op(loss, params):
    """Generate the training op that updates variables based on `loss`.

    Args:
      loss: Tensor to minimise.
      params: Object providing `learning_rate`, `hidden_size`,
        `learning_rate_warmup_steps` and the `optimizer_adam_*` settings.

    Returns:
      The op, named "train", that applies the gradients and advances the
      global step.
    """
    with tf.variable_scope("get_train_op"):
        learning_rate = get_learning_rate(
            params.learning_rate, params.hidden_size,
            params.learning_rate_warmup_steps)

        # LazyAdamOptimizer from TF contrib is used instead of the core Adam
        # optimizer; the original author notes it is faster.
        optimizer = tf.contrib.opt.LazyAdamOptimizer(
            learning_rate,
            beta1=params.optimizer_adam_beta1,
            beta2=params.optimizer_adam_beta2,
            epsilon=params.optimizer_adam_epsilon)

        global_step = tf.train.get_global_step()
        trainable = tf.trainable_variables()
        grads_and_vars = optimizer.compute_gradients(
            loss, trainable, colocate_gradients_with_ops=True)
        train_op = optimizer.apply_gradients(
            grads_and_vars, global_step=global_step, name="train")

        # Save gradient norm to Tensorboard
        only_grads = [grad for grad, _ in grads_and_vars]
        tf.summary.scalar(
            "global_norm/gradient_norm", tf.global_norm(only_grads))

        return train_op
def __init__(self, vocab_size, size, num_layers, max_gradient_norm, batch_size, learning_rate, learning_rate_decay_factor, dropout, FLAGS, forward_only=False, optimizer="adam"):
    """Build the model graph and, unless `forward_only`, its training op.

    Creates the placeholders, builds embeddings, encoder, decoder, loss and
    beam ops inside the "NLC" variable scope, and finally creates a saver.

    Args:
      vocab_size: Stored as `self.vocab_size`.
      size: Width of the per-layer decoder state placeholders.
      num_layers: Number of decoder state placeholders to create.
      max_gradient_norm: Threshold for global-norm gradient clipping.
      batch_size: Stored as `self.batch_size`.
      learning_rate: Initial value of the learning-rate variable.
      learning_rate_decay_factor: Factor applied by
        `self.learning_rate_decay_op`.
      dropout: Dropout rate; `self.keep_prob_config` is `1.0 - dropout`.
      FLAGS: Configuration object; `FLAGS.keep` bounds the checkpoints the
        saver keeps.
      forward_only: If true, no optimizer or update op is created.
      optimizer: Name passed to `get_optimizer`.
    """
    self.size = size
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.num_layers = num_layers
    self.keep_prob_config = 1.0 - dropout

    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    decayed_rate = self.learning_rate * learning_rate_decay_factor
    self.learning_rate_decay_op = self.learning_rate.assign(decayed_rate)
    self.global_step = tf.Variable(0, trainable=False)

    self.keep_prob = tf.placeholder(tf.float32)
    self.source_tokens = tf.placeholder(tf.int32, shape=[None, None])
    self.target_tokens = tf.placeholder(tf.int32, shape=[None, None])
    self.source_mask = tf.placeholder(tf.int32, shape=[None, None])
    self.target_mask = tf.placeholder(tf.int32, shape=[None, None])
    self.beam_size = tf.placeholder(tf.int32)
    # Target mask summed over its first axis.
    self.target_length = tf.reduce_sum(
        self.target_mask, reduction_indices=0)

    self.FLAGS = FLAGS

    self.decoder_state_input, self.decoder_state_output = [], []
    for _ in xrange(num_layers):
        self.decoder_state_input.append(
            tf.placeholder(tf.float32, shape=[None, size]))

    scope_initializer = tf.uniform_unit_scaling_initializer(1.0)
    with tf.variable_scope("NLC", initializer=scope_initializer):
        self.setup_embeddings()
        self.setup_encoder()
        self.setup_decoder()
        self.setup_loss()
        self.setup_beam()

    params = tf.trainable_variables()
    if not forward_only:
        opt = get_optimizer(optimizer)(self.learning_rate)
        gradients = tf.gradients(self.losses, params)
        clipped_gradients, _ = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        # The reported norm is that of the unclipped gradients.
        self.gradient_norm = tf.global_norm(gradients)
        self.param_norm = tf.global_norm(params)
        self.updates = opt.apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.keep)
def gradient_clip(gradients, max_gradient_norm):
    """Clip gradients by global norm and summarise the norms.

    Args:
      gradients: List of gradient tensors.
      max_gradient_norm: Threshold passed to `tf.clip_by_global_norm`.

    Returns:
      Tuple `(clipped_gradients, gradient_norm_summary, gradient_norm)`,
      where `gradient_norm_summary` holds two scalar summaries: the norm
      reported by the clip op and the norm of the clipped gradients.
    """
    clipped_gradients, gradient_norm = tf.clip_by_global_norm(
        gradients, max_gradient_norm)
    clipped_norm = tf.global_norm(clipped_gradients)
    gradient_norm_summary = [
        tf.summary.scalar("grad_norm", gradient_norm),
        tf.summary.scalar("clipped_gradient", clipped_norm),
    ]
    return clipped_gradients, gradient_norm_summary, gradient_norm
def __init__(self, FLAGS, encoder, decoder, classifier):
    """Build the placeholders, the model graph and the training op.

    Args:
      FLAGS: Configuration object; `maxSentenceLength`, `clipGradients` and
        `max_gradient_norm` are read here.
      encoder: Stored as `self.encoder`.
      decoder: Stored as `self.decoder`.
      classifier: Stored as `self.classifier`.
    """
    self.FLAGS = FLAGS
    self.encoder = encoder
    self.decoder = decoder
    self.classifier = classifier

    max_len = self.FLAGS.maxSentenceLength
    self.xplaceholder = tf.placeholder(tf.int32, shape=(None, max_len))
    self.yplaceholder = tf.placeholder(tf.float64, shape=(None,))
    self.maskplaceholder = tf.placeholder(tf.int32, shape=(None,))
    self.drop_placeholder = tf.placeholder(tf.float64, shape=())
    self.lr_placeholder = tf.placeholder(tf.float64, shape=())
    self.opplaceholder = tf.placeholder(tf.float64)

    xavier = tf.contrib.layers.xavier_initializer()
    with tf.variable_scope("tldr", initializer=xavier):
        self.setup_embeddings()
        self.setup_system()
        self.setup_loss()

    params = tf.trainable_variables()
    self.globalnorm = 0
    self.paramnorm = 0
    # Accumulate the L2 loss of every parameter with at least two dimensions.
    for param in params:
        if len(param.get_shape()) >= 2:
            self.paramnorm += tf.nn.l2_loss(param)

    opt = tf.train.AdamOptimizer(self.lr_placeholder)
    if self.FLAGS.clipGradients == 1:
        try:
            grads, _ = zip(*opt.compute_gradients(self.loss))
            grads, _ = tf.clip_by_global_norm(
                grads, self.FLAGS.max_gradient_norm)
            # Norm of the gradients after clipping.
            self.globalnorm = tf.global_norm(grads)
            self.updates = opt.apply_gradients(zip(grads, params))
        except AttributeError:
            self.updates = None
    else:
        grads = tf.gradients(self.loss, params)
        self.globalnorm = tf.global_norm(grads)
        try:
            self.updates = opt.minimize(self.loss)
        except AttributeError:
            self.updates = None

    self.saver = tf.train.Saver(keep_checkpoint_every_n_hours=2, max_to_keep=0)
def _create_loss_optimizer(self):
    """Build the two-term loss and the clipped Adam update op.

    The cost is the batch mean of
    `vae_loss_likelihood + self.lamb * vae_loss_kl`:

    1. `vae_loss_likelihood`: per-example sum of
       `0.5 * (squared_error / exp(log_sigma_sq) + log(2*pi) + log_sigma_sq)`
       between `self.x` and `self.x_reconstr_mean`.
    2. `vae_loss_kl`: the latent loss computed from `self.z_mean` and
       `self.z_log_sigma_sq`, acting as a regulariser.

    Sets `vae_loss_likelihood`, `vae_loss_kl`, `cost`, `t_vars`, `gradnorm`
    and `optimizer` on `self`.
    """
    orig_energies = tf.reshape(self.x, [self.batch_size, -1])
    new_energies = tf.reshape(self.x_reconstr_mean, [self.batch_size, -1])

    # Arithmetic operators replace tf.sub / tf.div, which newer TensorFlow
    # releases no longer provide.
    diff = tf.square(orig_energies - new_energies)
    # The log-variance is capped at 20 before exponentiating.
    diff_norm = diff / tf.exp(tf.minimum(20., self.x_reconstr_log_sigma_sq))
    denom_log = tf.log(2 * np.pi) + self.x_reconstr_log_sigma_sq
    self.vae_loss_likelihood = tf.reduce_sum(
        0.5 * (diff_norm + denom_log), 1)

    self.vae_loss_kl = -0.5 * tf.reduce_sum(
        1 + self.z_log_sigma_sq
        - tf.square(self.z_mean)
        - tf.exp(tf.minimum(20., self.z_log_sigma_sq)), 1)

    # average over batch
    self.cost = tf.reduce_mean(
        self.vae_loss_likelihood + self.lamb * self.vae_loss_kl)

    self.t_vars = tf.trainable_variables()

    # Use Adam optimizer
    opt = tf.train.AdamOptimizer(self.learning_rate)
    raw_grads, t_vars = zip(*opt.compute_gradients(self.cost, self.t_vars))
    self.gradnorm = tf.global_norm(raw_grads)

    # Clip only when the global norm is not vanishingly small. Both branches
    # return a list so tf.cond sees the same structure from each.
    grads = tf.cond(
        self.gradnorm > 1e-20,
        lambda: tf.clip_by_global_norm(raw_grads, 500.)[0],
        lambda: list(raw_grads))
    self.optimizer = opt.apply_gradients(zip(grads, t_vars))
def __init__(self, vocab_size, size, num_layers, max_gradient_norm, batch_size, learning_rate, learning_rate_decay_factor, dropout, forward_only=False):
    """Build the model graph and, unless `forward_only`, its training op.

    Creates the placeholders, calls `setup_embeddings`, `setup_encoder`,
    `setup_decoder` and `setup_loss`, and finally creates a saver.

    Args:
      vocab_size: Stored as `self.vocab_size`.
      size: Width of the per-layer decoder state placeholders.
      num_layers: Number of decoder state placeholders to create.
      max_gradient_norm: Threshold for global-norm gradient clipping.
      batch_size: Stored as `self.batch_size`.
      learning_rate: Initial value of the learning-rate variable.
      learning_rate_decay_factor: Factor applied by
        `self.learning_rate_decay_op`.
      dropout: Dropout rate; `self.keep_prob` is `1.0 - dropout`.
      forward_only: If true, no optimizer or update op is created.
    """
    self.size = size
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.num_layers = num_layers
    self.keep_prob = 1.0 - dropout

    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    decayed_rate = self.learning_rate * learning_rate_decay_factor
    self.learning_rate_decay_op = self.learning_rate.assign(decayed_rate)
    self.global_step = tf.Variable(0, trainable=False)

    self.source_tokens = tf.placeholder(tf.int32, shape=[None, None])
    self.target_tokens = tf.placeholder(tf.int32, shape=[None, None])
    self.source_mask = tf.placeholder(tf.int32, shape=[None, None])
    self.target_mask = tf.placeholder(tf.int32, shape=[None, None])
    # Target mask summed over its first axis.
    self.target_length = tf.reduce_sum(
        self.target_mask, reduction_indices=0)

    self.decoder_state_input, self.decoder_state_output = [], []
    for _ in xrange(num_layers):
        self.decoder_state_input.append(
            tf.placeholder(tf.float32, shape=[None, size]))

    self.setup_embeddings()
    self.setup_encoder()
    self.setup_decoder()
    self.setup_loss()

    params = tf.trainable_variables()
    if not forward_only:
        opt = tf.train.AdamOptimizer(self.learning_rate)
        gradients = tf.gradients(self.losses, params)
        clipped_gradients, _ = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        # The reported norm is that of the unclipped gradients.
        self.gradient_norm = tf.global_norm(gradients)
        self.param_norm = tf.global_norm(params)
        self.updates = opt.apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.all_variables())
def _update_network(self, trainer):
    '''
    Build losses, compute gradients and apply gradients to the global net.

    Creates the `actions`, `target_v` and `advantages` placeholders, the
    critic, entropy and actor losses, the clipped gradients of the
    variables in `self.scope`, and `self.apply_grads_to_global`. Scalar
    summaries are only registered when the scope is 'worker_1'; otherwise
    `self.summaries` is a no-op.
    '''
    self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
    actions_onehot = tf.one_hot(self.actions, self.a_dim, dtype=tf.float32)
    self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
    self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)

    # Probability the policy assigns to the action that was taken.
    action_prob = tf.reduce_sum(self.policy * actions_onehot, [1])

    # Critic loss: half the summed squared difference to the target values.
    flat_value = tf.reshape(self.value, [-1])
    self.critic_loss = 0.5 * tf.reduce_sum(
        tf.squared_difference(self.target_v, flat_value))

    # high entropy -> low loss -> encourage exploration
    self.entropy = -tf.reduce_sum(
        self.policy * tf.log(self.policy + 1e-30), 1)
    self.entropy_loss = -self.entropy_ratio * tf.reduce_sum(self.entropy)

    # policy gradients = d_[-log(p) * advantages] / d_theta
    policy_term = -tf.reduce_sum(
        tf.log(action_prob + 1e-30) * self.advantages)
    self.actor_loss = policy_term + self.entropy_loss

    self.loss = self.actor_loss + self.critic_loss

    local_vars = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
    raw_grads = tf.gradients(self.loss, local_vars)

    # global norm gradients clipping
    self.grads, self.grad_norms = tf.clip_by_global_norm(
        raw_grads, self.clip_grads)
    self.var_norms = tf.global_norm(local_vars)

    # Local gradients are paired positionally with the global variables.
    global_vars = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
    self.apply_grads_to_global = trainer.apply_gradients(
        zip(self.grads, global_vars))

    # summaries
    if self.scope != 'worker_1':
        self.summaries = tf.no_op()
        return

    tf.summary.scalar('loss/entropy', tf.reduce_sum(self.entropy))
    tf.summary.scalar('loss/actor_loss', self.actor_loss)
    tf.summary.scalar('loss/critic_loss', self.critic_loss)
    tf.summary.scalar('advantages', tf.reduce_mean(self.advantages))
    tf.summary.scalar('norms/grad_norms', self.grad_norms)
    tf.summary.scalar('norms/var_norms', self.var_norms)
    collected = tf.get_collection(tf.GraphKeys.SUMMARIES)
    self.summaries = tf.summary.merge(collected)
def __init__(self, FLAGS, id2word, word2id, emb_matrix):
    """
    Initializes the QA model.

    Inputs:
      FLAGS: the flags passed in from main.py
      id2word: dictionary mapping word idx (int) to word (string)
      word2id: dictionary mapping word (string) to word idx (int)
      emb_matrix: numpy array shape (400002, embedding_size) containing
        pre-trained GloVe embeddings
    """
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print("Initializing the QAModel...")
    self.FLAGS = FLAGS
    self.id2word = id2word
    self.word2id = word2id

    # Add all parts of the graph
    scope_initializer = tf.contrib.layers.variance_scaling_initializer(
        factor=1.0, uniform=True)
    with tf.variable_scope("QAModel", initializer=scope_initializer):
        self.add_placeholders()
        self.add_embedding_layer(emb_matrix)
        self.build_graph()
        self.add_loss()

    # Define trainable parameters, gradient, gradient norm, and clip by
    # gradient norm. The reported norm is that of the unclipped gradients.
    params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, params)
    self.gradient_norm = tf.global_norm(gradients)
    clipped_gradients, _ = tf.clip_by_global_norm(
        gradients, FLAGS.max_gradient_norm)
    self.param_norm = tf.global_norm(params)

    # Define optimizer and updates
    # (updates is what you need to fetch in session.run to do a gradient
    # update)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    opt = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
    self.updates = opt.apply_gradients(
        zip(clipped_gradients, params), global_step=self.global_step)

    # Define savers (for checkpointing) and summaries (for tensorboard)
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.keep)
    self.bestmodel_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    self.summaries = tf.summary.merge_all()
def __init__(self, s_size, a_size, scope, trainer):
    """Build the quantile-regression DQN network inside `scope`.

    The network maps the input, reshaped to 84x84x1 images, through three
    convolutions and a 512-unit layer to `a_size * N` outputs, which are
    reshaped to `[-1, a_size, N]`. `self.Q` is the sum over the last axis
    of the outputs scaled by `1 / N`. `N` and `k` are read from module
    scope.

    For every scope other than 'global', the loss, the clipped local
    gradients and `self.apply_grads` (which applies them to the 'global'
    variables) are created as well.

    Args:
      s_size: Width of the flat input placeholder.
      a_size: Number of actions.
      scope: Variable scope name; 'global' builds the network only.
      trainer: Optimizer whose `apply_gradients` builds the update op.
    """
    def _conv(inputs, num_outputs, kernel, stride):
        # ReLU convolution with VALID padding, shared by all three layers.
        return slim.conv2d(
            activation_fn=tf.nn.relu, inputs=inputs,
            num_outputs=num_outputs, kernel_size=[kernel, kernel],
            stride=[stride, stride], padding='VALID')

    with tf.variable_scope(scope):
        # quantile regression dqn
        self.quantile = 1.0 / N
        self.cumulative_probabilities = (2.0 * np.arange(N) + 1) / (2.0 * N)

        # network
        self.inputs = tf.placeholder(shape=[None, s_size], dtype=tf.float32)
        self.imageIn = tf.reshape(self.inputs, shape=[-1, 84, 84, 1])
        self.conv1 = _conv(self.imageIn, 32, 8, 4)
        self.conv2 = _conv(self.conv1, 64, 4, 2)
        self.conv3 = _conv(self.conv2, 64, 3, 1)
        hidden = slim.fully_connected(
            slim.flatten(self.conv3), 512, activation_fn=tf.nn.relu)
        flat_out = slim.fully_connected(
            hidden, a_size * N,
            activation_fn=None,
            weights_initializer=normalized_columns_initializer(0.1),
            biases_initializer=None)
        self.out = tf.reshape(flat_out, [-1, a_size, N])
        self.Q = tf.reduce_sum(self.out * self.quantile, axis=2)

        # Only the worker networks need ops for the loss and the update.
        if scope != 'global':
            self.actions_q = tf.placeholder(
                shape=[None, a_size, N], dtype=tf.float32)
            self.q_target = tf.placeholder(shape=[None, N], dtype=tf.float32)

            # Multiply by the action placeholder and sum over the action axis.
            self.q_actiona = tf.multiply(self.out, self.actions_q)
            self.q_action = tf.reduce_sum(self.q_actiona, axis=1)
            self.u = self.q_target - self.q_action

            # The active loss is the mean of the per-row summed squared
            # error. loss1 and loss2 are built but do not enter self.loss.
            self.loss = tf.reduce_mean(
                tf.reduce_sum(tf.square(self.u), axis=1))
            self.delta = tf.to_float(self.u < 0.0)
            self.loss1 = tf.abs(self.cumulative_probabilities - self.delta)
            self.loss2 = self.huber(self.u, k)

            # Get gradients from local network using local losses
            local_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            self.gradients = tf.gradients(self.loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)
            grads, self.grad_norms = tf.clip_by_global_norm(
                self.gradients, 40.0)

            # Apply local gradients to global network
            global_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads = trainer.apply_gradients(
                zip(grads, global_vars))
def _make_training_op(self):
    """Create the Adam update op with global-norm gradient clipping.

    Scalar summaries "grad_norm" (the norm reported by the clip op) and
    "clipped_norm" (the norm of the clipped gradients) are registered as a
    side effect.

    Returns:
      The op that applies the clipped gradients and advances
      `self.global_step`.
    """
    optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
    trainable = tf.trainable_variables()
    raw_grads = tf.gradients(self.loss, trainable)
    clipped_grads, grad_norm = tf.clip_by_global_norm(
        raw_grads, self.config.max_gradient_norm)

    tf.summary.scalar("grad_norm", grad_norm)
    tf.summary.scalar("clipped_norm", tf.global_norm(clipped_grads))

    return optimizer.apply_gradients(
        zip(clipped_grads, trainable), global_step=self.global_step)
def __init__(self, s_size, a_size, scope, trainer):
    """Build the distributional DQN graph inside variable scope `scope`.

    Args:
        s_size: Width of the flat observation placeholder; the input is
            reshaped to [-1, 84, 84, 1] before the convolutions.
        a_size: Number of actions; the head emits `a_size * atoms` logits.
        scope: Variable scope name. The literal 'global' builds only the
            forward pass; any other name also builds loss/gradient ops.
        trainer: Object whose `apply_gradients` applies the clipped local
            gradients to the variables collected under scope 'global'.
    """
    with tf.variable_scope(scope):
        # Distributional DQN: fixed support of `atoms` evenly spaced values
        # from v_min to v_max.
        self.atoms = 21
        self.v_max = 10.
        self.v_min = -10.
        self.delta_z = (self.v_max - self.v_min) / (self.atoms - 1)
        self.z = [self.v_min + i * self.delta_z for i in range(self.atoms)]

        # Convolutional trunk.
        self.inputs = tf.placeholder(shape=[None, s_size], dtype=tf.float32)
        self.imageIn = tf.reshape(self.inputs, shape=[-1, 84, 84, 1])
        self.conv1 = slim.conv2d(
            inputs=self.imageIn, num_outputs=32,
            kernel_size=[8, 8], stride=[4, 4], padding='VALID',
            activation_fn=tf.nn.relu)
        self.conv2 = slim.conv2d(
            inputs=self.conv1, num_outputs=64,
            kernel_size=[4, 4], stride=[2, 2], padding='VALID',
            activation_fn=tf.nn.relu)
        self.conv3 = slim.conv2d(
            inputs=self.conv2, num_outputs=64,
            kernel_size=[3, 3], stride=[1, 1], padding='VALID',
            activation_fn=tf.nn.relu)

        # Dense head: one logit per (action, atom) pair.
        flattened = slim.flatten(self.conv3)
        dense = slim.fully_connected(flattened, 512, activation_fn=tf.nn.relu)
        flat_logits = slim.fully_connected(
            dense, a_size * self.atoms,
            activation_fn=None,
            weights_initializer=normalized_columns_initializer(0.1),
            biases_initializer=None)
        self.out = tf.reshape(flat_logits, [-1, a_size, self.atoms])
        # Softmax over the atom axis, then the expectation over the support.
        self.p = tf.nn.softmax(self.out, dim=2)
        self.Q = tf.reduce_sum(self.z * self.p, axis=2)

        # Only the worker networks need ops for the loss and gradient updates.
        if scope == 'global':
            return

        self.m_input = tf.placeholder(shape=[None, self.atoms], dtype=tf.float32)
        self.actions_p = tf.placeholder(
            shape=[None, a_size, self.atoms], dtype=tf.float32)
        # actions_p multiplies the per-action distributions, then the action
        # axis is summed out (presumably a one-hot mask of the taken action --
        # verify against the caller).
        self.p_actiona = tf.multiply(self.p, self.actions_p)
        self.p_action = tf.reduce_sum(self.p_actiona, axis=1)
        # log(m) - log(p), with 1e-20 added inside each log to avoid log(0).
        self.p_alog = - tf.log(self.p_action + 1e-20) + tf.log(self.m_input + 1e-20)
        # Sum of m * (log m - log p) over atoms, averaged over the batch.
        self.loss = tf.reduce_mean(
            tf.reduce_sum(self.m_input * self.p_alog, axis=1))

        # Gradients of the local loss w.r.t. this scope's variables.
        worker_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
        self.gradients = tf.gradients(self.loss, worker_vars)
        self.var_norms = tf.global_norm(worker_vars)
        clipped, self.grad_norms = tf.clip_by_global_norm(self.gradients, 40.0)

        # Apply the clipped local gradients to the global network.
        shared_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
        self.apply_grads = trainer.apply_gradients(zip(clipped, shared_vars))
def add_training_op(self, loss):
    """Sets up the training Ops.

    Creates a gradient-descent optimizer and applies the gradients to all
    trainable variables. The Op returned by this function is what must be
    passed to the `sess.run()` call to cause the model to train.

    Steps:
        - Get the gradients for the loss with optimizer.compute_gradients.
        - If self.config.clip_gradients is true, clip the global norm of the
          gradients to self.config.max_grad_norm with tf.clip_by_global_norm.
        - Store the global norm of the resulting (possibly clipped)
          gradients in self.grad_norm.
        - Create the training operation with optimizer.apply_gradients.

    See: https://www.tensorflow.org/api_docs/python/train/gradient_clipping

    Args:
        loss: Loss tensor.
    Returns:
        train_op: The Op for training.
    """
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.config.lr)

    # Split the (gradient, variable) pairs so the gradients can be clipped as
    # one group and then re-paired with their variables.
    gradients, variables = zip(*optimizer.compute_gradients(loss))
    if self.config.clip_gradients:
        gradients, _ = tf.clip_by_global_norm(
            gradients, clip_norm=self.config.max_grad_norm)

    # Norm of what is actually applied (after clipping, when enabled).
    self.grad_norm = tf.global_norm(gradients)
    train_op = optimizer.apply_gradients(list(zip(gradients, variables)))

    assert self.grad_norm is not None, "grad_norm was not set properly!"
    return train_op
def define_ppo_step(observation, action, reward, done, value, old_pdf,
                    policy_factory, config):
    """Build one PPO update step.

    Args:
        observation: Input passed to `policy_factory`.
        action: Actions whose probability under the new policy is evaluated.
        reward, done, value: Inputs to the generalized advantage estimator.
        old_pdf: Probability of `action` under the behavioural policy.
        policy_factory: Callable returning (distribution, value, _) for an
            observation.
        config: Provides clipping_coef, gae_gamma, gae_lambda,
            value_loss_coef, entropy_loss_coef and the optimizer settings.

    Returns:
        List of tensors: the policy, value and entropy losses followed by the
        global gradient norm of each, all evaluated only after the optimizer
        step has run.
    """
    policy_dist, new_value, _ = policy_factory(observation)

    # Probability ratio and its clipped counterpart.
    prob_ratio = policy_dist.prob(action) / old_pdf
    lower = 1 - config.clipping_coef
    upper = 1 + config.clipping_coef
    prob_ratio_clipped = tf.clip_by_value(prob_ratio, lower, upper)

    # Advantages normalized over axes 0 and 1 and treated as constants.
    advantage = calculate_generalized_advantage_estimator(
        reward, value, done, config.gae_gamma, config.gae_lambda)
    adv_mean, adv_var = tf.nn.moments(advantage, axes=[0, 1], keep_dims=True)
    adv_normalized = tf.stop_gradient(
        (advantage - adv_mean) / (tf.sqrt(adv_var) + 1e-8))

    # Pessimistic (elementwise minimum) surrogate objective.
    surrogate = tf.minimum(prob_ratio_clipped * adv_normalized,
                           prob_ratio * adv_normalized)
    policy_loss = -tf.reduce_mean(surrogate)

    # The value loss is the mean squared estimator output under the new value.
    value_error = calculate_generalized_advantage_estimator(
        reward, new_value, done, config.gae_gamma, config.gae_lambda)
    value_loss = config.value_loss_coef * tf.reduce_mean(value_error ** 2)

    entropy_loss = (
        -config.entropy_loss_coef * tf.reduce_mean(policy_dist.entropy()))

    optimizer = get_optimizer(config)
    losses = [policy_loss, value_loss, entropy_loss]

    # Gradients are computed per loss so each norm can be reported on its
    # own, then all pairs are applied in a single optimizer call.
    per_loss = [tuple(zip(*optimizer.compute_gradients(loss)))
                for loss in losses]
    norms = [tf.global_norm(grads) for grads, _ in per_loss]
    flat_grads = tuple(g for grads, _ in per_loss for g in grads)
    flat_vars = tuple(v for _, variables in per_loss for v in variables)
    optimize_op = optimizer.apply_gradients(zip(flat_grads, flat_vars))

    with tf.control_dependencies([optimize_op]):
        return [tf.identity(t) for t in losses + norms]
def get_gradients(self, loss_or_grads, params):
    """Compute, filter and optionally clip gradients for `params`.

    Parameters
    ----------
    loss_or_grads :
        Forwarded as the first argument of `self.algorithm.compute_gradients`.
    params :
        Forwarded as `var_list` to `compute_gradients`.

    Returns
    -------
    list of (gradient, variable) pairs, in the order produced by
    `compute_gradients`, after clipping.

    Raises
    ------
    RuntimeError
        If `self.algorithm` is None or lacks `compute_gradients` /
        `apply_gradients`.
    ValueError
        If `self.clipnorm` is set and `self.clip_alg` is not one of
        'norm', 'total_norm', 'avg_norm'.

    Note
    ----
    Pairs whose gradient is None are dropped, so the returned list never
    contains a None gradient and may be shorter than `params`.
    """
    # check valid algorithm
    if (self.algorithm is None or
            not hasattr(self.algorithm, 'compute_gradients') or
            not hasattr(self.algorithm, 'apply_gradients')):
        raise RuntimeError("Optimizer is None, or doesn't has attributes: "
                           "compute_gradients and apply_gradients.")
    with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE) as scope:
        scope_name = scope.name
        # get the gradient
        grads_var = self.algorithm.compute_gradients(loss_or_grads,
                                                     var_list=params)
        # Keep the pairs in a list rather than a dict keyed by the gradient:
        # a dict needs hashable tensors, merges pairs that share one gradient
        # object, and does not guarantee ordering on older Pythons.
        grads_var = [(g, v) for g, v in grads_var if g is not None]
        grads = [g for g, _ in grads_var]
        params = [v for _, v in grads_var]
        # ====== clipnorm ====== #
        if self.clipnorm is not None:
            if self.clip_alg == 'norm':
                grads = [tf.clip_by_norm(g, self.clipnorm) for g in grads]
            elif self.clip_alg == 'total_norm':
                grads, _ = tf.clip_by_global_norm(grads, self.clipnorm)
            elif self.clip_alg == 'avg_norm':
                grads = [tf.clip_by_average_norm(g, self.clipnorm)
                         for g in grads]
            else:
                raise ValueError("Unknown norm clipping algorithm: '%s'"
                                 % self.clip_alg)
        # ====== clipvalue ====== #
        if self.clipvalue is not None:
            grads = [tf.clip_by_value(g, -self.clipvalue, self.clipvalue)
                     for g in grads]
        # ====== get final norm value ====== #
        # Norm of the gradients after all clipping has been applied.
        self._norm = add_roles(tf.global_norm(grads, name="GradientNorm"),
                               GradientsNorm)
        # ====== setting Optimizer roles ====== #
        for v in get_all_variables(scope=scope_name):
            add_roles(v, roles=OptimizerVariable)
    return list(zip(grads, params))
def _createModel(self):
    """Build the actor-critic graph inside variable scope `self.scope`.

    Creates the input placeholder, one shared 64-unit hidden layer, a softmax
    policy head of width `self.actionSize` and a scalar value head. For every
    scope other than 'global' it also builds the loss, the clipped gradients
    and the op that applies them to the 'global' variables through
    `self.trainer`.
    """
    with tf.variable_scope(self.scope):
        self.inputs = tf.placeholder('float', shape=[None, self.stateSize])
        x1 = slim.fully_connected(
            self.inputs, 64, scope='fc/fc_1', activation_fn=tf.nn.relu)
        self.policy = slim.fully_connected(
            x1, self.actionSize,
            activation_fn=tf.nn.softmax,
            weights_initializer=Brian.normalized_columns_initializer(0.01),
            biases_initializer=None)
        self.value = slim.fully_connected(
            x1, 1,
            activation_fn=None,
            weights_initializer=Brian.normalized_columns_initializer(1.0),
            biases_initializer=None)
        self.update_local_ops = Brian.update_target_graph('global', self.scope)

        if self.scope != 'global':
            self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
            self.actions_onehot = tf.one_hot(
                self.actions, self.actionSize, dtype=tf.float32)
            self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
            self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)

            # Probability the policy assigned to the action actually taken.
            self.responsible_outputs = tf.reduce_sum(
                self.policy * self.actions_onehot, [1])

            # A saturated softmax can emit an exact 0; log(0) is -inf and
            # 0 * log(0) is NaN, which would poison the loss and every
            # gradient. A tiny epsilon inside the logs keeps them finite.
            eps = 1e-10

            # Loss functions
            self.value_loss = 0.5 * tf.reduce_sum(
                tf.square(self.target_v - tf.reshape(self.value, [-1])))
            self.entropy = - tf.reduce_sum(
                self.policy * tf.log(self.policy + eps))
            self.policy_loss = -tf.reduce_sum(
                tf.log(self.responsible_outputs + eps) * self.advantages)
            self.loss = (0.5 * self.value_loss + self.policy_loss
                         - self.entropy * 0.01)

            # Get gradients from local network using local losses
            local_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
            self.gradients = tf.gradients(self.loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)
            grads, self.grad_norms = tf.clip_by_global_norm(
                self.gradients, 40.0)

            # Apply local gradients to global network
            global_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads = self.trainer.apply_gradients(
                zip(grads, global_vars))
def get_train_op_and_metrics(loss, params):
    """Build the training op and the metrics to save in TensorBoard.

    Args:
        loss: Scalar loss tensor to minimize.
        params: Dict read for "learning_rate", "hidden_size",
            "learning_rate_warmup_steps", the three "optimizer_adam_*"
            settings, "use_tpu", "tpu" and "dtype".

    Returns:
        Tuple (train_op, train_metrics). `train_metrics` always holds
        "learning_rate" and, when not running on TPU, also
        "global_norm/gradient_norm".
    """
    with tf.variable_scope("get_train_op"):
        lr = get_learning_rate(
            learning_rate=params["learning_rate"],
            hidden_size=params["hidden_size"],
            learning_rate_warmup_steps=params["learning_rate_warmup_steps"])

        # LazyAdamOptimizer from TF contrib is used instead of the core Adam
        # optimizer because it is faster.
        opt = tf.contrib.opt.LazyAdamOptimizer(
            lr,
            beta1=params["optimizer_adam_beta1"],
            beta2=params["optimizer_adam_beta2"],
            epsilon=params["optimizer_adam_epsilon"])

        if params["use_tpu"] and params["tpu"] != tpu_util.LOCAL:
            opt = tf.contrib.tpu.CrossShardOptimizer(opt)

        # Automatic mixed precision (FP16) training when requested.
        if params["dtype"] == "fp16":
            opt = tf.train.experimental.enable_mixed_precision_graph_rewrite(
                opt)

        # Compute and apply the gradients.
        step = tf.train.get_global_step()
        grads_and_vars = opt.compute_gradients(
            loss, tf.trainable_variables(), colocate_gradients_with_ops=True)
        apply_op = opt.apply_gradients(
            grads_and_vars, global_step=step, name="train")
        # Run any registered update ops together with the optimizer step.
        train_op = tf.group(
            apply_op, tf.get_collection(tf.GraphKeys.UPDATE_OPS))

        metrics = {"learning_rate": lr}
        if params["use_tpu"]:
            # The gradient norm is not reported on TPU, as it can cause
            # instability between the TPU and the host controller.
            return train_op, metrics

        only_grads = [grad for grad, _ in grads_and_vars]
        metrics["global_norm/gradient_norm"] = tf.global_norm(only_grads)
        return train_op, metrics
def add_train_op(self, loss):
    """Create the Adam training op, optionally capping the gradient norm.

    A fresh non-trainable `global_step` variable is stored on `self` and
    incremented by the returned op. When `self.config.cap_grads` is positive
    the pre-clip global gradient norm is recorded as the scalar summary
    'global_gradient_norm' and the gradients are clipped to that norm.

    Args:
        loss: Loss tensor to minimize.

    Returns:
        The op applying the (possibly clipped) gradients.
    """
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    adam = tf.train.AdamOptimizer(learning_rate=self.lr)
    grads, tvars = zip(*adam.compute_gradients(loss))

    # Optionally cap the gradients to regularize training.
    max_norm = self.config.cap_grads
    if max_norm > 0:
        with tf.variable_scope('cap_grads'):
            pre_clip_norm = tf.global_norm(grads)
            tf.summary.scalar('global_gradient_norm', pre_clip_norm)
            grads, _ = tf.clip_by_global_norm(grads, max_norm)

    return adam.apply_gradients(
        zip(grads, tvars), global_step=self.global_step)
def __init__(self, scope, trainer, global_step=None):
    """Build the manager/worker network graph inside variable scope `scope`.

    The graph has a shared perception module (convolutions + dense layer), a
    manager branch that emits goals and a value estimate, and a worker branch
    that emits the action policy and its own value estimate. For every scope
    other than 'global' the losses, clipped gradients, summaries and the op
    applying the gradients to the 'global' variables are built as well.

    Args:
        scope: Variable scope name; 'global' builds the forward pass only.
        trainer: Object whose `apply_gradients` applies the clipped local
            gradients to the variables collected under scope 'global'.
        global_step: Not used by this constructor.
    """
    with tf.variable_scope(scope):
        self.prob_of_random_goal = tf.Variable(
            FLAGS.initial_random_goal_prob,
            trainable=False,
            name="prob_of_random_goal",
            dtype=tf.float32)
        self.inputs = tf.placeholder(
            shape=[
                None, FLAGS.resized_height, FLAGS.resized_width,
                FLAGS.agent_history_length
            ],
            dtype=tf.float32,
            name="Inputs")

        self.prev_rewards = tf.placeholder(
            shape=[None], dtype=tf.float32, name="Prev_Rewards")
        self.prev_rewards_onehot = tf.one_hot(
            tf.cast(self.prev_rewards, dtype=tf.int32),
            2,
            dtype=tf.float32,
            name="Prev_Rewards_OneHot")
        # From here on self.prev_rewards is the [batch, 1] expanded tensor,
        # not the placeholder created above.
        self.prev_rewards = tf.expand_dims(self.prev_rewards, 1,
                                           name="rewards")

        self.prev_actions = tf.placeholder(
            shape=[None], dtype=tf.int32, name="Prev_Actions")
        self.prev_actions_onehot = tf.one_hot(
            self.prev_actions,
            FLAGS.nb_actions,
            dtype=tf.float32,
            name="Prev_Actions_OneHot")

        self.prev_goal = tf.placeholder(
            shape=[None, FLAGS.hidden_dim],
            dtype=tf.float32,
            name="Prev_Goals")

        self.image_summaries = []
        if FLAGS.game not in flags.SUPPORTED_ENVS:
            self.conv0 = tf.contrib.layers.conv2d(
                self.inputs, 16, 8, 4, activation_fn=tf.nn.elu,
                scope="conv0")
            with tf.variable_scope('conv0'):
                # Re-open the layer's scope to fetch its kernel for the
                # image summary.
                tf.get_variable_scope().reuse_variables()
                weights = tf.get_variable('weights')
                grid = self.put_kernels_on_grid(weights)
                self.image_summaries.append(
                    tf.summary.image('kernels', grid, max_outputs=1))
            self.conv = tf.contrib.layers.conv2d(
                self.conv0, 32, 4, 2, activation_fn=tf.nn.elu,
                scope="conv1")
        else:
            self.conv = tf.contrib.layers.conv2d(
                self.inputs, 32, 5, 2, activation_fn=tf.nn.elu,
                scope="conv1")
            with tf.variable_scope('conv1'):
                tf.get_variable_scope().reuse_variables()
                weights = tf.get_variable('weights')
                grid = self.put_kernels_on_grid(weights)
                self.image_summaries.append(
                    tf.summary.image('kernels', grid, max_outputs=1))
            # NOTE(review): assumed to belong to this branch together with
            # the conv1 kernel summary -- confirm the intended nesting.
            with tf.variable_scope('inputs'):
                tf.get_variable_scope().reuse_variables()
                self.image_summaries.append(
                    tf.summary.image('input', self.inputs, max_outputs=100))

        self.conv_flat = tf.contrib.layers.flatten(self.conv)
        self.fc = tf.contrib.layers.fully_connected(self.conv_flat,
                                                    FLAGS.hidden_dim)
        self.fc = tf.contrib.layers.layer_norm(self.fc)
        self.f_percept = tf.nn.elu(self.fc, name="Zt")

        if FLAGS.game not in flags.SUPPORTED_ENVS:
            self.f_percept = tf.concat(
                [self.f_percept, self.prev_rewards], 1, name="Zt_r")
        else:
            self.f_percept = tf.concat(
                [self.f_percept, self.prev_rewards_onehot], 1, name="Zt_r")

        summary_f_percept_act = tf.contrib.layers.summarize_activation(
            self.f_percept)

        ######################################################################
        # Manager network
        if FLAGS.meta:
            self.f_Mspace = tf.concat(
                [self.f_percept, self.prev_goal], 1, name="Zt_r")
        else:
            self.f_Mspace = tf.identity(self.f_percept, name="Zt_r")

        self.f_Mspace = tf.contrib.layers.fully_connected(
            self.f_Mspace, FLAGS.hidden_dim)

        # The worker additionally sees the previous action; the manager
        # input above was taken before this concatenation.
        self.f_percept = tf.concat(
            [self.f_percept, self.prev_actions_onehot], 1, name="Zt_r")

        self.f_Mspace = tf.contrib.layers.layer_norm(self.f_Mspace)
        self.f_Mspace = tf.nn.elu(self.f_Mspace, name="St")
        summary_f_Mspace_act = tf.contrib.layers.summarize_activation(
            self.f_Mspace)

        m_rnn_in = tf.expand_dims(self.f_Mspace, [0], name="Mrnn_in")
        step_size = tf.shape(self.inputs)[:1]

        m_lstm_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(FLAGS.hidden_dim)
        m_c_init = np.zeros(
            (1, FLAGS.hidden_dim * FLAGS.manager_horizon), np.float32)
        m_h_init = np.zeros(
            (1, FLAGS.hidden_dim * FLAGS.manager_horizon), np.float32)
        self.m_state_init = [m_c_init, m_h_init]
        m_c_in = tf.placeholder(
            tf.float32, [1, FLAGS.hidden_dim * FLAGS.manager_horizon],
            name="Mrnn_c_in")
        m_h_in = tf.placeholder(
            tf.float32, [1, FLAGS.hidden_dim * FLAGS.manager_horizon],
            name="Mrnn_h_in")
        self.m_state_in = (m_c_in, m_h_in)
        m_state_in = tf.contrib.rnn.LSTMStateTuple(m_c_in, m_h_in)

        m_lstm_outputs, m_lstm_state = self.fast_dlstm(
            m_rnn_in, m_state_in, m_lstm_cell, FLAGS.manager_horizon,
            FLAGS.hidden_dim * FLAGS.manager_horizon)

        m_lstm_c, m_lstm_h = m_lstm_state
        self.m_state_out = (m_lstm_c[-1, :1, :], m_lstm_h[-1, :1, :])
        self.goals = tf.reshape(m_lstm_outputs, [-1, FLAGS.hidden_dim])
        # tf.contrib.layers.fully_connected has no `name` parameter; the
        # layer is labelled through `scope` instead.
        self.normalized_goals = tf.contrib.layers.fully_connected(
            self.goals, FLAGS.hidden_dim, activation_fn=tf.tanh, scope="Gt")
        summary_goals = tf.contrib.layers.summarize_activation(
            self.normalized_goals)

        def randomize_goals(t):
            """Pick either a random-normal goal or the goal of step `t`."""
            t = tf.cast(t, tf.int32)
            # Row 0: random goal, row 1: the network's goal at step t.
            packed_tensors = tf.stack([
                tf.random_normal([
                    FLAGS.hidden_dim,
                ]), self.normalized_goals[t, :]
            ])

            # Below the final probability the row index is sampled with
            # P(row 0) = prob_of_random_goal; otherwise row 1 is always used.
            to_update = tf.cond(
                tf.less(
                    self.prob_of_random_goal,
                    tf.constant(FLAGS.final_random_goal_prob,
                                dtype=tf.float32)),
                lambda: tf.cast(
                    tf.multinomial(
                        tf.log([[
                            self.prob_of_random_goal,
                            tf.subtract(tf.constant(1.0),
                                        self.prob_of_random_goal)
                        ]]), 1)[0][0], tf.int32),
                lambda: tf.constant(1, tf.int32))

            resulted_tensor = tf.gather(packed_tensors, to_update)
            return resulted_tensor

        # The step indices are passed as floats so that map_fn's inferred
        # output dtype matches the float goals it returns.
        self.randomized_goals = tf.map_fn(
            lambda t: randomize_goals(t),
            tf.to_float(tf.range(0, step_size[0])),
            name="random_gt")
        summary_random_goals = tf.contrib.layers.summarize_activation(
            self.randomized_goals)

        self.decrease_prob_of_random_goal = tf.assign_sub(
            self.prob_of_random_goal,
            tf.constant(
                (FLAGS.initial_random_goal_prob -
                 FLAGS.final_random_goal_prob) / FLAGS.explore_steps))

        m_fc_value_w = tf.get_variable(
            "M_Value_W",
            shape=[FLAGS.hidden_dim, 1],
            initializer=normalized_columns_initializer(1.0))
        # The manager value head reads the reshaped manager LSTM outputs
        # (previously an undefined name was referenced here).
        m_rnn_out = self.goals
        self.m_value = tf.matmul(m_rnn_out, m_fc_value_w, name="M_Value")
        summary_m_value_act = tf.contrib.layers.summarize_activation(
            self.m_value)

        ######################################################################
        # Worker network
        self.sum_prev_goals = tf.placeholder(
            shape=[None, FLAGS.hidden_dim],
            dtype=tf.float32,
            name="Prev_c_Goals_sum")

        w_rnn_in = tf.expand_dims(self.f_percept, [0], name="Wrnn_in")
        step_size = tf.shape(self.inputs)[:1]
        w_lstm_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
            FLAGS.goal_embedding_size * FLAGS.nb_actions)
        w_c_init = np.zeros((1, w_lstm_cell.state_size.c), np.float32)
        w_h_init = np.zeros((1, w_lstm_cell.state_size.h), np.float32)
        self.w_state_init = [w_c_init, w_h_init]
        w_c_in = tf.placeholder(
            tf.float32, [1, w_lstm_cell.state_size.c], name="Wrnn_c_in")
        w_h_in = tf.placeholder(
            tf.float32, [1, w_lstm_cell.state_size.h], name="Wrnn_h_in")
        self.w_state_in = (w_c_in, w_h_in)
        w_state_in = tf.contrib.rnn.LSTMStateTuple(w_c_in, w_h_in)

        w_lstm_outputs, w_lstm_state = tf.nn.dynamic_rnn(
            w_lstm_cell,
            w_rnn_in,
            initial_state=w_state_in,
            sequence_length=step_size,
            time_major=False)

        w_lstm_c, w_lstm_h = w_lstm_state
        self.w_state_out = (w_lstm_c[:1, :], w_lstm_h[:1, :])
        Ut = tf.reshape(
            w_lstm_outputs,
            [step_size[0], FLAGS.nb_actions, FLAGS.goal_embedding_size],
            name="Ut")
        Ut_flat = tf.reshape(
            w_lstm_outputs,
            [step_size[0], FLAGS.nb_actions * FLAGS.goal_embedding_size],
            name="Ut_flat")

        summary_wrnn_act = tf.contrib.layers.summarize_activation(Ut)

        goal_encoding = tf.contrib.layers.fully_connected(
            self.sum_prev_goals,
            FLAGS.goal_embedding_size,
            biases_initializer=None,
            scope="goal_emb")

        # Per-action logits: each action's embedding row of Ut dotted with
        # the goal encoding.
        interm_rez = tf.squeeze(
            tf.matmul(Ut, tf.expand_dims(goal_encoding, 2)), 2)
        interm_rez = tf.contrib.layers.flatten(interm_rez)
        self.w_policy = tf.nn.softmax(interm_rez, name="W_Policy")

        summary_w_policy_act = tf.contrib.layers.summarize_activation(
            self.w_policy)

        w_fc_value_w = tf.get_variable(
            "W_Value_W",
            shape=[
                FLAGS.nb_actions * FLAGS.goal_embedding_size +
                FLAGS.goal_embedding_size, 1
            ],
            initializer=normalized_columns_initializer(1.0))
        self.w_value = tf.matmul(
            tf.concat([Ut_flat, goal_encoding], 1),
            w_fc_value_w,
            name="W_Value")

        summary_w_value_act = tf.contrib.layers.summarize_activation(
            self.w_value)

        if scope != 'global':
            self.w_extrinsic_return = tf.placeholder(
                shape=[None], dtype=tf.float32)
            self.m_extrinsic_return = tf.placeholder(
                shape=[None], dtype=tf.float32)
            self.w_intrinsic_return = tf.placeholder(
                shape=[None], dtype=tf.float32)

            def gather_state_at_horiz(t):
                """Return the manager state `manager_horizon` steps after t,
                clamped to the last available step."""
                t = tf.cast(t, tf.int32)
                f_Mspace_c = tf.gather(
                    self.f_Mspace,
                    tf.minimum(
                        t + tf.constant(FLAGS.manager_horizon,
                                        dtype=tf.int32),
                        step_size[0] - 1))
                return f_Mspace_c

            self.f_Mspace_c = tf.cast(
                tf.map_fn(
                    lambda t: gather_state_at_horiz(t),
                    tf.to_float(tf.range(0, step_size[0])),
                    name="state_at_horiz"),
                dtype=tf.float32)
            self.state_diff = self.f_Mspace_c - self.f_Mspace
            self.cos_sim_state_diff = self.cosine_distance(
                tf.stop_gradient(self.state_diff),
                self.normalized_goals,
                dim=1)

            self.m_advantages = self.m_extrinsic_return - tf.stop_gradient(
                tf.reshape(self.m_value, [-1]))
            self.goals_loss = -tf.reduce_sum(
                self.m_advantages * self.cos_sim_state_diff)
            self.m_value_loss = FLAGS.m_beta_v * tf.reduce_sum(
                tf.square(self.m_extrinsic_return -
                          tf.reshape(self.m_value, [-1])))

            self.actions = tf.placeholder(
                shape=[None], dtype=tf.int32, name="Actions")
            self.actions_onehot = tf.one_hot(
                self.actions,
                FLAGS.nb_actions,
                dtype=tf.float32,
                name="Actions_Onehot")
            self.responsible_outputs = tf.reduce_sum(
                self.w_policy * self.actions_onehot, [1])

            self.intrinsic_return = FLAGS.alpha * self.w_intrinsic_return
            self.total_return = (self.w_extrinsic_return +
                                 self.intrinsic_return)
            self.w_advantages = self.total_return - tf.stop_gradient(
                tf.reshape(self.w_value, [-1]))

            # Loss functions
            self.w_value_loss = FLAGS.w_beta_v * tf.reduce_sum(
                tf.square(self.total_return -
                          tf.reshape(self.w_value, [-1])))
            # 1e-7 inside the logs keeps them finite for zero probabilities.
            self.entropy = -tf.reduce_sum(
                self.w_policy * tf.log(self.w_policy + 1e-7))

            self.w_policy_loss = -tf.reduce_sum(
                tf.log(self.responsible_outputs + 1e-7) *
                self.w_advantages) - self.entropy * FLAGS.beta_e

            self.loss = (self.w_value_loss + self.w_policy_loss +
                         self.m_value_loss + self.goals_loss)

            local_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            self.gradients = tf.gradients(self.loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)
            grads, self.grad_norms = tf.clip_by_global_norm(
                self.gradients, FLAGS.gradient_clip_value)

            self.worker_summaries = [
                summary_f_percept_act, summary_f_Mspace_act, summary_goals,
                summary_random_goals, summary_m_value_act, summary_wrnn_act,
                summary_w_policy_act, summary_w_value_act
            ]
            for grad, weight in zip(grads, local_vars):
                self.worker_summaries.append(
                    tf.summary.histogram(weight.name + '_grad', grad))
                self.worker_summaries.append(
                    tf.summary.histogram(weight.name, weight))

            self.merged_summary = tf.summary.merge(self.worker_summaries)

            global_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads = trainer.apply_gradients(
                zip(grads, global_vars))
def train(self, config):
    """Train DCGAN.

    Loads the dataset named by `config.dataset`, builds the discriminator
    and generator Adam update ops, optionally resumes from a checkpoint and
    then runs `config.epoch` epochs. Every 703 steps the model is
    checkpointed, the validation negative log-likelihood and inception score
    are evaluated, samples are written to `config.sample_dir`, and the
    running curves are saved to `graph_data.npy` in the same directory.
    Finally `self.test_model` is run on the test split.

    Args:
        config: Settings object read for `dataset`, `learning_rate`,
            `beta1`, `batch_size`, `epoch`, `sample_dir` and
            `checkpoint_dir`.

    Raises:
        ValueError: If `config.dataset` is neither "mnist" nor "cifar".
    """
    seed = 0
    np.random.seed(seed)
    tf.set_random_seed(seed)

    if config.dataset == "mnist":
        data_X, val_data, test_data, _ = mnist_data.load_mnist()
    elif config.dataset == "cifar":
        data_X, val_data, test_data = cifar_data.load_cifar()
    else:
        # Fail here rather than with a NameError on data_X further down.
        raise ValueError("Unknown dataset: %s" % config.dataset)

    if self.model_type == "nice":
        val_data = np.reshape(val_data, (-1, self.image_size))
        test_data = np.reshape(test_data, (-1, self.image_size))

    lr = config.learning_rate
    self.learning_rate = tf.placeholder(tf.float32, [], name='lr')

    # ---- Discriminator update op ----
    d_optim_ = tf.train.AdamOptimizer(self.learning_rate,
                                      beta1=config.beta1, beta2=0.9)
    d_grad = d_optim_.compute_gradients(self.d_loss, var_list=self.d_vars)
    # tf.global_norm expects tensors, not (gradient, variable) pairs; passing
    # the pairs would fold the variables into the reported magnitude.
    d_grad_mag = tf.global_norm([grad for grad, _ in d_grad])
    d_optim = d_optim_.apply_gradients(d_grad)

    # ---- Generator update op ----
    g_optim_ = tf.train.AdamOptimizer(self.learning_rate,
                                      beta1=config.beta1, beta2=0.9)
    if self.n_critic <= 0:
        # No critic steps: pure likelihood training.
        g_grad = g_optim_.compute_gradients(self.train_log_likelihood,
                                            var_list=self.g_vars)
    else:
        if self.like_reg > 0:
            if self.model_type == "real_nvp":
                # Gradients of the two terms are computed separately and
                # summed per variable.
                g_grad_1 = g_optim_.compute_gradients(
                    self.g_loss / self.like_reg, var_list=self.g_vars)
                g_grad_2 = g_optim_.compute_gradients(
                    self.train_log_likelihood, var_list=self.g_vars)
                grads_1, _ = zip(*g_grad_1)
                grads_2, _ = zip(*g_grad_2)
                sum_grad = [g1 + g2 for g1, g2 in zip(grads_1, grads_2)]
                g_grad = list(zip(sum_grad, [var for _, var in g_grad_1]))
            else:
                g_grad = g_optim_.compute_gradients(
                    self.g_loss / self.like_reg + self.train_log_likelihood,
                    var_list=self.g_vars)
        else:
            g_grad = g_optim_.compute_gradients(self.g_loss,
                                                var_list=self.g_vars)

    g_grad_mag = tf.global_norm([grad for grad, _ in g_grad])
    g_optim = g_optim_.apply_gradients(g_grad)

    # ---- Variable initialization (retried once on failure) ----
    try:
        # for data-dependent init (not implemented)
        if self.model_type == "real_nvp":
            self.sess.run(tf.global_variables_initializer(),
                          {self.x_init: data_X[0:config.batch_size]})
        else:
            self.sess.run(tf.global_variables_initializer())
    except Exception:
        if self.model_type == "real_nvp":
            self.sess.run(tf.global_variables_initializer(),
                          {self.x_init: data_X[0:config.batch_size]})
        else:
            self.sess.run(tf.global_variables_initializer())

    self.g_sum = merge_summary([
        self.z_sum, self.d__sum, self.G_sum, self.d_loss_fake_sum,
        self.g_loss_sum
    ])
    self.d_sum = merge_summary(
        [self.z_sum, self.d_sum, self.d_loss_real_sum, self.d_loss_sum])
    self.writer = SummaryWriter("./" + self.log_dir, self.sess.graph)

    counter = 1
    start_time = time.time()
    could_load, checkpoint_counter = self.load(self.checkpoint_dir)
    if could_load:
        counter = checkpoint_counter
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    # ---- A fixed batch of zs for generating samples ----
    if self.prior == "uniform":
        sample_z = np.random.uniform(-1, 1,
                                     size=(self.sample_num, self.z_dim))
    elif self.prior == "logistic":
        sample_z = np.random.logistic(loc=0., scale=1.,
                                      size=(self.sample_num, self.z_dim))
    elif self.prior == "gaussian":
        sample_z = np.random.normal(0.0, 1.0,
                                    size=(self.sample_num, self.z_dim))
    else:
        print("ERROR: Unrecognized prior...exiting")
        exit(-1)

    # ---- Evaluate the initial model ----
    val_nlli = self.evaluate_neg_loglikelihood(val_data, config)
    curr_inception_score = self.calculate_inception_and_mode_score()
    print("INITIAL TEST: val neg logli: %.8f,incep score: %.8f" %
          (val_nlli, curr_inception_score[0]))

    # Needed by the periodic evaluation on both the fresh-start and the
    # resume path, so it is defined before the branch below.
    # NOTE(review): if data_X is an ndarray this slice is a view, so the
    # in-place shuffles below change its contents -- confirm that is intended.
    sample_inputs = data_X[0:config.batch_size]

    if counter > 1:
        # Resuming: restore the curves saved by an earlier run.
        old_data = np.load("./" + config.sample_dir + '/graph_data.npy')
        self.best_val_nlli = old_data[2]
        self.best_model_counter = old_data[3]
        self.best_model_path = old_data[4]
        self.val_nlli_list = old_data[1]
        self.counter_list = old_data[5]
        self.batch_train_nlli_list = old_data[-4]
        self.inception_list = old_data[-2]
        self.samples_list = old_data[0]
        self.loss_list = old_data[-1]
        manifold_h, manifold_w = old_data[6]
    else:
        self.writer.add_summary(
            tf.Summary(value=[
                tf.Summary.Value(tag="Val Neg Log-likelihood",
                                 simple_value=val_nlli)
            ]), counter)

        self.best_val_nlli = val_nlli
        self.best_model_counter = counter
        self.best_model_path = self.save(config.checkpoint_dir, counter)
        self.val_nlli_list = [val_nlli]
        self.counter_list = [1]
        self.batch_train_nlli_list = []
        self.inception_list = [curr_inception_score]
        self.samples_list = self.sess.run([self.sampler],
                                          feed_dict={
                                              self.z: sample_z,
                                          })
        samples = self.samples_list[0]
        manifold_h = int(np.ceil(np.sqrt(samples.shape[0])))
        manifold_w = int(np.floor(np.sqrt(samples.shape[0])))
        self.loss_list = self.sess.run(
            [self.d_loss_real, self.d_loss_fake],
            feed_dict={
                self.z: sample_z,
                self.inputs: sample_inputs,
            })

    # ---- Main training loop ----
    for epoch in xrange(config.epoch):
        np.random.shuffle(data_X)
        batch_idxs = len(data_X) // config.batch_size

        for idx in xrange(0, batch_idxs):
            sys.stdout.flush()
            batch_images = data_X[idx * config.batch_size:
                                  (idx + 1) * config.batch_size]

            if self.prior == "uniform":
                batch_z = np.random.uniform(
                    -1, 1, [config.batch_size, self.z_dim]) \
                    .astype(np.float32)
            elif self.prior == "logistic":
                batch_z = np.random.logistic(
                    loc=0., scale=1.0,
                    size=[config.batch_size, self.z_dim]) \
                    .astype(np.float32)
            elif self.prior == "gaussian":
                batch_z = np.random.normal(
                    0.0, 1.0, size=(config.batch_size, self.z_dim))
            else:
                print("ERROR: Unrecognized prior...exiting")
                exit(-1)

            # Update D network n_critic times on the same batch.
            for _ in range(self.n_critic):
                _, d_g_mag, errD_fake, errD_real, summary_str = \
                    self.sess.run(
                        [
                            d_optim, d_grad_mag, self.d_loss_fake,
                            self.d_loss_real, self.d_sum
                        ],
                        feed_dict={
                            self.inputs: batch_images,
                            self.z: batch_z,
                            self.learning_rate: lr,
                        })
            if self.n_critic > 0:
                self.writer.add_summary(summary_str, counter)

            # Update G network. Real inputs are only fed when the
            # likelihood term is part of the generator objective.
            if self.like_reg > 0 or self.n_critic <= 0:
                _, g_g_mag, errG, summary_str = self.sess.run(
                    [g_optim, g_grad_mag, self.g_loss, self.g_sum],
                    feed_dict={
                        self.z: batch_z,
                        self.learning_rate: lr,
                        self.inputs: batch_images,
                    })
            else:
                _, g_g_mag, errG, summary_str = self.sess.run(
                    [g_optim, g_grad_mag, self.g_loss, self.g_sum],
                    feed_dict={
                        self.z: batch_z,
                        self.learning_rate: lr,
                    })
            self.writer.add_summary(summary_str, counter)

            # Log-likelihood of the current training batch.
            batch_images_nl = batch_images
            if self.model_type == "nice":
                batch_images_nl = np.reshape(
                    batch_images_nl,
                    (self.batch_size, -1))[:, self.permutation]
            b_train_nlli = self.sess.run([self.log_likelihood],
                                         feed_dict={
                                             self.log_like_batch:
                                             batch_images_nl,
                                         })
            b_train_nlli = b_train_nlli[0]

            self.batch_train_nlli_list.append(b_train_nlli)
            if self.n_critic > 0:
                self.loss_list.append([errD_real, errD_fake])
                self.writer.add_summary(
                    tf.Summary(value=[
                        tf.Summary.Value(
                            tag="training loss",
                            simple_value=-(errD_fake + errD_real))
                    ]), counter)
            self.writer.add_summary(
                tf.Summary(value=[
                    tf.Summary.Value(tag="Batch train Neg Log-likelihood",
                                     simple_value=b_train_nlli)
                ]), counter)

            counter += 1
            lr = max(lr * self.lr_decay, self.min_lr)

            # Periodic checkpoint, evaluation and sample dump.
            if np.mod(counter, 703) == 1:
                if self.n_critic > 0:
                    print("Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f, d_grad_mag: %.8f, g_grad_mag: %.8f, lr: %.8f" \
                        % (epoch, idx, batch_idxs,
                           time.time() - start_time, errD_fake+errD_real,
                           errG, d_g_mag, g_g_mag, lr))
                else:
                    print("Epoch: [%2d] [%4d/%4d] time: %4.4f, g_loss: %.8f, g_grad_mag: %.8f, lr: %.8f" \
                        % (epoch, idx, batch_idxs,
                           time.time() - start_time, errG, g_g_mag, lr))

                curr_model_path = self.save(config.checkpoint_dir, counter)

                val_nlli = self.evaluate_neg_loglikelihood(val_data, config)
                curr_inception_score = \
                    self.calculate_inception_and_mode_score()

                print("[LogLi (%d,%d)]: val neg logli: %.8f, ince: %.8f, train lli: %.8f" %
                      (epoch, idx, val_nlli, curr_inception_score[0],
                       np.mean(self.batch_train_nlli_list[-700:])))

                self.writer.add_summary(
                    tf.Summary(value=[
                        tf.Summary.Value(tag="Val Neg Log-likelihood",
                                         simple_value=val_nlli)
                    ]), counter)

                if val_nlli < self.best_val_nlli:
                    self.best_val_nlli = val_nlli
                    self.best_model_counter = counter
                    self.best_model_path = curr_model_path

                self.val_nlli_list.append(val_nlli)
                self.counter_list.append(counter)

                samples, d_loss, g_loss = self.sess.run(
                    [self.sampler, self.d_loss, self.g_loss],
                    feed_dict={
                        self.z: sample_z,
                        self.inputs: sample_inputs,
                    })
                self.samples_list.append(samples)
                manifold_h = int(np.ceil(np.sqrt(samples.shape[0])))
                manifold_w = int(np.floor(np.sqrt(samples.shape[0])))
                self.inception_list.append(curr_inception_score)
                save_images(
                    samples, [manifold_h, manifold_w],
                    './{}/train_{:02d}_{:04d}.png'.format(
                        config.sample_dir, epoch, idx))
                print("[Sample] d_loss: %.8f, g_loss: %.8f" %
                      (d_loss, g_loss))

                np.save("./" + config.sample_dir + '/graph_data', [
                    self.samples_list, self.val_nlli_list,
                    self.best_val_nlli, self.best_model_counter,
                    self.best_model_path, self.counter_list,
                    [manifold_h, manifold_w], self.batch_train_nlli_list,
                    self.inception_list, self.loss_list
                ])

    # Final dump of the curves, then evaluation on the test split.
    np.save("./" + config.sample_dir + '/graph_data', [
        self.samples_list, self.val_nlli_list, self.best_val_nlli,
        self.best_model_counter, self.best_model_path, self.counter_list,
        [manifold_h, manifold_w], self.batch_train_nlli_list,
        self.inception_list, self.loss_list
    ])
    self.test_model(test_data, config)
def get_train_ops(loss,
                  tf_variables,
                  train_step,
                  clip_mode=None,
                  grad_bound=None,
                  l2_reg=1e-4,
                  lr_warmup_val=None,
                  lr_warmup_steps=100,
                  lr_init=0.1,
                  lr_dec_start=0,
                  lr_dec_every=10000,
                  lr_dec_rate=0.1,
                  lr_dec_min=None,
                  lr_cosine=False,
                  lr_max=None,
                  lr_min=None,
                  lr_T_0=None,
                  lr_T_mul=None,
                  num_train_batches=None,
                  optim_algo=None,
                  sync_replicas=False,
                  num_aggregate=None,
                  num_replicas=None,
                  get_grad_norms=False,
                  moving_average=None,
                  is_controller=False):
    """Build the learning-rate schedule, optimizer and training op for `loss`.

    Args:
      loss: scalar loss tensor; an L2 penalty over `tf_variables` is added
        when `l2_reg > 0`.
      tf_variables: list of variables to differentiate and update.
      train_step: global-step variable; drives the schedule and is passed as
        `global_step` to `apply_gradients`.
      clip_mode: "global" (clip by global norm), "norm" (clip every gradient
        tensor by its own norm), or None. Requires `grad_bound`.
      grad_bound: clipping threshold used by `clip_mode`.
      l2_reg: coefficient of the L2 penalty.
      lr_warmup_val, lr_warmup_steps: when `lr_warmup_val` is not None it is
        used as the learning rate while `train_step < lr_warmup_steps`.
      lr_init, lr_dec_start, lr_dec_every, lr_dec_rate, lr_dec_min: staircase
        exponential decay parameters (used when `lr_cosine` is False).
      lr_cosine, lr_max, lr_min, lr_T_0, lr_T_mul, num_train_batches: cosine
        schedule with restarts (used when `lr_cosine` is True).
      optim_algo: "momentum", "sgd" or "adam".
      sync_replicas, num_aggregate, num_replicas: wrap the optimizer in a
        `SyncReplicasOptimizer`.
      get_grad_norms: also return the per-variable gradient norms.
      moving_average: decay for a `MovingAverageOptimizer` wrapper, or None.
      is_controller: when False the gradients are computed and applied through
        a fixed loss-scale (mixed precision) optimizer; when True plain
        `tf.gradients` / `opt.apply_gradients` are used.

    Returns:
      `(train_op, learning_rate, grad_norm, opt)`, followed by the dict
      `grad_norms` (variable name -> norm tensor, measured before clipping)
      when `get_grad_norms` is True. `grad_norm` is the global norm before
      clipping and `opt` is the (possibly wrapped) base optimizer.

    Raises:
      ValueError: unknown `optim_algo`.
      NotImplementedError: unknown `clip_mode`.
    """
    # TODO: casting the variables and the loss to float16 here was tried and
    # disabled; the controller is deliberately left unquantized.

    if l2_reg > 0:
        if not is_controller:
            # Accumulate the penalty in float32 regardless of variable dtype.
            l2_losses = [tf.reduce_sum(tf.cast(var, tf.float32)**2)
                         for var in tf_variables]
        else:
            l2_losses = [tf.reduce_sum(var**2) for var in tf_variables]
        l2_loss = tf.add_n(l2_losses)
        loss += l2_reg * l2_loss

    if lr_cosine:
        assert lr_max is not None, "Need lr_max to use lr_cosine"
        assert lr_min is not None, "Need lr_min to use lr_cosine"
        assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
        assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
        assert num_train_batches is not None, ("Need num_train_batches to use"
                                               " lr_cosine")

        # train_step counts single batches, so this is the epoch index.
        curr_epoch = train_step // num_train_batches

        last_reset = tf.Variable(0, dtype=tf.int32, trainable=False,
                                 name="last_reset")
        T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
        T_curr = curr_epoch - last_reset

        def _cosine_lr():
            # Must be called inside the tf.cond branch so the ops live there.
            rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
            return lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))

        def _update():
            # Period elapsed: restart the cycle and stretch the next period.
            update_last_reset = tf.assign(last_reset, curr_epoch,
                                          use_locking=True)
            update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
            with tf.control_dependencies([update_last_reset, update_T_i]):
                return _cosine_lr()

        def _no_update():
            return _cosine_lr()

        learning_rate = tf.cond(tf.greater_equal(T_curr, T_i),
                                _update, _no_update)
    else:
        learning_rate = tf.train.exponential_decay(
            lr_init, tf.maximum(train_step - lr_dec_start, 0), lr_dec_every,
            lr_dec_rate, staircase=True)
        if lr_dec_min is not None:
            learning_rate = tf.maximum(learning_rate, lr_dec_min)

    if lr_warmup_val is not None:
        learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
                                lambda: lr_warmup_val,
                                lambda: learning_rate)

    if optim_algo == "momentum":
        opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_locking=True,
                                         use_nesterov=True)
    elif optim_algo == "sgd":
        opt = tf.train.GradientDescentOptimizer(learning_rate,
                                                use_locking=True)
    elif optim_algo == "adam":
        opt = tf.train.AdamOptimizer(learning_rate, beta1=0.0, epsilon=1e-3,
                                     use_locking=True)
    else:
        raise ValueError("Unknown optim_algo {}".format(optim_algo))

    if sync_replicas:
        assert num_aggregate is not None, "Need num_aggregate to sync."
        assert num_replicas is not None, "Need num_replicas to sync."
        opt = tf.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=num_aggregate,
            total_num_replicas=num_replicas,
            use_locking=True)

    if moving_average is not None:
        opt = tf.contrib.opt.MovingAverageOptimizer(
            opt, average_decay=moving_average)

    if not is_controller:
        # Fixed loss scale of 5000 (TODO from the original: maybe try 10000).
        loss_scale_manager = tf.contrib.mixed_precision.FixedLossScaleManager(
            5000)
        loss_scale_optimizer = tf.contrib.mixed_precision.LossScaleOptimizer(
            opt, loss_scale_manager)
        grads_and_vars = loss_scale_optimizer.compute_gradients(
            loss, tf_variables)
        grads = [grad for grad, _ in grads_and_vars]
    else:
        grads = tf.gradients(loss, tf_variables)

    # Norms are recorded before any clipping is applied.
    grad_norm = tf.global_norm(grads)
    grad_norms = {}
    for v, g in zip(tf_variables, grads):
        if v is None or g is None:
            continue
        if isinstance(g, tf.IndexedSlices):
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values**2))
        else:
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g**2))

    if clip_mode is not None:
        assert grad_bound is not None, "Need grad_bound to clip gradients."
        if clip_mode == "global":
            grads, _ = tf.clip_by_global_norm(grads, grad_bound)
        elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if g is None:
                    # Variable does not influence the loss; nothing to clip.
                    c_g = None
                elif isinstance(g, tf.IndexedSlices):
                    # IndexedSlices takes (values, indices, dense_shape).
                    c_g = tf.IndexedSlices(
                        tf.clip_by_norm(g.values, grad_bound),
                        g.indices,
                        g.dense_shape)
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                # Keep the clipped tensor (the unclipped one used to be
                # appended, which silently disabled this mode).
                clipped.append(c_g)
            grads = clipped
        else:
            raise NotImplementedError(
                "Unknown clip_mode {}".format(clip_mode))

    try:
        if not is_controller:
            assert (len(grads) == len(tf_variables))
            grads_and_vars = list(zip(grads, tf_variables))
            train_op = loss_scale_optimizer.apply_gradients(
                grads_and_vars, global_step=train_step)
        else:
            train_op = opt.apply_gradients(zip(grads, tf_variables),
                                           global_step=train_step)
    except Exception as e:
        # Report and propagate; continuing would only fail later on the
        # unbound train_op.
        print("\ncould not apply_gradients(), exception: {}".format(e))
        raise

    if get_grad_norms:
        return train_op, learning_rate, grad_norm, opt, grad_norms
    else:
        return train_op, learning_rate, grad_norm, opt
def main(argv=None):
    """Parse the command line and store hyper-parameters in module globals.

    Reads `sys.argv` (the `argv` parameter is accepted but not used) and, for
    every recognised long option, converts the value and assigns it to the
    module-level variable of the same name. Prints the usage line and exits
    with status 2 on an unknown option; `-h` prints it and exits normally.
    """
    usage = ('tsc_main_h_par.py --max_grad_norm <> --num_epochs <> --learning_rate <> --dropout <> --num_layers <> --num_steps <> --hidden_size <> --batch_size <>')
    # Option name (also the global variable name) -> value converter.
    option_types = {
        'max_grad_norm': int,
        'num_epochs': int,
        'learning_rate': float,
        'dropout': float,
        'num_layers': int,
        'num_steps': int,
        'hidden_size': int,
        'batch_size': int,
    }

    print ('Number of arguments:', len(sys.argv), 'arguments.')
    print ('Argument List:', str(sys.argv))
    try:
        opts, args = getopt.getopt(sys.argv[1:], "h", ["max_grad_norm=", "num_epochs=", "learning_rate=" ,"dropout=", "num_layers=", "num_steps=", "hidden_size=", "batch_size="])
    except getopt.GetoptError:
        print (usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print (usage)
            sys.exit()
        name = opt[2:]  # strip the leading "--"
        if name in option_types:
            globals()[name] = option_types[name](arg)


def normalize_matrix(matrix):
    """Min-max scale every column of `matrix` to [0, 1], in place.

    Columns whose values are all equal are mapped to 0 instead of producing
    NaN from a 0/0 division. Returns the same (modified) array.
    """
    col_min = matrix.min(axis=0)
    col_range = np.ptp(matrix, axis=0)
    col_range[col_range == 0] = 1  # constant column: avoid division by zero
    matrix[:] = (matrix - col_min) / col_range
    return matrix


def _load_shuffled(csv_path):
    """Load a comma-separated file, shuffle its rows, split off the last column.

    Returns `(features, labels)` where `labels` is the last column.
    """
    data = np.genfromtxt(csv_path, delimiter=',', dtype=float)
    rows, columns = data.shape
    order = np.arange(rows)
    np.random.shuffle(order)
    shuffled = data[order]
    return shuffled[:, 0:-1], shuffled[:, columns - 1]


def read_datasets(train_csv, validation_csv, test_csv):
    """Load, shuffle and jointly normalise the three CSV splits.

    Each file is read as a float matrix whose last column is the label. The
    feature columns of all three splits are min-max normalised together
    (column-wise statistics over the concatenation) and labels are cast to
    uint8.

    Returns:
      An object with attributes `train`, `validation`, `test`,
      `train_labels`, `validation_labels` and `test_labels`.
    """
    class Data(object):
        pass
    data_sets = Data()

    mTrain, train_labels = _load_shuffled(train_csv)
    mValidation, validation_labels = _load_shuffled(validation_csv)
    mTest, test_labels = _load_shuffled(test_csv)

    mData = np.concatenate((mTrain, mValidation, mTest))
    mData = normalize_matrix(mData)

    trainSize = len(mTrain)
    validationSize = len(mValidation)
    data_sets.train = mData[0:trainSize, :]
    data_sets.validation = mData[trainSize:trainSize + validationSize, :]
    # Slice from the front: a negative-index slice would return the whole
    # matrix when the test split is empty.
    data_sets.test = mData[trainSize + validationSize:, :]

    data_sets.train_labels = train_labels.astype(np.uint8)
    data_sets.validation_labels = validation_labels.astype(np.uint8)
    data_sets.test_labels = test_labels.astype(np.uint8)
    return data_sets


def sample_batch(X_train,y_train,batch_size,num_steps):
    """Sample a random batch: `batch_size` distinct rows, one shared window.

    The window has `num_steps` consecutive columns; its start is drawn
    uniformly from every valid position (including the last one, so series
    of length exactly `num_steps` are supported).
    """
    N, data_len = X_train.shape
    ind_N = np.random.choice(N, batch_size, replace=False).astype(int)
    ind_start = int(np.random.choice(data_len - num_steps + 1))
    X_batch = X_train[ind_N, ind_start:ind_start + num_steps]
    y_batch = y_train[ind_N]
    return X_batch, y_batch


def check_test(X_test,y_test,batch_size,num_steps):
    """Evaluate accuracy over the test set in consecutive full batches.

    Rows are taken in order, so element i of the returned predictions belongs
    to `y_test[i]`; trailing rows that do not fill a batch are not evaluated.
    Each batch uses a randomly positioned window of `num_steps` columns.
    Uses the module-level `sess`, `accuracy`, `predictions`, `input_data`,
    `targets` and `keep_prob`.

    Returns:
      `(mean batch accuracy, 1-D array of predictions)`.
    """
    N, data_len = X_test.shape
    num_batch = N // batch_size
    test_acc = np.zeros(num_batch)
    batch_predictions = []
    for i in range(num_batch):
        rows = slice(i * batch_size, (i + 1) * batch_size)
        ind_start = int(np.random.choice(data_len - num_steps + 1))
        X_batch = X_test[rows, ind_start:ind_start + num_steps]
        y_batch = y_test[rows]
        test_acc[i], test_pred = sess.run([accuracy, predictions], feed_dict = {input_data: X_batch, targets: y_batch, keep_prob:1})
        batch_predictions.append(test_pred)
    if batch_predictions:
        all_predictions = np.concatenate(batch_predictions)
    else:
        all_predictions = np.array([], dtype=np.int64)
    return np.mean(test_acc), all_predictions


# ---- Load the data ----
data_sets = read_datasets('Data/Test/train_data.csv','Data/Test/validation_data.csv', 'Data/Test/test_data.csv')

X_train = data_sets.train
train_size = X_train.shape[0]
max_iterations = int((num_epochs * train_size) // batch_size)
X_val = data_sets.validation
X_test = data_sets.test
N = X_train.shape[0]
Ntest = X_test.shape[0]
y_train = data_sets.train_labels
y_val = data_sets.validation_labels
y_test = data_sets.test_labels
num_classes = len(np.unique(y_train))

# Collect the costs in a numpy fashion
epochs = np.floor(batch_size*max_iterations / N)
print('Train with approximately %d epochs' %(epochs))
# One column per evaluation (every 100 iterations, starting at iteration 0).
perf_collect = np.zeros((3, (max_iterations + 99) // 100))

# ---- Placeholders ----
input_data = tf.placeholder(tf.float32, shape=(batch_size, num_steps), name = 'input_data')
print(input_data)
targets = tf.placeholder(tf.int64, shape=(batch_size), name='Targets')
print(targets)
# Used later on for dropout. At test time, we pass 1.0
keep_prob = tf.placeholder("float", name = 'Drop_out_keep_prob')

with tf.name_scope("LSTM_setup") as scope:
    cell = tf.nn.rnn_cell.LSTMCell(hidden_size, state_is_tuple=True)
    cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)

# We have only one input dimension, but we generalize our code for future expansion
inputs = tf.expand_dims(input_data, 2)

# Define the recurrent nature of the LSTM
with tf.name_scope("LSTM") as scope:
    outputs = []
    state = initial_state
    with tf.variable_scope("LSTM_state"):
        for time_step in range(num_steps):
            if time_step > 0:
                # Re-use variables only after first time-step
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(inputs[:, time_step, :], state)
            outputs.append(cell_output)  # cell_output is [batch_size x hidden_size]
    # Average the cell outputs over all time steps.
    output = tf.reduce_mean(tf.pack(outputs),0)

# Generate a classification from the time-averaged cell output.
# Note, this is where timeseries classification differs from sequence to
# sequence modelling: a single softmax is applied per series.
with tf.name_scope("Softmax") as scope:
    with tf.variable_scope("Softmax_params"):
        softmax_w = tf.get_variable("softmax_w", [hidden_size, num_classes])
        softmax_b = tf.get_variable("softmax_b", [num_classes])
    logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    # Use sparse Softmax because we have mutually exclusive classes
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits,targets,name = 'Sparse_softmax')
    cost = tf.reduce_sum(loss) / batch_size

with tf.name_scope("Evaluating_accuracy") as scope:
    predictions = tf.argmax(logits,1)
    correct_prediction = tf.equal(predictions,targets)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    tf.scalar_summary("accuracy", accuracy)

# ---- Optimizer ----
with tf.name_scope("Optimizer") as scope:
    tvars = tf.trainable_variables()
    # We clip the gradients to prevent explosion
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),max_grad_norm)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    # Materialise the pairs: they are iterated twice (apply + summaries) and
    # a bare zip() is a one-shot iterator on Python 3.
    gradients = list(zip(grads, tvars))
    train_op = optimizer.apply_gradients(gradients)
    # Add histograms for variables, gradients and gradient norms.
    for gradient, variable in gradients:
        if isinstance(gradient, ops.IndexedSlices):
            grad_values = gradient.values
        else:
            grad_values = gradient
        h1 = tf.histogram_summary(variable.name, variable)
        h2 = tf.histogram_summary(variable.name + "/gradients", grad_values)
        h3 = tf.histogram_summary(variable.name + "/gradient_norm", tf.global_norm([grad_values]))

# Final code for the TensorBoard
merged = tf.merge_all_summaries()

# ---- Session time ----
sess = tf.Session()  # Depending on your use, do not forget to close the session
writer = tf.train.SummaryWriter(dir_path + "/logs/log_tb")
sess.run(tf.initialize_all_variables())

step = 0
# Start the moving average at the loss of a uniform guess.
cost_train_ma = -np.log(1/float(num_classes)+1e-9)
for i in range(max_iterations):
    # Sample batch for training
    X_batch, y_batch = sample_batch(X_train,y_train,batch_size,num_steps)

    # Next line does the actual training
    cost_train, _ = sess.run([cost,train_op],feed_dict = {input_data: X_batch,targets: y_batch,keep_prob:dropout})
    cost_train_ma = cost_train_ma*0.99 + cost_train*0.01
    if i%100 == 0:
        # Evaluate training performance
        perf_collect[0,step] = cost_train
        # Evaluate validation performance
        X_batch, y_batch = sample_batch(X_val,y_val,batch_size,num_steps)
        result = sess.run([cost,merged,accuracy],feed_dict = {input_data: X_batch, targets: y_batch, keep_prob:1})
        cost_val = result[0]
        perf_collect[1,step] = cost_val
        acc_val = result[2]
        perf_collect[2,step] = acc_val
        print('At %5.0f out of %5.0f: Cost is TRAIN %.3f(%.3f) VAL %.3f and val acc is %.3f' %(i,max_iterations,cost_train,cost_train_ma,cost_val,acc_val))
        # Write information to TensorBoard
        summary_str = result[1]
        writer.add_summary(summary_str, i)
        writer.flush()
        step +=1

# Bound to a separate name so the `predictions` tensor used inside
# check_test() is not overwritten by the numpy result.
acc_test, test_predictions = check_test(X_test,y_test,batch_size,num_steps)

# ---- Additional plots ----
print('The accuracy on the test data is %.3f' %(acc_test))
plt.plot(perf_collect[0],label='Train')
plt.plot(perf_collect[1],label = 'Valid')
plt.plot(perf_collect[2],label = 'Valid accuracy')
plt.axis([0, step, 0, np.max(perf_collect)])
plt.legend()
plt.show()

# check_test() drops the trailing partial batch, so compare against the
# matching prefix of the labels.
y_test_eval = y_test[0:test_predictions.shape[0]]
print('\nY Results')
print('Test accuracy is: %.1f%%' % (100.0 * accuracy_score(y_test_eval,test_predictions)))
print('\nConfusion_matrix')
print(confusion_matrix_table(y_test_eval,test_predictions))
print('\n', classification_report(y_test_eval,test_predictions))
def __init__(self, s_size, a_size, scope, trainer, cell_units):
    """Build the recurrent actor-critic graph inside variable scope `scope`.

    Args:
      s_size: width of one observation vector.
      a_size: number of actions (width of the softmax policy output).
      scope: variable-scope name. For any scope other than "global" and
        "init", the loss, gradient and apply ops are created as well.
      trainer: optimizer whose `apply_gradients` pushes the locally computed
        gradients onto the variables of the "global" scope.
      cell_units: number of units of the LSTM cell.
    """
    print(scope)
    with tf.variable_scope(scope):
        # Observations, one row per time step.
        self.inputs = tf.placeholder(shape=[None, s_size], dtype=tf.float32)

        # Single LSTM layer; its state is fed in and read out explicitly.
        cell = tf.contrib.rnn.BasicLSTMCell(cell_units, state_is_tuple=True)
        c_size = cell.state_size.c
        h_size = cell.state_size.h
        zero_c = np.zeros((1, c_size), np.float32)
        zero_h = np.zeros((1, h_size), np.float32)
        self.state_init = [zero_c, zero_h]

        c_in = tf.placeholder(tf.float32, [1, c_size])
        h_in = tf.placeholder(tf.float32, [1, h_size])
        self.state_in = [c_in, h_in]

        # The whole input is treated as one sequence (batch dimension of 1).
        rnn_in = tf.expand_dims(self.inputs, [0])
        state_in = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)
        lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
            cell,
            rnn_in,
            initial_state=state_in,
            time_major=False)
        final_c, final_h = lstm_state
        self.state_out = (final_c[:1, :], final_h[:1, :])
        rnn_out = tf.reshape(lstm_outputs, [-1, cell_units])

        # Policy head (softmax over actions) and value head (scalar).
        self.policy = slim.fully_connected(
            rnn_out,
            a_size,
            activation_fn=tf.nn.softmax,
            weights_initializer=normalized_columns_initializer(0.01),
            biases_initializer=None,
        )
        self.value = slim.fully_connected(
            rnn_out,
            1,
            activation_fn=None,
            weights_initializer=normalized_columns_initializer(1.0),
            biases_initializer=None,
        )

        # Only worker networks need loss and gradient-update ops.
        if scope in ("global", "init"):
            return

        self.actions = tf.placeholder(shape=[None, a_size], dtype=tf.float32)
        self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
        self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)

        # Policy output weighted by the `actions` rows, summed per step.
        self.responsible_outputs = tf.reduce_sum(
            self.policy * self.actions, [1])

        # Value loss: half the summed squared error against the targets.
        value_error = self.target_v - tf.reshape(self.value, [-1])
        self.value_loss = 0.5 * tf.reduce_sum(tf.square(value_error))

        # Policy loss: log-probabilities (floored at 1e-12) times advantages.
        log_responsible = tf.log(tf.maximum(self.responsible_outputs, 1e-12))
        self.policy_loss = -tf.reduce_sum(log_responsible * self.advantages)

        # Entropy of the softmax policy, with the same floor inside the log.
        log_policy = tf.log(tf.maximum(self.policy, 1e-12))
        self.entropy = -tf.reduce_sum(self.policy * log_policy)

        self.loss = (0.5 * self.value_loss + self.policy_loss
                     - self.entropy * 0.01)

        # Gradients of the local loss w.r.t. the local variables ...
        local_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope)
        self.gradients = tf.gradients(self.loss, local_vars)
        self.var_norms = tf.global_norm(local_vars)
        clipped_grads, self.grad_norms = tf.clip_by_global_norm(
            self.gradients, 40.0)

        # ... are applied to the variables of the "global" network.
        global_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, "global")
        grad_var_pairs = [(g, v) for g, v in zip(clipped_grads, global_vars)]
        self.apply_grads = trainer.apply_gradients(grad_var_pairs)
def __init__(self, env, monitor_path: str, video=False, **usercfg) -> None:
    """Set up the PPO agent: networks, losses, summaries and the train op.

    Args:
      env: environment; it is wrapped in a `wrappers.Monitor` that writes to
        `monitor_path`.
      monitor_path: directory for monitor output and TensorBoard summaries.
      video: when false, video recording is disabled in the monitor.
      **usercfg: configuration overrides, applied on top of the defaults set
        here and also forwarded to the base class and the `EnvRunner`.
    """
    super(PPO, self).__init__(**usercfg)
    self.monitor_path: str = monitor_path
    video_callable = None if video else False
    self.env = wrappers.Monitor(env, monitor_path, force=True,
                                video_callable=video_callable)

    # Defaults first, user overrides second.
    defaults = {
        "n_hidden_units": 20,
        "n_hidden_layers": 2,
        "gamma": 0.99,
        "gae_lambda": 0.95,
        "learning_rate": 0.001,
        "n_epochs": 10,
        "n_iter": 10000,
        "batch_size": 64,  # Timesteps per training batch
        "n_local_steps": 256,
        "gradient_clip_value": None,
        "vf_coef": 0.5,
        "entropy_coef": 0.01,
        "cso_epsilon": 0.2,  # Clipped surrogate objective epsilon
    }
    self.config.update(defaults)
    self.config.update(usercfg)

    # Two copies of the network: "old" is the reference, "new" is trained.
    with tf.variable_scope("old_network"):
        self.old_network = self.build_networks()
        old_scope_name = tf.get_variable_scope().name
        self.old_network_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, old_scope_name)

    with tf.variable_scope("new_network"):
        self.new_network = self.build_networks()
        self.initial_features = (
            self.new_network.state_init if self.RNN else None)
        new_scope_name = tf.get_variable_scope().name
        self.new_network_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, new_scope_name)

    # Expose the tensors of the trained network.
    net = self.new_network
    self.action = net.action
    self.value = net.value
    self.states = net.states
    self.actions_taken = net.actions_taken

    self.advantage = tf.placeholder(tf.float32, [None], name="advantage")
    self.ret = tf.placeholder(tf.float32, [None], name="return")

    # Op that copies every "new" variable into its "old" counterpart.
    copy_ops = []
    for old_var, new_var in zip(self.old_network_vars,
                                self.new_network_vars):
        copy_ops.append(old_var.assign(new_var))
    self.set_old_to_new = tf.group(*copy_ops)

    # All loss terms are reduced with a mean rather than a sum.
    surrogate = self.make_actor_loss(
        self.old_network, self.new_network, self.advantage)
    self.actor_loss = -tf.reduce_mean(surrogate)
    self.critic_loss = tf.reduce_mean(tf.square(self.value - self.ret))
    self.mean_entropy = tf.reduce_mean(self.new_network.entropy)
    self.loss = (self.actor_loss
                 + self.config["vf_coef"] * self.critic_loss
                 + self.config["entropy_coef"] * self.mean_entropy)

    grads = tf.gradients(self.loss, self.new_network_vars)

    self._global_step = tf.get_variable(
        "global_step",
        [],
        tf.int32,
        initializer=tf.constant_initializer(0, dtype=tf.int32),
        trainable=False)

    # Number of rows in the fed states; the global step grows by this much.
    self.n_steps = tf.shape(self.states)[0]

    self.session = tf.Session()
    if self.config["save_model"]:
        tf.add_to_collection("action", self.action)
        tf.add_to_collection("states", self.states)
        self.saver = FastSaver()

    # Scalar summaries (the gradient norm is the one before clipping).
    scalar_summaries = [
        tf.summary.scalar("model/Actor_loss", self.actor_loss),
        tf.summary.scalar("model/Critic_loss", self.critic_loss),
        tf.summary.scalar("model/Loss", self.loss),
        tf.summary.scalar("model/entropy", -self.mean_entropy),
        tf.summary.scalar("model/grad_global_norm", tf.global_norm(grads)),
        tf.summary.scalar("model/var_global_norm",
                          tf.global_norm(self.new_network_vars)),
    ]
    # One histogram per trainable variable of the new network.
    histogram_summaries = [
        tf.summary.histogram(var.name, var)
        for var in tf.trainable_variables()
        if "new_network" in var.name
    ]
    self.model_summary_op = tf.summary.merge(
        histogram_summaries + scalar_summaries)

    summaries_dir = os.path.join(self.monitor_path, "summaries")
    self.writer = tf.summary.FileWriter(summaries_dir, self.session.graph)
    self.env_runner = EnvRunner(self.env, self, usercfg,
                                summary_writer=self.writer)

    # The unclipped gradients went into the summary above; clip (if
    # configured) only now, right before applying them.
    clip_value = self.config["gradient_clip_value"]
    if clip_value is not None:
        grads, _ = tf.clip_by_global_norm(grads, clip_value)

    self.optimizer = tf.train.AdamOptimizer(self.config["learning_rate"],
                                            name="optim")
    apply_grads = self.optimizer.apply_gradients(
        zip(grads, self.new_network_vars))
    inc_step = self._global_step.assign_add(self.n_steps)
    self.train_op = tf.group(apply_grads, inc_step)

    self.session.run(tf.global_variables_initializer())
def _buildNetwork(self):
    """Build the conv-conv-dense-dense classifier graph and its Adam ops.

    Creates the placeholders `self.x` (images), `self.y_` (int64 labels) and
    `self.keep_prob` (dropout keep probability), and sets:

      self.softmax: output of the last layer (unnormalised logits, despite
        the name).
      self.cross_entropy_individual / self.cross_entropy: per-example and
        mean sparse softmax cross-entropy.
      self.opt, self.grad, self.train_step: Adam optimizer (lr 0.001), its
        (gradient, variable) pairs and the op applying them.
      self.norm: global norm of those gradients.
      self.correct_prediction, self.accuracy: evaluation tensors.

    Reads `self.batch_num` and the module-level `IMAGE_SIZE`.
    """
    def _vwwd(shape, stddev, wd):
        # Variable with weight decay: truncated-normal init; when `wd` is
        # not None its scaled L2 norm is added to the 'losses' collection.
        var = tf.Variable(tf.truncated_normal(shape, stddev=stddev, dtype=tf.float32))
        if wd is not None:
            tf.add_to_collection('losses', tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss'))
        return var

    def conv2d(name, l_input, w, b):
        # Stride-1 SAME convolution + bias + ReLU.
        return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(l_input, w, strides=[1,1,1,1], padding='SAME'), b), name=name)

    def max_pool(name, l_input, ksize, strides):
        return tf.nn.max_pool(l_input, ksize=[1,ksize,ksize,1], strides=[1,strides,strides,1], padding='SAME', name=name)

    def norm(name, l_input, lsize=4):
        # Local response normalisation.
        return tf.nn.lrn(l_input, lsize, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name=name)

    def local(name, l_input, w, b):
        # Fully connected layer + ReLU.
        return tf.nn.relu(tf.matmul(l_input, w) + b, name=name)

    n_class = 10
    # Flattened size after the two stride-2 poolings with 64 channels:
    # (IMAGE_SIZE / 4)^2 * 64 — assumes IMAGE_SIZE is divisible by 4.
    flat_dim = IMAGE_SIZE * IMAGE_SIZE * 4

    _weights = {
        'wc1': _vwwd([5, 5, 3, 64], stddev=5e-2, wd=0.0),
        'wc2': _vwwd([5, 5, 64, 64], stddev=5e-2, wd=0.0),
        'wl3': _vwwd([flat_dim, 384], stddev=0.04, wd=0.004),
        'wl4': _vwwd([384, 192], stddev=0.04, wd=0.004),
        'out': _vwwd([192, n_class], stddev=1/192.0, wd=0.0),
    }
    _biases = {
        'bc1' : tf.Variable(tf.constant(value=0.0 ,shape=[64], dtype=tf.float32)),
        'bc2' : tf.Variable(tf.constant(value=0.1, shape=[64], dtype=tf.float32)),
        'bl3' : tf.Variable(tf.constant(value=0.1, shape=[384], dtype=tf.float32)),
        'bl4' : tf.Variable(tf.constant(value=0.1, shape=[192], dtype=tf.float32)),
        'out' : tf.Variable(tf.constant(value=0.0, shape=[n_class], dtype=tf.float32)),
    }

    self.x = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3])
    self.y_ = tf.placeholder(tf.int64, shape=[None])
    # Bookkeeping variable, not referenced again in this method. The second
    # positional argument of tf.Variable is `trainable`, so the dtype that
    # used to be passed there made it trainable; mark it non-trainable and
    # keep the inferred dtype so existing checkpoints stay compatible.
    tf.Variable(self.batch_num, trainable=False)
    self.keep_prob = tf.placeholder(tf.float32)
    _dropout = self.keep_prob

    conv1 = conv2d('conv1', self.x, _weights['wc1'], _biases['bc1'])
    pool1 = max_pool('pool1', conv1, ksize=3, strides=2)
    norm1 = norm('norm1', pool1, lsize=4)
    # Single-argument print: same text under Python 2 and Python 3.
    print('norm1 %s' % (norm1.get_shape(),))
    norm1 = tf.nn.dropout(norm1, _dropout)

    conv2 = conv2d('conv2', norm1, _weights['wc2'], _biases['bc2'])
    # [very interesting, reverse the order]
    norm2 = norm('norm2', conv2, lsize=4)
    pool2 = max_pool('pool2', norm2, ksize=3, strides=2)
    print('pool2 %s' % (pool2.get_shape(),))
    pool2 = tf.nn.dropout(pool2, _dropout)
    # [very interesting, delete the dropout]

    pool2 = tf.reshape(pool2, [-1, flat_dim])
    print('pool2 %s' % (pool2.get_shape(),))
    local3 = local('local3', pool2, _weights['wl3'], _biases['bl3'])
    local4 = local('local4', local3, _weights['wl4'], _biases['bl4'])
    self.softmax = tf.add(tf.matmul(local4, _weights['out']), _biases['out'], name='softmax')

    # Per-example loss and its mean share one cross-entropy op.
    self.cross_entropy_individual = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.softmax, labels=self.y_)
    self.cross_entropy = tf.reduce_mean(self.cross_entropy_individual)

    # NOTE: the 'losses' collection (weight decay) is populated above but is
    # not part of the optimised objective; only the cross-entropy is.
    self.opt = tf.train.AdamOptimizer(0.001)
    # Compute the gradients once and reuse them for both the update op and
    # the norm, instead of building the backward graph twice.
    self.grad = self.opt.compute_gradients(self.cross_entropy)
    self.train_step = self.opt.apply_gradients(self.grad)
    self.norm = tf.global_norm([g for g, _ in self.grad])

    self.correct_prediction = tf.equal(tf.argmax(self.softmax, 1), self.y_)
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
def __init__(self,
             num_classes,
             num_frames,
             num_temp_features,
             num_st_features,
             num_units,
             max_gradient_norm,
             learning_rate,
             learning_rate_decay_factor,
             adam_epsilon,
             GD,
             attention_lstm_num_units,
             attention_num_hidden_fc1,
             forward_only=False,
             l2_regularization=False,
             weight_decay=0,
             log_dir=None):
    """Build the S-RNN graph: edge LSTMs, spatial attention, node LSTMs, loss.

    One LSTM cell is created per temporal edge (joint paired with itself) and
    per spatio-temporal edge (pair of distinct joints). At every frame the
    edge outputs that touch a node are weighted by a spatial-attention module
    and fed to that node's LSTM. The node outputs of the last frame are
    concatenated and projected to the class logits.

    Args:
        num_classes: Width of the `targets` placeholder and of the logits.
        num_frames: Number of time steps; the graph is unrolled this many
            times and every input placeholder has this as its second dim.
        num_temp_features: Last dimension of each temporal-edge placeholder.
        num_st_features: Last dimension of each spatio-temporal-edge
            placeholder.
        num_units: Number of units of every edge and node LSTM cell.
        max_gradient_norm: Global-norm clipping threshold; only applied when
            `GD` is true.
        learning_rate: Initial learning rate. It is decayed by a factor of
            0.65 every 250 global steps (staircase).
        learning_rate_decay_factor: Accepted for interface compatibility; not
            read by this constructor.
        adam_epsilon: Epsilon passed to the Adam optimizer.
        GD: If true, train with clipped gradient descent, otherwise with Adam
            (unclipped).
        attention_lstm_num_units: Number of units of the attention LSTMs.
        attention_num_hidden_fc1: Hidden width of the spatial-attention MLP.
        forward_only: If true, no optimizer / update ops are created.
        l2_regularization: Accepted for interface compatibility; not read by
            this constructor.
        weight_decay: Stored on `self.weight_decay`; not otherwise used here.
        log_dir: If not None, scalar summaries are recorded and train/test
            `FileWriter`s are created under this directory.
    """
    self.save_summaries = log_dir is not None
    if self.save_summaries:
        print('Writing summaries for Tensorboard')

    num_layers = 1
    self.num_classes = num_classes
    self.num_temp_features = num_temp_features
    self.num_st_features = num_st_features
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.max_grad_norm = max_gradient_norm
    self.global_step = tf.Variable(0, trainable=False)
    self.adam_epsilon = adam_epsilon
    self.GD = GD
    self.weight_decay = weight_decay

    joints = [
        'face', 'neck', 'belly', 'rightShoulder', 'leftShoulder',
        'rightElbow', 'leftElbow', 'rightArm', 'leftArm', 'rightHip',
        'leftHip', 'rightKnee', 'leftKnee', 'rightLeg', 'leftLeg'
    ]
    # Temporal edges pair a joint with itself: 'face-face', 'neck-neck', ...
    self.temp_features_names = [joint + '-' + joint for joint in joints]
    # Spatio-temporal edges are all unordered joint pairs, in joint order:
    # 'face-neck', 'face-belly', ..., 'rightLeg-leftLeg' (105 names).
    self.st_features_names = [
        first + '-' + second
        for idx, first in enumerate(joints)
        for second in joints[idx + 1:]
    ]

    # Nodes that get their own LSTM, each with the ten edge outputs feeding
    # it. Neck, shoulders and hips are deliberately not modelled as nodes.
    # This is an ordered tuple rather than a set: the order fixes how node
    # outputs are concatenated before the output layer, so it has to be
    # identical in every process that saves or restores a checkpoint.
    node_edges = (
        ('face', [
            'face-face', 'face-belly', 'face-rightElbow', 'face-leftElbow',
            'face-rightArm', 'face-leftArm', 'face-rightKnee',
            'face-leftKnee', 'face-rightLeg', 'face-leftLeg'
        ]),
        ('belly', [
            'belly-belly', 'face-belly', 'belly-rightElbow',
            'belly-leftElbow', 'belly-rightKnee', 'belly-leftKnee',
            'belly-leftArm', 'belly-rightArm', 'belly-leftLeg',
            'belly-rightLeg'
        ]),
        ('right-elbow', [
            'face-rightElbow', 'belly-rightElbow', 'rightElbow-leftElbow',
            'rightElbow-rightArm', 'rightElbow-leftArm',
            'rightElbow-rightKnee', 'rightElbow-leftKnee',
            'rightElbow-rightLeg', 'rightElbow-leftLeg',
            'rightElbow-rightElbow'
        ]),
        ('left-elbow', [
            'face-leftElbow', 'belly-leftElbow', 'rightElbow-leftElbow',
            'leftElbow-rightArm', 'leftElbow-leftArm', 'leftElbow-rightKnee',
            'leftElbow-leftKnee', 'leftElbow-rightLeg', 'leftElbow-leftLeg',
            'leftElbow-leftElbow'
        ]),
        ('right-arm', [
            'face-rightArm', 'belly-rightArm', 'rightElbow-rightArm',
            'leftElbow-rightArm', 'rightArm-leftArm', 'rightArm-rightKnee',
            'rightArm-leftKnee', 'rightArm-rightLeg', 'rightArm-leftLeg',
            'rightArm-rightArm'
        ]),
        ('left-arm', [
            'face-leftArm', 'belly-leftArm', 'rightElbow-leftArm',
            'leftElbow-leftArm', 'rightArm-leftArm', 'leftArm-rightKnee',
            'leftArm-leftKnee', 'leftArm-rightLeg', 'leftArm-leftLeg',
            'leftArm-leftArm'
        ]),
        ('right-knee', [
            'face-rightKnee', 'belly-rightKnee', 'rightElbow-rightKnee',
            'leftElbow-rightKnee', 'rightArm-rightKnee', 'leftArm-rightKnee',
            'rightKnee-leftKnee', 'rightKnee-rightLeg', 'rightKnee-leftLeg',
            'rightKnee-rightKnee'
        ]),
        ('left-knee', [
            'face-leftKnee', 'belly-leftKnee', 'rightElbow-leftKnee',
            'leftElbow-leftKnee', 'rightArm-leftKnee', 'leftArm-leftKnee',
            'rightKnee-leftKnee', 'leftKnee-rightLeg', 'leftKnee-leftLeg',
            'leftKnee-leftKnee'
        ]),
        ('right-leg', [
            'face-rightLeg', 'belly-rightLeg', 'rightElbow-rightLeg',
            'leftElbow-rightLeg', 'rightArm-rightLeg', 'leftArm-rightLeg',
            'rightKnee-rightLeg', 'leftKnee-rightLeg', 'rightLeg-leftLeg',
            'rightLeg-rightLeg'
        ]),
        # The third entry used to be 'rightShoulder-leftLeg', a copy-paste
        # slip: every other node uses its rightElbow edge in this position
        # and shoulders are not part of the reduced node set.
        ('left-leg', [
            'face-leftLeg', 'belly-leftLeg', 'rightElbow-leftLeg',
            'leftElbow-leftLeg', 'rightArm-leftLeg', 'leftArm-leftLeg',
            'rightKnee-leftLeg', 'leftKnee-leftLeg', 'rightLeg-leftLeg',
            'leftLeg-leftLeg'
        ]),
    )
    nodes_names = tuple(name for name, _ in node_edges)

    def make_cell():
        """Return a new softsign LSTM cell (stacked when num_layers > 1)."""
        if num_layers == 1:
            return tf.contrib.rnn.BasicLSTMCell(
                num_units, state_is_tuple=True, activation=tf.nn.softsign)
        stacked = [
            tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.BasicLSTMCell(
                    num_units, state_is_tuple=True,
                    activation=tf.nn.softsign))
            for _ in range(num_layers)
        ]
        return tf.contrib.rnn.MultiRNNCell(stacked, state_is_tuple=True)

    def make_info():
        """Return an empty per-RNN bookkeeping record."""
        return {
            'input_gates': [],
            'forget_gates': [],
            'modulated_input_gates': [],
            'output_gates': [],
            'activations': [],
            'state_c': [],
            'state_m': []
        }

    edgesRNN = {}
    nodesRNN = {}
    states = {}
    infos = {}
    self.batch_size = tf.placeholder(dtype=tf.int32, shape=[],
                                     name='batch_size')
    self.inputs = {}
    self.targets = tf.placeholder(tf.float32, shape=(None, num_classes),
                                  name='targets')

    for temp_feat in self.temp_features_names:
        infos[temp_feat] = make_info()
        self.inputs[temp_feat] = tf.placeholder(
            tf.float32,
            shape=(None, num_frames, self.num_temp_features),
            name=temp_feat)
        edgesRNN[temp_feat] = make_cell()
        states[temp_feat] = edgesRNN[temp_feat].zero_state(
            self.batch_size, dtype=tf.float32)

    for st_feat in self.st_features_names:
        infos[st_feat] = make_info()
        self.inputs[st_feat] = tf.placeholder(
            tf.float32,
            shape=(None, num_frames, self.num_st_features),
            name=st_feat)
        edgesRNN[st_feat] = make_cell()
        states[st_feat] = edgesRNN[st_feat].zero_state(
            self.batch_size, tf.float32)

    for node in nodes_names:
        infos[node] = make_info()
        self.inputs[node] = tf.placeholder(
            tf.float32, shape=(None, num_frames, None), name=node)
        nodesRNN[node] = make_cell()
        states[node] = nodesRNN[node].zero_state(self.batch_size, tf.float32)

    # Whole-body cell and its zero state are created but not wired into the
    # graph below; kept so the set of graph ops stays as before.
    wholeRNN = tf.contrib.rnn.BasicLSTMCell(
        num_units * 10, state_is_tuple=True, activation=tf.nn.softsign)
    states_whole = wholeRNN.zero_state(self.batch_size, tf.float32)

    attention_out_size = 1
    attention_in_size = num_units

    sp_attention_LSTM = tf.contrib.rnn.BasicLSTMCell(
        attention_lstm_num_units, state_is_tuple=True)
    states['spatial_attention'] = sp_attention_LSTM.zero_state(
        self.batch_size, tf.float32)
    tp_attention_LSTM = tf.contrib.rnn.BasicLSTMCell(
        attention_lstm_num_units, state_is_tuple=True)
    states['tempral_attention'] = tp_attention_LSTM.zero_state(
        self.batch_size, tf.float32)

    # 'out' and the 'tp_*' entries are not read by the graph built below, but
    # they are real variables (saved in checkpoints), so they stay.
    weights = {
        'out':
        tf.Variable(tf.random_normal([num_units * num_frames, num_classes]),
                    name='weights_out'),
        'sp_attention_FC1':
        tf.Variable(tf.random_normal([
            attention_lstm_num_units + attention_in_size,
            attention_num_hidden_fc1
        ]), name='sp_weights_FC1'),
        'sp_attention_FC2':
        tf.Variable(tf.random_normal(
            [attention_num_hidden_fc1, attention_out_size]),
                    name='sp_weights_FC2'),
        'tp_attention_FC1':
        tf.Variable(tf.random_normal(
            [1000 + attention_lstm_num_units, attention_out_size]),
                    name='tp_weights_FC1'),
    }
    biases = {
        'out':
        tf.Variable(tf.random_normal([num_classes]), name='biases_out'),
        'sp_attention_FC1':
        tf.Variable(tf.random_normal([attention_num_hidden_fc1]),
                    name='sp_biases_FC1'),
        'sp_attention_FC2':
        tf.Variable(tf.random_normal([attention_out_size]),
                    name='sp_biases_FC2'),
        'tp_attention_FC1':
        tf.Variable(tf.random_normal([attention_out_size]),
                    name='tp_biases_FC1')
    }

    def spatial_attention(x_t, x_t1, scope):
        """Score `x_t` given the attention LSTM's response to `x_t1`.

        Advances the shared spatial-attention LSTM state and returns the
        unnormalised score produced by a two-layer MLP (tanh hidden layer).
        """
        h_t1, states['spatial_attention'] = sp_attention_LSTM(
            x_t1, states['spatial_attention'], scope=scope)
        fc1 = tf.matmul(
            tf.concat([x_t, h_t1], 1),
            weights['sp_attention_FC1']) + biases['sp_attention_FC1']
        fc2 = tf.matmul(
            tf.tanh(fc1),
            weights['sp_attention_FC2']) + biases['sp_attention_FC2']
        return fc2

    def tempral_attention(x_t, x_t1, scope):
        """Temporal counterpart of `spatial_attention` (single FC layer)."""
        h_t1, states['tempral_attention'] = tp_attention_LSTM(
            x_t1, states['tempral_attention'], scope=scope)
        fc1 = tf.matmul(
            tf.concat([x_t, h_t1], 1),
            weights['tp_attention_FC1']) + biases['tp_attention_FC1']
        return tf.nn.softmax(fc1)

    outputs = {}
    node_inputs = {}
    final_inputs_list = []

    def att_module(final_inputs_list, t_or_s, scope, time_steps):
        """Weight each tensor in `final_inputs_list` by its attention score.

        Args:
            final_inputs_list: List of equally shaped 2-D tensors.
            t_or_s: 's' for spatial attention, 't' for temporal attention.
            scope: Variable scope name handed to the attention LSTM.
            time_steps: Unused; kept for call-site compatibility.

        Returns:
            A tensor stacking the weighted inputs along a new leading axis.

        Raises:
            ValueError: If `t_or_s` is neither 's' nor 't'.
        """
        att_weight = []
        for idx, input_att in enumerate(final_inputs_list):
            if idx > 0:
                input_att_t1 = final_inputs_list[idx - 1]
            else:
                input_att_t1 = tf.zeros_like(input_att)
            if t_or_s == 's':
                if idx > 0:
                    tf.get_variable_scope().reuse_variables()
                at_shaped = spatial_attention(input_att, input_att_t1, scope)
            elif t_or_s == 't':
                at_shaped = tempral_attention(input_att, input_att_t1, scope)
            else:
                raise ValueError(
                    "t_or_s must be 's' or 't', got %r" % (t_or_s,))
            att_weight.append(at_shaped)
        # Normalise the scores ACROSS the inputs. The scores stack to
        # [n_inputs, batch, attention_out_size]; a softmax over the default
        # (last) axis would run over a size-1 axis and yield 1.0 everywhere,
        # which silently turned attention into a no-op. Move the input axis
        # last, normalise, and move it back.
        scores = tf.transpose(tf.stack(att_weight), perm=[1, 2, 0])
        att_weight = tf.transpose(tf.nn.softmax(scores), perm=[2, 0, 1])
        return tf.stack(final_inputs_list) * att_weight

    with tf.variable_scope("SRNN"):
        for time_step in range(num_frames):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()

            # Advance every edge LSTM by one frame (temporal edges first).
            for feat in self.temp_features_names + self.st_features_names:
                inputs = self.inputs[feat][:, time_step, :]
                outputs[feat], states[feat] = edgesRNN[feat](
                    inputs, states[feat], scope="lstm_" + feat)

            # Attention-weight the edge outputs feeding each node.
            for node_name, edge_keys in node_edges:
                attention_scope = 'attention_' + node_name
                edge_outputs = [outputs[key] for key in edge_keys]
                with tf.variable_scope(attention_scope):
                    node_inputs[node_name] = att_module(
                        edge_outputs, 's', attention_scope, time_step)

            # Advance every node LSTM on its concatenated, weighted edges.
            node_output_list = []
            for node_name in nodes_names:
                inputs = tf.concat(tf.unstack(node_inputs[node_name]), 1)
                inputs = tf.nn.elu(inputs)
                outputs[node_name], states[node_name] = nodesRNN[node_name](
                    inputs, states[node_name], scope="lstm_" + node_name)
                node_output_list.append(outputs[node_name])

            fullbody_input = tf.concat(node_output_list, 1)
            final_inputs_list.append(fullbody_input)

    self.infos = infos
    self.final_states = states

    # Output width follows num_classes (was hard-coded to 21) so the logits
    # always match the `targets` placeholder.
    self.logits = tf.layers.dense(
        tf.nn.elu(final_inputs_list[-1]),
        num_classes,
        kernel_initializer=tf.random_normal_initializer(),
        bias_initializer=tf.random_normal_initializer(),
        name='dense_out')
    # NOTE(review): dropout is applied to the logits unconditionally, so
    # `self.predict` is stochastic at inference time as well; `self.logits` /
    # `self.cost_inference` are the dropout-free counterparts. Confirm that
    # callers want this before relying on `self.predict` for evaluation.
    self.logits_drop = tf.nn.dropout(self.logits, keep_prob=0.5)
    self.predict = tf.nn.softmax(self.logits_drop)

    with tf.name_scope('cross_entropy'):
        loss = tf.nn.softmax_cross_entropy_with_logits(
            logits=self.logits_drop, labels=self.targets)
        self.cost = tf.reduce_mean(loss)
        self.cost_inference = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                    labels=self.targets))
    if self.save_summaries:
        tf.summary.scalar('cross_entropy', self.cost)

    tvars = tf.trainable_variables()

    starter_learning_rate = self.learning_rate
    self.learning_rate_decay = tf.train.exponential_decay(
        starter_learning_rate, self.global_step, 250, 0.65, staircase=True)

    if not forward_only:
        if self.GD:
            optimizer = tf.train.GradientDescentOptimizer(
                self.learning_rate_decay)
            clipped_grads, norm = tf.clip_by_global_norm(
                tf.gradients(self.cost, tvars), self.max_grad_norm)
            self.gradients_norm = norm
            self.updates = optimizer.apply_gradients(
                zip(clipped_grads, tvars), global_step=self.global_step)
        else:
            aggregation_method = tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate_decay,
                epsilon=self.adam_epsilon)
            gradients_and_params = optimizer.compute_gradients(
                self.cost, tvars, aggregation_method=aggregation_method)
            gradients, params = zip(*gradients_and_params)
            norm = tf.global_norm(gradients)
            self.gradients_norm = norm
            self.updates = optimizer.apply_gradients(
                zip(gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    self.merged = tf.summary.merge_all()
    if self.save_summaries:
        self.train_writer = tf.summary.FileWriter(log_dir + '/train')
        self.test_writer = tf.summary.FileWriter(log_dir + '/test')
def __init__(self, pix_x, pix_y, scope, trainer, act_space=6):
    """Build a conv + LSTM actor-critic network under variable scope `scope`.

    Four stride-2 3x3 convolutions feed a 256-unit fully connected layer and
    a 256-unit LSTM; the LSTM output is projected to a scalar value estimate
    and to a softmax over `act_space` actions. For every scope other than
    'global', loss and gradient ops are added and the (clipped) local
    gradients are applied to the trainable variables of the 'global' scope.

    Args:
        pix_x: Not read by this constructor; the frame placeholder is fixed
            at 42x42x1.
        pix_y: Not read by this constructor (see `pix_x`).
        scope: Variable scope name. 'global' builds the forward pass only.
        trainer: Optimizer whose `apply_gradients` is used to update the
            'global' variables.
        act_space: Number of discrete actions.
    """
    with tf.variable_scope(scope):
        self.input = tf.placeholder(dtype=tf.float32,
                                    shape=(None, 42, 42, 1),
                                    name='frame_input')

        # Convolutional encoder: four identical stride-2 ELU layers.
        self.conv1 = slim.conv2d(activation_fn=tf.nn.elu,
                                 inputs=self.input,
                                 num_outputs=32,
                                 kernel_size=[3, 3],
                                 stride=[2, 2],
                                 padding='SAME')
        self.conv2 = slim.conv2d(activation_fn=tf.nn.elu,
                                 inputs=self.conv1,
                                 num_outputs=32,
                                 kernel_size=[3, 3],
                                 stride=[2, 2],
                                 padding='SAME')
        self.conv3 = slim.conv2d(activation_fn=tf.nn.elu,
                                 inputs=self.conv2,
                                 num_outputs=32,
                                 kernel_size=[3, 3],
                                 stride=[2, 2],
                                 padding='SAME')
        self.conv4 = slim.conv2d(activation_fn=tf.nn.elu,
                                 inputs=self.conv3,
                                 num_outputs=32,
                                 kernel_size=[3, 3],
                                 stride=[2, 2],
                                 padding='SAME')
        self.hidden = slim.fully_connected(slim.flatten(self.conv4),
                                           256,
                                           activation_fn=tf.nn.elu)

        # Recurrent core. The batch of frames is treated as ONE sequence:
        # a leading axis of size 1 is added and the number of frames is
        # passed as the sequence length.
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(256, state_is_tuple=True)
        init_cell_state = tf.constant(value=0,
                                      shape=(1, lstm_cell.state_size.c),
                                      dtype=tf.float32)
        init_hidden_state = tf.constant(value=0,
                                        shape=(1, lstm_cell.state_size.h),
                                        dtype=tf.float32)
        self.init_cell = [init_cell_state, init_hidden_state]
        self.c_in = tf.placeholder(dtype=tf.float32,
                                   shape=(1, lstm_cell.state_size.c),
                                   name='c_in')
        self.h_in = tf.placeholder(dtype=tf.float32,
                                   shape=(1, lstm_cell.state_size.h),
                                   name='h_in')
        self.rnn_in = tf.expand_dims(self.hidden, [0])
        step_size = tf.shape(self.input)[:1]
        self.state_in = tf.nn.rnn_cell.LSTMStateTuple(self.c_in, self.h_in)
        self.lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn(
            lstm_cell,
            self.rnn_in,
            initial_state=self.state_in,
            time_major=False,
            sequence_length=step_size)
        self.lstm_outputs = tf.squeeze(self.lstm_outputs, axis=0)

        # Value head: contract the 256 LSTM features to one scalar per frame.
        condense_to_value = tf.get_variable(
            dtype=tf.float32,
            shape=(256,),
            initializer=self.normalized_columns_initializer(std=1),
            name='form_value')
        self.value_output = tf.tensordot(self.lstm_outputs,
                                         condense_to_value, [[1], [0]])

        # Policy head: softmax over the action logits.
        condense_to_actions = tf.get_variable(
            dtype=tf.float32,
            shape=(256, act_space),
            initializer=self.normalized_columns_initializer(std=0.01),
            name='c_act')
        action_output = tf.tensordot(self.lstm_outputs,
                                     condense_to_actions, [[1], [0]],
                                     name='action1')
        self.norm_actions = tf.nn.softmax(action_output)
        self.test = self.lstm_outputs

        if scope != 'global':
            # Training inputs. They are not stored on `self`; they are only
            # reachable through their graph names.
            R = tf.placeholder(dtype=tf.float32, shape=(None),
                               name='perf_reward')
            get_value = tf.placeholder(dtype=tf.float32, shape=(None),
                                       name='perf_value')
            get_action = tf.placeholder(dtype=tf.int32, shape=(None),
                                        name='perf_action')
            advantage = tf.placeholder(dtype=tf.float32, shape=(None),
                                       name='advantage')

            # Probability the policy assigned to the action actually taken,
            # clipped away from zero so the log below stays finite.
            self.one_hot_action = tf.one_hot(get_action, act_space)
            self.action_channel1 = tf.multiply(self.norm_actions,
                                               self.one_hot_action)
            self.action_channel = tf.reduce_sum(self.action_channel1, 1)
            self.clip_action = tf.clip_by_value(self.action_channel,
                                                0.000001, 9999999)

            self.value_loss = tf.reduce_sum(
                tf.square(self.value_output - R))
            self.action_loss = tf.reduce_sum(
                tf.log(self.clip_action) * advantage)
            self.entropy = -tf.reduce_sum(
                tf.log(self.norm_actions) * self.norm_actions)
            # `action_loss` is the (positive) log-likelihood term, hence it
            # is subtracted; the entropy bonus is subtracted as well.
            self.full_loss = (0.5 * self.value_loss - self.action_loss -
                              0.01 * self.entropy)

            local_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            self.gradients = tf.gradients(self.full_loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)
            grads, self.grad_norms = tf.clip_by_global_norm(
                self.gradients, 40.0)

            # Apply the CLIPPED gradients to the global network. The raw
            # `self.gradients` were applied before, which made the clipping
            # above a no-op and let exploding gradients reach the shared
            # weights.
            global_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads = trainer.apply_gradients(
                zip(grads, global_vars))
def construct(self, args, num_words, num_chars, lem_num_chars, num_tags, num_senses, bow, eow):
    """Assemble the joint tagger / lemmatizer / sense-predictor graph.

    Creates the input placeholders, wires the encoder and the three
    decoders, combines their losses, and sets up the optimizer, streaming
    metrics, saver and summaries.  Finally initializes all global
    variables and the summary writer in ``self.session``.

    Args:
        args: Hyper-parameter namespace; attributes such as ``rnn_cell``,
            ``we_dim``, ``grad_clip``, ``beta_2`` and ``logdir`` are read.
        num_words: Forwarded to ``encoder_network``.
        num_chars: Forwarded to ``encoder_network``.
        lem_num_chars: Forwarded to ``lemma_decoder``.
        num_tags: Sized collection; its length is the last dimension of
            the tag placeholder and it is forwarded to ``tag_decoder``.
        num_senses: Forwarded to ``sense_predictor``.
        bow: Forwarded to ``lemma_decoder``.
        eow: Forwarded to ``lemma_decoder``; also appended to every target
            sequence.

    Raises:
        ValueError: If ``args.rnn_cell`` is neither ``"LSTM"`` nor ``"GRU"``.
    """
    with self.session.graph.as_default():
        # --- Inputs -------------------------------------------------------
        # Training switches.
        self.is_training = tf.placeholder(tf.bool, [])
        self.learning_rate = tf.placeholder(tf.float32, [], name="learning_rate")

        # Per-sentence lengths and the total number of output words.
        self.sentence_lens = tf.placeholder(tf.int32, [None], name="sentence_lens")
        self.words_count = tf.reduce_sum(self.sentence_lens)
        words_count = self.words_count

        # Index pairs mapping the sentence grid onto a flat word list.
        self.word_indexes = tf.placeholder(tf.int32, [None, 2], name='word_indexes')

        # Gold tags.
        self.tags = tf.placeholder(tf.int32, [None, None, len(num_tags)], name="tags")

        # Word forms: ids plus character sequences.
        self.word_ids = tf.placeholder(tf.int32, [None, None], name="word_ids")
        self.charseqs = tf.placeholder(tf.int32, [None, None], name="charseqs")
        self.charseq_lens = tf.placeholder(tf.int32, [None], name="charseq_lens")
        self.charseq_ids = tf.placeholder(tf.int32, [None, None], name="charseq_ids")

        # Lemma targets: senses plus character sequences.
        self.target_senses = tf.placeholder(tf.int32, [None, None], name="target_senses")
        self.target_ids = tf.placeholder(tf.int32, [None, None], name="target_ids")
        self.target_seqs = tf.placeholder(tf.int32, [None, None], name="target_seqs")
        self.target_seq_lens = tf.placeholder(tf.int32, [None], name="target_seq_lens")

        # --- Derived tensors ----------------------------------------------
        # Mask over real (non-padding) sentence positions and its sum.
        token_weights = tf.sequence_mask(self.sentence_lens, dtype=tf.float32)
        total_weight = tf.reduce_sum(token_weights)

        # Source form lengths, first per sentence position, then per word.
        form_len_by_sentence = tf.nn.embedding_lookup(self.charseq_lens, self.charseq_ids)
        word_form_len = tf.gather_nd(form_len_by_sentence, self.word_indexes)

        # Target sequences looked up per sentence position ...
        seq_lens_by_sentence = tf.nn.embedding_lookup(self.target_seq_lens, self.target_ids)
        seqs_by_sentence = tf.nn.embedding_lookup(self.target_seqs, self.target_ids)
        # ... and flattened to the word list.
        target_lens = tf.gather_nd(seq_lens_by_sentence, self.word_indexes)
        target_seqs = tf.gather_nd(seqs_by_sentence, self.word_indexes)
        target_senses = tf.gather_nd(self.target_senses, self.word_indexes)

        # Append `eow` after the last real character: reverse each
        # sequence, pad one symbol on the left, reverse back.
        target_seqs = tf.reverse_sequence(target_seqs, target_lens, 1)
        target_seqs = tf.pad(target_seqs, [[0, 0], [1, 0]], constant_values=eow)
        target_lens = target_lens + 1
        target_seqs = tf.reverse_sequence(target_seqs, target_lens, 1)

        # --- RNN cell type --------------------------------------------------
        cell_types = {
            "LSTM": tf.nn.rnn_cell.LSTMCell,
            "GRU": tf.nn.rnn_cell.GRUCell,
        }
        if args.rnn_cell not in cell_types:
            raise ValueError("Unknown rnn_cell {}".format(args.rnn_cell))
        rnn_cell = cell_types[args.rnn_cell]

        # --- Encoder --------------------------------------------------------
        (rnn_inputs_tags, word_rnn_outputs, sentence_rnn_outputs_tags,
         word_cle_states, word_cle_outputs) = encoder_network(
            self.word_indexes, self.word_ids, self.charseqs, self.charseq_ids,
            self.charseq_lens, self.sentence_lens, num_words, num_chars,
            args.we_dim, args.cle_dim, rnn_cell, args.rnn_cell_dim,
            args.rnn_layers, args.dropout, self.is_training,
            args.separate_embed, args.separate_rnn)

        # --- Tagger ---------------------------------------------------------
        (loss_tag, tag_outputs, self.predictions,
         correct_tag, correct_tags_compositional) = tag_decoder(
            self.tags, sentence_rnn_outputs_tags, token_weights, total_weight,
            num_tags, args.tags, args.label_smoothing)

        # Tagger features handed to the lemmatizer.
        tag_feats = tag_features(
            tag_outputs, self.word_indexes, words_count, args.rnn_cell_dim,
            args.dropout, self.is_training, args.no_tags_to_lemmas,
            args.tag_signal_dropout)

        self.current_accuracy_tag, self.update_accuracy_tag = tf.metrics.mean(
            correct_tag, weights=total_weight)
        (self.current_accuracy_tags_compositional,
         self.update_accuracy_tags_compositional) = tf.metrics.mean(
            correct_tags_compositional)

        # --- Lemmatizer -----------------------------------------------------
        loss_lem, lemma_outputs = lemma_decoder(
            word_rnn_outputs, tag_feats, word_cle_states, word_cle_outputs,
            word_form_len, target_seqs, target_lens, self.charseq_lens,
            words_count, lem_num_chars, rnn_cell, args.rnn_cell,
            args.rnn_cell_dim, args.cle_dim, args.beams,
            args.beam_len_penalty, args.lem_smoothing, bow, eow)
        (self.lemma_predictions_training, self.lemma_predictions,
         self.lemma_prediction_lengths) = lemma_outputs

        # Sense predictor on top of the lemmatizer inputs.
        loss_sense, self.sense_prediction = sense_predictor(
            word_rnn_outputs, tag_feats, target_senses, num_senses,
            words_count, args.predict_sense, args.sense_smoothing)

        # Lemma predictions, loss and accuracy.
        self._lemma_stats(target_seqs, target_lens, target_senses)

        # --- Loss and training op -------------------------------------------
        # Weighted combination of the tag, lemma and sense losses.
        total_loss = loss_tag + loss_lem * args.loss_lem_w + loss_sense * args.loss_sense_w
        self.global_step = tf.train.create_global_step()
        self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(self.update_ops):
            optimizer = tf.contrib.opt.LazyAdamOptimizer(
                learning_rate=self.learning_rate, beta2=args.beta_2)
            grads, train_vars = zip(*optimizer.compute_gradients(total_loss))
            # The reported norm is taken before any clipping.
            self.gradient_norm = tf.global_norm(grads)
            if args.grad_clip:
                grads, _ = tf.clip_by_global_norm(grads, args.grad_clip)
            self.training = optimizer.apply_gradients(
                zip(grads, train_vars), global_step=self.global_step,
                name="training")

        # --- Saver ----------------------------------------------------------
        self.saver = tf.train.Saver(max_to_keep=2)

        # --- Streaming metrics ----------------------------------------------
        self.current_loss_tag, self.update_loss_tag = tf.metrics.mean(
            loss_tag, weights=total_weight)
        self.current_loss_lem, self.update_loss_lem = tf.metrics.mean(
            loss_lem, weights=total_weight)
        self.current_loss_sense, self.update_loss_sense = tf.metrics.mean(
            loss_sense, weights=total_weight)
        self.current_loss, self.update_loss = tf.metrics.mean(
            total_loss, weights=total_weight)
        self.reset_metrics = tf.variables_initializer(
            tf.get_collection(tf.GraphKeys.METRIC_VARIABLES))

        # --- Summaries ------------------------------------------------------
        summary_writer = tf.contrib.summary.create_file_writer(
            args.logdir, flush_millis=1 * 1000)
        scalar = tf.contrib.summary.scalar
        self.summaries = {}

        with summary_writer.as_default(), \
                tf.contrib.summary.record_summaries_every_n_global_steps(1):
            train_scalars = [
                ("train/loss_tag", self.update_loss_tag),
                ("train/loss_sense", self.update_loss_sense),
                ("train/loss_lem", self.update_loss_lem),
                ("train/loss", self.update_loss),
                ("train/gradient", self.gradient_norm),
                ("train/accuracy_tag", self.update_accuracy_tag),
                ("train/accuracy_compositional_tags",
                 self.update_accuracy_tags_compositional),
                ("train/accuracy_lem", self.update_accuracy_lem_train),
                ("train/accuracy_lemsense", self.update_accuracy_lemsense_train),
                ("train/learning_rate", self.learning_rate),
            ]
            self.summaries["train"] = [
                scalar(tag, tensor) for tag, tensor in train_scalars]

        with summary_writer.as_default(), \
                tf.contrib.summary.always_record_summaries():
            for dataset in ["dev", "test"]:
                eval_scalars = [
                    (dataset + "/loss", self.current_loss),
                    (dataset + "/accuracy_tag", self.current_accuracy_tag),
                    (dataset + "/accuracy_compositional_tags",
                     self.current_accuracy_tags_compositional),
                    (dataset + "/accuracy_lem", self.current_accuracy_lem),
                    (dataset + "/accuracy_lemsense", self.current_accuracy_lemsense),
                ]
                self.summaries[dataset] = [
                    scalar(tag, tensor) for tag, tensor in eval_scalars]

        # --- Initialization -------------------------------------------------
        self.session.run(tf.global_variables_initializer())
        with summary_writer.as_default():
            tf.contrib.summary.initialize(
                session=self.session, graph=self.session.graph)
def __init__(self, observation_space, action_space, config, existing_inputs=None):
    """Build the V-trace (IMPALA) policy graph.

    Merges ``config`` over the IMPALA defaults, creates (or reuses) the
    input tensors, builds the model and the V-trace loss, initializes the
    ``LearningRateSchedule`` and ``TFPolicyGraph`` bases, runs the global
    variable initializer and prepares ``self.stats_fetches``.

    Args:
        observation_space: Observation space; its ``shape`` sizes the
            observation placeholder.
        action_space: Action space; must be ``gym.spaces.Discrete`` when
            placeholders are created here.
        config: Overrides applied on top of the IMPALA default config.
        existing_inputs: Optional sequence of already-built input tensors
            laid out as ``[actions, dones, behaviour_logits, rewards,
            observations, prev_actions, prev_rewards, *state_in,
            seq_lens]``.  When given, no new input placeholders are
            created.

    Raises:
        UnsupportedSpaceException: If placeholders must be created and
            ``action_space`` is not discrete.
        AssertionError: If ``batch_mode`` is not ``"truncate_episodes"``.
    """
    config = dict(ray.rllib.agents.impala.impala.DEFAULT_CONFIG, **config)
    assert config["batch_mode"] == "truncate_episodes", \
        "Must use `truncate_episodes` batch mode with V-trace."
    self.config = config
    self.sess = tf.get_default_session()

    # Create input placeholders
    if existing_inputs:
        actions, dones, behaviour_logits, rewards, observations, \
            prev_actions, prev_rewards = existing_inputs[:7]
        existing_state_in = existing_inputs[7:-1]
        existing_seq_lens = existing_inputs[-1]
    else:
        if isinstance(action_space, gym.spaces.Discrete):
            ac_size = action_space.n
            actions = tf.placeholder(tf.int64, [None], name="ac")
        else:
            raise UnsupportedSpaceException(
                "Action space {} is not supported for IMPALA.".format(
                    action_space))
        dones = tf.placeholder(tf.bool, [None], name="dones")
        rewards = tf.placeholder(tf.float32, [None], name="rewards")
        behaviour_logits = tf.placeholder(
            tf.float32, [None, ac_size], name="behaviour_logits")
        observations = tf.placeholder(
            tf.float32, [None] + list(observation_space.shape))
        # Only create these when no inputs were supplied.  Creating them
        # unconditionally would discard the tensors unpacked from
        # `existing_inputs` and leave the model reading unfed placeholders.
        prev_actions = ModelCatalog.get_action_placeholder(action_space)
        prev_rewards = tf.placeholder(
            tf.float32, [None], name="prev_reward")
        existing_state_in = None
        existing_seq_lens = None

    # Setup the policy
    dist_class, logit_dim = ModelCatalog.get_action_dist(
        action_space, self.config["model"])
    self.model = ModelCatalog.get_model(
        {
            "obs": observations,
            "prev_actions": prev_actions,
            "prev_rewards": prev_rewards,
            "is_training": self._get_is_training_placeholder(),
        },
        observation_space,
        logit_dim,
        self.config["model"],
        state_in=existing_state_in,
        seq_lens=existing_seq_lens)
    action_dist = dist_class(self.model.outputs)
    values = self.model.value_function()
    self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      tf.get_variable_scope().name)

    def to_batches(tensor):
        """Reshape a flat ``[B * T, ...]`` tensor to time-major ``[T, B, ...]``."""
        if self.config["model"]["use_lstm"]:
            B = tf.shape(self.model.seq_lens)[0]
            T = tf.shape(tensor)[0] // B
        else:
            # Important: chop the tensor into batches at known episode cut
            # boundaries. TODO(ekl) this is kind of a hack
            T = self.config["sample_batch_size"]
            B = tf.shape(tensor)[0] // T
        rs = tf.reshape(tensor,
                        tf.concat([[B, T], tf.shape(tensor)[1:]], axis=0))
        # swap B and T axes
        return tf.transpose(
            rs,
            [1, 0] + list(range(2, 1 + int(tf.shape(tensor).shape[0]))))

    if self.model.state_in:
        max_seq_len = tf.reduce_max(self.model.seq_lens) - 1
        mask = tf.sequence_mask(self.model.seq_lens, max_seq_len)
        mask = tf.reshape(mask, [-1])
    else:
        mask = tf.ones_like(rewards, dtype=tf.bool)

    # Inputs are reshaped from [B * T] => [T - 1, B] for V-trace calc.
    # The last time step is dropped everywhere except `bootstrap_value`,
    # which uses only the last step.
    self.loss = VTraceLoss(
        actions=to_batches(actions)[:-1],
        actions_logp=to_batches(action_dist.logp(actions))[:-1],
        actions_entropy=to_batches(action_dist.entropy())[:-1],
        dones=to_batches(dones)[:-1],
        behaviour_logits=to_batches(behaviour_logits)[:-1],
        target_logits=to_batches(self.model.outputs)[:-1],
        discount=config["gamma"],
        rewards=to_batches(rewards)[:-1],
        values=to_batches(values)[:-1],
        bootstrap_value=to_batches(values)[-1],
        valid_mask=to_batches(mask)[:-1],
        vf_loss_coeff=self.config["vf_loss_coeff"],
        entropy_coeff=self.config["entropy_coeff"],
        clip_rho_threshold=self.config["vtrace_clip_rho_threshold"],
        clip_pg_rho_threshold=self.config["vtrace_clip_pg_rho_threshold"])

    # KL divergence between worker and learner logits for debugging
    model_dist = Categorical(self.model.outputs)
    behaviour_dist = Categorical(behaviour_logits)
    self.KLs = model_dist.kl(behaviour_dist)
    self.mean_KL = tf.reduce_mean(self.KLs)
    self.max_KL = tf.reduce_max(self.KLs)
    self.median_KL = tf.contrib.distributions.percentile(self.KLs, 50.0)

    # Initialize TFPolicyGraph
    loss_in = [
        ("actions", actions),
        ("dones", dones),
        ("behaviour_logits", behaviour_logits),
        ("rewards", rewards),
        ("obs", observations),
        ("prev_actions", prev_actions),
        ("prev_rewards", prev_rewards),
    ]
    LearningRateSchedule.__init__(self, self.config["lr"],
                                  self.config["lr_schedule"])
    TFPolicyGraph.__init__(
        self,
        observation_space,
        action_space,
        self.sess,
        obs_input=observations,
        action_sampler=action_dist.sample(),
        loss=self.model.loss() + self.loss.total_loss,
        loss_inputs=loss_in,
        state_inputs=self.model.state_in,
        state_outputs=self.model.state_out,
        prev_action_input=prev_actions,
        prev_reward_input=prev_rewards,
        seq_lens=self.model.seq_lens,
        max_seq_len=self.config["model"]["max_seq_len"],
        batch_divisibility_req=self.config["sample_batch_size"])

    self.sess.run(tf.global_variables_initializer())

    self.stats_fetches = {
        "stats": {
            "cur_lr": tf.cast(self.cur_lr, tf.float64),
            "policy_loss": self.loss.pi_loss,
            "entropy": self.loss.entropy,
            "grad_gnorm": tf.global_norm(self._grads),
            "var_gnorm": tf.global_norm(self.var_list),
            "vf_loss": self.loss.vf_loss,
            "vf_explained_var": explained_variance(
                tf.reshape(self.loss.vtrace_returns.vs, [-1]),
                tf.reshape(to_batches(values)[:-1], [-1])),
            "mean_KL": self.mean_KL,
            "max_KL": self.max_KL,
            "median_KL": self.median_KL,
        },
    }
def __init__(self, scope, a_size, trainer, TRAINING, GLOBAL_NET_SCOPE, OBS_SIZE):
    """Build the actor-critic network for ``scope`` and, optionally, its losses.

    The forward pass (policy, value and action-validity heads) is created
    by ``self._build_net`` inside the ``<scope>/qvalues`` variable scope.
    When ``TRAINING`` is true the RL loss and the imitation loss are
    built as well, each with an op that applies the clipped local
    gradients to the variables of ``GLOBAL_NET_SCOPE + '/qvalues'``.

    Args:
        scope: Scope identifier.  It is converted with ``str`` wherever a
            scope name is built, so non-string identifiers are accepted.
        a_size: Number of discrete actions.
        trainer: Object whose ``apply_gradients`` is called with
            (gradient, global variable) pairs.
        TRAINING: Whether to build the losses and update ops.
        GLOBAL_NET_SCOPE: Scope name prefix of the global network.
        OBS_SIZE: Length of the flat observation vector.
    """
    # One name for the variable scope AND the later variable lookup, so the
    # two can never disagree (the lookup used to skip the `str` conversion).
    local_scope = str(scope) + '/qvalues'

    with tf.variable_scope(local_scope):
        self.inputs = tf.placeholder(shape=[None, OBS_SIZE], dtype=tf.float32)
        self.policy, self.value, self.valids = self._build_net(
            self.inputs, TRAINING, a_size, RNN_SIZE, OBS_SIZE)

    if TRAINING:
        # Training inputs.
        self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
        self.actions_onehot = tf.one_hot(self.actions, a_size, dtype=tf.float32)
        self.valid_actions = tf.placeholder(shape=[None, a_size], dtype=tf.float32)
        self.target_v = tf.placeholder(tf.float32, [None], 'Vtarget')
        self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)
        # Probability the policy assigned to the action actually taken.
        self.responsible_outputs = tf.reduce_sum(
            self.policy * self.actions_onehot, [1])
        # Per-sample weight on the value loss.
        self.train_value = tf.placeholder(tf.float32, [None])
        self.optimal_actions = tf.placeholder(tf.int32, [None])
        self.optimal_actions_onehot = tf.one_hot(
            self.optimal_actions, a_size, dtype=tf.float32)

        # Loss functions.  Every log argument is clipped away from zero.
        self.value_loss = (0.005 / 4) * tf.reduce_sum(
            self.train_value * tf.square(
                self.target_v - tf.reshape(self.value, shape=[-1])))
        self.entropy = -0.001 * tf.reduce_sum(
            self.policy * tf.log(tf.clip_by_value(self.policy, 1e-10, 1.0)))
        self.policy_loss = -0.02 * tf.reduce_sum(
            tf.log(tf.clip_by_value(self.responsible_outputs, 1e-15, 1.0))
            * self.advantages)
        # Binary cross-entropy between predicted and true action validity.
        self.valid_loss = -0.01 * tf.reduce_sum(
            tf.log(tf.clip_by_value(self.valids, 1e-10, 1.0))
            * self.valid_actions
            + tf.log(tf.clip_by_value(1 - self.valids, 1e-10, 1.0))
            * (1 - self.valid_actions))
        self.loss = (1 * self.value_loss + self.policy_loss
                     - 1 * self.entropy + 1 * self.valid_loss)
        self.imitation_loss = 0.2 * tf.reduce_mean(
            tf.contrib.keras.backend.categorical_crossentropy(
                self.optimal_actions_onehot, self.policy))

        # Get gradients from local network using local losses and
        # normalize the gradients using clipping
        local_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, local_scope)
        self.gradients = tf.gradients(self.loss, local_vars)
        self.var_norms = tf.global_norm(local_vars)
        grads, self.grad_norms = tf.clip_by_global_norm(
            self.gradients, GRAD_CLIP)

        # Apply local gradients to global network
        global_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, GLOBAL_NET_SCOPE + '/qvalues')
        self.apply_grads = trainer.apply_gradients(zip(grads, global_vars))

        # now the gradients for imitation loss
        self.i_gradients = tf.gradients(self.imitation_loss, local_vars)
        self.i_var_norms = tf.global_norm(local_vars)
        i_grads, self.i_grad_norms = tf.clip_by_global_norm(
            self.i_gradients, GRAD_CLIP)

        # Apply local gradients to global network
        self.apply_imitation_grads = trainer.apply_gradients(
            zip(i_grads, global_vars))

    print("Hello World... From " + str(scope))  # :)
def robust_minimize(
        optimizer,
        loss,
        loss_per_dp,
        global_step,
        batch_size,
        y_,
        clip_method='dp',
        clip_type='global',
        clip_function='soft',
        clip_threshold=0.0,
        clip_percentile=99,
        clip_perclass=True,
        window_size=1000,
        log_dir=None,
        marks=None,
):
    """
    This function takes as input a standard tensorflow optimizer and
    outputs a robust version using gradient clipping.  It is an
    implementation of the paper "Stochastic Gradient Descent with Gradient
    Clipping is Robust to Adversarial Noise" submitted to NIPS 2018.

    Example usage:

        train_op=robust.robust_minimize(
            tf.train.AdamOptimizer(1e-4),
            loss,
            loss_per_dp,
            global_step,
            100,
            y_,
            )

    Args:
        optimizer: the tensorflow optimizer to make robust
        loss: the loss function; this should be equal to
            tf.reduce_mean(loss_per_dp)
        loss_per_dp: a tensor of shape (?,) that has the loss function for
            each data point
        global_step: the current step
        batch_size: the (python integer) number of data points per batch;
            a batch size of 1 forces clip_method='batch'
        y_: the true response variable
        clip_method: may be one of 'dp','dp_naive','batch', or
            'batch_naive'; the 'dp' and 'dp_naive' methods implement the
            minibatch heuristic described in the paper
        clip_type: may be 'none' to disable robustness or 'global' to
            enable; 'none' forces clip_method='batch'
        clip_function: may be 'soft' or 'hard'
        clip_threshold: if this value is greater than zero, then this is
            the threshold used in gradient clipping; if this value is less
            than or equal to zero, then use the heuristic from the paper
            for dynamically selecting clip values
        clip_percentile: the percentile to clip at when using the dynamic
            heuristic; recommended to be equal to 1-epsilon
        clip_perclass: if True, when using the dynamic heuristic, maintain
            separate lists of past gradients for each class; forced to
            False for the 'batch' and 'batch_naive' methods
        window_size: the total number of past gradients to store; rounded
            up to a multiple of batch_size
        log_dir: location to output gradient information from each
            timestep for debug purposes; setting to None disables output
        marks: optional list of tensors to write to the log dir on each
            iteration for debug purposes

    Returns:
        a training op

    Raises:
        ValueError: if clip_type, clip_function (when clip_type is
            'global') or clip_method is not one of the supported values
    """
    # Normalise the options first.  This is plain Python, so bad arguments
    # are rejected before TensorFlow is imported or any op is created.
    marks = [] if marks is None else list(marks)

    if batch_size == 1 or clip_type == 'none':
        clip_method = 'batch'
    if clip_method in ('batch', 'batch_naive'):
        clip_perclass = False

    if clip_type not in ('none', 'global'):
        raise ValueError('unknown clip_type: %r' % (clip_type,))
    if clip_type == 'global' and clip_function not in ('soft', 'hard'):
        raise ValueError('unknown clip_function: %r' % (clip_function,))
    if clip_method not in ('dp', 'dp_naive', 'batch', 'batch_naive'):
        raise ValueError('unknown clip_method: %r' % (clip_method,))

    import math
    import os

    import tensorflow as tf

    def update_tensor(tensor, indices, updates):
        """Return `tensor` with the entries at `indices` replaced by `updates`."""
        newvals = tf.SparseTensor(indices, tf.stack(updates), tensor.get_shape())
        # A real list is required here: a lazy `map` object cannot be stacked.
        current = [tensor[i] for i in indices]
        oldvals = tf.SparseTensor(indices, tf.stack(current), tensor.get_shape())
        return (tensor
                + tf.sparse_tensor_to_dense(newvals)
                - tf.sparse_tensor_to_dense(oldvals))

    # Round the window up to a whole number of batches.  The float() keeps
    # the ceil meaningful under integer division as well.
    window_size = int(batch_size * math.ceil(window_size / float(batch_size)))

    with tf.name_scope('robust_minimize'):

        # setup clipping
        epsilon = 1e-6

        if clip_perclass:
            # One window of past norms per class (column of y_).
            num_windows = y_.get_shape()[1]
            label_steps = tf.Variable(
                tf.zeros([num_windows]), trainable=False, name='label_steps')
            label_steps_update = tf.assign(
                label_steps, label_steps + tf.reduce_sum(y_, axis=0))
            y_window_ = tf.argmax(y_, axis=1)
        else:
            num_windows = 1
            label_steps = tf.Variable(
                tf.zeros([num_windows]), trainable=False, name='label_steps')
            label_steps_update = tf.assign(
                label_steps, label_steps + batch_size)
            y_window_ = tf.zeros([batch_size])

        # Sliding windows of past gradient norms.
        ms = tf.Variable(
            tf.zeros([num_windows, window_size]), trainable=False, name='ms')
        # Fraction of each window filled so far, capped at 1.  It scales
        # the requested percentiles while the windows still contain the
        # zeros they were initialised with.
        trim_factor = tf.minimum(1.0, label_steps / window_size)

        def get_percentile(dist, p):
            """Per-row percentile of `dist` at `p[row]`, offset by epsilon."""
            rows = range(0, dist.get_shape()[0])
            return tf.stack([
                tf.contrib.distributions.percentile(dist[i], p[i]) + epsilon
                for i in rows
            ])

        m = get_percentile(ms, 50 * trim_factor)
        if clip_threshold <= 0.0:
            clip = get_percentile(ms, clip_percentile * trim_factor)
        else:
            clip = clip_threshold * tf.ones([num_windows])

        def clip_gradients(gradients, norm, clip_mod):
            """Clip `gradients` (whose global norm is `norm`) at `clip_mod`."""
            clip_mod = tf.reshape(clip_mod, ())
            if clip_type == 'none':
                return gradients
            # clip_type == 'global' (validated above)
            if clip_function == 'soft':
                # Rescale so that the global norm is at most clip_mod.
                clipped, _ = tf.clip_by_global_norm(
                    gradients, clip_mod, use_norm=norm)
                return clipped
            # clip_function == 'hard': drop the whole gradient when the
            # norm exceeds the threshold.
            clipped = []
            for grad in gradients:
                if grad is None:
                    clipped.append(None)
                else:
                    # grad=grad binds the current gradient to the lambdas.
                    clipped.append(tf.cond(
                        norm > clip_mod,
                        lambda grad=grad: tf.zeros(grad.get_shape()),
                        lambda grad=grad: grad,
                    ))
            return clipped

        def record_norms(all_norms):
            """Return an op that writes each norm into its window of `ms`."""
            _, _, ms_new = tf.while_loop(
                lambda batch_index, window_index, ms_new:
                    batch_index < batch_size,
                lambda batch_index, window_index, ms_new: (
                    batch_index + 1,
                    window_index + y_[batch_index],
                    update_tensor(
                        ms_new,
                        [(y_window_[batch_index],
                          tf.mod(
                              tf.cast(
                                  window_index[y_window_[batch_index]],
                                  tf.int64),
                              window_size))],
                        [all_norms[batch_index]],
                    ),
                ),
                [0, tf.mod(label_steps, window_size), ms],
            )
            return tf.assign(ms, ms_new)

        # calculate gradients
        if clip_method == 'dp':
            # FIXME: this method makes no effort to place the variables on
            # appropriate devices when multiple devices are available
            variables = (
                tf.trainable_variables()
                + tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)
                + tf.get_collection(tf.GraphKeys._STREAMING_MODEL_PORTS)
            )
            loop_vars = [
                tf.constant(0, tf.int32),
                tf.TensorArray(
                    tf.float32, size=batch_size, clear_after_read=False),
                [
                    tf.TensorArray(
                        tf.float32, size=batch_size, clear_after_read=False)
                    for _ in variables
                ],
            ]

            def go(i, arr_norm, arr_vars):
                grad = tf.gradients(loss_per_dp[i], variables)
                norm = tf.global_norm(grad)
                clip_local = clip_gradients(
                    grad, norm, tf.reduce_sum(clip * y_[i]))
                return [
                    i + 1,
                    arr_norm.write(i, norm),
                    [arr.write(i, g) for arr, g in zip(arr_vars, clip_local)],
                ]

            _, norms, clips = tf.while_loop(
                lambda i, arr_norm, arr_vars: i < batch_size,
                go,
                loop_vars,
            )
            gradients2 = [tf.reduce_mean(g.stack(), axis=0) for g in clips]
            all_norms = norms.stack()
            ms_update = record_norms(all_norms)

        elif clip_method == 'dp_naive':
            all_gradients = []
            all_norms = []
            for i in range(0, batch_size):
                grads_and_vars = optimizer.compute_gradients(
                    loss_per_dp[i, ...])
                dp_gradients, variables = zip(*grads_and_vars)
                dp_norm = tf.global_norm(dp_gradients)
                dp_gradients2 = clip_gradients(
                    dp_gradients, dp_norm, tf.reduce_sum(clip * y_[i]))
                all_gradients.append(dp_gradients2)
                all_norms.append(dp_norm)
            gradients2 = [
                sum(per_var) / batch_size for per_var in zip(*all_gradients)
            ]
            all_norms = tf.stack(all_norms)
            ms_update = record_norms(all_norms)

        elif clip_method == 'batch_naive':
            all_gradients = []
            for i in range(0, batch_size):
                grads_and_vars = optimizer.compute_gradients(
                    loss_per_dp[i, ...])
                dp_gradients, variables = zip(*grads_and_vars)
                all_gradients.append(dp_gradients)
            gradients = [
                sum(per_var) / batch_size for per_var in zip(*all_gradients)
            ]
            global_norm = tf.global_norm(gradients)
            ms_update = tf.assign(
                ms[0, tf.mod(global_step, window_size)], global_norm)
            gradients2 = clip_gradients(gradients, global_norm, clip)
            all_norms = tf.tile(
                tf.reshape(global_norm, shape=[1]), [batch_size])

        else:  # clip_method == 'batch'
            grads_and_vars = optimizer.compute_gradients(loss)
            gradients, variables = zip(*grads_and_vars)
            global_norm = tf.global_norm(gradients)
            gradients2 = clip_gradients(gradients, global_norm, clip)
            ms_update = tf.assign(
                ms[0, tf.mod(global_step, window_size)], global_norm)
            all_norms = tf.tile(
                tf.reshape(global_norm, shape=[1]), [batch_size])

        # setup logging
        if log_dir is not None:
            log_file = log_dir + '/robust.log'
            print(' robust log file = ', os.path.abspath(log_file))
            # Line-buffered and deliberately left open: the py_func below
            # writes to it on every training step.
            log = open(log_file, 'a', 1)

            def update_log(global_step, clip, m, norms, *marks):
                for i in range(0, norms.shape[0]):
                    log.write(str(global_step) + ' ')
                    log.write(str(clip) + ' ')
                    log.write(str(m) + ' ')
                    log.write(str(norms[i]) + ' ')
                    for mark in marks:
                        log.write(str(mark[i]) + ' ')
                    log.write('\n')
                return []

            log_update = tf.py_func(
                update_log, [global_step, clip, m, all_norms] + marks, [])
        else:
            log_update = tf.group()

        # apply gradients
        grads_and_vars2 = list(zip(gradients2, variables))
        grad_updates = optimizer.apply_gradients(
            grads_and_vars2,
            global_step=global_step)
        train_op = tf.group(
            grad_updates, log_update, label_steps_update, ms_update)
        return train_op
def __init__(self, add_summaries=False, trainable=True, use_naive_policy=True):
    """Build the ACER estimator graph in the current variable scope.

    Creates the input placeholders, the shared trunk, the state-value
    head, the policy, the advantage / Q heads, the importance weights,
    the Retrace targets, the losses and the gradient ops.

    Args:
        add_summaries: Forwarded to ``build_network`` and
            ``self.summarize``.
        trainable: Stored on ``self.trainable``.
        use_naive_policy: Not read by this constructor.
    """
    self.trainable = trainable
    self.avg_net = getattr(AcerEstimator, "average_net", self)
    scope_prefix = tf.get_variable_scope().name + '/'

    with tf.name_scope("inputs"):
        # TODO When seq_length is None, use seq_length + 1 is somewhat
        # counter-intuitive.  Come up a solution to pass seq_length+1 and
        # seq_length at the same time.  maybe a assertion ? But that could
        # be hard to understand
        self.seq_length = tf.placeholder(tf.int32, [], "seq_length")
        self.state = get_state_placeholder()
        action_shape = [seq_length, batch_size, FLAGS.num_actions]
        self.a = tf.placeholder(FLAGS.dtype, action_shape, "actions")
        reward_shape = [seq_length, batch_size, 1]
        self.r = tf.placeholder(FLAGS.dtype, reward_shape, "rewards")
        self.done = tf.placeholder(tf.bool, [batch_size, 1], "done")

    with tf.variable_scope("shared"):
        trunk, self.lstm = build_network(self.state, scope_prefix, add_summaries)

    # For k-step rollout s_i, i = 0, 1, ..., k-1, we need one additional
    # state s_k s.t. we can bootstrap value from it, i.e. we need V(s_k)
    with tf.variable_scope("V"):
        raw_value = state_value_network(trunk)
        self.value_all = raw_value
        value_scale = tf.Variable(
            1, dtype=FLAGS.dtype, name="value_scale",
            trainable=FLAGS.train_value_scale)
        scaled_value = raw_value * value_scale
        # Last step only, multiplied by 0 where the episode is done.
        not_done = tf.cast(~self.done, FLAGS.dtype)
        self.value_last = scaled_value[-1:, ...] * not_done[None, ...]
        self.value = scaled_value[:self.seq_length, ...]

    with tf.variable_scope("shared-policy"):
        if not FLAGS.share_network:
            # FIXME right now this only works for non-lstm version
            trunk, policy_lstm = build_network(
                self.state, scope_prefix, add_summaries)
            self.lstm.inputs.update(policy_lstm.inputs)
            self.lstm.outputs.update(policy_lstm.outputs)
        trunk = trunk[:self.seq_length, ...]

    self.state.update(self.lstm.inputs)

    with tf.variable_scope("policy"):
        self.pi, self.pi_behavior = build_policy(trunk, FLAGS.policy_dist)

    with tf.name_scope("output"):
        self.a_prime = tf.squeeze(self.pi.sample_n(1), 0)
        self.action_and_stats = [self.a_prime, self.pi.stats]

    with tf.variable_scope("A"):
        advantage = self.advantage_network(trunk)
        q_tilt = self.SDN_network(advantage, self.value, self.pi)

    with tf.variable_scope("Q"):
        self.Q_tilt_a = q_tilt(self.a, name="Q_tilt_a")
        self.Q_tilt_a_prime = q_tilt(self.a_prime, name="Q_tilt_a_prime")

    # Compute the importance sampling weight \rho and \rho^{'}
    with tf.name_scope("rho"):
        self.rho, self.rho_prime = self.compute_rho(
            self.a, self.a_prime, self.pi, self.pi_behavior)

    with tf.name_scope("c_i"):
        rho_root = self.rho**(1. / FLAGS.num_actions)
        self.c = tf.minimum(tf_const(1.), rho_root, "c_i")
        tf.logging.info("c.shape = {}".format(tf_shape(self.c)))

    with tf.name_scope("Q_Retrace"):
        self.Q_ret, self.Q_opc = self.compute_Q_ret_Q_opc_recursively(
            self.value, self.value_last, self.c, self.r, self.Q_tilt_a)

    with tf.name_scope("losses"):
        self.pi_loss, self.pi_loss_sur = self.get_policy_loss(
            self.rho, self.pi, self.a, self.Q_opc, self.value,
            self.rho_prime, self.Q_tilt_a_prime, self.a_prime)
        self.vf_loss, self.vf_loss_sur = self.get_value_loss(
            self.Q_ret, self.Q_tilt_a, self.rho, self.value)

        # Surrogate loss is the loss tensor we passed to optimizer for
        # automatic gradient computation, it uses lots of stop_gradient.
        # Therefore it's different from the true loss (self.loss)
        mean_entropy = tf.reduce_mean(self.pi.entropy(), axis=1)
        self.entropy = tf.reduce_sum(mean_entropy, axis=0)
        self.entropy_loss = -self.entropy * FLAGS.entropy_cost_mult

        # Every term handed to the optimizer must be a scalar.
        for term in (self.pi_loss_sur, self.vf_loss_sur, self.entropy_loss):
            assert len(term.get_shape()) == 0

        self.loss_sur = (self.pi_loss_sur
                         + self.vf_loss_sur * FLAGS.lr_vp_ratio
                         + self.entropy_loss)
        self.loss = self.pi_loss + self.vf_loss + self.entropy_loss

    with tf.name_scope("grads_and_optimizer"):
        pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(pending_updates):
            global_step = FLAGS.global_step
            self.lr = tf.train.exponential_decay(
                tf_const(FLAGS.learning_rate),
                FLAGS.global_timestep,
                FLAGS.decay_steps,
                FLAGS.decay_rate,
                staircase=FLAGS.staircase)
            self.optimizer = tf.train.AdamOptimizer(self.lr)

            tf.logging.info("Computing gradients ...")
            grads_and_vars = self.optimizer.compute_gradients(self.loss_sur)
            check_none_grads(grads_and_vars)

            # Variables without a gradient are skipped in all three passes.
            usable = [(g, v) for g, v in grads_and_vars if g is not None]

            self.grad_norms = {}
            for grad, var in usable:
                self.grad_norms[str(var.name)] = tf.sqrt(
                    tf.reduce_sum(grad**2))

            self.global_norm = tf.global_norm([grad for grad, _ in usable])

            self.grads_and_vars = [
                (tf.check_numerics(grad, message=str(var.name)), var)
                for grad, var in usable
            ]

    # Collect all trainable variables initialized here
    self.var_list = [var for _, var in self.grads_and_vars]
    self.lock = None
    self.summaries = self.summarize(add_summaries)
def __init__(self, mode, iterator, params, rev_vocab_table=None, scope=None, log_trainables=True):
    """Build the seq2seq graph for the given ``mode``.

    Creates embeddings, the output projection, encoder and decoder.  It
    then adds what the mode needs: the loss (train / eval), the clipped
    gradient update with summaries (train), or the decoded words (infer).
    A saver over all global variables is always created.

    Args:
        mode: One of the ``tf.contrib.learn.ModeKeys`` values.
        iterator: Batched input; its sequence-length tensors are read here.
        params: Hyper-parameter object (``embedding_size``, ``num_layers``,
            ``vocab_size``, ``optimizer``, ``learning_rate``, ... are read).
        rev_vocab_table: Table whose ``lookup`` maps sample ids to words;
            used in INFER mode only.
        scope: Optional variable scope name overriding the defaults.
        log_trainables: Whether to print every trainable variable.

    Raises:
        ValueError: In TRAIN mode, if ``params.optimizer`` is neither
            ``"sgd"`` nor ``"adam"`` (case-insensitive).
    """
    mode_keys = tf.contrib.learn.ModeKeys
    is_train = mode == mode_keys.TRAIN
    is_eval = mode == mode_keys.EVAL
    is_infer = mode == mode_keys.INFER

    print_out("# creating %s graph ..." % mode)
    self.dtype = tf.float32
    self.mode = mode
    self.embedding_size = params.embedding_size
    self.num_layers = params.num_layers
    self.iterator = iterator

    self.device_manager = DeviceManager()
    self.round_robin = RoundRobin(self.device_manager)
    self.num_gpus = self.device_manager.num_available_gpus()
    print_out("# number of gpus %d" % self.num_gpus)

    with tf.variable_scope(scope or 'ta_seq2seq_graph', dtype=self.dtype):
        self.init_embeddings(
            params.vocab_file, params.embedding_type,
            self.embedding_size, scope=scope)

        # Output projection: joint message/topic layer when topic words
        # are boosted, plain bias-free dense layer otherwise.
        with tf.variable_scope(scope or "build_network"):
            with tf.variable_scope("output_projection") as output_scope:
                if params.boost_topic_gen_prob:
                    self.output_layer = taware_layer.JointDenseLayer(
                        params.vocab_size, params.topic_vocab_size,
                        scope=output_scope, name="output_projection")
                else:
                    self.output_layer = layers_core.Dense(
                        params.vocab_size,
                        use_bias=False, name="output_projection")

        encoder_keep_prob, decoder_keep_prob = self.get_keep_probs(mode, params)
        self.batch_size = tf.size(self.iterator.source_sequence_lengths)

        encoder_outputs, encoder_state = self.__build_encoder(
            params, encoder_keep_prob)
        logits, sample_id, final_decoder_state = self.__build_decoder(
            params, encoder_outputs, encoder_state, decoder_keep_prob)

        # A loss exists for every mode except inference.
        if is_infer:
            loss = None
        else:
            with tf.device(self.device_manager.tail_gpu()):
                loss = self.__compute_loss(logits)

    if is_train:
        self.train_loss = loss
        source_words = tf.reduce_sum(self.iterator.source_sequence_lengths)
        target_words = tf.reduce_sum(self.iterator.target_sequence_length)
        self.word_count = source_words + target_words
    elif is_eval:
        self.eval_loss = loss
    elif is_infer:
        self.sample_words = rev_vocab_table.lookup(tf.to_int64(sample_id))

    if not is_infer:
        ## Count the number of predicted words for compute ppl.
        self.predict_count = tf.reduce_sum(
            self.iterator.target_sequence_length)

    self.global_step = tf.Variable(0, trainable=False)
    trainable_vars = tf.trainable_variables()

    # Gradients and SGD update operation for training the model.
    if is_train:
        base_rate = tf.constant(params.learning_rate)
        self.learning_rate = base_rate
        # decay
        self.learning_rate = self._get_learning_rate_decay(
            params, self.global_step, self.learning_rate)

        # Optimizer
        optimizer_name = params.optimizer.lower()
        if optimizer_name == "sgd":
            optimizer_cls = tf.train.GradientDescentOptimizer
        elif optimizer_name == "adam":
            optimizer_cls = tf.train.AdamOptimizer
        else:
            raise ValueError('Unknown optimizer: ' + params.optimizer)
        optimizer = optimizer_cls(self.learning_rate)
        tf.summary.scalar("lr", self.learning_rate)

        # Gradients, clipped by their global norm.
        raw_grads = tf.gradients(
            self.train_loss, trainable_vars,
            colocate_gradients_with_ops=True)
        clipped_grads, grad_norm = tf.clip_by_global_norm(
            raw_grads, params.max_gradient_norm)
        norm_summaries = [
            tf.summary.scalar("grad_norm", grad_norm),
            tf.summary.scalar(
                "clipped_gradient", tf.global_norm(clipped_grads)),
        ]
        self.grad_norm = grad_norm

        self.update = optimizer.apply_gradients(
            zip(clipped_grads, trainable_vars),
            global_step=self.global_step)

        # Summary
        loss_summaries = [
            tf.summary.scalar("lr", self.learning_rate),
            tf.summary.scalar("train_loss", self.train_loss),
        ]
        self.train_summary = tf.summary.merge(loss_summaries + norm_summaries)

    if is_infer:
        self.infer_logits, self.sample_id = logits, sample_id
        self.infer_summary = tf.no_op()

    # Saver
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)

    # Print trainable variables
    if log_trainables:
        print_out("# Trainable variables")
        for var in trainable_vars:
            description = (var.name, str(var.get_shape()), var.op.device)
            print_out(" %s, %s, %s" % description)
def __init__(self, env, task, visualise):
    """Build the A3C training graph for one worker.

    An implementation of the A3C algorithm that is reasonably well-tuned for
    the VNC environments. Below, we will have a modest amount of complexity
    due to the way TensorFlow handles data parallelism. But overall, we'll
    define the model, specify its inputs, and describe how the policy
    gradients step should be computed.

    Two copies of the policy are created: a "global" copy placed through
    `tf.train.replica_device_setter` and a "local" copy on this worker's
    device. Gradients are taken with respect to the local copy and applied
    to the global copy; `self.sync` copies global weights back to local.

    Args:
        env: Environment. Its `observation_space` and `action_space` are
            read here (`action_space.n` sizes the action placeholder) and
            the environment itself is handed to `RunnerThread`.
        task: Worker task index, formatted into the worker device string
            "/job:worker/task:{task}/cpu:0".
        visualise: Passed through unchanged to `RunnerThread`.
    """
    self.env = env
    self.task = task
    ob_space = self.env.observation_space
    ac_space = self.env.action_space
    worker_device = "/job:worker/task:{}/cpu:0".format(task)

    # Global (shared) network and step counter.
    with tf.device(
            tf.train.replica_device_setter(1, worker_device=worker_device)):
        with tf.variable_scope("global"):
            self.network = CnnPolicy(ob_space, ac_space, 1, 1, reuse=False)
            self.global_step = tf.get_variable(
                "global_step", [],
                tf.int32,
                initializer=tf.constant_initializer(0, dtype=tf.int32),
                trainable=False)

    with tf.device(worker_device):
        # Worker-local copy of the policy; `pi` is the alias used below.
        with tf.variable_scope("local"):
            self.local_network = pi = CnnPolicy(ob_space,
                                                ac_space,
                                                1,
                                                1,
                                                reuse=False)
            pi.global_step = self.global_step

        # Externally fed training targets.
        # presumably `ac` holds one-hot encoded actions -- TODO confirm
        # against the code that feeds it.
        self.ac = tf.placeholder(tf.float32, [None, env.action_space.n],
                                 name="ac")
        self.adv = tf.placeholder(tf.float32, [None], name="adv")
        self.r = tf.placeholder(tf.float32, [None], name="r")

        log_prob_tf = tf.nn.log_softmax(pi.logits)
        prob_tf = tf.nn.softmax(pi.logits)

        # the "policy gradients" loss: its derivative is precisely the policy gradient
        # notice that self.ac is a placeholder that is provided externally.
        # adv will contain the advantages, as calculated in process_rollout
        pi_loss = -tf.reduce_mean(
            tf.reduce_sum(log_prob_tf * self.ac, [1]) * self.adv)

        # loss of value function
        vf_loss = 0.5 * tf.reduce_mean(tf.square(pi.vf - self.r))
        # Debug output of the static shapes, printed at graph-build time.
        print(vf_loss.get_shape(), pi.vf.get_shape())
        # Entropy is summed over the whole batch (not averaged).
        entropy = -tf.reduce_sum(prob_tf * log_prob_tf)

        # Batch size as a float; only used to scale the summaries below.
        bs = tf.to_float(tf.shape(pi.x)[0])
        # vf_loss already carries a factor 0.5 and is halved again here, so
        # the squared error enters the total loss with weight 0.25.
        self.loss = pi_loss + 0.5 * vf_loss - entropy * 0.01

        # 20 represents the number of "local steps": the number of timesteps
        # we run the policy before we update the parameters.
        # The larger local steps is, the lower is the variance in our policy gradients estimate
        # on the one hand; but on the other hand, we get less frequent parameter updates, which
        # slows down learning. In this code, we found that making local steps be much
        # smaller than 20 makes the algorithm more difficult to tune and to get to work.
        self.runner = RunnerThread(env, pi, 20, visualise)

        # Gradients are taken w.r.t. the LOCAL network's variables.
        grads = tf.gradients(self.loss, pi.var_list)

        # Summaries use the tf.summary API (the older tf.*_summary /
        # tf.merge_all variant of this section has been retired).
        # NOTE(review): pi_loss and vf_loss are already batch means, yet are
        # divided by bs again here; entropy is a batch sum, so entropy / bs
        # is a per-sample mean. Confirm the intended scaling.
        tf.summary.scalar("model/policy_loss", pi_loss / bs)
        tf.summary.scalar("model/value_loss", vf_loss / bs)
        tf.summary.scalar("model/entropy", entropy / bs)
        tf.summary.image("model/state", pi.x)
        # The gradient norm is recorded before clipping.
        tf.summary.scalar("model/grad_global_norm", tf.global_norm(grads))
        tf.summary.scalar("model/var_global_norm",
                          tf.global_norm(pi.var_list))
        # merge_all picks up every summary registered so far in the graph,
        # so it must stay after the summary definitions above.
        self.summary_op = tf.summary.merge_all()

        grads, _ = tf.clip_by_global_norm(grads, 0.5)

        # copy weights from the parameter server to the local model
        self.sync = tf.group(*[
            v1.assign(v2)
            for v1, v2 in zip(pi.var_list, self.network.var_list)
        ])

        # Local gradients are paired positionally with GLOBAL variables, so
        # both var_list sequences must be in the same order.
        grads_and_vars = list(zip(grads, self.network.var_list))
        # The global step advances by the number of samples in the batch.
        inc_step = self.global_step.assign_add(tf.shape(pi.x)[0])

        # each worker has a different set of adam optimizer parameters
        opt = tf.train.AdamOptimizer(7e-4)
        self.train_op = tf.group(opt.apply_gradients(grads_and_vars),
                                 inc_step)
        self.summary_writer = None
        self.local_steps = 0
def train(
        model_dir,
        hp=None,
        max_steps=1e7,
        display_step=500,
        ruleset='mante',
        rule_trains=None,
        rule_prob_map=None,
        seed=0,
        rich_output=True,
        load_dir=None,
        trainables=None,
        fixReadoutandBias=False,
        fixBias=False,
):
    """Train the network.

    Args:
        model_dir: str, training directory
        hp: dictionary of hyperparameters; overrides the defaults returned
            by get_default_hp(ruleset)
        max_steps: upper bound on step * hp['batch_size_train']; training
            stops once that product exceeds it
        display_step: int, evaluate/log (and checkpoint) every this many
            steps
        ruleset: the set of rules to train
        rule_trains: list of rules to train, if None then all rules possible
        rule_prob_map: None or dictionary of relative rule probability
        seed: int, random seed to be used
        rich_output: if True, call display_rich_output at every evaluation
        load_dir: currently unused
        trainables: currently unused
        fixReadoutandBias: if True, only variables whose name contains
            'rnn/leaky_rnn_cell/kernel:0' are optimized
        fixBias: if True (and fixReadoutandBias is not), only variables
            whose name contains 'rnn/leaky_rnn_cell/kernel:0' or
            'output/weights:0' are optimized

    Returns:
        model is stored at model_dir/model.ckpt
        training configuration is stored at model_dir/hp.json
    """
    tools.mkdir_p(model_dir)

    # Network parameters
    default_hp = get_default_hp(ruleset)
    if hp is not None:
        default_hp.update(hp)
    hp = default_hp
    hp['seed'] = seed
    hp['rng'] = np.random.RandomState(seed)

    # Rules to train and test. Rules in a set are trained together
    if rule_trains is None:
        # By default, training all rules available to this ruleset
        hp['rule_trains'] = task.rules_dict[ruleset]
    else:
        hp['rule_trains'] = rule_trains
    hp['rules'] = hp['rule_trains']

    # Assign probabilities for rule_trains.
    if rule_prob_map is None:
        rule_prob_map = dict()

    # Turn into rule_trains format
    hp['rule_probs'] = None
    if hasattr(hp['rule_trains'], '__iter__'):
        # Set default as 1.
        rule_prob = np.array(
            [rule_prob_map.get(r, 1.) for r in hp['rule_trains']])
        hp['rule_probs'] = list(rule_prob / np.sum(rule_prob))
    tools.save_hp(hp, model_dir)

    # Build the model
    with tf.device('gpu:0'):
        model = Model(model_dir, hp=hp)

    # Display hp
    for key, val in hp.items():
        print('{:20s} = '.format(key) + str(val))

    # Select which variables the optimizer is allowed to update.
    if fixReadoutandBias is True:
        my_var_list = [
            var for var in model.var_list
            if 'rnn/leaky_rnn_cell/kernel:0' in var.name
        ]
        print(my_var_list)
    elif fixBias is True:
        my_var_list = [
            var for var in model.var_list
            if 'rnn/leaky_rnn_cell/kernel:0' in var.name
            or 'output/weights:0' in var.name
        ]
    else:
        my_var_list = model.var_list
    model.set_optimizer(var_list=my_var_list)

    # Store results
    log = defaultdict(list)
    log['model_dir'] = model_dir

    # Record time
    t_start = time.time()

    # Use customized session that launches the graph as well
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # penalty on deviation from initial weight
        if hp['l2_weight_init'] > 0:
            anchor_ws = sess.run(model.weight_list)
            for w, w_val in zip(model.weight_list, anchor_ws):
                model.cost_reg += (hp['l2_weight_init'] *
                                   tf.nn.l2_loss(w - w_val))
            # The cost changed, so the optimizer has to be rebuilt.
            model.set_optimizer(var_list=my_var_list)

        # partial weight training
        if ('p_weight_train' in hp and (hp['p_weight_train'] is not None)
                and hp['p_weight_train'] < 1.0):
            for w in model.weight_list:
                w_val = sess.run(w)
                w_size = sess.run(tf.size(w))
                # Random mask: a fraction (1 - p_weight_train) of the entries
                # is softly pinned to its current value through an L2 penalty.
                w_mask_tmp = np.linspace(0, 1, w_size)
                hp['rng'].shuffle(w_mask_tmp)
                ind_fix = w_mask_tmp > hp['p_weight_train']
                w_mask = np.zeros(w_size, dtype=np.float32)
                w_mask[ind_fix] = 1e-1  # will be squared in l2_loss
                w_mask = tf.constant(w_mask)
                w_mask = tf.reshape(w_mask, w.shape)
                model.cost_reg += tf.nn.l2_loss((w - w_val) * w_mask)
            model.set_optimizer(var_list=my_var_list)

        # Build the gradient-norm op once, after the final set_optimizer()
        # call. Creating it inside the loop added a new op to the graph at
        # every evaluation, growing the graph for the whole run.
        grad_norm = tf.global_norm(model.clipped_gs)

        # During the first 1000 steps checkpoints are written ten times as
        # often as evaluations. The guard keeps the interval >= 1 so a small
        # display_step cannot cause a modulo-by-zero.
        early_ckpt_every = max(1, int(display_step) // 10)

        step = 0
        run_ave_time = []  # wall-clock seconds of each training step
        while step * hp['batch_size_train'] <= max_steps:
            try:
                # Validation
                if step % display_step == 0:
                    grad_norm_np = sess.run(grad_norm)
                    log['grad_norm'].append(grad_norm_np.item())
                    log['trials'].append(step * hp['batch_size_train'])
                    log['times'].append(time.time() - t_start)
                    log = do_eval(sess, model, log, hp['rule_trains'])
                    # check if minimum performance is above target
                    if log['perf_min'][-1] > model.hp['target_perf']:
                        print('Perf reached the target: {:0.2f}'.format(
                            hp['target_perf']))
                        break

                    if rich_output:
                        display_rich_output(model, sess, step, log,
                                            model_dir)

                # Training
                dtStart = datetime.now()
                sess.run(model.train_step)
                dtEnd = datetime.now()
                run_ave_time.append((dtEnd - dtStart).total_seconds())

                step += 1
                # Checkpoint schedule: every step for the first 10 steps,
                # every display_step // 10 steps during the first 1000 steps,
                # and every display_step steps throughout.
                if (step < 10
                        or (step < 1000 and step % early_ckpt_every == 0)
                        or step % display_step == 0):
                    model.save_ckpt(step)

            except KeyboardInterrupt:
                print("Optimization interrupted by user")
                break

        print("Optimization finished!")
def __init__(self, observation_space, action_space, config):
    """Build the A3C policy graph: model, value head, loss and stats ops.

    Args:
        observation_space: Observation space; its `shape` sizes the
            observation placeholder.
        action_space: A `gym.spaces.Box` or `gym.spaces.Discrete` space.
        config: Overrides merged on top of the A3C `DEFAULT_CONFIG`.

    Raises:
        UnsupportedSpaceException: If `action_space` is neither a Box nor
            a Discrete space.
    """
    self.config = dict(ray.rllib.agents.a3c.a3c.DEFAULT_CONFIG, **config)
    self.sess = tf.get_default_session()
    model_options = self.config["model"]

    # ---- Policy network -------------------------------------------------
    obs_shape = [None] + list(observation_space.shape)
    self.observations = tf.placeholder(tf.float32, obs_shape)
    dist_class, logit_dim = ModelCatalog.get_action_dist(
        action_space, model_options)
    prev_actions = ModelCatalog.get_action_placeholder(action_space)
    prev_rewards = tf.placeholder(tf.float32, [None], name="prev_reward")
    model_inputs = {
        "obs": self.observations,
        "prev_actions": prev_actions,
        "prev_rewards": prev_rewards
    }
    self.model = ModelCatalog.get_model(model_inputs, observation_space,
                                        logit_dim, model_options)
    action_dist = dist_class(self.model.outputs)

    # Scalar value head on top of the model's last layer, flattened to 1-D.
    value_out = linear(self.model.last_layer, 1, "value",
                       normc_initializer(1.0))
    self.vf = tf.reshape(value_out, [-1])
    scope_name = tf.get_variable_scope().name
    self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      scope_name)

    # ---- Policy loss ----------------------------------------------------
    # The action placeholder's dtype/shape depend on the kind of space.
    if isinstance(action_space, gym.spaces.Box):
        action_dtype = tf.float32
        action_shape = [None, action_space.shape[0]]
    elif isinstance(action_space, gym.spaces.Discrete):
        action_dtype = tf.int64
        action_shape = [None]
    else:
        raise UnsupportedSpaceException(
            "Action space {} is not supported for A3C.".format(
                action_space))
    actions = tf.placeholder(action_dtype, action_shape, name="ac")
    advantages = tf.placeholder(tf.float32, [None], name="advantages")
    self.v_target = tf.placeholder(tf.float32, [None], name="v_target")
    self.loss = A3CLoss(action_dist, actions, advantages, self.v_target,
                        self.vf, self.config["vf_loss_coeff"],
                        self.config["entropy_coeff"])

    # ---- Base-class initialisation --------------------------------------
    loss_in = [
        ("obs", self.observations),
        ("actions", actions),
        ("prev_actions", prev_actions),
        ("prev_rewards", prev_rewards),
        ("advantages", advantages),
        ("value_targets", self.v_target),
    ]
    LearningRateSchedule.__init__(self, self.config["lr"],
                                  self.config["lr_schedule"])
    # Built after the learning-rate schedule so that the sampling op is
    # created at the same point in the graph as before.
    graph_kwargs = dict(
        obs_input=self.observations,
        action_sampler=action_dist.sample(),
        loss=self.loss.total_loss,
        loss_inputs=loss_in,
        state_inputs=self.model.state_in,
        state_outputs=self.model.state_out,
        prev_action_input=prev_actions,
        prev_reward_input=prev_rewards,
        seq_lens=self.model.seq_lens,
        max_seq_len=self.config["model"]["max_seq_len"])
    TFPolicyGraph.__init__(self, observation_space, action_space,
                           self.sess, **graph_kwargs)

    # ---- Training statistics --------------------------------------------
    stats = {}
    stats["cur_lr"] = tf.cast(self.cur_lr, tf.float64)
    stats["policy_loss"] = self.loss.pi_loss
    stats["policy_entropy"] = self.loss.entropy
    stats["grad_gnorm"] = tf.global_norm(self._grads)
    stats["var_gnorm"] = tf.global_norm(self.var_list)
    stats["vf_loss"] = self.loss.vf_loss
    stats["vf_explained_var"] = explained_variance(self.v_target, self.vf)
    self.stats_fetches = {"stats": stats}

    self.sess.run(tf.global_variables_initializer())
def add_optimizer_op(self, scope):
    """Build the Adam training op and the gradient-norm tensor for `scope`.

    Steps:
      1. create an Adam optimizer using `self.lr`;
      2. compute the gradients of `self.loss` w.r.t. the trainable
         variables collected under `scope`;
      3. if `self.config.grad_clip` is set, clip every gradient tensor
         individually to norm `self.config.clip_val`;
      4. build the op that applies the (possibly clipped) gradients;
      5. compute the global norm of those same gradients.

    Note that the ops are returned; nothing is stored on `self`.

    Args:
        scope: Variable-scope name whose trainable variables are optimized.

    Returns:
        Tuple `(train_op, grad_norm)`: the op that performs one update
        step, and a scalar tensor holding the global norm of the gradients
        that are applied (i.e. after clipping when clipping is enabled).
    """
    var_lst = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope=scope)
    optimizer = tf.train.AdamOptimizer(self.lr)
    grads_and_vars = optimizer.compute_gradients(self.loss,
                                                 var_list=var_lst)

    if self.config.grad_clip:
        # compute_gradients yields (None, var) for variables the loss does
        # not depend on; tf.clip_by_norm cannot take None, so such entries
        # are passed through untouched (apply_gradients and global_norm
        # both skip None gradients).
        grads_and_vars = [
            (grad if grad is None else tf.clip_by_norm(
                grad, self.config.clip_val), var)
            for grad, var in grads_and_vars
        ]

    train_op = optimizer.apply_gradients(grads_and_vars)
    # global norm is just a norm of stacked vectors
    grad_norm = tf.global_norm([grad for grad, _ in grads_and_vars])
    return train_op, grad_norm
def __init__(self, s_size, a_size, scope, trainer):
    """Build a conv + LSTM actor-critic network under `scope`.

    Args:
        s_size: Width of the flat input placeholder (reshaped internally
            to 84x84x1 images).
        a_size: Number of policy outputs.
        scope: Variable-scope name. Any scope other than 'global' also
            gets loss, gradient and update ops.
        trainer: Optimizer whose `apply_gradients` pushes the local
            gradients onto the variables in the 'global' scope.
    """
    with tf.variable_scope(scope):
        # ---- Input and visual encoding layers ----
        self.inputs = tf.placeholder(shape=[None, s_size],
                                     dtype=tf.float32)
        self.imageIn = tf.reshape(self.inputs, shape=[-1, 84, 84, 1])
        conv_common = dict(activation_fn=tf.nn.elu, padding='VALID')
        self.conv1 = slim.conv2d(inputs=self.imageIn,
                                 num_outputs=16,
                                 kernel_size=[8, 8],
                                 stride=[4, 4],
                                 **conv_common)
        self.conv2 = slim.conv2d(inputs=self.conv1,
                                 num_outputs=32,
                                 kernel_size=[4, 4],
                                 stride=[2, 2],
                                 **conv_common)
        flat_features = slim.flatten(self.conv2)
        hidden = slim.fully_connected(flat_features,
                                      256,
                                      activation_fn=tf.nn.elu)

        # ---- Recurrent network for temporal dependencies ----
        cell = tf.contrib.rnn.BasicLSTMCell(256, state_is_tuple=True)
        c_size = cell.state_size.c
        h_size = cell.state_size.h
        # Zero state used to start an episode.
        self.state_init = [
            np.zeros((1, c_size), np.float32),
            np.zeros((1, h_size), np.float32),
        ]
        c_in = tf.placeholder(tf.float32, [1, c_size])
        h_in = tf.placeholder(tf.float32, [1, h_size])
        self.state_in = (c_in, h_in)
        # The whole batch is fed to the LSTM as ONE sequence: a batch axis
        # of size 1 is added in front and the sequence length is the number
        # of input rows.
        rnn_in = tf.expand_dims(hidden, [0])
        step_size = tf.shape(self.imageIn)[:1]
        initial_state = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)
        lstm_outputs, final_state = tf.nn.dynamic_rnn(
            cell,
            rnn_in,
            initial_state=initial_state,
            sequence_length=step_size,
            time_major=False)
        final_c, final_h = final_state
        self.state_out = (final_c[:1, :], final_h[:1, :])
        rnn_out = tf.reshape(lstm_outputs, [-1, 256])

        # ---- Output layers for policy and value estimations ----
        self.policy = slim.fully_connected(
            rnn_out,
            a_size,
            activation_fn=tf.nn.softmax,
            weights_initializer=normalized_columns_initializer(0.01),
            biases_initializer=None)
        self.value = slim.fully_connected(
            rnn_out,
            1,
            activation_fn=None,
            weights_initializer=normalized_columns_initializer(1.0),
            biases_initializer=None)

        # The global network only holds parameters; loss and update ops
        # are needed by worker networks alone.
        if scope == 'global':
            return

        self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
        self.actions_onehot = tf.one_hot(self.actions,
                                         a_size,
                                         dtype=tf.float32)
        self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
        self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)

        # Probability the policy assigned to each action actually taken.
        taken_probs = self.policy * self.actions_onehot
        self.responsible_outputs = tf.reduce_sum(taken_probs, [1])

        # ---- Loss functions ----
        value_flat = tf.reshape(self.value, [-1])
        squared_error = tf.square(self.target_v - value_flat)
        self.value_loss = 0.5 * tf.reduce_sum(squared_error)
        log_policy = tf.log(self.policy)
        self.entropy = -tf.reduce_sum(self.policy * log_policy)
        log_taken = tf.log(self.responsible_outputs)
        self.policy_loss = -tf.reduce_sum(log_taken * self.advantages)
        self.loss = (0.5 * self.value_loss + self.policy_loss -
                     self.entropy * 0.01)

        # ---- Gradients from the local network using local losses ----
        local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope)
        self.gradients = tf.gradients(self.loss, local_vars)
        self.var_norms = tf.global_norm(local_vars)
        clipped, self.grad_norms = tf.clip_by_global_norm(
            self.gradients, 40.0)

        # ---- Apply local gradients to the global network ----
        global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                        'global')
        self.apply_grads = trainer.apply_gradients(
            zip(clipped, global_vars))
def train_loop(
    session,
    inputs,
    cost,
    train_data,
    stop_after,
    prints=[],
    test_data=None,
    test_every=None,
    callback=None,
    callback_every=None,
    inject_iteration=False,
    optimizer=tf.train.AdamOptimizer(),
    save_every=1000,
    save_output=False
    ):
    """Run a resumable training loop that minimizes `cost`.

    Gradients are clipped to a global norm of 5.0 before being applied.
    If TRAIN_LOOP_FILE exists, the loop state and parameters are restored
    and the data generator is fast-forwarded to the saved iteration;
    otherwise all variables are initialized.

    Args:
        session: TensorFlow session used to run, save and restore.
        inputs: Symbolic inputs; zipped with each batch to build feed_dict.
        cost: Scalar tensor to minimize; always reported as 'cost'.
        train_data: Zero-argument callable returning an iterator over
            training batches (one pass = one epoch).
        stop_after: Iteration count at which the loop saves and stops.
        prints: Extra (name, tensor) pairs to evaluate and log. The list
            is not mutated.
        test_data: None, or a zero-argument callable returning an iterable
            of test batches.
        test_every: Evaluate on `test_data` every this many iterations.
        callback: None, or a callable invoked with a tag "iter<N>".
        callback_every: Invoke `callback` every this many iterations.
        inject_iteration: If True, prepend np.int32(iteration) to every
            batch before feeding it.
        optimizer: Optimizer used for compute/apply gradients.
            NOTE(review): the default instance is created once, when the
            function is defined, and is therefore shared between calls.
        save_every: Save output and parameters every this many iterations.
        save_output: If False, saving is skipped entirely.
    """
    prints = [('cost', cost)] + prints

    grads_and_vars = optimizer.compute_gradients(
        cost,
        colocate_gradients_with_ops=True
    )

    print("Params:")
    total_param_count = 0
    for g, v in grads_and_vars:
        shape = v.get_shape()
        shape_str = ",".join([str(x) for x in shape])

        param_count = 1
        for dim in shape:
            param_count *= int(dim)
        total_param_count += param_count

        if g is None:
            print("\t{} ({}) [no grad!]".format(v.name, shape_str))
        else:
            print("\t{} ({})".format(v.name, shape_str))
    print("Total param count: {}".format(
        locale.format_string("%d", total_param_count, grouping=True)
    ))

    grads = [g for g, _ in grads_and_vars]
    param_vars = [v for _, v in grads_and_vars]

    # The unclipped norm is reported, and reused by the clipping op so it is
    # not computed twice.
    global_norm = tf.global_norm(grads)
    prints = prints + [('gradnorm', global_norm)]

    grads, global_norm = tf.clip_by_global_norm(
        grads, 5.0, use_norm=global_norm)
    train_op = optimizer.apply_gradients(list(zip(grads, param_vars)))

    def train_fn(input_vals):
        # Run one update step; the train op's own output is dropped.
        return session.run(
            [p[1] for p in prints] + [train_op],
            feed_dict={sym: real for sym, real in zip(inputs, input_vals)}
        )[:-1]

    def eval_fn(input_vals):
        # Evaluate the printed tensors without updating parameters.
        return session.run(
            [p[1] for p in prints],
            feed_dict={sym: real for sym, real in zip(inputs, input_vals)}
        )

    # Loop state; pickled to TRAIN_LOOP_FILE so a run can be resumed.
    _vars = {
        'epoch': 0,
        'iteration': 0,
        'seconds': 0.,
        'last_callback': 0,
        'last_test': 0
    }

    train_generator = train_data()

    saver = tf.train.Saver()

    if os.path.isfile(TRAIN_LOOP_FILE):
        print("Resuming interrupted train loop session")
        # NOTE(review): pickle.load is only safe because this file is
        # written by this same loop; never point it at untrusted data.
        with open(TRAIN_LOOP_FILE, 'rb') as f:
            _vars = pickle.load(f)
        saver.restore(session, os.getcwd() + "/" + PARAMS_FILE)

        print("Fast-fowarding dataset generator")
        dataset_iters = 0
        while dataset_iters < _vars['iteration']:
            try:
                next(train_generator)
            except StopIteration:
                train_generator = train_data()
                next(train_generator)
            dataset_iters += 1
    else:
        print("Initializing variables...")
        session.run(tf.initialize_all_variables())
        print("done!")

    # One-element list so the nested functions can rebind the buffer.
    train_output_entries = [[]]

    def log(outputs, test, state, extra_things_to_print):
        # Record one row of outputs and echo it to stdout.
        entry = collections.OrderedDict()
        for key in ['epoch', 'iteration', 'seconds']:
            entry[key] = state[key]
        prefix = 'test ' if test else 'train '
        for i, p in enumerate(prints):
            entry[prefix + p[0]] = outputs[i]

        train_output_entries[0].append(entry)

        to_print = list(entry.items()) + list(extra_things_to_print)
        parts = []
        for k, v in to_print:
            if isinstance(v, int):
                parts.append("{}:{}".format(k, v))
            else:
                parts.append("{}:{:.4f}".format(k, v))
        print("\t".join(parts))

    def save_train_output_and_params(iteration):
        if not save_output:
            return

        print("Saving output and params...")

        # Saving weights takes a while. To minimize risk of interruption during
        # a critical segment, we write weights to a temp file, delete the old
        # file, and rename the temp file.
        start_time = time.time()
        saver.save(session, PARAMS_FILE + '_tmp')
        print("saver.save time: {}".format(time.time() - start_time))
        start_time = time.time()
        if os.path.isfile(PARAMS_FILE):
            os.remove(PARAMS_FILE)
        os.rename(PARAMS_FILE + '_tmp', PARAMS_FILE)
        print("move and rename time: {}".format(time.time() - start_time))

        start_time = time.time()
        with open(TRAIN_LOOP_FILE, 'wb') as f:
            pickle.dump(_vars, f)
        print("_vars pickle dump time: {}".format(time.time() - start_time))

        start_time = time.time()
        with open(TRAIN_OUTPUT_FILE, 'a') as f:
            for entry in train_output_entries[0]:
                # NumPy scalars are not JSON-serializable; convert them to
                # plain Python numbers first.
                for k, v in list(entry.items()):
                    if isinstance(v, np.generic):
                        entry[k] = v.item()
                f.write(json.dumps(entry) + "\n")
        print("ndjson write time: {}".format(time.time() - start_time))

        train_output_entries[0] = []

    while True:

        if _vars['iteration'] == stop_after:
            save_train_output_and_params(_vars['iteration'])

            print("Done!")

            try:  # This only matters on Ishaan's computer
                import experiment_tools
                experiment_tools.send_sms("done!")
            except ImportError:
                pass

            break

        data_load_start_time = time.time()
        try:
            input_vals = next(train_generator)
        except StopIteration:
            # Start a new epoch with the first batch of a fresh generator.
            # (An extra next() call here used to throw away the following
            # batch and could raise on single-batch datasets.)
            train_generator = train_data()
            input_vals = next(train_generator)
            _vars['epoch'] += 1
        data_load_time = time.time() - data_load_start_time

        if inject_iteration:
            input_vals = [np.int32(_vars['iteration'])] + list(input_vals)

        start_time = time.time()
        outputs = train_fn(input_vals)
        run_time = time.time() - start_time

        _vars['seconds'] += run_time
        _vars['iteration'] += 1

        log(outputs, False, _vars,
            [('iter time', run_time), ('data time', data_load_time)])

        if ((test_data is not None) and
                _vars['iteration'] % test_every == (test_every - 1)):
            if inject_iteration:
                test_outputs = [
                    eval_fn([np.int32(_vars['iteration'])] + list(test_vals))
                    for test_vals in test_data()
                ]
            else:
                test_outputs = [
                    eval_fn(test_vals)
                    for test_vals in test_data()
                ]
            mean_test_outputs = np.array(test_outputs).mean(axis=0)

            log(mean_test_outputs, True, _vars, [])

        if ((callback is not None) and
                _vars['iteration'] % callback_every == (callback_every - 1)):
            tag = "iter{}".format(_vars['iteration'])
            callback(tag)

        if _vars['iteration'] % save_every == (save_every - 1):
            save_train_output_and_params(_vars['iteration'])
def __init__(self, args, sample=False):
    """Build an RNN with a mixture-density output layer and its train op.

    The network maps each time step to `num_mixture` mixture weights plus,
    for every mixture component and each of the `chunk_samples` positions,
    a mean, a spread parameter and a `rho` value in (0, 1). The cost is the
    negative mixture log-likelihood of `target_data`, normalized by
    batch_size * seq_length * chunk_samples.

    Args:
        args: Hyperparameter namespace. Reads `model`, `rnn_size`,
            `num_layers`, `keep_prob`, `batch_size`, `seq_length`,
            `chunk_samples`, `num_mixture` and `grad_clip`.
            NOTE: when `sample` is true, `args.batch_size` and
            `args.seq_length` are overwritten with 1 on the caller's object.
        sample: If true, build the graph for step-by-step sampling
            (batch and sequence length 1, no dropout wrapper).

    Raises:
        Exception: If `args.model` is not 'rnn', 'gru' or 'lstm'.
    """

    def tf_normal(x, mu, s, rho):
        # Per-component log-likelihood of the targets, summed over axis 1.
        # The last axis of x is split in two halves of chunk_samples each:
        #   * the first half is scored under a Gaussian with mean `mu`;
        #     `s` is used as a VARIANCE here (norm^2 / s, sqrt(2*pi*s));
        #   * the second half is scored against `rho` with a Bernoulli-like
        #     term ((1+x)*log(rho) + (1-x)*log(1-rho)) / 2
        #     -- presumably those targets lie in {-1, +1}; TODO confirm.
        with tf.variable_scope('normal'):
            # Add a trailing axis so x broadcasts against the mixture axis.
            x = tf.expand_dims(x,2)
            norm = tf.sub(x[:,:args.chunk_samples,:], mu)
            z = tf.div(tf.square(norm), s)
            tf.histogram_summary('z-score', tf.div(norm,tf.sqrt(s)))
            tf.histogram_summary('std-dev', tf.sqrt(s))
            tf.scalar_summary('std-dev-mean', tf.reduce_mean(tf.sqrt(s)))
            # The 1e-20 floors keep the logarithms finite.
            denom_log = tf.log(tf.maximum(1e-20,tf.sqrt(2*np.pi*s)),name='denom_log')
            result = tf.reduce_sum(-z/2-denom_log +
                                   (tf.log(rho,name='log_rho')*(1+x[:,args.chunk_samples:,:])
                                    +tf.log(tf.maximum(1e-20,1-rho),name='log_rho_inv')*(1-x[:,args.chunk_samples:,:]))/2, 1)
        return result

    def get_lossfunc(z_pi, z_mu, z_sigma, z_rho, x):
        # Negative log-likelihood of x under the mixture, summed over rows:
        # log-sum-exp over components of log(pi) + component log-likelihood.
        normals = tf_normal(x, z_mu, z_sigma, z_rho)
        result = -tf_logsumexp(tf.log(tf.maximum(1e-20,z_pi))+normals)
        return tf.reduce_sum(result)

    def tf_logsumexp(x):
        # Numerically stable log(sum(exp(x))) along axis 1: the row maximum
        # is subtracted before exponentiating and added back afterwards.
        with tf.variable_scope('logsumexp'):
            max_val = tf.reduce_max(x,1, keep_dims=True)
            ret = tf.log(tf.reduce_sum(tf.exp(x - max_val), 1, keep_dims=True)) + max_val
            return ret

    def get_mixture_coef(output):
        # Slice the flat network output into mixture parameters.
        # With K = num_mixture and C = chunk_samples the columns are:
        #   [0, K)                -> z_pi    (mixture logits)
        #   [K, (C+1)K)           -> z_mu    reshaped to [-1, C, K]
        #   [(C+1)K, (2C+1)K)     -> z_sigma reshaped to [-1, C, K]
        #   [(2C+1)K, end)        -> z_rho   reshaped to [-1, C, K]
        with tf.variable_scope('get_mixture'):
            z = output
            z_pi = z[:,:self.num_mixture]
            z_mu = tf.reshape(z[:,self.num_mixture:(args.chunk_samples+1)*self.num_mixture],[-1,args.chunk_samples,self.num_mixture],name='z_mu')
            z_sigma = tf.reshape(z[:,(args.chunk_samples+1)*self.num_mixture:(2*args.chunk_samples+1)*self.num_mixture],[-1,args.chunk_samples,self.num_mixture])
            z_rho = tf.reshape(z[:,(2*args.chunk_samples+1)*self.num_mixture:],[-1,args.chunk_samples,self.num_mixture])

            # apply transformations
            # (a softmax with a lower bound was tried previously:
            #  (softmax(z_pi) + 0.01) / (1 + 0.01 * num_mixture))
            z_pi = tf.nn.softmax(z_pi, name='z_pi')
            # exp keeps the spread positive; sigmoid maps rho into (0, 1),
            # floored at 1e-20 so that log(rho) stays finite.
            z_sigma = tf.exp(z_sigma, name='z_sigma')
            z_rho = tf.maximum(1e-20,tf.sigmoid(z_rho, name='z_rho'))

        return [z_pi, z_mu, z_sigma, z_rho]

    self.args = args
    if sample:
        # Sampling feeds one step of one sequence at a time.
        args.batch_size = 1
        args.seq_length = 1

    if args.model == 'rnn':
        cell_fn = tf.nn.rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = tf.nn.rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = tf.nn.rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size)

    # NOTE(review): the same cell object is repeated for every layer.
    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * args.num_layers)

    if (sample == False and args.keep_prob < 1): # training mode
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob = args.keep_prob)

    self.cell = cell

    # Inputs and targets: [batch_size, seq_length, 2*chunk_samples].
    self.input_data = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, args.seq_length, 2*args.chunk_samples], name='input_data')
    self.target_data = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, args.seq_length, 2*args.chunk_samples],name = 'target_data')
    self.initial_state = cell.zero_state(batch_size=args.batch_size, dtype=tf.float32)

    self.num_mixture = args.num_mixture
    # Output width: per mixture component, one weight plus three parameters
    # (mu, sigma, rho) for each of the chunk_samples positions.
    NOUT = self.num_mixture * (1 + 3*(args.chunk_samples))

    output_w = tf.Variable(tf.random_normal([args.rnn_size, NOUT],stddev=0.2), name="output_w")
    output_b = tf.Variable(tf.zeros([NOUT]), name="output_b")

    # input shape: (batch_size, n_steps, n_input)
    inputs = tf.transpose(self.input_data, [1, 0, 2])  # permute n_steps and batch_size
    inputs = tf.reshape(inputs, [-1, 2*args.chunk_samples]) # (n_steps*batch_size, n_input)

    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    inputs = tf.split(0, args.seq_length, inputs) # n_steps * (batch_size, n_hidden)

    # Get lstm cell output
    outputs, last_state = tf.nn.rnn(cell, inputs, initial_state=self.initial_state)
    # Stack the per-step outputs, go back to batch-major order and flatten
    # to rows of rnn_size before the output projection.
    output = tf.transpose(tf.pack(outputs), [1,0,2])
    output = tf.reshape(output, [-1, args.rnn_size])
    output = tf.nn.xw_plus_b(output, output_w, output_b)
    self.final_state = last_state

    # reshape target data so that it is compatible with prediction shape
    flat_target_data = tf.reshape(self.target_data,[-1, 2*args.chunk_samples])

    [o_pi, o_mu, o_sigma, o_rho] = get_mixture_coef(output)

    self.pi = o_pi
    self.mu = o_mu
    self.sigma = o_sigma
    self.rho = o_rho

    lossfunc = get_lossfunc(o_pi, o_mu, o_sigma, o_rho, flat_target_data)
    self.cost = lossfunc / (args.batch_size * args.seq_length * args.chunk_samples)
    tf.scalar_summary('cost', self.cost)

    # The learning rate is a non-trainable variable initialized to 0.0.
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads = tf.gradients(self.cost, tvars)
    # Clip by global norm only when the gradient norm is non-negligible;
    # otherwise pass the raw gradients through.
    # NOTE(review): the true branch calls tf.gradients a second time
    # instead of reusing `grads`.
    grads = tf.cond(
        tf.global_norm(grads) > 1e-20,
        lambda: tf.clip_by_global_norm(tf.gradients(self.cost, tvars), args.grad_clip)[0],
        lambda: grads)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def build_graph(self, features, labels, mode, params):
    """Build the inference graph or the multi-tower training graph.

    Args:
        features: Model inputs. Passed to the model directly in INFER mode
            and sharded across towers by `_shard_inputs` in TRAIN mode.
        labels: Unused (deleted on entry).
        mode: A `tf.contrib.learn.ModeKeys` value. Only INFER and TRAIN
            are handled.
        params: Unused (deleted on entry).

    Returns:
        A 6-tuple `(loss, vars, grads, predictions, train_op, scaffold)`:
          * INFER: `(None, None, None, predictions, None, None)` where
            `predictions` is `{"predictions": sample_words}`.
          * TRAIN: `(loss, master_params, master_grads, None, train_op,
            scaffold)`; params/grads are those of the first apply device.
        For any other mode the function falls through and returns None.
    """
    del labels, params
    misc_utils.print_out("Running fast mode_fn")

    hparams = self.hparams

    # Create global_step
    tf.train.get_or_create_global_step()

    if mode == tf.contrib.learn.ModeKeys.INFER:
        # Doing inference only on one GPU
        inf_hparams = tf.contrib.training.HParams(**hparams.values())
        inf_hparams.set_hparam("num_gpus", 1)
        # Inference is done in fp32 and in the same way as that of dist_strategy.
        inf_hparams.set_hparam("use_fp16", False)

        misc_utils.print_out("inference hparmas:")
        misc_utils.print_hparams(inf_hparams)

        # Create variable_mgr
        var_mgr = self._get_variable_mgr(inf_hparams)

        with mixed_precision_scope(), tf.device("gpu:0"), tf.name_scope(
                "tower_0"), var_mgr.create_outer_variable_scope(0):
            model = gnmt_model.GNMTModel(inf_hparams,
                                         mode=mode,
                                         features=features)
            sample_ids = model.sample_id
            # Map predicted token ids back to strings.
            reverse_target_vocab_table = lookup_ops.index_to_string_table_from_file(
                inf_hparams.tgt_vocab_file, default_value=vocab_utils.UNK)
            sample_words = reverse_target_vocab_table.lookup(
                tf.to_int64(sample_ids))
            # make sure outputs is of shape [batch_size, time] or [beam_width,
            # batch_size, time] when using beam search.
            if inf_hparams.time_major:
                sample_words = tf.transpose(sample_words)
            elif sample_words.shape.ndims == 3:
                # beam search output in [batch_size, time, beam_width] shape.
                sample_words = tf.transpose(sample_words, [2, 0, 1])
            predictions = {"predictions": sample_words}
            # return loss, vars, grads, predictions, train_op, scaffold
            return None, None, None, predictions, None, None
    elif mode == tf.contrib.learn.ModeKeys.TRAIN:
        num_towers = hparams.num_gpus
        # Shard inputs
        tower_features = self._shard_inputs(features, num_towers)
        # Create loss scale vars if necessary
        loss_scale, loss_scale_normal_steps = self._create_loss_scale_vars(
        )

        # Create variable_mgr
        var_mgr = self._get_variable_mgr(hparams)

        # Build per-tower fprop and bprop
        devices = var_mgr.get_devices()
        tower_gradvars = []
        tower_scopes = []
        var_scopes = []
        train_losses = []
        learning_rates = []
        batch_sizes = []

        def get_optimizer(hparams, learning_rate):
            """Return the SGD or Adam optimizer selected by hparams.optimizer."""
            mlperf_log.gnmt_print(key=mlperf_log.OPT_NAME,
                                  value=hparams.optimizer)
            if hparams.optimizer == "sgd":
                opt = tf.train.GradientDescentOptimizer(learning_rate)
            elif hparams.optimizer == "adam":
                # The logged values are the hyperparameters reported for
                # Adam; the optimizer below is built with its defaults.
                mlperf_log.gnmt_print(key=mlperf_log.OPT_HP_ADAM_BETA1,
                                      value=0.9)
                mlperf_log.gnmt_print(key=mlperf_log.OPT_HP_ADAM_BETA2,
                                      value=0.999)
                mlperf_log.gnmt_print(key=mlperf_log.OPT_HP_ADAM_EPSILON,
                                      value=1e-8)
                opt = tf.train.AdamOptimizer(learning_rate)
            else:
                raise ValueError("Unknown optimizer type %s" %
                                 hparams.optimizer)
            return opt

        def fprop_and_bprop(tid):
            """Build tower `tid`: forward pass, loss and its gradients.

            Returns a flat list [train_loss, learning_rate, batch_size,
            *grads] so the result can pass through maybe_xla_compile.
            """
            model = gnmt_model.GNMTModel(hparams,
                                         mode=mode,
                                         features=tower_features[tid])
            # sync training.
            assert model.learning_rate is not None
            # The following handles shouldn't be built in when doing manual
            assert model.grad_norm is None
            assert model.update is None
            tower_loss = model.train_loss
            # Only check loss numerics if in fp16
            if hparams.use_fp16 and hparams.check_tower_loss_numerics:
                tower_loss = tf.check_numerics(
                    tower_loss, "tower_%d has Inf/NaN loss" % tid)
            # Cast to fp32, otherwise would easily overflow.
            tower_loss = tf.to_float(tower_loss)
            var_params, grads = self._compute_tower_grads(
                tower_loss,
                var_mgr.trainable_variables_on_device(tid, tid),
                use_fp16=hparams.use_fp16,
                loss_scale=loss_scale,
                colocate_gradients_with_ops=hparams.
                colocate_gradients_with_ops)
            self._print_varinfo(var_params, tid)
            # NOTE(review): the reported loss is model.train_loss, not the
            # checked/cast tower_loss used for the gradients.
            res = [model.train_loss, model.learning_rate, model.batch_size]
            res.extend(grads)
            return res

        def unpack_fprop_and_bprop_output(output):
            # Inverse of the flat list layout produced by fprop_and_bprop.
            train_loss = output[0]
            learning_rate = output[1]
            batch_size = output[2]
            grads = output[3:]
            return train_loss, learning_rate, batch_size, grads

        with mixed_precision_scope():
            for tid in range(num_towers):
                # Towers are assigned to devices round-robin.
                with tf.device(devices[tid % len(devices)]), tf.name_scope(
                        "tower_%s" % tid) as scope:
                    tower_scopes.append(scope)
                    with var_mgr.create_outer_variable_scope(
                            tid) as var_scope:
                        var_scopes.append(var_scope)

                        outputs = maybe_xla_compile(
                            hparams, fprop_and_bprop, tid)
                        (train_loss, learning_rate, batch_size,
                         grads) = unpack_fprop_and_bprop_output(outputs)
                        train_losses.append(train_loss)
                        learning_rates.append(learning_rate)
                        batch_sizes.append(batch_size)
                    # Pair each gradient with its variable, by position.
                    var_params = var_mgr.trainable_variables_on_device(
                        tid, tid)
                    tower_gradvars.append(list(zip(grads, var_params)))

        # Add summaries
        if hparams.show_metrics:
            tf.summary.scalar("learning_rate", learning_rates[0])
            if loss_scale:
                tf.summary.scalar("loss_scale", loss_scale)
                if hparams.enable_auto_loss_scale:
                    tf.summary.scalar("loss_scale_normal_steps",
                                      loss_scale_normal_steps)
        misc_utils.print_out("Finish building fprop and per-tower bprop.")

        # Aggregate gradients
        # The following compute the aggregated grads for each tower, stored in
        # opaque grad_states structure.
        apply_grads_devices, grad_states = var_mgr.preprocess_device_grads(
            tower_gradvars)
        master_grads = None
        master_params = None
        update_ops = []
        for i, device in enumerate(apply_grads_devices):
            with tf.device(device), tf.name_scope(tower_scopes[i]):
                # Get per-tower grads.
                with tf.name_scope("get_gradients_to_apply"):
                    avg_gradvars = var_mgr.get_gradients_to_apply(
                        i, grad_states)
                avg_grads = [gv[0] for gv in avg_gradvars]

                # gradients post-processing
                with tf.name_scope("clip_gradients"):
                    if hparams.clip_grads:
                        clipped_grads, grad_norm = model_helper.gradient_clip(
                            avg_grads,
                            max_gradient_norm=hparams.max_gradient_norm)
                        # summary the grad on the 1st tower
                        if i == 0 and hparams.show_metrics:
                            tf.summary.scalar("grad_norm", grad_norm)
                            tf.summary.scalar(
                                "clipped_grad_norm",
                                tf.global_norm(clipped_grads))
                    else:
                        clipped_grads = avg_grads
                    # The first apply device provides the returned grads.
                    if i == 0:
                        master_grads = clipped_grads

                # Build apply-gradients ops
                clipped_gradvars = list(
                    zip(clipped_grads, [gv[1] for gv in avg_gradvars]))
                if i == 0:
                    master_params = [gv[1] for gv in avg_gradvars]
                with tf.name_scope("append_gradient_ops"):
                    loss_scale_params = variable_mgr_util.AutoLossScaleParams(
                        enable_auto_loss_scale=hparams.
                        enable_auto_loss_scale,
                        loss_scale=loss_scale,
                        loss_scale_normal_steps=loss_scale_normal_steps,
                        inc_loss_scale_every_n=hparams.
                        fp16_inc_loss_scale_every_n,
                        is_chief=True)
                    opt = get_optimizer(hparams, learning_rates[i])
                    # Appends this device's update op(s) to update_ops.
                    var_mgr.append_apply_gradients_ops(
                        grad_states, opt, clipped_gradvars, update_ops,
                        loss_scale_params)
        misc_utils.print_out("Finish building grad aggregation.")

        assert len(update_ops) == num_towers
        train_op = tf.group(update_ops)
        # The global step is incremented only after every tower's update
        # has run; the increment op is what callers receive as train_op.
        with tf.control_dependencies([train_op]):
            global_step = tf.train.get_global_step()
            train_op = global_step.assign_add(1)

        # Compute loss on the first gpu
        # TODO(jamesqin): optimize it?
        with tf.device("gpu:0"):
            loss = misc_utils.weighted_avg(train_losses, batch_sizes)

        # Create local init_ops
        # TODO(jamesqin): handle resource variables!
        # At present if not using mirror strategy, not using resource vars.
        local_init_ops = []
        local_init_op = tf.local_variables_initializer()
        # Post-init ops are created under a dependency on local-variable
        # initialization.
        with tf.control_dependencies([local_init_op]):
            local_init_ops.append(var_mgr.get_post_init_ops())
        local_init_ops.extend([local_init_op, tf.tables_initializer()])

        saveable_vars = var_mgr.savable_variables()
        # Add saveables for cudnn vars in master tower.
        saveable_objects = tf.get_collection(tf.GraphKeys.SAVEABLE_OBJECTS)
        # Keep only saveables whose name contains "v0".
        saveable_objects = [x for x in saveable_objects if "v0" in x.name]

        # Diagnostic listings of the variable sets.
        misc_utils.print_out("Saveable vars(%d): " % len(saveable_vars))
        for mv in saveable_vars:
            misc_utils.print_out(mv.name)

        misc_utils.print_out("All global trainable vars(%d): " %
                             len(tf.trainable_variables()))
        for tv in tf.trainable_variables():
            misc_utils.print_out(tv.name)

        misc_utils.print_out("All global vars(%d): " %
                             len(tf.global_variables()))
        for gv in tf.global_variables():
            misc_utils.print_out(gv.name)

        misc_utils.print_out("master backproped params(%d): " %
                             len(master_params))
        for mp in master_params:
            misc_utils.print_out(mp.name)

        # Note the cudnn vars are skipped the init check. :(
        scaffold = tf.train.Scaffold(
            ready_op=tf.report_uninitialized_variables(saveable_vars),
            ready_for_local_init_op=tf.report_uninitialized_variables(
                saveable_vars),
            local_init_op=tf.group(*local_init_ops),
            saver=tf.train.Saver(saveable_vars + saveable_objects))

        misc_utils.print_out("Finish building model_fn")
        # return loss, vars, grads, predictions, train_op, scaffold
        return loss, master_params, master_grads, None, train_op, scaffold
def __init__(self, args, vocab):
    """Build the graph: one shared encoder and five decoder stacks.

    The encoder (scope 'encoder'), the embedding and the output projection
    are shared. Five decoders are built under the scopes 'generator' and
    'generator2' .. 'generator5'. Each decoder gets its own pair of
    discriminators, its own Adam optimizer, its own train ops and its own
    `tf.train.Saver`.

    The statement order of the original is kept on purpose: every
    `tf.train.Saver()` is created without a variable list at a specific
    point of graph construction, so moving code around could change which
    variables each saver handles.

    Args:
        args: Configuration object. The attributes read here are `dim_y`,
            `dim_z`, `n_layers`, `max_seq_length`, `filter_sizes` (a
            comma-separated string of ints) and `n_filters`.
        vocab: Vocabulary object. The attributes read here are `embedding`
            (used as the initial value of the embedding variable after
            `astype(np.float32)`) and `size`.
    """
    dim_y = args.dim_y
    dim_z = args.dim_z
    dim_h = dim_y + dim_z
    n_layers = args.n_layers
    max_len = args.max_seq_length
    filter_sizes = [int(x) for x in args.filter_sizes.split(',')]
    n_filters = args.n_filters
    beta1, beta2 = 0.5, 0.999
    grad_clip = 30.0

    # ----- placeholders -----
    self.dropout = tf.placeholder(tf.float32, name='dropout')
    self.learning_rate = tf.placeholder(tf.float32, name='learning_rate')
    self.rho = tf.placeholder(tf.float32, name='rho')
    self.gamma = tf.placeholder(tf.float32, name='gamma')

    self.batch_len = tf.placeholder(tf.int32, name='batch_len')
    self.batch_size = tf.placeholder(tf.int32, name='batch_size')
    self.enc_inputs = tf.placeholder(
        tf.int32, [None, None],  # size * len
        name='enc_inputs')
    self.dec_inputs = tf.placeholder(tf.int32, [None, None],
                                     name='dec_inputs')
    self.targets = tf.placeholder(tf.int32, [None, None], name='targets')
    self.weights = tf.placeholder(tf.float32, [None, None], name='weights')
    self.labels = tf.placeholder(tf.float32, [None], name='labels')

    # Small constant matmul exposed as `self.lineartest` (a test op that
    # does not depend on any model input).
    testing1 = tf.constant([[37.0, -23.0], [1.0, 4.0]])
    testing2 = tf.constant([[37.0, -23.0], [1.0, 4.0]])
    self.lineartest = tf.matmul(testing1, testing2)

    labels = tf.reshape(self.labels, [-1, 1])

    # ----- shared embedding and output projection -----
    embedding = tf.get_variable(
        'embedding', initializer=vocab.embedding.astype(np.float32))
    with tf.variable_scope('projection'):
        proj_W = tf.get_variable('W', [dim_h, vocab.size])
        proj_b = tf.get_variable('b', [vocab.size])

    enc_inputs = tf.nn.embedding_lookup(embedding, self.enc_inputs)
    dec_inputs = tf.nn.embedding_lookup(embedding, self.dec_inputs)

    ##### auto-encoder #####
    init_state = tf.concat([
        linear(labels, dim_y, scope='encoder'),
        tf.zeros([self.batch_size, dim_z])
    ], 1)
    cell_e = create_cell(dim_h, n_layers, self.dropout)
    _, z = tf.nn.dynamic_rnn(cell_e, enc_inputs,
                             initial_state=init_state, scope='encoder')
    # Keep only the last dim_z entries of the final encoder state.
    z = z[:, dim_y:]

    # ----- decoder 1, teacher-forced pass (rest of it is at the end) -----
    self.h_ori = tf.concat([linear(labels, dim_y, scope='generator'), z], 1)
    self.h_tsf = tf.concat(
        [linear(1 - labels, dim_y, scope='generator', reuse=True), z], 1)

    cell_g = create_cell(dim_h, n_layers, self.dropout)
    g_outputs, _ = tf.nn.dynamic_rnn(cell_g, dec_inputs,
                                     initial_state=self.h_ori,
                                     scope='generator')

    # ===== additional decoders =====
    # Extra placeholder introduced together with the additional decoders.
    self.testing = tf.placeholder(tf.float32, name='testing')

    # NOTE(review): `retrive_var` is not visible here. If it selects
    # variables by scope-name prefix, 'generator' also matches
    # 'generator2'..'generator5' and 'discriminator0' also matches
    # 'discriminator02'.. - confirm this is intended.

    # ===== Decoder 2 =====
    # Same construction as decoder 1; the encoder output z, the embedding
    # and the projection are shared, the scope is 'generator2'.
    self.h_ori2 = tf.concat([linear(labels, dim_y, scope='generator2'), z],
                            1)
    self.h_tsf2 = tf.concat(
        [linear(1 - labels, dim_y, scope='generator2', reuse=True), z], 1)

    cell_g2 = create_cell(dim_h, n_layers, self.dropout)
    g_outputs2, _ = tf.nn.dynamic_rnn(cell_g2, dec_inputs,
                                      initial_state=self.h_ori2,
                                      scope='generator2')

    # Attach h0 in the front.
    teach_h2 = tf.concat([tf.expand_dims(self.h_ori2, 1), g_outputs2], 1)

    g_outputs2 = tf.nn.dropout(g_outputs2, self.dropout)
    g_outputs2 = tf.reshape(g_outputs2, [-1, dim_h])
    g_logits2 = tf.matmul(g_outputs2, proj_W) + proj_b  # change projections?

    loss_rec2 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.reshape(self.targets, [-1]), logits=g_logits2)
    loss_rec2 *= tf.reshape(self.weights, [-1])
    self.loss_rec2 = tf.reduce_sum(loss_rec2) / tf.to_float(self.batch_size)

    # Feed-previous decoding.
    go = dec_inputs[:, 0, :]
    soft_func = softsample_word(self.dropout, proj_W, proj_b, embedding,
                                self.gamma)
    hard_func = argmax_word(self.dropout, proj_W, proj_b, embedding)

    # Result unused; the call is retained so the graph is built exactly as
    # before.
    rnn_decode(self.h_ori2, go, max_len, cell_g2, soft_func,
               scope='generator2')
    soft_h_tsf2, _ = rnn_decode(self.h_tsf2, go, max_len, cell_g2,
                                soft_func, scope='generator2')
    _, self.hard_logits_ori2 = rnn_decode(self.h_ori2, go, max_len, cell_g2,
                                          hard_func, scope='generator2')
    _, self.hard_logits_tsf2 = rnn_decode(self.h_tsf2, go, max_len, cell_g2,
                                          hard_func, scope='generator2')

    # Discriminators: the batch is split in two halves at `half`.
    # Floor division keeps `half` an integer tensor; with `/` under
    # Python 3 it becomes a float tensor that cannot be a slice bound.
    half = self.batch_size // 2
    zeros, ones = self.labels[:half], self.labels[half:]
    soft_h_tsf2 = soft_h_tsf2[:, :1 + self.batch_len, :]

    self.loss_d02, loss_g02 = discriminator(teach_h2[:half],
                                            soft_h_tsf2[half:],
                                            ones, zeros,
                                            filter_sizes, n_filters,
                                            self.dropout,
                                            scope='discriminator02')
    self.loss_d12, loss_g12 = discriminator(teach_h2[half:],
                                            soft_h_tsf2[:half],
                                            ones, zeros,
                                            filter_sizes, n_filters,
                                            self.dropout,
                                            scope='discriminator12')

    ##### optimizer #####
    self.loss_adv2 = loss_g02 + loss_g12
    self.loss2 = self.loss_rec2 + self.rho * self.loss_adv2

    theta_eg2 = retrive_var(
        ['encoder', 'generator2', 'embedding', 'projection'])
    theta_d02 = retrive_var(['discriminator02'])
    theta_d12 = retrive_var(['discriminator12'])

    opt2 = tf.train.AdamOptimizer(self.learning_rate, beta1, beta2)

    grad_rec2, _ = zip(*opt2.compute_gradients(self.loss_rec2, theta_eg2))
    grad_adv2, _ = zip(*opt2.compute_gradients(self.loss_adv2, theta_eg2))
    grad2, _ = zip(*opt2.compute_gradients(self.loss2, theta_eg2))
    grad2, _ = tf.clip_by_global_norm(grad2, grad_clip)

    self.grad_rec_norm2 = tf.global_norm(grad_rec2)
    self.grad_adv_norm2 = tf.global_norm(grad_adv2)
    self.grad_norm2 = tf.global_norm(grad2)

    self.optimize_tot2 = opt2.apply_gradients(zip(grad2, theta_eg2))
    self.optimize_rec2 = opt2.minimize(self.loss_rec2, var_list=theta_eg2)
    self.optimize_d02 = opt2.minimize(self.loss_d02, var_list=theta_d02)
    self.optimize_d12 = opt2.minimize(self.loss_d12, var_list=theta_d12)

    self.saver2 = tf.train.Saver()

    # ===== Decoder 3 =====
    self.h_ori3 = tf.concat([linear(labels, dim_y, scope='generator3'), z],
                            1)
    self.h_tsf3 = tf.concat(
        [linear(1 - labels, dim_y, scope='generator3', reuse=True), z], 1)

    cell_g3 = create_cell(dim_h, n_layers, self.dropout)
    g_outputs3, _ = tf.nn.dynamic_rnn(cell_g3, dec_inputs,
                                      initial_state=self.h_ori3,
                                      scope='generator3')

    teach_h3 = tf.concat([tf.expand_dims(self.h_ori3, 1), g_outputs3], 1)

    g_outputs3 = tf.nn.dropout(g_outputs3, self.dropout)
    g_outputs3 = tf.reshape(g_outputs3, [-1, dim_h])
    g_logits3 = tf.matmul(g_outputs3, proj_W) + proj_b  # change projections?

    loss_rec3 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.reshape(self.targets, [-1]), logits=g_logits3)
    loss_rec3 *= tf.reshape(self.weights, [-1])
    self.loss_rec3 = tf.reduce_sum(loss_rec3) / tf.to_float(self.batch_size)

    go = dec_inputs[:, 0, :]
    soft_func = softsample_word(self.dropout, proj_W, proj_b, embedding,
                                self.gamma)
    hard_func = argmax_word(self.dropout, proj_W, proj_b, embedding)

    # Result unused; call retained so the graph is built as before.
    rnn_decode(self.h_ori3, go, max_len, cell_g3, soft_func,
               scope='generator3')
    soft_h_tsf3, _ = rnn_decode(self.h_tsf3, go, max_len, cell_g3,
                                soft_func, scope='generator3')
    _, self.hard_logits_ori3 = rnn_decode(self.h_ori3, go, max_len, cell_g3,
                                          hard_func, scope='generator3')
    _, self.hard_logits_tsf3 = rnn_decode(self.h_tsf3, go, max_len, cell_g3,
                                          hard_func, scope='generator3')

    half = self.batch_size // 2  # floor division, see decoder 2
    zeros, ones = self.labels[:half], self.labels[half:]
    soft_h_tsf3 = soft_h_tsf3[:, :1 + self.batch_len, :]

    self.loss_d03, loss_g03 = discriminator(teach_h3[:half],
                                            soft_h_tsf3[half:],
                                            ones, zeros,
                                            filter_sizes, n_filters,
                                            self.dropout,
                                            scope='discriminator03')
    self.loss_d13, loss_g13 = discriminator(teach_h3[half:],
                                            soft_h_tsf3[:half],
                                            ones, zeros,
                                            filter_sizes, n_filters,
                                            self.dropout,
                                            scope='discriminator13')

    self.loss_adv3 = loss_g03 + loss_g13
    self.loss3 = self.loss_rec3 + self.rho * self.loss_adv3

    theta_eg3 = retrive_var(
        ['encoder', 'generator3', 'embedding', 'projection'])
    theta_d03 = retrive_var(['discriminator03'])
    theta_d13 = retrive_var(['discriminator13'])

    opt3 = tf.train.AdamOptimizer(self.learning_rate, beta1, beta2)

    grad_rec3, _ = zip(*opt3.compute_gradients(self.loss_rec3, theta_eg3))
    grad_adv3, _ = zip(*opt3.compute_gradients(self.loss_adv3, theta_eg3))
    grad3, _ = zip(*opt3.compute_gradients(self.loss3, theta_eg3))
    grad3, _ = tf.clip_by_global_norm(grad3, grad_clip)

    self.grad_rec_norm3 = tf.global_norm(grad_rec3)
    self.grad_adv_norm3 = tf.global_norm(grad_adv3)
    self.grad_norm3 = tf.global_norm(grad3)

    self.optimize_tot3 = opt3.apply_gradients(zip(grad3, theta_eg3))
    self.optimize_rec3 = opt3.minimize(self.loss_rec3, var_list=theta_eg3)
    self.optimize_d03 = opt3.minimize(self.loss_d03, var_list=theta_d03)
    self.optimize_d13 = opt3.minimize(self.loss_d13, var_list=theta_d13)

    self.saver3 = tf.train.Saver()

    # ===== Decoder 4 =====
    self.h_ori4 = tf.concat([linear(labels, dim_y, scope='generator4'), z],
                            1)
    self.h_tsf4 = tf.concat(
        [linear(1 - labels, dim_y, scope='generator4', reuse=True), z], 1)

    cell_g4 = create_cell(dim_h, n_layers, self.dropout)
    g_outputs4, _ = tf.nn.dynamic_rnn(cell_g4, dec_inputs,
                                      initial_state=self.h_ori4,
                                      scope='generator4')

    teach_h4 = tf.concat([tf.expand_dims(self.h_ori4, 1), g_outputs4], 1)

    g_outputs4 = tf.nn.dropout(g_outputs4, self.dropout)
    g_outputs4 = tf.reshape(g_outputs4, [-1, dim_h])
    g_logits4 = tf.matmul(g_outputs4, proj_W) + proj_b  # change projections?

    loss_rec4 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.reshape(self.targets, [-1]), logits=g_logits4)
    loss_rec4 *= tf.reshape(self.weights, [-1])
    self.loss_rec4 = tf.reduce_sum(loss_rec4) / tf.to_float(self.batch_size)

    go = dec_inputs[:, 0, :]
    soft_func = softsample_word(self.dropout, proj_W, proj_b, embedding,
                                self.gamma)
    hard_func = argmax_word(self.dropout, proj_W, proj_b, embedding)

    # Result unused; call retained so the graph is built as before.
    rnn_decode(self.h_ori4, go, max_len, cell_g4, soft_func,
               scope='generator4')
    soft_h_tsf4, _ = rnn_decode(self.h_tsf4, go, max_len, cell_g4,
                                soft_func, scope='generator4')
    _, self.hard_logits_ori4 = rnn_decode(self.h_ori4, go, max_len, cell_g4,
                                          hard_func, scope='generator4')
    _, self.hard_logits_tsf4 = rnn_decode(self.h_tsf4, go, max_len, cell_g4,
                                          hard_func, scope='generator4')

    half = self.batch_size // 2  # floor division, see decoder 2
    zeros, ones = self.labels[:half], self.labels[half:]
    soft_h_tsf4 = soft_h_tsf4[:, :1 + self.batch_len, :]

    self.loss_d04, loss_g04 = discriminator(teach_h4[:half],
                                            soft_h_tsf4[half:],
                                            ones, zeros,
                                            filter_sizes, n_filters,
                                            self.dropout,
                                            scope='discriminator04')
    self.loss_d14, loss_g14 = discriminator(teach_h4[half:],
                                            soft_h_tsf4[:half],
                                            ones, zeros,
                                            filter_sizes, n_filters,
                                            self.dropout,
                                            scope='discriminator14')

    self.loss_adv4 = loss_g04 + loss_g14
    self.loss4 = self.loss_rec4 + self.rho * self.loss_adv4

    theta_eg4 = retrive_var(
        ['encoder', 'generator4', 'embedding', 'projection'])
    theta_d04 = retrive_var(['discriminator04'])
    theta_d14 = retrive_var(['discriminator14'])

    opt4 = tf.train.AdamOptimizer(self.learning_rate, beta1, beta2)

    grad_rec4, _ = zip(*opt4.compute_gradients(self.loss_rec4, theta_eg4))
    grad_adv4, _ = zip(*opt4.compute_gradients(self.loss_adv4, theta_eg4))
    grad4, _ = zip(*opt4.compute_gradients(self.loss4, theta_eg4))
    grad4, _ = tf.clip_by_global_norm(grad4, grad_clip)

    self.grad_rec_norm4 = tf.global_norm(grad_rec4)
    self.grad_adv_norm4 = tf.global_norm(grad_adv4)
    self.grad_norm4 = tf.global_norm(grad4)

    self.optimize_tot4 = opt4.apply_gradients(zip(grad4, theta_eg4))
    self.optimize_rec4 = opt4.minimize(self.loss_rec4, var_list=theta_eg4)
    self.optimize_d04 = opt4.minimize(self.loss_d04, var_list=theta_d04)
    self.optimize_d14 = opt4.minimize(self.loss_d14, var_list=theta_d14)

    self.saver4 = tf.train.Saver()

    # ===== Decoder 5 =====
    self.h_ori5 = tf.concat([linear(labels, dim_y, scope='generator5'), z],
                            1)
    self.h_tsf5 = tf.concat(
        [linear(1 - labels, dim_y, scope='generator5', reuse=True), z], 1)

    cell_g5 = create_cell(dim_h, n_layers, self.dropout)
    g_outputs5, _ = tf.nn.dynamic_rnn(cell_g5, dec_inputs,
                                      initial_state=self.h_ori5,
                                      scope='generator5')

    teach_h5 = tf.concat([tf.expand_dims(self.h_ori5, 1), g_outputs5], 1)

    g_outputs5 = tf.nn.dropout(g_outputs5, self.dropout)
    g_outputs5 = tf.reshape(g_outputs5, [-1, dim_h])
    g_logits5 = tf.matmul(g_outputs5, proj_W) + proj_b  # change projections?

    loss_rec5 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.reshape(self.targets, [-1]), logits=g_logits5)
    loss_rec5 *= tf.reshape(self.weights, [-1])
    self.loss_rec5 = tf.reduce_sum(loss_rec5) / tf.to_float(self.batch_size)

    go = dec_inputs[:, 0, :]
    soft_func = softsample_word(self.dropout, proj_W, proj_b, embedding,
                                self.gamma)
    hard_func = argmax_word(self.dropout, proj_W, proj_b, embedding)

    # Result unused; call retained so the graph is built as before.
    rnn_decode(self.h_ori5, go, max_len, cell_g5, soft_func,
               scope='generator5')
    soft_h_tsf5, _ = rnn_decode(self.h_tsf5, go, max_len, cell_g5,
                                soft_func, scope='generator5')
    _, self.hard_logits_ori5 = rnn_decode(self.h_ori5, go, max_len, cell_g5,
                                          hard_func, scope='generator5')
    _, self.hard_logits_tsf5 = rnn_decode(self.h_tsf5, go, max_len, cell_g5,
                                          hard_func, scope='generator5')

    half = self.batch_size // 2  # floor division, see decoder 2
    zeros, ones = self.labels[:half], self.labels[half:]
    soft_h_tsf5 = soft_h_tsf5[:, :1 + self.batch_len, :]

    self.loss_d05, loss_g05 = discriminator(teach_h5[:half],
                                            soft_h_tsf5[half:],
                                            ones, zeros,
                                            filter_sizes, n_filters,
                                            self.dropout,
                                            scope='discriminator05')
    self.loss_d15, loss_g15 = discriminator(teach_h5[half:],
                                            soft_h_tsf5[:half],
                                            ones, zeros,
                                            filter_sizes, n_filters,
                                            self.dropout,
                                            scope='discriminator15')

    self.loss_adv5 = loss_g05 + loss_g15
    self.loss5 = self.loss_rec5 + self.rho * self.loss_adv5

    theta_eg5 = retrive_var(
        ['encoder', 'generator5', 'embedding', 'projection'])
    theta_d05 = retrive_var(['discriminator05'])
    theta_d15 = retrive_var(['discriminator15'])

    opt5 = tf.train.AdamOptimizer(self.learning_rate, beta1, beta2)

    grad_rec5, _ = zip(*opt5.compute_gradients(self.loss_rec5, theta_eg5))
    grad_adv5, _ = zip(*opt5.compute_gradients(self.loss_adv5, theta_eg5))
    grad5, _ = zip(*opt5.compute_gradients(self.loss5, theta_eg5))
    grad5, _ = tf.clip_by_global_norm(grad5, grad_clip)

    self.grad_rec_norm5 = tf.global_norm(grad_rec5)
    self.grad_adv_norm5 = tf.global_norm(grad_adv5)
    self.grad_norm5 = tf.global_norm(grad5)

    self.optimize_tot5 = opt5.apply_gradients(zip(grad5, theta_eg5))
    self.optimize_rec5 = opt5.minimize(self.loss_rec5, var_list=theta_eg5)
    self.optimize_d05 = opt5.minimize(self.loss_d05, var_list=theta_d05)
    self.optimize_d15 = opt5.minimize(self.loss_d15, var_list=theta_d15)

    self.saver5 = tf.train.Saver()

    # ===== Decoder 1 (scope 'generator'), remainder =====
    # Attach h0 in the front.
    teach_h = tf.concat([tf.expand_dims(self.h_ori, 1), g_outputs], 1)

    g_outputs = tf.nn.dropout(g_outputs, self.dropout)
    g_outputs = tf.reshape(g_outputs, [-1, dim_h])
    g_logits = tf.matmul(g_outputs, proj_W) + proj_b

    loss_rec = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.reshape(self.targets, [-1]), logits=g_logits)
    loss_rec *= tf.reshape(self.weights, [-1])
    self.loss_rec = tf.reduce_sum(loss_rec) / tf.to_float(self.batch_size)

    ##### feed-previous decoding #####
    go = dec_inputs[:, 0, :]
    soft_func = softsample_word(self.dropout, proj_W, proj_b, embedding,
                                self.gamma)
    hard_func = argmax_word(self.dropout, proj_W, proj_b, embedding)

    # Result unused; call retained so the graph is built as before.
    rnn_decode(self.h_ori, go, max_len, cell_g, soft_func,
               scope='generator')
    soft_h_tsf, _ = rnn_decode(self.h_tsf, go, max_len, cell_g, soft_func,
                               scope='generator')
    _, self.hard_logits_ori = rnn_decode(self.h_ori, go, max_len, cell_g,
                                         hard_func, scope='generator')
    _, self.hard_logits_tsf = rnn_decode(self.h_tsf, go, max_len, cell_g,
                                         hard_func, scope='generator')

    ##### discriminator #####
    # a batch's first half consists of sentences of one style,
    # and second half of the other
    half = self.batch_size // 2  # floor division, see decoder 2
    zeros, ones = self.labels[:half], self.labels[half:]
    soft_h_tsf = soft_h_tsf[:, :1 + self.batch_len, :]

    self.loss_d0, loss_g0 = discriminator(teach_h[:half],
                                          soft_h_tsf[half:],
                                          ones, zeros,
                                          filter_sizes, n_filters,
                                          self.dropout,
                                          scope='discriminator0')
    self.loss_d1, loss_g1 = discriminator(teach_h[half:],
                                          soft_h_tsf[:half],
                                          ones, zeros,
                                          filter_sizes, n_filters,
                                          self.dropout,
                                          scope='discriminator1')

    ##### optimizer #####
    self.loss_adv = loss_g0 + loss_g1
    self.loss = self.loss_rec + self.rho * self.loss_adv

    theta_eg = retrive_var(
        ['encoder', 'generator', 'embedding', 'projection'])
    theta_d0 = retrive_var(['discriminator0'])
    theta_d1 = retrive_var(['discriminator1'])

    opt = tf.train.AdamOptimizer(self.learning_rate, beta1, beta2)

    grad_rec, _ = zip(*opt.compute_gradients(self.loss_rec, theta_eg))
    grad_adv, _ = zip(*opt.compute_gradients(self.loss_adv, theta_eg))
    grad, _ = zip(*opt.compute_gradients(self.loss, theta_eg))
    grad, _ = tf.clip_by_global_norm(grad, grad_clip)

    self.grad_rec_norm = tf.global_norm(grad_rec)
    self.grad_adv_norm = tf.global_norm(grad_adv)
    self.grad_norm = tf.global_norm(grad)

    self.optimize_tot = opt.apply_gradients(zip(grad, theta_eg))
    self.optimize_rec = opt.minimize(self.loss_rec, var_list=theta_eg)
    self.optimize_d0 = opt.minimize(self.loss_d0, var_list=theta_d0)
    self.optimize_d1 = opt.minimize(self.loss_d1, var_list=theta_d1)

    self.saver = tf.train.Saver()
def create_op_loss(self):
    """Create the loss, gradient and apply-gradient ops plus summaries.

    Reads `self._tf_value_state`, `self._tf_adv_probas`,
    `self.entropy_regularisation_strength`, `self.nb_actions` and
    `self.get_all_variables()`.

    The total loss is the sum of
      * the negated sum of (entropy bonus + log-prob of the selected
        action times the advantage), and
      * 0.5 * l2_loss(R - value_state).

    Gradients are exposed in two halves: `self._tf_get_gradients` holds
    the symbolic gradients clipped to a global norm of 40, and
    `self._tf_apply_gradients` applies gradients that are fed through the
    placeholders in `self._tf_grad_placeholder`.

    Sets on `self`: `masks`, `pi_selected_actions`, the three
    `_tf_summary_*` loss placeholders, the input placeholders
    (`_tf_loss_R`, `_tf_loss_action_index`, `_tf_grad_placeholder`,
    `_tf_loss_advantage`) and the outputs (`_tf_loss_value_state_function`,
    `_tf_loss_advantage_action_function`, `_tf_loss`, `_tf_optimizer`,
    `_tf_get_gradients`, `_tf_apply_gradients`).
    """
    value_state = self._tf_value_state
    adv_probas = self._tf_adv_probas

    R = tf.placeholder(tf.float32, [None])
    actions_index = tf.placeholder(tf.int32, [None])
    advantage = tf.placeholder(tf.float32, [None])

    # Operators are used instead of tf.sub / tf.mul, which were removed
    # in TensorFlow 1.0.
    diff = R - value_state

    # Entropy = sum_a (-p_a ln p_a)
    log_adv_probas = tf.log(adv_probas)
    entropy = tf.reduce_sum(-(adv_probas * log_adv_probas),
                            reduction_indices=1)
    entropy_term = entropy * self.entropy_regularisation_strength

    # Pick the probability of the action that was actually taken.
    self.masks = tf.one_hot(actions_index, on_value=True, off_value=False,
                            depth=self.nb_actions)
    self.pi_selected_actions = tf.boolean_mask(adv_probas, self.masks)
    log_pi_selected_actions = tf.log(self.pi_selected_actions)

    advantage_term = log_pi_selected_actions * advantage
    loss_advantage_action_function = -tf.reduce_sum(
        entropy_term + advantage_term)

    # In the paper, the authors recommend to multiply the loss by 0.5
    loss_value_state_function = 0.5 * tf.nn.l2_loss(diff)
    loss = loss_advantage_action_function + loss_value_state_function

    variables = self.get_all_variables()

    opt = tf.train.AdamOptimizer(1e-4)
    # Only the variable half of each (gradient, variable) pair is used
    # below, to build one feedable placeholder per variable.
    grads = opt.compute_gradients(loss, var_list=variables)

    symbolic_grads = tf.gradients(loss, variables)
    symbolic_grads, _ = tf.clip_by_global_norm(symbolic_grads, 40.0)

    grad_placeholder = [
        (tf.placeholder(tf.float32, shape=var.get_shape()), var)
        for _, var in grads
    ]
    apply_placeholder_op = opt.apply_gradients(grad_placeholder)

    # Norms are taken over the gradient placeholders only. Passing the
    # (placeholder, variable) tuples would mix the variables' own values
    # into the reported gradient norm.
    fed_grads = [placeholder for placeholder, _ in grad_placeholder]
    tf.summary.scalar("gradient/grad_global_norm",
                      tf.global_norm(fed_grads))
    # NOTE(review): assumes get_all_variables() returns two variables per
    # layer in the order cnn1, cnn2, fcc1, adv_probas, value_state -
    # confirm. The former slices [2:2], [4:2], [6:2] and [8:2] were all
    # empty.
    tf.summary.scalar("gradient/cnn1_grad_global_norm",
                      tf.global_norm(fed_grads[0:2]))
    tf.summary.scalar("gradient/cnn2_grad_global_norm",
                      tf.global_norm(fed_grads[2:4]))
    tf.summary.scalar("gradient/fcc1_grad_global_norm",
                      tf.global_norm(fed_grads[4:6]))
    tf.summary.scalar("gradient/adv_probas_grad_global_norm",
                      tf.global_norm(fed_grads[6:8]))
    tf.summary.scalar("gradient/value_state_grad_global_norm",
                      tf.global_norm(fed_grads[8:10]))
    tf.summary.scalar("model/var_global_norm", tf.global_norm(variables))

    # Loss summaries are fed from the outside through scalar placeholders.
    self._tf_summary_adv_loss = tf.placeholder(tf.float32, [])
    self._tf_summary_value_state_loss = tf.placeholder(tf.float32, [])
    self._tf_summary_loss = tf.placeholder(tf.float32, [])
    tf.summary.scalar("loss/advantage_function_loss",
                      self._tf_summary_adv_loss)
    tf.summary.scalar("loss/value_state_function_loss",
                      self._tf_summary_value_state_loss)
    tf.summary.scalar("loss/total_loss", self._tf_summary_loss)

    # Input
    self._tf_loss_R = R
    self._tf_loss_action_index = actions_index
    self._tf_grad_placeholder = grad_placeholder
    self._tf_loss_advantage = advantage

    # Output
    self._tf_loss_value_state_function = loss_value_state_function
    self._tf_loss_advantage_action_function = loss_advantage_action_function
    self._tf_loss = loss
    self._tf_optimizer = opt
    self._tf_get_gradients = symbolic_grads
    self._tf_apply_gradients = apply_placeholder_op
def __init__(self, *, policy, ob_space, ac_space, nbatch_act, nbatch_train,
             nsteps, ent_coef, vf_coef, max_grad_norm):
    """Build the acting/training policies, the clipped loss and train ops.

    Two policies are instantiated through `policy(...)`: an acting one
    (`reuse=False`, one step) and a training one (`reuse=True`, `nsteps`
    steps). The loss is

        pg_loss - entropy * ent_coef + vf_loss * vf_coef

    where both the policy-gradient term and the value term use the
    pessimistic (maximum) of their clipped and unclipped variants.
    Gradients are optionally clipped by global norm and applied with Adam.

    Exposes on `self`: `train`, `save`, `load`, `get_summary`,
    `train_model`, `act_model`, `step`, `value`, `initial_state`,
    `loss_names`, `td_map` (the last feed dict used by `train`, initially
    None), `writer` and `merged`.

    Args:
        policy: Callable building a policy model; called as
            `policy(sess, ob_space, ac_space, nbatch, nsteps, reuse=...)`.
        ob_space: Forwarded to `policy`.
        ac_space: Forwarded to `policy`.
        nbatch_act: Batch size of the acting model.
        nbatch_train: Batch size of the training model.
        nsteps: Number of steps of the training model.
        ent_coef: Weight of the entropy term in the loss.
        vf_coef: Weight of the value-function term in the loss.
        max_grad_norm: Global-norm clipping bound, or None for no clipping.
    """
    session = tf.get_default_session()

    act_model = policy(session, ob_space, ac_space, nbatch_act, 1,
                       reuse=False)
    train_model = policy(session, ob_space, ac_space, nbatch_train, nsteps,
                         reuse=True)

    # ----- training inputs -----
    actions_ph = train_model.pdtype.sample_placeholder([None])
    adv_ph = tf.placeholder(tf.float32, [None])
    returns_ph = tf.placeholder(tf.float32, [None])
    old_neglogp_ph = tf.placeholder(tf.float32, [None])
    old_vpred_ph = tf.placeholder(tf.float32, [None])
    lr_ph = tf.placeholder(tf.float32, [])
    cliprange_ph = tf.placeholder(tf.float32, [])

    neglogp = train_model.pd.neglogp(actions_ph)
    entropy = tf.reduce_mean(train_model.pd.entropy())

    # ----- value loss: worst case of clipped / unclipped prediction -----
    value_pred = train_model.vf
    value_pred_clipped = old_vpred_ph + tf.clip_by_value(
        train_model.vf - old_vpred_ph, -cliprange_ph, cliprange_ph)
    value_err = tf.square(value_pred - returns_ph)
    value_err_clipped = tf.square(value_pred_clipped - returns_ph)
    vf_loss = .5 * tf.reduce_mean(tf.maximum(value_err, value_err_clipped))

    # ----- policy loss: worst case of clipped / unclipped ratio -----
    ratio = tf.exp(old_neglogp_ph - neglogp)
    surrogate = -adv_ph * ratio
    surrogate_clipped = -adv_ph * tf.clip_by_value(
        ratio, 1.0 - cliprange_ph, 1.0 + cliprange_ph)
    pg_loss = tf.reduce_mean(tf.maximum(surrogate, surrogate_clipped))

    # Diagnostics.
    approxkl = .5 * tf.reduce_mean(tf.square(neglogp - old_neglogp_ph))
    clipfrac = tf.reduce_mean(
        tf.to_float(tf.greater(tf.abs(ratio - 1.0), cliprange_ph)))

    loss = pg_loss - entropy * ent_coef + vf_loss * vf_coef

    with tf.variable_scope('model'):
        params = tf.trainable_variables()

    raw_grads = tf.gradients(loss, params)
    if max_grad_norm is not None:
        raw_grads, _grad_norm = tf.clip_by_global_norm(raw_grads,
                                                       max_grad_norm)
    grads_and_params = list(zip(raw_grads, params))

    trainer = tf.train.AdamOptimizer(learning_rate=lr_ph, epsilon=1e-5)
    train_op = trainer.apply_gradients(grads_and_params)

    self.td_map = None

    def train(lr, cliprange, obs, insts, returns, masks, actions, values,
              neglogpacs, states=None):
        """Run one optimisation step and return the five loss statistics."""
        # Advantages are normalised per call.
        raw_advs = returns - values
        advs = (raw_advs - raw_advs.mean()) / (raw_advs.std() + 1e-8)

        feed = {
            train_model.X: obs,
            train_model.I: insts,
            actions_ph: actions,
            adv_ph: advs,
            returns_ph: returns,
            lr_ph: lr,
            cliprange_ph: cliprange,
            old_neglogp_ph: neglogpacs,
            old_vpred_ph: values,
        }
        if states is not None:
            feed[train_model.S] = states
            feed[train_model.M] = masks

        # Remembered so get_summary() can re-use the same feed.
        self.td_map = feed

        fetches = [pg_loss, vf_loss, entropy, approxkl, clipfrac, train_op]
        results = session.run(fetches, feed)
        # Drop the (None) result of the train op.
        return results[:-1]

    self.loss_names = [
        'policy_loss', 'value_loss', 'policy_entropy', 'approxkl',
        'clipfrac',
    ]

    def save(save_path):
        """Dump the current parameter values to `save_path` with joblib."""
        joblib.dump(session.run(params), save_path)

    def load(load_path):
        """Assign the values stored at `load_path` to the parameters."""
        stored = joblib.load(load_path)
        assign_ops = [
            param.assign(value) for param, value in zip(params, stored)
        ]
        session.run(assign_ops)

    self.train = train
    self.train_model = train_model
    self.act_model = act_model
    self.step = act_model.step
    self.value = act_model.value
    self.initial_state = act_model.initial_state
    self.save = save
    self.load = load

    tf.global_variables_initializer().run(session=session)  #pylint: disable=E1101

    # ----- summaries -----
    self.writer = tf.summary.FileWriter('./Asset/logdir', session.graph)

    # (global_norm op name, summary tag, variables) per sub-network.
    groups = (
        ('cnn_grads', 'GradNorm/cnn', train_model.cnn_var),
        ('gru_grads', 'GradNorm/gru', train_model.gru_var),
        ('ga_grads', 'GradNorm/GA', train_model.ga_var),
        ('lstm_grads', 'GradNorm/lstm', train_model.lstm_var),
        ('pi_grads', 'GradNorm/pi', train_model.pi_var),
        ('vf_grads', 'GradNorm/vf', train_model.vf_var),
    )
    # Three separate passes keep the op creation order: all gradients,
    # then all norms, then all summaries.
    group_grads = [tf.gradients(loss, group_vars)
                   for _, _, group_vars in groups]
    group_norms = [tf.global_norm(grad_list, name=op_name)
                   for (op_name, _, _), grad_list in zip(groups, group_grads)]
    for (_, tag, _), norm in zip(groups, group_norms):
        tf.summary.scalar(tag, norm)

    tf.summary.scalar('loss/policy_loss', pg_loss)
    tf.summary.scalar('loss/value_loss', vf_loss)
    tf.summary.scalar('loss/entropy', entropy)

    self.merged = tf.summary.merge_all()

    def get_summary():
        """Evaluate the merged summaries with the last training feed."""
        return session.run(self.merged, self.td_map)

    self.get_summary = get_summary
def get_train_ops(loss, tf_variables, train_step, clip_mode=None,
                  grad_bound=None, l2_reg=1e-4, lr_warmup_val=None,
                  lr_warmup_steps=100, lr_init=0.1, lr_dec_start=0,
                  lr_dec_every=10000, lr_dec_rate=0.1, lr_dec_min=None,
                  lr_cosine=False, lr_max=None, lr_min=None, lr_T_0=None,
                  lr_T_mul=None, num_train_batches=None, optim_algo=None,
                  sync_replicas=False, num_aggregate=None,
                  num_replicas=None, get_grad_norms=False,
                  moving_average=None):
    """Build the training op, learning-rate schedule and gradient norms.

    Args:
      loss: scalar loss tensor to minimise.
      tf_variables: variables to differentiate with respect to and update.
      train_step: global step tensor; incremented by the returned train op.
      clip_mode: "global" (clip by global norm), "norm" (clip every
        gradient by its own norm), or None for no clipping.
      grad_bound: clipping bound; required when clip_mode is not None.
      l2_reg: if > 0, `l2_reg * sum(var ** 2)` over tf_variables is added
        to the loss.
      lr_warmup_val: if not None, learning rate used while
        `train_step < lr_warmup_steps`.
      lr_warmup_steps: number of warm-up steps.
      lr_init, lr_dec_start, lr_dec_every, lr_dec_rate: parameters of the
        staircase exponential decay used when lr_cosine is False.
      lr_dec_min: optional lower bound on the decayed learning rate.
      lr_cosine: use a cosine schedule with restarts instead of
        exponential decay.
      lr_max, lr_min: bounds of the cosine schedule.
      lr_T_0: initial restart period in epochs.
      lr_T_mul: factor the period is multiplied by at every restart.
      num_train_batches: batches per epoch (converts steps to epochs).
      optim_algo: "momentum", "sgd" or "adam".
      sync_replicas: wrap the optimizer in SyncReplicasOptimizer.
      num_aggregate, num_replicas: required when sync_replicas is True.
      get_grad_norms: also return the per-variable gradient norms.
      moving_average: if not None, decay for a MovingAverageOptimizer
        wrapper that stores the moving average of parameters.

    Returns:
      (train_op, learning_rate, grad_norm, opt), plus `grad_norms` (dict
      from variable name to gradient norm) as a fifth element when
      get_grad_norms is True. grad_norm and grad_norms are computed
      before clipping.

    Raises:
      NotImplementedError: for an unknown clip_mode.
      ValueError: for an unknown optim_algo.
    """
    if l2_reg > 0:
        l2_loss = tf.add_n([tf.reduce_sum(var**2) for var in tf_variables])
        loss = loss + l2_reg * l2_loss

    grads = tf.gradients(loss, tf_variables)
    grad_norm = tf.global_norm(grads)

    grad_norms = {}
    for v, g in zip(tf_variables, grads):
        if v is None or g is None:
            continue
        # Sparse gradients carry their values in `.values`.
        g_values = g.values if isinstance(g, tf.IndexedSlices) else g
        grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g_values**2))

    if clip_mode is not None:
        assert grad_bound is not None, "Need grad_bound to clip gradients."
        if clip_mode == "global":
            grads, _ = tf.clip_by_global_norm(grads, grad_bound)
        elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if g is None:
                    # No gradient for this variable; pass it through as
                    # the "global" branch does.
                    c_g = None
                elif isinstance(g, tf.IndexedSlices):
                    # IndexedSlices takes (values, indices, dense_shape).
                    c_g = tf.IndexedSlices(
                        tf.clip_by_norm(g.values, grad_bound),
                        g.indices, g.dense_shape)
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                # Append the clipped gradient, not the original one.
                clipped.append(c_g)
            grads = clipped
        else:
            raise NotImplementedError(
                "Unknown clip_mode {}".format(clip_mode))

    if lr_cosine:
        assert lr_max is not None, "Need lr_max to use lr_cosine"
        assert lr_min is not None, "Need lr_min to use lr_cosine"
        assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
        assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
        assert num_train_batches is not None, ("Need num_train_batches to use"
                                               " lr_cosine")

        curr_epoch = tf.cast(train_step // num_train_batches, tf.int32)

        # Epoch of the last restart and current period length.
        last_reset = tf.get_variable("last_reset", initializer=0,
                                     dtype=tf.int32, trainable=False)
        T_i = tf.get_variable("T_i", initializer=lr_T_0, dtype=tf.int32,
                              trainable=False)
        T_curr = curr_epoch - last_reset

        def _update():
            # Period elapsed: record the restart and lengthen the period
            # before computing the rate.
            update_last_reset = tf.assign(last_reset, curr_epoch,
                                          use_locking=True)
            update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
            with tf.control_dependencies([update_last_reset, update_T_i]):
                rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
                lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        def _no_update():
            rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
            lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        learning_rate = tf.cond(tf.greater_equal(T_curr, T_i),
                                _update, _no_update)
    else:
        learning_rate = tf.train.exponential_decay(
            lr_init, tf.maximum(train_step - lr_dec_start, 0),
            lr_dec_every, lr_dec_rate, staircase=True)
        if lr_dec_min is not None:
            learning_rate = tf.maximum(learning_rate, lr_dec_min)

    if lr_warmup_val is not None:
        learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
                                lambda: lr_warmup_val,
                                lambda: learning_rate)

    if optim_algo == "momentum":
        opt = tf.train.MomentumOptimizer(learning_rate, 0.9,
                                         use_locking=True,
                                         use_nesterov=True)
    elif optim_algo == "sgd":
        opt = tf.train.GradientDescentOptimizer(learning_rate,
                                                use_locking=True)
    elif optim_algo == "adam":
        opt = tf.train.AdamOptimizer(learning_rate, beta1=0.0, epsilon=1e-3,
                                     use_locking=True)
    else:
        raise ValueError("Unknown optim_algo {}".format(optim_algo))

    if sync_replicas:
        assert num_aggregate is not None, "Need num_aggregate to sync."
        assert num_replicas is not None, "Need num_replicas to sync."
        opt = tf.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=num_aggregate,
            total_num_replicas=num_replicas,
            use_locking=True)

    if moving_average is not None:
        opt = tf.contrib.opt.MovingAverageOptimizer(
            opt, average_decay=moving_average)

    train_op = opt.apply_gradients(zip(grads, tf_variables),
                                   global_step=train_step)

    if get_grad_norms:
        return train_op, learning_rate, grad_norm, opt, grad_norms
    else:
        return train_op, learning_rate, grad_norm, opt
def build_train_model(self, test=True, reuse=None):
    """Construct the multi-device training graph.

    One encoder/decoder replica is built per entry of `self._devices`,
    each fed from the matching `self.src_pls` / `self.label_pls`
    placeholders. Per-replica gradients are averaged with
    `utils.average_gradients`, optionally clipped by global norm
    (`self._config.train.grads_clip`) and applied with `self._optimizer`.

    Sets `self.accuracy`, `self.loss`, `self.grads_norm`, `self.train_op`,
    `self.summary_op` and `self.saver`.

    Args:
        test: When true, `self.build_test_model(reuse=True)` is called
            after the training graph has been built.
        reuse: Passed as `reuse` to the outer variable scope.
    """
    logging.info('Build train model.')
    self.prepare_training()

    with self.graph.as_default():
        acc_list = []
        loss_list = []
        gv_list = []
        # Variable copies keyed by name and by (device, name).
        cache = {}
        # Number of variable elements placed on each device so far.
        load = {dev: 0 for dev in self._devices}

        replicas = zip(self.src_pls, self.label_pls, self._devices)
        for i, (X, Y, device) in enumerate(replicas):

            def daisy_chain_getter(getter, name, *args, **kwargs):
                """Return the variable copy for the current device."""
                per_device_key = (device, name)
                if per_device_key not in cache:
                    if name in cache:
                        # Already available elsewhere: copy from the
                        # most recent copy.
                        source = cache[name]
                    else:
                        created = getter(name, *args, **kwargs)
                        source = created._ref()  # pylint: disable=protected-access
                    copied = tf.identity(source)
                    # update the cache
                    cache[name] = copied
                    cache[per_device_key] = copied
                return cache[per_device_key]

            def balanced_device_setter(op):
                """Spread variables over devices; other ops stay put."""
                if op.type not in {'Variable', 'VariableV2', 'VarHandleOp'}:
                    return device
                lightest = min(load.values())
                candidates = [
                    dev for dev, amount in load.items()
                    if amount == lightest
                ]
                picked = random.choice(candidates)
                load[picked] += op.outputs[0].get_shape().num_elements()
                return picked

            def identity_device_setter(op):
                return device

            device_setter = balanced_device_setter

            outer_scope = tf.variable_scope(
                tf.get_variable_scope(),
                initializer=self._initializer,
                custom_getter=daisy_chain_getter,
                reuse=reuse)
            with outer_scope:
                with tf.device(device_setter):
                    logging.info('Build model on %s.' % device)
                    # First replica creates the variables, later ones
                    # reuse them.
                    share = i > 0 or None
                    encoder_output = self.encoder(
                        X,
                        is_training=True,
                        reuse=share,
                        encoder_scope=self.encoder_scope)
                    decoder_output = self.decoder(
                        utils.shift_right(Y),
                        encoder_output,
                        is_training=True,
                        reuse=share,
                        decoder_scope=self.decoder_scope)
                    acc, loss = self.train_output(
                        decoder_output,
                        Y,
                        reuse=share,
                        decoder_scope=self.decoder_scope)

                    var_list = tf.trainable_variables()
                    name_filter = self._config.train.var_filter
                    if name_filter:
                        var_list = [
                            v for v in var_list
                            if re.match(name_filter, v.name)
                        ]

                    acc_list.append(acc)
                    loss_list.append(loss)
                    replica_gv = self._optimizer.compute_gradients(
                        loss, var_list=var_list)
                    gv_list.append(replica_gv)

        self.accuracy = tf.reduce_mean(acc_list)
        self.loss = tf.reduce_mean(loss_list)

        # Clip gradients and then apply.
        grads_and_vars = utils.average_gradients(gv_list)
        avg_abs_grads = tf.reduce_mean(tf.abs(grads_and_vars[0]))

        clip_bound = self._config.train.grads_clip
        if clip_bound > 0:
            grads, self.grads_norm = tf.clip_by_global_norm(
                [pair[0] for pair in grads_and_vars], clip_norm=clip_bound)
            grads_and_vars = zip(grads,
                                 [pair[1] for pair in grads_and_vars])
        else:
            self.grads_norm = tf.global_norm(
                [pair[0] for pair in grads_and_vars])

        self.train_op = self._optimizer.apply_gradients(
            grads_and_vars, global_step=self.global_step)

        # Summaries
        tf.summary.scalar('acc', self.accuracy)
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('learning_rate', self.learning_rate)
        tf.summary.scalar('grads_norm', self.grads_norm)
        tf.summary.scalar('avg_abs_grads', avg_abs_grads)
        self.summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                    max_to_keep=60)

    # We may want to test the model during training.
    if test:
        self.build_test_model(reuse=True)
def __init__(self, policy, ob_space, ac_space, nenvs, nsteps, ent_coef,
             q_coef, gamma, max_grad_norm, lr, rprop_alpha, rprop_epsilon,
             total_timesteps, lrschedule, c, trust_region, alpha, delta):
    """Build the ACER training graph and expose train/step callables.

    Args:
        policy: Callable invoked as ``policy(observ_placeholder=..., sess=...)``
            to build the step, train and polyak-averaged models.
        ob_space: Observation space; its ``dtype`` and ``shape`` size the
            observation placeholders.
        ac_space: Action space; ``ac_space.n`` is the number of actions.
        nenvs: Number of environments; the batch is ``nenvs * nsteps``.
        nsteps: Number of steps per environment in a batch.
        ent_coef: Weight of the entropy term in the loss.
        q_coef: Weight of the Q-function loss.
        gamma: Passed to ``q_retrace`` (presumably the discount factor).
        max_grad_norm: Global-norm clipping threshold, or ``None`` to skip
            clipping.
        lr: Initial value handed to ``Scheduler``.
        rprop_alpha: ``decay`` of the RMSProp optimizer.
        rprop_epsilon: ``epsilon`` of the RMSProp optimizer.
        total_timesteps: ``nvalues`` handed to ``Scheduler``.
        lrschedule: ``schedule`` handed to ``Scheduler``.
        c: Truncation threshold applied to the importance weights.
        trust_region: If true, adjust the policy gradient with the
            trust-region correction before applying it.
        alpha: Decay of the exponential moving average (polyak) model.
        delta: Constant subtracted from ``k . g`` in the trust-region
            adjustment.
    """
    sess = get_session()
    nact = ac_space.n
    nbatch = nenvs * nsteps

    A = tf.placeholder(tf.int32, [nbatch])  # actions
    D = tf.placeholder(tf.float32, [nbatch])  # dones
    R = tf.placeholder(tf.float32, [nbatch])  # rewards, not returns
    MU = tf.placeholder(tf.float32, [nbatch, nact])  # mu's
    LR = tf.placeholder(tf.float32, [])
    eps = 1e-6

    step_ob_placeholder = tf.placeholder(
        dtype=ob_space.dtype, shape=(nenvs, ) + ob_space.shape)
    train_ob_placeholder = tf.placeholder(
        dtype=ob_space.dtype,
        shape=(nenvs * (nsteps + 1), ) + ob_space.shape)
    with tf.variable_scope('acer_model', reuse=tf.AUTO_REUSE):
        step_model = policy(observ_placeholder=step_ob_placeholder,
                            sess=sess)
        train_model = policy(observ_placeholder=train_ob_placeholder,
                             sess=sess)

    params = find_trainable_variables("acer_model")
    print("Params {}".format(len(params)))
    for var in params:
        print(var)

    # Create the polyak-averaged model.
    ema = tf.train.ExponentialMovingAverage(alpha)
    ema_apply_op = ema.apply(params)

    def custom_getter(getter, *args, **kwargs):
        """Return the moving average of the requested variable."""
        v = ema.average(getter(*args, **kwargs))
        print(v.name)
        return v

    with tf.variable_scope("acer_model", custom_getter=custom_getter,
                           reuse=True):
        polyak_model = policy(observ_placeholder=train_ob_placeholder,
                              sess=sess)

    # Notation: (var) = batch variable, (var)s = sequence variable,
    # (var)_i = variable indexed by the action at step i.

    # Action probability distributions according to train_model,
    # polyak_model and step_model. policy.pi holds the distribution
    # parameters; a softmax is needed to obtain probabilities summing to 1.
    train_model_p = tf.nn.softmax(train_model.pi)
    polyak_model_p = tf.nn.softmax(polyak_model.pi)
    step_model_p = tf.nn.softmax(step_model.pi)
    # Shape is [nenvs * (nsteps + 1)].
    v = tf.reduce_sum(train_model_p * train_model.q, axis=-1)

    # Strip off the last step.
    f, f_pol, q = [
        strip(var, nenvs, nsteps)
        for var in (train_model_p, polyak_model_p, train_model.q)
    ]
    # Get pi and q values for the actions taken.
    f_i = get_by_index(f, A)
    q_i = get_by_index(q, A)

    # Compute ratios for importance truncation.
    rho = f / (MU + eps)
    rho_i = get_by_index(rho, A)

    # Calculate Q_retrace targets.
    qret = q_retrace(R, D, q_i, v, rho_i, nenvs, nsteps, gamma)

    # Calculate losses.
    # Entropy
    entropy = tf.reduce_mean(cat_entropy_softmax(f))

    # Policy gradient loss, with truncated importance sampling and bias
    # correction.
    v = strip(v, nenvs, nsteps, True)
    check_shape([qret, v, rho_i, f_i], [[nenvs * nsteps]] * 4)
    # Three tensors need three expected shapes; with only two listed the
    # third tensor (`q`) goes unchecked if check_shape pairs them by zip.
    check_shape([rho, f, q], [[nenvs * nsteps, nact]] * 3)

    # Truncated importance sampling.
    adv = qret - v
    logf = tf.log(f_i + eps)
    gain_f = logf * tf.stop_gradient(
        adv * tf.minimum(c, rho_i))  # [nenvs * nsteps]
    loss_f = -tf.reduce_mean(gain_f)

    # Bias correction for the truncation.
    adv_bc = q - tf.reshape(v, [nenvs * nsteps, 1])  # [nenvs * nsteps, nact]
    logf_bc = tf.log(f + eps)  # / (f_old + eps)
    check_shape([adv_bc, logf_bc], [[nenvs * nsteps, nact]] * 2)
    # IMP: this is a sum, as it is an expectation wrt f.
    gain_bc = tf.reduce_sum(
        logf_bc * tf.stop_gradient(
            adv_bc * tf.nn.relu(1.0 - (c / (rho + eps))) * f),
        axis=1)
    loss_bc = -tf.reduce_mean(gain_bc)

    loss_policy = loss_f + loss_bc

    # Value/Q function loss, and explained variance.
    check_shape([qret, q_i], [[nenvs * nsteps]] * 2)
    ev = q_explained_variance(tf.reshape(q_i, [nenvs, nsteps]),
                              tf.reshape(qret, [nenvs, nsteps]))
    loss_q = tf.reduce_mean(tf.square(tf.stop_gradient(qret) - q_i) * 0.5)

    # Net loss.
    check_shape([loss_policy, loss_q, entropy], [[]] * 3)
    loss = loss_policy + q_coef * loss_q - ent_coef * entropy

    if trust_region:
        # [nenvs * nsteps, nact]
        g = tf.gradients(
            -(loss_policy - ent_coef * entropy) * nsteps * nenvs, f)
        # k = tf.gradients(KL(f_pol || f), f)
        # Directly computed gradient of the KL divergence wrt f,
        # [nenvs * nsteps, nact].
        k = -f_pol / (f + eps)
        k_dot_g = tf.reduce_sum(k * g, axis=-1)
        adj = tf.maximum(
            0.0,
            (k_dot_g - delta) /
            (tf.reduce_sum(tf.square(k), axis=-1) + eps))  # [nenvs * nsteps]

        # Calculate stats (before doing the adjustment) for logging.
        avg_norm_k = avg_norm(k)
        avg_norm_g = avg_norm(g)
        avg_norm_k_dot_g = tf.reduce_mean(tf.abs(k_dot_g))
        avg_norm_adj = tf.reduce_mean(tf.abs(adj))

        g = g - tf.reshape(adj, [nenvs * nsteps, 1]) * k
        # Trust-region adjusted gradients wrt f, i.e. the statistics of
        # policy pi.
        grads_f = -g / (nenvs * nsteps)
        grads_policy = tf.gradients(f, params, grads_f)
        grads_q = tf.gradients(loss_q * q_coef, params)
        grads = [
            gradient_add(g1, g2, param)
            for (g1, g2, param) in zip(grads_policy, grads_q, params)
        ]

        avg_norm_grads_f = avg_norm(grads_f) * (nsteps * nenvs)
        norm_grads_q = tf.global_norm(grads_q)
        norm_grads_policy = tf.global_norm(grads_policy)
    else:
        grads = tf.gradients(loss, params)

    if max_grad_norm is not None:
        grads, norm_grads = tf.clip_by_global_norm(grads, max_grad_norm)
    else:
        # `norm_grads` is always reported through run_ops, so it must be
        # defined even when clipping is disabled.
        norm_grads = tf.global_norm(grads)
    grads = list(zip(grads, params))
    trainer = tf.train.RMSPropOptimizer(learning_rate=LR,
                                        decay=rprop_alpha,
                                        epsilon=rprop_epsilon)
    _opt_op = trainer.apply_gradients(grads)

    # When _train is run, the gradient step happens first and the moving
    # averages are updated afterwards.
    with tf.control_dependencies([_opt_op]):
        _train = tf.group(ema_apply_op)

    lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

    # Ops/summaries to run, and their names for logging. `_train` has no
    # name: its result is stripped before returning from train().
    run_ops = [
        _train, loss, loss_q, entropy, loss_policy, loss_f, loss_bc, ev,
        norm_grads
    ]
    names_ops = [
        'loss', 'loss_q', 'entropy', 'loss_policy', 'loss_f', 'loss_bc',
        'explained_variance', 'norm_grads'
    ]
    if trust_region:
        run_ops = run_ops + [
            norm_grads_q, norm_grads_policy, avg_norm_grads_f, avg_norm_k,
            avg_norm_g, avg_norm_k_dot_g, avg_norm_adj
        ]
        names_ops = names_ops + [
            'norm_grads_q', 'norm_grads_policy', 'avg_norm_grads_f',
            'avg_norm_k', 'avg_norm_g', 'avg_norm_k_dot_g', 'avg_norm_adj'
        ]

    def train(obs, actions, rewards, dones, mus, states, masks, steps):
        """Run one update; return the stat names and their values."""
        cur_lr = lr.value_steps(steps)
        td_map = {
            train_model.X: obs,
            polyak_model.X: obs,
            A: actions,
            R: rewards,
            D: dones,
            MU: mus,
            LR: cur_lr
        }
        if states is not None:
            td_map[train_model.S] = states
            td_map[train_model.M] = masks
            td_map[polyak_model.S] = states
            td_map[polyak_model.M] = masks

        return names_ops, sess.run(run_ops, td_map)[1:]  # strip off _train

    def _step(observation, **kwargs):
        """Evaluate action, action probabilities and state on step_model."""
        return step_model._evaluate(
            [step_model.action, step_model_p, step_model.state],
            observation, **kwargs)

    self.train = train
    self.save = functools.partial(save_variables, sess=sess,
                                  variables=params)
    self.train_model = train_model
    self.step_model = step_model
    self._step = _step
    self.step = self.step_model.step

    self.initial_state = step_model.initial_state
    tf.global_variables_initializer().run(session=sess)
def main(argv=None):  # pylint: disable=unused-argument
    """Train (or restore) the patch classifier and write its predictions.

    Loads the training images and ground truth, balances the two classes,
    builds a two-convolution / two-dense-layer network, then either restores
    a checkpoint (``RESTORE_MODEL``) or trains for ``NUM_EPOCHS`` epochs,
    saving a checkpoint after every epoch. Finally it writes prediction and
    overlay images for the training set to ``predictions_training/``.

    Args:
        argv: Unused.
    """
    data_dir = './training/training/'
    train_data_filename = data_dir + 'images/'
    train_labels_filename = data_dir + 'groundtruth/'

    # Extract it into numpy arrays.
    train_data = extract_data(train_data_filename, TRAINING_SIZE)
    train_labels = extract_labels(train_labels_filename, TRAINING_SIZE)

    num_epochs = NUM_EPOCHS

    def count_classes(labels):
        """Return (rows whose first entry is 1, all remaining rows)."""
        first = sum(1 for row in labels if row[0] == 1)
        return first, len(labels) - first

    c0, c1 = count_classes(train_labels)
    print('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' +
          str(c1))

    print('Balancing training data...')
    min_c = min(c0, c1)
    idx0 = [i for i, j in enumerate(train_labels) if j[0] == 1]
    idx1 = [i for i, j in enumerate(train_labels) if j[1] == 1]
    new_indices = idx0[0:min_c] + idx1[0:min_c]
    print(len(new_indices))
    print(train_data.shape)
    train_data = train_data[new_indices, :, :, :]
    train_labels = train_labels[new_indices]

    train_size = train_labels.shape[0]

    c0, c1 = count_classes(train_labels)
    print('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' +
          str(c1))

    # This is where training samples and labels are fed to the graph.
    # These placeholder nodes will be fed a batch of training data at each
    # training step using the {feed_dict} argument to the Run() call below.
    train_data_node = tf.placeholder(tf.float32,
                                     shape=(BATCH_SIZE, IMG_PATCH_SIZE,
                                            IMG_PATCH_SIZE, NUM_CHANNELS))
    train_labels_node = tf.placeholder(tf.float32,
                                       shape=(BATCH_SIZE, NUM_LABELS))
    train_all_data_node = tf.constant(train_data)

    # The variables below hold all the trainable weights. They are passed an
    # initial value which is assigned when the variable initializer runs.
    conv1_weights = tf.Variable(
        tf.truncated_normal(
            [5, 5, NUM_CHANNELS, 32],  # 5x5 filter, depth 32.
            stddev=0.1,
            seed=SEED))
    conv1_biases = tf.Variable(tf.zeros([32]))
    conv2_weights = tf.Variable(
        tf.truncated_normal([5, 5, 32, 64], stddev=0.1, seed=SEED))
    conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]))
    fc1_weights = tf.Variable(  # fully connected, depth 512.
        tf.truncated_normal(
            [int(IMG_PATCH_SIZE / 4 * IMG_PATCH_SIZE / 4 * 64), 512],
            stddev=0.1,
            seed=SEED))
    fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
    fc2_weights = tf.Variable(
        tf.truncated_normal([512, NUM_LABELS], stddev=0.1, seed=SEED))
    fc2_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))

    # Make an image summary for 4d tensor image with index idx
    def get_image_summary(img, idx=0):
        V = tf.slice(img, (0, 0, 0, idx), (1, -1, -1, 1))
        img_w = img.get_shape().as_list()[1]
        img_h = img.get_shape().as_list()[2]
        min_value = tf.reduce_min(V)
        V = V - min_value
        max_value = tf.reduce_max(V)
        V = V / (max_value * PIXEL_DEPTH)
        V = tf.reshape(V, (img_w, img_h, 1))
        V = tf.transpose(V, (2, 0, 1))
        V = tf.reshape(V, (-1, img_w, img_h, 1))
        return V

    # Make an image summary for 3d tensor image with index idx
    def get_image_summary_3d(img):
        V = tf.slice(img, (0, 0, 0), (1, -1, -1))
        img_w = img.get_shape().as_list()[1]
        img_h = img.get_shape().as_list()[2]
        V = tf.reshape(V, (img_w, img_h, 1))
        V = tf.transpose(V, (2, 0, 1))
        V = tf.reshape(V, (-1, img_w, img_h, 1))
        return V

    # Get prediction for given input image
    def get_prediction(img):
        # NOTE: every call adds a new constant and model copy to the graph.
        data = numpy.asarray(img_crop(img, IMG_PATCH_SIZE, IMG_PATCH_SIZE))
        data_node = tf.constant(data)
        output = tf.nn.softmax(model(data_node))
        output_prediction = s.run(output)
        img_prediction = label_to_img(img.shape[0], img.shape[1],
                                      IMG_PATCH_SIZE, IMG_PATCH_SIZE,
                                      output_prediction)
        return img_prediction

    # Get a concatenation of the prediction and groundtruth for given input
    # file
    def get_prediction_with_groundtruth(filename, image_idx):
        imageid = "satImage_%.3d" % image_idx
        image_filename = filename + imageid + ".png"
        img = mpimg.imread(image_filename)

        img_prediction = get_prediction(img)
        cimg = concatenate_images(img, img_prediction)

        return cimg

    # Get prediction overlaid on the original image for given input file
    def get_prediction_with_overlay(filename, image_idx):
        imageid = "satImage_%.3d" % image_idx
        image_filename = filename + imageid + ".png"
        img = mpimg.imread(image_filename)

        img_prediction = get_prediction(img)
        oimg = make_img_overlay(img, img_prediction)

        return oimg

    # We will replicate the model structure for the training subgraph, as
    # well as the evaluation subgraphs, while sharing the trainable
    # parameters.
    def model(data, train=False):
        """The Model definition."""
        # 2D convolution, with 'SAME' padding (i.e. the output feature map
        # has the same size as the input). Note that {strides} is a 4D array
        # whose shape matches the data layout: [image index, y, x, depth].
        conv = tf.nn.conv2d(data,
                            conv1_weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')
        # Bias and rectified linear non-linearity.
        relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
        # Max pooling. The kernel size spec {ksize} also follows the layout
        # of the data. Here we have a pooling window of 2, and a stride of 2.
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')

        conv2 = tf.nn.conv2d(pool,
                             conv2_weights,
                             strides=[1, 1, 1, 1],
                             padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
        pool2 = tf.nn.max_pool(relu2,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME')

        # Reshape the feature map cuboid into a 2D matrix to feed it to the
        # fully connected layers.
        pool_shape = pool2.get_shape().as_list()
        reshape = tf.reshape(
            pool2,
            [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
        # Fully connected layer. Note that the '+' operation automatically
        # broadcasts the biases.
        hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
        # Dropout is intentionally disabled here:
        # if train:
        #     hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)
        out = tf.matmul(hidden, fc2_weights) + fc2_biases

        if train:
            # Registering the image summaries is the only effect needed; the
            # returned summary tensors are collected by merge_all().
            summary_id = '_0'
            tf.summary.image('summary_data' + summary_id,
                             get_image_summary(data))
            tf.summary.image('summary_conv' + summary_id,
                             get_image_summary(conv))
            tf.summary.image('summary_pool' + summary_id,
                             get_image_summary(pool))
            tf.summary.image('summary_conv2' + summary_id,
                             get_image_summary(conv2))
            tf.summary.image('summary_pool2' + summary_id,
                             get_image_summary(pool2))

        return out

    # Training computation: logits + cross-entropy loss.
    logits = model(train_data_node, True)  # BATCH_SIZE*NUM_LABELS
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                labels=train_labels_node))
    tf.summary.scalar('loss', loss)

    all_params_node = [
        conv1_weights, conv1_biases, conv2_weights, conv2_biases,
        fc1_weights, fc1_biases, fc2_weights, fc2_biases
    ]
    all_params_names = [
        'conv1_weights', 'conv1_biases', 'conv2_weights', 'conv2_biases',
        'fc1_weights', 'fc1_biases', 'fc2_weights', 'fc2_biases'
    ]
    # Per-parameter gradient norms of the unregularized loss, for
    # TensorBoard.
    all_grads_node = tf.gradients(loss, all_params_node)
    all_grad_norms_node = []
    for param_name, grad in zip(all_params_names, all_grads_node):
        norm_grad_i = tf.global_norm([grad])
        all_grad_norms_node.append(norm_grad_i)
        tf.summary.scalar(param_name, norm_grad_i)

    # L2 regularization for the fully connected parameters.
    regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                    tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))
    # Add the regularization term to the loss.
    loss += 5e-4 * regularizers

    # Optimizer: set up a variable that's incremented once per batch and
    # controls the learning rate decay.
    batch = tf.Variable(0)
    # Decay once per epoch, using an exponential schedule starting at 0.01.
    learning_rate = tf.train.exponential_decay(
        0.01,  # Base learning rate.
        batch * BATCH_SIZE,  # Current index into the dataset.
        train_size,  # Decay step.
        0.95,  # Decay rate.
        staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    # Use simple momentum for the optimization.
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.0).minimize(
        loss, global_step=batch)

    # Predictions for the minibatch, validation set and test set.
    train_prediction = tf.nn.softmax(logits)
    # We'll compute them only once in a while by calling their {eval()}
    # method.
    train_all_prediction = tf.nn.softmax(model(train_all_data_node))

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # Create a local session to run this computation.
    with tf.Session() as s:

        if RESTORE_MODEL:
            # Restore variables from disk.
            saver.restore(s, FLAGS.train_dir + "/model.ckpt")
            print("Model restored.")

        else:
            # Run all the initializers to prepare the trainable parameters.
            tf.global_variables_initializer().run()

            # Build the summary operation based on the TF collection of
            # Summaries.
            summary_op = tf.summary.merge_all()
            summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                                   graph=s.graph)
            print('Initialized!')
            # Loop through training steps.
            print('Total number of iterations = ' +
                  str(int(num_epochs * train_size / BATCH_SIZE)))

            steps_per_epoch = train_size // BATCH_SIZE

            for iepoch in range(num_epochs):

                # Permute training indices
                perm_indices = numpy.random.permutation(train_size)

                for step in range(steps_per_epoch):
                    # Offset of the current minibatch in the permutation.
                    # step < train_size // BATCH_SIZE guarantees a full
                    # batch; wrapping with a modulo would divide by zero
                    # when train_size == BATCH_SIZE and replay the first
                    # batch when train_size is a multiple of BATCH_SIZE.
                    offset = step * BATCH_SIZE
                    batch_indices = perm_indices[offset:(offset + BATCH_SIZE)]

                    # Note that we could use better randomization across
                    # epochs.
                    batch_data = train_data[batch_indices, :, :, :]
                    batch_labels = train_labels[batch_indices]
                    # This dictionary maps the batch data (as a numpy array)
                    # to the node in the graph it should be fed to.
                    feed_dict = {
                        train_data_node: batch_data,
                        train_labels_node: batch_labels
                    }

                    if step % RECORDING_STEP == 0:
                        summary_str, _, l, lr, predictions = s.run(
                            [
                                summary_op, optimizer, loss, learning_rate,
                                train_prediction
                            ],
                            feed_dict=feed_dict)
                        # Use a step that keeps increasing across epochs so
                        # later epochs do not collide with earlier ones.
                        global_step = iepoch * steps_per_epoch + step
                        summary_writer.add_summary(summary_str, global_step)
                        summary_writer.flush()

                        print('Epoch %.2f' %
                              (iepoch + float(step) * BATCH_SIZE / train_size))
                        print('Minibatch loss: %.3f, learning rate: %.6f' %
                              (l, lr))
                        print('Minibatch error: %.1f%%' %
                              error_rate(predictions, batch_labels))

                        sys.stdout.flush()
                    else:
                        # Run the graph and fetch some of the nodes.
                        _, l, lr, predictions = s.run(
                            [optimizer, loss, learning_rate, train_prediction],
                            feed_dict=feed_dict)

                # Save the variables to disk.
                save_path = saver.save(s, FLAGS.train_dir + "/model.ckpt")
                print("Model saved in file: %s" % save_path)

            summary_writer.close()

        print("Running prediction on training set")
        prediction_training_dir = "predictions_training/"
        if not os.path.isdir(prediction_training_dir):
            os.mkdir(prediction_training_dir)
        for i in range(1, TRAINING_SIZE + 1):
            pimg = get_prediction_with_groundtruth(train_data_filename, i)
            Image.fromarray(pimg).save(prediction_training_dir +
                                       "prediction_" + str(i) + ".png")
            oimg = get_prediction_with_overlay(train_data_filename, i)
            oimg.save(prediction_training_dir + "overlay_" + str(i) + ".png")