Example 1
  def _update_step(self, observ, action, old_mean, old_logstd, reward, advantage, length):
    """Compute the current combined loss and perform a gradient update step.

    Args:
      observ: Sequences of observations.
      action: Sequences of actions.
      old_mean: Sequences of action means of the behavioral policy.
      old_logstd: Sequences of action log stddevs of the behavioral policy.
      reward: Sequences of reward.
      advantage: Sequences of advantages.
      length: Batch of sequence lengths.

    Returns:
      Tuple of value loss, policy loss, and summary tensor.
    """
    value_loss, value_summary = self._value_loss(observ, reward, length)
    network = self._network(observ, length)
    policy_loss, policy_summary = self._policy_loss(network.mean, network.logstd, old_mean,
                                                    old_logstd, action, advantage, length)
    value_gradients, value_variables = (zip(*self._optimizer.compute_gradients(value_loss)))
    policy_gradients, policy_variables = (zip(*self._optimizer.compute_gradients(policy_loss)))
    all_gradients = value_gradients + policy_gradients
    all_variables = value_variables + policy_variables
    optimize = self._optimizer.apply_gradients(zip(all_gradients, all_variables))
    summary = tf.summary.merge([
        value_summary, policy_summary,
        tf.summary.scalar('value_gradient_norm', tf.global_norm(value_gradients)),
        tf.summary.scalar('policy_gradient_norm', tf.global_norm(policy_gradients)),
        utility.gradient_summaries(zip(value_gradients, value_variables), dict(value=r'.*')),
        utility.gradient_summaries(zip(policy_gradients, policy_variables), dict(policy=r'.*'))
    ])
    with tf.control_dependencies([optimize]):
      return [tf.identity(x) for x in (value_loss, policy_loss, summary)]
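A note on the idiom above: `compute_gradients` returns `(gradient, variable)` pairs, `zip(*...)` transposes them into parallel tuples, and tuple concatenation preserves the pairing when the lists are re-zipped for `apply_gradients`. A minimal standalone sketch of the same round trip (toy variables, illustrative only):

import tensorflow as tf

x = tf.Variable(1.0)
y = tf.Variable(2.0)
opt = tf.train.GradientDescentOptimizer(0.1)
grads_a, vars_a = zip(*opt.compute_gradients(tf.square(x), var_list=[x]))
grads_b, vars_b = zip(*opt.compute_gradients(tf.square(y), var_list=[y]))
# tuples concatenate, so each gradient stays aligned with its variable
train_op = opt.apply_gradients(zip(grads_a + grads_b, vars_a + vars_b))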
Example 2
def clip_by_global_norm_summary(t_list, clip_norm, norm_name, variables):
    # wrapper around tf.clip_by_global_norm that also does summary ops of norms

    # compute norms
    # use global_norm with one element to handle IndexedSlices vs dense
    norms = [tf.global_norm([t]) for t in t_list]

    # summary ops before clipping
    summary_ops = []
    for ns, v in zip(norms, variables):
        name = 'norm_pre_clip/' + v.name.replace(":", "_")
        summary_ops.append(tf.summary.scalar(name, ns))

    # clip
    clipped_t_list, tf_norm = tf.clip_by_global_norm(t_list, clip_norm)

    # summary ops after clipping
    norms_post = [tf.global_norm([t]) for t in clipped_t_list]
    for ns, v in zip(norms_post, variables):
        name = 'norm_post_clip/' + v.name.replace(":", "_")
        summary_ops.append(tf.summary.scalar(name, ns))

    summary_ops.append(tf.summary.scalar(norm_name, tf_norm))

    return clipped_t_list, tf_norm, summary_ops
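A usage sketch for the helper above, assuming a `loss`, an `optimizer`, and a default graph already exist (names are illustrative):

params = tf.trainable_variables()
grads = tf.gradients(loss, params)
clipped, norm, summary_ops = clip_by_global_norm_summary(
    grads, clip_norm=5.0, norm_name='global_grad_norm', variables=params)
train_op = optimizer.apply_gradients(zip(clipped, params))
merged_summaries = tf.summary.merge(summary_ops)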
Example 3
def setup_loss_critic(critic):
    # we are starting with critic.outputs symbol (after logistic layer)
    with tf.variable_scope("rl", initializer=tf.uniform_unit_scaling_initializer(1.0)):
        # loss setup
        # None to timestep
        critic.target_qt = tf.placeholder(tf.float32, shape=[None, None, critic.vocab_size],
                                            name="q_action_score")
        # p_actions is the target_token, and it's already [T, batch_size]
        # q_t needs to be expanded...

        # critic.outputs [T, batch_size, vocab_size]
        # let's populate (expand) target tokens to fill up qt (just like what we did with one-hot labels)

        critic.q_loss = tf.reduce_mean(tf.square(critic.outputs - critic.target_qt))  # Note: not adding lambda*C yet (variance)

        opt = nlc_model.get_optimizer(FLAGS.optimizer)(critic.learning_rate)

        # update
        params = tf.trainable_variables()
        gradients = tf.gradients(critic.q_loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, FLAGS.max_gradient_norm)
        #      self.gradient_norm = tf.global_norm(clipped_gradients)
        critic.gradient_norm = tf.global_norm(gradients)
        critic.param_norm = tf.global_norm(params)
        critic.updates = opt.apply_gradients(
            zip(clipped_gradients, params), global_step=critic.global_step)
Example 4
  def create_variables_for_optimization(self):
    with tf.name_scope("optimization"):
      with tf.name_scope("masker"):
          self.mask = tf.sequence_mask(self.seq_len, self.num_step)
          self.mask = tf.reshape(tf.cast(self.mask, tf.float32), (-1,))
      if self.loss_function == "cross_entropy":
        self.pl_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                                            logits=self.logit,
                                            labels=self.actions_flatten)
      elif self.loss_function == "l2":
        self.one_hot_actions = tf.one_hot(self.actions_flatten, self.num_actions)
        self.pl_loss = tf.reduce_mean((self.probs - self.one_hot_actions) ** 2,
                                            axis=1)
      else:
          raise ValueError("loss function type is not defined")

      self.pl_loss = tf.multiply(self.pl_loss, self.mask)
      self.pl_loss = tf.reduce_mean(tf.multiply(self.pl_loss, self.returns_flatten))

      self.entropy = tf.multiply(self.entropy, self.mask)
      self.entropy = tf.reduce_mean(self.entropy)

      self.loss = self.pl_loss - self.entropy_bonus * self.entropy

      # TRAINABLE_VARIABLES (not GLOBAL_VARIABLES): gradients w.r.t. non-trainable
      # variables come back as None, and tf.clip_by_norm(None, ...) raises
      self.trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy_network")
      self.gradients = self.optimizer.compute_gradients(self.loss, var_list=self.trainable_variables)
      self.clipped_gradients = [(tf.clip_by_norm(grad, self.max_gradient), var)
                                for grad, var in self.gradients if grad is not None]
      self.train_op = self.optimizer.apply_gradients(self.clipped_gradients,
                                                     self.global_step)
      self.grad_norm = tf.global_norm([grad for grad, var in self.gradients])
      self.var_norm = tf.global_norm(self.trainable_variables)
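One caveat: `tf.clip_by_norm` clips each gradient tensor separately, so unlike `tf.clip_by_global_norm` it does not bound the combined norm that `grad_norm` reports. A small illustration with toy constants:

g1, g2 = tf.constant([3.0]), tf.constant([4.0])           # global norm = 5
per_tensor = [tf.clip_by_norm(g, 3.0) for g in (g1, g2)]  # norms 3 and 3
with tf.Session() as sess:
    print(sess.run(tf.global_norm(per_tensor)))           # ~4.24, still > 3.0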
Example 5
def setup_actor_update(actor):

    with tf.variable_scope("rl"):
        actor.critic_output = tf.placeholder(tf.float32, [None, None, actor.vocab_size], name='critic_output')
        # action_gradients is passed in by Q_network...
        # and in DDPG, it's the gradients of Q w.r.t. policy's chosen actions
        # but in AC, it's the output of Q network w.r.t. all actions
        opt = nlc_model.get_optimizer(FLAGS.optimizer)(actor.learning_rate)

        # update
        params = tf.trainable_variables()

        # TODO: hope this would work
        with tf.variable_scope("Loss"):
            doshape = tf.shape(actor.decoder_output)
            T, batch_size = doshape[0], doshape[1]
            do2d = tf.reshape(actor.decoder_output, [-1, actor.size])
            logits2d = rnn_cell._linear(do2d, actor.vocab_size, True, 1.0)
            # outputs2d = tf.nn.log_softmax(logits2d)

            # apply Q-network's score here (similar to advantage function)
            # 1. reshape critic_output like decoder_output (same shape anyway)
            # TODO: hope this is correct
            critic_do2d = tf.reshape(actor.critic_output, [-1, actor.vocab_size])  # should reshape according to critic
            # 2. multiply this by the actor's logits
            rl_logits2d = logits2d * critic_do2d

            # actor.outputs = tf.reshape(outputs2d, tf.pack([T, batch_size, actor.vocab_size]))

            targets_no_GO = tf.slice(actor.target_tokens, [1, 0], [-1, -1])
            masks_no_GO = tf.slice(actor.target_mask, [1, 0], [-1, -1])
            # easier to pad target/mask than to split decoder input since tensorflow does not support negative indexing
            labels1d = tf.reshape(tf.pad(targets_no_GO, [[0, 1], [0, 0]]), [-1])
            mask1d = tf.reshape(tf.pad(masks_no_GO, [[0, 1], [0, 0]]), [-1])
            losses1d = tf.nn.sparse_softmax_cross_entropy_with_logits(rl_logits2d, labels1d) * tf.to_float(mask1d)
            losses2d = tf.reshape(losses1d, tf.pack([T, batch_size]))

            actor.rl_losses = tf.reduce_sum(losses2d) / tf.to_float(batch_size)

        # http://pemami4911.github.io/blog/2016/08/21/ddpg-rl.html (DDPG update)
        gradients = tf.gradients(actor.rl_losses, params)  # step 7: update
        # Not sure if I understood this part lol

        clipped_gradients, _ = tf.clip_by_global_norm(gradients, FLAGS.max_gradient_norm)

        # clip, then multiply, otherwise we are not learning the signals from critic
        # clipped_gradients: [T, batch_size, vocab_size]

        # updated_gradients = clipped_gradients * actor.critic_output
        # pass in as input

        actor.rl_gradient_norm = tf.global_norm(clipped_gradients)
        actor.rl_param_norm = tf.global_norm(params)

        actor.rl_updates = opt.apply_gradients(
            zip(clipped_gradients, params), global_step=actor.global_step)
Example 6
    def __init__(self, vocab_size, label_size, size, num_layers, batch_size, learning_rate,
                 learning_rate_decay_factor, dropout, embedding, src_steps, tgt_steps,
                 mode='sq2sq',
                 max_gradient_norm=5.0, forward_only=False):

        self.size = size
        self.mode = mode
        self.vocab_size = vocab_size
        self.label_size = label_size
        self.embedding = embedding
        self.src_steps = src_steps
        self.tgt_steps = tgt_steps
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.keep_prob = 1.0 - dropout
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        self.source_tokens = tf.placeholder(tf.int32, shape=[None, self.src_steps], name='srcInput')
        self.target_tokens = tf.placeholder(tf.int32, shape=[None, self.tgt_steps], name='targetInput')
        self.label_placeholder = tf.placeholder(tf.float32, shape=[None, self.label_size])

        self.decoder_state_input, self.decoder_state_output = [], []
        self.tgt_encoder_state_input, self.tgt_encoder_state_output = [], []

        for i in xrange(num_layers):
            self.decoder_state_input.append(tf.placeholder(tf.float32, shape=[None, size]))
            self.tgt_encoder_state_input.append(tf.placeholder(tf.float32, shape=[None, size]))

        self.setup_embeddings()
        self.setup_encoder()
        self.setup_decoder()
        if mode == 'sq2sq':
            self.setup_label_loss()
        else:
            raise NotImplementedError

        params = tf.trainable_variables()
        if not forward_only:
            opt = tf.train.AdamOptimizer(self.learning_rate)

            gradients = tf.gradients(self.losses, params)
            clipped_gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)
            self.gradient_norm = tf.global_norm(clipped_gradients)
            self.param_norm = tf.global_norm(params)
            self.updates = opt.apply_gradients(
                zip(clipped_gradients, params), global_step=self.global_step)

        self.saver = tf.train.Saver(tf.all_variables())
Example 7
    def initialize(self):
        if self.summarize:
            bs = tf.to_float(tf.shape(self.x)[0])
            tf.summary.scalar("model/policy_loss", self.pi_loss / bs)
            tf.summary.scalar("model/grad_gnorm", tf.global_norm(self.grads))
            tf.summary.scalar("model/var_gnorm", tf.global_norm(self.var_list))
            self.summary_op = tf.summary.merge_all()

        # TODO(rliaw): Can consider exposing these parameters
        self.sess = tf.Session(graph=self.g, config=tf.ConfigProto(
            intra_op_parallelism_threads=1, inter_op_parallelism_threads=2,
            gpu_options=tf.GPUOptions(allow_growth=True)))
        self.variables = ray.experimental.TensorFlowVariables(self.loss,
                                                              self.sess)
        self.sess.run(tf.global_variables_initializer())
Example 8
def optim(loss, **kwargs):
    r"""Applies gradients to variables.

    Args:
        loss: A 0-D `Tensor` containing the value to minimize.
        kwargs:
          optim: A name for optimizer. 'MaxProp' (default), 'AdaMax', 'Adam', or 'sgd'.
          lr: A Python Scalar (optional). Learning rate. Default is .001.
          beta1: A Python Scalar (optional). Default is .9.
          beta2: A Python Scalar (optional). Default is .99.
          category: A string or string list (optional). Specifies the variables that should be trained.
            Only trainable variables whose names start with `category` are updated.
            Default is '', which means all trainable variables are updated.
    """
    opt = Opt(kwargs)

    # default training options
    opt += Opt(optim='MaxProp', lr=0.001, beta1=0.9, beta2=0.99, category='')

    # select optimizer
    # if opt.optim == 'MaxProp':
        # optim = tf.sg_optimize.MaxPropOptimizer(learning_rate=opt.lr, beta2=opt.beta2)
    # elif opt.optim == 'AdaMax':
        # optim = tf.sg_optimize.AdaMaxOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    # elif opt.optim == 'Adam':
    if opt.optim == 'Adam':
        optim = tf.train.AdamOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    else:
        optim = tf.train.GradientDescentOptimizer(learning_rate=opt.lr)

    # get trainable variables
    if isinstance(opt.category, (tuple, list)):
        var_list = []
        for cat in opt.category:
            var_list.extend([t for t in tf.trainable_variables() if t.name.startswith(cat)])
    else:
        var_list = [t for t in tf.trainable_variables() if t.name.startswith(opt.category)]

    # calc gradient
    gradient = optim.compute_gradients(loss, var_list=var_list)

    # add summaries (compute_gradients returns (gradient, variable) pairs)
    for g, v in gradient:
        # exclude batch-norm statistics
        if 'mean' not in v.name and 'variance' not in v.name \
                and 'beta' not in v.name and 'gamma' not in v.name:
            # summary name
            name = ''.join(v.name.split(':')[:-1])
            # summary statistics
            # noinspection PyBroadException
            try:
                tf.summary.scalar(name + '/grad', tf.global_norm([g]))
                tf.summary.histogram(name + '/grad-h', g)
            except:
                pass
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # gradient update op
    return optim.apply_gradients(gradient, global_step=global_step), global_step
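Presumed usage, with `Opt` and the loss coming from the surrounding sugartensor-style codebase (a sketch, not a verified API):

train_op, global_step = optim(loss, optim='Adam', lr=1e-3, category='encoder')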
Example 9
    def _update_network(self, trainer):
        self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
        self.actions_onehot = tf.one_hot(
            self.actions, self.a_dim, dtype=tf.float32)
        self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
        self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)

        self.outputs = tf.reduce_sum(
                self.policy * self.actions_onehot, [1])

        # loss
        self.value_loss = 0.5 * tf.reduce_sum(tf.square(
                self.target_v - tf.reshape(self.value, [-1])))
        # higher entropy -> lower loss -> encourage exploration
        self.entropy = -tf.reduce_sum(self.policy * tf.log(self.policy))

        self.policy_loss = -tf.reduce_sum(
            tf.log(self.outputs) * self.advantages)

        self.loss = 0.5 * self.value_loss \
            + self.policy_loss - 0.01 * self.entropy

        # local gradients
        local_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
        self.gradients = tf.gradients(self.loss, local_vars)
        self.var_norms = tf.global_norm(local_vars)

        # grads[i] * clip_norm / max(global_norm, clip_norm)
        grads, self.grad_norms = tf.clip_by_global_norm(self.gradients, 40.0)

        # apply gradients to global network
        global_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
        self.apply_grads = trainer.apply_gradients(zip(grads, global_vars))
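`zip(grads, global_vars)` quietly assumes the local and global networks create their variables in the same order. A defensive variant (illustrative only, `scope` being the worker's scope name) pairs gradients with global variables by name instead:

def pair_with_global(grads, local_vars, scope):
    # strip the worker prefix so 'worker_1/w' matches 'global/w'
    global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
    by_name = {v.op.name: v for v in global_vars}
    return [(g, by_name['global/' + v.op.name.split(scope + '/', 1)[1]])
            for g, v in zip(grads, local_vars)]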
Example 10
  def _update_policy_step(self, observ, action, old_mean, old_logstd, advantage, length):
    """Compute the current policy loss and perform a gradient update step.

    Args:
      observ: Sequences of observations.
      action: Sequences of actions.
      old_mean: Sequences of action means of the behavioral policy.
      old_logstd: Sequences of action log stddevs of the behavioral policy.
      advantage: Sequences of advantages.
      length: Batch of sequence lengths.

    Returns:
      Tuple of loss tensor and summary tensor.
    """
    network = self._network(observ, length)
    loss, summary = self._policy_loss(network.mean, network.logstd, old_mean, old_logstd, action,
                                      advantage, length)
    gradients, variables = (zip(*self._policy_optimizer.compute_gradients(loss)))
    optimize = self._policy_optimizer.apply_gradients(zip(gradients, variables))
    summary = tf.summary.merge([
        summary,
        tf.summary.scalar('gradient_norm', tf.global_norm(gradients)),
        utility.gradient_summaries(zip(gradients, variables), dict(policy=r'.*'))
    ])
    with tf.control_dependencies([optimize]):
      return [tf.identity(loss), tf.identity(summary)]
Example 11
  def _update_step(self, sequence):
    """Compute the current combined loss and perform a gradient update step.

    The sequences must be a dict containing the keys `length` and `sequence`,
    where the latter is a tuple containing observations, actions, parameters of
    the behavioral policy, rewards, and advantages.

    Args:
      sequence: Sequences of episodes or chunks of episodes.

    Returns:
      Tuple of value loss, policy loss, and summary tensor.
    """
    observ, action, old_policy_params, reward, advantage = sequence['sequence']
    length = sequence['length']
    old_policy = self._policy_type(**old_policy_params)
    value_loss, value_summary = self._value_loss(observ, reward, length)
    network = self._network(observ, length)
    policy_loss, policy_summary = self._policy_loss(
        old_policy, network.policy, action, advantage, length)
    loss = policy_loss + value_loss + network.get('loss', 0)
    gradients, variables = (
        zip(*self._optimizer.compute_gradients(loss)))
    optimize = self._optimizer.apply_gradients(
        zip(gradients, variables))
    summary = tf.summary.merge([
        value_summary, policy_summary,
        tf.summary.histogram('network_loss', network.get('loss', 0)),
        tf.summary.scalar('gradient_norm', tf.global_norm(gradients)),
        utility.gradient_summaries(zip(gradients, variables))])
    with tf.control_dependencies([optimize]):
      return [tf.identity(x) for x in (value_loss, policy_loss, summary)]
Example 12
def _summarize_vars_and_grads(grads_and_vars):
  tf.logging.info('Trainable variables:')
  tf.logging.info('-' * 60)
  for grad, var in grads_and_vars:
    tf.logging.info(var)

    def tag(name, v=var):
      return v.op.name + '_' + name

    # Variable summary
    mean = tf.reduce_mean(var)
    tf.summary.scalar(tag('mean'), mean)
    with tf.name_scope(tag('stddev')):
      stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
    tf.summary.scalar(tag('stddev'), stddev)
    tf.summary.scalar(tag('max'), tf.reduce_max(var))
    tf.summary.scalar(tag('min'), tf.reduce_min(var))
    tf.summary.histogram(tag('histogram'), var)

    # Gradient summary
    if grad is not None:
      if isinstance(grad, tf.IndexedSlices):
        grad_values = grad.values
      else:
        grad_values = grad

      tf.summary.histogram(tag('gradient'), grad_values)
      tf.summary.scalar(tag('gradient_norm'), tf.global_norm([grad_values]))
    else:
      tf.logging.info('Var %s has no gradient', var.op.name)
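The `IndexedSlices` branch exists because embedding lookups yield sparse gradients: `tf.global_norm` accepts `IndexedSlices` directly, but histogram summaries need the dense `.values`. A tiny demonstration with a toy embedding:

emb = tf.Variable(tf.ones([100, 8]))
loss = tf.reduce_sum(tf.nn.embedding_lookup(emb, [0, 1]))
grad = tf.gradients(loss, [emb])[0]               # tf.IndexedSlices, not a Tensor
norm = tf.global_norm([grad])                     # fine as-is
hist = tf.summary.histogram('grad', grad.values)  # dense values required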
Example 13
def _add_gradients_summaries(grads_and_vars):
  """Add histogram summaries to gradients.

  Note: The summaries are also added to the SUMMARIES collection.

  Args:
    grads_and_vars: A list of gradient to variable pairs (tuples).

  Returns:
    The _list_ of the added summaries for grads_and_vars.
  """
  summaries = []
  for grad, var in grads_and_vars:
    if grad is not None:
      if isinstance(grad, tf.IndexedSlices):
        grad_values = grad.values
      else:
        grad_values = grad
      summaries.append(tf.histogram_summary(var.op.name + ':gradient',
                                            grad_values))
      summaries.append(tf.histogram_summary(var.op.name + ':gradient_norm',
                                            tf.global_norm([grad_values])))
    else:
      tf.logging.info('Var %s has no gradient', var.op.name)
  return summaries
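`tf.histogram_summary` is the pre-1.0 summary API; under TF 1.x the same pair would presumably read as below (':' is not a legal character in 1.x summary names, hence the '/', and the norm is more naturally a scalar):

summaries.append(tf.summary.histogram(var.op.name + '/gradient', grad_values))
summaries.append(tf.summary.scalar(var.op.name + '/gradient_norm',
                                   tf.global_norm([grad_values])))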
Example 14
def get_train_op(loss, params):
  """Generate training operation that updates variables based on loss."""
  with tf.variable_scope("get_train_op"):
    learning_rate = get_learning_rate(
        params.learning_rate, params.hidden_size,
        params.learning_rate_warmup_steps)

    # Create optimizer. Use LazyAdamOptimizer from TF contrib, which is faster
    # than the TF core Adam optimizer.
    optimizer = tf.contrib.opt.LazyAdamOptimizer(
        learning_rate,
        beta1=params.optimizer_adam_beta1,
        beta2=params.optimizer_adam_beta2,
        epsilon=params.optimizer_adam_epsilon)

    # Calculate and apply gradients using LazyAdamOptimizer.
    global_step = tf.train.get_global_step()
    tvars = tf.trainable_variables()
    gradients = optimizer.compute_gradients(
        loss, tvars, colocate_gradients_with_ops=True)
    train_op = optimizer.apply_gradients(
        gradients, global_step=global_step, name="train")

    # Save gradient norm to Tensorboard
    tf.summary.scalar("global_norm/gradient_norm",
                      tf.global_norm(list(zip(*gradients))[0]))

    return train_op
Example 15
  def __init__(self, vocab_size, size, num_layers, max_gradient_norm, batch_size, learning_rate,
               learning_rate_decay_factor, dropout, FLAGS, forward_only=False, optimizer="adam"):
    self.size = size
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.num_layers = num_layers
    self.keep_prob_config = 1.0 - dropout
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)

    self.keep_prob = tf.placeholder(tf.float32)
    self.source_tokens = tf.placeholder(tf.int32, shape=[None, None])
    self.target_tokens = tf.placeholder(tf.int32, shape=[None, None])
    self.source_mask = tf.placeholder(tf.int32, shape=[None, None])
    self.target_mask = tf.placeholder(tf.int32, shape=[None, None])
    self.beam_size = tf.placeholder(tf.int32)
    self.target_length = tf.reduce_sum(self.target_mask, reduction_indices=0)

    self.FLAGS = FLAGS

    self.decoder_state_input, self.decoder_state_output = [], []
    for i in xrange(num_layers):
      self.decoder_state_input.append(tf.placeholder(tf.float32, shape=[None, size]))

    with tf.variable_scope("NLC", initializer=tf.uniform_unit_scaling_initializer(1.0)):
      self.setup_embeddings()
      self.setup_encoder()
      self.setup_decoder()
      self.setup_loss()

      self.setup_beam()

    params = tf.trainable_variables()
    if not forward_only:
      opt = get_optimizer(optimizer)(self.learning_rate)

      gradients = tf.gradients(self.losses, params)
      clipped_gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)
#      self.gradient_norm = tf.global_norm(clipped_gradients)
      self.gradient_norm = tf.global_norm(gradients)
      self.param_norm = tf.global_norm(params)
      self.updates = opt.apply_gradients(
        zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.keep)  # write_version=tf.train.SaverDef.V1
Example 16
def gradient_clip(gradients, max_gradient_norm):
  """Clipping gradients of a model."""
  clipped_gradients, gradient_norm = tf.clip_by_global_norm(
      gradients, max_gradient_norm)
  gradient_norm_summary = [tf.summary.scalar("grad_norm", gradient_norm)]
  gradient_norm_summary.append(
      tf.summary.scalar("clipped_gradient", tf.global_norm(clipped_gradients)))

  return clipped_gradients, gradient_norm_summary, gradient_norm
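A usage sketch for `gradient_clip`, assuming a `loss`, an optimizer `opt`, and a `global_step` already exist:

params = tf.trainable_variables()
grads = tf.gradients(loss, params)
clipped, norm_summaries, grad_norm = gradient_clip(grads, max_gradient_norm=5.0)
update = opt.apply_gradients(zip(clipped, params), global_step=global_step)
summary_op = tf.summary.merge(norm_summaries)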
Example 17
 def __init__(self, FLAGS, encoder, decoder, classifier):
     
     self.FLAGS = FLAGS
     self.encoder = encoder
     self.decoder = decoder
     self.classifier = classifier
     self.xplaceholder = tf.placeholder(tf.int32, 
                         shape = (None, self.FLAGS.maxSentenceLength))
     self.yplaceholder = tf.placeholder(tf.float64, shape = (None,)) 
     # self.maskplaceholder = tf.placeholder(tf.int32, shape = (None,self.FLAGS.maxSentenceLength))
     self.maskplaceholder = tf.placeholder(tf.int32, shape = (None,))
     self.drop_placeholder = tf.placeholder(tf.float64, shape = ())
     self.lr_placeholder = tf.placeholder(tf.float64, shape = ())
     self.opplaceholder = tf.placeholder(tf.float64)
     with tf.variable_scope("tldr", initializer = tf.contrib.layers.xavier_initializer()):
         self.setup_embeddings()
         self.setup_system()
         self.setup_loss()
     
     params = tf.trainable_variables()
     self.globalnorm = 0
     self.paramnorm = 0
     for param in params:
         shp = param.get_shape()
         if len(shp) >= 2:
             self.paramnorm += tf.nn.l2_loss(param)
     opt = tf.train.AdamOptimizer(self.lr_placeholder)
     if self.FLAGS.clipGradients == 1:
         try:
             grads, _ = zip(*opt.compute_gradients(self.loss))
             grads, _ = tf.clip_by_global_norm(grads, self.FLAGS.max_gradient_norm)
             self.globalnorm = tf.global_norm(grads)
             grads_vars = zip(grads, params)
             self.updates = opt.apply_gradients(grads_vars)
         except AttributeError:
             self.updates = None
     else:
         grads = tf.gradients(self.loss, params)
         self.globalnorm = tf.global_norm(grads)
         try:
             self.updates = opt.minimize(self.loss)
         except AttributeError:
             self.updates = None
     self.saver = tf.train.Saver(keep_checkpoint_every_n_hours = 2, max_to_keep = 0)
Example 18
  def _create_loss_optimizer(self):
    # The loss is composed of two terms:
    # 1.) The reconstruction loss (the negative log probability
    #     of the input under the reconstructed Bernoulli distribution
    #     induced by the decoder in the data space).
    #     This can be interpreted as the number of "nats" required
    #     for reconstructing the input when the activation in latent
    #     is given.

    orig_energies = tf.reshape(self.x, [self.batch_size, -1])
    new_energies = tf.reshape(self.x_reconstr_mean, [self.batch_size, -1])

    diff = tf.square(tf.sub(orig_energies, new_energies))
    diff_norm = tf.div(diff,tf.exp(tf.minimum(20.,self.x_reconstr_log_sigma_sq)))
    denom_log = tf.log(2*np.pi) + self.x_reconstr_log_sigma_sq
    self.vae_loss_likelihood = tf.reduce_sum(0.5*(diff_norm+denom_log), 1) 

    # 2.) The latent loss, which is defined as the Kullback Leibler divergence
    ##    between the distribution in latent space induced by the encoder on
    #     the data and some prior. This acts as a kind of regularizer.
    #     This can be interpreted as the number of "nats" required
    #     for transmitting the latent space distribution given
    #     the prior.
    self.vae_loss_kl = -0.5 * tf.reduce_sum(1 + self.z_log_sigma_sq
                                       - tf.square(self.z_mean)
                                       - tf.exp(tf.minimum(20.,self.z_log_sigma_sq)), 1)

    self.cost = tf.reduce_mean(self.vae_loss_likelihood + self.lamb*self.vae_loss_kl) # average over batch

    #self.cost = tf.reduce_mean(self.vae_loss_kl + self.vae_loss_l2)

    self.t_vars = tf.trainable_variables()

    # Use Adam optimizer
    opt = tf.train.AdamOptimizer(self.learning_rate)  # .minimize(self.cost, var_list=self.t_vars)
    grads, t_vars = zip(*opt.compute_gradients(self.cost, self.t_vars))
    self.gradnorm = tf.global_norm(grads)
    grads = tf.cond(
        tf.global_norm(grads) > 1e-20,
        lambda: tf.clip_by_global_norm(grads, 500.)[0],
        lambda: grads)
    self.optimizer = opt.apply_gradients(zip(grads,t_vars))
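`tf.sub` and `tf.div` above are pre-1.0 op names; under TF 1.x the likelihood term would be written as follows (behavior unchanged):

diff = tf.square(tf.subtract(orig_energies, new_energies))
diff_norm = tf.divide(diff, tf.exp(tf.minimum(20., self.x_reconstr_log_sigma_sq)))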
Example 19
  def __init__(self, vocab_size, size, num_layers, max_gradient_norm, batch_size, learning_rate,
               learning_rate_decay_factor, dropout, forward_only=False):

    self.size = size
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.num_layers = num_layers
    self.keep_prob = 1.0 - dropout
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)

    self.source_tokens = tf.placeholder(tf.int32, shape=[None, None])
    self.target_tokens = tf.placeholder(tf.int32, shape=[None, None])
    self.source_mask = tf.placeholder(tf.int32, shape=[None, None])
    self.target_mask = tf.placeholder(tf.int32, shape=[None, None])
    self.target_length = tf.reduce_sum(self.target_mask, reduction_indices=0)

    self.decoder_state_input, self.decoder_state_output = [], []
    for i in xrange(num_layers):
      self.decoder_state_input.append(tf.placeholder(tf.float32, shape=[None, size]))

    self.setup_embeddings()
    self.setup_encoder()
    self.setup_decoder()
    self.setup_loss()

    params = tf.trainable_variables()
    if not forward_only:
      opt = tf.train.AdamOptimizer(self.learning_rate)

      gradients = tf.gradients(self.losses, params)
      clipped_gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)
#      self.gradient_norm = tf.global_norm(clipped_gradients)
      self.gradient_norm = tf.global_norm(gradients)
      self.param_norm = tf.global_norm(params)
      self.updates = opt.apply_gradients(
        zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.all_variables())
Example 20
    def _update_network(self, trainer):
        '''
        Build losses, compute gradients and apply gradients to the global net
        '''

        self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
        actions_onehot = tf.one_hot(self.actions, self.a_dim, dtype=tf.float32)
        self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
        self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)

        action_prob = tf.reduce_sum(self.policy * actions_onehot, [1])

        # MSE critic loss
        self.critic_loss = 0.5 * tf.reduce_sum(
                tf.squared_difference(
                    self.target_v, tf.reshape(self.value, [-1])))

        # high entropy -> low loss -> encourage exploration
        self.entropy = -tf.reduce_sum(self.policy * tf.log(self.policy + 1e-30), 1)
        self.entropy_loss = -self.entropy_ratio * tf.reduce_sum(self.entropy)

        # policy gradients = d_[-log(p) * advantages] / d_theta
        self.actor_loss = -tf.reduce_sum(
            tf.log(action_prob + 1e-30) * self.advantages)
        self.actor_loss += self.entropy_loss

        self.loss = self.actor_loss + self.critic_loss
        local_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
        self.grads = tf.gradients(self.loss, local_vars)

        # global norm gradients clipping
        self.grads, self.grad_norms = \
            tf.clip_by_global_norm(self.grads, self.clip_grads)
        self.var_norms = tf.global_norm(local_vars)
        global_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
        self.apply_grads_to_global = \
            trainer.apply_gradients(zip(self.grads, global_vars))

        # summaries
        if self.scope == 'worker_1':
            tf.summary.scalar('loss/entropy', tf.reduce_sum(self.entropy))
            tf.summary.scalar('loss/actor_loss', self.actor_loss)
            tf.summary.scalar('loss/critic_loss', self.critic_loss)
            tf.summary.scalar('advantages', tf.reduce_mean(self.advantages))
            tf.summary.scalar('norms/grad_norms', self.grad_norms)
            tf.summary.scalar('norms/var_norms', self.var_norms)
            summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)
            self.summaries = tf.summary.merge(summaries)
        else:
            self.summaries = tf.no_op()
Example 21
    def __init__(self, FLAGS, id2word, word2id, emb_matrix):
        """
        Initializes the QA model.

        Inputs:
          FLAGS: the flags passed in from main.py
          id2word: dictionary mapping word idx (int) to word (string)
          word2id: dictionary mapping word (string) to word idx (int)
          emb_matrix: numpy array shape (400002, embedding_size) containing pre-trained GloVe embeddings
        """
        print "Initializing the QAModel..."
        self.FLAGS = FLAGS
        self.id2word = id2word
        self.word2id = word2id

        # Add all parts of the graph
        with tf.variable_scope("QAModel", initializer=tf.contrib.layers.variance_scaling_initializer(factor=1.0, uniform=True)):
            self.add_placeholders()
            self.add_embedding_layer(emb_matrix)
            self.build_graph()
            self.add_loss()

        # Define trainable parameters, gradient, gradient norm, and clip by gradient norm
        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        self.gradient_norm = tf.global_norm(gradients)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, FLAGS.max_gradient_norm)
        self.param_norm = tf.global_norm(params)

        # Define optimizer and updates
        # (updates is what you need to fetch in session.run to do a gradient update)
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        opt = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) # you can try other optimizers
        self.updates = opt.apply_gradients(zip(clipped_gradients, params), global_step=self.global_step)

        # Define savers (for checkpointing) and summaries (for tensorboard)
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.keep)
        self.bestmodel_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
        self.summaries = tf.summary.merge_all()
Example 22
    def __init__(self,s_size,a_size,scope,trainer):
        with tf.variable_scope(scope):
            #  quantile regression dqn
            self.quantile = 1.0 / N
            self.cumulative_probabilities = (2.0 * np.arange(N) + 1) / (2.0 * N)
            #  network 
            self.inputs = tf.placeholder(shape=[None,s_size],dtype=tf.float32)
            self.imageIn = tf.reshape(self.inputs,shape=[-1,84,84,1])
            self.conv1 = slim.conv2d(activation_fn=tf.nn.relu,
                                     inputs=self.imageIn,num_outputs=32,
                                     kernel_size=[8,8],stride=[4,4],padding='VALID')
            self.conv2 = slim.conv2d(activation_fn=tf.nn.relu,
                                     inputs=self.conv1,num_outputs=64,
                                     kernel_size=[4,4],stride=[2,2],padding='VALID')
            self.conv3 = slim.conv2d(activation_fn=tf.nn.relu,
                                     inputs=self.conv2,num_outputs=64,
                                     kernel_size=[3,3],stride=[1,1],padding='VALID')
            hidden = slim.fully_connected(slim.flatten(self.conv3),512,activation_fn=tf.nn.relu)
            
            self.out = slim.fully_connected(hidden, a_size * N,
                                            activation_fn=None,
                                            weights_initializer=normalized_columns_initializer(0.1),
                                            biases_initializer=None)
            self.out = tf.reshape(self.out, [-1, a_size, N])
            self.Q   = tf.reduce_sum(self.out * self.quantile, axis=2)
            
            # Only the worker network needs ops for loss functions and gradient updates.
            if scope != 'global':
                self.actions_q = tf.placeholder(shape=[None, a_size, N], dtype=tf.float32)
                self.q_target  = tf.placeholder(shape=[None, N], dtype=tf.float32)
                
                self.q_actiona = tf.multiply(self.out, self.actions_q)
                self.q_action  = tf.reduce_sum(self.q_actiona, axis=1)
                self.u = self.q_target - self.q_action
                
                self.loss = tf.reduce_mean(tf.reduce_sum(tf.square(self.u),axis=1))
                self.delta = tf.to_float(self.u < 0.0)
                self.loss1 = tf.abs(self.cumulative_probabilities - self.delta)
                self.loss2 = self.huber(self.u, k)
                #self.loss = tf.reduce_mean(tf.reduce_mean(self.loss1*self.loss2,axis=1))

                #Get gradients from local network using local losses
                local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
                self.gradients = tf.gradients(self.loss,local_vars)
                self.var_norms = tf.global_norm(local_vars)
                grads,self.grad_norms = tf.clip_by_global_norm(self.gradients,40.0)
                
                #Apply local gradients to global network
                global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
                self.apply_grads = trainer.apply_gradients(zip(grads,global_vars))
Example 23
    def _make_training_op(self):
        optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        clipped_gradients, gradient_norm = tf.clip_by_global_norm(
            gradients, self.config.max_gradient_norm)

        tf.summary.scalar("grad_norm", gradient_norm)
        tf.summary.scalar("clipped_norm", tf.global_norm(clipped_gradients))

        train_op = optimizer.apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step)

        return train_op
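Note that `tf.clip_by_global_norm` returns the norm computed before clipping, so the two scalars logged above diverge exactly when clipping fires. A quick numeric check with a toy gradient:

g = [tf.constant([3.0, 4.0])]                   # global norm = 5
clipped, pre_norm = tf.clip_by_global_norm(g, 2.5)
with tf.Session() as sess:
    print(sess.run([pre_norm, tf.global_norm(clipped)]))  # [5.0, 2.5]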
Example 24
    def __init__(self,s_size,a_size,scope,trainer):
        with tf.variable_scope(scope):
            #  distribution dqn 
            self.atoms = 21
            self.v_max = 10.
            self.v_min = -10.
            self.delta_z = (self.v_max - self.v_min) / (self.atoms - 1)
            self.z = [self.v_min + i * self.delta_z for i in range(self.atoms)]
            
            #  network 
            self.inputs = tf.placeholder(shape=[None,s_size],dtype=tf.float32)
            self.imageIn = tf.reshape(self.inputs,shape=[-1,84,84,1])
            self.conv1 = slim.conv2d(activation_fn=tf.nn.relu,
                                     inputs=self.imageIn,num_outputs=32,
                                     kernel_size=[8,8],stride=[4,4],padding='VALID')
            self.conv2 = slim.conv2d(activation_fn=tf.nn.relu,
                                     inputs=self.conv1,num_outputs=64,
                                     kernel_size=[4,4],stride=[2,2],padding='VALID')
            self.conv3 = slim.conv2d(activation_fn=tf.nn.relu,
                                     inputs=self.conv2,num_outputs=64,
                                     kernel_size=[3,3],stride=[1,1],padding='VALID')
            hidden = slim.fully_connected(slim.flatten(self.conv3),512,activation_fn=tf.nn.relu)
            self.out = slim.fully_connected(hidden, a_size*self.atoms,
                                             activation_fn=None,
                                             weights_initializer=normalized_columns_initializer(0.1),
                                             biases_initializer=None)
            self.out = tf.reshape(self.out, [-1, a_size, self.atoms])

            self.p  = tf.nn.softmax(self.out, dim=2)
            self.Q   = tf.reduce_sum(self.z * self.p, axis=2)
            
            # Only the worker network needs ops for loss functions and gradient updates.
            if scope != 'global':
                self.m_input = tf.placeholder(shape=[None, self.atoms], dtype=tf.float32)
                self.actions_p = tf.placeholder(shape=[None, a_size, self.atoms],dtype=tf.float32)
                self.p_actiona = tf.multiply(self.p, self.actions_p)
                self.p_action  = tf.reduce_sum(self.p_actiona, axis=1)
                self.p_alog  = - tf.log(self.p_action+1e-20) + tf.log(self.m_input+1e-20)
                self.loss = tf.reduce_mean(tf.reduce_sum(self.m_input * self.p_alog, axis=1))
                local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
                self.gradients = tf.gradients(self.loss,local_vars)
                self.var_norms = tf.global_norm(local_vars)
                grads,self.grad_norms = tf.clip_by_global_norm(self.gradients,40.0)
                #Apply local gradients to global network
                global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
                self.apply_grads = trainer.apply_gradients(zip(grads,global_vars))
Example 25
    def add_training_op(self, loss):
        """Sets up the training Ops.

        Creates an optimizer and applies the gradients to all trainable variables.
        The Op returned by this function is what must be passed to the
        `sess.run()` call to cause the model to train.

        TODO:
            - Get the gradients for the loss from optimizer using
              optimizer.compute_gradients.
            - if self.clip_gradients is true, clip the global norm of
              the gradients using tf.clip_by_global_norm to self.config.max_grad_norm
            - Compute the resultant global norm of the gradients using
              tf.global_norm and save this global norm in self.grad_norm.
            - Finally, actually create the training operation by calling
              optimizer.apply_gradients.
        See: https://www.tensorflow.org/api_docs/python/train/gradient_clipping
        Args:
            loss: Loss tensor.
        Returns:
            train_op: The Op for training.
        """

        optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.config.lr)

        ### YOUR CODE HERE (~6-10 lines)

        # - Remember to clip gradients only if self.config.clip_gradients
        # is True.
        # - Remember to set self.grad_norm
        grads_and_vars = optimizer.compute_gradients(loss)
        variables = [output[1] for output in grads_and_vars]
        gradients = [output[0] for output in grads_and_vars]
        if self.config.clip_gradients:
            tmp_gradients = tf.clip_by_global_norm(gradients, clip_norm=self.config.max_grad_norm)[0]
            gradients = tmp_gradients

        grads_and_vars = [(gradients[i], variables[i]) for i in range(len(gradients))]
        self.grad_norm = tf.global_norm(gradients)

        train_op = optimizer.apply_gradients(grads_and_vars)
        ### END YOUR CODE

        assert self.grad_norm is not None, "grad_norm was not set properly!"
        return train_op
Example 26
def define_ppo_step(observation, action, reward, done, value, old_pdf,
                    policy_factory, config):
  """Step of PPO."""
  new_policy_dist, new_value, _ = policy_factory(observation)
  new_pdf = new_policy_dist.prob(action)

  ratio = new_pdf / old_pdf
  clipped_ratio = tf.clip_by_value(ratio, 1 - config.clipping_coef,
                                   1 + config.clipping_coef)

  advantage = calculate_generalized_advantage_estimator(
      reward, value, done, config.gae_gamma, config.gae_lambda)

  advantage_mean, advantage_variance = tf.nn.moments(advantage, axes=[0, 1],
                                                     keep_dims=True)
  advantage_normalized = tf.stop_gradient(
      (advantage - advantage_mean)/(tf.sqrt(advantage_variance) + 1e-8))

  surrogate_objective = tf.minimum(clipped_ratio * advantage_normalized,
                                   ratio * advantage_normalized)
  policy_loss = -tf.reduce_mean(surrogate_objective)

  value_error = calculate_generalized_advantage_estimator(
      reward, new_value, done, config.gae_gamma, config.gae_lambda)
  value_loss = config.value_loss_coef * tf.reduce_mean(value_error ** 2)

  entropy = new_policy_dist.entropy()
  entropy_loss = -config.entropy_loss_coef * tf.reduce_mean(entropy)

  optimizer = get_optimizer(config)
  losses = [policy_loss, value_loss, entropy_loss]

  gradients = [list(zip(*optimizer.compute_gradients(loss))) for loss in losses]

  gradients_norms = [tf.global_norm(gradient[0]) for gradient in gradients]

  gradients_flat = sum([gradient[0] for gradient in gradients], ())
  gradients_variables_flat = sum([gradient[1] for gradient in gradients], ())

  optimize_op = optimizer.apply_gradients(zip(gradients_flat,
                                              gradients_variables_flat))

  with tf.control_dependencies([optimize_op]):
    return [tf.identity(x) for x in losses + gradients_norms]
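The `sum(..., ())` trick concatenates the per-loss gradient tuples into one flat tuple; an equivalent, perhaps more explicit spelling (illustrative):

import itertools
gradients_flat = tuple(itertools.chain.from_iterable(g[0] for g in gradients))
gradients_variables_flat = tuple(itertools.chain.from_iterable(g[1] for g in gradients))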
Example 27
 def get_gradients(self, loss_or_grads, params):
   """
   Note
   ----
    The returned gradients may contain None values
   """
   # check valid algorithm
   if self.algorithm is None or \
   not hasattr(self.algorithm, 'compute_gradients') or \
   not hasattr(self.algorithm, 'apply_gradients'):
      raise RuntimeError("Optimizer is None, or doesn't have attributes: "
                         "compute_gradients and apply_gradients.")
   with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE) as scope:
     scope_name = scope.name
     # get the gradient
     grads_var = self.algorithm.compute_gradients(loss_or_grads,
                                                  var_list=params)
     grads_var = {g: v for g, v in grads_var if g is not None}
     grads = list(grads_var.keys())
     params = list(grads_var.values())
     # ====== clipnorm ====== #
     if self.clipnorm is not None:
       if self.clip_alg == 'norm':
         grads = [tf.clip_by_norm(g, self.clipnorm)
                  for g in grads]
       elif self.clip_alg == 'total_norm':
         grads, _ = tf.clip_by_global_norm(grads, self.clipnorm)
       elif self.clip_alg == 'avg_norm':
         grads = [tf.clip_by_average_norm(g, self.clipnorm)
                  for g in grads]
       else:
         raise ValueError("Unknown norm clipping algorithm: '%s'" % self.clip_alg)
     # ====== clipvalue ====== #
     if self.clipvalue is not None:
       grads = [tf.clip_by_value(g, -self.clipvalue, self.clipvalue)
                for g in grads]
     # ====== get final norm value ====== #
     self._norm = add_roles(tf.global_norm(grads, name="GradientNorm"),
                            GradientsNorm)
   # ====== setting Optimizer roles ====== #
   for v in get_all_variables(scope=scope_name):
     add_roles(v, roles=OptimizerVariable)
   return [(g, p) for g, p in zip(grads, params)]
Example 28
   def _createModel(self):
       with tf.variable_scope(self.scope):
           self.inputs = tf.placeholder('float', shape=[None,self.stateSize])
           x1 = slim.fully_connected(
               self.inputs,
               64,
               scope='fc/fc_1',
               activation_fn=tf.nn.relu)

           self.policy = slim.fully_connected(x1, self.actionSize,
               activation_fn=tf.nn.softmax,
               weights_initializer=Brian.normalized_columns_initializer(0.01),
               biases_initializer=None)
           self.value = slim.fully_connected(x1,1,
               activation_fn=None,
               weights_initializer=Brian.normalized_columns_initializer(1.0),
               biases_initializer=None)

           self.update_local_ops = Brian.update_target_graph('global',self.scope)

           if self.scope != 'global':
               self.actions = tf.placeholder( shape=[None], dtype=tf.int32)
               self.actions_onehot = tf.one_hot(self.actions, self.actionSize, dtype=tf.float32)
               self.target_v = tf.placeholder(shape=[None],dtype=tf.float32)
               self.advantages = tf.placeholder(shape=[None],dtype=tf.float32)

               self.responsible_outputs = tf.reduce_sum(self.policy * self.actions_onehot, [1])

               #Loss functions
               self.value_loss = 0.5 * tf.reduce_sum(tf.square(self.target_v - tf.reshape(self.value,[-1])))
               self.entropy = - tf.reduce_sum(self.policy * tf.log(self.policy))
               self.policy_loss = -tf.reduce_sum(tf.log(self.responsible_outputs)*self.advantages)
               self.loss = 0.5 * self.value_loss + self.policy_loss - self.entropy * 0.01

               #Get gradients from local network using local losses
               local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
               self.gradients = tf.gradients(self.loss,local_vars)
               self.var_norms = tf.global_norm(local_vars)
               grads,self.grad_norms = tf.clip_by_global_norm(self.gradients,40.0)

               #Apply local gradients to global network
               global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
               self.apply_grads = self.trainer.apply_gradients(zip(grads,global_vars))
Example 29
def get_train_op_and_metrics(loss, params):
  """Generate training op and metrics to save in TensorBoard."""
  with tf.variable_scope("get_train_op"):
    learning_rate = get_learning_rate(
        learning_rate=params["learning_rate"],
        hidden_size=params["hidden_size"],
        learning_rate_warmup_steps=params["learning_rate_warmup_steps"])

    # Create optimizer. Use LazyAdamOptimizer from TF contrib, which is faster
    # than the TF core Adam optimizer.
    optimizer = tf.contrib.opt.LazyAdamOptimizer(
        learning_rate,
        beta1=params["optimizer_adam_beta1"],
        beta2=params["optimizer_adam_beta2"],
        epsilon=params["optimizer_adam_epsilon"])

    if params["use_tpu"] and params["tpu"] != tpu_util.LOCAL:
      optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

    # Uses automatic mixed precision FP16 training if on GPU.
    if params["dtype"] == "fp16":
      optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(
          optimizer)

    # Calculate and apply gradients using LazyAdamOptimizer.
    global_step = tf.train.get_global_step()
    tvars = tf.trainable_variables()
    gradients = optimizer.compute_gradients(
        loss, tvars, colocate_gradients_with_ops=True)
    minimize_op = optimizer.apply_gradients(
        gradients, global_step=global_step, name="train")
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    train_op = tf.group(minimize_op, update_ops)

    train_metrics = {"learning_rate": learning_rate}

    if not params["use_tpu"]:
      # gradient norm is not included as a summary when running on TPU, as
      # it can cause instability between the TPU and the host controller.
      gradient_norm = tf.global_norm(list(zip(*gradients))[0])
      train_metrics["global_norm/gradient_norm"] = gradient_norm

    return train_op, train_metrics
Example 30
    def add_train_op(self, loss):
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        opt = tf.train.AdamOptimizer(learning_rate=self.lr)

        gradients, variables = zip(*opt.compute_gradients(loss))
        # save selected gradient summaries
        #for grad in gradients:
            #if 'BasicDecoder' in grad.name or 'gru_cell' in grad.name or 'highway_3' in grad.name:
                #tf.summary.scalar(grad.name, tf.reduce_sum(grad))

        # optionally cap and noise gradients to regularize
        if self.config.cap_grads > 0:
            with tf.variable_scope('cap_grads'):
                tf.summary.scalar('global_gradient_norm', tf.global_norm(gradients))
                gradients, _ = tf.clip_by_global_norm(gradients, self.config.cap_grads)

        train_op = opt.apply_gradients(zip(gradients, variables), global_step=self.global_step)
        return train_op
Example 31
    def __init__(self, scope, trainer, global_step=None):
        with tf.variable_scope(scope):
            self.prob_of_random_goal = tf.Variable(
                FLAGS.initial_random_goal_prob,
                trainable=False,
                name="prob_of_random_goal",
                dtype=tf.float32)
            self.inputs = tf.placeholder(shape=[
                None, FLAGS.resized_height, FLAGS.resized_width,
                FLAGS.agent_history_length
            ],
                                         dtype=tf.float32,
                                         name="Inputs")

            self.prev_rewards = tf.placeholder(shape=[None],
                                               dtype=tf.float32,
                                               name="Prev_Rewards")

            self.prev_rewards_onehot = tf.one_hot(tf.cast(self.prev_rewards,
                                                          dtype=tf.int32),
                                                  2,
                                                  dtype=tf.float32,
                                                  name="Prev_Rewards_OneHot")

            self.prev_rewards = tf.expand_dims(self.prev_rewards,
                                               1,
                                               name="rewards")

            # self.prev_rewards_onehot = tf.expand_dims(self.prev_rewards, 0)

            self.prev_actions = tf.placeholder(shape=[None],
                                               dtype=tf.int32,
                                               name="Prev_Actions")
            self.prev_actions_onehot = tf.one_hot(self.prev_actions,
                                                  FLAGS.nb_actions,
                                                  dtype=tf.float32,
                                                  name="Prev_Actions_OneHot")

            self.prev_goal = tf.placeholder(shape=[None, FLAGS.hidden_dim],
                                            dtype=tf.float32,
                                            name="Prev_Goals")

            self.image_summaries = []

            if FLAGS.game not in flags.SUPPORTED_ENVS:
                self.conv0 = tf.contrib.layers.conv2d(self.inputs,
                                                      16,
                                                      8,
                                                      4,
                                                      activation_fn=tf.nn.elu,
                                                      scope="conv0")
                with tf.variable_scope('conv0'):
                    tf.get_variable_scope().reuse_variables()
                    weights = tf.get_variable('weights')
                    grid = self.put_kernels_on_grid(weights)
                    self.image_summaries.append(
                        tf.summary.image('kernels', grid, max_outputs=1))
                self.conv = tf.contrib.layers.conv2d(self.conv0,
                                                     32,
                                                     4,
                                                     2,
                                                     activation_fn=tf.nn.elu,
                                                     scope="conv1")
            else:
                self.conv = tf.contrib.layers.conv2d(self.inputs,
                                                     32,
                                                     5,
                                                     2,
                                                     activation_fn=tf.nn.elu,
                                                     scope="conv1")
                with tf.variable_scope('conv1'):
                    tf.get_variable_scope().reuse_variables()
                    weights = tf.get_variable('weights')
                    grid = self.put_kernels_on_grid(weights)
                    self.image_summaries.append(
                        tf.summary.image('kernels', grid, max_outputs=1))

            with tf.variable_scope('inputs'):
                tf.get_variable_scope().reuse_variables()
                self.image_summaries.append(
                    tf.summary.image('input', self.inputs, max_outputs=100))

            self.conv_flat = tf.contrib.layers.flatten(self.conv)
            self.fc = tf.contrib.layers.fully_connected(
                self.conv_flat, FLAGS.hidden_dim)
            self.fc = tf.contrib.layers.layer_norm(self.fc)
            self.f_percept = tf.nn.elu(self.fc, name="Zt")

            if FLAGS.game not in flags.SUPPORTED_ENVS:
                self.f_percept = tf.concat([self.f_percept, self.prev_rewards],
                                           1,
                                           name="Zt_r")
            else:
                self.f_percept = tf.concat(
                    [self.f_percept, self.prev_rewards_onehot], 1, name="Zt_r")

            summary_f_percept_act = tf.contrib.layers.summarize_activation(
                self.f_percept)

            ############################################################################################################
            # Manager network

            if FLAGS.meta:
                self.f_Mspace = tf.concat([self.f_percept, self.prev_goal],
                                          1,
                                          name="Zt_r")
            else:
                self.f_Mspace = tf.identity(self.f_percept, name="Zt_r")

            self.f_Mspace = tf.contrib.layers.fully_connected(
                self.f_Mspace, FLAGS.hidden_dim)

            self.f_percept = tf.concat(
                [self.f_percept, self.prev_actions_onehot], 1, name="Zt_r")

            self.f_Mspace = tf.contrib.layers.layer_norm(self.f_Mspace)
            self.f_Mspace = tf.nn.elu(self.f_Mspace, name="St")
            summary_f_Mspace_act = tf.contrib.layers.summarize_activation(
                self.f_Mspace)

            m_rnn_in = tf.expand_dims(self.f_Mspace, [0], name="Mrnn_in")
            step_size = tf.shape(self.inputs)[:1]

            m_lstm_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                FLAGS.hidden_dim)
            m_c_init = np.zeros((1, FLAGS.hidden_dim * FLAGS.manager_horizon),
                                np.float32)
            m_h_init = np.zeros((1, FLAGS.hidden_dim * FLAGS.manager_horizon),
                                np.float32)
            self.m_state_init = [m_c_init, m_h_init]
            m_c_in = tf.placeholder(
                tf.float32, [1, FLAGS.hidden_dim * FLAGS.manager_horizon],
                name="Mrnn_c_in")
            m_h_in = tf.placeholder(
                tf.float32, [1, FLAGS.hidden_dim * FLAGS.manager_horizon],
                name="Mrnn_h_in")
            self.m_state_in = (m_c_in, m_h_in)
            m_state_in = tf.contrib.rnn.LSTMStateTuple(m_c_in, m_h_in)

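            # The manager's recurrent core keeps manager_horizon interleaved
            # states (hence the state size hidden_dim * manager_horizon), so
            # its outputs effectively tick at a dilated, lower temporal
            # resolution (see fast_dlstm).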
            m_lstm_outputs, m_lstm_state = self.fast_dlstm(
                m_rnn_in, m_state_in, m_lstm_cell, FLAGS.manager_horizon,
                FLAGS.hidden_dim * FLAGS.manager_horizon)

            m_lstm_c, m_lstm_h = m_lstm_state
            self.m_state_out = (m_lstm_c[-1, :1, :], m_lstm_h[-1, :1, :])
            self.goals = tf.reshape(m_lstm_outputs, [-1, FLAGS.hidden_dim])
            self.normalized_goals = tf.contrib.layers.fully_connected(
                self.goals, FLAGS.hidden_dim, activation_fn=tf.tanh, scope="Gt")

            summary_goals = tf.contrib.layers.summarize_activation(
                self.normalized_goals)

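            # Goal exploration: the manager's predicted goal at step t may be
            # swapped for a random normal vector (row 0 of packed_tensors);
            # row 1 keeps the prediction. The swap is sampled with probability
            # prob_of_random_goal, which is annealed during training.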
            def randomize_goals(t):
                t = tf.cast(t, tf.int32)
                packed_tensors = tf.stack([
                    tf.random_normal([
                        FLAGS.hidden_dim,
                    ]), self.normalized_goals[t, :]
                ])

                to_update = tf.cond(
                    tf.less(
                        self.prob_of_random_goal,
                        tf.constant(FLAGS.final_random_goal_prob,
                                    dtype=tf.float32)),
                    lambda: tf.cast(
                        tf.multinomial(
                            tf.log([[
                                self.prob_of_random_goal,
                                1.0 - self.prob_of_random_goal
                            ]]), 1)[0][0], tf.int32),
                    lambda: tf.constant(1, tf.int32))

                resulted_tensor = tf.gather(packed_tensors, to_update)

                return resulted_tensor

            self.randomized_goals = tf.map_fn(lambda t: randomize_goals(t),
                                              tf.to_float(
                                                  tf.range(0, step_size[0])),
                                              name="random_gt")

            summary_random_goals = tf.contrib.layers.summarize_activation(
                self.randomized_goals)

            self.decrease_prob_of_random_goal = tf.assign_sub(
                self.prob_of_random_goal,
                tf.constant(
                    (FLAGS.initial_random_goal_prob -
                     FLAGS.final_random_goal_prob) / FLAGS.explore_steps))

            m_fc_value_w = tf.get_variable(
                "M_Value_W",
                shape=[FLAGS.hidden_dim, 1],
                initializer=normalized_columns_initializer(1.0))
            # value head over the flattened manager LSTM outputs
            self.m_value = tf.matmul(self.goals, m_fc_value_w, name="M_Value")

            summary_m_value_act = tf.contrib.layers.summarize_activation(
                self.m_value)

            ############################################################################################################

            # Worker network

            self.sum_prev_goals = tf.placeholder(
                shape=[None, FLAGS.hidden_dim],
                dtype=tf.float32,
                name="Prev_c_Goals_sum")

            w_rnn_in = tf.expand_dims(self.f_percept, [0], name="Wrnn_in")
            step_size = tf.shape(self.inputs)[:1]
            w_lstm_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                FLAGS.goal_embedding_size * FLAGS.nb_actions)
            w_c_init = np.zeros((1, w_lstm_cell.state_size.c), np.float32)
            w_h_init = np.zeros((1, w_lstm_cell.state_size.h), np.float32)
            self.w_state_init = [w_c_init, w_h_init]
            w_c_in = tf.placeholder(tf.float32, [1, w_lstm_cell.state_size.c],
                                    name="Wrnn_c_in")
            w_h_in = tf.placeholder(tf.float32, [1, w_lstm_cell.state_size.h],
                                    name="Wrnn_h_in")
            self.w_state_in = (w_c_in, w_h_in)
            w_state_in = tf.contrib.rnn.LSTMStateTuple(w_c_in, w_h_in)

            w_lstm_outputs, w_lstm_state = tf.nn.dynamic_rnn(
                w_lstm_cell,
                w_rnn_in,
                initial_state=w_state_in,
                sequence_length=step_size,
                time_major=False)

            w_lstm_c, w_lstm_h = w_lstm_state
            self.w_state_out = (w_lstm_c[:1, :], w_lstm_h[:1, :])
            Ut = tf.reshape(
                w_lstm_outputs,
                [step_size[0], FLAGS.nb_actions, FLAGS.goal_embedding_size],
                name="Ut")
            Ut_flat = tf.reshape(
                w_lstm_outputs,
                [step_size[0], FLAGS.nb_actions * FLAGS.goal_embedding_size],
                name="Ut_flat")

            summary_wrnn_act = tf.contrib.layers.summarize_activation(Ut)

            goal_encoding = tf.contrib.layers.fully_connected(
                self.sum_prev_goals,
                FLAGS.goal_embedding_size,
                biases_initializer=None,
                scope="goal_emb")

            interm_rez = tf.squeeze(
                tf.matmul(Ut, tf.expand_dims(goal_encoding, 2)), 2)
            interm_rez = tf.contrib.layers.flatten(interm_rez)
            self.w_policy = tf.nn.softmax(interm_rez, name="W_Policy")

            summary_w_policy_act = tf.contrib.layers.summarize_activation(
                self.w_policy)

            w_fc_value_w = tf.get_variable(
                "W_Value_W",
                shape=[
                    FLAGS.nb_actions * FLAGS.goal_embedding_size +
                    FLAGS.goal_embedding_size, 1
                ],
                initializer=normalized_columns_initializer(1.0))
            self.w_value = tf.matmul(tf.concat([Ut_flat, goal_encoding], 1),
                                     w_fc_value_w,
                                     name="W_Value")

            summary_w_value_act = tf.contrib.layers.summarize_activation(
                self.w_value)

            if scope != 'global':

                self.w_extrinsic_return = tf.placeholder(shape=[None],
                                                         dtype=tf.float32)
                self.m_extrinsic_return = tf.placeholder(shape=[None],
                                                         dtype=tf.float32)
                self.w_intrinsic_return = tf.placeholder(shape=[None],
                                                         dtype=tf.float32)

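                # For each step t, fetch the manager-space state
                # manager_horizon steps ahead (clamped to the end of the
                # rollout); the difference s_{t+c} - s_t is the direction
                # actually travelled, compared against the goal below via
                # cosine similarity.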
                def gather_state_at_horiz(t):
                    t = tf.cast(t, tf.int32)
                    f_Mspace_c = tf.gather(
                        self.f_Mspace,
                        tf.minimum(
                            t +
                            tf.constant(FLAGS.manager_horizon, dtype=tf.int32),
                            step_size[0] - 1))
                    return f_Mspace_c

                self.f_Mspace_c = tf.cast(tf.map_fn(
                    lambda t: gather_state_at_horiz(t),
                    tf.to_float(tf.range(0, step_size[0])),
                    name="state_at_horiz"),
                                          dtype=tf.float32)
                self.state_diff = self.f_Mspace_c - self.f_Mspace
                self.cos_sim_state_diff = self.cosine_distance(
                    tf.stop_gradient(self.state_diff),
                    self.normalized_goals,
                    dim=1)

                self.m_advantages = self.m_extrinsic_return - tf.stop_gradient(
                    tf.reshape(self.m_value, [-1]))
                self.goals_loss = -tf.reduce_sum(
                    self.m_advantages * self.cos_sim_state_diff)
                self.m_value_loss = FLAGS.m_beta_v * tf.reduce_sum(
                    tf.square(self.m_extrinsic_return -
                              tf.reshape(self.m_value, [-1])))

                self.actions = tf.placeholder(shape=[None],
                                              dtype=tf.int32,
                                              name="Actions")
                self.actions_onehot = tf.one_hot(self.actions,
                                                 FLAGS.nb_actions,
                                                 dtype=tf.float32,
                                                 name="Actions_Onehot")

                self.responsible_outputs = tf.reduce_sum(
                    self.w_policy * self.actions_onehot, [1])

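                # The worker is trained on the extrinsic return plus the
                # alpha-weighted intrinsic (goal-reaching) return, while the
                # manager above is trained on the extrinsic return only.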
                self.intrinsic_return = FLAGS.alpha * self.w_intrinsic_return
                self.total_return = self.w_extrinsic_return + self.intrinsic_return
                self.w_advantages = self.total_return - tf.stop_gradient(
                    tf.reshape(self.w_value, [-1]))

                # Loss functions
                self.w_value_loss = FLAGS.w_beta_v * tf.reduce_sum(
                    tf.square(self.total_return -
                              tf.reshape(self.w_value, [-1])))
                self.entropy = -tf.reduce_sum(
                    self.w_policy * tf.log(self.w_policy + 1e-7))

                self.w_policy_loss = -tf.reduce_sum(
                    tf.log(self.responsible_outputs + 1e-7) *
                    self.w_advantages) - self.entropy * FLAGS.beta_e

                self.loss = self.w_value_loss + self.w_policy_loss + self.m_value_loss + self.goals_loss

                local_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope)
                self.gradients = tf.gradients(self.loss, local_vars)
                self.var_norms = tf.global_norm(local_vars)
                grads, self.grad_norms = tf.clip_by_global_norm(
                    self.gradients, FLAGS.gradient_clip_value)

                self.worker_summaries = [
                    summary_f_percept_act, summary_f_Mspace_act, summary_goals,
                    summary_random_goals, summary_m_value_act,
                    summary_wrnn_act, summary_w_policy_act, summary_w_value_act
                ]
                for grad, weight in zip(grads, local_vars):
                    self.worker_summaries.append(
                        tf.summary.histogram(weight.name + '_grad', grad))
                    self.worker_summaries.append(
                        tf.summary.histogram(weight.name, weight))

                self.merged_summary = tf.summary.merge(self.worker_summaries)

                global_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
                self.apply_grads = trainer.apply_gradients(
                    zip(grads, global_vars))
Example n. 32
    def train(self, config):
        """Train DCGAN"""
        seed = 0
        np.random.seed(seed)
        tf.set_random_seed(seed)
        if config.dataset == "mnist":
            data_X, val_data, test_data, train_dist = mnist_data.load_mnist()
        elif config.dataset == "cifar":
            data_X, val_data, test_data = cifar_data.load_cifar()

        if self.model_type == "nice":
            val_data = np.reshape(val_data, (-1, self.image_size))
            test_data = np.reshape(test_data, (-1, self.image_size))

        lr = config.learning_rate
        self.learning_rate = tf.placeholder(tf.float32, [], name='lr')

        d_optim_ = tf.train.AdamOptimizer(self.learning_rate,
                                          beta1=config.beta1,
                                          beta2=0.9)
        d_grad = d_optim_.compute_gradients(self.d_loss, var_list=self.d_vars)
        # tf.global_norm expects a list of tensors, not (gradient, variable) pairs
        d_grad_mag = tf.global_norm([g for g, _ in d_grad])
        d_optim = d_optim_.apply_gradients(d_grad)

        g_optim_ = tf.train.AdamOptimizer(self.learning_rate,
                                          beta1=config.beta1,
                                          beta2=0.9)
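        # Generator objective: pure maximum likelihood when there is no
        # critic; otherwise the adversarial loss, optionally combined with the
        # log-likelihood term (scaled by 1/like_reg) by summing both gradient
        # lists variable-by-variable.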
        if self.n_critic <= 0:
            g_grad = g_optim_.compute_gradients(self.train_log_likelihood,
                                                var_list=self.g_vars)
        else:
            if self.like_reg > 0:
                if self.model_type == "real_nvp":
                    g_grad_1 = g_optim_.compute_gradients(self.g_loss /
                                                          self.like_reg,
                                                          var_list=self.g_vars)
                    g_grad_2 = g_optim_.compute_gradients(
                        self.train_log_likelihood, var_list=self.g_vars)
                    grads_1, _ = zip(*g_grad_1)
                    grads_2, _ = zip(*g_grad_2)
                    sum_grad = [g1 + g2 for g1, g2 in zip(grads_1, grads_2)]
                    g_grad = list(
                        zip(sum_grad, [var for _, var in g_grad_1]))
                else:
                    g_grad = g_optim_.compute_gradients(
                        self.g_loss / self.like_reg +
                        self.train_log_likelihood,
                        var_list=self.g_vars)
            else:
                g_grad = g_optim_.compute_gradients(self.g_loss,
                                                    var_list=self.g_vars)

        g_grad_mag = tf.global_norm([g for g, _ in g_grad])
        g_optim = g_optim_.apply_gradients(g_grad)

        # Data-dependent init is not implemented; real NVP additionally feeds
        # x_init to the initializer.
        if self.model_type == "real_nvp":
            self.sess.run(tf.global_variables_initializer(),
                          {self.x_init: data_X[0:config.batch_size]})
        else:
            self.sess.run(tf.global_variables_initializer())

        self.g_sum = merge_summary([
            self.z_sum, self.d__sum, self.G_sum, self.d_loss_fake_sum,
            self.g_loss_sum
        ])
        self.d_sum = merge_summary(
            [self.z_sum, self.d_sum, self.d_loss_real_sum, self.d_loss_sum])
        self.writer = SummaryWriter("./" + self.log_dir, self.sess.graph)

        counter = 1
        start_time = time.time()
        could_load, checkpoint_counter = self.load(self.checkpoint_dir)
        if could_load:
            counter = checkpoint_counter
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

        ############## A FIXED BATCH OF Zs FOR GENERATING SAMPLES ######################
        if self.prior == "uniform":
            sample_z = np.random.uniform(-1,
                                         1,
                                         size=(self.sample_num, self.z_dim))
        elif self.prior == "logistic":
            sample_z = np.random.logistic(loc=0.,
                                          scale=1.,
                                          size=(self.sample_num, self.z_dim))
        elif self.prior == "gaussian":
            sample_z = np.random.normal(0.0,
                                        1.0,
                                        size=(self.sample_num, self.z_dim))
        else:
            print("ERROR: Unrecognized prior...exiting")
            exit(-1)

        ################################ Evaluate initial model lli ########################

        val_nlli = self.evaluate_neg_loglikelihood(val_data, config)
        # train_nlli = self.evaluate_neg_loglikelihood(train_data, config)

        curr_inception_score = self.calculate_inception_and_mode_score()
        print("INITIAL TEST: val neg logli: %.8f,incep score: %.8f" % (val_nlli,\
         curr_inception_score[0]))
        sample_inputs = data_X[0:config.batch_size]  # used by sampler runs in both branches
        if counter > 1:
            old_data = np.load("./" + config.sample_dir + '/graph_data.npy')
            self.best_val_nlli = old_data[2]
            self.best_model_counter = old_data[3]
            self.best_model_path = old_data[4]
            self.val_nlli_list = old_data[1]
            self.counter_list = old_data[5]
            self.batch_train_nlli_list = old_data[-4]
            self.inception_list = old_data[-2]
            self.samples_list = old_data[0]
            self.loss_list = old_data[-1]
            manifold_h, manifold_w = old_data[6]
        else:
            self.writer.add_summary(tf.Summary(\
                    value=[tf.Summary.Value(tag="Val Neg Log-likelihood", simple_value=val_nlli)]), counter)
            # self.writer.add_summary(tf.Summary(\
            #         value=[tf.Summary.Value(tag="Train Neg Log-likelihood", simple_value=train_nlli)]), counter)

            self.best_val_nlli = val_nlli
            # self.best_model_train_nlli = train_nlli
            self.best_model_counter = counter
            self.best_model_path = self.save(config.checkpoint_dir, counter)
            # self.train_nlli_list = [train_nlli]
            self.val_nlli_list = [val_nlli]
            self.counter_list = [1]
            self.batch_train_nlli_list = []
            self.inception_list = [curr_inception_score]
            self.samples_list = self.sess.run([self.sampler],
                                              feed_dict={
                                                  self.z: sample_z,
                                              })
            samples = self.samples_list[0]
            manifold_h = int(np.ceil(np.sqrt(samples.shape[0])))
            manifold_w = int(np.floor(np.sqrt(samples.shape[0])))
            self.loss_list = self.sess.run(
                [self.d_loss_real, self.d_loss_fake],
                feed_dict={
                    self.z: sample_z,
                    self.inputs: sample_inputs,
                })
        ##################################################################################

        for epoch in range(config.epoch):
            np.random.shuffle(data_X)
            batch_idxs = len(data_X) // config.batch_size

            for idx in range(0, batch_idxs):
                sys.stdout.flush()
                batch_images = data_X[idx * config.batch_size:(idx + 1) *
                                      config.batch_size]

                if self.prior == "uniform":
                    batch_z = np.random.uniform(-1, 1, [config.batch_size, self.z_dim]) \
                        .astype(np.float32)
                elif self.prior == "logistic":
                    batch_z = np.random.logistic(loc=0.,scale=1.0,size=[config.batch_size, self.z_dim]) \
                        .astype(np.float32)
                elif self.prior == "gaussian":
                    batch_z = np.random.normal(0.0,
                                               1.0,
                                               size=(config.batch_size,
                                                     self.z_dim))
                else:
                    print("ERROR: Unrecognized prior...exiting")
                    exit(-1)

                for r in range(self.n_critic):
                    _, d_g_mag, errD_fake, errD_real, summary_str = self.sess.run(
                        [
                            d_optim, d_grad_mag, self.d_loss_fake,
                            self.d_loss_real, self.d_sum
                        ],
                        feed_dict={
                            self.inputs: batch_images,
                            self.z: batch_z,
                            self.learning_rate: lr,
                        })
                if self.n_critic > 0:
                    self.writer.add_summary(summary_str, counter)

                # Update G network
                if self.like_reg > 0 or self.n_critic <= 0:
                    _, g_g_mag, errG, summary_str = self.sess.run(
                        [g_optim, g_grad_mag, self.g_loss, self.g_sum],
                        feed_dict={
                            self.z: batch_z,
                            self.learning_rate: lr,
                            self.inputs: batch_images,
                        })
                else:
                    _, g_g_mag, errG, summary_str = self.sess.run(
                        [g_optim, g_grad_mag, self.g_loss, self.g_sum],
                        feed_dict={
                            self.z: batch_z,
                            self.learning_rate: lr,
                        })
                self.writer.add_summary(summary_str, counter)

                batch_images_nl = batch_images
                if self.model_type == "nice":
                    batch_images_nl = np.reshape(
                        batch_images_nl,
                        (self.batch_size, -1))[:, self.permutation]
                b_train_nlli = self.sess.run([self.log_likelihood],
                                             feed_dict={
                                                 self.log_like_batch:
                                                 batch_images_nl,
                                             })
                b_train_nlli = b_train_nlli[0]

                self.batch_train_nlli_list.append(b_train_nlli)
                if self.n_critic > 0:
                    self.loss_list.append([errD_real, errD_fake])
                    self.writer.add_summary(tf.Summary(\
                    value=[tf.Summary.Value(tag="training loss", simple_value=-(errD_fake+errD_real))]) ,counter)
                self.writer.add_summary(tf.Summary(\
                  value=[tf.Summary.Value(tag="Batch train Neg Log-likelihood", simple_value=b_train_nlli)]) ,counter)
                counter += 1

                lr = max(lr * self.lr_decay, self.min_lr)

                if np.mod(counter, 703) == 1:  #340
                    if self.n_critic > 0:
                        print("Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f, d_grad_mag: %.8f, g_grad_mag: %.8f, lr: %.8f" \
                      % (epoch, idx, batch_idxs,
                        time.time() - start_time, errD_fake+errD_real, errG, d_g_mag, g_g_mag, lr))
                    else:
                        print("Epoch: [%2d] [%4d/%4d] time: %4.4f, g_loss: %.8f, g_grad_mag: %.8f, lr: %.8f" \
                      % (epoch, idx, batch_idxs,
                        time.time() - start_time, errG, g_g_mag, lr))
                    curr_model_path = self.save(config.checkpoint_dir, counter)

                    val_nlli = self.evaluate_neg_loglikelihood(
                        val_data, config)

                    # train_nlli = self.evaluate_neg_loglikelihood(train_data, config)
                    curr_inception_score = self.calculate_inception_and_mode_score(
                    )

                    print("[LogLi (%d,%d)]: val neg logli: %.8f, ince: %.8f, train lli: %.8f" % (epoch, idx,val_nlli,\
                     curr_inception_score[0], np.mean(self.batch_train_nlli_list[-700:])))

                    self.writer.add_summary(tf.Summary(\
                            value=[tf.Summary.Value(tag="Val Neg Log-likelihood", simple_value=val_nlli)]), counter)
                    # self.writer.add_summary(tf.Summary(\
                    #         value=[tf.Summary.Value(tag="Train Neg Log-likelihood", simple_value=train_nlli)]), counter)
                    if val_nlli < self.best_val_nlli:
                        self.best_val_nlli = val_nlli
                        self.best_model_counter = counter
                        self.best_model_path = curr_model_path
                        # self.best_model_train_nlli = train_nlli
                    # self.train_nlli_list.append(train_nlli)
                    self.val_nlli_list.append(val_nlli)
                    self.counter_list.append(counter)

                    samples, d_loss, g_loss = self.sess.run(
                        [self.sampler, self.d_loss, self.g_loss],
                        feed_dict={
                            self.z: sample_z,
                            self.inputs: sample_inputs,
                        })
                    self.samples_list.append(samples)
                    manifold_h = int(np.ceil(np.sqrt(samples.shape[0])))
                    manifold_w = int(np.floor(np.sqrt(samples.shape[0])))
                    self.inception_list.append(curr_inception_score)
                    save_images(
                        samples, [manifold_h, manifold_w],
                        './{}/train_{:02d}_{:04d}.png'.format(
                            config.sample_dir, epoch, idx))
                    print("[Sample] d_loss: %.8f, g_loss: %.8f" %
                          (d_loss, g_loss))

                    np.save("./"+config.sample_dir+'/graph_data',
                      [self.samples_list, self.val_nlli_list, self.best_val_nlli, self.best_model_counter,\
                       self.best_model_path, self.counter_list, [manifold_h, manifold_w], \
                       self.batch_train_nlli_list, self.inception_list, self.loss_list])


        np.save("./"+config.sample_dir+'/graph_data',
                [self.samples_list, self.val_nlli_list, self.best_val_nlli, self.best_model_counter,\
                 self.best_model_path, self.counter_list, [manifold_h, manifold_w], \
                 self.batch_train_nlli_list, self.inception_list, self.loss_list])
        self.test_model(test_data, config)
Example n. 33
def get_train_ops(loss,
                  tf_variables,
                  train_step,
                  clip_mode=None,
                  grad_bound=None,
                  l2_reg=1e-4,
                  lr_warmup_val=None,
                  lr_warmup_steps=100,
                  lr_init=0.1,
                  lr_dec_start=0,
                  lr_dec_every=10000,
                  lr_dec_rate=0.1,
                  lr_dec_min=None,
                  lr_cosine=False,
                  lr_max=None,
                  lr_min=None,
                  lr_T_0=None,
                  lr_T_mul=None,
                  num_train_batches=None,
                  optim_algo=None,
                  sync_replicas=False,
                  num_aggregate=None,
                  num_replicas=None,
                  get_grad_norms=False,
                  moving_average=None,
                  is_controller=False):
    """
    Args:
      clip_mode: "global", "norm", or None.
      moving_average: store the moving average of parameters
    """
    #TODO Maybe dont reduce here???
    # if not is_controller: # Dont quantize controller, vanishing grad problem?
    #     for i, var in enumerate(tf_variables):
    #         if var.dtype != tf.float16:
    #             tf_variables[i] = tf.Variable(tf.cast(tf_variables[i], tf.float16), name=tf_variables[i].name.split(':')[0])

    # if loss.dtype != tf.float16:
    #     loss = tf.cast(loss, tf.float16, name=loss.name.split(':')[0])

    if l2_reg > 0:
        l2_losses = []
        if not is_controller:
            for var in tf_variables:
                l2_losses.append(tf.reduce_sum(tf.cast(var, tf.float32)**2))
        else:
            for var in tf_variables:
                l2_losses.append(tf.reduce_sum(var**2))
        #TODO
        l2_loss = tf.add_n(l2_losses)  #OG
        loss += l2_reg * l2_loss  # loss = loss + 1e-4*l2_loss

    # import code
    # code.interact(local=locals())

    if lr_cosine:
        assert lr_max is not None, "Need lr_max to use lr_cosine"
        assert lr_min is not None, "Need lr_min to use lr_cosine"
        assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
        assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
        assert num_train_batches is not None, ("Need num_train_batches to use"
                                               " lr_cosine")

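        # Cosine learning-rate decay with warm restarts (SGDR-style): within a
        # period T_i, lr = lr_min + 0.5*(lr_max - lr_min)*(1 + cos(pi*T_curr/T_i));
        # once T_curr reaches T_i the schedule restarts and T_i grows by lr_T_mul.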
        curr_epoch = train_step // num_train_batches  # train_step increments once per batch

        last_reset = tf.Variable(0,
                                 dtype=tf.int32,
                                 trainable=False,
                                 name="last_reset")
        T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
        T_curr = curr_epoch - last_reset

        def _update():
            update_last_reset = tf.assign(last_reset,
                                          curr_epoch,
                                          use_locking=True)
            update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
            with tf.control_dependencies([update_last_reset, update_T_i]):
                rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
                lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        def _no_update():
            rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
            lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        learning_rate = tf.cond(tf.greater_equal(T_curr, T_i), _update,
                                _no_update)
    else:
        learning_rate = tf.train.exponential_decay(
            lr_init,
            tf.maximum(train_step - lr_dec_start, 0),
            lr_dec_every,
            lr_dec_rate,
            staircase=True)
        if lr_dec_min is not None:
            learning_rate = tf.maximum(learning_rate, lr_dec_min)

    if lr_warmup_val is not None:
        learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
                                lambda: lr_warmup_val, lambda: learning_rate)

    if optim_algo == "momentum":
        opt = tf.train.MomentumOptimizer(learning_rate,
                                         0.9,
                                         use_locking=True,
                                         use_nesterov=True)
    elif optim_algo == "sgd":
        opt = tf.train.GradientDescentOptimizer(learning_rate,
                                                use_locking=True)
    elif optim_algo == "adam":
        opt = tf.train.AdamOptimizer(learning_rate,
                                     beta1=0.0,
                                     epsilon=1e-3,
                                     use_locking=True)
    else:
        raise ValueError("Unknown optim_algo {}".format(optim_algo))

    if sync_replicas:
        assert num_aggregate is not None, "Need num_aggregate to sync."
        assert num_replicas is not None, "Need num_replicas to sync."

        opt = tf.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=num_aggregate,
            total_num_replicas=num_replicas,
            use_locking=True)

    if moving_average is not None:
        opt = tf.contrib.opt.MovingAverageOptimizer(
            opt, average_decay=moving_average)

    #TODO
    if not is_controller:
        loss_scale_manager = tf.contrib.mixed_precision.FixedLossScaleManager(
            5000)  # too big? try 10000
        loss_scale_optimizer = tf.contrib.mixed_precision.LossScaleOptimizer(
            opt, loss_scale_manager)
        grads_and_vars = loss_scale_optimizer.compute_gradients(
            loss, tf_variables)
        grads = [grad_and_var[0] for grad_and_var in grads_and_vars]
    else:
        grads = tf.gradients(loss, tf_variables)
    # import code
    # code.interact(local=locals())

    grad_norm = tf.global_norm(grads)

    grad_norms = {}
    for v, g in zip(tf_variables, grads):
        if v is None or g is None:
            continue
        if isinstance(g, tf.IndexedSlices):
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values**2))
        else:
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g**2))

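    # clip_mode "global" rescales the whole gradient list jointly so that its
    # global norm is at most grad_bound; "norm" clips each gradient tensor
    # independently to grad_bound.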
    if clip_mode is not None:
        assert grad_bound is not None, "Need grad_bound to clip gradients."
        if clip_mode == "global":
            grads, _ = tf.clip_by_global_norm(grads, grad_bound)
        elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if isinstance(g, tf.IndexedSlices):
                    c_g = tf.clip_by_norm(g.values, grad_bound)
                    c_g = tf.IndexedSlices(g.indices, c_g)
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                clipped.append(g)
            grads = clipped
        else:
            raise NotImplementedError("Unknown clip_mode {}".format(clip_mode))
    try:
        #TODO
        if not is_controller:
            assert (len(grads) == len(tf_variables))
            grads_and_vars = [
                tuple([grads[i], tf_variables[i]]) for i, _ in enumerate(grads)
            ]
            train_op = loss_scale_optimizer.apply_gradients(
                grads_and_vars, global_step=train_step)
        else:
            train_op = opt.apply_gradients(zip(grads, tf_variables),
                                           global_step=train_step)
    except Exception as e:
        print("\ncould not apply_gradients(), exception: {}".format(e))
        import code
        code.interact(local=locals())

    if get_grad_norms:
        return train_op, learning_rate, grad_norm, opt, grad_norms
    else:
        return train_op, learning_rate, grad_norm, opt
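
A minimal, hypothetical call of the helper above (a sketch only: `total_loss` and `global_step` are illustrative names, and `is_controller=True` is passed so the plain-gradient path is taken rather than the mixed-precision loss-scaling path):

global_step = tf.train.get_or_create_global_step()
train_op, learning_rate, grad_norm, opt = get_train_ops(
    total_loss,
    tf.trainable_variables(),
    global_step,
    clip_mode="global",
    grad_bound=5.0,
    optim_algo="momentum",
    is_controller=True)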
Example n. 34
def main(argv=None):
  print ('Number of arguments:', len(sys.argv), 'arguments.')
  print ('Argument List:', str(sys.argv))
  
  try:
    opts, args = getopt.getopt(sys.argv[1:], "h", ["max_grad_norm=", "num_epochs=", "learning_rate=" ,"dropout=", "num_layers=",  "num_steps=", "hidden_size=", "batch_size="])
  except getopt.GetoptError:
    print ('tsc_main_h_par.py --max_grad_norm <> --num_epochs <> --learning_rate <> --dropout <> --num_layers <> --num_steps <> --hidden_size <> --batch_size <>')
    sys.exit(2)
  for opt, arg in opts:
    if opt == '-h':
      print ('tsc_main_h_par.py --max_grad_norm <> --num_epochs <> --learning_rate <> --dropout <> --num_layers <> --num_steps <> --hidden_size <> --batch_size <>')
      sys.exit()
    elif opt == '--max_grad_norm':
      global max_grad_norm
      max_grad_norm =  int(arg)
    elif opt == '--num_epochs':
      global num_epochs
      num_epochs =  int(arg)
    elif opt == '--learning_rate':
      global learning_rate
      learning_rate = float(arg)
    elif opt == '--dropout':
      global dropout
      dropout=  float(arg)
    elif opt == '--num_layers':
      global num_layers
      num_layers =  int(arg)
    elif opt == '--num_steps':
      global num_steps
      num_steps =  int(arg)
    elif opt == '--hidden_size':
      global hidden_size
      hidden_size =  int(arg)
    elif opt == '--batch_size':
      global batch_size
      batch_size  =  int(arg)

  def normalize_matrix(matrix):
  	columns = matrix.shape[1]
  	
  	for i in range(0, columns):
  		x = matrix[:, i]
  		x_normed = (x - x.min(0)) / x.ptp(0)
  		matrix[:,i] = x_normed
  	return matrix

  def read_datasets(train_csv, validation_csv, test_csv):
  	class Data(object): pass
  	
  	data_sets = Data()
  	
  	train = np.genfromtxt(train_csv, delimiter=',', dtype=float)
  	train = train.astype(np.float)
  	validation = np.genfromtxt(validation_csv, delimiter=',', dtype=float)
  	validation = validation.astype(np.float)
  	test = np.genfromtxt(test_csv, delimiter=',', dtype=float)
  	test = test.astype(np.float)
  	
  	rows, columns = train.shape
  	arr = np.arange(rows)
  	np.random.shuffle(arr)
  	matrix = np.zeros((rows, columns))
  	
  	for i in range (0, rows):
  		matrix[i] = train[arr[i],:]

  	mTrain = matrix[:, 0:-1]
  	train_labels = matrix[:, columns - 1]
  	
  	rows, columns = validation.shape
  	arr = np.arange(rows)
  	np.random.shuffle(arr)
  	matrix = np.zeros((rows, columns))
  	
  	for i in range (0, rows):
  		matrix[i] = validation[arr[i],:]

  	mValidation = matrix[:, 0:-1]
  	validation_labels = matrix[:, columns - 1]
  	
  	rows, columns = test.shape
  	arr = np.arange(rows)
  	np.random.shuffle(arr)
  	matrix = np.zeros((rows, columns))
  	
  	for i in range (0, rows):
  		matrix[i] = test[arr[i],:]

  	mTest = matrix[:, 0:-1]
  	test_labels = matrix[:, columns - 1]	
  	
  	mData = np.concatenate((mTrain, mValidation, mTest))
  	mData = normalize_matrix(mData)

  	trainSize = len(mTrain)
  	validationSize = len(mValidation)
  	testSize = len(mTest)
  	
  	mTrain = mData[0:trainSize, :]
  	mValidation = mData[trainSize:trainSize + validationSize, :]
  	mTest = mData[-testSize:, :]

  	train_labels = train_labels.astype(np.uint8)
  	validation_labels = validation_labels.astype(np.uint8)
  	test_labels = test_labels.astype(np.uint8)
  	
  	data_sets.train = mTrain
  	data_sets.validation = mValidation
  	data_sets.test = mTest
  	data_sets.train_labels = train_labels
  	data_sets.validation_labels = validation_labels
  	data_sets.test_labels = test_labels

  	return data_sets

  def sample_batch(X_train,y_train,batch_size,num_steps):
    """ Function to sample a batch for training"""
    N,data_len = X_train.shape
    ind_N = np.random.choice(N,batch_size,replace=False).astype(int)[:]
    ind_start = np.random.choice(data_len-num_steps,1).astype(int)[0]
    X_batch = X_train[ind_N,ind_start:ind_start+num_steps]
    y_batch = y_train[ind_N]
    
    return X_batch,y_batch

  def check_test(X_test,y_test,batch_size,num_steps):
    """ Function to check the test_accuracy on the entire test set"""
    N = X_test.shape[0]
    num_batch = np.floor(N/batch_size).astype(int)
    test_acc = np.zeros(num_batch)
    test_predictions=[]
    for i in range(num_batch):
      X_batch, y_batch = sample_batch(X_test,y_test,batch_size,num_steps)
      test_acc[i], test_pred = sess.run([accuracy, predictions], feed_dict = {input_data: X_batch, targets: y_batch, keep_prob:1})
      test_predictions =  np.append(test_predictions,test_pred)   
    return np.mean(test_acc), test_predictions

  """Load the data"""
  # dummy = True
  # if dummy:
    # data_train = np.loadtxt(dir_path + 'UCR_TS_Archive_2015/Two_Patterns/Two_Patterns_TRAIN',delimiter=',')
    # data_test_val = np.loadtxt(dir_path + 'UCR_TS_Archive_2015/Two_Patterns/Two_Patterns_TEST',delimiter=',')
  # else:
    # data_train = np.loadtxt('data_train_dummy',delimiter=',')
    # data_test_val = np.loadtxt('data_test_dummy',delimiter=',')
  # data_test,data_val = np.split(data_test_val,2)
  # X_train = data_train[:,1:]
  # X_val = data_val[:,1:]
  # X_test = data_test[:,1:]
  # N = X_train.shape[0]
  # Ntest = X_test.shape[0]
  # Targets have labels 1-indexed. We subtract one for 0-indexed
  # y_train = data_train[:,0]-1
  # y_val = data_val[:,0]-1
  # y_test = data_test[:,0]-1
  # num_classes = len(np.unique(y_train))

  data_sets = read_datasets('Data/Test/train_data.csv','Data/Test/validation_data.csv', 'Data/Test/test_data.csv')

  X_train = data_sets.train
  train_size = X_train.shape[0]
  max_iterations = int((num_epochs * train_size) // batch_size)
  X_val = data_sets.validation
  X_test = data_sets.test

  N = X_train.shape[0]
  Ntest = X_test.shape[0]

  y_train = data_sets.train_labels
  y_val = data_sets.validation_labels
  y_test = data_sets.test_labels

  num_classes = len(np.unique(y_train))

  # Collect the costs in a numpy fashion
  epochs = np.floor(batch_size*max_iterations / N)
  print('Train with approximately %d epochs' %(epochs))
  if max_iterations%100 == 0:
    perf_collect = np.zeros((3,int(np.floor(max_iterations /100))))
  else:
    perf_collect = np.zeros((3,int(np.floor(max_iterations /100)) + 1 ))

  """Place holders"""
  input_data = tf.placeholder(tf.float32, shape=(batch_size, num_steps), name = 'input_data')
  print(input_data)
  targets = tf.placeholder(tf.int64, shape=(batch_size), name='Targets')
  print(targets)
  #Used later on for drop_out. At testtime, we pass 1.0
  keep_prob = tf.placeholder("float", name = 'Drop_out_keep_prob')

  with tf.name_scope("LSTM_setup") as scope:
    # Build a fresh cell per layer: re-using one cell object across layers
    # would make the layers share weights (or raise a reuse error) in TF 1.x.
    def make_cell():
      c = tf.nn.rnn_cell.LSTMCell(hidden_size, state_is_tuple=True)
      return tf.nn.rnn_cell.DropoutWrapper(c, output_keep_prob=keep_prob)
    cell = tf.nn.rnn_cell.MultiRNNCell([make_cell() for _ in range(num_layers)],
                                       state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    #We have only one input dimension, but we generalize our code for future expansion
    inputs = tf.expand_dims(input_data, 2)

  #Define the recurrent nature of the LSTM
  with tf.name_scope("LSTM") as scope:
    outputs = []
    state = initial_state
    with tf.variable_scope("LSTM_state"):
      for time_step in range(num_steps):
       if time_step > 0: tf.get_variable_scope().reuse_variables() #Re-use variables only after first time-step
       (cell_output, state) = cell(inputs[:, time_step, :], state)
       outputs.append(cell_output)       #Now cell_output is size [batch_size x hidden_size]
    output = tf.reduce_mean(tf.stack(outputs), 0)


  #Generate a classification from the cell outputs
  #Note, this is where timeseries classification differs from sequence to sequence
  #modelling: here the cell outputs are mean-pooled over time before the softmax.
  with tf.name_scope("Softmax") as scope:
    with tf.variable_scope("Softmax_params"):
      softmax_w = tf.get_variable("softmax_w", [hidden_size, num_classes])
      softmax_b = tf.get_variable("softmax_b", [num_classes])
    logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    #Use sparse Softmax because we have mutually exclusive classes
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=targets, name='Sparse_softmax')
    cost = tf.reduce_sum(loss) / batch_size
  with tf.name_scope("Evaluating_accuracy") as scope:
    predictions  =  tf.argmax(logits,1)
    correct_prediction = tf.equal(predictions,targets)
    accuracy  = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    tf.scalar_summary("accuracy", accuracy)
    
  """Optimizer"""
  with tf.name_scope("Optimizer") as scope:
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),max_grad_norm)   #We clip the gradients to prevent explosion
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients = list(zip(grads, tvars))  # materialize: iterated twice below
    train_op = optimizer.apply_gradients(gradients)
    # Add histograms for variables, gradients and gradient norms.
    # The for-loop iterates over all gradient/variable pairs and plots a
    # histogram for each; IndexedSlices gradients are reduced to their values.
    for gradient, variable in gradients:
      if isinstance(gradient, ops.IndexedSlices):
        grad_values = gradient.values
      else:
        grad_values = gradient
      h1 = tf.summary.histogram(variable.name, variable)
      h2 = tf.summary.histogram(variable.name + "/gradients", grad_values)
      h3 = tf.summary.histogram(variable.name + "/gradient_norm", tf.global_norm([grad_values]))

  #Final code for the TensorBoard
  merged = tf.summary.merge_all()

  """Session time"""
  sess = tf.Session() #Depending on your use, do not forget to close the session
  writer = tf.summary.FileWriter(dir_path + "/logs/log_tb")
  sess.run(tf.global_variables_initializer())


  step = 0
  cost_train_ma = -np.log(1/float(num_classes)+1e-9)
  for i in range(max_iterations):
    # Calculate some sizes
    N = X_train.shape[0]
    #Sample batch for training
    X_batch, y_batch = sample_batch(X_train,y_train,batch_size,num_steps)

    #Next line does the actual training
    cost_train, _ = sess.run([cost,train_op],feed_dict = {input_data: X_batch,targets: y_batch,keep_prob:dropout})
    cost_train_ma = cost_train_ma*0.99 + cost_train*0.01
    if i%100 == 0:
      #Evaluate training performance
      perf_collect[0,step] = cost_train
      #Evaluate validation performance
      X_batch, y_batch = sample_batch(X_val,y_val,batch_size,num_steps)
      result = sess.run([cost,merged,accuracy],feed_dict = {input_data: X_batch, targets: y_batch, keep_prob:1})
      cost_val = result[0]
      perf_collect[1,step] = cost_val
      acc_val = result[2]
      perf_collect[2,step] = acc_val
      print('At %5.0f out of %5.0f: Cost is TRAIN %.3f(%.3f) VAL %.3f and val acc is %.3f' %(i,max_iterations,cost_train,cost_train_ma,cost_val,acc_val))


      #Write information to TensorBoard
      summary_str = result[1]
      writer.add_summary(summary_str, i)
      writer.flush()

      step +=1
  acc_test, predictions = check_test(X_test,y_test,batch_size,num_steps)

  """Additional plots"""
  print('The accuracy on the test data is %.3f' %(acc_test))
  plt.plot(perf_collect[0],label='Train')
  plt.plot(perf_collect[1],label = 'Valid')
  plt.plot(perf_collect[2],label = 'Valid accuracy')
  plt.axis([0, step, 0, np.max(perf_collect)])
  plt.legend()
  plt.show()
  #y_val = y_val[1849:3724]
  print('\nY Results')
  print('Test accuracy is:  %.1f%%' % (100.0  * accuracy_score(y_test[0:predictions.shape[0]],predictions)))
  print('\nConfusion_matrix')
  print(confusion_matrix_table(y_test[0:predictions.shape[0]],predictions))
  print('\n', classification_report(y_test[0:predictions.shape[0]],predictions)) 
Example n. 35
    def __init__(self, s_size, a_size, scope, trainer, cell_units):
        print(scope)
        with tf.variable_scope(scope):
            # Input
            self.inputs = tf.placeholder(shape=[None, s_size],
                                         dtype=tf.float32)

            # Recurrent network for temporal dependencies
            lstm_cell = tf.contrib.rnn.BasicLSTMCell(cell_units,
                                                     state_is_tuple=True)
            c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
            h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)
            self.state_init = [c_init, h_init]
            c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
            h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
            self.state_in = [c_in, h_in]
            rnn_in = tf.expand_dims(self.inputs, [0])
            state_in = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)
            lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
                lstm_cell, rnn_in, initial_state=state_in, time_major=False)
            lstm_c, lstm_h = lstm_state
            self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
            rnn_out = tf.reshape(lstm_outputs, [-1, cell_units])

            # Output layers for policy and value estimations
            self.policy = slim.fully_connected(
                rnn_out,
                a_size,
                activation_fn=tf.nn.softmax,
                weights_initializer=normalized_columns_initializer(0.01),
                biases_initializer=None,
            )
            self.value = slim.fully_connected(
                rnn_out,
                1,
                activation_fn=None,
                weights_initializer=normalized_columns_initializer(1.0),
                biases_initializer=None,
            )

            # Only the worker network need ops for loss functions and gradient updating.
            if scope != "global" and scope != "init":
                self.actions = tf.placeholder(shape=[None, a_size],
                                              dtype=tf.float32)
                self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
                self.advantages = tf.placeholder(shape=[None],
                                                 dtype=tf.float32)

                self.responsible_outputs = tf.reduce_sum(
                    self.policy * self.actions, [1])

                # Value loss function
                self.value_loss = 0.5 * tf.reduce_sum(
                    tf.square(self.target_v - tf.reshape(self.value, [-1])))

                # Softmax policy loss function
                self.policy_loss = -tf.reduce_sum(
                    tf.log(tf.maximum(self.responsible_outputs, 1e-12)) *
                    self.advantages)

                # Softmax entropy function
                self.entropy = -tf.reduce_sum(
                    self.policy * tf.log(tf.maximum(self.policy, 1e-12)))

                self.loss = (0.5 * self.value_loss + self.policy_loss -
                             self.entropy * 0.01)

                # Get gradients from local network using local losses
                local_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope)
                self.gradients = tf.gradients(self.loss, local_vars)
                self.var_norms = tf.global_norm(local_vars)
                grads, self.grad_norms = tf.clip_by_global_norm(
                    self.gradients, 40.0)

                # Apply local gradients to global network
                global_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, "global")
                self.apply_grads = trainer.apply_gradients(
                    list(zip(grads, global_vars)))
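
The recurring pattern in these worker networks (compute gradients of the loss with respect to the local variables, clip them by their joint global norm, then apply them to the shared "global" copies) is the core A3C update. A minimal sketch, assuming scope names "worker_0" and "global" and illustrative `loss` and `trainer` objects:

local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "worker_0")
global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "global")
grads = tf.gradients(loss, local_vars)                  # local gradients
grads, grad_norm = tf.clip_by_global_norm(grads, 40.0)  # joint-norm clipping
apply_grads = trainer.apply_gradients(zip(grads, global_vars))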
Example n. 36
    def __init__(self, env, monitor_path: str, video=False, **usercfg) -> None:
        super(PPO, self).__init__(**usercfg)
        self.monitor_path: str = monitor_path
        self.env = wrappers.Monitor(env,
                                    monitor_path,
                                    force=True,
                                    video_callable=(None if video else False))

        self.config.update(
            dict(
                n_hidden_units=20,
                n_hidden_layers=2,
                gamma=0.99,
                gae_lambda=0.95,
                learning_rate=0.001,
                n_epochs=10,
                n_iter=10000,
                batch_size=64,  # Timesteps per training batch
                n_local_steps=256,
                gradient_clip_value=None,
                vf_coef=0.5,
                entropy_coef=0.01,
                cso_epsilon=0.2  # Clipped surrogate objective epsilon
            ))
        self.config.update(usercfg)

        with tf.variable_scope("old_network"):
            self.old_network = self.build_networks()
            self.old_network_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES,
                tf.get_variable_scope().name)

        with tf.variable_scope("new_network"):
            self.new_network = self.build_networks()
            if self.RNN:
                self.initial_features = self.new_network.state_init
            else:
                self.initial_features = None
            self.new_network_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES,
                tf.get_variable_scope().name)
        self.action = self.new_network.action
        self.value = self.new_network.value
        self.states = self.new_network.states
        self.actions_taken = self.new_network.actions_taken
        self.advantage = tf.placeholder(tf.float32, [None], name="advantage")
        self.ret = tf.placeholder(tf.float32, [None], name="return")

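        # PPO keeps a frozen snapshot of the policy ("old_network"); copying
        # the current weights into it before each round of updates keeps the
        # probability ratio in the clipped surrogate objective well-defined.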
        self.set_old_to_new = tf.group(*[
            v1.assign(v2)
            for v1, v2 in zip(self.old_network_vars, self.new_network_vars)
        ])

        # Reduces by taking the mean instead of summing
        self.actor_loss = -tf.reduce_mean(
            self.make_actor_loss(self.old_network, self.new_network,
                                 self.advantage))
        self.critic_loss = tf.reduce_mean(tf.square(self.value - self.ret))
        self.mean_entropy = tf.reduce_mean(self.new_network.entropy)
        self.loss = self.actor_loss + self.config["vf_coef"] * self.critic_loss + \
            self.config["entropy_coef"] * self.mean_entropy

        grads = tf.gradients(self.loss, self.new_network_vars)

        self._global_step = tf.get_variable(
            "global_step", [],
            tf.int32,
            initializer=tf.constant_initializer(0, dtype=tf.int32),
            trainable=False)

        self.n_steps = tf.shape(self.states)[0]
        self.session = tf.Session()
        if self.config["save_model"]:
            tf.add_to_collection("action", self.action)
            tf.add_to_collection("states", self.states)
            self.saver = FastSaver()

        summary_actor_loss = tf.summary.scalar("model/Actor_loss",
                                               self.actor_loss)
        summary_critic_loss = tf.summary.scalar("model/Critic_loss",
                                                self.critic_loss)
        summary_loss = tf.summary.scalar("model/Loss", self.loss)
        summary_entropy = tf.summary.scalar("model/entropy",
                                            -self.mean_entropy)
        summary_grad_norm = tf.summary.scalar("model/grad_global_norm",
                                              tf.global_norm(grads))
        summary_var_norm = tf.summary.scalar(
            "model/var_global_norm", tf.global_norm(self.new_network_vars))
        summaries = []
        for v in tf.trainable_variables():
            if "new_network" in v.name:
                summaries.append(tf.summary.histogram(v.name, v))
        summaries += [
            summary_actor_loss, summary_critic_loss, summary_loss,
            summary_entropy, summary_grad_norm, summary_var_norm
        ]
        self.model_summary_op = tf.summary.merge(summaries)
        self.writer = tf.summary.FileWriter(
            os.path.join(self.monitor_path, "summaries"), self.session.graph)
        self.env_runner = EnvRunner(self.env,
                                    self,
                                    usercfg,
                                    summary_writer=self.writer)

        # grads before clipping were passed to the summary, now clip and apply them
        if self.config["gradient_clip_value"] is not None:
            grads, _ = tf.clip_by_global_norm(
                grads, self.config["gradient_clip_value"])
        self.optimizer = tf.train.AdamOptimizer(self.config["learning_rate"],
                                                name="optim")
        apply_grads = self.optimizer.apply_gradients(
            zip(grads, self.new_network_vars))

        inc_step = self._global_step.assign_add(self.n_steps)
        self.train_op = tf.group(apply_grads, inc_step)

        init = tf.global_variables_initializer()
        self.session.run(init)
        return
Example n. 37
    def _buildNetwork(self):
        def _vwwd(shape, stddev, wd):
            # variable with weight decay
            var = tf.Variable(tf.truncated_normal(shape, stddev=stddev, dtype=tf.float32))
            if wd is not None:
                tf.add_to_collection('losses', tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss'))
            return var

        def conv2d(name, l_input, w, b):
            return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(l_input, w, strides=[1,1,1,1], padding='SAME'), b), name=name)

        def max_pool(name, l_input, ksize, strides):
            return tf.nn.max_pool(l_input, ksize=[1,ksize,ksize,1], strides=[1,strides,strides,1], padding='SAME', name=name)

        def norm(name, l_input, lsize=4):
            return tf.nn.lrn(l_input, lsize, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name=name)

        def local(name, l_input, w, b):
            return tf.nn.relu(tf.matmul(l_input, w) + b, name=name)

        n_class = 10

        _weights = {
            'wc1': _vwwd([5, 5,  3, 64], stddev=5e-2, wd=0.0),
            'wc2': _vwwd([5, 5, 64, 64], stddev=5e-2, wd=0.0),
            'wl3': _vwwd([IMAGE_SIZE * IMAGE_SIZE * 4, 384],    stddev=0.04, wd=0.004),
            'wl4': _vwwd([384, 192],     stddev=0.04, wd=0.004),
            'out': _vwwd([192, n_class], stddev=1/192.0, wd=0.0),
        }

        _biases = {
            'bc1' :  tf.Variable(tf.constant(value=0.0, shape=[64],  dtype=tf.float32)),
            'bc2' :  tf.Variable(tf.constant(value=0.1, shape=[64],  dtype=tf.float32)),
            'bl3' :  tf.Variable(tf.constant(value=0.1, shape=[384], dtype=tf.float32)),
            'bl4' :  tf.Variable(tf.constant(value=0.1, shape=[192], dtype=tf.float32)),
            'out' :  tf.Variable(tf.constant(value=0.0, shape=[n_class],  dtype=tf.float32)),
        }

        self.x = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3])
        self.y_ = tf.placeholder(tf.int64, shape=[None])
        batch_num = tf.Variable(self.batch_num, dtype=tf.int64)
        self.keep_prob = tf.placeholder(tf.float32)
        _dropout = self.keep_prob

        conv1 = conv2d('conv1', self.x, _weights['wc1'], _biases['bc1'])
        pool1 = max_pool('pool1', conv1, ksize=3, strides=2)
        norm1 = norm('norm1', pool1, lsize=4)
        print('norm1', norm1.get_shape())
        norm1 = tf.nn.dropout(norm1, _dropout)

        conv2 = conv2d('conv2', norm1, _weights['wc2'], _biases['bc2'])
        # note: unlike the first block, normalization here comes before pooling
        norm2 = norm('norm2', conv2, lsize=4)
        pool2 = max_pool('pool2', norm2, ksize=3, strides=2)
        print('pool2', pool2.get_shape())
        pool2 = tf.nn.dropout(pool2, _dropout)  # note: flagged as a candidate for removal

        pool2 = tf.reshape(pool2, [-1, IMAGE_SIZE * IMAGE_SIZE * 4])
        print('pool2', pool2.get_shape())
        local3 = local('local3', pool2, _weights['wl3'], _biases['bl3'])

        local4 = local('local4', local3, _weights['wl4'], _biases['bl4'])

        self.softmax = tf.add(tf.matmul(local4, _weights['out']), _biases['out'], name='softmax')

        #global_step = tf.Variable(0, trainable=False)
        #decay_step = 100
        self.cross_entropy_individual = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.softmax, labels=self.y_)
        self.cross_entropy = tf.reduce_mean(self.cross_entropy_individual)
        '''
        tf.add_to_collection('losses', self.cross_entropy)
        self.loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
        self.lr = tf.train.exponential_decay(0.1, global_step, decay_step, 0.1, staircase=True)
        losses = tf.get_collection('losses')
        loss_average = tf.train.ExponentialMovingAverage(0.9, name='avg')
        loss_averages_op = loss_average.apply(losses + [self.loss])
        with tf.control_dependencies([loss_averages_op]):
            opt = tf.train.GradientDescentOptimizer(self.lr)
            grads = opt.compute_gradients(self.loss)
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
            self.train_op = tf.no_op(name='train')
        '''
        #self.lr = tf.train.exponential_decay(0.001, global_step, decay_step, 0.996, staircase=True)
        #self.train_step = tf.train.AdamOptimizer(self.lr).minimize(self.cross_entropy, global_step=global_step)
        self.opt = tf.train.AdamOptimizer(0.001)
        self.train_step = self.opt.minimize(self.cross_entropy)
        self.grad = self.opt.compute_gradients(self.cross_entropy)
        self.norm = tf.global_norm([i[0] for i in self.grad])
        self.correct_prediction = tf.equal(tf.argmax(self.softmax, 1), self.y_)
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
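
A hypothetical training-step sketch for this network; `sess`, `model`, `images` (shape `[N, IMAGE_SIZE, IMAGE_SIZE, 3]`), and `labels` (shape `[N]`) are assumptions, the attribute names are from the snippet.

# One optimization step with dropout enabled, then an evaluation pass without it.
_, loss, grad_norm = sess.run(
    [model.train_step, model.cross_entropy, model.norm],
    feed_dict={model.x: images, model.y_: labels, model.keep_prob: 0.5})
acc = sess.run(
    model.accuracy,
    feed_dict={model.x: images, model.y_: labels, model.keep_prob: 1.0})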
Example no. 38
0
    def __init__(self,
                 num_classes,
                 num_frames,
                 num_temp_features,
                 num_st_features,
                 num_units,
                 max_gradient_norm,
                 learning_rate,
                 learning_rate_decay_factor,
                 adam_epsilon,
                 GD,
                 attention_lstm_num_units,
                 attention_num_hidden_fc1,
                 forward_only=False,
                 l2_regularization=False,
                 weight_decay=0,
                 log_dir=None):
        """"
        Create S-RNN model
        edgeRNNs: dictionary with keys as RNN name and value is a list of layers
        nodeRNNs: dictionary with keys as RNN name and value is a list of layers
        nodeToEdgeConnections: dictionary with keys as nodeRNNs name and value is another
                dictionary whose keys are edgeRNNs the nodeRNN is connected to and value is a list
                of size-2 which indicate the features to choose from the unConcatenateLayer
        edgeListComplete:
        cost:
        nodeLabels:
        learning_rate:
        clipnorm:
        update_type:
        weight_decay:

        return:
        """
        self.save_summaries = log_dir is not None
        if self.save_summaries:
            print('Writing summaries for Tensorboard')
        num_layers = 1
        self.num_classes = num_classes
        self.num_temp_features = num_temp_features
        self.num_st_features = num_st_features
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        #self.learning_rate = float(learning_rate)
        #self.learning_rate_decay = tf.Variable(float(learning_rate), trainable=False)
        # self.learning_rate_decay_op = self.learning_rate.assign(
        #      self.learning_rate * 0.1)
        self.max_grad_norm = max_gradient_norm
        self.global_step = tf.Variable(0, trainable=False)
        self.adam_epsilon = adam_epsilon
        self.GD = GD
        self.weight_decay = weight_decay
        # self.previous_eval_loss = []
        # self.best_val_loss = np.inf
        # self.strikes = tf.Variable(0, trainable=False)
        self.temp_features_names = [
            'face-face', 'neck-neck', 'belly-belly',
            'rightShoulder-rightShoulder', 'leftShoulder-leftShoulder',
            'rightElbow-rightElbow', 'leftElbow-leftElbow',
            'rightArm-rightArm', 'leftArm-leftArm', 'rightHip-rightHip',
            'leftHip-leftHip', 'rightKnee-rightKnee', 'leftKnee-leftKnee',
            'rightLeg-rightLeg', 'leftLeg-leftLeg'
        ]

        self.st_features_names = [
            'face-neck', 'face-belly', 'face-rightShoulder',
            'face-leftShoulder', 'face-rightElbow', 'face-leftElbow',
            'face-rightArm', 'face-leftArm', 'face-rightHip', 'face-leftHip',
            'face-rightKnee', 'face-leftKnee', 'face-rightLeg', 'face-leftLeg',
            'neck-belly', 'neck-rightShoulder', 'neck-leftShoulder',
            'neck-rightElbow', 'neck-leftElbow', 'neck-rightArm',
            'neck-leftArm', 'neck-rightHip', 'neck-leftHip', 'neck-rightKnee',
            'neck-leftKnee', 'neck-rightLeg', 'neck-leftLeg',
            'belly-rightShoulder', 'belly-leftShoulder', 'belly-rightElbow',
            'belly-leftElbow', 'belly-rightArm', 'belly-leftArm',
            'belly-rightHip', 'belly-leftHip', 'belly-rightKnee',
            'belly-leftKnee', 'belly-rightLeg', 'belly-leftLeg',
            'rightShoulder-leftShoulder', 'rightShoulder-rightElbow',
            'rightShoulder-leftElbow', 'rightShoulder-rightArm',
            'rightShoulder-leftArm', 'rightShoulder-rightHip',
            'rightShoulder-leftHip', 'rightShoulder-rightKnee',
            'rightShoulder-leftKnee', 'rightShoulder-rightLeg',
            'rightShoulder-leftLeg', 'leftShoulder-rightElbow',
            'leftShoulder-leftElbow', 'leftShoulder-rightArm',
            'leftShoulder-leftArm', 'leftShoulder-rightHip',
            'leftShoulder-leftHip', 'leftShoulder-rightKnee',
            'leftShoulder-leftKnee', 'leftShoulder-rightLeg',
            'leftShoulder-leftLeg', 'rightElbow-leftElbow',
            'rightElbow-rightArm', 'rightElbow-leftArm', 'rightElbow-rightHip',
            'rightElbow-leftHip', 'rightElbow-rightKnee',
            'rightElbow-leftKnee', 'rightElbow-rightLeg', 'rightElbow-leftLeg',
            'leftElbow-rightArm', 'leftElbow-leftArm', 'leftElbow-rightHip',
            'leftElbow-leftHip', 'leftElbow-rightKnee', 'leftElbow-leftKnee',
            'leftElbow-rightLeg', 'leftElbow-leftLeg', 'rightArm-leftArm',
            'rightArm-rightHip', 'rightArm-leftHip', 'rightArm-rightKnee',
            'rightArm-leftKnee', 'rightArm-rightLeg', 'rightArm-leftLeg',
            'leftArm-rightHip', 'leftArm-leftHip', 'leftArm-rightKnee',
            'leftArm-leftKnee', 'leftArm-rightLeg', 'leftArm-leftLeg',
            'rightHip-leftHip', 'rightHip-rightKnee', 'rightHip-leftKnee',
            'rightHip-rightLeg', 'rightHip-leftLeg', 'leftHip-rightKnee',
            'leftHip-leftKnee', 'leftHip-rightLeg', 'leftHip-leftLeg',
            'rightKnee-leftKnee', 'rightKnee-rightLeg', 'rightKnee-leftLeg',
            'leftKnee-rightLeg', 'leftKnee-leftLeg', 'rightLeg-leftLeg'
        ]

        #nodes_names = {'face','neck','belly','right-shoulder','left-shoulder','right-elbow','left-elbow','right-arm','left-arm','right-hip','left-hip','right-knee','left-knee','right-leg', 'left-leg'}
        nodes_names = {
            'face', 'belly', 'right-elbow', 'left-elbow', 'right-arm',
            'left-arm', 'right-knee', 'left-knee', 'right-leg', 'left-leg'
        }
        edgesRNN = {}
        nodesRNN = {}
        states = {}
        infos = {}
        self.batch_size = tf.placeholder(dtype=tf.int32,
                                         shape=[],
                                         name='batch_size')
        #self.batch_size = 36
        self.inputs = {}
        self.targets = tf.placeholder(tf.float32,
                                      shape=(None, num_classes),
                                      name='targets')
        for temp_feat in self.temp_features_names:
            infos[temp_feat] = {
                'input_gates': [],
                'forget_gates': [],
                'modulated_input_gates': [],
                'output_gates': [],
                'activations': [],
                'state_c': [],
                'state_m': []
            }
            self.inputs[temp_feat] = tf.placeholder(
                tf.float32,
                shape=(None, num_frames, self.num_temp_features),
                name=temp_feat)
            if num_layers == 1:
                edgesRNN[temp_feat] = tf.contrib.rnn.BasicLSTMCell(
                    num_units, state_is_tuple=True, activation=tf.nn.softsign)

            else:
                cells = []
                for _ in range(num_layers):
                    cell = tf.contrib.rnn.DropoutWrapper(
                        tf.contrib.rnn.BasicLSTMCell(
                            num_units,
                            state_is_tuple=True,
                            activation=tf.nn.softsign))
                    cells.append(cell)
                edgesRNN[temp_feat] = tf.contrib.rnn.MultiRNNCell(
                    cells, state_is_tuple=True)
            states[temp_feat] = edgesRNN[temp_feat].zero_state(
                self.batch_size, dtype=tf.float32)

        for st_feat in self.st_features_names:
            infos[st_feat] = {
                'input_gates': [],
                'forget_gates': [],
                'modulated_input_gates': [],
                'output_gates': [],
                'activations': [],
                'state_c': [],
                'state_m': []
            }
            self.inputs[st_feat] = tf.placeholder(tf.float32,
                                                  shape=(None, num_frames,
                                                         self.num_st_features),
                                                  name=st_feat)
            if num_layers == 1:
                edgesRNN[st_feat] = tf.contrib.rnn.BasicLSTMCell(
                    num_units, state_is_tuple=True, activation=tf.nn.softsign)
            else:
                cells = []
                for _ in range(num_layers):
                    cell = tf.contrib.rnn.DropoutWrapper(
                        tf.contrib.rnn.BasicLSTMCell(
                            num_units,
                            state_is_tuple=True,
                            activation=tf.nn.softsign))
                    cells.append(cell)
                edgesRNN[st_feat] = tf.contrib.rnn.MultiRNNCell(
                    cells, state_is_tuple=True)
            states[st_feat] = edgesRNN[st_feat].zero_state(
                self.batch_size, tf.float32)

        for node in nodes_names:
            infos[node] = {
                'input_gates': [],
                'forget_gates': [],
                'modulated_input_gates': [],
                'output_gates': [],
                'activations': [],
                'state_c': [],
                'state_m': []
            }
            self.inputs[node] = tf.placeholder(tf.float32,
                                               shape=(None, num_frames, None),
                                               name=node)
            if num_layers == 1:
                nodesRNN[node] = tf.contrib.rnn.BasicLSTMCell(
                    num_units, state_is_tuple=True, activation=tf.nn.softsign)
            else:
                cells = []
                for _ in range(num_layers):
                    cell = tf.contrib.rnn.DropoutWrapper(
                        tf.contrib.rnn.BasicLSTMCell(
                            num_units,
                            state_is_tuple=True,
                            activation=tf.nn.softsign))
                    cells.append(cell)
                nodesRNN[node] = tf.contrib.rnn.MultiRNNCell(
                    cells, state_is_tuple=True)
            states[node] = nodesRNN[node].zero_state(self.batch_size,
                                                     tf.float32)

        wholeRNN = tf.contrib.rnn.BasicLSTMCell(num_units * 10,
                                                state_is_tuple=True,
                                                activation=tf.nn.softsign)
        states_whole = wholeRNN.zero_state(self.batch_size, tf.float32)

        attention_out_size = 1
        attention_in_size = num_units

        sp_attention_LSTM = tf.contrib.rnn.BasicLSTMCell(
            attention_lstm_num_units, state_is_tuple=True)
        states['spatial_attention'] = sp_attention_LSTM.zero_state(
            self.batch_size, tf.float32)

        tp_attention_LSTM = tf.contrib.rnn.BasicLSTMCell(
            attention_lstm_num_units, state_is_tuple=True)
        states['tempral_attention'] = tp_attention_LSTM.zero_state(
            self.batch_size, tf.float32)

        weights = {
            'out':
            tf.Variable(tf.random_normal([num_units * num_frames,
                                          num_classes]),
                        name='weights_out'),
            'sp_attention_FC1':
            tf.Variable(tf.random_normal([
                attention_lstm_num_units + attention_in_size,
                attention_num_hidden_fc1
            ]),
                        name='sp_weights_FC1'),
            'sp_attention_FC2':
            tf.Variable(tf.random_normal(
                [attention_num_hidden_fc1, attention_out_size]),
                        name='sp_weights_FC2'),
            'tp_attention_FC1':
            tf.Variable(tf.random_normal(
                [1000 + attention_lstm_num_units, attention_out_size]),
                        name='tp_weights_FC1'),
        }
        biases = {
            'out':
            tf.Variable(tf.random_normal([num_classes]), name='biases_out'),
            'sp_attention_FC1':
            tf.Variable(tf.random_normal([attention_num_hidden_fc1]),
                        name='sp_biases_FC1'),
            'sp_attention_FC2':
            tf.Variable(tf.random_normal([attention_out_size]),
                        name='sp_biases_FC2'),
            'tp_attention_FC1':
            tf.Variable(tf.random_normal([attention_out_size]),
                        name='tp_biases_FC1')
        }

        def spatial_attention(x_t, x_t1, scope):
            h_t1, states['spatial_attention'] = sp_attention_LSTM(
                x_t1, states['spatial_attention'], scope=scope)
            fc1 = tf.matmul(
                tf.concat([x_t, h_t1], 1),
                weights['sp_attention_FC1']) + biases['sp_attention_FC1']
            fc2 = tf.matmul(
                tf.tanh(fc1),
                weights['sp_attention_FC2']) + biases['sp_attention_FC2']
            tmp_at = fc2
            #tmp_at = tf.nn.relu(fc2)
            # if attention_placement == 0:
            #     at =  tf.stack([tmp_at]*num_features_per_joints,2)
            #     shape_at = tf.shape(at)
            #     at = tf.reshape(at, [shape_at[0], shape_at[1]*shape_at[2]])
            # else:
            return tmp_at

        def tempral_attention(x_t, x_t1, scope):
            h_t1, states['tempral_attention'] = tp_attention_LSTM(
                x_t1, states['tempral_attention'], scope=scope)
            fc1 = tf.matmul(
                tf.concat([x_t, h_t1], 1),
                weights['tp_attention_FC1']) + biases['tp_attention_FC1']
            tmp_at = tf.nn.softmax(fc1)
            #tmp_at = tf.nn.relu(fc2)
            # if attention_placement == 0:
            #     at =  tf.stack([tmp_at]*num_features_per_joints,2)
            #     shape_at = tf.shape(at)
            #     at = tf.reshape(at, [shape_at[0], shape_at[1]*shape_at[2]])
            # else:
            return tmp_at

        outputs = {}
        #final_outputs = []
        node_inputs = {}
        final_inputs_list = []

        #att_temp = []
        #attention_dense = {}
        #attention_dense_list = []
        #attention_fullbody_input_list = []
        def conv_2d(kernels, kernel_size):
            return Convolution2D(kernels,
                                 kernel_size,
                                 kernel_size,
                                 init="he_uniform",
                                 border_mode="same")

        def att_module(final_inputs_list, t_or_s, scope, time_steps):
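            # Scores each element of final_inputs_list with an attention LSTM,
            # normalizes the scores into weights (softmax across the list),
            # and returns the inputs rescaled by those weights.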

            att_weight = []

            for time_step in range(len(final_inputs_list)):
                input_att = final_inputs_list[time_step]
                if time_step > 0:
                    input_att_t1 = final_inputs_list[time_step - 1]
                else:
                    input_att_t1 = tf.zeros_like(input_att)

                if t_or_s == 's':
                    if time_step > 0: tf.get_variable_scope().reuse_variables()
                    at_shaped = spatial_attention(input_att, input_att_t1,
                                                  scope)
                elif t_or_s == 't':
                    at_shaped = tempral_attention(input_att, input_att_t1,
                                                  scope)
                att_weight.append(at_shaped)

            # Normalize the scores across the list axis; a softmax over the
            # default last axis (size 1 here) would give constant weights of 1.
            att_weight = tf.nn.softmax(tf.stack(att_weight), axis=0)

            final_inputs_list = final_inputs_list * att_weight

            return final_inputs_list

        with tf.variable_scope("SRNN"):
            for time_step in range(num_frames):
                #final_inputs_list = []
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                #final_temp_inputs = []
                #final_inputs = []
                #attention_dense_list = []
                for temp_feat in self.temp_features_names:
                    inputs = self.inputs[temp_feat][:, time_step, :]
                    state = states[temp_feat]
                    scope = "lstm_" + temp_feat
                    outputs[temp_feat], states[temp_feat] = edgesRNN[
                        temp_feat](inputs, state, scope=scope)
                    # attention_dense[temp_feat] = Dense(1, kernel_initializer=tf.random_normal_initializer(),
                    #       bias_initializer=tf.random_normal_initializer(),activation='sigmoid')(outputs[temp_feat])
                    # attention_dense_list.append(attention_dense[temp_feat])

                    #final_inputs.append(outputs[temp_feat])
                for st_feat in self.st_features_names:
                    inputs = self.inputs[st_feat][:, time_step, :]
                    state = states[st_feat]
                    scope = "lstm_" + st_feat
                    outputs[st_feat], states[st_feat] = edgesRNN[st_feat](
                        inputs, state, scope=scope)

                    # attention_dense[st_feat] = Dense(1, kernel_initializer=tf.random_normal_initializer(),
                    #                                    bias_initializer=tf.random_normal_initializer(),activation='sigmoid')(outputs[st_feat])
                    # attention_dense_list.append(attention_dense[st_feat])

                    #final_inputs.append(outputs[st_feat])

                # attention_fullbody_input = tf.concat(attention_dense_list,1)
                # attention_fullbody_input = tf.nn.elu(attention_fullbody_input)
                # attention_fullbody_input_list.append(attention_fullbody_input)
                # fullbody_input = tf.concat(final_inputs, 1)
                # final_inputs_list.append(fullbody_input)
                # input_att = final_inputs_list[time_step]
                # if time_step > 0:
                #     input_att_t1 = final_inputs_list[time_step-1]
                # else:
                #     input_att_t1 = tf.zeros_like(input_att)
                #
                # at_shaped, at = attention(input_att, input_att_t1)
                #
                # final_inputs_list[time_step] = tf.multiply(final_inputs_list[time_step] ,at_shaped)

                #
                # fullbody_input = tf.concat(final_inputs, 1)
                # final_inputs_list.append(fullbody_input)
                #

                #attention_fullbody_input_list[time_step] = tf.multiply(at_shaped, attention_fullbody_input_list[time_step])

                node_inputs['face'] = [
                    outputs['face-face'], outputs['face-belly'],
                    outputs['face-rightElbow'], outputs['face-leftElbow'],
                    outputs['face-rightArm'], outputs['face-leftArm'],
                    outputs['face-rightKnee'], outputs['face-leftKnee'],
                    outputs['face-rightLeg'], outputs['face-leftLeg']
                ]

                with tf.variable_scope('attention_face'):
                    #if time_step > 0: tf.get_variable_scope().reuse_variables()
                    node_inputs['face'] = att_module(node_inputs['face'], 's',
                                                     'attention_face',
                                                     time_step)

                # node_inputs['neck'] = [outputs['face-neck'],outputs['neck-belly'],outputs['neck-rightShoulder'],outputs['neck-leftShoulder'],
                #                                  outputs['neck-rightElbow'],
                #                                  outputs['neck-leftElbow'],outputs['neck-rightArm'],outputs['neck-leftArm'],outputs['neck-rightHip'],
                #                                  outputs['neck-leftHip'],
                #                                  outputs['neck-rightKnee'],outputs['neck-leftKnee'],outputs['neck-rightLeg'],outputs['neck-leftLeg'],
                #                                  outputs['neck-neck']]
                #
                #
                # node_inputs['neck'] = att_module(node_inputs['neck'])

                #node_inputs['neck'] = tf.reshape(tf.transpose(conv_2d(1,1)(tf.nn.relu(node_inputs['neck'])),[0,3,2,1]),[self.batch_size,num_units])

                # node_inputs['elbow'] = tf.concat(
                #     [outputs['rightElbow-rightElbow'], outputs['leftElbow-leftElbow'],
                #      outputs['face-rightElbow'], outputs['face-leftElbow'],
                #      outputs['rightElbow-rightArm'],
                #      outputs['leftElbow-leftArm'], outputs['belly-rightElbow'],
                #      outputs['belly-leftElbow'], outputs['rightElbow-leftElbow']], 1)
                node_inputs['belly'] = [
                    outputs['belly-belly'], outputs['face-belly'],
                    outputs['belly-rightElbow'], outputs['belly-leftElbow'],
                    outputs['belly-rightKnee'], outputs['belly-leftKnee'],
                    outputs['belly-leftArm'], outputs['belly-rightArm'],
                    outputs['belly-leftLeg'], outputs['belly-rightLeg']
                ]

                with tf.variable_scope('attention_belly'):
                    #if time_step > 0: tf.get_variable_scope().reuse_variables()
                    node_inputs['belly'] = att_module(node_inputs['belly'],
                                                      's', 'attention_belly',
                                                      time_step)

                # node_inputs['right-shoulder'] = [outputs['face-rightShoulder'],outputs['neck-rightShoulder'],outputs['belly-rightShoulder'],outputs['rightShoulder-leftShoulder'],
                #                                            outputs['rightShoulder-rightElbow'],
                #                                            outputs['rightShoulder-leftElbow'],outputs['rightShoulder-rightArm'],outputs['rightShoulder-leftArm'],outputs['rightShoulder-rightHip'],
                #                                            outputs['rightShoulder-leftHip'],
                #                                            outputs['rightShoulder-rightKnee'],outputs['rightShoulder-leftKnee'],outputs['rightShoulder-rightLeg'],outputs['rightShoulder-leftLeg'],
                #                                            outputs['rightShoulder-rightShoulder']]

                #node_inputs['right-shoulder'] = att_module(node_inputs['right-shoulder'])

                # node_inputs['left-shoulder'] = [outputs['face-leftShoulder'],outputs['neck-leftShoulder'],outputs['belly-leftShoulder'],outputs['rightShoulder-leftShoulder'],
                #                                           outputs['leftShoulder-rightElbow'],
                #                                           outputs['leftShoulder-leftElbow'],outputs['leftShoulder-rightArm'],outputs['leftShoulder-leftArm'],outputs['leftShoulder-rightHip'],
                #                                           outputs['leftShoulder-leftHip'],
                #                                           outputs['leftShoulder-rightKnee'],outputs['leftShoulder-leftKnee'],outputs['leftShoulder-rightLeg'],outputs['leftShoulder-leftLeg'],
                #                                           outputs['leftShoulder-leftShoulder']]

                #node_inputs['left-shoulder'] = att_module(node_inputs['left-shoulder'])

                node_inputs['right-elbow'] = [
                    outputs['face-rightElbow'], outputs['belly-rightElbow'],
                    outputs['rightElbow-leftElbow'],
                    outputs['rightElbow-rightArm'],
                    outputs['rightElbow-leftArm'],
                    outputs['rightElbow-rightKnee'],
                    outputs['rightElbow-leftKnee'],
                    outputs['rightElbow-rightLeg'],
                    outputs['rightElbow-leftLeg'],
                    outputs['rightElbow-rightElbow']
                ]
                with tf.variable_scope('attention_right-elbow'):
                    #if time_step > 0: tf.get_variable_scope().reuse_variables()
                    node_inputs['right-elbow'] = att_module(
                        node_inputs['right-elbow'], 's',
                        'attention_right-elbow', time_step)

                node_inputs['left-elbow'] = [
                    outputs['face-leftElbow'], outputs['belly-leftElbow'],
                    outputs['rightElbow-leftElbow'],
                    outputs['leftElbow-rightArm'],
                    outputs['leftElbow-leftArm'],
                    outputs['leftElbow-rightKnee'],
                    outputs['leftElbow-leftKnee'],
                    outputs['leftElbow-rightLeg'],
                    outputs['leftElbow-leftLeg'],
                    outputs['leftElbow-leftElbow']
                ]

                with tf.variable_scope('attention_left-elbow'):
                    #    if time_step > 0: tf.get_variable_scope().reuse_variables()
                    node_inputs['left-elbow'] = att_module(
                        node_inputs['left-elbow'], 's', 'attention_left-elbow',
                        time_step)

                node_inputs['right-arm'] = [
                    outputs['face-rightArm'], outputs['belly-rightArm'],
                    outputs['rightElbow-rightArm'],
                    outputs['leftElbow-rightArm'], outputs['rightArm-leftArm'],
                    outputs['rightArm-rightKnee'],
                    outputs['rightArm-leftKnee'], outputs['rightArm-rightLeg'],
                    outputs['rightArm-leftLeg'], outputs['rightArm-rightArm']
                ]

                with tf.variable_scope('attention_right-arm'):
                    #    if time_step > 0: tf.get_variable_scope().reuse_variables()
                    node_inputs['right-arm'] = att_module(
                        node_inputs['right-arm'], 's', 'attention_right-arm',
                        time_step)

                node_inputs['left-arm'] = [
                    outputs['face-leftArm'], outputs['belly-leftArm'],
                    outputs['rightElbow-leftArm'],
                    outputs['leftElbow-leftArm'], outputs['rightArm-leftArm'],
                    outputs['leftArm-rightKnee'], outputs['leftArm-leftKnee'],
                    outputs['leftArm-rightLeg'], outputs['leftArm-leftLeg'],
                    outputs['leftArm-leftArm']
                ]

                with tf.variable_scope('attention_left-arm'):
                    #    if time_step > 0: tf.get_variable_scope().reuse_variables()
                    node_inputs['left-arm'] = att_module(
                        node_inputs['left-arm'], 's', 'attention_left-arm',
                        time_step)

                # node_inputs['right-hip'] = [outputs['face-rightHip'],outputs['neck-rightHip'],outputs['belly-rightHip'],outputs['rightShoulder-rightHip'],
                #                                       outputs['leftShoulder-rightHip'],
                #                                       outputs['rightElbow-rightHip'],outputs['leftElbow-rightHip'],outputs['rightArm-rightHip'],outputs['rightHip-leftHip'],
                #                                       outputs['leftArm-rightHip'],
                #                                       outputs['rightHip-rightKnee'],outputs['rightHip-leftKnee'],outputs['rightHip-rightLeg'],outputs['rightHip-leftLeg'],
                #                                       outputs['rightHip-rightHip']]

                #node_inputs['right-hip'] = att_module(node_inputs['right-hip'])

                # node_inputs['left-hip'] = [outputs['face-leftHip'],outputs['neck-leftHip'],outputs['belly-leftHip'],outputs['rightShoulder-leftHip'],
                #                                      outputs['leftShoulder-leftHip'],
                #                                      outputs['rightElbow-leftHip'],outputs['leftElbow-leftHip'],outputs['rightArm-leftHip'],outputs['leftArm-leftHip'],
                #                                      outputs['rightHip-leftHip'],
                #                                      outputs['leftHip-rightKnee'],outputs['leftHip-leftKnee'],outputs['leftHip-rightLeg'],outputs['leftHip-leftLeg'],
                #                                      outputs['leftHip-leftHip']]

                #node_inputs['left-hip'] = att_module(node_inputs['left-hip'])

                node_inputs['right-knee'] = [
                    outputs['face-rightKnee'], outputs['belly-rightKnee'],
                    outputs['rightElbow-rightKnee'],
                    outputs['leftElbow-rightKnee'],
                    outputs['rightArm-rightKnee'],
                    outputs['leftArm-rightKnee'],
                    outputs['rightKnee-leftKnee'],
                    outputs['rightKnee-rightLeg'],
                    outputs['rightKnee-leftLeg'],
                    outputs['rightKnee-rightKnee']
                ]

                with tf.variable_scope('attention_right-knee'):
                    #    if time_step > 0: tf.get_variable_scope().reuse_variables()
                    node_inputs['right-knee'] = att_module(
                        node_inputs['right-knee'], 's', 'attention_right-knee',
                        time_step)

                node_inputs['left-knee'] = [
                    outputs['face-leftKnee'], outputs['belly-leftKnee'],
                    outputs['rightElbow-leftKnee'],
                    outputs['leftElbow-leftKnee'],
                    outputs['rightArm-leftKnee'], outputs['leftArm-leftKnee'],
                    outputs['rightKnee-leftKnee'],
                    outputs['leftKnee-rightLeg'], outputs['leftKnee-leftLeg'],
                    outputs['leftKnee-leftKnee']
                ]

                with tf.variable_scope('attention_left-knee'):
                    #    if time_step > 0: tf.get_variable_scope().reuse_variables()
                    node_inputs['left-knee'] = att_module(
                        node_inputs['left-knee'], 's', 'attention_left-knee',
                        time_step)

                node_inputs['right-leg'] = [
                    outputs['face-rightLeg'], outputs['belly-rightLeg'],
                    outputs['rightElbow-rightLeg'],
                    outputs['leftElbow-rightLeg'],
                    outputs['rightArm-rightLeg'], outputs['leftArm-rightLeg'],
                    outputs['rightKnee-rightLeg'],
                    outputs['leftKnee-rightLeg'], outputs['rightLeg-leftLeg'],
                    outputs['rightLeg-rightLeg']
                ]
                with tf.variable_scope('attention_right-leg'):
                    #    if time_step > 0: tf.get_variable_scope().reuse_variables()
                    node_inputs['right-leg'] = att_module(
                        node_inputs['right-leg'], 's', 'attention_right-leg',
                        time_step)

                node_inputs['left-leg'] = [
                    outputs['face-leftLeg'], outputs['belly-leftLeg'],
                    outputs['rightShoulder-leftLeg'],
                    outputs['leftElbow-leftLeg'], outputs['rightArm-leftLeg'],
                    outputs['leftArm-leftLeg'], outputs['rightKnee-leftLeg'],
                    outputs['leftKnee-leftLeg'], outputs['rightLeg-leftLeg'],
                    outputs['leftLeg-leftLeg']
                ]
                with tf.variable_scope('attention_left-leg'):
                    #    if time_step > 0: tf.get_variable_scope().reuse_variables()
                    node_inputs['left-leg'] = att_module(
                        node_inputs['left-leg'], 's', 'attention_left-leg',
                        time_step)

                #node_inputs['left-leg'] = tf.reshape(tf.transpose(conv_2d(1,1)(tf.nn.relu(node_inputs['left-leg'])),[0,3,2,1]),[self.batch_size,num_units])

                # node_inputs['arms'] = tf.concat(
                #     [outputs['rightArm-rightArm'], outputs['leftArm-leftArm'],outputs['face-rightArm'],outputs['rightElbow-rightArm'],
                #      outputs['leftElbow-leftArm'],outputs['face-leftArm'], outputs['belly-rightArm'], outputs['belly-leftArm'],
                #      outputs['rightArm-leftArm']], 1)
                #
                # node_inputs['knee'] = tf.concat([outputs['rightKnee-rightKnee'],outputs['leftKnee-leftKnee'],outputs['rightKnee-leftKnee'],
                #                                 outputs['belly-rightKnee'],outputs['belly-leftKnee'],outputs['rightKnee-rightLeg'],
                #                                  outputs['leftKnee-leftLeg']], 1)
                #
                #
                # node_inputs['legs'] = tf.concat(
                #     [outputs['rightLeg-rightLeg'], outputs['leftLeg-leftLeg'],outputs['rightKnee-rightLeg'],outputs['leftKnee-leftLeg'],
                #      outputs['belly-rightLeg'], outputs['belly-leftLeg'],outputs['rightLeg-leftLeg']], 1)

                node_output_list = []
                for node_name in nodes_names:
                    inputs = tf.concat(tf.unstack(node_inputs[node_name]), 1)
                    inputs = tf.nn.elu(inputs)
                    state = states[node_name]
                    scope = "lstm_" + node_name
                    outputs[node_name], states[node_name] = nodesRNN[
                        node_name](inputs, state, scope=scope)
                    node_output_list.append(outputs[node_name])
                #
                # fullbody_input = tf.concat(
                #     [node_inputs['face'],node_inputs['elbow'], node_inputs['belly'], node_inputs['knee'],node_inputs['arms'],
                #      node_inputs['legs']], 1)

                # state = states['wholeRNN']
                # scope = "lstm_" + 'wholeRNN'
                #node_output_list = att_module(node_output_list)

                fullbody_input = tf.concat(node_output_list, 1)
                final_inputs_list.append(fullbody_input)

        #with tf.variable_scope("temporal_attention",reuse=None):
        #final_inputs_list = tf.stack(att_module(final_inputs_list,'t'))

        #outputs, final_state = tf.nn.dynamic_rnn(wholeRNN, tf.stack(final_inputs_list), initial_state=states_whole, time_major=True)
        #outputs = tf.unstack(outputs)

        # input_att = final_inputs_list[time_step]
        # if time_step > 0:
        #     input_att_t1 = final_inputs_list[time_step - 1]
        # else:
        #     input_att_t1 = tf.zeros_like(input_att)
        #
        # at_shaped, at = attention(input_att, input_att_t1)
        #
        # final_inputs_list[time_step] = tf.multiply(final_inputs_list[time_step], at_shaped)

        # cells = []
        # for _ in range(1):
        #     cell = tf.contrib.rnn.BasicLSTMCell(num_units,activation=tf.nn.softsign )
        #     cells.append(cell)
        # cell_fw = tf.contrib.rnn.MultiRNNCell(cells)
        #
        # cells = []
        # for _ in range(1):
        #     cell = tf.contrib.rnn.BasicLSTMCell(num_units,activation=tf.nn.softsign)
        #     cells.append(cell)
        # cell_bw = tf.contrib.rnn.MultiRNNCell(cells)
        #
        # final_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(cell_fw, cell_bw,final_inputs_list,dtype=tf.float32)

        # attention_inputs = tf.transpose(final_outputs, perm=[1, 0, 2])
        # #
        # alpha = attention(attention_inputs,100,return_alphas=True)
        # #
        # final_outputs = final_outputs * alpha
        #
        # split0,split1,split2,split3,split4,split5,split6,split7,split8,split9= tf.split(final_outputs, num_or_size_splits=10, axis=0)
        # split = [tf.squeeze(split0,axis=0),tf.squeeze(split1,axis=0),tf.squeeze(split2,axis=0),tf.squeeze(split3,axis=0),tf.squeeze(split4,axis=0),
        #          tf.squeeze(split5, axis=0),tf.squeeze(split6,axis=0),tf.squeeze(split7,axis=0),tf.squeeze(split8,axis=0),tf.squeeze(split9,axis=0)]

        self.infos = infos

        self.final_states = states
        #self.logits = tf.matmul(output, weights['out'], name="logits") + biases['out']
        # self.full_connect_layer = Dense(256,kernel_initializer=tf.random_normal_initializer(),bias_initializer=tf.random_normal_initializer())(final_outputs[-1])
        # self.dropout_layer =  tf.nn.dropout(Dense(256,kernel_initializer=tf.random_normal_initializer(),bias_initializer=tf.random_normal_initializer())(final_outputs[-1]),keep_prob=0.8)
        # self.logits = Dense(21,kernel_initializer=tf.random_normal_initializer(),bias_initializer=tf.random_normal_initializer())(final_outputs)
        self.logits = tf.layers.dense(
            tf.nn.elu(final_inputs_list[-1]),
            num_classes,
            kernel_initializer=tf.random_normal_initializer(),
            bias_initializer=tf.random_normal_initializer(),
            name='dense_out')
        # self.logits = Dense(21, kernel_initializer=tf.random_normal_initializer(),
        #                     bias_initializer=tf.random_normal_initializer())(output)
        self.logits_drop = tf.nn.dropout(self.logits, keep_prob=0.5)
        self.predict = tf.nn.softmax(self.logits_drop)

        with tf.name_scope('cross_entropy'):
            loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits_drop, labels=self.targets)
            self.cost = tf.reduce_mean(loss)
            self.cost_inference = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                        labels=self.targets))
        if self.save_summaries:
            tf.summary.scalar('cross_entropy', self.cost)

        tvars = tf.trainable_variables()

        # learning-rate schedule
        # def get_learningrate():
        #     if self.strikes > 4:
        #         self.learning_rate_decay = tf.Variable(float(self.learning_rate_decay.eval() / 10), trainable=False)
        #         self.strikes = 0
        #     else:
        #         self.learning_rate_decay= self.learning_rate_decay
        #     return  self.learning_rate_decay

        # def exponential_decay_new(learning_rate, decay_rate):
        #     learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
        #     dtype = learning_rate.dtype
        #     strikes = math_ops.cast(self.strikes, dtype)
        #     decay_rate = math_ops.cast(decay_rate, dtype)
        #     if strikes.eval() > 4:
        #         self.strikes = math_ops.multiply(self.strikes, 0)
        #         return math_ops.multiply(learning_rate, decay_rate)
        #     else:
        #         return learning_rate

        starter_learning_rate = self.learning_rate

        self.learning_rate_decay = tf.train.exponential_decay(
            starter_learning_rate, self.global_step, 250, 0.65, staircase=True)
        # self.learning_rate_decay = exponential_decay_new(
        #     starter_learning_rate,
        #     0.1
        # )

        if not forward_only:
            if self.GD:
                optimizer = tf.train.GradientDescentOptimizer(
                    self.learning_rate_decay)

                clipped_grads, norm = tf.clip_by_global_norm(
                    tf.gradients(self.cost, tvars), self.max_grad_norm)
                self.gradients_norm = norm
                self.updates = optimizer.apply_gradients(
                    zip(clipped_grads, tvars), global_step=self.global_step)
            else:
                aggregation_method = tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N
                optimizer = tf.train.AdamOptimizer(
                    learning_rate=self.learning_rate_decay,
                    epsilon=self.adam_epsilon)
                gradients_and_params = optimizer.compute_gradients(
                    self.cost, tvars, aggregation_method=aggregation_method)
                gradients, params = zip(*gradients_and_params)
                norm = tf.global_norm(gradients)
                self.gradients_norm = norm
                self.updates = optimizer.apply_gradients(
                    zip(gradients, params), global_step=self.global_step)

        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

        self.merged = tf.summary.merge_all()
        # self.merged = tf.merge_all_summaries()
        if self.save_summaries:
            self.train_writer = tf.summary.FileWriter(log_dir + '/train')
            self.test_writer = tf.summary.FileWriter(log_dir + '/test')
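
Feeding this model takes one array per edge placeholder; below is a hypothetical helper, where the `batch` dict keyed by feature name is an assumption and the attribute names come from the snippet.

def make_feed(model, batch, labels):
    # labels: [batch_size, num_classes];
    # batch[name]: [batch_size, num_frames, feature_dim] for each edge.
    feed = {model.targets: labels, model.batch_size: labels.shape[0]}
    for name in model.temp_features_names + model.st_features_names:
        feed[model.inputs[name]] = batch[name]
    return feed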
Example no. 39
0
File: Atari.py Project: asgerMe/A3C
    def __init__(self, pix_x, pix_y, scope, trainer, act_space=6):

        with tf.variable_scope(scope):

            strides1 = 4
            strides2 = 2

            full_c1 = pix_x // (strides1 * strides2)
            full_c2 = pix_y // (strides1 * strides2)

            filters2 = 32

            self.input = tf.placeholder(dtype=tf.float32,
                                        shape=(None, 42, 42, 1),
                                        name='frame_input')

            self.conv1 = slim.conv2d(activation_fn=tf.nn.elu,
                                     inputs=self.input,
                                     num_outputs=32,
                                     kernel_size=[3, 3],
                                     stride=[2, 2],
                                     padding='SAME')

            self.conv2 = slim.conv2d(activation_fn=tf.nn.elu,
                                     inputs=self.conv1,
                                     num_outputs=32,
                                     kernel_size=[3, 3],
                                     stride=[2, 2],
                                     padding='SAME')
            self.conv3 = slim.conv2d(activation_fn=tf.nn.elu,
                                     inputs=self.conv2,
                                     num_outputs=32,
                                     kernel_size=[3, 3],
                                     stride=[2, 2],
                                     padding='SAME')
            self.conv4 = slim.conv2d(activation_fn=tf.nn.elu,
                                     inputs=self.conv3,
                                     num_outputs=32,
                                     kernel_size=[3, 3],
                                     stride=[2, 2],
                                     padding='SAME')
            self.hidden = slim.fully_connected(slim.flatten(self.conv4),
                                               256,
                                               activation_fn=tf.nn.elu)

            lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(256, state_is_tuple=True)

            init_cell_state = tf.constant(value=0,
                                          shape=(1, lstm_cell.state_size.c),
                                          dtype=tf.float32)
            init_hidden_state = tf.constant(value=0,
                                            shape=(1, lstm_cell.state_size.h),
                                            dtype=tf.float32)

            self.init_cell = [init_cell_state, init_hidden_state]

            self.c_in = tf.placeholder(dtype=tf.float32,
                                       shape=(1, lstm_cell.state_size.c),
                                       name='c_in')
            self.h_in = tf.placeholder(dtype=tf.float32,
                                       shape=(1, lstm_cell.state_size.h),
                                       name='h_in')

            self.rnn_in = tf.expand_dims(self.hidden, [0])
            step_size = tf.shape(self.input)[:1]
            self.state_in = tf.nn.rnn_cell.LSTMStateTuple(self.c_in, self.h_in)
            self.lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn(
                lstm_cell,
                self.rnn_in,
                initial_state=self.state_in,
                time_major=False,
                sequence_length=step_size)

            self.lstm_outputs = tf.squeeze(self.lstm_outputs, axis=0)

            condense_to_value = tf.get_variable(
                dtype=tf.float32,
                shape=(256,),
                initializer=self.normalized_columns_initializer(std=1),
                name='form_value')

            self.value_output = tf.tensordot(self.lstm_outputs,
                                             condense_to_value, [[1], [0]])
            condense_to_actions = tf.get_variable(
                dtype=tf.float32,
                shape=(256, act_space),
                initializer=self.normalized_columns_initializer(std=0.01),
                name='c_act')
            action_output = tf.tensordot(self.lstm_outputs,
                                         condense_to_actions, [[1], [0]],
                                         name='action1')
            self.norm_actions = tf.nn.softmax(action_output)

            self.test = self.lstm_outputs

            if scope != 'global':
                R = tf.placeholder(dtype=tf.float32,
                                   shape=(None,),
                                   name='perf_reward')
                get_value = tf.placeholder(dtype=tf.float32,
                                           shape=(None,),
                                           name='perf_value')
                get_action = tf.placeholder(dtype=tf.int32,
                                            shape=(None,),
                                            name='perf_action')
                advantage = tf.placeholder(dtype=tf.float32,
                                           shape=(None,),
                                           name='advantage')

                self.one_hot_action = tf.one_hot(get_action, act_space)
                self.action_channel1 = tf.multiply(self.norm_actions,
                                                   self.one_hot_action)
                self.action_channel = tf.reduce_sum(self.action_channel1, 1)

                self.clip_action = tf.clip_by_value(self.action_channel,
                                                    0.000001, 9999999)
                self.value_loss = tf.reduce_sum(
                    tf.square(self.value_output - R))
                self.action_loss = tf.reduce_sum(
                    tf.log(self.clip_action) * advantage)
                self.entropy = -tf.reduce_sum(
                    tf.log(self.norm_actions) * self.norm_actions)
                self.full_loss = 0.5 * self.value_loss - self.action_loss - 0.01 * self.entropy

                local_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope)
                self.gradients = tf.gradients(self.full_loss, local_vars)

                self.var_norms = tf.global_norm(local_vars)
                grads, self.grad_norms = tf.clip_by_global_norm(
                    self.gradients, 40.0)
                global_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
                self.apply_grads = trainer.apply_gradients(
                    zip(grads, global_vars))  # use the clipped gradients
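
A hypothetical single-step rollout with this network, carrying the LSTM state between frames; `sess` and the 42x42 grayscale `frame` are assumptions, the attribute names are from the snippet.

import numpy as np

def act(sess, net, frame, lstm_state):
    # lstm_state: (c, h) from the previous step, or net.init_cell at episode start.
    policy, value, lstm_state = sess.run(
        [net.norm_actions, net.value_output, net.lstm_state],
        feed_dict={net.input: frame[None, :, :, None],
                   net.c_in: lstm_state[0], net.h_in: lstm_state[1]})
    action = np.random.choice(policy.shape[1], p=policy[0])
    return action, value[0], lstm_state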
Example no. 40
0
    def construct(self, args, num_words, num_chars, lem_num_chars, num_tags, num_senses, bow, eow):
        with self.session.graph.as_default():
            # Training params
            self.is_training = tf.placeholder(tf.bool, [])
            self.learning_rate = tf.placeholder(tf.float32, [], name="learning_rate")

            # Sentence lengths
            self.sentence_lens = tf.placeholder(tf.int32, [None], name="sentence_lens")
            # Number of output words
            self.words_count = tf.reduce_sum(self.sentence_lens)
            words_count = self.words_count
            # Map sentences -> word list
            self.word_indexes = tf.placeholder(tf.int32, [None, 2], name='word_indexes')

            # Tag data
            self.tags = tf.placeholder(tf.int32, [None, None, len(num_tags)], name="tags")

            # Form IDs and charseqs
            self.word_ids = tf.placeholder(tf.int32, [None, None], name="word_ids")
            self.charseqs = tf.placeholder(tf.int32, [None, None], name="charseqs")
            self.charseq_lens = tf.placeholder(tf.int32, [None], name="charseq_lens")
            self.charseq_ids = tf.placeholder(tf.int32, [None, None], name="charseq_ids")

            # Lemma charseqs
            self.target_senses = tf.placeholder(tf.int32, [None, None], name="target_senses")
            self.target_ids = tf.placeholder(tf.int32, [None, None], name="target_ids")
            self.target_seqs = tf.placeholder(tf.int32, [None, None], name="target_seqs")
            self.target_seq_lens = tf.placeholder(tf.int32, [None], name="target_seq_lens")

            # Sentence weights
            weights = tf.sequence_mask(self.sentence_lens, dtype=tf.float32)
            sum_weights = tf.reduce_sum(weights)

            # Source forms lengths (in sentences and by words/lemmas)
            sentence_form_len = tf.nn.embedding_lookup(self.charseq_lens, self.charseq_ids)
            word_form_len = tf.gather_nd(sentence_form_len, self.word_indexes)

            # Target sequences for words
            _target_seq_lens = tf.nn.embedding_lookup(self.target_seq_lens, self.target_ids) # 2D
            _target_seqs = tf.nn.embedding_lookup(self.target_seqs, self.target_ids)
            # Flattened to word-list
            target_lens = tf.gather_nd(_target_seq_lens, self.word_indexes)
            target_seqs = tf.gather_nd(_target_seqs, self.word_indexes)
            target_senses = tf.gather_nd(self.target_senses, self.word_indexes)
            # Add eow at the end
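            # (reverse each sequence, pad one eow column at the front, then
            # reverse back with length + 1: this appends eow right after the
            # last real symbol without disturbing the padding.)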
            target_seqs = tf.reverse_sequence(target_seqs, target_lens, 1)
            target_seqs = tf.pad(target_seqs, [[0, 0], [1, 0]], constant_values=eow)
            target_lens = target_lens + 1
            target_seqs = tf.reverse_sequence(target_seqs, target_lens, 1)
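            # The reverse -> pad -> reverse above appends eow right after the last
            # real token of each row without a scatter op; e.g. a row [5, 6, 0]
            # with length 2 becomes [6, 5, 0] -> [eow, 6, 5, 0] -> [5, 6, eow, 0].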

            # RNN Cell
            if args.rnn_cell == "LSTM":
                rnn_cell = tf.nn.rnn_cell.LSTMCell
            elif args.rnn_cell == "GRU":
                rnn_cell = tf.nn.rnn_cell.GRUCell
            else:
                raise ValueError("Unknown rnn_cell {}".format(args.rnn_cell))

            # Encoder
            enc_out = encoder_network(self.word_indexes, self.word_ids, self.charseqs, self.charseq_ids,
                                      self.charseq_lens, self.sentence_lens, num_words, num_chars, args.we_dim,
                                      args.cle_dim, rnn_cell, args.rnn_cell_dim, args.rnn_layers, args.dropout,
                                      self.is_training, args.separate_embed, args.separate_rnn)
            rnn_inputs_tags, word_rnn_outputs, sentence_rnn_outputs_tags, word_cle_states, word_cle_outputs = enc_out

            # Tagger
            loss_tag, tag_outputs, self.predictions, correct_tag, correct_tags_compositional = tag_decoder(
                self.tags, sentence_rnn_outputs_tags, weights, sum_weights, num_tags, args.tags, args.label_smoothing)

            # Tagger features for lemmatizer
            tag_feats = tag_features(tag_outputs, self.word_indexes, words_count, args.rnn_cell_dim, args.dropout,
                                        self.is_training, args.no_tags_to_lemmas, args.tag_signal_dropout)

            self.current_accuracy_tag, self.update_accuracy_tag = tf.metrics.mean(correct_tag, weights=sum_weights)
            self.current_accuracy_tags_compositional, self.update_accuracy_tags_compositional = tf.metrics.mean(
                correct_tags_compositional)

            # Lemmatizer
            loss_lem, predictions = lemma_decoder(word_rnn_outputs, tag_feats, word_cle_states, word_cle_outputs,
                                                  word_form_len, target_seqs, target_lens, self.charseq_lens,
                                                  words_count, lem_num_chars, rnn_cell, args.rnn_cell,
                                                  args.rnn_cell_dim, args.cle_dim, args.beams, args.beam_len_penalty,
                                                  args.lem_smoothing, bow, eow)
            self.lemma_predictions_training, self.lemma_predictions, self.lemma_prediction_lengths = predictions

            # Lemmatizer sense predictor
            loss_sense, self.sense_prediction = sense_predictor(word_rnn_outputs, tag_feats, target_senses, num_senses,
                                                                words_count, args.predict_sense, args.sense_smoothing)

            # Lemma predictions, loss and accuracy
            self._lemma_stats(target_seqs, target_lens, target_senses)

            # Loss, training and gradients
            # Compute combined weighted loss on tags and lemmas
            loss = loss_tag + loss_lem * args.loss_lem_w + loss_sense * args.loss_sense_w
            self.global_step = tf.train.create_global_step()
            self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(self.update_ops):
                optimizer = tf.contrib.opt.LazyAdamOptimizer(learning_rate=self.learning_rate, beta2=args.beta_2)
                gradients, variables = zip(*optimizer.compute_gradients(loss))
                self.gradient_norm = tf.global_norm(gradients)
                if args.grad_clip:
                    gradients, _ = tf.clip_by_global_norm(gradients, args.grad_clip)
                self.training = optimizer.apply_gradients(zip(gradients, variables), global_step=self.global_step, name="training")

            # Saver
            self.saver = tf.train.Saver(max_to_keep=2)

            # Summaries
            self.current_loss_tag, self.update_loss_tag = tf.metrics.mean(loss_tag, weights=sum_weights)
            self.current_loss_lem, self.update_loss_lem = tf.metrics.mean(loss_lem, weights=sum_weights)
            self.current_loss_sense, self.update_loss_sense = tf.metrics.mean(loss_sense, weights=sum_weights)
            self.current_loss, self.update_loss = tf.metrics.mean(loss, weights=sum_weights)
            self.reset_metrics = tf.variables_initializer(tf.get_collection(tf.GraphKeys.METRIC_VARIABLES))

            summary_writer = tf.contrib.summary.create_file_writer(args.logdir, flush_millis=1 * 1000)
            self.summaries = {}
            with summary_writer.as_default(), tf.contrib.summary.record_summaries_every_n_global_steps(1):
                self.summaries["train"] = [tf.contrib.summary.scalar("train/loss_tag", self.update_loss_tag),
                                           tf.contrib.summary.scalar("train/loss_sense", self.update_loss_sense),
                                           tf.contrib.summary.scalar("train/loss_lem", self.update_loss_lem),
                                           tf.contrib.summary.scalar("train/loss", self.update_loss),
                                           tf.contrib.summary.scalar("train/gradient", self.gradient_norm),
                                           tf.contrib.summary.scalar("train/accuracy_tag", self.update_accuracy_tag),
                                           tf.contrib.summary.scalar("train/accuracy_compositional_tags", self.update_accuracy_tags_compositional),
                                           tf.contrib.summary.scalar("train/accuracy_lem", self.update_accuracy_lem_train),
                                           tf.contrib.summary.scalar("train/accuracy_lemsense", self.update_accuracy_lemsense_train),
                                           tf.contrib.summary.scalar("train/learning_rate", self.learning_rate)]
            with summary_writer.as_default(), tf.contrib.summary.always_record_summaries():
                for dataset in ["dev", "test"]:
                    self.summaries[dataset] = [tf.contrib.summary.scalar(dataset + "/loss", self.current_loss),
                                               tf.contrib.summary.scalar(dataset + "/accuracy_tag", self.current_accuracy_tag),
                                               tf.contrib.summary.scalar(dataset + "/accuracy_compositional_tags", self.current_accuracy_tags_compositional),
                                               tf.contrib.summary.scalar(dataset + "/accuracy_lem", self.current_accuracy_lem),
                                               tf.contrib.summary.scalar(dataset + "/accuracy_lemsense", self.current_accuracy_lemsense)]

            # Initialize variables
            self.session.run(tf.global_variables_initializer())
            with summary_writer.as_default():
                tf.contrib.summary.initialize(session=self.session, graph=self.session.graph)
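A short aside sketching the metric-reset pattern used above (hypothetical values): tf.metrics.* ops accumulate in local variables collected under METRIC_VARIABLES, so re-initializing that collection restarts the accumulation between evaluations.

import tensorflow as tf

values = tf.placeholder(tf.float32, [None])
mean_value, update_mean = tf.metrics.mean(values)
reset_metrics = tf.variables_initializer(
    tf.get_collection(tf.GraphKeys.METRIC_VARIABLES))

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(update_mean, {values: [1.0, 3.0]})
    print(sess.run(mean_value))  # 2.0
    sess.run(reset_metrics)      # start a fresh accumulation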
Esempio n. 41
0
    def __init__(self,
                 observation_space,
                 action_space,
                 config,
                 existing_inputs=None):
        config = dict(ray.rllib.agents.impala.impala.DEFAULT_CONFIG, **config)
        assert config["batch_mode"] == "truncate_episodes", \
            "Must use `truncate_episodes` batch mode with V-trace."
        self.config = config
        self.sess = tf.get_default_session()

        # Create input placeholders
        if existing_inputs:
            actions, dones, behaviour_logits, rewards, observations, \
                prev_actions, prev_rewards = existing_inputs[:7]
            existing_state_in = existing_inputs[7:-1]
            existing_seq_lens = existing_inputs[-1]
        else:
            if isinstance(action_space, gym.spaces.Discrete):
                ac_size = action_space.n
                actions = tf.placeholder(tf.int64, [None], name="ac")
            else:
                raise UnsupportedSpaceException(
                    "Action space {} is not supported for IMPALA.".format(
                        action_space))
            dones = tf.placeholder(tf.bool, [None], name="dones")
            rewards = tf.placeholder(tf.float32, [None], name="rewards")
            behaviour_logits = tf.placeholder(tf.float32, [None, ac_size],
                                              name="behaviour_logits")
            observations = tf.placeholder(tf.float32, [None] +
                                          list(observation_space.shape))
            existing_state_in = None
            existing_seq_lens = None

        # Setup the policy
        dist_class, logit_dim = ModelCatalog.get_action_dist(
            action_space, self.config["model"])
        prev_actions = ModelCatalog.get_action_placeholder(action_space)
        prev_rewards = tf.placeholder(tf.float32, [None], name="prev_reward")
        self.model = ModelCatalog.get_model(
            {
                "obs": observations,
                "prev_actions": prev_actions,
                "prev_rewards": prev_rewards,
                "is_training": self._get_is_training_placeholder(),
            },
            observation_space,
            logit_dim,
            self.config["model"],
            state_in=existing_state_in,
            seq_lens=existing_seq_lens)
        action_dist = dist_class(self.model.outputs)
        values = self.model.value_function()
        self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          tf.get_variable_scope().name)

        def to_batches(tensor):
            if self.config["model"]["use_lstm"]:
                B = tf.shape(self.model.seq_lens)[0]
                T = tf.shape(tensor)[0] // B
            else:
                # Important: chop the tensor into batches at known episode cut
                # boundaries. TODO(ekl) this is kind of a hack
                T = self.config["sample_batch_size"]
                B = tf.shape(tensor)[0] // T
            rs = tf.reshape(tensor,
                            tf.concat([[B, T], tf.shape(tensor)[1:]], axis=0))
            # swap B and T axes
            return tf.transpose(
                rs,
                [1, 0] + list(range(2, 1 + int(tf.shape(tensor).shape[0]))))
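        # e.g. a flat [B*T] tensor with B=2, T=3, values [x0 .. x5], becomes
        # [[x0, x1, x2], [x3, x4, x5]] after the reshape and the time-major
        # [[x0, x3], [x1, x4], [x2, x5]] ([T, B]) after the transpose.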

        if self.model.state_in:
            max_seq_len = tf.reduce_max(self.model.seq_lens) - 1
            mask = tf.sequence_mask(self.model.seq_lens, max_seq_len)
            mask = tf.reshape(mask, [-1])
        else:
            mask = tf.ones_like(rewards, dtype=tf.bool)

        # Inputs are reshaped from [B * T] => [T - 1, B] for V-trace calc.
        self.loss = VTraceLoss(
            actions=to_batches(actions)[:-1],
            actions_logp=to_batches(action_dist.logp(actions))[:-1],
            actions_entropy=to_batches(action_dist.entropy())[:-1],
            dones=to_batches(dones)[:-1],
            behaviour_logits=to_batches(behaviour_logits)[:-1],
            target_logits=to_batches(self.model.outputs)[:-1],
            discount=config["gamma"],
            rewards=to_batches(rewards)[:-1],
            values=to_batches(values)[:-1],
            bootstrap_value=to_batches(values)[-1],
            valid_mask=to_batches(mask)[:-1],
            vf_loss_coeff=self.config["vf_loss_coeff"],
            entropy_coeff=self.config["entropy_coeff"],
            clip_rho_threshold=self.config["vtrace_clip_rho_threshold"],
            clip_pg_rho_threshold=self.config["vtrace_clip_pg_rho_threshold"])

        # KL divergence between worker and learner logits for debugging
        model_dist = Categorical(self.model.outputs)
        behaviour_dist = Categorical(behaviour_logits)
        self.KLs = model_dist.kl(behaviour_dist)
        self.mean_KL = tf.reduce_mean(self.KLs)
        self.max_KL = tf.reduce_max(self.KLs)
        self.median_KL = tf.contrib.distributions.percentile(self.KLs, 50.0)
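        # Per-sample KL(model || behaviour) = sum_a p_model(a) * (log p_model(a)
        # - log p_behaviour(a)); the mean/max/median summarize how far the
        # learner has drifted from the behaviour policy that produced the data.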

        # Initialize TFPolicyGraph
        loss_in = [
            ("actions", actions),
            ("dones", dones),
            ("behaviour_logits", behaviour_logits),
            ("rewards", rewards),
            ("obs", observations),
            ("prev_actions", prev_actions),
            ("prev_rewards", prev_rewards),
        ]
        LearningRateSchedule.__init__(self, self.config["lr"],
                                      self.config["lr_schedule"])
        TFPolicyGraph.__init__(
            self,
            observation_space,
            action_space,
            self.sess,
            obs_input=observations,
            action_sampler=action_dist.sample(),
            loss=self.model.loss() + self.loss.total_loss,
            loss_inputs=loss_in,
            state_inputs=self.model.state_in,
            state_outputs=self.model.state_out,
            prev_action_input=prev_actions,
            prev_reward_input=prev_rewards,
            seq_lens=self.model.seq_lens,
            max_seq_len=self.config["model"]["max_seq_len"],
            batch_divisibility_req=self.config["sample_batch_size"])

        self.sess.run(tf.global_variables_initializer())

        self.stats_fetches = {
            "stats": {
                "cur_lr":
                tf.cast(self.cur_lr, tf.float64),
                "policy_loss":
                self.loss.pi_loss,
                "entropy":
                self.loss.entropy,
                "grad_gnorm":
                tf.global_norm(self._grads),
                "var_gnorm":
                tf.global_norm(self.var_list),
                "vf_loss":
                self.loss.vf_loss,
                "vf_explained_var":
                explained_variance(
                    tf.reshape(self.loss.vtrace_returns.vs, [-1]),
                    tf.reshape(to_batches(values)[:-1], [-1])),
                "mean_KL":
                self.mean_KL,
                "max_KL":
                self.max_KL,
                "median_KL":
                self.median_KL,
            },
        }
Esempio n. 42
0
    def __init__(self, scope, a_size, trainer, TRAINING, GLOBAL_NET_SCOPE,
                 OBS_SIZE):
        with tf.variable_scope(str(scope) + '/qvalues'):
            self.inputs = tf.placeholder(shape=[None, OBS_SIZE],
                                         dtype=tf.float32)
            #           self.goal_pos=tf.placeholder(shape=[None,3],dtype=tf.float32)
            #           self.myinput = tf.transpose(self.inputs, perm=[0,2,3,1])
            # self.policy, self.value, self.state_out, self.state_in, self.state_init, self.valids = self._build_net(
            #     self.inputs, TRAINING, a_size, RNN_SIZE)
            self.policy, self.value, self.valids = self._build_net(
                self.inputs, TRAINING, a_size, RNN_SIZE, OBS_SIZE)

        if TRAINING:
            self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
            self.actions_onehot = tf.one_hot(self.actions,
                                             a_size,
                                             dtype=tf.float32)
            self.valid_actions = tf.placeholder(shape=[None, a_size],
                                                dtype=tf.float32)
            self.target_v = tf.placeholder(tf.float32, [None], 'Vtarget')
            self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)
            #           self.target_collisioncourse = tf.placeholder(tf.float32, [None])
            #           self.target_astar           = tf.placeholder(shape=[None,a_size], dtype=tf.float32)
            self.responsible_outputs = tf.reduce_sum(
                self.policy * self.actions_onehot, [1])
            self.train_value = tf.placeholder(tf.float32, [None])
            #           self.train_astar            = tf.placeholder(tf.float32, [None])
            self.optimal_actions = tf.placeholder(tf.int32, [None])
            self.optimal_actions_onehot = tf.one_hot(self.optimal_actions,
                                                     a_size,
                                                     dtype=tf.float32)

            # Loss Functions
            self.value_loss = (0.005 / 4) * tf.reduce_sum(
                self.train_value *
                tf.square(self.target_v - tf.reshape(self.value, shape=[-1])))
            self.entropy = -0.001 * tf.reduce_sum(self.policy * tf.log(
                tf.clip_by_value(self.policy, 1e-10, 1.0)))
            self.policy_loss = -0.02 * tf.reduce_sum(
                tf.log(tf.clip_by_value(self.responsible_outputs, 1e-15, 1.0))
                * self.advantages)
            self.valid_loss = -0.01 * tf.reduce_sum(
                tf.log(tf.clip_by_value(self.valids, 1e-10, 1.0)) * self.valid_actions
                + tf.log(tf.clip_by_value(1 - self.valids, 1e-10, 1.0)) * (1 - self.valid_actions))
            # self.collisioncourse_loss = - tf.reduce_sum(self.target_collisioncourse*tf.log(tf.clip_by_value(self.collisioncourse,1e-10,1.0))\
            #                                      +(1-self.target_collisioncourse)*tf.log(tf.clip_by_value(1-self.collisioncourse,1e-10,1.0)))
            # self.astar_loss    = - tf.reduce_sum(self.train_astar*tf.reduce_sum(tf.log(tf.clip_by_value(self.next_astar,1e-10,1.0)) *\
            #                                 self.target_astar+tf.log(tf.clip_by_value(1-self.next_astar,1e-10,1.0)) * (1-self.target_astar), axis=1))
            # self.astar_loss = tf.reduce_sum(self.train_astar*tf.contrib.keras.backend.categorical_crossentropy(self.target_astar,self.policy))
            self.loss = 1 * self.value_loss + self.policy_loss - 1 * self.entropy + 1 * self.valid_loss  # + .5*self.collisioncourse_loss +.5*self.astar_loss
            self.imitation_loss = 0.2 * tf.reduce_mean(
                tf.contrib.keras.backend.categorical_crossentropy(
                    self.optimal_actions_onehot, self.policy))

            # Get gradients from local network using local losses and
            # normalize the gradients using clipping
            local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope + '/qvalues')
            self.gradients = tf.gradients(self.loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)
            grads, self.grad_norms = tf.clip_by_global_norm(
                self.gradients, GRAD_CLIP)

            # Apply local gradients to global network
            global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                            GLOBAL_NET_SCOPE + '/qvalues')
            self.apply_grads = trainer.apply_gradients(zip(grads, global_vars))

            # now the gradients for imitation loss
            self.i_gradients = tf.gradients(self.imitation_loss, local_vars)
            self.i_var_norms = tf.global_norm(local_vars)
            i_grads, self.i_grad_norms = tf.clip_by_global_norm(
                self.i_gradients, GRAD_CLIP)

            # Apply local gradients to global network
            self.apply_imitation_grads = trainer.apply_gradients(
                zip(i_grads, global_vars))
        #           self.homogenize_weights = update_target_graph(str(scope)+'/qvaluesB', str(scope)+'/qvalues')

        print("Hello World... From  " + str(scope))  # :)
Esempio n. 43
0
def robust_minimize(
        optimizer,
        loss,
        loss_per_dp,
        global_step,
        batch_size,
        y_,
        clip_method='dp',
        clip_type='global',
        clip_function='soft',
        clip_threshold=0.0,
        clip_percentile=99,
        clip_perclass=True,
        window_size=1000,
        log_dir=None,
        marks=[],
        ):
    """
    This function takes as input a standard tensorflow optimizer and outputs a robust version using gradient clipping.
    It is an implementation of the paper "Stochastic Gradient Descent with Gradient Clipping is Robust to Adversarial Noise" submitted to NIPS 2018.

    Example usage:

        train_op=robust.robust_minimize(
                tf.train.AdamOptimizer(1e-4),
                loss,
                loss_per_dp,
                global_step,
                100,
                y_,
                )
    Args:
        optimizer: the tensorflow optimizer to make robust
        loss: the loss function; this should be equal to tf.reduce_mean(loss_per_dp)
        loss_per_dp: a tensor of shape (?,) that has the loss function for each data point
        global_step: the current step
        batch_size: the number of data points in each minibatch
        y_: the true response variable
        clip_method: may be one of 'dp','dp_naive','batch', or 'batch_naive';  the 'dp' and 'dp_naive' methods implement the minibatch heuristic described in the paper
        clip_type: may be 'none' to disable robustness or 'global' to enable
        clip_function: may be 'soft' or 'hard'
        clip_threshold: if this value is greater than zero, then this is the threshold used in gradient clipping; if this value is less than or equal to zero, then use the heuristic from the paper for dynamically selecting clip values
        clip_percentile: the percentile to clip at when using the dynamic heuristic; recommended to be equal to 1-epsilon
        clip_perclass: if True, when using the dynamic heuristic, maintain separate lists of past gradients for each class
        window_size: the total number of past gradients to store
        log_dir: location to output gradient information from each timestep for debug purposes; setting to None disables output
        marks: tensors to write to the log dir on each iteration for debug purposes

    Returns:
        a training op

    Raises:
        ValueError or other TensorFlow errors if the configuration options are inconsistent with each other or with the shapes of the input tensors
    """

    import tensorflow as tf
    import numpy as np
    import math

    def update_tensor(tensor,indices,updates):
        # Replace the entries of `tensor` at `indices` with `updates` by adding
        # the new values and subtracting the old ones as sparse tensors.
        newvals=tf.SparseTensor(indices,tf.stack(updates),tensor.get_shape())
        updates2=[tensor[i] for i in indices]
        oldvals=tf.SparseTensor(indices,tf.stack(updates2),tensor.get_shape())
        return tensor+tf.sparse_tensor_to_dense(newvals)-tf.sparse_tensor_to_dense(oldvals)

    if batch_size==1 or clip_type=='none':
        clip_method='batch'

    if clip_method=='batch' or clip_method=='batch_naive':
        clip_perclass=False

    window_size=int(batch_size*math.ceil(window_size/batch_size))  # round window_size up to a multiple of batch_size

    with tf.name_scope('robust_minimize'):

        # setup clipping
        epsilon=1e-6

        if clip_perclass:
            num_windows=y_.get_shape()[1]
            label_steps=tf.Variable(tf.zeros([num_windows]),trainable=False,name='label_steps')
            label_steps_update=tf.assign(label_steps,label_steps+tf.reduce_sum(y_,axis=0))
            label_steps_int=tf.cast(label_steps,tf.int32)
            y_window_ = tf.argmax(y_,axis=1)
        else:
            num_windows=1
            label_steps=tf.Variable(tf.zeros([num_windows]),trainable=False,name='label_steps')
            label_steps_update=tf.assign(label_steps,label_steps+batch_size)
            label_steps_int=tf.cast(label_steps,tf.int32)
            #label_steps=tf.cast(tf.reshape(global_step,[1]),tf.float32)
            #label_steps_update=tf.group()
            #label_steps_int=label_steps
            y_window_ = tf.zeros([batch_size])

        ms = tf.Variable(tf.zeros([num_windows,window_size]),trainable=False,name='ms')
        trim_factor=tf.minimum(1.0,label_steps/window_size)

        def get_percentile(dist,p):
            xs=[tf.contrib.distributions.percentile(dist[i],p[i])+epsilon
                for i in range(0,int(dist.get_shape()[0]))]
            return tf.stack(xs)
        m=get_percentile(ms,50*trim_factor)

        if clip_threshold<=0.0:
            clip=get_percentile(ms,clip_percentile*trim_factor)
            #clip=tf.contrib.distributions.percentile(ms,clip_percentile)+epsilon
            #clip=tf.contrib.distributions.percentile(ms_trimmed,clip_percentile_modified)
        else:
            clip=clip_threshold*tf.ones([num_windows])

        def clip_gradients(gradients,norm,clip_mod):
            clip_mod=tf.reshape(clip_mod,())

            if clip_type=='none':
                gradients2=gradients

            elif clip_type=='global':
                #if opts['verbose']:
                    #clip = tf.cond(
                            #clip>=global_norm,
                            #lambda:clip,
                            #lambda:tf.Print(clip,[global_norm,clip],'clipped'),
                            #)
                if clip_function=='soft':
                    # tf.Print is a pass-through op: its return value must be used,
                    # otherwise nothing is printed.
                    clip_mod=tf.Print(clip_mod,[clip_mod])
                    gradients2, _ = tf.clip_by_global_norm(gradients, clip_mod, use_norm=norm)
                elif clip_function=='hard':
                    gradients2=[]
                    for grad in gradients:
                        if grad is None:
                            grad2=None
                        else:
                            # zero out the whole gradient when its norm exceeds clip_mod
                            grad2=tf.cond(
                                    norm>clip_mod,
                                    lambda:tf.zeros(grad.get_shape()),
                                    lambda:grad
                                    )
                        gradients2.append(grad2)

            return gradients2
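        # 'soft' rescales every gradient by clip_mod/norm whenever norm > clip_mod,
        # while 'hard' zeroes the whole gradient instead; e.g. with norm 5.0 and
        # clip_mod 2.0, soft yields gradients * 0.4 and hard yields all zeros.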

        # calculate gradients

        if clip_method=='dp':
            # FIXME: this method makes no effort to place the variables on appropriate devices
            # when multiple devices are available
            variables = (
                    tf.trainable_variables() +
                    tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES) +
                    tf.get_collection(tf.GraphKeys._STREAMING_MODEL_PORTS)
                    )

            loop_vars = [
                    tf.constant(0,tf.int32),
                    tf.TensorArray(tf.float32,size=batch_size,clear_after_read=False),
                    [tf.TensorArray(tf.float32,size=batch_size,clear_after_read=False) for _ in variables]
                    ]

            def go(i,arr_norm,arr_vars):
                grad=tf.gradients(loss_per_dp[i],variables)
                norm=tf.global_norm(grad)
                clip_local=clip_gradients(grad,norm,tf.reduce_sum(clip*y_[i]))
                return [
                        i+1,
                        arr_norm.write(i,norm),
                        [arr.write(i,g) for arr,g in zip(arr_vars,clip_local)]
                        ]

            _,norms,clips=tf.while_loop(
                    lambda i,arr_norm,arr_vars: i<batch_size,
                    go,
                    loop_vars
                    )

            gradients2 = [ tf.reduce_mean(g.stack(),axis=0) for g in clips ]
            all_norms=norms.stack()
            global_norm= tf.reduce_mean(norms.stack())

            i1,i2,ms_new = tf.while_loop(
                lambda batch_index,window_index,ms_new: batch_index<batch_size,
                lambda batch_index,window_index,ms_new:
                    (batch_index+1
                    ,window_index+y_[batch_index]
                    ,update_tensor(
                        ms_new,
                        [(y_window_[batch_index],tf.mod(tf.cast(window_index[y_window_[batch_index]],tf.int64),window_size))],
                        [all_norms[batch_index]]
                        )
                    ),
                [0,tf.mod(label_steps,window_size),ms]
                )
            ms_update=tf.assign(ms,ms_new)

        elif clip_method=='dp_naive':
            all_gradients = []
            all_norms = []
            for i in range(0,batch_size):
                grads_and_vars=optimizer.compute_gradients(loss_per_dp[i,...])
                dp_gradients,variables = zip(*grads_and_vars)
                dp_norm = tf.global_norm(dp_gradients)
                dp_gradients2 = clip_gradients(dp_gradients,dp_norm,tf.reduce_sum(clip*y_[i]))
                all_gradients.append(dp_gradients2)
                all_norms.append(dp_norm)
            gradients2 = [ sum(i)/batch_size for i in zip(*all_gradients) ]
            global_norm= sum(all_norms)/batch_size

            #index_start=tf.mod( global_step   *batch_size,window_size)
            #index_stop =tf.mod((global_step+1)*batch_size,window_size)
            #all_norms=tf.stack(all_norms)
            #ms_update = tf.assign(ms[index_start:index_stop],all_norms)

            all_norms=tf.stack(all_norms)
            i1,i2,ms_new = tf.while_loop(
                lambda batch_index,window_index,ms_new: batch_index<batch_size,
                lambda batch_index,window_index,ms_new:
                    (batch_index+1
                    ,window_index+y_[batch_index]
                    ,update_tensor(
                        ms_new,
                        [(y_window_[batch_index],tf.mod(tf.cast(window_index[y_window_[batch_index]],tf.int64),window_size))],
                        [all_norms[batch_index]]
                        )
                    ),
                [0,tf.mod(label_steps,window_size),ms]
                )
            ms_update=tf.assign(ms,ms_new)

        elif clip_method=='batch_naive':
            all_gradients = []
            for i in range(0,batch_size):
                grads_and_vars=optimizer.compute_gradients(loss_per_dp[i,...])
                dp_gradients,variables = zip(*grads_and_vars)
                all_gradients.append(dp_gradients)
            gradients = [ sum(i)/batch_size for i in zip(*all_gradients) ]
            global_norm = tf.global_norm(gradients)
            ms_update = tf.assign(ms[0,tf.mod(global_step,window_size)],global_norm)
            gradients2 = clip_gradients(gradients,global_norm,clip)
            all_norms=tf.tile(tf.reshape(global_norm,shape=[1]),[batch_size])

        elif clip_method=='batch':
            grads_and_vars=optimizer.compute_gradients(loss)
            gradients, variables = zip(*grads_and_vars)
            global_norm = tf.global_norm(gradients)
            gradients2 = clip_gradients(gradients,global_norm,clip)
            ms_update = tf.assign(ms[0,tf.mod(global_step,window_size)],global_norm)
            all_norms=tf.tile(tf.reshape(global_norm,shape=[1]),[batch_size])

    # setup logging

    if log_dir is not None:
        log_file=log_dir+'/robust.log'
        import os
        print('    robust log file = ',os.path.abspath(log_file))
        log=open(log_file,'a',1)

        def update_log(global_step,clip,m,norms,*marks):
            for i in range(0,norms.shape[0]):
                log.write(str(global_step)+' ')
                log.write(str(clip)+' ')
                log.write(str(m)+' ')
                log.write(str(norms[i])+' ')
                #log.write(str(id_[i])+' ')
                for mark in marks:
                    log.write(str(mark[i])+' ')
                log.write('\n')
            return []

        log_update=tf.py_func(update_log,[global_step,clip,m,all_norms]+marks,[])

    else:
        log_update=tf.group()

    # apply gradients

    grads_and_vars2=zip(gradients2,variables)
    grad_updates=optimizer.apply_gradients(
            grads_and_vars2,
            global_step=global_step)
    train_op = tf.group(grad_updates,log_update,label_steps_update,ms_update)

    return train_op
Esempio n. 44
0
    def __init__(self,
                 add_summaries=False,
                 trainable=True,
                 use_naive_policy=True):

        self.trainable = trainable

        self.avg_net = getattr(AcerEstimator, "average_net", self)

        scope_name = tf.get_variable_scope().name + '/'

        with tf.name_scope("inputs"):
            # TODO: When seq_length is None, using seq_length + 1 is somewhat
            # counter-intuitive. Come up with a way to pass seq_length + 1 and
            # seq_length at the same time, perhaps with an assertion, although
            # that could be hard to understand.
            self.seq_length = tf.placeholder(tf.int32, [], "seq_length")
            self.state = get_state_placeholder()
            self.a = tf.placeholder(
                FLAGS.dtype, [seq_length, batch_size, FLAGS.num_actions],
                "actions")
            self.r = tf.placeholder(FLAGS.dtype, [seq_length, batch_size, 1],
                                    "rewards")
            self.done = tf.placeholder(tf.bool, [batch_size, 1], "done")

        with tf.variable_scope("shared"):
            shared, self.lstm = build_network(self.state, scope_name,
                                              add_summaries)

        # For k-step rollout s_i, i = 0, 1, ..., k-1, we need one additional
        # state s_k s.t. we can bootstrap value from it, i.e. we need V(s_k)
        with tf.variable_scope("V"):
            self.value_all = value = state_value_network(shared)
            value *= tf.Variable(1,
                                 dtype=FLAGS.dtype,
                                 name="value_scale",
                                 trainable=FLAGS.train_value_scale)
            self.value_last = value[-1:, ...] * tf.cast(
                ~self.done, FLAGS.dtype)[None, ...]
            self.value = value[:self.seq_length, ...]

        with tf.variable_scope("shared-policy"):
            if not FLAGS.share_network:
                # FIXME right now this only works for non-lstm version
                shared, lstm2 = build_network(self.state, scope_name,
                                              add_summaries)
                self.lstm.inputs.update(lstm2.inputs)
                self.lstm.outputs.update(lstm2.outputs)

            shared = shared[:self.seq_length, ...]

        self.state.update(self.lstm.inputs)

        with tf.variable_scope("policy"):
            self.pi, self.pi_behavior = build_policy(shared, FLAGS.policy_dist)

        with tf.name_scope("output"):
            self.a_prime = tf.squeeze(self.pi.sample_n(1), 0)
            self.action_and_stats = [self.a_prime, self.pi.stats]

        with tf.variable_scope("A"):
            # adv = self.advantage_network(tf.stop_gradient(shared))
            adv = self.advantage_network(shared)
            Q_tilt = self.SDN_network(adv, self.value, self.pi)

        with tf.variable_scope("Q"):
            self.Q_tilt_a = Q_tilt(self.a, name="Q_tilt_a")
            self.Q_tilt_a_prime = Q_tilt(self.a_prime, name="Q_tilt_a_prime")

            # Compute the importance sampling weight \rho and \rho^{'}
            with tf.name_scope("rho"):
                self.rho, self.rho_prime = self.compute_rho(
                    self.a, self.a_prime, self.pi, self.pi_behavior)

            with tf.name_scope("c_i"):
                self.c = tf.minimum(tf_const(1.),
                                    self.rho**(1. / FLAGS.num_actions), "c_i")
                tf.logging.info("c.shape = {}".format(tf_shape(self.c)))

            with tf.name_scope("Q_Retrace"):
                self.Q_ret, self.Q_opc = self.compute_Q_ret_Q_opc_recursively(
                    self.value, self.value_last, self.c, self.r, self.Q_tilt_a)

        with tf.name_scope("losses"):
            self.pi_loss, self.pi_loss_sur = self.get_policy_loss(
                self.rho, self.pi, self.a, self.Q_opc, self.value,
                self.rho_prime, self.Q_tilt_a_prime, self.a_prime)

            self.vf_loss, self.vf_loss_sur = self.get_value_loss(
                self.Q_ret, self.Q_tilt_a, self.rho, self.value)

            # The surrogate loss is the tensor we pass to the optimizer for
            # automatic gradient computation; it uses lots of stop_gradient,
            # so it differs from the true loss (self.loss).
            self.entropy = tf.reduce_sum(tf.reduce_mean(self.pi.entropy(),
                                                        axis=1),
                                         axis=0)
            self.entropy_loss = -self.entropy * FLAGS.entropy_cost_mult

            for loss in [
                    self.pi_loss_sur, self.vf_loss_sur, self.entropy_loss
            ]:
                assert len(loss.get_shape()) == 0

            self.loss_sur = (self.pi_loss_sur +
                             self.vf_loss_sur * FLAGS.lr_vp_ratio +
                             self.entropy_loss)

            self.loss = self.pi_loss + self.vf_loss + self.entropy_loss

        with tf.name_scope("grads_and_optimizer"):

            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):

                global_step = FLAGS.global_step

                self.lr = tf.train.exponential_decay(tf_const(
                    FLAGS.learning_rate),
                                                     FLAGS.global_timestep,
                                                     FLAGS.decay_steps,
                                                     FLAGS.decay_rate,
                                                     staircase=FLAGS.staircase)

                self.optimizer = tf.train.AdamOptimizer(self.lr)
                # self.optimizer = tf.train.RMSPropOptimizer(self.lr)
                # self.optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)

                tf.logging.info("Computing gradients ...")
                grads_and_vars = self.optimizer.compute_gradients(
                    self.loss_sur)

                check_none_grads(grads_and_vars)

                self.grad_norms = {
                    str(v.name): tf.sqrt(tf.reduce_sum(g**2))
                    for g, v in grads_and_vars if g is not None
                }
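                # grad_norms holds per-variable L2 norms (each equal to
                # tf.global_norm([g])); global_norm below is the combined norm.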
                self.global_norm = tf.global_norm(
                    [g for g, v in grads_and_vars if g is not None])

                self.grads_and_vars = [(tf.check_numerics(g,
                                                          message=str(v.name)),
                                        v) for g, v in grads_and_vars
                                       if g is not None]

            # Collect all trainable variables initialized here
            self.var_list = [v for g, v in self.grads_and_vars]

        self.lock = None

        self.summaries = self.summarize(add_summaries)
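A hedged sketch (hypothetical values) of the tf.check_numerics guard used above: the op passes its input through unchanged but raises at run time if the tensor contains NaN or Inf.

import tensorflow as tf

g = tf.constant([1.0, float('nan')])
checked = tf.check_numerics(g, message="bad gradient")

with tf.Session() as sess:
    try:
        sess.run(checked)
    except tf.errors.InvalidArgumentError as e:
        print(e.message)  # mentions "bad gradient"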
Esempio n. 45
0
    def __init__(self,
                 mode,
                 iterator,
                 params,
                 rev_vocab_table=None,
                 scope=None,
                 log_trainables=True):

        print_out("# creating %s graph ..." % mode)
        self.dtype = tf.float32

        self.mode = mode
        self.embedding_size = params.embedding_size
        self.num_layers = params.num_layers
        self.iterator = iterator

        # self.scheduled_sampling_prob = scheduled_sampling_prob
        # self.num_samples_for_loss = num_samples_for_loss

        self.device_manager = DeviceManager()
        self.round_robin = RoundRobin(self.device_manager)
        self.num_gpus = self.device_manager.num_available_gpus()
        print_out("# number of gpus %d" % self.num_gpus)

        with tf.variable_scope(scope or 'ta_seq2seq_graph', dtype=self.dtype):
            self.init_embeddings(params.vocab_file,
                                 params.embedding_type,
                                 self.embedding_size,
                                 scope=scope)

            with tf.variable_scope(scope or "build_network"):
                with tf.variable_scope("output_projection") as output_scope:
                    if params.boost_topic_gen_prob:
                        self.output_layer = taware_layer.JointDenseLayer(
                            params.vocab_size,
                            params.topic_vocab_size,
                            scope=output_scope,
                            name="output_projection")
                    else:
                        self.output_layer = layers_core.Dense(
                            params.vocab_size,
                            # activation=tf.nn.tanh,
                            use_bias=False,
                            name="output_projection")

            encoder_keep_prob, decoder_keep_prob = self.get_keep_probs(
                mode, params)
            self.batch_size = tf.size(self.iterator.source_sequence_lengths)

            encoder_outputs, encoder_state = self.__build_encoder(
                params, encoder_keep_prob)

            logits, sample_id, final_decoder_state = self.__build_decoder(
                params, encoder_outputs, encoder_state, decoder_keep_prob)

            if mode != tf.contrib.learn.ModeKeys.INFER:
                with tf.device(self.device_manager.tail_gpu()):
                    loss = self.__compute_loss(logits)
            else:
                loss = None

            if mode == tf.contrib.learn.ModeKeys.TRAIN:
                self.train_loss = loss
                self.word_count = tf.reduce_sum(
                    self.iterator.source_sequence_lengths) + tf.reduce_sum(
                        self.iterator.target_sequence_length)
            elif mode == tf.contrib.learn.ModeKeys.EVAL:
                self.eval_loss = loss
            elif mode == tf.contrib.learn.ModeKeys.INFER:
                self.sample_words = rev_vocab_table.lookup(
                    tf.to_int64(sample_id))

            if mode != tf.contrib.learn.ModeKeys.INFER:
                # Count the number of predicted words, used to compute perplexity (ppl).
                self.predict_count = tf.reduce_sum(
                    self.iterator.target_sequence_length)

            self.global_step = tf.Variable(0, trainable=False)
            trainables = tf.trainable_variables()

            # Gradients and SGD update operation for training the model.
            # Arrange for the embedding vars to appear at the beginning.
            if mode == tf.contrib.learn.ModeKeys.TRAIN:
                self.learning_rate = tf.constant(params.learning_rate)
                # decay
                self.learning_rate = self._get_learning_rate_decay(
                    params, self.global_step, self.learning_rate)

                # Optimizer
                if params.optimizer.lower() == "sgd":
                    opt = tf.train.GradientDescentOptimizer(self.learning_rate)
                    tf.summary.scalar("lr", self.learning_rate)
                elif params.optimizer.lower() == "adam":
                    opt = tf.train.AdamOptimizer(self.learning_rate)
                    tf.summary.scalar("lr", self.learning_rate)
                else:
                    raise ValueError('Unknown optimizer: ' + params.optimizer)

                # Gradients
                gradients = tf.gradients(self.train_loss,
                                         trainables,
                                         colocate_gradients_with_ops=True)

                clipped_grads, grad_norm = tf.clip_by_global_norm(
                    gradients, params.max_gradient_norm)
                grad_norm_summary = [tf.summary.scalar("grad_norm", grad_norm)]
                grad_norm_summary.append(
                    tf.summary.scalar("clipped_gradient",
                                      tf.global_norm(clipped_grads)))

                self.grad_norm = grad_norm

                self.update = opt.apply_gradients(zip(clipped_grads,
                                                      trainables),
                                                  global_step=self.global_step)

                # Summary
                self.train_summary = tf.summary.merge([
                    tf.summary.scalar("lr", self.learning_rate),
                    tf.summary.scalar("train_loss", self.train_loss),
                ] + grad_norm_summary)

            if mode == tf.contrib.learn.ModeKeys.INFER:
                self.infer_logits, self.sample_id = logits, sample_id
                self.infer_summary = tf.no_op()

            # Saver
            self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)

            # Print trainable variables
            if log_trainables:
                print_out("# Trainable variables")
                for trainable in trainables:
                    print_out("  %s, %s, %s" %
                              (trainable.name, str(
                                  trainable.get_shape()), trainable.op.device))
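A small sketch (hypothetical values) of a subtlety the summaries above rely on: tf.clip_by_global_norm returns the pre-clip norm, which is why the post-clip norm is recomputed with tf.global_norm on the clipped list.

import tensorflow as tf

grads = [tf.constant([3.0, 4.0])]
clipped, pre_norm = tf.clip_by_global_norm(grads, 1.0)

with tf.Session() as sess:
    print(sess.run(pre_norm))                 # 5.0, the norm before clipping
    print(sess.run(tf.global_norm(clipped)))  # 1.0, the norm after clipping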
Esempio n. 46
0
    def __init__(self, env, task, visualise):
        """
An implementation of the A3C algorithm that is reasonably well-tuned for the VNC environments.
Below, we will have a modest amount of complexity due to the way TensorFlow handles data parallelism.
But overall, we'll define the model, specify its inputs, and describe how the policy gradients step
should be computed.
"""

        self.env = env
        self.task = task
        ob_space = self.env.observation_space
        ac_space = self.env.action_space
        worker_device = "/job:worker/task:{}/cpu:0".format(task)
        with tf.device(
                tf.train.replica_device_setter(1,
                                               worker_device=worker_device)):
            with tf.variable_scope("global"):
                self.network = CnnPolicy(ob_space, ac_space, 1, 1, reuse=False)
                self.global_step = tf.get_variable(
                    "global_step", [],
                    tf.int32,
                    initializer=tf.constant_initializer(0, dtype=tf.int32),
                    trainable=False)

        with tf.device(worker_device):
            with tf.variable_scope("local"):
                self.local_network = pi = CnnPolicy(ob_space,
                                                    ac_space,
                                                    1,
                                                    1,
                                                    reuse=False)
                pi.global_step = self.global_step

            self.ac = tf.placeholder(tf.float32, [None, env.action_space.n],
                                     name="ac")
            self.adv = tf.placeholder(tf.float32, [None], name="adv")
            self.r = tf.placeholder(tf.float32, [None], name="r")

            log_prob_tf = tf.nn.log_softmax(pi.logits)
            prob_tf = tf.nn.softmax(pi.logits)

            # the "policy gradients" loss:  its derivative is precisely the policy gradient
            # notice that self.ac is a placeholder that is provided externally.
            # adv will contain the advantages, as calculated in process_rollout
            pi_loss = -tf.reduce_mean(
                tf.reduce_sum(log_prob_tf * self.ac, [1]) * self.adv)

            # loss of value function
            vf_loss = 0.5 * tf.reduce_mean(tf.square(pi.vf - self.r))
            print(vf_loss.get_shape(), pi.vf.get_shape())
            entropy = -tf.reduce_sum(prob_tf * log_prob_tf)

            bs = tf.to_float(tf.shape(pi.x)[0])
            self.loss = pi_loss + 0.5 * vf_loss - entropy * 0.01

            # 20 represents the number of "local steps":  the number of timesteps
            # we run the policy before we update the parameters.
            # The larger local steps is, the lower is the variance in our policy gradients estimate
            # on the one hand;  but on the other hand, we get less frequent parameter updates, which
            # slows down learning.  In this code, we found that making local steps be much
            # smaller than 20 makes the algorithm more difficult to tune and to get to work.
            self.runner = RunnerThread(env, pi, 20, visualise)

            grads = tf.gradients(self.loss, pi.var_list)

            # summ = tf.Summary()
            # summ.value.add(tag="model/policy_loss", simple_value=pi_loss / bs)
            # summ.value.add(tag="model/value_loss", simple_value=vf_loss / bs)
            # summ.value.add(tag="model/entropy", simple_value=entropy / bs)
            # summ.value.add(tag="model/state", simple_value=pi.x)
            # summ.value.add(tag="model/grad_global_norm", simple_value=tf.global_norm(grads))
            # summ.value.add(tag="model/var_global_norm", simple_value=tf.global_norm(pi.var_list))
            # if use_tf12_api:
            tf.summary.scalar("model/policy_loss", pi_loss / bs)
            tf.summary.scalar("model/value_loss", vf_loss / bs)
            tf.summary.scalar("model/entropy", entropy / bs)
            tf.summary.image("model/state", pi.x)
            tf.summary.scalar("model/grad_global_norm", tf.global_norm(grads))
            tf.summary.scalar("model/var_global_norm",
                              tf.global_norm(pi.var_list))
            self.summary_op = tf.summary.merge_all()

            # else:
            #     tf.scalar_summary("model/policy_loss", pi_loss / bs)
            #     tf.scalar_summary("model/value_loss", vf_loss / bs)
            #     tf.scalar_summary("model/entropy", entropy / bs)
            #     tf.image_summary("model/state", pi.x)
            #     tf.scalar_summary("model/grad_global_norm", tf.global_norm(grads))
            #     tf.scalar_summary("model/var_global_norm", tf.global_norm(pi.var_list))
            # self.summary_op = tf.merge_all()

            grads, _ = tf.clip_by_global_norm(grads, 0.5)

            # copy weights from the parameter server to the local model
            self.sync = tf.group(*[
                v1.assign(v2)
                for v1, v2 in zip(pi.var_list, self.network.var_list)
            ])

            grads_and_vars = list(zip(grads, self.network.var_list))
            inc_step = self.global_step.assign_add(tf.shape(pi.x)[0])

            # each worker has a different set of adam optimizer parameters
            opt = tf.train.AdamOptimizer(7e-4)
            self.train_op = tf.group(opt.apply_gradients(grads_and_vars),
                                     inc_step)
            self.summary_writer = None
            self.local_steps = 0
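A minimal sketch (hypothetical variables) of the `sync` pattern above: a group of assign ops that copies the parameter-server weights into the local replica.

import tensorflow as tf

global_w = tf.Variable([1.0, 2.0])
local_w = tf.Variable([0.0, 0.0])
sync = tf.group(*[v1.assign(v2) for v1, v2 in zip([local_w], [global_w])])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(sync)
    print(sess.run(local_w))  # [1.0, 2.0]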
Esempio n. 47
0
def train(
    model_dir,
    hp=None,
    max_steps=1e7,
    display_step=500,
    ruleset='mante',
    rule_trains=None,
    rule_prob_map=None,
    seed=0,
    rich_output=True,
    load_dir=None,
    trainables=None,
    fixReadoutandBias=False,
    fixBias=False,
):
    """Train the network.

    Args:
        model_dir: str, training directory
        hp: dictionary of hyperparameters
        max_steps: int, maximum number of training steps
        display_step: int, display steps
        ruleset: the set of rules to train
        rule_trains: list of rules to train, if None then all rules possible
        rule_prob_map: None or dictionary of relative rule probability
        seed: int, random seed to be used

    Returns:
        model is stored at model_dir/model.ckpt
        training configuration is stored at model_dir/hp.json
    """

    tools.mkdir_p(model_dir)

    # Network parameters
    default_hp = get_default_hp(ruleset)
    if hp is not None:
        default_hp.update(hp)
    hp = default_hp
    hp['seed'] = seed
    hp['rng'] = np.random.RandomState(seed)

    # Rules to train and test. Rules in a set are trained together
    if rule_trains is None:
        # By default, training all rules available to this ruleset
        hp['rule_trains'] = task.rules_dict[ruleset]
    else:
        hp['rule_trains'] = rule_trains
    hp['rules'] = hp['rule_trains']

    # Assign probabilities for rule_trains.
    if rule_prob_map is None:
        rule_prob_map = dict()

    # Turn into rule_trains format
    hp['rule_probs'] = None
    if hasattr(hp['rule_trains'], '__iter__'):
        # Set default as 1.

        rule_prob = np.array(
            [rule_prob_map.get(r, 1.) for r in hp['rule_trains']])
        hp['rule_probs'] = list(rule_prob / np.sum(rule_prob))

    tools.save_hp(hp, model_dir)

    # Build the model
    with tf.device('gpu:0'):
        model = Model(model_dir, hp=hp)

        # Display hp
        for key, val in hp.items():
            print('{:20s} = '.format(key) + str(val))

        if fixReadoutandBias:
            my_var_list = [
                var for var in model.var_list
                if 'rnn/leaky_rnn_cell/kernel:0' in var.name
            ]
            print(my_var_list)
        elif fixBias:
            my_var_list = [
                var for var in model.var_list
                if 'rnn/leaky_rnn_cell/kernel:0' in var.name
                or 'output/weights:0' in var.name
            ]
        else:
            my_var_list = model.var_list

        model.set_optimizer(var_list=my_var_list)

        # Store results
        log = defaultdict(list)
        log['model_dir'] = model_dir

        # Record time
        t_start = time.time()

        # Use customized session that launches the graph as well
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            # penalty on deviation from initial weight
            if hp['l2_weight_init'] > 0:
                anchor_ws = sess.run(model.weight_list)
                for w, w_val in zip(model.weight_list, anchor_ws):
                    model.cost_reg += (hp['l2_weight_init'] *
                                       tf.nn.l2_loss(w - w_val))

                model.set_optimizer(var_list=my_var_list)

            # partial weight training
            if ('p_weight_train' in hp and (hp['p_weight_train'] is not None)
                    and hp['p_weight_train'] < 1.0):
                for w in model.weight_list:
                    w_val = sess.run(w)
                    w_size = sess.run(tf.size(w))
                    w_mask_tmp = np.linspace(0, 1, w_size)
                    hp['rng'].shuffle(w_mask_tmp)
                    ind_fix = w_mask_tmp > hp['p_weight_train']
                    w_mask = np.zeros(w_size, dtype=np.float32)
                    w_mask[ind_fix] = 1e-1  # will be squared in l2_loss
                    w_mask = tf.constant(w_mask)
                    w_mask = tf.reshape(w_mask, w.shape)
                    model.cost_reg += tf.nn.l2_loss((w - w_val) * w_mask)
                model.set_optimizer(var_list=my_var_list)

            step = 0
            run_ave_time = []
            while step * hp['batch_size_train'] <= max_steps:
                try:
                    # Validation
                    if step % display_step == 0:
                        # NOTE: building this op inside the loop grows the graph
                        # on every validation step; ideally create it once, before
                        # the training loop.
                        grad_norm = tf.global_norm(model.clipped_gs)
                        grad_norm_np = sess.run(grad_norm)
                        log['grad_norm'].append(grad_norm_np.item())
                        log['trials'].append(step * hp['batch_size_train'])
                        log['times'].append(time.time() - t_start)
                        log = do_eval(sess, model, log, hp['rule_trains'])
                        # if log['perf_avg'][-1] > model.hp['target_perf']:
                        # check if minimum performance is above target
                        if log['perf_min'][-1] > model.hp['target_perf']:
                            print('Perf reached the target: {:0.2f}'.format(
                                hp['target_perf']))
                            break

                        if rich_output:
                            display_rich_output(model, sess, step, log,
                                                model_dir)

                    # Training

                    dtStart = datetime.now()
                    sess.run(model.train_step)
                    dtEnd = datetime.now()

                    if len(run_ave_time) == 0:
                        run_ave_time = np.expand_dims(
                            (dtEnd - dtStart).total_seconds(), axis=0)
                    else:
                        run_ave_time = np.concatenate(
                            (run_ave_time,
                             np.expand_dims((dtEnd - dtStart).total_seconds(),
                                            axis=0)))

                    # print(np.mean(run_ave_time))
                    # print((dtEnd-dtStart).total_seconds())

                    step += 1

                    if step < 10:
                        model.save_ckpt(step)

                    if step < 1000:
                        if step % (display_step // 10) == 0:
                            model.save_ckpt(step)

                    if step % display_step == 0:
                        model.save_ckpt(step)

                except KeyboardInterrupt:
                    print("Optimization interrupted by user")
                    break

            print("Optimization finished!")
Esempio n. 48
0
    def __init__(self, observation_space, action_space, config):
        config = dict(ray.rllib.agents.a3c.a3c.DEFAULT_CONFIG, **config)
        self.config = config
        self.sess = tf.get_default_session()

        # Setup the policy
        self.observations = tf.placeholder(
            tf.float32, [None] + list(observation_space.shape))
        dist_class, logit_dim = ModelCatalog.get_action_dist(
            action_space, self.config["model"])
        prev_actions = ModelCatalog.get_action_placeholder(action_space)
        prev_rewards = tf.placeholder(tf.float32, [None], name="prev_reward")
        self.model = ModelCatalog.get_model({
            "obs": self.observations,
            "prev_actions": prev_actions,
            "prev_rewards": prev_rewards
        }, observation_space, logit_dim, self.config["model"])
        action_dist = dist_class(self.model.outputs)
        self.vf = tf.reshape(
            linear(self.model.last_layer, 1, "value", normc_initializer(1.0)),
            [-1])
        self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          tf.get_variable_scope().name)

        # Setup the policy loss
        if isinstance(action_space, gym.spaces.Box):
            ac_size = action_space.shape[0]
            actions = tf.placeholder(tf.float32, [None, ac_size], name="ac")
        elif isinstance(action_space, gym.spaces.Discrete):
            actions = tf.placeholder(tf.int64, [None], name="ac")
        else:
            raise UnsupportedSpaceException(
                "Action space {} is not supported for A3C.".format(
                    action_space))
        advantages = tf.placeholder(tf.float32, [None], name="advantages")
        self.v_target = tf.placeholder(tf.float32, [None], name="v_target")
        self.loss = A3CLoss(action_dist, actions, advantages, self.v_target,
                            self.vf, self.config["vf_loss_coeff"],
                            self.config["entropy_coeff"])

        # Initialize TFPolicyGraph
        loss_in = [
            ("obs", self.observations),
            ("actions", actions),
            ("prev_actions", prev_actions),
            ("prev_rewards", prev_rewards),
            ("advantages", advantages),
            ("value_targets", self.v_target),
        ]
        LearningRateSchedule.__init__(self, self.config["lr"],
                                      self.config["lr_schedule"])
        TFPolicyGraph.__init__(
            self,
            observation_space,
            action_space,
            self.sess,
            obs_input=self.observations,
            action_sampler=action_dist.sample(),
            loss=self.loss.total_loss,
            loss_inputs=loss_in,
            state_inputs=self.model.state_in,
            state_outputs=self.model.state_out,
            prev_action_input=prev_actions,
            prev_reward_input=prev_rewards,
            seq_lens=self.model.seq_lens,
            max_seq_len=self.config["model"]["max_seq_len"])

        self.stats_fetches = {
            "stats": {
                "cur_lr": tf.cast(self.cur_lr, tf.float64),
                "policy_loss": self.loss.pi_loss,
                "policy_entropy": self.loss.entropy,
                "grad_gnorm": tf.global_norm(self._grads),
                "var_gnorm": tf.global_norm(self.var_list),
                "vf_loss": self.loss.vf_loss,
                "vf_explained_var": explained_variance(self.v_target, self.vf),
            },
        }

        self.sess.run(tf.global_variables_initializer())
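
For reference, tf.global_norm(t_list) treats the whole list as one stacked vector and returns sqrt(sum_i ||t_i||_2^2), which is why the stats above apply it both to gradients (grad_gnorm) and to the trainable variables themselves (var_gnorm). A self-contained check:

import tensorflow as tf

a = tf.constant([3.0, 4.0])    # ||a||_2 = 5
b = tf.constant([12.0])        # ||b||_2 = 12
norm = tf.global_norm([a, b])  # sqrt(5**2 + 12**2) = 13
with tf.Session() as sess:
    print(sess.run(norm))      # 13.0
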
Esempio n. 49
    def add_optimizer_op(self, scope):
        """
        Set self.train_op and self.grad_norm
        """

        ##############################################################
        """
        TODO: 1. get Adam Optimizer (remember that we defined self.lr in the placeholders
                section)
              2. compute grads wrt to variables in scope for self.loss
              3. clip the grads by norm with self.config.clip_val if self.config.grad_clip
                is True
              4. apply the gradients and store the train op in self.train_op
               (sess.run(train_op) must update the variables)
              5. compute the global norm of the gradients and store this scalar
                in self.grad_norm

        HINT: you may find the following functions useful
            - tf.get_collection
            - optimizer.compute_gradients
            - tf.clip_by_norm
            - optimizer.apply_gradients
            - tf.global_norm
             
             you can access config variable by writing self.config.variable_name

        (be sure that you set self.train_op and self.grad_norm)
        """
        ##############################################################
        #################### YOUR CODE HERE - 8-12 lines #############
        var_lst = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    scope=scope)

        # print('-' * 20)
        # for var in var_lst:
        # print(' -- ' + var.name)
        # print('-' * 20)

        optimizer = tf.train.AdamOptimizer(self.lr)
        # self.train_op = optimizer.minimize(self.loss, var_list=var_lst)
        grads_and_vars_lst = optimizer.compute_gradients(self.loss,
                                                         var_list=var_lst)
        if self.config.grad_clip:
            grads_clipped_and_vars_lst = []
            for grad_and_var in grads_and_vars_lst:
                grad, var = grad_and_var
                grad_clipped = tf.clip_by_norm(grad, self.config.clip_val)
                grads_clipped_and_vars_lst.append((grad_clipped, var))
            # self.train_op = optimizer.apply_gradients(
            #     grads_clipped_and_vars_lst)
            train_op = optimizer.apply_gradients(grads_clipped_and_vars_lst)
            grads_lst = [x[0] for x in grads_clipped_and_vars_lst]
        else:
            train_op = optimizer.apply_gradients(grads_and_vars_lst)
            # self.train_op = optimizer.apply_gradients(grads_and_vars_lst)
            grads_lst = [x[0] for x in grads_and_vars_lst]

        # global norm is just a norm of stacked vectors
        # self.grad_norm = tf.global_norm(grads_lst)
        grad_norm = tf.global_norm(grads_lst)
        # var_lst = tf.get_collection(
        #     tf.GraphKeys.TRAINABLE_VARIABLES, scope='target_q')
        # debugging: compare norms of target_q and q as an indirect
        # check of the weight update
        # self.target_q_norm = tf.global_norm(var_lst)
        # var_lst = tf.get_collection(
        # tf.GraphKeys.TRAINABLE_VARIABLES, scope='q')
        # self.q_norm = tf.global_norm(var_lst)
        return train_op, grad_norm
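
The solution above clips with tf.clip_by_norm, which bounds each gradient tensor independently and can therefore change the direction of the combined gradient; tf.clip_by_global_norm instead rescales all tensors by one shared factor so the joint norm stays under the threshold. A sketch of the global variant under the same assumptions (an optimizer, self.loss, var_lst, and self.config.clip_val as above):

grads_and_vars = optimizer.compute_gradients(self.loss, var_list=var_lst)
grads = [g for g, _ in grads_and_vars]
variables = [v for _, v in grads_and_vars]

# every tensor is scaled by clip_val / max(global_norm, clip_val),
# so the direction of the combined gradient is preserved
clipped, grad_norm = tf.clip_by_global_norm(grads, self.config.clip_val)
train_op = optimizer.apply_gradients(zip(clipped, variables))
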
Esempio n. 50
    def __init__(self, s_size, a_size, scope, trainer):
        with tf.variable_scope(scope):
            #Input and visual encoding layers
            self.inputs = tf.placeholder(shape=[None, s_size],
                                         dtype=tf.float32)
            self.imageIn = tf.reshape(self.inputs, shape=[-1, 84, 84, 1])
            self.conv1 = slim.conv2d(activation_fn=tf.nn.elu,
                                     inputs=self.imageIn,
                                     num_outputs=16,
                                     kernel_size=[8, 8],
                                     stride=[4, 4],
                                     padding='VALID')
            self.conv2 = slim.conv2d(activation_fn=tf.nn.elu,
                                     inputs=self.conv1,
                                     num_outputs=32,
                                     kernel_size=[4, 4],
                                     stride=[2, 2],
                                     padding='VALID')
            hidden = slim.fully_connected(slim.flatten(self.conv2),
                                          256,
                                          activation_fn=tf.nn.elu)

            #Recurrent network for temporal dependencies
            lstm_cell = tf.contrib.rnn.BasicLSTMCell(256, state_is_tuple=True)
            c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
            h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)
            self.state_init = [c_init, h_init]
            c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
            h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
            self.state_in = (c_in, h_in)
            rnn_in = tf.expand_dims(hidden, [0])
            step_size = tf.shape(self.imageIn)[:1]
            state_in = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)
            lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
                lstm_cell,
                rnn_in,
                initial_state=state_in,
                sequence_length=step_size,
                time_major=False)
            lstm_c, lstm_h = lstm_state
            self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
            rnn_out = tf.reshape(lstm_outputs, [-1, 256])

            #Output layers for policy and value estimations
            self.policy = slim.fully_connected(
                rnn_out,
                a_size,
                activation_fn=tf.nn.softmax,
                weights_initializer=normalized_columns_initializer(0.01),
                biases_initializer=None)
            self.value = slim.fully_connected(
                rnn_out,
                1,
                activation_fn=None,
                weights_initializer=normalized_columns_initializer(1.0),
                biases_initializer=None)

            #Only the worker network needs ops for loss functions and gradient updating.
            if scope != 'global':
                self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
                self.actions_onehot = tf.one_hot(self.actions,
                                                 a_size,
                                                 dtype=tf.float32)
                self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
                self.advantages = tf.placeholder(shape=[None],
                                                 dtype=tf.float32)

                self.responsible_outputs = tf.reduce_sum(
                    self.policy * self.actions_onehot, [1])

                #Loss functions
                self.value_loss = 0.5 * tf.reduce_sum(
                    tf.square(self.target_v - tf.reshape(self.value, [-1])))
                self.entropy = -tf.reduce_sum(
                    self.policy * tf.log(self.policy))
                self.policy_loss = -tf.reduce_sum(
                    tf.log(self.responsible_outputs) * self.advantages)
                self.loss = 0.5 * self.value_loss + self.policy_loss - self.entropy * 0.01

                #Get gradients from local network using local losses
                local_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope)
                self.gradients = tf.gradients(self.loss, local_vars)
                self.var_norms = tf.global_norm(local_vars)
                grads, self.grad_norms = tf.clip_by_global_norm(
                    self.gradients, 40.0)

                #Apply local gradients to global network
                global_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
                self.apply_grads = trainer.apply_gradients(
                    zip(grads, global_vars))
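
Because grad_norms and var_norms above are plain tensors, a worker can fetch them together with apply_grads to watch for gradients hitting the 40.0 clip ceiling. A minimal usage sketch, where sess, feed_dict, and the worker network instance local_net are illustrative names:

_, g_norm, v_norm = sess.run(
    [local_net.apply_grads, local_net.grad_norms, local_net.var_norms],
    feed_dict=feed_dict)
# grad_norms is the pre-clip global norm returned by tf.clip_by_global_norm,
# so a value >= 40.0 means this update was actually clipped
print('grad norm %.1f (clipped: %s), var norm %.1f'
      % (g_norm, g_norm >= 40.0, v_norm))
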
Esempio n. 51
def train_loop(
    session,
    inputs,
    cost,
    train_data,
    stop_after,
    prints=[],
    test_data=None,
    test_every=None,
    callback=None,
    callback_every=None,
    inject_iteration=False,
    optimizer=tf.train.AdamOptimizer(),
    save_every=1000,
    save_output=False
    ):

    prints = [('cost', cost)] + prints

    grads_and_vars = optimizer.compute_gradients(
        cost,
        colocate_gradients_with_ops=True
    )

    print "Params:"
    total_param_count = 0
    for g, v in grads_and_vars:
        shape = v.get_shape()
        shape_str = ",".join([str(x) for x in v.get_shape()])

        param_count = 1
        for dim in shape:
            param_count *= int(dim)
        total_param_count += param_count

        if g is None:
            print "\t{} ({}) [no grad!]".format(v.name, shape_str)
        else:
            print "\t{} ({})".format(v.name, shape_str)
    print "Total param count: {}".format(
        locale.format("%d", total_param_count, grouping=True)
    )

    # for i in xrange(len(grads_and_vars)):
    #     g, v = grads_and_vars[i]
    #     if g == None:
    #         grads_and_vars[i] = (tf.zeros_like(v), v)
    #     else:
    #         grads_and_vars[i] = (tf.clip_by_value(g, -5., 5.), v)

    grads = [g for g,v in grads_and_vars]
    _vars = [v for g,v in grads_and_vars]

    global_norm = tf.global_norm(grads)
    prints = prints + [('gradnorm', global_norm)]

    grads, global_norm = tf.clip_by_global_norm(grads, 5.0, use_norm=global_norm)
    grads_and_vars = zip(grads, _vars)

    train_op = optimizer.apply_gradients(grads_and_vars)

    def train_fn(input_vals):
        return session.run(
            [p[1] for p in prints] + [train_op],
            feed_dict={sym:real for sym, real in zip(inputs, input_vals)}
        )[:-1]

    def eval_fn(input_vals):
        return session.run(
            [p[1] for p in prints],
            feed_dict={sym:real for sym, real in zip(inputs, input_vals)}
        )

    _vars = {
        'epoch': 0,
        'iteration': 0,
        'seconds': 0.,
        'last_callback': 0,
        'last_test': 0
    }

    train_generator = train_data()

    saver = tf.train.Saver()

    if os.path.isfile(TRAIN_LOOP_FILE):
        print "Resuming interrupted train loop session"
        with open(TRAIN_LOOP_FILE, 'r') as f:
            _vars = pickle.load(f)
        saver.restore(session, os.getcwd()+"/"+PARAMS_FILE)

        print "Fast-fowarding dataset generator"
        dataset_iters = 0
        while dataset_iters < _vars['iteration']:
            try:
                train_generator.next()
            except StopIteration:
                train_generator = train_data()
                train_generator.next()
            dataset_iters += 1
    else:
        print "Initializing variables..."
        session.run(tf.initialize_all_variables())
        print "done!"

    train_output_entries = [[]]
    
    def log(outputs, test, _vars, extra_things_to_print):
        entry = collections.OrderedDict()
        for key in ['epoch', 'iteration', 'seconds']:
            entry[key] = _vars[key]
        for i,p in enumerate(prints):
            if test:
                entry['test '+p[0]] = outputs[i]
            else:
                entry['train '+p[0]] = outputs[i]

        train_output_entries[0].append(entry)

        to_print = entry.items()
        to_print.extend(extra_things_to_print)
        print_str = ""
        for k,v in to_print:
            if isinstance(v, int):
                print_str += "{}:{}\t".format(k,v)
            else:
                print_str += "{}:{:.4f}\t".format(k,v)
        print print_str[:-1] # omit the last \t

    def save_train_output_and_params(iteration):
        if not save_output:
            return

        print "Saving output and params..."

        # Saving weights takes a while. To minimize risk of interruption during
        # a critical segment, we write weights to a temp file, delete the old
        # file, and rename the temp file.

        start_time = time.time()
        saver.save(session, PARAMS_FILE + '_tmp')
        print "saver.save time: {}".format(time.time() - start_time)
        start_time = time.time()
        if os.path.isfile(PARAMS_FILE):
            os.remove(PARAMS_FILE)
        os.rename(PARAMS_FILE+'_tmp', PARAMS_FILE)
        print "move and rename time: {}".format(time.time() - start_time)

        # shutil.copyfile(PARAMS_FILE, PARAMS_FILE+'_'+str(iteration))

        start_time = time.time()
        with open(TRAIN_LOOP_FILE, 'w') as f:
            pickle.dump(_vars, f)
        print "_vars pickle dump time: {}".format(time.time() - start_time)

        start_time = time.time()
        with open(TRAIN_OUTPUT_FILE, 'a') as f:
            for entry in train_output_entries[0]:
                for k,v in entry.items():
                    if isinstance(v, np.generic):
                        entry[k] = np.asscalar(v)
                f.write(json.dumps(entry) + "\n")
        print "ndjson write time: {}".format(time.time() - start_time)

        train_output_entries[0] = []

    while True:

        if _vars['iteration'] == stop_after:
            save_train_output_and_params(_vars['iteration'])

            print "Done!"

            try: # This only matters on Ishaan's computer
                import experiment_tools
                experiment_tools.send_sms("done!")
            except ImportError:
                pass

            break

        data_load_start_time = time.time()
        try:
            input_vals = train_generator.next()
        except StopIteration:
            # epoch boundary: restart the generator
            train_generator = train_data()
            input_vals = train_generator.next()
            _vars['epoch'] += 1
        data_load_time = time.time() - data_load_start_time

        if inject_iteration:
            input_vals = [np.int32(_vars['iteration'])] + list(input_vals)

        start_time = time.time()
        outputs = train_fn(input_vals)
        run_time = time.time() - start_time

        _vars['seconds'] += run_time
        _vars['iteration'] += 1

        log(outputs, False, _vars, [('iter time', run_time), ('data time', data_load_time)])

        if (test_data is not None) and _vars['iteration'] % test_every == (test_every-1):
            if inject_iteration:
                test_outputs = [
                    eval_fn([np.int32(_vars['iteration'])] + list(input_vals))
                    for input_vals in test_data()
                ]
            else:
                test_outputs = [
                    eval_fn(input_vals) 
                    for input_vals in test_data()
                ]
            mean_test_outputs = np.array(test_outputs).mean(axis=0)

            log(mean_test_outputs, True, _vars, [])

        if (callback is not None) and _vars['iteration'] % callback_every == (callback_every-1):
            tag = "iter{}".format(_vars['iteration'])
            callback(tag)

        if _vars['iteration'] % save_every == (save_every-1):
            save_train_output_and_params(_vars['iteration'])
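
One detail in this loop worth calling out: the precomputed global_norm (already reported under 'gradnorm') is handed back to tf.clip_by_global_norm through use_norm, so the norm is not computed twice in the graph. In isolation, assuming a list grads of gradient tensors:

import tensorflow as tf

norm = tf.global_norm(grads)            # reported in training prints
clipped, norm = tf.clip_by_global_norm(
    grads, 5.0, use_norm=norm)          # reuses the norm op instead of rebuilding it
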
Esempio n. 52
    def __init__(self, args, sample=False):

        def tf_normal(x, mu, s, rho):
            with tf.variable_scope('normal'):
                x = tf.expand_dims(x,2)
                norm = tf.sub(x[:,:args.chunk_samples,:], mu)
                z = tf.div(tf.square(norm), s)
                tf.histogram_summary('z-score', tf.div(norm,tf.sqrt(s)))
                tf.histogram_summary('std-dev', tf.sqrt(s))
                tf.scalar_summary('std-dev-mean', tf.reduce_mean(tf.sqrt(s)))
                denom_log = tf.log(tf.maximum(1e-20,tf.sqrt(2*np.pi*s)),name='denom_log')
                result = tf.reduce_sum(-z/2-denom_log + 
                                       (tf.log(rho,name='log_rho')*(1+x[:,args.chunk_samples:,:])
                                        +tf.log(tf.maximum(1e-20,1-rho),name='log_rho_inv')*(1-x[:,args.chunk_samples:,:]))/2, 1) 

            return result

        def get_lossfunc(z_pi, z_mu,  z_sigma, z_rho, x):
            normals = tf_normal(x, z_mu, z_sigma, z_rho)
            result = -tf_logsumexp(tf.log(tf.maximum(1e-20,z_pi))+normals)

            return tf.reduce_sum(result)
        
        def tf_logsumexp(x):
            with tf.variable_scope('logsumexp'):
                max_val = tf.reduce_max(x,1, keep_dims=True) 
                ret = tf.log(tf.reduce_sum(tf.exp(x - max_val), 1, keep_dims=True)) + max_val
                return ret

        def get_mixture_coef(output):
            with tf.variable_scope('get_mixture'):
                z = output
                z_pi = z[:,:self.num_mixture]
                z_mu = tf.reshape(z[:,self.num_mixture:(args.chunk_samples+1)*self.num_mixture],[-1,args.chunk_samples,self.num_mixture],name='z_mu')
                z_sigma = tf.reshape(z[:,(args.chunk_samples+1)*self.num_mixture:(2*args.chunk_samples+1)*self.num_mixture],[-1,args.chunk_samples,self.num_mixture])
                z_rho = tf.reshape(z[:,(2*args.chunk_samples+1)*self.num_mixture:],[-1,args.chunk_samples,self.num_mixture])
                
                # apply transformations

                #softmax with lower bound
                #z_pi = (tf.nn.softmax(z_pi, name='z_pi')+0.01)/(1.+0.01*args.num_mixture)
                z_pi = tf.nn.softmax(z_pi, name='z_pi')
                z_sigma = tf.exp(z_sigma, name='z_sigma')
                z_rho = tf.maximum(1e-20,tf.sigmoid(z_rho, name='z_rho'))

                return [z_pi, z_mu, z_sigma, z_rho]

        self.args = args
        if sample:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = tf.nn.rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = tf.nn.rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = tf.nn.rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size)

        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * args.num_layers)

        if (sample == False and args.keep_prob < 1): # training mode
            cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob = args.keep_prob)

        self.cell = cell

        self.input_data = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, args.seq_length, 2*args.chunk_samples], name='input_data')
        self.target_data = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, args.seq_length, 2*args.chunk_samples],name = 'target_data')
        self.initial_state = cell.zero_state(batch_size=args.batch_size, dtype=tf.float32)

        self.num_mixture = args.num_mixture

        # 
        NOUT = self.num_mixture * (1 + 3*(args.chunk_samples))

        output_w = tf.Variable(tf.random_normal([args.rnn_size, NOUT],stddev=0.2), name="output_w")
        output_b = tf.Variable(tf.zeros([NOUT]), name="output_b")

        #inputs = tf.split(1, args.seq_length, self.input_data)
        #inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
        #inputs = tf.unpack(tf.transpose(self.input_data, perm=(1,0,2)))

        # input shape: (batch_size, n_steps, n_input)
        inputs = tf.transpose(self.input_data, [1, 0, 2])  # permute n_steps and batch_size
        inputs = tf.reshape(inputs, [-1, 2*args.chunk_samples]) # (n_steps*batch_size, n_input)
        
        # Split data because rnn cell needs a list of inputs for the RNN inner loop
        inputs = tf.split(0, args.seq_length, inputs) # n_steps * (batch_size, n_hidden)
        
        # Get lstm cell output
        outputs, last_state = tf.nn.rnn(cell, inputs, initial_state=self.initial_state)

        #outputs, last_state = tf.nn.seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=None, scope='rnnlm_decode')
        output = tf.transpose(tf.pack(outputs), [1,0,2])
        output = tf.reshape(output, [-1, args.rnn_size])
        output = tf.nn.xw_plus_b(output, output_w, output_b)
        self.final_state = last_state
        # reshape target data so that it is compatible with prediction shape
        flat_target_data = tf.reshape(self.target_data,[-1, 2*args.chunk_samples])

        [o_pi, o_mu, o_sigma, o_rho] = get_mixture_coef(output)

        self.pi = o_pi
        self.mu = o_mu
        self.sigma = o_sigma
        self.rho = o_rho

        lossfunc = get_lossfunc(o_pi, o_mu, o_sigma, o_rho, flat_target_data)
        self.cost = lossfunc / (args.batch_size * args.seq_length * args.chunk_samples)
        tf.scalar_summary('cost', self.cost)


        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads = tf.gradients(self.cost, tvars)
        grads = tf.cond(
            tf.global_norm(grads) > 1e-20,
            lambda: tf.clip_by_global_norm(tf.gradients(self.cost, tvars), args.grad_clip)[0],
            lambda: grads)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
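
The tf.cond guard above recomputes tf.gradients in the true branch. Note that tf.clip_by_global_norm scales each tensor by clip_norm / max(global_norm, clip_norm), which is the identity whenever the norm is at or below the threshold (including a zero norm), so an unconditional clip appears to behave the same while reusing the gradients already computed. A simpler sketch with the same names:

grads = tf.gradients(self.cost, tvars)
# identity scaling when global_norm <= args.grad_clip; no special case
# for tiny norms is required
grads, _ = tf.clip_by_global_norm(grads, args.grad_clip)
optimizer = tf.train.AdamOptimizer(self.lr)
self.train_op = optimizer.apply_gradients(zip(grads, tvars))
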
Esempio n. 53
    def build_graph(self, features, labels, mode, params):
        """docstring."""
        del labels, params
        misc_utils.print_out("Running fast mode_fn")

        hparams = self.hparams

        # Create global_step
        tf.train.get_or_create_global_step()

        if mode == tf.contrib.learn.ModeKeys.INFER:
            # Doing inference only on one GPU
            inf_hparams = tf.contrib.training.HParams(**hparams.values())
            inf_hparams.set_hparam("num_gpus", 1)
            # Inference is done in fp32 and in the same way as that of dist_strategy.
            inf_hparams.set_hparam("use_fp16", False)

            misc_utils.print_out("inference hparmas:")
            misc_utils.print_hparams(inf_hparams)

            # Create variable_mgr
            var_mgr = self._get_variable_mgr(inf_hparams)

            with mixed_precision_scope(), tf.device("gpu:0"), tf.name_scope(
                    "tower_0"), var_mgr.create_outer_variable_scope(0):
                model = gnmt_model.GNMTModel(inf_hparams,
                                             mode=mode,
                                             features=features)
                sample_ids = model.sample_id
                reverse_target_vocab_table = lookup_ops.index_to_string_table_from_file(
                    inf_hparams.tgt_vocab_file, default_value=vocab_utils.UNK)
                sample_words = reverse_target_vocab_table.lookup(
                    tf.to_int64(sample_ids))
                # make sure outputs is of shape [batch_size, time] or [beam_width,
                # batch_size, time] when using beam search.
                if inf_hparams.time_major:
                    sample_words = tf.transpose(sample_words)
                elif sample_words.shape.ndims == 3:
                    # beam search output in [batch_size, time, beam_width] shape.
                    sample_words = tf.transpose(sample_words, [2, 0, 1])
                predictions = {"predictions": sample_words}
                # return loss, vars, grads, predictions, train_op, scaffold
                return None, None, None, predictions, None, None
        elif mode == tf.contrib.learn.ModeKeys.TRAIN:
            num_towers = hparams.num_gpus
            # Shard inputs
            tower_features = self._shard_inputs(features, num_towers)
            # Create loss scale vars if necessary
            loss_scale, loss_scale_normal_steps = self._create_loss_scale_vars(
            )

            # Create variable_mgr
            var_mgr = self._get_variable_mgr(hparams)

            # Build per-tower fprop and bprop
            devices = var_mgr.get_devices()
            tower_gradvars = []
            tower_scopes = []
            var_scopes = []
            train_losses = []
            learning_rates = []
            batch_sizes = []

            def get_optimizer(hparams, learning_rate):
                """docstring."""
                mlperf_log.gnmt_print(key=mlperf_log.OPT_NAME,
                                      value=hparams.optimizer)
                if hparams.optimizer == "sgd":
                    opt = tf.train.GradientDescentOptimizer(learning_rate)
                elif hparams.optimizer == "adam":
                    mlperf_log.gnmt_print(key=mlperf_log.OPT_HP_ADAM_BETA1,
                                          value=0.9)
                    mlperf_log.gnmt_print(key=mlperf_log.OPT_HP_ADAM_BETA2,
                                          value=0.999)
                    mlperf_log.gnmt_print(key=mlperf_log.OPT_HP_ADAM_EPSILON,
                                          value=1e-8)
                    opt = tf.train.AdamOptimizer(learning_rate)
                else:
                    raise ValueError("Unknown optimizer type %s" %
                                     hparams.optimizer)
                return opt

            def fprop_and_bprop(tid):
                """docstring."""
                model = gnmt_model.GNMTModel(hparams,
                                             mode=mode,
                                             features=tower_features[tid])
                assert model.learning_rate is not None
                # The following handles shouldn't be built when doing manual
                # sync training.
                assert model.grad_norm is None
                assert model.update is None
                tower_loss = model.train_loss
                # Only check loss numerics if in fp16
                if hparams.use_fp16 and hparams.check_tower_loss_numerics:
                    tower_loss = tf.check_numerics(
                        tower_loss, "tower_%d has Inf/NaN loss" % tid)
                # Cast to fp32, otherwise would easily overflow.
                tower_loss = tf.to_float(tower_loss)
                var_params, grads = self._compute_tower_grads(
                    tower_loss,
                    var_mgr.trainable_variables_on_device(tid, tid),
                    use_fp16=hparams.use_fp16,
                    loss_scale=loss_scale,
                    colocate_gradients_with_ops=hparams.
                    colocate_gradients_with_ops)
                self._print_varinfo(var_params, tid)
                res = [model.train_loss, model.learning_rate, model.batch_size]
                res.extend(grads)
                return res

            def unpack_fprop_and_bprop_output(output):
                train_loss = output[0]
                learning_rate = output[1]
                batch_size = output[2]
                grads = output[3:]
                return train_loss, learning_rate, batch_size, grads

            with mixed_precision_scope():
                for tid in range(num_towers):
                    with tf.device(devices[tid % len(devices)]), tf.name_scope(
                            "tower_%s" % tid) as scope:
                        tower_scopes.append(scope)
                        with var_mgr.create_outer_variable_scope(
                                tid) as var_scope:
                            var_scopes.append(var_scope)
                            outputs = maybe_xla_compile(
                                hparams, fprop_and_bprop, tid)
                            (train_loss, learning_rate, batch_size,
                             grads) = unpack_fprop_and_bprop_output(outputs)
                            train_losses.append(train_loss)
                            learning_rates.append(learning_rate)
                            batch_sizes.append(batch_size)
                            var_params = var_mgr.trainable_variables_on_device(
                                tid, tid)
                            tower_gradvars.append(list(zip(grads, var_params)))

            # Add summaries
            if hparams.show_metrics:
                tf.summary.scalar("learning_rate", learning_rates[0])
                if loss_scale:
                    tf.summary.scalar("loss_scale", loss_scale)
                    if hparams.enable_auto_loss_scale:
                        tf.summary.scalar("loss_scale_normal_steps",
                                          loss_scale_normal_steps)
            misc_utils.print_out("Finish building fprop and per-tower bprop.")
            # Aggregate gradients
            # The following compute the aggregated grads for each tower, stored in
            # opaque grad_states structure.
            apply_grads_devices, grad_states = var_mgr.preprocess_device_grads(
                tower_gradvars)
            master_grads = None
            master_params = None
            update_ops = []
            for i, device in enumerate(apply_grads_devices):
                with tf.device(device), tf.name_scope(tower_scopes[i]):
                    # Get per-tower grads.
                    with tf.name_scope("get_gradients_to_apply"):
                        avg_gradvars = var_mgr.get_gradients_to_apply(
                            i, grad_states)
                    avg_grads = [gv[0] for gv in avg_gradvars]

                    # gradients post-processing
                    with tf.name_scope("clip_gradients"):
                        if hparams.clip_grads:
                            clipped_grads, grad_norm = model_helper.gradient_clip(
                                avg_grads,
                                max_gradient_norm=hparams.max_gradient_norm)
                            # summary the grad on the 1st tower
                            if i == 0 and hparams.show_metrics:
                                tf.summary.scalar("grad_norm", grad_norm)
                                tf.summary.scalar(
                                    "clipped_grad_norm",
                                    tf.global_norm(clipped_grads))
                        else:
                            clipped_grads = avg_grads
                        if i == 0:
                            master_grads = clipped_grads

                    # Build apply-gradients ops
                    clipped_gradvars = list(
                        zip(clipped_grads, [gv[1] for gv in avg_gradvars]))
                    if i == 0:
                        master_params = [gv[1] for gv in avg_gradvars]
                    with tf.name_scope("append_gradient_ops"):
                        loss_scale_params = variable_mgr_util.AutoLossScaleParams(
                            enable_auto_loss_scale=hparams.
                            enable_auto_loss_scale,
                            loss_scale=loss_scale,
                            loss_scale_normal_steps=loss_scale_normal_steps,
                            inc_loss_scale_every_n=hparams.
                            fp16_inc_loss_scale_every_n,
                            is_chief=True)
                        opt = get_optimizer(hparams, learning_rates[i])
                        var_mgr.append_apply_gradients_ops(
                            grad_states, opt, clipped_gradvars, update_ops,
                            loss_scale_params)
            misc_utils.print_out("Finish building grad aggregation.")

            assert len(update_ops) == num_towers
            train_op = tf.group(update_ops)
            with tf.control_dependencies([train_op]):
                global_step = tf.train.get_global_step()
                train_op = global_step.assign_add(1)

            # Compute loss on the first gpu
            # TODO(jamesqin): optimize it?
            with tf.device("gpu:0"):
                loss = misc_utils.weighted_avg(train_losses, batch_sizes)

            # Create local init_ops
            # TODO(jamesqin): handle resource variables!
            # At present if not using mirror strategy, not using resource vars.
            local_init_ops = []
            local_init_op = tf.local_variables_initializer()
            with tf.control_dependencies([local_init_op]):
                local_init_ops.append(var_mgr.get_post_init_ops())
            local_init_ops.extend([local_init_op, tf.tables_initializer()])

            saveable_vars = var_mgr.savable_variables()
            # Add saveables for cudnn vars in master tower.
            saveable_objects = tf.get_collection(tf.GraphKeys.SAVEABLE_OBJECTS)
            saveable_objects = [x for x in saveable_objects if "v0" in x.name]

            misc_utils.print_out("Saveable vars(%d): " % len(saveable_vars))
            for mv in saveable_vars:
                misc_utils.print_out(mv.name)

            misc_utils.print_out("All global trainable vars(%d): " %
                                 len(tf.trainable_variables()))
            for tv in tf.trainable_variables():
                misc_utils.print_out(tv.name)

            misc_utils.print_out("All global vars(%d): " %
                                 len(tf.global_variables()))
            for gv in tf.global_variables():
                misc_utils.print_out(gv.name)

            misc_utils.print_out("master backproped params(%d): " %
                                 len(master_params))
            for mp in master_params:
                misc_utils.print_out(mp.name)

            # Note the cudnn vars are skipped the init check. :(
            scaffold = tf.train.Scaffold(
                ready_op=tf.report_uninitialized_variables(saveable_vars),
                ready_for_local_init_op=tf.report_uninitialized_variables(
                    saveable_vars),
                local_init_op=tf.group(*local_init_ops),
                saver=tf.train.Saver(saveable_vars + saveable_objects))

            misc_utils.print_out("Finish building model_fn")
            # return loss, vars, grads, predictions, train_op, scaffold
            return loss, master_params, master_grads, None, train_op, scaffold
Esempio n. 54
    def __init__(self, args, vocab):
        #tf.get_variable_scope().reuse_variables()
        dim_y = args.dim_y
        dim_z = args.dim_z
        dim_h = dim_y + dim_z
        dim_emb = args.dim_emb
        n_layers = args.n_layers
        max_len = args.max_seq_length
        filter_sizes = [int(x) for x in args.filter_sizes.split(',')]
        n_filters = args.n_filters
        beta1, beta2 = 0.5, 0.999
        grad_clip = 30.0

        self.dropout = tf.placeholder(tf.float32, name='dropout')
        self.learning_rate = tf.placeholder(tf.float32, name='learning_rate')
        self.rho = tf.placeholder(tf.float32, name='rho')
        self.gamma = tf.placeholder(tf.float32, name='gamma')

        self.batch_len = tf.placeholder(tf.int32, name='batch_len')
        self.batch_size = tf.placeholder(tf.int32, name='batch_size')
        self.enc_inputs = tf.placeholder(
            tf.int32,
            [None, None],  #size * len
            name='enc_inputs')
        self.dec_inputs = tf.placeholder(tf.int32, [None, None],
                                         name='dec_inputs')
        self.targets = tf.placeholder(tf.int32, [None, None], name='targets')
        self.weights = tf.placeholder(tf.float32, [None, None], name='weights')
        self.labels = tf.placeholder(tf.float32, [None], name='labels')

        # testing optimization
        testing1 = tf.constant([[37.0, -23.0], [1.0, 4.0]])
        testing2 = tf.constant([[37.0, -23.0], [1.0, 4.0]])
        self.lineartest = tf.matmul(testing1, testing2)
        #=====

        labels = tf.reshape(self.labels, [-1, 1])

        embedding = tf.get_variable('embedding',
                                    initializer=vocab.embedding.astype(
                                        np.float32))

        with tf.variable_scope('projection'):
            proj_W = tf.get_variable('W', [dim_h, vocab.size])
            proj_b = tf.get_variable('b', [vocab.size])

        enc_inputs = tf.nn.embedding_lookup(embedding, self.enc_inputs)
        dec_inputs = tf.nn.embedding_lookup(embedding, self.dec_inputs)

        #####   auto-encoder   #####
        init_state = tf.concat([
            linear(labels, dim_y, scope='encoder'),
            tf.zeros([self.batch_size, dim_z])
        ], 1)
        cell_e = create_cell(dim_h, n_layers, self.dropout)
        _, z = tf.nn.dynamic_rnn(cell_e,
                                 enc_inputs,
                                 initial_state=init_state,
                                 scope='encoder')
        z = z[:, dim_y:]

        #cell_e = create_cell(dim_z, n_layers, self.dropout)
        #_, z = tf.nn.dynamic_rnn(cell_e, enc_inputs,
        #    dtype=tf.float32, scope='encoder')

        self.h_ori = tf.concat([linear(labels, dim_y, scope='generator'), z],
                               1)
        self.h_tsf = tf.concat(
            [linear(1 - labels, dim_y, scope='generator', reuse=True), z], 1)

        cell_g = create_cell(dim_h, n_layers, self.dropout)
        g_outputs, _ = tf.nn.dynamic_rnn(cell_g,
                                         dec_inputs,
                                         initial_state=self.h_ori,
                                         scope='generator')

        #======
        # creating new decoder modules here =====

        #NEW PLACEHOLDER VARIABLES
        self.testing = tf.placeholder(tf.float32, name='testing')

        # Currently this replicates the functionality of the first decoder; the
        # inputs (placeholders) in the tensorflow graph need to be modified accordingly.

        # z is shared (encoder shared), output passes to second decoder pairing.
        # here, scope is "generator2"
        self.h_ori2 = tf.concat([linear(labels, dim_y, scope='generator2'), z],
                                1)
        self.h_tsf2 = tf.concat(
            [linear(1 - labels, dim_y, scope='generator2', reuse=True), z], 1)

        cell_g2 = create_cell(dim_h, n_layers, self.dropout)
        g_outputs2, _ = tf.nn.dynamic_rnn(cell_g2,
                                          dec_inputs,
                                          initial_state=self.h_ori2,
                                          scope='generator2')

        teach_h2 = tf.concat([tf.expand_dims(self.h_ori2, 1), g_outputs2], 1)
        g_outputs2 = tf.nn.dropout(g_outputs2, self.dropout)
        g_outputs2 = tf.reshape(g_outputs2, [-1, dim_h])
        g_logits2 = tf.matmul(g_outputs2,
                              proj_W) + proj_b  # change projections?

        loss_rec2 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.reshape(self.targets, [-1]), logits=g_logits2)
        loss_rec2 *= tf.reshape(self.weights, [-1])
        self.loss_rec2 = tf.reduce_sum(loss_rec2) / tf.to_float(
            self.batch_size)
        # continuing
        go = dec_inputs[:, 0, :]  # unchanged
        soft_func = softsample_word(self.dropout, proj_W, proj_b, embedding,
                                    self.gamma)
        hard_func = argmax_word(self.dropout, proj_W, proj_b, embedding)

        soft_h_ori2, soft_logits_ori2 = rnn_decode(self.h_ori2,
                                                   go,
                                                   max_len,
                                                   cell_g2,
                                                   soft_func,
                                                   scope='generator2')
        soft_h_tsf2, soft_logits_tsf2 = rnn_decode(self.h_tsf2,
                                                   go,
                                                   max_len,
                                                   cell_g2,
                                                   soft_func,
                                                   scope='generator2')

        hard_h_ori2, self.hard_logits_ori2 = rnn_decode(self.h_ori2,
                                                        go,
                                                        max_len,
                                                        cell_g2,
                                                        hard_func,
                                                        scope='generator2')
        hard_h_tsf2, self.hard_logits_tsf2 = rnn_decode(self.h_tsf2,
                                                        go,
                                                        max_len,
                                                        cell_g2,
                                                        hard_func,
                                                        scope='generator2')

        half = self.batch_size / 2
        zeros, ones = self.labels[:half], self.labels[half:]
        soft_h_tsf2 = soft_h_tsf2[:, :1 + self.batch_len, :]

        self.loss_d02, loss_g02 = discriminator(teach_h2[:half],
                                                soft_h_tsf2[half:],
                                                ones,
                                                zeros,
                                                filter_sizes,
                                                n_filters,
                                                self.dropout,
                                                scope='discriminator02')
        self.loss_d12, loss_g12 = discriminator(teach_h2[half:],
                                                soft_h_tsf2[:half],
                                                ones,
                                                zeros,
                                                filter_sizes,
                                                n_filters,
                                                self.dropout,
                                                scope='discriminator12')

        #####   optimizer   #####
        self.loss_adv2 = loss_g02 + loss_g12
        self.loss2 = self.loss_rec2 + self.rho * self.loss_adv2

        theta_eg2 = retrive_var(
            ['encoder', 'generator2', 'embedding', 'projection'])
        theta_d02 = retrive_var(['discriminator02'])
        theta_d12 = retrive_var(['discriminator12'])

        opt2 = tf.train.AdamOptimizer(self.learning_rate, beta1, beta2)

        grad_rec2, _ = zip(*opt2.compute_gradients(self.loss_rec2, theta_eg2))
        grad_adv2, _ = zip(*opt2.compute_gradients(self.loss_adv2, theta_eg2))
        grad2, _ = zip(*opt2.compute_gradients(self.loss2, theta_eg2))
        grad2, _ = tf.clip_by_global_norm(
            grad2, grad_clip)  # grad_clip doesn't need 2

        self.grad_rec_norm2 = tf.global_norm(grad_rec2)
        self.grad_adv_norm2 = tf.global_norm(grad_adv2)
        self.grad_norm2 = tf.global_norm(grad2)

        self.optimize_tot2 = opt2.apply_gradients(zip(grad2, theta_eg2))
        self.optimize_rec2 = opt2.minimize(self.loss_rec2, var_list=theta_eg2)
        self.optimize_d02 = opt2.minimize(self.loss_d02, var_list=theta_d02)
        self.optimize_d12 = opt2.minimize(self.loss_d12, var_list=theta_d12)

        self.saver2 = tf.train.Saver()
        #======
        #======

        #======
        # Decoder 3
        self.h_ori3 = tf.concat([linear(labels, dim_y, scope='generator3'), z],
                                1)
        self.h_tsf3 = tf.concat(
            [linear(1 - labels, dim_y, scope='generator3', reuse=True), z], 1)

        cell_g3 = create_cell(dim_h, n_layers, self.dropout)
        g_outputs3, _ = tf.nn.dynamic_rnn(cell_g3,
                                          dec_inputs,
                                          initial_state=self.h_ori3,
                                          scope='generator3')

        teach_h3 = tf.concat([tf.expand_dims(self.h_ori3, 1), g_outputs3], 1)
        g_outputs3 = tf.nn.dropout(g_outputs3, self.dropout)
        g_outputs3 = tf.reshape(g_outputs3, [-1, dim_h])
        g_logits3 = tf.matmul(g_outputs3,
                              proj_W) + proj_b  # change projections?

        loss_rec3 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.reshape(self.targets, [-1]), logits=g_logits3)
        loss_rec3 *= tf.reshape(self.weights, [-1])
        self.loss_rec3 = tf.reduce_sum(loss_rec3) / tf.to_float(
            self.batch_size)
        # continuing
        go = dec_inputs[:, 0, :]  # unchanged
        soft_func = softsample_word(self.dropout, proj_W, proj_b, embedding,
                                    self.gamma)
        hard_func = argmax_word(self.dropout, proj_W, proj_b, embedding)

        soft_h_ori3, soft_logits_ori3 = rnn_decode(self.h_ori3,
                                                   go,
                                                   max_len,
                                                   cell_g3,
                                                   soft_func,
                                                   scope='generator3')
        soft_h_tsf3, soft_logits_tsf3 = rnn_decode(self.h_tsf3,
                                                   go,
                                                   max_len,
                                                   cell_g3,
                                                   soft_func,
                                                   scope='generator3')

        hard_h_ori3, self.hard_logits_ori3 = rnn_decode(self.h_ori3,
                                                        go,
                                                        max_len,
                                                        cell_g3,
                                                        hard_func,
                                                        scope='generator3')
        hard_h_tsf3, self.hard_logits_tsf3 = rnn_decode(self.h_tsf3,
                                                        go,
                                                        max_len,
                                                        cell_g3,
                                                        hard_func,
                                                        scope='generator3')

        half = self.batch_size / 2
        zeros, ones = self.labels[:half], self.labels[half:]
        soft_h_tsf3 = soft_h_tsf3[:, :1 + self.batch_len, :]

        self.loss_d03, loss_g03 = discriminator(teach_h3[:half],
                                                soft_h_tsf3[half:],
                                                ones,
                                                zeros,
                                                filter_sizes,
                                                n_filters,
                                                self.dropout,
                                                scope='discriminator03')
        self.loss_d13, loss_g13 = discriminator(teach_h3[half:],
                                                soft_h_tsf3[:half],
                                                ones,
                                                zeros,
                                                filter_sizes,
                                                n_filters,
                                                self.dropout,
                                                scope='discriminator13')

        self.loss_adv3 = loss_g03 + loss_g13
        self.loss3 = self.loss_rec3 + self.rho * self.loss_adv3

        theta_eg3 = retrive_var(
            ['encoder', 'generator3', 'embedding', 'projection'])
        theta_d03 = retrive_var(['discriminator03'])
        theta_d13 = retrive_var(['discriminator13'])

        opt3 = tf.train.AdamOptimizer(self.learning_rate, beta1, beta2)

        grad_rec3, _ = zip(*opt3.compute_gradients(self.loss_rec3, theta_eg3))
        grad_adv3, _ = zip(*opt3.compute_gradients(self.loss_adv3, theta_eg3))
        grad3, _ = zip(*opt3.compute_gradients(self.loss3, theta_eg3))
        grad3, _ = tf.clip_by_global_norm(
            grad3, grad_clip)  # grad_clip doesn't need 2

        self.grad_rec_norm3 = tf.global_norm(grad_rec3)
        self.grad_adv_norm3 = tf.global_norm(grad_adv3)
        self.grad_norm3 = tf.global_norm(grad3)

        self.optimize_tot3 = opt3.apply_gradients(zip(grad3, theta_eg3))
        self.optimize_rec3 = opt3.minimize(self.loss_rec3, var_list=theta_eg3)
        self.optimize_d03 = opt3.minimize(self.loss_d03, var_list=theta_d03)
        self.optimize_d13 = opt3.minimize(self.loss_d13, var_list=theta_d13)

        self.saver3 = tf.train.Saver()
        #       ======
        #       ======

        # Decoder 4
        self.h_ori4 = tf.concat([linear(labels, dim_y, scope='generator4'), z],
                                1)
        self.h_tsf4 = tf.concat(
            [linear(1 - labels, dim_y, scope='generator4', reuse=True), z], 1)

        cell_g4 = create_cell(dim_h, n_layers, self.dropout)
        g_outputs4, _ = tf.nn.dynamic_rnn(cell_g4,
                                          dec_inputs,
                                          initial_state=self.h_ori4,
                                          scope='generator4')

        teach_h4 = tf.concat([tf.expand_dims(self.h_ori4, 1), g_outputs4], 1)
        g_outputs4 = tf.nn.dropout(g_outputs4, self.dropout)
        g_outputs4 = tf.reshape(g_outputs4, [-1, dim_h])
        g_logits4 = tf.matmul(g_outputs4,
                              proj_W) + proj_b  # change projections?

        loss_rec4 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.reshape(self.targets, [-1]), logits=g_logits4)
        loss_rec4 *= tf.reshape(self.weights, [-1])
        self.loss_rec4 = tf.reduce_sum(loss_rec4) / tf.to_float(
            self.batch_size)
        # continuing
        go = dec_inputs[:, 0, :]  # unchanged
        soft_func = softsample_word(self.dropout, proj_W, proj_b, embedding,
                                    self.gamma)
        hard_func = argmax_word(self.dropout, proj_W, proj_b, embedding)

        soft_h_ori4, soft_logits_ori4 = rnn_decode(self.h_ori4,
                                                   go,
                                                   max_len,
                                                   cell_g4,
                                                   soft_func,
                                                   scope='generator4')
        soft_h_tsf4, soft_logits_tsf4 = rnn_decode(self.h_tsf4,
                                                   go,
                                                   max_len,
                                                   cell_g4,
                                                   soft_func,
                                                   scope='generator4')

        hard_h_ori4, self.hard_logits_ori4 = rnn_decode(self.h_ori4,
                                                        go,
                                                        max_len,
                                                        cell_g4,
                                                        hard_func,
                                                        scope='generator4')
        hard_h_tsf4, self.hard_logits_tsf4 = rnn_decode(self.h_tsf4,
                                                        go,
                                                        max_len,
                                                        cell_g4,
                                                        hard_func,
                                                        scope='generator4')

        half = self.batch_size / 2
        zeros, ones = self.labels[:half], self.labels[half:]
        soft_h_tsf4 = soft_h_tsf4[:, :1 + self.batch_len, :]

        self.loss_d04, loss_g04 = discriminator(teach_h4[:half],
                                                soft_h_tsf4[half:],
                                                ones,
                                                zeros,
                                                filter_sizes,
                                                n_filters,
                                                self.dropout,
                                                scope='discriminator04')
        self.loss_d14, loss_g14 = discriminator(teach_h4[half:],
                                                soft_h_tsf4[:half],
                                                ones,
                                                zeros,
                                                filter_sizes,
                                                n_filters,
                                                self.dropout,
                                                scope='discriminator14')

        self.loss_adv4 = loss_g04 + loss_g14
        self.loss4 = self.loss_rec4 + self.rho * self.loss_adv4

        theta_eg4 = retrive_var(
            ['encoder', 'generator4', 'embedding', 'projection'])
        theta_d04 = retrive_var(['discriminator04'])
        theta_d14 = retrive_var(['discriminator14'])

        opt4 = tf.train.AdamOptimizer(self.learning_rate, beta1, beta2)

        grad_rec4, _ = zip(*opt4.compute_gradients(self.loss_rec4, theta_eg4))
        grad_adv4, _ = zip(*opt4.compute_gradients(self.loss_adv4, theta_eg4))
        grad4, _ = zip(*opt4.compute_gradients(self.loss4, theta_eg4))
        grad4, _ = tf.clip_by_global_norm(
            grad4, grad_clip)  # grad_clip doesn't need 2

        self.grad_rec_norm4 = tf.global_norm(grad_rec4)
        self.grad_adv_norm4 = tf.global_norm(grad_adv4)
        self.grad_norm4 = tf.global_norm(grad4)

        self.optimize_tot4 = opt4.apply_gradients(zip(grad4, theta_eg4))
        self.optimize_rec4 = opt4.minimize(self.loss_rec4, var_list=theta_eg4)
        self.optimize_d04 = opt4.minimize(self.loss_d04, var_list=theta_d04)
        self.optimize_d14 = opt4.minimize(self.loss_d14, var_list=theta_d14)

        self.saver4 = tf.train.Saver()

        # =====
        # =====

        # Decoder 5
        self.h_ori5 = tf.concat([linear(labels, dim_y, scope='generator5'), z],
                                1)
        self.h_tsf5 = tf.concat(
            [linear(1 - labels, dim_y, scope='generator5', reuse=True), z], 1)

        cell_g5 = create_cell(dim_h, n_layers, self.dropout)
        g_outputs5, _ = tf.nn.dynamic_rnn(cell_g5,
                                          dec_inputs,
                                          initial_state=self.h_ori5,
                                          scope='generator5')

        teach_h5 = tf.concat([tf.expand_dims(self.h_ori5, 1), g_outputs5], 1)
        g_outputs5 = tf.nn.dropout(g_outputs5, self.dropout)
        g_outputs5 = tf.reshape(g_outputs5, [-1, dim_h])
        g_logits5 = tf.matmul(g_outputs5,
                              proj_W) + proj_b  # change projections?

        loss_rec5 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.reshape(self.targets, [-1]), logits=g_logits5)
        loss_rec5 *= tf.reshape(self.weights, [-1])
        self.loss_rec5 = tf.reduce_sum(loss_rec5) / tf.to_float(
            self.batch_size)
        # continuing
        go = dec_inputs[:, 0, :]  # unchanged
        soft_func = softsample_word(self.dropout, proj_W, proj_b, embedding,
                                    self.gamma)
        hard_func = argmax_word(self.dropout, proj_W, proj_b, embedding)

        soft_h_ori5, soft_logits_ori5 = rnn_decode(self.h_ori5,
                                                   go,
                                                   max_len,
                                                   cell_g5,
                                                   soft_func,
                                                   scope='generator5')
        soft_h_tsf5, soft_logits_tsf5 = rnn_decode(self.h_tsf5,
                                                   go,
                                                   max_len,
                                                   cell_g5,
                                                   soft_func,
                                                   scope='generator5')

        hard_h_ori5, self.hard_logits_ori5 = rnn_decode(self.h_ori5,
                                                        go,
                                                        max_len,
                                                        cell_g5,
                                                        hard_func,
                                                        scope='generator5')
        hard_h_tsf5, self.hard_logits_tsf5 = rnn_decode(self.h_tsf5,
                                                        go,
                                                        max_len,
                                                        cell_g5,
                                                        hard_func,
                                                        scope='generator5')

        half = self.batch_size // 2
        zeros, ones = self.labels[:half], self.labels[half:]
        soft_h_tsf5 = soft_h_tsf5[:, :1 + self.batch_len, :]

        self.loss_d05, loss_g05 = discriminator(teach_h5[:half],
                                                soft_h_tsf5[half:],
                                                ones,
                                                zeros,
                                                filter_sizes,
                                                n_filters,
                                                self.dropout,
                                                scope='discriminator05')
        self.loss_d15, loss_g15 = discriminator(teach_h5[half:],
                                                soft_h_tsf5[:half],
                                                ones,
                                                zeros,
                                                filter_sizes,
                                                n_filters,
                                                self.dropout,
                                                scope='discriminator15')

        self.loss_adv5 = loss_g05 + loss_g15
        self.loss5 = self.loss_rec5 + self.rho * self.loss_adv5

        theta_eg5 = retrive_var(
            ['encoder', 'generator5', 'embedding', 'projection'])
        theta_d05 = retrive_var(['discriminator05'])
        theta_d15 = retrive_var(['discriminator15'])

        opt5 = tf.train.AdamOptimizer(self.learning_rate, beta1, beta2)

        grad_rec5, _ = zip(*opt5.compute_gradients(self.loss_rec5, theta_eg5))
        grad_adv5, _ = zip(*opt5.compute_gradients(self.loss_adv5, theta_eg5))
        grad5, _ = zip(*opt5.compute_gradients(self.loss5, theta_eg5))
        grad5, _ = tf.clip_by_global_norm(
            grad5, grad_clip)  # the same grad_clip threshold is shared across decoders

        self.grad_rec_norm5 = tf.global_norm(grad_rec5)
        self.grad_adv_norm5 = tf.global_norm(grad_adv5)
        self.grad_norm5 = tf.global_norm(grad5)

        self.optimize_tot5 = opt5.apply_gradients(zip(grad5, theta_eg5))
        self.optimize_rec5 = opt5.minimize(self.loss_rec5, var_list=theta_eg5)
        self.optimize_d05 = opt5.minimize(self.loss_d05, var_list=theta_d05)
        self.optimize_d15 = opt5.minimize(self.loss_d15, var_list=theta_d15)

        self.saver5 = tf.train.Saver()

        # attach h0 in the front
        teach_h = tf.concat([tf.expand_dims(self.h_ori, 1), g_outputs], 1)

        g_outputs = tf.nn.dropout(g_outputs, self.dropout)
        g_outputs = tf.reshape(g_outputs, [-1, dim_h])
        g_logits = tf.matmul(g_outputs, proj_W) + proj_b

        loss_rec = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.reshape(self.targets, [-1]), logits=g_logits)
        loss_rec *= tf.reshape(self.weights, [-1])
        self.loss_rec = tf.reduce_sum(loss_rec) / tf.to_float(self.batch_size)

        #####   feed-previous decoding   #####
        go = dec_inputs[:, 0, :]
        soft_func = softsample_word(self.dropout, proj_W, proj_b, embedding,
                                    self.gamma)
        hard_func = argmax_word(self.dropout, proj_W, proj_b, embedding)

        soft_h_ori, soft_logits_ori = rnn_decode(self.h_ori,
                                                 go,
                                                 max_len,
                                                 cell_g,
                                                 soft_func,
                                                 scope='generator')
        soft_h_tsf, soft_logits_tsf = rnn_decode(self.h_tsf,
                                                 go,
                                                 max_len,
                                                 cell_g,
                                                 soft_func,
                                                 scope='generator')

        hard_h_ori, self.hard_logits_ori = rnn_decode(self.h_ori,
                                                      go,
                                                      max_len,
                                                      cell_g,
                                                      hard_func,
                                                      scope='generator')
        hard_h_tsf, self.hard_logits_tsf = rnn_decode(self.h_tsf,
                                                      go,
                                                      max_len,
                                                      cell_g,
                                                      hard_func,
                                                      scope='generator')

        #####   discriminator   #####
        # a batch's first half consists of sentences of one style,
        # and second half of the other
        half = self.batch_size // 2
        zeros, ones = self.labels[:half], self.labels[half:]
        soft_h_tsf = soft_h_tsf[:, :1 + self.batch_len, :]

        self.loss_d0, loss_g0 = discriminator(teach_h[:half],
                                              soft_h_tsf[half:],
                                              ones,
                                              zeros,
                                              filter_sizes,
                                              n_filters,
                                              self.dropout,
                                              scope='discriminator0')
        self.loss_d1, loss_g1 = discriminator(teach_h[half:],
                                              soft_h_tsf[:half],
                                              ones,
                                              zeros,
                                              filter_sizes,
                                              n_filters,
                                              self.dropout,
                                              scope='discriminator1')

        #####   optimizer   #####
        self.loss_adv = loss_g0 + loss_g1
        self.loss = self.loss_rec + self.rho * self.loss_adv

        theta_eg = retrive_var(
            ['encoder', 'generator', 'embedding', 'projection'])
        theta_d0 = retrive_var(['discriminator0'])
        theta_d1 = retrive_var(['discriminator1'])

        opt = tf.train.AdamOptimizer(self.learning_rate, beta1, beta2)

        grad_rec, _ = zip(*opt.compute_gradients(self.loss_rec, theta_eg))
        grad_adv, _ = zip(*opt.compute_gradients(self.loss_adv, theta_eg))
        grad, _ = zip(*opt.compute_gradients(self.loss, theta_eg))
        grad, _ = tf.clip_by_global_norm(grad, grad_clip)

        self.grad_rec_norm = tf.global_norm(grad_rec)
        self.grad_adv_norm = tf.global_norm(grad_adv)
        self.grad_norm = tf.global_norm(grad)

        self.optimize_tot = opt.apply_gradients(zip(grad, theta_eg))
        self.optimize_rec = opt.minimize(self.loss_rec, var_list=theta_eg)
        self.optimize_d0 = opt.minimize(self.loss_d0, var_list=theta_d0)
        self.optimize_d1 = opt.minimize(self.loss_d1, var_list=theta_d1)

        self.saver = tf.train.Saver()
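
All five decoders above repeat the same optimization recipe: compute separate gradients for the reconstruction, adversarial, and combined losses over the shared encoder/generator variables, clip only the combined gradient, and expose the three global norms for monitoring. A minimal, self-contained sketch of that compute -> clip -> apply pattern on a toy variable (assuming TensorFlow 1.x; all names below are illustrative, not the model's own):

import tensorflow as tf

w = tf.Variable([1.0, 2.0])
loss_rec = tf.reduce_sum(tf.square(w))    # stands in for the reconstruction loss
loss_adv = tf.reduce_sum(tf.abs(w))       # stands in for the adversarial loss
loss_tot = loss_rec + 0.5 * loss_adv      # rho = 0.5, purely illustrative

opt = tf.train.AdamOptimizer(1e-3)
grads, tvars = zip(*opt.compute_gradients(loss_tot, [w]))
clipped, _ = tf.clip_by_global_norm(grads, 5.0)   # clip only the combined gradient
grad_norm = tf.global_norm(clipped)               # norm tracked for monitoring
train_op = opt.apply_gradients(zip(clipped, tvars))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, n = sess.run([train_op, grad_norm])
    print('clipped grad norm:', n)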
Esempio n. 55
0
	def create_op_loss(self):

		value_state = self._tf_value_state
		adv_probas = self._tf_adv_probas

		R = tf.placeholder(tf.float32, [None])
		actions_index = tf.placeholder(tf.int32, [None])
		advantage = tf.placeholder(tf.float32, [None])

		diff = tf.subtract(R, value_state)

		#Entropy = sum_a (-p_a ln p_a)
		log_adv_probas = tf.log(adv_probas)
		entropy = -tf.reduce_sum(adv_probas * log_adv_probas, axis=1)
		entropy_term = tf.multiply(self.entropy_regularisation_strength, entropy)
		self.masks = tf.one_hot(actions_index, on_value=True, off_value=False, depth=self.nb_actions)
		self.pi_selected_actions = tf.boolean_mask(adv_probas, self.masks)
		log_pi_selected_actions = tf.log(self.pi_selected_actions)

		advantage_term = log_pi_selected_actions * advantage

		loss_advantage_action_function = -tf.reduce_sum(entropy_term + advantage_term)

		#In the paper, the authors recommend multiplying the loss by 0.5
		loss_value_state_function = 0.5 * tf.nn.l2_loss(diff)

		loss = loss_advantage_action_function + loss_value_state_function

		opt = tf.train.AdamOptimizer(1e-4)

		grads = opt.compute_gradients(loss, var_list=self.get_all_variables())

		symbolic_grads = tf.gradients(loss, self.get_all_variables())

		symbolic_grads, _ = tf.clip_by_global_norm(symbolic_grads, 40.0)

		grad_placeholder = [(tf.placeholder(tf.float32, shape=grad[1].get_shape()), grad[1]) for grad in grads]

		apply_placeholder_op = opt.apply_gradients(grad_placeholder)

		tf.summary.scalar("gradient/grad_global_norm", tf.global_norm(grad_placeholder))
		tf.summary.scalar("gradient/cnn1_grad_global_norm", tf.global_norm(grad_placeholder[0:2]))
		tf.summary.scalar("gradient/cnn2_grad_global_norm", tf.global_norm(grad_placeholder[2:2]))
		tf.summary.scalar("gradient/fcc1_grad_global_norm", tf.global_norm(grad_placeholder[4:2]))
		tf.summary.scalar("gradient/adv_probas_grad_global_norm", tf.global_norm(grad_placeholder[6:2]))
		tf.summary.scalar("gradient/value_state_grad_global_norm", tf.global_norm(grad_placeholder[8:2]))

		tf.summary.scalar("model/var_global_norm", tf.global_norm(self.get_all_variables()))

		self._tf_summary_adv_loss = tf.placeholder(tf.float32, [])
		self._tf_summary_value_state_loss = tf.placeholder(tf.float32, [])
		self._tf_summary_loss = tf.placeholder(tf.float32, [])
		tf.summary.scalar("loss/advantage_function_loss", self._tf_summary_adv_loss)
		tf.summary.scalar("loss/value_state_function_loss", self._tf_summary_value_state_loss)
		tf.summary.scalar("loss/total_loss", self._tf_summary_loss)

		#Input
		self._tf_loss_R = R
		self._tf_loss_action_index = actions_index
		self._tf_grad_placeholder = grad_placeholder
		self._tf_loss_advantage = advantage

		#Output
		self._tf_loss_value_state_function = loss_value_state_function
		self._tf_loss_advantage_action_function = loss_advantage_action_function
		self._tf_loss = loss
		self._tf_optimizer = opt
		self._tf_get_gradients = symbolic_grads
		self._tf_apply_gradients = apply_placeholder_op
Esempio n. 56
0
    def __init__(self, *, policy, ob_space, ac_space, nbatch_act, nbatch_train,
                nsteps, ent_coef, vf_coef, max_grad_norm):
        sess = tf.get_default_session()

        act_model = policy(sess, ob_space, ac_space, nbatch_act, 1, reuse=False)
        train_model = policy(sess, ob_space, ac_space, nbatch_train, nsteps, reuse=True)

        A = train_model.pdtype.sample_placeholder([None])
        ADV = tf.placeholder(tf.float32, [None])
        R = tf.placeholder(tf.float32, [None])
        OLDNEGLOGPAC = tf.placeholder(tf.float32, [None])
        OLDVPRED = tf.placeholder(tf.float32, [None])
        LR = tf.placeholder(tf.float32, [])
        CLIPRANGE = tf.placeholder(tf.float32, [])

        neglogpac = train_model.pd.neglogp(A)
        entropy = tf.reduce_mean(train_model.pd.entropy())

        vpred = train_model.vf
        vpredclipped = OLDVPRED + tf.clip_by_value(train_model.vf - OLDVPRED, - CLIPRANGE, CLIPRANGE)
        vf_losses1 = tf.square(vpred - R)
        vf_losses2 = tf.square(vpredclipped - R)
        vf_loss = .5 * tf.reduce_mean(tf.maximum(vf_losses1, vf_losses2))
        ratio = tf.exp(OLDNEGLOGPAC - neglogpac)
        pg_losses = -ADV * ratio
        pg_losses2 = -ADV * tf.clip_by_value(ratio, 1.0 - CLIPRANGE, 1.0 + CLIPRANGE)
        pg_loss = tf.reduce_mean(tf.maximum(pg_losses, pg_losses2))
        approxkl = .5 * tf.reduce_mean(tf.square(neglogpac - OLDNEGLOGPAC))
        clipfrac = tf.reduce_mean(tf.to_float(tf.greater(tf.abs(ratio - 1.0), CLIPRANGE)))
        loss = pg_loss - entropy * ent_coef + vf_loss * vf_coef
        with tf.variable_scope('model'):
            params = tf.trainable_variables()
        grads = tf.gradients(loss, params)
        if max_grad_norm is not None:
            grads, _grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        grads = list(zip(grads, params))
        trainer = tf.train.AdamOptimizer(learning_rate=LR, epsilon=1e-5)
        _train = trainer.apply_gradients(grads)
        
        self.td_map = None
        def train(lr, cliprange, obs, insts, returns, masks, actions, values, neglogpacs, states=None):
            advs = returns - values
            advs = (advs - advs.mean()) / (advs.std() + 1e-8)
            td_map = {train_model.X:obs, train_model.I:insts,
                    A:actions, ADV:advs, R:returns, LR:lr,
                    CLIPRANGE:cliprange, OLDNEGLOGPAC:neglogpacs, OLDVPRED:values}
            if states is not None:
                td_map[train_model.S] = states
                td_map[train_model.M] = masks
            self.td_map = td_map
            return sess.run(
                [pg_loss, vf_loss, entropy, approxkl, clipfrac, _train],
                td_map
            )[:-1]
        
        self.loss_names = [
            'policy_loss', 'value_loss', 'policy_entropy', 'approxkl', 'clipfrac']

        def save(save_path):
            ps = sess.run(params)
            joblib.dump(ps, save_path)

        def load(load_path):
            loaded_params = joblib.load(load_path)
            restores = []
            for p, loaded_p in zip(params, loaded_params):
                restores.append(p.assign(loaded_p))
            sess.run(restores)

        self.train = train
        self.train_model = train_model
        self.act_model = act_model
        self.step = act_model.step
        self.value = act_model.value
        self.initial_state = act_model.initial_state
        self.save = save
        self.load = load
        tf.global_variables_initializer().run(session=sess) #pylint: disable=E1101
        
        # add summary
        # ===========
        self.writer = tf.summary.FileWriter('./Asset/logdir', sess.graph)
        
        cnn_grads = tf.gradients(loss, train_model.cnn_var)
        gru_grads = tf.gradients(loss, train_model.gru_var)
        ga_grads = tf.gradients(loss, train_model.ga_var)
        lstm_grads = tf.gradients(loss, train_model.lstm_var)
        pi_grads = tf.gradients(loss, train_model.pi_var)
        vf_grads = tf.gradients(loss, train_model.vf_var)

        cnn_grad_norm = tf.global_norm(cnn_grads, name='cnn_grads')
        gru_grad_norm = tf.global_norm(gru_grads, name='gru_grads')
        ga_grad_norm = tf.global_norm(ga_grads, name='ga_grads')
        lstm_grad_norm = tf.global_norm(lstm_grads, name='lstm_grads')
        pi_grad_norm = tf.global_norm(pi_grads, name='pi_grads')
        vf_grad_norm = tf.global_norm(vf_grads, name='vf_grads')
        
        tf.summary.scalar('GradNorm/cnn', cnn_grad_norm)
        tf.summary.scalar('GradNorm/gru', gru_grad_norm)
        tf.summary.scalar('GradNorm/GA', ga_grad_norm)
        tf.summary.scalar('GradNorm/lstm', lstm_grad_norm)
        tf.summary.scalar('GradNorm/pi', pi_grad_norm)
        tf.summary.scalar('GradNorm/vf', vf_grad_norm)
        
        tf.summary.scalar('loss/policy_loss', pg_loss)
        tf.summary.scalar('loss/value_loss', vf_loss)
        tf.summary.scalar('loss/entropy', entropy)
        
        self.merged = tf.summary.merge_all()
        
        def get_summary():          
            return sess.run(self.merged, self.td_map)
        self.get_summary = get_summary
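
The pg_loss above is the PPO clipped surrogate: of the unclipped and clipped policy-gradient terms, the larger (more pessimistic) one is kept, which caps how far a single update can move the policy. A quick numpy check with purely illustrative numbers:

import numpy as np

adv, ratio, cliprange = 2.0, 1.5, 0.2
pg1 = -adv * ratio
pg2 = -adv * np.clip(ratio, 1.0 - cliprange, 1.0 + cliprange)
pg_loss = max(pg1, pg2)   # the larger loss wins: the update is capped at the clipped ratio
print(pg1, pg2, pg_loss)  # -3.0 -2.4 -2.4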
            
        
Esempio n. 57
0
def get_train_ops(loss,
                  tf_variables,
                  train_step,
                  clip_mode=None,
                  grad_bound=None,
                  l2_reg=1e-4,
                  lr_warmup_val=None,
                  lr_warmup_steps=100,
                  lr_init=0.1,
                  lr_dec_start=0,
                  lr_dec_every=10000,
                  lr_dec_rate=0.1,
                  lr_dec_min=None,
                  lr_cosine=False,
                  lr_max=None,
                  lr_min=None,
                  lr_T_0=None,
                  lr_T_mul=None,
                  num_train_batches=None,
                  optim_algo=None,
                  sync_replicas=False,
                  num_aggregate=None,
                  num_replicas=None,
                  get_grad_norms=False,
                  moving_average=None):
    """
  Args:
    clip_mode: "global", "norm", or None.
    moving_average: store the moving average of parameters
  """

    if l2_reg > 0:
        l2_losses = []
        for var in tf_variables:
            l2_losses.append(tf.reduce_sum(var**2))
        l2_loss = tf.add_n(l2_losses)
        loss += l2_reg * l2_loss

    grads = tf.gradients(loss, tf_variables)
    grad_norm = tf.global_norm(grads)

    grad_norms = {}
    for v, g in zip(tf_variables, grads):
        if v is None or g is None:
            continue
        if isinstance(g, tf.IndexedSlices):
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values**2))
        else:
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g**2))

    if clip_mode is not None:
        assert grad_bound is not None, "Need grad_bound to clip gradients."
        if clip_mode == "global":
            grads, _ = tf.clip_by_global_norm(grads, grad_bound)
        elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if isinstance(g, tf.IndexedSlices):
                    c_g = tf.clip_by_norm(g.values, grad_bound)
                    c_g = tf.IndexedSlices(c_g, g.indices)
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                clipped.append(c_g)
            grads = clipped
        else:
            raise NotImplementedError("Unknown clip_mode {}".format(clip_mode))

    if lr_cosine:
        assert lr_max is not None, "Need lr_max to use lr_cosine"
        assert lr_min is not None, "Need lr_min to use lr_cosine"
        assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
        assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
        assert num_train_batches is not None, ("Need num_train_batches to use"
                                               " lr_cosine")

        curr_epoch = tf.cast(train_step // num_train_batches, tf.int32)

        last_reset = tf.get_variable("last_reset",
                                     initializer=0,
                                     dtype=tf.int32,
                                     trainable=False)
        T_i = tf.get_variable("T_i",
                              initializer=lr_T_0,
                              dtype=tf.int32,
                              trainable=False)
        T_curr = curr_epoch - last_reset

        def _update():
            update_last_reset = tf.assign(last_reset,
                                          curr_epoch,
                                          use_locking=True)
            update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
            with tf.control_dependencies([update_last_reset, update_T_i]):
                rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
                lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        def _no_update():
            rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
            lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        learning_rate = tf.cond(tf.greater_equal(T_curr, T_i), _update,
                                _no_update)
    else:
        learning_rate = tf.train.exponential_decay(
            lr_init,
            tf.maximum(train_step - lr_dec_start, 0),
            lr_dec_every,
            lr_dec_rate,
            staircase=True)
        if lr_dec_min is not None:
            learning_rate = tf.maximum(learning_rate, lr_dec_min)

    if lr_warmup_val is not None:
        learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
                                lambda: lr_warmup_val, lambda: learning_rate)

    if optim_algo == "momentum":
        opt = tf.train.MomentumOptimizer(learning_rate,
                                         0.9,
                                         use_locking=True,
                                         use_nesterov=True)
    elif optim_algo == "sgd":
        opt = tf.train.GradientDescentOptimizer(learning_rate,
                                                use_locking=True)
    elif optim_algo == "adam":
        opt = tf.train.AdamOptimizer(learning_rate,
                                     beta1=0.0,
                                     epsilon=1e-3,
                                     use_locking=True)
    else:
        raise ValueError("Unknown optim_algo {}".format(optim_algo))

    if sync_replicas:
        assert num_aggregate is not None, "Need num_aggregate to sync."
        assert num_replicas is not None, "Need num_replicas to sync."

        opt = tf.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=num_aggregate,
            total_num_replicas=num_replicas,
            use_locking=True)

    if moving_average is not None:
        opt = tf.contrib.opt.MovingAverageOptimizer(
            opt, average_decay=moving_average)

    train_op = opt.apply_gradients(zip(grads, tf_variables),
                                   global_step=train_step)

    if get_grad_norms:
        return train_op, learning_rate, grad_norm, opt, grad_norms
    else:
        return train_op, learning_rate, grad_norm, opt
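
When lr_cosine is set, the schedule above is cosine annealing with warm restarts (SGDR-style): the rate decays from lr_max to lr_min over T_i epochs, and T_i is multiplied by lr_T_mul at each restart. A plain-Python sketch of the annealing curve, with illustrative values and a fixed period for brevity:

import math

def cosine_lr(t_curr, t_i, lr_min=0.001, lr_max=0.05):
    # lr_max at t_curr == 0, decaying to lr_min at t_curr == t_i
    return lr_min + 0.5 * (lr_max - lr_min) * (1.0 + math.cos(math.pi * t_curr / t_i))

t_i = 10  # restart period; the TF code multiplies T_i by lr_T_mul at each restart
for epoch in range(12):
    print(epoch, round(cosine_lr(epoch % t_i, t_i), 4))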
Esempio n. 58
0
    def build_train_model(self, test=True, reuse=None):
        """Build model for training. """
        logging.info('Build train model.')
        self.prepare_training()

        with self.graph.as_default():
            acc_list = []
            loss_list = []
            gv_list = []
            cache = {}
            load = dict([(d, 0) for d in self._devices])
            for i, (X, Y, device) in enumerate(
                    zip(self.src_pls, self.label_pls, self._devices)):

                def daisy_chain_getter(getter, name, *args, **kwargs):
                    """Get a variable and cache in a daisy chain."""
                    device_var_key = (device, name)
                    if device_var_key in cache:
                        # if we have the variable on the correct device, return it.
                        return cache[device_var_key]
                    if name in cache:
                        # if we have it on a different device, copy it from the last device
                        v = tf.identity(cache[name])
                    else:
                        var = getter(name, *args, **kwargs)
                        v = tf.identity(var._ref())  # pylint: disable=protected-access
                    # update the cache
                    cache[name] = v
                    cache[device_var_key] = v
                    return v

                def balanced_device_setter(op):
                    """Balance variables to all devices."""
                    if op.type in {'Variable', 'VariableV2', 'VarHandleOp'}:
                        # return self._sync_device
                        min_load = min(load.values())
                        min_load_devices = [
                            d for d in load if load[d] == min_load
                        ]
                        chosen_device = random.choice(min_load_devices)
                        load[chosen_device] += op.outputs[0].get_shape(
                        ).num_elements()
                        return chosen_device
                    return device

                def identity_device_setter(op):
                    return device

                device_setter = balanced_device_setter

                with tf.variable_scope(tf.get_variable_scope(),
                                       initializer=self._initializer,
                                       custom_getter=daisy_chain_getter,
                                       reuse=reuse):
                    with tf.device(device_setter):
                        logging.info('Build model on %s.' % device)
                        encoder_output = self.encoder(
                            X,
                            is_training=True,
                            reuse=i > 0 or None,
                            encoder_scope=self.encoder_scope)
                        decoder_output = self.decoder(
                            utils.shift_right(Y),
                            encoder_output,
                            is_training=True,
                            reuse=i > 0 or None,
                            decoder_scope=self.decoder_scope)
                        acc, loss = self.train_output(
                            decoder_output,
                            Y,
                            reuse=i > 0 or None,
                            decoder_scope=self.decoder_scope)
                        var_list = tf.trainable_variables()
                        if self._config.train.var_filter:
                            var_list = [
                                v for v in var_list if re.match(
                                    self._config.train.var_filter, v.name)
                            ]
                        acc_list.append(acc)
                        loss_list.append(loss)

                        gv_list.append(
                            self._optimizer.compute_gradients(
                                loss, var_list=var_list))

            self.accuracy = tf.reduce_mean(acc_list)
            self.loss = tf.reduce_mean(loss_list)

            # Clip gradients and then apply.
            grads_and_vars = utils.average_gradients(gv_list)
            avg_abs_grads = tf.reduce_mean(tf.abs(grads_and_vars[0][0]))  # mean |grad| of the first variable

            if self._config.train.grads_clip > 0:
                grads, self.grads_norm = tf.clip_by_global_norm(
                    [gv[0] for gv in grads_and_vars],
                    clip_norm=self._config.train.grads_clip)
                grads_and_vars = zip(grads, [gv[1] for gv in grads_and_vars])
            else:
                self.grads_norm = tf.global_norm(
                    [gv[0] for gv in grads_and_vars])

            self.train_op = self._optimizer.apply_gradients(
                grads_and_vars, global_step=self.global_step)

            # Summaries
            tf.summary.scalar('acc', self.accuracy)
            tf.summary.scalar('loss', self.loss)
            tf.summary.scalar('learning_rate', self.learning_rate)
            tf.summary.scalar('grads_norm', self.grads_norm)
            tf.summary.scalar('avg_abs_grads', avg_abs_grads)
            self.summary_op = tf.summary.merge_all()

            self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                        max_to_keep=60)

        # We may want to test the model during training.
        if test:
            self.build_test_model(reuse=True)
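
utils.average_gradients is not shown in this example; a common implementation averages each variable's gradient across the per-device (grad, var) lists, roughly as in this sketch (an assumption about its behavior, not the actual utility):

import tensorflow as tf

def average_gradients(tower_grads_and_vars):
    """tower_grads_and_vars: one [(grad, var), ...] list per device."""
    averaged = []
    for grads_and_var in zip(*tower_grads_and_vars):
        grads = [g for g, _ in grads_and_var if g is not None]
        avg_grad = tf.reduce_mean(tf.stack(grads, axis=0), axis=0)
        averaged.append((avg_grad, grads_and_var[0][1]))  # variables are shared
    return averaged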
Esempio n. 59
0
    def __init__(self, policy, ob_space, ac_space, nenvs, nsteps, ent_coef,
                 q_coef, gamma, max_grad_norm, lr, rprop_alpha, rprop_epsilon,
                 total_timesteps, lrschedule, c, trust_region, alpha, delta):

        sess = get_session()
        nact = ac_space.n
        nbatch = nenvs * nsteps

        A = tf.placeholder(tf.int32, [nbatch])  # actions
        D = tf.placeholder(tf.float32, [nbatch])  # dones
        R = tf.placeholder(tf.float32, [nbatch])  # rewards, not returns
        MU = tf.placeholder(tf.float32, [nbatch, nact])  # mu's
        LR = tf.placeholder(tf.float32, [])
        eps = 1e-6

        step_ob_placeholder = tf.placeholder(dtype=ob_space.dtype,
                                             shape=(nenvs, ) + ob_space.shape)
        train_ob_placeholder = tf.placeholder(dtype=ob_space.dtype,
                                              shape=(nenvs * (nsteps + 1), ) +
                                              ob_space.shape)
        with tf.variable_scope('acer_model', reuse=tf.AUTO_REUSE):

            step_model = policy(observ_placeholder=step_ob_placeholder,
                                sess=sess)
            train_model = policy(observ_placeholder=train_ob_placeholder,
                                 sess=sess)

        params = find_trainable_variables("acer_model")
        print("Params {}".format(len(params)))
        for var in params:
            print(var)

        # create polyak averaged model
        ema = tf.train.ExponentialMovingAverage(alpha)
        ema_apply_op = ema.apply(params)

        def custom_getter(getter, *args, **kwargs):
            v = ema.average(getter(*args, **kwargs))
            print(v.name)
            return v

        with tf.variable_scope("acer_model",
                               custom_getter=custom_getter,
                               reuse=True):
            polyak_model = policy(observ_placeholder=train_ob_placeholder,
                                  sess=sess)

        # Notation: (var) = batch variable, (var)s = sequence variable, (var)_i = variable indexed by action at step i

        # action probability distributions according to train_model, polyak_model and step_model
        # policy.pi holds the distribution parameters; take a softmax to obtain a distribution that sums to 1
        train_model_p = tf.nn.softmax(train_model.pi)
        polyak_model_p = tf.nn.softmax(polyak_model.pi)
        step_model_p = tf.nn.softmax(step_model.pi)
        v = tf.reduce_sum(train_model_p * train_model.q,
                          axis=-1)  # shape is [nenvs * (nsteps + 1)]

        # strip off last step
        f, f_pol, q = map(lambda var: strip(var, nenvs, nsteps),
                          [train_model_p, polyak_model_p, train_model.q])
        # Get pi and q values for actions taken
        f_i = get_by_index(f, A)
        q_i = get_by_index(q, A)

        # Compute ratios for importance truncation
        rho = f / (MU + eps)
        rho_i = get_by_index(rho, A)

        # Calculate Q_retrace targets
        qret = q_retrace(R, D, q_i, v, rho_i, nenvs, nsteps, gamma)

        # Calculate losses
        # Entropy
        # entropy = tf.reduce_mean(strip(train_model.pd.entropy(), nenvs, nsteps))
        entropy = tf.reduce_mean(cat_entropy_softmax(f))

        # Policy Gradient loss, with truncated importance sampling & bias correction
        v = strip(v, nenvs, nsteps, True)
        check_shape([qret, v, rho_i, f_i], [[nenvs * nsteps]] * 4)
        check_shape([rho, f, q], [[nenvs * nsteps, nact]] * 3)

        # Truncated importance sampling
        adv = qret - v
        logf = tf.log(f_i + eps)
        gain_f = logf * tf.stop_gradient(
            adv * tf.minimum(c, rho_i))  # [nenvs * nsteps]
        loss_f = -tf.reduce_mean(gain_f)

        # Bias correction for the truncation
        adv_bc = (q - tf.reshape(v, [nenvs * nsteps, 1])
                  )  # [nenvs * nsteps, nact]
        logf_bc = tf.log(f + eps)  # / (f_old + eps)
        check_shape([adv_bc, logf_bc], [[nenvs * nsteps, nact]] * 2)
        gain_bc = tf.reduce_sum(
            logf_bc *
            tf.stop_gradient(adv_bc * tf.nn.relu(1.0 - (c / (rho + eps))) * f),
            axis=1)  #IMP: This is sum, as expectation wrt f
        loss_bc = -tf.reduce_mean(gain_bc)

        loss_policy = loss_f + loss_bc

        # Value/Q function loss, and explained variance
        check_shape([qret, q_i], [[nenvs * nsteps]] * 2)
        ev = q_explained_variance(tf.reshape(q_i, [nenvs, nsteps]),
                                  tf.reshape(qret, [nenvs, nsteps]))
        loss_q = tf.reduce_mean(tf.square(tf.stop_gradient(qret) - q_i) * 0.5)

        # Net loss
        check_shape([loss_policy, loss_q, entropy], [[]] * 3)
        loss = loss_policy + q_coef * loss_q - ent_coef * entropy

        if trust_region:
            g = tf.gradients(-(loss_policy - ent_coef * entropy) * nsteps *
                             nenvs, f)  #[nenvs * nsteps, nact]
            # k = tf.gradients(KL(f_pol || f), f)
            k = -f_pol / (
                f + eps
            )  #[nenvs * nsteps, nact] # Directly computed gradient of KL divergence wrt f
            k_dot_g = tf.reduce_sum(k * g, axis=-1)
            adj = tf.maximum(0.0, (tf.reduce_sum(k * g, axis=-1) - delta) /
                             (tf.reduce_sum(tf.square(k), axis=-1) +
                              eps))  #[nenvs * nsteps]

            # Calculate stats (before doing adjustment) for logging.
            avg_norm_k = avg_norm(k)
            avg_norm_g = avg_norm(g)
            avg_norm_k_dot_g = tf.reduce_mean(tf.abs(k_dot_g))
            avg_norm_adj = tf.reduce_mean(tf.abs(adj))

            g = g - tf.reshape(adj, [nenvs * nsteps, 1]) * k
            grads_f = -g / (
                nenvs * nsteps
            )  # These are trust-region-adjusted gradients w.r.t. f, i.e. the statistics of policy pi
            grads_policy = tf.gradients(f, params, grads_f)
            grads_q = tf.gradients(loss_q * q_coef, params)
            grads = [
                gradient_add(g1, g2, param)
                for (g1, g2, param) in zip(grads_policy, grads_q, params)
            ]

            avg_norm_grads_f = avg_norm(grads_f) * (nsteps * nenvs)
            norm_grads_q = tf.global_norm(grads_q)
            norm_grads_policy = tf.global_norm(grads_policy)
        else:
            grads = tf.gradients(loss, params)

        if max_grad_norm is not None:
            grads, norm_grads = tf.clip_by_global_norm(grads, max_grad_norm)
        else:
            norm_grads = tf.global_norm(grads)  # run_ops below reports norm_grads either way
        grads = list(zip(grads, params))
        trainer = tf.train.RMSPropOptimizer(learning_rate=LR,
                                            decay=rprop_alpha,
                                            epsilon=rprop_epsilon)
        _opt_op = trainer.apply_gradients(grads)

        # so when you call _train, you first do the gradient step, then you apply ema
        with tf.control_dependencies([_opt_op]):
            _train = tf.group(ema_apply_op)

        lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

        # Ops/Summaries to run, and their names for logging
        run_ops = [
            _train, loss, loss_q, entropy, loss_policy, loss_f, loss_bc, ev,
            norm_grads
        ]
        names_ops = [
            'loss', 'loss_q', 'entropy', 'loss_policy', 'loss_f', 'loss_bc',
            'explained_variance', 'norm_grads'
        ]
        if trust_region:
            run_ops = run_ops + [
                norm_grads_q, norm_grads_policy, avg_norm_grads_f, avg_norm_k,
                avg_norm_g, avg_norm_k_dot_g, avg_norm_adj
            ]
            names_ops = names_ops + [
                'norm_grads_q', 'norm_grads_policy', 'avg_norm_grads_f',
                'avg_norm_k', 'avg_norm_g', 'avg_norm_k_dot_g', 'avg_norm_adj'
            ]

        def train(obs, actions, rewards, dones, mus, states, masks, steps):
            cur_lr = lr.value_steps(steps)
            td_map = {
                train_model.X: obs,
                polyak_model.X: obs,
                A: actions,
                R: rewards,
                D: dones,
                MU: mus,
                LR: cur_lr
            }
            if states is not None:
                td_map[train_model.S] = states
                td_map[train_model.M] = masks
                td_map[polyak_model.S] = states
                td_map[polyak_model.M] = masks

            return names_ops, sess.run(run_ops, td_map)[1:]  # strip off _train

        def _step(observation, **kwargs):
            return step_model._evaluate(
                [step_model.action, step_model_p, step_model.state],
                observation, **kwargs)

        self.train = train
        self.save = functools.partial(save_variables,
                                      sess=sess,
                                      variables=params)
        self.train_model = train_model
        self.step_model = step_model
        self._step = _step
        self.step = self.step_model.step

        self.initial_state = step_model.initial_state
        tf.global_variables_initializer().run(session=sess)
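
q_retrace is defined elsewhere; as in the ACER paper, it runs the Retrace recursion backwards through the rollout with truncated importance weights. A hedged numpy sketch of that recursion, with illustrative inputs (v carries one extra bootstrap value for the state after the last step):

import numpy as np

def q_retrace(rewards, dones, q_i, v, rho_i, gamma=0.99):
    # v has one extra element: the bootstrap value after the last step
    rho_bar = np.minimum(1.0, rho_i)  # truncated importance weights
    qret = v[-1]
    out = np.zeros_like(rewards)
    for t in reversed(range(len(rewards))):
        qret = rewards[t] + gamma * qret * (1.0 - dones[t])
        out[t] = qret
        qret = rho_bar[t] * (qret - q_i[t]) + v[t]
    return out

print(q_retrace(np.array([1., 0., 1.]), np.array([0., 0., 1.]),
                np.array([.5, .4, .9]), np.array([.6, .5, .8, 0.]),
                np.array([1.2, .8, 1.0])))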
Esempio n. 60
0
def main(argv=None):  # pylint: disable=unused-argument

    data_dir = './training/training/'
    train_data_filename = data_dir + 'images/'
    train_labels_filename = data_dir + 'groundtruth/'

    # Extract it into numpy arrays.
    train_data = extract_data(train_data_filename, TRAINING_SIZE)
    train_labels = extract_labels(train_labels_filename, TRAINING_SIZE)

    num_epochs = NUM_EPOCHS

    c0 = 0
    c1 = 0
    for i in range(len(train_labels)):
        if train_labels[i][0] == 1:
            c0 = c0 + 1
        else:
            c1 = c1 + 1
    print('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' +
          str(c1))

    print('Balancing training data...')
    min_c = min(c0, c1)
    idx0 = [i for i, j in enumerate(train_labels) if j[0] == 1]
    idx1 = [i for i, j in enumerate(train_labels) if j[1] == 1]
    new_indices = idx0[0:min_c] + idx1[0:min_c]
    print(len(new_indices))
    print(train_data.shape)
    train_data = train_data[new_indices, :, :, :]
    train_labels = train_labels[new_indices]

    train_size = train_labels.shape[0]

    c0 = 0
    c1 = 0
    for i in range(len(train_labels)):
        if train_labels[i][0] == 1:
            c0 = c0 + 1
        else:
            c1 = c1 + 1
    print('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' +
          str(c1))

    # This is where training samples and labels are fed to the graph.
    # These placeholder nodes will be fed a batch of training data at each
    # training step using the {feed_dict} argument to the Run() call below.
    train_data_node = tf.placeholder(tf.float32,
                                     shape=(BATCH_SIZE, IMG_PATCH_SIZE,
                                            IMG_PATCH_SIZE, NUM_CHANNELS))
    train_labels_node = tf.placeholder(tf.float32,
                                       shape=(BATCH_SIZE, NUM_LABELS))
    train_all_data_node = tf.constant(train_data)

    # The variables below hold all the trainable weights. They are passed an
    # initial value which will be assigned when we call:
    # {tf.global_variables_initializer().run()}
    conv1_weights = tf.Variable(
        tf.truncated_normal(
            [5, 5, NUM_CHANNELS, 32],  # 5x5 filter, depth 32.
            stddev=0.1,
            seed=SEED))
    conv1_biases = tf.Variable(tf.zeros([32]))
    conv2_weights = tf.Variable(
        tf.truncated_normal([5, 5, 32, 64], stddev=0.1, seed=SEED))
    conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]))
    fc1_weights = tf.Variable(  # fully connected, depth 512.
        tf.truncated_normal(
            [int(IMG_PATCH_SIZE / 4 * IMG_PATCH_SIZE / 4 * 64), 512],
            stddev=0.1,
            seed=SEED))
    fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
    fc2_weights = tf.Variable(
        tf.truncated_normal([512, NUM_LABELS], stddev=0.1, seed=SEED))
    fc2_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))

    # Make an image summary for 4d tensor image with index idx
    def get_image_summary(img, idx=0):
        V = tf.slice(img, (0, 0, 0, idx), (1, -1, -1, 1))
        img_w = img.get_shape().as_list()[1]
        img_h = img.get_shape().as_list()[2]
        min_value = tf.reduce_min(V)
        V = V - min_value
        max_value = tf.reduce_max(V)
        V = V / (max_value * PIXEL_DEPTH)
        V = tf.reshape(V, (img_w, img_h, 1))
        V = tf.transpose(V, (2, 0, 1))
        V = tf.reshape(V, (-1, img_w, img_h, 1))
        return V

    # Make an image summary for 3d tensor image with index idx
    def get_image_summary_3d(img):
        V = tf.slice(img, (0, 0, 0), (1, -1, -1))
        img_w = img.get_shape().as_list()[1]
        img_h = img.get_shape().as_list()[2]
        V = tf.reshape(V, (img_w, img_h, 1))
        V = tf.transpose(V, (2, 0, 1))
        V = tf.reshape(V, (-1, img_w, img_h, 1))
        return V

    # Get prediction for given input image
    def get_prediction(img):
        data = numpy.asarray(img_crop(img, IMG_PATCH_SIZE, IMG_PATCH_SIZE))
        data_node = tf.constant(data)
        output = tf.nn.softmax(model(data_node))
        output_prediction = s.run(output)
        img_prediction = label_to_img(img.shape[0], img.shape[1],
                                      IMG_PATCH_SIZE, IMG_PATCH_SIZE,
                                      output_prediction)

        return img_prediction

    # Get a concatenation of the prediction and groundtruth for given input file
    def get_prediction_with_groundtruth(filename, image_idx):

        imageid = "satImage_%.3d" % image_idx
        image_filename = filename + imageid + ".png"
        img = mpimg.imread(image_filename)

        img_prediction = get_prediction(img)
        cimg = concatenate_images(img, img_prediction)

        return cimg

    # Get prediction overlaid on the original image for given input file
    def get_prediction_with_overlay(filename, image_idx):

        imageid = "satImage_%.3d" % image_idx
        image_filename = filename + imageid + ".png"
        img = mpimg.imread(image_filename)

        img_prediction = get_prediction(img)
        oimg = make_img_overlay(img, img_prediction)

        return oimg

    # We will replicate the model structure for the training subgraph, as well
    # as the evaluation subgraphs, while sharing the trainable parameters.
    def model(data, train=False):
        """The Model definition."""
        # 2D convolution, with 'SAME' padding (i.e. the output feature map has
        # the same size as the input). Note that {strides} is a 4D array whose
        # shape matches the data layout: [image index, y, x, depth].
        conv = tf.nn.conv2d(data,
                            conv1_weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')
        # Bias and rectified linear non-linearity.
        relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
        # Max pooling. The kernel size spec {ksize} also follows the layout of
        # the data. Here we have a pooling window of 2, and a stride of 2.
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')

        conv2 = tf.nn.conv2d(pool,
                             conv2_weights,
                             strides=[1, 1, 1, 1],
                             padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
        pool2 = tf.nn.max_pool(relu2,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME')

        # Uncomment these lines to check the size of each layer
        # print 'data ' + str(data.get_shape())
        # print 'conv ' + str(conv.get_shape())
        # print 'relu ' + str(relu.get_shape())
        # print 'pool ' + str(pool.get_shape())
        # print 'pool2 ' + str(pool2.get_shape())

        # Reshape the feature map cuboid into a 2D matrix to feed it to the
        # fully connected layers.
        pool_shape = pool2.get_shape().as_list()
        reshape = tf.reshape(
            pool2,
            [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
        # Fully connected layer. Note that the '+' operation automatically
        # broadcasts the biases.
        hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
        # Add a 50% dropout during training only. Dropout also scales
        # activations such that no rescaling is needed at evaluation time.
        #if train:
        #    hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)
        out = tf.matmul(hidden, fc2_weights) + fc2_biases

        if train:
            summary_id = '_0'
            s_data = get_image_summary(data)
            filter_summary0 = tf.summary.image('summary_data' + summary_id,
                                               s_data)
            s_conv = get_image_summary(conv)
            filter_summary2 = tf.summary.image('summary_conv' + summary_id,
                                               s_conv)
            s_pool = get_image_summary(pool)
            filter_summary3 = tf.summary.image('summary_pool' + summary_id,
                                               s_pool)
            s_conv2 = get_image_summary(conv2)
            filter_summary4 = tf.summary.image('summary_conv2' + summary_id,
                                               s_conv2)
            s_pool2 = get_image_summary(pool2)
            filter_summary5 = tf.summary.image('summary_pool2' + summary_id,
                                               s_pool2)

        return out

    # Training computation: logits + cross-entropy loss.
    logits = model(train_data_node, True)  # BATCH_SIZE*NUM_LABELS
    # print 'logits = ' + str(logits.get_shape()) + ' train_labels_node = ' + str(train_labels_node.get_shape())
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                labels=train_labels_node))
    tf.summary.scalar('loss', loss)

    all_params_node = [
        conv1_weights, conv1_biases, conv2_weights, conv2_biases, fc1_weights,
        fc1_biases, fc2_weights, fc2_biases
    ]
    all_params_names = [
        'conv1_weights', 'conv1_biases', 'conv2_weights', 'conv2_biases',
        'fc1_weights', 'fc1_biases', 'fc2_weights', 'fc2_biases'
    ]
    all_grads_node = tf.gradients(loss, all_params_node)
    all_grad_norms_node = []
    for i in range(0, len(all_grads_node)):
        norm_grad_i = tf.global_norm([all_grads_node[i]])
        all_grad_norms_node.append(norm_grad_i)
        tf.summary.scalar(all_params_names[i], norm_grad_i)

    # L2 regularization for the fully connected parameters.
    regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                    tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))
    # Add the regularization term to the loss.
    loss += 5e-4 * regularizers

    # Optimizer: set up a variable that's incremented once per batch and
    # controls the learning rate decay.
    batch = tf.Variable(0)
    # Decay once per epoch, using an exponential schedule starting at 0.01.
    learning_rate = tf.train.exponential_decay(
        0.01,  # Base learning rate.
        batch * BATCH_SIZE,  # Current index into the dataset.
        train_size,  # Decay step.
        0.95,  # Decay rate.
        staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    # Use simple momentum for the optimization.
    optimizer = tf.train.MomentumOptimizer(learning_rate,
                                           0.0).minimize(loss,
                                                         global_step=batch)

    # Predictions for the minibatch, validation set and test set.
    train_prediction = tf.nn.softmax(logits)
    # We'll compute them only once in a while by calling their {eval()} method.
    train_all_prediction = tf.nn.softmax(model(train_all_data_node))

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # Create a local session to run this computation.
    with tf.Session() as s:

        if RESTORE_MODEL:
            # Restore variables from disk.
            saver.restore(s, FLAGS.train_dir + "/model.ckpt")
            print("Model restored.")

        else:
            # Run all the initializers to prepare the trainable parameters.
            tf.global_variables_initializer().run()

            # Build the summary operation based on the TF collection of Summaries.
            summary_op = tf.summary.merge_all()
            summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                                   graph=s.graph)
            print('Initialized!')
            # Loop through training steps.
            print('Total number of iterations = ' +
                  str(int(num_epochs * train_size / BATCH_SIZE)))

            training_indices = range(train_size)

            for iepoch in range(num_epochs):

                # Permute training indices
                perm_indices = numpy.random.permutation(training_indices)

                for step in range(int(train_size / BATCH_SIZE)):

                    offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
                    batch_indices = perm_indices[offset:(offset + BATCH_SIZE)]

                    # Compute the offset of the current minibatch in the data.
                    # Note that we could use better randomization across epochs.
                    batch_data = train_data[batch_indices, :, :, :]
                    batch_labels = train_labels[batch_indices]
                    # This dictionary maps the batch data (as a numpy array) to the
                    # node in the graph it should be fed to.
                    feed_dict = {
                        train_data_node: batch_data,
                        train_labels_node: batch_labels
                    }

                    if step % RECORDING_STEP == 0:

                        summary_str, _, l, lr, predictions = s.run(
                            [
                                summary_op, optimizer, loss, learning_rate,
                                train_prediction
                            ],
                            feed_dict=feed_dict)
                        #summary_str = s.run(summary_op, feed_dict=feed_dict)
                        summary_writer.add_summary(summary_str, step)
                        summary_writer.flush()

                        # print_predictions(predictions, batch_labels)

                        print('Epoch %.2f' %
                              (float(step) * BATCH_SIZE / train_size))
                        print('Minibatch loss: %.3f, learning rate: %.6f' %
                              (l, lr))
                        print('Minibatch error: %.1f%%' %
                              error_rate(predictions, batch_labels))

                        sys.stdout.flush()
                    else:
                        # Run the graph and fetch some of the nodes.
                        _, l, lr, predictions = s.run(
                            [optimizer, loss, learning_rate, train_prediction],
                            feed_dict=feed_dict)

                # Save the variables to disk.
                save_path = saver.save(s, FLAGS.train_dir + "/model.ckpt")
                print("Model saved in file: %s" % save_path)

        print("Running prediction on training set")
        prediction_training_dir = "predictions_training/"
        if not os.path.isdir(prediction_training_dir):
            os.mkdir(prediction_training_dir)
        for i in range(1, TRAINING_SIZE + 1):
            pimg = get_prediction_with_groundtruth(train_data_filename, i)
            Image.fromarray(pimg).save(prediction_training_dir +
                                       "prediction_" + str(i) + ".png")
            oimg = get_prediction_with_overlay(train_data_filename, i)
            oimg.save(prediction_training_dir + "overlay_" + str(i) + ".png")
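
For reference, the exponential_decay call above with staircase=True drops the learning rate by the decay rate once per epoch (decay step = train_size, current index = batch * BATCH_SIZE). A quick plain-Python check with illustrative values:

def staircase_decay(step, base_lr=0.01, decay_steps=1000, decay_rate=0.95):
    return base_lr * decay_rate ** (step // decay_steps)

for step in (0, 999, 1000, 2000):
    print(step, round(staircase_decay(step), 6))  # 0.01, 0.01, 0.0095, 0.009025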