Example #1
def fixed_step_return(reward, value, length, discount, window):
  """N-step discounted return."""
  timestep = tf.range(reward.shape[1].value)
  mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)
  return_ = tf.zeros_like(reward)
  for _ in range(window):
    return_ += reward
    reward = discount * tf.concat([reward[:, 1:], tf.zeros_like(reward[:, -1:])], 1)
  return_ += discount**window * tf.concat(
      [value[:, window:], tf.zeros_like(value[:, -window:])], 1)
  return tf.check_numerics(tf.stop_gradient(mask * return_), 'return')
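A minimal way to exercise this helper (a sketch, not from the original project; TensorFlow 1.x graph mode with made-up shapes and values):

import numpy as np
import tensorflow as tf

# Batch of 2 episodes, 10 time steps each; the second episode only lasts 7 steps.
rewards = tf.constant(np.ones((2, 10), np.float32))
values = tf.constant(np.zeros((2, 10), np.float32))
lengths = tf.constant([10, 7])
returns = fixed_step_return(rewards, values, lengths, discount=0.99, window=5)

with tf.Session() as sess:
  print(sess.run(returns))  # (2, 10) matrix, zero beyond each episode length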
Example #2
def variable_summaries(vars_, groups=None, scope='weights'):
  """Create histogram summaries for the provided variables.

  Summaries can be grouped via regexes matching variables names.

  Args:
    vars_: List of variables to summarize.
    groups: Mapping of name to regex for grouping summaries.
    scope: Name scope for this operation.

  Returns:
    Summary tensor.
  """
  groups = groups or {r'all': r'.*'}
  grouped = collections.defaultdict(list)
  for var in vars_:
    for name, pattern in groups.items():
      if re.match(pattern, var.name):
        name = re.sub(pattern, name, var.name)
        grouped[name].append(var)
  for name in groups:
    if name not in grouped:
      tf.logging.warn("No variables matching '{}' group.".format(name))
  summaries = []
  for name, vars_ in grouped.items():
    vars_ = [tf.reshape(var, [-1]) for var in vars_]
    vars_ = tf.concat(vars_, 0)
    summaries.append(tf.summary.histogram(scope + '/' + name, vars_))
  return tf.summary.merge(summaries)
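This snippet implies module-level imports of collections, re and tensorflow. A usage sketch (assumed, not from the source) that groups weights by variable scope:

import tensorflow as tf

with tf.variable_scope('policy'):
  tf.get_variable('w', [4, 2])
with tf.variable_scope('value'):
  tf.get_variable('w', [4, 1])

summary_op = variable_summaries(
    tf.trainable_variables(),
    groups={'policy': r'policy/.*', 'value': r'value/.*'})
# summary_op can then be evaluated in a session and written with tf.summary.FileWriter.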
Example #3
def gradient_summaries(grad_vars, groups=None, scope='gradients'):
  """Create histogram summaries of the gradient.

  Summaries can be grouped via regexes matching variables names.

  Args:
    grad_vars: List of (gradient, variable) tuples as returned by optimizers.
    groups: Mapping of name to regex for grouping summaries.
    scope: Name scope for this operation.

  Returns:
    Summary tensor.
  """
  groups = groups or {r'all': r'.*'}
  grouped = collections.defaultdict(list)
  for grad, var in grad_vars:
    if grad is None:
      continue
    for name, pattern in groups.items():
      if re.match(pattern, var.name):
        name = re.sub(pattern, name, var.name)
        grouped[name].append(grad)
  for name in groups:
    if name not in grouped:
      tf.logging.warn("No variables matching '{}' group.".format(name))
  summaries = []
  for name, grads in grouped.items():
    grads = [tf.reshape(grad, [-1]) for grad in grads]
    grads = tf.concat(grads, 0)
    summaries.append(tf.summary.histogram(scope + '/' + name, grads))
  return tf.summary.merge(summaries)
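Analogously, a sketch (assumed, not from the source) feeding it the (gradient, variable) pairs produced by an optimizer:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4])
with tf.variable_scope('layer1'):
  h = tf.layers.dense(x, 8, activation=tf.nn.relu)
with tf.variable_scope('layer2'):
  y = tf.layers.dense(h, 1)
loss = tf.reduce_mean(tf.square(y))

grad_vars = tf.train.AdamOptimizer(1e-3).compute_gradients(loss)
summary_op = gradient_summaries(
    grad_vars, groups={'layer1': r'layer1/.*', 'layer2': r'layer2/.*'})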
Example #4
def flat_grad(loss, var_list):
    """Flatten and concatenate the gradients of loss w.r.t. var_list into one vector."""
    grads = tf.gradients(loss, var_list)
    return tf.concat(axis=0,
                     values=[
                         tf.reshape(grad, [numel(v)])
                         for (v, grad) in zip(var_list, grads)
                     ])
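flat_grad (and the flattening code further below) relies on a numel helper that is not part of this listing. A plausible stand-in (an assumption, not the original definition):

import numpy as np

def numel(x):
    # Number of elements of a statically shaped tensor/variable, e.g. 12 for shape [3, 4].
    return int(np.prod(x.get_shape().as_list()))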
Example #5
def build_net(input_tfs, reuse=False):
    """Concatenate the input tensors and embed them with a fully connected net."""
    layers = [1024, 512]
    activation = tf.nn.relu

    input_tf = tf.concat(axis=-1, values=input_tfs)
    h = TFUtil.fc_net(input_tf, layers, activation=activation, reuse=reuse)
    h = activation(h)
    return h
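TFUtil.fc_net comes from outside this snippet; only its call signature is visible here. A stand-in with that signature, plus a usage line, purely for illustration (not the original implementation):

import tensorflow as tf

class TFUtil(object):
    @staticmethod
    def fc_net(input_tf, layers, activation=tf.nn.relu, reuse=False):
        # Stack of fully connected layers; the caller applies the final activation.
        h = input_tf
        with tf.variable_scope('fc_net', reuse=reuse):
            for i, size in enumerate(layers):
                act = activation if i + 1 < len(layers) else None
                h = tf.layers.dense(h, size, activation=act, name='dense_%d' % i)
        return h

obs = tf.placeholder(tf.float32, [None, 64])
goal = tf.placeholder(tf.float32, [None, 16])
features = build_net([obs, goal])  # [None, 512] features after the final ReLU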
Example #6
def lambda_advantage(reward, value, length, discount):
  """Generalized Advantage Estimation."""
  timestep = tf.range(reward.shape[1].value)
  mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)
  next_value = tf.concat([value[:, 1:], tf.zeros_like(value[:, -1:])], 1)
  delta = reward + discount * next_value - value
  advantage = tf.reverse(tf.transpose(tf.scan(
      lambda agg, cur: cur + discount * agg,
      tf.transpose(tf.reverse(mask * delta, [1]), [1, 0]),
      tf.zeros_like(delta[:, -1]), 1, False), [1, 0]), [1])
  return tf.check_numerics(tf.stop_gradient(advantage), 'advantage')
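For intuition, an equivalent plain-NumPy version of the backward scan above (a sketch; note that as written the trace parameter is folded away, effectively lambda = 1, so the advantage is a discounted sum of TD errors):

import numpy as np

def lambda_advantage_np(reward, value, length, discount):
  batch, steps = reward.shape
  advantage = np.zeros_like(reward)
  for b in range(batch):
    agg = 0.0
    for t in reversed(range(steps)):
      next_value = value[b, t + 1] if t + 1 < steps else 0.0
      delta = reward[b, t] + discount * next_value - value[b, t]
      if t >= length[b]:
        delta = 0.0  # masked padding beyond the episode length
      agg = delta + discount * agg
      advantage[b, t] = agg
  return advantage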
def flat_grad(loss, var_list, grad_ys=None):
    """Same as the flat_grad above, but forwards grad_ys to tf.gradients."""
    grads = tf.gradients(loss, var_list, grad_ys)
    return tf.concat(
        [tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)],
        axis=0)
def __init__(self, sess, var_list):
    self.sess = sess
    self.op = tf.concat(
        axis=0, values=[tf.reshape(v, [numel(v)]) for v in var_list])
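A self-contained sketch (assumed; TF1 graph mode) exercising numel, flat_grad and a flattened parameter vector like the one self.op builds above:

import tensorflow as tf

w = tf.get_variable('w', [3, 2], initializer=tf.ones_initializer())
b = tf.get_variable('b', [2], initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.square(w)) + tf.reduce_sum(b)

flat_params = tf.concat(axis=0, values=[tf.reshape(v, [numel(v)]) for v in (w, b)])
flat_gradient = flat_grad(loss, [w, b])  # shape [3 * 2 + 2] = [8]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([flat_params, flat_gradient]))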