Example #1
def ClippingOptimizer(opt: tf.train.Optimizer, low, high):
    original = opt.apply_gradients

    def apply_gradients(grads_and_vars, *a, **kw):
        app = original(grads_and_vars, *a, **kw)
        asg = [
            v.assign_add(tf.maximum(high - v, 0) + tf.minimum(low - v, 0))
            for g, v in grads_and_vars
        ]
        return tf.group(app, *asg)  # note that clipping is asynchronous here

    opt.apply_gradients = apply_gradients
    return opt
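
Since the wrapper only monkey-patches apply_gradients, it can be dropped in front of any stock optimizer. A minimal usage sketch, assuming TF 1.x graph mode; the variable, loss, and learning rate below are illustrative only:

import tensorflow as tf

w = tf.get_variable('w', shape=[10], initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.square(w - 3.0))

opt = ClippingOptimizer(tf.train.GradientDescentOptimizer(0.1), low=-1.0, high=1.0)
train_op = opt.apply_gradients(opt.compute_gradients(loss, var_list=[w]))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(100):
        sess.run(train_op)
    # w is pushed toward [-1, 1]; because the clip assigns are grouped with the
    # update rather than ordered after it, the bound is not strictly guaranteed
    # on every single step.
    print(sess.run(w))
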
Example #2
 def __distribute_training(self, iterator: tf.data.Iterator,
                           optimizer: tf.train.Optimizer) -> DistributedOps:
     gpus_to_use = self.__get_gpu_to_use()
     gradients, loss_operations = [], []
     for gpu_id in gpus_to_use:
         multi_gpu_operations = self.__place_operations(
             target_gpu_id=gpu_id, iterator=iterator, optimizer=optimizer)
         gradients.append(multi_gpu_operations.gradient)
         loss_operations.append(multi_gpu_operations.loss_operation)
     gradients = average_gradients(gradients)
     loss_operation = average_loss(loss_operations)
     training_step = optimizer.apply_gradients(gradients)
     return loss_operation, training_step
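
This snippet relies on average_gradients and average_loss helpers that are not shown here. A hedged sketch of how average_gradients is commonly written for this per-GPU tower pattern (dense gradients assumed; the actual project helper may differ):

import tensorflow as tf

def average_gradients(tower_grads_and_vars):
    """Average per-tower lists of (gradient, variable) pairs into one list."""
    averaged = []
    # zip(*...) groups the i-th (grad, var) pair from every tower together;
    # the variable object is shared across towers, only the gradients differ.
    for pairs in zip(*tower_grads_and_vars):
        var = pairs[0][1]
        grads = [g for g, _ in pairs if g is not None]
        if not grads:
            averaged.append((None, var))
            continue
        averaged.append((tf.reduce_mean(tf.stack(grads, axis=0), axis=0), var))
    return averaged
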
Example #3
def dns_grad_op(loss, optimizer: tf.train.Optimizer, variables=None, global_step=None):
    """ Create an operation that updates the weights by gradient descent.

    In DNS, the weights are updated according to their derivative with respect to the masked
    values, but the update is applied to the non-masked values, so that zeroed-out weights may
    still change and in particular be spliced back in if necessary.

    Parameters
    ----------
    loss: A `tf.Tensor` representing the loss.
    optimizer: The optimizer to use.
    variables: The variables for which to create the gradient operation.
    global_step: An optional global step to increment.

    Returns
    -------
    train_op: A TensorFlow op that, when run, updates the variables according to the gradient.
    """
    if variables is None:
        variables = tf.trainable_variables()

    replaced = {}

    wrt_variables = []

    num_replaced = 0

    for v in variables:
        # look for variables having shadow values.
        mvs = tf.get_collection(MASKED_WEIGHT_COLLECTION, v.op.name)

        if len(mvs) == 0:
            wrt_variables.append(v)
        elif len(mvs) == 1:
            num_replaced += 1
            wrt_variables.append(mvs[0])
            replaced[mvs[0]] = v
        else:
            raise ValueError('More than one masked weight for a given variable.')

    tf.logging.info('Replaced {0} variables for Dynamic Network Surgery'.format(num_replaced))

    grads_and_vars = optimizer.compute_gradients(loss, wrt_variables)
    grads_and_vars = [(g, replaced.get(v, v)) for g, v in grads_and_vars]

    train_op = optimizer.apply_gradients(grads_and_vars, global_step, 'dns_grad_op')

    return train_op
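
dns_grad_op only consumes what it finds in MASKED_WEIGHT_COLLECTION. A hedged sketch of how a weight/mask pair could be registered so that the per-variable lookup tf.get_collection(MASKED_WEIGHT_COLLECTION, v.op.name) finds the masked tensor; the helper and the naming convention are assumptions, not the project's actual code:

import tensorflow as tf

MASKED_WEIGHT_COLLECTION = 'masked_weights'  # assumed value of the constant

def masked_weight(name, shape):
    # The dense (non-masked) variable is what dns_grad_op ultimately updates ...
    weight = tf.get_variable(name, shape=shape)
    mask = tf.get_variable(name + '_mask', shape=shape, trainable=False,
                           initializer=tf.ones_initializer())
    # ... while the masked product is what the forward pass uses and what the
    # gradient is computed with respect to.  Placing it under weight.op.name
    # lets the scope filter in tf.get_collection match it to the variable.
    with tf.name_scope(weight.op.name + '/'):
        masked = tf.multiply(weight, mask, name='masked')
    tf.add_to_collection(MASKED_WEIGHT_COLLECTION, masked)
    return masked
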
Example #4
def ClippingOptimizer(opt: tf.train.Optimizer, low, high):
    original = opt.apply_gradients

    def apply_gradients(grads_and_vars, *a, **kw):
        app = original(grads_and_vars, *a, **kw)
        with tf.name_scope('clip'):
            # clip = [v.assign_add(tf.maximum(high-v, 0)+tf.minimum(low-v, 0)) for g, v in grads_and_vars]
            clip = [
                v.assign(tf.clip_by_value(v, low, high))
                for g, v in grads_and_vars
            ]

        step = after(app, clip, name='step')
        return step

    opt.apply_gradients = apply_gradients
    return opt
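
The after helper is not shown; its name and the contrast with Example #1 suggest it sequences the clip assigns so that they only run once the gradient update has finished. A sketch of how the same ordering can be expressed with plain control dependencies (the wrapper name here is made up):

import tensorflow as tf

def ClippingOptimizerSequenced(opt, low, high):
    original = opt.apply_gradients

    def apply_gradients(grads_and_vars, *a, **kw):
        app = original(grads_and_vars, *a, **kw)
        # Ops created inside this block wait for `app`, so each variable is
        # clipped against its freshly updated value instead of racing with it.
        with tf.control_dependencies([app]):
            clip = [v.assign(tf.clip_by_value(v, low, high))
                    for _, v in grads_and_vars]
        return tf.group(app, *clip, name='step')

    opt.apply_gradients = apply_gradients
    return opt
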
Example #5
def train_op_with_clip_and_noise(
        optimizer: tf.train.Optimizer,
        grads_and_vars: _GRAD_AND_VARS_TYPE,
        global_step: Optional[tf.Tensor] = None,
        gradient_clip: Optional[float] = None,
        gradient_noise_std: Optional[float] = None,
        gradient_l2_norm: Optional[tf.Tensor] = None) -> tf.Operation:
    """
    Produce a train op for the given gradients and variables, applying
    gradient clipping and adding gradient noise if the corresponding values
    were provided in the optimization config

    Parameters
    ----------
    optimizer
        optimizer to use
    grads_and_vars
        list of (gradient, variable)
    global_step
        global step to use in the optimizer; caution: if you execute this
        method multiple times in one session, provide global_step only once
    gradient_clip
        gradient clip value
    gradient_noise_std
        standard deviation of the noise to add to gradients
    gradient_l2_norm
        gradient l2 norm used for the gradient clipping

    Returns
    -------
    train_op
        training operation, which can be run inside a session
    """
    if gradient_clip is not None:
        grads_and_vars = clip_grads_and_vars(grads_and_vars, gradient_clip,
                                             gradient_l2_norm)
    if gradient_noise_std is not None:
        grads_and_vars = add_noise_to_grads_and_vars(grads_and_vars,
                                                     gradient_noise_std)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)
    return train_op
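
clip_grads_and_vars and add_noise_to_grads_and_vars are not shown. Hedged sketches that match the call sites above (global-norm clipping with an optional precomputed norm, Gaussian noise added per gradient, dense gradients assumed); the project's real helpers may differ:

import tensorflow as tf

def clip_grads_and_vars(grads_and_vars, gradient_clip, gradient_l2_norm=None):
    grads, variables = zip(*grads_and_vars)
    # clip_by_global_norm accepts an optional precomputed norm, which is
    # presumably what the gradient_l2_norm argument is used for.
    clipped, _ = tf.clip_by_global_norm(list(grads), gradient_clip,
                                        use_norm=gradient_l2_norm)
    return list(zip(clipped, variables))

def add_noise_to_grads_and_vars(grads_and_vars, gradient_noise_std):
    noisy = []
    for grad, var in grads_and_vars:
        if grad is not None:
            grad = grad + tf.random_normal(tf.shape(grad), dtype=grad.dtype,
                                           stddev=gradient_noise_std)
        noisy.append((grad, var))
    return noisy
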
Example #6
def get_gradient_op(tensors: MDPTensors,
                    objective_initial_scales: SRLObjectives,
                    optimizer: tf.train.Optimizer,
                    gradient_clip: Optional[float], **kwargs):
    objectives: SRLObjectives = SRLObjectives(
        value_function=ValueFunction(tensors,
                                     objective_initial_scales.value_function,
                                     **kwargs),
        reward_prediction=RewardPrediction(
            tensors, objective_initial_scales.reward_prediction, **kwargs),
        auto_encoding=AutoEncodingPrediction(
            tensors, objective_initial_scales.auto_encoding, **kwargs),
        forward_dynamics=ForwardDynamicsPrediction(
            tensors, objective_initial_scales.forward_dynamics, **kwargs),
        inverse_dynamics=InverseDynamicsPrediction(
            tensors, objective_initial_scales.inverse_dynamics, **kwargs),
        slowness=SlownessLoss(tensors, objective_initial_scales.slowness,
                              **kwargs),
        diversity=DiversityLoss(tensors, objective_initial_scales.diversity,
                                **kwargs),
    )

    active_objectives = [
        o for o in objectives
        if o is not None and backend.get_value(o.scale) > 0
    ]
    total_loss = backend.mean(
        backend.stack([o.loss for o in active_objectives]))

    if gradient_clip is not None:
        gradients = optimizer.compute_gradients(total_loss)
        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                gradients[i] = (tf.clip_by_norm(grad, gradient_clip), var)
        return optimizer.apply_gradients(gradients)
    else:
        return optimizer.minimize(total_loss)
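
Note that this example clips each gradient separately with tf.clip_by_norm, whereas train_op_with_clip_and_noise above and the recurrent agent below clip by the global norm across all gradients. A small self-contained sketch of the difference:

import tensorflow as tf

grads = [tf.constant([3.0, 4.0]), tf.constant([6.0, 8.0])]

# Per-tensor clipping: each gradient is rescaled so that its own norm is <= 5;
# the first tensor (norm 5) is untouched, the second (norm 10) is halved.
per_tensor = [tf.clip_by_norm(g, 5.0) for g in grads]

# Global-norm clipping: all gradients are scaled by one common factor so that
# the norm of their concatenation is <= 5, preserving their relative sizes.
global_clipped, global_norm = tf.clip_by_global_norm(grads, 5.0)

with tf.Session() as sess:
    print(sess.run([per_tensor, global_clipped, global_norm]))
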
Example #7
  def __init__(
      self,
      obs_spec: specs.Array,
      action_spec: specs.DiscreteArray,
      network: snt.RNNCore,
      optimizer: tf.train.Optimizer,
      sequence_length: int,
      td_lambda: float,
      agent_discount: float,
      seed: int,
  ):
    """A recurrent actor-critic agent."""
    del action_spec  # unused
    tf.set_random_seed(seed)
    self._sequence_length = sequence_length
    self._num_transitions_in_buffer = 0

    # Create the policy ops.
    obs = tf.placeholder(shape=(1,) + obs_spec.shape, dtype=obs_spec.dtype)
    mask = tf.placeholder(shape=(1,), dtype=tf.float32)
    state = self._placeholders_like(network.initial_state(batch_size=1))
    (online_logits, _), next_state = network((obs, mask), state)
    action = tf.squeeze(tf.multinomial(online_logits, 1, output_dtype=tf.int32))

    # Create placeholders and numpy arrays for learning from trajectories.
    shapes = [obs_spec.shape, (), (), (), ()]
    dtypes = [obs_spec.dtype, np.int32, np.float32, np.float32, np.float32]

    placeholders = [
        tf.placeholder(shape=(self._sequence_length, 1) + shape, dtype=dtype)
        for shape, dtype in zip(shapes, dtypes)]
    observations, actions, rewards, discounts, masks = placeholders

    # Build actor and critic losses.
    (logits, values), final_state = tf.nn.dynamic_rnn(
        network, (observations, tf.expand_dims(masks, -1)),
        initial_state=state, dtype=tf.float32, time_major=True)
    (_, bootstrap_value), _ = network((obs, mask), final_state)
    values, bootstrap_value = tree.map_structure(
        lambda t: tf.squeeze(t, axis=-1), (values, bootstrap_value))
    critic_loss, (advantages, _) = td_lambda_loss(
        state_values=values,
        rewards=rewards,
        pcontinues=agent_discount * discounts,
        bootstrap_value=bootstrap_value,
        lambda_=td_lambda)
    actor_loss = discrete_policy_gradient_loss(logits, actions, advantages)

    # Updates.
    grads_and_vars = optimizer.compute_gradients(actor_loss + critic_loss)
    grads, _ = tf.clip_by_global_norm([g for g, _ in grads_and_vars], 5.)
    grads_and_vars = [(g, pair[1]) for g, pair in zip(grads, grads_and_vars)]
    train_op = optimizer.apply_gradients(grads_and_vars)

    # Create TF session and callables.
    session = tf.Session()
    self._reset_fn = session.make_callable(
        network.initial_state(batch_size=1))
    self._policy_fn = session.make_callable(
        [action, next_state], [obs, mask, state])
    self._update_fn = session.make_callable(
        [train_op, final_state], placeholders + [obs, mask, state])
    session.run(tf.global_variables_initializer())

    # Initialize numpy buffers
    self.state = self._reset_fn()
    self.update_init_state = self._reset_fn()
    self.arrays = [
        np.zeros(shape=(self._sequence_length, 1) + shape, dtype=dtype)
        for shape, dtype in zip(shapes, dtypes)]
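
A hypothetical sketch of how the callables and numpy buffers built in this constructor are typically driven by the agent's step logic (which is not shown); every name outside the attributes created above is illustrative:

import numpy as np

def agent_step(agent, observation, reward, discount, mask, t):
    # Act with the online policy, carrying the recurrent state forward.
    action, agent.state = agent._policy_fn(
        observation[None], np.array([mask], dtype=np.float32), agent.state)

    # Write this step's values into the per-sequence numpy buffers.
    idx = t % agent._sequence_length
    for array, value in zip(agent.arrays,
                            [observation, action, reward, discount, mask]):
        array[idx, 0] = value

    # Every sequence_length steps, run one learning update, feeding the latest
    # observation/mask for the bootstrap value and keeping the recurrent state
    # from the end of the sequence for the next update.
    if idx == agent._sequence_length - 1:
        feeds = list(agent.arrays) + [observation[None],
                                      np.array([mask], dtype=np.float32),
                                      agent.update_init_state]
        _, agent.update_init_state = agent._update_fn(*feeds)
    return action
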