Example #1
def update_weights(optimizer: tf.train.Optimizer, network: Network, batch,
                   weight_decay: float):
  loss = 0
  for image, actions, targets in batch:
    # Initial step, from the real observation.
    value, reward, policy_logits, hidden_state = network.initial_inference(
        image)
    predictions = [(1.0, value, reward, policy_logits)]

    # Recurrent steps, from action and previous hidden state.
    for action in actions:
      value, reward, policy_logits, hidden_state = network.recurrent_inference(
          hidden_state, action)
      predictions.append((1.0 / len(actions), value, reward, policy_logits))

      hidden_state = tf.scale_gradient(hidden_state, 0.5)

    for prediction, target in zip(predictions, targets):
      gradient_scale, value, reward, policy_logits = prediction
      target_value, target_reward, target_policy = target

      l = (
          scalar_loss(value, target_value) +
          scalar_loss(reward, target_reward) +
          tf.nn.softmax_cross_entropy_with_logits(
              logits=policy_logits, labels=target_policy))

      loss += tf.scale_gradient(l, gradient_scale)

  for weights in network.get_weights():
    loss += weight_decay * tf.nn.l2_loss(weights)

  optimizer.minimize(loss)
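Note: tf.scale_gradient and scalar_loss are not TensorFlow ops; they are helpers defined elsewhere in the MuZero pseudocode. A minimal sketch of what they can look like (the squared-error scalar loss here is one simple illustrative choice, not the only one):

import tensorflow as tf

def scale_gradient(tensor, scale):
  # Scale the gradient in the backward pass while leaving the forward value
  # unchanged.
  return tensor * scale + tf.stop_gradient(tensor) * (1 - scale)

def scalar_loss(prediction, target):
  # Illustrative only: squared error between predicted and target scalars
  # (value or reward).
  return tf.losses.mean_squared_error(labels=target, predictions=prediction)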
Example #2
def update_weights(optimizer: tf.train.Optimizer, network: Network, batch,
                   weight_decay: float):
    loss = 0
    for image, (target_value, target_policy) in batch:
        value, policy_logits = network.inference(image)
        loss += (tf.losses.mean_squared_error(value, target_value) +
                 tf.nn.softmax_cross_entropy_with_logits(logits=policy_logits,
                                                         labels=target_policy))

    for weights in network.get_weights():
        loss += weight_decay * tf.nn.l2_loss(weights)

    optimizer.minimize(loss)
Example #3
def train_step(model: tf.keras.Model, optimizer: tf.train.Optimizer,
               loss: loss, x: tf.Tensor, y: tf.Tensor):
    """Training operation. That is, we minimize the loss function here.

    Arguments:
        model {tf.keras.Model} -- Instance of tf.keras.Model
        optimizer {tf.train.Optimizer} -- Optimizer to be used.
        loss {loss} -- Loss function.
        x {tf.Tensor} -- Input features.
        y {tf.Tensor} -- Output labels.
    """
    optimizer.minimize(loss=lambda: loss(model, x, y),
                       global_step=tf.train.get_or_create_global_step())
Example #4
    def __init__(
        self,
        environment: ControlBenchmark,
        experience_buffer: BaseExperienceBuffer,
        tensorflow_session: tf.Session,
        gamma: float,
        layer_sizes: List[int],
        layer_activations: List[str],
        shared_layers: int,
        tau: float,
        optimizer: tf.train.Optimizer,
        batch_size: int,
    ) -> None:
        super().__init__(environment=environment,
                         experience_buffer=experience_buffer)
        self.shared_layers = shared_layers
        self.tensorflow_session = tensorflow_session
        self.batch_size = batch_size

        self.Q = NAFNetwork(layer_sizes=layer_sizes,
                            layer_activations=layer_activations,
                            shared_layers=shared_layers,
                            state_shape=environment.state_shape,
                            action_shape=environment.action_shape)
        self.Q_lowpass = NAFNetwork(layer_sizes=layer_sizes,
                                    layer_activations=layer_activations,
                                    shared_layers=shared_layers,
                                    state_shape=environment.state_shape,
                                    action_shape=environment.action_shape)

        self.Q_lowpass.model.set_weights(self.Q.model.get_weights())

        self.observation_input = tf.keras.Input(
            shape=self.environment.state_shape, name='state')
        self.next_observation_input = tf.keras.Input(
            shape=self.environment.state_shape, name='next_state')
        self.action_input = tf.keras.Input(shape=self.environment.action_shape,
                                           name='action_placeholder')
        self.reward_input = tf.keras.Input(shape=(), name='reward')
        self.terminal_input = tf.keras.Input(shape=(), name='terminal')

        # Discount for bootstrapping; zero on terminal transitions.
        self.p_continue = gamma * (1 - self.terminal_input)

        self.frozen_parameter_update_op = periodic_target_update(
            target_variables=self.Q_lowpass.model.variables,
            source_variables=self.Q.model.variables,
            update_period=1,
            tau=tau)

        self.q_values_policy, self.mu_policy, _ = self.Q(
            state_action=[self.observation_input, self.action_input])
        _, _, self.vt_lowpass = self.Q_lowpass(
            state_action=[self.next_observation_input, self.action_input])
        # action is not actually used here to calculate the value

        self.target = self.reward_input + self.p_continue * self.vt_lowpass
        rl_loss = tf.reduce_mean(0.5 * (self.q_values_policy - self.target)**2)
        self.train_op = optimizer.minimize(rl_loss)

        self._initialize_tf_variables()
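periodic_target_update is imported from outside this snippet (its signature appears to match the trfl helper of the same name). A rough sketch of the assumed behaviour; because update_period=1 above, the period counter is omitted:

import tensorflow as tf

def periodic_target_update(target_variables, source_variables, update_period,
                           tau):
    # Soft update of the target ("lowpass") network towards the online one:
    # target <- (1 - tau) * target + tau * source.  A full implementation
    # would additionally gate the assignments so they only run once every
    # update_period calls; with update_period=1 that gate has no effect.
    updates = [target.assign((1.0 - tau) * target + tau * source)
               for target, source in zip(target_variables, source_variables)]
    return tf.group(*updates)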
Example #5
    def __init__(
        self,
        obs_spec: specs.Array,
        action_spec: specs.DiscreteArray,
        network: snt.AbstractModule,
        optimizer: tf.train.Optimizer,
        sequence_length: int,
        td_lambda: float,
        agent_discount: float,
        seed: int,
    ):
        """A simple actor-critic agent."""
        del action_spec  # unused
        tf.set_random_seed(seed)
        self._sequence_length = sequence_length
        self._count = 0

        # Create the policy ops.
        obs = tf.placeholder(shape=obs_spec.shape, dtype=obs_spec.dtype)
        online_logits, _ = network(tf.expand_dims(obs, 0))
        action = tf.squeeze(
            tf.multinomial(online_logits, 1, output_dtype=tf.int32))

        # Create placeholders and numpy arrays for learning from trajectories.
        shapes = [obs_spec.shape, (), (), ()]
        dtypes = [obs_spec.dtype, np.int32, np.float32, np.float32]

        placeholders = [
            tf.placeholder(shape=(self._sequence_length, 1) + shape,
                           dtype=dtype)
            for shape, dtype in zip(shapes, dtypes)
        ]
        observations, actions, rewards, discounts = placeholders

        self.arrays = [
            np.zeros(shape=(self._sequence_length, 1) + shape, dtype=dtype)
            for shape, dtype in zip(shapes, dtypes)
        ]

        # Build actor and critic losses.
        logits, values = snt.BatchApply(network)(observations)
        _, bootstrap_value = network(tf.expand_dims(obs, 0))

        critic_loss, (advantages, _) = td_lambda_loss(
            state_values=values,
            rewards=rewards,
            pcontinues=agent_discount * discounts,
            bootstrap_value=bootstrap_value,
            lambda_=td_lambda)
        actor_loss = discrete_policy_gradient_loss(logits, actions, advantages)
        train_op = optimizer.minimize(actor_loss + critic_loss)

        # Create TF session and callables.
        session = tf.Session()
        self._policy_fn = session.make_callable(action, [obs])
        self._update_fn = session.make_callable(train_op, placeholders + [obs])
        session.run(tf.global_variables_initializer())
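td_lambda_loss (the critic's squared error against lambda-returns, which also yields the advantages) and discrete_policy_gradient_loss are provided by the surrounding project and are not shown here. For illustration only, one plausible form of the actor loss is the action cross-entropy weighted by gradient-stopped advantages:

import tensorflow as tf

def discrete_policy_gradient_loss(policy_logits, actions, advantages):
    # REINFORCE-style surrogate: -log pi(a_t | s_t) * advantage_t, with the
    # advantages treated as constants so only the policy is differentiated.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=actions, logits=policy_logits)
    return tf.reduce_sum(cross_entropy * tf.stop_gradient(advantages))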
Example #6
    def train(self,
              x_data: tf.Tensor,
              y_data: tf.Tensor,
              example_number: int,
              epochs: int,
              batch_size: int,
              activation_function: ActivationFunction,
              cost_function: CostFunction,
              optimizer_function: tf.train.Optimizer,
              ) -> None:

        assert x_data.shape[0] == y_data.shape[0] == example_number

        x_dataset = tf.data.Dataset.from_tensor_slices(x_data)
        y_dataset = tf.data.Dataset.from_tensor_slices(y_data)

        assert x_dataset.output_shapes == self.x_size
        assert y_dataset.output_shapes == self.y_size
        assert self.is_initialized

        self.activation_function = activation_function

        batched_x, batched_y = (i.batch(batch_size)
                                for i in (x_dataset, y_dataset))

        x_batch, y_batch = (
            tf.placeholder('float', shape=(batch_size, self.x_size)),
            tf.placeholder('float', shape=(batch_size, self.y_size)))

        pred_y_batch = tf.map_fn(self.model, x_batch)
        cost = tf.reduce_mean(cost_function(pred_y_batch, y_batch))
        optimizer = optimizer_function.minimize(cost)

        print('Training...')
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for epoch in range(epochs):
                epoch_loss = 0
                x_it, y_it = (b.make_one_shot_iterator()
                              for b in (batched_x, batched_y))

                curr_x_init = x_it.get_next()
                curr_y_init = y_it.get_next()

                while True:
                    try:
                        curr_x = sess.run(curr_x_init)
                        curr_y = sess.run(curr_y_init)
                        _, c = sess.run([optimizer, cost], feed_dict={
                            x_batch: curr_x, y_batch: curr_y
                        })
                        epoch_loss += c
                    except tf.errors.OutOfRangeError:
                        break
                print('Epoch', epoch + 1, 'out of', epochs, 'completed.',
                      'Current loss:', epoch_loss)
            self.test_accuracy(sess, x_data, y_data, example_number)
Example #7
def train_model(optimizer: tf.train.Optimizer, loss: tf.Tensor):
    """Minimize the loss with respect to the model variables.

    Args:
        optimizer (tf.train.Optimizer):
        loss (tf.Tensor): Loss value as defined by a loss function.

    Returns:
        An Operation that updates the variables in `var_list`
        & also increments `global_step`.
    """
    return optimizer.minimize(loss=loss,
                              global_step=tf.train.get_or_create_global_step())
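A possible usage sketch (the tiny linear model below is illustrative and not part of the original example):

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 3])
y = tf.placeholder(tf.float32, [None, 1])
w = tf.get_variable('w', shape=[3, 1])
loss = tf.losses.mean_squared_error(labels=y, predictions=tf.matmul(x, w))

train_op = train_model(tf.train.AdamOptimizer(1e-3), loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op, feed_dict={x: np.zeros((8, 3), np.float32),
                                  y: np.zeros((8, 1), np.float32)})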
Example #8
def adversarial_train_op_func(
        generator_loss: tf.Tensor,
        discriminator_loss: tf.Tensor,
        generator_weights: List[tf.Variable],
        discriminator_weights: List[tf.Variable],
        n_gen_steps: int = 1,
        n_disc_steps: int = 5,
        optimizer: tf.train.Optimizer = tf.train.RMSPropOptimizer(0.0005)
    ) -> tf.Operation:
    """
    Build the adversarial train operation (n_disc_steps discriminator optimization steps
    followed by n_gen_steps generator optimization steps).

    Arguments:

    generator_loss -- generator loss.
    discriminator_loss -- discriminator loss.
    generator_weights -- list of generator trainable weights.
    discriminator_weights -- list of discriminator trainable weights.
    n_gen_steps -- number of generator update steps per single train operation,
        optional (default = 1).
    n_disc_steps -- number of discriminator update steps per single train
        operation, optional (default = 5).
    optimizer -- optimizer to use, optional (default = tf.train.RMSPropOptimizer(0.0005))
    """
    disc_train_op = _op_repeat_n(
            lambda: optimizer.minimize(discriminator_loss, var_list=discriminator_weights),
            n_disc_steps
        )
    
    with tf.control_dependencies([disc_train_op]):
        gen_train_op = _op_repeat_n(
                lambda: optimizer.minimize(generator_loss, var_list=generator_weights),
                n_gen_steps
            )
    
    return gen_train_op
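_op_repeat_n is a private helper of the original module and is not shown. One way it could be implemented is to chain n copies of the op with control dependencies so they execute sequentially within a single session.run call:

import tensorflow as tf

def _op_repeat_n(op_fn, n: int) -> tf.Operation:
    # Build n copies of the op, each forced to run after the previous one.
    op = op_fn()
    for _ in range(n - 1):
        with tf.control_dependencies([op]):
            op = op_fn()
    return op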
Example #9
def feature_eval_setup(sess: Session,
                       X: Tensor,
                       Z: Tensor,
                       data_train: DataSet,
                       data_test: DataSet,
                       eval_fn: Callable[[Tensor, Tensor], Tensor],
                       eval_loss_fn: Callable[[Tensor, Tensor], Tensor],
                       supervise_net: Optional[Callable[[Tensor], Tensor]] = None,
                       optimizer: tf.train.Optimizer = (
                               tf.train.RMSPropOptimizer(learning_rate=1e-4)),
                       mb_size: Optional[int] = 128,
                       max_iter: int = 5000,
                       restart_training: bool = True
                       ) -> Callable[[Session], Tuple[Number, Number]]:
    with tf.variable_scope('feature_eval'):
        if supervise_net is not None:
            y_logits = supervise_net(Z)
        else:
            y_logits = dense_net(Z, [256, data_train.dim_y])

    y_hat = tf.sigmoid(y_logits)
    y = tf.placeholder(tf.float32, [None] + data_train.dim_Y)
    eval_loss = tf.reduce_mean(eval_loss_fn(y_logits, y))
    eval_result = eval_fn(y_hat, y)
    vars_fteval = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    scope='feature_eval')
    train = optimizer.minimize(eval_loss, var_list=vars_fteval)
    eval_vars_initializer = tf.variables_initializer(
        tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='feature_eval'))
    sess.run(eval_vars_initializer)

    def feature_eval(_sess: Session) -> Tuple[Number, Number]:
        if restart_training:
            _sess.run(eval_vars_initializer)
        for _ in tqdm(range(max_iter)):
            if mb_size is not None:
                _mb = data_train.sample(mb_size)
            else:
                _mb = data_train
            data_feed = {X: _mb.x, y: _mb.y}
            _sess.run(train, feed_dict=data_feed)
        data_feed = {X: data_test.x, y: data_test.y}
        val_eval_loss = _sess.run(eval_loss, feed_dict=data_feed)
        val_eval = _sess.run(eval_result, feed_dict=data_feed)
        return val_eval_loss, val_eval

    return feature_eval
Example #10
def train_step(model: tf.keras.Model, optimizer: tf.train.Optimizer,
               loss_func: loss, inputs: tf.Tensor, labels: tf.Tensor, **kwargs):
    """Kicks off training for a given model.

    Args:
        model (tf.keras.Model):
        optimizer (tf.train.Optimizer):
        loss_func (loss): Loss function.
        inputs (tf.Tensor): Dataset's input features.
        labels (tf.Tensor): Dataset true labels.

    Keyword Args:
            sparse (bool): False if labels are not one-hot encoded.

    Returns:
        An Operation that updates the variables in `var_list`.  If `global_step`
              was not `None`, that operation also increments `global_step`.
    """

    return optimizer.minimize(loss=lambda: loss_func(model, inputs, labels, **kwargs),
                              global_step=tf.train.get_or_create_global_step())
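Note that the loss is passed to minimize as a zero-argument callable rather than a tensor: with eager execution enabled, tf.train.Optimizer.minimize expects a callable so it can recompute the loss under a gradient tape. A hypothetical call, with all model and data names assumed:

import tensorflow as tf

tf.enable_eager_execution()

model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
optimizer = tf.train.GradientDescentOptimizer(0.1)
inputs = tf.random_normal([32, 4])
labels = tf.random_uniform([32], maxval=10, dtype=tf.int32)

def loss_func(model, inputs, labels, sparse=True):
    # Illustrative loss: sparse labels are integer class ids.
    logits = model(inputs)
    return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits))

train_step(model, optimizer, loss_func, inputs, labels, sparse=True)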
Example #11
    def __init__(
        self,
        obs_dim: int,
        latent_dim: int,
        encoder: Callable[[tf.Tensor, int], tf.Tensor],
        decoder: Callable[[tf.Tensor], tf.Tensor],
        decoder_loss: Callable[[tf.Tensor, tf.Tensor], tf.Tensor],
        optimiser: tf.train.Optimizer,
        seed: int,
    ) -> None:
        super().__init__(obs_dim, latent_dim)

        tf.set_random_seed(seed)

        obs_pl = tf.placeholder(tf.float32, [None, obs_dim])
        latent_pl = tf.placeholder(tf.float32, [None, latent_dim])
        latent_dist_params = encoder(obs_pl, latent_dim)
        latent = self._build_sampled_latent(latent_dist_params)
        generated_dist_params = decoder(latent)
        _fixed_prior = tf.random_normal([1, latent_dim])
        _generated_dist_params = decoder(latent_pl)

        loss = self._kl_divergence(latent_dist_params)
        loss += decoder_loss(obs_pl, generated_dist_params)
        loss *= np.log2(np.e)  # report the loss in bits rather than nats
        train_op = optimiser.minimize(loss)

        session = tf.Session()
        self._prior_fn = session.make_callable(_fixed_prior)
        self._posterior_fn = session.make_callable(latent_dist_params,
                                                   [obs_pl])
        self._generator_fn = session.make_callable(_generated_dist_params,
                                                   [latent_pl])
        self._loss_fn = session.make_callable(loss, [obs_pl])
        self._train_fn = session.make_callable(train_op, [obs_pl])
        session.run(tf.global_variables_initializer())
        self.session = session
        self.saver = tf.train.Saver()
Example #12
def get_gradient_op(tensors: MDPTensors,
                    objective_initial_scales: SRLObjectives,
                    optimizer: tf.train.Optimizer,
                    gradient_clip: Optional[float], **kwargs):
    objectives: SRLObjectives = SRLObjectives(
        value_function=ValueFunction(tensors,
                                     objective_initial_scales.value_function,
                                     **kwargs),
        reward_prediction=RewardPrediction(
            tensors, objective_initial_scales.reward_prediction, **kwargs),
        auto_encoding=AutoEncodingPrediction(
            tensors, objective_initial_scales.auto_encoding, **kwargs),
        forward_dynamics=ForwardDynamicsPrediction(
            tensors, objective_initial_scales.forward_dynamics, **kwargs),
        inverse_dynamics=InverseDynamicsPrediction(
            tensors, objective_initial_scales.inverse_dynamics, **kwargs),
        slowness=SlownessLoss(tensors, objective_initial_scales.slowness,
                              **kwargs),
        diversity=DiversityLoss(tensors, objective_initial_scales.diversity,
                                **kwargs),
    )

    active_objectives = [
        o for o in objectives
        if o is not None and backend.get_value(o.scale) > 0
    ]
    total_loss = backend.mean(
        backend.stack([o.loss for o in active_objectives]))

    if gradient_clip is not None:
        gradients = optimizer.compute_gradients(total_loss)
        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                gradients[i] = (tf.clip_by_norm(grad, gradient_clip), var)
        return optimizer.apply_gradients(gradients)
    else:
        return optimizer.minimize(total_loss)
Example #13
    def __init__(
        self,
        obs_spec: dm_env.specs.Array,
        action_spec: dm_env.specs.BoundedArray,
        ensemble: Sequence[snt.AbstractModule],
        target_ensemble: Sequence[snt.AbstractModule],
        batch_size: int,
        agent_discount: float,
        replay_capacity: int,
        min_replay_size: int,
        sgd_period: int,
        target_update_period: int,
        optimizer: tf.train.Optimizer,
        mask_prob: float,
        noise_scale: float,
        epsilon_fn: Callable[[int], float] = lambda _: 0.,
        seed: int = None,
    ):
        """Bootstrapped DQN with additive prior functions."""
        # Dqn configurations.
        self._ensemble = ensemble
        self._target_ensemble = target_ensemble
        self._num_actions = action_spec.maximum - action_spec.minimum + 1
        self._batch_size = batch_size
        self._sgd_period = sgd_period
        self._target_update_period = target_update_period
        self._min_replay_size = min_replay_size
        self._epsilon_fn = epsilon_fn
        self._replay = replay.Replay(capacity=replay_capacity)
        self._mask_prob = mask_prob
        self._noise_scale = noise_scale
        self._rng = np.random.RandomState(seed)
        tf.set_random_seed(seed)

        self._total_steps = 0
        self._total_episodes = 0
        self._active_head = 0
        self._num_ensemble = len(ensemble)
        assert len(ensemble) == len(target_ensemble)

        # Making the tensorflow graph
        session = tf.Session()

        # Placeholders = (obs, action, reward, discount, next_obs, mask, noise)
        o_tm1 = tf.placeholder(shape=(None, ) + obs_spec.shape,
                               dtype=obs_spec.dtype)
        a_tm1 = tf.placeholder(shape=(None, ), dtype=action_spec.dtype)
        r_t = tf.placeholder(shape=(None, ), dtype=tf.float32)
        d_t = tf.placeholder(shape=(None, ), dtype=tf.float32)
        o_t = tf.placeholder(shape=(None, ) + obs_spec.shape,
                             dtype=obs_spec.dtype)
        m_t = tf.placeholder(shape=(None, self._num_ensemble),
                             dtype=tf.float32)
        z_t = tf.placeholder(shape=(None, self._num_ensemble),
                             dtype=tf.float32)

        losses = []
        value_fns = []
        target_updates = []
        for k in range(self._num_ensemble):
            model = self._ensemble[k]
            target_model = self._target_ensemble[k]
            q_values = model(o_tm1)

            train_value = batched_index(q_values, a_tm1)
            target_value = tf.stop_gradient(
                tf.reduce_max(target_model(o_t), axis=-1))
            target_y = r_t + z_t[:, k] + agent_discount * d_t * target_value
            loss = tf.square(train_value - target_y) * m_t[:, k]

            value_fn = session.make_callable(q_values, [o_tm1])
            target_update = update_target_variables(
                target_variables=target_model.get_all_variables(),
                source_variables=model.get_all_variables(),
            )

            losses.append(loss)
            value_fns.append(value_fn)
            target_updates.append(target_update)

        sgd_op = optimizer.minimize(tf.stack(losses))
        self._value_fns = value_fns
        self._sgd_step = session.make_callable(
            sgd_op, [o_tm1, a_tm1, r_t, d_t, o_t, m_t, z_t])
        self._update_target_nets = session.make_callable(target_updates)
        session.run(tf.global_variables_initializer())
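batched_index and update_target_variables are helpers from the surrounding project. A small sketch of the indexing helper, assuming it simply gathers each row's chosen-action value:

import tensorflow as tf

def batched_index(values, indices):
    # For [batch, num_actions] Q-values and [batch] action indices, return the
    # Q-value of the chosen action in each row.
    one_hot = tf.one_hot(indices, tf.shape(values)[-1], dtype=values.dtype)
    return tf.reduce_sum(values * one_hot, axis=-1)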