Example #1
    def compile(self, memory, optimizer, policy, preprocessor):
        # override to make 4 optimizers
        self.optimizer = optimizer
        # clone for actor, critic networks
        self.optimizer.actor_keras_optimizer = clone_optimizer(
            self.optimizer.keras_optimizer)
        self.optimizer.target_actor_keras_optimizer = clone_optimizer(
            self.optimizer.keras_optimizer)
        self.optimizer.critic_keras_optimizer = clone_optimizer(
            self.optimizer.keras_optimizer)
        self.optimizer.target_critic_keras_optimizer = clone_optimizer(
            self.optimizer.keras_optimizer)
        del self.optimizer.keras_optimizer

        super(DDPG, self).compile(memory, self.optimizer, policy, preprocessor)
Example #2
 def compile_model(self):
     self.optimizer.keras_optimizer_2 = clone_optimizer(
         self.optimizer.keras_optimizer)
     self.model.compile(
         loss='mse',
         optimizer=self.optimizer.keras_optimizer)
     self.model_2.compile(
         loss='mse',
         optimizer=self.optimizer.keras_optimizer_2)
     logger.info("Models 1 and 2 compiled")
Example #3
def test_clone_optimizer():
    lr, momentum, clipnorm, clipvalue = np.random.random(size=4)
    optimizer = SGD(lr=lr, momentum=momentum, clipnorm=clipnorm, clipvalue=clipvalue)
    clone = clone_optimizer(optimizer)

    assert isinstance(clone, SGD)
    assert K.get_value(optimizer.lr) == K.get_value(clone.lr)
    assert K.get_value(optimizer.momentum) == K.get_value(clone.momentum)
    assert optimizer.clipnorm == clone.clipnorm
    assert optimizer.clipvalue == clone.clipvalue
Example #4
def test_clone_optimizer():
    lr, momentum, clipnorm, clipvalue = np.random.random(size=4)
    optimizer = SGD(lr=lr,
                    momentum=momentum,
                    clipnorm=clipnorm,
                    clipvalue=clipvalue)
    clone = clone_optimizer(optimizer)

    assert isinstance(clone, SGD)
    assert K.get_value(optimizer.lr) == K.get_value(clone.lr)
    assert K.get_value(optimizer.momentum) == K.get_value(clone.momentum)
    assert optimizer.clipnorm == clone.clipnorm
    assert optimizer.clipvalue == clone.clipvalue
Example #5
def test_clone_optimizer_from_string():
    clone = clone_optimizer('sgd')
    assert isinstance(clone, SGD)
Example #6
    def compile(self, optimizer, metrics=[]):
        metrics += [mean_q]

        if type(optimizer) in (list, tuple):
            if len(optimizer) != 2:
                raise ValueError(
                    'More than two optimizers provided. Please only provide a maximum of two optimizers, the first one for the actor and the second one for the critic.'
                )
            actor_optimizer, critic_optimizer = optimizer
        else:
            actor_optimizer = optimizer
            critic_optimizer = clone_optimizer(optimizer)
        if type(actor_optimizer) is str:
            actor_optimizer = optimizers.get(actor_optimizer)
        if type(critic_optimizer) is str:
            critic_optimizer = optimizers.get(critic_optimizer)
        assert actor_optimizer != critic_optimizer

        if len(metrics) == 2 and hasattr(metrics[0], '__len__') and hasattr(
                metrics[1], '__len__'):
            actor_metrics, critic_metrics = metrics
        else:
            actor_metrics = critic_metrics = metrics

        def clipped_error(y_true, y_pred):
            return K.mean(huber_loss(y_true, y_pred, self.delta_clip), axis=-1)

        # Compile target networks. We only use them in feed-forward mode, hence we can pass any
        # optimizer and loss since we never use it anyway.
        self.target_actor = clone_model(self.actor, self.custom_model_objects)
        self.target_actor.compile(optimizer='sgd', loss='mse')
        self.target_critic = clone_model(self.critic,
                                         self.custom_model_objects)
        self.target_critic.compile(optimizer='sgd', loss='mse')

        # We also compile the actor. We never optimize the actor using Keras but instead compute
        # the policy gradient ourselves. However, we need the actor in feed-forward mode, hence
        # we also compile it with any optimizer and never actually use it.
        self.actor.compile(optimizer='sgd', loss='mse')

        # Compile the critic.
        if self.target_model_update < 1.:
            # We use the `AdditionalUpdatesOptimizer` to efficiently soft-update the target model.
            critic_updates = get_soft_target_model_updates(
                self.target_critic, self.critic, self.target_model_update)
            critic_optimizer = AdditionalUpdatesOptimizer(
                critic_optimizer, critic_updates)
        self.critic.compile(optimizer=critic_optimizer,
                            loss=clipped_error,
                            metrics=critic_metrics)

        # Combine actor and critic so that we can get the policy gradient.
        # Assuming critic's state inputs are the same as actor's.
        combined_inputs = []
        state_inputs = []
        for i in self.critic.input:
            if i == self.critic_action_input:
                combined_inputs.append([])
            else:
                combined_inputs.append(i)
                state_inputs.append(i)
        combined_inputs[self.critic_action_input_idx] = self.actor(
            state_inputs)

        combined_output = self.critic(combined_inputs)

        updates = actor_optimizer.get_updates(
            params=self.actor.trainable_weights, loss=-K.mean(combined_output))
        if self.target_model_update < 1.:
            # Include soft target model updates.
            updates += get_soft_target_model_updates(self.target_actor,
                                                     self.actor,
                                                     self.target_model_update)
        updates += self.actor.updates  # include other updates of the actor, e.g. for BN

        # Finally, combine it all into a callable function.
        if K.backend() == 'tensorflow':
            self.actor_train_fn = K.function(state_inputs +
                                             [K.learning_phase()],
                                             [self.actor(state_inputs)],
                                             updates=updates)
        else:
            if self.uses_learning_phase:
                state_inputs += [K.learning_phase()]
            self.actor_train_fn = K.function(state_inputs,
                                             [self.actor(state_inputs)],
                                             updates=updates)
        self.actor_optimizer = actor_optimizer

        self.compiled = True
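For orientation only: a hypothetical call into this compile method, assuming a DDPG agent instance named agent and Keras' Adam optimizer, passing a two-element list so the actor and critic get independent optimizers (the names here are illustrative, not taken from the listing):

    from keras.optimizers import Adam

    # First optimizer drives the actor update, the second one the critic;
    # passing a single optimizer instead makes compile() clone it for the critic.
    agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])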
Example #7
 def compile(self, optimizer, metrics=[]):
     self.agent1.compile(clone_optimizer(optimizer), deepcopy(metrics))
     self.agent2.compile(clone_optimizer(optimizer), deepcopy(metrics))
Example #8
def test_clone_optimizer_from_string():
    clone = clone_optimizer('sgd')
    assert isinstance(clone, SGD)
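The listings above all rely on the same clone_optimizer helper. As a rough orientation, here is a minimal sketch of such a helper, assuming Keras-style optimizers that expose get_config() and the keras.optimizers.deserialize entry point; the implementation actually used by these projects may differ:

    from keras import optimizers

    def clone_optimizer(optimizer):
        # A plain string such as 'sgd' is resolved to a fresh optimizer instance.
        if isinstance(optimizer, str):
            return optimizers.get(optimizer)
        # Otherwise rebuild a new instance of the same class from its serialized
        # config, so the clone shares hyperparameters (lr, momentum, clipnorm, ...)
        # but none of the per-variable state of the original.
        config = {
            'class_name': optimizer.__class__.__name__,
            'config': optimizer.get_config(),
        }
        return optimizers.deserialize(config)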