Exemple #1
0
                               Tank_model.setpoint))

        ## Record information of state: hand the transition tuple
        ## (Last_state, action, reward, state) to the buffer, together with
        ## the four networks and gamma.
        ## NOTE(review): reward is passed twice (inside the tuple and again as
        ## a keyword) -- confirm that buffer.record() really needs both.
        ## NOTE(review): Last_state is presumably the state observed before
        ## `action` was applied -- verify against the code above.
        buffer.record((Last_state, action, reward, state),
                      base_critic=base_critic,
                      base_actor=base_actor,
                      target_actor=target_actor,
                      target_critic=target_critic,
                      gamma=gamma,
                      reward=reward)

        ## Update base actor and base critic: buffer.learn() returns a pair
        ## that rebinds base_actor and base_critic, so every statement below
        ## sees the updated networks. The target networks are only passed in
        ## here; they are not rebound by this call.
        base_actor, base_critic = buffer.learn(
            base_critic=base_critic,
            base_actor=base_actor,
            target_actor=target_actor,
            target_critic=target_critic,
            gamma=gamma,
            actor_optimizer=actor_optimizer,
            critic_optimizer=critic_optimizer)

        ## Soft update target actor and target critic. This must run after
        ## the learn step above because it receives the freshly rebound base
        ## networks. tau is passed positionally.
        ## NOTE(review): tau is presumably the blending factor of the soft
        ## update -- confirm against update_target_single().
        target_actor, target_critic = update_target_single(
            tau,
            base_critic=base_critic,
            base_actor=base_actor,
            target_actor=target_actor,
            target_critic=target_critic)

        ## Remember noise produced: append the current noise value to
        ## Noise_mem (presumably kept for later inspection -- verify).
        Noise_mem.append(noise)