def __init__(self, agent_init_params=None, alg_types=None,
             gamma=0.95, tau=0.01, lr=0.01, hidden_dim=64,
             # discrete_action=False
             ):
    """
    Inputs:
        agent_init_params (list of dict): List of dicts with parameters to
                                          initialize each agent
            num_in_pol (int): Input dimensions to policy
            num_out_pol (int): Output dimensions to policy
            num_in_critic (int): Input dimensions to critic
        alg_types (list of str): Learning algorithm for each agent
                                 (DDPG or MADDPG)
        gamma (float): Discount factor
        tau (float): Target update rate
        lr (float): Learning rate for policy and critic
        hidden_dim (int): Number of hidden dimensions for networks
        discrete_action (bool): Whether or not to use discrete action space
    """
    self.nagents = len(alg_types)
    self.alg_types = alg_types
    self.agents = [DDPGAgent(lr=lr, hidden_dim=hidden_dim, **params)
                   for params in agent_init_params]
    self.agent_init_params = agent_init_params
    self.gamma = gamma
    self.tau = tau
    self.lr = lr
    # self.discrete_action = discrete_action
    self.pol_dev = 'cpu'          # device for policies
    self.critic_dev = 'cpu'       # device for critics
    self.trgt_pol_dev = 'cpu'     # device for target policies
    self.trgt_critic_dev = 'cpu'  # device for target critics
    self.niter = 0
    # summaries tracker
    self.agent_losses = defaultdict(list)
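For reference, `tau` here sets the rate of the Polyak soft update that MADDPG applies to each agent's target networks after a learning step. A minimal sketch of that update, assuming standard PyTorch modules (the helper name `soft_update` is illustrative, not taken from this code):

import torch

def soft_update(target, source, tau):
    # Polyak averaging: theta_target <- tau * theta_source + (1 - tau) * theta_target
    with torch.no_grad():
        for t_param, s_param in zip(target.parameters(), source.parameters()):
            t_param.data.copy_(tau * s_param.data + (1.0 - tau) * t_param.data)

With tau=0.01, the target networks trail the learned networks slowly, which stabilizes the bootstrapped critic targets.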
            break

        # save most recent score
        scores_window.append(round(np.max(scores), 2))
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_window)))
        if np.mean(scores_window) >= 1:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'
                  .format(i_episode - 100, np.mean(scores_window)))
            for i, a in enumerate(agent.maddpg_agent):
                torch.save(a.actor_local.state_dict(),
                           'MADDPG_actor_{}.pth'.format(i + 1))
            break


if __name__ == "__main__":
    # instantiate the agents and the multi-agent wrapper
    Player1 = DDPGAgent(state_size, action_size, num_agents, random_seed=0)
    Player2 = DDPGAgent(state_size, action_size, num_agents, random_seed=0)
    maddpg = MADDPG(agents=[Player1, Player2])

    # train the agents and save the models
    learn(maddpg)
    env.close()
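The solved message subtracts 100 from `i_episode` because the average is taken over the last 100 episodes, so the result is attributed to the first episode of that window; `scores_window` is presumably a fixed-length deque. A minimal sketch of how the surrounding `learn` function would set this up (the signature and `n_episodes` are assumptions, not taken from this code):

from collections import deque

import numpy as np

def learn(agent, n_episodes=5000):
    scores_window = deque(maxlen=100)  # rolling window of the last 100 episode scores
    for i_episode in range(1, n_episodes + 1):
        scores = np.zeros(2)  # per-agent returns for the current episode
        ...                   # roll out one episode, accumulating rewards into scores
        # the bookkeeping shown above then appends max(scores) and checks the window mean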
"Pong-v0", "MsPacman-v0", "SpaceInvaders-v0", "Seaquest-v0", "LunarLanderV2", "Reacher-v2", "FrozenLake-v0" ] env = gym.make("BipedalWalker-v2") obs, rew, done, ep_ret, ep_len = env.reset(), 0, False, 0, 0 epochs = 100 steps_per_epoch = 50 max_ep_len = 500 replay_size = int(1e6) start_steps = 2000 batch_size = 64 tf.set_random_seed(0) agent = DDPGAgent(env.observation_space, env.action_space) buffer = DDPGReplayBuffer(env.observation_space.shape[0], env.action_space.shape[0], size=replay_size) rewards = [0] q_losses = [] pi_losses = [] total_steps = steps_per_epoch * epochs ep_ret = 0 ep_len = 0 for t in tqdm(range(5000)): if t > start_steps: act = agent.get_action(obs) # print(act) else:
    # Begin simulation
    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    state = env_info.vector_observations[0]             # get the current state
    score = 0                                           # initialize the score
    while True:
        action = agent.act(state, add_noise=False)    # select an action (without noise)
        env_info = env.step(action)[brain_name]       # send the action to the environment
        next_state = env_info.vector_observations[0]  # get the next state
        reward = env_info.rewards[0]                  # get the reward
        done = env_info.local_done[0]                 # see if the episode has finished
        score += reward                               # update the score
        state = next_state                            # roll over the state to the next time step
        if done:                                      # exit the loop when the episode ends
            break
    print("Score: {}".format(score))


if __name__ == "__main__":
    # instantiate the agent and load the trained weights
    agent = DDPGAgent(state_size=33, action_size=4,
                      model=(Actor, Critic), random_seed=0)
    agent.actor_local.load_state_dict(torch.load('DDPG_actor.pth'))

    # run the simulation with the specified agent
    print('Running simulation with DDPG agent')
    run(agent)
    env.close()
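One caveat, assuming the checkpoint may have been saved from a GPU run: `torch.load` would then try to restore the tensors onto CUDA, which fails on a CPU-only machine. Passing `map_location` makes the load device-agnostic:

# load the checkpoint onto the CPU regardless of where it was saved
state_dict = torch.load('DDPG_actor.pth', map_location=torch.device('cpu'))
agent.actor_local.load_state_dict(state_dict)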
# create experience buffer
buffer = ExperienceBuffer(osize, asize, max_len=params["BUFFER_LENGTH"])

# create noise models
np.random.seed(0)  # set the numpy seed

# create actor network and its target
actor = DeterministicActor(osize, asize, seed=0).to(device)
target_actor = DeterministicActor(osize, asize, seed=0).to(device)

# create critic network and its target
critic = QCritic(osize, asize, seed=0).to(device)
target_critic = QCritic(osize, asize, seed=0).to(device)

# create DDPG agents; both agents share the same networks and replay buffer
agent_0 = DDPGAgent(actor, critic, target_actor, target_critic, buffer, params)
agent_1 = DDPGAgent(actor, critic, target_actor, target_critic, buffer, params)

# ------ Train loop -------
for ep_count in range(1, MAX_EPISODES):
    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]
    states = env_info.vector_observations
    ep_reward = np.zeros(num_agents)
    ep_steps = 1
    while True: