import datetime

import gym
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter

# DDPG_Agent and get_env_params are project-local (their module paths are not
# shown in this fragment).


def train(args):
    # Seed NumPy, PyTorch, and the environment for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    env = gym.make(args.env)
    env.seed(args.seed)
    env_params = get_env_params(env)
    agent = DDPG_Agent(args, env_params)
    # Log training curves to a timestamped TensorBoard run directory.
    logger = SummaryWriter(log_dir='results/DDPG_{}_{}_{}'.format(
        args.env, args.seed,
        datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")))
    agent.train(env, logger)
def evaluate(args):
    # Use the same seeding as train() so evaluation is reproducible.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    env = gym.make(args.env)
    env.seed(args.seed)
    env_params = get_env_params(env)
    agent = DDPG_Agent(args, env_params)
    agent.load_model(remark=args.load_model_remark)
    # Roll out 200 evaluation episodes with rendering enabled.
    for i in range(200):
        agent.evaluate(env, render=True)
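# A minimal sketch of a command-line entry point wiring the two functions
# above together. The flag names and defaults below are assumptions; the real
# script presumably also defines the hyperparameter flags that DDPG_Agent
# reads from args.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='DDPG training/evaluation')
    parser.add_argument('--env', type=str, default='Pendulum-v1')  # assumed default
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--evaluate', action='store_true',
                        help='load a saved model and render instead of training')
    parser.add_argument('--load-model-remark', type=str, default=None)  # assumed flag
    args = parser.parse_args()

    if args.evaluate:
        evaluate(args)
    else:
        train(args)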
def __init__(self, num_agents=2, state_size=24, action_size=2, random_seed=0, TD3=False):
    """Initialize a multi-agent wrapper.

    Params
    ======
        num_agents (int): number of agents in the environment
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        random_seed (int): random seed
        TD3 (bool): if True, build TD3 sub-agents instead of DDPG sub-agents
    """
    self.num_agents = num_agents
    self.state_size = state_size
    self.action_size = action_size
    if TD3:
        self.agents = [TD3_Agent(state_size, action_size, i + 1, random_seed)
                       for i in range(num_agents)]
    else:
        self.agents = [DDPG_Agent(state_size, action_size, i + 1, random_seed)
                       for i in range(num_agents)]
    # Replay memory shared by all sub-agents.
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed=0)
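# A minimal sketch of how this wrapper might fan a joint observation out to
# its sub-agents. An act(state, add_noise) method on the underlying DDPG/TD3
# agents is an assumption here; it is not shown in the snippet above. Assumes
# numpy is imported as np in this module.
def act(self, states, add_noise=True):
    """Return one action per agent, stacked along the first axis.

    states is expected to have shape (num_agents, state_size).
    """
    actions = [agent.act(state, add_noise)
               for agent, state in zip(self.agents, states)]
    return np.stack(actions)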
if __name__ == '__main__':
    num_agents, state_size, action_size = check_env(env)
    # Experiment 1 (disabled): larger actor learning rate, small tau.
    '''agent_1 = DDPG_Agent(state_size, action_size, num_agents, lr_critic=0.0004,
                         lr_actor=0.003, gamma=0.99, tau=0.003, update_every=1,
                         weight_decay=0)
    init_1 = initialize(agent_1, n_episodes=10000, max_t=3000)
    scores_agent_1 = init_1.train(1)'''

    # Experiment 2: very small critic learning rate, large tau.
    agent_2 = DDPG_Agent(state_size, action_size, num_agents, lr_critic=0.00001,
                         lr_actor=0.0005, tau=0.05, update_every=1, weight_decay=0)
    init_2 = initialize(agent_2, n_episodes=10000, max_t=3000)
    scores_agent_2 = init_2.train(2)

    # Experiment 3: less frequent updates (every 10 steps).
    agent_3 = DDPG_Agent(state_size, action_size, num_agents, lr_critic=0.00005,
                         lr_actor=0.0001, tau=0.01, update_every=10, weight_decay=0)
    init_3 = initialize(agent_3, n_episodes=10000, max_t=3000)
                           display_freq=self.d_freq,
                           save_at_checkpoint=self.save_at_checkpoint)
        # Persist the trained actor/critic weights for this experiment number.
        torch.save(self.agent.actor_local.state_dict(),
                   file_path + 'ddpg_{}_actor_multiple_agents.pth'.format(exp_number))
        torch.save(self.agent.critic_local.state_dict(),
                   file_path + 'ddpg_{}_critic_multiple_agents.pth'.format(exp_number))
        ts.close()
        return scores_agents


if __name__ == '__main__':
    num_agents, state_size, action_size = check_env(env)
    '''agent_1 = DDPG_Agent(state_size, action_size, num_agents, lr_critic=0.00005,
                         lr_actor=0.00005, tau=0.075, update_every=2, weight_decay=0.15)
    init_1 = initialize(agent_1, n_episodes=10000, max_t=3000)
    scores_agent_1 = init_1.train(1)

    agent_2 = DDPG_Agent(state_size, action_size, num_agents, lr_critic=0.001,
                         lr_actor=0.10, tau=0.05, update_every=2, weight_decay=0.15)
    init_2 = initialize(agent_2, n_episodes=10000, max_t=3000)
    scores_agent_2 = init_2.train(2)

    agent_3 = DDPG_Agent(state_size, action_size, num_agents,
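# A minimal sketch of restoring one of the checkpoints saved above for
# evaluation. The Actor/Critic constructors and file_path are assumptions
# based on the save calls; only the torch.load/load_state_dict calls are
# standard PyTorch API.
def load_experiment(exp_number, actor, critic, file_path=''):
    # map_location lets checkpoints trained on a GPU load on a CPU-only machine.
    actor.load_state_dict(torch.load(
        file_path + 'ddpg_{}_actor_multiple_agents.pth'.format(exp_number),
        map_location='cpu'))
    critic.load_state_dict(torch.load(
        file_path + 'ddpg_{}_critic_multiple_agents.pth'.format(exp_number),
        map_location='cpu'))
    actor.eval()
    critic.eval()
    return actor, critic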
    for b in range(len(env.player.bullets)):
        # Remove the first moving bullet from the physics space and the
        # player's bullet list.
        if int(env.player.bullets[b].b_circle_shape.body.velocity[0]) != 0:
            space.remove(arbiter.shapes[1].body, arbiter.shapes[1])
            env.player.bullets.pop(b)
            break


pygame.init()
screen = pygame.display.set_mode((1000, 800))
clock = pygame.time.Clock()

# Pymunk physics space with downward gravity.
space = pymunk.Space()
space.gravity = (0, 100)

env = Environment(space, screen)
agent = DDPG_Agent(state_space_size=3, action_space_size=1, random_seed=10)

# Route all collisions through the custom begin/post-solve callbacks.
handler = space.add_default_collision_handler()
handler.begin = coll_begin
handler.post_solve = coll_post


def train_agent(episodes):
    max_timesteps = 1000
    scores_deque = deque(maxlen=100)  # rolling window of the last 100 episode scores
    scores = []
    max_score = -np.Inf
    for episode in range(1, episodes + 1):
        state = env.reset()
        agent.reset()
        episode_reward = 0
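# A minimal sketch of the collision callbacks assigned above. Pymunk calls
# begin()/post_solve() with (arbiter, space, data), and begin() must return a
# bool deciding whether the collision is processed. The reward-flag example in
# coll_post() is hypothetical, not this environment's actual logic.
def coll_begin(arbiter, space, data):
    # Process every contact; returning False here would ignore the collision.
    return True


def coll_post(arbiter, space, data):
    # Example: flag a hit so the environment can assign a reward this step.
    env.bullet_hit = True  # hypothetical attribute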
import random
from collections import deque

import gym
import numpy as np
import torch

from agent import DDPG_Agent

env = gym.make('BipedalWalker-v3')
env.seed(10)
agent = DDPG_Agent(state_space_size=env.observation_space.shape[0],
                   action_space_size=env.action_space.shape[0],
                   random_seed=10)


def train_agent(episodes):
    max_timesteps = 700
    scores_deque = deque(maxlen=100)  # rolling window of the last 100 episode scores
    scores = []
    max_score = -np.Inf
    for episode in range(1, episodes + 1):
        state = env.reset()
        agent.reset()
        episode_reward = 0
        for t in range(max_timesteps):
            action = agent.current_action(state)
            print(action)  # debug output of the raw action vector
            next_state, reward, done, _ = env.step(action)
            agent.step(state, action, reward, next_state, done)
            state = next_state
            episode_reward += reward
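            # --- assumed continuation (sketch): the original snippet is
            # truncated here; a typical ending breaks on done and tracks a
            # rolling average of scores ---
            if done:
                break
        scores_deque.append(episode_reward)
        scores.append(episode_reward)
        max_score = max(max_score, episode_reward)
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            episode, np.mean(scores_deque)), end='')
    return scores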