print('Number of agents:', num_agents)

# Size of each action.
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)

# Examine the state space: one row of observations per agent.
states = env_info.vector_observations
state_size = states.shape[1]
print('There are {} agents. Each observes a state with length: {}'.format(
    states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

# Two agent handles that act separately but learn one shared policy:
# agent_2 is re-pointed at agent_1's replay memory and actor/critic networks.
agent_1 = Agent(state_size=state_size, action_size=action_size, random_seed=2)
agent_2 = Agent(state_size=state_size, action_size=action_size, random_seed=3)
agent_2.memory = agent_1.memory
agent_2.actor_local = agent_1.actor_local
agent_2.actor_target = agent_1.actor_target
agent_2.critic_local = agent_1.critic_local
agent_2.critic_target = agent_1.critic_target
# Share the optimizers as well. agent_2's own optimizers presumably were
# built over the networks it created in Agent.__init__; once those networks
# are replaced above, stepping them would never touch the shared weights.
# NOTE(review): assumes Agent exposes `actor_optimizer` / `critic_optimizer`
# -- confirm against the Agent class.
agent_2.actor_optimizer = agent_1.actor_optimizer
agent_2.critic_optimizer = agent_1.critic_optimizer

# Training-loop bookkeeping.
t_max = 1000
print_every = 100
maxlen = 100
score = []
ev_score = []
scores_deque = deque(maxlen=maxlen)  # sliding window holding the last `maxlen` entries

# Run env.n_episodes episodes, numbered from 1.
# NOTE(review): `env.n_episodes` is read from the environment object --
# confirm that attribute exists on the `env` used here.
for i_episode in range(1, env.n_episodes + 1):
    env_info = env.reset(train_mode=True)[brain_name]  # reset the environment
    states = env_info.vector_observations  # get the current state (for each agent)
    scores = np.zeros(num_agents)  # initialize the score (for each agent)
# Build the environment wrapper with graphics enabled (no_graphics=False).
env = UnityEnvWrapper(no_graphics=False)

# Both agents are sized with one input more than env.n_states() reports.
# NOTE(review): what the extra state entry carries is not visible here
# (presumably a per-agent indicator) -- confirm against the training loop.
agent1 = Agent(state_size=env.n_states() + 1, action_size=env.n_actions(), random_seed=2)
agent2 = Agent(state_size=env.n_states() + 1, action_size=env.n_actions(), random_seed=2)

# agent2 is re-pointed at agent1's critic, actor, optimizers and replay
# memory, so both handles read and update the same objects.
agent2.critic_local = agent1.critic_local
agent2.critic_target = agent1.critic_target
agent2.critic_optimizer = agent1.critic_optimizer

agent2.actor_local = agent1.actor_local
agent2.actor_target = agent1.actor_target
agent2.actor_optimizer = agent1.actor_optimizer

agent2.memory = agent1.memory

print(env.n_agents(), env.n_states(), env.n_actions())


def save(i_episode, scores1, scores2, mean_scores):
    # Write the local actor and critic weights of both agents to checkpoint
    # files in the current working directory.
    # Because agent2's networks are the same objects as agent1's (see the
    # assignments above), the *_1 and *_2 files hold identical weights.
    print("Saving checkpoints...")
    torch.save(agent1.actor_local.state_dict(), 'checkpoint_actor_1.pth')
    torch.save(agent2.actor_local.state_dict(), 'checkpoint_actor_2.pth')
    torch.save(agent1.critic_local.state_dict(), 'checkpoint_critic_1.pth')
    torch.save(agent2.critic_local.state_dict(), 'checkpoint_critic_2.pth')
    # Persist run progress as a dict, starting with the episode index.
    torch.save(dict(episode=i_episode,