Example no. 1
def get_agent_unity():
    sys.path.append(UNITY_PYTHONPATH)
    from unityagents import UnityEnvironment
    env = UnityEnvironment(file_name=FPATH, seed=RANDOM_SEED)
    brain_name = env.brain_names[BRAIN_INDEX]
    brain = env.brains[brain_name]
    state_size = brain.vector_observation_space_size
    action_size = brain.vector_action_space_size
    agent = Agent(state_size=state_size,
                  action_size=action_size,
                  random_seed=RANDOM_SEED)
    return env, agent
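A note on Example no. 1: the function relies on names defined elsewhere in its repository, namely the constants UNITY_PYTHONPATH, FPATH, RANDOM_SEED and BRAIN_INDEX, an import of sys, and an Agent class from the author's own DDPG code. A hypothetical setup (all paths and values below are placeholders, not taken from the original project) could look like:

import sys

UNITY_PYTHONPATH = "./python"             # placeholder: folder containing the unityagents package
FPATH = "./Tennis_Linux/Tennis.x86_64"    # placeholder: path to the Unity executable
RANDOM_SEED = 0
BRAIN_INDEX = 0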
Example no. 2
def play():
    environment = Environment(env_path="envs/3/Tennis.exe", train_mode=False)

    num_agents = environment.get_number_of_agents()
    state_size = len(environment.get_current_state()[0])
    action_size = environment.get_number_of_actions()

    agent = Agent(state_size=state_size,
                  action_size=action_size,
                  random_seed=0,
                  num_agents=num_agents)
    play_env(environment, agent, num_agents)
Example no. 3
    def __init__(self, state_size, action_size, num_agents, random_seed):

        self.state_size = state_size
        self.action_size = action_size
        self.random_seed = random.seed(random_seed)

        # create the agents and store them in a list
        self.agents = [Agent(state_size, action_size, num_agents, random_seed) for i in range(num_agents)]

        # Replay memory
        self.memory = ReplayBuffer(BUFFER_SIZE, BATCH_SIZE, random_seed)
        self.step_count = 0
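None of the examples include the ReplayBuffer class they construct. Below is a minimal uniform-sampling sketch matching the ReplayBuffer(BUFFER_SIZE, BATCH_SIZE, random_seed) call in the preceding snippet; other examples pass extra arguments such as action_size or num_agents, so this signature is an assumption rather than the original class.

import random
from collections import deque, namedtuple

import numpy as np
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class ReplayBuffer:
    """Fixed-size buffer that stores experience tuples and samples them uniformly at random."""

    def __init__(self, buffer_size, batch_size, seed):
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])
        random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        """Store a single transition."""
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        """Draw a random minibatch and return it as torch tensors."""
        experiences = random.sample(self.memory, k=self.batch_size)
        states = torch.from_numpy(np.vstack([e.state for e in experiences])).float().to(device)
        actions = torch.from_numpy(np.vstack([e.action for e in experiences])).float().to(device)
        rewards = torch.from_numpy(np.vstack([e.reward for e in experiences])).float().to(device)
        next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences])).float().to(device)
        dones = torch.from_numpy(np.vstack([e.done for e in experiences]).astype(np.uint8)).float().to(device)
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.memory)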
Example no. 4
def ddpg_train(n_episodes, seed, buffer_size, batch_size, gamma, tau, lr_actor,
               lr_critic, weight_decay):
    memory = ReplayBuffer(action_size, buffer_size, batch_size, seed)
    agents = [
        Agent(state_size, action_size, seed, buffer_size, batch_size, gamma,
              tau, lr_actor, lr_critic, weight_decay, memory)
        for _ in range(num_agents)
    ]
    load(agents)
    scores_deque = deque(maxlen=100)
    scores = []

    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations
        episode_scores = np.zeros(num_agents)
        while True:
            for agent in agents:
                agent.reset()
            actions = list()
            for agent, state in zip(agents, states):
                actions.append(agent.act(state))
            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations
            rewards = env_info.rewards
            dones = env_info.local_done
            for agent, state, action, reward, next_state, done in zip(
                    agents, states, actions, rewards, next_states, dones):
                agent.step(state, action, reward, next_state, done)
            states = next_states
            episode_scores += np.array(rewards)
            if np.any(dones):
                break
        score = episode_scores.max()
        scores_deque.append(score)
        scores.append(score)
        print('\rEpisode: \t{} \tScore: \t{:.2f} \tAverage Score: \t{:.2f}'.
              format(i_episode, np.mean(score), np.mean(scores_deque)),
              end="")
        if i_episode % 10 == 0:
            save(agents)
        if np.mean(scores_deque) >= 0.5:
            print(
                '\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'
                .format(i_episode, np.mean(scores_deque)))
            break
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.grid()
    ax.plot(np.arange(len(scores)), scores)
    ax.set(xlabel="Episode #", ylabel="Score", title="DDPG Network")
    fig.savefig("ddpg_network.pdf")
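The save(agents) and load(agents) helpers called in the training loop above (and again in the ddpg_test snippet near the end) are not shown. A hypothetical version, assuming each Agent exposes actor_local and critic_local networks as in the other examples and using placeholder file names, could be:

import os

import torch


def save(agents, prefix="checkpoint"):
    # persist each agent's local actor and critic weights
    for i, agent in enumerate(agents):
        torch.save(agent.actor_local.state_dict(), "{}_actor_{}.pth".format(prefix, i))
        torch.save(agent.critic_local.state_dict(), "{}_critic_{}.pth".format(prefix, i))


def load(agents, prefix="checkpoint"):
    # restore weights when checkpoints exist; otherwise the agents keep their initial weights
    for i, agent in enumerate(agents):
        actor_path = "{}_actor_{}.pth".format(prefix, i)
        critic_path = "{}_critic_{}.pth".format(prefix, i)
        if os.path.isfile(actor_path) and os.path.isfile(critic_path):
            agent.actor_local.load_state_dict(torch.load(actor_path))
            agent.critic_local.load_state_dict(torch.load(critic_path))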
Example no. 5
    def __init__(self, config):
        self.config = config
        # Replay memory
        self.memory = ReplayBuffer(self.config.action_size,
                                   self.config.buffer_size,
                                   self.config.batch_size, self.config.seed)
        self.agents = [
            Agent(self.config) for _ in range(self.config.num_agents)
        ]
        # 'action_size', 'num_agents', and 'random_seed'
        #self.agents = [Agent(self.config, self.config.action_size, self.config.num_agents, self.config.random_seed) for _ in range(self.config.num_agents)]
        self.t_step = 0
        self.loss = (0.0, 0.0)
Example no. 6
    def __init__(self, num_agents, state_size, action_size, random_seed):
        super(MADDPG, self).__init__()

        self.state_size = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        self.seed = random.seed(random_seed)

        self.discount_factor = GAMMA
        self.tau = TAU
        self.iter = 0

        self.maddpg_agents = [Agent(state_size, action_size, random_seed) for i in range(num_agents)]
Example no. 7
def load_trained_agent(filepath):
    """ Load the results an parameters of a trained agent"""
    checkpoint = torch.load(filepath)
    agent = Agent(state_size=checkpoint['state_size'],
                  action_size=checkpoint['action_size'],
                  random_seed=checkpoint['seed'],
                  hidden_layers=checkpoint['hidden_layers'],
                  n_agents=checkpoint['n_agents'])

    agent.actor_local.load_state_dict(checkpoint['al_state_dict'])
    agent.critic_local.load_state_dict(checkpoint['cl_state_dict'])

    return agent
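load_trained_agent expects a checkpoint dictionary with specific keys. A hypothetical counterpart that writes such a checkpoint, assuming the agent keeps state_size and action_size attributes as in the other examples, might look like:

import torch


def save_trained_agent(agent, filepath, seed, hidden_layers, n_agents):
    """Hypothetical helper: store hyperparameters next to the weights so load_trained_agent can rebuild the Agent."""
    checkpoint = {'state_size': agent.state_size,
                  'action_size': agent.action_size,
                  'seed': seed,
                  'hidden_layers': hidden_layers,
                  'n_agents': n_agents,
                  'al_state_dict': agent.actor_local.state_dict(),
                  'cl_state_dict': agent.critic_local.state_dict()}
    torch.save(checkpoint, filepath)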
Example no. 8
    def __init__(self):
        # Create the Gym environment
        self.env = gym.make('DeeplengDocking-v1')
        rospy.loginfo("Gym environment done")
        self.agent = Agent(state_size=13, action_size=3, random_seed=2)

        # Set the logging system
        rospack = rospkg.RosPack()
        pkg_path = rospack.get_path('deepleng_control')
        outdir = pkg_path + '/training_results'
        # env = wrappers.Monitor(env, outdir, force=True)
        # rospy.loginfo("Monitor Wrapper started")
        self.max_episodes = 200
        self.max_timesteps = 1000
Example no. 9
    def __init__(self, state_size, action_size, n_agents, random_seed):
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)

        self.ma = [
            Agent(state_size, action_size, i, n_agents, random_seed)
            for i in range(n_agents)
        ]

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed)
        self.t_step = 0
Example no. 10
    def __init__(self, state_size, action_size, num_agents, random_seeds):

        self.state_size = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        self.random_seeds = random_seeds
        self.agents = [
            Agent(self.state_size, self.action_size, random_seeds[i])
            for i in range(self.num_agents)
        ]
        self.memory = ReplayBuffer(action_size,
                                   BUFFER_SIZE,
                                   BATCH_SIZE,
                                   seed=7)
Example no. 11
    def __init__(self, state_size, action_size, seed = 42):
        super(MADDPG, self).__init__()

        self.agents = [Agent(state_size, action_size, lr_actor=LR_ACTOR, lr_critic=LR_CRITIC, agent_number=0, epsilon=EPSILON,
                             epsilon_decay=EPSILON_DECAY, weight_decay=WEIGHT_DECAY, clipgrad=CLIPGRAD), 
                       Agent(state_size, action_size, lr_actor=LR_ACTOR, lr_critic=LR_CRITIC, agent_number=1, epsilon=EPSILON,
                             epsilon_decay=EPSILON_DECAY, weight_decay=WEIGHT_DECAY, clipgrad=CLIPGRAD)]
        
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        
        # Init tracking of params
        wandb.login()
        wandb.init(project=project_name, name=name, config={"buffer_size": BUFFER_SIZE,
                                                          "batch_size": BATCH_SIZE,
                                                          "learn_every": LEARN_EVERY,
                                                          "learn_number": LEARN_NUMBER,
                                                          "lr_actor": LR_ACTOR,
                                                          "lr_critic": LR_CRITIC,
                                                          "gamma": GAMMA,
                                                          "tau": TAU,
                                                          "epsilon": EPSILON,
                                                          "epsilon_decay": EPSILON_DECAY,
                                                          "weight_decay": WEIGHT_DECAY,
                                                          "clipgrad": CLIPGRAD})
        
        jovian.log_hyperparams(project=project_name, name=name, config={"buffer_size": BUFFER_SIZE,
                                                          "batch_size": BATCH_SIZE,
                                                          "learn_every": LEARN_EVERY,
                                                          "learn_number": LEARN_NUMBER,
                                                          "lr_actor": LR_ACTOR,
                                                          "lr_critic": LR_CRITIC,
                                                          "gamma": GAMMA,
                                                          "tau": TAU,
                                                          "epsilon": EPSILON,
                                                          "epsilon_decay": EPSILON_DECAY,
                                                          "weight_decay": WEIGHT_DECAY,
                                                          "clipgrad": CLIPGRAD})
Example no. 12
    def __init__(self, state_size, action_size, num_agents, random_seed):
        self.state_size = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        self.seed = random.seed(random_seed)

        # Instantiate multiple agents
        self.agents = [
            Agent(state_size, action_size, random_seed, num_agents)
            for i in range(num_agents)
        ]

        # Instantiate the replay memory buffer (shared between agents)
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed)
Example no. 13
def ddpg(n_episodes=2000, store_every=10):
    scores_deque = deque(maxlen=store_every)
    scores = []

    agents = Agent(state_size=state_size,
                   action_size=action_size,
                   num_agents=num_agents,
                   random_seed=0)

    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=GRAPHICS_OFF)[brain_name]
        state = env_info.vector_observations
        agents.reset()
        score = np.zeros(num_agents)
        while True:
            action = agents.act(state)

            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations
            rewards = env_info.rewards
            dones = env_info.local_done

            agents.step(state, action, rewards, next_state, dones)
            state = next_state
            score += rewards

            if np.any(dones):
                break
        scores_deque.append(np.mean(score))
        scores.append(np.mean(score))
        avg_score = np.mean(scores_deque)

        print('\rEpisode {}\tAverage Score: {:.2f}\tScore: {:.2f}\t {}'.format(
            i_episode, np.mean(scores_deque), np.mean(score),
            strftime("%H:%M:%S", gmtime())),
              end="")
        if i_episode % store_every == 0 or avg_score >= TARGET_SCORE:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, avg_score))

            if avg_score >= TARGET_SCORE:
                torch.save(agents.actor_local.state_dict(),
                           "ckpt/{}".format(ACTOR_CHECKPOINT_NAME))
                torch.save(agents.critic_local.state_dict(),
                           "ckpt/{}".format(CRITIC_CHECKPOINT_NAME))
                break

    return scores
Example no. 14
def load_agents(state_size, action_size, num_agents, memory):
    # create agents
    agents = []
    for i in range(num_agents):
        agents.append(Agent(state_size, action_size, 1, memory))
    # load checkpoints
    for i in range(len(agents)):
        checkpoint = torch.load('./data/checkpoint_agent_{}_qnetwork_local.pth'.format(i))
        agents[i].qnetwork_local.load_state_dict(checkpoint['state_dict'])
        checkpoint = torch.load('./data/checkpoint_agent_{}_qnetwork_target.pth'.format(i))
        agents[i].qnetwork_target.load_state_dict(checkpoint['state_dict'])
        checkpoint = torch.load('./data/checkpoint_agent_{}_munetwork_local.pth'.format(i))
        agents[i].munetwork_local.load_state_dict(checkpoint['state_dict'])
        checkpoint = torch.load('./data/checkpoint_agent_{}_munetwork_target.pth'.format(i))
        agents[i].munetwork_target.load_state_dict(checkpoint['state_dict'])
    return agents
Example no. 15
def main(args):
    print(args)

    env = UnityEnvironment(file_name=args.path)

    env_wr = EnvWrapper(env)

    agent = Agent(state_size=33, action_size=4, random_seed=10)

    scores = train(env_wr, agent, n_episodes=args.episodes)

    fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    ax.plot(scores)
    ax.set_xlabel("Episodes")
    ax.set_ylabel("Score")
    fig.savefig("scores.png")
    env_wr.close()
Example no. 16
    def __init__(self, agent_count, state_size, action_size, random_seed):

        self.action_size = action_size
        self.state_size = state_size
        self.agent_count = agent_count
        self.agents = [Agent(agent_count, state_size, action_size, random_seed) for _ in range(agent_count) ]

        random.seed(random_seed)

        # Noise process
        self.noise = OUNoise(action_size, random_seed)
        self.exploration = 1.0

        # Replay memory
        self.memory = ReplayBuffer(BUFFER_SIZE, BATCH_SIZE, random_seed)

        self.step_count = 0
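The OUNoise process used in the preceding snippet is not shown. A common Ornstein-Uhlenbeck implementation with the same OUNoise(action_size, random_seed) constructor, offered here as an assumption rather than the original class, is:

import copy
import random

import numpy as np


class OUNoise:
    """Ornstein-Uhlenbeck process for temporally correlated exploration noise."""

    def __init__(self, size, seed, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        random.seed(seed)
        self.reset()

    def reset(self):
        """Reset the internal state to the mean."""
        self.state = copy.copy(self.mu)

    def sample(self):
        """Update the internal state with one Euler step and return it as a noise sample."""
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.array([random.random() for _ in range(len(x))])
        self.state = x + dx
        return self.state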
Example no. 17
def ddpg_train(n_episodes, seed, buffer_size, batch_size, gamma, tau, lr_actor,
               lr_critic, weight_decay):
    scores = []
    scores_deque = deque(maxlen=100)
    agent = Agent(n_agents, state_size, action_size, seed, buffer_size,
                  batch_size, gamma, tau, lr_actor, lr_critic, weight_decay)
    load(agent)
    for i_episode in range(n_episodes):
        env_info = env.reset(
            train_mode=True)[brain_name]  # reset the environment
        states = env_info.vector_observations
        agent.reset()  # reset the agent noise
        score = np.zeros(n_agents)
        while True:
            actions = agent.act(states)
            # send the action to the environment
            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations  # get the next state
            rewards = env_info.rewards  # get the reward
            dones = env_info.local_done  # see if episode has finished
            agent.step(states, actions, rewards, next_states, dones)
            score += rewards  # update the score
            states = next_states  # roll over the state to next time step
            if np.any(dones):  # exit loop if episode finished
                break
        scores.append(np.mean(score))
        scores_deque.append(np.mean(score))
        print('\rEpisode: \t{} \tScore: \t{:.2f} \tAverage Score: \t{:.2f}'.
              format(i_episode, np.mean(score), np.mean(scores_deque)),
              end="")
        if i_episode % 10 == 0:
            torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
            torch.save(agent.critic_local.state_dict(),
                       'checkpoint_critic.pth')
        if np.mean(scores_deque) >= 30.0:
            print(
                '\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'
                .format(i_episode, np.mean(scores_deque)))
            break
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.grid()
    ax.plot(np.arange(len(scores)), scores)
    ax.set(xlabel="Episode #", ylabel="Score", title="DDPG Network")
    fig.savefig("ddpg_network.pdf")
Example no. 18
    def __init__(self,
                 state_size,
                 action_size,
                 discount_factor=0.95,
                 tau=0.02):

        self.num_agents = 2
        self.maddpg = [
            Agent(state_size, action_size, 2) for i in range(self.num_agents)
        ]
        self.state_size = state_size
        self.action_size = action_size
        self.discount_factor = discount_factor
        self.tau = tau
        self.iter = 0
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   num_agents)  #defined in the function setup
        self.t_step = 0
Example no. 19
def main():

    # load version 1 (with 1 agent) of the environment
    env_name = r"Reacher_Windows_x86_64_version1\Reacher.exe" # Add the Unity Reacher Environment name
    no_graphics = True
    env = UnityEnvironment(file_name = env_name, no_graphics = no_graphics)

    # Environments contain brains which are responsible for deciding the actions of their associated agents. Here we check for the first brain available, and set it as the default brain we will be controlling from Python.

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode = True)[brain_name]

    # number of agents
    num_agents = len(env_info.agents)
    print("Number of agents : ", num_agents)

    # size of each action
    action_size = brain.vector_action_space_size
    print("Size of each action : ", action_size)

    # examine the state space
    states = env_info.vector_observations
    state_size = states.shape[1]
    print("There are {} agents. Each observes a state with length: {}".format(states.shape[0], state_size))
    print('The state for the first agent looks like:', states[0])

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    random_seed = 12345 #10
    agent = Agent(state_size, action_size, random_seed, device = device)

    scores = train_ddpg_v1(env, agent, n_episodes = 300)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(np.arange(1, len(scores)+1), scores)
    plt.ylabel('Score')
    plt.xlabel('Episode #')
    plt.show()
Example no. 20
    def __init__(self, agent_count, state_size, action_size, random_seed):
        """Initialize a MultiAgent object.

        Params
        ======
            agent_count (int): Number of agents
        """

        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed)

        self.agents = [
            Agent(
                memory=self.memory,
                state_size=state_size,
                action_size=action_size,
                random_seed=random_seed,
            ) for _ in range(agent_count)
        ]
Example no. 21
def launch(app_path, train_or_test, save_or_load_path, hyper_file):
    env = UnityEnvironment(file_name=app_path)
    
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
        
    # reset the environment
    env_info = env.reset(train_mode=train_or_test)[brain_name]

    # number of agents
    num_agents = len(env_info.agents)
    print('Number of agents:', num_agents)

    # size of each action
    action_size = brain.vector_action_space_size
    print('Size of each action:', action_size)

    # examine the state space 
    states = env_info.vector_observations
    state_size = states.shape[1]
    print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))

    agent = Agent(state_size=state_size, action_size=action_size, n_agents=num_agents, random_seed=42)    
    
    if train_or_test:
        if hyper_file is None:
            scores = train_ddpg(env, agent, num_agents, save_or_load_path)
        else:
            with open(hyper_file) as f:
                variables = json.load(f)
                if len(list(set(variables.keys()) & set(["n_episodes", "max_t", "print_every"]))) != 3:
                    print("Parameters file is not well specified")
                    pass
                else:
                    scores = train_ddpg(env, agent, num_agents, save_or_load_path, variables["n_episodes"], variables["max_t"], variables["print_every"])

        plot_scores(scores, True)
        
    else:
        agent.qnetwork_local.load_state_dict(torch.load(save_or_load_path))
        test_ddpg(env, agent, num_agents)
    
    env.close()
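launch() in Example no. 21 only accepts hyper_file if the JSON contains exactly the keys n_episodes, max_t and print_every. A hypothetical file with placeholder values could be produced like this:

import json

hyperparameters = {"n_episodes": 2000,  # placeholder values, not tuned
                   "max_t": 1000,
                   "print_every": 100}
with open("hyperparameters.json", "w") as f:
    json.dump(hyperparameters, f, indent=4)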
Example no. 22
def main():
    env = UnityEnvironment(file_name='data/Reacher_Linux/Reacher.x86_64')
    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]
    # number of agents
    num_agents = len(env_info.agents)
    # size of each action
    action_size = brain.vector_action_space_size
    # examine the state space
    states = env_info.vector_observations
    state_size = states.shape[1]

    n_agent = 20
    agent = Agent(state_size=state_size,
                  action_size=action_size,
                  random_seed=2,
                  n_agent=n_agent)
    # load trained model
    agent.actor_local.load_state_dict(torch.load('model/checkpoint_actor.pth'))
    agent.critic_local.load_state_dict(
        torch.load('model/checkpoint_critic.pth'))

    state = env.reset()
    env_info = env.reset(train_mode=False)[brain_name]
    state = env_info.vector_observations
    for t in range(1000):
        action = [
            agent.act(state[agent_x], agent_x, add_noise=False)
            for agent_x in range(n_agent)
        ]
        env_info = env.step(action)[brain_name]
        next_state = env_info.vector_observations
        reward = env_info.rewards
        done = env_info.local_done
        state = next_state
        if all(done):
            break

    env.close()
Example no. 23
def trained_agent():
    agent = Agent(n_agents, state_size, action_size, 0, 0, 0, 0, 0, 0, 0, 0)
    load(agent)
    for episode in range(3):
        env_info = env.reset(train_mode=False)[brain_name]
        states = env_info.vector_observations
        score = np.zeros(n_agents)
        while True:
            actions = agent.act(states, add_noise=False)
            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations
            rewards = env_info.rewards
            dones = env_info.local_done
            score += rewards
            states = next_states
            if np.any(dones):
                break
        print('Episode: \t{} \tScore: \t{:.2f}'.format(episode,
                                                       np.mean(score)))
    env.close()
Example no. 24
def main():
    # select this option to load version 2 (with 20 agents) of the environment
    env = UnityEnvironment(file_name='data/Reacher_Linux/Reacher.x86_64')
    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]
    # number of agents
    n_agent = len(env_info.agents)
    # size of each action
    action_size = brain.vector_action_space_size
    # size of state space
    state_size = env_info.vector_observations.shape[1]
    # train
    agent = Agent(state_size=state_size,
                  action_size=action_size,
                  random_seed=2,
                  n_agent=n_agent)
    scores = ddpg(env, agent, n_agent)
Example no. 25
def ddpg(n_episodes=1000, max_t=500, print_every=100):
    scores_deque = deque(maxlen=print_every)
    scores = []

    # Create the env and the agent
    terminating_angle = 15
    env = CubeEnv(np.deg2rad(terminating_angle))
    agent = Agent(state_size=3, action_size=1, random_seed=2)

    plotter = LivePlotter(env, max_t, terminating_angle, n_episodes)

    for i_episode in range(1, n_episodes + 1):
        state = env.reset()
        agent.reset()
        score = 0
        done = False
        plotter.reset()
        while not done:
            # Select the next action and update system
            action = agent.act(state) * 10
            next_state, reward, done, _ = env.step(action)
            agent.step(state, action, reward, next_state, done)

            # Update plots and metrics
            state = next_state
            score += reward
            plotter.add_data_from_env(env)

        scores_deque.append(score)
        scores.append(score)
        plotter.add_score(score)
        print('\rEpisode {}\tScore: {}'.format(i_episode, score), end="")

        # Display the plots
        plotter.display()

        # Save model
        torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
        torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')

    return scores
Example no. 26
def test():
    agent = Agent(state_size=33, action_size=4, seed=0)
    load_model(agent.critic_local, 'solved_critic_trained_model.pth')
    load_model(agent.actor_local, 'solved_actor_trained_model.pth')
    env_info = env.reset(train_mode=False)[brain_name]

    state = env_info.vector_observations
    score = np.zeros(1)
    while True:
        action = agent.act(state, 0, False)
        env_info = env.step(action)[brain_name]
        next_state = env_info.vector_observations  # get the next state
        reward = env_info.rewards  # get the reward
        done = env_info.local_done  # see if episode has finished

        state = next_state
        score += reward

        if np.any(done):
            print('\r\tTest Score: {:.2f}'.format(score[0]), end="")
            break
Example no. 27
    def __init__(self, state_size, action_size, num_agents, random_seed,
                 config):
        """Initialize the MultAgent object.
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.agents = [
            Agent(state_size, action_size, num_agents, random_seed, config)
            for _ in range(num_agents)
        ]

        self.num_agents = num_agents
        self.action_size = action_size

        # Replay memory
        self.batch_size = config["batch_size"]
        self.memory = ReplayBuffer(action_size, config["buffer_size"],
                                   self.batch_size, random_seed, num_agents)
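The preceding snippet reads its hyperparameters from a plain dictionary. Only batch_size and buffer_size are used here (the Agent constructor may read more), so the config below, with placeholder values, is only a guess at the minimum it needs:

config = {"buffer_size": int(1e5),  # placeholder replay buffer capacity
          "batch_size": 128}        # placeholder minibatch size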
Example no. 28
def ddpg_test():
    agents = [
        Agent(state_size, action_size, 0, 0, 0, 0, 0, 0, 0, 0, 0)
        for _ in range(num_agents)
    ]
    load(agents)
    for i_episode in range(3):
        env_info = env.reset(train_mode=False)[brain_name]
        states = env_info.vector_observations
        while True:
            for agent in agents:
                agent.reset()
            actions = list()
            for agent, state in zip(agents, states):
                actions.append(agent.act(state))
            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations
            dones = env_info.local_done
            states = next_states
            if np.any(dones):
                break
Example no. 29
    def __init__(self, num_agents=2, state_size=24, action_size=2):
        """Initialize a maddpg_agent wrapper.
        Params
        ======
            num_agents (int): the number of agents in the environment
            state_size (int): dimension of each state
            action_size (int): dimension of each action
        """
        self.num_agents = num_agents
        self.state_size = state_size
        self.action_size = action_size

        self.agents = [
            Agent(state_size, action_size, i + 1, random_seed=0)
            for i in range(num_agents)
        ]

        # Replay memory shared by all agents
        self.memory = ReplayBuffer(action_size,
                                   BUFFER_SIZE,
                                   BATCH_SIZE,
                                   seed=0)
Example no. 30
def runner(chkp_actor=None, chkp_critic=None):
    '''
    This function loads the environment and the agent. By default it runs in
    training mode, but if a checkpoint file is passed it runs in eval mode.
    Params
    ======
        chkp_actor (None|file):
            file containing an actor checkpoint saved during training.
        chkp_critic (None|file):
            file containing a critic checkpoint saved during training.
    '''
    # instantiate Unity environment
    env = UnityEnvironment(file_name='./Reacher_Linux/Reacher.x86_64')
    # get first brain
    brain_name = env.brain_names[0]
    # get action size
    brain = env.brains[brain_name]
    action_size = brain.vector_action_space_size
    # get state size
    env_info = env.reset(train_mode=True)[brain_name]
    states = env_info.vector_observations
    state_size = states.shape[1]
    # instantiate the Agent
    agent = Agent(state_size=state_size,
                  action_size=action_size, random_seed=2)

    if chkp_actor:
        cp_actor = torch.load(chkp_actor)
        cp_critic = torch.load(chkp_critic)
        agent.actor_local.load_state_dict(cp_actor)
        agent.critic_local.load_state_dict(cp_critic)
        ddpg(agent, env, brain_name, n_episodes=100, train=False)

    else:
        ddpg(agent, env, brain_name, train=True)

    env.close()