Example 1
def main(unused_argv):
    del unused_argv

    configs = extract_configs(*FLAGS.eval_config)
    # instantiate the Banana env
    env = BananaWrapper(file_name="./Banana")
    state_size = env.observation_size
    action_size = env.action_size
    # instantiate agent object
    agent = Agent(state_size=state_size,
                  action_size=action_size,
                  configs=configs)

    # load trained model
    agent.qnetwork_local.load_state_dict(
        torch.load("results/checkpoints/DoubleDQN.pth"))

    horizon = 1000
    episodes = 5
    for _ in range(episodes):
        state = env.reset()
        for _ in range(horizon):
            # Perform action given the trained policy
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            time.sleep(0.05)
            if done:
                break
            state = next_state

    # close env
    env.close()
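
Note: Example 1 assumes a gym-style BananaWrapper around the Unity "Banana" environment; the wrapper itself is not part of the example. A minimal sketch of such a wrapper (class layout and attribute names are inferred from how Example 1 uses it, not taken from the original source) could look like this:

# Hypothetical sketch of a gym-style wrapper over the Unity Banana environment.
from unityagents import UnityEnvironment


class BananaWrapper:
    def __init__(self, file_name):
        self._env = UnityEnvironment(file_name=file_name)
        self._brain_name = self._env.brain_names[0]
        brain = self._env.brains[self._brain_name]
        self.action_size = brain.vector_action_space_size
        info = self._env.reset(train_mode=False)[self._brain_name]
        self.observation_size = len(info.vector_observations[0])

    def reset(self, train_mode=False):
        info = self._env.reset(train_mode=train_mode)[self._brain_name]
        return info.vector_observations[0]

    def step(self, action):
        info = self._env.step(action)[self._brain_name]
        return (info.vector_observations[0],
                info.rewards[0],
                info.local_done[0])

    def close(self):
        self._env.close()
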
Example 2
def main():

    ################################################
    # components required from main_02.py
    ################################################

    # spin up environment
    env = gym.make('LunarLander-v2')
    env.seed(0)

    # spin up agent (with underlying nn model)
    agent = Agent(state_size=8, action_size=4, seed=0)

    ################################################
    # Import trained agent and render performance
    ################################################

    # load the weights from file
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))

    for i in range(10):
        state = env.reset()
        img = plt.imshow(env.render(mode='rgb_array'))
        for j in range(400):
            action = agent.act(state)
            img.set_data(env.render(mode='rgb_array'))
            plt.axis('off')
            plt.pause(0.001)  # give matplotlib a chance to redraw the frame outside a notebook
            state, reward, done, _ = env.step(action)
            if done:
                break

    env.close()
Example 3
def process(args):
    env = UnityEnvironment(file_name="Banana.app")
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    state = env_info.vector_observations[0]  # get the current state
    score = 0  # initialize the score
    action_size = brain.vector_action_space_size
    state_size = len(state)

    agent = Agent(state_size, action_size, 1, args.model_path)

    while True:
        action = agent.act(state, 0.0)  # select an action
        env_info = env.step(action)[
            brain_name]  # send the action to the environment
        next_state = env_info.vector_observations[0]  # get the next state
        reward = env_info.rewards[0]  # get the reward
        done = env_info.local_done[0]  # see if episode has finished
        score += reward  # update the score
        state = next_state  # roll over the state to next time step
        if done:  # exit loop if episode finished
            break

    print("Score: {}".format(score))
Example 4
def main():
    env = UnityEnvironment(file_name="./../Banana.app")

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # Instantiate agent:
    env_info = env.reset(train_mode=False)[brain_name]
    #agent = Agent(state_size=8, action_size=4, seed=0)
    action_size = brain.vector_action_space_size
    state = env_info.vector_observations[0]
    state_size = len(state)
    agent = Agent(state_size=state_size, action_size=action_size, seed=0)

    # load the weights from file
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))

    for i in range(3):
        #state = env.reset()
        env_info = env.reset(train_mode=False)[brain_name]
        state = env_info.vector_observations[0]
        for j in range(200):
            action = agent.act(state)
            #state, reward, done, _ = env.step(action)
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            if done:
                break
            state = next_state

    env.close()
Example 5
    def test(self, run_id=5):
        agent = Agent(state_size=37, action_size=4, seed=0)

        run_dir = "results/{}".format(run_id)

        # load the weights from file
        agent.qnetwork_local.load_state_dict(
            torch.load("{}/checkpoint.pth".format(run_dir)))

        for i in range(5):

            env_info = self.env.reset(
                train_mode=False)[self.brain_name]  # reset the environment
            state = env_info.vector_observations[0]

            for j in range(50):
                action = agent.act(state)

                env_info = self.env.step(action)[
                    self.brain_name]  # send the action to the environment
                next_state = env_info.vector_observations[
                    0]  # get the next state
                reward = env_info.rewards[0]  # get the reward
                done = env_info.local_done[0]  # see if episode has finished

                state = next_state  # roll over the state to the next time step
                if done:
                    break
Example 6
def test(n_epi):
    agent = Agent(state_size=37, action_size=4, seed=0)
    env = UnityEnvironment(file_name="Banana.app")
    brain_name = env.brain_names[0]  # get the default brain
    brain = env.brains[brain_name]
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))

    scores = []  # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    for i in range(n_epi):
        score = 0  # initialize the score
        env_info = env.reset(
            train_mode=False)[brain_name]  # reset the environment
        state = env_info.vector_observations[0]  # get the current state
        while True:
            action = agent.act(state)
            env_info = env.step(action)[
                brain_name]  # send the action to the environment
            next_state = env_info.vector_observations[0]  # get the next state
            reward = env_info.rewards[0]  # get the reward
            score += reward  # update the score
            done = env_info.local_done[0]  # see if episode has finished
            agent.step(state, action, reward, next_state, done)
            state = next_state  # roll over the state to next time step
            if done:
                break
        scores_window.append(score)  # save most recent score
        scores.append(score)  # save most recent score
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            i, np.mean(scores_window)))

    env.close()
Example 7
def DQN_gif(file_name):
    env = gym.make('LunarLander-v2')
    env.seed(0)

    agent = Agent(state_size=8, action_size=4, seed=0)
    agent.qnetwork_local.load_state_dict(
        torch.load('checkpoint.pth',
                   map_location=lambda storage, loc: storage))

    images = []
    state = env.reset()
    img = env.render(mode='rgb_array')
    for j in range(200):
        action = agent.act(state)
        state, reward, done, _ = env.step(action)
        frame = env.render(mode='rgb_array')

        pil_img = Image.fromarray(frame)
        draw = ImageDraw.Draw(pil_img)
        text = 'Step = {}\nReward = {}'.format(j + 1, reward)
        draw.text((20, 20), text, (255, 255, 255))

        images.append(np.asarray(pil_img))

        if done:
            break
    imageio.mimsave(file_name, images)
Example 8
def main(
    file_name="/Users/joshuaschoenfield/Downloads/Banana.app",
    weights_file="checkpoint_banana_2_LONG_SAFE.pth",
):
    with get_environment(file_name=file_name) as env:
        from dqn_agent import Agent

        agent = Agent(state_size=37, action_size=4, seed=0)

        agent.qnetwork_local.load_state_dict(torch.load(weights_file))
        scores = []
        num_iterations = 100
        for i in range(num_iterations):
            state = reset_and_get_first_state(env, train_mode=True)
            score = 0
            for j in range(2000):
                action = agent.act(state, eps=0)
                # env.render()
                state, reward, done = get_next_state_reward_done(env, action)
                score += reward
                if done:
                    break
            scores.append(score)
            # print(f"Score: {score}")
        # print(f"Average Score: {np.mean(scores)}")
        ax = plot_score_cumulative_distribution(scores)
        ax.figure.savefig("Media/validation_scores_cumulative.png")
        # plt.show()
        np.savetxt("validation_scores.txt", scores)
        return scores
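
Note: Example 8 relies on three helpers that are not reproduced here: get_environment, reset_and_get_first_state and get_next_state_reward_done. A plausible sketch, assuming they are thin wrappers over the same Unity brain API used in the other examples:

# Hypothetical helpers for Example 8; behaviour is inferred from the call sites above.
from contextlib import contextmanager
from unityagents import UnityEnvironment


@contextmanager
def get_environment(file_name):
    env = UnityEnvironment(file_name=file_name)
    try:
        yield env
    finally:
        env.close()


def reset_and_get_first_state(env, train_mode=True):
    brain_name = env.brain_names[0]
    env_info = env.reset(train_mode=train_mode)[brain_name]
    return env_info.vector_observations[0]


def get_next_state_reward_done(env, action):
    brain_name = env.brain_names[0]
    env_info = env.step(action)[brain_name]
    return (env_info.vector_observations[0],
            env_info.rewards[0],
            env_info.local_done[0])
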
Example 9
def main():
    agent = Agent(state_size=3, action_size=8, seed=0)

    start_pos = (200, 600)
    end_pos = (800, 375)
    #start_pos = (200,500)
    #end_pos = (800,500)
    env = environment(MAP, start_pos, end_pos)
    """
	x_end, y_end = end_pos
	plt.figure(figsize=(10,6), dpi=200)
	plt.plot(start_pos[0], start_pos[1], 'rx')
	plt.plot(x_end, y_end, 'bx')
	plt.contourf(np.array(MAP), linestyles='dashed')
	#plt.imshow(np.array(MAP))
	plt.gca().set_aspect('equal', adjustable='box')
	plt.colorbar()
	plt.show()
	sys.exit(())
	"""

    # load the weights from file
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))

    for i in range(1):
        path_x = [start_pos[0]]
        path_y = [start_pos[1]]

        state, _, _ = env.reset(start_pos, end_pos)
        for j in range(6000):
            action = agent.act(state)

            #print (j, action)
            print(j)
            #if j%100 == 0:
            #	env.render()

            state, reward, done = env.step(action)

            path_x.append(state[0])
            path_y.append(state[1])

            if done:
                break

        print(done)
        x_end, y_end = end_pos
        plt.figure(figsize=(10, 6), dpi=200)
        plt.plot(path_x, path_y, 'ro', markevery=20)
        plt.plot(x_end, y_end, 'bx')
        plt.contourf(np.array(MAP), linestyles='dashed')
        #plt.imshow(np.array(MAP))
        plt.gca().set_aspect('equal', adjustable='box')
        plt.colorbar()
        plt.show()

    env.close()
Example 10
def dqn(LR,
        GAMMA,
        TAU,
        BUFF,
        UPD,
        n_episodes=1000,
        max_t=100,
        eps_start=1.0,
        eps_end=0.01,
        eps_decay=0.995):
    """Deep Q-Learning.

    Params
    ======
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
    """

    agent = Agent(state_size, action_size, LR, GAMMA, TAU, BUFF, UPD, seed=0)
    scores = []  # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start  # initialize epsilon
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)  # save most recent score
        scores.append(score)  # save most recent score
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            i_episode, np.mean(scores_window)),
              end="")
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_window)))
            #        if np.mean(scores_window)>=13.0:
            #            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode-100, np.mean(scores_window)))
            torch.save(agent.qnetwork_local.state_dict(), 'checkpoint.pth')
            #break
#    return scores
    return np.mean(scores_window)
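
Because this variant returns the mean score over the last 100 episodes, it can serve directly as the objective of a hyperparameter sweep. A hypothetical call (the values below are only illustrative, not taken from the source):

avg_score = dqn(LR=5e-4, GAMMA=0.99, TAU=1e-3, BUFF=int(1e5), UPD=4, n_episodes=500)
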
Example 11
def dqn(args, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    """Deep Q-Learning.
    
    Params
    ======
        args : command line arguments
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
    """
    scores = []  # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start  # initialize epsilon
    state_size = 37
    action_size = 4
    agent = Agent(state_size, action_size, 1)
    for i_episode in range(1, args.num_episodes + 1):
        #resetting the environment for a new episode
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        cnt = 0
        while True:
            action = agent.act(state, eps)
            env_info = env.step(action)[
                brain_name]  # send the action to the environment
            next_state = env_info.vector_observations[0]  # get the next state
            reward = env_info.rewards[0]  # get the reward
            done = env_info.local_done[0]
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            cnt += 1
            if done:
                break
        scores_window.append(
            score)  # save most recent score in the 100 episode window
        scores.append(score)  # save most recent score
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
        print('\rEpisode {}\tAverage Score in the last 100 episodes: {:.2f}'.
              format(i_episode, np.mean(scores_window)),
              end="")
        if i_episode % args.save_every == 0:
            print(
                '\nSaving Checkpoint for {:d} episodes!\tAverage Score: {:.2f}'
                .format(i_episode, np.mean(scores_window)))
            torch.save(
                agent.qnetwork_local.state_dict(),
                os.path.join(args.save_checkpoint_path,
                             'checkpoint_' + str(i_episode) + '.pth'))
    return scores
Example 12
def test(dev, weights_file, n_episodes=100, max_t=1000):
    """Test the environment with the parameters stored in weights_file

    Params
    ======
        dev (string): cpu or gpu
        weights_file (string): name of the file to load the weights
        n_episodes (int): number of test episodes that will be performed
        max_t (int): maximum number of timesteps per episode
    """
    env = UnityEnvironment(file_name='./Banana_Linux/Banana.x86_64')
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=False)[brain_name]
    state_size = len(env_info.vector_observations[0])
    action_size = brain.vector_action_space_size
    agent = Agent(state_size, action_size, seed=0, device=dev)

    # load the weights from file
    print('Loading weights')
    try:
        checkpoint = torch.load(weights_file)
    except FileNotFoundError:
        print('Error: File \'{}\' not found'.format(weights_file))
        sys.exit(1)

    agent.qnetwork_local.load_state_dict(checkpoint)
    scores = []
    print('Running {} episodes'.format(n_episodes))
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=False)[brain_name]
        score = 0
        state = env_info.vector_observations[0]
        for j in range(max_t):
            action = agent.act(state)
            env_info = env.step(action)[brain_name]
            state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            score += reward
            if done:
                break
        scores.append(score)
        if (i_episode % 100 != 0):
            print('\rEpisode {}\tScore: {:.0f}\tAverage Score: {:.2f}'.format(
                i_episode, score, np.mean(scores)),
                  end="")
        else:
            print('\rEpisode {}\tScore: {:.0f}\tAverage Score: {:.2f}'.format(
                i_episode, score, np.mean(scores)))

    env.close()
Example 13
    def __init__(self, name, state_size, action_size, env, load_net=False):
        self.agent = Agent(name,
                           state_size=state_size,
                           action_size=action_size,
                           seed=0)
        self.env = env
        self.saved_network = name + '_dqn_checkpoint.pth'
        self.load_net = load_net
        if load_net:
            print('Loading pretrained network...')
            self.agent.qnetwork_local.load_state_dict(
                torch.load(self.saved_network))
            self.agent.qnetwork_target.load_state_dict(
                torch.load(self.saved_network))
            print('Loaded.')
Example 14
def main(FLAGS):
    #env = UnityEnvironment(file_name="Banana_Linux_NoVis/Banana.x86_64", docker_training=FLAGS.docker_training, no_graphics=FLAGS.no_graphics)
    env = UnityEnvironment(file_name="Banana_Linux_NoVis/Banana.x86_64",
                           no_graphics=FLAGS.no_graphics)
    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]

    action_size = brain.vector_action_space_size
    state = env_info.vector_observations[0]
    state_size = len(state)

    FLAGS = vars(FLAGS)
    agent = Agent(state_size=state_size, action_size=action_size, **FLAGS)

    scores = dqn(env, brain_name, agent, **FLAGS)

    # plot the scores
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(np.arange(len(scores)), scores)
    plt.ylabel('Score')
    plt.xlabel('Episode #')
    plt.show()
Example 15
def run(learning_strategy, n_episodes, experiment_name):
    experiment_name = os.path.basename(__file__).split(
        '.')[0] if experiment_name is None else experiment_name

    mlflow.set_experiment(experiment_name)

    with mlflow.start_run():

        mlflow.log_param('Learning Strategy', learning_strategy)

        env = create_env()

        agent = Agent(state_size=8,
                      action_size=4,
                      seed=0,
                      learning_strategy=LearningStrategy[learning_strategy])

        checkpoint_directory = './artifacts/checkpoints'

        if not os.path.exists(checkpoint_directory):
            os.makedirs(checkpoint_directory)

        with ArtifactHandler() as _:
            scores = dqn(env,
                         agent,
                         n_episodes=n_episodes,
                         checkpoint_directory=checkpoint_directory)
            print("Saving scores..")
            np.savetxt('./artifacts/scores.txt', scores)
Example 16
def main():

    ################################################
    # components required from main_02.py
    ################################################

    # spin up environment
    env = gym.make('LunarLander-v2')
    env.seed(0)

    # spin up agent (with underlying nn model)
    agent = Agent(state_size=8, action_size=4, seed=0)

    ################################################
    # Train the Agent with DQN
    ################################################

    # train the agent
    scores = dqn(env, agent)

    # plot the scores that the agent received while training
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(np.arange(len(scores)), scores)
    plt.xlabel('Episode #')
    plt.ylabel('Score')
    plt.show()
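
Note: Examples 16, 18 and 20 delegate training to a dqn(env, agent) helper that is not shown. A minimal sketch of such a loop for a gym environment, with hypothetical default hyperparameters and a LunarLander-style solving threshold, might be:

# Hypothetical dqn() training loop for gym environments (a sketch, not the original helper).
from collections import deque

import numpy as np
import torch


def dqn(env, agent, n_episodes=2000, max_t=1000,
        eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    scores = []                        # score of each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start
    for i_episode in range(1, n_episodes + 1):
        state = env.reset()
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            next_state, reward, done, _ = env.step(action)
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)
        scores.append(score)
        eps = max(eps_end, eps_decay * eps)
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            i_episode, np.mean(scores_window)), end="")
        if np.mean(scores_window) >= 200.0:  # assumed LunarLander-v2 solving criterion
            torch.save(agent.qnetwork_local.state_dict(), 'checkpoint.pth')
            break
    return scores
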
Example 17
def navigate_smart(model_file: Path) -> None:
    """Take the model trained the longest to navigate through the Banana environment"""

    # Init environment.
    env, brain_name, state_size, action_size, state = init_env(
        settings.env_file, train_mode=False)
    # Load last trained model.
    agent: Agent = Agent(state_size, action_size, random.randint(0, 100))
    agent.qnetwork_local.load_state_dict(torch.load(model_file))
    agent.qnetwork_local.eval()

    score: float = 0.0

    while True:
        action: int = agent.act(state)
        env_info: BrainInfo = env.step(action)[brain_name]
        next_state: np.ndarray = env_info.vector_observations[0]
        reward: float = env_info.rewards[0]
        done: bool = env_info.local_done[0]
        state = next_state
        score += reward
        if done:
            break

    print("Score: {}".format(score))
Example 18
def main():
    # GPU
    print('Is GPU available?', torch.cuda.is_available())
    print()

    # Environment
    env = gym.make(ENV)
    env.seed(0)
    print('Environment:', env)
    print('State shape:', env.observation_space.shape)
    print('Number of actions:', env.action_space.n)
    print()

    # DQN agent
    agent = Agent(state_size=STATE_SIZE, action_size=ACTION_SIZE, seed=SEED)

    # Training
    scores = dqn(n_episodes=N_EPISODES,
                 max_t=MAX_T,
                 eps_start=EPS_START,
                 eps_end=EPS_END,
                 eps_decay=EPS_DECAY,
                 env=env,
                 agent=agent)

    # Save score
    print(scores[-5:])
    pickle.dump(scores, open(PATH_SCORE, 'wb'))
    print('Saved score')

    # Visualize training result
    plot_score(scores)
Example 19
def initialize_env():

    env = atari_wrappers.make_atari('RiverraidNoFrameskip-v4')
    env = atari_wrappers.wrap_deepmind(env,
                                       clip_rewards=False,
                                       frame_stack=True,
                                       pytorch_img=True)
    agent = Agent(in_channels=4, action_size=18, seed=0)

    #### load the pretrained networks ####
    agent.qnetwork_target.load_model(
        torch.load('./data/dqn_Riverraid_qnetwork_target_state_dict.pth'))
    agent.qnetwork_local.load_model(
        torch.load('./data/dqn_Riverraid_local_model_state_dict.pth'))

    #### initialize the replay buffer ####
    while len(agent.memory) < BUFFER_INI:
        observation = env.reset()
        done = False
        while not done:
            action = random.sample(range(env.action_space.n), 1)[0]
            next_observation, reward, done, info = env.step(action)
            agent.memory.add(observation, action, reward, next_observation,
                             done)
            observation = next_observation
    print("Replay Buffer Initialized")
    return env, agent
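
Note: Example 19 pre-fills agent.memory with random transitions before training starts, but the buffer class is not shown. A minimal uniform replay buffer consistent with the add()/len() usage above (a sketch, not the original implementation) is:

# Minimal uniform replay buffer sketch matching the add()/len() usage in Example 19.
import random
from collections import deque, namedtuple

Experience = namedtuple(
    "Experience", ["state", "action", "reward", "next_state", "done"])


class ReplayBuffer:
    def __init__(self, buffer_size, batch_size, seed=0):
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        self.memory.append(Experience(state, action, reward, next_state, done))

    def sample(self):
        # uniform sampling; a prioritized buffer would weight by TD error instead
        return random.sample(self.memory, k=self.batch_size)

    def __len__(self):
        return len(self.memory)
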
Example 20
def main():
    env = World()
    agent = Agent(state_size=env.observation_space,
                  action_size=env.action_space,
                  seed=0)
    scores = dqn(env, agent)
    with open('./DQN/scores.pkl', 'wb') as f:
        pickle.dump(scores, f, protocol=pickle.HIGHEST_PROTOCOL)

    # plot the scores

    scores_window = deque(maxlen=100)
    avg_scores = []
    for score in scores:
        scores_window.append(score)
        avg_scores.append(np.mean(scores_window))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(np.arange(len(scores)), scores, linewidth=0.5, label='score')
    plt.plot(np.arange(len(avg_scores)),
             avg_scores,
             linewidth=2,
             label='MA score')
    plt.legend(['score', 'MA score'])
    plt.ylabel('Score')
    plt.xlabel('Episode #')
    fig.savefig('./DQN/training_curve.png')
Example 21
def main(unused_argv):
    del unused_argv

    configs = extract_configs(*FLAGS.config)
    training = configs["training"]
    label = configs["agent"]["name"]

    # plot the scores
    fig = plt.figure()
    ax = fig.add_subplot(111)
    env = BananaWrapper(file_name="./Banana")

    state_size = env.observation_size
    action_size = env.action_size
    agent = Agent(state_size=state_size,
                  action_size=action_size,
                  configs=configs)

    scores = dqn(env=env, agent=agent, label=label, **training)
    ax.plot(np.arange(len(scores)), scores, label=label)

    plt.ylabel("Score")
    plt.xlabel("Episode #")
    ax.legend(loc="upper center", shadow=True, fontsize="small")
    plt.savefig("results/plots/" + str(label))
    plt.show()
Example 22
def play_banana(isDoubleDQN=0):
    isDoubleDQN = int(isDoubleDQN)
    # find the path to the environment, this can be different for different OS
    env = UnityEnvironment(file_name="Banana_Windows_x86_64\Banana.exe")

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]
    # number of actions
    action_size = brain.vector_action_space_size
    # examine the state space 
    state = env_info.vector_observations[0]
    state_size = len(state)

    # instantiate agent
    agent = Agent(state_size=state_size, action_size=action_size, seed=0, isDoubleDQN=isDoubleDQN)

    # load the weights from file
    if (isDoubleDQN==1):
        print("Using Double DQN")
        agent.qnetwork_local.load_state_dict(torch.load('checkpoint_double_agent.pth'))
    else:
        print("Not Using Double DQN")
        agent.qnetwork_local.load_state_dict(torch.load('checkpoint_simple_agent.pth'))  

    # start the agent
    env_info = env.reset(train_mode=False)[brain_name] # reset the environment
    state = env_info.vector_observations[0]            # get the current state
    score = 0                                          # initialize the score
    while True:
        action = agent.act(state, eps=0)      # select an action
        env_info = env.step(action)[brain_name]        # send the action to the environment
        next_state = env_info.vector_observations[0]   # get the next state
        reward = env_info.rewards[0]                   # get the reward
        done = env_info.local_done[0]                  # see if episode has finished
        score += reward                                # update the score
        state = next_state                             # roll over the state to next time step
        if done:                                       # exit loop if episode finished
            break
        
    print("Score: {}".format(score))

    env.close()
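
Note: every example above calls agent.act(state, eps); in the usual Udacity-style DQN agent this is an epsilon-greedy query of qnetwork_local. A sketch of such a method (assumed to live inside the Agent class, with a PyTorch network mapping states to action values):

# Sketch of a typical epsilon-greedy act() method; not copied from any example above.
import random

import numpy as np
import torch


def act(self, state, eps=0.0):
    """Return an epsilon-greedy action for the given state."""
    state = torch.from_numpy(state).float().unsqueeze(0)
    self.qnetwork_local.eval()
    with torch.no_grad():
        action_values = self.qnetwork_local(state)
    self.qnetwork_local.train()
    if random.random() > eps:
        return int(np.argmax(action_values.cpu().data.numpy()))
    return random.randrange(self.action_size)
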
Example 23
def main():
    # Parse arguments:
    parser = argparse.ArgumentParser(fromfile_prefix_chars='@')
    parser.add_argument("--env_path",
                        type=str,
                        help='Path to the ml-agents environment file')
    args = parser.parse_args()

    # instantiate environment:
    # - Make sure you don't have the same environment already opened in jupyter
    #   notebook or with other python.
    # - Do not try disabling rendering. Visual observations will not work.
    env = UnityEnvironment(file_name=args.env_path)

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    ### instantiate agent
    env_info = env.reset(train_mode=False)[brain_name]

    # get action size
    action_size = brain.vector_action_space_size

    # get input size
    # first, get a frame from the emulator
    frame = env_info.visual_observations[0]

    # set deque to store frames stack to input the neural network
    state_buffer = deque(maxlen=NUM_FRAMES)

    # preprocess frame and stack it to build the state
    frame = preprocess_image(frame)
    state = stack_frames(frame, state_buffer)
    state_size = state.shape

    # finally, instantiate agent
    agent = Agent(state_size=state_size, action_size=action_size, seed=0)
    ###

    # start training
    scores = dqn(env, brain_name, agent)

    # close environment
    env.close()

    # Write scores in file for later plot edition:
    #np.save("scores", scores)

    # plot the scores
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(np.arange(len(scores)), scores)
    plt.ylabel('Score')
    plt.xlabel('Episode #')
    print("To finish the program, manually close the plot window.")
    plt.show()
    print("Done!")
Example 24
def trainFunction(n_episodes=2000,
                  max_t=1000,
                  eps_start=1.0,
                  eps_end=0.01,
                  eps_decay=0.995):
    agent = Agent(state_size=37, action_size=4, seed=0, priority=True)
    epsilons = []
    scores = []  # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start  # initialize epsilon
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(
            train_mode=True)[brain_name]  # reset the environment
        state = env_info.vector_observations[0]
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            env_info = env.step(action.astype(np.int32))[brain_name]
            next_state = env_info.vector_observations[0]  # get the next state
            reward = env_info.rewards[0]  # get the reward
            done = env_info.local_done[0]  # see if episode has finished
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)  # save most recent score
        scores.append(score)  # save most recent score
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
        epsilons.append(eps)
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            i_episode, np.mean(scores_window)),
              end="")
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_window)),
                  end="")
        # if np.mean(scores_window)>=13.0:

    print('\nEnvironment finished in {:d} episodes!\tAverage Score: {:.2f}'.
          format(i_episode, np.mean(scores_window)))
    torch.save(agent.qnetwork_local.state_dict(), 'checkpoint.pth')
    return scores, epsilons
Example 25
def game_start(game_name):
    '''
    initialize the environment and the agent
    '''
    env = gym.make(game_name)
    env.seed(0)
    print('State shape: ', env.observation_space.shape)
    print('Number of actions: ', env.action_space.n)

    agent = Agent(state_size=8, action_size=4, seed=0)
    return env, agent
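
A typical call simply unpacks both objects; 'LunarLander-v2' is an assumption that matches the state_size=8 / action_size=4 used above:

env, agent = game_start('LunarLander-v2')
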
Example 26
def main():
    params = Params(n_episodes=50000,
                    max_t=1000,
                    eps_start=1.0,
                    eps_end=0.01,
                    eps_decay=0.995)

    environment = Environment('/Users/dali/workspace/RL/Reacher.app')
    # environment = Environment('/Users/dali/workspace/RL/Banana.app')
    agent = Agent(environment=environment, seed=0, device=device)
    dqn(agent, params)
def main(file_name="/Users/joshuaschoenfield/Downloads/Banana.app",
         with_plotting=True):
    with get_environment(file_name=file_name) as env:
        from dqn_agent import Agent

        agent = Agent(state_size=37, action_size=4, seed=0)

        scores, running_average = dqn(env=env,
                                      agent=agent,
                                      with_plotting=with_plotting)

        return scores, running_average
Example 28
def testFunction():
    agent = Agent(state_size=37, action_size=4, seed=0)
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))

    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    state = env_info.vector_observations[0]  # get the current state
    score = 0  # initialize the score
    time_steps = 100000
    for t in range(time_steps):
        action = agent.act(state)  # select an action
        env_info = env.step(action.astype(
            np.int32))[brain_name]  # send the action to the environment
        next_state = env_info.vector_observations[0]  # get the next state
        reward = env_info.rewards[0]  # get the reward
        done = env_info.local_done[0]  # see if episode has finished
        score += reward  # update the score
        state = next_state  # roll over the state to next time step
        if done:  # exit loop if episode finished
            break

    print("Score: {}".format(score))
Example 29
def main():
    parser = argparse.ArgumentParser(description="Run Extended Q-Learning with given config")
    parser.add_argument("-c",
                        "--config",
                        type=str,
                        metavar="",
                        required=True,
                        help="Config file name - file must be available as .json in ./configs")

    args = parser.parse_args()

    # load config files
    with open(os.path.join(".", "configs", args.config), "r") as read_file:
        config = json.load(read_file)

    env = UnityEnvironment(file_name=os.path.join(*config["general"]["env_path"]))
    agent = Agent(config=config)

    if config["train"]["run_training"]:
        scores = sessions.train(agent, env, config)
        helper.plot_scores(scores)
        agent.save()
    else:
        agent.load()
        sessions.test(agent, env)

    env.close()
Example 30
def run(agent_source, location, n_episodes):

    source = AgentSource[agent_source.upper()]
    agent = Agent(state_size=8,
                  action_size=4,
                  seed=0,
                  learning_strategy=LearningStrategy.DQN)

    path_to_agent_checkpoint = retrieve_agent_checkpoint(source, location)
    agent.qnetwork_local.load_state_dict(
        torch.load(path_to_agent_checkpoint,
                   map_location=lambda storage, loc: storage))

    #display = Display(visible=0, size=(1400, 900))
    #display.start()

    env = gym.make('LunarLander-v2')
    env.seed(0)
    print('State shape: ', env.observation_space.shape)
    print('Number of actions: ', env.action_space.n)

    for i in range(n_episodes):
        state = env.reset()
        #img = plt.imshow(
        env.render(mode='rgb_array')
        for j in range(500):
            action = agent.act(state)
            #img.set_data(
            env.render(mode='rgb_array')
            plt.axis('off')
            time.sleep(0.1)
            #display.display(plt.gcf())
            #display.clear_output(wait=True)
            state, reward, done, _ = env.step(action)
            if done:
                break

    env.close()

Example 31
env = gym.make('LunarLander-v2')
env.seed(0)
print('State shape: ', env.observation_space.shape)
print('Number of actions: ', env.action_space.n)


# Please refer to the instructions in `Deep_Q_Network.ipynb` if you would like to write your own DQN agent.  Otherwise, run the code cell below to load the solution files.



from dqn_agent import Agent

agent = Agent(state_size=8, action_size=4, seed=0)

# watch an untrained agent
state = env.reset()
for j in range(200):
    action = agent.act(state)
    env.render()
    state, reward, done, _ = env.step(action)
    if done:
        break 
        
env.close()


# ### 3. Train the Agent with DQN
#