Example #1
from collections import deque

import gym

# DQNAgent is assumed to be defined elsewhere in the surrounding project; its
# import is not part of the original snippet.


def main(num_episodes, render=False):
    # initialize gym environment and the agent
    # env = gym.make('SpaceInvaders-v0')
    env = gym.make('Breakout-v0')
    state = env.reset()
    state_shape = list(state.shape)
    state_shape[-1] = state_shape[-1] * 5
    agent = DQNAgent(state_shape, env.action_space.n)

    states = deque(maxlen=5)

    max_train_time = 800

    # Iterate the game
    for e in range(num_episodes):
        # reset state in the beginning of each game
        state = env.reset()
        for i in range(5):
            states.appendleft(state)
        # time_t represents each frame of the game
        num_random = 0
        total_reward = 0.
        for time_t in range(max_train_time):
            # turn this on if you want to render
            if render:
                env.render()
            # Decide action
            action = agent.act(states)
            if agent.acted_randomly:
                num_random += 1
            # Advance the game to the next frame based on the action.
            next_state, reward, done, info = env.step(action)
            total_reward += reward
            # Remember the previous state, action, reward, and done
            agent.remember(states.copy(), action, reward, next_state, done)
            # make next_state the new current state for the next frame.
            states.appendleft(next_state)
            # done becomes True when the game ends
            if done:
                # print the score and break out of the loop
                rand_perc = num_random / float(time_t + 1) * 100.  # Percentage of random actions.
                print("episode: {}/{}, training_time: {}, summed_reward: {}, "
                      "random_actions: {}%, eps: {}".format(
                          e, num_episodes, time_t, total_reward, rand_perc,
                          agent.epsilon))
                # train the agent with the experience of the episode
                agent.replay(min(100, time_t))
                break
        # print("epsilon {}".format(agent.epsilon))
        if e % 1000 == 0:
            agent.save("./deep_q_model.h5")
            print("saved model")
Example #2
import matplotlib.pyplot as plt
import torch

# DQNAgent, run_id, and the graph() plotting helper are assumed to be
# imported/defined at module level in the surrounding project; they are not
# part of the original snippet.


def print_progress(agent: DQNAgent, data: dict):
    percent = data['percent']
    progress = '=' * int(percent)
    progress += '>'
    left = ' ' * (100 - percent)
    progress = f'{percent}% [{progress + left}]'

    reward, steps = data['stats']
    mean = round(reward.mean(), 1)
    std = round(reward.std(), 1)
    positive = reward[reward > 0].size
    total = reward.size
    steps = steps.sum()
    losses = data['losses']

    if total > 50:
        graph(reward, verbose=True)
        plt.savefig(f'figures/{run_id}_training.png')
        if len(losses) > 10:
            graph(losses.detach().numpy(), xlabel='Replays', ylabel='Loss', window=5)
            plt.savefig(f'figures/{run_id}_losses.png')
    # print(progress + f'  μ: {mean}, σ: {std}; +{positive}/{total}, steps: {steps}', end='\r')
    # if percent % 5 != 0:
    #     return
    last100 = reward[-100:]
    last_mean = round(last100.mean(), 2)
    last_std = round(last100.std(), 1)
    verbose = data['verbose']

    if percent % 2 == 0 and last_mean > 200:
        print(' ' * 100, end='\r')
        if verbose:
            print('Last 100 episodes average over 200! ', end='')
        agent.save(f'{run_id}_{percent}p', str(round(last_mean, 0)))

    # rar = f'rar: {round(data["rar"], 5)}' if verbose else ''
    # Spaces at the end are to clean up the progress bar
    print(f'Total mean: {mean}, std: {std};  '
          f'Last 100 mean: {last_mean}, std: {last_std};  '
          f'Positive: {positive}/{total}  '
          f'Steps: {steps}  ',
          # rar,
          " " * 20)
    if verbose:
        if len(losses) > 1:
            mean = round(losses.mean().item(), 3)
            std = round(torch.std(losses).item(), 3)
            print(f'Recent Losses: {losses[-5:]}, mean: {mean}, std: {std}')
    print(progress, end='\r')
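
Example #2 also relies on a module-level run_id and a graph() plotting helper that the snippet does not define. The function below is a hypothetical stand-in inferred only from the call sites above (plot a series together with a moving average); the real helper may behave differently.

import matplotlib.pyplot as plt
import numpy as np


def graph(values, xlabel='Episodes', ylabel='Reward', window=10, verbose=False):
    # Plot the raw series plus a sliding-window moving average.
    values = np.asarray(values, dtype=float)
    plt.figure()
    plt.plot(values, alpha=0.4, label=ylabel)
    if len(values) >= window:
        kernel = np.ones(window) / window
        smoothed = np.convolve(values, kernel, mode='valid')
        plt.plot(np.arange(window - 1, len(values)), smoothed,
                 label=f'{window}-point mean')
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend()
    if verbose:
        print(f'{ylabel}: last={values[-1]:.2f}, mean={values.mean():.2f}')
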
Example #3
    def on_progress(self, agent: DQNAgent, data):
        """
        After 1% of the total iterations is complete, the agent will call this function
        This is an opportunity to decide if it is time to quit early.
        """
        percent: int = data['percent']
        reward, steps = data['stats']
        rar = data['rar']

        if len(reward) >= 100:
            last100 = reward[-100:]
            mean = np.round(last100.mean())
            if mean >= 200:
                print("Successfully completed goal")
                self.success = True
                self.exit_early = True
                agent.end_training_early()
            elif mean >= 50 and percent % 5 == 0:
                print("\nGood performance found, saving checkpoint")
                epoch = int(self.episodes * percent / 100)
                agent.save(f'{self.id}', f'{epoch}_{mean}')

        if self.verbose and percent % 10 == 0:
            # TODO: Print additional info
            print(f"\n{percent}% "
                  f"\tTotal reward={round(reward.mean(), 3)}  "
                  f"steps={steps.sum()}  "
                  f"rar={round(rar, 3)}")
            # look at the last several episodes
            reward = reward[-self.percent_size:]
            print(f"\t\tRecent reward={round(reward.mean(), 3)},  "
                  f"max={round(reward.max(), 3)}")

        if self.verbose:
            print(f'{percent}% ... ', end="")
        else:
            progress = '=' * int(percent)
            progress += '>'
            left = ' ' * (100 - percent)
            print(f'{percent}% [{progress + left}]', end='\r')
Example #4
        agent = DQNAgent(action_set=[0, 1, 2],
                         reward_function=mountain_car_reward_function,
                         feature_extractor=MountainCarIdentityFeature(),
                         hidden_dims=[50, 50],
                         learning_rate=5e-4,
                         buffer_size=50000,
                         batch_size=64,
                         num_batches=100,
                         starts_learning=5000,
                         final_epsilon=0.02,
                         discount=0.99,
                         target_freq=10,
                         verbose=True,
                         print_every=10)

        _, _, rewards = live(agent=agent,
                             environment=env,
                             num_episodes=episodes,
                             max_timesteps=200,
                             verbose=True,
                             print_every=50)

        np.save(os.path.join(reward_path, file_name), rewards)
        agent.save(path=os.path.join(agent_path, file_name + '.pt'))
Example #5
    np.random.seed(0)
    torch.manual_seed(0)

    env = ForexEnv()

    agent = DQNAgent(
        action_set=[0, 1, 2],
        reward_function=functools.partial(Forex_reward_function),
        feature_extractor=ForexIdentityFeature(),
        hidden_dims=[50, 50],
        learning_rate=5e-4,
        buffer_size=5000,
        batch_size=12,
        num_batches=100,
        starts_learning=5000,
        final_epsilon=0.02,
        discount=0.99,
        target_freq=10,
        verbose=False,
        print_every=10)

    observation_data, action_data, rewards = live(
                            agent=agent,
                            environment=env,
                            num_episodes=5,
                            max_timesteps=5,
                            verbose=True,
                            print_every=50)

    agent.save('./dqn.pt')
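
Examples #4 and #5 hand the agent to a live() helper whose keyword arguments (agent, environment, num_episodes, max_timesteps, verbose, print_every) and three return values are visible in the calls above, but whose body is not shown. The loop below is only a guess at such a rollout helper; the agent methods it calls (act, update) are placeholders and not necessarily the real API of that DQNAgent.

import numpy as np


def live(agent, environment, num_episodes, max_timesteps,
         verbose=False, print_every=10):
    # Roll the agent out in the environment and collect per-episode data.
    observations, actions, episode_rewards = [], [], []
    for episode in range(num_episodes):
        state = environment.reset()
        total_reward = 0.0
        for _ in range(max_timesteps):
            action = agent.act(state)  # placeholder call
            next_state, reward, done, _ = environment.step(action)
            agent.update(state, action, reward, next_state, done)  # placeholder call
            observations.append(state)
            actions.append(action)
            total_reward += reward
            state = next_state
            if done:
                break
        episode_rewards.append(total_reward)
        if verbose and episode % print_every == 0:
            print(f'episode {episode}: total reward {total_reward}')
    return np.array(observations), np.array(actions), np.array(episode_rewards)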