# Shared imports for the training snippets below
import pprint
from collections import deque

import numpy as np
import pandas


def train_agent(actor_learning_rate, critic_learning_rate, fc_units, thau, batch_size):
    # Set tunable parameters
    params['actor_hidden_layers'] = [int(fc_units), int(fc_units / 2)]
    params['critic_hidden_layers'] = [int(fc_units), int(fc_units / 2)]
    params['actor_learning_rate'] = actor_learning_rate
    params['critic_learning_rate'] = critic_learning_rate
    params['thau'] = thau
    params['batch_size'] = int(batch_size)

    # Create agent instance
    print("Created agent with the following hyperparameter values:")
    pprint.pprint(params)

    # Initialize agent
    agent = Agent(state_size=state_size,
                  action_size=action_size,
                  param=params,
                  seed=0)

    # Initialize replay buffer
    memory = ReplayBuffer(action_size,
                          params['replay_size'],
                          params['batch_size'],
                          seed=0)

    update_interval = params['update_interval']
    replay_start = params['replay_initial']

    """ Training loop """
    scores = []  # list containing scores from each episode
    scores_window = deque(maxlen=params['scores_window_size'])  # last (window_size) scores
    filemeta = "{:s}_{:s}_{:.1E}_{:.1E}_{:d}_{:.1E}_{:d}_solved{:d}"

    for i_episode in range(1, params['train_episodes'] + 1):
        # Reset the environment
        env_info = env.reset(train_mode=True)[brain_name]
        agent.reset()
        # Capture the current state
        state = env_info.vector_observations[0]
        # Reset score collector
        score = 0
        # One episode loop
        step = 0
        done = False
        while not done:
            # Action selection
            action = agent.act(state)
            # Take action and get reward and new state
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]  # True if next_state is terminal
            # Store experience
            memory.push(state, action, reward, next_state, done)
            # Update Q-Learning
            step += 1
            if (step % update_interval) == 0 and len(memory) > replay_start:
                # Recall experiences (mini-batch)
                experiences = memory.recall()
                # Train agent
                agent.learn(experiences)
            # State transition
            state = next_state
            # Update total score
            score += reward

        # Push to score list
        scores_window.append(score)
        scores.append([score, np.mean(scores_window), np.std(scores_window)])

        # Print episode summary
        print('\r#TRAIN Episode:{}, Score:{:.2f}, Average Score:{:.2f}'.format(
            i_episode, score, np.mean(scores_window)), end="")
        if i_episode % 100 == 0:
            print('\r#TRAIN Episode:{}, Score:{:.2f}, Average Score:{:.2f}'.format(
                i_episode, score, np.mean(scores_window)))
        if np.mean(scores_window) >= params['stop_scores']:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(
                i_episode - params['scores_window_size'], np.mean(scores_window)))
            break

    """ End of the Training """
    print('\n')

    # Filename string
    filename = filemeta.format(params['env_name'], agent.name,
                               params['actor_learning_rate'],
                               params['critic_learning_rate'],
                               int(fc_units), params['thau'],
                               params['batch_size'],
                               i_episode - params['scores_window_size'])
    agent.export_network('./models/{:s}'.format(filename))

    # Export scores to csv file
    df = pandas.DataFrame(scores, columns=['scores', 'average_scores', 'std'])
    df.to_csv('./scores/{:s}.csv'.format(filename), sep=',', index=False)

    # Log this run's hyperparameters and results
    hyperscores.append([
        params['actor_learning_rate'], params['critic_learning_rate'],
        int(fc_units), params['thau'], params['batch_size'],
        np.mean(scores_window), i_episode - params['scores_window_size']
    ])
    log_df = pandas.DataFrame(hyperscores,
                              columns=[
                                  'actor_learning_rate', 'critic_learning_rate',
                                  'fc_units', 'thau', 'batch_size',
                                  'scores', 'trained_episodes'
                              ])
    log_df.to_csv('scores/{:s}.csv'.format(log_filename))

    return params['stop_scores'] - np.mean(scores_window)
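# train_agent() returns stop_scores minus the achieved average score, i.e. a
# loss where lower is better. A minimal driver sketch, assuming the bayes_opt
# package (which maximizes its objective, hence the sign flip); the search
# bounds below are illustrative assumptions, not tuned values.
from bayes_opt import BayesianOptimization

pbounds = {
    'actor_learning_rate': (1e-5, 1e-3),
    'critic_learning_rate': (1e-5, 1e-3),
    'fc_units': (64, 512),
    'thau': (1e-4, 1e-2),
    'batch_size': (32, 256),
}
optimizer = BayesianOptimization(
    f=lambda **kwargs: -train_agent(**kwargs),  # negate: maximize -loss
    pbounds=pbounds,
    random_state=0)
optimizer.maximize(init_points=5, n_iter=25)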
print('Number of actions : ', action_size)
print(' - low :', env.action_space.low)
print(' - high:', env.action_space.high)
print('Dimension of state space : ', state_size)
print(' - low :', env.observation_space.low)
print(' - high:', env.observation_space.high)

# Initialize agent
agent = Agent(state_size=state_size,
              action_size=action_size,
              param=params,
              seed=params['random_seed'])

# Initialize replay buffer
memory = ReplayBuffer(action_size,
                      params['replay_size'],
                      params['batch_size'],
                      seed=params['random_seed'])

print('Hyperparameter values:')
pprint.pprint(params)

""" Training loop """
filename_format = "{:s}_{:s}_{:.1E}_{:.1E}_{:d}_{:.1E}_{:d}"
scores = []  # list containing scores from each episode
scores_window = deque(maxlen=params['scores_window_size'])  # last (window_size) scores

for i_episode in range(1, params['train_episodes'] + 1):
    # Reset the environment
    state = env.reset()
    agent.reset()
    # Reset score collector
def train_agent(trial_id):
    # Create agent instance
    print("Created agent with the following hyperparameter values:")
    pprint.pprint(params)

    # Initialize agent
    agent = Agent(state_size=state_size,
                  action_size=action_size,
                  param=params,
                  seed=params['random_seed'])

    # Initialize replay buffer
    memory = ReplayBuffer(action_size,
                          params['replay_size'],
                          params['batch_size'],
                          seed=params['random_seed'])

    # Define parameters for exploration
    noise_amplitude = params['noise_amplitude_start']
    noise_amplitude_final = params['noise_amplitude_final']
    noise_amplitude_decay = params['noise_amplitude_decay']

    """ Training loop """
    max_step = 500
    max_score = -np.inf
    scores_history = []  # list containing scores from each episode
    scores_window = deque(maxlen=params['scores_window_size'])  # last (window_size) scores

    for i_episode in range(1, params['train_episodes'] + 1):
        # Reset the environment
        state = env.reset()
        agent.reset()
        # Reset score collector
        score = 0
        # One episode loop
        step = 0
        done = False
        while not np.any(done):
            # Get action from the agent
            action = agent.act(state, noise_amplitude=noise_amplitude)
            # Take action and get reward and new state
            next_state, reward, done, _ = env.step(action)
            # Store experience
            memory.push(state, action, reward, next_state, done)
            # Update the Critic and Actor
            step += 1
            if (step % params['update_interval']) == 0 and len(memory) > params['replay_initial']:
                # Recall experiences (mini-batch)
                experiences = memory.recall()
                # Train agent
                agent.learn(experiences)
            # State transition
            state = next_state
            # Update total score
            score += reward
            if max_step < step:
                break

        # Push to score list
        scores_window.append(score)
        scores_history.append([score, np.mean(scores_window), np.std(scores_window)])

        # Print episode summary
        print('\r#TRAIN Episode:{}, Score:{:.2f}, Average Score:{:.2f}, Exploration:{:1.4f}'.format(
            i_episode, score, np.mean(scores_window), noise_amplitude), end="")
        if i_episode % 100 == 0:
            print('\r#TRAIN Episode:{}, Score:{:.2f}, Average Score:{:.2f}, Exploration:{:1.4f}'.format(
                i_episode, score, np.mean(scores_window), noise_amplitude))
        if np.mean(scores_window) >= params['stop_scores']:
            max_score = np.mean(scores_window)
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(
                i_episode - params['scores_window_size'], np.mean(scores_window)))
            break
        elif max_score < np.mean(scores_window):
            max_score = np.mean(scores_window)

        # Update exploration
        noise_amplitude = max(noise_amplitude_final,
                              noise_amplitude * noise_amplitude_decay)

    """ End of the Training """
    print('\n')

    # Filename string
    filename = "{:05d}".format(trial_id)

    # Export trained agent's parameters
    # agent.export_network('./models/{:s}'.format(filename))

    # Export scores to csv file
    df = pandas.DataFrame(scores_history,
                          columns=['scores', 'average_scores', 'std'])
    df.to_csv('./scores/optuna_logs/{:s}.csv'.format(filename), sep=',', index=False)

    # Collect the trial's hyperparameters and results
    param_metas = list(params.keys())
    param_metas.extend(['scores', 'trained_episodes', 'filename'])
    param_values = list(params.values())
    param_values.extend([np.mean(scores_window), i_episode, filename])

    # Append to the optimization log and export it
    optuna_log.append(param_values)
    optuna_df = pandas.DataFrame(optuna_log, columns=param_metas)
    optuna_df.to_csv('scores/{:s}.csv'.format(log_filename))

    return params['stop_scores'] - max_score
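# A minimal Optuna driver sketch, assuming the tunable entries of `params` are
# filled in from the trial before training; the parameter names and search
# ranges below are illustrative assumptions. train_agent() returns stop_scores
# minus the best average score, so the study minimizes.
import optuna


def objective(trial):
    params['actor_learning_rate'] = trial.suggest_float(
        'actor_learning_rate', 1e-5, 1e-3, log=True)
    params['critic_learning_rate'] = trial.suggest_float(
        'critic_learning_rate', 1e-5, 1e-3, log=True)
    params['batch_size'] = trial.suggest_categorical(
        'batch_size', [64, 128, 256])
    return train_agent(trial.number)


study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)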
action_size = brain.vector_action_space_size
state_size = env_info.vector_observations.shape[1]
print('Number of agents : ', number_of_agents)
print('Number of actions : ', action_size)
print('Dimension of state space : ', state_size)

# Initialize agents
agents = MultiAgent(number_of_agents=number_of_agents,
                    state_size=state_size,
                    action_size=action_size,
                    param=params,
                    seed=params['random_seed'])

# Initialize replay buffer
memory = ReplayBuffer(params['replay_size'],
                      params['batch_size'],
                      seed=params['random_seed'])

update_interval = params['update_interval']
replay_start = params['replay_initial']

# Define parameters for training
episodes = params['train_episodes']  # maximum number of training episodes
stop_scores = params['stop_scores']
scores_window_size = params['scores_window_size']

# Define parameters for exploration
noise_amplitude = params['noise_amplitude_start']
noise_amplitude_final = params['noise_amplitude_final']
noise_amplitude_decay = params['noise_amplitude_decay']

print('Hyperparameter values:')
number_of_agents = len(env_info.agents)
action_size = brain.vector_action_space_size
state_size = env_info.vector_observations.shape[1]
print('Number of agents : ', number_of_agents)
print('Number of actions : ', action_size)
print('Dimension of state space : ', state_size)

# Initialize agent
agent = Agent(state_size=state_size,
              action_size=action_size,
              param=params,
              seed=params['random_seed'])

# Initialize replay buffer
memory = ReplayBuffer(action_size,
                      params['replay_size'],
                      params['batch_size'],
                      seed=params['random_seed'])

# Define parameters for exploration
noise_amplitude = 1.0        # params['noise_amplitude_start']
noise_amplitude_final = 0.1  # params['noise_amplitude_final']
noise_amplitude_decay = 0.999  # params['noise_amplitude_decay']

print('Hyperparameter values:')
pprint.pprint(params)

""" Training loop """
filename_format = "{:s}_{:s}_{:.1E}_{:.1E}_{:d}_{:.1E}_{:d}"
scores_history = []  # list containing scores from each episode
scores_window = deque(maxlen=params['scores_window_size'])  # last (window_size) scores

for i_episode in range(1, params['train_episodes'] + 1):
def train_agent():
    # Create agent instance
    print("Created agent with the following hyperparameter values:")
    pprint.pprint(params)

    # Initialize agents
    agents = MultiAgent(number_of_agents=number_of_agents,
                        state_size=state_size,
                        action_size=action_size,
                        param=params,
                        seed=params['random_seed'])

    # Initialize replay buffer
    memory = ReplayBuffer(params['replay_size'],
                          params['batch_size'],
                          seed=params['random_seed'])

    update_interval = params['update_interval']
    replay_start = params['replay_initial']

    # Define parameters for training
    episodes = params['train_episodes']  # maximum number of training episodes
    stop_scores = params['stop_scores']
    scores_window_size = params['scores_window_size']

    # Define parameters for exploration
    noise_amplitude = params['noise_amplitude_start']
    noise_amplitude_final = params['noise_amplitude_final']
    noise_amplitude_decay = params['noise_amplitude_decay']

    """ Training loop """
    filename_format = "{:s}_{:s}_{:.1E}_{:.1E}_{:d}_{:.1E}_{:d}_solved{:d}"
    scores_history = []  # list containing scores from each episode
    scores_window = deque(maxlen=scores_window_size)  # last (window_size) scores

    for i_episode in range(1, episodes + 1):
        # Reset the environment
        env_info = env.reset(train_mode=True)[brain_name]
        agents.reset()
        # Capture the current state
        states = env_info.vector_observations
        dones = env_info.local_done
        # Reset score collector
        scores = np.zeros(number_of_agents)
        # One episode loop
        step = 0
        while not np.any(dones):
            # Get actions from all agents
            actions = agents.act(states, noise_amplitude=noise_amplitude)
            # Take action and get rewards and new state
            env_info = env.step(actions)[brain_name]  # send all actions to the environment
            next_states = env_info.vector_observations  # get next state (for each agent)
            rewards = env_info.rewards  # get reward (for each agent)
            dones = env_info.local_done  # see if episode finished
            scores += env_info.rewards  # update the score (for each agent)
            # Store experience
            memory.push(states, actions, rewards, next_states, dones)
            # Update the Critics and Actors of all the agents
            step += 1
            if (step % update_interval) == 0 and len(memory) > replay_start:
                for agent_id in range(number_of_agents):
                    # Recall experiences (mini-batch)
                    experiences = memory.recall()
                    # Train agent
                    agents.learn(experiences, agent_id)
            # State transition
            states = next_states

        # Push to score list
        scores_window.append(np.max(scores))
        scores_history.append([np.max(scores),
                               np.mean(scores_window),
                               np.std(scores_window)])

        # Print episode summary
        print('\r#TRAIN Episode:{}, Score:{:.2f}, Average Score:{:.2f}, Exploration:{:1.4f}'.format(
            i_episode, np.max(scores), np.mean(scores_window), noise_amplitude), end="")
        if i_episode % 100 == 0:
            print('\r#TRAIN Episode:{}, Score:{:.2f}, Average Score:{:.2f}, Exploration:{:1.4f}'.format(
                i_episode, np.max(scores), np.mean(scores_window), noise_amplitude))
        if np.mean(scores_window) >= stop_scores:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(
                i_episode - scores_window_size, np.mean(scores_window)))
            break

        # Update exploration
        noise_amplitude = max(noise_amplitude_final,
                              noise_amplitude * noise_amplitude_decay)

    """ End of the Training """
    print('\n')

    # Filename string
    filename = filename_format.format(params['env_name'], 'MADDPG',
                                      params['actor_learning_rate'],
                                      params['critic_learning_rate'],
                                      params['actor_hidden_layers'][0],
                                      params['actor_thau'],
                                      params['batch_size'],
                                      i_episode - scores_window_size)

    # Export trained agents' parameters
    agents.export_network('./models/{:s}'.format(filename))

    # Export scores to csv file
    df = pandas.DataFrame(scores_history,
                          columns=['scores', 'average_scores', 'std'])
    df.to_csv('./scores/{:s}.csv'.format(filename), sep=',', index=False)

    # Log this run's hyperparameters and results
    hyperscores.append([value for param, value in params.items()] +
                       [np.mean(scores_window), i_episode])
    log_df = pandas.DataFrame(hyperscores,
                              columns=list(params.keys()) + ['scores', 'trained_episodes'])
    log_df.to_csv('scores/{:s}.csv'.format(log_filename))

    return params['stop_scores'] - np.mean(scores_window)
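# The MADDPG variant of train_agent() relies on module-level state prepared
# earlier (env, brain_name, number_of_agents, state_size, action_size, params,
# hyperscores, log_filename). A minimal sketch of the bookkeeping globals it
# appends to; the names and values here are illustrative assumptions:
hyperscores = []                     # one row per completed training run
log_filename = 'maddpg_hyperscores'  # hypothetical log name (no extension)

# Each call trains a fresh MultiAgent and returns stop_scores minus the final
# windowed average, so smaller return values mean better runs.
result = train_agent()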