Ejemplo n.º 1
0
def main():
    """Train or evaluate an agent on CartPoleContinuousBulletEnv-v0.

    Runs 250 episodes. While ``load_checkpoint`` is ``False`` the agent
    learns after every environment step, its models are saved whenever the
    mean score over the last 100 episodes beats the best mean seen so far,
    and the per-episode scores are passed to ``plot_error_drop`` at the end.
    When ``load_checkpoint`` is ``True`` the saved models are loaded, every
    step is rendered instead of learned from, and nothing is saved or
    plotted.
    """
    load_checkpoint = False
    episodes = 250

    env = gym.make("CartPoleContinuousBulletEnv-v0")
    agent = Agent(input_dims=env.observation_space.shape,
                  n_actions=env.action_space.shape[0])

    # Start from the lowest reward the environment reports so that the first
    # running average normally counts as an improvement.
    best_score = env.reward_range[0]
    score_history = []

    if load_checkpoint:
        agent.load_models()
        # NOTE(review): presumably the Bullet env needs the GUI requested
        # before the first reset() -- confirm against the env's docs.
        env.render(mode='human')

    for i in range(episodes):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.remember(observation, action, reward, observation_, done)
            if load_checkpoint:
                env.render()
            else:
                agent.learn()
            observation = observation_
        score_history.append(score)

        # Running mean over (at most) the last 100 episodes.
        avg_score = np.mean(score_history[-100:])
        if avg_score > best_score:
            best_score = avg_score
            if not load_checkpoint:
                agent.save_models()

        print("episode", i, "score", score, "average score", avg_score)

    if not load_checkpoint:
        # 1-based episode numbers for the x axis.
        x = list(range(1, episodes + 1))
        plot_error_drop(x, score_history)
Ejemplo n.º 2
0
from ddpg_torch import Agent
import gym
import numpy as np
#from utils import PlotLearning
# Evaluation loop: load saved models and play LunarLanderContinuous-v2 with
# rendering until the mean score of the last 25 episodes exceeds 200.
env = gym.make('LunarLanderContinuous-v2')
agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001, env=env)
np.random.seed(0)
agent.load_models()
score_history = []
ep = 0
while True:
    ep += 1
    done = False
    score = 0
    obs = env.reset()
    while not done:
        env.render()
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        # Learning is switched off for this run; re-enable to keep training.
        #agent.learn()
        score += reward
        obs = new_state
    score_history.append(score)
    # 'score %.2f' (with a space) keeps the label and the number apart.
    print('episode', ep, 'score %.2f' % score,
          '100 game average %.2f' % np.mean(score_history[-100:]))
    if ep % 25 == 0:
        # NOTE(review): with learn() disabled this presumably re-saves the
        # weights that were just loaded -- confirm it is still wanted.
        agent.save_models()
    # Stop once at least 25 episodes are recorded and their mean beats 200.
    if len(score_history) >= 25 and np.mean(score_history[-25:]) > 200:
        break
filename = 'pendulum.png'
#plotLearning(score_history, filename, window=100)
Ejemplo n.º 3
0
                str(agent.beta) + '_' + str(n_games) + '_games'
    figure_file = 'plots/' + filename + '.png'

    if load_checkpoint:
            agent.load_models()
    
    best_score = env.reward_range[0]
    score_history = []
    for i in range(n_games):
        observation = env.reset()
        observation=observation.reshape(observation.shape[0]*observation.shape[1],)
        done = False
        score = 0
        agent.noise.reset()
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            observation_ = observation_.reshape(observation_.shape[0]*observation_.shape[1],)
            
            if not test_agent:
                agent.remember(observation, action, reward, observation_, done)
                agent.learn()
            score += reward
            observation = observation_
            env.render()
        score_history.append(score)
        avg_score = np.mean(score_history[-100:])

        if not test_agent:
            if avg_score > best_score:
                best_score = avg_score