Code Example #1
from ddpg import Agent
from utils import plotLearning  # local plotting helper; a sketch appears after Code Example #2
import gym
import numpy as np

env = gym.make('LunarLanderContinuous-v2')

agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001,
              env=env, batch_size=64, layer1_size=400, layer2_size=300,
              n_actions=2)

np.random.seed(42)
score_history = []

for i in range(1000):
    done = False
    score = 0
    obs = env.reset()
    while not done:
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state

    score_history.append(score)
    print('Episode {} | Score {:.2f} | 100-game average {:.2f}'.format(
        i, score, np.mean(score_history[-100:])))

    if i % 25 == 0:
        agent.save_models()

filename = 'LunarLander-alpha000025-beta00025-400-300.png'
plotLearning(score_history, filename, window=100)
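Every example here imports `Agent` from a local `ddpg` module that is not shown. Below is a minimal sketch of the interface the loops rely on, using only the constructor arguments and methods that actually appear above; the bodies are placeholders, not the real implementation.

```python
# Interface sketch only; the real ddpg.Agent implements these with
# actor/critic networks, their target copies, and a replay buffer.
class Agent:
    def __init__(self, alpha, beta, input_dims, tau, env,
                 batch_size=64, layer1_size=400, layer2_size=300,
                 n_actions=2):
        ...  # build actor/critic networks, target networks, replay buffer

    def choose_action(self, observation):
        ...  # actor forward pass plus exploration noise

    def remember(self, state, action, reward, new_state, done):
        ...  # store one transition in the replay buffer

    def learn(self):
        ...  # sample a batch, update critic then actor, soft-update targets

    def save_models(self):
        ...  # checkpoint network weights

    def load_models(self):
        ...  # restore checkpointed weights
```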
Code Example #2
for i in range(n_episodes):  # assumed loop header; the fragment starts inside an episode loop
    env_params = env.reset()
    # Goal-based envs return a dict observation; stack the pieces the
    # network consumes. Remove the indexing for envs without dict obs.
    obs = env_params['observation']
    d_goal = env_params['desired_goal']
    net_input = np.hstack((obs, d_goal))
    done = False
    score = 0
    while not done:
        act = agent.choose_action(net_input)
        new_state, reward, done, info = env.step(act)
        # Same dict handling as above; remove for envs without dict obs
        new_state = np.hstack((new_state['observation'],
                               new_state['desired_goal']))
        agent.remember(net_input, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        net_input = new_state
        # env.render()  # uncomment to watch training
    score_history.append(score)

    if i % 25 == 0:
        agent.save_models()

    print('episode ', i, 'score %.2f' % score,
          'trailing 100 games avg %.3f' % np.mean(score_history[-100:]))

filename = 'LunarLander-alpha000025-beta00025-400-300.png'
plotLearning(score_history, filename, window=100)
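`plotLearning` is called here and in the examples above but never defined; in the repositories these snippets come from it typically lives in a local `utils` module. A minimal sketch of such a helper, assuming it plots a running average of the scores over the given window:

```python
# Hypothetical utils.py; the actual helper in the source repo may differ.
import numpy as np
import matplotlib.pyplot as plt

def plotLearning(scores, filename, window=100):
    # Running average of the trailing `window` scores at each episode
    running_avg = np.empty(len(scores))
    for t in range(len(scores)):
        running_avg[t] = np.mean(scores[max(0, t - window + 1):t + 1])
    plt.plot(running_avg)
    plt.xlabel('Episode')
    plt.ylabel('Score ({}-episode running average)'.format(window))
    plt.savefig(filename)
```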
Code Example #3
if __name__ == '__main__':  # assumed: the indented fragment sits inside a main guard
    base_dir: str = os.path.dirname(__file__)

    figure_file = os.path.abspath(os.path.join(base_dir, 'plots/pendulum.png'))

    best_score = env.reward_range[0]
    score_history = []
    load_checkpoint = False

    if load_checkpoint:
        # Fill the replay buffer with enough random transitions for one
        # batch, then call learn() once so the networks are built before
        # their weights are restored from the checkpoint.
        n_steps = 0
        while n_steps <= agent.batch_size:
            observation = env.reset()
            action = env.action_space.sample()
            observation_, reward, done, info = env.step(action)
            agent.remember(observation, action, reward, observation_, done)
            n_steps += 1
        agent.learn()
        agent.load_models()
        evaluate = True
    else:
        evaluate = False

    for i in range(n_episodes):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(observation, evaluate)
            env.render()
            observation_, reward, done, info = env.step(action)
            # Assumed completion from here: the snippet is truncated, and
            # the following mirrors the pattern of the sibling examples.
            score += reward
            agent.remember(observation, action, reward, observation_, done)
            if not load_checkpoint:
                agent.learn()
            observation = observation_
        score_history.append(score)
        avg_score = np.mean(score_history[-100:])
        if avg_score > best_score:
            best_score = avg_score
            if not load_checkpoint:
                agent.save_models()
Code Example #4
# The opening of this snippet is cut off. input_dims=[3] and n_actions=1
# match Pendulum-v0; the learning rates below are assumed, not original.
env = gym.make('Pendulum-v0')
agent = Agent(alpha=0.000025,
              beta=0.00025,
              input_dims=[3],
              tau=0.001,
              env=env,
              n_actions=1)

np.random.seed(0)
score_history = []

for episode in range(1000):

    state = env.reset()
    done = False
    score = 0

    while not done:
        action = agent.choose_action(state)

        next_state, reward, done, info = env.step(action)
        agent.remember(state, action, reward, next_state, int(done))

        agent.learn()

        score += reward
        state = next_state

    score_history.append(score)
    print('Episode {}, Score: {:.2f}, 100 game average: {:.2f}'.format(
        episode, score, np.mean(score_history[-100:])))

filename = 'pendulum.png'
plotLearning(score_history, filename, window=100)
Code Example #5
history = []
critic_loss = []  # not populated in this fragment
actor_loss = []   # not populated in this fragment

for i in range(n_episodes):
    obs = env.reset()
    done = False
    score = 0

    while not done:
        action = agent.choose_action(obs)
        obs_, reward, done, _ = env.step(action)
        score += reward
        agent.remember(obs, action, reward, obs_, done)
        agent.learn()
        obs = obs_

    history.append(score)

# 25-episode running mean of the scores
avg = np.zeros(len(history))
for i in range(len(avg)):
    avg[i] = np.mean(history[max(0, i - 25):(i + 1)])
plt.plot(range(1, n_episodes + 1), avg)
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.title(f'{env_name} Showing Mean Reward')
plt.savefig(f'results/{env_name}-DDPG-Showing-Mean-Rewards-b128.jpg')
plt.show()
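All five examples use the classic Gym API, where reset() returns only the observation and step() returns a 4-tuple. On Gym 0.26+ and Gymnasium the loop needs a small adaptation; a minimal sketch, assuming the same Agent interface and hyperparameters as Code Example #1:

```python
# Sketch for Gym >= 0.26 / Gymnasium; Agent is the same assumed interface
# used throughout the examples above.
import gymnasium as gym
import numpy as np
from ddpg import Agent

env = gym.make('LunarLanderContinuous-v2')
agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001,
              env=env, batch_size=64, layer1_size=400, layer2_size=300,
              n_actions=2)

for i in range(1000):
    obs, info = env.reset()  # reset() now returns (observation, info)
    done = False
    score = 0
    while not done:
        act = agent.choose_action(obs)
        # step() now returns a 5-tuple; an episode ends on either flag
        obs_, reward, terminated, truncated, info = env.step(act)
        done = terminated or truncated
        agent.remember(obs, act, reward, obs_, int(done))
        agent.learn()
        score += reward
        obs = obs_
```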