Ejemplo n.º 1
0
                total_loss = actor_loss + critic_loss

            gradient = tape.gradient(total_loss, self.actor_critic.trainable_variables)
            self.actor_critic.optimizer.apply_gradients(zip(
                gradient, self.actor_critic.trainable_variables))


############## PART 3 ###################
import gym
import numpy as np
from actor_critic import Agent
from utils import plot_learning_curve

if __name__ == '__main__':
    # Script entry point: build the CartPole environment and the
    # actor-critic agent, then iterate over n_games episodes.
    #
    # The guard must compare against '__main__' (with the double
    # underscores); the plain string 'main' never matches, so the whole
    # script body would silently be skipped.
    env = gym.make('CartPole-v0')
    agent = Agent(alpha=1e-5, n_actions=env.action_space.n)
    n_games = 1800

    filename = 'cartpole.png'
    figure_file = 'plots/' + filename

    # Seed the best score with the first element of the env's reward range.
    best_score = env.reward_range[0]
    score_history = []
    # The flag has to be bound to a value: a bare `load_checkpoint`
    # expression raises NameError.  False means the saved models are not
    # loaded before the episodes start.
    load_checkpoint = False

    if load_checkpoint:
        agent.load_models()

    for i in range(n_games):
        observation = env.reset()
        done = False
Ejemplo n.º 2
0
import gym
import numpy as np
from actor_critic import Agent
from utils import plotLearning

if __name__ == '__main__':
    # Run the actor-critic agent on CartPole for a fixed number of
    # episodes, printing the per-episode score and the mean of the most
    # recent (up to) 100 scores.
    env = gym.make('CartPole-v0')
    agent = Agent(alpha=0.001, n_actions=env.action_space.n)

    n_episodes = 400
    filename = 'cartpole.png'
    best_score = env.reward_range[0]
    score_history = []
    load_checkpoint = False

    # With the flag set, restore a saved model and skip learning below.
    if load_checkpoint:
        agent.load_model()

    for i in range(n_episodes):
        score = 0
        done = False
        state = env.reset()
        while not done:
            action = agent.choose_action(state)
            step_result = env.step(action)
            next_state, reward, done, info = step_result
            score += reward
            if not load_checkpoint:
                # One learning update per environment transition.
                agent.learn(state, reward, next_state, done)
            state = next_state

        score_history.append(score)
        recent_scores = score_history[-100:]
        avg_score = np.mean(recent_scores)
        print(f'episode-{i},score={score},avg-score={avg_score}')
Ejemplo n.º 3
0
import gym
import numpy as np
from actor_critic import Agent
from utils import plot_learning_curve


if __name__ == '__main__':
    # Script entry point: creates a CartPole environment and an Agent, then
    # plays n_games episodes, accumulating the reward of each episode.
    env = gym.make('CartPole-v0')
    # The number of actions is read from the environment's action space.
    agent = Agent(alpha=1e-5, n_actions=env.action_space.n)
    n_games = 1800

    filename = 'cartpole.png'
    # Output path for a figure; it is not used in the lines visible here.
    figure_file = 'plots/' + filename

    # Starting value for the best score: first element of the reward range.
    best_score = env.reward_range[0]
    score_history = []
    # When True, agent.load_model() is called before the episodes start.
    load_checkpoint = False

    if load_checkpoint:
        agent.load_model()
    
    for i in range(n_games):
        # Each episode starts from a fresh reset of the environment.
        state = env.reset()
        done = False
        score = 0

        while not done:
            action = agent.choose_action(state)
            next_state, reward, done, info = env.step(action)
            score += reward
            # NOTE(review): in the lines visible here `state` is never
            # advanced to `next_state` and no learning call is made; the
            # excerpt appears to end mid-loop - confirm against the full
            # script.
Ejemplo n.º 4
0
import pybullet_envs
import gym
import numpy as np

from actor_critic import Agent
from utils import plot_learning_curve
from gym import wrappers

if __name__ == "__main__":
    env_name = "InvertedPendulumBulletEnv-v0"
    env = gym.make(env_name)
    agent = Agent(
        input_dims=env.observation_space.shape,
        env=env,
        n_actions=env.action_space.shape[0],
    )
    n_games = 250

    env = wrappers.Monitor(env,
                           "tmp/video",
                           video_callable=lambda episode_id: True,
                           force=True)
    filename = "inverted_pendulum.png"

    figure_file = "plots/" + filename

    best_score = env.reward_range[0]
    score_history = []
    load_checkpoint = False

    if load_checkpoint:
Ejemplo n.º 5
0
import gym
import numpy as np
from actor_critic import Agent, plot_learning_curve

if __name__ == '__main__':
    env = gym.make('LunarLander-v2')
    best_score = -np.inf
    load_checkpoint = True

    agent = Agent(gamma=0.99,
                  lr=5e-6,
                  input_dims=[8],
                  n_actions=4,
                  fc1_dims=2048,
                  fc2_dims=1536,
                  chkpt_dir='.\\models\\',
                  env_name='Lunar_Practice')

    if load_checkpoint:
        agent.load_model()

    n_games = 5
    fname = 'ACTOR_CRITIC_' + 'lunar_practice'
    figure_file = '.\\plots\\' + fname + '.png'

    scores = []
    for i in range(n_games):
        done = False
        observation = env.reset()
        score = 0
        while not done:
Ejemplo n.º 6
0
        running_avg[i] = np.mean(scores[max(0, i - 100):i + 1])
    plt.plot(x, running_avg)
    plt.title('running average-100 games')
    plt.savefig(fig_file)


if __name__ == '__main__':
    # Script entry point: train the actor-critic agent on LunarLander for
    # n_games episodes, print per-episode progress, then plot the scores.
    env = gym.make('LunarLander-v2')
    n_games = 2000
    lr = 0.00005
    fc1 = 2048
    fc2 = 1536
    gamma = 0.99
    # The hyper-parameters are embedded in the figure name so that plots
    # from different runs do not overwrite each other.
    fname = f'plots/lunarlander_actor_critic_{fc1}_{fc2}_lr{lr}_{n_games}games.png'
    scores = []
    agent = Agent(4, [8], lr=lr, fc1=fc1, fc2=fc2, gamma=gamma)
    for i in range(n_games):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            # Learn from every single transition.
            agent.learn(observation, reward, observation_, done)
            observation = observation_
        scores.append(score)
        # Mean over the most recent (up to) 100 episode scores.
        avg = np.mean(scores[-100:])
        print(f'episode:{i}, score:{score}, average:{avg}')
    # 1-based episode numbers for the x axis.  The previous
    # `len(range(scores))` called range() on a list and raised TypeError
    # only after all the training had already finished.
    x = list(range(1, len(scores) + 1))
    plot_learning(x, scores, fname)