            total_loss = actor_loss + critic_loss

        # gradients of the combined loss w.r.t. the shared network's weights
        gradient = tape.gradient(total_loss, self.actor_critic.trainable_variables)
        self.actor_critic.optimizer.apply_gradients(zip(
            gradient, self.actor_critic.trainable_variables))


############## PART 3 ###################

import gym
import numpy as np
from actor_critic import Agent
from utils import plot_learning_curve

if __name__ == '__main__':
    env = gym.make('CartPole-v0')
    agent = Agent(alpha=1e-5, n_actions=env.action_space.n)
    n_games = 1800
    filename = 'cartpole.png'
    figure_file = 'plots/' + filename
    best_score = env.reward_range[0]
    score_history = []
    load_checkpoint = False

    if load_checkpoint:
        agent.load_models()

    for i in range(n_games):
        observation = env.reset()
        done = False
import gym
import numpy as np
from actor_critic import Agent
from utils import plotLearning

if __name__ == '__main__':
    env = gym.make('CartPole-v0')
    agent = Agent(alpha=0.001, n_actions=env.action_space.n)
    n_episodes = 400
    filename = 'cartpole.png'
    best_score = env.reward_range[0]
    score_history = []
    load_checkpoint = False

    if load_checkpoint:
        agent.load_model()

    for i in range(n_episodes):
        obs = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(obs)
            new_state, reward, done, info = env.step(action)
            score += reward
            if not load_checkpoint:
                agent.learn(obs, reward, new_state, done)
            obs = new_state
        score_history.append(score)
        avg_score = np.mean(score_history[-100:])
        print(f'episode-{i},score={score},avg-score={avg_score}')
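# The scripts in this section import a plotting helper from utils
# (plotLearning / plot_learning_curve) that is not shown here. A minimal
# sketch of what such a helper could look like, assuming it plots a 100-game
# running average of the scores and saves the figure, consistent with the
# fragment at the end of this section:
import numpy as np
import matplotlib.pyplot as plt

def plot_learning_curve(x, scores, figure_file):
    # running average of the previous 100 scores at each episode
    running_avg = np.zeros(len(scores))
    for i in range(len(scores)):
        running_avg[i] = np.mean(scores[max(0, i - 100):i + 1])
    plt.plot(x, running_avg)
    plt.title('Running average of previous 100 scores')
    plt.savefig(figure_file)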
import gym
import numpy as np
from actor_critic import Agent
from utils import plot_learning_curve

if __name__ == '__main__':
    env = gym.make('CartPole-v0')
    agent = Agent(alpha=1e-5, n_actions=env.action_space.n)
    n_games = 1800
    filename = 'cartpole.png'
    figure_file = 'plots/' + filename
    best_score = env.reward_range[0]
    score_history = []
    load_checkpoint = False

    if load_checkpoint:
        agent.load_model()

    for i in range(n_games):
        state = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(state)
            next_state, reward, done, info = env.step(action)
            score += reward
import pybullet_envs
import gym
import numpy as np
from actor_critic import Agent
from utils import plot_learning_curve
from gym import wrappers

if __name__ == "__main__":
    env_name = "InvertedPendulumBulletEnv-v0"
    env = gym.make(env_name)
    agent = Agent(
        input_dims=env.observation_space.shape,
        env=env,
        n_actions=env.action_space.shape[0],
    )
    n_games = 250

    # record a video of every episode
    env = wrappers.Monitor(env, "tmp/video",
                           video_callable=lambda episode_id: True, force=True)

    filename = "inverted_pendulum.png"
    figure_file = "plots/" + filename
    best_score = env.reward_range[0]
    score_history = []
    load_checkpoint = False

    if load_checkpoint:
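        # The rest of this script is not shown in the excerpt above. What
        # follows is a minimal sketch of how it might continue, assuming a
        # replay-buffer style Agent exposing choose_action(), remember(),
        # learn(), load_models() and save_models(); adjust to the actual
        # Agent API.
        agent.load_models()

    for i in range(n_games):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.remember(observation, action, reward, observation_, done)
            if not load_checkpoint:
                agent.learn()
            observation = observation_
        score_history.append(score)
        avg_score = np.mean(score_history[-100:])

        # keep the best-scoring weights on disk
        if avg_score > best_score:
            best_score = avg_score
            if not load_checkpoint:
                agent.save_models()

        print(f'episode {i}, score {score:.1f}, avg score {avg_score:.1f}')

    if not load_checkpoint:
        x = [i + 1 for i in range(n_games)]
        plot_learning_curve(x, score_history, figure_file)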
import gym
import numpy as np
from actor_critic import Agent, plot_learning_curve

if __name__ == '__main__':
    env = gym.make('LunarLander-v2')
    best_score = -np.inf
    load_checkpoint = True
    agent = Agent(gamma=0.99, lr=5e-6, input_dims=[8], n_actions=4,
                  fc1_dims=2048, fc2_dims=1536,
                  chkpt_dir='.\\models\\', env_name='Lunar_Practice')

    if load_checkpoint:
        agent.load_model()

    n_games = 5
    fname = 'ACTOR_CRITIC_' + 'lunar_practice'
    figure_file = '.\\plots\\' + fname + '.png'
    scores = []

    for i in range(n_games):
        done = False
        observation = env.reset()
        score = 0
        while not done:
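            # The loop body is not shown in the excerpt above. A minimal
            # sketch of an evaluation loop, assuming the Agent exposes
            # choose_action(); with load_checkpoint = True the agent only
            # acts, it does not learn.
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            observation = observation_
        scores.append(score)
        avg_score = np.mean(scores[-100:])
        print(f'episode {i}, score {score:.1f}, avg score {avg_score:.1f}')

    x = [i + 1 for i in range(n_games)]
    plot_learning_curve(x, scores, figure_file)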
def plot_learning(x, scores, fig_file):
    # running average of the previous 100 scores at each episode
    running_avg = np.zeros(len(scores))
    for i in range(len(scores)):
        running_avg[i] = np.mean(scores[max(0, i - 100):i + 1])
    plt.plot(x, running_avg)
    plt.title('running average-100 games')
    plt.savefig(fig_file)


if __name__ == '__main__':
    env = gym.make('LunarLander-v2')
    n_games = 2000
    lr = 0.00005
    fc1 = 2048
    fc2 = 1536
    gamma = 0.99
    fname = f'plots/lunarlander_actor_critic_{fc1}_{fc2}_lr{lr}_{n_games}games.png'
    scores = []
    agent = Agent(4, [8], lr=lr, fc1=fc1, fc2=fc2, gamma=gamma)

    for i in range(n_games):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.learn(observation, reward, observation_, done)
            observation = observation_
        scores.append(score)
        avg = np.mean(scores[-100:])
        print(f'episode:{i}, score:{score}, average:{avg}')

    x = [i + 1 for i in range(len(scores))]
    plot_learning(x, scores, fname)