import matplotlib.pyplot as plt
import tensorflow as tf
import gym
import numpy as np
import math
import reinforcement_learning as rl

# Training
env_name = 'Breakout-v0'
# env_name = 'SpaceInvaders-v0'

rl.checkpoint_base_dir = 'checkpoints_tutorial16/'
rl.update_paths(env_name=env_name)
rl.maybe_download_checkpoint(env_name=env_name)

agent = rl.Agent(env_name=env_name,
                 training=True,
                 render=True,
                 use_logging=False)

model = agent.model
replay_memory = agent.replay_memory

agent.run(num_episodes=1)

# Testing
print(agent.epsilon_greedy.epsilon_testing)  # epsilon used for action selection once training is switched off

agent.training = False
agent.reset_episode_rewards()
agent.render = True

agent.run(num_episodes=1)
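# If the agent still acts too randomly when testing, the exploration rate can be lowered
# before running more test episodes. A minimal sketch, assuming epsilon_testing is a
# writable attribute on agent.epsilon_greedy (it is only read above, so the assignment is
# an assumption). A small non-zero value is usually kept even at test time so the agent
# cannot get stuck repeating the exact same action forever.
agent.epsilon_greedy.epsilon_testing = 0.01  # choose a random action ~1% of the time
agent.training = False
agent.render = True
agent.run(num_episodes=5)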
import numpy as np
import math
import reinforcement_learning as rl

env_name = 'Pong-v0'
# env_name = 'Breakout-v0'
# env_name = 'SpaceInvaders-v0'

rl.checkpoint_base_dir = 'checkpoints/'
rl.update_paths(env_name=env_name)

agent = rl.Agent(env_name=env_name,
                 training=True,    # set to False to watch the agent play
                 render=False,     # set to True to render the game (rendering slows training down)
                 use_logging=True)

model = agent.model
replay_memory = agent.replay_memory

# Hint for testing: set the epsilon value to 0.1 or even lower (0.01), which means the agent
# will choose a random action from the action set with 10% (1%) probability instead of the
# action with the highest Q-value. You can find it in the agent definition.

agent.run(num_episodes=None)  # None = training won't stop until you stop it

log_q_values = rl.LogQValues()
log_reward = rl.LogReward()
log_loss = rl.LogLoss(env_name)

log_q_values.read()
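# The logs are only read above; to actually see the training progress you can plot them
# with matplotlib (already imported in the first snippet). A minimal sketch, assuming the
# logs expose count_states and mean arrays after read(); those attribute names are an
# assumption, check the reinforcement_learning module for the actual field names.
import matplotlib.pyplot as plt

log_reward.read()  # read the reward log as well

plt.plot(log_q_values.count_states, log_q_values.mean, label='Mean Q-value')
plt.plot(log_reward.count_states, log_reward.mean, label='Mean episode reward')
plt.xlabel('Number of states (game frames) processed')
plt.ylabel('Value')
plt.legend()
plt.show()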
import reinforcement_learning

def main():
    # Build the agent, load the saved weights and run the evaluation routine.
    # The two zero arguments and WEIGHTS_PATH are taken as given here; their meaning
    # depends on the Agent constructor in this code base.
    agent = reinforcement_learning.Agent(0, 0)
    agent.load(reinforcement_learning.WEIGHTS_PATH)
    test(agent)  # test() is defined elsewhere in this code base
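# test() is not shown in this snippet. A minimal sketch of what such an evaluation loop
# could look like, assuming the classic gym reset()/step() API used elsewhere in this
# document and a hypothetical agent.act(state) method for picking actions; substitute
# whatever action-selection method this Agent class actually provides.
import gym

def test(agent, env_name='Pong-v0', num_episodes=5):
    env = gym.make(env_name)
    for episode in range(num_episodes):
        state = env.reset()
        done = False
        total_reward = 0.0
        while not done:
            action = agent.act(state)  # assumed greedy action selection
            state, reward, done, info = env.step(action)
            total_reward += reward
        print('Episode {}: reward = {}'.format(episode, total_reward))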