Example #1
import matplotlib.pyplot as plt
import tensorflow as tf
import gym
import numpy as np
import math

import reinforcement_learning as rl

# Training

env_name = 'Breakout-v0'
#env_name = 'SpaceInvaders-v0'
# Set up checkpoint paths and download a pre-trained checkpoint if one is available.
rl.checkpoint_base_dir = 'checkpoints_tutorial16/'
rl.update_paths(env_name=env_name)
rl.maybe_download_checkpoint(env_name=env_name)
# Create the agent; training=True enables learning and render=True shows the game.
agent = rl.Agent(env_name=env_name,
                 training=True,
                 render=True,
                 use_logging=False)
model = agent.model
replay_memory = agent.replay_memory
agent.run(num_episodes=1)

# Testing

agent.epsilon_greedy.epsilon_testing = 0.01  # use a small exploration rate during testing
agent.training = False
agent.reset_episode_rewards()
agent.render = True
agent.run(num_episodes=1)
Example #2
import numpy as np
import math

import reinforcement_learning as rl

env_name = 'Pong-v0'
# env_name = 'Breakout-v0'
# env_name = 'SpaceInvaders-v0'

rl.checkpoint_base_dir = 'checkpoints/'

rl.update_paths(env_name=env_name)

agent = rl.Agent(env_name=env_name,
                 training=True,   # set to False to watch the agent play
                 render=False,    # set to True to render the game; rendering slows down training
                 use_logging=True)
model = agent.model
replay_memory = agent.replay_memory

# Hint for testing: set epsilon to 0.1 or even lower (0.01), which means the agent will choose
#                   a random action from the action set with 10% (1%) probability instead of the
#                   max Q-value action. You will find it in the agent definition; a sketch of the
#                   testing setup follows the training run below.

agent.run(num_episodes=None)  # None means training runs until you stop it manually
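
# A minimal sketch of the testing setup described in the hint above, following
# the pattern from Example #1. The epsilon value 0.01 is one of the suggested
# values, not a required setting.
agent.epsilon_greedy.epsilon_testing = 0.01
agent.training = False
agent.reset_episode_rewards()
agent.render = True
agent.run(num_episodes=1)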

# Load the logs that were written to disk during training.
log_q_values = rl.LogQValues()
log_reward = rl.LogReward()
log_loss = rl.LogLoss(env_name)
log_q_values.read()
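
# Once read, the logs can be plotted with matplotlib. This is a sketch that
# assumes the attribute names (count_states, episode, mean) exposed by the
# Hvass-Labs reinforcement_learning module; check them against your version.
import matplotlib.pyplot as plt

log_reward.read()

plt.plot(log_reward.count_states, log_reward.episode, label='Episode Reward')
plt.plot(log_reward.count_states, log_reward.mean, label='Mean Reward')
plt.xlabel('Number of States Processed')
plt.ylabel('Reward')
plt.legend()
plt.show()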
Example #3
import reinforcement_learning

def main():
    # This snippet uses a different reinforcement_learning module than the
    # examples above; test() is assumed to be defined elsewhere in the project.
    agent = reinforcement_learning.Agent(0, 0)
    agent.load(reinforcement_learning.WEIGHTS_PATH)
    test(agent)