Example #1
    def __init__(self):

        #display attributes
        self.clock = pygame.time.Clock()
        pygame.init()
        self.font=pygame.font.Font(None,30)
        self.size = (1024, 768)
        self.size_vec = Vector2(1024, 768)
        self.screen = pygame.display.set_mode(self.size)
        self.colors = {'WHITE':(255,255,255), 'red': (255,0,0), 'blue': (0,0,255), 'black': (0,0,0)}

        #world attr
        self.g=980.0#cm/sec^2

        #peg-att
        self.peg=Vector2(512.0,100.0)

        #ball-att
        self.ball_length=100.0

        #initial state: config
        self.ball_theta=m.pi/2#[0,2*pi]
        self.ball_omega=0.0
        self.ball_alpha=self.g/self.ball_length*m.sin(self.ball_theta)

        self.ball_theta_min = 10000
        self.ball_theta_max = -10000

        self.ball_omega_min = 10000
        self.ball_omega_max = -10000

        self.ball=Vector2(self.polar_cart())
        #self.ball(x,y)

        self.player = QAgent(self.get_ranges())
Example #2
def run_model(params):

    # https://stackoverflow.com/questions/11526975/set-random-seed-programwide-in-python
    # https://stackoverflow.com/questions/30517513/global-seed-for-multiple-numpy-imports
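    # Seed the Python, NumPy and TensorFlow RNGs so runs are reproducible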
    random.seed(params.seed)
    np.random.seed(params.seed)
    # Must be called before Session
    # https://stackoverflow.com/questions/38469632/tensorflow-non-repeatable-results/40247201#40247201
    tf.set_random_seed(params.seed)

    qagent = QAgent(params)
    if params.is_train:
        qagent.fit()
    elif params.eval_mode == 0:
        qagent.evaluate_mine()
    elif params.eval_mode == 1:
        qagent.test_mine()
    elif params.eval_mode == 2:
        qagent.play_mine()
Example #3
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from agent import QAgent
from configs import object_seaquest_config
from util import get_log_dir


if __name__ == '__main__':
    config = object_seaquest_config
    log_dir = get_log_dir('log', config['game']+'_'+str(config['double_q'])) # Name of logging directory
    agent = QAgent(config=config, log_dir=log_dir)
    saver = tf.train.Saver(max_to_keep=None)

    saver.restore(agent.session, '%s/episode_%d.ckpt'%("log/log/2017-12-09_23-40-34_SeaquestDeterministic-v4_True",800))

    print('Validate....\n==============')
    scores = agent.validate_episode(epsilon=0, visualise=True)
        
Example #4
import matplotlib.pyplot as plt
from util import get_log_dir

import tensorflow as tf
import numpy as np
from agent import QAgent
from configs import object_pong_config

config = object_pong_config
load_episode = 1000
epsilon = 0.05  # The epsilon for the strategy

# Build the graph on CPU to keep gpu for training....
log_dir = get_log_dir('log', config['game'] + '_' +
                      str(config['double_q']))  # Name of logging directory
agent = QAgent(config=config, log_dir=log_dir)

# Restore the values....
tf.train.Saver().restore(agent.session,
                         'saves/trained_Q/1000/episode_%d.ckpt' % load_episode)

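# Re-initialise only the variables under the 'Model' scope, leaving the restored Q-network weights untouched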
var = tf.trainable_variables()
model_vars = [v for v in var if v.name.startswith('Model')]

agent.session.run(tf.variables_initializer(model_vars))  # tf.initialize_variables in older TF releases
'''
# Reset the model functions
agent.model_pro = config['model'](
    batch_size=config['batch_size'],
    state_shape=config['model_state_shape']+[config['state_time']],
    output_state_shape=3,
Example #5
from plot_util import init_figure, update_figure

import tensorflow as tf
import numpy as np
from agent import QAgent
from configs import pong_config, breakout_config

if __name__ == '__main__':
    config = pong_config
    config['state_memory'] = 1  # prevent allocating a huge chunk of memory
    load_episode = 3650
    epsilon = 0.05  # The epsilon for the strategy

    # Build the graph on CPU to keep gpu for training....
    with tf.device('/cpu:0'):
        agent = QAgent(config=config, log_dir=None)

    # Restore the values....
    tf.train.Saver().restore(
        agent.session,
        'saves/%s/episode_%d.ckpt' % (config['game'], load_episode))
    mean = 0
    total = 0
    episode = 0

    while True:
        print("\n")
        # Initialise the episode
        state = agent.reset_to_zero_state()
        done = False
        total_reward = 0.
Example #6
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from agent import QAgent
from configs import pong_config, object_pong_config, breakout_config
from util import get_log_dir


if __name__ == '__main__':
    config = object_pong_config
    log_dir = get_log_dir('log', config['game']+'_'+str(config['double_q'])) # Name of logging directory
    agent = QAgent(config=config, log_dir=log_dir)
    saver = tf.train.Saver(max_to_keep=None)
    reward_list = []

    for episode in range(config['episodes']):
        print('episode: %d, step: %d, eps: %.4f' % (episode, agent.steps, agent.epsilon))
        # Store the rewards...
        cur_trng_reward = agent.train_episode()
        agent._update_training_reward(cur_trng_reward)
        reward_list.append(cur_trng_reward)

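        # Keep only the most recent episode rewards so the average tracks recent performance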
        if episode > 10:
            del reward_list[0]

        avg_trng_reward = np.mean(reward_list)
Example #7
import gym
from agent import QAgent

env = gym.make('CartPole-v0')

agent = QAgent(env)
agent.train()
t = agent.run()
print("Time", t)
Example #8
DISCOUNT_FACTOR = 0.6

SAVE_MODEL_EVERY = 0

if __name__ == '__main__':
    #create env
    env = gym.make(ENV_NAME)
    print(env.unwrapped.spec.id)
    #create agent
    model = QTable(nostates=env.observation_space.n,
                   noactions=env.action_space.n,
                   learning_rate=LEARNING_RATE,
                   discount_factor=DISCOUNT_FACTOR)
    agent = QAgent(actions=env.action_space.n,
                   expl_max=EXPLORATION_MAX,
                   expl_min=EXPLORATION_MIN,
                   expl_decay=EXPLORATION_DECAY,
                   model=model)

    #get and parse user args
    args = Parser.parseargs(defaultTrainIterations=10000,
                            defaultEvalIterations=10)
    if args.load:
        agent.load(env, args.loadversion)
    if args.train != 0:
        agent.train(env,
                    iterations=args.train,
                    train_s=1,
                    save_i=SAVE_MODEL_EVERY)
    if args.eval != 0:
        print("Evaluation results (lower scores are better):")
Example #9
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from agent import QAgent
from configs import object_seaquest_config
from util import get_log_dir

if __name__ == '__main__':
    config = object_seaquest_config
    log_dir = get_log_dir('log', config['game'] + '_' +
                          str(config['double_q']))  # Name of logging directory
    agent = QAgent(config=config, log_dir=log_dir)
    saver = tf.train.Saver(max_to_keep=None)
    reward_list = []

    for episode in range(config['episodes']):
        print('\nepisode: %d, step: %d, eps: %.4f\n\n---------------------' %
              (episode, agent.steps, agent.epsilon))
        # Store the rewards...
        cur_trng_reward = agent.train_episode()
        agent._update_training_reward(cur_trng_reward)
        reward_list.append(cur_trng_reward)

        if episode > 10:
            del reward_list[0]

        avg_trng_reward = np.mean(reward_list)
Example #10
                        action='store_true',
                        help='Use ICM module')

    args = parser.parse_args()

    env = gym.make(args.gym_env)

    if isinstance(env.action_space, Discrete):
        if args.use_DQN:

            a = QAgent(epsilon_start=args.epsilon_start,
                       epsilon_end=args.epsilon_end,
                       epsilon_anneal=args.epsilon_anneal,
                       nb_actions=env.action_space.n,
                       learning_rate=args.learning_rate,
                       gamma=args.gamma,
                       batch_size=args.batch_size,
                       replay_memory_size=args.replay_memory_size,
                       hidden_size=args.hidden_size,
                       model_input_size=env.observation_space.shape[0],
                       use_PER=args.use_PER,
                       use_ICM=args.use_ICM)
            trainQ(a, env, args.MAX_NUMBER_OF_STEPS, args.EPISODES_TO_TRAIN,
                   args.START_RENDERING, args.update_frequency)
        else:
            if not args.use_ICM:
                a = ActorCriticAgent(
                    continuous=False,
                    nb_actions=env.action_space.n,
                    learning_rate=args.learning_rate,
                    gamma=args.gamma,
                    hidden_size=args.hidden_size,
Example #11
# Assumed imports (Vector2 is taken from pygame; QAgent is provided elsewhere in the project)
import math as m
import numpy as np
import pygame
from pygame.math import Vector2


class GameManager:

    def __init__(self):

        #display attributes
        self.clock = pygame.time.Clock()
        pygame.init()
        self.font=pygame.font.Font(None,30)
        self.size = (1024, 768)
        self.size_vec = Vector2(1024, 768)
        self.screen = pygame.display.set_mode(self.size)
        self.colors = {'WHITE':(255,255,255), 'red': (255,0,0), 'blue': (0,0,255), 'black': (0,0,0)}

        #world attr
        self.g=980.0#cm/sec^2

        #peg-att
        self.peg=Vector2(512.0,100.0)

        #ball-att
        self.ball_length=100.0

        #initial state: config
        self.ball_theta=m.pi/2#[0,2*pi]
        self.ball_omega=0.0
        self.ball_alpha=self.g/self.ball_length*m.sin(self.ball_theta)

        self.ball_theta_min = 10000
        self.ball_theta_max = -10000

        self.ball_omega_min = 10000
        self.ball_omega_max = -10000

        self.ball=Vector2(self.polar_cart())
        #self.ball(x,y)

        self.player = QAgent(self.get_ranges())

    def polar_cart(self):
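        # Convert the pendulum's polar state (arm length and theta about the peg) into screen (x, y) coordinates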
        x=int(self.peg.x-self.ball_length*m.sin(self.ball_theta))
        y=int(self.peg.y-self.ball_length*m.cos(self.ball_theta))
        return x,y

    def calculate_min_max(self):
        self.ball_theta_min = min(self.ball_theta_min, self.ball_theta)
        self.ball_theta_max = max(self.ball_theta_max, self.ball_theta)

        self.ball_omega_min = min(self.ball_omega_min, self.ball_omega)
        self.ball_omega_max = max(self.ball_omega_max, self.ball_omega)

    def draw_texts(self):
        scoretext=self.font.render("Theta: %f[%f-%f], Omega: %f[%f-%f], Alpha: %f" % (self.ball_theta, self.ball_theta_min, self.ball_theta_max, self.ball_omega, self.ball_omega_min, self.ball_omega_max, self.ball_alpha), 1,(255,255,255))
        self.screen.blit(scoretext, (0, 457))

    def draw(self):
        self.screen.fill(self.colors['black'])

        pygame.draw.circle(self.screen, self.colors['blue'], (int(self.peg.x), int(self.peg.y)), 10)
        pygame.draw.circle(self.screen, self.colors['blue'], self.ball, 5)
        pygame.draw.line(self.screen, self.colors['blue'], self.peg, self.ball)

        self.draw_texts()

    def get_state(self):
        return [self.ball_theta, self.ball_omega]

    def get_ranges(self):
        # Returns the discretisation bins for each state variable (theta, omega) and, last but not least, the action
        return [np.arange(0, m.pi*2, m.pi/64), np.arange(-100, 100, 10), np.arange(-10*self.g, 10*self.g, 1)]

    def tangential_force(self, f):
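        # f acts as a tangential acceleration on the ball; dividing by the arm length gives the angular acceleration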
        self.ball_alpha = f/self.ball_length
        return self.ball_alpha

    # All the physics happens here: one discrete step of the pendulum dynamics
    def update(self, action):
        # Constant-acceleration state transition over dt (higher-order terms dropped)
        dt = 0.01
        x = np.array([[self.ball_theta], [self.ball_omega], [self.ball_alpha]])
        F = np.array([[1.0, dt, dt*dt/2.0], [0.0, 1.0, dt], [0.0, 0.0, 1.0]])
        y = np.dot(F, x)
        self.ball_theta = y[0][0] % (2*m.pi)
        # Wrap |omega| to [0, 10) while keeping its sign; copysign avoids dividing by zero when omega is 0
        self.ball_omega = m.copysign(abs(y[1][0]) % 10, y[1][0])

        # Apply gravity's tangential component plus the agent's action as the tangential force
        self.tangential_force(self.g*m.sin(self.ball_theta) + action)

        self.ball=Vector2(self.polar_cart())

        self.calculate_min_max()

    def run(self):
        # Core algorithm, everything happens here
        prev_state = self.get_state()
        action = self.player.choose_action(prev_state) # Decide best action according to the agent
        self.update(action) # Execute that action
        next_state = self.get_state() # Get next state
        reward = self.player.get_reward(prev_state, next_state, action)
        self.player.update_Qvalue(prev_state, action, next_state, reward)
        self.draw()
Example #12
from plot_util import init_figure, update_figure

import tensorflow as tf
import numpy as np
from agent import QAgent
from configs import pong_config, breakout_config

if __name__ == '__main__':

    config = pong_config
    config['state_memory'] = 1  # prevent allocating a huge chunk of memory

    # Build the graph on CPU to keep gpu for training....
    with tf.device('/cpu:0'):
        agent = QAgent(config=config, log_dir=None)

    rewards = []

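    # Restore several saved checkpoints from one training run and read back each one's validation reward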
    for idx in range(4, 9):
        load_episode = idx * config['episodes_save_interval']
        epsilon = 0.05  # The epsilon for the strategy

        # Restore the values....
        tf.train.Saver().restore(
            agent.session,
            'log/2017-10-28_20-06-09_PongDeterministic-v4_True/episode_%d.ckpt'
            % (load_episode))

        # Save validation reward to textfile
        cur_reward = agent.session.run(agent.validation_reward)