Esempio n. 1
0
    batch_size=config['batch_size'],
    state_shape=config['model_state_shape']+[config['state_time']],
    output_state_shape=3,
    name='Model_ant_new',
    summaries=True,
    **config['q_params']
    )

agent.session.run(tf.global_variables_initializer())
'''

# Sliding window of recent episode rewards used for the running average.
reward_list = []

for ep in range(config['episodes']):
    # Run one training episode; the agent returns the episode reward and a
    # 3-component model loss (ant, ball, pro).
    ep_reward, model_loss = agent.train_episode()
    agent._update_training_reward(ep_reward)
    reward_list.append(ep_reward)

    print(
        'episode: %d, step: %d, eps: %.4f, model loss (ant, ball, pro): %.4f, %.4f, %.4f'
        % (ep, agent.steps, agent.epsilon, model_loss[0], model_loss[1], model_loss[2]))

    # Keep only the most recent rewards (window of ~11 entries).
    if ep > 10:
        reward_list.pop(0)

    avg_trng_reward = np.mean(reward_list)

    # Periodic validation; skipped on the very first episode.
    if ep != 0 and ep % config['episodes_validate'] == 0:
        agent.validate_model(epsilon=0.05)
Esempio n. 2
0
    # Train a Q-agent on the Seaquest config, tracking running averages of
    # both reward and gradient magnitude. (Enclosing scope is outside this
    # excerpt; the final `if` body continues beyond it as well.)
    config = object_seaquest_config
    log_dir = get_log_dir('log', config['game'] + '_' +
                          str(config['double_q']))  # Name of logging directory
    agent = QAgent(config=config, log_dir=log_dir)
    # max_to_keep=None keeps every checkpoint instead of rotating old ones.
    saver = tf.train.Saver(max_to_keep=None)
    #save.restore(agent.session,'%s/episode_%d.ckpt'%(log_dir,episode))
    reward_list = []  # sliding window of recent episode rewards
    grad_list = []    # sliding window of recent positive gradient values
    avg_trng_reward = 0
    avg_grad = 0
    for episode in range(config['episodes']):
        # Averages printed here are from the *previous* iteration (0 on the first).
        print(
            '\nepisode: %d, step: %d, eps: %.4f, avg_reward: %.4f, avg_grad: %.4f\n\n---------------------'
            % (episode, agent.steps, agent.epsilon, avg_trng_reward, avg_grad))
        # Store the rewards...
        cur_trng_reward, grad_cur = agent.train_episode()
        agent._update_training_reward(cur_trng_reward)
        reward_list.append(cur_trng_reward)
        # Only positive gradients contribute to the running gradient average.
        if grad_cur > 0:
            grad_list.append(grad_cur)
            avg_grad = np.mean(grad_list)
        # Trim both windows to roughly the last 11 entries.
        if episode > 10:
            del reward_list[0]
        if len(grad_list) > 10:
            del grad_list[0]

        avg_trng_reward = np.mean(reward_list)

        # NOTE(review): `tol` is unused in the visible lines — presumably
        # consumed by the validation logic cut off below; confirm.
        tol = 1e-5
        # Periodic validation (skips episode 0); body continues past this excerpt.
        if episode % config['episodes_validate'] == 0 and episode != 0:
            #if agent.steps % config['steps_validate'] == 0:
Esempio n. 3
0
from agent import QAgent
from configs import pong_config, object_pong_config, breakout_config
from util import get_log_dir


if __name__ == '__main__':
    # Train a Q-agent on the object-based Pong config; validate periodically
    # and append rewards to a learning-curve file. NOTE(review): `tf` and `np`
    # are used below but not imported in the visible snippet — presumably
    # imported above this excerpt; confirm.
    config = object_pong_config
    log_dir = get_log_dir('log', config['game']+'_'+str(config['double_q'])) # Name of logging directory
    agent = QAgent(config=config, log_dir=log_dir)
    # max_to_keep=None keeps every checkpoint instead of rotating old ones.
    saver = tf.train.Saver(max_to_keep=None)
    reward_list = []  # sliding window of recent episode rewards

    for episode in range(config['episodes']):
        print('episode: %d, step: %d, eps: %.4f' % (episode, agent.steps, agent.epsilon))
        # Store the rewards...
        cur_trng_reward = agent.train_episode()
        agent._update_training_reward(cur_trng_reward)
        reward_list.append(cur_trng_reward)

        # Keep only the most recent rewards (window of ~11 entries).
        if episode > 10:
            del reward_list[0]

        avg_trng_reward = np.mean(reward_list)

        # Periodic validation with a fully greedy policy (epsilon=0.0);
        # skipped on episode 0.
        if episode % config['episodes_validate']==0 and episode != 0:
        #if agent.steps % config['steps_validate'] == 0:
            print('Validate....\n==============')
            scores = [agent.validate_episode(epsilon=0.0) for i in range(config['episodes_validate_runs'])]
            agent._update_validation_reward(np.mean(scores))
            print(scores)
            # NOTE(review): file opened in append mode with no visible close —
            # the loop body continues past this excerpt; confirm it is closed
            # there (a `with` block would be safer).
            f = open('learning_curves/trial13/rewards6.txt', 'a')
Esempio n. 4
0
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from agent import QAgent
from configs import pong_config, breakout_config
from util import get_log_dir

if __name__ == '__main__':
    # Train a Q-agent on Breakout, validating and checkpointing on fixed
    # episode intervals taken from the config.
    config = breakout_config
    # Log directory name encodes the game and the double-Q flag.
    log_dir = get_log_dir('log', config['game'] + '_' + str(config['double_q']))
    agent = QAgent(config=config, log_dir=log_dir)
    saver = tf.train.Saver()

    for ep in range(config['episodes']):
        print('\n\nepisode: %d, step: %d, eps: %.4f\n\n---------------------' % (ep, agent.steps, agent.epsilon))
        # Run one training episode and record the reward it earned.
        agent._update_training_reward(agent.train_episode())

        # Periodic evaluation with a small exploration rate.
        if ep % config['episodes_validate'] == 0:
            print('Validate....\n==============')
            scores = []
            for _ in range(config['episodes_validate_runs']):
                scores.append(agent.validate_episode(epsilon=0.05))
            agent._update_validation_reward(np.mean(scores))
            print(scores)
        # Store every validation interval
        if ep % config['episodes_save_interval'] == 0:
            saver.save(agent.session, '%s/episode_%d.ckpt' % (log_dir, ep))