def run_model(params):
    # https://stackoverflow.com/questions/11526975/set-random-seed-programwide-in-python
    # https://stackoverflow.com/questions/30517513/global-seed-for-multiple-numpy-imports
    random.seed(params.seed)
    np.random.seed(params.seed)
    # Must be called before Session
    # https://stackoverflow.com/questions/38469632/tensorflow-non-repeatable-results/40247201#40247201
    tf.set_random_seed(params.seed)

    qagent = QAgent(params)
    if params.is_train:
        qagent.fit()
    elif params.eval_mode == 0:
        qagent.evaluate_mine()
    elif params.eval_mode == 1:
        qagent.test_mine()
    elif params.eval_mode == 2:
        # Sweep over mine counts, rebuilding the agent on a fresh graph each time
        for mines in range(1, 13):
            params.mines_min = mines
            params.mines_max = mines
            print("Mines =", mines)
            qagent.test_mine()
            tf.reset_default_graph()
            qagent = QAgent(params)
def __init__(self):
    # Display attributes
    self.clock = pygame.time.Clock()
    pygame.init()
    self.font = pygame.font.Font(None, 30)
    self.size = (1024, 768)
    self.size_vec = Vector2(1024, 768)
    self.screen = pygame.display.set_mode(self.size)
    self.colors = {'WHITE': (255, 255, 255),
                   'red': (255, 0, 0),
                   'blue': (0, 0, 255),
                   'black': (0, 0, 0)}

    # World attributes
    self.g = 980.0  # cm/sec^2

    # Peg attributes
    self.peg = Vector2(512.0, 100.0)

    # Ball attributes
    self.ball_length = 100.0

    # Initial state: configuration
    self.ball_theta = m.pi / 2  # [0, 2*pi]
    self.ball_omega = 0.0
    self.ball_alpha = self.g / self.ball_length * m.sin(self.ball_theta)
    self.ball_theta_min = 10000
    self.ball_theta_max = -10000
    self.ball_omega_min = 10000
    self.ball_omega_max = -10000
    self.ball = Vector2(self.polar_cart())  # self.ball(x, y)

    self.player = QAgent(self.get_ranges())
def train(params):
    # https://stackoverflow.com/questions/11526975/set-random-seed-programwide-in-python
    # https://stackoverflow.com/questions/30517513/global-seed-for-multiple-numpy-imports
    random.seed(params.seed)
    np.random.seed(params.seed)
    # Must be called before Session
    # https://stackoverflow.com/questions/38469632/tensorflow-non-repeatable-results/40247201#40247201
    tf.set_random_seed(params.seed)

    qagent = QAgent(params)
    if params.is_train:
        qagent.fit()
    else:
        qagent.test_mine()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from agent import QAgent
from configs import object_seaquest_config
from util import get_log_dir

if __name__ == '__main__':
    config = object_seaquest_config
    # Name of logging directory
    log_dir = get_log_dir('log', config['game'] + '_' + str(config['double_q']))
    agent = QAgent(config=config, log_dir=log_dir)

    saver = tf.train.Saver(max_to_keep=None)
    saver.restore(agent.session,
                  '%s/episode_%d.ckpt' % ("log/log/2017-12-09_23-40-34_SeaquestDeterministic-v4_True", 800))

    print('Validate....\n==============')
    scores = agent.validate_episode(epsilon=0, visualise=True)
from plot_util import init_figure, update_figure
import tensorflow as tf
import numpy as np
from agent import QAgent
from configs import pong_config, breakout_config

if __name__ == '__main__':
    config = pong_config
    config['state_memory'] = 1  # prevent allocating a huge chunk of memory
    load_episode = 3650
    epsilon = 0.05  # The epsilon for the strategy

    # Build the graph on CPU to keep the GPU free for training....
    with tf.device('/cpu:0'):
        agent = QAgent(config=config, log_dir=None)

    # Restore the values....
    tf.train.Saver().restore(agent.session,
                             'saves/%s/episode_%d.ckpt' % (config['game'], load_episode))

    mean = 0
    total = 0
    episode = 0
    while True:
        print("\n")
        # Initialise the episode
        state = agent.reset_to_zero_state()
        done = False
        total_reward = 0.
import gym

from agent import QAgent

env = gym.make('CartPole-v0')
agent = QAgent(env)
agent.train()
t = agent.run()
print("Time", t)
DISCOUNT_FACTOR = 0.6
SAVE_MODEL_EVERY = 0

if __name__ == '__main__':
    # Create env
    env = gym.make(ENV_NAME)
    print(env.unwrapped.spec.id)

    # Create agent
    model = QTable(nostates=env.observation_space.n,
                   noactions=env.action_space.n,
                   learning_rate=LEARNING_RATE,
                   discount_factor=DISCOUNT_FACTOR)
    agent = QAgent(actions=env.action_space.n,
                   expl_max=EXPLORATION_MAX,
                   expl_min=EXPLORATION_MIN,
                   expl_decay=EXPLORATION_DECAY,
                   model=model)

    # Get and parse user args
    args = Parser.parseargs(defaultTrainIterations=10000, defaultEvalIterations=10)
    if args.load:
        agent.load(env, args.loadversion)
    if args.train != 0:
        agent.train(env, iterations=args.train, train_s=1, save_i=SAVE_MODEL_EVERY)
    if args.eval != 0:
        print("Evaluation results (lower scores are better):")
                    action='store_true', help='Use ICM module')
args = parser.parse_args()

env = gym.make(args.gym_env)
if type(env.action_space) == Discrete:
    if args.use_DQN:
        a = QAgent(epsilon_start=args.epsilon_start,
                   epsilon_end=args.epsilon_end,
                   epsilon_anneal=args.epsilon_anneal,
                   nb_actions=env.action_space.n,
                   learning_rate=args.learning_rate,
                   gamma=args.gamma,
                   batch_size=args.batch_size,
                   replay_memory_size=args.replay_memory_size,
                   hidden_size=args.hidden_size,
                   model_input_size=env.observation_space.shape[0],
                   use_PER=args.use_PER,
                   use_ICM=args.use_ICM)
        trainQ(a, env, args.MAX_NUMBER_OF_STEPS, args.EPISODES_TO_TRAIN,
               args.START_RENDERING, args.update_frequency)
    else:
        if not args.use_ICM:
            a = ActorCriticAgent(continuous=False,
                                 nb_actions=env.action_space.n,
                                 learning_rate=args.learning_rate,
                                 gamma=args.gamma,
                                 hidden_size=args.hidden_size,