def __init__(self, params): print "Initializing Module..." self.params = params self.sess = tf.Session() self.DB = database(self.params["db_size"], self.params["input_dims_proc"]) self.engine = emulator(rom_name="breakout.bin", vis=True) self.params["num_act"] = len(self.engine.legal_actions) self.build_nets() self.Q_global = 0 self.cost_disp = 0
def __init__(self, args, sess):
    print('Initializing..')
    self.args = args
    self.sess = sess
    self.per = per.PER(self.args)
    self.engine = emulator(rom_name='breakout.bin', vis=self.args.visualize)
    self.args.num_actions = len(self.engine.legal_actions)
    # Build model
    self.build_model()
    self.sess.run(tf.global_variables_initializer())
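# per.PER above is this repo's prioritized experience replay buffer; its API is not shown
# in the snippet. The sketch below is a hypothetical, minimal proportional-priority buffer
# (in the spirit of Schaul et al., 2015) illustrating the idea only: transitions are sampled
# with probability proportional to |TD error|^alpha and the bias is corrected with
# importance weights. Class and method names here are illustrative, not the repo's.
import numpy as np

class SimplePER(object):
    def __init__(self, capacity, alpha=0.6, eps=1e-6):
        self.capacity = capacity
        self.alpha = alpha          # how strongly priorities skew sampling
        self.eps = eps              # keeps zero-error transitions sampleable
        self.data = []              # stored transitions
        self.priorities = []        # one priority per stored transition

    def add(self, transition, td_error=1.0):
        # Evict oldest when full; new samples get a nonzero priority so they replay at least once.
        if len(self.data) >= self.capacity:
            self.data.pop(0)
            self.priorities.pop(0)
        self.data.append(transition)
        self.priorities.append((abs(td_error) + self.eps) ** self.alpha)

    def sample(self, batch_size, beta=0.4):
        probs = np.asarray(self.priorities) / np.sum(self.priorities)
        idx = np.random.choice(len(self.data), batch_size, p=probs)
        weights = (len(self.data) * probs[idx]) ** (-beta)
        weights = weights / weights.max()   # normalized importance-sampling weights
        return [self.data[i] for i in idx], idx, weights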
def __init__(self, params):
    print 'Initializing Module...'
    self.params = params
    self.sess = tf.Session()
    self.DB = database(self.params['db_size'], self.params['input_dims_proc'])
    self.engine = emulator(rom_name='breakout.bin', vis=True)
    self.params['num_act'] = len(self.engine.legal_actions)
    self.build_nets()
    self.Q_global = 0
    self.cost_disp = 0
def __init__(self, params):
    print 'Initializing Module...'
    self.params = params
    self.gpu_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=self.params['gpu_fraction']))
    self.sess = tf.Session(config=self.gpu_config)
    self.DB = database(self.params)
    self.engine = emulator(rom_name='breakout.bin', vis=self.params['visualize'],
                           windowname=self.params['network_type'] + '_preview')
    self.params['num_act'] = len(self.engine.legal_actions)
    self.build_net()
    self.training = True
def __init__(self, params):
    print('Initializing Module...')
    self.params = params
    self.gpu_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=self.params['gpu_fraction']))
    self.sess = tf.Session(config=self.gpu_config)
    self.DB = database(self.params)
    self.engine = emulator(rom_name='breakout.bin', vis=self.params['visualize'],
                           windowname=self.params['network_type'] + '_preview')
    self.params['num_act'] = len(self.engine.legal_actions)
    self.build_net()
    self.training = True
    self.lock = _thread.allocate_lock()
def __init__(self, device=None, init_options=None, game=nature_params['game']):
    super(DeepQ, self).__init__(device=device, init_options=init_options)
    assert game in ["breakout", "space_invaders", "seaquest"]
    self.G = tf.Graph()  # NOTE: moved tf.Graph construction to setup
    self.params = nature_params
    self.DB = database(self.params)
    self.engine = emulator(rom_name='{}.bin'.format(game), vis=self.params['visualize'],
                           frameskip=self.params['frameskip'], windowname=self.params['window_name'])
    self.params['num_act'] = len(self.engine.legal_actions)
    with self.G.device(device):
        self.build_inference()
def __init__(self, params):
    print 'Initializing Module...'
    self.params = params
    # self.gpu_config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=self.params['gpu_fraction']))
    # self.sess = tf.Session(config=self.gpu_config)
    # GPU memory usage
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8  # fixed fraction of GPU memory
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config)
    self.DB = database(self.params)
    self.engine = emulator(rom_name='roms/breakout.bin', vis=self.params['visualize'])
    self.params['num_act'] = len(self.engine.legal_actions)
    self.build_net()
    self.training = not self.params['only_eval']
    self.game_cnt = 0
    fourcc = cv2.cv.CV_FOURCC('m', 'p', '4', 'v')
    self.video = cv2.VideoWriter('ckpt/video.mp4', fourcc, 25, (160, 210), True)
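# The two ConfigProto options above interact: per_process_gpu_memory_fraction caps the share
# of GPU memory TensorFlow may claim, while allow_growth makes it allocate lazily instead of
# grabbing the cap up front. A minimal stand-alone restatement of that setup (TF 1.x API; the
# 0.8 cap and the helper name are assumptions copied from / added for this sketch, not part
# of the repo):
import tensorflow as tf

def make_session(gpu_fraction=0.8, grow=True):
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = gpu_fraction  # hard upper bound
    config.gpu_options.allow_growth = grow                             # allocate on demand
    return tf.Session(config=config)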
    sess.run(tf.initialize_variables(targetnet.param_list))
    prevstate = newstate
    newstate, reward, terminal = engine.next(action)  # IMP: newstate contains terminal info
    params['eps'] = 0.1 + max(0, (1 - 0.1) * (100000 - max(0, global_cntr)) / 100000)
    total_reward_ep = total_reward_ep + reward
    global_cntr = global_cntr + 1
    sys.stdout.write("Episode: %d | Training progress: %d | ep_time: %f | reward: %f \n" %
                     (numeps, global_cntr, time.time() - start_time, total_reward_ep))
    sys.stdout.flush()

if __name__ == "__main__":
    qvals = []
    global_cntr = 1
    DB = database(params['db_size'], params['input_dims'])
    engine = emulator(rom_name='breakout.bin', vis=False)
    params['num_actions'] = len(engine.legal_actions)
    # creating Q and target network.
    qnet = Model(params, None)
    sess = tf.Session()
    init = tf.initialize_all_variables()
    sess.run(init)
    targetnet = Model(params, qnet)
    sess.run(tf.initialize_variables(targetnet.param_list))
    # cost calculation
    discount = tf.constant(params['discount'])
    maxval = tf.mul(discount, tf.reduce_max(targetnet.pyx, 1))
    yj_val = tf.add(targetnet.rewards, tf.mul(targetnet.terminals, maxval))
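# The in-line eps update above is a linear anneal of the exploration rate from 1.0 down to
# 0.1 over the first 100000 steps, held at 0.1 afterwards. A hedged, equivalent helper
# (the constants are the ones hard-coded in the snippet; the function name is illustrative):
def annealed_epsilon(step, eps_final=0.1, eps_start=1.0, anneal_steps=100000):
    frac = max(0, anneal_steps - max(0, step)) / float(anneal_steps)
    return eps_final + (eps_start - eps_final) * frac

# e.g. annealed_epsilon(0) == 1.0, annealed_epsilon(50000) == 0.55, annealed_epsilon(200000) == 0.1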
nepisodes = None
episode_reward = None
agent_params = {
    'subgoal_dims': 7,
    'use_distance': True,
    'max_reward': 1000,
    'min_reward': -1000,
    'rescale_r': True
}
# param setting section ending

# training initial section
ag = agent(agent_params)
emu = emulator("montezuma_revenge.bin", False)
rawstate = emu.newGame()
reward = 0
terminal = False
learn_start = ag.learn_start
time_history[1] = 0
# initial section ending

if META_AGENT:
    subgoal = ag.pick_subgoal(rawstate, 0, False, False, None)

action_list = [
    'no-op', 'fire', 'up', 'right', 'left', 'down',
    'up-right', 'up-left', 'down-right', 'down-left',
    'up-fire', 'right-fire', 'left-fire', 'down-fire',
    'up-right-fire', 'up-left-fire', 'down-right-fire', 'down-left-fire'
]
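# agent_params above carries max_reward / min_reward / rescale_r, which in DeepMind-style DQN
# agents usually means: clip the raw emulator reward into [min_reward, max_reward] and
# optionally rescale it before learning. The exact transform used by this agent is not shown
# in the snippet; the sketch below is only a plausible version of that convention, and the
# helper name is hypothetical:
def preprocess_reward(raw_reward, params):
    r = raw_reward
    if params.get('max_reward') is not None:
        r = min(r, params['max_reward'])
    if params.get('min_reward') is not None:
        r = max(r, params['min_reward'])
    if params.get('rescale_r'):
        # common choice: divide by the largest absolute bound so r lands in [-1, 1]
        scale = max(abs(params['max_reward']), abs(params['min_reward']))
        r = r / float(scale)
    return r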
params["eps"] = 0.1 + max(0, (1 - 0.1) * (100000 - max(0, global_cntr)) / 100000) total_reward_ep = total_reward_ep + reward global_cntr = global_cntr + 1 sys.stdout.write( "Episode: %d | Training progress: %d | ep_time: %f | reward: %f \n" % (numeps, global_cntr, time.time() - start_time, total_reward_ep) ) sys.stdout.flush() if __name__ == "__main__": qvals = [] global_cntr = 1 DB = database(params["db_size"], params["input_dims"]) engine = emulator(rom_name="breakout.bin", vis=False) params["num_actions"] = len(engine.legal_actions) # creating Q and target network. qnet = Model(params, None) sess = tf.Session() init = tf.initialize_all_variables() sess.run(init) targetnet = Model(params, qnet) sess.run(tf.initialize_variables(targetnet.param_list)) # cost calculation discount = tf.constant(params["discount"]) maxval = tf.mul(discount, tf.reduce_max(targetnet.pyx, 1)) yj_val = tf.add(targetnet.rewards, tf.mul(targetnet.terminals, maxval))