Example #1
 def __init__(self, params):
     print "Initializing Module..."
     self.params = params
     self.sess = tf.Session()
     self.DB = database(self.params["db_size"], self.params["input_dims_proc"])
     self.engine = emulator(rom_name="breakout.bin", vis=True)
     self.params["num_act"] = len(self.engine.legal_actions)
     self.build_nets()
     self.Q_global = 0
     self.cost_disp = 0
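Examples #1 and #3 are the same constructor from two forks; both wire a replay database and an ALE emulator into one agent. For context, the emulator API they wrap is exercised later on this page (Examples #8 and #9): newGame() starts an episode and next(action) returns (newstate, reward, terminal). A minimal interaction-loop sketch under those assumptions (the emulator class itself is not importable here):

import random

engine = emulator(rom_name='breakout.bin', vis=False)  # as in Examples #8/#10
state = engine.newGame()                               # start an episode
terminal = False
total_reward = 0
while not terminal:
    action = random.choice(engine.legal_actions)       # uniform random policy
    state, reward, terminal = engine.next(action)      # step the emulator
    total_reward += reward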
Example #2
 def __init__(self, args, sess):
     print('Initializing..')
     self.args = args
     self.sess = sess
     self.per = per.PER(self.args)
     self.engine = emulator(rom_name='breakout.bin',
                            vis=self.args.visualize)
     self.args.num_actions = len(self.engine.legal_actions)
     # Build model
     self.build_model()
     self.sess.run(tf.global_variables_initializer())
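Unlike Examples #1 and #3, this constructor receives its tf.Session from the caller and runs the initializer itself. A minimal, self-contained sketch of that session-injection pattern (a stand-in component, not the agent above):

import tensorflow as tf

class Component(object):
    def __init__(self, sess):
        self.sess = sess
        self.w = tf.Variable(tf.zeros([4]))                # build graph ops
        self.sess.run(tf.global_variables_initializer())   # init in caller's session

sess = tf.Session()
component = Component(sess)   # the same sess can be shared by other components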
Example #3
 def __init__(self, params):
     print('Initializing Module...')
     self.params = params
     self.sess = tf.Session()
     self.DB = database(self.params['db_size'],
                        self.params['input_dims_proc'])
     self.engine = emulator(rom_name='breakout.bin', vis=True)
     self.params['num_act'] = len(self.engine.legal_actions)
     self.build_nets()
     self.Q_global = 0
     self.cost_disp = 0
Example #4
	def __init__(self,params):
		print('Initializing Module...')
		self.params = params

		self.gpu_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
			per_process_gpu_memory_fraction=self.params['gpu_fraction']))

		self.sess = tf.Session(config=self.gpu_config)
		self.DB = database(self.params)
		self.engine = emulator(rom_name='breakout.bin',
			vis=self.params['visualize'],
			windowname=self.params['network_type'] + '_preview')
		self.params['num_act'] = len(self.engine.legal_actions)
		self.build_net()
		self.training = True
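Examples #4 and #5 (next) pin a fixed fraction of GPU memory at session creation; Example #7 further down combines a fixed fraction with allow_growth. A compact sketch of the two ConfigProto styles (the 0.5 fraction is illustrative):

import tensorflow as tf

# Style A: pass GPUOptions at construction, as in Examples #4/#5.
config_a = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))

# Style B: mutate the proto's fields, as in Example #7.
config_b = tf.ConfigProto()
config_b.gpu_options.per_process_gpu_memory_fraction = 0.5
config_b.gpu_options.allow_growth = True   # grab memory on demand up to the cap

sess = tf.Session(config=config_b)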
Example #5
	def __init__(self,params):
		print('Initializing Module...')
		self.params = params

		self.gpu_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
			per_process_gpu_memory_fraction=self.params['gpu_fraction']))

		self.sess = tf.Session(config=self.gpu_config)
		self.DB = database(self.params)
		self.engine = emulator(rom_name='breakout.bin',
			vis=self.params['visualize'],
			windowname=self.params['network_type'] + '_preview')
		self.params['num_act'] = len(self.engine.legal_actions)
		self.build_net()
		self.training = True
		self.lock = _thread.allocate_lock()
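The only difference from Example #4 is the lock, presumably guarding the session across threads. `_thread.allocate_lock()` is the low-level primitive; the idiomatic spelling is `threading.Lock()`, used as a context manager so the release is never missed:

import threading

lock = threading.Lock()   # equivalent to _thread.allocate_lock()
with lock:                # acquired here, released on exit even on error
    pass                  # critical section (e.g. a sess.run call)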
Example #6
  def __init__(self, device=None, init_options=None, game=nature_params['game']):
    super(DeepQ,self).__init__(device=device, init_options=init_options)
    assert game in ["breakout", "space_invaders", "seaquest"]

    self.G = tf.Graph()

    # NOTE: moved tf.Graph construction to setup
    self.params = nature_params

    self.DB = database(self.params)
    self.engine = emulator(rom_name='{}.bin'.format(game),
                           vis=self.params['visualize'],
                           frameskip=self.params['frameskip'],
                           windowname=self.params['window_name'])
    self.params['num_act'] = len(self.engine.legal_actions)

    with self.G.device(device):
      self.build_inference()
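A detail worth noting in Example #6: ops created through the tf.* namespace only land in self.G if that graph is also made the default, so build_inference() presumably enters G.as_default() as well. A minimal sketch of the explicit-graph-plus-device pattern (the device string is illustrative):

import tensorflow as tf

G = tf.Graph()
with G.as_default(), G.device('/cpu:0'):   # as_default() routes tf.* ops into G
    x = tf.constant(1.0)

with tf.Session(graph=G) as sess:
    print(sess.run(x))                     # prints 1.0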
Example #7
	def __init__(self,params):
		print('Initializing Module...')
		self.params = params

		# self.gpu_config = tf.ConfigProto(gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=self.params['gpu_fraction']))
		# self.sess = tf.Session(config=self.gpu_config)
		# GPU memory usage settings
		config = tf.ConfigProto()
		config.gpu_options.per_process_gpu_memory_fraction = 0.8    # fixed fraction
		config.gpu_options.allow_growth = True

		self.sess = tf.Session(config=config)
		self.DB = database(self.params)
		self.engine = emulator(rom_name='roms/breakout.bin', vis=self.params['visualize'])
		self.params['num_act'] = len(self.engine.legal_actions)
		self.build_net()
		self.training = not self.params['only_eval']
		self.game_cnt = 0
		fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')  # was cv2.cv.CV_FOURCC (OpenCV 2.x API)
		self.video = cv2.VideoWriter('ckpt/video.mp4', fourcc, 25, (160, 210), True)
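For reference, a VideoWriter only produces a valid file if each frame matches the (width, height) passed at construction and release() is called at the end; the (160, 210) size above matches ALE's 210x160 RGB frames. A minimal write/release sketch:

import numpy as np
import cv2

fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
video = cv2.VideoWriter('video.mp4', fourcc, 25, (160, 210), True)
frame = np.zeros((210, 160, 3), dtype=np.uint8)   # height x width x BGR
video.write(frame)                                # one frame at 25 fps
video.release()                                   # finalize the container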
Example #8
          sess.run(tf.variables_initializer(targetnet.param_list))
          
        prevstate = newstate
        newstate, reward, terminal = engine.next(action) #IMP: newstate contains terminal info
        params['eps'] = 0.1 + max(0, (1 - 0.1) * (100000 - max(0, global_cntr))/100000)
        total_reward_ep = total_reward_ep + reward
        global_cntr = global_cntr + 1

      sys.stdout.write("Episode: %d | Training progress: %d | ep_time: %f | reward: %f \n" % (numeps, global_cntr, time.time()-start_time, total_reward_ep))
      sys.stdout.flush()

if __name__ == "__main__":
  qvals = []
  global_cntr = 1
  DB = database(params['db_size'], params['input_dims'])
  engine = emulator(rom_name='breakout.bin', vis=False)
  params['num_actions'] = len(engine.legal_actions)

  # creating Q and target network. 
  qnet = Model(params, None)
  sess = tf.Session()
  init = tf.global_variables_initializer()
  sess.run(init)
  targetnet = Model(params, qnet)
  sess.run(tf.variables_initializer(targetnet.param_list))

  #cost calculation
  discount = tf.constant(params['discount'])
  maxval = tf.multiply(discount, tf.reduce_max(targetnet.pyx, 1))
  yj_val = tf.add(targetnet.rewards, tf.multiply(targetnet.terminals, maxval))
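The two ops above build the one-step Q-learning target y_j = r + gamma * max_a Q_target(s', a); for the bootstrap term to vanish at episode ends, targetnet.terminals must hold 1.0 for non-terminal and 0.0 for terminal transitions. A NumPy sketch of the same computation under that reading:

import numpy as np

def q_targets(rewards, terminals, next_q, discount):
    # rewards, terminals: shape (batch,); next_q: shape (batch, num_actions)
    return rewards + terminals * discount * next_q.max(axis=1)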
Example #9
nepisodes = None
episode_reward = None

agent_params = {
    'subgoal_dims': 7,
    'use_distance': True,
    'max_reward': 1000,
    'min_reward': -1000,
    'rescale_r': True
}

# param setting section ending

# training initial section
ag = agent(agent_params)
emu = emulator("montezuma_revenge.bin", False)
rawstate = emu.newGame()
reward = 0
terminal = False
learn_start = ag.learn_start
time_history[1] = 0
# initial section ending

if META_AGENT:
    subgoal = ag.pick_subgoal(rawstate, 0, False, False, None)

action_list = [
    'no-op', 'fire', 'up', 'right', 'left', 'down', 'up-right', 'up-left',
    'down-right', 'down-left', 'up-fire', 'right-fire', 'left-fire',
    'down-fire', 'up-right-fire', 'up-left-fire', 'down-right-fire',
    'down-left-fire'
]
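The list above is the full 18-action ALE set in the standard order; engine.legal_actions, used throughout these examples, exposes only the subset a given ROM accepts. A trivial sketch of turning a sampled action index back into a name:

import random

action_index = random.randrange(len(action_list))   # uniform random action id
print(action_index, action_list[action_index])      # e.g. "4 left"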
Example #10
                params["eps"] = 0.1 + max(0, (1 - 0.1) * (100000 - max(0, global_cntr)) / 100000)
                total_reward_ep = total_reward_ep + reward
                global_cntr = global_cntr + 1

            sys.stdout.write(
                "Episode: %d | Training progress: %d | ep_time: %f | reward: %f \n"
                % (numeps, global_cntr, time.time() - start_time, total_reward_ep)
            )
            sys.stdout.flush()


if __name__ == "__main__":
    qvals = []
    global_cntr = 1
    DB = database(params["db_size"], params["input_dims"])
    engine = emulator(rom_name="breakout.bin", vis=False)
    params["num_actions"] = len(engine.legal_actions)

    # creating Q and target network.
    qnet = Model(params, None)
    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)
    targetnet = Model(params, qnet)
    sess.run(tf.variables_initializer(targetnet.param_list))

    # cost calculation
    discount = tf.constant(params["discount"])
    maxval = tf.multiply(discount, tf.reduce_max(targetnet.pyx, 1))
    yj_val = tf.add(targetnet.rewards, tf.multiply(targetnet.terminals, maxval))
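The eps update in Examples #8 and #10 anneals exploration linearly from 1.0 down to 0.1 over the first 100000 steps and holds it at 0.1 afterwards. A standalone check of that schedule (the function and parameter names are mine):

def epsilon(global_cntr, floor=0.1, anneal_steps=100000):
    return floor + max(0, (1 - floor) * (anneal_steps - max(0, global_cntr)) / anneal_steps)

assert abs(epsilon(0) - 1.0) < 1e-9        # fully random at the start
assert abs(epsilon(50000) - 0.55) < 1e-9   # halfway through the anneal
assert abs(epsilon(200000) - 0.1) < 1e-9   # floor after 100000 steps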