def __init__(self, chain_length):
    """Set up an agent that learns values for chains of actions.

    Args:
        chain_length: capacity handed to the ``CircularList`` that
            stores the action chain.
    """
    super(ActionChainAgent, self).__init__(name='ActionChainAgent',
                                           version='1.2')

    # Learning hyper-parameters.
    self.learning_rate = 0.1
    self.discount = 0.9

    # Exploration schedule: e stays at 1.0 until frame 5000, is then
    # interpolated down to 0.05 by frame 10000, and is held there for
    # the remaining time.
    self.e_params = (5000, 10000, 1.0, 0.05)
    # NOTE(review): the initial value 0.5 differs from the schedule's
    # starting value of 1.0; presumably it is overwritten from
    # ``e_params`` before first use -- confirm against the caller.
    self.e = 0.5
    self.nframes = 0

    # State-action values, addressed as q[state][action].
    self.q = {}
    self.chain = CircularList(chain_length)
    self.last_action = None
def __init__(self, n_frames_per_action=4,
             trace_type='replacing', learning_rate=0.001,
             discount=0.99, lambda_v=0.5, record=False):
    """Set up a Sarsa agent with eligibility traces.

    Args:
        n_frames_per_action: stored on the instance; the repeat manager
            is built with ``n_frames_per_action - 1``.
        trace_type: stored as ``self.trace_type`` (default
            ``'replacing'``).
        learning_rate: stored as ``self.learning_rate``.
        discount: stored as ``self.discount``.
        lambda_v: stored as ``self.lambda_v``.
        record: when truthy, a ``CircularList(100000)`` is allocated as
            ``self.mem``; otherwise ``self.mem`` is ``None``.

    The recording attributes (``mem``, ``n_rr``, ``n_sa``,
    ``n_episode``) are always defined, so code that reads them does not
    raise ``AttributeError`` on an agent created with ``record=False``.
    """
    super(SarsaAgent, self).__init__(name='Sarsa', version='1')

    self.n_frames_per_action = n_frames_per_action
    # presumably epsilon goes from 1.0 at step 0 to 0.005 at step 1e4;
    # verify against LinearInterpolationManager.
    self.epsilon = LinearInterpolationManager([(0, 1.0), (1e4, 0.005)])
    self.action_repeat_manager = RepeatManager(n_frames_per_action - 1)

    # Learning hyper-parameters.
    self.trace_type = trace_type
    self.learning_rate = learning_rate
    self.lambda_v = lambda_v
    self.discount = discount

    # NOTE(review): a_/s_/r_ look like the previous action, state and
    # reward -- confirm against the update step.
    self.a_ = 0
    self.s_ = 0
    self.r_ = 0

    # Value and trace tables start unset; nothing here allocates them.
    self.q_vals = None
    self.e_vals = None

    # Statistics counters.
    self.n_goals = 0
    self.n_greedy = 0
    self.n_random = 0

    # Recording state.  Example layout from the original note:
    # with 5 actions and 3 states
    #   => q_vals.shape == (5, 3)
    #      e_vals.shape == (5, 3)
    #      sarsa.shape  == (5, 1)
    self.record = record
    # Only allocate the large buffer when recording was requested.
    self.mem = CircularList(100000) if record else None
    self.n_rr = 0
    self.n_sa = 0
    self.n_episode = 0
def __init__(self, n_frames_per_action=4):
    """Set up the SL agent.

    Args:
        n_frames_per_action: the repeat manager is built with
            ``n_frames_per_action - 1``.
    """
    super(SLAgent, self).__init__(name='SL', version='1')

    # Buffer created with capacity argument 1000.
    self.experience = CircularList(1000)

    # presumably epsilon goes from 1.0 at step 0 to 0.1 at step 1e4;
    # verify against LinearInterpolationManager.
    epsilon_schedule = [(0, 1.0), (1e4, 0.1)]
    self.epsilon = LinearInterpolationManager(epsilon_schedule)

    # NOTE(review): looks like the count of extra frames an action is
    # repeated after being chosen -- confirm against RepeatManager.
    extra_repeats = n_frames_per_action - 1
    self.action_repeat_manager = RepeatManager(extra_repeats)