Ejemplo n.º 1
0
    def __init__(self, n_frames_per_action=4, trace_type='replacing',
                 learning_rate=0.001, discount=0.99, lambda_v=0.5):
        """Set up hyperparameters, helper managers and empty value tables.

        Args:
            n_frames_per_action: Stored on the instance; the action repeat
                manager is constructed with ``n_frames_per_action - 1``.
            trace_type: Stored unchanged as ``self.trace_type``.
            learning_rate: Stored unchanged as ``self.learning_rate``.
            discount: Stored unchanged as ``self.discount``.
            lambda_v: Stored unchanged as ``self.lambda_v``.
        """
        super(Sarsa2Agent, self).__init__(name='Sarsa2', version='2')

        # Hyperparameters are kept exactly as they were passed in.
        self.n_frames_per_action = n_frames_per_action
        self.trace_type = trace_type
        self.learning_rate = learning_rate
        self.discount = discount
        self.lambda_v = lambda_v

        # NOTE(review): presumably epsilon is interpolated linearly between
        # these (x, value) control points -- confirm against
        # LinearInterpolationManager.
        epsilon_points = [(0, 1.0), (1e4, 0.005)]
        self.epsilon = LinearInterpolationManager(epsilon_points)

        # NOTE(review): presumably the number of extra frames an action is
        # held for -- confirm against RepeatManager.
        extra_frames = n_frames_per_action - 1
        self.action_repeat_manager = RepeatManager(extra_frames)

        # Both tables start unset; nothing in this method fills them in.
        self.q_vals = self.e_vals = None

        # Must run last, after every attribute above has been assigned.
        self.initialize_asr_and_counters()
Ejemplo n.º 2
0
    def __init__(self,
                 n_frames_per_action=4,
                 trace_type='replacing',
                 learning_rate=0.001,
                 discount=0.99,
                 lambda_v=0.5,
                 record=False):
        """Set up hyperparameters, helper managers, counters and tables.

        Args:
            n_frames_per_action: Stored on the instance; the action repeat
                manager is constructed with ``n_frames_per_action - 1``.
            trace_type: Stored unchanged as ``self.trace_type``.
            learning_rate: Stored unchanged as ``self.learning_rate``.
            discount: Stored unchanged as ``self.discount``.
            lambda_v: Stored unchanged as ``self.lambda_v``.
            record: Stored as ``self.record``; when truthy, ``self.mem`` is
                additionally created as a ``CircularList(100000)``.
        """
        super(SarsaAgent, self).__init__(name='Sarsa', version='1')

        # Hyperparameters are kept exactly as they were passed in.
        self.n_frames_per_action = n_frames_per_action
        self.trace_type = trace_type
        self.learning_rate = learning_rate
        self.lambda_v = lambda_v
        self.discount = discount

        # NOTE(review): presumably epsilon is interpolated linearly between
        # these (x, value) control points -- confirm against
        # LinearInterpolationManager.
        epsilon_points = [(0, 1.0), (1e4, 0.005)]
        self.epsilon = LinearInterpolationManager(epsilon_points)

        # NOTE(review): presumably the number of extra frames an action is
        # held for -- confirm against RepeatManager.
        extra_frames = n_frames_per_action - 1
        self.action_repeat_manager = RepeatManager(extra_frames)

        # NOTE(review): a_/s_/r_ look like the remembered action, state and
        # reward -- confirm against the methods that update them.
        self.a_ = self.s_ = self.r_ = 0

        # Both tables start unset; nothing in this method fills them in.
        self.q_vals = self.e_vals = None

        # Every counter starts from zero.
        self.n_goals = self.n_greedy = self.n_random = 0
        self.n_rr = self.n_sa = 0
        self.n_episode = 0

        self.record = record
        if self.record:
            # Original author's sizing example: with 5 actions and 3 states,
            #   q_vals.shape == (5, 3)
            #   e_vals.shape == (5, 3)
            #   sarsa.shape  == (5, 1)
            # The buffer only exists when recording was requested.
            self.mem = CircularList(100000)
Ejemplo n.º 3
0
 def __init__(self, n_frames_per_action=4):
     """Create the experience buffer, epsilon schedule and repeat manager.

     Args:
         n_frames_per_action: Only used to construct the action repeat
             manager, which receives ``n_frames_per_action - 1``; the value
             itself is not stored by this method.
     """
     super(SLAgent, self).__init__(name='SL', version='1')

     # NOTE(review): 1000 is presumably the buffer capacity -- confirm
     # against CircularList.
     self.experience = CircularList(1000)

     # NOTE(review): presumably epsilon is interpolated linearly between
     # these (x, value) control points -- confirm against
     # LinearInterpolationManager.
     epsilon_points = [(0, 1.0), (1e4, 0.1)]
     self.epsilon = LinearInterpolationManager(epsilon_points)

     extra_frames = n_frames_per_action - 1
     self.action_repeat_manager = RepeatManager(extra_frames)