def __init__(self, actions, discount, featureExtractor, explorationProb,
             stepSize, threshold, decay, maxGradient,
             num_consecutive_random_actions):
    """
    :note: please see parent class for params not described here
    """
    super(SARSALambdaLearningAlgorithm,
          self).__init__(actions, discount, featureExtractor,
                         explorationProb, stepSize, maxGradient,
                         num_consecutive_random_actions)
    self.eligibility_traces = EligibilityTraces(threshold, decay)
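
None of the examples on this page include the EligibilityTraces class itself. The sketch below shows one way a (threshold, decay) trace store could work, assuming per-feature traces that are decayed every step and pruned once they fall below the threshold; only the class name and constructor arguments come from the examples here, and the update method with its replacing-trace semantics is an assumption.

class EligibilityTraces(object):
    # Hypothetical sketch, not the repo's implementation.
    def __init__(self, threshold, decay):
        self.threshold = threshold  # prune traces smaller than this
        self.decay = decay          # per-step multiplier, typically gamma * lambda
        self.traces = {}            # feature -> trace value

    def update(self, active_features):
        # Decay every trace and drop the ones that fall below the threshold.
        decayed = {}
        for feature, trace in self.traces.items():
            trace *= self.decay
            if trace >= self.threshold:
                decayed[feature] = trace
        self.traces = decayed
        # Replacing traces: features active this step become fully eligible.
        for feature in active_features:
            self.traces[feature] = 1.0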
Example #2
def __init__(self, actions, featureExtractor, discount, explorationProb,
             stepSize, decay, threshold):
    super(SARSALambdaLearningAlgorithm,
          self).__init__(actions, featureExtractor, discount,
                         explorationProb, stepSize)
    self.threshold = threshold
    self.decay = decay
    self.eligibility_traces = EligibilityTraces(threshold, decay)
    self.name = "SARSALambda"
    self.maxFeatVectorNorm = 1  # largest feature-vector norm seen so far
    self.firstReward = 0        # first reward observed during training
    self.sawFirst = False       # flips to True once firstReward is recorded
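
For context, this is roughly the weight update that eligibility traces enable in SARSA(λ). A standalone sketch, not code from this repo: traces is taken as a plain feature-to-value dict and every name is illustrative.

def sarsa_lambda_update(weights, traces, step_size,
                        reward, q_sa, q_next, discount):
    # SARSA TD error: r + discount * Q(s', a') - Q(s, a).
    td_error = reward + discount * q_next - q_sa
    # Credit every feature in proportion to its eligibility trace.
    for feature, trace in traces.items():
        weights[feature] = weights.get(feature, 0.0) + step_size * td_error * trace
    return td_error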
Example #3
from q_learning import QLearning
from SARSA import SARSALearning
from eligibility_traces import EligibilityTraces
from function_approximation import FApprox
from mountain_cart import run_methods, self_iterate
import pickle

if __name__ == "__main__":
    # Initialize the methods to compare
    methods = [
        QLearning("MountainCar-v0", print_progress=False),
        SARSALearning("MountainCar-v0", print_progress=False),
        FApprox("MountainCar-v0", print_progress=False),
        EligibilityTraces("MountainCar-v0", print_progress=False)
    ]

    # Run the tests
    run_methods(methods)

    method = methods[0]
    # Reload the best saved Q-table for the first method and inspect it.
    with open("Best_Method_" + type(method).__name__ + ".p", "rb") as f:
        method.q_table = pickle.load(f)
    method.evaluate()
    method.display()

    self_iterate(methods[0])
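
The script above loads a pickled Q-table, but the saving side lives inside run_methods and is not shown. A hypothetical counterpart that mirrors the load, assuming the best table is written under the same naming scheme:

    # Hypothetical save; run_methods' actual persistence code is not shown.
    best = methods[0]
    with open("Best_Method_" + type(best).__name__ + ".p", "wb") as f:
        pickle.dump(best.q_table, f)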
Example #4
def resetTraces(self):
    """Discard all eligibility traces by rebuilding the trace store."""
    self.eligibility_traces = EligibilityTraces(self.threshold, self.decay)
Example #5
def new_episode(self):
    # Reset the traces, anneal the exploration rate, and start
    # fresh bookkeeping for the new episode.
    self.eligibility_traces = EligibilityTraces(1 - self.γ * self.λ)
    self.ε *= self.ε_decay
    self.episode += 1
    self.episode_reward = 0
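
The ε *= ε_decay line gives geometric exploration decay: after N episodes, ε = ε₀ · ε_decay**N. A quick check with illustrative values (ε₀ = 1.0 and ε_decay = 0.99 are assumptions, not from this example):

epsilon, epsilon_decay = 1.0, 0.99  # illustrative values
for _ in range(300):
    epsilon *= epsilon_decay
print(round(epsilon, 4))  # 0.049 == round(0.99 ** 300, 4)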