def __init__(self, actions, discount, featureExtractor, explorationProb, stepSize,
             threshold, decay, maxGradient, num_consecutive_random_actions):
    """
    SARSA(lambda) learner: SARSA augmented with eligibility traces.

    :note: please see parent class for params not described here

    :param threshold: minimum trace magnitude kept in the eligibility traces
        — TODO confirm against EligibilityTraces' constructor docs
    :param decay: per-step multiplicative decay applied to the traces
        — TODO confirm against EligibilityTraces' constructor docs
    """
    super(SARSALambdaLearningAlgorithm, self).__init__(
        actions, discount, featureExtractor, explorationProb, stepSize,
        maxGradient, num_consecutive_random_actions)
    # Store the trace hyperparameters on the instance so the traces can be
    # rebuilt later (e.g. by resetTraces()) without re-passing them.
    # The original only forwarded them, which breaks any sibling method
    # that reads self.threshold / self.decay.
    self.threshold = threshold
    self.decay = decay
    self.eligibility_traces = EligibilityTraces(threshold, decay)
def __init__(self, actions, featureExtractor, discount, explorationProb, stepSize,
             decay, threshold):
    """
    SARSA(lambda) agent: SARSA with decaying eligibility traces.

    :param decay: multiplicative decay applied to each trace per step
        — presumed; confirm against EligibilityTraces' constructor
    :param threshold: traces below this magnitude are dropped
        — presumed; confirm against EligibilityTraces' constructor
    :note: see the parent class for the remaining parameters
    """
    super(SARSALambdaLearningAlgorithm, self).__init__(
        actions, featureExtractor, discount, explorationProb, stepSize)
    # Keep the trace hyperparameters so the traces can be rebuilt later.
    self.threshold = threshold
    self.decay = decay
    self.eligibility_traces = EligibilityTraces(threshold, decay)
    self.name = "SARSALambda"
    # Per-run bookkeeping used elsewhere in the learner.
    self.maxFeatVectorNorm = 1
    self.firstReward = 0
    self.sawFirst = False
import pickle

from q_learning import QLearning
from SARSA import SARSALearning
from eligibility_traces import EligibilityTraces
from function_approximation import FApprox
from mountain_cart import run_methods, self_iterate


if __name__ == "__main__":
    # Build one instance of each learning method on the same environment.
    methods = [
        QLearning("MountainCar-v0", print_progress=False),
        SARSALearning("MountainCar-v0", print_progress=False),
        FApprox("MountainCar-v0", print_progress=False),
        EligibilityTraces("MountainCar-v0", print_progress=False),
    ]

    # Run the tests
    run_methods(methods)

    # Reload the best saved Q-table for the first method and inspect it.
    # __name__ is already a str, so the former str() wrapper was redundant.
    # NOTE(review): pickle.load can execute arbitrary code on load — only
    # use with trusted pickle files.
    method = methods[0]
    with open("Best_Method_" + type(method).__name__ + ".p", "rb") as f:
        method.q_table = pickle.load(f)
    method.evaluate()
    method.display()

    self_iterate(methods[0])
def resetTraces(self):
    """Discard all accumulated eligibility traces and start from a blank set."""
    # Rebuild using the hyperparameters captured at construction time.
    fresh_traces = EligibilityTraces(self.threshold, self.decay)
    self.eligibility_traces = fresh_traces
def new_episode(self):
    """Reset per-episode state before the next episode begins."""
    # Fresh traces; the single constructor argument is 1 - γλ
    # (presumably a trace cutoff — confirm against EligibilityTraces).
    trace_cutoff = 1 - self.γ * self.λ
    self.eligibility_traces = EligibilityTraces(trace_cutoff)
    # Anneal exploration and advance the episode bookkeeping.
    self.ε = self.ε * self.ε_decay
    self.episode += 1
    self.episode_reward = 0