def __init__(self, question, testDict):
    """Configure an epsilon-greedy Q-learning test case from *testDict*.

    Reads discount/epsilon/learningRate and experience/iteration counts,
    builds the Gridworld and its environment, and assembles the agent
    option dictionary used to construct the learner.
    """
    super(EpsilonGreedyTest, self).__init__(question, testDict)
    self.discount = float(testDict['discount'])
    self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
    # Optional environment tweaks. NOTE: the original code re-created
    # self.grid after applying these, which silently discarded the
    # noise/livingReward settings; keep the configured grid instead.
    if 'noise' in testDict:
        self.grid.setNoise(float(testDict['noise']))
    if 'livingReward' in testDict:
        self.grid.setLivingReward(float(testDict['livingReward']))
    self.env = gridworld.GridworldEnvironment(self.grid)
    self.epsilon = float(testDict['epsilon'])
    self.learningRate = float(testDict['learningRate'])
    self.numExperiences = int(testDict['numExperiences'])
    self.numIterations = int(testDict['iterations'])
    # Constructor kwargs for the Q-learning agent under test.
    self.opts = {'actionFn': self.env.getPossibleActions,
                 'epsilon': self.epsilon,
                 'gamma': self.discount,
                 'alpha': self.learningRate}
def __init__(self, question, testDict):
    """Set up a test that checks a computed policy over a Gridworld.

    Stores the analysis parameter-function name, the grid and expected
    policy layouts, and optional path-visitation constraints.
    """
    super(GridPolicyTest, self).__init__(question, testDict)
    # Name of a function in the analysis module returning (discount, noise).
    self.parameterFn = testDict['parameterFn']
    self.question2 = testDict.get('question2', 'false').lower() == 'true'
    # GridWorld layout text:
    #   '_' empty space, digits are terminal states with that reward,
    #   '#' wall, 'S' start state.
    self.gridText = testDict['grid']
    self.grid = gridworld.Gridworld(parseGrid(self.gridText))
    self.gridName = testDict['gridName']
    # Expected policy layout:
    #   '_' means the choice at that cell is not checked;
    #   'N'/'E'/'S'/'W' require that compass action.
    self.policy = parseGrid(testDict['policy'])
    # Optional constraint: a state the most probable path MUST visit.
    # Either "(x,y)" with (0,0) at the bottom-left, or 'terminal'.
    self.pathVisits = testDict.get('pathVisits', None)
    # Optional constraint: a state the most probable path must NOT visit,
    # same format as pathVisits.
    self.pathNotVisits = testDict.get('pathNotVisits', None)
def __init__(self, question, testDict):
    """Configure a Q-learning test case from *testDict*.

    Builds the Gridworld environment, the agent option dictionary, and
    the list of experience counts at which intermediate Q-values are
    displayed/checked.
    """
    super(QLearningTest, self).__init__(question, testDict)
    self.discount = float(testDict['discount'])
    self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
    # Optional environment tweaks. NOTE: the original code re-created
    # self.grid after applying these, which silently discarded the
    # noise/livingReward settings; keep the configured grid instead.
    if 'noise' in testDict:
        self.grid.setNoise(float(testDict['noise']))
    if 'livingReward' in testDict:
        self.grid.setLivingReward(float(testDict['livingReward']))
    self.env = gridworld.GridworldEnvironment(self.grid)
    self.epsilon = float(testDict['epsilon'])
    self.learningRate = float(testDict['learningRate'])
    # Constructor kwargs for the Q-learning agent under test.
    self.opts = {'actionFn': self.env.getPossibleActions,
                 'epsilon': self.epsilon,
                 'gamma': self.discount,
                 'alpha': self.learningRate}
    numExperiences = int(testDict['numExperiences'])
    maxPreExperiences = 10
    # Display after each of the first few experiences, plus the final one.
    # list(...) is required: Python 3 range objects have no append().
    self.numsExperiencesForDisplay = list(range(min(numExperiences, maxPreExperiences)))
    self.testOutFile = testDict['test_out_file']
    if maxPreExperiences < numExperiences:
        self.numsExperiencesForDisplay.append(numExperiences)
def __init__(self, question, testDict):
    """Configure a value-iteration test case from *testDict*.

    Builds the Gridworld and the list of iteration counts at which
    intermediate values are displayed/checked.
    """
    super(ValueIterationTest, self).__init__(question, testDict)
    self.discount = float(testDict['discount'])
    self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
    iterations = int(testDict['valueIterations'])
    # Optional environment tweaks from the test dictionary.
    if 'noise' in testDict:
        self.grid.setNoise(float(testDict['noise']))
    if 'livingReward' in testDict:
        self.grid.setLivingReward(float(testDict['livingReward']))
    maxPreIterations = 10
    # Display after each of the first few iterations, plus the final one.
    # list(...) is required: Python 3 range objects have no append().
    self.numsIterationsForDisplay = list(range(min(iterations, maxPreIterations)))
    self.testOutFile = testDict['test_out_file']
    if maxPreIterations < iterations:
        self.numsIterationsForDisplay.append(iterations)