Example #1
0
    def __init__(self, question, testDict):
        """Configure an epsilon-greedy Q-learning test case from the parsed test dictionary.

        Reads MDP parameters (discount, optional noise / living reward), the
        grid layout, and the learning hyperparameters (epsilon, learning rate,
        experience/iteration counts), then builds the Gridworld environment
        and the options dict passed to the learning agent.
        """
        super(EpsilonGreedyTest, self).__init__(question, testDict)
        self.discount = float(testDict['discount'])
        self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
        # Apply optional environment tweaks to this single grid instance.
        # BUGFIX: the grid was previously rebuilt after these calls, which
        # silently discarded the noise / livingReward settings.
        if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
        if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
        self.env = gridworld.GridworldEnvironment(self.grid)
        self.epsilon = float(testDict['epsilon'])
        self.learningRate = float(testDict['learningRate'])
        self.numExperiences = int(testDict['numExperiences'])
        self.numIterations = int(testDict['iterations'])
        # Agent construction options; actionFn lets the agent query legal moves.
        self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}
Example #2
0
    def __init__(self, question, testDict):
        """Set up a grid-policy test case from the parsed test dictionary."""
        super(GridPolicyTest, self).__init__(question, testDict)

        # Name of the function in the analysis module that returns (discount, noise).
        self.parameterFn = testDict['parameterFn']
        self.question2 = testDict.get('question2', 'false').lower() == 'true'

        # Gridworld layout encoding:
        #   _        empty cell
        #   #        wall
        #   S        start state
        #   number   terminal state with that reward
        gridText = testDict['grid']
        self.gridText = gridText
        self.grid = gridworld.Gridworld(parseGrid(gridText))
        self.gridName = testDict['gridName']

        # Expected-policy layout encoding:
        #   _            this cell's action is not checked
        #   N, E, S, W   required action (north/east/south/west) at this cell
        self.policy = parseGrid(testDict['policy'])

        # Optional state the most probable path MUST pass through:
        #   (x,y) with (0,0) at the bottom-left, or 'terminal'.
        self.pathVisits = testDict.get('pathVisits', None)

        # Optional state the most probable path must AVOID:
        #   (x,y) with (0,0) at the bottom-left, or 'terminal'.
        self.pathNotVisits = testDict.get('pathNotVisits', None)
Example #3
0
 def __init__(self, question, testDict):
     """Configure a Q-learning test case from the parsed test dictionary.

     Builds the Gridworld environment, the agent options dict, and the list
     of experience counts at which intermediate Q-values are displayed
     (the first few experiences, plus the final count when larger).
     """
     super(QLearningTest, self).__init__(question, testDict)
     self.discount = float(testDict['discount'])
     self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
     # Apply optional environment tweaks to this single grid instance.
     # BUGFIX: the grid was previously rebuilt after these calls, which
     # silently discarded the noise / livingReward settings.
     if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
     if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
     self.env = gridworld.GridworldEnvironment(self.grid)
     self.epsilon = float(testDict['epsilon'])
     self.learningRate = float(testDict['learningRate'])
     self.opts = {'actionFn': self.env.getPossibleActions, 'epsilon': self.epsilon, 'gamma': self.discount, 'alpha': self.learningRate}
     numExperiences = int(testDict['numExperiences'])
     maxPreExperiences = 10
     # BUGFIX: in Python 3, range() returns an immutable range object with no
     # .append — materialize a list so the final count can be appended below.
     self.numsExperiencesForDisplay = list(range(min(numExperiences, maxPreExperiences)))
     self.testOutFile = testDict['test_out_file']
     if maxPreExperiences < numExperiences:
         self.numsExperiencesForDisplay.append(numExperiences)
Example #4
0
 def __init__(self, question, testDict):
     """Configure a value-iteration test case from the parsed test dictionary.

     Builds the Gridworld and the list of iteration counts at which
     intermediate values are displayed (the first few iterations, plus the
     final iteration count when larger).
     """
     super(ValueIterationTest, self).__init__(question, testDict)
     self.discount = float(testDict['discount'])
     self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
     iterations = int(testDict['valueIterations'])
     if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
     if 'livingReward' in testDict: self.grid.setLivingReward(float(testDict['livingReward']))
     maxPreIterations = 10
     # BUGFIX: in Python 3, range() returns an immutable range object with no
     # .append — materialize a list so the final count can be appended below.
     self.numsIterationsForDisplay = list(range(min(iterations, maxPreIterations)))
     self.testOutFile = testDict['test_out_file']
     if maxPreIterations < iterations:
         self.numsIterationsForDisplay.append(iterations)