Example #1
# Assumes Game, Agent, PolicyConfiguration, and the four policy classes
# (ValueIteration, PolicyIteration, qLearningAgent, approximateQLearning)
# are imported from the surrounding project; the example omits its imports.
def main():
    aiType = 3   # 1: value iteration, 2: policy iteration, 3: Q-learning, 4: approximate Q-learning
    worldSize = 6
    game = Game(aiType, worldSize)

    agent = Agent()
    pc = None
    policy = None
    if aiType == 1:
        policy = ValueIteration()
        pc = PolicyConfiguration(inpRewards=[1, -1, 0, 10, -1],
                                 inpDiscounts=[1, .1, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]])
    elif aiType == 2:
        policy = PolicyIteration()
        pc = PolicyConfiguration(inpRewards=[1, -1, 0, 10, -1],
                                 inpDiscounts=[1, .1, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]])
    elif aiType == 3:
        policy = qLearningAgent()
        pc = PolicyConfiguration(inpRewards=[0, -1, 0, 10, -1],
                                 inpDiscounts=[1, .1, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]],
                                 inpFile="QLValues.p",
                                 inpTrainingLimit=1000)
    elif aiType == 4:
        policy = approximateQLearning()
        pc = PolicyConfiguration(inpRewards=[2, -1, 0, 0, -1],
                                 inpDiscounts=[0.9, .2, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]],
                                 inpFile="AQLWeights.json",
                                 inpTrainingLimit=500)
    else:
        policy = ValueIteration()
        pc = PolicyConfiguration()
    policy.config = pc

    agent.policy = policy

    game.agent = agent

    game.mainLoop()
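
The PolicyConfiguration, Game, and policy classes above come from this example's own project, so their internals are not shown here. As a generic illustration of how a percentage matrix like inpStochastic = [[100, 0, 0], [0, 100, 0], [0, 0, 100]] is conventionally interpreted, the standalone sketch below (all names hypothetical, not the project's API) samples the action that actually executes for a given intended action; the identity-like matrix used in the example makes movement deterministic.

import random

# Hypothetical helper, not part of the example's project: row i of the
# stochastic matrix holds the percentage chances that intended action i
# actually executes as forward, left, or right.
ACTIONS = ["forward", "left", "right"]

def sample_actual_action(stochastic, intended_index):
    """Draw the executed action for an intended action index."""
    return random.choices(ACTIONS, weights=stochastic[intended_index], k=1)[0]

# With the deterministic matrix from the example, "left" always stays "left".
identity = [[100, 0, 0], [0, 100, 0], [0, 0, 100]]
print(sample_actual_action(identity, 1))  # -> "left"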
Example #2
def __init__(self, can, direction, inpAIType):
    self.flag = 1
    self.can = can
    self.direction = direction
    self.aiType = inpAIType
    self.agent = Agent()
    pc = None
    policy = None
    # inpRewards = [food reward, hazard reward, living reward,
    #               good-location reward, bad-location reward]
    #   The good/bad-location rewards are only used by Q-learning; they were
    #   an attempt to encourage graph searching, are not really used, and can
    #   give wonky results.
    # inpDiscounts = [gamma (discount), alpha (learning rate), epsilon (explore chance)]
    # inpStochastic = [forward action [forward chance, left chance, right chance],
    #                  left action    [forward chance, left chance, right chance],
    #                  right action   [forward chance, left chance, right chance]]
    # inpFile = file for persisted weights or Q-values
    if self.aiType == 1:
        policy = ValueIteration()
        pc = PolicyConfiguration(inpRewards=[1, -1, 0, 10, -1],
                                 inpDiscounts=[1, .1, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]])
    elif self.aiType == 2:
        policy = PolicyIteration()
        pc = PolicyConfiguration(inpRewards=[1, -1, 0, 10, -1],
                                 inpDiscounts=[1, .1, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]])
    elif self.aiType == 3:
        policy = qLearningAgent()
        # Risk aversion (rarely going off the best path) seems to work best.
        # This configuration also seemed to work:
        # pc = PolicyConfiguration(inpRewards=[2, -1, 0, 0, -1],
        #                          inpDiscounts=[0.9, .2, .1],
        #                          inpStochastic=[[100, 0, 0], [0, 100, 0],
        #                                         [0, 0, 100]])
        pc = PolicyConfiguration(inpRewards=[2, -1, 0, 0, 0],
                                 inpDiscounts=[0.9, .2, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]],
                                 inpFile=None,
                                 inpTrainingLimit=20000)
    elif self.aiType == 4:
        policy = approximateQLearning()
        pc = PolicyConfiguration(inpRewards=[2, -1, 0, 0, -1],
                                 inpDiscounts=[0.9, .2, .1],
                                 inpStochastic=[[100, 0, 0], [0, 100, 0],
                                                [0, 0, 100]],
                                 inpFile=None,
                                 inpTrainingLimit=5000)
    else:
        policy = ValueIteration()
        pc = PolicyConfiguration()
    policy.config = pc

    self.agent.policy = policy
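
The internals of qLearningAgent are likewise not shown here. As a minimal sketch of how the three inpDiscounts values [gamma, alpha, epsilon] are conventionally used in tabular Q-learning (all names below are illustrative, not the project's API): epsilon drives exploration, while gamma and alpha drive the value update.

import random
from collections import defaultdict

# Illustrative sketch only, not the project's qLearningAgent.
gamma, alpha, epsilon = 0.9, 0.2, 0.1   # mirrors inpDiscounts = [0.9, .2, .1]
ACTIONS = ["forward", "left", "right"]
q_values = defaultdict(float)            # keyed by (state, action)

def choose_action(state):
    """Epsilon-greedy: explore with probability epsilon, else exploit."""
    if random.random() < epsilon:
        return random.choice(ACTIONS)
    return max(ACTIONS, key=lambda a: q_values[(state, a)])

def update(state, action, reward, next_state):
    """Q(s,a) += alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))."""
    best_next = max(q_values[(next_state, a)] for a in ACTIONS)
    q_values[(state, action)] += alpha * (
        reward + gamma * best_next - q_values[(state, action)])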