def main():
    number_of_turns = 0  # used for counting the number of turns before the game ends
    catchCount = 0       # count of game ends (catches)
    env = Maze(FILE_NAME)
    myCat = Brain('Cat', env.cat.pos, env.actions)
    myMouse = Brain('Mouse', env.mouse.pos, env.actions)
    cheesePos = env.cheese.pos
    board = env.mazeList

    ## DEBUGGING
    debug = False             # step-by-step toggle
    env.renderWindow = False  # start without graphics being rendered

    while True:
        if debug:
            print('\nCLICK to start loop.')
            env.win.getMouse()
            print('==At start of loop, cat and mouse information:==')
            myCat.printInfo()
            myMouse.printInfo()

        if debug:
            print('\nCLICK to let mouse choose action.')
            env.win.getMouse()

        # print('Calling mouse.chooseRandom with catpos mousepos cheese pos:', myCat.pos, myMouse.pos, cheesePos)
        mouseAction = myMouse.chooseAction(board, myCat.pos, myMouse.pos, cheesePos)
        mouseImmediateReward = env.moveMouse(mouseAction)

        if debug:
            print('immediate reward:', mouseImmediateReward)
            print('myMouse.q_table:', myMouse.q_table)
            print('\nCLICK to let cat choose action.')
            env.win.getMouse()

        # print('Calling cat.chooseRandom with catpos mousepos cheese pos:', myCat.pos, myMouse.pos, cheesePos)
        catAction = myCat.chooseAction(board, myCat.pos, myMouse.pos, cheesePos)
        catImmediateReward = env.moveCat(catAction)

        if debug:
            print('catAction:', catAction)
            print('immediate reward:', catImmediateReward)
            print('myCat.q_table:', myCat.q_table)
            print('\nCLICK to get feedback from environment.')
            env.win.getMouse()

        # get feedback from the environment
        catPos, catReward, mousePos, mouseReward, done = env.turnEnd()
        # add goal rewards, if any
        catImmediateReward += catReward
        mouseImmediateReward += mouseReward

        if debug:
            print('catPos:', catPos, 'catImmediateReward:', catImmediateReward,
                  'mousePos:', mousePos, 'mouseImmediateReward:', mouseImmediateReward,
                  'done:', done)
            print('catReward:', catReward, 'mouseReward:', mouseReward)
            print('\nCLICK to update agent Brain with positions.')
            env.win.getMouse()

        # update the agents' brains to reflect board positions after the move
        myMouse.updateBrain(catPos, catReward, mousePos, mouseReward)
        myCat.updateBrain(catPos, catReward, mousePos, mouseReward)
        myCat.printInfo()
        myMouse.printInfo()

        if debug:
            print('\nCLICK to start learnLast step for both agents.')
            env.win.getMouse()

        # immediate learning from the step just taken
        myMouse.learnLast(mouseImmediateReward)
        myCat.learnLast(catImmediateReward)
        myCat.printInfo()
        myMouse.printInfo()

        if debug:
            print('\nCLICK to continue.')

        # if something got caught, run end-of-episode learning for both agents
        if done:
            # time.sleep(1)
            catchCount += 1
            print('Hit something')
            if debug:
                print('mouse q-table before learnAll')
                print(myMouse.q_table)
                print('mouse history before learnAll')
                print(myMouse.history)
            myMouse.learnAll(mouseReward)
            myCat.learnAll(catReward)
            # using restart() so random spawn positions can be programmed in later
            myCat.pos, myMouse.pos, cheesePos = env.restart()
            # env.win.getMouse()

        number_of_turns += 1
        # if number_of_turns == 100:
        #     break

        if catchCount % 1000 == 0:
            env.renderWindow = True
        if catchCount % 1001 == 2:
            env.renderWindow = False
        if catchCount % 100 == 0:
            saveAgent(myCat, catchCount)
            saveAgent(myMouse, catchCount)
        if catchCount == 1:
            break
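
# NOTE: main() above calls saveAgent()/loadAgent(), which are not defined in this
# section. The sketch below is only an assumption about what they might look like:
# it pickles the whole Brain object to a file keyed by agent name and iteration
# count, and assumes Brain stores its first constructor argument as .name and its
# learned values in .q_table (the q_table attribute is used elsewhere in this file).
# The project's real helpers may live in another module and serialize differently,
# so treat this as illustrative only.
import pickle


def saveAgent(agent, iteration):
    # hypothetical: dump the entire agent (including its q_table) to e.g. 'Cat_1000.pkl'
    with open('{}_{}.pkl'.format(agent.name, iteration), 'wb') as f:
        pickle.dump(agent, f)


def loadAgent(agent, iteration):
    # hypothetical: restore a previously pickled agent's learned state into 'agent'
    with open('{}_{}.pkl'.format(agent.name, iteration), 'rb') as f:
        saved = pickle.load(f)
    agent.q_table = saved.q_table
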
def testLoading(itNumber):
    print('INIT base game..')
    time.sleep(1)
    catchCount = itNumber
    env = Maze(FILE_NAME)
    myCat = Brain('Cat', env.cat.pos, env.actions)
    myMouse = Brain('Mouse', env.mouse.pos, env.actions)
    cheesePos = env.cheese.pos
    board = env.mazeList

    print('loading from file')
    loadAgent(myCat, catchCount)
    loadAgent(myMouse, catchCount)
    time.sleep(1)

    print('showing agent info/q_tables')
    myCat.printInfo()
    myMouse.printInfo()
    time.sleep(1)

    print('testing running of agents from this point..')
    time.sleep(1)

    ## DEBUGGING
    debug = True             # step-by-step toggle
    env.renderWindow = True  # start with graphics being rendered

    while True:
        if debug:
            print('\nCLICK to start loop.')
            env.win.getMouse()
            print('==At start of loop, cat and mouse information:==')
            myCat.printInfo()
            myMouse.printInfo()

        if debug:
            print('\nCLICK to let mouse choose action.')
            env.win.getMouse()

        # print('Calling mouse.chooseRandom with catpos mousepos cheese pos:', myCat.pos, myMouse.pos, cheesePos)
        mouseAction = myMouse.chooseAction(board, myCat.pos, myMouse.pos, cheesePos)
        mouseImmediateReward = env.moveMouse(mouseAction)

        if debug:
            print('immediate reward:', mouseImmediateReward)
            print('myMouse.q_table:', myMouse.q_table)
            print('\nCLICK to let cat choose action.')
            env.win.getMouse()

        # print('Calling cat.chooseRandom with catpos mousepos cheese pos:', myCat.pos, myMouse.pos, cheesePos)
        catAction = myCat.chooseAction(board, myCat.pos, myMouse.pos, cheesePos)
        catImmediateReward = env.moveCat(catAction)

        if debug:
            print('catAction:', catAction)
            print('immediate reward:', catImmediateReward)
            print('myCat.q_table:', myCat.q_table)
            print('\nCLICK to get feedback from environment.')
            env.win.getMouse()

        # get feedback from the environment
        catPos, catReward, mousePos, mouseReward, done = env.turnEnd()
        # add goal rewards, if any
        catImmediateReward += catReward
        mouseImmediateReward += mouseReward

        if debug:
            print('catPos:', catPos, 'catImmediateReward:', catImmediateReward,
                  'mousePos:', mousePos, 'mouseImmediateReward:', mouseImmediateReward,
                  'done:', done)
            print('catReward:', catReward, 'mouseReward:', mouseReward)
            print('\nCLICK to update agent Brain with positions.')
            env.win.getMouse()

        # update the agents' brains to reflect board positions after the move
        myMouse.updateBrain(catPos, catReward, mousePos, mouseReward)
        myCat.updateBrain(catPos, catReward, mousePos, mouseReward)
        myCat.printInfo()
        myMouse.printInfo()

        if debug:
            print('\nCLICK to start learnLast step for both agents.')
            env.win.getMouse()

        # immediate learning from the step just taken
        myMouse.learnLast(mouseImmediateReward)
        myCat.learnLast(catImmediateReward)
        myCat.printInfo()
        myMouse.printInfo()

        if debug:
            print('\nCLICK to continue.')

        # if something got caught, run end-of-episode learning for both agents
        if done:
            catchCount += 1
            print('Hit something')
            if debug:
                print('mouse q-table before learnAll')
                print(myMouse.q_table)
                print('mouse history before learnAll')
                print(myMouse.history)
            myMouse.learnAll(mouseReward)
            myCat.learnAll(catReward)
            # using restart() so random spawn positions can be programmed in later
            myCat.pos, myMouse.pos, cheesePos = env.restart()

        if catchCount == 500000:
            break
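
# Assumed entry point: train from scratch with main(), or load a saved checkpoint for
# step-by-step inspection with testLoading(<iteration>). The project's real script may
# invoke these functions differently; the call below is only an illustration.
if __name__ == '__main__':
    main()
    # testLoading(1000)  # hypothetical: inspect agents saved after 1000 catches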