Code Example #1
File: main.py  Project: Wonseokkyang/RLCatMouse
def main():
    number_of_turns = 0  # counts the number of steps taken before a game ends
    catchCount = 0  # count of finished games
    env = Maze(FILE_NAME)
    myCat = Brain('Cat', env.cat.pos, env.actions)
    myMouse = Brain('Mouse', env.mouse.pos, env.actions)
    cheesePos = env.cheese.pos
    board = env.mazeList

    ## DEBUGGING
    debug = False  # step-by-step toggle
    env.renderWindow = False  # start with graphics rendering disabled

    while True:
        if debug:
            print('\nCLICK to start loop.')
            env.win.getMouse()
        print('==At start of loop, cat and mouse information:==')
        myCat.printInfo()
        myMouse.printInfo()

        if debug:
            print('\nCLICK to let mouse choose action.')
            env.win.getMouse()
        # print('Calling mouse.chooseRandom with catpos mousepos cheese pos:', myCat.pos, myMouse.pos, cheesePos)
        mouseAction = myMouse.chooseAction(board, myCat.pos, myMouse.pos,
                                           cheesePos)
        mouseImmediateReward = env.moveMouse(mouseAction)

        if debug:
            print('immediate reward:', mouseImmediateReward)
            print('myMouse.q_table:', myMouse.q_table)
            print('\nCLICK to let cat choose action.')
            env.win.getMouse()
        # print('Calling cat.chooseRandom with catpos mousepos cheese pos:', myCat.pos, myMouse.pos, cheesePos)
        catAction = myCat.chooseAction(board, myCat.pos, myMouse.pos,
                                       cheesePos)
        catImmediateReward = env.moveCat(catAction)

        if debug:
            print('catAction:', catAction)
            print('immediate reward:', catImmediateReward)
            print('myCat.q_table:', myCat.q_table)
            print('\nCLICK to get feedback from environment.')
            env.win.getMouse()
        #get feedback from the environment
        catPos, catReward, mousePos, mouseReward, done = env.turnEnd()

        #add goal rewards if any
        catImmediateReward += catReward
        mouseImmediateReward += mouseReward

        if debug:
            print('catPos:', catPos, 'catImmediateReward:', catImmediateReward,
                  'mousePos:', mousePos, 'mouseImmediateReward:',
                  mouseImmediateReward, 'done:', done)
            print('catReward:', catReward, 'mouseReward:', mouseReward)
            print('\nCLICK to update agent Brain with positions.')
            env.win.getMouse()
        # Update the agents' brains to reflect board positions after the move
        myMouse.updateBrain(catPos, catReward, mousePos, mouseReward)
        myCat.updateBrain(catPos, catReward, mousePos, mouseReward)

        myCat.printInfo()
        myMouse.printInfo()

        if debug:
            print('\nCLICK to start learnLast step for both agents.')
            env.win.getMouse()
        #immediate learning of step taken
        myMouse.learnLast(mouseImmediateReward)
        myCat.learnLast(catImmediateReward)
        myCat.printInfo()
        myMouse.printInfo()
        if debug:
            print('\nCLICK to continue.')

        # if the game ended (something got caught), run full-episode learning for the agents
        if done:
            # time.sleep(1)
            catchCount += 1
            print('Hit something')
            if debug:
                print('mouse q-table before learnAll')
                print(myMouse.q_table)
                print('mouse history before learnAll')
                print(myMouse.history)
            myMouse.learnAll(mouseReward)
            myCat.learnAll(catReward)
            # using restart() so random spot spawning can be programmed in later
            myCat.pos, myMouse.pos, cheesePos = env.restart()
        # env.win.getMouse()
        number_of_turns += 1
        # if number_of_turns == 100:
        # break

        # toggle graphics rendering on and off based on how many games have finished
        if catchCount % 1000 == 0:
            env.renderWindow = True
        if catchCount % 1001 == 2:
            env.renderWindow = False
        # save a checkpoint of both agents every 100 finished games
        if catchCount % 100 == 0:
            saveAgent(myCat, catchCount)
            saveAgent(myMouse, catchCount)
        # stop once the first game has finished
        if catchCount == 1:
            break
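
Note: the Brain class used in this example is not shown on this page. The calls chooseAction / learnLast / learnAll follow the shape of a tabular Q-learning agent: pick an action (e.g. epsilon-greedy over a q_table), apply a one-step update with the immediate reward, and replay the stored history with the terminal reward when a game ends. The sketch below is a minimal, hypothetical stand-in under those assumptions only; the MiniBrain class, its parameters (alpha, gamma, epsilon), and its simplified chooseAction signature (a single hashable state instead of board/cat/mouse/cheese arguments, and no next-state bootstrapping) are illustrative, not the project's actual implementation.

import random
from collections import defaultdict

# Hypothetical sketch of a tabular Q-learning "brain".
# It only mirrors the call pattern seen in main() above; the real
# Brain class in Wonseokkyang/RLCatMouse may differ substantially.
class MiniBrain:
    def __init__(self, actions, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.actions = actions             # e.g. ['up', 'down', 'left', 'right']
        self.alpha = alpha                 # learning rate
        self.gamma = gamma                 # discount factor
        self.epsilon = epsilon             # exploration rate
        self.q_table = defaultdict(float)  # (state, action) -> estimated value
        self.history = []                  # (state, action) pairs for the current game

    def chooseAction(self, state):
        # Epsilon-greedy choice over the Q-values for this state (state must be hashable).
        if random.random() < self.epsilon:
            action = random.choice(self.actions)
        else:
            action = max(self.actions, key=lambda a: self.q_table[(state, a)])
        self.history.append((state, action))
        return action

    def learnLast(self, reward):
        # One-step update of the most recent (state, action) toward the immediate reward.
        if not self.history:
            return
        state, action = self.history[-1]
        old = self.q_table[(state, action)]
        self.q_table[(state, action)] = old + self.alpha * (reward - old)

    def learnAll(self, final_reward):
        # When a game ends, propagate the terminal reward back through the stored history,
        # discounting it by gamma at each step, then clear the history for the next game.
        target = final_reward
        for state, action in reversed(self.history):
            old = self.q_table[(state, action)]
            self.q_table[(state, action)] = old + self.alpha * (target - old)
            target *= self.gamma
        self.history.clear()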
Code Example #2
File: main.py  Project: Wonseokkyang/RLCatMouse
def testLoading(itNumber):
    print('INIT base game..')
    time.sleep(1)
    catchCount = itNumber
    env = Maze(FILE_NAME)
    myCat = Brain('Cat', env.cat.pos, env.actions)
    myMouse = Brain('Mouse', env.mouse.pos, env.actions)
    cheesePos = env.cheese.pos
    board = env.mazeList

    print('loading from file')
    loadAgent(myCat, catchCount)
    loadAgent(myMouse, catchCount)
    time.sleep(1)

    print('showing agent info/q_tables')
    myCat.printInfo()
    myMouse.printInfo()
    time.sleep(1)

    print('testing running of agents from this point..')
    time.sleep(1)

    ## DEBUGGING
    debug = True  # step-by-step toggle
    env.renderWindow = True  # start with graphics being rendered

    while True:
        if debug:
            print('\nCLICK to start loop.')
            env.win.getMouse()
        print('==At start of loop, cat and mouse information:==')
        myCat.printInfo()
        myMouse.printInfo()

        if debug:
            print('\nCLICK to let mouse choose action.')
            env.win.getMouse()
        # print('Calling mouse.chooseRandom with catpos mousepos cheese pos:', myCat.pos, myMouse.pos, cheesePos)
        mouseAction = myMouse.chooseAction(board, myCat.pos, myMouse.pos,
                                           cheesePos)
        mouseImmediateReward = env.moveMouse(mouseAction)

        if debug:
            print('immediate reward:', mouseImmediateReward)
            print('myMouse.q_table:', myMouse.q_table)
            print('\nCLICK to let cat choose action.')
            env.win.getMouse()
        # print('Calling cat.chooseRandom with catpos mousepos cheese pos:', myCat.pos, myMouse.pos, cheesePos)
        catAction = myCat.chooseAction(board, myCat.pos, myMouse.pos,
                                       cheesePos)
        catImmediateReward = env.moveCat(catAction)

        if debug:
            print('catAction:', catAction)
            print('immediate reward:', catImmediateReward)
            print('myCat.q_table:', myCat.q_table)
            print('\nCLICK to get feedback from environment.')
            env.win.getMouse()
        #get feedback from the environment
        catPos, catReward, mousePos, mouseReward, done = env.turnEnd()

        #add goal rewards if any
        catImmediateReward += catReward
        mouseImmediateReward += mouseReward

        if debug:
            print('catPos:', catPos, 'catImmediateReward:', catImmediateReward,
                  'mousePos:', mousePos, 'mouseImmediateReward:',
                  mouseImmediateReward, 'done:', done)
            print('catReward:', catReward, 'mouseReward:', mouseReward)
            print('\nCLICK to update agent Brain with positions.')
            env.win.getMouse()
        # Update the agents' brains to reflect board positions after the move
        myMouse.updateBrain(catPos, catReward, mousePos, mouseReward)
        myCat.updateBrain(catPos, catReward, mousePos, mouseReward)

        myCat.printInfo()
        myMouse.printInfo()

        if debug:
            print('\nCLICK to start learnLast step for both agents.')
            env.win.getMouse()
        #immediate learning of step taken
        myMouse.learnLast(mouseImmediateReward)
        myCat.learnLast(catImmediateReward)
        myCat.printInfo()
        myMouse.printInfo()
        if debug:
            print('\nCLICK to continue.')

        # if the game ended (something got caught), run full-episode learning for the agents
        if done:
            catchCount += 1
            print('Hit something')
            if debug:
                print('mouse q-table before learnAll')
                print(myMouse.q_table)
                print('mouse history before learnAll')
                print(myMouse.history)
            myMouse.learnAll(mouseReward)
            myCat.learnAll(catReward)
            # using restart() so random spot spawning can be programmed in later
            myCat.pos, myMouse.pos, cheesePos = env.restart()

        # stop once catchCount reaches 500000
        if catchCount == 500000:
            break
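
Note: the saveAgent and loadAgent helpers called in these examples (checkpointing every 100 games in main(), restoring in testLoading()) are also not shown on this page. A minimal sketch of what they could look like is below, assuming the Brain stores its first constructor argument as agent.name, that agent.q_table is a plain picklable object, and that files are named from the agent name and iteration count; the pickle approach and the filename pattern are assumptions, not the project's confirmed implementation.

import pickle

# Hypothetical sketch of agent persistence helpers matching the call
# signatures saveAgent(agent, count) / loadAgent(agent, count) used above.
def saveAgent(agent, count):
    # Serialize only the learned q_table; the filename scheme is an assumption.
    with open('{}_{}.pkl'.format(agent.name, count), 'wb') as f:
        pickle.dump(agent.q_table, f)

def loadAgent(agent, count):
    # Restore the q_table saved for this agent at the given iteration count.
    with open('{}_{}.pkl'.format(agent.name, count), 'rb') as f:
        agent.q_table = pickle.load(f)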