コード例 #1
0
def qLearnMain(gWorldArg, gammaArg, alphaArg, epsilonArg, printDebugStatementsFlagArg, screenWidthArg):
    """Publish the run configuration as module globals and run Q-learning.

    The arguments are copied into the module-level names ``gWorld``,
    ``screenWidth``, ``printDebugStatementsFlag``, ``gamma``, ``alpha`` and
    ``epsilon``.  A welcome banner is printed, ``qLearn()`` is run, the reward
    matrix, the grids and the optimum policy are printed, and a closing banner
    is printed last.
    """
    global gWorld, screenWidth, printDebugStatementsFlag
    global gamma, alpha, epsilon

    gWorld, screenWidth, printDebugStatementsFlag = (
        gWorldArg, screenWidthArg, printDebugStatementsFlagArg)

    # Q Learning Parameters
    gamma, alpha, epsilon = gammaArg, alphaArg, epsilonArg

    def centred(inner):
        # Centre an already-padded line inside the full screen width.
        return '{:^{screenWidth}}'.format(inner, screenWidth=screenWidth)

    def banner(title):
        # Rule, title, rule, blank line -- each line is 10 columns narrower
        # than the screen before being centred.
        print(centred('{:=^{w}}'.format('', w=screenWidth - 10)))
        print(centred('{:^{w}}'.format(title, w=screenWidth - 10)))
        print(centred('{:=^{w}}'.format('', w=screenWidth - 10)))
        print('')

    banner('Welcome to Gold Explorer Using Reinforcement Learning  - Q Learning')

    qLearn()
    gWorld.printGridWorldRewardMatrix()
    printGrids(gWorld)
    gWorld.printGridWorldOptimumPolicyQValue()

    banner('Thank you for using Gold Explorer Using Reinforcement Learning - Q Learning')
コード例 #2
0
def qLearnMain(gWorldArg, gammaArg, alphaArg, epsilonArg,
               printDebugStatementsFlagArg, screenWidthArg):
    """Entry point: store the settings globally, learn, then report.

    Every argument is written to the module global of the same name without
    the ``Arg`` suffix.  Between an opening and a closing banner the function
    calls ``qLearn()``, ``gWorld.printGridWorldRewardMatrix()``,
    ``printGrids(gWorld)`` and ``gWorld.printGridWorldOptimumPolicyQValue()``,
    in that order.
    """
    global gWorld, screenWidth, printDebugStatementsFlag
    global gamma, alpha, epsilon

    # World / display settings
    gWorld = gWorldArg
    printDebugStatementsFlag = printDebugStatementsFlagArg
    screenWidth = screenWidthArg

    # Q Learning Parameters
    epsilon = epsilonArg
    alpha = alphaArg
    gamma = gammaArg

    def showBanner(rows):
        # Each row is (inner format template, text); the inner line is padded
        # to screenWidth - 10 columns and then centred in screenWidth columns.
        for innerTemplate, text in rows:
            padded = innerTemplate.format(text, w=screenWidth - 10)
            print('{:^{screenWidth}}'.format(padded, screenWidth=screenWidth))
        print('')

    showBanner((
        ('{:=^{w}}', ''),
        ('{:^{w}}',
         'Welcome to Gold Explorer Using Reinforcement Learning  - Q Learning'),
        ('{:=^{w}}', ''),
    ))

    qLearn()
    gWorld.printGridWorldRewardMatrix()
    printGrids(gWorld)
    gWorld.printGridWorldOptimumPolicyQValue()

    showBanner((
        ('{:=^{w}}', ''),
        ('{:^{w}}',
         'Thank you for using Gold Explorer Using Reinforcement Learning - Q Learning'),
        ('{:=^{w}}', ''),
    ))
コード例 #3
0
def _qLearnCenteredLine(innerTemplate, text, width, margin=10):
    """Return ``text`` laid out for a ``width``-column screen.

    ``text`` is first formatted with ``innerTemplate`` (which must contain a
    ``{w}`` width field) to ``width - margin`` columns; the result is then
    centred in ``width`` columns.
    """
    return '{:^{screenWidth}}'.format(
        innerTemplate.format(text, w=width - margin), screenWidth=width)


def qLearn():
    """Run Q-learning episodes on the global ``gWorld`` until convergence.

    Every episode starts at ``gWorld.getGrids()[0][0]``.  At each step the
    agent picks 'explore' or 'exploit' with ``random.choice`` over the list
    built by ``randChoiceList`` from the global ``epsilon``, then applies

        Q(s, a) <- Q(s, a) + alpha * (reward(s) + gamma * max Q(s', .) - Q(s, a))

    using the globals ``alpha`` and ``gamma``.  An episode ends after the step
    taken from a goal cell.  Every 100 episodes ``epsilon`` is replaced by
    ``epsilon / (1 + epsilon)``.  The function returns once ``isConverged``
    reports that an episode left the Q values unchanged, after printing the
    total iteration and episode counts.

    When ``printDebugStatementsFlag`` is True a detailed trace is written to
    stdout; when it is False only progress dots are written.
    """
    global epsilon

    # The original compared the flag with ``== True`` / ``== False``; that is
    # kept so a flag that is neither produces neither kind of output.
    debugOn = printDebugStatementsFlag == True
    progressOn = printDebugStatementsFlag == False

    def debug(prefix, innerTemplate, text, suffix=''):
        # Write one trace line; call sites guard on debugOn so that message
        # text is only built when tracing is enabled.
        sys.stdout.write(
            prefix + _qLearnCenteredLine(innerTemplate, text, screenWidth) + suffix)

    # Counters
    iterationCount = 0
    episodeCount = 0

    currGrid = gWorld.getGrids()[0][0]
    epsilon_choices = randChoiceList([('explore', epsilon), ('exploit', 1 - epsilon)])

    if progressOn:
        sys.stdout.write('\n\tIterating.')

    while True:

        oldGridMatrixValue = getGridWorldQValues(gWorld)  # To check for convergence
        episodeCount += 1
        goalTraversedFlag = False

        if debugOn:
            debug('\n', '{:#^{w}}', ' Episode #' + str(episodeCount) + " ", '\n')

        if episodeCount % 100 == 0:
            epsilon = epsilon / (1 + epsilon)
            # Rebuild the weighted choices so the decayed epsilon is actually
            # used; previously the list built before the loop was reused
            # forever and the decay had no effect on action selection.
            epsilon_choices = randChoiceList([('explore', epsilon), ('exploit', 1 - epsilon)])
            if debugOn:
                debug('\n\n', '{:<{w}}', '***Updating epsilon to:' + str(epsilon) + " ", '\n')

        while True:

            if currGrid.isGoal():
                if debugOn:
                    debug('\n', '{:<{w}}', '***Goal Reached Once, setting the flag', '\n')
                goalTraversedFlag = True

            if debugOn:
                debug('\n', '{:*^{w}}', '')
                debug('\n', '{:<{w}}',
                      " Current Grid: " + str(currGrid.getGridName())
                      + "\tQ Value : " + str(currGrid.value)
                      + "\tReward : " + str(currGrid.gridReward))
                debug('\n', '{:*^{w}}', '')

            if currGrid.isBlocked():
                # NOTE(review): currGrid is not changed on this branch, so the
                # loop would repeat on the same cell. Presumably explore() and
                # exploit() never return a blocked cell -- confirm.
                if debugOn:
                    debug('\n', '{:<{w}}', 'Blocked Grid... Skipping')

            else:
                iterationCount += 1

                if progressOn:
                    if iterationCount % 200 == 0:
                        sys.stdout.write(".")
                    if iterationCount % 10500 == 0:
                        sys.stdout.write("\n\t")

                if debugOn:
                    debug('\n', '{:#^{w}}', ' Iteration #' + str(iterationCount) + " ", '\n')

                exploitOrExplore = random.choice(epsilon_choices)

                if exploitOrExplore == 'explore':
                    if debugOn:
                        debug('\n', '{:<{w}}', '*****Exploring', '\n')
                    nextGridDirection, nextGrid = explore(currGrid, gWorld)
                elif exploitOrExplore == 'exploit':
                    if debugOn:
                        debug('\n', '{:<{w}}', '*****Exploiting', '\n')
                    nextGridDirection, nextGrid = exploit(currGrid, gWorld)

                allQValuesOfNextGrid = [nextGrid.getQLeft(), nextGrid.getQRight(),
                                        nextGrid.getQUp(), nextGrid.getQDown()]
                maxQValueNextGrid = max(allQValuesOfNextGrid)

                if debugOn:
                    debug('\n', '{:<{w}}',
                          "Action Chosen \t: " + nextGridDirection
                          + "\tNextGrid : (" + nextGrid.getGridName() + ")")
                    debug('\n', '{:<{w}}',
                          "All QValues Of NextGrid : "
                          + ','.join([str(round(v, 3)) for v in allQValuesOfNextGrid])
                          + "\tMax : " + str(maxQValueNextGrid))

                # Compute the Q(s,a)
                qValofCurrGrid = getQValueforCurrGrid(currGrid, nextGridDirection)
                newQValofCurrGrid = qValofCurrGrid + alpha * (
                    currGrid.getGridReward() + (gamma * maxQValueNextGrid) - qValofCurrGrid)

                # Update Q Value of the current grid for the corresponding direction
                updateGridQValue(currGrid, nextGridDirection, newQValofCurrGrid)
                if debugOn:
                    debug('\n', '{:<{w}}',
                          "Using the Q(s,a) equation, updated Grid " + currGrid.getGridName()
                          + "'s " + nextGridDirection
                          + " QValue to : " + str(newQValofCurrGrid))

                # Update current grid
                currGrid = nextGrid

                # The flag was set before this step, so the action taken from
                # the goal cell has been learned before the episode ends.
                if goalTraversedFlag:
                    if debugOn:
                        debug('\n', '{:<{w}}', "Goal Reached", '\n')
                    break

                if printDebugStatementsFlag:
                    printGrids(gWorld)
                    print("\n\n")

        # Next episode starts from the first cell again
        currGrid = gWorld.getGrids()[0][0]

        newGridMatrixValue = getGridWorldQValues(gWorld)
        convergedFlag = isConverged(oldGridMatrixValue, newGridMatrixValue)

        if convergedFlag == True:
            print('\n\n' + _qLearnCenteredLine(
                '{:%^{w}}', " Total # of Iterations\t:" + str(iterationCount) + " ",
                screenWidth, 20))
            print('\n' + _qLearnCenteredLine(
                '{:%^{w}}', " Total # of Episodes\t:" + str(episodeCount) + " ",
                screenWidth, 20) + '\n')
            break
コード例 #4
0
def _qLearnTrace(prefix, innerTemplate, text, suffix, width):
    """Write one centred trace line to stdout.

    ``text`` is formatted with ``innerTemplate`` (which must contain a ``{w}``
    width field) to ``width - 10`` columns, centred in ``width`` columns, and
    written between ``prefix`` and ``suffix``.
    """
    padded = innerTemplate.format(text, w=width - 10)
    sys.stdout.write(
        prefix + '{:^{screenWidth}}'.format(padded, screenWidth=width) + suffix)


def qLearn():
    """Learn Q values for the global ``gWorld`` by repeated episodes.

    Each episode begins at ``gWorld.getGrids()[0][0]`` and ends after the step
    taken from a goal cell.  Each step chooses between ``explore`` and
    ``exploit`` through ``random.choice`` on the list ``randChoiceList``
    builds from the global ``epsilon``, then updates the chosen direction's
    Q value of the current cell with

        Q <- Q + alpha * (currentCellReward + gamma * max(nextCellQs) - Q)

    where ``alpha`` and ``gamma`` are module globals.  After every 100th
    episode starts, ``epsilon`` becomes ``epsilon / (1 + epsilon)``.  The loop
    stops, after printing iteration and episode totals, once ``isConverged``
    accepts the Q values captured before and after an episode.

    Output depends on the global ``printDebugStatementsFlag``: a full trace
    when it equals True, progress dots when it equals False.
    """
    global epsilon

    def buildChoices():
        # Weighted explore/exploit choices for the current value of epsilon.
        return randChoiceList([('explore', epsilon),
                               ('exploit', 1 - epsilon)])

    # ``== True`` / ``== False`` are kept from the original so that a flag
    # equal to neither selects neither output mode.
    tracing = printDebugStatementsFlag == True
    showProgress = printDebugStatementsFlag == False

    # Counters
    iterationCount = 0
    episodeCount = 0

    currGrid = gWorld.getGrids()[0][0]
    epsilon_choices = buildChoices()

    if showProgress:
        sys.stdout.write('\n\tIterating.')

    while True:

        # Snapshot used for the convergence check at the end of the episode
        oldGridMatrixValue = getGridWorldQValues(gWorld)
        episodeCount += 1
        goalTraversedFlag = False

        if tracing:
            _qLearnTrace('\n', '{:#^{w}}',
                         ' Episode #' + str(episodeCount) + " ",
                         '\n', screenWidth)

        if episodeCount % 100 == 0:
            epsilon = epsilon / (1 + epsilon)
            # The choice list must be rebuilt here: it was previously created
            # once before the loop, so the decayed epsilon never influenced
            # the explore/exploit draw.
            epsilon_choices = buildChoices()
            if tracing:
                _qLearnTrace('\n\n', '{:<{w}}',
                             '***Updating epsilon to:' + str(epsilon) + " ",
                             '\n', screenWidth)

        while True:

            if currGrid.isGoal():
                if tracing:
                    _qLearnTrace('\n', '{:<{w}}',
                                 '***Goal Reached Once, setting the flag',
                                 '\n', screenWidth)
                goalTraversedFlag = True

            if tracing:
                _qLearnTrace('\n', '{:*^{w}}', '', '', screenWidth)
                _qLearnTrace('\n', '{:<{w}}',
                             " Current Grid: " + str(currGrid.getGridName()) +
                             "\tQ Value : " + str(currGrid.value) +
                             "\tReward : " + str(currGrid.gridReward),
                             '', screenWidth)
                _qLearnTrace('\n', '{:*^{w}}', '', '', screenWidth)

            if currGrid.isBlocked():
                # NOTE(review): nothing moves currGrid on this branch, so the
                # loop would revisit the same cell. Presumably explore() and
                # exploit() never hand back a blocked cell -- confirm.
                if tracing:
                    _qLearnTrace('\n', '{:<{w}}', 'Blocked Grid... Skipping',
                                 '', screenWidth)

            else:
                iterationCount += 1

                if showProgress:
                    if iterationCount % 200 == 0:
                        sys.stdout.write(".")
                    if iterationCount % 10500 == 0:
                        sys.stdout.write("\n\t")

                if tracing:
                    _qLearnTrace('\n', '{:#^{w}}',
                                 ' Iteration #' + str(iterationCount) + " ",
                                 '\n', screenWidth)

                exploitOrExplore = random.choice(epsilon_choices)

                if exploitOrExplore == 'explore':
                    if tracing:
                        _qLearnTrace('\n', '{:<{w}}', '*****Exploring',
                                     '\n', screenWidth)
                    nextGridDirection, nextGrid = explore(currGrid, gWorld)
                elif exploitOrExplore == 'exploit':
                    if tracing:
                        _qLearnTrace('\n', '{:<{w}}', '*****Exploiting',
                                     '\n', screenWidth)
                    nextGridDirection, nextGrid = exploit(currGrid, gWorld)

                allQValuesOfNextGrid = [
                    nextGrid.getQLeft(),
                    nextGrid.getQRight(),
                    nextGrid.getQUp(),
                    nextGrid.getQDown()
                ]
                maxQValueNextGrid = max(allQValuesOfNextGrid)

                if tracing:
                    _qLearnTrace('\n', '{:<{w}}',
                                 "Action Chosen \t: " + nextGridDirection +
                                 "\tNextGrid : (" +
                                 nextGrid.getGridName() + ")",
                                 '', screenWidth)
                    _qLearnTrace('\n', '{:<{w}}',
                                 "All QValues Of NextGrid : " + ','.join(
                                     [str(round(v, 3))
                                      for v in allQValuesOfNextGrid]) +
                                 "\tMax : " + str(maxQValueNextGrid),
                                 '', screenWidth)

                # Compute the Q(s,a)
                qValofCurrGrid = getQValueforCurrGrid(currGrid,
                                                      nextGridDirection)
                newQValofCurrGrid = qValofCurrGrid + alpha * (
                    currGrid.getGridReward() +
                    (gamma * maxQValueNextGrid) - qValofCurrGrid)

                # Update Q Value of the current grid for the corresponding direction
                updateGridQValue(currGrid, nextGridDirection,
                                 newQValofCurrGrid)
                if tracing:
                    _qLearnTrace('\n', '{:<{w}}',
                                 "Using the Q(s,a) equation, updated Grid " +
                                 currGrid.getGridName() + "'s " +
                                 nextGridDirection +
                                 " QValue to : " + str(newQValofCurrGrid),
                                 '', screenWidth)

                # Update current grid
                currGrid = nextGrid

                # The flag is raised before the step, so the move out of the
                # goal cell is learned before the episode is closed.
                if goalTraversedFlag:
                    if tracing:
                        _qLearnTrace('\n', '{:<{w}}', "Goal Reached",
                                     '\n', screenWidth)
                    break

                if printDebugStatementsFlag:
                    printGrids(gWorld)
                    print("\n\n")

        # Restart from the first cell for the next episode
        currGrid = gWorld.getGrids()[0][0]

        newGridMatrixValue = getGridWorldQValues(gWorld)
        convergedFlag = isConverged(oldGridMatrixValue, newGridMatrixValue)

        if convergedFlag == True:
            print('\n\n{:^{screenWidth}}'.format(
                '{:%^{w}}'.format(
                    " Total # of Iterations\t:" + str(iterationCount) + " ",
                    w=screenWidth - 20),
                screenWidth=screenWidth))
            print('\n{:^{screenWidth}}\n'.format(
                '{:%^{w}}'.format(
                    " Total # of Episodes\t:" + str(episodeCount) + " ",
                    w=screenWidth - 20),
                screenWidth=screenWidth))
            break