def testGetAvailableActions( self ):
     # getAvailableActions must report exactly the indices of the cells
     # that still hold 0, in ascending order.
     emptyBoard = ( 0, ) * 9
     fullBoard = ( 2, 1, 2, 1, 2, 1, 1, 2, 1 )

     # an empty board should give all nine actions
     everyCell = tuple( range( 9 ) )
     self.assertEqual( deepTic.Game( emptyBoard ).getAvailableActions( False ), everyCell )
     # a full board should give no actions at all
     self.assertEqual( deepTic.Game( fullBoard ).getAvailableActions( False ), tuple() )

     # Clear the trailing cells of the full board one more at a time
     # (0 cleared == full board, 9 cleared == empty board); only the
     # cleared indices may be offered as actions.
     for clearedCount in xrange( 10 ):
         firstFree = 9 - clearedCount
         state = fullBoard[ :firstFree ] + ( 0, ) * clearedCount
         expectedActions = tuple( range( firstFree, 9 ) )
         self.assertEqual( deepTic.Game( state ).getAvailableActions( False ), expectedActions )
 def testSetState( self ):
     # Writing the value 1 into a single cell of a newly constructed game
     # must be the only change visible in the state returned afterwards.
     for cell in xrange( 9 ):
         game = deepTic.Game()
         game.setState( cell, 1, False )
         expectedState = tuple( 1 if index == cell else 0 for index in range( 9 ) )
         self.assertEqual( game.returnState( False ), expectedState )
Exemple #3
0
def experiment(sarsa, defVal, outputFileName, selfPlay, symmetry):
    """Train a series of fresh agents and pickle their averaged results.

    For each of 200 independent agents, 5000 games are played and the value
    returned by ``GameEnvironment.play()`` is recorded per episode.  The
    per-episode results are averaged over all agents and the resulting list
    of 5000 floats is pickled to ``outputFileName``.

    Args:
        sarsa: Value assigned to each trainee's ``sarsa`` attribute.
        defVal: Value assigned to each trainee's
            ``initialStateActionValue`` attribute.
        outputFileName: Path of the file the pickled averages are written to.
        selfPlay: If true, each trainee plays against itself; otherwise it
            plays against the player loaded from "brainy.brain".
        symmetry: Value assigned to each trainee's ``useSymmetry`` attribute.

    Returns:
        None.  The result is written to ``outputFileName``.
    """
    print(outputFileName)
    # Fixed opponent used when not in self-play mode.
    # NOTE(review): the first AIPlayer argument is presumably the
    # exploration rate (the trainee is built with 0.1 and later tuned via
    # setEps) -- confirm against deepTic.AIPlayer.
    canonicalPlayer = deepTic.AIPlayer(0, "brainy.brain")
    canonicalPlayer.competitionMode = True
    canonicalPlayer.useSymmetry = True

    nOfEpisodes = 5000
    nOfAgentIterations = 200
    totalResults = [0.0] * nOfEpisodes
    for agentIndex in xrange(nOfAgentIterations):
        if agentIndex % 10 == 0:
            # float() forces true division; plain int / int truncates to 0
            # under Python 2 and the progress would always read "0% done".
            print("{:.0%} done".format(
                agentIndex / float(nOfAgentIterations)))
        trainee = deepTic.AIPlayer(0.1)
        trainee.sarsa = sarsa
        trainee.initialStateActionValue = defVal
        trainee.useSymmetry = symmetry
        opponent = trainee if selfPlay else canonicalPlayer
        for episodeNumber in xrange(nOfEpisodes):
            # Exploration decays linearly from 0.2 towards 0 over the run.
            trainee.setEps(0.2 * (1 - episodeNumber / float(nOfEpisodes)))
            totalResults[episodeNumber] += deepTic.GameEnvironment(
                trainee, opponent, deepTic.Game()).play()
    totalResults = [x / float(nOfAgentIterations) for x in totalResults]
    # Pickle streams are byte data: binary mode avoids newline translation
    # and is required by pickle.dump on Python 3.
    with open(outputFileName, 'wb') as f:
        pickle.dump(totalResults, f)
    def testPlay( self ):
        # Every scenario scripts both players' moves through MockPlayer and
        # then inspects the outcome reported by GameEnvironment.play().
        mockPlayer = TestGameEnvironment.MockPlayer

        def newMatch( firstMoves, secondMoves ):
            # Build both scripted players and the environment joining them.
            first = mockPlayer( firstMoves )
            second = mockPlayer( secondMoves )
            return first, second, deepTic.GameEnvironment( first, second, deepTic.Game() )

        def update( before, action, after, nextAction, reward, terminal ):
            # One expected update record in the shape the mock stores.
            return { 's1': before, 'a1': action,
                     's2': after, 'a2': nextAction,
                     'r': reward, 't': terminal }

        # player one completes the 0-4-8 diagonal: play() reports 1
        first, second, match = newMatch( [ 4, 0, 8 ], [ 1, 7 ] )
        self.assertEqual( match.play(), 1 )
        self.assertEqual( first.updateCount, 3 )
        self.assertEqual( second.updateCount, 2 )

        # the board fills up without a winner: play() reports 0
        first, second, match = newMatch( [ 0, 2, 4, 5, 7 ], [ 1, 6, 8, 3 ] )
        self.assertEqual( match.play(), 0 )
        self.assertEqual( first.updateCount, 5 )
        self.assertEqual( second.updateCount, 4 )

        # Player two completes the 1-4-7 column: play() reports -1, and
        # each player must have received exactly the updates listed below.
        # The last record of each list is the terminal one ('a2' is -1).
        finalBoard = ( 1, 2, 1, 0, 2, 0, 0, 2, 1 )

        firstExpected = [
            update( ( 0, 0, 0, 0, 0, 0, 0, 0, 0 ), 0,
                    ( 1, 0, 0, 0, 2, 0, 0, 0, 0 ), 2, 0, False ),
            update( ( 1, 0, 0, 0, 2, 0, 0, 0, 0 ), 2,
                    ( 1, 2, 1, 0, 2, 0, 0, 0, 0 ), 8, 0, False ),
            update( ( 1, 2, 1, 0, 2, 0, 0, 0, 0 ), 8,
                    finalBoard, -1, -1, True ),
        ]

        secondExpected = [
            update( ( 1, 0, 0, 0, 0, 0, 0, 0, 0 ), 4,
                    ( 1, 0, 1, 0, 2, 0, 0, 0, 0 ), 1, 0, False ),
            update( ( 1, 0, 1, 0, 2, 0, 0, 0, 0 ), 1,
                    ( 1, 2, 1, 0, 2, 0, 0, 0, 1 ), 7, 0, False ),
            update( ( 1, 2, 1, 0, 2, 0, 0, 0, 1 ), 7,
                    finalBoard, -1, 1, True ),
        ]

        first, second, match = newMatch( [ 0, 2, 8 ], [ 4, 1, 7 ] )
        self.assertEqual( match.play(), -1 )
        self.assertEqual( first.updatesReceived, firstExpected )
        self.assertEqual( second.updatesReceived, secondExpected )
 def testTerminalState( self ):
     # Cell-index triples forming a complete line, listed in the order
     # rows, columns, diagonals.
     completeLines = (
         ( 0, 1, 2 ), ( 3, 4, 5 ), ( 6, 7, 8 ),
         ( 0, 3, 6 ), ( 1, 4, 7 ), ( 2, 5, 8 ),
         ( 0, 4, 8 ), ( 2, 4, 6 ),
     )
     game = deepTic.Game()

     # a line filled by a single mark (1 or 2) on an otherwise empty
     # board must be reported as terminal
     for mark in xrange( 1, 3 ):
         for line in completeLines:
             board = tuple( mark if cell in line else 0 for cell in range( 9 ) )
             self.assertTrue( game.terminalState( board ) )

     # A top row holding mixed marks is not terminal, and neither is the
     # completely filled board in the last entry, which has no line owned
     # by a single mark.
     boardsWithoutLine = (
         ( 1, 2, 1, 0, 0, 0, 0, 0, 0 ),
         ( 1, 1, 2, 0, 0, 0, 0, 0, 0 ),
         ( 2, 1, 1, 0, 0, 0, 0, 0, 0 ),
         ( 2, 1, 2, 1, 2, 1, 1, 2, 1 ),
     )
     for board in boardsWithoutLine:
         self.assertFalse( game.terminalState( board ) )
 def testTie( self ):
     # a full board on which no mark owns a complete line is a tie
     boardWithoutLine = ( 2, 1, 2, 1, 2, 1, 1, 2, 1 )
     self.assertTrue( deepTic.Game( boardWithoutLine ).tie() )

     # a full board whose first column is all 1s is not a tie
     boardWithLine = ( 1, 1, 2, 1, 2, 1, 1, 2, 1 )
     self.assertFalse( deepTic.Game( boardWithLine ).tie() )