Esempio n. 1
0
 # Self-play training: RA1 plays the "tac" side and RA2 plays the "tic"
 # side of each game; both are updated once per finished game.
 # NOTE(review): the meaning of the (1000, 0.01) arguments is not visible
 # in this fragment -- confirm against the policy_agent definition.
 RA1 = policy_agent(1000, 0.01)
 RA2 = policy_agent(1000, 0.01)
 # RA3 is created here but not used in the visible part of the loop.
 RA3 = random_agent()
 # Counters and flags. None of them is modified within the visible lines;
 # presumably they are updated further down -- TODO confirm.
 # NOTE(review): `exit` shadows the builtin of the same name.
 policy2_wins = 0
 policy1_wins = 0
 draws = 0
 doomed = 0
 exit = 1
 win_ratio = 0
 random_wins = 0
 for i in range(1000000):
     wrong_move = 0
     # Every game starts from a freshly constructed board.
     board = Board()
     # Coin flip: when True, RA1 (tac) moves before RA2 in the first round;
     # otherwise RA2 (tic) makes the opening move.
     start = np.random.uniform() > 0.5
     # Set to True after RA2's first move so that RA1 moves in every
     # subsequent round regardless of `start`.
     first = False
     # One iteration = one round (RA1 move if allowed, then RA2 move).
     # Stops once check_win() is non-zero or no board cell equals 0.
     while (board.check_win() == 0 and np.any(board.board == 0)):
         episodes = 0
         if first or start:
             # Keep requesting moves from RA1 until play_tac() returns
             # something other than False (presumably False signals a
             # rejected/illegal move -- TODO confirm).
             while board.play_tac(*(RA1.get_move(
                     board.get_feature_vec(board.tac)))) is False:
                 pass
             # No empty cell left after RA1's move: end the game before
             # RA2 is asked to move.
             # NOTE(review): only a full board is checked here, not
             # check_win(); RA2 still moves after a non-filling RA1 move
             # even if that move ended the game -- confirm intended.
             if not np.any(board.board == 0):
                 break
         episodes = 0
         # Same retry-until-accepted loop for RA2 on the tic side.
         while board.play_tic(*(
                 RA2.get_move(board.get_feature_vec(board.tic)))) is False:
             pass
         first = True
     # End-of-game update: each agent receives
     # check_win() * -200 * <its own side marker>; the `- 0` / `+ 0` terms
     # do not change the value. The resulting sign depends on the values of
     # board.tic / board.tac and on check_win()'s return convention, which
     # are not visible here -- TODO confirm the reward direction.
     RA2.update_params((board.check_win() - 0) * -200 * board.tic)
     RA1.update_params((board.check_win() + 0) * -200 * board.tac)
     if board.check_win() == board.tic:
Esempio n. 2
0
 # Self-play training: RA1 plays the "tac" side and RA2 plays the "tic"
 # side of each game; both are updated once per finished game.
 # NOTE(review): the meaning of the (1000, 0.01) arguments is not visible
 # in this fragment -- confirm against the policy_agent definition.
 RA1 = policy_agent(1000, 0.01)
 RA2 = policy_agent(1000, 0.01)
 # RA3 is created here but not used in the visible part of the loop.
 RA3 = random_agent()
 # Counters and flags. None of them is modified within the visible lines;
 # presumably they are updated further down -- TODO confirm.
 # NOTE(review): `exit` shadows the builtin of the same name.
 policy2_wins = 0
 policy1_wins = 0
 draws = 0
 doomed = 0
 exit = 1
 win_ratio = 0
 random_wins = 0
 for i in range(1000000):
     wrong_move = 0
     # Every game starts from a freshly constructed board.
     board = Board()
     # Coin flip: when True, RA1 (tac) moves before RA2 in the first round;
     # otherwise RA2 (tic) makes the opening move.
     start = np.random.uniform() > 0.5
     # Set to True after RA2's first move so that RA1 moves in every
     # subsequent round regardless of `start`.
     first = False
     # One iteration = one round (RA1 move if allowed, then RA2 move).
     # Stops once check_win() is non-zero or no board cell equals 0.
     while(board.check_win() == 0 and np.any(board.board == 0)):
         episodes = 0
         if first or start:
             # Keep requesting moves from RA1 until play_tac() returns
             # something other than False (presumably False signals a
             # rejected/illegal move -- TODO confirm).
             while board.play_tac(*(RA1.get_move(
                     board.get_feature_vec(board.tac)))) is False:
                 pass
             # No empty cell left after RA1's move: end the game before
             # RA2 is asked to move.
             # NOTE(review): only a full board is checked here, not
             # check_win(); RA2 still moves after a non-filling RA1 move
             # even if that move ended the game -- confirm intended.
             if not np.any(board.board == 0):
                 break
         episodes = 0
         # Same retry-until-accepted loop for RA2 on the tic side.
         while board.play_tic(*(RA2.get_move(
                 board.get_feature_vec(board.tic)))) is False:
             pass
         first = True
     # End-of-game update: each agent receives
     # check_win() * -200 * <its own side marker>; the `- 0` / `+ 0` terms
     # do not change the value. The resulting sign depends on the values of
     # board.tic / board.tac and on check_win()'s return convention, which
     # are not visible here -- TODO confirm the reward direction.
     RA2.update_params((board.check_win() - 0) * -200 * board.tic)
     RA1.update_params((board.check_win() + 0) * -200 * board.tac)
     if board.check_win() == board.tic: