def instance(name, bot, exploratory, rounds):
    """Train two bots of the same kind against each other and save the result.

    Two players (named 1 and 2) share one ``environment.Environment`` and are
    each wrapped in a bot built by ``bot``.  The bots then play ``rounds``
    games of tic-tac-toe against each other, with the starting player
    alternating every game.  When training is done, the first bot's Q-table
    is written as JSON to ``results/<name>-<exploratory>-<rounds>.txt``.

    Args:
        name: Label of the bot kind; printed and used in the output filename.
        bot: Callable invoked as ``bot(environment=..., player=...)`` that
            returns an object exposing ``player``, ``restart()``,
            ``take_turn(state)`` and ``q_table``.
        exploratory: Value forwarded to ``player.Player(exploratory=...)``;
            also printed and used in the output filename.
        rounds: Number of self-play games to run.

    Returns:
        Always ``0``.
    """
    print("task: name:", name, "bot:", bot, "exploratory:", exploratory, "rounds:", rounds)

    start_time = time.process_time()

    env = environment.Environment(discount=1.0, q_init=0.5)
    first_player = player.Player(name=1, exploratory=exploratory, environment=env)
    second_player = player.Player(name=2, exploratory=exploratory, environment=env)
    first_bot = bot(environment=env, player=first_player)
    second_bot = bot(environment=env, player=second_player)

    game = TicTacToe()

    # Print a progress line once every `report_every` rounds.
    report_every = 1000

    # The first bot opens the first game; the opener alternates afterwards.
    start_turn = first_bot.player.name

    # `round_index` rather than `round`, so the builtin is not shadowed.
    for round_index in range(rounds):
        # Every round is a fresh game played from the empty board.
        game.restart(start=start_turn)
        whose_turn = start_turn
        # Player names are 1 and 2, so `n % 2 + 1` flips between them.
        start_turn = start_turn % 2 + 1

        first_bot.restart()
        second_bot.restart()

        while not game.is_end():
            # Compare by value: `is` on ints only works by accident of
            # CPython's small-integer caching.
            if first_bot.player.name == whose_turn:
                current_bot = first_bot
            else:
                current_bot = second_bot

            current_state = game.table
            action = current_bot.take_turn(current_state)

            # Hand the move over to the other player, then apply the action.
            whose_turn = whose_turn % 2 + 1
            game.turn(action)

        # Show the final board to both bots so that they can learn from the
        # result of the game.
        current_state = game.table
        first_bot.take_turn(current_state)
        second_bot.take_turn(current_state)

        if round_index % report_every == 0:
            print("exploratory : ", exploratory, "percent: ", round_index / rounds * 100)

    # NOTE(review): the Q-table is saved once, after all rounds have finished;
    # confirm this matches the intended placement of the save step.
    # Only the first bot's Q-table is kept.
    filename = "results/" + name + "-" + str(exploratory) + "-" + str(rounds) + ".txt"
    print("saving into file:", filename)
    # The context manager guarantees the file is flushed and closed.
    with open(filename, "w") as result_file:
        json.dump(first_bot.q_table, result_file)

    print("finished exploratory: ", exploratory, " time: ", time.process_time() - start_time)
    return 0
from tictactoe import TicTacToe __author__ = 'phizaz' bot_desc = { 'name': 'BotRLBetterDiscovery', 'exploratory': 1, 'rounds': 100000, } bot = Bot(name=1, q_table=Tools.load_source(bot_desc)) # bot starts first, but alternatively afterwards start_turn = bot.name while True: game = TicTacToe() game.restart(start=start_turn) whose_turn = start_turn start_turn = start_turn % 2 + 1 print('game start : ') while True: if whose_turn is bot.name: # bot action action = bot.take_turn(game.table) else: # user action action = (int(input('row:')), int(input('col:'))) game.turn(action) game.display() whose_turn = whose_turn % 2 + 1 winner = game.winner()