예제 #1
0
def instance(name, bot, exploratory, rounds):
    """Train two bots of the same kind against each other and save the result.

    Two players (named 1 and 2) share one environment and play ``rounds``
    games of tic-tac-toe against each other; the side that opens alternates
    every game. When training is done, the first bot's Q-table is written as
    JSON to ``results/<name>-<exploratory>-<rounds>.txt``.

    Args:
        name: Label for this run; must be a string because it is concatenated
            into the output filename.
        bot: Callable (typically a bot class) invoked as
            ``bot(environment=..., player=...)`` to build each bot.
        exploratory: Value forwarded to both ``player.Player`` instances and
            included in the progress output and the filename.
        rounds: Number of games to play.

    Returns:
        Always ``0``.
    """
    print("task: name:", name, "bot:", bot, "exploratory:", exploratory, "rounds:", rounds)
    start_time = time.process_time()
    env = environment.Environment(discount=1.0, q_init=0.5)
    first_player = player.Player(name=1, exploratory=exploratory, environment=env)
    second_player = player.Player(name=2, exploratory=exploratory, environment=env)
    first_bot = bot(environment=env, player=first_player)
    second_bot = bot(environment=env, player=second_player)

    game = TicTacToe()
    # Self-play: the first bot plays against the second bot.
    # Progress is reported every `report` rounds.
    report = 1000
    # The first bot opens the first game; the opener alternates afterwards.
    start_turn = first_bot.player.name
    for round_index in range(rounds):
        # Start a fresh game and flip the opening side for the next one
        # (player names are 1 and 2, so `x % 2 + 1` toggles between them).
        game.restart(start=start_turn)
        whose_turn = start_turn
        start_turn = start_turn % 2 + 1
        first_bot.restart()
        second_bot.restart()
        while not game.is_end():
            # Compare player names by value; `is` on integers only works by
            # accident of CPython's small-int caching.
            if first_bot.player.name == whose_turn:
                current_bot = first_bot
            else:
                current_bot = second_bot
            current_state = game.table
            action = current_bot.take_turn(current_state)
            # Hand the move over to the other player.
            whose_turn = whose_turn % 2 + 1
            game.turn(action)

        # Show the finished board to both bots once more so that each of them
        # gets to see (and learn from) the result of the game.
        current_state = game.table
        first_bot.take_turn(current_state)
        second_bot.take_turn(current_state)

        if round_index % report == 0:
            # Progress report.
            print("exploratory : ", exploratory, "percent: ", round_index / rounds * 100)

    # Persist the Q-table; only the first bot's table is kept.
    filename = "results/" + name + "-" + str(exploratory) + "-" + str(rounds) + ".txt"
    print("saving into file:", filename)
    # Use a context manager so the file is flushed and closed deterministically.
    with open(filename, "w") as output_file:
        json.dump(first_bot.q_table, output_file)
    print("finished exploratory: ", exploratory, " time: ", time.process_time() - start_time)
    return 0
예제 #2
0
from tictactoe import TicTacToe

__author__ = 'phizaz'

# Descriptor handed to Tools.load_source below.
# NOTE(review): presumably selects the saved Q-table of one training run
# (bot name, exploration setting, number of rounds) -- confirm against
# Tools.load_source.
bot_desc = {
    "name": "BotRLBetterDiscovery",
    "exploratory": 1,
    "rounds": 100000,
}

# Build the computer player as player 1 from the loaded Q-table.
bot = Bot(
    name=1,
    q_table=Tools.load_source(bot_desc),
)
# The bot moves first in the first game; the opening side alternates afterwards.
start_turn = bot.name
while True:
    game = TicTacToe()
    game.restart(start=start_turn)
    whose_turn = start_turn
    start_turn = start_turn % 2 + 1
    print('game start : ')
    while True:
        if whose_turn is bot.name:
            # bot action
            action = bot.take_turn(game.table)
        else:
            # user action
            action = (int(input('row:')),
                      int(input('col:')))
        game.turn(action)
        game.display()
        whose_turn = whose_turn % 2 + 1
        winner = game.winner()