def train(nbSticks, nbGames, plotValueFunction, players, debugMode=False):
    """Play ``nbGames`` training games and report the learned value functions.

    A single ``Game`` is created in training mode and replayed ``nbGames``
    times, being reset after every game. After each game the current value
    function (``p.v``) of every ``TrainedPlayer`` is snapshotted, so that its
    evolution can be plotted once training is over.

    Args:
        nbSticks: Number of sticks handed to ``Game``; also the upper bound of
            the states (``1..nbSticks``) for which value history is recorded.
        nbGames: Number of games to play.
        plotValueFunction: When truthy, ``plot`` is called with the recorded
            per-state value history once training is finished.
        players: Players handed to ``Game``. Every player has its ``epsilon``
            attribute decayed during training.
        debugMode: When truthy, a ``Writer`` is created to log each game, and
            the flag is forwarded to ``Game`` through ``ConfigKey.DEBUG``.
    """
    # Exploration schedule: every `decayInterval` games (including the very
    # first one, i == 0) epsilon is multiplied by `epsilonDecay`, but never
    # drops below `minEpsilon`.
    decayInterval = 10
    epsilonDecay = 0.996
    minEpsilon = 0.05

    # Exact type match (not isinstance) is kept on purpose to preserve the
    # original selection of players.
    # NOTE(review): subclasses of TrainedPlayer are therefore excluded - confirm
    # this is intended.
    trainedPlayers = [p for p in players if type(p) is TrainedPlayer]

    # Per trained player: state -> list of values, one entry per game played.
    # NOTE(review): assumes every key of `p.v` lies in 1..nbSticks; any other
    # key would raise KeyError when the stats are updated below.
    valueFunctionStats = {
        p: {state: [] for state in range(1, nbSticks + 1)}
        for p in trainedPlayers
    }

    writer = Writer() if debugMode else None
    try:
        game = Game(nbSticks, players, {
            ConfigKey.TRAIN: True,
            ConfigKey.DEBUG: debugMode,
        })

        print("\nTraining...")
        for i in range(nbGames):
            if writer:
                writer.beginGame(i, game.players[0], game.players[1])
            game.start()  # play a game
            if writer:
                writer.endGame(game.players[0], game.players[1])

            # Record the value function as it stands after this game.
            for p, history in valueFunctionStats.items():
                for state, value in p.v.items():
                    history[state].append(value)

            # Decrease exploration over time (applies to all players).
            if i % decayInterval == 0:
                for p in players:
                    p.epsilon = max(p.epsilon * epsilonDecay, minEpsilon)

            game.reset()
    finally:
        # Close the debug writer even if a game raised, so it is never leaked.
        if writer:
            writer.close()

    for p in trainedPlayers:
        print(f'\n=== Value function of {p}: ===')
        PrettyPrinter().pprint(p.v)

    if plotValueFunction:
        plot(valueFunctionStats)