def train_player():
    """Train pair A against pair B and then put both A players into eval mode.

    Plays up to ``games`` games between the module-level ``pairA`` and
    ``pairB``.  Every ``last`` games the share of games won by pair A within
    the most recent ``last`` games is computed:

    * if it is at least 90 %, training stops early;
    * otherwise the networks are saved under the current game index and a
      progress line (win percentage, win count, game index, elapsed time)
      is printed.

    After the loop the networks are saved once more, the total elapsed time
    is printed and ``eval()`` is called on ``playerA1`` and ``playerA2``.

    NOTE(review): ``stdout.disable()`` / ``stdout.enable()`` presumably
    silence and restore console output -- confirm against the ``stdout``
    helper module.
    """
    # Train
    stdout.disable()
    games = 10000  # one game means up to 1001 points
    last = 1000  # size of the reporting window, in games
    wins = []

    print("inside train player")
    now = datetime.datetime.now()
    for i in range(games):
        game = Game(pairA, pairB)
        pointsA, pointsB = game.play()
        wins.append("A" if pointsA > pointsB else "B")

        if i > 0 and i % last == 0:
            winsA = wins[-last:].count("A")
            winningPercentage = winsA / last * 100
            if winningPercentage >= 90:  # threshold was 90 before as well
                break
            game.saveNetworks(str(i))
            stdout.enable()
            # The window size is a placeholder so that every argument lines
            # up with the label it is printed next to.
            print(
                "[RL] {} - win percentage (in last {} games): {}% ({} / {}) at game:".format(
                    pairA, last, winningPercentage, winsA, last),
                i,
                (datetime.datetime.now() - now))
            stdout.disable()

    # Play
    # stdout.enable()
    game.saveNetworks()
    print(datetime.datetime.now() - now)
    playerA1.eval()
    playerA2.eval()
def notifyTrick(self, value):
    """Normalize a trick reward and pass it to the witches policy.

    The raw ``value`` is shifted by 21 and divided by 26 before being handed
    to ``self.witchesPolicy.feedback``.

    Args:
        value: raw reward of the trick for the learning player.

    Raises:
        ValueError: if the normalized reward has a magnitude greater than 1,
            i.e. the raw reward lies outside the range the scaling expects.
    """
    # The worst value is -17 (g10, g5, r1, r2), unless a "mal2" (times two)
    # is added on top?! Then it is probably 21?!
    raw_value = value
    value += 21
    normalizedReward = value / 26  # 21 previously, otherwise 26
    if abs(normalizedReward) > 1:
        # Re-enable console output so the failure is visible, then fail
        # explicitly instead of relying on an undefined name to crash.
        stdout.enable()
        raise ValueError(
            "normalized reward {} out of range [-1, 1] (raw value {})".format(
                normalizedReward, raw_value))
    # self.playingPolicy.feedback(normalizedReward)
    self.witchesPolicy.feedback(normalizedReward)
def play(self):
    """Train the witches policy by playing games in a (practically) endless loop.

    Each step selects an action with ``self.selectAction()`` and applies it
    through ``self.my_game.step_idx``.  When a round finishes, the reward of
    the player at ``ai_player_index`` is passed to ``self.notifyTrick``.  When
    the acting player's hand is empty, ``self.witchesPolicy.updatePolicy()``
    is called and the game is reset.  A match counts as finished once the
    lowest entry of ``self.my_game.total_rewards`` is at or below
    ``gameover_limit``; the first player holding the highest total is
    credited with the win.  After every ``nuGames`` finished matches the win
    statistics are printed, the network is exported through
    ``self.exportONNX`` and the history is plotted via ``self.plotHistory``.

    Any exception raised inside the loop is caught and printed together with
    its traceback and the current win counters; the history collected so far
    is plotted and the exception is not re-raised.

    NOTE(review): this block arrived with its line breaks collapsed; the
    nesting below (in particular of ``i += 1``, ``reset_game()`` and the
    per-batch ``plotHistory`` call) is a reconstruction -- confirm against
    the original file.
    NOTE(review): ``stdout.disable()`` / ``stdout.enable()`` presumably
    silence and restore console output -- confirm.
    """
    number_of_won = [0, 0, 0, 0]  # wins per player index in the current batch
    gameover_limit = -70  # a match ends once any total reward is <= this
    history = []  # one number_of_won list per completed batch
    ai_player_index = 0  # player whose rewards are fed to the policy
    nuGames = 50  # finished matches per batch
    out_path = "models/rl_policy/"
    stdout.disable()
    stdout.write_file(out_path + "output.txt")  # contains all logging!
    start_time = datetime.datetime.now()
    try:
        for j in range(1, 100000000):
            i = 0  # matches finished in the current batch
            while i < nuGames:
                action = self.selectAction()
                current_player = self.my_game.active_player
                # `action` is used as an index into the acting player's hand.
                card = self.my_game.players[current_player].hand[action]
                print("[{}] {} {}\t{}\tCard {}\tHand Index {}".format(
                    self.my_game.current_round, current_player,
                    self.my_game.names_player[current_player],
                    self.my_game.ai_player[current_player], card, action))
                rewards, round_finished = self.my_game.step_idx(
                    action, auto_shift=False)
                if round_finished:
                    # player idx of Reinforce
                    self.notifyTrick(rewards[ai_player_index])
                    print("Update rewards: ", rewards, "\n")
                    if len(self.my_game.players[current_player].hand
                           ) == 0:  # one game finished
                        print("update policy at end of one game!")
                        #self.playingPolicy.updatePolicy()
                        self.witchesPolicy.updatePolicy()
                        print(self.my_game.total_rewards)
                        if min(self.my_game.total_rewards
                               ) <= gameover_limit:
                            # assumes total_rewards supports element-wise
                            # comparison here (e.g. a NumPy array) -- TODO confirm
                            winner_idx = np.where(
                                (self.my_game.total_rewards == max(
                                    self.my_game.total_rewards)))
                            # On a tie the first index with the maximum wins.
                            number_of_won[winner_idx[0][0]] += 1
                            self.my_game.total_rewards = [0, 0, 0, 0]
                            i += 1
                            if i == nuGames:
                                # Batch complete: report, export, record.
                                stdout.enable()
                                print("Win Stats:", number_of_won, "at game",
                                      j * nuGames, "for:",
                                      self.witchesPolicy.lr,
                                      datetime.datetime.now() - start_time,
                                      "\n")
                                # Only `state` is used; it serves as the
                                # example input for the ONNX export.
                                active_player, state, options = self.my_game.getState(
                                )
                                state = torch.tensor(
                                    state).float().resize_(180)
                                # File name: <lr>_<games played>__<wins of the AI player>
                                path = out_path + str(
                                    self.witchesPolicy.lr) + "_" + str(
                                        j * nuGames) + "__" + str(
                                            number_of_won[ai_player_index])
                                self.exportONNX(self.witchesPolicy.network,
                                                state, path)
                                history.append(number_of_won)
                                # Rebind (not clear) so the list stored in
                                # history is left untouched.
                                number_of_won = [0, 0, 0, 0]
                                stdout.disable()
                        self.my_game.reset_game()
            self.plotHistory(history, ai_player_index, out_path)
    except Exception as e:
        # Make the failure visible and still plot what was collected.
        stdout.enable()
        print("ERROR!!!!", e)
        print(traceback.format_exc())
        print(number_of_won)
        self.plotHistory(history, ai_player_index, out_path)
games = 10000 last = 100 wins = list() for i in range(games): game = Game(pairA, pairB) pointsA, pointsB = game.play() wins.append("A" if pointsA > pointsB else "B") if i > 0 and i % last == 0: winsA = wins[-last:].count("A") winningPercentage = winsA / last * 100 # if winningPercentage >= 90: # break stdout.enable() print("[RL] {} - postotak pobjeda (u zadnjih {} igara): {}% ({} / {})".format(pairA, last, winningPercentage, winsA, last)) stdout.disable() # Igraj stdout.enable() playerA1.eval() playerA2.eval() playerMe = PlayerKeyboard("Ja") playerFriend = PlayerRandom("On") game = Game( Pair(playerMe, playerFriend), pairA
import stdout

# Reinforcement-learning pair.
# TODO save & load policy networks
# TODO share policy networks across all RL players
playerA1 = PlayerRL("Borna")
playerA2 = PlayerRL("Mislav")
pairA = Pair(playerA1, playerA2)

# Random pair.
playerB1 = PlayerRandom("Luka")
playerB2 = PlayerRandom("Lovro")
pairB = Pair(playerB1, playerB2)

stdout.disable()

# Play `games` games and report pair A's win rate over the most recent
# `last` games every `last` games.
games = 10000
last = 50
wins = []
for i in range(games):
    game = Game(pairA, pairB)
    pointsA, pointsB = game.play()
    if pointsA > pointsB:
        wins.append("A")
    else:
        wins.append("B")

    # Only report on every `last`-th game, and never on the very first one.
    if i <= 0 or i % last != 0:
        continue

    recent = wins[-last:]
    winningPercentage = recent.count("A") / last * 100
    stdout.enable()
    print("[RL] {} - postotak pobjeda (u zadnjih {} igara): {}%".format(
        pairA, last, winningPercentage))
    stdout.disable()