Esempio n. 1
0
def train_player():
    """Train pair A against pair B and save the resulting networks.

    Plays up to ``games`` games of ``Game(pairA, pairB)``. Every ``last``
    games the share of pair-A wins among the most recent ``last`` games is
    computed. If it has reached 90 % the loop stops early; otherwise the
    networks are checkpointed under the current game index and a progress
    line is printed.

    After the loop the networks are saved once more, the elapsed time is
    printed and ``eval()`` is called on both pair-A players.

    Relies on the module-level names ``stdout``, ``Game``, ``pairA``,
    ``pairB``, ``playerA1`` and ``playerA2``.
    """
    # Training phase: output is switched off while games are played
    # (presumably stdout.disable() silences print -- confirm in stdout module).
    stdout.disable()

    games = 10000  # one game can run up to 1001 points
    last = 1000    # size of the reporting window, in games
    wins = []

    print("inside train player")
    now = datetime.datetime.now()
    for i in range(games):
        game = Game(pairA, pairB)
        pointsA, pointsB = game.play()

        wins.append("A" if pointsA > pointsB else "B")

        if i > 0 and i % last == 0:
            winsA = wins[-last:].count("A")
            winningPercentage = winsA / last * 100
            if winningPercentage >= 90:
                # Good enough: stop training; the final save happens below.
                break
            game.saveNetworks(str(i))
            stdout.enable()
            # Placeholders now line up with the arguments: window size,
            # percentage, then "wins / window".
            print("[RL] {} - win percentage (in last {} games): {}% ({} / {}) at game:".format(pairA, last, winningPercentage, winsA, last), i, (datetime.datetime.now() - now))
            stdout.disable()

    # Play phase.
    # NOTE(review): output is still disabled here (the enable call was
    # commented out in the original), so the elapsed-time print below may
    # be suppressed -- confirm whether that is intended.
    game.saveNetworks()
    print(datetime.datetime.now() - now)

    playerA1.eval()
    playerA2.eval()
 def notifyTrick(self, value):
     """Normalize a trick reward and pass it to the policy as feedback.

     The reward is shifted by 21 and divided by 26 before being handed to
     ``self.witchesPolicy.feedback``.

     Args:
         value: Raw reward of the finished trick.

     Raises:
         ValueError: If the normalized reward has an absolute value above 1,
             i.e. the raw reward is outside the range the normalization
             was designed for.
     """
     # The worst trick value is -17 (g10, g5, r1, r2); if a "times 2" card
     # is added on top it is presumably -21, hence the shift by 21.
     value += 21
     normalizedReward = value / 26  # divisor used to be 21, now 26
     if abs(normalizedReward) > 1:
         stdout.enable()
         print(normalizedReward)
         # Abort explicitly instead of relying on a NameError from an
         # undefined name to stop the training run.
         raise ValueError(
             "normalized reward {} is outside [-1, 1]".format(
                 normalizedReward))
     #self.playingPolicy.feedback(normalizedReward)
     self.witchesPolicy.feedback(normalizedReward)
 def play(self):
     """Run the training loop, exporting the policy after every batch of games.

     Repeatedly selects an action, applies it with ``self.my_game.step_idx``
     and, whenever a round finishes, feeds the reward of the player at
     ``ai_player_index`` to ``notifyTrick``. When the acting player's hand is
     empty the policy is updated and the game is reset. A game counts as
     over once the lowest entry of ``self.my_game.total_rewards`` is at or
     below ``gameover_limit``; the player with the highest total is then
     credited with a win.

     After every ``nuGames`` finished games the win statistics are printed,
     the policy network is exported via ``self.exportONNX`` and the
     statistics are appended to ``history``.

     Any exception is caught, printed together with its traceback, and the
     history collected so far is plotted.
     """
     number_of_won = [0, 0, 0, 0]
     gameover_limit = -70
     history = []
     ai_player_index = 0
     nuGames = 50
     out_path = "models/rl_policy/"
     stdout.disable()
     stdout.write_file(out_path + "output.txt")  # contains all logging!
     start_time = datetime.datetime.now()
     try:
         # j counts batches; each batch runs until nuGames games are over.
         for j in range(1, 100000000):
             i = 0
             while i < nuGames:
                 action = self.selectAction()
                 current_player = self.my_game.active_player
                 # action is used as an index into the active player's hand.
                 card = self.my_game.players[current_player].hand[action]
                 print("[{}] {} {}\t{}\tCard {}\tHand Index {}".format(
                     self.my_game.current_round, current_player,
                     self.my_game.names_player[current_player],
                     self.my_game.ai_player[current_player], card, action))
                 rewards, round_finished = self.my_game.step_idx(
                     action, auto_shift=False)
                 if round_finished:
                     # Only the reward of the player at ai_player_index is
                     # fed back to the policy.
                     self.notifyTrick(rewards[ai_player_index])
                     print("Update rewards: ", rewards, "\n")
                     if len(self.my_game.players[current_player].hand
                            ) == 0:  # one game finished
                         print("update policy at end of one game!")
                         #self.playingPolicy.updatePolicy()
                         self.witchesPolicy.updatePolicy()
                         print(self.my_game.total_rewards)
                         if min(self.my_game.total_rewards
                                ) <= gameover_limit:
                             # NOTE(review): the == below is elementwise only
                             # if total_rewards is a NumPy array; the
                             # assignment two statements further down stores
                             # a plain list -- confirm reset_game() restores
                             # an array.
                             winner_idx = np.where(
                                 (self.my_game.total_rewards == max(
                                     self.my_game.total_rewards)))
                             # On a tie the first index with the maximum wins.
                             number_of_won[winner_idx[0][0]] += 1
                             self.my_game.total_rewards = [0, 0, 0, 0]
                             i += 1
                             if i == nuGames:
                                 # End of batch: report, export, reset stats.
                                 stdout.enable()
                                 print("Win Stats:", number_of_won,
                                       "at game", j * nuGames, "for:",
                                       self.witchesPolicy.lr,
                                       datetime.datetime.now() - start_time,
                                       "\n")
                                 # Only state is used below; active_player
                                 # and options are unpacked but not read.
                                 active_player, state, options = self.my_game.getState(
                                 )
                                 # Sample input handed to the ONNX export.
                                 state = torch.tensor(
                                     state).float().resize_(180)
                                 # File name: <lr>_<games played>__<wins of
                                 # the player at ai_player_index>.
                                 path = out_path + str(
                                     self.witchesPolicy.lr) + "_" + str(
                                         j * nuGames) + "__" + str(
                                             number_of_won[ai_player_index])
                                 self.exportONNX(self.witchesPolicy.network,
                                                 state, path)
                                 history.append(number_of_won)
                                 # Rebind to a new list so the entry just
                                 # appended to history is not modified.
                                 number_of_won = [0, 0, 0, 0]
                                 stdout.disable()
                         self.my_game.reset_game()
         self.plotHistory(history, ai_player_index, out_path)
     except Exception as e:
         # Report the failure and still plot whatever history was collected.
         stdout.enable()
         print("ERROR!!!!", e)
         print(traceback.format_exc())
         print(number_of_won)
         self.plotHistory(history, ai_player_index, out_path)
Esempio n. 4
0
# Training configuration: total games and size of the reporting window.
games = 10000
last = 100
wins = []

for i in range(games):
    game = Game(pairA, pairB)
    pointsA, pointsB = game.play()
    if pointsA > pointsB:
        wins.append("A")
    else:
        wins.append("B")

    # Report only on every `last`-th game, and never on the very first one.
    if i <= 0 or i % last != 0:
        continue

    winsA = wins[-last:].count("A")
    winningPercentage = winsA / last * 100
    # Early stopping at a 90 % win rate is intentionally disabled here:
    # if winningPercentage >= 90:
    #     break
    stdout.enable()
    print(
        "[RL] {} - postotak pobjeda (u zadnjih {} igara): {}% ({} / {})".format(
            pairA, last, winningPercentage, winsA, last
        )
    )
    stdout.disable()

# Play: re-enable output and call eval() on the trained players.
stdout.enable()

playerA1.eval()
playerA2.eval()

# Keyboard-controlled player with a random partner.
playerMe = PlayerKeyboard("Ja")
playerFriend = PlayerRandom("On")

game = Game(
    Pair(playerMe, playerFriend),
    pairA
Esempio n. 5
0
import stdout

# Pair of reinforcement-learning players.
# TODO save & load policy networks
# TODO share policy networks across all RL players
playerA1 = PlayerRL("Borna")
playerA2 = PlayerRL("Mislav")
pairA = Pair(playerA1, playerA2)

# Pair of randomly playing opponents.
playerB1 = PlayerRandom("Luka")
playerB2 = PlayerRandom("Lovro")
pairB = Pair(playerB1, playerB2)

stdout.disable()

# Training configuration: total games and size of the reporting window.
games = 10000
last = 50
wins = []
for i in range(games):
    game = Game(pairA, pairB)
    pointsA, pointsB = game.play()
    if pointsA > pointsB:
        wins.append("A")
    else:
        wins.append("B")

    # Report only on every `last`-th game, and never on the very first one.
    if i <= 0 or i % last != 0:
        continue

    winningPercentage = wins[-last:].count("A") / last * 100
    stdout.enable()
    print(
        "[RL] {} - postotak pobjeda (u zadnjih {} igara): {}%".format(
            pairA, last, winningPercentage
        )
    )
    stdout.disable()