Ejemplo n.º 1
0
 def end(self):
     """Close out the current game: settle coins, reward the agent, train.

     Skipped game (``self.skip``): no coins move. With probability 0.5 a
     zero reward is recorded for every player, the game memory is flushed
     and the game network is trained; otherwise nothing happens.

     Played game: winners and payout are obtained from ``Rules``, every
     non-winning player pays ``self.payout`` to every winner, and each
     player's coin delta is passed to the RL agent as the game reward
     (and, divided by 8, as the card reward). Both memories are flushed,
     the match's starting position advances by one seat (modulo
     ``num_players``), both networks are trained, and the collected log
     messages are printed unless ``self.random_game`` is set.

     Returns None.
     """
     if self.skip:
         # Only about half of the skipped games contribute a zero-reward
         # sample for the game network.
         if np.random.rand() >= 0.5:
             return
         agent = self.match.rl_agent
         for seat in self.match.players:
             agent.update_game_memory_with_reward(seat.position, 0)
         agent.flush_game_memory()
         agent.train_game_network()
         return

     self.winners = Rules.calc_game_winner(self)
     match = self.match
     agent = match.rl_agent
     players = match.players

     # Kept as an equality test (rather than `not ...`) so the outcome is
     # unchanged for non-bool values of `explore`.
     if len(self.winners) == 1 and agent.explore == False:
         self.log_msgs.append("WINNER WINNER")

     self.payout = Rules.calc_game_payout(self)

     # Snapshot the coins around the transfer so that rewards are simply
     # the per-player difference.
     coins_before = [seat.coins for seat in players]
     for payer in players:
         if payer not in self.winners:
             for winner in self.winners:
                 # TODO: this will not work with callgame!!
                 payer.coins -= self.payout
                 winner.coins += self.payout
     coins_after = [seat.coins for seat in players]
     rewards = [
         after - before
         for before, after in zip(coins_before, coins_after)
     ]

     winner_positions = [winner.position for winner in self.winners]
     self.log_msgs.append(
         "Player(s) {0} won this game. Rewards: {1}".format(
             winner_positions, rewards))

     for seat, reward in zip(players, rewards):
         position = seat.position
         # The game memory receives the full reward.
         agent.update_game_memory_with_reward(position, reward)
         # Close the card memory entry (next state None, final flag True)
         # and give it the reward spread over the cards - 8.0 is the
         # number of cards.
         agent.update_card_memory_with_next_state(position, None, True)
         agent.update_card_memory_with_reward(position, reward / 8.0)
         seat.old_state = None

     game_name = self.game_type["game"]
     agent.flush_card_memory(game_name)
     agent.flush_game_memory()

     # Move the starting seat on by one, wrapping at the player count.
     next_start = match.current_starting_position + 1
     match.current_starting_position = next_start % match.num_players

     agent.train_game_network()
     agent.train_action_network(game_name)

     # Log output is suppressed for games flagged as random.
     if not self.random_game:
         print("\n".join(self.log_msgs))