예제 #1
0
 def reward_function(self, agent: Agent):
     """Return the reward earned by ``agent`` in the current state.

     Terminal state (``self.terminal_test()`` is truthy):
         100 if the agent's score equals the highest score among
         ``self.players`` (a tie for first place counts as a win),
         otherwise -150.

     Non-terminal state:
         ``5 * multiplier`` if the agent's current score is strictly greater
         than ``agent.last_score``, otherwise ``-10 * multiplier``, where
         ``multiplier`` is ``agent.get_multiplier_last_action()``.

     Args:
         agent: The player being rewarded. Must expose ``index``,
             ``last_score`` and ``get_multiplier_last_action()``.

     Returns:
         The numeric reward described above.
     """
     if self.terminal_test():
         # Take the maximum from the real scores rather than seeding it with
         # a literal 0: with a 0 seed, a game in which every score is
         # negative has no player matching the "maximum", so even the leader
         # was punished. ``default=0`` keeps the previous result when there
         # are no players at all.
         best_score = max(
             (self.scores[player.index] for player in self.players),
             default=0,
         )
         return 100 if self.scores[agent.index] == best_score else -150

     # The best score is only needed at the end of the game, so the scan
     # above is skipped entirely for intermediate states.
     multiplier = agent.get_multiplier_last_action()
     gained_points = self.scores[agent.index] > agent.last_score
     return (5 if gained_points else -10) * multiplier