def reward_function(self, agent: Agent):
    """Return the reward earned by ``agent`` in the current game state.

    Terminal state (``self.terminal_test()`` is true):
        100 if the agent's entry in ``self.scores`` equals the highest
        score among ``self.players`` (ties count as a win), else -150.

    Non-terminal state:
        The agent's current score is compared with ``agent.last_score``.
        A strictly higher score yields ``5 * multiplier``; anything else
        yields ``-10 * multiplier``, where ``multiplier`` comes from
        ``agent.get_multiplier_last_action()``.

    Args:
        agent: The agent to reward. Must expose ``index``, ``last_score``
            and ``get_multiplier_last_action()``.

    Returns:
        The numeric reward for the agent.
    """
    if self.terminal_test():
        # Take the real maximum instead of seeding the scan with 0, so the
        # leader is still recognised when every score is negative.
        # ``default=0`` keeps the previous result for an empty player list.
        best_score = max(
            (self.scores[player.index] for player in self.players),
            default=0,
        )
        return 100 if self.scores[agent.index] == best_score else -150

    # Mid-game: reward or penalise the last action, scaled by its multiplier.
    multiplier = agent.get_multiplier_last_action()
    score_improved = self.scores[agent.index] > agent.last_score
    return 5 * multiplier if score_improved else -10 * multiplier