Beispiel #1
0
    def _reward_fn(self, last_response, response, end_status):
        """Return the change in gold between two responses plus the time penalty.

        Args:
            last_response: Response from the previous step; its
                "Blstats.gold" entry is read through ``base._get``.
            response: Response from the current step; its "Blstats.gold"
                entry is read the same way.
            end_status: Accepted for interface compatibility; ignored.

        Returns:
            ``(current gold - previous gold)`` plus whatever
            ``self._get_time_penalty(last_response, response)`` returns.
        """
        del end_status  # Not used by this reward.

        # NOTE(review): the third argument to base._get looks like a fallback
        # for a missing entry -- confirm against base._get. If so, a missing
        # previous value counts as 0 and a missing current value yields a
        # zero gold delta.
        previous_gold = base._get(last_response, "Blstats.gold", 0)
        current_gold = base._get(response, "Blstats.gold", previous_gold)

        penalty = self._get_time_penalty(last_response, response)

        return (current_gold - previous_gold) + penalty
Beispiel #2
0
    def _reward_fn(self, last_response, response, end_status):
        """Return the non-negative change in "You.uhunger" plus the time penalty.

        Only an increase of the "You.uhunger" value between the two
        responses is rewarded; a decrease contributes 0 rather than a
        negative amount.

        Args:
            last_response: Response from the previous step; its
                "You.uhunger" entry is read through ``base._get``.
            response: Response from the current step; its "You.uhunger"
                entry is read the same way.
            end_status: Accepted for interface compatibility; ignored.

        Returns:
            The clamped increase plus whatever
            ``self._get_time_penalty(last_response, response)`` returns.
        """
        del end_status  # Not used by this reward.

        # NOTE(review): the third argument to base._get looks like a fallback
        # for a missing entry -- confirm against base._get.
        previous_hunger = base._get(last_response, "You.uhunger", 0)
        current_hunger = base._get(response, "You.uhunger", previous_hunger)

        # Clamp at zero so a drop in the value is never penalised here.
        change = current_hunger - previous_hunger
        gain = change if change > 0 else 0

        penalty = self._get_time_penalty(last_response, response)

        return gain + penalty