def get_reward(self, new_stats, old_stats):
    # a longer path is rewarded and fewer regions are rewarded (target: a single connected region)
    rewards = {
        "regions": get_range_reward(new_stats["regions"], old_stats["regions"], 1, 1),
        "path-length": get_range_reward(new_stats["path-length"], old_stats["path-length"], np.inf, np.inf)
    }
    # calculate the total reward as the weighted sum of the individual changes
    return rewards["regions"] * self._rewards["regions"] +\
        rewards["path-length"] * self._rewards["path-length"]
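# Every get_reward here delegates to get_range_reward, whose definition is not part
# of this excerpt. The sketch below is a minimal implementation consistent with how
# it is called here (an assumption, not the canonical helper): it returns the signed
# improvement of new_value toward the inclusive [low, high] target band, and 0 once
# both values are already inside the band.
def get_range_reward(new_value, old_value, low, high):
    # both values inside the target band: nothing to reward or punish
    if low <= new_value <= high and low <= old_value <= high:
        return 0
    # neither value is above the band: reward climbing toward low
    if old_value <= high and new_value <= high:
        return min(new_value, low) - min(old_value, low)
    # neither value is below the band: reward descending toward high
    if old_value >= low and new_value >= low:
        return max(old_value, high) - max(new_value, high)
    # the change jumped clear over the band: combine both one-sided terms
    if new_value > high and old_value < low:
        return high - new_value + (low - old_value)
    if new_value < low and old_value > high:
        return high - old_value + (low - new_value)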
def get_reward(self, new_stats, old_stats):
    # reward exactly one player, key, door, and connected region; 2 to max enemies,
    # enemies no closer than the target distance, and a longer path through the level
    rewards = {
        "player": get_range_reward(new_stats["player"], old_stats["player"], 1, 1),
        "key": get_range_reward(new_stats["key"], old_stats["key"], 1, 1),
        "door": get_range_reward(new_stats["door"], old_stats["door"], 1, 1),
        "enemies": get_range_reward(new_stats["enemies"], old_stats["enemies"], 2, self._max_enemies),
        "regions": get_range_reward(new_stats["regions"], old_stats["regions"], 1, 1),
        "nearest-enemy": get_range_reward(new_stats["nearest-enemy"], old_stats["nearest-enemy"], self._target_enemy_dist, np.inf),
        "path-length": get_range_reward(new_stats["path-length"], old_stats["path-length"], np.inf, np.inf)
    }
    # calculate the total reward
    return rewards["player"] * self._rewards["player"] +\
        rewards["key"] * self._rewards["key"] +\
        rewards["door"] * self._rewards["door"] +\
        rewards["enemies"] * self._rewards["enemies"] +\
        rewards["regions"] * self._rewards["regions"] +\
        rewards["nearest-enemy"] * self._rewards["nearest-enemy"] +\
        rewards["path-length"] * self._rewards["path-length"]
def get_reward(self, new_stats, old_stats):
    # reward one player, a single region, crates and targets within limits, a shrinking
    # crate/target mismatch, progress toward solvability, and longer solutions
    rewards = {
        "player": get_range_reward(new_stats["player"], old_stats["player"], 1, 1),
        "crate": get_range_reward(new_stats["crate"], old_stats["crate"], 1, self._max_crates),
        "target": get_range_reward(new_stats["target"], old_stats["target"], 1, self._max_crates),
        "regions": get_range_reward(new_stats["regions"], old_stats["regions"], 1, 1),
        "ratio": get_range_reward(abs(new_stats["crate"] - new_stats["target"]), abs(old_stats["crate"] - old_stats["target"]), -np.inf, -np.inf),
        "dist-win": get_range_reward(new_stats["dist-win"], old_stats["dist-win"], -np.inf, -np.inf),
        "sol-length": get_range_reward(len(new_stats["solution"]), len(old_stats["solution"]), np.inf, np.inf)
    }
    # calculate the total reward
    return rewards["player"] * self._rewards["player"] +\
        rewards["crate"] * self._rewards["crate"] +\
        rewards["target"] * self._rewards["target"] +\
        rewards["regions"] * self._rewards["regions"] +\
        rewards["ratio"] * self._rewards["ratio"] +\
        rewards["dist-win"] * self._rewards["dist-win"] +\
        rewards["sol-length"] * self._rewards["sol-length"]
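# Worked examples of the three bound conventions used above, under the sketch of
# get_range_reward given earlier (values are illustrative; numpy is assumed to be
# imported as np, as the calls above imply):
import numpy as np

# (np.inf, np.inf) never "arrives", so any increase pays off one-for-one,
# e.g. a solution growing from 10 to 14 steps yields +4
assert get_range_reward(14, 10, np.inf, np.inf) == 4
# (-np.inf, -np.inf) rewards any decrease; this is how "ratio" pushes
# abs(crate - target) toward zero, e.g. a mismatch shrinking from 3 to 1 yields +2
assert get_range_reward(1, 3, -np.inf, -np.inf) == 2
# a finite band such as (1, max_crates) pays only until the band is entered
assert get_range_reward(1, 0, 1, 5) == 1  # first crate placed: +1
assert get_range_reward(3, 2, 1, 5) == 0  # already in range: no extra reward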
def get_reward(self, new_stats, old_stats):
    # reward exactly one player, exit, and key, a single region, the player starting on
    # the floor, at most max diamonds, at least min spikes, and a longer, jumpier solution
    rewards = {
        "player": get_range_reward(new_stats["player"], old_stats["player"], 1, 1),
        "exit": get_range_reward(new_stats["exit"], old_stats["exit"], 1, 1),
        "diamonds": get_range_reward(new_stats["diamonds"], old_stats["diamonds"], -np.inf, self._max_diamonds),
        "dist-floor": get_range_reward(new_stats["dist-floor"], old_stats["dist-floor"], 0, 0),
        "key": get_range_reward(new_stats["key"], old_stats["key"], 1, 1),
        "spikes": get_range_reward(new_stats["spikes"], old_stats["spikes"], self._min_spikes, np.inf),
        "regions": get_range_reward(new_stats["regions"], old_stats["regions"], 1, 1),
        "num-jumps": get_range_reward(new_stats["num-jumps"], old_stats["num-jumps"], np.inf, np.inf),
        "dist-win": get_range_reward(new_stats["dist-win"], old_stats["dist-win"], -np.inf, -np.inf),
        "sol-length": get_range_reward(new_stats["sol-length"], old_stats["sol-length"], np.inf, np.inf)
    }
    # calculate the total reward
    return rewards["player"] * self._rewards["player"] +\
        rewards["dist-floor"] * self._rewards["dist-floor"] +\
        rewards["exit"] * self._rewards["exit"] +\
        rewards["spikes"] * self._rewards["spikes"] +\
        rewards["diamonds"] * self._rewards["diamonds"] +\
        rewards["key"] * self._rewards["key"] +\
        rewards["regions"] * self._rewards["regions"] +\
        rewards["num-jumps"] * self._rewards["num-jumps"] +\
        rewards["dist-win"] * self._rewards["dist-win"] +\
        rewards["sol-length"] * self._rewards["sol-length"]
def get_reward(self, new_stats, old_stats):
    # reward the target number of bases, well-separated bases, resource counts in range,
    # nearby resources, choke points within limits, and a single connected region
    rewards = {
        "base_count": get_range_reward(new_stats["base_count"], old_stats["base_count"], self._target_base, self._target_base),
        "base_distance": get_range_reward(new_stats["base_distance"], old_stats["base_distance"], self._width / 2, self._width * 2),
        # "base_space": get_range_reward(new_stats["base_space"], old_stats["base_space"], 0, 10),
        # "asymmetry": get_range_reward(new_stats["asymmetry"], old_stats["asymmetry"], 0, 5),
        "resource_count": get_range_reward(new_stats["resource_count"], old_stats["resource_count"], self._min_resource, self._max_resource),
        "resource_distance": get_range_reward(new_stats["resource_distance"], old_stats["resource_distance"], 0, 1),
        # "resource_clustering": get_range_reward(new_stats["resource_clustering"], old_stats["resource_clustering"], 0, 5),
        # "path_overlapping": get_range_reward(new_stats["path_overlapping"], old_stats["path_overlapping"], 0, 10),
        "chock_point": get_range_reward(new_stats["chock_point"], old_stats["chock_point"], 0, self._max_chock_points),
        "region": get_range_reward(new_stats["region"], old_stats["region"], 1, 1)
    }
    # calculate the total reward (commented-out metrics are excluded from the sum)
    return rewards["base_count"] * self._rewards["base_count"] + \
        rewards["base_distance"] * self._rewards["base_distance"] + \
        rewards["resource_count"] * self._rewards["resource_count"] + \
        rewards["region"] * self._rewards["region"] + \
        rewards["resource_distance"] * self._rewards["resource_distance"] + \
        rewards["chock_point"] * self._rewards["chock_point"]
def get_reward(self, new_stats, old_stats):
    # reward exactly one player and exit, a single region, potions/treasures/enemies
    # within limits, more enemies fought along the solution, and longer solutions
    rewards = {
        "player": get_range_reward(new_stats["player"], old_stats["player"], 1, 1),
        "exit": get_range_reward(new_stats["exit"], old_stats["exit"], 1, 1),
        "potions": get_range_reward(new_stats["potions"], old_stats["potions"], -np.inf, self._max_potions),
        "treasures": get_range_reward(new_stats["treasures"], old_stats["treasures"], -np.inf, self._max_treasures),
        "enemies": get_range_reward(new_stats["enemies"], old_stats["enemies"], 1, self._max_enemies),
        "regions": get_range_reward(new_stats["regions"], old_stats["regions"], 1, 1),
        "col-enemies": get_range_reward(new_stats["col-enemies"], old_stats["col-enemies"], np.inf, np.inf),
        "dist-win": get_range_reward(new_stats["dist-win"], old_stats["dist-win"], -np.inf, -np.inf),
        "sol-length": get_range_reward(new_stats["sol-length"], old_stats["sol-length"], np.inf, np.inf)
    }
    # calculate the total reward
    return rewards["player"] * self._rewards["player"] +\
        rewards["exit"] * self._rewards["exit"] +\
        rewards["enemies"] * self._rewards["enemies"] +\
        rewards["treasures"] * self._rewards["treasures"] +\
        rewards["potions"] * self._rewards["potions"] +\
        rewards["regions"] * self._rewards["regions"] +\
        rewards["col-enemies"] * self._rewards["col-enemies"] +\
        rewards["dist-win"] * self._rewards["dist-win"] +\
        rewards["sol-length"] * self._rewards["sol-length"]
def get_reward(self, new_stats, old_stats):
    # reward the target number of bases, small base/resource distance differences, balanced
    # resources and area control, obstacles within limits, and a single connected region
    rewards = {
        "base_count": get_range_reward(new_stats["base_count"], old_stats["base_count"], self._target_base, self._target_base),
        "base_distance": get_range_reward(new_stats["base_distance"], old_stats["base_distance"], 0, self._base_distance_diff),
        "resource_count": get_range_reward(new_stats["resource_count"], old_stats["resource_count"], self._min_resource, self._max_resource),
        "resource_distance": get_range_reward(new_stats["resource_distance"], old_stats["resource_distance"], 0, self._resource_distance_diff),
        "resource_balance": get_range_reward(new_stats["resource_balance"], old_stats["resource_balance"], 0, self._resource_balance_diff),
        "obstacle": get_range_reward(new_stats["obstacle"], old_stats["obstacle"], 0, self._max_obstacles),
        "region": get_range_reward(new_stats["region"], old_stats["region"], 1, 1),
        "area_control": get_range_reward(new_stats["area_control"], old_stats["area_control"], 0, self._area_control_diff),
    }
    # calculate the total reward
    return rewards["base_count"] * self._rewards["base_count"] + \
        rewards["base_distance"] * self._rewards["base_distance"] + \
        rewards["resource_count"] * self._rewards["resource_count"] + \
        rewards["region"] * self._rewards["region"] + \
        rewards["resource_distance"] * self._rewards["resource_distance"] + \
        rewards["resource_balance"] * self._rewards["resource_balance"] + \
        rewards["obstacle"] * self._rewards["obstacle"] + \
        rewards["area_control"] * self._rewards["area_control"]
def get_reward(self, new_stats, old_stats):
    # reward a solid floor, intact tubes, low noise, enemy counts in range, at least the
    # minimum empty tiles and jumps, and a level the agent can finish (dist-win -> 0)
    rewards = {
        "dist-floor": get_range_reward(new_stats["dist-floor"], old_stats["dist-floor"], 0, 0),
        "disjoint-tubes": get_range_reward(new_stats["disjoint-tubes"], old_stats["disjoint-tubes"], 0, 0),
        "enemies": get_range_reward(new_stats["enemies"], old_stats["enemies"], self._min_enemies, self._max_enemies),
        "empty": get_range_reward(new_stats["empty"], old_stats["empty"], self._min_empty, np.inf),
        "noise": get_range_reward(new_stats["noise"], old_stats["noise"], 0, 0),
        "jumps": get_range_reward(new_stats["jumps"], old_stats["jumps"], self._min_jumps, np.inf),
        "jumps-dist": get_range_reward(new_stats["jumps-dist"], old_stats["jumps-dist"], 0, 0),
        "dist-win": get_range_reward(new_stats["dist-win"], old_stats["dist-win"], 0, 0)
    }
    # calculate the total reward
    return rewards["dist-floor"] * self._rewards["dist-floor"] +\
        rewards["disjoint-tubes"] * self._rewards["disjoint-tubes"] +\
        rewards["enemies"] * self._rewards["enemies"] +\
        rewards["empty"] * self._rewards["empty"] +\
        rewards["noise"] * self._rewards["noise"] +\
        rewards["jumps"] * self._rewards["jumps"] +\
        rewards["jumps-dist"] * self._rewards["jumps-dist"] +\
        rewards["dist-win"] * self._rewards["dist-win"]
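# All eight variants share the same shape: per-metric range rewards scaled by the
# problem's weight table and summed by hand. A generic helper along these lines
# (hypothetical, not part of the repo) would collapse the repeated sums and make
# it harder to forget a term:
def total_reward(rewards, weights):
    # weighted sum over every metric that has a configured weight
    return sum(rewards[name] * weights[name] for name in weights)

# each method's return statement would then reduce to:
#     return total_reward(rewards, self._rewards)
# provided self._rewards only lists the metrics that should count (as the
# commented-out entries in the two base/resource variants suggest).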