Example 1
 def get_reward(self, new_stats, old_stats):
     # a longer path is rewarded and the map is driven toward a single region
     rewards = {
         "regions": get_range_reward(new_stats["regions"], old_stats["regions"], 1, 1),
         "path-length": get_range_reward(new_stats["path-length"],old_stats["path-length"], np.inf, np.inf)
     }
     #calculate the total reward
     return rewards["regions"] * self._rewards["regions"] +\
         rewards["path-length"] * self._rewards["path-length"]
Example 2
 def get_reward(self, new_stats, old_stats):
     # reward the required player, key, and door(s), a bounded number of enemies kept at a distance, a single region, and a longer path
     rewards = {
         "player":
         get_range_reward(new_stats["player"], old_stats["player"], 1, 1),
         "key":
         get_range_reward(new_stats["key"], old_stats["key"], 1, 1),
         "door":
         get_range_reward(new_stats["door"], old_stats["door"], 1, 10),
         "enemies":
         get_range_reward(new_stats["enemies"], old_stats["enemies"], 2,
                          self._max_enemies),
         "regions":
         get_range_reward(new_stats["regions"], old_stats["regions"], 1, 1),
         "nearest-enemy":
         get_range_reward(new_stats["nearest-enemy"],
                          old_stats["nearest-enemy"],
                          self._target_enemy_dist, np.inf),
         "path-length":
         get_range_reward(new_stats["path-length"],
                          old_stats["path-length"], np.inf, np.inf)
     }
     #calculate the total reward
     return rewards["player"] * self._rewards["player"] +\
         rewards["key"] * self._rewards["key"] +\
         rewards["door"] * self._rewards["door"] +\
         rewards["enemies"] * self._rewards["enemies"] +\
         rewards["regions"] * self._rewards["regions"] +\
         rewards["nearest-enemy"] * self._rewards["nearest-enemy"] +\
         rewards["path-length"] * self._rewards["path-length"]
Example 3
 def get_reward(self, new_stats, old_stats):
     # reward one player, matched numbers of crates and targets, a single region, a shorter distance to winning, and a longer solution
     rewards = {
         "player":
         get_range_reward(new_stats["player"], old_stats["player"], 1, 1),
         "crate":
         get_range_reward(new_stats["crate"], old_stats["crate"], 1,
                          self._max_crates),
         "target":
         get_range_reward(new_stats["target"], old_stats["target"], 1,
                          self._max_crates),
         "regions":
         get_range_reward(new_stats["regions"], old_stats["regions"], 1, 1),
         "ratio":
         get_range_reward(abs(new_stats["crate"] - new_stats["target"]),
                          abs(old_stats["crate"] - old_stats["target"]),
                          -np.inf, -np.inf),
         "dist-win":
         get_range_reward(new_stats["dist-win"], old_stats["dist-win"],
                          -np.inf, -np.inf),
         "sol-length":
         get_range_reward(len(new_stats["solution"]),
                          len(old_stats["solution"]), np.inf, np.inf)
     }
     #calculate the total reward
     return rewards["player"] * self._rewards["player"] +\
         rewards["crate"] * self._rewards["crate"] +\
         rewards["target"] * self._rewards["target"] +\
         rewards["regions"] * self._rewards["regions"] +\
         rewards["ratio"] * self._rewards["ratio"] +\
         rewards["dist-win"] * self._rewards["dist-win"] +\
         rewards["sol-length"] * self._rewards["sol-length"]
Example 4
 def get_reward(self, new_stats, old_stats):
     # reward one player, exit, and key, capped diamonds, enough spikes, a single region, more jumps, and a longer solution
     rewards = {
         "player":
         get_range_reward(new_stats["player"], old_stats["player"], 1, 1),
         "exit":
         get_range_reward(new_stats["exit"], old_stats["exit"], 1, 1),
         "diamonds":
         get_range_reward(new_stats["diamonds"], old_stats["diamonds"],
                          -np.inf, self._max_diamonds),
         "dist-floor":
         get_range_reward(new_stats["dist-floor"], old_stats["dist-floor"],
                          0, 0),
         "key":
         get_range_reward(new_stats["key"], old_stats["key"], 1, 1),
         "spikes":
         get_range_reward(new_stats["spikes"], old_stats["spikes"],
                          self._min_spikes, np.inf),
         "regions":
         get_range_reward(new_stats["regions"], old_stats["regions"], 1, 1),
         "num-jumps":
         get_range_reward(new_stats["num-jumps"], old_stats["num-jumps"],
                          np.inf, np.inf),
         "dist-win":
         get_range_reward(new_stats["dist-win"], old_stats["dist-win"],
                          -np.inf, -np.inf),
         "sol-length":
         get_range_reward(new_stats["sol-length"], old_stats["sol-length"],
                          np.inf, np.inf)
     }
     #calculate the total reward
     return rewards["player"] * self._rewards["player"] +\
         rewards["dist-floor"] * self._rewards["dist-floor"] +\
         rewards["exit"] * self._rewards["exit"] +\
         rewards["spikes"] * self._rewards["spikes"] +\
         rewards["diamonds"] * self._rewards["diamonds"] +\
         rewards["key"] * self._rewards["key"] +\
         rewards["regions"] * self._rewards["regions"] +\
         rewards["num-jumps"] * self._rewards["num-jumps"] +\
         rewards["dist-win"] * self._rewards["dist-win"] +\
         rewards["sol-length"] * self._rewards["sol-length"]
Example 5
 def get_reward(self, new_stats, old_stats):
     # reward the target base count, well-spaced bases and resources, a limited number of choke points, and a single region
     rewards = {
         "base_count":
         get_range_reward(new_stats["base_count"], old_stats["base_count"],
                          self._target_base, self._target_base),
         "base_distance":
         get_range_reward(new_stats["base_distance"],
                          old_stats["base_distance"], self._width / 2,
                          self._width * 2),
         # "base_space": get_range_reward(new_stats["base_space"], old_stats["base_space"], 0, 10),
         # "asymmetry": get_range_reward(new_stats["asymmetry"], old_stats["asymmetry"], 0, 5),
         "resource_count":
         get_range_reward(new_stats["resource_count"],
                          old_stats["resource_count"], self._min_resource,
                          self._max_resource),
         "resource_distance":
         get_range_reward(new_stats["resource_distance"],
                          old_stats["resource_distance"], 0, 1),
         # "resource_clustering": get_range_reward(new_stats["resource_clustering"], old_stats["resource_clustering"], 0, 5),
         # "path_overlapping": get_range_reward(new_stats["path_overlapping"], old_stats["path_overlapping"], 0, 10),
         "chock_point":
         get_range_reward(new_stats["chock_point"],
                          old_stats["chock_point"], 0,
                          self._max_chock_points),
         "region":
         get_range_reward(new_stats["region"], old_stats["region"], 1, 1)
     }
     # calculate the total reward
     return rewards["base_count"] * self._rewards["base_count"] + \
         rewards["base_distance"] * self._rewards["base_distance"] + \
         rewards["resource_count"] * self._rewards["resource_count"] + \
         rewards["region"] * self._rewards["region"] + \
         rewards["resource_distance"] * self._rewards["resource_distance"] + \
         rewards["chock_point"] * self._rewards["chock_point"]
Example 6
 def get_reward(self, new_stats, old_stats):
     # reward one player and exit, capped potions and treasures, bounded enemies, a single region, a higher "col-enemies" count, and a longer solution
     rewards = {
         "player":
         get_range_reward(new_stats["player"], old_stats["player"], 1, 1),
         "exit":
         get_range_reward(new_stats["exit"], old_stats["exit"], 1, 1),
         "potions":
         get_range_reward(new_stats["potions"], old_stats["potions"],
                          -np.inf, self._max_potions),
         "treasures":
         get_range_reward(new_stats["treasures"], old_stats["treasures"],
                          -np.inf, self._max_treasures),
         "enemies":
         get_range_reward(new_stats["enemies"], old_stats["enemies"], 1,
                          self._max_enemies),
         "regions":
         get_range_reward(new_stats["regions"], old_stats["regions"], 1, 1),
         "col-enemies":
         get_range_reward(new_stats["col-enemies"],
                          old_stats["col-enemies"], np.inf, np.inf),
         "dist-win":
         get_range_reward(new_stats["dist-win"], old_stats["dist-win"],
                          -np.inf, -np.inf),
         "sol-length":
         get_range_reward(new_stats["sol-length"], old_stats["sol-length"],
                          np.inf, np.inf)
     }
     #calculate the total reward
     return rewards["player"] * self._rewards["player"] +\
         rewards["exit"] * self._rewards["exit"] +\
         rewards["enemies"] * self._rewards["enemies"] +\
         rewards["treasures"] * self._rewards["treasures"] +\
         rewards["potions"] * self._rewards["potions"] +\
         rewards["regions"] * self._rewards["regions"] +\
         rewards["col-enemies"] * self._rewards["col-enemies"] +\
         rewards["dist-win"] * self._rewards["dist-win"] +\
         rewards["sol-length"] * self._rewards["sol-length"]
Example 7
 def get_reward(self, new_stats, old_stats):
     # reward the target base count, bounded base/resource spacing and balance, limited obstacles, balanced area control, and a single region
     rewards = {
         "base_count":
         get_range_reward(new_stats["base_count"], old_stats["base_count"],
                          self._target_base, self._target_base),
         "base_distance":
         get_range_reward(new_stats["base_distance"],
                          old_stats["base_distance"], 0,
                          self._base_distance_diff),
         "resource_count":
         get_range_reward(new_stats["resource_count"],
                          old_stats["resource_count"], self._min_resource,
                          self._max_resource),
         "resource_distance":
         get_range_reward(new_stats["resource_distance"],
                          old_stats["resource_distance"], 0,
                          self._resource_distance_diff),
         "resource_balance":
         get_range_reward(new_stats["resource_balance"],
                          old_stats["resource_balance"], 0,
                          self._resource_balance_diff),
         "obstacle":
         get_range_reward(new_stats["obstacle"], old_stats["obstacle"], 0,
                          self._max_obstacles),
         "region":
         get_range_reward(new_stats["region"], old_stats["region"], 1, 1),
         "area_control":
         get_range_reward(new_stats["area_control"],
                          old_stats["area_control"], 0,
                          self._area_control_diff),
     }
     # calculate the total reward
     return rewards["base_count"] * self._rewards["base_count"] + \
            rewards["base_distance"] * self._rewards["base_distance"] + \
            rewards["resource_count"] * self._rewards["resource_count"] + \
            rewards["region"] * self._rewards["region"] + \
            rewards["resource_distance"] * self._rewards["resource_distance"] + \
            rewards["resource_balance"] * self._rewards["resource_balance"] + \
            rewards["obstacle"] * self._rewards["obstacle"] + \
            rewards["area_control"] * self._rewards["area_control"]
Example 8
 def get_reward(self, new_stats, old_stats):
     # reward a well-formed, completable level: intact floor and tubes, bounded enemies, enough empty space and jumps, no noise, and dist-win driven to 0
     rewards = {
         "dist-floor": get_range_reward(new_stats["dist-floor"], old_stats["dist-floor"], 0, 0),
         "disjoint-tubes": get_range_reward(new_stats["disjoint-tubes"], old_stats["disjoint-tubes"], 0, 0),
         "enemies": get_range_reward(new_stats["enemies"], old_stats["enemies"], self._min_enemies, self._max_enemies),
         "empty": get_range_reward(new_stats["empty"], old_stats["empty"], self._min_empty, np.inf),
         "noise": get_range_reward(new_stats["noise"], old_stats["noise"], 0, 0),
         "jumps": get_range_reward(new_stats["jumps"], old_stats["jumps"], self._min_jumps, np.inf),
         "jumps-dist": get_range_reward(new_stats["jumps-dist"], old_stats["jumps-dist"], 0, 0),
         "dist-win": get_range_reward(new_stats["dist-win"], old_stats["dist-win"], 0, 0)
     }
     #calculate the total reward
     return rewards["dist-floor"] * self._rewards["dist-floor"] +\
         rewards["disjoint-tubes"] * self._rewards["disjoint-tubes"] +\
         rewards["enemies"] * self._rewards["enemies"] +\
         rewards["empty"] * self._rewards["empty"] +\
         rewards["noise"] * self._rewards["noise"] +\
         rewards["jumps"] * self._rewards["jumps"] +\
         rewards["jumps-dist"] * self._rewards["jumps-dist"] +\
         rewards["dist-win"] * self._rewards["dist-win"]