# Example #1
    def spawn_resource(self):
        """
        Spawn a new resource in the source area if it is possible to do so.

        Picks random candidate cells until a free one is found, registers the
        new resource in the resource map and the bookkeeping lists, and makes
        a best-effort attempt to add it to the rendering.

        :return: (x, y) coordinate of the new resource if successful, None otherwise
        """
        # If there is no space to spawn new resources, don't spawn
        if self.source_is_full():
            return None

        while True:
            x, y = self.generate_resource_position()

            # Retry until the candidate cell is free both in the map and in
            # the position list (they can disagree while a resource is moved)
            if self.resource_map[y][x] != 0 or (x, y) in self.resource_positions:
                continue

            self.latest_resource_id += 1
            self.resource_map[y][x] = self.latest_resource_id
            self.resource_positions.append((x, y))
            self.resource_carried_by.append([])
            self.current_num_resources += 1

            # Rendering is optional (e.g. headless runs): swallow failures
            # from the rendering backend, but never from our own bookkeeping
            # above. Narrowed from a bare ``except:`` which also swallowed
            # KeyboardInterrupt/SystemExit.
            try:
                self.resource_transforms.append(rendering.Transform())
                self.add_resource_to_rendering(self.latest_resource_id)
            except Exception:
                pass

            return x, y
# Example #2
 def add_resource_to_rendering(self, resource_id):
     """Create the circle geometry for one resource and register it with the viewer."""
     radius = self.resource_width / 2 * self.scale
     circle = rendering.make_circle(radius)
     red, green, blue = self.resource_colour
     circle.set_color(red, green, blue)
     # Bind a zero translation first, then the resource's persistent
     # transform so later set_translation calls move this geometry
     circle.add_attr(rendering.Transform(translation=(0, 0)))
     circle.add_attr(self.resource_transforms[resource_id])
     if self.viewer is not None:
         self.viewer.add_geom(circle)
# Example #3
    def draw_arena_segment(self, top, bottom, left, right, rgb_tuple):
        """
        Helper function that creates the geometry for a segment of the arena. Intended to be used by the viewer

        Shifts the world coordinates into the positive quadrant and scales
        them to pixels before building the rectangle.

        :param top: upper y boundary of the segment (world units)
        :param bottom: lower y boundary of the segment (world units)
        :param left: left x boundary of the segment (world units)
        :param right: right x boundary of the segment (world units)
        :param rgb_tuple: (r, g, b) colour of the segment
        :return: A FilledPolygon object that can be added to the viewer using add_geom
        """
        left_px = (left + self.x_shift) * self.scale
        right_px = (right + self.x_shift) * self.scale
        top_px = (top + self.y_shift) * self.scale
        bottom_px = (bottom + self.y_shift) * self.scale

        corners = [(left_px, bottom_px), (left_px, top_px),
                   (right_px, top_px), (right_px, bottom_px)]
        arena_segment = rendering.FilledPolygon(corners)
        arena_segment.add_attr(rendering.Transform(translation=(0, 0)))
        arena_segment.add_attr(rendering.Transform())
        arena_segment.set_color(rgb_tuple[0], rgb_tuple[1], rgb_tuple[2])
        return arena_segment
    def draw_arena_segment(self, top, bottom, rgb_tuple):
        """
        Helper function that creates the geometry for a segment of the arena. Intended to be used by the viewer

        The segment always spans the full arena width; only the vertical
        extent is caller-controlled.

        :param top: upper y boundary of the segment (world units)
        :param bottom: lower y boundary of the segment (world units)
        :param rgb_tuple: (r, g, b) colour of the segment
        :return: A FilledPolygon object that can be added to the viewer using add_geom
        """
        left_px = self.arena_constraints["x_min"] * self.scale
        right_px = self.arena_constraints["x_max"] * self.scale
        top_px = top * self.scale
        bottom_px = bottom * self.scale

        corners = [(left_px, bottom_px), (left_px, top_px),
                   (right_px, top_px), (right_px, bottom_px)]
        arena_segment = rendering.FilledPolygon(corners)
        arena_segment.add_attr(rendering.Transform(translation=(0, 0)))
        arena_segment.add_attr(rendering.Transform())
        arena_segment.set_color(rgb_tuple[0], rgb_tuple[1], rgb_tuple[2])
        return arena_segment
# Example #5
    def render(self, mode='human'):
        """
        Renders the environment, placing all agents in appropriate positions
        :param mode: 'human' draws to a window; 'rgb_array' returns pixel data
        :return: the value returned by the viewer's render() call (an RGB
            array when mode == 'rgb_array')
        """

        screen_width = self.total_width * self.scale
        screen_height = self.total_height * self.scale

        # The viewer and all static geometry are built once, lazily, on the
        # first call; subsequent calls only update the transforms below.
        if self.viewer is None:
            self.viewer = rendering.Viewer(screen_width, screen_height)

            # Draw up hall
            up_hall = self.draw_arena_segment(
                self.arena_constraints["y_max"],
                self.arena_constraints["y_max"] - self.hall_size,
                self.arena_constraints["x_min"],
                self.arena_constraints["x_min"] + self.start_zone_size,
                self.hall_colour)
            self.viewer.add_geom(up_hall)

            # Draw down hall
            down_hall = self.draw_arena_segment(
                self.arena_constraints["y_max"] - self.hall_size -
                self.start_zone_size, self.arena_constraints["y_min"],
                self.arena_constraints["x_min"],
                self.arena_constraints["x_min"] + self.start_zone_size,
                self.hall_colour)
            self.viewer.add_geom(down_hall)

            # Draw right hall
            right_hall = self.draw_arena_segment(
                self.arena_constraints["y_min"] + self.hall_size +
                self.start_zone_size,
                self.arena_constraints["y_min"] + self.hall_size,
                self.arena_constraints["x_min"] + self.start_zone_size,
                self.arena_constraints["x_max"], self.hall_colour)
            self.viewer.add_geom(right_hall)

            # Draw start zone (drawn last among segments so it sits on top
            # where it overlaps the halls)
            start_zone = self.draw_arena_segment(
                self.arena_constraints["y_min"] + self.hall_size +
                self.start_zone_size,
                self.arena_constraints["y_min"] + self.hall_size,
                self.arena_constraints["x_min"],
                self.arena_constraints["x_min"] + self.start_zone_size,
                self.start_zone_colour)
            self.viewer.add_geom(start_zone)

            # Draw grid
            grid_lines = self.draw_grid()
            for line in grid_lines:
                self.viewer.add_geom(line)

            # Draw agent(s): each circle is bound to a persistent transform
            # so later set_translation calls move it
            for i in range(self.num_agents):
                agent = rendering.make_circle(self.agent_width / 2 *
                                              self.scale)
                agent.set_color(self.agent_colour[0], self.agent_colour[1],
                                self.agent_colour[2])
                agent.add_attr(rendering.Transform(translation=(0, 0)))
                agent.add_attr(self.agent_transforms[i])
                self.viewer.add_geom(agent)

            # Draw obstacles (same persistent-transform scheme as agents)
            for i in range(len(self.obstacle_coordinates)):
                obstacle = rendering.make_circle(self.obstacle_width / 2 *
                                                 self.scale)
                obstacle.set_color(self.obstacle_colour[0],
                                   self.obstacle_colour[1],
                                   self.obstacle_colour[2])
                obstacle.add_attr(rendering.Transform(translation=(0, 0)))
                obstacle.add_attr(self.obstacle_transforms[i])
                self.viewer.add_geom(obstacle)

        # Set position of agent(s); +0.5 centres each agent in its grid cell
        for i in range(self.num_agents):
            self.agent_transforms[i].set_translation(
                (self.agent_positions[i][0] - self.arena_constraints["x_min"] +
                 0.5) * self.scale,
                (self.agent_positions[i][1] - self.arena_constraints["y_min"] +
                 0.5) * self.scale)

        # Set position of obstacle(s); iteration order of the coordinates
        # must match the order the transforms were created in
        for i, key in enumerate(self.obstacle_coordinates):
            self.obstacle_transforms[i].set_translation(
                (key[0] - self.arena_constraints["x_min"] + 0.5) * self.scale,
                (key[1] - self.arena_constraints["y_min"] + 0.5) * self.scale)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')
# Example #6
    def __init__(self, parameter_filename=None):
        """
        Initialise constants and variables for agents, obstacles and the
        T-maze environment from a JSON parameter file.

        :param parameter_filename: path to the JSON parameter file
        :raises RuntimeError: if no parameter file is specified, or if more
            than one episode is requested
        """
        if parameter_filename is None:
            raise RuntimeError(
                "No parameter file specified for the environment")

        # Use a context manager so the parameter file handle is not leaked
        # (was ``open(...).read()`` with no close)
        with open(parameter_filename) as parameter_file:
            parameter_dictionary = json.load(parameter_file)

        # Hoist the repeated ['environment']['tmaze'] lookups
        tmaze_params = parameter_dictionary['environment']['tmaze']

        self.seed_value = tmaze_params['env_seed']
        self.np_random = np.random.RandomState(self.seed_value)

        # Environment dimensions
        self.hall_size = tmaze_params["hall_size"]
        self.start_zone_size = tmaze_params["start_zone_size"]

        # Odd-sized start zones need one extra cell on the positive side so
        # the maze stays centred on the origin
        if self.start_zone_size % 2 == 0:
            self.offset = 0
        else:
            self.offset = 1

        self.arena_constraints = {
            "x_min": -(self.start_zone_size // 2),
            "x_max":
            (self.start_zone_size // 2) + self.offset + self.hall_size,
            "y_min": -(self.start_zone_size // 2) - self.hall_size,
            "y_max": (self.start_zone_size // 2) + self.offset + self.hall_size
        }

        # Axis-aligned boxes within which obstacles may be generated,
        # one per hall of the T
        self.obstacle_generation_constraints = {
            "UP": {
                "x_min": self.arena_constraints["x_min"],
                "x_max":
                self.arena_constraints["x_min"] + self.start_zone_size,
                "y_min": self.arena_constraints["y_min"] + self.hall_size +
                self.start_zone_size,
                "y_max": self.arena_constraints["y_max"]
            },
            "DOWN": {
                "x_min": self.arena_constraints["x_min"],
                "x_max":
                self.arena_constraints["x_min"] + self.start_zone_size,
                "y_min": self.arena_constraints["y_min"],
                "y_max": self.arena_constraints["y_min"] + self.hall_size
            },
            "RIGHT": {
                "x_min":
                self.arena_constraints["x_min"] + self.start_zone_size,
                "x_max":
                self.arena_constraints["x_max"],
                "y_min":
                self.arena_constraints["y_min"] + self.hall_size,
                "y_max":
                self.arena_constraints["y_min"] + self.hall_size +
                self.start_zone_size
            }
        }

        self.total_width = self.start_zone_size + self.hall_size
        # Two vertical halls (up and down) plus the start zone
        self.total_height = self.start_zone_size + self.hall_size + self.hall_size

        self.x_shift = abs(self.arena_constraints["x_min"])
        self.y_shift = abs(self.arena_constraints["y_min"])

        self.num_obstacles_per_hall = tmaze_params["num_obstacles_per_hall"]
        self.obstacle_coordinates = {}

        self.place_obstacles()

        # Constants and variables
        self.agent_width = 0.8
        self.obstacle_width = 1.0
        self.num_agents = tmaze_params["num_agents"]
        self.num_episodes = tmaze_params["num_episodes"]
        self.episode_length = tmaze_params["episode_length"]
        self.reward_structure = tmaze_params["reward_structure"]

        if self.num_episodes > 1:
            raise RuntimeError(
                "All episodes are identical in TMaze. Modify the reset function to add this functionality"
            )

        if self.reward_structure == "sparse":
            # Goals sit halfway along each hall
            self.top_goal = self.arena_constraints["y_max"] - self.hall_size // 2
            self.bottom_goal = self.arena_constraints["y_min"] + self.hall_size // 2
            self.right_goal = self.arena_constraints["x_max"] - self.hall_size // 2

        self.specialised_actions = 0
        self.total_rewarded_actions = 0

        # Novelty constants
        self.bc_measure = tmaze_params['bc_measure']
        self.avg_pos_for_agent = [[0, 0] for _ in range(self.num_agents)]

        # Rendering constants
        self.scale = 40
        self.start_zone_colour = [0.5, 0.5, 0.5]
        self.hall_colour = [0.25, 0.5, 0.5]
        self.agent_colour = [0, 0, 0.25]
        self.obstacle_colour = [0.0, 0.0, 0.0]

        # Rendering variables
        self.viewer = None
        self.agent_transforms = None
        self.obstacle_transforms = None

        # Rendering may be unavailable (e.g. headless machine); transforms
        # are only needed once render() is actually called. Narrowed from a
        # bare ``except:``.
        try:
            self.agent_transforms = [
                rendering.Transform() for _ in range(self.num_agents)
            ]
            self.obstacle_transforms = [
                rendering.Transform()
                for _ in range(self.num_obstacles_per_hall * 3)
            ]
        except Exception:
            pass

        self.agent_positions = self.generate_agent_positions()

        # Step variables
        self.behaviour_map = [self.up, self.down, self.right, self.left]
        self.action_name = ["UP", "DOWN", "RIGHT", "LEFT"]

        # Observation space
        # Agent's x-coordinate and y-coordinate
        self.observation_space_size = 2

        # Action space
        # 0- Forward, 1- Backward, 2- Right, 3- Left
        self.action_space_size = 4
# Example #7
    def render(self, mode='human'):
        """
        Renders the environment, placing all agents and resources in appropriate positions
        :param mode: 'human' draws to a window; 'rgb_array' returns pixel data
        :return: the value returned by the viewer's render() call (an RGB
            array when mode == 'rgb_array')
        """

        screen_width = self.arena_constraints["x_max"] * self.scale
        screen_height = self.arena_constraints["y_max"] * self.scale

        # Viewer and static geometry are created lazily on the first call;
        # subsequent calls only update the transforms below.
        if self.viewer is None:
            self.viewer = rendering.Viewer(screen_width, screen_height)

            # Draw nest
            nest = self.draw_arena_segment(self.nest_size, self.nest_start, self.nest_colour)
            self.viewer.add_geom(nest)

            # Draw cache
            cache = self.draw_arena_segment(self.cache_start + self.cache_size,
                                            self.cache_start, self.cache_colour)
            self.viewer.add_geom(cache)

            # Draw slope
            slope = self.draw_arena_segment(self.slope_start + self.slope_size,
                                            self.slope_start, self.slope_colour)
            self.viewer.add_geom(slope)

            # Draw source
            source = self.draw_arena_segment(self.source_start + self.source_size,
                                             self.source_start, self.source_colour)
            self.viewer.add_geom(source)

            # Draw grid
            grid_lines = self.draw_grid()
            for line in grid_lines:
                self.viewer.add_geom(line)

            # Draw agent(s); each circle is bound to a persistent transform
            # so later set_translation calls move it
            for i in range(self.num_agents):
                agent = rendering.make_circle(self.agent_width / 2 * self.scale)
                agent.set_color(self.agent_colour[0], self.agent_colour[1], self.agent_colour[2])
                agent.add_attr(
                    rendering.Transform(
                        translation=(
                            0,
                            0)))
                agent.add_attr(self.agent_transforms[i])
                self.viewer.add_geom(agent)

            # Draw resource(s)
            # NOTE(review): only the initial default_num_resources geoms are
            # created here; resources spawned later appear to be added via
            # add_resource_to_rendering — confirm against spawn_resource
            for i in range(self.default_num_resources):
                resource = rendering.make_circle(self.resource_width / 2 * self.scale)
                resource.set_color(self.resource_colour[0], self.resource_colour[1], self.resource_colour[2])
                resource.add_attr(
                    rendering.Transform(
                        translation=(
                            0,
                            0)))
                resource.add_attr(self.resource_transforms[i])
                self.viewer.add_geom(resource)

        # Set position of agent(s); +0.5 centres each agent in its grid cell
        for i in range(self.num_agents):
            self.agent_transforms[i].set_translation(
                (self.agent_positions[i][0] - self.arena_constraints["x_min"] + 0.5) * self.scale,
                (self.agent_positions[i][1] - self.arena_constraints["y_min"] + 0.5) * self.scale)

        # Set position of resource(s); iterates the full position list, which
        # may have grown beyond default_num_resources
        for i in range(len(self.resource_positions)):
            self.resource_transforms[i].set_translation(
                (self.resource_positions[i][0] - self.arena_constraints["x_min"] + 0.5) * self.scale,
                (self.resource_positions[i][1] - self.arena_constraints["y_min"] + 0.5) * self.scale)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')
# Example #8
    def reset(self):
        """
        Reset the environment to its initial state and return the first
        observation.

        Rebuilds the agent and resource maps, re-places every agent and every
        resource at random free cells, and resets all per-episode bookkeeping.

        :return: the per-agent observations from get_agent_observations()
        :raises AssertionError: if the nest/source cannot fit all
            agents/resources
        """

        # Make sure agents and resources will all fit in the environment
        assert self.num_agents <= self.arena_constraints[
            "x_max"] * self.nest_size, "Not enough room in the nest for all agents"
        assert self.default_num_resources <= self.arena_constraints[
            "x_max"] * self.source_size, "Not enough room in the source for all resources"

        # Closing may fail if no viewer was ever opened (headless runs).
        # Narrowed from a bare ``except:``.
        try:
            self.viewer.close()
        except Exception:
            pass

        self.viewer = None

        self.resource_positions = [None] * self.default_num_resources

        # One independent carrier list per resource (never use [[]] * n,
        # which would alias a single shared list)
        self.resource_carried_by = [[] for _ in range(self.default_num_resources)]

        # Rendering may be unavailable (e.g. no display); the transforms are
        # only needed once a viewer is created
        try:
            self.resource_transforms = [rendering.Transform() for _ in range(self.default_num_resources)]
        except Exception:
            pass

        self.latest_resource_id = self.default_num_resources - 1

        # Creates empty state
        self.agent_map = self.generate_arena()  # Empty agent map
        self.resource_map = self.generate_arena()  # Empty resource map

        # Places all agents at random free cells (ids are 1-based in the map)
        for i in range(self.num_agents):
            agent_placed = False
            while not agent_placed:
                x, y = self.generate_agent_position()
                if self.agent_map[y][x] == 0:
                    self.agent_map[y][x] = i + 1
                    self.agent_positions[i] = (x, y)
                    agent_placed = True

        # Places all resources at random free cells
        for i in range(self.default_num_resources):
            resource_placed = False
            while not resource_placed:
                x, y = self.generate_resource_position()
                if self.resource_map[y][x] == 0:
                    self.resource_map[y][x] = i + 1
                    self.resource_positions[i] = (x, y)
                    resource_placed = True

        # NOTE: To change this, must also change the observation space in __init__
        self.state = np.concatenate((self.agent_map, self.resource_map), axis=0)

        # Reset variables that were changed during runtime
        self.has_resource = [None] * self.num_agents
        self.current_num_resources = self.default_num_resources

        return self.get_agent_observations()
# Example #9
    def __init__(self, parameter_filename=None):
        """
        Initialises constants and variables for agents, resources and environment
        :param parameter_filename: path to the JSON parameter file
        :raises RuntimeError: if no parameter file is specified
        """
        if parameter_filename is None:
            raise RuntimeError("No parameter file specified for the environment")

        # Use a context manager so the parameter file handle is not leaked
        # (was ``open(...).read()`` with no close)
        with open(parameter_filename) as parameter_file:
            parameter_dictionary = json.load(parameter_file)

        # Hoist the repeated ['environment'] lookups
        env_params = parameter_dictionary['environment']

        try:
            self.observation_version = env_params['observation_version']
        except KeyError:
            self.observation_version = "complex"

        # Environment dimensions (nest -> cache -> slope -> source,
        # bottom to top of the arena)
        self.arena_constraints = {"x_min": 0, "x_max": env_params['arena_width'], "y_min": 0,
                                  "y_max": env_params['arena_length']}
        self.nest_size = env_params['cache_start']
        self.cache_size = env_params['slope_start'] - env_params['cache_start']
        self.slope_size = env_params['source_start'] - env_params['slope_start']
        self.source_size = env_params['arena_length'] - env_params['source_start']
        self.nest_start = self.arena_constraints["y_min"]
        self.cache_start = env_params['cache_start']
        self.slope_start = env_params['slope_start']
        self.source_start = env_params['source_start']
        self.num_arena_tiles = self.arena_constraints["x_max"] * self.arena_constraints["y_max"]
        self.sliding_speed = env_params['sliding_speed']

        # Agent constants
        self.agent_width = 0.8
        self.sensor_range = env_params['sensor_range']

        # Resource constants
        self.resource_width = 0.6
        self.base_cost = env_params['base_cost']
        self.reward_for_resource = env_params['resource_reward']
        self.upward_cost_factor = env_params['upward_cost_factor']
        self.downward_cost_factor = env_params['downward_cost_factor']
        self.carry_factor = env_params['carry_factor']

        # Other constants and variables
        self.num_agents = env_params['num_agents']
        self.default_num_resources = env_params['num_resources']
        self.current_num_resources = self.default_num_resources
        self.latest_resource_id = self.default_num_resources - 1
        # Off-grid position used for resources removed from the arena
        self.dumping_position = (-10, -10)

        # Rendering constants
        self.scale = 50  # Scale for rendering
        self.nest_colour = [0.25, 0.25, 0.25]
        self.cache_colour = [0.5, 0.5, 0.5]
        self.slope_colour = [0.5, 0.25, 0.25]
        self.source_colour = [0.25, 0.5, 0.5]
        self.agent_colour = [0, 0, 0.25]
        self.resource_colour = [0, 0.25, 0]

        # Rendering variables
        self.viewer = None
        self.agent_transforms = None
        self.resource_transforms = None

        # Rendering may be unavailable (e.g. headless machine); the
        # transforms are only needed once render() is actually called.
        # Narrowed from a bare ``except:``.
        try:
            self.agent_transforms = [rendering.Transform() for _ in range(self.num_agents)]
            self.resource_transforms = [rendering.Transform() for _ in range(self.default_num_resources)]
        except Exception:
            pass

        self.agent_positions = [None] * self.num_agents
        self.resource_positions = [None] * self.default_num_resources
        # BUG FIX: was ``[[]] * n``, which aliased ONE shared list across all
        # resources (appending a carrier to one resource appeared on every
        # resource). Each resource needs its own list — reset() already uses
        # this comprehension form.
        self.resource_carried_by = [[] for _ in range(self.default_num_resources)]

        # Step variables
        self.behaviour_map = [self.forward_step, self.backward_step, self.left_step, self.right_step]
        self.action_name = ["FORWARD", "BACKWARD", "LEFT", "RIGHT", "PICKUP", "DROP"]
        self.has_resource = [None for i in range(self.num_agents)]

        self.seed_value = parameter_dictionary['general']['seed']
        self.np_random = np.random.RandomState(self.seed_value)

        # Observation space (additional details explained in self.get_agent_observations())

        # Range=1 -> 9 tiles. Range=2 -> 25 tiles. Agent at the center.
        self.tiles_in_sensing_range = (2 * self.sensor_range + 1) ** 2

        if self.observation_version == "simple":
            # 1 bit for each tile in range + 4 bits for location + 1 bit for object detection + 1 bit for object possession
            self.observation_space_size = self.tiles_in_sensing_range + 4 + 1 + 1
        else:
            # Tiles in sensing range are onehotencoded + 4 bits for location + 1 bit for object possession
            self.observation_space_size = self.tiles_in_sensing_range * 4 + 4 + 1

        # Action space
        # 0- Forward, 1- Backward, 2- Left, 3- Right, 4- Pick up, 5- Drop
        self.action_space_size = 6
    def render(self, mode='human'):
        """
        Renders the environment, placing all agents and boxes in appropriate positions
        :param mode: 'human' draws to a window; 'rgb_array' returns pixel data
        :return: the value returned by the viewer's render() call (an RGB
            array when mode == 'rgb_array')
        """

        screen_width = self.arena_constraints["x_max"] * self.scale
        screen_height = self.arena_constraints["y_max"] * self.scale

        # Viewer and static geometry are created lazily on the first call;
        # subsequent calls only update the transforms below.
        if self.viewer is None:
            self.viewer = rendering.Viewer(screen_width, screen_height)
            home_top = self.arena_constraints["y_min"] + self.home_length
            main_top = self.arena_constraints["y_max"] - self.goal_length
            goal_top = self.arena_constraints["y_max"]

            # Draw home
            home = self.draw_arena_segment(home_top,
                                           self.arena_constraints["y_min"],
                                           self.home_colour)
            self.viewer.add_geom(home)

            # Draw main
            main_area = self.draw_arena_segment(main_top, home_top,
                                                self.main_colour)
            self.viewer.add_geom(main_area)

            # Draw goal
            goal = self.draw_arena_segment(goal_top, main_top,
                                           self.goal_colour)
            self.viewer.add_geom(goal)

            # Draw grid
            grid_lines = self.draw_grid()
            for line in grid_lines:
                self.viewer.add_geom(line)

            # Draw agent(s) as triangles bound to persistent transforms so
            # later set_translation calls move them
            for i in range(self.num_agents):
                agent = rendering.FilledPolygon([
                    (0, 0), (self.agent_width * self.scale, 0),
                    (self.agent_width / 2 * self.scale,
                     self.agent_height * self.scale)
                ])
                agent.set_color(self.agent_colour[0], self.agent_colour[1],
                                self.agent_colour[2])
                agent.add_attr(
                    rendering.Transform(
                        translation=(-self.agent_width / 2 * self.scale,
                                     -self.agent_height / 2 * self.scale)))
                agent.add_attr(self.agent_transforms[i])
                self.viewer.add_geom(agent)

            # Draw box(es). Box indices are ordered small, then medium, then
            # large.
            # NOTE(review): t is computed from small_box_width in all three
            # branches — possibly intended to use medium/large width for the
            # bigger boxes; confirm. Also, if medium and large boxes ever
            # coexisted, the medium branch would shadow the large one (the
            # __init__ visible in this file never configures both at once).
            for i in range(self.num_small_boxes + self.num_medium_boxes +
                           self.num_large_boxes):
                if i < self.num_small_boxes and self.num_small_boxes > 0:
                    l, r, t, b = -self.small_box_width / 2 * self.scale, self.small_box_width / 2 * self.scale, self.small_box_width * self.scale, 0

                elif i >= self.num_small_boxes and self.num_medium_boxes > 0:
                    l, r, t, b = -self.medium_box_width / 2 * self.scale, self.medium_box_width / 2 * self.scale, self.small_box_width * self.scale, 0

                elif i >= self.num_small_boxes and self.num_large_boxes > 0:
                    l, r, t, b = -self.large_box_width / 2 * self.scale, self.large_box_width / 2 * self.scale, self.small_box_width * self.scale, 0

                box = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
                box.set_color(self.box_colour[0], self.box_colour[1],
                              self.box_colour[2])
                box.add_attr(rendering.Transform(translation=(0, 0)))
                box.add_attr(self.box_transforms[i])
                self.viewer.add_geom(box)

        # Set position of agent(s); +0.5 centres each agent in its grid cell
        for i in range(self.num_agents):
            self.agent_transforms[i].set_translation(
                (self.agent_positions[i][0] - self.arena_constraints["x_min"] +
                 0.5) * self.scale,
                (self.agent_positions[i][1] - self.arena_constraints["y_min"] +
                 0.5) * self.scale)

        # Set position of box(es); the x-offset depends on the box size so
        # the geometry is centred on the cells the box occupies
        # (box_details is (x, y, size_name))
        for box_id, box_details in self.boxes_in_arena.items():
            if box_details[2] == "small":
                self.box_transforms[box_id].set_translation(
                    (box_details[0] - self.arena_constraints["x_min"] + 0.5) *
                    self.scale,
                    (box_details[1] - self.arena_constraints["y_min"] + 0.125)
                    * self.scale)

            elif box_details[2] == "medium":
                self.box_transforms[box_id].set_translation(
                    (box_details[0] - self.arena_constraints["x_min"] +
                     (0.5 * self.medium_box_size)) * self.scale,
                    (box_details[1] - self.arena_constraints["y_min"] + 0.125)
                    * self.scale)

            elif box_details[2] == "large":
                self.box_transforms[box_id].set_translation(
                    (box_details[0] - self.arena_constraints["x_min"] +
                     (0.5 * self.num_agents)) * self.scale,
                    (box_details[1] - self.arena_constraints["y_min"] + 0.125)
                    * self.scale)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')
    def __init__(self, parameter_filename=None):
        if parameter_filename is None:
            raise RuntimeError(
                "No parameter file specified for the environment")

        parameter_dictionary = json.loads(open(parameter_filename).read())

        self.seed_value = parameter_dictionary['environment']['box_pushing'][
            'env_seed']
        self.np_random = np.random.RandomState(self.seed_value)

        # Cooperation parameters

        if parameter_dictionary['environment']['box_pushing'][
                'defection'] == "True":
            self.defection = True
        elif parameter_dictionary['environment']['box_pushing'][
                'defection'] == "False":
            self.defection = False

        if parameter_dictionary['environment']['box_pushing'][
                'partial_cooperation'] == "True":
            self.partial_cooperation = True
        elif parameter_dictionary['environment']['box_pushing'][
                'partial_cooperation'] == "False":
            self.partial_cooperation = False

        self.sensing = parameter_dictionary['environment']['box_pushing'][
            'sensing']

        if parameter_dictionary['environment']['box_pushing'][
                'specialisation'] == "True":
            self.specialisation = True
        elif parameter_dictionary['environment']['box_pushing'][
                'specialisation'] == "False":
            self.specialisation = False

        self.environment_scaling = parameter_dictionary['environment'][
            'box_pushing']['environment_scaling']

        if parameter_dictionary['environment']['box_pushing'][
                'reward_sharing'] == "True":
            self.reward_sharing = True
        elif parameter_dictionary['environment']['box_pushing'][
                'reward_sharing'] == "False":
            self.reward_sharing = False

        self.time_scaling = parameter_dictionary['environment']['box_pushing'][
            'time_scaling']

        if parameter_dictionary['environment']['box_pushing'][
                'sparse_rewards'] == "True":
            self.sparse_rewards = True
        elif parameter_dictionary['environment']['box_pushing'][
                'sparse_rewards'] == "False":
            self.sparse_rewards = False

        # Environment

        self.num_agents = parameter_dictionary['environment']['box_pushing'][
            'num_agents']
        self.arena_length = parameter_dictionary['environment']['box_pushing'][
            'arena_length']
        self.arena_width = parameter_dictionary['environment']['box_pushing'][
            'min_arena_width'] * self.num_agents

        if self.defection:
            self.num_small_boxes = self.num_agents
        else:
            self.num_small_boxes = 0

        if self.partial_cooperation:
            self.medium_box_size = parameter_dictionary['environment'][
                'box_pushing']['medium_box_size']
            self.num_medium_boxes = self.num_agents // self.medium_box_size
            self.num_large_boxes = 0
        else:
            self.num_medium_boxes = 0
            self.num_large_boxes = 1  # 1 box pushed by all agents

        if self.time_scaling == "variable":
            self.episode_length = parameter_dictionary['environment'][
                'box_pushing']['min_episode_length'] * self.num_agents
        elif self.time_scaling == "constant":
            self.episode_length = parameter_dictionary['environment'][
                'box_pushing']['min_episode_length']
        else:
            raise RuntimeError(
                "Time scaling must be either constant or variable")

        self.num_episodes = parameter_dictionary['environment']['box_pushing'][
            'num_episodes']

        self.arena_constraints = {
            "x_min": 0,
            "x_max": self.arena_width,
            "y_min": 0,
            "y_max": self.arena_length
        }

        self.home_length = self.goal_length = 1

        self.agent_width = 0.8
        self.small_box_width = 0.8

        if self.partial_cooperation:
            self.medium_box_width = self.medium_box_size - 0.2

        self.large_box_width = self.num_agents - 0.2
        self.agent_height = 0.4

        # Rewards
        self.large_reward_per_agent = parameter_dictionary['environment'][
            'box_pushing']['large_reward_per_agent']
        self.small_reward_per_agent = parameter_dictionary['environment'][
            'box_pushing']['small_reward_per_agent']
        self.cost_per_time_step = parameter_dictionary['environment'][
            'box_pushing']['cost_per_time_step']

        # Rendering constants
        self.scale = 40
        self.goal_colour = [0.5, 0.5, 0.5]
        self.main_colour = [0.25, 0.5, 0.5]
        self.home_colour = [0.5, 0.6, 0.6]
        self.agent_colour = [0, 0, 0.25]
        self.box_colour = [0, 0.25, 0]

        # Rendering variables
        self.viewer = None
        self.agent_transforms = None
        self.box_transforms = None

        try:
            self.agent_transforms = [
                rendering.Transform() for _ in range(self.num_agents)
            ]
            self.box_transforms = [
                rendering.Transform()
                for _ in range(self.num_large_boxes + self.num_medium_boxes +
                               self.num_small_boxes)
            ]

        except:
            pass

        self.agent_positions = [None] * self.num_agents
        self.boxes_in_arena = {}

        # Step variables
        self.behaviour_map = [
            self.forward, self.rotate_right, self.rotate_left, self.stay
        ]
        self.action_name = ["FORWARD", "ROTATE RIGHT", "ROTATE LEFT", "STAY"]

        # Observation space
        if self.sensing == "local":
            # Onehotencoded vector, with possibilities for each of the 3 box sizes, an agent, a wall, or an empty spot
            self.observation_space_size = 6

        # Action space
        # 0- Forward, 1- Rotate right, 2- Rotate left, 3- Stay
        self.action_space_size = 4

        # Given an agent's orientation, add these values to the agent's x and y to get the block in front of them
        self.orientation_map = {
            "NORTH": (0, 1),
            "SOUTH": (0, -1),
            "EAST": (1, 0),
            "WEST": (-1, 0)
        }
Exemple #12
0
    def __init__(self, parameter_filename=None):
        """
        Initialise constants and variables for agents, resources and the
        slope environment from a JSON parameter file.

        :param parameter_filename: Path to a JSON file whose
            ['environment']['slope'] section holds all environment parameters.
        :raises RuntimeError: If no parameter file is given, or if the
            'incremental_rewards' parameter is not the string "True" or "False".
        """
        if parameter_filename is None:
            raise RuntimeError(
                "No parameter file specified for the environment")

        # Use a context manager so the file handle is closed promptly
        with open(parameter_filename) as parameter_file:
            parameter_dictionary = json.load(parameter_file)

        # All slope-specific parameters live under this sub-dictionary;
        # hoist it once instead of repeating the nested lookup everywhere
        slope_params = parameter_dictionary['environment']['slope']

        # Environment dimensions
        self.arena_constraints = {
            "x_min": 0,
            "x_max": slope_params['arena_width'],
            "y_min": 0,
            "y_max": slope_params['arena_length']
        }

        # The arena is split along y (bottom to top) into nest, cache,
        # slope and source regions; sizes are derived from the start rows
        self.nest_size = slope_params['cache_start']
        self.cache_size = slope_params['slope_start'] - slope_params[
            'cache_start']
        self.slope_size = slope_params['source_start'] - slope_params[
            'slope_start']
        self.source_size = slope_params['arena_length'] - slope_params[
            'source_start']
        self.nest_start = self.arena_constraints["y_min"]
        self.cache_start = slope_params['cache_start']
        self.slope_start = slope_params['slope_start']
        self.source_start = slope_params['source_start']
        self.num_arena_tiles = self.arena_constraints[
            "x_max"] * self.arena_constraints["y_max"]
        self.sliding_speed = slope_params['sliding_speed']

        # Agent constants
        self.agent_width = 0.8
        self.sensor_range = slope_params['sensor_range']

        # Resource constants
        self.resource_width = 0.6
        self.base_cost = slope_params['base_cost']
        self.reward_for_resource = slope_params['resource_reward']
        self.upward_cost_factor = slope_params['upward_cost_factor']
        self.downward_cost_factor = slope_params['downward_cost_factor']
        self.carry_factor = slope_params['carry_factor']

        # Other constants and variables
        self.num_agents = slope_params['num_agents']
        self.default_num_resources = slope_params['num_resources']
        self.episode_length = slope_params['episode_length']
        # It is impossible to collect this many resources
        self.max_resources = self.episode_length * self.num_agents
        self.current_num_resources = self.default_num_resources
        self.latest_resource_id = self.default_num_resources - 1

        # 'incremental_rewards' is stored as the string "True"/"False"
        incremental_rewards = slope_params['incremental_rewards']
        if incremental_rewards == "True":
            self.incremental_rewards = True
        elif incremental_rewards == "False":
            self.incremental_rewards = False
        else:
            raise RuntimeError(
                "Incremental rewards is not set to True or False")

        # Rendering constants
        self.scale = 50  # Scale for rendering
        self.nest_colour = [0.25, 0.25, 0.25]
        self.cache_colour = [0.5, 0.5, 0.5]
        self.slope_colour = [0.5, 0.25, 0.25]
        self.source_colour = [0.25, 0.5, 0.5]
        self.agent_colour = [0, 0, 0.25]
        self.resource_colour = [0, 0.25, 0]

        # Rendering variables
        self.viewer = None
        self.agent_transforms = None
        self.resource_transforms = None

        try:
            self.agent_transforms = [
                rendering.Transform() for _ in range(self.num_agents)
            ]
            self.resource_transforms = [
                rendering.Transform()
                for _ in range(self.default_num_resources)
            ]
        except Exception:
            # Rendering is optional (e.g. headless runs where the rendering
            # module is unavailable); the transforms then stay None
            pass

        self.agent_positions = [None] * self.num_agents
        self.resources_in_arena = {}
        # resource_carried_by[resource][agent] is True while that agent
        # is carrying that resource
        self.resource_carried_by = [[False for _ in range(self.num_agents)]
                                    for _ in range(self.max_resources)]
        self.closest_y_for_resource = {}
        self.resource_history = [
            {
                # False unless the resource was dropped on the slope at least once
                "dropped_on_slope": False,
                # Index of the agent that dropped the resource on the slope
                "dropper_index": -1,
                # The last time it was picked up, was it picked up on the cache?
                "collected_from_cache": False,
                # Index of the agent who last picked it up on the cache (if the
                # resource is delivered, this is also the agent that delivers it)
                "collector_index": -1,
                # Was the resource retrieved
                "retrieved": False
            } for _ in range(self.max_resources)
        ]
        self.agent_trajectories = [[None for _ in range(self.episode_length)]
                                   for _ in range(self.num_agents)]
        self.last_trajectory_recorded = 0
        self.total_resources_retrieved = 0

        # Step variables
        self.behaviour_map = [
            self.forward_step, self.backward_step, self.left_step,
            self.right_step
        ]
        self.action_name = [
            "FORWARD", "BACKWARD", "LEFT", "RIGHT", "PICKUP", "DROP"
        ]
        self.has_resource = [None] * self.num_agents

        #self.seed_value = parameter_dictionary['general']['seed']
        self.seed_value = slope_params['env_seed']
        self.np_random = np.random.RandomState(self.seed_value)

        # Observation space (additional details explained in self.get_agent_observations())

        # Range=1 -> 9 tiles. Range=2 -> 25 tiles. Agent at the center.
        self.tiles_in_sensing_range = (2 * self.sensor_range + 1)**2

        # 1 bit for each tile in range + 4 bits for location + 1 bit for object possession
        self.observation_space_size = self.tiles_in_sensing_range + 4 + 1

        # Action space
        # 0- Forward, 1- Backward, 2- Left, 3- Right, 4- Pick up, 5- Drop
        self.action_space_size = 6

        # Novelty constants
        self.bc_measure = slope_params['bc_measure']
        self.avg_pos_for_agent = [[0, 0] for _ in range(self.num_agents)]
        self.agent_action_count = [[0 for _ in range(self.action_space_size)]
                                   for _ in range(self.num_agents)]
Exemple #13
0
    def reset(self):
        """
        Reset the environment to the start-of-episode state.

        Re-places all agents in the nest and all resources in the source,
        clears per-episode bookkeeping (carried resources, resource history,
        trajectories, behaviour-characterisation counters) and closes any
        open viewer.

        :return: Initial observations for every agent
            (see self.get_agent_observations())
        :raises AssertionError: If the nest cannot hold all agents or the
            source cannot hold all default resources.
        """

        # Make sure agents and resources will all fit in the environment
        assert self.num_agents <= self.arena_constraints[
            "x_max"] * self.nest_size, "Not enough room in the nest for all agents"
        assert self.default_num_resources <= self.arena_constraints[
            "x_max"] * self.source_size, "Not enough room in the source for all resources"

        # Close any viewer left over from the previous episode; viewer is
        # None before the first render, and closing may fail headlessly
        if self.viewer is not None:
            try:
                self.viewer.close()
            except Exception:
                pass

        self.viewer = None
        self.resources_in_arena = {}
        self.resource_carried_by = [[False for i in range(self.num_agents)]
                                    for j in range(self.max_resources)]
        self.closest_y_for_resource = {}
        self.resource_history = [{
            "dropped_on_slope": False,
            "dropper_index": -1,
            "collected_from_cache": False,
            "collector_index": -1,
            "retrieved": False
        } for i in range(self.max_resources)]

        try:
            self.resource_transforms = [
                rendering.Transform()
                for i in range(self.default_num_resources)
            ]
        except Exception:
            # Rendering is optional (e.g. headless runs)
            pass

        self.latest_resource_id = self.default_num_resources - 1

        # Creates empty state
        self.agent_map = self.generate_arena()  # Empty agent map
        self.resource_map = self.generate_arena()  # Empty resource map

        # Places all agents at random free tiles in the nest
        for i in range(self.num_agents):
            agent_placed = False
            while not agent_placed:
                x, y = self.generate_agent_position()
                if self.agent_map[y][x] == 0:
                    # Agent ids on the map are 1-based (0 means empty)
                    self.agent_map[y][x] = i + 1
                    self.agent_positions[i] = (x, y)
                    agent_placed = True

        # Places all resources at random free tiles in the source
        for i in range(self.default_num_resources):
            resource_placed = False
            while not resource_placed:
                x, y = self.generate_resource_position()
                if self.resource_map[y][x] == 0:
                    # Resource ids on the map are 1-based (0 means empty)
                    self.resource_map[y][x] = i + 1
                    self.resources_in_arena[i] = (x, y)
                    self.closest_y_for_resource[i] = y
                    resource_placed = True

        # Reset variables that were changed during runtime
        self.has_resource = [None] * self.num_agents
        self.current_num_resources = self.default_num_resources
        self.agent_trajectories = [[None for _ in range(self.episode_length)]
                                   for _ in range(self.num_agents)]

        # Record each agent's starting y coordinate as the first trajectory entry
        for agent_id in range(self.num_agents):
            self.agent_trajectories[agent_id][0] = self.agent_positions[
                agent_id][1]

        self.last_trajectory_recorded = 0
        self.total_resources_retrieved = 0

        # Reset BC (behaviour characterisation) accumulators
        self.avg_pos_for_agent = [[0, 0] for _ in range(self.num_agents)]
        self.agent_action_count = [[0 for _ in range(self.action_space_size)]
                                   for _ in range(self.num_agents)]

        return self.get_agent_observations()