def test_apply_cont_actions():
    agents = [Agent((0, 0))]
    world = World(10, 10, 1, 0, agents)
    commands = np.array([np.array([1, 2])])
    world.apply_cont_actions(commands)
    assert np.array_equal(np.array([1, 2]), world.get_agents()[0].get_loc())


def test_get_obs_image_scale():
    agents = [Agent((1.1, 1))]
    pois = [POI((2.5, 1.2))]
    world = World(10, 10, 1, 1, agents, pois)
    img = world.get_obs_image(10)
    assert img[10][11][0] == 1
    assert img[12][25][1] == 1


def test_get_obs_image():
    agents = [Agent((1.1, 1))]
    pois = [POI((2.5, 1.2))]  # Even numbers round up, odd round down
    world = World(10, 10, 1, 1, agents, pois)
    img = world.get_obs_image()
    assert img[1][1][0] == 1
    assert img[1][2][1] == 1


def test_get_local_obs_images():
    agents = [Agent((1.1, 1))]
    pois = [POI((2.5, 1.2))]
    world = World(10, 10, 1, 1, agents, pois)
    imgs = world.get_local_obs_images(3)
    agent_img = imgs[0]
    assert agent_img[1][1][0] == 1  # Observe self
    assert agent_img[1][2][1] == 1  # Observe POI


def test_init_creates_valid_pois():
    world = World(10, 10, 0, 2)
    valid = True
    for p in world.get_pois():
        loc = p.get_loc()
        x, y = loc[0], loc[1]
        valid = valid and x >= 0 and x < 10 and y >= 0 and y < 10
    assert valid
    assert len(world.get_pois()) == 2


def test_init_creates_valid_agents():
    world = World(10, 10, 2, 0)
    valid = True
    for a in world.get_agents():
        loc = a.get_loc()
        x, y = loc[0], loc[1]
        valid = valid and x >= 0 and x < 10 and y >= 0 and y < 10
    assert valid
    assert len(world.get_agents()) == 2


def test_get_obs_states():
    agents = [
        Agent((1, 1)),
        Agent((0, 1.5)),
        Agent((1.5, 0)),
        Agent((0, 0)),
        Agent((1.5, 1.5))
    ]
    pois = [POI((0, 0))]
    world = World(5, 5, 5, 1, agents, pois)
    vec = world.get_obs_states()
    assert vec.shape[0] == 5
    assert vec[0][0] == 1
    assert vec[0][6] == 1 / np.sqrt(2)
class RoverContFeature(gym.Env):
    metadata = {'render.modes': ['human']}
    COLOR = {1: [255, 0, 0], 2: [0, 255, 255], 3: [0, 0, 255]}

    def __init__(self):
        # Set default values for the env. Because gym envs can't take
        # constructor arguments, these are overridden after construction.
        self.world_height = 10
        self.world_width = 10
        self.num_agents = 1
        self.num_pois = 1
        self.time_limit = 10

        # Creates the Space objects used by gym
        self.set_observation_space()
        self.set_action_space()

        # Sets internal state for rover domain
        self.create_world()
        self.time_step = 0

        # Env requirements
        self._seed()
        self.reset()
        self.viewer = None

    """ Gym env required functions """

    def _reset(self):
        self.time_step = 0
        self.create_world()
        return self._get_observation()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        self._world.apply_cont_actions(action)
        self.time_step += 1
        obs = self._get_observation()
        reward = self._world.get_reward()
        done = self.time_step > self.time_limit
        return obs, reward, done, {}

    def _get_observation(self):
        return self._world.get_obs_states()

    def _render(self, mode='human', close=False):
        from gym.envs.classic_control import rendering
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        if self.viewer is None:
            self.screen_width = 600
            self.screen_height = 400
            self.scale_w = self.screen_width / self.world_width
            self.scale_h = self.screen_height / self.world_height
            self.viewer = rendering.Viewer(self.screen_width,
                                           self.screen_height)
        self._render_agents()
        self._render_pois()
        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    """ Helper functions """

    def create_world(self):
        self._world = World(self.world_width, self.world_height,
                            self.num_agents, self.num_pois)
        self._agents = self._world.get_agents()
        self._pois = self._world.get_pois()

    def set_observation_space(self):
        self._box_low = np.array([0, 0, 0, 0, 0, 0, 0, 0])
        self._box_high = np.array([
            self.num_agents, self.num_agents, self.num_agents,
            self.num_agents, 6 * self.num_pois, 6 * self.num_pois,
            6 * self.num_pois, 6 * self.num_pois
        ])
        self._box_one_obs = spaces.Box(self._box_low, self._box_high)
        self._all_boxes = []
        for _ in range(self.num_agents):
            self._all_boxes.append(self._box_one_obs)
        self.observation_space = spaces.Tuple(self._all_boxes)

    def set_action_space(self):
        self._agent_action = spaces.Box(np.array([-1, -1]), np.array([1, 1]))
        self._all_actions = []
        for _ in range(self.num_agents):
            self._all_actions.append(self._agent_action)
        self.action_space = spaces.Tuple(self._all_actions)

    def _render_agents(self):
        from gym.envs.classic_control import rendering
        for agent in self._agents:
            poly = rendering.FilledPolygon(
                self._agent_square(agent, 10, self.scale_w, self.scale_h))
            poly.set_color(self.COLOR[1][0], self.COLOR[1][1],
                           self.COLOR[1][2])
            self.viewer.add_onetime(poly)

    def _render_pois(self):
        from gym.envs.classic_control import rendering
        for poi in self._pois:
            poly = rendering.make_circle()
            trans = (poi.get_loc()[0] * self.scale_w,
                     poi.get_loc()[1] * self.scale_h)
            poly.add_attr(rendering.Transform(translation=trans))
            if poi.visible():
                poly.set_color(self.COLOR[2][0], self.COLOR[2][1],
                               self.COLOR[2][2])
            else:
                poly.set_color(self.COLOR[3][0], self.COLOR[3][1],
                               self.COLOR[3][2])
            self.viewer.add_onetime(poly)

    def _agent_square(self, agent, side, scale_w=1, scale_h=1):
        loc = agent.get_loc()
        sloc = np.array([loc[0] * scale_w, loc[1] * scale_h])
        half = side // 2
        """ Points in CW order """
        points = [
            np.add(sloc, np.array([half, half])),
            np.add(sloc, np.array([half, -half])),
            np.add(sloc, np.array([-half, -half])),
            np.add(sloc, np.array([-half, half]))
        ]
        return points
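# --------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original source).
# Because RoverContFeature takes no constructor arguments, its parameters are
# plain attributes that can be overridden after construction; the spaces and
# world are then rebuilt explicitly. This assumes the old gym API, in which
# reset() and step() dispatch to the _reset()/_step() methods defined above,
# and that numpy (np) and the class are in scope as in the surrounding module.
if __name__ == '__main__':
    env = RoverContFeature()
    env.num_agents = 2
    env.num_pois = 3
    env.set_observation_space()
    env.set_action_space()
    obs = env.reset()                    # rebuilds the world, returns per-agent features
    actions = np.array([[0.5, -0.5],     # one continuous 2-D action per agent,
                        [1.0, 0.0]])     # bounded to [-1, 1] by the action space
    obs, reward, done, info = env.step(actions)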
def test_apply_cont_actions_mismatch():
    world = World(10, 10, 1, 0)
    commands = np.array([np.array([1, 2]), np.array([4, 3])])
    with pytest.raises(AssertionError):
        world.apply_cont_actions(commands)


def test_apply_discrete_actions_mismatch():
    world = World(10, 10, 1, 0)
    commands = np.array([1, 5])
    with pytest.raises(AssertionError):
        world.apply_discrete_actions(commands)
class RoverEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    COLOR = {1: [255, 0, 0], 2: [0, 255, 255], 3: [0, 0, 255]}

    def __init__(self,
                 world_height=10,
                 world_width=10,
                 num_agents=1,
                 num_pois=1,
                 time_limit=20,
                 observation_mode='feature',
                 actions='discrete',
                 image_width=5,
                 image_scale=1):
        """ Rover Domain environment. """
        self.world_height = world_height
        self.world_width = world_width
        self.num_agents = num_agents
        self.num_pois = num_pois
        self.time_limit = time_limit
        self.observation_mode = observation_mode
        self.actions = actions
        self.image_width = image_width
        self.image_scale = image_scale

        self.set_observation_space()
        self.set_action_space()

        # self._world = self.reset_world()
        self.create_world()
        self.viewer = None
        self.time_step = 0

        self._seed()
        self.reset()

    def reset_world(self):
        agents = self.create_agents()
        pois = self.create_pois()
        self._world = World(self.world_width, self.world_height,
                            self.num_agents, self.num_pois, agents, pois)
        self._agents = self._world.get_agents()
        self._pois = self._world.get_pois()

    def create_agents(self):
        return np.array([Agent(np.array([3, 4]))])

    def create_pois(self):
        return np.array([POI(np.array([7, 9]))])

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        # assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        if self.actions == 'continuous':
            self._world.apply_cont_actions(action)
        else:
            self._world.apply_discrete_actions(action)
        self.time_step += 1
        obs = self._get_observation()
        reward = self._world.get_reward()
        done = self.time_step > self.time_limit  # or not self._world.pois_still_left()
        return obs, reward, done, {}

    def reset(self):
        # self.set_observation_space()
        # self.set_action_space()
        self.time_step = 0
        # self.reset_world()
        self.create_world()
        return self._get_observation()

    def create_world(self):
        self._world = World(self.world_width, self.world_height,
                            self.num_agents, self.num_pois)
        self._agents = self._world.get_agents()
        self._pois = self._world.get_pois()

    def set_observation_space(self):
        if self.observation_mode == 'feature':
            self.set_observation_space_feature()
        elif self.observation_mode == 'image':
            self.set_observation_space_image()

    def set_observation_space_feature(self):
        self._box_low = np.array([0, 0, 0, 0, 0, 0, 0, 0])
        self._box_high = np.array([
            self.num_agents, self.num_agents, self.num_agents,
            self.num_agents, 6 * self.num_pois, 6 * self.num_pois,
            6 * self.num_pois, 6 * self.num_pois
        ])
        self._box_one_obs = spaces.Box(self._box_low, self._box_high)
        self._all_boxes = []
        for _ in range(self.num_agents):
            self._all_boxes.append(self._box_one_obs)
        self.observation_space = spaces.Tuple(self._all_boxes)
        print(self.observation_space)

    def set_observation_space_image(self):
        all_images = []
        max_count = max(self.num_pois, self.num_agents)
        for _ in range(self.num_agents):
            single_space = spaces.Box(low=0,
                                      high=max_count,
                                      shape=(self.image_width,
                                             self.image_width, 2))
            all_images.append(single_space)
        self.observation_space = spaces.Tuple(all_images)

    def set_action_space(self):
        if self.actions == 'continuous':
            self._set_action_space_cont()
        elif self.actions == 'discrete':
            self._set_action_space_discrete()

    def _set_action_space_cont(self):
        self._agent_action = spaces.Box(np.array([-1, -1]), np.array([1, 1]))
        self._all_actions = []
        for _ in range(self.num_agents):
            self._all_actions.append(self._agent_action)
        self.action_space = spaces.Tuple(self._all_actions)

    def _set_action_space_discrete(self):
        self._agent_action = spaces.Discrete(8)
        self._all_actions = []
        for _ in range(self.num_agents):
            self._all_actions.append(self._agent_action)
        self.action_space = spaces.Tuple(self._all_actions)

    def render(self, mode='human', close=False):
        from gym.envs.classic_control import rendering
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        if self.viewer is None:
            self.screen_width = 600
            self.screen_height = 400
            self.scale_w = self.screen_width / self.world_width
            self.scale_h = self.screen_height / self.world_height
            self.viewer = rendering.Viewer(self.screen_width,
                                           self.screen_height)
        self._render_agents()
        self._render_pois()
        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def _render_agents(self):
        from gym.envs.classic_control import rendering
        for agent in self._agents:
            poly = rendering.FilledPolygon(
                self._agent_square(agent, 10, self.scale_w, self.scale_h))
            poly.set_color(self.COLOR[1][0], self.COLOR[1][1],
                           self.COLOR[1][2])
            self.viewer.add_onetime(poly)

    def _render_pois(self):
        from gym.envs.classic_control import rendering
        for poi in self._pois:
            poly = rendering.make_circle()
            trans = (poi.get_loc()[0] * self.scale_w,
                     poi.get_loc()[1] * self.scale_h)
            poly.add_attr(rendering.Transform(translation=trans))
            if poi.visible():
                poly.set_color(self.COLOR[2][0], self.COLOR[2][1],
                               self.COLOR[2][2])
            else:
                poly.set_color(self.COLOR[3][0], self.COLOR[3][1],
                               self.COLOR[3][2])
            self.viewer.add_onetime(poly)

    def _agent_square(self, agent, side, scale_w=1, scale_h=1):
        loc = agent.get_loc()
        sloc = np.array([loc[0] * scale_w, loc[1] * scale_h])
        half = side // 2
        """ Points in CW order """
        points = [
            np.add(sloc, np.array([half, half])),
            np.add(sloc, np.array([half, -half])),
            np.add(sloc, np.array([-half, -half])),
            np.add(sloc, np.array([-half, half]))
        ]
        return points

    def _get_observation(self):
        if self.observation_mode == 'feature':
            obs = self._world.get_obs_states()
            # for i in range(self.num_agents):
            #     obs[i][8] = self.time_limit - self.time_step
            return obs
        elif self.observation_mode == 'image':
            return self._world.get_local_obs_images(self.image_width,
                                                    self.image_scale)
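# --------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original source).
# Unlike RoverContFeature, RoverEnv is configured through its constructor, so
# the observation mode ('feature' or 'image') and action type ('discrete' or
# 'continuous') are chosen up front. With discrete actions, each agent's
# action is an index into spaces.Discrete(8), and sampling the Tuple action
# space yields one such index per agent. Assumes numpy (np) and the class are
# in scope as in the surrounding module.
if __name__ == '__main__':
    env = RoverEnv(num_agents=2, num_pois=3,
                   observation_mode='image', actions='discrete',
                   image_width=5, image_scale=1)
    obs = env.reset()
    done = False
    while not done:
        # Tuple.sample() returns one Discrete(8) index per agent; the world
        # expects them as a flat array (see test_apply_discrete_actions_mismatch).
        actions = np.array(env.action_space.sample())
        obs, reward, done, _ = env.step(actions)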