class Point2DEnv(MultitaskEnv, Serializable): """ A little 2D point whose life goal is to reach a target. """ def __init__( self, render_dt_msec=0, action_l2norm_penalty=0, # disabled for now render_onscreen=True, render_size=84, reward_type="dense", target_radius=0.5, boundary_dist=4, ball_radius=0.25, walls=[], fixed_goal=None, randomize_position_on_reset=True, **kwargs): print("WARNING, ignoring kwargs:", kwargs) self.quick_init(locals()) self.render_dt_msec = render_dt_msec self.action_l2norm_penalty = action_l2norm_penalty self.render_onscreen = render_onscreen self.render_size = render_size self.reward_type = reward_type self.target_radius = target_radius self.boundary_dist = boundary_dist self.ball_radius = ball_radius self.walls = walls self.fixed_goal = fixed_goal self.randomize_position_on_reset = randomize_position_on_reset self._max_episode_steps = 50 self.max_target_distance = self.boundary_dist - self.target_radius self._target_position = None self._position = np.zeros((2)) u = np.ones(2) self.action_space = spaces.Box(-u, u, dtype=np.float32) o = self.boundary_dist * np.ones(2) self.obs_range = spaces.Box(-o, o, dtype='float32') self.observation_space = spaces.Dict([ ('observation', self.obs_range), ('desired_goal', self.obs_range), ('achieved_goal', self.obs_range), ('state_observation', self.obs_range), ('state_desired_goal', self.obs_range), ('state_achieved_goal', self.obs_range), ]) self.drawer = None def step(self, velocities): velocities = np.clip(velocities, a_min=-1, a_max=1) new_position = self._position + velocities for wall in self.walls: new_position = wall.handle_collision(self._position, new_position) self._position = new_position self._position = np.clip( self._position, a_min=-self.boundary_dist, a_max=self.boundary_dist, ) distance_to_target = np.linalg.norm(self._position - self._target_position) is_success = distance_to_target < self.target_radius ob = self._get_obs() reward = self.compute_reward(velocities, ob) info = { 'radius': self.target_radius, 'target_position': self._target_position, 'distance_to_target': distance_to_target, 'velocity': velocities, 'speed': np.linalg.norm(velocities), 'is_success': is_success, } done = False return ob, reward, done, info def _sample_goal(self): return np.random.uniform(size=2, low=-self.max_target_distance, high=self.max_target_distance) def reset(self): self._target_position = np.random.uniform( size=2, low=-self.max_target_distance, high=self.max_target_distance) if self.randomize_position_on_reset: self._position = np.random.uniform(size=2, low=-self.boundary_dist, high=self.boundary_dist) return self._get_obs() def seed(self, s): """Do nothing for seed""" pass def _get_obs(self): return dict( observation=self._position.copy(), desired_goal=self._target_position.copy(), achieved_goal=self._position.copy(), state_observation=self._position.copy(), state_desired_goal=self._target_position.copy(), state_achieved_goal=self._position.copy(), ) def compute_rewards(self, actions, obs): achieved_goals = obs['observation'] desired_goals = obs['desired_goal'] d = np.linalg.norm(achieved_goals - desired_goals, axis=-1) if self.reward_type == "sparse": return -(d > self.target_radius).astype(np.float32) if self.reward_type == "dense": return -d def get_goal(self): return { 'desired_goal': self._target_position.copy(), 'state_desired_goal': self._target_position.copy(), } def sample_goals(self, batch_size): if not self.fixed_goal is None: goals = np.repeat(self.fixed_goal.copy()[None], batch_size, 0) else: goals = np.random.uniform( self.obs_range.low, self.obs_range.high, size=(batch_size, self.obs_range.low.size), ) return { 'desired_goal': goals, 'state_desired_goal': goals, } def set_position(self, pos): self._position[0] = pos[0] self._position[1] = pos[1] """Functions for ImageEnv wrapper""" def get_image(self): """Returns a black and white image""" self.render() img = self.drawer.get_image() # img = img / 255.0 r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2] # img = (-r - g + b).flatten() # GREEN ignored for visualization img = (-r + b).flatten() return img def set_to_goal(self, goal_dict): goal = goal_dict["desired_goal"] self._position = goal self._target_position = goal def get_env_state(self): return self._get_obs() def set_env_state(self, state): position = state["state_observation"] goal = state["state_desired_goal"] self._position = position self._target_position = goal def render(self, close=False): if close: self.drawer = None return if self.drawer is None or self.drawer.terminated: self.drawer = PygameViewer( self.render_size, self.render_size, x_bounds=(-self.boundary_dist, self.boundary_dist), y_bounds=(-self.boundary_dist, self.boundary_dist), render_onscreen=self.render_onscreen, ) self.drawer.fill(Color('white')) self.drawer.draw_solid_circle( self._target_position, self.target_radius, Color('green'), ) self.drawer.draw_solid_circle( self._position, self.ball_radius, Color('blue'), ) for wall in self.walls: self.drawer.draw_segment( wall.endpoint1, wall.endpoint2, Color('black'), ) self.drawer.render() self.drawer.tick(self.render_dt_msec) """Static visualization/utility methods""" @staticmethod def true_model(state, action): velocities = np.clip(action, a_min=-1, a_max=1) position = state new_position = position + velocities return np.clip( new_position, a_min=-Point2DEnv.boundary_dist, a_max=Point2DEnv.boundary_dist, ) @staticmethod def true_states(state, actions): real_states = [state] for action in actions: next_state = Point2DEnv.true_model(state, action) real_states.append(next_state) state = next_state return real_states @staticmethod def plot_trajectory(ax, states, actions, goal=None): assert len(states) == len(actions) + 1 x = states[:, 0] y = -states[:, 1] num_states = len(states) plasma_cm = plt.get_cmap('plasma') for i, state in enumerate(states): color = plasma_cm(float(i) / num_states) ax.plot( state[0], -state[1], marker='o', color=color, markersize=10, ) actions_x = actions[:, 0] actions_y = -actions[:, 1] ax.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1], scale_units='xy', angles='xy', scale=1, width=0.005) ax.quiver( x[:-1], y[:-1], actions_x, actions_y, scale_units='xy', angles='xy', scale=1, color='r', width=0.0035, ) ax.plot( [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) if goal is not None: ax.plot(goal[0], -goal[1], marker='*', color='g', markersize=15) ax.set_ylim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) ax.set_xlim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, )
class Point2DEnv(MultitaskEnv, Serializable): """ A little 2D point whose life goal is to reach a target. """ def __init__( self, render_dt_msec=0, action_l2norm_penalty=0, # disabled for now render_onscreen=True, render_size=84, reward_type="dense", target_radius=0.5, boundary_dist=4, ball_radius=0.25, walls=None, fixed_goal=None, randomize_position_on_reset=True, images_are_rgb=False, # else black and white show_goal=True, action_limit=1, initial_position=(0, 0), **kwargs ): if walls is None: walls = [] if len(kwargs) > 0: import logging LOGGER = logging.getLogger(__name__) LOGGER.log(logging.WARNING, "WARNING, ignoring kwargs:", kwargs) self.quick_init(locals()) self.render_dt_msec = render_dt_msec self.action_l2norm_penalty = action_l2norm_penalty self.render_onscreen = render_onscreen self.render_size = render_size self.reward_type = reward_type self.target_radius = target_radius self.boundary_dist = boundary_dist self.ball_radius = ball_radius self.walls = walls self.fixed_goal = fixed_goal self.randomize_position_on_reset = randomize_position_on_reset self.images_are_rgb = images_are_rgb self.show_goal = show_goal self.action_limit = action_limit self._max_episode_steps = 50 self.max_target_distance = self.boundary_dist - self.target_radius self._target_position = None self._position = np.zeros((2)) u = self.action_limit * np.ones(2) self.action_space = spaces.Box(-u, u, dtype=np.float32) o = self.boundary_dist * np.ones(2) self.obs_range = spaces.Box(-o, o, dtype='float32') self.observation_space = spaces.Dict([ ('observation', self.obs_range), ('desired_goal', self.obs_range), ('achieved_goal', self.obs_range), ('state_observation', self.obs_range), ('state_desired_goal', self.obs_range), ('state_achieved_goal', self.obs_range), ]) self.observation_space.shape = (2,) # hack self._step = 0 self.initial_position = initial_position self.drawer = None def step(self, velocities): velocities = np.clip(velocities, a_min=-self.action_limit, a_max=self.action_limit) new_position = self._position + velocities for wall in self.walls: new_position = wall.handle_collision( self._position, new_position ) self._position = new_position self._position = np.clip( self._position, a_min=-self.boundary_dist, a_max=self.boundary_dist, ) distance_to_target = np.linalg.norm( self._position - self._target_position ) is_success = distance_to_target < self.target_radius ob = self._get_obs() reward = self.compute_reward(velocities, ob) info = { 'radius': self.target_radius, 'target_position': self._target_position, 'distance_to_target': distance_to_target, 'velocity': velocities, 'speed': np.linalg.norm(velocities), 'is_success': is_success, } self._step += 1 done = False return ob, reward, done, info def _sample_goal(self): return np.random.uniform( size=2, low=-self.max_target_distance, high=self.max_target_distance ) def reset(self): self._step = 0 if self.fixed_goal: self._target_position = np.array([0, 0]) else: self._target_position = np.random.uniform( size=2, low=-self.max_target_distance, high=self.max_target_distance ) if self.randomize_position_on_reset: self._position = np.random.uniform( size=2, low=-self.boundary_dist, high=self.boundary_dist ) else: self._position = np.array(self.initial_position) return self._get_obs() def _position_inside_wall(self, pos): for wall in self.walls: if wall.contains_point(pos): return True return False def _get_obs(self): return OrderedDict( observation=self._position.copy(), desired_goal=self._target_position.copy(), achieved_goal=self._position.copy(), state_observation=self._position.copy(), state_desired_goal=self._target_position.copy(), state_achieved_goal=self._position.copy(), ) def compute_rewards(self, actions, obs): achieved_goals = obs['state_observation'] desired_goals = obs['state_desired_goal'] d = np.linalg.norm(achieved_goals - desired_goals, axis=-1) if self.reward_type == "sparse": return -(d > self.target_radius).astype(np.float32) if self.reward_type == "dense": return -d def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'distance_to_target', 'is_success', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update(create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update(create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics def get_goal(self): return { 'desired_goal': self._target_position.copy(), 'state_desired_goal': self._target_position.copy(), } def _sample_position(self, low, high, realistic=True): pos = np.random.uniform(low, high) if realistic: while self._position_inside_wall(pos) is True: pos = np.random.uniform(low, high) return pos def sample_goals(self, batch_size): if not self.fixed_goal is None: goals = np.repeat( self.fixed_goal.copy()[None], batch_size, 0 ) else: goals = np.zeros((batch_size, self.obs_range.low.size)) for b in range(batch_size): goals[b, :] = self._sample_position(self.obs_range.low, self.obs_range.high,) # realistic=self.sample_realistic_goals) # goals = np.random.uniform( # self.obs_range.low, # self.obs_range.high, # size=(batch_size, self.obs_range.low.size), # ) return { 'desired_goal': goals, 'state_desired_goal': goals, } def set_position(self, pos): self._position[0] = pos[0] self._position[1] = pos[1] """Functions for ImageEnv wrapper""" def render(self, mode='human', **kwargs): width, height = 256, 256 """Returns a black and white image""" if width is not None: if width != height: raise NotImplementedError() if width != self.render_size: self.drawer = PygameViewer( screen_width=width, screen_height=height, x_bounds=(-self.boundary_dist, self.boundary_dist), y_bounds=(-self.boundary_dist, self.boundary_dist), render_onscreen=self.render_onscreen, ) self.render_size = width self._render() img = self.drawer.get_image() return np.rot90(img, k=1, axes=(0, 1)) # if self.images_are_rgb: # return img.transpose().flatten() # else: # r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2] # img = (-r + b).transpose().flatten() # return img def set_to_goal(self, goal_dict): goal = goal_dict["state_desired_goal"] self._position = goal self._target_position = goal def get_env_state(self): return self._get_obs() def set_env_state(self, state): position = state["state_observation"] goal = state["state_desired_goal"] self._position = position self._target_position = goal def _render(self, close=False): if close: self.drawer = None return if self.drawer is None or self.drawer.terminated: self.drawer = PygameViewer( self.render_size, self.render_size, x_bounds=(-self.boundary_dist, self.boundary_dist), y_bounds=(-self.boundary_dist, self.boundary_dist), render_onscreen=self.render_onscreen, ) self.drawer.fill(Color('white')) self.drawer.draw_solid_circle( np.array([10, 10]), 1, Color('red') ) if self.show_goal: self.drawer.draw_solid_circle( self._target_position, self.target_radius, Color('green'), ) self.drawer.draw_solid_circle( self._position, self.ball_radius, Color('blue'), ) for wall in self.walls: self.drawer.draw_segment( wall.endpoint1, wall.endpoint2, Color('black'), ) self.drawer.draw_segment( wall.endpoint2, wall.endpoint3, Color('black'), ) self.drawer.draw_segment( wall.endpoint3, wall.endpoint4, Color('black'), ) self.drawer.draw_segment( wall.endpoint4, wall.endpoint1, Color('black'), ) self.drawer.render() self.drawer.tick(self.render_dt_msec) def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'distance_to_target', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update(create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update(create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics """Static visualization/utility methods""" @staticmethod def true_model(state, action): velocities = np.clip(action, a_min=-1, a_max=1) position = state new_position = position + velocities return np.clip( new_position, a_min=-Point2DEnv.boundary_dist, a_max=Point2DEnv.boundary_dist, ) @staticmethod def true_states(state, actions): real_states = [state] for action in actions: next_state = Point2DEnv.true_model(state, action) real_states.append(next_state) state = next_state return real_states @staticmethod def plot_trajectory(ax, states, actions, goal=None): assert len(states) == len(actions) + 1 x = states[:, 0] y = -states[:, 1] num_states = len(states) plasma_cm = plt.get_cmap('plasma') for i, state in enumerate(states): color = plasma_cm(float(i) / num_states) ax.plot(state[0], -state[1], marker='o', color=color, markersize=10, ) actions_x = actions[:, 0] actions_y = -actions[:, 1] ax.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1], scale_units='xy', angles='xy', scale=1, width=0.005) ax.quiver(x[:-1], y[:-1], actions_x, actions_y, scale_units='xy', angles='xy', scale=1, color='r', width=0.0035, ) ax.plot( [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) if goal is not None: ax.plot(goal[0], -goal[1], marker='*', color='g', markersize=15) ax.set_ylim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) ax.set_xlim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) def initialize_camera(self, init_fctn): pass
class Point2DEnv(MultitaskEnv, Serializable): """ A little 2D point whose life goal is to reach a target. """ def __init__( self, render_dt_msec=0, action_l2norm_penalty=0, # disabled for now render_onscreen=False, render_size=200, render_target=True, reward_type="dense", norm_order=2, action_scale=1.0, target_radius=0.50, boundary_dist=4, ball_radius=0.50, walls=[], fixed_goal=None, goal_low=None, goal_high=None, ball_low=None, ball_high=None, randomize_position_on_reset=True, sample_realistic_goals=False, images_are_rgb=False, # else black and white show_goal=True, v_func_heatmap_bounds=None, **kwargs): if walls is None: walls = [] if len(kwargs) > 0: import logging LOGGER = logging.getLogger(__name__) LOGGER.log(logging.WARNING, "WARNING, ignoring kwargs:", kwargs) self.quick_init(locals()) self.render_dt_msec = render_dt_msec self.action_l2norm_penalty = action_l2norm_penalty self.render_onscreen = render_onscreen self.render_size = render_size self.render_target = render_target self.reward_type = reward_type self.norm_order = norm_order self.action_scale = action_scale self.target_radius = target_radius self.boundary_dist = boundary_dist self.ball_radius = ball_radius self.walls = walls self.fixed_goal = fixed_goal self.randomize_position_on_reset = randomize_position_on_reset self.sample_realistic_goals = sample_realistic_goals if self.fixed_goal is not None: self.fixed_goal = np.array(self.fixed_goal) self.images_are_rgb = images_are_rgb self.show_goal = show_goal self.max_target_distance = self.boundary_dist - self.target_radius self._target_position = None self._position = np.zeros((2)) u = np.ones(2) self.action_space = spaces.Box(-u, u, dtype=np.float32) o = self.boundary_dist * np.ones(2) self.obs_range = spaces.Box(-o, o, dtype='float32') if goal_low is None: goal_low = -o if goal_high is None: goal_high = o self.goal_range = spaces.Box(np.array(goal_low), np.array(goal_high), dtype='float32') if ball_low is None: ball_low = -o if ball_high is None: ball_high = o self.ball_range = spaces.Box(np.array(ball_low), np.array(ball_high), dtype='float32') self.observation_space = spaces.Dict([ ('observation', self.obs_range), ('desired_goal', self.goal_range), ('achieved_goal', self.obs_range), ('state_observation', self.obs_range), ('state_desired_goal', self.goal_range), ('state_achieved_goal', self.obs_range), ]) self.drawer = None self.subgoals = None self.v_func_heatmap_bounds = v_func_heatmap_bounds def step(self, velocities): assert self.action_scale <= 1.0 velocities = np.clip(velocities, a_min=-1, a_max=1) * self.action_scale new_position = self._position + velocities for wall in self.walls: new_position = wall.handle_collision(self._position, new_position) self._position = new_position self._position = np.clip( self._position, a_min=-self.boundary_dist, a_max=self.boundary_dist, ) distance_to_target = np.linalg.norm(self._position - self._target_position) below_wall = self._position[1] >= 2.0 is_success = distance_to_target < 1.0 is_success_2 = distance_to_target < 1.5 is_success_3 = distance_to_target < 1.75 ob = self._get_obs() reward = self.compute_reward(velocities, ob) info = { 'radius': self.target_radius, 'target_position': self._target_position, 'distance_to_target': distance_to_target, 'velocity': velocities, 'speed': np.linalg.norm(velocities), 'is_success': is_success, 'is_success_2': is_success_2, 'is_success_3': is_success_3, 'below_wall': below_wall, } done = False return ob, reward, done, info def initialize_camera(self, camera): pass def reset(self): self._target_position = self.sample_goal_for_rollout( )['state_desired_goal'] if self.randomize_position_on_reset: self._position = self._sample_position(self.ball_range.low, self.ball_range.high, realistic=True) self.subgoals = None return self._get_obs() def _position_outside_arena(self, pos): return not self.obs_range.contains(pos) def _position_inside_wall(self, pos): for wall in self.walls: if wall.contains_point(pos): return True return False def realistic_state_np(self, state): return not self._position_inside_wall(state) def realistic_goals(self, g, use_double=False): collision = None for wall in self.walls: wall_collision = wall.contains_points_pytorch(g) wall_collision_result = wall_collision[:, 0] for i in range(wall_collision.shape[1]): wall_collision_result = wall_collision_result * wall_collision[:, i] if collision is None: collision = wall_collision_result else: collision = collision | wall_collision_result result = collision ^ 1 # g_np = ptu.get_numpy(g) # for i in range(g_np.shape[0]): # print(not self._position_inside_wall(g_np[i]), ptu.get_numpy(result[i])) return result.float() def _sample_position(self, low, high, realistic=True): pos = np.random.uniform(low, high) if realistic: while self._position_inside_wall(pos) is True: pos = np.random.uniform(low, high) return pos def _get_obs(self): return dict( observation=self._position.copy(), desired_goal=self._target_position.copy(), achieved_goal=self._position.copy(), state_observation=self._position.copy(), state_desired_goal=self._target_position.copy(), state_achieved_goal=self._position.copy(), ) def compute_rewards(self, actions, obs, prev_obs=None): achieved_goals = obs['state_achieved_goal'] desired_goals = obs['state_desired_goal'] d = np.linalg.norm(achieved_goals - desired_goals, ord=self.norm_order, axis=-1) if self.reward_type == "sparse": return -(d > self.target_radius).astype(np.float32) elif self.reward_type == "dense": return -d elif self.reward_type == 'vectorized_dense': return -np.abs(achieved_goals - desired_goals) def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'distance_to_target', 'below_wall', 'is_success', 'is_success_2', 'is_success_3', 'velocity', 'speed', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update( create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update( create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics def get_goal(self): return { 'desired_goal': self._target_position.copy(), 'state_desired_goal': self._target_position.copy(), } def set_goal(self, goal): self._target_position = goal['state_desired_goal'] def sample_goal_for_rollout(self): if not self.fixed_goal is None: goal = np.copy(self.fixed_goal) else: goal = self._sample_position(self.goal_range.low, self.goal_range.high, realistic=self.sample_realistic_goals) return { 'desired_goal': goal, 'state_desired_goal': goal, } def sample_goals(self, batch_size): # goals = np.random.uniform( # self.obs_range.low, # self.obs_range.high, # size=(batch_size, self.obs_range.low.size), # ) goals = np.array([ self._sample_position(self.obs_range.low, self.obs_range.high, realistic=self.sample_realistic_goals) for _ in range(batch_size) ]) return { 'desired_goal': goals, 'state_desired_goal': goals, } def set_position(self, pos): self._position[0] = pos[0] self._position[1] = pos[1] """Functions for ImageEnv wrapper""" def get_image(self, width=None, height=None): """Returns a black and white image""" if width is not None: if width != height: raise NotImplementedError() if width != self.render_size: self.drawer = PygameViewer( screen_width=width, screen_height=height, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), render_onscreen=self.render_onscreen, ) self.render_size = width self.render() img = self.drawer.get_image() if self.images_are_rgb: return img.transpose((1, 0, 2)) else: r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2] img = (-r + b) return img def update_subgoals(self, subgoals, subgoals_reproj=None, subgoal_v_vals=None): self.subgoals = subgoals def set_to_goal(self, goal_dict): goal = goal_dict["state_desired_goal"] self._position = goal self._target_position = goal def get_env_state(self): return self._get_obs() def set_env_state(self, state): position = state["state_observation"] goal = state["state_desired_goal"] self._position = position self._target_position = goal def get_states_sweep(self, nx, ny): x = np.linspace(-4, 4, nx) y = np.linspace(-4, 4, ny) xv, yv = np.meshgrid(x, y) states = np.stack((xv, yv), axis=2).reshape((-1, 2)) return states def render(self, close=False): if close: self.drawer = None return if self.drawer is None or self.drawer.terminated: self.drawer = PygameViewer( self.render_size, self.render_size, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), render_onscreen=self.render_onscreen, ) self.drawer.fill(Color('white')) if self.render_target: self.drawer.draw_solid_circle( self._target_position, self.target_radius, Color('green'), ) self.drawer.draw_solid_circle( self._position, self.ball_radius, Color('blue'), ) if self.subgoals is not None: for goal in self.subgoals: self.drawer.draw_solid_circle( goal, self.ball_radius + 0.1, Color('red'), ) for p1, p2 in zip( np.concatenate(([self._position], self.subgoals[:-1]), axis=0), self.subgoals): self.drawer.draw_segment(p1, p2, Color(100, 0, 0, 10)) for wall in self.walls: self.drawer.draw_segment( wall.endpoint1, wall.endpoint2, Color('black'), ) self.drawer.draw_segment( wall.endpoint2, wall.endpoint3, Color('black'), ) self.drawer.draw_segment( wall.endpoint3, wall.endpoint4, Color('black'), ) self.drawer.draw_segment( wall.endpoint4, wall.endpoint1, Color('black'), ) self.drawer.render() self.drawer.tick(self.render_dt_msec) """Static visualization/utility methods""" @staticmethod def true_model(state, action): velocities = np.clip(action, a_min=-1, a_max=1) position = state new_position = position + velocities return np.clip( new_position, a_min=-Point2DEnv.boundary_dist, a_max=Point2DEnv.boundary_dist, ) @staticmethod def true_states(state, actions): real_states = [state] for action in actions: next_state = Point2DEnv.true_model(state, action) real_states.append(next_state) state = next_state return real_states @staticmethod def plot_trajectory(ax, states, actions, goal=None): assert len(states) == len(actions) + 1 x = states[:, 0] y = -states[:, 1] num_states = len(states) plasma_cm = plt.get_cmap('plasma') for i, state in enumerate(states): color = plasma_cm(float(i) / num_states) ax.plot( state[0], -state[1], marker='o', color=color, markersize=10, ) actions_x = actions[:, 0] actions_y = -actions[:, 1] ax.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1], scale_units='xy', angles='xy', scale=1, width=0.005) ax.quiver( x[:-1], y[:-1], actions_x, actions_y, scale_units='xy', angles='xy', scale=1, color='r', width=0.0035, ) ax.plot( [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) if goal is not None: ax.plot(goal[0], -goal[1], marker='*', color='g', markersize=15) ax.set_ylim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) ax.set_xlim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) def initialize_camera(self, init_fctn): pass