class Point2DEnv(MultitaskEnv, Serializable): """ A little 2D point whose life goal is to reach a target. """ def __init__( self, render_dt_msec=0, action_l2norm_penalty=0, # disabled for now render_onscreen=True, render_size=84, reward_type="dense", target_radius=0.5, boundary_dist=4, ball_radius=0.25, walls=None, fixed_goal=None, randomize_position_on_reset=True, images_are_rgb=False, # else black and white show_goal=True, action_limit=1, initial_position=(0, 0), **kwargs ): if walls is None: walls = [] if len(kwargs) > 0: import logging LOGGER = logging.getLogger(__name__) LOGGER.log(logging.WARNING, "WARNING, ignoring kwargs:", kwargs) self.quick_init(locals()) self.render_dt_msec = render_dt_msec self.action_l2norm_penalty = action_l2norm_penalty self.render_onscreen = render_onscreen self.render_size = render_size self.reward_type = reward_type self.target_radius = target_radius self.boundary_dist = boundary_dist self.ball_radius = ball_radius self.walls = walls self.fixed_goal = fixed_goal self.randomize_position_on_reset = randomize_position_on_reset self.images_are_rgb = images_are_rgb self.show_goal = show_goal self.action_limit = action_limit self._max_episode_steps = 50 self.max_target_distance = self.boundary_dist - self.target_radius self._target_position = None self._position = np.zeros((2)) u = self.action_limit * np.ones(2) self.action_space = spaces.Box(-u, u, dtype=np.float32) o = self.boundary_dist * np.ones(2) self.obs_range = spaces.Box(-o, o, dtype='float32') self.observation_space = spaces.Dict([ ('observation', self.obs_range), ('desired_goal', self.obs_range), ('achieved_goal', self.obs_range), ('state_observation', self.obs_range), ('state_desired_goal', self.obs_range), ('state_achieved_goal', self.obs_range), ]) self.observation_space.shape = (2,) # hack self._step = 0 self.initial_position = initial_position self.drawer = None def step(self, velocities): velocities = np.clip(velocities, a_min=-self.action_limit, a_max=self.action_limit) new_position = self._position + velocities for wall in self.walls: new_position = wall.handle_collision( self._position, new_position ) self._position = new_position self._position = np.clip( self._position, a_min=-self.boundary_dist, a_max=self.boundary_dist, ) distance_to_target = np.linalg.norm( self._position - self._target_position ) is_success = distance_to_target < self.target_radius ob = self._get_obs() reward = self.compute_reward(velocities, ob) info = { 'radius': self.target_radius, 'target_position': self._target_position, 'distance_to_target': distance_to_target, 'velocity': velocities, 'speed': np.linalg.norm(velocities), 'is_success': is_success, } self._step += 1 done = False return ob, reward, done, info def _sample_goal(self): return np.random.uniform( size=2, low=-self.max_target_distance, high=self.max_target_distance ) def reset(self): self._step = 0 if self.fixed_goal: self._target_position = np.array([0, 0]) else: self._target_position = np.random.uniform( size=2, low=-self.max_target_distance, high=self.max_target_distance ) if self.randomize_position_on_reset: self._position = np.random.uniform( size=2, low=-self.boundary_dist, high=self.boundary_dist ) else: self._position = np.array(self.initial_position) return self._get_obs() def _position_inside_wall(self, pos): for wall in self.walls: if wall.contains_point(pos): return True return False def _get_obs(self): return OrderedDict( observation=self._position.copy(), desired_goal=self._target_position.copy(), achieved_goal=self._position.copy(), 
state_observation=self._position.copy(), state_desired_goal=self._target_position.copy(), state_achieved_goal=self._position.copy(), ) def compute_rewards(self, actions, obs): achieved_goals = obs['state_observation'] desired_goals = obs['state_desired_goal'] d = np.linalg.norm(achieved_goals - desired_goals, axis=-1) if self.reward_type == "sparse": return -(d > self.target_radius).astype(np.float32) if self.reward_type == "dense": return -d def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'distance_to_target', 'is_success', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update(create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update(create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics def get_goal(self): return { 'desired_goal': self._target_position.copy(), 'state_desired_goal': self._target_position.copy(), } def _sample_position(self, low, high, realistic=True): pos = np.random.uniform(low, high) if realistic: while self._position_inside_wall(pos) is True: pos = np.random.uniform(low, high) return pos def sample_goals(self, batch_size): if not self.fixed_goal is None: goals = np.repeat( self.fixed_goal.copy()[None], batch_size, 0 ) else: goals = np.zeros((batch_size, self.obs_range.low.size)) for b in range(batch_size): goals[b, :] = self._sample_position(self.obs_range.low, self.obs_range.high,) # realistic=self.sample_realistic_goals) # goals = np.random.uniform( # self.obs_range.low, # self.obs_range.high, # size=(batch_size, self.obs_range.low.size), # ) return { 'desired_goal': goals, 'state_desired_goal': goals, } def set_position(self, pos): self._position[0] = pos[0] self._position[1] = pos[1] """Functions for ImageEnv wrapper""" def render(self, mode='human', **kwargs): width, height = 256, 256 """Returns a black and white image""" if width is not None: if width != height: raise NotImplementedError() if width != self.render_size: self.drawer = PygameViewer( screen_width=width, screen_height=height, x_bounds=(-self.boundary_dist, self.boundary_dist), y_bounds=(-self.boundary_dist, self.boundary_dist), render_onscreen=self.render_onscreen, ) self.render_size = width self._render() img = self.drawer.get_image() return np.rot90(img, k=1, axes=(0, 1)) # if self.images_are_rgb: # return img.transpose().flatten() # else: # r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2] # img = (-r + b).transpose().flatten() # return img def set_to_goal(self, goal_dict): goal = goal_dict["state_desired_goal"] self._position = goal self._target_position = goal def get_env_state(self): return self._get_obs() def set_env_state(self, state): position = state["state_observation"] goal = state["state_desired_goal"] self._position = position self._target_position = goal def _render(self, close=False): if close: self.drawer = None return if self.drawer is None or self.drawer.terminated: self.drawer = PygameViewer( self.render_size, self.render_size, x_bounds=(-self.boundary_dist, self.boundary_dist), y_bounds=(-self.boundary_dist, self.boundary_dist), render_onscreen=self.render_onscreen, ) self.drawer.fill(Color('white')) self.drawer.draw_solid_circle( np.array([10, 10]), 1, Color('red') ) if self.show_goal: self.drawer.draw_solid_circle( self._target_position, self.target_radius, Color('green'), ) self.drawer.draw_solid_circle( self._position, self.ball_radius, Color('blue'), ) for 
wall in self.walls: self.drawer.draw_segment( wall.endpoint1, wall.endpoint2, Color('black'), ) self.drawer.draw_segment( wall.endpoint2, wall.endpoint3, Color('black'), ) self.drawer.draw_segment( wall.endpoint3, wall.endpoint4, Color('black'), ) self.drawer.draw_segment( wall.endpoint4, wall.endpoint1, Color('black'), ) self.drawer.render() self.drawer.tick(self.render_dt_msec) def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'distance_to_target', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update(create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update(create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics """Static visualization/utility methods""" @staticmethod def true_model(state, action): velocities = np.clip(action, a_min=-1, a_max=1) position = state new_position = position + velocities return np.clip( new_position, a_min=-Point2DEnv.boundary_dist, a_max=Point2DEnv.boundary_dist, ) @staticmethod def true_states(state, actions): real_states = [state] for action in actions: next_state = Point2DEnv.true_model(state, action) real_states.append(next_state) state = next_state return real_states @staticmethod def plot_trajectory(ax, states, actions, goal=None): assert len(states) == len(actions) + 1 x = states[:, 0] y = -states[:, 1] num_states = len(states) plasma_cm = plt.get_cmap('plasma') for i, state in enumerate(states): color = plasma_cm(float(i) / num_states) ax.plot(state[0], -state[1], marker='o', color=color, markersize=10, ) actions_x = actions[:, 0] actions_y = -actions[:, 1] ax.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1], scale_units='xy', angles='xy', scale=1, width=0.005) ax.quiver(x[:-1], y[:-1], actions_x, actions_y, scale_units='xy', angles='xy', scale=1, color='r', width=0.0035, ) ax.plot( [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) if goal is not None: ax.plot(goal[0], -goal[1], marker='*', color='g', markersize=15) ax.set_ylim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) ax.set_xlim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) def initialize_camera(self, init_fctn): pass
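# ---------------------------------------------------------------------------
# Illustrative usage sketch (added commentary, not part of the original file):
# a short random rollout in the Point2DEnv defined above. It assumes this
# module's own imports (numpy, gym.spaces) and the MultitaskEnv base class are
# available; the helper name `_example_point2d_rollout` is hypothetical. Note
# that this file redefines `Point2DEnv` further below, so at import time the
# name ultimately refers to the last definition.
# ---------------------------------------------------------------------------
def _example_point2d_rollout(num_steps=10):
    env = Point2DEnv(randomize_position_on_reset=True)
    ob = env.reset()
    for _ in range(num_steps):
        # Actions are 2D velocities, clipped to [-action_limit, action_limit]
        # inside step().
        action = env.action_space.sample()
        ob, reward, done, info = env.step(action)
        print(reward, info['distance_to_target'], info['is_success'])
    return ob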
class Multiobj2DEnv(MultitaskEnv, Serializable): """ A little 2D point whose life goal is to reach a target. """ def __init__( self, render_dt_msec=0, action_l2norm_penalty=0, # disabled for now render_onscreen=False, render_size=84, reward_type="dense", action_scale=1.0, target_radius=0.60, boundary_dist=4, ball_radius=0.50, include_colors_in_obs=False, walls=None, num_colors=8, change_colors=True, fixed_colors=False, fixed_goal=None, randomize_position_on_reset=True, images_are_rgb=False, # else black and white show_goal=True, use_env_labels=False, num_objects=1, include_white=False, **kwargs): if walls is None: walls = [] if walls is None: walls = [] if fixed_goal is not None: fixed_goal = np.array(fixed_goal) if len(kwargs) > 0: LOGGER = logging.getLogger(__name__) LOGGER.log(logging.WARNING, "WARNING, ignoring kwargs:", kwargs) self.quick_init(locals()) self.render_dt_msec = render_dt_msec self.action_l2norm_penalty = action_l2norm_penalty self.render_onscreen = render_onscreen self.render_size = render_size self.reward_type = reward_type self.action_scale = action_scale self.target_radius = target_radius self.boundary_dist = boundary_dist self.ball_radius = ball_radius self.walls = walls self.fixed_goal = fixed_goal self.randomize_position_on_reset = randomize_position_on_reset self.images_are_rgb = images_are_rgb self.show_goal = show_goal self.num_objects = num_objects self.max_target_distance = self.boundary_dist - self.target_radius self.change_colors = change_colors self.fixed_colors = fixed_colors self.num_colors = num_colors self._target_position = None self._position = np.zeros((2)) self.include_white = include_white self.initial_pass = False self.white_passed = False if change_colors: self.randomize_colors() else: self.object_colors = [Color('blue')] u = np.ones(2) self.action_space = spaces.Box(-u, u, dtype=np.float32) self.include_colors_in_obs = include_colors_in_obs o = self.boundary_dist * np.ones(2) ohe_range = spaces.Box(np.zeros(self.num_colors), np.ones(self.num_colors), dtype='float32') if include_colors_in_obs and self.fixed_colors: high = np.concatenate((o, np.ones(self.num_colors))) low = np.concatenate((-o, np.zeros(self.num_colors))) else: high = o low = -o self.obs_range = spaces.Box(low, high, dtype='float32') self.use_env_labels = use_env_labels if self.use_env_labels: self.observation_space = spaces.Dict([ ('label', ohe_range), ('observation', self.obs_range), ('desired_goal', self.obs_range), ('achieved_goal', self.obs_range), ('state_observation', self.obs_range), ('state_desired_goal', self.obs_range), ('state_achieved_goal', self.obs_range), ]) else: self.observation_space = spaces.Dict([ ('observation', self.obs_range), ('desired_goal', self.obs_range), ('achieved_goal', self.obs_range), ('state_observation', self.obs_range), ('state_desired_goal', self.obs_range), ('state_achieved_goal', self.obs_range), ]) self.drawer = None self.render_drawer = None self.color_index = 0 self.colors = [ Color('green'), Color('red'), Color('blue'), Color('black'), Color('purple'), Color('brown'), Color('pink'), Color('orange'), Color('grey'), Color('yellow') ] def randomize_colors(self): self.object_colors = [] rgbs = np.random.randint(0, 256, (self.num_objects, 3)) for i in range(self.num_objects): if self.fixed_colors: self.object_colors.append(self.colors[self.color_index]) self.color_index = (self.color_index + 1) % 10 else: rgb = map(int, rgbs[i, :]) self.object_colors.append(Color(*rgb, 255)) def step(self, velocities): assert self.action_scale <= 1.0 
velocities = np.clip(velocities, a_min=-1, a_max=1) * self.action_scale new_position = self._position + velocities for wall in self.walls: new_position = wall.handle_collision(self._position, new_position) self._position = new_position self._position = np.clip( self._position, a_min=-self.boundary_dist, a_max=self.boundary_dist, ) distance_to_target = np.linalg.norm(self._position - self._target_position) is_success = distance_to_target < self.target_radius ob = self._get_obs() reward = self.compute_reward(velocities, ob) info = { 'radius': self.target_radius, 'target_position': self._target_position, 'distance_to_target': distance_to_target, 'velocity': velocities, 'speed': np.linalg.norm(velocities), 'is_success': is_success, } done = False return ob, reward, done, info def reset(self): if self.white_passed: self.include_white = False if self.change_colors: self.randomize_colors() self._target_position = self.sample_goal()['state_desired_goal'] if self.randomize_position_on_reset: self._position = self._sample_position( self.obs_range.low, self.obs_range.high, ) if self.initial_pass: self.white_passed = True self.initial_pass = True return self._get_obs() def _position_inside_wall(self, pos): for wall in self.walls: if wall.contains_point(pos): return True return False def _sample_position(self, low, high): pos = np.random.uniform(low, high) while self._position_inside_wall(pos) is True: pos = np.random.uniform(low, high) return pos def _get_obs(self): obs = self._position.copy() if self.use_env_labels: return dict( observation=obs, label=ohe, desired_goal=self._target_position.copy(), achieved_goal=self._position.copy(), state_observation=self._position.copy(), state_desired_goal=self._target_position.copy(), state_achieved_goal=self._position.copy(), ) else: return dict( observation=obs, desired_goal=self._target_position.copy(), achieved_goal=self._position.copy(), state_observation=self._position.copy(), state_desired_goal=self._target_position.copy(), state_achieved_goal=self._position.copy(), ) def compute_rewards(self, actions, obs): achieved_goals = obs['state_achieved_goal'] desired_goals = obs['state_desired_goal'] d = np.linalg.norm(achieved_goals - desired_goals, axis=-1) if self.reward_type == "sparse": return -(d > self.target_radius).astype(np.float32) elif self.reward_type == "dense": return -d elif self.reward_type == 'vectorized_dense': return -np.abs(achieved_goals - desired_goals) else: raise NotImplementedError() def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'radius', 'target_position', 'distance_to_target', 'velocity', 'speed', 'is_success', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update( create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update( create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics def get_goal(self): return { 'desired_goal': self._target_position.copy(), 'state_desired_goal': self._target_position.copy(), } def sample_goals(self, batch_size): if not self.fixed_goal is None: goals = np.repeat(self.fixed_goal.copy()[None], batch_size, 0) else: goals = np.zeros((batch_size, self.obs_range.low.size)) for b in range(batch_size): if batch_size > 1: logging.warning("This is very slow!") goals[b, :] = self._sample_position( self.obs_range.low, self.obs_range.high, ) return { 'desired_goal': goals, 'state_desired_goal': goals, } 
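    # Descriptive note (added comment, not original code): the goal interface
    # above follows the MultitaskEnv dict convention. `get_goal()` returns a
    # single goal as {'desired_goal': (2,) array, 'state_desired_goal': (2,)
    # array}, while `sample_goals(batch_size)` returns arrays of shape
    # (batch_size, self.obs_range.low.size) under the same two keys, e.g.
    #     goals = env.sample_goals(4)['state_desired_goal']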
def set_position(self, pos): self._position[0] = pos[0] self._position[1] = pos[1] """Functions for ImageEnv wrapper""" def get_image(self, width=None, height=None): """Returns a black and white image""" if self.drawer is None: if width != height: raise NotImplementedError() self.drawer = PygameViewer( screen_width=width, screen_height=height, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), render_onscreen=self.render_onscreen, ) self.draw(self.drawer, False) img = self.drawer.get_image() if self.images_are_rgb: return img.transpose((1, 0, 2)) else: r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2] img = (-r + b).transpose().flatten() return img def set_to_goal(self, goal_dict): goal = goal_dict["state_desired_goal"] self._position = goal self._target_position = goal def get_env_state(self): return self._get_obs() def set_env_state(self, state): position = state["state_observation"] goal = state["state_desired_goal"] self._position = position self._target_position = goal def draw(self, drawer, tick): # if self.drawer is not None: # self.drawer.fill(Color('white')) # if self.render_drawer is not None: # self.render_drawer.fill(Color('white')) drawer.fill(Color('white')) if self.show_goal: drawer.draw_solid_circle( self._target_position, self.target_radius, Color('green'), ) if not self.include_white: drawer.draw_solid_circle( self._position, self.ball_radius, self.object_colors[0], ) for wall in self.walls: drawer.draw_segment( wall.endpoint1, wall.endpoint2, Color('black'), ) drawer.draw_segment( wall.endpoint2, wall.endpoint3, Color('black'), ) drawer.draw_segment( wall.endpoint3, wall.endpoint4, Color('black'), ) drawer.draw_segment( wall.endpoint4, wall.endpoint1, Color('black'), ) drawer.render() if tick: drawer.tick(self.render_dt_msec) def render(self, close=False): if close: self.render_drawer = None return if self.render_drawer is None or self.render_drawer.terminated: self.render_drawer = PygameViewer( self.render_size, self.render_size, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), render_onscreen=True, ) self.draw(self.render_drawer, True) def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'distance_to_target', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update( create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update( create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics """Static visualization/utility methods""" @staticmethod def true_model(state, action): velocities = np.clip(action, a_min=-1, a_max=1) position = state new_position = position + velocities return np.clip( new_position, a_min=-Point2DEnv.boundary_dist, a_max=Point2DEnv.boundary_dist, ) @staticmethod def true_states(state, actions): real_states = [state] for action in actions: next_state = Point2DEnv.true_model(state, action) real_states.append(next_state) state = next_state return real_states @staticmethod def plot_trajectory(ax, states, actions, goal=None): assert len(states) == len(actions) + 1 x = states[:, 0] y = -states[:, 1] num_states = len(states) plasma_cm = plt.get_cmap('plasma') for i, state in enumerate(states): color 
= plasma_cm(float(i) / num_states) ax.plot( state[0], -state[1], marker='o', color=color, markersize=10, ) actions_x = actions[:, 0] actions_y = -actions[:, 1] ax.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1], scale_units='xy', angles='xy', scale=1, width=0.005) ax.quiver( x[:-1], y[:-1], actions_x, actions_y, scale_units='xy', angles='xy', scale=1, color='r', width=0.0035, ) ax.plot( [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) if goal is not None: ax.plot(goal[0], -goal[1], marker='*', color='g', markersize=15) ax.set_ylim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) ax.set_xlim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) def initialize_camera(self, init_fctn): pass
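# ---------------------------------------------------------------------------
# Minimal standalone sketch (added for documentation, not original code) of
# the goal-conditioned reward used by Multiobj2DEnv.compute_rewards above.
# The function name `toy_compute_rewards` and its default threshold are
# illustrative assumptions; the env's own target_radius defaults to 0.60.
# ---------------------------------------------------------------------------
import numpy as np  # numpy is already imported at the top of this module


def toy_compute_rewards(obs, reward_type="dense", target_radius=0.60):
    achieved = obs['state_achieved_goal']   # shape (batch_size, 2)
    desired = obs['state_desired_goal']     # shape (batch_size, 2)
    d = np.linalg.norm(achieved - desired, axis=-1)
    if reward_type == "sparse":
        # -1 outside the goal region, 0 once within target_radius of the goal
        return -(d > target_radius).astype(np.float32)
    elif reward_type == "dense":
        # negative Euclidean distance to the goal
        return -d
    elif reward_type == "vectorized_dense":
        # per-dimension negative absolute error
        return -np.abs(achieved - desired)
    raise NotImplementedError(reward_type)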
class Point2DEnv(MultitaskEnv, Serializable): """ A little 2D point whose life goal is to reach a target. """ def __init__( self, render_dt_msec=0, action_l2norm_penalty=0, # disabled for now render_onscreen=False, render_size=256, reward_type="dense", action_scale=1.0, target_radius=0.5, boundary_dist=4, ball_radius=0.15, walls=None, init_pos_range=None, target_pos_range=None, images_are_rgb=False, # else black and white show_goal=True, n_bins=10, use_count_reward=False, show_discrete_grid=False, **kwargs): if walls is None: walls = [] if walls is None: walls = [] # if fixed_goal is not None: # fixed_goal = np.array(fixed_goal) self.quick_init(locals()) self.render_dt_msec = render_dt_msec self.action_l2norm_penalty = action_l2norm_penalty self.render_onscreen = render_onscreen self.render_size = render_size self.reward_type = reward_type self.action_scale = action_scale self.target_radius = target_radius self.boundary_dist = boundary_dist self.ball_radius = ball_radius self.walls = walls self.n_bins = n_bins self.use_count_reward = use_count_reward self.show_discrete_grid = show_discrete_grid self.images_are_rgb = images_are_rgb self.show_goal = show_goal self.x_bins = np.linspace(-self.boundary_dist, self.boundary_dist, self.n_bins) self.y_bins = np.linspace(-self.boundary_dist, self.boundary_dist, self.n_bins) self.bin_counts = np.ones((self.n_bins + 1, self.n_bins + 1)) self.max_target_distance = self.boundary_dist - self.target_radius self._target_position = None self._position = np.zeros((2)) u = np.ones(2) self.action_space = spaces.Box(-u, u, dtype=np.float32) o = self.boundary_dist * np.ones(2) self.obs_range = spaces.Box(-o, o, dtype='float32') if not init_pos_range: self.init_pos_range = self.obs_range else: assert np.all(np.abs(init_pos_range) < boundary_dist), ( f"Init position must be" "within the boundaries of the environment: ({-boundary_dist}, {boundary_dist})" ) low, high = init_pos_range self.init_pos_range = spaces.Box(np.array(low), np.array(high), dtype='float32') if not target_pos_range: self.target_pos_range = self.obs_range else: assert np.all(np.abs(target_pos_range) < boundary_dist), ( f"Goal position must be" "within the boundaries of the environment: ({-boundary_dist}, {boundary_dist})" ) low, high = target_pos_range self.target_pos_range = spaces.Box(np.array(low), np.array(high), dtype='float32') self.observation_space = spaces.Dict([ ('observation', self.obs_range), ('onehot_observation', spaces.Box(0, 1, shape=(2 * (self.n_bins + 1), ), dtype=np.float32)), ('desired_goal', self.obs_range), ('achieved_goal', self.obs_range), ('state_observation', self.obs_range), ('state_desired_goal', self.obs_range), ('state_achieved_goal', self.obs_range), ]) self.drawer = None self.render_drawer = None self.reset() def step(self, velocities): assert self.action_scale <= 1.0 velocities = np.clip(velocities, a_min=-1, a_max=1) * self.action_scale new_position = self._position + velocities orig_new_pos = new_position.copy() for wall in self.walls: new_position = wall.handle_collision(self._position, new_position) if sum(new_position != orig_new_pos) > 1: """ Hack: sometimes you get caught on two walls at a time. If you process the input in the other direction, you might only get caught on one wall instead. 
""" new_position = orig_new_pos.copy() for wall in self.walls[::-1]: new_position = wall.handle_collision(self._position, new_position) self._position = new_position self._position = np.clip( self._position, a_min=-self.boundary_dist, a_max=self.boundary_dist, ) distance_to_target = np.linalg.norm(self._position - self._target_position) is_success = distance_to_target < self.target_radius ob = self._get_obs() x_d, y_d = ob['discrete_observation'] self.bin_counts[x_d, y_d] += 1 reward = self.compute_reward(velocities, ob) info = { 'radius': self.target_radius, 'target_position': self._target_position, 'distance_to_target': distance_to_target, # TODO (kevintli): Make this work for general mazes # 'manhattan_dist_to_target': self._medium_maze_manhattan_distance(ob['state_achieved_goal'], ob['state_desired_goal']), 'velocity': velocities, 'speed': np.linalg.norm(velocities), 'is_success': is_success, 'ext_reward': self._ext_reward, } if self.use_count_reward: info['count_bonus'] = self._count_bonus done = False return ob, reward, done, info def reset(self): # TODO: Make this more general self._target_position = self.sample_goal()['state_desired_goal'] # if self.randomize_position_on_reset: self._position = self._sample_position( # self.obs_range.low, # self.obs_range.high, self.init_pos_range.low, self.init_pos_range.high, ) return self._get_obs() def _position_inside_wall(self, pos): for wall in self.walls: if wall.contains_point(pos): return True return False def _sample_position(self, low, high): if np.all(low == high): return low pos = np.random.uniform(low, high) while self._position_inside_wall(pos) is True: pos = np.random.uniform(low, high) return pos def clear_bin_counts(self): self.bin_counts = np.ones((self.n_bins + 1, self.n_bins + 1)) def _discretize_observation(self, obs): x, y = obs['state_observation'].copy() x_d, y_d = np.digitize(x, self.x_bins), np.digitize(y, self.y_bins) return np.array([x_d, y_d]) def _get_obs(self): obs = collections.OrderedDict( observation=self._position.copy(), desired_goal=self._target_position.copy(), achieved_goal=self._position.copy(), state_observation=self._position.copy(), state_desired_goal=self._target_position.copy(), state_achieved_goal=self._position.copy(), ) # Update with discretized state pos_discrete = self._discretize_observation(obs) pos_onehot = np.eye(self.n_bins + 1)[pos_discrete] obs['discrete_observation'] = pos_discrete obs['onehot_observation'] = pos_onehot return obs def _medium_maze_manhattan_distance(self, s1, s2): # Maze wall positions left_wall_x = -self.boundary_dist / 3 left_wall_bottom = self.inner_wall_max_dist right_wall_x = self.boundary_dist / 3 right_wall_top = -self.inner_wall_max_dist s1 = s1.copy() s2 = s2.copy() if len(s1.shape) == 1: s1, s2 = s1[None], s2[None] # Since maze distances are symmetric, redefine s1,s2 for convenience # so that points in s1 are to the left of those in s2 combined = np.hstack((s1[:, None], s2[:, None])) indices = np.argmin((s1[:, 0], s2[:, 0]), axis=0) s1 = np.take_along_axis(combined, indices[:, None, None], axis=1).squeeze(axis=1) s2 = np.take_along_axis(combined, 1 - indices[:, None, None], axis=1).squeeze(axis=1) x1 = s1[:, 0] x2 = s2[:, 0] # Horizontal movement x_dist = np.abs(x2 - x1) # Vertical movement boundary_ys = [left_wall_bottom, right_wall_top, 0] boundary_xs = [left_wall_x, right_wall_x, self.boundary_dist, -4.0001] y_directions = [ 1, -1, 0 ] # +1 means need to get to bottom, -1 means need to get to top curr_y, goal_y = s1[:, 1], s2[:, 1] y_dist = np.zeros(len(s1)) 
for i in range(3): # Get all points where s1 and s2 respectively are in the current vertical section of the maze curr_in_section = x1 <= boundary_xs[i] goal_in_section = np.logical_and(boundary_xs[i - 1] < x2, x2 <= boundary_xs[i]) goal_after_section = x2 > boundary_xs[i] # Both in current section: move directly to goal mask = np.logical_and(curr_in_section, goal_in_section) y_dist += mask * np.abs(curr_y - goal_y) curr_y[mask] = goal_y[mask] # s2 is further in maze: move to next corner mask = np.logical_and( curr_in_section, np.logical_and(goal_after_section, y_directions[i] * (boundary_ys[i] - curr_y) > 0)) y_dist += mask * np.clip( y_directions[i] * (boundary_ys[i] - curr_y), 0, None) curr_y[mask] = boundary_ys[i] return x_dist + y_dist def compute_rewards(self, actions, obs, reward_type=None): reward_type = reward_type or self.reward_type achieved_goals = obs['state_achieved_goal'] desired_goals = obs['state_desired_goal'] d = np.linalg.norm(achieved_goals - desired_goals, axis=-1) if reward_type == "sparse": r = -(d > self.target_radius).astype(np.float32) elif reward_type == "dense": r = -d elif reward_type == "vectorized_dense": r = -np.abs(achieved_goals - desired_goals) elif reward_type == "medium_maze_ground_truth_manhattan": """ # Use maze distance from current position to goal as the negative reward. # This is specifically for the medium Maze-v0 env, which has two vertical walls. """ r = -self._medium_maze_manhattan_distance(achieved_goals, desired_goals) elif reward_type == "laplace_smoothing": # Laplace smoothing: 1 within the goal region, 1/(n+2) at all other states # (where n is the number of visitations) r = np.zeros(d.shape) pos_d = obs['discrete_observation'] r = 1 / (self.bin_counts[pos_d[:, 0], pos_d[:, 1]] + 2) r[d <= self.target_radius] = 1 elif reward_type == "none": r = np.zeros(d.shape) else: raise NotImplementedError( f"Unrecognized reward type: {reward_type}") if self.use_count_reward: # TODO: Add different count based strategies pos_d = obs['discrete_observation'] self._ext_reward = r.copy() self._count_bonus = 1 / np.sqrt(self.bin_counts[pos_d[:, 0], pos_d[:, 1]]) r += self._count_bonus else: self._ext_reward = r.copy() return r def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'radius', 'target_position', 'distance_to_target', 'velocity', 'speed', 'is_success', 'ext_reward', 'count_bonus', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update( create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update( create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics def get_goal(self): return { 'desired_goal': self._target_position.copy(), 'state_desired_goal': self._target_position.copy(), } def sample_goals(self, batch_size): # if self.fixed_goal: # goals = np.repeat( # self.fixed_goal.copy()[None], # batch_size, # 0) # else: goals = np.zeros((batch_size, self.obs_range.low.size)) for b in range(batch_size): if batch_size > 1: logging.warning("This is very slow!") goals[b, :] = self._sample_position( # self.obs_range.low, # self.obs_range.high, self.target_pos_range.low, self.target_pos_range.high, ) return { 'desired_goal': goals, 'state_desired_goal': goals, } def set_position(self, pos): self._position[0] = pos[0] self._position[1] = pos[1] """Functions for ImageEnv wrapper""" def get_image(self, width=None, height=None): """Returns a black and 
white image""" if self.drawer is None: if width != height: raise NotImplementedError() self.drawer = PygameViewer( screen_width=width, screen_height=height, # TODO(justinvyu): Action scale = 1 breaks rendering, why? # x_bounds=(-self.boundary_dist - self.ball_radius, # self.boundary_dist + self.ball_radius), # y_bounds=(-self.boundary_dist - self.ball_radius, # self.boundary_dist + self.ball_radius), x_bounds=(-self.boundary_dist, self.boundary_dist), y_bounds=(-self.boundary_dist, self.boundary_dist), render_onscreen=self.render_onscreen, ) self.draw(self.drawer) img = self.drawer.get_image() if self.images_are_rgb: return img.transpose((1, 0, 2)) else: r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2] img = (-r + b).transpose().flatten() return img def set_to_goal(self, goal_dict): goal = goal_dict["state_desired_goal"] self._position = goal self._target_position = goal def get_env_state(self): return self._get_obs() def set_env_state(self, state): position = state["state_observation"] goal = state["state_desired_goal"] self._position = position self._target_position = goal def draw(self, drawer): drawer.fill(Color('white')) if self.show_discrete_grid: for x in self.x_bins: drawer.draw_segment((x, -self.boundary_dist), (x, self.boundary_dist), Color(220, 220, 220, 25), aa=False) for y in self.y_bins: drawer.draw_segment((-self.boundary_dist, y), (self.boundary_dist, y), Color(220, 220, 220, 25), aa=False) if self.show_goal: drawer.draw_solid_circle( self._target_position, self.target_radius, Color('green'), ) try: drawer.draw_solid_circle( self._position, self.ball_radius, Color('blue'), ) except ValueError as e: print('\n\n RENDER ERROR \n\n') for wall in self.walls: drawer.draw_segment( wall.endpoint1, wall.endpoint2, Color('black'), ) drawer.draw_segment( wall.endpoint2, wall.endpoint3, Color('black'), ) drawer.draw_segment( wall.endpoint3, wall.endpoint4, Color('black'), ) drawer.draw_segment( wall.endpoint4, wall.endpoint1, Color('black'), ) drawer.render() def render(self, mode='human', width=None, height=None, close=False): if close: self.render_drawer = None return if mode == 'rgb_array': return self.get_image(width=width, height=height) if self.render_drawer is None or self.render_drawer.terminated: self.render_drawer = PygameViewer( self.render_size, self.render_size, # x_bounds=(-self.boundary_dist-self.ball_radius, # self.boundary_dist+self.ball_radius), # y_bounds=(-self.boundary_dist-self.ball_radius, # self.boundary_dist+self.ball_radius), x_bounds=(-self.boundary_dist, self.boundary_dist), y_bounds=(-self.boundary_dist, self.boundary_dist), render_onscreen=True, ) self.draw(self.render_drawer) self.render_drawer.tick(self.render_dt_msec) if mode != 'interactive': self.render_drawer.check_for_exit() # def get_diagnostics(self, paths, prefix=''): # statistics = OrderedDict() # for stat_name in ('distance_to_target', ): # stat_name = stat_name # stat = get_stat_in_paths(paths, 'env_infos', stat_name) # statistics.update(create_stats_ordered_dict( # '%s%s' % (prefix, stat_name), # stat, # always_show_all_stats=True, # )) # statistics.update(create_stats_ordered_dict( # 'Final %s%s' % (prefix, stat_name), # [s[-1] for s in stat], # always_show_all_stats=True, # )) # return statistics """Static visualization/utility methods""" @staticmethod def true_model(state, action): velocities = np.clip(action, a_min=-1, a_max=1) position = state new_position = position + velocities return np.clip( new_position, a_min=-Point2DEnv.boundary_dist, a_max=Point2DEnv.boundary_dist, ) 
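    # Caveat (added comment, not original code): `true_model` above and the
    # static plotting helpers below reference `Point2DEnv.boundary_dist`, but
    # in this file boundary_dist only appears to be set as an instance
    # attribute in __init__. As written, calling these @staticmethods would
    # raise AttributeError unless a class-level boundary_dist is defined
    # elsewhere.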
    @staticmethod
    def true_states(state, actions):
        real_states = [state]
        for action in actions:
            next_state = Point2DEnv.true_model(state, action)
            real_states.append(next_state)
            state = next_state
        return real_states

    @staticmethod
    def plot_trajectory(ax, states, actions, goal=None):
        assert len(states) == len(actions) + 1
        x = states[:, 0]
        y = -states[:, 1]
        num_states = len(states)
        plasma_cm = plt.get_cmap('plasma')
        for i, state in enumerate(states):
            color = plasma_cm(float(i) / num_states)
            ax.plot(
                state[0],
                -state[1],
                marker='o',
                color=color,
                markersize=10,
            )
        actions_x = actions[:, 0]
        actions_y = -actions[:, 1]
        ax.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1],
                  scale_units='xy', angles='xy', scale=1, width=0.005)
        ax.quiver(
            x[:-1],
            y[:-1],
            actions_x,
            actions_y,
            scale_units='xy',
            angles='xy',
            scale=1,
            color='r',
            width=0.0035,
        )
        ax.plot(
            [-Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist],
            [Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist],
            color='k', linestyle='-',
        )
        ax.plot(
            [Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist],
            [Point2DEnv.boundary_dist, Point2DEnv.boundary_dist],
            color='k', linestyle='-',
        )
        ax.plot(
            [Point2DEnv.boundary_dist, Point2DEnv.boundary_dist],
            [Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist],
            color='k', linestyle='-',
        )
        ax.plot(
            [Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist],
            [-Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist],
            color='k', linestyle='-',
        )
        if goal is not None:
            ax.plot(goal[0], -goal[1], marker='*', color='g', markersize=15)
        ax.set_ylim(
            -Point2DEnv.boundary_dist - 1,
            Point2DEnv.boundary_dist + 1,
        )
        ax.set_xlim(
            -Point2DEnv.boundary_dist - 1,
            Point2DEnv.boundary_dist + 1,
        )

    def initialize_camera(self, init_fctn):
        pass
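# ---------------------------------------------------------------------------
# Illustrative sketch (added commentary, not original code) of the count-based
# exploration bonus used by the Point2DEnv variant above: positions are
# discretized with np.digitize, visit counts accumulate in an
# (n_bins + 1, n_bins + 1) table, and the bonus added to the reward is
# 1/sqrt(count). The names `toy_count_bonus`, `x_bins`, `y_bins`, and
# `bin_counts` are hypothetical stand-ins for the corresponding env attributes.
# ---------------------------------------------------------------------------
import numpy as np  # already imported at module level


def toy_count_bonus(position, bin_counts, x_bins, y_bins):
    # np.digitize returns an index in [0, n_bins] for each coordinate, which
    # is why the count table has n_bins + 1 entries per axis.
    x_d = np.digitize(position[0], x_bins)
    y_d = np.digitize(position[1], y_bins)
    bin_counts[x_d, y_d] += 1
    return 1.0 / np.sqrt(bin_counts[x_d, y_d])


# Example usage under assumed parameters (boundary_dist=4, n_bins=10):
#     x_bins = y_bins = np.linspace(-4, 4, 10)
#     bin_counts = np.ones((11, 11))
#     bonus = toy_count_bonus(np.array([0.3, -1.2]), bin_counts, x_bins, y_bins)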
class PickAndPlaceEnv(MultitaskEnv, Serializable): """ A simple env where a 'cursor' robot can push objects around. TODO: refactor to have base class shared with point2d.py code TODO: rather than recreating a drawer, just save the previous drawers """ def __init__( self, num_objects=2, # Environment dynamics action_scale=1.0, ball_radius=.75, boundary_dist=4, object_radius=0.50, min_grab_distance=0.5, walls=None, # Rewards action_l2norm_penalty=0, reward_type="dense", success_threshold=0.60, # Reset settings fixed_goal=None, fixed_init_position=None, init_position_strategy='random', start_near_object=False, # Visualization settings images_are_rgb=True, render_dt_msec=0, render_onscreen=False, render_size=84, get_image_base_render_size=None, show_goal=True, # Goal sampling goal_samplers=None, goal_sampling_mode='random', num_presampled_goals=10000, object_reward_only=False, ): """ :param get_image_base_render_size: If set, then the drawer will always generate images according to this size, and (smoothly) downsampled images will be returned by `get_image` """ walls = walls or [] if fixed_goal is not None: fixed_goal = np.array(fixed_goal) if action_scale <= 0: raise ValueError("Invalid action scale: {}".format(action_scale)) if init_position_strategy not in { 'random', 'on_random_object', 'fixed' }: raise ValueError('Invalid init position strategy: {}'.format( init_position_strategy)) self.quick_init(locals()) self.render_dt_msec = render_dt_msec self.action_l2norm_penalty = action_l2norm_penalty self.render_onscreen = render_onscreen self.render_size = render_size self.reward_type = reward_type self.action_scale = action_scale self.success_threshold = success_threshold self.boundary_dist = boundary_dist self.ball_radius = ball_radius self.walls = walls self.fixed_goal = fixed_goal self.init_position_strategy = init_position_strategy self.images_are_rgb = images_are_rgb self._show_goal = show_goal self._all_objects = [ Object( position=np.zeros((2, )), color=Object.IDX_TO_COLOR[i], radius=ball_radius if i == 0 else object_radius, min_pos=-self.boundary_dist, max_pos=self.boundary_dist, ) for i in range(num_objects + 1) ] self.min_grab_distance = min_grab_distance u = np.ones(3) self.action_space = spaces.Box(-u, u, dtype=np.float32) o = self.boundary_dist * np.ones(2 * (num_objects + 1)) self.num_objects = num_objects self.obs_range = spaces.Box(-o, o, dtype='float32') self.observation_space = spaces.Dict([ ('observation', self.obs_range), ('desired_goal', self.obs_range), ('achieved_goal', self.obs_range), ('state_observation', self.obs_range), ('state_desired_goal', self.obs_range), ('state_achieved_goal', self.obs_range), ]) if get_image_base_render_size: base_width, base_height = get_image_base_render_size self._drawer = PygameViewer( screen_width=base_width, screen_height=base_height, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), render_onscreen=self.render_onscreen, ) self._fixed_get_image_render_size = True else: self._drawer = None self._fixed_get_image_render_size = False self._render_drawer = None if fixed_init_position is None: fixed_init_position = np.zeros_like(self.obs_range.low) self._fixed_init_position = fixed_init_position self._presampled_goals = None goal_samplers = goal_samplers or {} goal_samplers['fixed'] = PickAndPlaceEnv._sample_fixed_goal goal_samplers['presampled'] = PickAndPlaceEnv._sample_presampled_goals goal_samplers['random'] = 
PickAndPlaceEnv._sample_random_feasible_goals self._custom_goal_sampler = goal_samplers self._num_presampled_goals = num_presampled_goals if goal_sampling_mode is None: if fixed_goal: goal_sampling_mode = 'fixed' else: goal_sampling_mode = 'presampled' self.goal_sampling_mode = goal_sampling_mode self.object_reward_only = object_reward_only @property def cursor(self): return self._all_objects[0] @property def objects(self): return self._all_objects[1:] def step(self, raw_action): velocities = raw_action[:2] grab = raw_action[2] > 0 velocities = np.clip(velocities, a_min=-1, a_max=1) * self.action_scale old_position = self.cursor.position.copy() new_position = old_position + velocities orig_new_pos = new_position.copy() for wall in self.walls: new_position = wall.handle_collision(old_position, new_position) if sum(new_position != orig_new_pos) > 1: """ Hack: sometimes you get caught on two walls at a time. If you process the input in the other direction, you might only get caught on one wall instead. """ new_position = orig_new_pos.copy() for wall in self.walls[::-1]: new_position = wall.handle_collision(old_position, new_position) if grab: grabbed_obj = self._grab_object() if grabbed_obj: grabbed_obj.move(velocities) self.cursor.move(new_position - old_position) ob = self._get_obs() reward = self.compute_reward(velocities, ob) info = self._get_info() done = False return ob, reward, done, info def _get_info(self): distance_to_target = self.cursor.distance_to_target() is_success = distance_to_target < self.success_threshold info = { 'distance_to_target_cursor': distance_to_target, 'success_cursor': is_success, } for i, obj in enumerate(self.objects): obj_distance = obj.distance_to_target() success = obj_distance < self.success_threshold info['distance_to_target_obj_{}'.format(i)] = obj_distance info['success_obj_{}'.format(i)] = success return info def reset(self): goal = self.sample_goal()['state_desired_goal'] self._set_target_positions(goal) if self.init_position_strategy == 'random': init_pos = ( self.observation_space.spaces['state_observation'].sample()) elif self.init_position_strategy == 'fixed': init_pos = self._fixed_init_position.copy() elif self.init_position_strategy == 'on_random_object': init_pos = ( self.observation_space.spaces['state_observation'].sample()) start_i = 2 + 2 * random.randint(0, len(self._all_objects) - 2) end_i = start_i + 2 init_pos[:2] = init_pos[start_i:end_i].copy() else: raise ValueError(self.init_position_strategy) self._set_positions(init_pos) return self._get_obs() def _position_inside_wall(self, pos): for wall in self.walls: if wall.contains_point(pos): return True return False def _sample_position(self, low, high): pos = np.random.uniform(low, high) while self._position_inside_wall(pos) is True: pos = np.random.uniform(low, high) return pos def _get_obs(self): positions = self._get_positions() target_positions = self._get_target_positions() return dict( observation=positions.copy(), desired_goal=target_positions.copy(), achieved_goal=positions.copy(), state_observation=positions.copy(), state_desired_goal=target_positions.copy(), state_achieved_goal=positions.copy(), ) def compute_rewards(self, actions, obs): achieved_goals = obs['state_achieved_goal'] desired_goals = obs['state_desired_goal'] if self.object_reward_only: achieved_goals = achieved_goals[:, 2:] desired_goals = desired_goals[:, 2:] d = np.linalg.norm(achieved_goals - desired_goals, axis=-1) if self.reward_type == "sparse": return -(d > self.success_threshold * 
len(self._all_objects)).astype(np.float32) elif self.reward_type == "dense": return -d elif self.reward_type == "dense_l1": return -np.linalg.norm( achieved_goals - desired_goals, axis=-1, ord=1) elif self.reward_type == 'vectorized_dense': return -np.abs(achieved_goals - desired_goals) else: raise NotImplementedError() def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'distance_to_target', 'is_success', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update( create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update( create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) for i, obj in enumerate(self.objects): stat_name = 'distance_to_target_obj_{}'.format(i) stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update( create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update( create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics def get_goal(self): return { 'desired_goal': self._get_target_positions(), 'state_desired_goal': self._get_target_positions(), } def set_goal(self, goal): self._set_target_positions(goal['state_desired_goal']) def sample_goals(self, batch_size): goal_sampler = self._custom_goal_sampler[self.goal_sampling_mode] return goal_sampler(self, batch_size) def _sample_random_feasible_goals(self, batch_size): if len(self.walls) > 0: goals = np.zeros((batch_size, self.obs_range.low.size)) for b in range(batch_size): goals[b, :] = self._sample_position( self.obs_range.low, self.obs_range.high, ) else: goals = np.random.uniform( self.obs_range.low, self.obs_range.high, size=(batch_size, self.obs_range.low.size), ) state_goals = goals return { 'desired_goal': goals, 'state_desired_goal': state_goals, } def _sample_fixed_goal(self, batch_size): goals = state_goals = np.repeat(self.fixed_goal.copy()[None], batch_size, 0) return { 'desired_goal': goals, 'state_desired_goal': state_goals, } def _sample_presampled_goals(self, batch_size): if self._presampled_goals is None: self._presampled_goals = self._sample_random_feasible_goals( self._num_presampled_goals) random_idxs = np.random.choice(len( list(self._presampled_goals.values())[0]), size=batch_size) goals = self._presampled_goals['desired_goal'][random_idxs] state_goals = self._presampled_goals['state_desired_goal'][random_idxs] return { 'desired_goal': goals, 'state_desired_goal': state_goals, } def _set_positions(self, positions): for i, obj in enumerate(self._all_objects): start_i = i * 2 end_i = i * 2 + 2 obj.position = positions[start_i:end_i] def _set_target_positions(self, target_positions): for i, obj in enumerate(self._all_objects): start_i = i * 2 end_i = i * 2 + 2 obj.target_position = target_positions[start_i:end_i] def _get_positions(self): return np.concatenate([obj.position for obj in self._all_objects]) def _get_target_positions(self): return np.concatenate( [obj.target_position for obj in self._all_objects]) """Functions for ImageEnv wrapper""" def get_image(self, width=None, height=None): """Returns a black and white image""" if self._drawer is None or (not self._fixed_get_image_render_size and (self._drawer.width != width or self._drawer.height != height)): if width != height: raise NotImplementedError() self._drawer = PygameViewer( screen_width=width, 
screen_height=height, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), render_onscreen=self.render_onscreen, ) self._draw(self._drawer) if width and height: wh_size = (width, height) else: wh_size = None img = self._drawer.get_image(wh_size) if self.images_are_rgb: return img.transpose((1, 0, 2)) else: r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2] img = (r + g + b).transpose() / 3 return img def set_to_goal(self, goal_dict): goal = goal_dict["state_desired_goal"] self._set_positions(goal) def get_env_state(self): return self._get_obs() def set_env_state(self, state): self._set_positions(state["state_observation"]) self._set_target_positions(state["state_desired_goal"]) def render(self, mode='human', close=False): if close: self._render_drawer = None return if self._render_drawer is None or self._render_drawer.terminated: self._render_drawer = PygameViewer( self.render_size, self.render_size, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), render_onscreen=True, ) self._draw(self._render_drawer) self._render_drawer.tick(self.render_dt_msec) if mode != 'interactive': self._render_drawer.check_for_exit() def _draw(self, drawer): drawer.fill(Color('black')) for obj in self._all_objects: obj.draw(drawer, draw_target_position=self._show_goal) for wall in self.walls: draw_wall(drawer, wall) drawer.render() def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'distance_to_target_obj_{}'.format(i) for i in range(len(self.objects)) ] + ['success_obj_{}'.format(i) for i in range(len(self.objects))] + [ 'distance_to_target_cursor', 'success_cursor', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update( create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update( create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics def _grab_object(self): closest_object = None min_dis = None for obj in self.objects: distance = obj.distance(self.cursor.position) if ((distance <= self.min_grab_distance) and (closest_object is None or distance < min_dis)): min_dis = distance closest_object = obj return closest_object def goal_conditioned_diagnostics(self, paths, goals): statistics = OrderedDict() stat_to_lists = defaultdict(list) for path, goal in zip(paths, goals): difference = path['observations'] - goal for i in range(len(self._all_objects)): distance = np.linalg.norm(difference[:, 2 * i:2 * i + 2], axis=-1) distance_key = 'distance_to_target_obj_{}'.format(i) stat_to_lists[distance_key].append(distance) success_key = 'success_obj_{}'.format(i) stat_to_lists[success_key].append( distance < self.success_threshold) for stat_name, stat_list in stat_to_lists.items(): statistics.update( create_stats_ordered_dict( stat_name, stat_list, always_show_all_stats=True, )) statistics.update( create_stats_ordered_dict( '{}/final'.format(stat_name), [s[-1:] for s in stat_list], always_show_all_stats=True, exclude_max_min=True, )) statistics.update( create_stats_ordered_dict( '{}/initial'.format(stat_name), [s[:1] for s in stat_list], always_show_all_stats=True, exclude_max_min=True, )) return statistics
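# ---------------------------------------------------------------------------
# Descriptive sketch (added commentary, not original code) of the flat state
# layout used by PickAndPlaceEnv above: indices [0:2] hold the cursor's (x, y)
# and indices [2*i:2*i+2], i >= 1, hold the (i-1)-th object's (x, y); goal
# vectors use the same layout. The helper name `split_pick_and_place_state`
# is hypothetical.
# ---------------------------------------------------------------------------
import numpy as np  # already imported at module level


def split_pick_and_place_state(state, num_objects):
    """Return (cursor_xy, [object_xy, ...]) from a flat state vector."""
    assert state.shape[-1] == 2 * (num_objects + 1)
    cursor = state[..., 0:2]
    objects = [state[..., 2 * (i + 1):2 * (i + 2)] for i in range(num_objects)]
    return cursor, objects


# e.g. per-object distances to the goal, mirroring goal_conditioned_diagnostics:
#     cursor, objs = split_pick_and_place_state(obs['state_observation'], 2)
#     _, goal_objs = split_pick_and_place_state(obs['state_desired_goal'], 2)
#     dists = [np.linalg.norm(o - g) for o, g in zip(objs, goal_objs)]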
class Point2DEnv(MultitaskEnv, Serializable): """ A little 2D point whose life goal is to reach a target. """ def __init__( self, render_dt_msec=0, action_l2norm_penalty=0, # disabled for now render_onscreen=False, render_size=200, render_target=True, reward_type="dense", norm_order=2, action_scale=1.0, target_radius=0.50, boundary_dist=4, ball_radius=0.50, walls=[], fixed_goal=None, goal_low=None, goal_high=None, ball_low=None, ball_high=None, randomize_position_on_reset=True, sample_realistic_goals=False, images_are_rgb=False, # else black and white show_goal=True, v_func_heatmap_bounds=None, **kwargs): if walls is None: walls = [] if len(kwargs) > 0: import logging LOGGER = logging.getLogger(__name__) LOGGER.log(logging.WARNING, "WARNING, ignoring kwargs:", kwargs) self.quick_init(locals()) self.render_dt_msec = render_dt_msec self.action_l2norm_penalty = action_l2norm_penalty self.render_onscreen = render_onscreen self.render_size = render_size self.render_target = render_target self.reward_type = reward_type self.norm_order = norm_order self.action_scale = action_scale self.target_radius = target_radius self.boundary_dist = boundary_dist self.ball_radius = ball_radius self.walls = walls self.fixed_goal = fixed_goal self.randomize_position_on_reset = randomize_position_on_reset self.sample_realistic_goals = sample_realistic_goals if self.fixed_goal is not None: self.fixed_goal = np.array(self.fixed_goal) self.images_are_rgb = images_are_rgb self.show_goal = show_goal self.max_target_distance = self.boundary_dist - self.target_radius self._target_position = None self._position = np.zeros((2)) u = np.ones(2) self.action_space = spaces.Box(-u, u, dtype=np.float32) o = self.boundary_dist * np.ones(2) self.obs_range = spaces.Box(-o, o, dtype='float32') if goal_low is None: goal_low = -o if goal_high is None: goal_high = o self.goal_range = spaces.Box(np.array(goal_low), np.array(goal_high), dtype='float32') if ball_low is None: ball_low = -o if ball_high is None: ball_high = o self.ball_range = spaces.Box(np.array(ball_low), np.array(ball_high), dtype='float32') self.observation_space = spaces.Dict([ ('observation', self.obs_range), ('desired_goal', self.goal_range), ('achieved_goal', self.obs_range), ('state_observation', self.obs_range), ('state_desired_goal', self.goal_range), ('state_achieved_goal', self.obs_range), ]) self.drawer = None self.subgoals = None self.v_func_heatmap_bounds = v_func_heatmap_bounds def step(self, velocities): assert self.action_scale <= 1.0 velocities = np.clip(velocities, a_min=-1, a_max=1) * self.action_scale new_position = self._position + velocities for wall in self.walls: new_position = wall.handle_collision(self._position, new_position) self._position = new_position self._position = np.clip( self._position, a_min=-self.boundary_dist, a_max=self.boundary_dist, ) distance_to_target = np.linalg.norm(self._position - self._target_position) below_wall = self._position[1] >= 2.0 is_success = distance_to_target < 1.0 is_success_2 = distance_to_target < 1.5 is_success_3 = distance_to_target < 1.75 ob = self._get_obs() reward = self.compute_reward(velocities, ob) info = { 'radius': self.target_radius, 'target_position': self._target_position, 'distance_to_target': distance_to_target, 'velocity': velocities, 'speed': np.linalg.norm(velocities), 'is_success': is_success, 'is_success_2': is_success_2, 'is_success_3': is_success_3, 'below_wall': below_wall, } done = False return ob, reward, done, info def initialize_camera(self, camera): pass def reset(self): 
self._target_position = self.sample_goal_for_rollout( )['state_desired_goal'] if self.randomize_position_on_reset: self._position = self._sample_position(self.ball_range.low, self.ball_range.high, realistic=True) self.subgoals = None return self._get_obs() def _position_outside_arena(self, pos): return not self.obs_range.contains(pos) def _position_inside_wall(self, pos): for wall in self.walls: if wall.contains_point(pos): return True return False def realistic_state_np(self, state): return not self._position_inside_wall(state) def realistic_goals(self, g, use_double=False): collision = None for wall in self.walls: wall_collision = wall.contains_points_pytorch(g) wall_collision_result = wall_collision[:, 0] for i in range(wall_collision.shape[1]): wall_collision_result = wall_collision_result * wall_collision[:, i] if collision is None: collision = wall_collision_result else: collision = collision | wall_collision_result result = collision ^ 1 # g_np = ptu.get_numpy(g) # for i in range(g_np.shape[0]): # print(not self._position_inside_wall(g_np[i]), ptu.get_numpy(result[i])) return result.float() def _sample_position(self, low, high, realistic=True): pos = np.random.uniform(low, high) if realistic: while self._position_inside_wall(pos) is True: pos = np.random.uniform(low, high) return pos def _get_obs(self): return dict( observation=self._position.copy(), desired_goal=self._target_position.copy(), achieved_goal=self._position.copy(), state_observation=self._position.copy(), state_desired_goal=self._target_position.copy(), state_achieved_goal=self._position.copy(), ) def compute_rewards(self, actions, obs, prev_obs=None): achieved_goals = obs['state_achieved_goal'] desired_goals = obs['state_desired_goal'] d = np.linalg.norm(achieved_goals - desired_goals, ord=self.norm_order, axis=-1) if self.reward_type == "sparse": return -(d > self.target_radius).astype(np.float32) elif self.reward_type == "dense": return -d elif self.reward_type == 'vectorized_dense': return -np.abs(achieved_goals - desired_goals) def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'distance_to_target', 'below_wall', 'is_success', 'is_success_2', 'is_success_3', 'velocity', 'speed', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update( create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update( create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics def get_goal(self): return { 'desired_goal': self._target_position.copy(), 'state_desired_goal': self._target_position.copy(), } def set_goal(self, goal): self._target_position = goal['state_desired_goal'] def sample_goal_for_rollout(self): if not self.fixed_goal is None: goal = np.copy(self.fixed_goal) else: goal = self._sample_position(self.goal_range.low, self.goal_range.high, realistic=self.sample_realistic_goals) return { 'desired_goal': goal, 'state_desired_goal': goal, } def sample_goals(self, batch_size): # goals = np.random.uniform( # self.obs_range.low, # self.obs_range.high, # size=(batch_size, self.obs_range.low.size), # ) goals = np.array([ self._sample_position(self.obs_range.low, self.obs_range.high, realistic=self.sample_realistic_goals) for _ in range(batch_size) ]) return { 'desired_goal': goals, 'state_desired_goal': goals, } def set_position(self, pos): self._position[0] = pos[0] self._position[1] = pos[1] """Functions for ImageEnv 
wrapper""" def get_image(self, width=None, height=None): """Returns a black and white image""" if width is not None: if width != height: raise NotImplementedError() if width != self.render_size: self.drawer = PygameViewer( screen_width=width, screen_height=height, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), render_onscreen=self.render_onscreen, ) self.render_size = width self.render() img = self.drawer.get_image() if self.images_are_rgb: return img.transpose((1, 0, 2)) else: r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2] img = (-r + b) return img def update_subgoals(self, subgoals, subgoals_reproj=None, subgoal_v_vals=None): self.subgoals = subgoals def set_to_goal(self, goal_dict): goal = goal_dict["state_desired_goal"] self._position = goal self._target_position = goal def get_env_state(self): return self._get_obs() def set_env_state(self, state): position = state["state_observation"] goal = state["state_desired_goal"] self._position = position self._target_position = goal def get_states_sweep(self, nx, ny): x = np.linspace(-4, 4, nx) y = np.linspace(-4, 4, ny) xv, yv = np.meshgrid(x, y) states = np.stack((xv, yv), axis=2).reshape((-1, 2)) return states def render(self, close=False): if close: self.drawer = None return if self.drawer is None or self.drawer.terminated: self.drawer = PygameViewer( self.render_size, self.render_size, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), render_onscreen=self.render_onscreen, ) self.drawer.fill(Color('white')) if self.render_target: self.drawer.draw_solid_circle( self._target_position, self.target_radius, Color('green'), ) self.drawer.draw_solid_circle( self._position, self.ball_radius, Color('blue'), ) if self.subgoals is not None: for goal in self.subgoals: self.drawer.draw_solid_circle( goal, self.ball_radius + 0.1, Color('red'), ) for p1, p2 in zip( np.concatenate(([self._position], self.subgoals[:-1]), axis=0), self.subgoals): self.drawer.draw_segment(p1, p2, Color(100, 0, 0, 10)) for wall in self.walls: self.drawer.draw_segment( wall.endpoint1, wall.endpoint2, Color('black'), ) self.drawer.draw_segment( wall.endpoint2, wall.endpoint3, Color('black'), ) self.drawer.draw_segment( wall.endpoint3, wall.endpoint4, Color('black'), ) self.drawer.draw_segment( wall.endpoint4, wall.endpoint1, Color('black'), ) self.drawer.render() self.drawer.tick(self.render_dt_msec) """Static visualization/utility methods""" @staticmethod def true_model(state, action): velocities = np.clip(action, a_min=-1, a_max=1) position = state new_position = position + velocities return np.clip( new_position, a_min=-Point2DEnv.boundary_dist, a_max=Point2DEnv.boundary_dist, ) @staticmethod def true_states(state, actions): real_states = [state] for action in actions: next_state = Point2DEnv.true_model(state, action) real_states.append(next_state) state = next_state return real_states @staticmethod def plot_trajectory(ax, states, actions, goal=None): assert len(states) == len(actions) + 1 x = states[:, 0] y = -states[:, 1] num_states = len(states) plasma_cm = plt.get_cmap('plasma') for i, state in enumerate(states): color = plasma_cm(float(i) / num_states) ax.plot( state[0], -state[1], marker='o', color=color, markersize=10, ) actions_x = actions[:, 0] actions_y = -actions[:, 1] ax.quiver(x[:-1], y[:-1], x[1:] - 
x[:-1], y[1:] - y[:-1], scale_units='xy', angles='xy', scale=1, width=0.005) ax.quiver( x[:-1], y[:-1], actions_x, actions_y, scale_units='xy', angles='xy', scale=1, color='r', width=0.0035, ) ax.plot( [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) if goal is not None: ax.plot(goal[0], -goal[1], marker='*', color='g', markersize=15) ax.set_ylim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) ax.set_xlim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) def initialize_camera(self, init_fctn): pass
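# A minimal usage sketch for the Point2DEnv variant defined above. The helper
# name `_example_random_rollout` is hypothetical and purely illustrative; it
# assumes the module's existing numpy import (`np`) and that the MultitaskEnv
# base class provides the single-sample `compute_reward` wrapper around
# `compute_rewards`, which the class above already relies on.
def _example_random_rollout(num_steps=50):
    env = Point2DEnv(reward_type="dense", sample_realistic_goals=False)
    ob = env.reset()
    total_reward = 0.0
    for _ in range(num_steps):
        # Actions are clipped to [-1, 1] and scaled by `action_scale` in step().
        action = np.random.uniform(-1, 1, size=2)
        ob, reward, done, info = env.step(action)
        total_reward += reward
        if info['is_success']:
            break
    return total_reward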
class Point2DEnv(MultitaskEnv, Serializable): """A little 2D point whose life goal is to reach a target. Attributes ---------- render_dt_msec : float seconds before the next frame in the image is rendered action_l2norm_penalty : float penalty scale for the actions by the agent render_onscreen : bool whether to include the rendering visually (instead of simply using the image for the observation) render_size : int width/length number of pixels in the rendered image reward_type : str the reward type. Must be one of: "sparse", "dense", or "vectorized_dense" action_scale : float the multiple from action to velocity target_radius : float the radius of the targeted position when being rendered boundary_dist : float the distance from the center to the boundary ball_radius : float the radius of the agent when being rendered walls : list of object a list of wall objects describing the position of the walls and how to handle collisions with said walls fixed_goal : [float, float] or None the goal to use. If set to None, it is picked randomly. randomize_position_on_reset : bool whether to initialize the position of the agent randomly images_are_rgb : bool specifies whether the image is RGB. Otherwise, it's black and white show_goal : bool whether to render the goal(s) images_in_obs : bool whether to use the image in the observation image_size : int number of elements in the image. Set to 0 if images are not being used. action_space : gym.spaces.* the action space of the environment obs_range : gym.spaces.* the range of the initial position of the agent context_space : gym.spaces.* the context space of the environment observation_space : gym.spaces.* the observation space of the environment drawer : multiworld.envs.pygame.pygame_viewer.PygameViewer or None The drawer for the images of the environment. Set to None if images are not being used. render_drawer : multiworld.envs.pygame.pygame_viewer.PygameViewer or None The drawer for the images of the environment if the environment is being rendered. Set to None if images are not being used. horizon : int environment time horizon t : int number of steps since the start of the most recent episode """ def __init__( self, render_dt_msec=0, action_l2norm_penalty=0, # disabled for now render_onscreen=False, render_size=32, reward_type="dense", action_scale=1.0, target_radius=0.60, boundary_dist=4, ball_radius=0.50, walls=None, fixed_goal=None, randomize_position_on_reset=True, images_are_rgb=True, # else black and white show_goal=True, images_in_obs=True, **kwargs): """Instantiate the environment. Parameters ---------- render_dt_msec : float seconds before the next frame in the image is rendered action_l2norm_penalty : float penalty scale for the actions by the agent render_onscreen : bool whether to include the rendering visually (instead of simply using the image for the observation) render_size : int width/length number of pixels in the rendered image reward_type : str the reward type. Must be one of: "sparse", "dense", or "vectorized_dense" action_scale : float the multiple from action to velocity target_radius : float the radius of the targeted position when being rendered boundary_dist : float the distance from the center to the boundary ball_radius : float the radius of the agent when being rendered walls : list of object a list of wall objects describing the position of the walls and how to handle collisions with said walls fixed_goal : [float, float] or None the goal to use. If set to None, it is picked randomly. 
randomize_position_on_reset : bool whether to initialize the position of the agent randomly images_are_rgb : bool specifies whether the image is RGB. Otherwise, it's black and white show_goal : bool whether to render the goal(s) images_in_obs : bool whether to use the image in the observation kwargs : dict additional arguments. Unused here. """ if walls is None: walls = [] if fixed_goal is not None: fixed_goal = np.array(fixed_goal) if len(kwargs) > 0: logger = logging.getLogger(__name__) logger.log(logging.WARNING, "WARNING, ignoring kwargs: %s", kwargs) self.quick_init(locals()) self.render_dt_msec = render_dt_msec self.action_l2norm_penalty = action_l2norm_penalty self.render_onscreen = render_onscreen self.render_size = render_size self.reward_type = reward_type self.action_scale = action_scale self.target_radius = target_radius self.boundary_dist = boundary_dist self.ball_radius = ball_radius self.walls = walls self.fixed_goal = fixed_goal self.randomize_position_on_reset = randomize_position_on_reset self.images_are_rgb = images_are_rgb self.show_goal = show_goal self.images_in_obs = images_in_obs self.image_size = 1024 * (3 if self.images_are_rgb else 1) if not self.images_in_obs: self.image_size = 0 self.max_target_distance = self.boundary_dist - self.target_radius self._target_position = None self._position = np.zeros(2) u = np.ones(2) self.action_space = spaces.Box(-u, u, dtype=np.float32) o = self.boundary_dist * np.ones(2) self.obs_range = spaces.Box(-o, o, dtype='float32') self.context_space = spaces.Box(-o, o, dtype='float32') self.observation_space = spaces.Box( np.concatenate([np.zeros([self.image_size]), -o], 0), np.concatenate([np.ones([self.image_size]), o], 0), dtype='float32') self.drawer = None self.render_drawer = None self.horizon = 200 self.t = 0 @property def current_context(self): """Return the current goal by the environment.""" return self._target_position def step(self, velocities): """Advance the simulation by one step. Parameters ---------- velocities : array_like the action by the agent, defined as its velocities in the x and y directions Returns ------- array_like agent's observation of the current environment float amount of reward associated with the previous state/action pair bool indicates whether the episode has ended dict contains other diagnostic information from the previous action """ assert self.action_scale <= 1.0 velocities = np.clip(velocities, a_min=-1, a_max=1) * self.action_scale new_position = self._position + velocities orig_new_pos = new_position.copy() for wall in self.walls: new_position = wall.handle_collision(self._position, new_position) if sum(new_position != orig_new_pos) > 1: # Hack: sometimes you get caught on two walls at a time. If you # process the input in the other direction, you might only get # caught on one wall instead.
new_position = orig_new_pos.copy() for wall in self.walls[::-1]: new_position = wall.handle_collision(self._position, new_position) self.t += 1 self._position = new_position self._position = np.clip( self._position, a_min=-self.boundary_dist, a_max=self.boundary_dist, ) distance_to_target = np.linalg.norm(self._position - self._target_position) is_success = distance_to_target < self.target_radius ob = self._get_obs() reward = self.compute_reward(velocities, {'ob': ob}) info = { 'radius': self.target_radius, 'target_position': self._target_position, 'distance_to_target': distance_to_target, 'velocity': velocities, 'speed': np.linalg.norm(velocities), 'is_success': is_success, } done = self.t >= self.horizon return ob, reward, done, info def reset(self): """Reset the environment.""" self.t = 0 self._target_position = self.sample_goal()['goals'] if self.randomize_position_on_reset: self._position = self._sample_position( self.obs_range.low, self.obs_range.high, ) return self._get_obs() def _position_inside_wall(self, pos): """Return True if the agent is in a wall.""" for wall in self.walls: if wall.contains_point(pos): return True return False def _sample_position(self, low, high): """Sample a starting position for the agent.""" pos = np.random.uniform(low, high) while self._position_inside_wall(pos) is True: pos = np.random.uniform(low, high) return pos def _get_obs(self): """Return the observation of the agent. See States in the description of the environment for more. """ if self.images_in_obs: img = self.get_image(32, 32).reshape([-1]).astype( np.float32) / 255.0 return np.concatenate([img, self._position.copy()], 0) else: return self._position.copy() def compute_rewards(self, actions, obs): """See parent class. The rewards are described in the Rewards section of the description of the environment. """ achieved_goals = obs['ob'][:, self.image_size:] desired_goals = self._target_position[np.newaxis, :] d = np.linalg.norm(achieved_goals - desired_goals, axis=-1) if self.reward_type == "sparse": return -(d > self.target_radius).astype(np.float32) elif self.reward_type == "dense": return -d elif self.reward_type == 'vectorized_dense': return -np.abs(achieved_goals - desired_goals) else: raise NotImplementedError() def get_goal(self): """See parent class.""" return self._target_position.copy() def sample_goals(self, batch_size): """See parent class. The goal is the desired x,y coordinates. 
""" if self.fixed_goal is not None: goals = np.repeat(self.fixed_goal.copy()[None], batch_size, 0) else: goals = np.zeros((batch_size, self.obs_range.low.size)) for b in range(batch_size): if batch_size > 1: logging.warning("This is very slow!") goals[b, :] = self._sample_position( self.obs_range.low, self.obs_range.high, ) return {'goals': goals} def set_position(self, pos): """Set the position of the agent.""" self._position[0] = pos[0] self._position[1] = pos[1] # ======================================================================= # # Functions for ImageEnv wrapper # # ======================================================================= # def get_image(self, width=None, height=None): """Return a black and white image.""" if self.drawer is None: if width != height: raise NotImplementedError() self.drawer = PygameViewer( screen_width=width, screen_height=height, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), render_onscreen=self.render_onscreen, ) self.draw(self.drawer) img = self.drawer.get_image() if self.images_are_rgb: return img.transpose((1, 0, 2)) else: r, b = img[:, :, 0], img[:, :, 2] img = (-r + b).transpose().flatten() return img def draw(self, drawer): """Create the image corresponding to the current state.""" drawer.fill(Color('white')) if self.show_goal: drawer.draw_solid_circle( self._target_position, self.target_radius, Color('green'), ) drawer.draw_solid_circle( self._position, self.ball_radius, Color('blue'), ) for wall in self.walls: drawer.draw_segment( wall.endpoint1, wall.endpoint2, Color('black'), ) drawer.draw_segment( wall.endpoint2, wall.endpoint3, Color('black'), ) drawer.draw_segment( wall.endpoint3, wall.endpoint4, Color('black'), ) drawer.draw_segment( wall.endpoint4, wall.endpoint1, Color('black'), ) drawer.render() def render(self, mode='human', close=False): """Render the environment state.""" if mode == 'rgb_array': return self.get_image(self.render_size, self.render_size) if close: self.render_drawer = None return if self.render_drawer is None or self.render_drawer.terminated: self.render_drawer = PygameViewer( self.render_size, self.render_size, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), render_onscreen=True, ) self.draw(self.render_drawer) self.render_drawer.tick(self.render_dt_msec) if mode != 'interactive': self.render_drawer.check_for_exit() # ======================================================================= # # Visualization/utility methods # # ======================================================================= # def true_model(self, state, action): """Return the next position by the agent. Parameters ---------- state : array_like the state by the agent action : array_like the action by the agent Returns ------- array_like the next position. """ velocities = np.clip(action, a_min=-1, a_max=1) position = state new_position = position + velocities return np.clip( new_position, a_min=-self.boundary_dist, a_max=self.boundary_dist, ) def true_states(self, state, actions): """Return the next states given a set of states and actions. 
Parameters ---------- state : array_like the states by the agent actions : array_like the actions by the agent Returns ------- list of array_like the next states """ real_states = [state] for action in actions: next_state = self.true_model(state, action) real_states.append(next_state) state = next_state return real_states def plot_trajectory(self, ax, states, actions, goal=None): """Plot the trajectory of an agent. Parameters ---------- ax : matplotlib.axes.Axes the axis object to plot the figure on states : array_like the states by the agent actions : array_like the actions by the agent goal : [int, int] the x,y coordinates of the goal """ assert len(states) == len(actions) + 1 x = states[:, 0] y = -states[:, 1] num_states = len(states) plasma_cm = plt.get_cmap('plasma') for i, state in enumerate(states): color = plasma_cm(float(i) / num_states) ax.plot( state[0], -state[1], marker='o', color=color, markersize=10, ) actions_x = actions[:, 0] actions_y = -actions[:, 1] ax.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1], scale_units='xy', angles='xy', scale=1, width=0.005) ax.quiver( x[:-1], y[:-1], actions_x, actions_y, scale_units='xy', angles='xy', scale=1, color='r', width=0.0035, ) ax.plot( [ -self.boundary_dist, -self.boundary_dist, ], [ self.boundary_dist, -self.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ self.boundary_dist, -self.boundary_dist, ], [ self.boundary_dist, self.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ self.boundary_dist, self.boundary_dist, ], [ self.boundary_dist, -self.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ self.boundary_dist, -self.boundary_dist, ], [ -self.boundary_dist, -self.boundary_dist, ], color='k', linestyle='-', ) if goal is not None: ax.plot(goal[0], -goal[1], marker='*', color='g', markersize=15) ax.set_ylim( -self.boundary_dist - 1, self.boundary_dist + 1, ) ax.set_xlim( -self.boundary_dist - 1, self.boundary_dist + 1, )
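# A short sketch of how the flat observation returned by the variant above is
# laid out: the first `image_size` entries are normalized pixels and the last
# two are the (x, y) position. The helper name `_split_point2d_obs` is
# hypothetical and only for illustration.
def _split_point2d_obs(ob, image_size):
    """Split a flat observation into (image, position)."""
    image = ob[:image_size]
    position = ob[image_size:]
    return image, position

# Example (with images_in_obs=True and images_are_rgb=True, image_size is
# 1024 * 3 = 3072):
# env = Point2DEnv(images_in_obs=True, images_are_rgb=True)
# ob = env.reset()
# img, pos = _split_point2d_obs(ob, env.image_size)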
class Point2DEnv(MultitaskEnv, Serializable): """ A little 2D point whose life goal is to reach a target. """ def __init__( self, render_dt_msec=0, action_l2norm_penalty=0, # disabled for now render_onscreen=False, render_size=84, get_image_base_render_size=None, reward_type="dense", action_scale=1.0, target_radius=0.60, boundary_dist=4, ball_radius=0.50, walls=None, fixed_goal=None, fixed_init_position=None, randomize_position_on_reset=True, images_are_rgb=False, # else black and white show_goal=True, pointmass_color="blue", bg_color="black", wall_color="white", **kwargs): if walls is None: walls = [] if walls is None: walls = [] if fixed_goal is not None: fixed_goal = np.array(fixed_goal) if fixed_init_position is not None: fixed_init_position = np.array(fixed_init_position) if len(kwargs) > 0: LOGGER = logging.getLogger(__name__) LOGGER.log(logging.WARNING, "WARNING, ignoring kwargs:", kwargs) self.quick_init(locals()) self.render_dt_msec = render_dt_msec self.action_l2norm_penalty = action_l2norm_penalty self.render_onscreen = render_onscreen self.render_size = render_size self.reward_type = reward_type self.action_scale = action_scale self.target_radius = target_radius self.boundary_dist = boundary_dist self.ball_radius = ball_radius self.walls = walls self.fixed_goal = fixed_goal self._fixed_init_position = fixed_init_position self.randomize_position_on_reset = randomize_position_on_reset self.images_are_rgb = images_are_rgb self.show_goal = show_goal self.pointmass_color = pointmass_color self.bg_color = bg_color self._wall_color = wall_color self.render_drawer = None self.max_target_distance = self.boundary_dist - self.target_radius self._target_position = np.zeros(2) self._position = np.zeros(2) u = np.ones(2) self.action_space = spaces.Box(-u, u, dtype=np.float32) o = self.boundary_dist * np.ones(2) self.obs_range = spaces.Box(-o, o, dtype='float32') self.observation_space = spaces.Dict([ ('observation', self.obs_range), ('desired_goal', self.obs_range), ('achieved_goal', self.obs_range), ('state_observation', self.obs_range), ('state_desired_goal', self.obs_range), ('state_achieved_goal', self.obs_range), ]) if get_image_base_render_size: base_width, base_height = get_image_base_render_size self._drawer = PygameViewer( screen_width=base_width, screen_height=base_height, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), render_onscreen=self.render_onscreen, ) self._fixed_get_image_render_size = True else: self._drawer = None self._fixed_get_image_render_size = False def step(self, velocities): assert self.action_scale <= 1.0 velocities = np.clip(velocities, a_min=-1, a_max=1) * self.action_scale new_position = self._position + velocities orig_new_pos = new_position.copy() for wall in self.walls: new_position = wall.handle_collision(self._position, new_position) if sum(new_position != orig_new_pos) > 1: """ Hack: sometimes you get caught on two walls at a time. If you process the input in the other direction, you might only get caught on one wall instead. 
""" new_position = orig_new_pos.copy() for wall in self.walls[::-1]: new_position = wall.handle_collision(self._position, new_position) self._position = new_position self._position = np.clip( self._position, a_min=-self.boundary_dist, a_max=self.boundary_dist, ) distance_to_target = np.linalg.norm(self._position - self._target_position) is_success = distance_to_target < self.target_radius ob = self._get_obs() reward = self.compute_reward(velocities, ob) info = { 'radius': self.target_radius, 'target_position': self._target_position, 'distance_to_target': distance_to_target, 'velocity': velocities, 'speed': np.linalg.norm(velocities), 'is_success': is_success, } done = False return ob, reward, done, info def reset(self): self._target_position = self.sample_goal()['state_desired_goal'] if self.randomize_position_on_reset: self._position = self._sample_position( self.obs_range.low, self.obs_range.high, ) else: self._position = self._fixed_init_position return self._get_obs() def _position_inside_wall(self, pos): for wall in self.walls: if wall.contains_point(pos): return True return False def _sample_position(self, low, high): pos = np.random.uniform(low, high) while self._position_inside_wall(pos) is True: pos = np.random.uniform(low, high) return pos def _get_obs(self): return dict( observation=self._position.copy(), desired_goal=self._target_position.copy(), achieved_goal=self._position.copy(), state_observation=self._position.copy(), state_desired_goal=self._target_position.copy(), state_achieved_goal=self._position.copy(), ) def compute_rewards(self, actions, obs): achieved_goals = obs['state_achieved_goal'] desired_goals = obs['state_desired_goal'] d = np.linalg.norm(achieved_goals - desired_goals, axis=-1) if self.reward_type == "sparse": return -(d > self.target_radius).astype(np.float32) elif self.reward_type == "dense": return -d elif self.reward_type == 'vectorized_dense': return -np.abs(achieved_goals - desired_goals) else: raise NotImplementedError() def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'radius', 'target_position', 'distance_to_target', 'velocity', 'speed', 'is_success', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update( create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update( create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics def get_contextual_diagnostics(self, paths, contexts): diagnostics = OrderedDict() state_key = "state_observation" goal_key = "state_desired_goal" values = [] for i in range(len(paths)): state = paths[i]["observations"][-1][state_key] goal = contexts[i][goal_key] distance = np.linalg.norm(state - goal) values.append(distance) diagnostics_key = goal_key + "/final/distance" diagnostics.update(create_stats_ordered_dict( diagnostics_key, values, )) values = [] for i in range(len(paths)): for j in range(len(paths[i]["observations"])): state = paths[i]["observations"][j][state_key] goal = contexts[i][goal_key] distance = np.linalg.norm(state - goal) values.append(distance) diagnostics_key = goal_key + "/distance" diagnostics.update(create_stats_ordered_dict( diagnostics_key, values, )) return diagnostics def goal_conditioned_diagnostics(self, paths, goals): statistics = OrderedDict() distance_to_target_list = [] is_success_list = [] for path, goal in zip(paths, goals): distance_to_target = np.linalg.norm(path['observations'] 
- goal, axis=1) is_success = distance_to_target < self.target_radius distance_to_target_list.append(distance_to_target) is_success_list.append(is_success) for stat_name, stat_list in [ ('distance_to_target', distance_to_target_list), ('is_success', is_success_list), ]: statistics.update( create_stats_ordered_dict( stat_name, stat_list, always_show_all_stats=True, )) statistics.update( create_stats_ordered_dict( '{}/final'.format(stat_name), [s[-1:] for s in stat_list], always_show_all_stats=True, exclude_max_min=True, )) statistics.update( create_stats_ordered_dict( '{}/initial'.format(stat_name), [s[:1] for s in stat_list], always_show_all_stats=True, exclude_max_min=True, )) return statistics def get_goal(self): return { 'desired_goal': self._target_position.copy(), 'state_desired_goal': self._target_position.copy(), } def sample_goals(self, batch_size): if not self.fixed_goal is None: goals = np.repeat(self.fixed_goal.copy()[None], batch_size, 0) else: goals = np.zeros((batch_size, self.obs_range.low.size)) if len(self.walls) > 0: if batch_size > 1: logging.warning("This is very slow!") for b in range(batch_size): goals[b, :] = self._sample_position( self.obs_range.low, self.obs_range.high, ) else: goals = np.random.uniform( self.obs_range.low, self.obs_range.high, size=(batch_size, self.obs_range.low.size), ) return { 'desired_goal': goals, 'state_desired_goal': goals, } def set_position(self, pos): self._position[0] = pos[0] self._position[1] = pos[1] """Functions for ImageEnv wrapper""" def get_image(self, width=None, height=None): """Returns a black and white image""" if self._drawer is None or (not self._fixed_get_image_render_size and (self._drawer.width != width or self._drawer.height != height)): if width != height: raise NotImplementedError() self._drawer = PygameViewer( screen_width=width, screen_height=height, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), render_onscreen=self.render_onscreen, ) self.draw(self._drawer) if width and height: wh_size = (width, height) else: wh_size = None img = self._drawer.get_image(wh_size) if self.images_are_rgb: return img.transpose((1, 0, 2)) else: r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2] img = (-r + b).transpose().flatten() return img def set_to_goal(self, goal_dict): goal = goal_dict["state_desired_goal"] self._position = goal self._target_position = goal def get_env_state(self): return self._get_obs() def set_env_state(self, state): position = state["state_observation"] goal = state["state_desired_goal"] self._position = position self._target_position = goal def draw(self, drawer): drawer.fill(Color(self.bg_color)) if self.show_goal: drawer.draw_solid_circle( self._target_position, self.target_radius, Color('green'), ) drawer.draw_solid_circle( self._position, self.ball_radius, Color(self.pointmass_color), ) for wall in self.walls: drawer.draw_rect( wall.endpoint4, wall.endpoint1[0] - wall.endpoint4[0], -wall.endpoint1[1] + wall.endpoint2[1], Color(self._wall_color), thickness=0, ) drawer.render() def render(self, mode='human', close=False): if close: self.render_drawer = None return if self.render_drawer is None or self.render_drawer.terminated: self.render_drawer = PygameViewer( self.render_size, self.render_size, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), 
render_onscreen=True, ) self.draw(self.render_drawer) self.render_drawer.tick(self.render_dt_msec) if mode != 'interactive': self.render_drawer.check_for_exit() def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'distance_to_target', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update( create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update( create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics """Static visualization/utility methods""" @staticmethod def true_model(state, action): velocities = np.clip(action, a_min=-1, a_max=1) position = state new_position = position + velocities return np.clip( new_position, a_min=-Point2DEnv.boundary_dist, a_max=Point2DEnv.boundary_dist, ) @staticmethod def true_states(state, actions): real_states = [state] for action in actions: next_state = Point2DEnv.true_model(state, action) real_states.append(next_state) state = next_state return real_states @staticmethod def plot_trajectory(ax, states, actions, goal=None): assert len(states) == len(actions) + 1 x = states[:, 0] y = -states[:, 1] num_states = len(states) plasma_cm = plt.get_cmap('plasma') for i, state in enumerate(states): color = plasma_cm(float(i) / num_states) ax.plot( state[0], -state[1], marker='o', color=color, markersize=10, ) actions_x = actions[:, 0] actions_y = -actions[:, 1] ax.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1], scale_units='xy', angles='xy', scale=1, width=0.005) ax.quiver( x[:-1], y[:-1], actions_x, actions_y, scale_units='xy', angles='xy', scale=1, color='r', width=0.0035, ) ax.plot( [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) if goal is not None: ax.plot(goal[0], -goal[1], marker='*', color='g', markersize=15) ax.set_ylim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) ax.set_xlim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) def initialize_camera(self, init_fctn): pass
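# A minimal sketch of how the batched reward types of the variant above
# behave, computed directly from `compute_rewards`. The helper name
# `_example_reward_types` is hypothetical; it assumes the module's existing
# numpy import (`np`).
def _example_reward_types():
    env = Point2DEnv(target_radius=0.6)
    obs = {
        'state_achieved_goal': np.array([[0.0, 0.0], [1.0, 1.0]]),
        'state_desired_goal': np.array([[0.0, 0.5], [-1.0, 1.0]]),
    }
    env.reward_type = "dense"
    dense = env.compute_rewards(actions=None, obs=obs)   # [-0.5, -2.0]
    env.reward_type = "sparse"
    sparse = env.compute_rewards(actions=None, obs=obs)  # [-0.0, -1.0]
    return dense, sparse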
class Point2DEnv(MultitaskEnv, Serializable): """ A little 2D point whose life goal is to reach a target. """ def __init__( self, render_dt_msec=0, action_l2norm_penalty=0, # disabled for now render_onscreen=False, render_size=84, reward_type="dense", action_scale=0.01, target_radius=0.60, boundary_dist=0.15, ball_radius=0.50, walls=None, fixed_goal=None, randomize_position_on_reset=True, images_are_rgb=False, # else black and white show_goal=True, record_interval=1, **kwargs ): if walls is None: walls = [] if walls is None: walls = [] if fixed_goal is not None: fixed_goal = np.array(fixed_goal) if len(kwargs) > 0: LOGGER = logging.getLogger(__name__) LOGGER.log(logging.WARNING, "WARNING, ignoring kwargs:", kwargs) self.quick_init(locals()) self.render_dt_msec = render_dt_msec self.action_l2norm_penalty = action_l2norm_penalty self.render_onscreen = render_onscreen self.render_size = render_size self.reward_type = reward_type self.action_scale = action_scale self.target_radius = target_radius self.boundary_dist = boundary_dist self.ball_radius = ball_radius self.walls = walls self.fixed_goal = fixed_goal self.randomize_position_on_reset = randomize_position_on_reset self.images_are_rgb = images_are_rgb self.show_goal = show_goal self.record_interval = record_interval self.max_target_distance = self.boundary_dist - self.target_radius self._target_position = None self._position = np.zeros((2)) self._center = np.array([0.0, -0.05]) u = np.ones(2) self.action_space = spaces.Box(-u, u, dtype=np.float32) o = self.boundary_dist * np.ones(2) self.obs_range = spaces.Box(-o, o, dtype='float32') self.observation_space = spaces.Dict([ ('observations', self.obs_range), ('desired_goal', self.obs_range), ('achieved_goal', self.obs_range), ('state_observation', self.obs_range), ('state_desired_goal', self.obs_range), ('state_achieved_goal', self.obs_range), ]) self.drawer = None self.render_drawer = None self._meta_time = -1 self._dtheta = 0.2 self._index = -1 self._ac_hist = [] self._pos_hist = [] self._step_time = 0 def step(self, velocities): if self._step_time > 0 and (self._step_time + 1) % 50 == 0: self.reset(True) else: assert self.action_scale <= 1.0 velocities = np.clip(velocities, a_min=-1, a_max=1) * self.action_scale self._ac_hist.append(velocities) new_position = self._position + velocities for wall in self.walls: new_position = wall.handle_collision( self._position, new_position ) self._position = new_position self._position = np.clip( self._position, a_min=-self.boundary_dist, a_max=self.boundary_dist, ) self._pos_hist.append(self._position.copy()) distance_to_target = np.linalg.norm( self._position - self._target_position ) is_success = distance_to_target < self.target_radius ob = self._get_obs() reward = self.compute_reward(velocities, ob) info = { 'radius': self.target_radius, 'target_position': self._target_position, 'distance_to_target': distance_to_target, 'velocity': velocities, 'speed': np.linalg.norm(velocities), 'is_success': is_success, } done = False self._step_time += 1 return ob, reward, done, info def reset(self, called_by_myself=False): # if self._pos_hist and self._meta_time % self.record_interval == 0: # pos_hist = np.array(self._pos_hist) # assert pos_hist.shape[0] == 50 # plt.figure() # ax = plt.gca() # ax.set_aspect('equal') # circle = plt.Circle(self._center, 0.1, color='dimgray', fill=False) # ax.add_artist(circle) # plt.scatter(pos_hist[:, 0], pos_hist[:, 1], c=[(i+1)/pos_hist.shape[0] for i in range(pos_hist.shape[0])], cmap='winter', s=20) # 
plt.scatter(self._target_position[0], self._target_position[1], s=30, c='dimgray') # plt.xlim(self._center[0] - 0.12, self._center[0] + 0.12) # plt.ylim(self._center[1] - 0.12, self._center[1] + 0.12) # plt.savefig('/scr/annie/softlearning-ctrl/out1/{}.png'.format(self._meta_time)) # plt.close() if self._pos_hist: tmp1 = 0.1 * np.array([np.cos(self._dtheta * (self._index + 1)), np.sin(self._dtheta * (self._index + 1))]) + self._center tmp2 = 0.1 * np.array([np.cos(self._dtheta * (self._index - 1)), np.sin(self._dtheta * (self._index - 1))]) + self._center if np.linalg.norm(self._pos_hist[-1] - tmp1) > np.linalg.norm(self._pos_hist[-1] - tmp2): self._index += 1 else: self._index -= 1 self._ac_hist = [] self._pos_hist = [] if not called_by_myself: self._step_time = 0 # self._index = 0 # self._pos_hist = [] self._target_position = self.sample_goal()['state_desired_goal'] self._position = np.zeros(2) if called_by_myself: self._meta_time += 1 # self._meta_time += 1 return self._get_obs() def _position_inside_wall(self, pos): for wall in self.walls: if wall.contains_point(pos): return True return False def _sample_position(self, low, high): pos = np.random.uniform(low, high) while self._position_inside_wall(pos) is True: pos = np.random.uniform(low, high) return pos def _get_obs(self): return dict( observations=self._position.copy(), desired_goal=self._target_position.copy(), achieved_goal=self._position.copy(), state_observation=self._position.copy(), state_desired_goal=self._target_position.copy(), state_achieved_goal=self._position.copy(), ) def compute_rewards(self, actions, obs): achieved_goals = obs['state_achieved_goal'] desired_goals = obs['state_desired_goal'] d = np.linalg.norm(achieved_goals - desired_goals, axis=-1) if self.reward_type == "sparse": return -(d > self.target_radius).astype(np.float32) elif self.reward_type == "dense": return -d elif self.reward_type == 'vectorized_dense': return -np.abs(achieved_goals - desired_goals) else: raise NotImplementedError() def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'radius', 'target_position', 'distance_to_target', 'velocity', 'speed', 'is_success', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update(create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update(create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics def get_goal(self): return { 'desired_goal': self._target_position.copy(), 'state_desired_goal': self._target_position.copy(), } def sample_goals(self, batch_size): if not self.fixed_goal is None: goals = np.repeat( self.fixed_goal.copy()[None], batch_size, 0 ) else: goals = np.zeros((batch_size, self.obs_range.low.size)) for b in range(batch_size): if batch_size > 1: logging.warning("This is very slow!") # DISCRETE # if self._meta_time % 4 == 0: # goals[b, 0] = 0.1 # goals[b, 1] = 0.0 # elif self._meta_time % 4 == 1: # goals[b, 0] = 0.0 # goals[b, 1] = 0.1 # elif self._meta_time % 4 == 2: # goals[b, 0] = -0.1 # goals[b, 1] = 0.0 # else: # goals[b, 0] = 0.0 # goals[b, 1] = -0.1 # CIRCLE # goals[b, 0] = 0.1 * np.cos(self._dtheta * self._meta_time) # goals[b, 1] = 0.1 * np.sin(self._dtheta * self._meta_time) goals[b, 0] = 0.1 * np.cos(self._dtheta * self._index) + self._center[0] goals[b, 1] = 0.1 * np.sin(self._dtheta * self._index) + self._center[1] # LINE # goals[b, 0] = 0.1 * self._meta_time / 2000 # 
goals[b, 1] = 0.1 - 0.1 * self._meta_time / 2000 return { 'desired_goal': goals, 'state_desired_goal': goals, } def set_position(self, pos): self._position[0] = pos[0] self._position[1] = pos[1] """Functions for ImageEnv wrapper""" def get_image(self, width=None, height=None): """Returns a black and white image""" if self.drawer is None: if width != height: raise NotImplementedError() self.drawer = PygameViewer( screen_width=width, screen_height=height, x_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), y_bounds=(-self.boundary_dist - self.ball_radius, self.boundary_dist + self.ball_radius), render_onscreen=self.render_onscreen, ) self.draw(self.drawer, False) img = self.drawer.get_image() if self.images_are_rgb: return img.transpose((1, 0, 2)) else: r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2] img = (-r + b).transpose().flatten() return img def set_to_goal(self, goal_dict): goal = goal_dict["state_desired_goal"] self._position = goal self._target_position = goal def get_env_state(self): return self._get_obs() def set_env_state(self, state): position = state["state_observation"] goal = state["state_desired_goal"] self._position = position self._target_position = goal def draw(self, drawer, tick): # if self.drawer is not None: # self.drawer.fill(Color('white')) # if self.render_drawer is not None: # self.render_drawer.fill(Color('white')) drawer.fill(Color('white')) if self.show_goal: drawer.draw_solid_circle( self._target_position, self.target_radius, Color('green'), ) drawer.draw_solid_circle( self._position, self.ball_radius, Color('blue'), ) for wall in self.walls: drawer.draw_segment( wall.endpoint1, wall.endpoint2, Color('black'), ) drawer.draw_segment( wall.endpoint2, wall.endpoint3, Color('black'), ) drawer.draw_segment( wall.endpoint3, wall.endpoint4, Color('black'), ) drawer.draw_segment( wall.endpoint4, wall.endpoint1, Color('black'), ) drawer.render() if tick: drawer.tick(self.render_dt_msec) def render(self, close=False): if close: self.render_drawer = None return if self.render_drawer is None or self.render_drawer.terminated: self.render_drawer = PygameViewer( self.render_size, self.render_size, x_bounds=(-self.boundary_dist-self.ball_radius, self.boundary_dist+self.ball_radius), y_bounds=(-self.boundary_dist-self.ball_radius, self.boundary_dist+self.ball_radius), render_onscreen=True, ) self.draw(self.render_drawer, True) def get_diagnostics(self, paths, prefix=''): statistics = OrderedDict() for stat_name in [ 'distance_to_target', ]: stat_name = stat_name stat = get_stat_in_paths(paths, 'env_infos', stat_name) statistics.update(create_stats_ordered_dict( '%s%s' % (prefix, stat_name), stat, always_show_all_stats=True, )) statistics.update(create_stats_ordered_dict( 'Final %s%s' % (prefix, stat_name), [s[-1] for s in stat], always_show_all_stats=True, )) return statistics """Static visualization/utility methods""" @staticmethod def true_model(state, action): velocities = np.clip(action, a_min=-1, a_max=1) position = state new_position = position + velocities return np.clip( new_position, a_min=-Point2DEnv.boundary_dist, a_max=Point2DEnv.boundary_dist, ) @staticmethod def true_states(state, actions): real_states = [state] for action in actions: next_state = Point2DEnv.true_model(state, action) real_states.append(next_state) state = next_state return real_states @staticmethod def plot_trajectory(ax, states, actions, goal=None): assert len(states) == len(actions) + 1 x = states[:, 0] y = -states[:, 1] num_states = len(states) 
plasma_cm = plt.get_cmap('plasma') for i, state in enumerate(states): color = plasma_cm(float(i) / num_states) ax.plot(state[0], -state[1], marker='o', color=color, markersize=10, ) actions_x = actions[:, 0] actions_y = -actions[:, 1] ax.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1], scale_units='xy', angles='xy', scale=1, width=0.005) ax.quiver(x[:-1], y[:-1], actions_x, actions_y, scale_units='xy', angles='xy', scale=1, color='r', width=0.0035, ) ax.plot( [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, Point2DEnv.boundary_dist, ], [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) ax.plot( [ Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], [ -Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist, ], color='k', linestyle='-', ) if goal is not None: ax.plot(goal[0], -goal[1], marker='*', color='g', markersize=15) ax.set_ylim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) ax.set_xlim( -Point2DEnv.boundary_dist - 1, Point2DEnv.boundary_dist + 1, ) def initialize_camera(self, init_fctn): pass
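# A small sketch of the circular goal schedule used by the variant above:
# goals lie on a circle of radius 0.1 around `self._center`, at angle
# `self._dtheta * self._index`. The helper name `_circle_goal` is hypothetical
# and simply mirrors the expression inside `sample_goals` for clarity.
def _circle_goal(index, dtheta=0.2, radius=0.1, center=(0.0, -0.05)):
    """Return the (x, y) goal on the circle for a given index."""
    return np.array([
        radius * np.cos(dtheta * index) + center[0],
        radius * np.sin(dtheta * index) + center[1],
    ])

# Example: successive indices step the goal around the circle.
# _circle_goal(0) -> [0.1, -0.05]; _circle_goal(1) -> [~0.098, ~-0.030]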
class Point2DEnv(MultitaskEnv, Serializable): """ A little 2D point whose life goal is to reach a target. """ def __init__( self, render_dt_msec=0, action_l2norm_penalty=0, # disabled for now render_onscreen=False, render_size=256, reward_type="dense", action_scale=1.0, target_radius=0.5, boundary_dist=4, ball_radius=0.15, ball_pixel_radius=0, walls=None, init_pos_range=None, target_pos_range=None, sparse_goals=None, shuffle_states=False, images_are_rgb=False, # else black and white show_goal=True, n_bins=10, use_count_reward=False, show_discrete_grid=False, convert_obs_to_image=False, **kwargs): if walls is None: walls = [] if walls is None: walls = [] # if fixed_goal is not None: # fixed_goal = np.array(fixed_goal) self.quick_init(locals()) self.render_dt_msec = render_dt_msec self.action_l2norm_penalty = action_l2norm_penalty self.render_onscreen = render_onscreen self.render_size = render_size self.reward_type = reward_type self.action_scale = action_scale self.target_radius = target_radius self.boundary_dist = boundary_dist self.ball_radius = ball_radius self.ball_pixel_radius = ball_pixel_radius self.walls = walls self.n_bins = n_bins self.use_count_reward = use_count_reward self.show_discrete_grid = show_discrete_grid self.images_are_rgb = images_are_rgb self.show_goal = show_goal self.sparse_goals = sparse_goals self.shuffle_states = shuffle_states self.convert_obs_to_image = convert_obs_to_image self.x_bins = np.linspace(-self.boundary_dist, self.boundary_dist, self.n_bins) self.y_bins = np.linspace(-self.boundary_dist, self.boundary_dist, self.n_bins) self.bin_counts = np.ones((self.n_bins + 1, self.n_bins + 1)) self._random_mapping_x = np.random.permutation(range(N_SHUFFLE_BINS)) self._random_mapping_y = np.random.permutation(range(N_SHUFFLE_BINS)) self.max_target_distance = self.boundary_dist - self.target_radius self._target_position = None self._position = np.zeros((2)) u = np.ones(2) self.action_space = spaces.Box(-u, u, dtype=np.float32) o = self.boundary_dist * np.ones(2) self.obs_range = spaces.Box(-o, o, dtype='float32') print( f"[Point2D] Using boundary dist: {self.boundary_dist}, obs space: ({self.obs_range.low}, {self.obs_range.high})" ) if not init_pos_range: self.init_pos_range = self.obs_range else: assert np.all(np.abs(init_pos_range) < boundary_dist), ( f"Init position must be" "within the boundaries of the environment: ({-boundary_dist}, {boundary_dist})" ) low, high = init_pos_range self.init_pos_range = spaces.Box(np.array(low), np.array(high), dtype='float32') print( f"[Point2D] Using initial pos range of low={low}, high={high}") if not target_pos_range: self.target_pos_range = self.obs_range else: assert np.all(np.abs(target_pos_range) < boundary_dist), ( f"Goal position must be" "within the boundaries of the environment: ({-boundary_dist}, {boundary_dist})" ) low, high = target_pos_range self.target_pos_range = spaces.Box(np.array(low), np.array(high), dtype='float32') self.observation_space = spaces.Dict([ ('observation', self.obs_range), ('onehot_observation', spaces.Box(0, 1, shape=(2 * (self.n_bins + 1), ), dtype=np.float32)), ('desired_goal', self.obs_range), ('achieved_goal', self.obs_range), ('state_observation', self.obs_range), ('state_desired_goal', self.obs_range), ('state_achieved_goal', self.obs_range), ]) self.drawer = None self.render_drawer = None self.reset() def step(self, velocities): assert self.action_scale <= 1.0 velocities = np.clip(velocities, a_min=-1, a_max=1) * self.action_scale new_position = self._position + velocities 
orig_new_pos = new_position.copy() for wall in self.walls: new_position = wall.handle_collision(self._position, new_position) if sum(new_position != orig_new_pos) > 1: """ Hack: sometimes you get caught on two walls at a time. If you process the input in the other direction, you might only get caught on one wall instead. """ new_position = orig_new_pos.copy() for wall in self.walls[::-1]: new_position = wall.handle_collision(self._position, new_position) self._position = new_position self._position = np.clip( self._position, a_min=-self.boundary_dist, a_max=self.boundary_dist, ) distance_to_target = np.linalg.norm(self._position - self._target_position) is_success = distance_to_target < self.target_radius ob = self._get_obs() x_d, y_d = ob['discrete_observation'] self.bin_counts[x_d, y_d] += 1 reward = self.compute_reward(velocities, ob) info = { 'radius': self.target_radius, 'target_position': self._target_position, 'distance_to_target': distance_to_target, 'velocity': velocities, 'speed': np.linalg.norm(velocities), 'is_success': is_success, 'ext_reward': self._ext_reward, } if self.sparse_goals is not None: # Set reward for anything within threshold of goal(s) to 1 # (ignore any classifier reward) goals, threshold = self.sparse_goals min_dist = self.compute_min_dist(goals) info['is_sparse_success'] = min_dist < threshold if hasattr(self, 'wall_shape'): if self.wall_shape == 'medium-maze': info[ 'manhattan_dist_to_target'] = self._medium_maze_manhattan_distance( ob['state_achieved_goal'], ob['state_desired_goal']) elif self.wall_shape == 'hard-maze': info[ 'manhattan_dist_to_target'] = self._hard_maze_goal_distance( ob['state_achieved_goal'], ob['state_desired_goal']) elif self.wall_shape == 'double-medium-maze': goals, threshold = self.sparse_goals info['manhattan_dist_to_sparse_goal'] = self.compute_min_dist( goals, self._double_maze_manhattan_distance) info['manhattan_dist_to_target'] = self.compute_min_dist( ob['state_desired_goal'], self._double_maze_manhattan_distance) if self.use_count_reward: info['count_bonus'] = self._count_bonus done = False return ob, reward, done, info def compute_min_dist( self, goals, distance_fn=lambda s1, s2: np.linalg.norm(s1 - s2, axis=-1)): dists = [] if len(goals.shape) == 1: goals = goals[None] for goal in goals: dist = distance_fn(self._position, goal) dists.append(dist) min_dist = np.array(dists).min(axis=0) return min_dist def reset(self): # TODO: Make this more general self._target_position = self.sample_goal()['state_desired_goal'] # if self.randomize_position_on_reset: self._position = self._sample_position( # self.obs_range.low, # self.obs_range.high, self.init_pos_range.low, self.init_pos_range.high, ) return self._get_obs() def _position_inside_wall(self, pos): for wall in self.walls: if wall.contains_point(pos): return True return False def _sample_position(self, low, high): if np.all(low == high): return low pos = np.random.uniform(low, high) while self._position_inside_wall(pos) is True: pos = np.random.uniform(low, high) return pos def clear_bin_counts(self): self.bin_counts = np.ones((self.n_bins + 1, self.n_bins + 1)) def _discretize_observation(self, obs): x, y = obs['state_observation'].copy() x_d, y_d = np.digitize(x, self.x_bins), np.digitize(y, self.y_bins) return np.array([x_d, y_d]) def _discretized_states(self, states, bins=16, low=-4, high=4): """ Converts continuous to discrete states. 
Params - states: A shape (n, 2) batch of continuous observations - bins: Number of bins for both x and y coordinates - low: Lowest value (inclusive) for continuous x and y - high: Highest value (inclusive) for continuous x and y """ bin_size = (high - low) / bins shifted_states = states - low return np.clip(shifted_states // bin_size, 0, bins - 1).astype(np.int32) def _get_shuffled_states(self, states): if len(states.shape) == 1: states = states[None] states = self._discretized_states(states, bins=N_SHUFFLE_BINS, low=MAZE_LOW, high=MAZE_HIGH) results = (np.hstack([ self._random_mapping_x[states[:, 0]][:, None], self._random_mapping_y[states[:, 1]][:, None] ]) - 8) / 2 return results def _state_to_pixel_coords(self, states, state_min=-4, state_max=4, img_width=28): coords = (states - state_min) * (img_width / (state_max - state_min)) # Need to subtract 1 because pygame rendering shifts everything to the left/up by 1 for some reason # (e.g. [-3, -3] is at 5, 5 instead of 6, 6). However this does not apply to the top/left edges. return np.clip(coords, 0, None).astype(np.int32) def _get_obs(self): state_obs = self._get_shuffled_states(self._position.copy()).squeeze() \ if self.shuffle_states else self._position.copy() obs = collections.OrderedDict( observation=self._position.copy(), desired_goal=self._target_position.copy(), achieved_goal=self._position.copy(), state_observation=state_obs, state_desired_goal=self._target_position.copy(), state_achieved_goal=self._position.copy(), ) # Update with discretized state pos_discrete = self._discretize_observation(obs) pos_onehot = np.eye(self.n_bins + 1)[pos_discrete] obs['discrete_observation'] = pos_discrete obs['onehot_observation'] = pos_onehot return obs def _hard_maze_goal_distance(self, s1, goal=None): s1 = s1.copy() if len(s1.shape) == 1: s1 = s1[None] x1, y1 = s1[:, 0], s1[:, 1] dist = np.zeros(len(s1)) top_section = y1 <= -2 dist[top_section] += np.abs( 2 - x1[top_section]) # Move horizontally to corner dist[top_section] += -2 - y1[top_section] # Move vertically to corner x1[top_section], y1[top_section] = 2, -2 right_section = x1 >= 2 dist[right_section] += x1[right_section] - 2 # Move horizontally dist[right_section] += np.abs( 2 - y1[right_section]) # Move vertically to corner x1[right_section], y1[right_section] = 2, 2 bottom_section = y1 >= 2 dist[bottom_section] += np.abs(-2 - x1[bottom_section]) # Move horizontally dist[bottom_section] += y1[ bottom_section] - 2 # Move vertically to corner x1[bottom_section], y1[bottom_section] = -2, 2 left_section = x1 <= -2 dist[left_section] += (-2 - x1[left_section]) # Move horizontally dist[left_section] += np.abs( y1[left_section]) # Move vertically to corner x1[left_section], y1[left_section] = -2, 0 mid_section = np.logical_and(y1 <= 0, y1 >= -2) dist[mid_section] += np.abs(x1[mid_section]) # Move horizontally dist[mid_section] += np.abs( y1[mid_section]) # Move vertically to corner # Move to goal!!! 
dist += np.abs(x1 - (-0.5)) dist += np.abs(y1 - 1.25) return dist def _medium_maze_manhattan_distance(self, s1, s2): # Maze wall positions left_wall_x = -self.boundary_dist / 3 left_wall_bottom = self.inner_wall_max_dist right_wall_x = self.boundary_dist / 3 right_wall_top = -self.inner_wall_max_dist s1 = s1.copy() s2 = s2.copy() if len(s1.shape) == 1: s1, s2 = s1[None], s2[None] # Since maze distances are symmetric, redefine s1,s2 for convenience # so that points in s1 are to the left of those in s2 combined = np.hstack((s1[:, None], s2[:, None])) indices = np.argmin((s1[:, 0], s2[:, 0]), axis=0) s1 = np.take_along_axis(combined, indices[:, None, None], axis=1).squeeze(axis=1) s2 = np.take_along_axis(combined, 1 - indices[:, None, None], axis=1).squeeze(axis=1) x1 = s1[:, 0] x2 = s2[:, 0] # Horizontal movement x_dist = np.abs(x2 - x1) # Vertical movement boundary_ys = [left_wall_bottom, right_wall_top, 0] boundary_xs = [left_wall_x, right_wall_x, self.boundary_dist, -4.0001] y_directions = [ 1, -1, 0 ] # +1 means need to get to bottom, -1 means need to get to top curr_y, goal_y = s1[:, 1], s2[:, 1] y_dist = np.zeros(len(s1)) for i in range(3): # Get all points where s1 and s2 respectively are in the current vertical section of the maze curr_in_section = x1 <= boundary_xs[i] goal_in_section = np.logical_and(boundary_xs[i - 1] < x2, x2 <= boundary_xs[i]) goal_after_section = x2 > boundary_xs[i] # Both in current section: move directly to goal mask = np.logical_and(curr_in_section, goal_in_section) y_dist += mask * np.abs(curr_y - goal_y) curr_y[mask] = goal_y[mask] # s2 is further in maze: move to next corner mask = np.logical_and( curr_in_section, np.logical_and(goal_after_section, y_directions[i] * (boundary_ys[i] - curr_y) > 0)) y_dist += mask * np.clip( y_directions[i] * (boundary_ys[i] - curr_y), 0, None) curr_y[mask] = boundary_ys[i] return x_dist + y_dist def _double_maze_manhattan_distance(self, s1, s2): # Maze wall positions left_wall_bottom = self.inner_wall_max_dist + 1 right_wall_top = -self.inner_wall_max_dist - 1 s1 = s1.copy() s2 = s2.copy() if len(s1.shape) == 1: s1 = s1[None] if len(s2.shape) == 1: s2 = s2[None] # Since maze distances are symmetric, redefine s1,s2 for convenience # so that points in s1 are to the left of those in s2 combined = np.hstack((s1[:, None], s2[:, None])) indices = np.argmin((s1[:, 0], s2[:, 0]), axis=0) s1 = np.take_along_axis(combined, indices[:, None, None], axis=1).squeeze(axis=1) s2 = np.take_along_axis(combined, 1 - indices[:, None, None], axis=1).squeeze(axis=1) x1 = s1[:, 0] x2 = s2[:, 0] # Horizontal movement x_dist = np.abs(x2 - x1) # Vertical movement boundary_ys = [ right_wall_top, left_wall_bottom, right_wall_top, left_wall_bottom, 0 ] boundary_xs = [ -3 / 5 * self.boundary_dist, -1 / 5 * self.boundary_dist, 1 / 5 * self.boundary_dist, 3 / 5 * self.boundary_dist, self.boundary_dist, -4.0001 ] y_directions = [ -1, +1, -1, +1, 0 ] # +1 means need to get to bottom, -1 means need to get to top curr_y, goal_y = s1[:, 1], s2[:, 1] y_dist = np.zeros(len(s1)) for i in range(5): # Get all points where s1 and s2 respectively are in the current vertical section of the maze curr_in_section = x1 <= boundary_xs[i] # print(x1) # print(boundary_xs[i]) goal_in_section = np.logical_and(boundary_xs[i - 1] < x2, x2 <= boundary_xs[i]) goal_after_section = x2 > boundary_xs[i] # print("x1:", x1) # print("x2:", x2) # print(boundary_xs[i]) # print(boundary_xs[i-1]) # print(curr_in_section, goal_in_section) # Both in current section: move directly to goal mask 
    def _double_maze_manhattan_distance(self, s1, s2):
        # Maze wall positions
        left_wall_bottom = self.inner_wall_max_dist + 1
        right_wall_top = -self.inner_wall_max_dist - 1

        s1 = s1.copy()
        s2 = s2.copy()
        if len(s1.shape) == 1:
            s1 = s1[None]
        if len(s2.shape) == 1:
            s2 = s2[None]

        # Since maze distances are symmetric, redefine s1, s2 for convenience
        # so that points in s1 are to the left of those in s2
        combined = np.hstack((s1[:, None], s2[:, None]))
        indices = np.argmin((s1[:, 0], s2[:, 0]), axis=0)
        s1 = np.take_along_axis(
            combined, indices[:, None, None], axis=1).squeeze(axis=1)
        s2 = np.take_along_axis(
            combined, 1 - indices[:, None, None], axis=1).squeeze(axis=1)
        x1 = s1[:, 0]
        x2 = s2[:, 0]

        # Horizontal movement
        x_dist = np.abs(x2 - x1)

        # Vertical movement. As above, the trailing -4.0001 is a sentinel
        # just outside the left edge of the arena.
        boundary_ys = [
            right_wall_top, left_wall_bottom, right_wall_top,
            left_wall_bottom, 0
        ]
        boundary_xs = [
            -3 / 5 * self.boundary_dist,
            -1 / 5 * self.boundary_dist,
            1 / 5 * self.boundary_dist,
            3 / 5 * self.boundary_dist,
            self.boundary_dist,
            -4.0001,
        ]
        # +1 means need to get to bottom, -1 means need to get to top
        y_directions = [-1, +1, -1, +1, 0]
        curr_y, goal_y = s1[:, 1], s2[:, 1]
        y_dist = np.zeros(len(s1))
        for i in range(5):
            # Get all points where s1 and s2 respectively are in the current
            # vertical section of the maze
            curr_in_section = x1 <= boundary_xs[i]
            goal_in_section = np.logical_and(
                boundary_xs[i - 1] < x2, x2 <= boundary_xs[i])
            goal_after_section = x2 > boundary_xs[i]

            # Both in current section: move directly to goal
            mask = np.logical_and(curr_in_section, goal_in_section)
            y_dist += mask * np.abs(curr_y - goal_y)
            curr_y[mask] = goal_y[mask]

            # s2 is further in maze: move to next corner
            mask = np.logical_and(
                curr_in_section,
                np.logical_and(
                    goal_after_section,
                    y_directions[i] * (boundary_ys[i] - curr_y) > 0))
            y_dist += mask * np.clip(
                y_directions[i] * (boundary_ys[i] - curr_y), 0, None)
            curr_y[mask] = boundary_ys[i]
        return x_dist + y_dist

    def compute_rewards(self, actions, obs, reward_type=None):
        reward_type = reward_type or self.reward_type
        achieved_goals = obs['state_achieved_goal']
        desired_goals = obs['state_desired_goal']
        d = np.linalg.norm(achieved_goals - desired_goals, axis=-1)
        if reward_type == "sparse":
            r = -(d > self.target_radius).astype(np.float32)
        elif reward_type == "multi-sparse":
            goals, threshold = self.sparse_goals
            min_dist = np.array([self.compute_min_dist(goals)])
            r = -(min_dist > self.target_radius).astype(np.float32)
        elif reward_type == "dense":
            r = -d
        elif reward_type == "vectorized_dense":
            r = -np.abs(achieved_goals - desired_goals)
        elif reward_type == "medium_maze_ground_truth_manhattan":
            # Use the maze distance from the current position to the goal as
            # the negative reward. This is specific to the medium Maze-v0
            # env, which has two vertical walls.
            r = -self._medium_maze_manhattan_distance(
                achieved_goals, desired_goals)
        elif reward_type == "laplace_smoothing":
            # Laplace smoothing: 1 within the goal region, 1 / (n + 2) at all
            # other states (where n is the number of visitations)
            pos_d = obs['discrete_observation']
            r = 1 / (self.bin_counts[pos_d[:, 0], pos_d[:, 1]] + 2)
            r[d <= self.target_radius] = 1
        elif reward_type == "none":
            r = np.zeros(d.shape)
        else:
            raise NotImplementedError(
                f"Unrecognized reward type: {reward_type}")

        if self.use_count_reward:
            # TODO: Add different count based strategies
            pos_d = obs['discrete_observation']
            self._ext_reward = r.copy()
            self._count_bonus = 1 / np.sqrt(
                self.bin_counts[pos_d[:, 0], pos_d[:, 1]])
            r += self._count_bonus
        else:
            self._ext_reward = r.copy()
        return r

    def get_diagnostics(self, paths, prefix=''):
        statistics = OrderedDict()
        stat_names = [
            'radius',
            'target_position',
            'distance_to_target',
            'velocity',
            'speed',
            'is_success',
            'ext_reward',
            'count_bonus',
        ] + (['is_sparse_success'] if self.sparse_goals is not None else [])
        for stat_name in stat_names:
            stat = get_stat_in_paths(paths, 'env_infos', stat_name)
            statistics.update(create_stats_ordered_dict(
                '%s%s' % (prefix, stat_name),
                stat,
                always_show_all_stats=True,
            ))
            statistics.update(create_stats_ordered_dict(
                'Final %s%s' % (prefix, stat_name),
                [s[-1] for s in stat],
                always_show_all_stats=True,
            ))
        return statistics

    def get_goal(self):
        return {
            'desired_goal': self._target_position.copy(),
            'state_desired_goal': self._target_position.copy(),
        }

    def sample_goals(self, batch_size):
        # if self.fixed_goal:
        #     goals = np.repeat(self.fixed_goal.copy()[None], batch_size, 0)
        # else:
        goals = np.zeros((batch_size, self.obs_range.low.size))
        if batch_size > 1:
            logging.warning("This is very slow!")
        for b in range(batch_size):
            goals[b, :] = self._sample_position(
                # self.obs_range.low,
                # self.obs_range.high,
                self.target_pos_range.low,
                self.target_pos_range.high,
            )
        return {
            'desired_goal': goals,
            'state_desired_goal': goals,
        }

    def set_position(self, pos):
        self._position[0] = pos[0]
        self._position[1] = pos[1]
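    # Hedged usage note (illustrative shapes only, not part of the env API):
    # compute_rewards() above expects a batched dict observation such as
    #
    #     obs = {
    #         'state_achieved_goal': np.zeros((batch, 2)),
    #         'state_desired_goal': np.ones((batch, 2)),
    #         'discrete_observation': np.zeros((batch, 2), dtype=int),
    #     }
    #     r = env.compute_rewards(actions, obs, reward_type="dense")
    #
    # where 'discrete_observation' is only consulted by the laplace_smoothing
    # reward and by the count bonus when use_count_reward is set.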
    """Functions for ImageEnv wrapper"""

    def get_image(self, width=None, height=None, invert_colors=False):
        """Returns a black and white image"""
        if self.drawer is None:
            if width != height:
                raise NotImplementedError()
            self.drawer = PygameViewer(
                screen_width=width,
                screen_height=height,
                # TODO: Action scale = 1 breaks rendering, why?
                # x_bounds=(-self.boundary_dist - self.ball_radius,
                #           self.boundary_dist + self.ball_radius),
                # y_bounds=(-self.boundary_dist - self.ball_radius,
                #           self.boundary_dist + self.ball_radius),
                x_bounds=(-self.boundary_dist, self.boundary_dist),
                y_bounds=(-self.boundary_dist, self.boundary_dist),
                render_onscreen=self.render_onscreen,
            )
        # Park the ball in the corner while drawing; its corner pixels are
        # whited out below and the ball is painted back in at its true pixel
        # coordinates.
        old_position = self._position
        self._position = np.array([-4, -4])
        self.draw(self.drawer)
        img = self.drawer.get_image()
        # img = skimage.transform.resize(
        #     img, (width, height), anti_aliasing=True, preserve_range=True)
        self._position = old_position
        if self.images_are_rgb:
            im = img.transpose((1, 0, 2))
        else:
            # Note: the pixel painting below assumes an (H, W, 3) image, so
            # this flattened black-and-white path is kept only for
            # compatibility.
            r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
            im = (-r + b).transpose().flatten()
        im[0:2, 0:2] = np.array([255, 255, 255])
        im = 255 - im  # Invert image - make background black, walls white
        im = im[None]
        # Set pixel at ball position to blue for each individual observation.
        # The clipping to [0, 27] assumes 28x28 renders.
        pixel_coords = self._state_to_pixel_coords(
            self._position, img_width=width)[None]
        for i in range(-self.ball_pixel_radius, self.ball_pixel_radius + 1):
            for j in range(-self.ball_pixel_radius,
                           self.ball_pixel_radius + 1):
                im[range(len(pixel_coords)),
                   np.clip(pixel_coords[:, 1] + i, 0, 27),
                   np.clip(pixel_coords[:, 0] + j, 0, 27)] = np.array(
                       [0, 0, 255])
        return im[0] / 255

    def set_to_goal(self, goal_dict):
        goal = goal_dict["state_desired_goal"]
        self._position = goal
        self._target_position = goal

    def get_env_state(self):
        return self._get_obs()

    def set_env_state(self, state):
        position = state["state_observation"]
        goal = state["state_desired_goal"]
        self._position = position
        self._target_position = goal

    def draw(self, drawer):
        drawer.fill(Color('white'))
        if self.show_discrete_grid:
            for x in self.x_bins:
                drawer.draw_segment((x, -self.boundary_dist),
                                    (x, self.boundary_dist),
                                    Color(220, 220, 220, 25), aa=False)
            for y in self.y_bins:
                drawer.draw_segment((-self.boundary_dist, y),
                                    (self.boundary_dist, y),
                                    Color(220, 220, 220, 25), aa=False)
        if self.show_goal:
            drawer.draw_solid_circle(
                self._target_position,
                self.target_radius,
                Color('green'),
            )
        try:
            drawer.draw_solid_circle(
                self._position,
                self.ball_radius,
                Color('blue'),
            )
        except ValueError:
            print('\n\n RENDER ERROR \n\n')
        for wall in self.walls:
            drawer.draw_segment(
                wall.endpoint1,
                wall.endpoint2,
                Color('black'),
            )
            drawer.draw_segment(
                wall.endpoint2,
                wall.endpoint3,
                Color('black'),
            )
            drawer.draw_segment(
                wall.endpoint3,
                wall.endpoint4,
                Color('black'),
            )
            drawer.draw_segment(
                wall.endpoint4,
                wall.endpoint1,
                Color('black'),
            )
        drawer.render()

    def render(self, mode='human', width=None, height=None,
               invert_colors=False, close=False):
        if close:
            self.render_drawer = None
            return
        if mode == 'rgb_array':
            return self.get_image(width=width, height=height,
                                  invert_colors=invert_colors)
        if self.render_drawer is None or self.render_drawer.terminated:
            self.render_drawer = PygameViewer(
                self.render_size,
                self.render_size,
                # x_bounds=(-self.boundary_dist - self.ball_radius,
                #           self.boundary_dist + self.ball_radius),
                # y_bounds=(-self.boundary_dist - self.ball_radius,
                #           self.boundary_dist + self.ball_radius),
                x_bounds=(-self.boundary_dist, self.boundary_dist),
                y_bounds=(-self.boundary_dist, self.boundary_dist),
                render_onscreen=True,
            )
        self.draw(self.render_drawer)
        self.render_drawer.tick(self.render_dt_msec)
        if mode != 'interactive':
            self.render_drawer.check_for_exit()
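    # Hedged usage note: offscreen pixel observations go through
    #
    #     frame = env.render(mode='rgb_array', width=28, height=28)
    #
    # which delegates to get_image() and returns a float array scaled to
    # [0, 1], whereas render(mode='human') drives the interactive
    # PygameViewer. The 28x28 size matches the pixel clipping in get_image();
    # other sizes are an untested assumption here.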
    """Static visualization/utility methods"""

    @staticmethod
    def true_model(state, action):
        # Note: this reads boundary_dist from the Point2DEnv class rather
        # than from an instance.
        velocities = np.clip(action, a_min=-1, a_max=1)
        position = state
        new_position = position + velocities
        return np.clip(
            new_position,
            a_min=-Point2DEnv.boundary_dist,
            a_max=Point2DEnv.boundary_dist,
        )

    @staticmethod
    def true_states(state, actions):
        real_states = [state]
        for action in actions:
            next_state = Point2DEnv.true_model(state, action)
            real_states.append(next_state)
            state = next_state
        return real_states

    @staticmethod
    def plot_trajectory(ax, states, actions, goal=None):
        assert len(states) == len(actions) + 1
        x = states[:, 0]
        y = -states[:, 1]
        num_states = len(states)
        plasma_cm = plt.get_cmap('plasma')
        for i, state in enumerate(states):
            color = plasma_cm(float(i) / num_states)
            ax.plot(
                state[0],
                -state[1],
                marker='o',
                color=color,
                markersize=10,
            )

        actions_x = actions[:, 0]
        actions_y = -actions[:, 1]

        # Black arrows: realized state-to-state transitions.
        ax.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1],
                  scale_units='xy', angles='xy', scale=1, width=0.005)
        # Red arrows: commanded actions at each state.
        ax.quiver(
            x[:-1],
            y[:-1],
            actions_x,
            actions_y,
            scale_units='xy',
            angles='xy',
            scale=1,
            color='r',
            width=0.0035,
        )

        # Arena boundary.
        ax.plot(
            [-Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist],
            [Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist],
            color='k', linestyle='-',
        )
        ax.plot(
            [Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist],
            [Point2DEnv.boundary_dist, Point2DEnv.boundary_dist],
            color='k', linestyle='-',
        )
        ax.plot(
            [Point2DEnv.boundary_dist, Point2DEnv.boundary_dist],
            [Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist],
            color='k', linestyle='-',
        )
        ax.plot(
            [Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist],
            [-Point2DEnv.boundary_dist, -Point2DEnv.boundary_dist],
            color='k', linestyle='-',
        )

        if goal is not None:
            ax.plot(goal[0], -goal[1], marker='*', color='g', markersize=15)
        ax.set_ylim(
            -Point2DEnv.boundary_dist - 1,
            Point2DEnv.boundary_dist + 1,
        )
        ax.set_xlim(
            -Point2DEnv.boundary_dist - 1,
            Point2DEnv.boundary_dist + 1,
        )

    def initialize_camera(self, init_fctn):
        pass
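

# The block below is a hedged, illustrative sketch (not used anywhere in this
# module): it shows how the static helpers above might be combined to
# visualize an open-loop rollout. The function name is hypothetical, and it
# pins a class-level boundary_dist only because true_model/plot_trajectory
# read that attribute from the Point2DEnv class.
def _plot_open_loop_rollout_example():
    Point2DEnv.boundary_dist = 4  # demo-only assumption; normally an instance attribute
    start = np.zeros(2)
    actions = np.array([
        [1.0, 0.0],
        [0.0, 1.0],
        [1.0, 1.0],
    ])
    states = np.array(Point2DEnv.true_states(start, actions))
    fig, ax = plt.subplots()
    Point2DEnv.plot_trajectory(ax, states, actions, goal=np.array([2.0, 2.0]))
    return fig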