def __init__(self, goal_reward=10, actuation_cost_coeff=30, distance_cost_coeff=1, init_sigma=0.1):
    """Set up the 2D multi-goal point environment.

    goal_reward: bonus granted on reaching a goal.
    actuation_cost_coeff: weight of the squared-action cost.
    distance_cost_coeff: weight of the distance-to-goal cost.
    init_sigma: stddev of the Gaussian initial-state distribution.
    """
    super().__init__()
    Serializable.quick_init(self, locals())

    # Deterministic point-mass dynamics (sigma=0 => no transition noise).
    self.dynamics = PointDynamics(dim=2, sigma=0)

    # Initial state ~ N(init_mu, init_sigma) around the origin.
    self.init_mu = np.zeros(2, dtype=np.float32)
    self.init_sigma = init_sigma

    # Four goals on the coordinate axes, 5 units from the origin.
    self.goal_positions = np.array(
        [[5, 0], [-5, 0], [0, 5], [0, -5]], dtype=np.float32)
    self.goal_threshold = 1.0
    self.goal_reward = goal_reward

    # Reward-shaping coefficients.
    self.action_cost_coeff = actuation_cost_coeff
    self.distance_cost_coeff = distance_cost_coeff

    # Workspace bounds and velocity cap.
    self.xlim = (-7, 7)
    self.ylim = (-7, 7)
    self.vel_bound = 1.0

    self.reset()
    # NOTE(review): reproduced from the original — `observation` is cleared
    # *after* reset(), so whatever reset() stored there is discarded here.
    # Confirm against reset()'s implementation before changing this order.
    self.observation = None

    # Rendering state, lazily populated on first plot.
    self._ax = None
    self._env_lines = []
    self.fixed_plots = None
    self.dynamic_plots = []

    # Name-mangled on purpose (kept identical to the original attribute).
    self.__spec = EnvSpec(action_space=self.action_space,
                          observation_space=self.observation_space)
def __init__(self, reward_type='dense', terminate_at_goal=True,
             goal_reward_weight=3e-1, goal_radius=0.25, goal_distance=5,
             goal_angle_range=(0, 2 * np.pi), velocity_reward_weight=0,
             ctrl_cost_coeff=1e-2, contact_cost_coeff=1e-3,
             survive_reward=5e-2, fixed_goal_position=None, *args, **kwargs):
    """Configure the goal-reaching MuJoCo environment.

    reward_type: one of REWARD_TYPES (e.g. 'dense'); selects the reward
        formulation used elsewhere in the class.
    terminate_at_goal: whether episodes end once the goal is reached.
    goal_reward_weight / velocity_reward_weight / ctrl_cost_coeff /
    contact_cost_coeff / survive_reward: reward-term coefficients.
    goal_radius / goal_distance / goal_angle_range: goal geometry — the
        goal is sampled at `goal_distance` within `goal_angle_range`.
    fixed_goal_position: NOTE(review) — captured by quick_init but not
        stored on self in this constructor; presumably consumed elsewhere
        via serialization. Confirm before removing.

    Raises:
        ValueError: if `reward_type` is not a known reward type.
    """
    # Validate with a real exception rather than `assert`: asserts are
    # stripped under `python -O`, which would silently accept an unknown
    # reward_type and fail much later.
    if reward_type not in REWARD_TYPES:
        raise ValueError(
            'reward_type must be one of {}, got {!r}'.format(
                REWARD_TYPES, reward_type))

    self._reward_type = reward_type
    self.terminate_at_goal = terminate_at_goal

    # Goal geometry and reward coefficients.
    self.goal_reward_weight = goal_reward_weight
    self.goal_radius = goal_radius
    self.goal_distance = goal_distance
    self.goal_angle_range = goal_angle_range
    self.velocity_reward_weight = velocity_reward_weight
    self.ctrl_cost_coeff = ctrl_cost_coeff
    self.contact_cost_coeff = contact_cost_coeff
    self.survive_reward = survive_reward

    # Base-class construction, then quick_init so the full argument set
    # (including *args/**kwargs) is captured for serialization.
    MujocoEnv.__init__(self, *args, **kwargs)
    Serializable.quick_init(self, locals())
def __init__(self):
    """Half-cheetah environment with a fixed row of hurdles.

    Loads the `half_cheetah_hurdle.xml` MuJoCo model from MODELS_PATH and
    records the hurdle x-positions plus a fixed exteroceptive target point.
    """
    # Fixed 3D target point exposed as an exteroceptive observation
    # (presumably [x, y, z] in world coordinates — confirm against step()).
    self.exteroceptive_observation = [12.0, 0, 0.5]
    # x-coordinates of the hurdles along the running direction.
    # (Commented-out extra hurdles 19/23/27 from the original were removed
    # as dead code; re-add values here to extend the course.)
    self.hurdles_xpos = [-15., -13., -9., -5., -1., 3., 7., 11., 15.]
    path = os.path.join(MODELS_PATH, 'half_cheetah_hurdle.xml')
    MujocoEnv.__init__(self, file_path=path)
    Serializable.quick_init(self, locals())
def __init__(self, observation_space, action_space):
    """Record the environment's observation and action spaces.

    :type observation_space: Space
    :type action_space: Space
    """
    # Capture constructor arguments for serialization before anything else.
    Serializable.quick_init(self, locals())
    self._action_space = action_space
    self._observation_space = observation_space
def __init__(self, env, base_policy, num_skills, steps_per_option=100):
    """Meta-environment whose discrete actions pick a skill to execute.

    env: the wrapped environment.
    base_policy: skill-conditioned low-level policy that produces the
        primitive actions.
    num_skills: number of selectable skills (size of the discrete
        action space).
    steps_per_option: how many low-level steps each skill choice runs for.
    """
    Serializable.quick_init(self, locals())
    self._env = env
    self._base_policy = base_policy
    self._num_skills = num_skills
    self._steps_per_option = steps_per_option
    # Observations pass through unchanged; the action becomes a skill index.
    self.observation_space = self._env.observation_space
    self.action_space = spaces.Discrete(num_skills)
    self.spec = EnvSpec(self.observation_space, self.action_space)
    # Reset immediately so the first step() call sees a valid observation.
    self._obs = self.reset()
def __init__(self, env, num_skills, z):
    """Wrap `env` with an observation space widened by `num_skills` slots.

    env: the wrapped environment.
    num_skills: number of extra observation dimensions, each bounded in
        [0, 1] (room for a one-hot skill encoding).
    z: the active skill index for this wrapper instance.
    """
    Serializable.quick_init(self, locals())
    self._env = env
    self._num_skills = num_skills
    self._z = z
    # Append `num_skills` dimensions bounded in [0, 1] to the wrapped
    # environment's box bounds.
    wrapped_space = self._env.observation_space
    lows = np.hstack([wrapped_space.low, np.zeros(num_skills)])
    highs = np.hstack([wrapped_space.high, np.ones(num_skills)])
    self.observation_space = spaces.Box(low=lows, high=highs)
    # Actions are forwarded to the wrapped environment unchanged.
    self.action_space = self._env.action_space
    self.spec = EnvSpec(self.observation_space, self.action_space)
def __init__(self, goal=(-1, -1), arm_distance_coeff=0, action_cost_coeff=0.1):
    """
    goal (`list`): List of two elements denoting the x and y coordinates of
        the goal location. Either of the coordinate can also be a string
        'any' to make the reward not to depend on the corresponding
        coordinate.
    arm_distance_coeff ('float'): Coefficient for the arm-to-object
        distance cost.
    action_cost_coeff ('float'): Coefficient for the action cost
        (new keyword; defaults to the previously hard-coded 0.1).
    """
    super(PusherEnv, self).__init__(file_path=self.FILE_PATH)
    Serializable.quick_init(self, locals())

    # Only the coordinates not marked 'any' participate in the goal cost.
    self._goal_mask = [coordinate != 'any' for coordinate in goal]
    self._goal = np.array(goal)[self._goal_mask].astype(np.float32)

    # BUG FIX: `arm_distance_coeff` was previously dead — the constructor
    # unconditionally overwrote it with a hard-coded 0.1
    # (`self._arm_distance_coeff = 0.1# arm_distance_coeff`). The documented
    # parameter is now honored. NOTE(review): callers that relied on the
    # hard-coded 0.1 should now pass arm_distance_coeff=0.1 explicitly.
    self._arm_distance_coeff = arm_distance_coeff
    self._action_cost_coeff = action_cost_coeff

    # Widen the camera extent so the complete robot is visible when
    # visualizing.
    self.model.stat.extent = 10
def __init__(self, *args, **kwargs):
    """Construct the base half-cheetah environment, then run the
    Serializable initializer with the same arguments so the instance
    can be serialized/cloned.
    """
    # Zero-argument super() is equivalent to super(HalfCheetahEnv, self)
    # inside this class (the file already uses Python-3 style super()).
    super().__init__(*args, **kwargs)
    Serializable.__init__(self, *args, **kwargs)
def __init__(self, low_level_policy, *args, **kwargs):
    """Keep a handle to the pretrained low-level policy, then defer the
    remaining construction to the base class.

    low_level_policy: policy invoked by this wrapper to produce
        primitive actions.
    """
    # Capture arguments for serialization first, and store the policy
    # before base-class construction in case the latter needs it.
    Serializable.quick_init(self, locals())
    self._low_level_policy = low_level_policy
    super().__init__(*args, **kwargs)
def __init__(self, env, delay=0.01):
    """Proxy environment that records a per-interaction delay.

    env: the environment being wrapped.
    delay: delay amount stored for use by other methods
        (presumably seconds slept per step — confirm against step()).
    """
    Serializable.quick_init(self, locals())
    ProxyEnv.__init__(self, env)
    self._delay = delay