Example #1
    def __init__(self,
                 goal_reward=10,
                 actuation_cost_coeff=30,
                 distance_cost_coeff=1,
                 init_sigma=0.1):
        super().__init__()
        Serializable.quick_init(self, locals())

        self.dynamics = PointDynamics(dim=2, sigma=0)
        self.init_mu = np.zeros(2, dtype=np.float32)
        self.init_sigma = init_sigma
        self.goal_positions = np.array([[5, 0], [-5, 0], [0, 5], [0, -5]],
                                       dtype=np.float32)
        self.goal_threshold = 1.
        self.goal_reward = goal_reward
        self.action_cost_coeff = actuation_cost_coeff
        self.distance_cost_coeff = distance_cost_coeff
        self.xlim = (-7, 7)
        self.ylim = (-7, 7)
        self.vel_bound = 1.
        self.reset()
        self.observation = None

        self._ax = None
        self._env_lines = list()
        self.fixed_plots = None
        self.dynamic_plots = []
        self.__spec = EnvSpec(action_space=self.action_space,
                              observation_space=self.observation_space)
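This example, like the ones that follow, calls Serializable.quick_init(self, locals()) inside __init__: in rllab/garage-style code this records the constructor arguments so the object can be pickled and rebuilt with the same configuration later. Below is a minimal sketch of the pattern, assuming the rllab import path for Serializable; the class name MyEnv and its arguments are illustrative only, not taken from the example above.

import numpy as np
from rllab.core.serializable import Serializable


class MyEnv(Serializable):
    def __init__(self, goal_reward=10, init_sigma=0.1):
        # Snapshot the constructor arguments; Serializable uses them to
        # rebuild an identical instance when the object is deserialized.
        Serializable.quick_init(self, locals())
        self.goal_reward = goal_reward
        self.init_mu = np.zeros(2, dtype=np.float32)
        self.init_sigma = init_sigma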
Example #2
    def __init__(self,
                 reward_type='dense',
                 terminate_at_goal=True,
                 goal_reward_weight=3e-1,
                 goal_radius=0.25,
                 goal_distance=5,
                 goal_angle_range=(0, 2 * np.pi),
                 velocity_reward_weight=0,
                 ctrl_cost_coeff=1e-2,
                 contact_cost_coeff=1e-3,
                 survive_reward=5e-2,
                 fixed_goal_position=None,
                 *args,
                 **kwargs):
        assert reward_type in REWARD_TYPES

        self._reward_type = reward_type
        self.terminate_at_goal = terminate_at_goal

        self.goal_reward_weight = goal_reward_weight
        self.goal_radius = goal_radius
        self.goal_distance = goal_distance
        self.goal_angle_range = goal_angle_range

        self.velocity_reward_weight = velocity_reward_weight

        self.ctrl_cost_coeff = ctrl_cost_coeff
        self.contact_cost_coeff = contact_cost_coeff
        self.survive_reward = survive_reward

        MujocoEnv.__init__(self, *args, **kwargs)
        Serializable.quick_init(self, locals())
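The constructor above only stores goal_distance and goal_angle_range; how the goal position is actually placed is not shown. Below is a minimal sketch of one plausible sampling scheme, assuming the goal is drawn uniformly on an arc of radius goal_distance around the origin; the helper name sample_goal_position is illustrative and not part of the original class.

import numpy as np

def sample_goal_position(goal_distance, goal_angle_range=(0, 2 * np.pi)):
    # Pick an angle inside the allowed range, then place the goal on a
    # circle of radius goal_distance around the origin.
    angle = np.random.uniform(*goal_angle_range)
    return goal_distance * np.array([np.cos(angle), np.sin(angle)])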
Example #3
    def __init__(self):
        self.exteroceptive_observation = [12.0, 0, 0.5]
        # Fixed x-positions of the hurdles; the further positions (19., 23., 27.) are disabled.
        self.hurdles_xpos = [-15., -13., -9., -5., -1., 3., 7., 11., 15.]
        path = os.path.join(MODELS_PATH, 'half_cheetah_hurdle.xml')
        MujocoEnv.__init__(self, file_path=path)
        Serializable.quick_init(self, locals())
Example #4
    def __init__(self, observation_space, action_space):
        """
        :type observation_space: Space
        :type action_space: Space
        """
        Serializable.quick_init(self, locals())
        self._observation_space = observation_space
        self._action_space = action_space
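This snippet is simply a container pairing an observation space with an action space, which is how the EnvSpec objects in the other examples are built. Below is a hedged usage sketch under the assumption that this class is the EnvSpec used elsewhere on this page and that the rllab import paths shown apply; treat the space shapes as illustrative.

import numpy as np
from rllab.envs.env_spec import EnvSpec
from rllab.spaces.box import Box
from rllab.spaces.discrete import Discrete

# Hypothetical spaces: a 4-dimensional continuous observation, 2 discrete actions.
observation_space = Box(low=-np.inf, high=np.inf, shape=(4,))
action_space = Discrete(2)

# Bundle the two spaces so downstream code can read both from one object.
spec = EnvSpec(observation_space, action_space)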
Example #5
    def __init__(self, env, base_policy, num_skills, steps_per_option=100):
        Serializable.quick_init(self, locals())
        self._base_policy = base_policy
        self._env = env
        self._steps_per_option = steps_per_option
        self._num_skills = num_skills
        self.observation_space = self._env.observation_space
        # The meta-level action is a discrete choice among the learned skills.
        self.action_space = spaces.Discrete(num_skills)
        self.spec = EnvSpec(self.observation_space, self.action_space)
        self._obs = self.reset()
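The wrapper above exposes one discrete action per skill and keeps the pre-trained base policy internally, so a natural step implementation executes the chosen skill for steps_per_option low-level steps. Below is a sketch under those assumptions, with numpy imported as np as in the other examples; the one-hot conditioning and the get_action call on self._base_policy are guesses about the base policy's interface, not the original code.

    def step(self, action):
        # Condition the frozen low-level policy on the chosen skill (one-hot).
        z_one_hot = np.eye(self._num_skills)[action]
        total_reward = 0.0
        for _ in range(self._steps_per_option):
            aug_obs = np.concatenate([self._obs, z_one_hot])
            low_level_action, _ = self._base_policy.get_action(aug_obs)
            self._obs, reward, done, info = self._env.step(low_level_action)
            total_reward += reward
            if done:
                break
        return self._obs, total_reward, done, info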
Example #6
    def __init__(self, env, num_skills, z):
        Serializable.quick_init(self, locals())
        self._env = env
        self._num_skills = num_skills
        self._z = z
        obs_space = self._env.observation_space
        # Extend the observation bounds by num_skills dimensions in [0, 1],
        # e.g. to hold a one-hot encoding of the skill.
        low = np.hstack([obs_space.low, np.full(num_skills, 0)])
        high = np.hstack([obs_space.high, np.full(num_skills, 1)])
        self.observation_space = spaces.Box(low=low, high=high)
        self.action_space = self._env.action_space
        self.spec = EnvSpec(self.observation_space, self.action_space)
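The constructor widens the observation bounds by num_skills extra dimensions in [0, 1], which matches appending a one-hot encoding of the fixed skill z to every observation (the DIAYN-style pattern). Below is a sketch of reset and step under that assumption, with numpy imported as np as in the other examples; the _augment helper is hypothetical and not part of the original class.

    def _augment(self, obs):
        # Append a one-hot encoding of the fixed skill index z to the observation.
        z_one_hot = np.zeros(self._num_skills)
        z_one_hot[self._z] = 1.0
        return np.concatenate([obs, z_one_hot])

    def reset(self):
        return self._augment(self._env.reset())

    def step(self, action):
        obs, reward, done, info = self._env.step(action)
        return self._augment(obs), reward, done, info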
Example #7
File: pusher.py  Project: fangqyi/garage
    def __init__(self, goal=(-1, -1), arm_distance_coeff=0):
        """
        goal (`list`): List of two elements denoting the x and y coordinates of
            the goal location. Either coordinate can also be the string 'any'
            to make the reward independent of that coordinate.
        arm_distance_coeff (`float`): Coefficient for the arm-to-object
            distance cost.
        """
        super(PusherEnv, self).__init__(file_path=self.FILE_PATH)
        Serializable.quick_init(self, locals())

        self._goal_mask = [coordinate != 'any' for coordinate in goal]
        self._goal = np.array(goal)[self._goal_mask].astype(np.float32)

        self._arm_distance_coeff = arm_distance_coeff
        self._action_cost_coeff = 0.1

        # Make the complete robot visible when visualizing.
        self.model.stat.extent = 10
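self._goal_mask drops any coordinate passed as 'any', so the goal term of the reward should only compare the remaining coordinates against self._goal. Below is a minimal sketch of that distance computation, assuming the object's planar position is available as object_xy (a hypothetical argument name) and numpy is imported as np.

    def goal_distance(self, object_xy):
        # Compare only the coordinates that were not marked 'any' at construction.
        masked = np.asarray(object_xy)[self._goal_mask]
        return np.linalg.norm(masked - self._goal)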
Example #8
    def __init__(self, *args, **kwargs):
        super(HalfCheetahEnv, self).__init__(*args, **kwargs)
        Serializable.__init__(self, *args, **kwargs)
Example #9
    def __init__(self, low_level_policy, *args, **kwargs):
        Serializable.quick_init(self, locals())
        self._low_level_policy = low_level_policy
        super().__init__(*args, **kwargs)
Example #10
    def __init__(self, env, delay=0.01):
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)

        self._delay = delay
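The wrapper above only stores the delay, presumably so that each transition is slowed down by that many seconds. Below is a sketch of a matching step method, assuming ProxyEnv keeps the inner environment in self._wrapped_env (that attribute name follows rllab's ProxyEnv, but treat it as an assumption here) and that the time module is imported at module level.

    def step(self, action):
        # Sleep before forwarding the action, e.g. to mimic real-time control latency.
        time.sleep(self._delay)
        return self._wrapped_env.step(action)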