def __init__(self, show_gui=True, center_camera=False, use_theta=True, use_rk4=True, dynamics_noise=False, noise_sigma=0.01):

        high = np.array([
            2.3,
            np.finfo(np.float32).max,
            np.finfo(np.float32).max,
            np.finfo(np.float32).max])
        self._observation_space = spaces.Box(-high, high) #[theta, alpha, theta_dot, alpha_dot]
        self._action_space = spaces.Box(np.array([-5.0]), np.array([5.0]))
        self.show_gui = show_gui
        self.use_theta = use_theta
        self.use_rk4 = use_rk4
        self.dynamic_noise = dynamics_noise
        self.noise_sigma = noise_sigma*self.fs
        self.noise_mu = 0

        if self.use_theta:
            self.theta_lin_param = np.array([self.Mp*self.Lr**2+self.Jr,
                                   1/4*self.Mp*self.Lp**2,
                                   1/2*self.Mp*self.Lp*self.Lr,
                                   self.Jp+1/4*self.Mp*self.Lp**2,
                                   1/2*self.Mp*self.Lp,
                                   self.Dr,
                                   self.Dp])
        if show_gui:
            self.set_gui(center_camera)
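A minimal usage sketch of the two Boxes built above, assuming only the gym spaces API; the bound values are copied from the constructor, while raw_action and the stand-alone variable names are illustrative and not part of the original class.

import numpy as np
from gym import spaces

obs_high = np.array([2.3, np.finfo(np.float32).max,
                     np.finfo(np.float32).max, np.finfo(np.float32).max])
observation_space = spaces.Box(-obs_high, obs_high)     # [theta, alpha, theta_dot, alpha_dot]
action_space = spaces.Box(np.array([-5.0]), np.array([5.0]))

raw_action = np.array([7.5])                             # an out-of-range control value
action = np.clip(raw_action, action_space.low, action_space.high)
print(action)                                            # -> [5.]
random_action = action_space.sample()                    # uniform draw inside [-5, 5]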
Example #2
    def __init__(self):
        EzPickle.__init__(self)
        self.seed()
        self.viewer = None

        self.world = Box2D.b2World()
        self.moon = None
        self.lander = None
        self.particles = []

        self.prev_reward = None

        # useful range is -1 .. +1, but spikes can be higher
        self.observation_space = spaces.Box(-np.inf, np.inf, shape=(8, ))

        if self.continuous:
            # Action is two floats [main engine, left-right engines].
            # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
            # Left-right:  -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
            self.action_space = spaces.Box(-1, +1, (2, ))
        else:
            # Nop, fire left engine, main engine, right engine
            self.action_space = spaces.Discrete(4)

        self.reset()
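The action convention is fixed only by the comments above; the helper below is a hedged sketch of that convention (the function name and return format are hypothetical), not the environment's own step logic.

import numpy as np

def decode_lander_action(action):
    """Interpret the two-float continuous action described in the comments above."""
    main, lateral = float(action[0]), float(action[1])
    # Main engine: -1..0 off, 0..+1 maps to 50%..100% throttle.
    main_power = 0.0 if main <= 0.0 else 0.5 + 0.5 * np.clip(main, 0.0, 1.0)
    # Left-right: <= -0.5 fires the left engine, >= +0.5 fires the right one, otherwise off.
    if lateral <= -0.5:
        side = "left"
    elif lateral >= 0.5:
        side = "right"
    else:
        side = "off"
    return main_power, side

print(decode_lander_action(np.array([0.2, 0.7])))   # -> (0.6, 'right'), up to float rounding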
Example #3
    def __init__(self, randomize_tasks=False, n_tasks=2):

        if randomize_tasks:
            np.random.seed(1337)
            goals = [[np.random.uniform(-1., 1.),
                      np.random.uniform(-1., 1.)] for _ in range(n_tasks)]
        else:
            # some hand-coded goals for debugging
            goals = [
                np.array([10, -10]),
                np.array([10, 10]),
                np.array([-10, 10]),
                np.array([-10, -10]),
                np.array([0, 0]),
                np.array([7, 2]),
                np.array([0, 4]),
                np.array([-6, 9])
            ]
            goals = [g / 10. for g in goals]
        self.goals = goals

        self.reset_task(0)
        self.observation_space = spaces.Box(low=-np.inf,
                                            high=np.inf,
                                            shape=(2, ))
        self.action_space = spaces.Box(low=-0.1, high=0.1, shape=(2, ))
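A short rollout sketch, assuming the usual point-mass dynamics where each step adds the action to the 2-D position; the loop below only illustrates how the [-0.1, 0.1] action Box and the rescaled goals interact and does not call the real env.

import numpy as np

goal = np.array([10, -10]) / 10.            # first hand-coded debug goal after rescaling
position = np.zeros(2)

for _ in range(20):
    action = np.clip(goal - position, -0.1, 0.1)   # greedy step, clipped to the action Box
    position = position + action                    # stand-in for env.step(action)
print(position)                                      # reaches [ 1., -1.] after 10 steps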
Example #4
    def __init__(self,
                 model_path='./square2d.xml',
                 distance_threshold=1e-1,
                 frame_skip=2,
                 goal=[0.3, 0.3],
                 horizon=100):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)

        self.model = load_model_from_path(fullpath)
        self.seed()
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self.distance_threshold = distance_threshold
        self.frame_skip = frame_skip
        self.set_goal_location(goal)
        self.reward_type = 'dense'
        self.horizon = horizon
        self.time_step = 0
        obs = self.get_current_observation()
        self.action_space = spaces.Box(-1., 1., shape=(2, ))
        self.observation_space = spaces.Box(-np.inf, np.inf, shape=obs.shape)
Example #5
    def __init__(self, fence=False, *args, **kwargs):
        MODEL_CLASS = AntEnv
        if fence:
            MODEL_CLASS.FILE = 'ant1.xml'
        else:
            MODEL_CLASS.FILE = 'ant.xml'

        Serializable.quick_init(self, locals())
        MazeEnv.__init__(self, *args, **kwargs)
        self._blank_maze = False
        # add goal obs
        self.blank_maze_obs = np.concatenate(
            [np.zeros(self._n_bins),
             np.zeros(self._n_bins)])
        # self.blank_maze_obs = np.zeros(self._n_bins)

        # The following caches the spaces so they are not re-instantiated every time
        shp = self.get_current_obs().shape
        ub = BIG * np.ones(shp)
        self._observation_space = spaces.Box(ub * -1, ub)

        shp = self.get_current_robot_obs().shape
        ub = BIG * np.ones(shp)
        self._robot_observation_space = spaces.Box(ub * -1, ub)

        shp = self.get_current_maze_obs().shape
        ub = BIG * np.ones(shp)
        self._maze_observation_space = spaces.Box(ub * -1, ub)
Example #6
    def __init__(
            self,
            *args,
            **kwargs):

        Serializable.quick_init(self, locals())
        MazeEnv.__init__(self, *args, **kwargs)
        self._blank_maze = False
        # add goal obs
        if self.direct_goal:
            self.blank_maze_obs = np.zeros(self._n_bins + 2)
        else:
            self.blank_maze_obs = np.concatenate([np.zeros(self._n_bins), np.zeros(self._n_bins)])
        # self.blank_maze_obs = np.zeros(self._n_bins)

        # The following caches the spaces so they are not re-instantiated every time
        shp = self.get_current_obs().shape
        ub = BIG * np.ones(shp)
        self._observation_space = spaces.Box(ub * -1, ub)

        shp = self.get_current_robot_obs().shape
        ub = BIG * np.ones(shp)
        self._robot_observation_space = spaces.Box(ub * -1, ub)

        shp = self.get_current_maze_obs().shape
        ub = BIG * np.ones(shp)
        self._maze_observation_space = spaces.Box(ub * -1, ub)
Example #7
 def __init__(self, task={}, n_tasks=100, randomize_tasks=True, **kwargs):
     self.goals = self.sample_tasks(n_tasks)
     self.goal_radius = 0.09
     self._goal = [0, 0, 0.01]
     super(ReacherGoalEnv_sparse, self).__init__()
     self.reset_task(0)
     Serializable.__init__(self)
     self.observation_space = spaces.Box(low=self.observation_space.low,
                                         high=self.observation_space.high)
     self.action_space = spaces.Box(low=self.action_space.low,
                                    high=self.action_space.high)
Example #8
    def action_space(self):
        bounds = self.model.actuator_ctrlrange
        lb = bounds[:, 0]
        ub = bounds[:, 1]
        self.action_space_true = spaces.Box(lb, ub)  # the true actuator range (kept for reference)

        bounds = np.array([
            [-1, 1],  # TODO: changed here to normalized bounds
            [-1, 1]
        ])
        lb = bounds[:, 0]
        ub = bounds[:, 1]

        return spaces.Box(lb, ub)
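This property keeps the true actuator range in self.action_space_true but advertises a normalized [-1, 1] Box. A common companion to that pattern, sketched below under that assumption (the helper is not part of the original class), rescales a normalized action back onto the controller range before it is applied.

import numpy as np

def denormalize_action(normalized_action, ctrl_lb, ctrl_ub):
    """Map an action from the advertised [-1, 1] Box onto the true actuator range."""
    normalized_action = np.clip(normalized_action, -1.0, 1.0)
    return ctrl_lb + (normalized_action + 1.0) * 0.5 * (ctrl_ub - ctrl_lb)

# Illustrative actuator_ctrlrange rows of [-3, 3] and [0, 5]:
lb, ub = np.array([-3.0, 0.0]), np.array([3.0, 5.0])
print(denormalize_action(np.array([0.0, 1.0]), lb, ub))   # -> [0. 5.]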
Example #9
    def __init__(self,
                 dynamics_noise='no_noise',
                 std=np.array([1, 1]),
                 friction=0.0,
                 obs_noise=False,
                 obs_noise_bound=0.075):
        self.gravity = 9.8
        self.masscart = 0.5
        self.masspole = 0.5
        self.total_mass = (self.masspole + self.masscart)
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = (self.masspole * self.length)
        self.max_force = 10.0
        self.tau = 0.02  # seconds between state updates
        self.kinematics_integrator = 'euler'
        self.dynamics_noise = dynamics_noise  # if we have noise on the dynamics
        self.friction = friction
        self.obs_noise = obs_noise
        self.obs_noise_bound = obs_noise_bound
        if dynamics_noise not in [
                'no_noise', 'gaussian', 'gaussian_state_dependent', "gumbel"
        ]:
            raise ValueError("wrong noise")
        if type(std) == np.ndarray:
            # note: we divide here by tau to have N(s_{t+1}|s_{t} + delta_{t} * \mu(s_{t},a_{t}), \sigma)
            self.std = std / self.tau
        else:
            self.std = np.ones(2) * std / self.tau
        self.std_torch = torch.from_numpy(self.std).float()
        # Angle at which to fail the episode
        self.theta_threshold_radians = math.pi
        self.x_threshold = 3

        # set a maximum velocity, s.t. we can clip too high velocities
        # Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
        high = np.array([
            self.x_threshold * 1.2, 1e2, self.theta_threshold_radians * 1.2,
            1e2
        ])

        # continuous action space between -force_mag and force_mag
        self._action_space = spaces.Box(np.array([-self.max_force]),
                                        np.array([self.max_force]))
        self._observation_space = spaces.Box(-high, high)

        self.viewer = None
        self.state = None

        self.steps_beyond_done = None
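The constructor divides std by tau so that the per-step transition is distributed as N(s_{t+1} | s_t + tau * mu(s_t, a_t), sigma), as the comment above notes. The snippet below is only a sketch of how such noise could enter an Euler update; the state and derivative here are two-dimensional placeholders, not the env's actual cart-pole dynamics.

import numpy as np

tau = 0.02
std = np.array([1.0, 1.0]) / tau              # as in the constructor above

def noisy_euler_step(state, deriv, rng):
    """Euler update with additive Gaussian noise on the derivative (illustrative only)."""
    noise = rng.normal(loc=0.0, scale=std, size=state.shape)
    return state + tau * (deriv + noise)      # tau * (std / tau) = std of the state increment

rng = np.random.default_rng(0)
print(noisy_euler_step(np.array([0.0, 0.0]), np.array([1.0, -1.0]), rng))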
Example #10
def get_aug_obs_space(obs_space, variant):
    if variant['concat_type'] == 'concatenation':
        low = np.hstack([obs_space.low, np.full(variant['num_skills'], 0)])
        high = np.hstack([obs_space.high, np.full(variant['num_skills'], 1)])
        aug_obs_space = spaces.Box(low=low, high=high)
        return aug_obs_space

    elif variant['concat_type'] == 'bilinear':
        dim = obs_space.shape[0] * variant['num_skills']
        low = np.ones(dim) * obs_space.low[0]
        high = np.ones(dim) * obs_space.high[0]
        aug_obs_space = spaces.Box(low=low, high=high)
        return aug_obs_space

    else:
        raise NotImplementedError
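A usage sketch of get_aug_obs_space, assuming a gym Box observation space and the two concat_type values the function handles; the variant dicts here carry only the keys the function reads.

import numpy as np
from gym import spaces

obs_space = spaces.Box(low=-np.ones(3), high=np.ones(3))

concat = get_aug_obs_space(obs_space, {'concat_type': 'concatenation', 'num_skills': 4})
print(concat.shape)     # (7,): original 3 dims plus a 4-dim skill block bounded in [0, 1]

bilinear = get_aug_obs_space(obs_space, {'concat_type': 'bilinear', 'num_skills': 4})
print(bilinear.shape)   # (12,): obs_dim * num_skills, bounds taken from the first obs dimension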
Example #11
 def action_space(self):
     lat_dim = self.low_policy_latent_dim
     if self.discrete_actions:
         return spaces.Discrete(lat_dim)  # the action is now just a selection
     else:
         ub = 1e6 * np.ones(lat_dim)
         return spaces.Box(-1 * ub, ub)
Example #12
 def action_space(self):
     if isinstance(self._wrapped_env.action_space, Box):
         wrapped_low = np.append(self._wrapped_env.action_space.low, [-1])
         wrapped_high = np.append(self._wrapped_env.action_space.high, [1])
         return spaces.Box(wrapped_low, wrapped_high)
     else:
         raise NotImplementedError
Example #13
    def __init__(self, visit_axis_bound=None, *args, **kwargs):
        super(MujocoEnv_ObsInit, self).__init__(*args, **kwargs)

        shp = self.get_current_obs().shape
        ub = BIG * np.ones(shp)
        self._observation_space = spaces.Box(ub * -1, ub)
        self.visit_axis_bound = visit_axis_bound
Example #14
 def observation_space(self):
     if self.position_only:
         d = len(self._get_position_ids())
     else:
         d = len(self.extra_data.states)
     ub = BIG * np.ones(d)
     return spaces.Box(ub * -1, ub)
Example #15
 def observation_space(self):
     dim = self.wrapped_env.observation_space.flat_dim
     if self.goal_vector_obs:
         newdim = dim + (self.n_apples + self.n_bombs) * 3
     else:
         newdim = dim + self.n_bins * 2
     ub = BIG * np.ones(newdim)
     return spaces.Box(ub * -1, ub)
Example #16
 def action_space(self):
     if (self.mode == 1):
         lower_bound = np.array([0, 0])
         upper_bound = np.array([200, 200])
     if (self.mode == 0):
         lower_bound = np.array([0, 0])
         upper_bound = np.array([0.3, 0.3])
     return spaces.Box(lower_bound, upper_bound)
Example #17
 def observation_space(self):
     if self._normalize_obs:
         if isinstance(self._wrapped_env.observation_space, Box):
             ub = np.ones(self._wrapped_env.observation_space.shape)
             return spaces.Box(-1 * ub, ub)
         return self._wrapped_env.observation_space
     else:
         return self._wrapped_env.observation_space
Example #18
 def __init__(self, env, num_skills, z):
     Serializable.quick_init(self, locals())
     self._env = env
     self._num_skills = num_skills
     self._z = z
     obs_space = self._env.observation_space
     low = np.hstack([obs_space.low, np.full(num_skills, 0)])
     high = np.hstack([obs_space.high, np.full(num_skills, 1)])
     self.observation_space = spaces.Box(low=low, high=high)
     self.action_space = self._env.action_space
     self.spec = EnvSpec(self.observation_space, self.action_space)
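The wrapper widens the observation Box by num_skills dimensions bounded in [0, 1], which suggests that observations come back with a one-hot indicator for the fixed skill z appended; the helper below makes that assumption explicit and is hypothetical, not a method of the wrapper shown here.

import numpy as np

def augment_observation(obs, num_skills, z):
    """Append a one-hot skill indicator, matching the widened Box above (assumed behavior)."""
    skill = np.zeros(num_skills)
    skill[z] = 1.0
    return np.hstack([obs, skill])

print(augment_observation(np.array([0.2, -0.4]), num_skills=3, z=1))   # -> [ 0.2 -0.4  0.   1.   0. ]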
Example #19
 def observation_space(self):
     # self.observation_space = spaces.Box(np.array([-1,-1,0,0,-2,-1]), np.array([1,1,1,1,2,1]))
     if self.normlize_obs:
         if self.use_nearby_obs:
             return spaces.Box(-1, 1,
                               ((2 * self.sensor_range + 1)**2 + 2, ))
         else:
             return spaces.Box(-1, 1, (3 + 2 * self.n_key, ))
     else:
         if self.use_nearby_obs:
             return spaces.Box(
                 np.array([0, 0, 0] + [0] * 24),
                 np.array(
                     [self._map.shape[0], self._map.shape[1], self.n_key] +
                     [5] * ((2 * self.sensor_range + 1)**2 - 1)))
         else:
             return spaces.Box(
                 np.array([0, 0, 0] + [0] * self.n_key),
                 np.array(
                     [self._map.shape[0], self._map.shape[1], self.n_key] +
                     [self._map.shape[0], self._map.shape[1]] * self.n_key))
Example #20
    def inject_action_noise(self, action):
        # Rebuild the action space from the actuator control range
        # (this block was added; it was not in the original code).
        bounds = self.model.actuator_ctrlrange
        lb = bounds[:, 0]
        ub = bounds[:, 1]
        action_space = spaces.Box(lb, ub)

        # generate action noise
        noise = self.action_noise * \
                np.random.normal(size=action.shape)
        # rescale the noise to make it proportional to the action bounds
        # lb, ub = self.action_bounds  # the original code used this line instead
        lb, ub = action_space.bounds
        noise = 0.5 * (ub - lb) * noise
        return action + noise
Example #21
    def __init__(self,
                 env,
                 init_selector,
                 goal_selector,
                 append_goal=False,
                 terminal_bonus=0,
                 terminal_eps=0.1,
                 distance_metric='L2',
                 goal_reward='NegativeDistance',
                 goal_weight=1,
                 inner_weight=0,
                 angle_idxs=(None, ),
                 persistence=3):
        """
        :param env: wrapped env  NEEDS RESET WITH INIT_STATE arg!
        :param init_selector: already instantiated; NEEDS GOOD DIM OF GOALS! --> TODO: give the class a method to update dim?
        :param terminal_bonus: if not 0, the rollout terminates with this bonus if the goal state is reached
        :param terminal_eps: eps around which the terminal goal is considered reached
        :param distance_metric: L1 or L2 or a callable func
        :param goal_reward: NegativeDistance or InverseDistance or callable func
        :param goal_weight: coef of the goal-dist reward
        :param inner_weight: coef of the inner reward
        """

        Serializable.quick_init(self, locals())
        self.append_goal = append_goal
        self.terminal_bonus = terminal_bonus
        self.terminal_eps = terminal_eps
        self._distance_metric = distance_metric
        self._goal_reward = goal_reward
        self.goal_weight = goal_weight
        self.inner_weight = inner_weight
        self.persistence = persistence
        self.persistence_count = 1
        self.fig_number = 0
        ProxyEnv.__init__(self, env)
        InitBaseAngle.__init__(self,
                               angle_idxs=angle_idxs,
                               init_selector=init_selector,
                               goal_selector=goal_selector)
        ub = BIG * np.ones(self.get_current_obs().shape)
        self._observation_space = spaces.Box(ub * -1, ub)
        # keep around all sampled goals, one dict per training itr (ie, call of this env.log_diagnostics)
        self.inits_trained = []
Example #22
    def set_param_space(self, eps_scale=0.5):
        """
        Set param_space
        @param param_space: dict(string, rllab.space.base.Space)
        """
        params = {**self._get_param1(), **self._get_param2()}

        if not isinstance(eps_scale, list):
            eps_scale = np.ones(len(params)) * eps_scale

        self._param_space = dict()
        i = 0
        for param, value in params.items():
            eps = np.abs(value) * eps_scale[i]
            ub = value + eps
            lb = value - eps
            for name in positive_params:
                if name in param:
                    lb = np.clip(lb, 1e-3, ub)
                    break
            space = spaces.Box(lb, ub)
            self._param_space[param] = space
            i += 1
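A stand-alone sketch of the interval construction this method performs, under illustrative assumptions: a small params dict in place of _get_param1/_get_param2, and a module-level positive_params list (the original references one but it is not shown here).

import numpy as np
from gym import spaces

positive_params = ['mass']                     # assumed: names whose lower bound must stay positive
params = {'mass': np.array([1.2]), 'damping': np.array([0.05])}
eps_scale = 0.5

param_space = {}
for name, value in params.items():
    eps = np.abs(value) * eps_scale
    lb, ub = value - eps, value + eps
    if any(p in name for p in positive_params):
        lb = np.clip(lb, 1e-3, ub)             # keep strictly positive parameters above zero
    param_space[name] = spaces.Box(lb, ub)

print(param_space['mass'].low, param_space['mass'].high)   # -> [0.6] [1.8]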
Example #23
 def maze_observation_space(self):
     shp = np.concatenate(self.get_readings()).shape
     ub = BIG * np.ones(shp)
     return spaces.Box(ub * -1, ub)
Example #24
 def observation_space(self):
     dim = self.inner_env.observation_space.flat_dim
     newdim = dim + self.n_bins * 2
     ub = BIG * np.ones(newdim)
     return spaces.Box(ub * -1, ub)
Example #25
 def action_space(self):
     bounds = self.model.actuator_ctrlrange
     lb = bounds[:, 0]
     ub = bounds[:, 1]
     return spaces.Box(lb, ub)
Example #26
def run_experiment(variant):
    if variant['env_name'] == 'humanoid-rllab':
        from rllab.envs.mujoco.humanoid_env import HumanoidEnv
        env = normalize(HumanoidEnv())
    elif variant['env_name'] == 'swimmer-rllab':
        from rllab.envs.mujoco.swimmer_env import SwimmerEnv
        env = normalize(SwimmerEnv())
    elif variant["env_name"] == "Point2D-v0":
        import sac.envs.point2d_env
        env = GymEnv(variant["env_name"])
    else:
        env = normalize(GymEnv(variant['env_name']))

    obs_space = env.spec.observation_space
    assert isinstance(obs_space, spaces.Box)
    low = np.hstack([obs_space.low, np.full(variant['num_skills'], 0)])
    high = np.hstack([obs_space.high, np.full(variant['num_skills'], 1)])
    aug_obs_space = spaces.Box(low=low, high=high)
    aug_env_spec = EnvSpec(aug_obs_space, env.spec.action_space)
    pool = SimpleReplayBuffer(
        env_spec=aug_env_spec,
        max_replay_buffer_size=variant['max_pool_size'],
    )

    base_kwargs = dict(min_pool_size=variant['max_path_length'],
                       epoch_length=variant['epoch_length'],
                       n_epochs=variant['n_epochs'],
                       max_path_length=variant['max_path_length'],
                       batch_size=variant['batch_size'],
                       n_train_repeat=variant['n_train_repeat'],
                       eval_render=False,
                       eval_n_episodes=1,
                       eval_deterministic=True,
                       sampler=SimpleSampler(
                           max_path_length=variant["max_path_length"],
                           min_pool_size=variant["max_path_length"],
                           batch_size=variant["batch_size"]))

    M = variant['layer_size']
    qf = NNQFunction(
        env_spec=aug_env_spec,
        hidden_layer_sizes=[M, M],
    )

    vf = NNVFunction(
        env_spec=aug_env_spec,
        hidden_layer_sizes=[M, M],
    )

    policy = GaussianPolicy(
        env_spec=aug_env_spec,
        hidden_layer_sizes=[M, M],
        reg=0.001,
    )

    # policy = GMMPolicy(
    #     env_spec=aug_env_spec,
    #     K=variant['K'],
    #     hidden_layer_sizes=[M, M],
    #     qf=qf,
    #     reg=0.001,
    # )

    discriminator = NNDiscriminatorFunction(
        env_spec=env.spec,
        hidden_layer_sizes=[M, M],
        num_skills=variant['num_skills'],
    )

    algorithm = DIAYN(base_kwargs=base_kwargs,
                      env=env,
                      policy=policy,
                      discriminator=discriminator,
                      pool=pool,
                      qf=qf,
                      vf=vf,
                      lr=variant['lr'],
                      scale_entropy=variant['scale_entropy'],
                      discount=variant['discount'],
                      tau=variant['tau'],
                      num_skills=variant['num_skills'],
                      save_full_state=False,
                      include_actions=variant['include_actions'],
                      learn_p_z=variant['learn_p_z'],
                      add_p_z=variant['add_p_z'],
                      reparametrize=variant["reparametrize"])

    algorithm.train()
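run_experiment only reads keys out of variant; an illustrative call might look like the following. The values are placeholders chosen for demonstration, not the hyperparameters used by the original experiments.

variant = dict(
    env_name='Point2D-v0',          # or e.g. 'swimmer-rllab', 'humanoid-rllab'
    num_skills=10,
    max_pool_size=int(1e6),
    max_path_length=100,
    epoch_length=1000,
    n_epochs=100,
    batch_size=128,
    n_train_repeat=1,
    layer_size=128,
    lr=3e-4,
    scale_entropy=0.1,
    discount=0.99,
    tau=0.01,
    include_actions=False,
    learn_p_z=False,
    add_p_z=True,
    reparametrize=True,
)
run_experiment(variant)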
Example #27
 def observation_space(self):
     shp = self.get_current_image_obs()[1].shape
     # shp = self.get_current_image_obs()[1].shape
     ub = BIG * np.ones(shp)
     return spaces.Box(ub * -1, ub)
Example #28
 def maze_observation_space(self):
     shp = self.get_current_maze_obs().shape
     ub = BIG * np.ones(shp)
     return spaces.Box(ub * -1, ub)
Example #29
 def action_space(self):
     if isinstance(self._wrapped_env.action_space, Box):
         ub = np.ones(self._wrapped_env.action_space.shape)
         return spaces.Box(-1 * ub, ub)
     return self._wrapped_env.action_space
Example #30
 def action_space(self):
     lb = np.array(
         [control.ctrllimit[0] for control in self.extra_data.controls])
     ub = np.array(
         [control.ctrllimit[1] for control in self.extra_data.controls])
     return spaces.Box(lb, ub)