Example #1
import numpy as np

# HumanoidPushingEnv, BayesEnv, ParamEnvDiscreteEstimator,
# DiscreteParamEnvSampler, and the module-level `discretization` constant are
# assumed to be provided by the surrounding package.


class BayesHumanoidPushingEnv(BayesEnv):
    # Bayesian wrapper around the MuJoCo HumanoidPushingEnv: maintains a
    # discrete posterior over the env's physical parameters and appends it
    # to each observation.
    def __init__(self, reset_params=True):
        env = HumanoidPushingEnv()
        self.estimator = ParamEnvDiscreteEstimator(env, discretization=discretization)
        self.env_sampler = DiscreteParamEnvSampler(env, discretization)
        super().__init__(env, self.estimator)
        self.nominal_env = env
        self.reset_params = reset_params

    def reset(self):
        # Optionally resample a new true environment at the start of each episode
        if self.reset_params:
            self.env = self.env_sampler.sample()
        return super().reset()

    def step(self, action):
        prev_state = self.env.get_state()
        obs, reward, done, info = self.env.step(action)
        info['prev_state'] = prev_state
        info['curr_state'] = self.env.get_state()

        # Update the posterior over env parameters with this transition
        self.estimator.estimate(action, obs, **info)
        belief = self.estimator.get_belief()
        info['belief'] = belief

        # Append the flattened belief to the observation
        obs = np.concatenate([obs, belief], axis=0)
        return obs, reward, done, info
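
A minimal usage sketch of the wrapper above. This is hypothetical: it assumes the names in the example resolve, that the wrapper forwards the base env's action_space, and the classic four-tuple gym step API.

env = BayesHumanoidPushingEnv(reset_params=True)
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # random policy, purely for illustration
    obs, reward, done, info = env.step(action)
    # obs now ends with the flattened belief; info['belief'] holds it separately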
Example #2
import numpy as np
from gym.spaces import Dict

# HumanoidPushingEnv, ExplicitBayesEnv, ParamEnvDiscreteEstimator,
# DiscreteParamEnvSampler, and the module-level `discretization` constant are
# assumed to be provided by the surrounding package.


class ExplicitBayesHumanoidPushingEnv(ExplicitBayesEnv):
    # Like BayesHumanoidPushingEnv, but returns the belief as a separate
    # 'zbel' entry of a dict observation instead of concatenating it.
    def __init__(self, reset_params=True):
        env = HumanoidPushingEnv()
        self.estimator = ParamEnvDiscreteEstimator(env, discretization=discretization)
        self.env_sampler = DiscreteParamEnvSampler(env, discretization)
        super().__init__(env, self.estimator)
        self.nominal_env = env

        self.observation_space = Dict(
            {"obs": env.observation_space, "zbel": self.estimator.belief_space})
        self.internal_observation_space = env.observation_space
        self.env = env
        self.reset_params = reset_params

    def _update_belief(self, action, obs, **kwargs):
        # Update the posterior over env parameters with the latest transition
        self.estimator.estimate(action, obs, **kwargs)
        belief = self.estimator.get_belief()
        return belief, kwargs

    def step(self, action):
        prev_state = self.env.get_state()
        obs, reward, done, info = self.env.step(action)
        info['prev_state'] = prev_state
        info['curr_state'] = self.env.get_state()

        bel, info = self._update_belief(action, obs, **info)

        # Label the discrete expert that matches the true friction parameter
        true_param = self.env.get_params()
        friction = true_param['friction']
        exp_id = np.argwhere(
            self.env_sampler.param_sampler_space['friction'] == friction)[0, 0]
        info['expert'] = exp_id

        return {'obs': obs, 'zbel': bel}, reward, done, info

    def reset(self):
        if self.reset_params:
            self.env = self.env_sampler.sample()
        obs = self.env.reset()
        # Reinitialize the belief from the first observation
        self.estimator.reset()
        bel, _ = self._update_belief(action=None, obs=obs)
        self.last_obs = (obs, bel)
        return {'obs': obs, 'zbel': bel}
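
A minimal usage sketch for this dict-observation variant, under the same assumptions as the sketch after Example #1.

env = ExplicitBayesHumanoidPushingEnv()
obs = env.reset()                       # dict with 'obs' and 'zbel' entries
action = env.action_space.sample()      # assuming action_space is forwarded
obs, reward, done, info = env.step(action)
state, belief = obs['obs'], obs['zbel']
expert_id = info['expert']              # index of the true-friction expert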