Exemple #1
0
 def reset(self):
     """Reset the wrapped environment and the estimator.

     Returns a dict with two entries: ``'obs'``, the initial observation
     after being passed through ``to_one_hot`` with ``self.n``, and
     ``'zbel'``, the belief returned by ``self._update_belief``.
     The same (observation, belief) pair is cached on ``self.last_obs``.
     """
     initial_obs = self.env.reset()
     self.estimator.reset()

     # No action has been taken yet, so the belief update receives
     # action=None; its second return value is not needed here.
     belief, _ = self._update_belief(action=None, obs=initial_obs)

     encoded_obs = to_one_hot(initial_obs, self.n)
     self.last_obs = (encoded_obs, belief)

     first_observation = {'obs': encoded_obs, 'zbel': belief}
     return first_observation
    def step(self, action):
        """Step the parent class, then post-process its observation and info.

        ``obs['obs']`` is replaced by the result of ``to_one_hot`` with
        ``self.env.nS``, and ``info['expert']`` is set to an integer built
        from ``self.env.state[2:]``.

        Returns the usual ``(obs, reward, done, info)`` tuple.
        """
        parent_result = super(ExplicitBayesRockSample, self).step(action)
        obs, reward, done, info = parent_result

        obs['obs'] = to_one_hot(obs['obs'], self.env.nS)

        # Every state entry from index 2 onward is cast to int and written
        # as one digit; the digit string is then parsed as a base-2 number.
        # NOTE(review): presumably these entries are 0/1 flags -- confirm.
        digits = [str(int(entry)) for entry in self.env.state[2:]]
        info['expert'] = int(''.join(digits), 2)

        return obs, reward, done, info
Exemple #3
0
    def step(self, action):
        """Step the wrapped environment and attach the updated belief.

        The raw observation and the environment's info entries are passed
        to ``self._update_belief``; the info dict it returns is the one
        handed back to the caller, extended with an ``'expert'`` entry.

        Returns ``({'obs': ..., 'zbel': ...}, reward, done, info)``.
        """
        raw_obs, reward, done, env_info = self.env.step(action)
        belief, info = self._update_belief(action, raw_obs, **env_info)

        encoded_obs = to_one_hot(raw_obs, self.n)
        self.last_obs = (encoded_obs, belief)

        # 'expert' is the position of the current slip probability in the
        # candidate list below. The comparison is an exact float match, so
        # a value that is not one of these literals raises IndexError.
        slip = self.env.slip_prob[Action.A]
        candidates = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
        matching_positions = np.where(candidates == slip)[0]
        info['expert'] = matching_positions[0]

        augmented_obs = {'obs': encoded_obs, 'zbel': belief}
        return augmented_obs, reward, done, info
Exemple #4
0
    def _augment_observation(self, action, obs, **kwargs):
        """Update the estimator and pair the observation with its MLE.

        The estimator is fed ``action``, ``obs`` and the extra keyword
        arguments, after which ``get_mle()`` is read. Both the MLE and the
        observation are wrapped in a one-element array when they are not
        already ``np.ndarray``; the observation is first passed through
        ``to_one_hot`` when the environment's observation space is
        ``Discrete``.

        Returns ``(observation, kwargs)`` where ``kwargs`` has gained an
        ``'mle'`` entry. When ``self.augment_to_obs`` is truthy the
        observation is concatenated with the MLE along axis 0.
        """
        self.estimator.estimate(action, obs, **kwargs)

        estimate = self.estimator.get_mle()
        estimate = (estimate if isinstance(estimate, np.ndarray)
                    else np.array([estimate]))

        if isinstance(self.env.observation_space, Discrete):
            obs = to_one_hot(obs, self.env.observation_space.n)
        obs = obs if isinstance(obs, np.ndarray) else np.array([obs])

        kwargs['mle'] = estimate

        # Guard clause: hand back the plain observation unless augmentation
        # was requested.
        if not self.augment_to_obs:
            return obs, kwargs
        return np.concatenate([obs, estimate], axis=0), kwargs
Exemple #5
0
    def _augment_observation(self, action, obs, **kwargs):
        """Update the estimator and pair the observation with its belief.

        The estimator is fed ``action``, ``obs`` and the extra keyword
        arguments, after which ``get_belief()`` is read. Values that are
        not already ``np.ndarray`` are wrapped in a one-element array; the
        observation is first passed through ``to_one_hot`` when the
        environment's observation space is ``Discrete``.

        Returns ``(observation, kwargs)`` where ``kwargs`` has gained a
        ``'belief'`` entry. When ``self.augment_to_obs`` is truthy the
        observation is concatenated with the belief along axis 0.
        """

        def as_array(value):
            # Leave existing arrays untouched; wrap anything else.
            if isinstance(value, np.ndarray):
                return value
            return np.array([value])

        self.estimator.estimate(action, obs, **kwargs)
        belief = as_array(self.estimator.get_belief())

        if isinstance(self.env.observation_space, Discrete):
            obs = to_one_hot(obs, self.env.observation_space.n)
        obs = as_array(obs)

        kwargs['belief'] = belief

        result = obs
        if self.augment_to_obs:
            result = np.concatenate([obs, belief], axis=0)
        return result, kwargs