Beispiel #1
0
 def rollout(self):
     """Run one episode in ``self.env`` and return the collected trajectory.

     The environment is reset with ``difficulty=2`` and a seed taken from the
     current wall-clock time. Each step asks ``self.act`` for an action,
     clips it to ``[0, 1]`` and applies it, until the environment reports
     termination or ``self.args.max_pathlength - 1`` steps have been taken.

     Side effects:
         ``self.old_observation`` is cleared at the start and then replaced
         by the second value returned from every ``go`` call.

     Returns:
         A dict with the keys ``"obs"``, ``"action_dists_mu"``,
         ``"action_dists_logstd"``, ``"rewards"`` and ``"actions"``, or
         ``None`` when ``self.args.max_pathlength`` leaves no room for a
         single step.
     """
     obs, actions, rewards = [], [], []
     action_dists_mu, action_dists_logstd = [], []

     self.old_observation = None
     plain_obs = self.env.reset(difficulty=2, seed=int(time.time()))
     processed_observation, self.old_observation = go(plain_obs, self.old_observation, step=1)
     # NOTE(review): `filter` is called with a single argument, so it is
     # presumably a project-level observation filter, not the builtin.
     ob = filter(processed_observation)

     # `range` behaves the same as `xrange` here and also exists on Python 3.
     for _ in range(self.args.max_pathlength - 1):
         # Record the observation the action is chosen from; the observation
         # produced by the final step is therefore never stored.
         obs.append(ob)
         action, action_dist_mu, action_dist_logstd = self.act(ob)
         action = np.clip(action, a_min=0.0, a_max=1.0)
         actions.append(action)
         action_dists_mu.append(action_dist_mu)
         action_dists_logstd.append(action_dist_logstd)

         # Only the first three entries of the step result are used:
         # next raw observation, reward, and the flag that ends the rollout.
         res = self.env.step(action)
         processed_observation, self.old_observation = go(res[0], self.old_observation, step=1)
         ob = filter(processed_observation)
         rewards.append(res[1])
         if res[2]:
             break

     if not obs:
         # The loop body never ran; callers get None, exactly as with the
         # implicit fall-through when no step is possible.
         return None

     return {
         # expand_dims adds a leading axis that concatenate strips again,
         # leaving the per-step observations stacked along axis 0.
         "obs": np.concatenate(np.expand_dims(obs, 0)),
         # Joined along axis 0; assumes each entry already carries a
         # leading batch axis -- TODO confirm against self.act.
         "action_dists_mu": np.concatenate(action_dists_mu),
         "action_dists_logstd": np.concatenate(action_dists_logstd),
         "rewards": np.array(rewards),
         "actions": np.array(actions),
     }
Beispiel #2
0
 def obg(self, plain_obs):
     """Observation generator: turn a raw observation into a NumPy array.

     ``plain_obs`` is passed to ``go`` together with the previously stored
     observation and the current ``self.stepcount``. The original note on
     this method says derivatives of observations are extracted here
     (presumably inside ``go``).

     Side effects:
         ``self.old_observation`` is replaced by the second value that
         ``go`` returns.

     Returns:
         ``np.array`` built from the first value that ``go`` returns.
     """
     features, previous = go(plain_obs, self.old_observation, step=self.stepcount)
     self.old_observation = previous
     return np.array(features)
Beispiel #3
0
        def obg(plain_obs):
            nonlocal old_observation, stepno, ob_log

            # log csv observation into string
            ob_log += ','.join([str(i) for i in plain_obs]) + '\n'

            processed_observation, old_observation = go(plain_obs,
                                                        old_observation,
                                                        step=stepno)
            return np.array(processed_observation)
Beispiel #4
0
 def obg(plain_obs):
     nonlocal old_observation, stepno
     processed_observation, old_observation = go(plain_obs,
                                                 old_observation,
                                                 step=stepno)
     return np.array(processed_observation)