def rollout(self):
    """Run one episode in ``self.env`` and return the collected trajectory.

    The environment is reset, then stepped with actions from ``self.act``
    until the environment signals termination (``res[2]`` is truthy) or
    ``self.args.max_pathlength - 1`` steps have been taken.

    Returns:
        A dict with the keys ``"obs"``, ``"action_dists_mu"``,
        ``"action_dists_logstd"``, ``"rewards"`` and ``"actions"``, each
        holding the per-step values as a NumPy array; or ``None`` when
        ``self.args.max_pathlength - 1`` allows no step at all.
    """
    obs, actions, rewards = [], [], []
    action_dists_mu, action_dists_logstd = [], []

    # Start a fresh episode; the current wall-clock second is passed as seed.
    self.old_observation = None
    plain_obs = self.env.reset(difficulty=2, seed=int(time.time()))
    processed_observation, self.old_observation = go(
        plain_obs, self.old_observation, step=1)
    # NOTE(review): `filter` is called with a single argument, so it must be
    # a project-level callable shadowing the builtin -- confirm.
    ob = filter(processed_observation)

    # `range` behaves the same as the former `xrange` here and also exists
    # on Python 3.
    for _ in range(self.args.max_pathlength - 1):
        obs.append(ob)

        action, action_dist_mu, action_dist_logstd = self.act(ob)
        # Keep every action component inside [0, 1] before it is stored and
        # sent to the environment.
        action = np.clip(action, a_max=1.0, a_min=0.0)
        actions.append(action)
        action_dists_mu.append(action_dist_mu)
        action_dists_logstd.append(action_dist_logstd)

        # res[0] is the next raw observation, res[1] the reward and res[2]
        # the termination flag, as used below.
        res = self.env.step(action)
        processed_observation, self.old_observation = go(
            res[0], self.old_observation, step=1)
        ob = filter(processed_observation)
        rewards.append(res[1])

        if res[2]:
            break

    if not obs:
        # No step was taken (max_pathlength - 1 < 1); the original code fell
        # off the end of the function in this case, i.e. returned None.
        return None

    return {
        # expand_dims + concatenate stacks the per-step observations along
        # axis 0, exactly as before.
        "obs": np.concatenate(np.expand_dims(obs, 0)),
        "action_dists_mu": np.concatenate(action_dists_mu),
        "action_dists_logstd": np.concatenate(action_dists_logstd),
        "rewards": np.array(rewards),
        "actions": np.array(actions),
    }
def obg(self, plain_obs):
    """Observation generator.

    Passes the raw observation, the stored ``self.old_observation`` and the
    current ``self.stepcount`` to ``go``, stores the second value ``go``
    returns back into ``self.old_observation`` and returns the first one as
    a NumPy array.  Per the original note, this is where the derivatives of
    the observations are extracted.
    """
    features, previous = go(
        plain_obs,
        self.old_observation,
        step=self.stepcount,
    )
    # Remember the state handed back by `go` for the next call.
    self.old_observation = previous
    return np.array(features)
def obg(plain_obs): nonlocal old_observation, stepno, ob_log # log csv observation into string ob_log += ','.join([str(i) for i in plain_obs]) + '\n' processed_observation, old_observation = go(plain_obs, old_observation, step=stepno) return np.array(processed_observation)
def obg(plain_obs): nonlocal old_observation, stepno processed_observation, old_observation = go(plain_obs, old_observation, step=stepno) return np.array(processed_observation)