def run_controller(self, horizon, policy):
    """Roll out ``policy`` in the wrapped environment for ``horizon`` steps.

    Each step converts the raw observation to a touch-state via
    ``self.state2touch``, queries the policy for an action, and steps the
    environment, logging everything along the way.

    :param horizon: number of environment steps to execute
    :param policy: object exposing ``act(state) -> action`` (action must
        support ``.tolist()``)
    :return: DotMap with ``actions``/``rewards``/``states``/``obs`` as
        numpy arrays and ``times`` left as an (unused) empty list
    """
    logs = DotMap()
    logs.states = []
    logs.actions = []
    logs.rewards = []
    logs.times = []
    logs.obs = []

    obs = self._env.reset()
    print("Env has been reset")

    for _ in range(horizon):
        touch_state = self.state2touch(obs)
        print("Go from state to touch")
        act = policy.act(touch_state)
        print("Get an action based on policy")
        obs, rew, done, info = self._env.step(act)
        print("Perform an action")

        # Record this step (times is intentionally left unpopulated,
        # matching the original behavior).
        logs.actions.append(act.tolist())
        logs.rewards.append(rew)
        logs.states.append(touch_state)
        logs.obs.append(obs)

    # Pack the per-step lists into arrays for downstream analysis.
    logs.actions = np.array(logs.actions)
    logs.rewards = np.array(logs.rewards)
    logs.states = np.array(logs.states)
    logs.obs = np.array(logs.obs)
    return logs
def run_controller(env, horizon, policy, video=False):
    """Run a Reacher3d gym environment for ``horizon`` timesteps, choosing
    actions according to ``policy``.

    :param env: a gym environment object
    :param horizon: the number of steps to run forward
    :param policy: a policy object exposing ``act(obs) -> (action, time)``
        (see the policy module)
    :param video: if True, render the environment each step
    :return: DotMap with ``actions``/``rewards``/``states`` as numpy arrays
        and ``times`` as an (unpopulated) list
    """
    # nol 29 feb - the policy only acts on the first 5 observation variables
    def obs2q(obs):
        return obs[0:5]

    logs = DotMap()
    logs.states = []
    logs.actions = []
    logs.rewards = []
    logs.times = []

    observation = env.reset()
    for t in range(horizon):
        if video:
            env.render()
        state = observation
        # BUG FIX: the second value returned by policy.act() was previously
        # unpacked into a variable named ``t``, shadowing the loop counter.
        # Renamed to ``act_time`` to remove the shadowing.
        action, act_time = policy.act(obs2q(state))
        observation, reward, done, info = env.step(action)

        # Log this step.
        # NOTE(review): act_time is presumably the per-step timing intended
        # for logs.times — left unlogged here to preserve the original
        # returned data; confirm with callers before populating it.
        logs.actions.append(action)
        logs.rewards.append(reward)
        logs.states.append(observation)

    # Cluster state: convert per-step lists to arrays.
    logs.actions = np.array(logs.actions)
    logs.rewards = np.array(logs.rewards)
    logs.states = np.array(logs.states)
    return logs
plt.title('begin') plt.imshow(goal_log['states'][0]) plt.subplot(1, 2, 2) plt.title('end') plt.imshow(goal_log['states'][-1]) #goal_img = goal_log['states'][-1] #goal_diff_img goal_img = goal_log['states'][-1] - goal_log['states'][-2] obs_init = sim._env.reset() state_init = sim.state2touch(obs_init) #Init error to be massive error = 1e+3 logs = DotMap() logs.states = [] logs.actions = [] logs.rewards = [] logs.times = [] logs.obs = [] ii = 0 while (error > parsed.thresh): #for ii in range(200): #we take random action for 0th step if ii == 0: action = np.random.uniform(-10, 10, 2) error = 1e+3 elif ii == 1: curr_img = logs['states'][-1] - state_init error, action = compute_action(goal_img, curr_img) else: curr_img = logs['states'][-1] - logs['states'][-2]