def do_predictor_rollout(predictor_args, initial_state, act_list, noise_list, step):
    """Roll out a fresh Predictor over every sampled action sequence.

    act_list: list with num_rollout_per_cpu elements, each element is an
    np.array with size (H, dim_u); noise_list is shaped the same way.
    Returns one path dict (observations / actions / costs / noise) per
    action sequence.
    """
    predictor = Predictor(*predictor_args)
    print('predictor built successfully')
    horizon = act_list[0].shape[0]
    paths = []
    for acts, noises in zip(act_list, noise_list):
        # Re-sync the predictor's shadow state before each rollout.
        predictor.catch_up(*initial_state)
        obs_seq, act_seq, noise_seq, cost_seq = [], [], [], []
        for t in range(horizon):
            obs_seq.append(predictor._get_obs())
            act_seq.append(acts[t])
            noise_seq.append(noises[t])
            cost_seq.append(predictor.predict(acts[t], step))
        paths.append(dict(
            observations=np.array(obs_seq),
            actions=np.array(act_seq),
            costs=np.array(cost_seq),
            noise=np.array(noise_seq),
        ))
    return paths
class MPPI():
    """MPPI algorithm for pushing.

    Samples K noisy perturbations of the nominal control sequence U
    (here a single heading angle theta per timestep), scores each with
    the learned Predictor, and blends them with importance weights.
    """

    def __init__(self, K, T, A):
        self.A = A  # action scale
        self.T = T  # time steps per sequence
        self.predictor = Predictor(1, 2, 1, self.T)
        self.trajectory = Trajectory()
        self.trajectory.reset()
        self.K = K  # K sample action sequences
        self.lambd = 1  # MPPI temperature
        self.dim_u = 1  # U is theta
        # action init
        self.U_reset()
        self.u_init = np.array([0.0])
        self.cost = np.zeros([self.K])
        self.noise = np.random.uniform(-3.14, 3.14,
                                       size=(self.K, self.T, self.dim_u))

    def _compute_cost(self, k, step):
        """Sample a smoothed noise sequence for rollout k and accumulate
        its predicted cost into self.cost[k]."""
        self.noise[k] = np.clip(
            np.random.normal(loc=0, scale=2, size=(self.T, 1)), -1.57, 1.57)
        # eps is a view into self.noise[k]; writes to eps[t] persist there,
        # so no separate write-back is needed.
        eps = self.noise[k]
        # Make the shadow state the same as the actual robot and object state.
        self.predictor.catch_up(
            self.trajectory.get_relative_state(),
            self.trajectory.get_absolute_state(),
            self.trajectory.get_obstacle_position(),
            self.trajectory.get_goal_position(),
            step)
        for t in range(self.T):
            if t > 0:
                # Exponentially smooth the noise so consecutive headings
                # do not jump around.
                eps[t] = 0.4 * eps[t - 1] + 0.6 * eps[t]
            theta = self.U[t] + eps[t]
            action = copy.copy(
                self.A * np.concatenate([np.sin(theta), np.cos(theta)]))
            # There will be shadow states in the predictor.
            cost = self.predictor.predict(action, step)
            self.cost[k] += cost

    def compute_cost(self, step):
        """Score all K noise rollouts for the current step."""
        for k in range(self.K):
            self._compute_cost(k, step)

    def trajectory_clear(self):
        self.trajectory.reset()

    def trajectory_set_goal(self, pos_x, pos_y, theta):
        self.trajectory.set_goal(pos_x, pos_y, theta)

    def trajectory_update_state(self, pose_object, pose_tool):
        self.trajectory.update_state(pose_object, pose_tool)

    def trajectory_update_action(self, action):
        self.trajectory.update_action(action)

    def _importance_weighted_theta(self):
        """Fold the importance-weighted noise into U and return U[0].

        Shared by compute_noise_action / compute_noise_theta, which were
        previously duplicated verbatim.
        """
        beta = np.min(self.cost)  # baseline for numerical stability
        eta = np.sum(np.exp((-1 / self.lambd) * (self.cost - beta))) + 1e-6
        w = (1 / eta) * np.exp((-1 / self.lambd) * (self.cost - beta))
        self.U += [np.dot(w, self.noise[:, t]) for t in range(self.T)]
        return self.U[0]

    def compute_noise_action(self):
        """Return the scaled (sin, cos) action for the updated first theta."""
        theta = self._importance_weighted_theta()
        return copy.copy(
            self.A * np.concatenate([np.sin(theta), np.cos(theta)]))

    def compute_noise_theta(self):
        """Return the updated first heading theta (U[0])."""
        return self._importance_weighted_theta()

    def U_reset(self):
        """Reset the nominal control sequence to a constant heading."""
        self.U = np.zeros([self.T, self.dim_u])
        self.U[:, 0] = 1.57

    def get_K(self):
        return self.K

    def U_update(self):
        """Shift U left one step and refill the tail with u_init."""
        self.U = np.roll(self.U, -1, axis=0)
        self.U[-1] = self.u_init

    def cost_clear(self):
        self.cost = np.zeros([self.K])
class MPPI():
    """MPPI algorithm for pushing (gym-env variant).

    NOTE(review): this class shadows the earlier ``MPPI`` defined above in
    the same file — only this definition is visible after import.  Consider
    renaming one of them.
    """

    def __init__(self, env, K, T):
        self.env = env
        self.T = T  # time steps per sequence
        self.predictor = Predictor(1, 2, 1, self.T)
        self.trajectory = Trajectory(self.env)
        self.trajectory.reset()
        self.K = K  # K sample action sequences
        self.lambd = 1  # MPPI temperature
        self.dim_u = 2
        self.U = np.zeros([self.T, self.dim_u])
        self.time_limit = self.env._max_episode_steps
        self.u_init = np.zeros([self.dim_u])
        self.cost = np.zeros([self.K])
        self.noise = np.random.normal(loc=0, scale=1,
                                      size=(self.K, self.T, self.dim_u))

    def compute_cost(self, k, step):
        """Sample a smoothed noise sequence for rollout k and accumulate
        its predicted cost into self.cost[k]."""
        self.noise[k] = np.random.normal(loc=0, scale=1,
                                         size=(self.T, self.dim_u))
        # eps is a view into self.noise[k]; writes to eps[t] persist there,
        # so no separate write-back is needed.
        eps = self.noise[k]
        # Make the shadow state the same as the actual robot and object state.
        self.predictor.catch_up(
            self.trajectory.get_relative_state(),
            self.trajectory.get_absolute_state(),
            self.trajectory.get_obstacle_position(),
            self.trajectory.get_goal_position(),
            step)
        for t in range(self.T):
            if t > 0:
                # Exponentially smooth the noise sequence.
                eps[t] = 0.8 * eps[t - 1] + 0.2 * eps[t]
            action = self.U[t] + eps[t]
            # There will be shadow states in the predictor.
            cost = self.predictor.predict(action, step)
            self.cost[k] += cost

    def rollout(self, episode):
        """Run MPPI control in the environment for ``episode`` episodes."""
        # Loop variable renamed from ``episode`` — it previously shadowed
        # the parameter of the same name.
        for ep in range(episode):
            print('episode: {}'.format(ep))
            obs = self.env.reset()
            self.trajectory.reset()
            self.trajectory.update_state(obs)
            self.U = np.zeros([self.T, self.dim_u])
            for step in range(self.time_limit):
                print('step: ', step)
                for k in range(self.K):
                    self.compute_cost(k, step)
                # Importance weights; exp term hoisted so it is computed once.
                beta = np.min(self.cost)
                exp_costs = np.exp((-1 / self.lambd) * (self.cost - beta))
                eta = np.sum(exp_costs) + 1e-6
                w = (1 / eta) * exp_costs
                self.U += [np.dot(w, self.noise[:, t]) for t in range(self.T)]
                obs, r, _, _ = self.env.step(
                    np.concatenate([self.U[0], np.zeros([2])]))
                self.trajectory.update_state(obs)
                self.trajectory.update_action(self.U[0])
                self.env.render()
                self.U = np.roll(self.U, -1, axis=0)  # shift left
                self.U[-1] = self.u_init
                self.cost = np.zeros([self.K])  # reset cost