import os
import multiprocessing as mp

import numpy as np

# Project-local imports. The module names below are assumptions made for
# readability; adjust them to the actual repository layout. WEIGHTS_PATH,
# DATA_PATH, and SAVED_DATA_PATH are expected to be filesystem constants
# defined by the project configuration.
from model import Model
from trajectory import Trajectory
from utils import (compute_cost_parallel, score_trajectory, stack_noise,
                   get_object_state_increment_world_coordinate)
from config import WEIGHTS_PATH, DATA_PATH, SAVED_DATA_PATH
class Predictor():
    def __init__(self, c_o, c_g, c_d, time_steps):
        self.T = time_steps  # number of time steps to predict
        self.model = Model(3, 2, 100, 64, 64, 4, load_data=False)
        self.model.load_weights(WEIGHTS_PATH)
        self.trajectory_length = self.model.env_time_step
        self.input_sequence_len = self.model.time_steps
        # all relative to the object coordinate frame:
        # (delta_x, delta_y, delta_theta, x_robot, y_robot, action_x, action_y)
        self.relative_state = np.zeros([self.trajectory_length + self.T, 7])
        # all absolute (world-frame) positions:
        # (x_object, y_object, theta_object, x_robot, y_robot)
        self.absolute_state = np.zeros([self.trajectory_length + self.T, 5])
        self.object_absolute_trajectory_imagine = []  # the shooting trajectory
        self.robot_absolute_trajectory_imagine = []
        self.obstacle_position = np.zeros([2])  # obstacle absolute position
        # goal position and orientation, relative to the world coordinate frame
        self.goal_position = np.zeros([3])
        # reset to zero on every catch_up; counts how many states have been predicted
        self.count = 0
        self.c_o = c_o  # cost coefficient for obstacle distance
        self.c_g = c_g  # cost coefficient for goal distance
        self.c_d = c_d  # cost coefficient for distance between object and robot

    def catch_up(self, relative_state, absolute_state, obstacle_position,
                 goal_position, step):
        """Update the current state and trajectory history of this episode
        for this sample agent.

        Args:
            relative_state: np.array(step + 1, 7)
            absolute_state: np.array(step + 1, 5)
            obstacle_position: np.array(2,)
            goal_position: np.array(3,)
            step: (int) timestep
        """
        assert relative_state.shape == (step + 1, 7)
        assert absolute_state.shape == (step + 1, 5)
        assert obstacle_position.shape == (2, )
        assert goal_position.shape == (3, )
        # relative state (model input); the robot action stored in
        # relative_state[step] is a zero placeholder
        self.relative_state[:(step + 1)] = relative_state[:]
        self.relative_state[(step + 1):] = np.zeros([7])
        # absolute state (used by cost_fun)
        self.absolute_state[:(step + 1)] = absolute_state[:]
        self.absolute_state[(step + 1):] = np.zeros([5])
        # obstacle and goal positions
        self.goal_position[:] = goal_position[:]
        self.obstacle_position[:] = obstacle_position[:]
        # number of states predicted since the last catch_up
        self.count = 0

    def get_relative_action(self, action, step):
        """Rotate a world-frame action into the object coordinate frame."""
        object_pos = self.absolute_state[step + self.count][:3]
        theta = object_pos[2]
        action_x = action[0]
        action_y = action[1]
        action_relative_to_object_x = (action_x * np.cos(theta) +
                                       action_y * np.sin(theta))
        action_relative_to_object_y = (action_y * np.cos(theta) -
                                       action_x * np.sin(theta))
        return np.array(
            [action_relative_to_object_x, action_relative_to_object_y])

    def predict(self, action, step):
        """Predict one step ahead and return the cost of the new state.

        Args:
            action: np.array(2,), relative to the world coordinate frame.
        """
        assert action.shape == (2, )
        action = 0.05 * np.clip(action, -1, 1)
        model_input = np.zeros([self.input_sequence_len, 7])
        # transform the action into the object coordinate frame
        relative_action = self.get_relative_action(action, step)
        # fill in the action of the current state, which was set to (0., 0.)
        self.relative_state[step + self.count][5:] = relative_action[:]
        # build the input sequence; the model needs input_sequence_len steps
        for i in range(self.input_sequence_len):
            idx = i + step + self.count + 1 - self.input_sequence_len
            if idx < 0:
                # pad steps before the episode start with zeros, keeping
                # the initial robot position
                model_input[i] = np.zeros([7])
                model_input[i][3:5] = self.relative_state[0][3:5]
            else:
                model_input[i] = self.relative_state[idx]
        model_input = model_input[np.newaxis, :]
        # predicted increment in the object frame: [delta_x, delta_y, delta_theta]
        state_increment = self.model.predict(model_input)
        # update self.relative_state
        self.relative_state[step + self.count + 1][:3] = state_increment[:]
        self.relative_state[step + self.count + 1][3:5] = (
            self.get_prediction_relative_position(state_increment, step)[:])
        # update self.absolute_state
        self.absolute_state[step + self.count + 1][:3] = (
            self.get_prediction_absolute_position(state_increment, step)[:])
        self.absolute_state[step + self.count + 1][3:] = (
            self.absolute_state[step + self.count][3:] + action)
        # compute the cost of the newly predicted state
        cost = self.cost_fun(self.absolute_state[step + self.count + 1][:3],
                             self.absolute_state[step + self.count + 1][3:],
                             self.obstacle_position, self.goal_position)
        self.count += 1
        return cost

    def test(self):
        self.model.model.summary()

    def cost_fun(self, object_position, robot_position, obstacle_position,
                 goal_position):
        """Quadratic cost: distance from object to goal plus a smaller term
        for the distance between object and robot. The obstacle term (c_o,
        obstacle_position) is currently unused."""
        c_g = self.c_g
        c_d = self.c_d
        object_position_ = object_position[:2]
        cost = c_g * np.squeeze(
            np.sum(np.square(object_position_ - goal_position[:2]))
        ) + 0.3 * c_d * np.squeeze(
            np.sum(np.square(object_position_ - robot_position)))
        return cost

    def get_prediction_relative_position(self, state_increment, step):
        """Robot position relative to the updated object frame."""
        x_original = self.relative_state[step + self.count][3]
        y_original = self.relative_state[step + self.count][4]
        action_x = self.relative_state[step + self.count][5]
        action_y = self.relative_state[step + self.count][6]
        coordinate_increment_x = state_increment[0]
        coordinate_increment_y = state_increment[1]
        coordinate_increment_theta = state_increment[2]
        # translation without rotation
        x = x_original + action_x - coordinate_increment_x
        y = y_original + action_y - coordinate_increment_y
        # rotation into the updated object frame
        x_relative_update = (x * np.cos(coordinate_increment_theta) +
                             y * np.sin(coordinate_increment_theta))
        y_relative_update = (y * np.cos(coordinate_increment_theta) -
                             x * np.sin(coordinate_increment_theta))
        return np.array([x_relative_update, y_relative_update])

    def get_prediction_absolute_position(self, state_increment, step):
        """Rotate the object-frame increment into the world frame and add it
        to the previous absolute object pose."""
        object_x_original = self.absolute_state[step + self.count][0]
        object_y_original = self.absolute_state[step + self.count][1]
        object_theta_original = self.absolute_state[step + self.count][2]
        increment_x = state_increment[0]
        increment_y = state_increment[1]
        increment_theta = state_increment[2]
        delta_x = (increment_x * np.cos(object_theta_original) -
                   increment_y * np.sin(object_theta_original))
        delta_y = (increment_x * np.sin(object_theta_original) +
                   increment_y * np.cos(object_theta_original))
        x_absolute = delta_x + object_x_original
        y_absolute = delta_y + object_y_original
        theta_absolute = object_theta_original + increment_theta
        return np.array([x_absolute, y_absolute, theta_absolute])

    def _get_obs(self):
        return 0
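
# Illustrative usage sketch of Predictor. The coefficient values, the
# ten-step horizon, and the caller-supplied state arrays and action_sequence
# are assumptions for the example, not values from the original experiments.
def _example_predictor_usage(relative_state, absolute_state,
                             obstacle_position, goal_position, step,
                             action_sequence):
    predictor = Predictor(c_o=1.0, c_g=3.0, c_d=1.0, time_steps=10)
    # synchronize the predictor's shadow state with the real history
    predictor.catch_up(relative_state, absolute_state, obstacle_position,
                       goal_position, step)
    total_cost = 0.0
    for t in range(predictor.T):
        # each call advances the internal shadow state by one step and
        # returns the cost of the newly predicted state
        total_cost += predictor.predict(action_sequence[t], step)
    return total_cost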
class MPPI():
    """MPPI algorithm for pushing."""
    def __init__(self, env, K, T):
        self.model = Model(3, 2, 100, 64, 64, 10, load_data=False)
        self.model.load_weights(WEIGHTS_PATH)
        self.env = env
        self.T = T  # time steps per action sequence
        # forwarded to the worker-side predictors (the model, then the cost
        # coefficients and the horizon)
        self.predictor_args = [self.model, 1, 3, 1, self.T]
        self.trajectory = Trajectory(self.env)
        self.K = K  # number of sampled action sequences
        self.lambd = 1  # temperature of the MPPI weighting
        self.dim_u = 2  # dimension of the action space
        self.U = np.zeros([self.T, self.dim_u])
        self.base_act = np.zeros([self.T, self.dim_u])
        self.time_limit = self.env._max_episode_steps
        self.u_init = np.zeros([self.dim_u])
        self.cost = np.zeros([self.K])
        self.noise = np.random.normal(loc=0, scale=1,
                                      size=(self.K, self.T, self.dim_u))
        # NOTE: compute_cost() expects self.predictor to hold a Predictor
        # instance; rollout() only uses the parallel path.

    def compute_cost(self, k, step):
        """Serial cost evaluation for sample k; rollout() uses the parallel
        variant instead."""
        self.noise[k] = np.random.normal(loc=0, scale=1,
                                         size=(self.T, self.dim_u))
        # make the predictor's shadow state match the actual robot and
        # object state
        self.predictor.catch_up(self.trajectory.get_relative_state(),
                                self.trajectory.get_absolute_state(),
                                self.trajectory.get_obstacle_position(),
                                self.trajectory.get_goal_position(), step)
        for t in range(self.T):
            action = self.U[t] + self.noise[k][t]
            # the predictor advances its shadow states internally
            cost = self.predictor.predict(action, step)
            self.cost[k] += cost
        return self.cost[k]

    def mppi_compute_cost_parallel(self, predictor_args, initial_state,
                                   rollout_num_per_cpu, base_act, step):
        return compute_cost_parallel(predictor_args, initial_state,
                                     rollout_num_per_cpu, base_act, step)

    def rollout(self, episodes):
        for episode in range(episodes):
            print('episode: {}'.format(episode))
            obs = self.env.reset()
            self.trajectory.reset()
            self.trajectory.update_state(obs)
            self.U = np.zeros([self.T, self.dim_u])
            # split the K samples across the available CPU cores
            rollout_num_per_cpu = self.K // mp.cpu_count()
            for step in range(self.time_limit):
                self.env.render()
                print('step: ', step + 1)
                # current state handed to the worker processes
                current_state = [
                    self.trajectory.get_relative_state(),
                    self.trajectory.get_absolute_state(),
                    self.trajectory.get_obstacle_position(),
                    self.trajectory.get_goal_position(), step
                ]
                # compute costs for the K samples in parallel; paths is a
                # list of K dicts: {observation: (self.T, dim_obs),
                # actions: (self.T, 2), costs: (self.T,)}
                paths = self.mppi_compute_cost_parallel(
                    self.predictor_args, current_state, rollout_num_per_cpu,
                    self.U, step)
                self.cost = score_trajectory(paths)  # total cost per sample, (K,)
                self.noise = stack_noise(paths)  # sampled noise, (K, T, dim_u)
                # MPPI update: exponentially weighted average of the noise
                beta = np.min(self.cost)
                eta = np.sum(np.exp(
                    (-1 / self.lambd) * (self.cost - beta))) + 1e-6
                w = (1 / eta) * np.exp((-1 / self.lambd) * (self.cost - beta))
                self.U += [np.dot(w, self.noise[:, t]) for t in range(self.T)]
                obs, r, _, _ = self.env.step(
                    np.concatenate([self.U[0], np.zeros([2])]))
                # record the new observation and the executed action
                self.trajectory.update_state(obs)
                self.trajectory.update_action(self.U[0])
                self.U = np.roll(self.U, -1, axis=0)  # shift left
                self.U[-1] = self.u_init
                self.cost = np.zeros([self.K])  # reset costs
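
# Hedged sketches of the two reduction helpers rollout() relies on. The
# real implementations live elsewhere in the repository; these versions
# only illustrate the shapes implied by the comments above and assume each
# path dict also records its sampled 'noise' array of shape (T, dim_u).
def _score_trajectory_sketch(paths):
    # total predicted cost per sampled action sequence -> shape (K,)
    return np.array([np.sum(path['costs']) for path in paths])


def _stack_noise_sketch(paths):
    # stack per-path noise into the (K, T, dim_u) array consumed by the
    # MPPI weighted update
    return np.stack([path['noise'] for path in paths], axis=0)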
def main():
    model = Model(3, 2, 60, 64, 64, 10)
    env_step = model.env_time_step
    time_sequence = model.time_steps
    model.model.load_weights(WEIGHTS_PATH)
    test_robot_data_raw = np.load(
        os.path.join(DATA_PATH, 'robot_pure_test.npy')).reshape(-1, 6)
    test_object_data_raw = np.load(
        os.path.join(DATA_PATH, 'object_pure_test.npy')).reshape(-1, 13)
    test_object_position = test_object_data_raw[:, :2]
    # recover the planar rotation angle from the quaternion scalar component
    test_object_rotation = (np.arccos(test_object_data_raw[:, 3]) *
                            2).reshape(-1, 1)
    test_object_state = np.concatenate(
        (test_object_position, test_object_rotation),
        axis=1).reshape(-1, env_step, 3)  # [episode_num, env_step, 3]
    test_robot_state = test_robot_data_raw[:, :2].reshape(
        -1, env_step, 2)  # [episode_num, env_step, 2]

    ### generate predicted trajectories ###
    episode_num = len(test_robot_state)

    ### initialize ###
    predict_trajectory = np.zeros([episode_num, env_step, 3])
    # sanity-check the preprocessed target layout
    original_data_set = model.data_preprocess(test_object_data_raw,
                                              test_robot_data_raw)
    original_output = original_data_set['target'].reshape(
        episode_num, -1, time_sequence, 3)
    assert original_output.shape == (episode_num,
                                     (env_step - 1) // time_sequence,
                                     time_sequence, 3)

    ### prediction loop ###
    for episode in range(episode_num):
        # the first time_sequence points are copied from the ground truth
        # because the model needs that many steps as input
        trajectory_p = np.zeros([env_step, 3])
        trajectory_p[:time_sequence] = test_object_state[
            episode][:time_sequence]
        robot_state = test_robot_state[episode]
        for step in range(time_sequence, env_step):
            trajectory_p[step] = get_object_state_increment_world_coordinate(
                model, step, time_sequence, trajectory_p,
                robot_state) + trajectory_p[step - 1]
        predict_trajectory[episode] = trajectory_p
        print('Generated {} episodes.'.format(episode + 1))
    np.save(os.path.join(SAVED_DATA_PATH, 'pure_test'), predict_trajectory)
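
# Hedged sketch of get_object_state_increment_world_coordinate(), the helper
# main() depends on. The real implementation is defined elsewhere; this
# version only illustrates the intended data flow, and the 7-feature layout
# is assumed to mirror Predictor.relative_state:
# (delta_x, delta_y, delta_theta, x_robot, y_robot, action_x, action_y),
# everything expressed in the object coordinate frame.
def _get_object_state_increment_world_coordinate_sketch(
        model, step, time_sequence, trajectory_p, robot_state):
    window = np.zeros([time_sequence, 7])
    for i in range(time_sequence):
        s = step - time_sequence + i
        theta = trajectory_p[s][2]
        c, si = np.cos(theta), np.sin(theta)
        world_to_object = np.array([[c, si], [-si, c]])
        if s > 0:
            # object pose increment from s-1 to s, in the object frame
            d_world = trajectory_p[s][:2] - trajectory_p[s - 1][:2]
            window[i][:2] = world_to_object @ d_world
            window[i][2] = trajectory_p[s][2] - trajectory_p[s - 1][2]
        # robot position and its next displacement, in the object frame
        window[i][3:5] = world_to_object @ (robot_state[s] -
                                            trajectory_p[s][:2])
        if s + 1 < len(robot_state):
            window[i][5:] = world_to_object @ (robot_state[s + 1] -
                                               robot_state[s])
    # object-frame increment [delta_x, delta_y, delta_theta]
    increment = np.squeeze(model.predict(window[np.newaxis, :]))
    # rotate the predicted increment into the world frame
    theta = trajectory_p[step - 1][2]
    c, si = np.cos(theta), np.sin(theta)
    return np.array([increment[0] * c - increment[1] * si,
                     increment[0] * si + increment[1] * c,
                     increment[2]])


if __name__ == '__main__':
    main()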