def collect_data(cfg, env, high_level_planning, low_level_TG, HL_replay_buffer):
    # target_speed_all, collect_state, collect_action and exp_index are assumed to be
    # module-level variables defined elsewhere in this script (not shown here).
    for iter in range(np.shape(target_speed_all)[0]):
        if cfg.sim:
            state = motion_library.exp_standing(env)
            low_level_TG.reset(state)
        target_speed = target_speed_all[iter]
        # target_speed[1] = -target_speed[1]
        for j in range(10):
            # generate foot footstep position. If test, the footstep comes from optimization process
            latent_action = high_level_planning.plan_latent_action(state, target_speed)
            low_level_TG.update_latent_action(state, latent_action)
            collect_state[iter][0][j] = state['base_pos_x'][0]
            collect_state[iter][1][j] = state['base_pos_y'][0]
            collect_state[iter][2][j] = state['base_ori_euler'][2]
            for step in range(50):
                action = low_level_TG.get_action(state, step + 1)
                if j > 7:
                    collect_action[iter][(j - 8) * 50 + step] = action
                state = env.step(action)
    np.save('./exp_action_' + str(exp_index) + '.npy', collect_action)
    np.save('./exp_state_' + str(exp_index) + '.npy', collect_state)
def collect_data_train(cfg, env, high_level_planning, low_level_TG, HL_replay_buffer, com_utils):
    reward_record = []
    for iter in range(cfg.num_iters):
        state = motion_library.exp_standing(env)
        low_level_TG.reset(state)
        target = target_all[iter % np.shape(target_all)[0]]
        total_reward = 0
        for j in range(cfg.num_latent_action_per_iteration):
            # generate foot footstep position. If test, the footstep comes from optimization process
            pre_com_state = state
            high_level_obs = com_utils.HL_obs(pre_com_state, target)
            latent_action = high_level_planning.sample_latent_action(state, target, com_utils)
            low_level_TG.update_latent_action(state, latent_action)
            for step in range(1, cfg.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state = env.step(action)
            post_com_state = state
            high_level_obs_ = com_utils.HL_obs(post_com_state, target)
            reward = com_utils.calc_reward(post_com_state, target)
            total_reward += reward
            done = (j == (cfg.num_latent_action_per_iteration - 1))
            HL_replay_buffer.add(high_level_obs, latent_action, reward, high_level_obs_, done)
        print('Total Reward: ', total_reward)
        reward_record.append(total_reward)
        high_level_planning.update_policy(HL_replay_buffer)
        np.save('./reward_record.npy', reward_record)
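# The training loop above only relies on an add(obs, action, reward, next_obs, done)
# interface from HL_replay_buffer. The repo's actual utils.ReplayBuffer is not shown in
# this file; the following is a minimal numpy sketch of the assumed interface, with
# constructor arguments mirroring the call in main below (obs dim, latent-action dim,
# output dim, device, capacity). It is an illustration, not the project's implementation.
class SimpleReplayBuffer:
    """Minimal sketch of the replay-buffer interface used by collect_data_train."""

    def __init__(self, obs_dim, action_dim, output_dim, device, capacity):
        # `device` is accepted only for signature compatibility; this sketch stays in numpy.
        self.capacity = capacity
        self.idx, self.full = 0, False
        self.obs = np.zeros((capacity, obs_dim), dtype=np.float32)
        self.actions = np.zeros((capacity, action_dim), dtype=np.float32)
        self.rewards = np.zeros((capacity, 1), dtype=np.float32)
        self.next_obs = np.zeros((capacity, output_dim), dtype=np.float32)
        self.dones = np.zeros((capacity, 1), dtype=np.float32)

    def add(self, obs, action, reward, next_obs, done):
        # ring buffer: overwrite the oldest transition once full
        self.obs[self.idx] = obs
        self.actions[self.idx] = action
        self.rewards[self.idx] = reward
        self.next_obs[self.idx] = next_obs
        self.dones[self.idx] = float(done)
        self.idx = (self.idx + 1) % self.capacity
        self.full = self.full or self.idx == 0

    def sample(self, batch_size):
        high = self.capacity if self.full else self.idx
        ind = np.random.randint(0, high, size=batch_size)
        return (self.obs[ind], self.actions[ind], self.rewards[ind],
                self.next_obs[ind], self.dones[ind])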
def collect_data(cfg, env, high_level_planning, low_level_TG, HL_replay_buffer, com_utils):
    for iter in range(1):
        state = motion_library.exp_standing(env)
        low_level_TG.reset(state)
        action_all = np.load('./trash_action.npy')
        # action_all =
        print(action_all)
        for j in range(np.shape(action_all)[0] * 2):
            # generate foot footstep position. If test, the footstep comes from optimization process
            pre_com_state = state
            latent_action = [2.24433139, -3.3401089]
            low_level_TG.update_latent_action(state, latent_action)
            for step in range(1, cfg.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state = env.step(action)
            post_com_state = state
            high_level_obs, high_level_delta_obs = com_utils.HL_obs(pre_com_state), com_utils.HL_delta_obs(pre_com_state, post_com_state)
            if high_level_delta_obs[3] + high_level_obs[0] > 0.5 or high_level_delta_obs[4] + high_level_obs[1] > 0.5:
                print(j, latent_action)
            HL_replay_buffer.add(high_level_obs, latent_action, 0, high_level_delta_obs, 1)
            if utils.check_robot_dead(state):
                break
def main(cfg):
    # define env & high level planning part & low level trajectory generator & replay buffer for HLP
    # initialize logger
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=False, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    com_utils = utils.CoM_frame_MPC()
    if cfg.sim:
        init_state = motion_library.exp_standing(env)
    model_obs_dim, model_output_dim = np.size(com_utils.HL_obs(state)), np.size(com_utils.HL_delta_obs(state, state))

    HL_replay_buffer = utils.ReplayBuffer(
        model_obs_dim, cfg.z_dim, model_output_dim, device,
        cfg.num_iters * cfg.num_latent_action_per_iteration)

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim, z_dim=cfg.z_dim, model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num, model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size, model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency)

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=cfg.z_dim, a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )
    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    # collect data
    collect_data(cfg, env, high_level_planning, low_level_TG, HL_replay_buffer, com_utils)
    # train model
    train_model(cfg, HL_replay_buffer, high_level_planning)
def main(cfg):
    print(cfg.pretty())
    # define env & high level planning part & low level trajectory generator & replay buffer for HLP
    # initialize logger
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=False, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    # Note: several settings below are hard-coded for this experiment and override cfg
    # (z_dim=3, model_layer_num=2, 'raibert' high-level policy, 50 timesteps per footstep, 'IK' low-level policy).
    model_obs_dim, model_output_dim = 2, 5

    HL_replay_buffer = utils.ReplayBuffer(
        model_obs_dim, cfg.z_dim, model_output_dim, device,
        cfg.num_iters * cfg.num_latent_action_per_iteration)

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim, z_dim=3, model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num, model_layer_num=2,
        batch_size=cfg.batch_size, model_lr=cfg.model_lr,
        high_level_policy_type='raibert',
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=50,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency)

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=3, a_dim=cfg.a_dim,
        num_timestep_per_footstep=50,
        batch_size=cfg.batch_size,
        low_level_policy_type='IK',
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )
    # if args.low_level_policy_type == 'NN':
    #     low_level_TG.load_model('./save_data/trial_2')

    # collect data
    collect_data(cfg, env, high_level_planning, low_level_TG, HL_replay_buffer)
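# cfg is consumed via attribute access (cfg.sim, cfg.num_iters, ...) and cfg.pretty(),
# which suggests an OmegaConf/Hydra-style config; how it is actually built is not shown
# in these files. A minimal, assumed entry point using OmegaConf directly could look like
# the sketch below (the file name 'config.yaml' and the CLI-override pattern are placeholders,
# not the project's confirmed setup).
from omegaconf import OmegaConf

if __name__ == '__main__':
    # Load experiment settings from a YAML file (placeholder path),
    # then merge command-line overrides such as `sim=False num_iters=20`.
    cfg = OmegaConf.load('config.yaml')
    cfg = OmegaConf.merge(cfg, OmegaConf.from_cli())
    main(cfg)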
def main():
    # define environment
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=False, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    # define data_buffer
    # define agent & learning...

    # training
    train(env, )
def collect_data(cfg, env, high_level_planning, low_level_TG, HL_replay_buffer):
    for iter in range(1):
        if cfg.sim:
            state = motion_library.exp_standing(env)
            low_level_TG.reset(state)
        target_speed = target_speed_all[iter]
        target_speed = -target_speed
        for j in range(20):
            # generate foot footstep position. If test, the footstep comes from optimization process
            latent_action = high_level_planning.plan_latent_action(state, target_speed)
            low_level_TG.update_latent_action(state, latent_action)
            for step in range(50):
                action = low_level_TG.get_action(state, step + 1)
                state = env.step(action)
def collect_data(cfg, env, high_level_planning, low_level_TG, HL_replay_buffer, com_utils):
    action_save = []
    save = 0
    for iter in range(cfg.num_iters):
        state = motion_library.exp_standing(env)
        low_level_TG.reset(state)
        j = 0
        while j < cfg.num_latent_action_per_iteration:
            # generate foot footstep position. If test, the footstep comes from optimization process
            pre_com_state = state
            if j % 2 == 0:
                latent_action = high_level_planning.sample_latent_action()
            low_level_TG.update_latent_action(state, latent_action)
            for step in range(1, cfg.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state = env.step(action)
            post_com_state = state
            high_level_obs, high_level_delta_obs = com_utils.HL_obs(pre_com_state), com_utils.HL_delta_obs(pre_com_state, post_com_state)
            # Flag transitions with an implausibly large CoM jump and set them aside;
            # only the remaining transitions go into the model-learning buffer.
            if abs(high_level_delta_obs[3] + high_level_obs[0]) > 0.5 or abs(high_level_delta_obs[4] + high_level_obs[1]) > 0.5:
                save = 1
                action_save.append(latent_action)
            else:
                HL_replay_buffer.add(high_level_obs, latent_action, 0, high_level_delta_obs, 1)
                j += 1
    if save:
        np.save('./trash_action.npy', np.array(action_save))
    HL_replay_buffer.save_buffer()
def evaluate_model(cfg):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=cfg.render, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    com_utils = utils.CoM_frame_MPC()
    if cfg.sim:
        init_state = motion_library.exp_standing(env)
    model_obs_dim, model_output_dim = np.size(com_utils.HL_obs(state)), np.size(com_utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim, z_dim=cfg.z_dim, model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num, model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size, model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency)
    high_level_planning.load_data('.')
    high_level_planning.load_mean_var('.' + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=cfg.z_dim, a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )
    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    target_velocity_test = [
        np.array([0.0, 0.2, 0.0, 1]), np.array([0.0, 0.2, 0.0, 1]), np.array([-0.0, 0.2, 0.0, 1]),
        np.array([-0.2, 0.0, 0.0, 1]), np.array([-0.2, 0.0, 0.0, 1]), np.array([-0.2, 0.0, 0.0, 1]),
        np.array([0.0, -0.2, 0.0, 1]), np.array([0.0, -0.2, 0.0, 1]), np.array([0.0, -0.2, 0.0, 1]),
        np.array([0.0, -0.2, 0.0, 1]),
        np.array([0.2, -0.0, 0.0, 1]), np.array([0.2, -0.0, 0.0, 1]), np.array([0.2, -0.0, 0.0, 1]),
        np.array([0.2, -0.0, 0.0, 1]), np.array([0.2, -0.0, 0.0, 1]),
    ]
    velocity_tracking = np.empty((cfg.num_iters, 6, cfg.num_latent_action_per_iteration))

    for iter in range(cfg.num_iters):
        prediction_evaluation = np.empty((2 * model_output_dim, cfg.num_latent_action_per_iteration))
        total_cost = 0
        init_state = motion_library.exp_standing(env)
        for j in range(cfg.num_latent_action_per_iteration):
            if not j % 5:
                target = target_velocity_test[int((j + 1) / 5)]
            pre_com_state = state
            latent_action = high_level_planning.plan_latent_action(state, target, com_utils, cfg.mpc_sample_num, cfg.mpc_horizon)
            low_level_TG.update_latent_action(state, latent_action)
            for step in range(1, cfg.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state = env.step(action)
            post_com_state = state
            # Check if robot still alive
            high_level_obs, high_level_delta_obs = com_utils.HL_obs(pre_com_state), com_utils.HL_delta_obs(pre_com_state, post_com_state)
            predict_delta_state = high_level_planning.model_predict(high_level_obs, latent_action)
            predict_state_world = com_utils.predict2world(pre_com_state, predict_delta_state)
            # collect data
            velocity_tracking[iter][0][j] = target[0]
            velocity_tracking[iter][1][j] = target[1]
            velocity_tracking[iter][2][j] = predict_state_world[3]
            velocity_tracking[iter][3][j] = predict_state_world[4]
            velocity_tracking[iter][4][j] = post_com_state['base_velocity'][0]
            velocity_tracking[iter][5][j] = post_com_state['base_velocity'][1]
    np.save('./' + cfg.high_level_policy_type + '_velocity_tracking_fin.npy', velocity_tracking)
    return
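# The saved *_velocity_tracking_fin.npy array has shape
# (num_iters, 6, num_latent_action_per_iteration); from the assignments above, the six
# rows per iteration are: target vx, target vy, predicted vx, predicted vy, measured vx,
# measured vy. The helper below is a small, assumed plotting sketch (function name and
# figure layout are illustrative, not part of the repo).
import matplotlib.pyplot as plt


def plot_velocity_tracking(path, it=0):
    """Plot target vs. predicted vs. measured base velocity for one iteration."""
    data = np.load(path)          # shape: (num_iters, 6, num_latent_actions)
    steps = np.arange(data.shape[2])
    labels = ['target', 'predicted', 'measured']
    fig, (ax_x, ax_y) = plt.subplots(2, 1, sharex=True)
    for k, label in enumerate(labels):
        ax_x.plot(steps, data[it][2 * k], label=label)      # rows 0, 2, 4: vx
        ax_y.plot(steps, data[it][2 * k + 1], label=label)  # rows 1, 3, 5: vy
    ax_x.set_ylabel('vx [m/s]')
    ax_y.set_ylabel('vy [m/s]')
    ax_y.set_xlabel('footstep index')
    ax_x.legend()
    plt.show()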
# z_dim, tra_learning and mean_std are assumed to be defined earlier in this script
# (the trajectory-generator training code is not shown here).
z_action_all = [[3.5490, -3.6426], [-3.7186, -3.7996], [0, -3.70]]
z_action_all = np.ones((7 * 7, z_dim))
# print(min(z_action_all), max(z_action_all))
total_step = 5
traj = np.load('./save_data/expert_action_total.npy')
CoM_traj = np.empty((np.shape(z_action_all)[0], 5, total_step * 2))
action_record = np.empty((np.shape(z_action_all)[0], 100, 18))
gait_record = np.empty((np.shape(z_action_all)[0], 6, 100))
tra_learning.load_model(save_dir='./save_data/trial_' + str(z_dim))

env = daisy_API(sim=True, render=True, logger=False)
env.set_control_mode('position')
state = env.reset()

for i in range(np.shape(z_action_all)[0]):
    state = motion_library.exp_standing(env)
    # sweep a 7x7 grid of latent actions over [-3, 3] x [-3, 3]
    first = int(i / 7.0)
    second = i % 7
    z_action_all[i][0] = -3 + first
    z_action_all[i][1] = -3 + second
    best_z_action = z_action_all[i]
    # min_loss = 10000
    # for j in range(10000):
    #     cur_z_action = 3 * np.random.randn(3)
    #     tra_learning.policy.z_action = cur_z_action
    #     action_all = np.zeros((100, 18))
    #     for k in range(100):
    #         action_all[k] = tra_learning.policy.get_action(state, (k + 1) / 100.0)
    #     loss = np.linalg.norm(utils.normalization(traj, mean_std[0], mean_std[1]) - action_all)
    #     if loss < min_loss:
def evaluate_model(args):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=args.sim, render=False, logger=False)
    env.set_control_mode(args.control_mode)
    state = env.reset()
    utils.make_dir(args.save_dir)
    save_dir = utils.make_dir(os.path.join(args.save_dir + '/trial_%s' % str(args.seed))) if args.save else None
    total_timestep, total_latent_action = 0, 0
    if args.sim:
        if args.low_level_policy_type == 'NN':
            init_state = motion_library.exp_standing(env, shoulder=0.3, elbow=1.3)
        else:
            init_state = motion_library.exp_standing(env)
    model_obs_dim, model_output_dim = np.size(utils.HL_obs(state)), np.size(utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim, z_dim=args.z_dim, model_output_dim=model_output_dim,
        model_hidden_num=args.model_hidden_num,
        batch_size=args.batch_size, model_lr=args.model_lr,
        high_level_policy_type=args.high_level_policy_type,
        update_sample_policy=args.update_sample_policy,
        update_sample_policy_lr=args.update_sample_policy_lr,
        low_level_policy_type=args.low_level_policy_type,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        model_update_steps=args.model_update_steps,
        control_frequency=args.control_frequency)
    high_level_planning.load_data(save_dir)
    high_level_planning.load_mean_var(save_dir + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=args.z_dim, a_dim=args.a_dim,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        batch_size=args.batch_size,
        low_level_policy_type=args.low_level_policy_type,
        update_low_level_policy=args.update_low_level_policy,
        update_low_level_policy_lr=args.update_low_level_policy_lr,
        init_state=init_state,
    )
    if args.low_level_policy_type == 'NN':
        low_level_TG.load_model('./save_data/trial_2')

    prediction_evaluation = np.empty((2 * model_output_dim, args.num_latent_action_per_iteration))
    velocity_tracking = np.empty((6, args.num_latent_action_per_iteration))
    target_velocity_test = [
        np.array([0.0, -0.1, 0.0]), np.array([0.0, -0.1, 0.0]), np.array([-0.0, -0.1, 0.0]),
        np.array([-0.0, -0.1, 0.0]), np.array([-0.0, -0.1, 0.0]),
        np.array([-0.0, -0.4, 0.0]), np.array([-0.0, -0.4, 0.0]), np.array([-0.0, -0.4, 0.0]),
        np.array([-0.0, -0.4, 0.0]), np.array([-0.0, -0.4, 0.0]),
    ]
    gait_record = np.empty((args.num_iters, 6, args.num_latent_action_per_iteration * args.num_timestep_per_footstep))

    for iter in range(args.num_iters):
        # reset robot to stand
        if args.sim:
            state = motion_library.exp_standing(env, shoulder=0.3, elbow=1.3)
            low_level_TG.reset(state)
        for j in range(args.num_latent_action_per_iteration):
            if not j % 5:
                target = target_velocity_test[int((j + 1) / 5)]
            pre_com_state = state
            latent_action = high_level_planning.plan_latent_action(state, target)
            low_level_TG.update_latent_action(state, latent_action)
            for step in range(1, args.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state, total_timestep = env.step(action), total_timestep + 1
                for q in range(6):
                    if state['foot_pos'][q][2] <= -0.005:
                        gait_record[iter][q][j * args.num_timestep_per_footstep + step - 1] = 1
                    else:
                        gait_record[iter][q][j * args.num_timestep_per_footstep + step - 1] = 0
            post_com_state = state
            # Check if robot still alive
            if utils.check_data_useful(state):
                high_level_obs, high_level_delta_obs = utils.HL_obs(pre_com_state), utils.HL_delta_obs(pre_com_state, post_com_state)
                predict_state = high_level_planning.model_predict(high_level_obs, latent_action)
                # collect data
                velocity_tracking[0][j] = target[0]
                velocity_tracking[1][j] = target[1]
                velocity_tracking[2][j] = predict_state[4]
                velocity_tracking[3][j] = predict_state[5]
                velocity_tracking[4][j] = high_level_delta_obs[4]
                velocity_tracking[5][j] = high_level_delta_obs[5]
                for q in range(model_output_dim):
                    prediction_evaluation[q][j] = high_level_delta_obs[q]
                    prediction_evaluation[q + model_output_dim][j] = predict_state[q]
            total_latent_action += 1
            if utils.check_robot_dead(state):
                break

    np.save(save_dir + '/prediction_evaluation.npy', np.array(prediction_evaluation))
    np.save(save_dir + '/velocity_tracking.npy', velocity_tracking)
    np.save('./save_data/trial_2/gait_record.npy', gait_record)
    return
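# gait_record stores, per iteration, a binary contact flag for each of the six feet at
# every control timestep (1 whenever the foot z-position is at or below -0.005 m). The
# helper below is an assumed sketch for turning one iteration of that array into a gait
# diagram; the function name and styling are illustrative only.
import matplotlib.pyplot as plt


def plot_gait_diagram(path, it=0):
    """Show stance (dark) vs. swing (light) phases for all six legs of one iteration."""
    gait = np.load(path)      # shape: (num_iters, 6, T), binary contact flags
    contact = gait[it]        # (6, T) contact matrix for the chosen iteration
    plt.imshow(contact, aspect='auto', cmap='Greys', interpolation='nearest')
    plt.yticks(range(6), ['leg %d' % q for q in range(6)])
    plt.xlabel('timestep')
    plt.title('Gait diagram (dark = stance)')
    plt.show()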
def main(test_arg):
    # initialize data structures; record time to measure the frequency
    tgt_pos = np.zeros((6, 3, test_arg['total_step'] * test_arg['stance_time']))
    actual_pos = np.zeros((6, 3, test_arg['total_step'] * test_arg['stance_time']))

    # initialize the configuration
    env = daisy_API(sim=test_arg['sim'], render=True, logger=False)
    env.set_control_mode('position')
    state = env.reset()
    if test_arg['sim']:
        state = motion_library.exp_standing(env)
    else:
        state = motion_library.demo_standing(env, shoulder=0.5, elbow=1.1)

    # initialize controllers
    high_level_planning = raibert_footstep_policy(
        stance_duration=test_arg['stance_time'],
        target_speed=np.array(test_arg['target_speed']),
        control_frequency=test_arg['control_frequency'])
    low_level_planning = IK_traj_generator(state)

    # run the controller for several footsteps and record data
    for index_step in range(test_arg['total_step']):
        latent_action = high_level_planning.plan_latent_action(state)
        low_level_planning.update_latent_action(state, latent_action)
        for i in range(1, test_arg['stance_time'] + 1):
            phase = float(i) / test_arg['stance_time']
            action = low_level_planning.get_action(state, phase)
            # TODO: implement simplified non-blocking stepping
            state = env.step(action)
            time.sleep(0.01)
            # compute current foot XYZ in the CoM frame
            foot_com_cur = daisy_kinematics.FK_CoM2Foot(state['j_pos'])
            foot_com_tgt = low_level_planning.des_foot_position_com
            for j in range(6):
                for k in range(3):
                    tgt_pos[j][k][index_step * test_arg['stance_time'] + i - 1] = foot_com_tgt[j][k]
                    actual_pos[j][k][index_step * test_arg['stance_time'] + i - 1] = foot_com_cur[j][k]

    # save data + load data + plotting
    np.save('./save_data/test_tra/tgt_tra_' + str(test_arg['stance_time']) + '.npy', tgt_pos)
    np.save('./save_data/test_tra/actual_tra_' + str(test_arg['stance_time']) + '.npy', actual_pos)
    tgt_pos = np.load('./save_data/test_tra/tgt_tra_' + str(test_arg['stance_time']) + '.npy')
    actual_pos = np.load('./save_data/test_tra/actual_tra_' + str(test_arg['stance_time']) + '.npy')

    leg_index = 0
    axis = range(np.shape(tgt_pos)[2])
    plt.rcParams['figure.figsize'] = (8, 10)
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1)
    ax1.plot(axis, tgt_pos[leg_index][0], color='dodgerblue')
    ax1.plot(axis, actual_pos[leg_index][0], color='deeppink')
    ax1.set_title('X tracking')
    ax2.plot(axis, tgt_pos[leg_index][1], color='dodgerblue')
    ax2.plot(axis, actual_pos[leg_index][1], color='deeppink')
    ax2.set_title('Y tracking')
    ax3.plot(axis, tgt_pos[leg_index][2], color='dodgerblue')
    ax3.plot(axis, actual_pos[leg_index][2], color='deeppink')
    ax3.set_title('Z tracking')
    plt.show()
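# main(test_arg) only reads the dictionary keys used above ('sim', 'total_step',
# 'stance_time', 'target_speed', 'control_frequency'). An illustrative call might look
# like the following; the numeric values are placeholders, not tuned settings from the repo.
if __name__ == '__main__':
    test_arg = {
        'sim': True,                       # run in simulation instead of on hardware
        'total_step': 5,                   # number of footsteps to execute
        'stance_time': 50,                 # control ticks per footstep (placeholder)
        'target_speed': [0.0, 0.2, 0.0],   # desired base velocity (placeholder)
        'control_frequency': 100,          # control rate in Hz (placeholder)
    }
    main(test_arg)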
def evaluate_model(cfg):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=False, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    com_utils = utils.CoM_frame_MPC()
    if cfg.sim:
        init_state = motion_library.exp_standing(env)
    model_obs_dim, model_output_dim = np.size(com_utils.HL_obs(state)), np.size(com_utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim, z_dim=cfg.z_dim, model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num, model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size, model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency)
    high_level_planning.load_data('.')
    if cfg.high_level_policy_type != 'SAC':
        high_level_planning.load_mean_var('.' + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=cfg.z_dim, a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )
    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    # build the waypoint course from two half-ellipse arcs
    square_circle_test = []
    total_num = 6
    for i in range(1, total_num + 1):
        theta = i * math.pi / float(total_num)
        square_circle_test.append(np.array([1 - math.cos(theta), 1.5 * math.sin(theta), 0, 1]))
    for i in range(1, total_num + 1):
        theta = i * math.pi / float(total_num)
        square_circle_test.append(np.array([3 - math.cos(theta), -1.5 * math.sin(theta), 0, 1]))

    square_cost = []
    for iter in range(cfg.num_iters):
        position_tracking = [[], [], []]
        # reset robot to stand
        state = motion_library.exp_standing(env)
        low_level_TG.reset(state)
        position_tracking[0].append(state['base_pos_x'][0])
        position_tracking[1].append(state['base_pos_y'][0])
        position_tracking[2].append(state['base_ori_euler'][2])
        total_latent_action = 0
        total_cost = 0
        target_index = 0
        while True:
            target = square_circle_test[target_index]
            pre_com_state = state
            latent_action = high_level_planning.plan_latent_action(state, target, com_utils, cfg.mpc_sample_num, cfg.mpc_horizon)
            low_level_TG.update_latent_action(state, latent_action)
            for step in range(1, cfg.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state = env.step(action)
                # collect data
                position_tracking[0].append(state['base_pos_x'][0])
                position_tracking[1].append(state['base_pos_y'][0])
                position_tracking[2].append(state['base_ori_euler'][2])
                if np.linalg.norm(target[0:2] - np.array([state['base_pos_x'][0], state['base_pos_y'][0]])) < 0.15:
                    print("Reach Goal %s!!!!" % str(target_index))
                    target_index += 1
                    if target_index >= np.shape(square_circle_test)[0]:
                        break
                    target = square_circle_test[target_index]
            post_com_state = state
            total_latent_action += 1
            if target_index >= np.shape(square_circle_test)[0]:
                np.save('./square_circle_test_' + str(iter) + '.npy', np.array(position_tracking))
                break
            if total_latent_action > 200:
                print('Did not reach goal')
                break
    return
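# Each saved square_circle_test_<iter>.npy file holds three rows (base x, base y, yaw)
# sampled at every control step. The sketch below is an assumed overlay of the executed
# path against the waypoints built above; the function name is illustrative.
import matplotlib.pyplot as plt


def plot_waypoint_run(path, total_num=6):
    """Overlay the recorded base trajectory on the half-ellipse waypoint course."""
    track = np.load(path)      # shape: (3, T) -> rows are x, y, yaw
    waypoints = []
    for i in range(1, total_num + 1):
        theta = i * math.pi / float(total_num)
        waypoints.append([1 - math.cos(theta), 1.5 * math.sin(theta)])
    for i in range(1, total_num + 1):
        theta = i * math.pi / float(total_num)
        waypoints.append([3 - math.cos(theta), -1.5 * math.sin(theta)])
    waypoints = np.array(waypoints)
    plt.plot(track[0], track[1], label='executed path')
    plt.scatter(waypoints[:, 0], waypoints[:, 1], marker='x', label='waypoints')
    plt.axis('equal')
    plt.legend()
    plt.show()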
def evaluate_model(cfg):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=cfg.render, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    com_utils = utils.CoM_frame_MPC()
    if cfg.high_level_policy_type == 'SAC':
        com_utils = utils.CoM_frame_RL()
    if cfg.sim:
        init_state = motion_library.exp_standing(env)
    if cfg.high_level_policy_type == 'SAC':
        model_obs_dim, model_output_dim = np.size(com_utils.HL_obs(state, np.zeros(3))), np.size(com_utils.HL_obs(state, np.zeros(3)))
    else:
        model_obs_dim, model_output_dim = np.size(com_utils.HL_obs(state)), np.size(com_utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim, z_dim=cfg.z_dim, model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num, model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size, model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency)
    high_level_planning.load_data('.')
    if cfg.high_level_policy_type != 'SAC':
        high_level_planning.load_mean_var('.' + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=cfg.z_dim, a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )
    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    # eight goal positions relative to the starting point
    target_position_test = [
        np.array([0.0, 2.0, 0.0, 1]), np.array([2.0, 2.0, 0.0, 1]),
        np.array([-2.0, 2.0, 0.0, 1]), np.array([2.0, 0.0, 0.0, 1]),
        np.array([-2.0, 0.0, 0.0, 1]), np.array([0.0, -2.0, 0.0, 1]),
        np.array([-2.0, -2.0, 0.0, 1]), np.array([2.0, -2.0, 0.0, 1]),
    ]

    for q in range(cfg.num_iters):
        position_tracking = [[] for _ in range(16)]  # x and y trace for each of the 8 goals
        for target_index in range(8):
            # reset robot to stand
            state = motion_library.exp_standing(env)
            low_level_TG.reset(state)
            position_tracking[2 * target_index].append(state['base_pos_x'][0])
            position_tracking[2 * target_index + 1].append(state['base_pos_y'][0])
            total_latent_action = 0
            target = target_position_test[target_index]
            target[0] = target[0] + state['base_pos_x'][0]
            target[1] = target[1] + state['base_pos_y'][0]
            while True:
                pre_com_state = state
                latent_action = high_level_planning.plan_latent_action(state, target, com_utils, cfg.mpc_sample_num, cfg.mpc_horizon)
                low_level_TG.update_latent_action(state, latent_action)
                for step in range(1, cfg.num_timestep_per_footstep + 1):
                    action = low_level_TG.get_action(state, step)
                    state = env.step(action)
                    position_tracking[2 * target_index].append(state['base_pos_x'][0])
                    position_tracking[2 * target_index + 1].append(state['base_pos_y'][0])
                    if np.linalg.norm(target[0:2] - np.array([state['base_pos_x'][0], state['base_pos_y'][0]])) < 0.2:
                        print("ReachGoal" + str(target_index) + "!!!!")
                        break
                # collect data
                post_com_state = state
                total_latent_action += 1
                if np.linalg.norm(target[0:2] - np.array([state['base_pos_x'][0], state['base_pos_y'][0]])) < 0.2:
                    break
                if total_latent_action > 30:
                    break
        # convert every recorded trace before saving
        for i in range(len(position_tracking)):
            position_tracking[i] = np.array(position_tracking[i])
        np.save('./' + cfg.high_level_policy_type + '_multi_tgt_test_' + str(q) + '.npy', np.array(position_tracking))
    return
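# The multi-target file stores 16 traces: x and y position histories for each of the
# eight goals, interleaved as [x_0, y_0, x_1, y_1, ...]. The runs generally have
# different lengths, so the saved array is typically an object array and must be loaded
# with allow_pickle=True. The loader/plotter below is an assumed sketch, not part of the repo.
import matplotlib.pyplot as plt


def plot_multi_target_runs(path):
    """Plot the x/y base trajectory toward each of the eight goal positions."""
    traces = np.load(path, allow_pickle=True)
    for target_index in range(8):
        xs = np.asarray(traces[2 * target_index])
        ys = np.asarray(traces[2 * target_index + 1])
        plt.plot(xs, ys, label='goal %d' % target_index)
    plt.axis('equal')
    plt.xlabel('x [m]')
    plt.ylabel('y [m]')
    plt.legend()
    plt.show()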
def evaluate_model(args):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=args.sim, render=False, logger=False)
    env.set_control_mode(args.control_mode)
    state = env.reset()
    utils.make_dir(args.save_dir)
    save_dir = utils.make_dir(os.path.join(args.save_dir + '/trial_%s' % str(args.seed))) if args.save else None
    total_timestep, total_latent_action = 0, 0
    if args.sim:
        if args.low_level_policy_type == 'NN':
            init_state = motion_library.exp_standing(env, shoulder=0.3, elbow=1.3)
        else:
            init_state = motion_library.exp_standing(env)
    model_obs_dim, model_output_dim = np.size(utils.HL_obs(state)), np.size(utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim, z_dim=args.z_dim, model_output_dim=model_output_dim,
        model_hidden_num=args.model_hidden_num,
        batch_size=args.batch_size, model_lr=args.model_lr,
        high_level_policy_type=args.high_level_policy_type,
        update_sample_policy=args.update_sample_policy,
        update_sample_policy_lr=args.update_sample_policy_lr,
        low_level_policy_type=args.low_level_policy_type,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        model_update_steps=args.model_update_steps,
        control_frequency=args.control_frequency)
    high_level_planning.load_data(save_dir)
    high_level_planning.load_mean_var(save_dir + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=args.z_dim, a_dim=args.a_dim,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        batch_size=args.batch_size,
        low_level_policy_type=args.low_level_policy_type,
        update_low_level_policy=args.update_low_level_policy,
        update_low_level_policy_lr=args.update_low_level_policy_lr,
        init_state=init_state,
    )
    if args.low_level_policy_type == 'NN':
        low_level_TG.load_model('./save_data/trial_2')

    # target_velocity and cost_index are assumed to be module-level variables
    # (e.g. set by an outer sweep script); they are not defined in this function.
    cost_record = []
    position_tracking = [[], [], [], []]
    for iter in range(args.num_iters):
        # reset robot to stand
        if args.sim:
            if args.low_level_policy_type == 'NN':
                init_state = motion_library.exp_standing(env, shoulder=0.3, elbow=1.3)
            else:
                init_state = motion_library.exp_standing(env)
        low_level_TG.reset(state)
        position_tracking[0].append(state['base_pos_x'][0])
        position_tracking[1].append(state['base_pos_y'][0])
        position_tracking[2].append(state['base_ori_euler'][2])
        position_tracking[3].append(np.linalg.norm(state['base_velocity'][0:2]))
        j = 0
        total_cost = 0
        # while True:
        for i in range(args.num_latent_action_per_iteration):
            # target = np.array([0, target_velocity, 0])
            target = target_velocity
            pre_com_state = state
            latent_action = high_level_planning.plan_latent_action(state, target)
            low_level_TG.update_latent_action(state, latent_action)
            for step in range(1, args.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state, total_timestep = env.step(action), total_timestep + 1
                # collect data
                position_tracking[0].append(state['base_pos_x'][0])
                position_tracking[1].append(state['base_pos_y'][0])
                position_tracking[2].append(state['base_ori_euler'][2])
                position_tracking[3].append(np.linalg.norm(state['base_velocity'][0:2]))
            post_com_state = state
            cost = utils.easy_cost(target, pre_com_state, post_com_state)
            total_cost += cost
            # Check if robot still alive
            if utils.check_data_useful(state):
                high_level_obs, high_level_delta_obs = utils.HL_obs(pre_com_state), utils.HL_delta_obs(pre_com_state, post_com_state)
                predict_state = high_level_planning.model_predict(high_level_obs, latent_action)
            total_latent_action += 1
            j += 1
            # if np.linalg.norm(target[0:2] - np.array([state['base_pos_x'][0], state['base_pos_y'][0]])) + np.linalg.norm(target[2:4] - high_level_delta_obs[0:2]) < 0.2:
            #     print("Reach Goal %s!!!!")
            #     break
            # if j > 20:
            #     print('Did not reach goal')
            #     break
        cost_record.append(total_cost)
        print(cost_record)

    np.save(save_dir + '/' + args.high_level_policy_type + '_' + args.low_level_policy_type + '_different_cost_test' + str(cost_index) + '.npy', np.array(cost_record))
    np.save(save_dir + '/' + args.high_level_policy_type + '_' + args.low_level_policy_type + '_com.npy', np.array(position_tracking))
    return