def main(args):
    """Train the high-level dynamics model from a previously saved replay buffer.

    Opens the robot-side redis connection (currently unused because online
    data collection is disabled below), restores HL transitions from
    ``./save_data/trial_4``, builds the high-level planner, and runs model
    training on the loaded buffer.

    Args:
        args: argparse-style namespace with buffer/model/planner settings.
    """
    redis_client = redis.Redis(host='10.10.1.2', port=6379, db=0)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Hard-coded HL model observation/output sizes.
    # (Originally derived from utils.HL_obs / utils.HL_delta_obs on a live state.)
    model_obs_dim, model_output_dim = 4, 6

    utils.make_dir(args.save_dir)

    buffer_capacity = args.num_iters * args.num_latent_action_per_iteration
    HL_replay_buffer = utils.ReplayBuffer(model_obs_dim, args.z_dim,
                                          model_output_dim, device,
                                          buffer_capacity)
    HL_replay_buffer.load_buffer('./save_data/trial_4')

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=args.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=args.model_hidden_num,
        batch_size=args.batch_size,
        model_lr=args.model_lr,
        high_level_policy_type=args.high_level_policy_type,
        update_sample_policy=args.update_sample_policy,
        update_sample_policy_lr=args.update_sample_policy_lr,
        low_level_policy_type=args.low_level_policy_type,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        model_update_steps=args.model_update_steps,
        control_frequency=args.control_frequency,
    )

    # Online collection over redis is disabled; train purely from the buffer.
    # collect_data_client(args, redis_client, high_level_planning, HL_replay_buffer)
    train_model(args, HL_replay_buffer, high_level_planning)
def main(args):
    """Load a trained high-level planner and run the redis-driven evaluation.

    Restores planner weights and input normalization statistics from the
    per-seed trial directory, then hands control to ``evaluation``.

    Args:
        args: argparse-style namespace with model/planner settings.
    """
    redis_client = redis.Redis(host='10.10.1.2', port=6379, db=0)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Hard-coded HL model observation/output sizes.
    # (Originally derived from utils.HL_obs / utils.HL_delta_obs on a live state.)
    model_obs_dim, model_output_dim = 4, 6

    utils.make_dir(args.save_dir)
    if args.save:
        save_dir = utils.make_dir(
            os.path.join(args.save_dir + '/trial_%s' % str(args.seed)))
    else:
        save_dir = None

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=args.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=args.model_hidden_num,
        batch_size=args.batch_size,
        model_lr=args.model_lr,
        high_level_policy_type=args.high_level_policy_type,
        update_sample_policy=args.update_sample_policy,
        update_sample_policy_lr=args.update_sample_policy_lr,
        low_level_policy_type=args.low_level_policy_type,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        model_update_steps=args.model_update_steps,
        control_frequency=args.control_frequency,
    )

    # NOTE(review): when args.save is False, save_dir is None and both calls
    # below will fail (None path / None + str) — presumably this entry point
    # is only ever run with saving enabled; confirm.
    high_level_planning.load_data(save_dir)
    high_level_planning.load_mean_var(save_dir + '/buffer_data')

    evaluation(args, redis_client, high_level_planning)
def main(cfg):
    """Collect rollout data in the Daisy environment, then train the HL model.

    Builds the environment, the CoM-frame helpers, the high-level planner,
    the low-level trajectory generator, and the HL replay buffer; runs data
    collection followed by dynamics-model training.

    Args:
        cfg: hydra/omegaconf-style config with env, model and policy settings.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    env = daisy_API(sim=cfg.sim, render=False, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()

    com_utils = utils.CoM_frame_MPC()

    # NOTE(review): init_state is only bound when cfg.sim is true, but it is
    # required by the LLTG constructor below — presumably this script is
    # sim-only; confirm before running on hardware.
    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    # Derive HL model dimensions from a live observation.
    model_obs_dim = np.size(com_utils.HL_obs(state))
    model_output_dim = np.size(com_utils.HL_delta_obs(state, state))

    HL_replay_buffer = utils.ReplayBuffer(
        model_obs_dim, cfg.z_dim, model_output_dim, device,
        cfg.num_iters * cfg.num_latent_action_per_iteration)

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=cfg.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num,
        model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size,
        model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency,
    )

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=cfg.z_dim,
        a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )
    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    # Collect data, then fit the HL dynamics model on the filled buffer.
    collect_data(cfg, env, high_level_planning, low_level_TG,
                 HL_replay_buffer, com_utils)
    train_model(cfg, HL_replay_buffer, high_level_planning)
def main(cfg):
    """Collect data with a fixed raibert high-level policy and IK low-level TG.

    Several settings are intentionally hard-coded here (z_dim=3,
    model_layer_num=2, 50 timesteps per footstep, 'raibert' HL policy,
    'IK' LL policy) and override the corresponding cfg fields.

    Args:
        cfg: hydra/omegaconf-style config; remaining settings come from here.
    """
    print(cfg.pretty())

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    env = daisy_API(sim=cfg.sim, render=False, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()

    # NOTE(review): init_state is only bound when cfg.sim is true but is
    # required by the LLTG constructor below — presumably sim-only; confirm.
    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    # Hard-coded HL model dimensions for this experiment.
    model_obs_dim, model_output_dim = 2, 5

    HL_replay_buffer = utils.ReplayBuffer(
        model_obs_dim, cfg.z_dim, model_output_dim, device,
        cfg.num_iters * cfg.num_latent_action_per_iteration)

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=3,
        model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num,
        model_layer_num=2,
        batch_size=cfg.batch_size,
        model_lr=cfg.model_lr,
        high_level_policy_type='raibert',
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=50,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency,
    )

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=3,
        a_dim=cfg.a_dim,
        num_timestep_per_footstep=50,
        batch_size=cfg.batch_size,
        low_level_policy_type='IK',
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )

    collect_data(cfg, env, high_level_planning, low_level_TG, HL_replay_buffer)
def evaluate_model(cfg):
    """Evaluate velocity tracking of the trained high-level MPC planner.

    Loads the planner weights and normalization stats from the current
    directory, then for each iteration walks through a fixed schedule of
    target velocities (held for 5 latent actions each), recording the
    commanded, model-predicted and measured base velocities into
    ``<policy>_velocity_tracking_fin.npy``.

    Fix over the original: removed dead locals — ``total_cost`` (never
    updated or read), two ``prediction_evaluation`` allocations (never
    written or saved), and a duplicate ``velocity_tracking`` allocation.

    Args:
        cfg: hydra/omegaconf-style config with env, model and MPC settings.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=cfg.render, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    com_utils = utils.CoM_frame_MPC()

    # NOTE(review): init_state is only bound when cfg.sim is true but is
    # required by the LLTG constructor below — presumably sim-only; confirm.
    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    # HL model dimensions derived from a live observation.
    model_obs_dim, model_output_dim = np.size(com_utils.HL_obs(state)), np.size(
        com_utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=cfg.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num,
        model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size,
        model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency,
    )
    high_level_planning.load_data('.')
    high_level_planning.load_mean_var('.' + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=cfg.z_dim,
        a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )
    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    # Target velocity schedule; each entry is held for 5 latent actions
    # (see the `if not j % 5` switch below).
    target_velocity_test = [
        np.array([0.0, 0.2, 0.0, 1]),
        np.array([0.0, 0.2, 0.0, 1]),
        np.array([-0.0, 0.2, 0.0, 1]),
        np.array([-0.2, 0.0, 0.0, 1]),
        np.array([-0.2, 0.0, 0.0, 1]),
        np.array([-0.2, 0.0, 0.0, 1]),
        np.array([0.0, -0.2, 0.0, 1]),
        np.array([0.0, -0.2, 0.0, 1]),
        np.array([0.0, -0.2, 0.0, 1]),
        np.array([0.0, -0.2, 0.0, 1]),
        np.array([0.2, -0.0, 0.0, 1]),
        np.array([0.2, -0.0, 0.0, 1]),
        np.array([0.2, -0.0, 0.0, 1]),
        np.array([0.2, -0.0, 0.0, 1]),
        np.array([0.2, -0.0, 0.0, 1]),
    ]

    # [iter][channel][j]: channels are target vx/vy, predicted vx/vy,
    # measured vx/vy.
    velocity_tracking = np.empty(
        (cfg.num_iters, 6, cfg.num_latent_action_per_iteration))

    for iter in range(cfg.num_iters):
        # Re-stand the robot at the start of every evaluation iteration.
        init_state = motion_library.exp_standing(env)
        for j in range(cfg.num_latent_action_per_iteration):
            if not j % 5:
                target = target_velocity_test[int((j + 1) / 5)]
            pre_com_state = state

            latent_action = high_level_planning.plan_latent_action(
                state, target, com_utils, cfg.mpc_sample_num, cfg.mpc_horizon)
            low_level_TG.update_latent_action(state, latent_action)

            # Execute one footstep worth of low-level actions.
            for step in range(1, cfg.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state = env.step(action)
            post_com_state = state

            high_level_obs, high_level_delta_obs = com_utils.HL_obs(
                pre_com_state), com_utils.HL_delta_obs(pre_com_state,
                                                       post_com_state)
            predict_delta_state = high_level_planning.model_predict(
                high_level_obs, latent_action)
            predict_state_world = com_utils.predict2world(
                pre_com_state, predict_delta_state)

            # Record commanded / predicted / measured base velocities.
            velocity_tracking[iter][0][j] = target[0]
            velocity_tracking[iter][1][j] = target[1]
            velocity_tracking[iter][2][j] = predict_state_world[3]
            velocity_tracking[iter][3][j] = predict_state_world[4]
            velocity_tracking[iter][4][j] = post_com_state['base_velocity'][0]
            velocity_tracking[iter][5][j] = post_com_state['base_velocity'][1]

    np.save('./' + cfg.high_level_policy_type + '_velocity_tracking_fin.npy',
            velocity_tracking)
    return
def evaluate_model(args):
    """Evaluate velocity tracking and gait timing of the trained HL planner.

    Loads planner weights/normalization from the per-seed trial directory,
    runs a fixed velocity schedule, and saves prediction-vs-measurement,
    velocity-tracking, and per-foot contact (gait) records.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=args.sim, render=False, logger=False)
    env.set_control_mode(args.control_mode)
    state = env.reset()

    utils.make_dir(args.save_dir)
    # NOTE(review): save_dir is None when args.save is false; the load/save
    # calls below would then fail — presumably always run with saving on.
    save_dir = utils.make_dir(
        os.path.join(args.save_dir + '/trial_%s' % str(args.seed))) if args.save else None
    total_timestep, total_latent_action = 0, 0

    # Stand the robot; the NN low-level policy uses a different stance.
    # NOTE(review): init_state is only bound when args.sim is true but is
    # required by the LLTG constructor below — presumably sim-only; confirm.
    if args.sim:
        if args.low_level_policy_type == 'NN':
            init_state = motion_library.exp_standing(env, shoulder=0.3, elbow=1.3)
        else:
            init_state = motion_library.exp_standing(env)

    # HL model dimensions derived from a live observation.
    model_obs_dim, model_output_dim = np.size(utils.HL_obs(state)), np.size(
        utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=args.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=args.model_hidden_num,
        batch_size=args.batch_size,
        model_lr=args.model_lr,
        high_level_policy_type=args.high_level_policy_type,
        update_sample_policy=args.update_sample_policy,
        update_sample_policy_lr=args.update_sample_policy_lr,
        low_level_policy_type=args.low_level_policy_type,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        model_update_steps=args.model_update_steps,
        control_frequency=args.control_frequency)
    high_level_planning.load_data(save_dir)
    high_level_planning.load_mean_var(save_dir + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=args.z_dim,
        a_dim=args.a_dim,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        batch_size=args.batch_size,
        low_level_policy_type=args.low_level_policy_type,
        update_low_level_policy=args.update_low_level_policy,
        update_low_level_policy_lr=args.update_low_level_policy_lr,
        init_state=init_state,
    )
    if args.low_level_policy_type == 'NN':
        # Hard-coded checkpoint path for the NN low-level policy.
        low_level_TG.load_model('./save_data/trial_2')

    # [measured deltas ; predicted deltas] per latent action.
    # NOTE(review): these two buffers are overwritten every iteration of the
    # outer loop, so the final saves keep only the last iteration's data.
    prediction_evaluation = np.empty(
        (2 * model_output_dim, args.num_latent_action_per_iteration))
    velocity_tracking = np.empty((6, args.num_latent_action_per_iteration))

    # Target velocity schedule; each entry is held for 5 latent actions
    # (see the `if not j % 5` switch below).
    target_velocity_test = [
        np.array([0.0, -0.1, 0.0]),
        np.array([0.0, -0.1, 0.0]),
        np.array([-0.00, -0.1, 0.0]),
        np.array([-0.0, -0.1, 0.0]),
        np.array([-0.0, -0.1, 0.0]),
        np.array([-0.00, -0.4, 0.0]),
        np.array([-0.00, -0.4, 0.0]),
        np.array([-0.0, -0.4, 0.0]),
        np.array([-0.0, -0.4, 0.0]),
        np.array([-0.0, -0.4, 0.0]),
    ]

    # Per-iteration, per-foot binary contact record for every control step.
    gait_record = np.empty(
        (args.num_iters, 6,
         args.num_latent_action_per_iteration * args.num_timestep_per_footstep))

    for iter in range(args.num_iters):
        # reset robot to stand
        if args.sim:
            state = motion_library.exp_standing(env, shoulder=0.3, elbow=1.3)
        low_level_TG.reset(state)

        for j in range(args.num_latent_action_per_iteration):
            if not j % 5:
                target = target_velocity_test[int((j + 1) / 5)]
            pre_com_state = state

            latent_action = high_level_planning.plan_latent_action(
                state, target)
            low_level_TG.update_latent_action(state, latent_action)

            # Execute one footstep; log foot contacts at every control step.
            for step in range(1, args.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state, total_timestep = env.step(action), total_timestep + 1
                for q in range(6):
                    # Foot counts as in contact below this z threshold
                    # (assumed metres; TODO confirm units).
                    if state['foot_pos'][q][2] <= -0.005:
                        gait_record[iter][q][j * args.num_timestep_per_footstep
                                             + step - 1] = 1
                    else:
                        gait_record[iter][q][j * args.num_timestep_per_footstep
                                             + step - 1] = 0
            post_com_state = state

            # Check if robot still alive
            if utils.check_data_useful(state):
                high_level_obs, high_level_delta_obs = utils.HL_obs(
                    pre_com_state), utils.HL_delta_obs(pre_com_state,
                                                       post_com_state)
                predict_state = high_level_planning.model_predict(
                    high_level_obs, latent_action)
                # collect data: commanded vs predicted vs measured velocities.
                velocity_tracking[0][j] = target[0]
                velocity_tracking[1][j] = target[1]
                velocity_tracking[2][j] = predict_state[4]
                velocity_tracking[3][j] = predict_state[5]
                velocity_tracking[4][j] = high_level_delta_obs[4]
                velocity_tracking[5][j] = high_level_delta_obs[5]
                for q in range(model_output_dim):
                    prediction_evaluation[q][j] = high_level_delta_obs[q]
                    prediction_evaluation[q + model_output_dim][j] = predict_state[q]

            total_latent_action += 1
            if utils.check_robot_dead(state):
                break

    np.save(save_dir + '/prediction_evaluation.npy',
            np.array(prediction_evaluation))
    np.save(save_dir + '/velocity_tracking.npy', velocity_tracking)
    # NOTE(review): gait record is written to a hard-coded trial path,
    # not save_dir — verify this is intentional.
    np.save('./save_data/trial_2/gait_record.npy', gait_record)
    return
def evaluate_model(cfg):
    """Drive the robot through a figure-eight-like waypoint course.

    Builds two mirrored half-circle waypoint sequences, then repeatedly
    plans latent actions toward the current waypoint, advancing to the
    next waypoint once within 0.15 m. Saves the CoM trace per iteration.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=False, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    com_utils = utils.CoM_frame_MPC()

    # NOTE(review): init_state is only bound when cfg.sim is true but is
    # required by the LLTG constructor below — presumably sim-only; confirm.
    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    # HL model dimensions derived from a live observation.
    model_obs_dim, model_output_dim = np.size(
        com_utils.HL_obs(state)), np.size(com_utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=cfg.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num,
        model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size,
        model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency)
    high_level_planning.load_data('.')
    # SAC policies carry their own normalization; others need saved stats.
    if cfg.high_level_policy_type != 'SAC':
        high_level_planning.load_mean_var('.' + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=cfg.z_dim,
        a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )
    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    # Waypoints: half circle centered near x=1 (y>0), then a mirrored half
    # circle centered near x=3 (y<0). Fourth component is presumably a
    # position-goal flag consumed by the planner — TODO confirm.
    square_circle_test = []
    total_num = 6
    for i in range(1, total_num + 1):
        theta = i * math.pi / float(total_num)
        square_circle_test.append(
            np.array([1 - math.cos(theta), 1.5 * math.sin(theta), 0, 1]))
    for i in range(1, total_num + 1):
        theta = i * math.pi / float(total_num)
        square_circle_test.append(
            np.array([3 - math.cos(theta), -1.5 * math.sin(theta), 0, 1]))

    # NOTE(review): square_cost is never appended to or saved — dead local.
    square_cost = []

    for iter in range(cfg.num_iters):
        # CoM x, y and yaw traces for this iteration.
        position_tracking = [[], [], []]
        # reset robot to stand
        state = motion_library.exp_standing(env)
        low_level_TG.reset(state)
        position_tracking[0].append(state['base_pos_x'][0])
        position_tracking[1].append(state['base_pos_y'][0])
        position_tracking[2].append(state['base_ori_euler'][2])
        total_latent_action = 0
        total_cost = 0
        target_index = 0

        while True:
            target = square_circle_test[target_index]
            pre_com_state = state

            latent_action = high_level_planning.plan_latent_action(
                state, target, com_utils, cfg.mpc_sample_num, cfg.mpc_horizon)
            low_level_TG.update_latent_action(state, latent_action)

            for step in range(1, cfg.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state = env.step(action)
                # collect data
                position_tracking[0].append(state['base_pos_x'][0])
                position_tracking[1].append(state['base_pos_y'][0])
                position_tracking[2].append(state['base_ori_euler'][2])
                # Advance to the next waypoint as soon as we get within
                # 0.15 m — mid-footstep, so the target can change between
                # control steps.
                if np.linalg.norm(target[0:2] - np.array(
                        [state['base_pos_x'][0], state['base_pos_y'][0]])) < 0.15:
                    print("Reach Goal %s!!!!" % str(target_index))
                    target_index += 1
                    if target_index >= np.shape(square_circle_test)[0]:
                        break
                    target = square_circle_test[target_index]
            post_com_state = state
            total_latent_action += 1

            # All waypoints reached: save the trace and stop this iteration.
            if target_index >= np.shape(square_circle_test)[0]:
                np.save('./square_circle_test_' + str(iter) + '.npy',
                        np.array(position_tracking))
                break
            # Give up after 200 latent actions without finishing the course.
            if total_latent_action > 200:
                print('Did not reach goal')
                break
    return
def evaluate_model(cfg):
    """Multi-target reaching evaluation: drive to 8 goals around the start.

    For each of ``cfg.num_iters`` trials, the robot is re-stood and sent
    toward each of 8 relative goal offsets in turn; the CoM (x, y) trace per
    goal is saved to ``<policy>_multi_tgt_test_<q>.npy``.

    Fix over the original: ``target = target_position_test[target_index]``
    aliased the shared array, so ``target[0] += base_x`` mutated the test
    list in place and goal offsets accumulated across trials. A ``.copy()``
    keeps the offsets fixed.

    Args:
        cfg: hydra/omegaconf-style config with env, model and MPC settings.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=cfg.render, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()

    # SAC uses the RL observation frame; everything else uses the MPC frame.
    com_utils = utils.CoM_frame_MPC()
    if cfg.high_level_policy_type == 'SAC':
        com_utils = utils.CoM_frame_RL()

    # NOTE(review): init_state is only bound when cfg.sim is true but is
    # required by the LLTG constructor below — presumably sim-only; confirm.
    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    # HL model dimensions from a live observation. For SAC both dims come
    # from HL_obs (as in the original) — verify that is intended.
    if cfg.high_level_policy_type == 'SAC':
        model_obs_dim, model_output_dim = np.size(
            com_utils.HL_obs(state, np.zeros(3))), np.size(
                com_utils.HL_obs(state, np.zeros(3)))
    else:
        model_obs_dim, model_output_dim = np.size(
            com_utils.HL_obs(state)), np.size(
                com_utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=cfg.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num,
        model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size,
        model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency,
    )
    high_level_planning.load_data('.')
    # SAC carries its own normalization; others need the saved statistics.
    if cfg.high_level_policy_type != 'SAC':
        high_level_planning.load_mean_var('.' + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=cfg.z_dim,
        a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )
    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    # Goal offsets relative to the base position at the start of each run.
    # Fourth component is presumably a position-goal flag — TODO confirm.
    target_position_test = [
        np.array([0.0, 2.0, 0.0, 1]),
        np.array([2.0, 2.0, 0.0, 1]),
        np.array([-2.0, 2.0, 0.0, 1]),
        np.array([2.0, 0.0, 0.0, 1]),
        np.array([-2.0, 0.0, 0.0, 1]),
        np.array([0.0, -2.0, 0.0, 1]),
        np.array([-2.0, -2.0, 0.0, 1]),
        np.array([2.0, -2.0, 0.0, 1]),
    ]

    for q in range(cfg.num_iters):
        # Slots 2k / 2k+1 hold the x / y trace for goal k.
        position_tracking = [[] for _ in range(16)]

        for target_index in range(8):
            # reset robot to stand
            state = motion_library.exp_standing(env)
            low_level_TG.reset(state)
            position_tracking[2 * target_index].append(state['base_pos_x'][0])
            position_tracking[2 * target_index + 1].append(
                state['base_pos_y'][0])
            total_latent_action = 0

            # Copy so the shared offset table is not mutated (bug fix).
            target = target_position_test[target_index].copy()
            target[0] = target[0] + state['base_pos_x'][0]
            target[1] = target[1] + state['base_pos_y'][0]

            while True:
                pre_com_state = state
                latent_action = high_level_planning.plan_latent_action(
                    state, target, com_utils, cfg.mpc_sample_num,
                    cfg.mpc_horizon)
                low_level_TG.update_latent_action(state, latent_action)

                for step in range(1, cfg.num_timestep_per_footstep + 1):
                    action = low_level_TG.get_action(state, step)
                    state = env.step(action)
                    position_tracking[2 * target_index].append(
                        state['base_pos_x'][0])
                    position_tracking[2 * target_index + 1].append(
                        state['base_pos_y'][0])
                    # Stop the footstep early once within 0.2 m of the goal.
                    if np.linalg.norm(target[0:2] - np.array(
                            [state['base_pos_x'][0],
                             state['base_pos_y'][0]])) < 0.2:
                        print("ReachGoal" + str(target_index) + "!!!!")
                        break

                post_com_state = state
                total_latent_action += 1
                if np.linalg.norm(target[0:2] - np.array(
                        [state['base_pos_x'][0],
                         state['base_pos_y'][0]])) < 0.2:
                    break
                # Give up on this goal after 30 latent actions.
                if total_latent_action > 30:
                    break

        # As in the original, only the first 8 slots are converted to arrays.
        for i in range(8):
            position_tracking[i] = np.array(position_tracking[i])
        np.save(
            './' + cfg.high_level_policy_type + '_multi_tgt_test_' + str(q)
            + '.npy', np.array(position_tracking))
    return
def evaluate_model(args):
    """Run fixed-velocity rollouts and record per-iteration tracking cost.

    Depends on module-level globals `target_velocity` (the commanded
    velocity) and `cost_index` (used in the output filename) — neither is
    defined in this function; confirm they exist at module scope.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=args.sim, render=False, logger=False)
    env.set_control_mode(args.control_mode)
    state = env.reset()

    utils.make_dir(args.save_dir)
    # NOTE(review): save_dir is None when args.save is false; the load/save
    # calls below would then fail — presumably always run with saving on.
    save_dir = utils.make_dir(os.path.join(args.save_dir + '/trial_%s' % str(args.seed))) if args.save else None
    total_timestep, total_latent_action = 0, 0

    # Stand the robot; the NN low-level policy uses a different stance.
    # NOTE(review): init_state is only bound when args.sim is true but is
    # required by the LLTG constructor below — presumably sim-only; confirm.
    if args.sim:
        if args.low_level_policy_type == 'NN':
            init_state = motion_library.exp_standing(env, shoulder=0.3, elbow=1.3)
        else:
            init_state = motion_library.exp_standing(env)

    # HL model dimensions derived from a live observation.
    model_obs_dim, model_output_dim = np.size(utils.HL_obs(state)), np.size(utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=args.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=args.model_hidden_num,
        batch_size=args.batch_size,
        model_lr=args.model_lr,
        high_level_policy_type=args.high_level_policy_type,
        update_sample_policy=args.update_sample_policy,
        update_sample_policy_lr=args.update_sample_policy_lr,
        low_level_policy_type=args.low_level_policy_type,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        model_update_steps=args.model_update_steps,
        control_frequency=args.control_frequency
    )
    high_level_planning.load_data(save_dir)
    high_level_planning.load_mean_var(save_dir + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=args.z_dim,
        a_dim=args.a_dim,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        batch_size=args.batch_size,
        low_level_policy_type=args.low_level_policy_type,
        update_low_level_policy=args.update_low_level_policy,
        update_low_level_policy_lr=args.update_low_level_policy_lr,
        init_state=init_state,
    )
    if args.low_level_policy_type == 'NN':
        # Hard-coded checkpoint path for the NN low-level policy.
        low_level_TG.load_model('./save_data/trial_2')

    cost_record = []
    # CoM x, y, yaw and speed traces, accumulated across ALL iterations.
    position_tracking = [[], [], [], []]

    for iter in range(args.num_iters):
        # reset robot to stand
        if args.sim:
            if args.low_level_policy_type == 'NN':
                init_state = motion_library.exp_standing(env, shoulder=0.3, elbow=1.3)
            else:
                init_state = motion_library.exp_standing(env)
        # NOTE(review): reset uses the stale `state` from before standing,
        # not the freshly computed init_state — likely should be
        # low_level_TG.reset(init_state); confirm before changing.
        low_level_TG.reset(state)
        position_tracking[0].append(state['base_pos_x'][0])
        position_tracking[1].append(state['base_pos_y'][0])
        position_tracking[2].append(state['base_ori_euler'][2])
        position_tracking[3].append(np.linalg.norm(state['base_velocity'][0:2]))
        j = 0
        total_cost = 0

        # while True:
        for i in range(args.num_latent_action_per_iteration):
            # target = np.array([0,target_velocity,0])
            # `target_velocity` is a module-level global — see docstring.
            target = target_velocity
            pre_com_state = state

            latent_action = high_level_planning.plan_latent_action(state, target)
            low_level_TG.update_latent_action(state, latent_action)

            # Execute one footstep, logging the CoM trace per control step.
            for step in range(1, args.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state, total_timestep = env.step(action), total_timestep + 1
                # collect data
                position_tracking[0].append(state['base_pos_x'][0])
                position_tracking[1].append(state['base_pos_y'][0])
                position_tracking[2].append(state['base_ori_euler'][2])
                position_tracking[3].append(np.linalg.norm(state['base_velocity'][0:2]))
            post_com_state = state

            # Accumulate the per-footstep tracking cost.
            cost = utils.easy_cost(target, pre_com_state, post_com_state)
            total_cost += cost

            # Check if robot still alive
            if utils.check_data_useful(state):
                high_level_obs, high_level_delta_obs = utils.HL_obs(pre_com_state), utils.HL_delta_obs(pre_com_state, post_com_state)
                # NOTE(review): predict_state is computed but never used here.
                predict_state = high_level_planning.model_predict(high_level_obs, latent_action)
            total_latent_action += 1
            j += 1
            # if np.linalg.norm(target[0:2] - np.array([state['base_pos_x'][0], state['base_pos_y'][0]])) + np.linalg.norm(target[2:4] - high_level_delta_obs[0:2]) < 0.2 :
            #     print("Reach Goal %s!!!!")
            #     break
            # if j>20:
            #     print('Did not reach goal')
            #     break

        cost_record.append(total_cost)
        print(cost_record)

    # NOTE(review): placement of these saves relative to the iteration loop
    # is ambiguous in the collapsed source; saving once after all iterations
    # is assumed here — confirm. `cost_index` is a module-level global.
    np.save(save_dir + '/' + args.high_level_policy_type + '_' + args.low_level_policy_type + '_different_cost_test' + str(cost_index) + '.npy', np.array(cost_record))
    np.save(save_dir + '/' + args.high_level_policy_type + '_' + args.low_level_policy_type + '_com.npy', np.array(position_tracking))
    return