예제 #1
0
def collect_data(cfg, env, high_level_planning, low_level_TG,
                 HL_replay_buffer):
    """Roll out the planner/TG pair for every target speed and log data.

    Relies on module-level globals: target_speed_all, collect_state,
    collect_action, exp_index.  Results are written to
    ./exp_action_<exp_index>.npy and ./exp_state_<exp_index>.npy.
    """
    num_targets = np.shape(target_speed_all)[0]
    for i in range(num_targets):
        if cfg.sim:
            # Stand the robot up in simulation before each rollout.
            # NOTE(review): when cfg.sim is False, `state` is unbound below.
            state = motion_library.exp_standing(env)
            low_level_TG.reset(state)
        target_speed = target_speed_all[i]
        # target_speed[1] = -target_speed[1]

        for footstep in range(10):
            # One latent action per footstep: planner maps (state, target) -> z.
            latent_action = high_level_planning.plan_latent_action(
                state, target_speed)
            low_level_TG.update_latent_action(state, latent_action)
            collect_state[i][0][footstep] = state['base_pos_x'][0]
            collect_state[i][1][footstep] = state['base_pos_y'][0]
            collect_state[i][2][footstep] = state['base_ori_euler'][2]
            for step in range(50):
                action = low_level_TG.get_action(state, step + 1)
                # Only the last two footsteps (8 and 9) are recorded.
                if footstep > 7:
                    collect_action[i][(footstep - 8) * 50 + step] = action
                state = env.step(action)

    np.save('./exp_action_' + str(exp_index) + '.npy', collect_action)
    np.save('./exp_state_' + str(exp_index) + '.npy', collect_state)
def collect_data_train(cfg, env, high_level_planning, low_level_TG,
                       HL_replay_buffer, com_utils):
    """Collect on-policy transitions and update the high-level policy.

    Each episode stands the robot, cycles through the module-level
    `target_all` list, fills the replay buffer with (obs, z, reward, obs',
    done) tuples, and runs one policy update.  Per-episode returns are saved
    to ./reward_record.npy.
    """
    reward_record = []
    num_targets = np.shape(target_all)[0]
    for episode in range(cfg.num_iters):
        state = motion_library.exp_standing(env)
        low_level_TG.reset(state)
        target = target_all[episode % num_targets]
        total_reward = 0

        for j in range(cfg.num_latent_action_per_iteration):
            # Footstep-level transition: observe, sample z, execute, observe.
            pre_com_state = state
            high_level_obs = com_utils.HL_obs(pre_com_state, target)

            latent_action = high_level_planning.sample_latent_action(
                state, target, com_utils)
            low_level_TG.update_latent_action(state, latent_action)

            for step in range(1, cfg.num_timestep_per_footstep + 1):
                state = env.step(low_level_TG.get_action(state, step))

            post_com_state = state
            high_level_obs_ = com_utils.HL_obs(post_com_state, target)
            reward = com_utils.calc_reward(post_com_state, target)
            total_reward += reward
            # Episode terminates after the final latent action.
            done = (j == cfg.num_latent_action_per_iteration - 1)

            HL_replay_buffer.add(high_level_obs, latent_action, reward,
                                 high_level_obs_, done)

        print('Total Reward: ', total_reward)
        reward_record.append(total_reward)
        high_level_planning.update_policy(HL_replay_buffer)
    np.save('./reward_record.npy', reward_record)
예제 #3
0
def collect_data(cfg, env, high_level_planning, low_level_TG, HL_replay_buffer,
                 com_utils):
    for iter in range(1):
        state = motion_library.exp_standing(env)
        low_level_TG.reset(state)
        action_all = np.load('./trash_action.npy')
        # action_all =

        print(action_all)
        for j in range(np.shape(action_all)[0] * 2):
            # generate foot footstep position. If test, the footstep comes from optimization process
            pre_com_state = state

            latent_action = [2.24433139, -3.3401089]

            low_level_TG.update_latent_action(state, latent_action)

            for step in range(1, cfg.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state = env.step(action)

            post_com_state = state
            high_level_obs, high_level_delta_obs = com_utils.HL_obs(
                pre_com_state), com_utils.HL_delta_obs(pre_com_state,
                                                       post_com_state)
            if high_level_delta_obs[3] + high_level_obs[
                    0] > 0.5 or high_level_delta_obs[4] + high_level_obs[
                        1] > 0.5:
                print(j, latent_action)
            HL_replay_buffer.add(high_level_obs, latent_action, 0,
                                 high_level_delta_obs, 1)

            if utils.check_robot_dead(state):
                break
def main(cfg):
    """Wire up env, high-level planner, low-level trajectory generator and
    replay buffer, collect data, then train the high-level model.

    Requires cfg.sim=True: the initial standing pose comes from simulation.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=False, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    com_utils = utils.CoM_frame_MPC()

    if cfg.sim:
        init_state = motion_library.exp_standing(env)
    else:
        # Fix: init_state was previously unbound for cfg.sim=False, so the
        # LLTG construction below crashed with NameError. Fail fast instead.
        raise RuntimeError('main(cfg) requires cfg.sim=True: init_state is '
                           'taken from the simulated standing pose.')

    # Infer model I/O sizes from a sample observation of the reset state.
    model_obs_dim, model_output_dim = np.size(
        com_utils.HL_obs(state)), np.size(com_utils.HL_delta_obs(state, state))

    HL_replay_buffer = utils.ReplayBuffer(
        model_obs_dim, cfg.z_dim, model_output_dim, device,
        cfg.num_iters * cfg.num_latent_action_per_iteration)

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=cfg.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num,
        model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size,
        model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency)

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=cfg.z_dim,
        a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )

    # Only NN low-level policies have learned weights to load.
    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    # collect data
    collect_data(cfg, env, high_level_planning, low_level_TG, HL_replay_buffer,
                 com_utils)

    # train model
    train_model(cfg, HL_replay_buffer, high_level_planning)
예제 #5
0
def main(cfg):
    """Run data collection with a hard-wired raibert/IK configuration
    (z_dim=3, 50 timesteps per footstep), overriding most cfg values.

    Requires cfg.sim=True: the initial standing pose comes from simulation.
    """
    print(cfg.pretty())

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=False, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()

    if cfg.sim:
        init_state = motion_library.exp_standing(env)
    else:
        # Fix: init_state was previously unbound for cfg.sim=False, so the
        # LLTG construction below crashed with NameError. Fail fast instead.
        raise RuntimeError('main(cfg) requires cfg.sim=True: init_state is '
                           'taken from the simulated standing pose.')

    # Hard-coded dims for the raibert baseline (obs dim 2, delta-obs dim 5).
    model_obs_dim, model_output_dim = 2, 5

    HL_replay_buffer = utils.ReplayBuffer(
        model_obs_dim, cfg.z_dim, model_output_dim, device,
        cfg.num_iters * cfg.num_latent_action_per_iteration)

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=3,
        model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num,
        model_layer_num=2,
        batch_size=cfg.batch_size,
        model_lr=cfg.model_lr,
        high_level_policy_type='raibert',
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=50,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency)

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=3,
        a_dim=cfg.a_dim,
        num_timestep_per_footstep=50,
        batch_size=cfg.batch_size,
        low_level_policy_type='IK',
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )

    # # if args.low_level_policy_type =='NN':
    # #     low_level_TG.load_model('./save_data/trial_2')

    # collect data
    collect_data(cfg, env, high_level_planning, low_level_TG, HL_replay_buffer)
def main():
    """Scaffold entry point: build the environment, then hand off to train().

    NOTE(review): reads `cfg` from module scope -- confirm it is defined
    before this is called. The data buffer and agent are still TODO.
    """
    # define environment
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=False, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()

    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    # data buffer and agent definitions are still missing in this scaffold

    # training
    train(env)
예제 #7
0
def collect_data(cfg, env, high_level_planning, low_level_TG,
                 HL_replay_buffer):
    """Drive the robot with the negated first target speed for 20 footsteps.

    Nothing is recorded; this is a pure rollout.  Uses the module-level
    `target_speed_all` global.
    """
    for episode in range(1):
        if cfg.sim:
            # NOTE(review): when cfg.sim is False, `state` is unbound below.
            state = motion_library.exp_standing(env)
            low_level_TG.reset(state)
        # Negate the target to drive in the opposite direction.
        target_speed = -target_speed_all[episode]

        for _footstep in range(20):
            # Plan one latent action per footstep, then execute it.
            latent_action = high_level_planning.plan_latent_action(
                state, target_speed)
            low_level_TG.update_latent_action(state, latent_action)

            for step in range(50):
                state = env.step(low_level_TG.get_action(state, step + 1))
def collect_data(cfg, env, high_level_planning, low_level_TG, HL_replay_buffer,
                 com_utils):
    """Sample random latent actions, keep safe transitions, stash outliers.

    Transitions whose CoM drift exceeds 0.5 are appended to
    ./trash_action.npy and retried (the footstep counter is not advanced);
    everything else goes into the replay buffer, which is saved at the end.
    """
    action_save = []
    save = 0
    for episode in range(cfg.num_iters):
        state = motion_library.exp_standing(env)
        low_level_TG.reset(state)
        j = 0

        while j < cfg.num_latent_action_per_iteration:
            pre_com_state = state
            # Re-sample a latent action on even counts; odd counts reuse it.
            if j % 2 == 0:
                latent_action = high_level_planning.sample_latent_action()

            low_level_TG.update_latent_action(state, latent_action)

            # Execute one full footstep at the low level.
            for step in range(1, cfg.num_timestep_per_footstep + 1):
                state = env.step(low_level_TG.get_action(state, step))

            post_com_state = state
            high_level_obs = com_utils.HL_obs(pre_com_state)
            high_level_delta_obs = com_utils.HL_delta_obs(pre_com_state,
                                                          post_com_state)
            drift_x = high_level_delta_obs[3] + high_level_obs[0]
            drift_y = high_level_delta_obs[4] + high_level_obs[1]
            if abs(drift_x) > 0.5 or abs(drift_y) > 0.5:
                # Outlier transition: remember the latent action, don't count it.
                save = 1
                action_save.append(latent_action)
            else:
                HL_replay_buffer.add(high_level_obs, latent_action, 0,
                                     high_level_delta_obs, 1)
                j += 1

        if save:
            np.save('./trash_action.npy', np.array(action_save))

    HL_replay_buffer.save_buffer()
def evaluate_model(cfg):
    """Evaluate the learned high-level model on a fixed velocity schedule.

    Each test target velocity is held for 5 footsteps; per footstep the
    target, model-predicted, and measured base velocities are recorded and
    saved to '<high_level_policy_type>_velocity_tracking_fin.npy'.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=cfg.render, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    com_utils = utils.CoM_frame_MPC()

    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    # Infer model I/O sizes from a sample observation of the reset state.
    model_obs_dim, model_output_dim = np.size(
        com_utils.HL_obs(state)), np.size(com_utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=cfg.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num,
        model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size,
        model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency)
    high_level_planning.load_data('.')
    high_level_planning.load_mean_var('./buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=cfg.z_dim,
        a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )

    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    # [vx, vy, wz, 1] test targets; each entry is held for 5 footsteps.
    target_velocity_test = [
        np.array([0.0, 0.2, 0.0, 1]),
        np.array([0.0, 0.2, 0.0, 1]),
        np.array([-0.0, 0.2, 0.0, 1]),
        np.array([-0.2, 0.0, 0.0, 1]),
        np.array([-0.2, 0.0, 0.0, 1]),
        np.array([-0.2, 0.0, 0.0, 1]),
        np.array([0.0, -0.2, 0.0, 1]),
        np.array([0.0, -0.2, 0.0, 1]),
        np.array([0.0, -0.2, 0.0, 1]),
        np.array([0.0, -0.2, 0.0, 1]),
        np.array([0.2, -0.0, 0.0, 1]),
        np.array([0.2, -0.0, 0.0, 1]),
        np.array([0.2, -0.0, 0.0, 1]),
        np.array([0.2, -0.0, 0.0, 1]),
        np.array([0.2, -0.0, 0.0, 1]),
    ]

    # Cleanup: removed dead locals present in the original (duplicate
    # velocity_tracking init, two unused prediction_evaluation arrays,
    # unused total_cost, unused high_level_delta_obs computation).
    # Layout: (iteration, [tgt_vx, tgt_vy, pred_vx, pred_vy, meas_vx, meas_vy],
    # footstep index).
    velocity_tracking = np.empty(
        (cfg.num_iters, 6, cfg.num_latent_action_per_iteration))
    for episode in range(cfg.num_iters):
        # Reset robot to standing between evaluation episodes.
        # NOTE(review): `state` is not refreshed from this standing pose and
        # low_level_TG.reset() is not called here -- confirm this is intended.
        init_state = motion_library.exp_standing(env)

        for j in range(cfg.num_latent_action_per_iteration):
            # Switch to the next test target every 5 footsteps.
            if not j % 5:
                target = target_velocity_test[int((j + 1) / 5)]

            pre_com_state = state
            latent_action = high_level_planning.plan_latent_action(
                state, target, com_utils, cfg.mpc_sample_num, cfg.mpc_horizon)
            low_level_TG.update_latent_action(state, latent_action)

            # Execute one footstep at the low level.
            for step in range(1, cfg.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state = env.step(action)

            post_com_state = state

            # One-step model prediction, mapped back to the world frame.
            high_level_obs = com_utils.HL_obs(pre_com_state)
            predict_delta_state = high_level_planning.model_predict(
                high_level_obs, latent_action)
            predict_state_world = com_utils.predict2world(
                pre_com_state, predict_delta_state)

            # collect data
            velocity_tracking[episode][0][j] = target[0]
            velocity_tracking[episode][1][j] = target[1]
            velocity_tracking[episode][2][j] = predict_state_world[3]
            velocity_tracking[episode][3][j] = predict_state_world[4]
            velocity_tracking[episode][4][j] = post_com_state['base_velocity'][0]
            velocity_tracking[episode][5][j] = post_com_state['base_velocity'][1]

    np.save('./' + cfg.high_level_policy_type + '_velocity_tracking_fin.npy',
            velocity_tracking)
    return
# Sweep a 7x7 grid of latent actions spanning [-3, 3] in each of the first
# two dimensions. (A hand-picked list previously assigned to z_action_all
# was dead code -- immediately overwritten -- and has been removed.)
# Relies on module-level names: z_dim, tra_learning.
z_action_all = np.ones((7 * 7, z_dim))

# print(min(z_action_all), max(z_action_all))
total_step = 5
traj = np.load('./save_data/expert_action_total.npy')
# Recording buffers sized by the number of grid points.
CoM_traj = np.empty((np.shape(z_action_all)[0], 5, total_step * 2))
action_record = np.empty((np.shape(z_action_all)[0], 100, 18))
gait_record = np.empty((np.shape(z_action_all)[0], 6, 100))
tra_learning.load_model(save_dir='./save_data/trial_' + str(z_dim))
env = daisy_API(sim=True, render=True, logger=False)
env.set_control_mode('position')
state = env.reset()

for i in range(np.shape(z_action_all)[0]):
    state = motion_library.exp_standing(env)
    # Map flat index i to grid coordinates (first, second), each in [0, 6].
    first = int(i / 7.0)
    second = i % 7

    z_action_all[i][0] = -3 + first
    z_action_all[i][1] = -3 + second
    best_z_action = z_action_all[i]
    # min_loss = 10000
    # for j in range(10000):
    #     cur_z_action  = 3*np.random.randn(3)
    #     tra_learning.policy.z_action = cur_z_action
    #     action_all = np.zeros((100,18))
    #     for k in range(100):
    #         action_all[k] = tra_learning.policy.get_action(state, (k+1)/100.0)
    #     loss = np.linalg.norm(utils.normalization(traj, mean_std[0], mean_std[1]) - action_all)
    #     if loss< min_loss:
def evaluate_model(args):
    """Evaluate a trained high-level model against a fixed velocity schedule.

    Records per-footstep gait contact patterns, model-prediction accuracy,
    and velocity tracking; saves all three arrays under the trial directory.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=args.sim, render=False, logger=False)
    env.set_control_mode(args.control_mode)
    state = env.reset()
    utils.make_dir(args.save_dir)
    # NOTE(review): save_dir is None when args.save is False, but it is used
    # unconditionally for load_data/load_mean_var/np.save below -- confirm.
    save_dir = utils.make_dir(
        os.path.join(args.save_dir +
                     '/trial_%s' % str(args.seed))) if args.save else None
    total_timestep, total_latent_action = 0, 0

    if args.sim:
        # NN low-level policies use a different standing pose (custom
        # shoulder/elbow angles) than the default.
        if args.low_level_policy_type == 'NN':
            init_state = motion_library.exp_standing(env,
                                                     shoulder=0.3,
                                                     elbow=1.3)
        else:
            init_state = motion_library.exp_standing(env)
    # NOTE(review): init_state is unbound when args.sim is False -- the LLTG
    # construction below would raise NameError. Confirm non-sim support.
    model_obs_dim, model_output_dim = np.size(utils.HL_obs(state)), np.size(
        utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=args.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=args.model_hidden_num,
        batch_size=args.batch_size,
        model_lr=args.model_lr,
        high_level_policy_type=args.high_level_policy_type,
        update_sample_policy=args.update_sample_policy,
        update_sample_policy_lr=args.update_sample_policy_lr,
        low_level_policy_type=args.low_level_policy_type,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        model_update_steps=args.model_update_steps,
        control_frequency=args.control_frequency)
    high_level_planning.load_data(save_dir)
    high_level_planning.load_mean_var(save_dir + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=args.z_dim,
        a_dim=args.a_dim,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        batch_size=args.batch_size,
        low_level_policy_type=args.low_level_policy_type,
        update_low_level_policy=args.update_low_level_policy,
        update_low_level_policy_lr=args.update_low_level_policy_lr,
        init_state=init_state,
    )
    if args.low_level_policy_type == 'NN':
        low_level_TG.load_model('./save_data/trial_2')

    # Rows 0..model_output_dim-1: measured deltas; the rest: model predictions.
    prediction_evaluation = np.empty(
        (2 * model_output_dim, args.num_latent_action_per_iteration))
    # Rows: [tgt_vx, tgt_vy, pred_vx, pred_vy, meas_dx, meas_dy] per footstep.
    velocity_tracking = np.empty((6, args.num_latent_action_per_iteration))

    # [vx, vy, wz] test targets; each entry is held for 5 footsteps.
    target_velocity_test = [
        np.array([0.0, -0.1, 0.0]),
        np.array([0.0, -0.1, 0.0]),
        np.array([-0.00, -0.1, 0.0]),
        np.array([-0.0, -0.1, 0.0]),
        np.array([-0.0, -0.1, 0.0]),
        np.array([-0.00, -0.4, 0.0]),
        np.array([-0.00, -0.4, 0.0]),
        np.array([-0.0, -0.4, 0.0]),
        np.array([-0.0, -0.4, 0.0]),
        np.array([-0.0, -0.4, 0.0]),
    ]

    # Per-leg binary contact record for every low-level timestep.
    gait_record = np.empty(
        (args.num_iters, 6, args.num_latent_action_per_iteration *
         args.num_timestep_per_footstep))
    for iter in range(args.num_iters):
        # reset robot to stand
        if args.sim:
            state = motion_library.exp_standing(env, shoulder=0.3, elbow=1.3)
            low_level_TG.reset(state)

        for j in range(args.num_latent_action_per_iteration):
            # Switch to the next test target every 5 footsteps.
            if not j % 5:
                target = target_velocity_test[int((j + 1) / 5)]

            pre_com_state = state
            latent_action = high_level_planning.plan_latent_action(
                state, target)
            low_level_TG.update_latent_action(state, latent_action)

            for step in range(1, args.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state, total_timestep = env.step(action), total_timestep + 1
                # Foot z below -5 mm is treated as ground contact.
                for q in range(6):
                    if state['foot_pos'][q][2] <= -0.005:
                        gait_record[iter][q][j * args.num_timestep_per_footstep
                                             + step - 1] = 1
                    else:
                        gait_record[iter][q][j * args.num_timestep_per_footstep
                                             + step - 1] = 0

            post_com_state = state

            # Check if robot still alive
            # NOTE(review): if check_data_useful() is False on the very first
            # footstep, high_level_delta_obs / predict_state are unbound when
            # read below -- confirm this cannot happen in practice.
            if utils.check_data_useful(state):
                high_level_obs, high_level_delta_obs = utils.HL_obs(
                    pre_com_state), utils.HL_delta_obs(pre_com_state,
                                                       post_com_state)
                predict_state = high_level_planning.model_predict(
                    high_level_obs, latent_action)

            # collect data

            velocity_tracking[0][j] = target[0]
            velocity_tracking[1][j] = target[1]
            velocity_tracking[2][j] = predict_state[4]
            velocity_tracking[3][j] = predict_state[5]
            velocity_tracking[4][j] = high_level_delta_obs[4]
            velocity_tracking[5][j] = high_level_delta_obs[5]
            for q in range(model_output_dim):
                prediction_evaluation[q][j] = high_level_delta_obs[q]
                prediction_evaluation[q +
                                      model_output_dim][j] = predict_state[q]

            total_latent_action += 1

            # Abort the iteration early if the robot has fallen.
            if utils.check_robot_dead(state):
                break

    np.save(save_dir + '/prediction_evaluation.npy',
            np.array(prediction_evaluation))
    np.save(save_dir + '/velocity_tracking.npy', velocity_tracking)
    np.save('./save_data/trial_2/gait_record.npy', gait_record)

    return
예제 #12
0
def main(test_arg):
    """Step the raibert planner + IK trajectory generator for a fixed number
    of footsteps, record target vs. actual foot positions in the CoM frame,
    save both arrays, then plot X/Y/Z tracking for one leg.
    """
    # initialize data structure record time to measure the frequency
    # Buffers: (6 legs, 3 axes, total low-level timesteps).
    tgt_pos = np.zeros(
        (6, 3, test_arg['total_step'] * test_arg['stance_time']))
    actual_pos = np.zeros(
        (6, 3, test_arg['total_step'] * test_arg['stance_time']))

    # initialize the configuration
    env = daisy_API(sim=test_arg['sim'], render=True, logger=False)
    env.set_control_mode('position')

    state = env.reset()
    if test_arg['sim']:
        state = motion_library.exp_standing(env)
    else:
        # Real robot: use the demo standing pose with fixed joint angles.
        state = motion_library.demo_standing(env, shoulder=0.5, elbow=1.1)

    # initialize controllers
    high_level_planning = raibert_footstep_policy(
        stance_duration=test_arg['stance_time'],
        target_speed=np.array(test_arg['target_speed']),
        control_frequency=test_arg['control_frequency'])
    low_level_planning = IK_traj_generator(state)

    # run controller for several steps record data
    for index_step in range(test_arg['total_step']):
        latent_action = high_level_planning.plan_latent_action(state)
        low_level_planning.update_latent_action(state, latent_action)

        for i in range(1, test_arg['stance_time'] + 1):
            # Phase in (0, 1]: fraction of the stance completed.
            phase = float(i) / test_arg['stance_time']
            action = low_level_planning.get_action(state, phase)
            # TODO: implement simplified non blocking stuff
            state = env.step(action)
            time.sleep(0.01)
            # calculate current XYZ
            foot_com_cur = daisy_kinematics.FK_CoM2Foot(state['j_pos'])
            foot_com_tgt = low_level_planning.des_foot_position_com

            # Log per-leg (6) per-axis (3) target and measured foot positions.
            for j in range(6):
                for k in range(3):
                    tgt_pos[j][k][index_step * test_arg['stance_time'] + i -
                                  1] = foot_com_tgt[j][k]
                    actual_pos[j][k][index_step * test_arg['stance_time'] + i -
                                     1] = foot_com_cur[j][k]

    # save data + load data + plotting
    np.save(
        './save_data/test_tra/tgt_tra_' + str(test_arg['stance_time']) +
        '.npy', tgt_pos)
    np.save(
        './save_data/test_tra/actual_tra_' + str(test_arg['stance_time']) +
        '.npy', actual_pos)

    # Round-trip through disk before plotting (sanity-checks the saved files).
    tgt_pos = np.load('./save_data/test_tra/tgt_tra_' +
                      str(test_arg['stance_time']) + '.npy')
    actual_pos = np.load('./save_data/test_tra/actual_tra_' +
                         str(test_arg['stance_time']) + '.npy')

    # Plot target (blue) vs. actual (pink) trajectories for one leg.
    leg_index = 0
    axis = range(np.shape(tgt_pos)[2])
    plt.rcParams['figure.figsize'] = (8, 10)
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1)
    ax1.plot(axis, tgt_pos[leg_index][0], color='dodgerblue')
    ax1.plot(axis, actual_pos[leg_index][0], color='deeppink')
    ax1.set_title('X tracking')
    ax2.plot(axis, tgt_pos[leg_index][1], color='dodgerblue')
    ax2.plot(axis, actual_pos[leg_index][1], color='deeppink')
    ax2.set_title('Y tracking')
    ax3.plot(axis, tgt_pos[leg_index][2], color='dodgerblue')
    ax3.plot(axis, actual_pos[leg_index][2], color='deeppink')
    ax3.set_title('Z tracking')
    plt.show()
예제 #13
0
def evaluate_model(cfg):
    """Waypoint-following evaluation: plan latent actions with the learned
    model (MPC) toward a sequence of arc waypoints and save the robot's
    traced base positions per iteration.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=False, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    com_utils = utils.CoM_frame_MPC()

    if cfg.sim:
        init_state = motion_library.exp_standing(env)
    # NOTE(review): init_state is unbound when cfg.sim is False -- the LLTG
    # construction below would raise NameError. Confirm non-sim support.

    # Infer model I/O sizes from a sample observation of the reset state.
    model_obs_dim, model_output_dim = np.size(
        com_utils.HL_obs(state)), np.size(com_utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=cfg.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num,
        model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size,
        model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency)
    high_level_planning.load_data('.')
    # SAC keeps its own normalization; other policies need the saved stats.
    if cfg.high_level_policy_type != 'SAC':
        high_level_planning.load_mean_var('.' + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=cfg.z_dim,
        a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )

    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    # Waypoints: two half-arcs (x centered at 1 then 3, y amplitude +/-1.5).
    # The trailing 1 in each entry presumably flags a position-type target --
    # verify against plan_latent_action.
    square_circle_test = []
    total_num = 6
    for i in range(1, total_num + 1):
        theta = i * math.pi / float(total_num)
        square_circle_test.append(
            np.array([1 - math.cos(theta), 1.5 * math.sin(theta), 0, 1]))

    for i in range(1, total_num + 1):
        theta = i * math.pi / float(total_num)
        square_circle_test.append(
            np.array([3 - math.cos(theta), -1.5 * math.sin(theta), 0, 1]))

    square_cost = []

    for iter in range(cfg.num_iters):
        # Trace of (x, y, yaw) appended at every low-level timestep.
        position_tracking = [[], [], []]
        # reset robot to stand
        state = motion_library.exp_standing(env)
        low_level_TG.reset(state)
        position_tracking[0].append(state['base_pos_x'][0])
        position_tracking[1].append(state['base_pos_y'][0])
        position_tracking[2].append(state['base_ori_euler'][2])

        total_latent_action = 0
        total_cost = 0
        target_index = 0
        while True:
            target = square_circle_test[target_index]

            pre_com_state = state
            latent_action = high_level_planning.plan_latent_action(
                state, target, com_utils, cfg.mpc_sample_num, cfg.mpc_horizon)
            low_level_TG.update_latent_action(state, latent_action)

            for step in range(1, cfg.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state = env.step(action)
                # collect data
                position_tracking[0].append(state['base_pos_x'][0])
                position_tracking[1].append(state['base_pos_y'][0])
                position_tracking[2].append(state['base_ori_euler'][2])
                # Advance to the next waypoint once within 0.15 m of it.
                if np.linalg.norm(target[0:2] - np.array(
                    [state['base_pos_x'][0], state['base_pos_y'][0]])) < 0.15:
                    print("Reach Goal %s!!!!" % str(target_index))
                    target_index += 1
                    if target_index >= np.shape(square_circle_test)[0]:
                        break
                    target = square_circle_test[target_index]

            post_com_state = state

            total_latent_action += 1

            # All waypoints reached: save the trace and finish this iteration.
            if target_index >= np.shape(square_circle_test)[0]:
                np.save('./square_circle_test_' + str(iter) + '.npy',
                        np.array(position_tracking))
                break

            # Give up after 200 latent actions without reaching the last goal.
            if total_latent_action > 200:
                print('Did not reach goal')
                break

    return
예제 #14
0
def evaluate_model(cfg):
    """Evaluate trained high-/low-level policies on eight relative waypoints.

    Each outer iteration resets the robot to a standing pose, then steers it
    toward eight targets expressed relative to the stand-up position.  A
    target counts as reached within 0.2 m; at most 30 latent actions are
    spent per target.  The x/y center-of-mass traces for all targets are
    saved to './<high_level_policy_type>_multi_tgt_test_<q>.npy'.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=cfg.render, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()

    # SAC policies use the RL CoM frame; every other type uses the MPC frame.
    com_utils = utils.CoM_frame_MPC()
    if cfg.high_level_policy_type == 'SAC':
        com_utils = utils.CoM_frame_RL()

    # NOTE(review): init_state is only bound when cfg.sim is true; running
    # with cfg.sim == False raises NameError at the LLTG construction below
    # -- confirm hardware runs supply an initial state elsewhere.
    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    if cfg.high_level_policy_type == 'SAC':
        model_obs_dim, model_output_dim = np.size(
            com_utils.HL_obs(state, np.zeros(3))), np.size(
                com_utils.HL_obs(state, np.zeros(3)))
    else:
        model_obs_dim, model_output_dim = np.size(
            com_utils.HL_obs(state)), np.size(
                com_utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=cfg.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num,
        model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size,
        model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency)
    high_level_planning.load_data('.')
    if cfg.high_level_policy_type != 'SAC':
        # Non-SAC planners additionally need input-normalization statistics.
        high_level_planning.load_mean_var('.' + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=cfg.z_dim,
        a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )

    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    # Waypoints relative to the stand-up position: [dx, dy, dyaw, flag].
    target_position_test = [
        np.array([0.0, 2.0, 0.0, 1]),
        np.array([2.0, 2.0, 0.0, 1]),
        np.array([-2.0, 2.0, 0.0, 1]),
        np.array([2.0, 0.0, 0.0, 1]),
        np.array([-2.0, 0.0, 0.0, 1]),
        np.array([0.0, -2.0, 0.0, 1]),
        np.array([-2.0, -2.0, 0.0, 1]),
        np.array([2.0, -2.0, 0.0, 1]),
    ]

    for q in range(cfg.num_iters):
        # Two trace lists (x, y) per target -> 16 lists total.
        position_tracking = [[] for _ in range(2 * len(target_position_test))]
        for target_index in range(len(target_position_test)):

            # Reset robot to stand before each target.
            state = motion_library.exp_standing(env)
            low_level_TG.reset(state)

            position_tracking[2 * target_index].append(state['base_pos_x'][0])
            position_tracking[2 * target_index + 1].append(
                state['base_pos_y'][0])
            total_latent_action = 0
            # BUG FIX: copy the template before offsetting it.  The original
            # aliased the shared array and mutated it in place, so on every
            # outer iteration (q > 0) the targets drifted further away.
            target = target_position_test[target_index].copy()
            target[0] = target[0] + state['base_pos_x'][0]
            target[1] = target[1] + state['base_pos_y'][0]

            while True:
                latent_action = high_level_planning.plan_latent_action(
                    state, target, com_utils, cfg.mpc_sample_num,
                    cfg.mpc_horizon)
                low_level_TG.update_latent_action(state, latent_action)

                for step in range(1, cfg.num_timestep_per_footstep + 1):
                    action = low_level_TG.get_action(state, step)
                    state = env.step(action)
                    position_tracking[2 * target_index].append(
                        state['base_pos_x'][0])
                    position_tracking[2 * target_index + 1].append(
                        state['base_pos_y'][0])
                    if np.linalg.norm(target[0:2] - np.array(
                        [state['base_pos_x'][0], state['base_pos_y'][0]])
                                      ) < 0.2:
                        print("ReachGoal" + str(target_index) + "!!!!")
                        break

                total_latent_action += 1

                # Stop when the goal is reached or the budget is exhausted.
                if np.linalg.norm(target[0:2] - np.array(
                    [state['base_pos_x'][0], state['base_pos_y'][0]])) < 0.2:
                    break
                if total_latent_action > 30:
                    break

        # Convert every trace (the original only converted the first eight
        # of the sixteen lists) before saving.
        for i in range(len(position_tracking)):
            position_tracking[i] = np.array(position_tracking[i])
        np.save(
            './' + cfg.high_level_policy_type + '_multi_tgt_test_' + str(q) +
            '.npy', np.array(position_tracking))

    return
Example #15
0
def evaluate_model(args):
    """Run a fixed-velocity tracking evaluation and record per-iteration cost.

    For each iteration the robot is stood up and commanded with the
    module-level `target_velocity` for `args.num_latent_action_per_iteration`
    latent actions.  Per-iteration cost totals are saved to
    '<save_dir>/<HL>_<LL>_different_cost_test<cost_index>.npy' and the CoM
    trace (x, y, yaw, speed) to '<save_dir>/<HL>_<LL>_com.npy'.

    NOTE(review): `target_velocity` and `cost_index` are module-level names
    defined outside this function -- confirm they are set before calling.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=args.sim, render=False, logger=False)
    env.set_control_mode(args.control_mode)
    state = env.reset()
    utils.make_dir(args.save_dir)
    save_dir = utils.make_dir(
        os.path.join(args.save_dir + '/trial_%s' % str(args.seed))
    ) if args.save else None
    total_timestep, total_latent_action = 0, 0

    # NN low-level policies stand with a different shoulder/elbow pose.
    # NOTE(review): init_state is only bound when args.sim is true; the LLTG
    # construction below raises NameError otherwise -- confirm intended.
    if args.sim:
        if args.low_level_policy_type == 'NN':
            init_state = motion_library.exp_standing(env, shoulder=0.3, elbow=1.3)
        else:
            init_state = motion_library.exp_standing(env)
    model_obs_dim, model_output_dim = np.size(utils.HL_obs(state)), np.size(
        utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=args.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=args.model_hidden_num,
        batch_size=args.batch_size,
        model_lr=args.model_lr,
        high_level_policy_type=args.high_level_policy_type,
        update_sample_policy=args.update_sample_policy,
        update_sample_policy_lr=args.update_sample_policy_lr,
        low_level_policy_type=args.low_level_policy_type,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        model_update_steps=args.model_update_steps,
        control_frequency=args.control_frequency,
    )
    high_level_planning.load_data(save_dir)
    high_level_planning.load_mean_var(save_dir + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=args.z_dim,
        a_dim=args.a_dim,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        batch_size=args.batch_size,
        low_level_policy_type=args.low_level_policy_type,
        update_low_level_policy=args.update_low_level_policy,
        update_low_level_policy_lr=args.update_low_level_policy_lr,
        init_state=init_state,
    )

    if args.low_level_policy_type == 'NN':
        low_level_TG.load_model('./save_data/trial_2')

    cost_record = []
    position_tracking = [[], [], [], []]  # x, y, yaw, planar speed

    for iter in range(args.num_iters):
        # Reset robot to stand.
        if args.sim:
            if args.low_level_policy_type == 'NN':
                init_state = motion_library.exp_standing(env, shoulder=0.3, elbow=1.3)
            else:
                init_state = motion_library.exp_standing(env)
            # BUG FIX: adopt the fresh standing state.  The original kept the
            # stale pre-standing `state`, so the trajectory generator was
            # reset from an outdated pose (inconsistent with the other
            # evaluation loops in this file).
            state = init_state
        low_level_TG.reset(state)
        position_tracking[0].append(state['base_pos_x'][0])
        position_tracking[1].append(state['base_pos_y'][0])
        position_tracking[2].append(state['base_ori_euler'][2])
        position_tracking[3].append(np.linalg.norm(state['base_velocity'][0:2]))

        total_cost = 0
        for i in range(args.num_latent_action_per_iteration):
            target = target_velocity  # module-level velocity command

            pre_com_state = state
            latent_action = high_level_planning.plan_latent_action(state, target)
            low_level_TG.update_latent_action(state, latent_action)
            for step in range(1, args.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state, total_timestep = env.step(action), total_timestep + 1
                # Collect CoM trace data.
                position_tracking[0].append(state['base_pos_x'][0])
                position_tracking[1].append(state['base_pos_y'][0])
                position_tracking[2].append(state['base_ori_euler'][2])
                position_tracking[3].append(
                    np.linalg.norm(state['base_velocity'][0:2]))

            post_com_state = state
            total_cost += utils.easy_cost(target, pre_com_state, post_com_state)

            # Only query the dynamics model while the robot is still alive.
            if utils.check_data_useful(state):
                high_level_obs = utils.HL_obs(pre_com_state)
                # NOTE(review): the prediction result was never consumed in
                # the original; the call is kept for parity (possible debug
                # hook / side effect inside the planner) -- confirm.
                high_level_planning.model_predict(high_level_obs, latent_action)

            total_latent_action += 1

        cost_record.append(total_cost)
        print(cost_record)
    np.save(save_dir + '/' + args.high_level_policy_type + '_' +
            args.low_level_policy_type + '_different_cost_test' +
            str(cost_index) + '.npy', np.array(cost_record))
    np.save(save_dir + '/' + args.high_level_policy_type + '_' +
            args.low_level_policy_type + '_com.npy',
            np.array(position_tracking))
    return