def evaluation(args, r, high_level_planning):
    velocity_tracking = np.empty(
        (6, args.num_iters, args.num_latent_action_per_iteration))
    save_dir = utils.make_dir(
        os.path.join(args.save_dir +
                     '/trial_%s' % str(args.seed))) if args.save else None
    exp_variables = {
        'finish_exp': [0],
        'not_finish_one_iter': [1],
        'finish_one_step': [0],
        'update_z_action': [0],
    }
    ru.set_variables(r, exp_variables)

    for i in range(0, args.num_iters):

        ru.wait_for_key(r, 'not_finish_one_iter', change=False)
        print('Iteration: ', i + 1)
        input('Press any key after initialized robot')
        ru.wait_for_key(r, 'finish_one_step')
        state, exp_variables = ru.get_state(r)

        for j in range(args.num_latent_action_per_iteration):
            pre_com_state = state
            if not j % 5:
                target = target_velocity_test[int((j + 1) / 5)] / 1.5 * 2

            # take current state and plan for next z_action and sent to daisy
            latent_action = high_level_planning.plan_latent_action(
                pre_com_state, target)

            exp_variables['z_action'] = latent_action.tolist()
            exp_variables['update_z_action'] = [1]
            ru.set_variables(r, exp_variables)

            # check if finish one step
            ru.wait_for_key(r, 'finish_one_step')
            state, exp_variables = ru.get_state(r)
            post_com_state = state

            # if utils.check_data_useful(post_com_state):
            high_level_obs, high_level_delta_obs = utils.HL_obs(
                pre_com_state), utils.HL_delta_obs(pre_com_state,
                                                   post_com_state)
            predict_state = high_level_planning.model_predict(
                high_level_obs, latent_action)

            velocity_tracking[0][i][j] = target[0]
            velocity_tracking[1][i][j] = target[1]
            velocity_tracking[2][i][j] = predict_state[4]
            velocity_tracking[3][i][j] = predict_state[5]
            velocity_tracking[4][i][j] = high_level_delta_obs[4]
            velocity_tracking[5][i][j] = high_level_delta_obs[5]

        np.save(
            save_dir + '/' + args.high_level_policy_type +
            '_velocity_tracking_test.npy', velocity_tracking)
        exp_variables['not_finish_one_iter'] = [0]
        ru.set_variables(r, exp_variables)

    exp_variables = ru.get_variables(r)
    velocity_record = exp_variables['record_vel']
    np.save(
        save_dir + '/' + args.high_level_policy_type + '_velocity_record.npy',
        np.array(velocity_record))
    # experiment ends
    exp_variables['finish_exp'] = [1]
    ru.set_variables(r, exp_variables)
def evaluation(args, r, high_level_planning):
    position_tracking = [
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
    ]
    save_dir = utils.make_dir(
        os.path.join(args.save_dir +
                     '/trial_%s' % str(args.seed))) if args.save else None
    exp_variables = {
        'finish_exp': [0],
        'not_finish_one_iter': [1],
        'finish_one_step': [0],
        'update_z_action': [0],
    }
    ru.set_variables(r, exp_variables)

    cost_record = []

    for iter in range(0, args.num_iters):

        position_tracking = [[], [], [], []]
        ru.wait_for_key(r, 'not_finish_one_iter', change=False)
        print('Iteration: ', iter + 1)
        input('Press any key after initialized robot')
        ru.wait_for_key(r, 'finish_one_step')
        state, exp_variables = ru.get_state(r)
        target = target_velocity

        # position_tracking[2*iter].append(state['base_pos_x'][0])
        # position_tracking[2*iter+1].append(state['base_pos_y'][0])

        j = 0
        total_cost = 0
        # while True:
        for i in range(args.num_latent_action_per_iteration):
            pre_com_state = state

            # take current state and plan for next z_action and sent to daisy
            latent_action = high_level_planning.plan_latent_action(
                pre_com_state, target)

            exp_variables['z_action'] = latent_action.tolist()
            exp_variables['update_z_action'] = [1]
            ru.set_variables(r, exp_variables)

            # check if finish one step
            ru.wait_for_key(r, 'finish_one_step')
            state, exp_variables = ru.get_state(r)
            post_com_state = state

            cost = utils.easy_cost(target, pre_com_state, post_com_state)
            total_cost += cost

            # if utils.check_data_useful(post_com_state):
            high_level_obs, high_level_delta_obs = utils.HL_obs(
                pre_com_state), utils.HL_delta_obs(pre_com_state,
                                                   post_com_state)
            predict_state = high_level_planning.model_predict(
                high_level_obs, latent_action)

            # position_tracking[2*iter].append(state['base_pos_x'][0])
            # position_tracking[2*iter+1].append(state['base_pos_y'][0])
            print([state['base_pos_x'][0], state['base_pos_y'][0]],
                  high_level_delta_obs[0:2], predict_state[0:2])

            j += 1

            # if np.linalg.norm(target[0:2] - np.array([state['base_pos_x'][0], state['base_pos_y'][0]])) < 0.25 :
            #     print("Reach One Goal !!!!")
            # np.save(save_dir +'/' + args.high_level_policy_type +'_multi_tgt_test_'+str(iter)+'.npy', np.array(position_tracking) )

            # break

            # if j>30:
            #     print('Did not reach goal')
            #     break

        cost_record.append(total_cost)
        print(cost_record)
        exp_variables['not_finish_one_iter'] = [0]
        ru.set_variables(r, exp_variables)
        input('wait for pos')
        exp_variables = ru.get_variables(r)
        # state_record = exp_variables['record_pos']
        # velocity_record = exp_variables['record_vel']
        # np.save(save_dir +'/' + args.high_level_policy_type +'_velocity_record_'+ str(cost_index) + '_' + str(iter), np.array(velocity_record))
        # np.save(save_dir +'/' + args.high_level_policy_type +'_state_record_'+ str(cost_index) + '_' + str(iter), np.array(state_record))
        np.save(
            save_dir + '/' + args.high_level_policy_type + '_' +
            args.low_level_policy_type + '_different_cost_test' +
            str(cost_index) + '.npy', np.array(cost_record))
        # np.save(save_dir + '/'+args.high_level_policy_type+'_' +args.low_level_policy_type + str(iter)+'_com.npy', np.array(position_tracking) )
    # experiment ends
    exp_variables['finish_exp'] = [1]
    ru.set_variables(r, exp_variables)
コード例 #3
0
def collect_data_client(args, r, high_level_planning, HL_replay_buffer):
    exp_variables = {
        'finish_exp': [0],
        'not_finish_one_iter': [1],
        'finish_one_step': [0],
        'update_z_action': [0],
    }
    ru.set_variables(r, exp_variables)
    for i in range(17, args.num_iters):

        ru.wait_for_key(r, 'not_finish_one_iter', change=False)
        print('Iteration: ', i + 1)
        input('Press any key after initialized robot')
        ru.wait_for_key(r, 'finish_one_step')
        state, exp_variables = ru.get_state(r)

        for k in range(args.num_latent_action_per_iteration):
            pre_com_state = state

            q = k % 5
            if q == 0:
                target = np.clip(0.2 * np.random.randn(3), -0.25, 0.25)
                latent_action = np.array(
                    latent_all[i]) + 0.1 * np.random.randn(args.z_dim)
                # latent_action = high_level_planning.sample_latent_action(target)
            if q == 3 or q == 4:
                target = np.zeros(3)
                latent_action = np.array(
                    [0.2068, 0.1836], ) + 0.1 * np.random.randn(args.z_dim)

            # take current state and plan for next z_action and sent to daisy
            t_start = datetime.datetime.now()
            # if args.test:
            #     latent_action = high_level_planning.plan_latent_action(pre_com_state, target)
            # else:
            #     latent_action = high_level_planning.sample_latent_action(target)

            t_end = datetime.datetime.now()

            exp_variables['z_action'] = latent_action.tolist()
            exp_variables['update_z_action'] = [1]
            ru.set_variables(r, exp_variables)

            # check if finish one step
            ru.wait_for_key(r, 'finish_one_step')
            state, exp_variables = ru.get_state(r)
            post_com_state = state

            if utils.check_data_useful(post_com_state):
                high_level_obs, high_level_delta_obs = utils.HL_obs(
                    pre_com_state), utils.HL_delta_obs(pre_com_state,
                                                       post_com_state)
                HL_replay_buffer.add(high_level_obs, latent_action, 0,
                                     high_level_delta_obs, 1)

            if utils.check_robot_dead(post_com_state):
                break

        exp_variables['not_finish_one_iter'] = [0]
        ru.set_variables(r, exp_variables)

        if (i + 1) % 1 == 0:
            model_save_dir = utils.make_dir(
                os.path.join(args.save_dir + '/trial_%s' %
                             str(args.seed))) if args.save else None
            HL_replay_buffer.save_buffer(model_save_dir)

    # experiment ends
    exp_variables['finish_exp'] = [1]
    ru.set_variables(r, exp_variables)
コード例 #4
0
def evaluate_model(args):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=args.sim, render=False, logger=False)
    env.set_control_mode(args.control_mode)
    state = env.reset()
    utils.make_dir(args.save_dir)
    save_dir = utils.make_dir(
        os.path.join(args.save_dir +
                     '/trial_%s' % str(args.seed))) if args.save else None
    total_timestep, total_latent_action = 0, 0

    if args.sim:
        if args.low_level_policy_type == 'NN':
            init_state = motion_library.exp_standing(env,
                                                     shoulder=0.3,
                                                     elbow=1.3)
        else:
            init_state = motion_library.exp_standing(env)
    model_obs_dim, model_output_dim = np.size(utils.HL_obs(state)), np.size(
        utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=args.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=args.model_hidden_num,
        batch_size=args.batch_size,
        model_lr=args.model_lr,
        high_level_policy_type=args.high_level_policy_type,
        update_sample_policy=args.update_sample_policy,
        update_sample_policy_lr=args.update_sample_policy_lr,
        low_level_policy_type=args.low_level_policy_type,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        model_update_steps=args.model_update_steps,
        control_frequency=args.control_frequency)
    high_level_planning.load_data(save_dir)
    high_level_planning.load_mean_var(save_dir + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=args.z_dim,
        a_dim=args.a_dim,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        batch_size=args.batch_size,
        low_level_policy_type=args.low_level_policy_type,
        update_low_level_policy=args.update_low_level_policy,
        update_low_level_policy_lr=args.update_low_level_policy_lr,
        init_state=init_state,
    )
    if args.low_level_policy_type == 'NN':
        low_level_TG.load_model('./save_data/trial_2')

    prediction_evaluation = np.empty(
        (2 * model_output_dim, args.num_latent_action_per_iteration))
    velocity_tracking = np.empty((6, args.num_latent_action_per_iteration))

    target_velocity_test = [
        np.array([0.0, -0.1, 0.0]),
        np.array([0.0, -0.1, 0.0]),
        np.array([-0.00, -0.1, 0.0]),
        np.array([-0.0, -0.1, 0.0]),
        np.array([-0.0, -0.1, 0.0]),
        np.array([-0.00, -0.4, 0.0]),
        np.array([-0.00, -0.4, 0.0]),
        np.array([-0.0, -0.4, 0.0]),
        np.array([-0.0, -0.4, 0.0]),
        np.array([-0.0, -0.4, 0.0]),
    ]

    gait_record = np.empty(
        (args.num_iters, 6, args.num_latent_action_per_iteration *
         args.num_timestep_per_footstep))
    for iter in range(args.num_iters):
        # reset robot to stand
        if args.sim:
            state = motion_library.exp_standing(env, shoulder=0.3, elbow=1.3)
            low_level_TG.reset(state)

        for j in range(args.num_latent_action_per_iteration):
            if not j % 5:
                target = target_velocity_test[int((j + 1) / 5)]

            pre_com_state = state
            latent_action = high_level_planning.plan_latent_action(
                state, target)
            low_level_TG.update_latent_action(state, latent_action)

            for step in range(1, args.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state, total_timestep = env.step(action), total_timestep + 1
                for q in range(6):
                    if state['foot_pos'][q][2] <= -0.005:
                        gait_record[iter][q][j * args.num_timestep_per_footstep
                                             + step - 1] = 1
                    else:
                        gait_record[iter][q][j * args.num_timestep_per_footstep
                                             + step - 1] = 0

            post_com_state = state

            # Check if robot still alive
            if utils.check_data_useful(state):
                high_level_obs, high_level_delta_obs = utils.HL_obs(
                    pre_com_state), utils.HL_delta_obs(pre_com_state,
                                                       post_com_state)
                predict_state = high_level_planning.model_predict(
                    high_level_obs, latent_action)

            # collect data

            velocity_tracking[0][j] = target[0]
            velocity_tracking[1][j] = target[1]
            velocity_tracking[2][j] = predict_state[4]
            velocity_tracking[3][j] = predict_state[5]
            velocity_tracking[4][j] = high_level_delta_obs[4]
            velocity_tracking[5][j] = high_level_delta_obs[5]
            for q in range(model_output_dim):
                prediction_evaluation[q][j] = high_level_delta_obs[q]
                prediction_evaluation[q +
                                      model_output_dim][j] = predict_state[q]

            total_latent_action += 1

            if utils.check_robot_dead(state):
                break

    np.save(save_dir + '/prediction_evaluation.npy',
            np.array(prediction_evaluation))
    np.save(save_dir + '/velocity_tracking.npy', velocity_tracking)
    np.save('./save_data/trial_2/gait_record.npy', gait_record)

    return
コード例 #5
0
def evaluation(args, r, high_level_planning):

    save_dir = utils.make_dir(
        os.path.join(args.save_dir +
                     '/trial_%s' % str(args.seed))) if args.save else None
    exp_variables = {
        'finish_exp': [0],
        'not_finish_one_iter': [1],
        'finish_one_step': [0],
        'update_z_action': [0],
    }
    ru.set_variables(r, exp_variables)

    for iter in range(args.num_iters):

        ru.wait_for_key(r, 'not_finish_one_iter', change=False)
        print('Iteration: ', iter + 1)
        input('Press any key after initialized robot')
        ru.wait_for_key(r, 'finish_one_step')
        state, exp_variables = ru.get_state(r)
        position_tracking[0].append(state['base_pos_x'][0])
        position_tracking[1].append(state['base_pos_y'][0])
        position_tracking[2].append(state['base_ori_euler'][2])

        j = 0
        target_index = 0
        total_cost = 0
        while True:
            target = square_circle_test[target_index]
            pre_com_state = state

            # take current state and plan for next z_action and sent to daisy
            latent_action = high_level_planning.plan_latent_action(
                pre_com_state, target)

            exp_variables['z_action'] = latent_action.tolist()
            exp_variables['update_z_action'] = [1]
            ru.set_variables(r, exp_variables)

            # check if finish one step
            ru.wait_for_key(r, 'finish_one_step')
            state, exp_variables = ru.get_state(r)
            post_com_state = state
            cost = utils.easy_cost(target, pre_com_state, post_com_state)
            total_cost += cost

            # if utils.check_data_useful(post_com_state):
            high_level_obs, high_level_delta_obs = utils.HL_obs(
                pre_com_state), utils.HL_delta_obs(pre_com_state,
                                                   post_com_state)
            predict_state = high_level_planning.model_predict(
                high_level_obs, latent_action)

            position_tracking[0].append(state['base_pos_x'][0])
            position_tracking[1].append(state['base_pos_y'][0])
            position_tracking[2].append(state['base_ori_euler'][2])
            print(state['base_pos_x'][0], state['base_pos_y'][0])

            j += 1

            if np.linalg.norm(target[0:2] - np.array(
                [state['base_pos_x'][0], state['base_pos_y'][0]])) < 0.2:
                print("Reach Goal %s!!!!" % str(target_index))
                np.save(
                    save_dir + '/' + args.high_level_policy_type +
                    '_square_test' + str(test_index) + '.npy',
                    np.array(position_tracking))
                target_index += 1
                if target_index == np.shape(square_circle_test)[0]:
                    np.save(
                        save_dir + '/' + args.high_level_policy_type +
                        '_square_test_cost_' + str(test_index) + '.npy',
                        np.array([total_cost]))
                    break

            if j > 500:
                print('Did not reach goal')
                break

        exp_variables['not_finish_one_iter'] = [0]
        ru.set_variables(r, exp_variables)
        input('wait for pos')
        exp_variables = ru.get_variables(r)
        state_record = exp_variables['record_pos']
        np.save(
            save_dir + '/' + 'state' + '_square_test' + str(test_index) +
            '.npy', np.array(state_record))

    # experiment ends
    exp_variables['finish_exp'] = [1]
    ru.set_variables(r, exp_variables)
コード例 #6
0
def evaluate_model(args):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=args.sim, render=False, logger = False)
    env.set_control_mode(args.control_mode)
    state = env.reset()
    utils.make_dir(args.save_dir)
    save_dir = utils.make_dir(os.path.join(args.save_dir + '/trial_%s' % str(args.seed))) if args.save else None
    total_timestep, total_latent_action = 0, 0 

    if args.sim:
        if args.low_level_policy_type =='NN':
            init_state = motion_library.exp_standing(env, shoulder = 0.3, elbow = 1.3)
        else:
            init_state = motion_library.exp_standing(env)
    model_obs_dim, model_output_dim = np.size(utils.HL_obs(state)), np.size(utils.HL_delta_obs(state, state))

    
    high_level_planning = HLPM.high_level_planning(
        device = device,
        model_obs_dim = model_obs_dim,
        z_dim = args.z_dim,
        model_output_dim = model_output_dim,
        model_hidden_num = args.model_hidden_num,
        batch_size = args.batch_size,
        model_lr = args.model_lr,
        high_level_policy_type = args.high_level_policy_type,
        update_sample_policy = args.update_sample_policy,
        update_sample_policy_lr = args.update_sample_policy_lr,
        low_level_policy_type = args.low_level_policy_type,
        num_timestep_per_footstep = args.num_timestep_per_footstep,
        model_update_steps = args.model_update_steps,
        control_frequency= args.control_frequency
    )
    high_level_planning.load_data(save_dir)
    high_level_planning.load_mean_var(save_dir+'/buffer_data')
    
    low_level_TG = LLTG.low_level_TG(
        device = device,
        z_dim = args.z_dim,
        a_dim = args.a_dim,
        num_timestep_per_footstep = args.num_timestep_per_footstep,
        batch_size = args.batch_size,
        low_level_policy_type = args.low_level_policy_type,
        update_low_level_policy = args.update_low_level_policy,
        update_low_level_policy_lr = args.update_low_level_policy_lr,
        init_state = init_state,
    )

    if args.low_level_policy_type =='NN':
        low_level_TG.load_model('./save_data/trial_2')

    cost_record = []
    position_tracking = [[],[],[],[]]

    for iter in range(args.num_iters):
        # reset robot to stand 
        if args.sim:
            if args.low_level_policy_type =='NN':
                init_state = motion_library.exp_standing(env, shoulder = 0.3, elbow = 1.3)
            else:
                init_state = motion_library.exp_standing(env)
        low_level_TG.reset(state)
        position_tracking[0].append(state['base_pos_x'][0])
        position_tracking[1].append(state['base_pos_y'][0])
        position_tracking[2].append(state['base_ori_euler'][2])
        position_tracking[3].append(np.linalg.norm(state['base_velocity'][0:2]))
        
        j = 0
        total_cost = 0
        # while True:
        for i in range(args.num_latent_action_per_iteration):
            # target =  np.array([0,target_velocity,0])
            target = target_velocity

            pre_com_state = state
            latent_action = high_level_planning.plan_latent_action(state, target)
            low_level_TG.update_latent_action(state,latent_action)
            for step in range(1, args.num_timestep_per_footstep+1):
                action = low_level_TG.get_action(state, step)
                state, total_timestep = env.step(action), total_timestep + 1
                # collect data
                position_tracking[0].append(state['base_pos_x'][0])
                position_tracking[1].append(state['base_pos_y'][0])
                position_tracking[2].append(state['base_ori_euler'][2])
                position_tracking[3].append(np.linalg.norm(state['base_velocity'][0:2]))


            post_com_state = state
            cost = utils.easy_cost(target,pre_com_state, post_com_state)
            total_cost += cost

            
        
            # Check if robot still alive
            if utils.check_data_useful(state):
                high_level_obs , high_level_delta_obs = utils.HL_obs(pre_com_state), utils.HL_delta_obs(pre_com_state, post_com_state)
                predict_state = high_level_planning.model_predict(high_level_obs, latent_action)
            
           
            total_latent_action += 1
            j+=1

            # if np.linalg.norm(target[0:2] - np.array([state['base_pos_x'][0], state['base_pos_y'][0]])) + np.linalg.norm(target[2:4] - high_level_delta_obs[0:2])  < 0.2 :
            #     print("Reach Goal %s!!!!")
            #     break

            # if j>20:
            #     print('Did not reach goal')
            #     break
        
        cost_record.append(total_cost)
        print(cost_record)
    np.save(save_dir + '/'+args.high_level_policy_type+'_' +args.low_level_policy_type +'_different_cost_test'+ str(cost_index)+'.npy', np.array(cost_record) )
    np.save(save_dir + '/'+args.high_level_policy_type+'_' +args.low_level_policy_type +'_com.npy', np.array(position_tracking) )
    return