Beispiel #1
0
# (5,931), (6, 286)
from mpnet.sst_envs.utils import load_data, get_obs_3d
system = "quadrotor_obs"

import sys
#print 'Number of arguments:', len(sys.argv), 'arguments.'
#print 'Argument List:', str(sys.argv)
env_id = int(sys.argv[1])
traj_id = int(sys.argv[2])

data = load_data(system, env_id, traj_id)
print(data)
cost = data['cost'].sum()
print(cost)

obs_list = get_obs_3d('quadrotor_obs', "obs")[env_id]
obc_list = np.load(
    '/media/arclabdl1/HD1/YLmiao/YLmiao_from_unicorn/YLmiao/mpc-mpnet-cuda-yinglong/mpnet/sst_envs/{}_env_vox.npy'
    .format(system))
obc_list = obc_list[env_id].reshape(-1)

path_filename = '/media/arclabdl1/HD1/Linjun/mpc-mpnet-py/results/traj/quadrotor_obs/shm/%d/env_%d_id_%d.npy' % (
    env_id, env_id, traj_id)
mpc_path_path = np.load(path_filename, allow_pickle=True)

mpc_tree_path, mpc_tree_control, mpc_tree_time = np.load(
    '/media/arclabdl1/HD1/YLmiao/YLmiao_from_unicorn/YLmiao/mpc-mpnet-cuda-yinglong/results/cpp_full/quadrotor_obs/default/paths/path_%d_%d.npy'
    % (env_id, traj_id),
    allow_pickle=True)

print('env_id: ')
Beispiel #2
0
def experiment(env_id,
               traj_id,
               verbose=False,
               system='cartpole_obs',
               params=None):
    print("env {}, traj {}".format(env_id, traj_id))
    # print(params)
    if system in ['cartpole_obs', 'acrobot_obs', 'car_obs']:
        obs_list = get_obs(system, env_id)[env_id].reshape(-1, 2)
    elif system in ['quadrotor_obs']:
        obs_list = get_obs_3d(system, env_id)[env_id].reshape(-1, 3)
    data = load_data(system, env_id, traj_id)
    ref_path = data['path']
    start_goal = data['start_goal']
    # print(start_goal)
    env_vox = np.load('mpnet/sst_envs/data/{}_env_vox.npy'.format(system))
    obc = env_vox[env_id]
    # print(obc.reshape(-1), obc.reshape(-1).shape)
    # print(obs_list)
    number_of_iterations = params['number_of_iterations']  # 3000000#
    min_time_steps = params[
        'min_time_steps'] if 'min_time_steps' in params else 0
    max_time_steps = params[
        'max_time_steps'] if 'min_time_steps' in params else 800
    integration_step = params['dt']
    planner = _deep_smp_module.DSSTMPCWrapper(
        system_type=system,
        start_state=np.array(data['start_goal'][0]),
        # goal_state=np.array(ref_path[-1]),
        goal_state=np.array(data['start_goal'][1]),
        goal_radius=params['goal_radius'],
        random_seed=0,
        sst_delta_near=params['sst_delta_near'],
        sst_delta_drain=params['sst_delta_drain'],
        obs_list=obs_list,
        width=params['width'],
        verbose=params['verbose'],
        mpnet_weight_path=params['mpnet_weight_path'],
        cost_predictor_weight_path=params['cost_predictor_weight_path'],
        cost_to_go_predictor_weight_path=params[
            'cost_to_go_predictor_weight_path'],
        num_sample=params['cost_samples'],
        shm_max_step=params['shm_max_steps'],
        np=params['n_problem'],
        ns=params['n_sample'],
        nt=params['n_t'],
        ne=params['n_elite'],
        max_it=params['max_it'],
        converge_r=params['converge_r'],
        mu_u=params['mu_u'],
        std_u=params['sigma_u'],
        mu_t=params['mu_t'],
        std_t=params['sigma_t'],
        t_max=params['t_max'],
        step_size=params['step_size'],
        integration_step=params['dt'],
        device_id=params['device_id'],
        refine_lr=params['refine_lr'],
        weights_array=params['weights_array'],
        obs_voxel_array=obc.reshape(-1))
    solution = planner.get_solution()

    data_cost = np.sum(data['cost'])
    # th = 1.2 * data_cost
    # start experiment
    tic = time.perf_counter()
    for iteration in tqdm(range(number_of_iterations)):
        planner.deep_smp_step(
            params['refine'],
            refine_threshold=params['refine_threshold'],
            using_one_step_cost=params['using_one_step_cost'],
            cost_reselection=params['cost_reselection'],
            goal_bias=params['goal_bias'],
            NP=params['n_problem'])
        solution = planner.get_solution()
        # and np.sum(solution[2]) < th:
        if solution is not None or time.perf_counter(
        ) - tic > params['max_planning_time']:
            break
    toc = time.perf_counter()
    # if solution is not None:
    #     print(solution[0], solution[2])
    #     print(mpc_mpnet.costs)
    costs = solution[2].sum() if solution is not None else np.inf
    result = {
        'env_id': env_id,
        'traj_id': traj_id,
        'planning_time': toc - tic,
        'successful': solution is not None,
        'costs': costs,
        'traj': solution[0]
    }

    print("\t{}, time: {} seconds, {}(ref:{}) costs".format(
        result['successful'], result['planning_time'], result['costs'],
        np.sum(data['cost'])))
    return result
Beispiel #3
0
def experiment(env_id,
               traj_id,
               verbose=False,
               system='cartpole_obs',
               params=None):
    print("env {}, traj {}".format(env_id, traj_id))
    if system in ['cartpole_obs', 'acrobot_obs', 'car_obs']:
        obs_list = get_obs(system, env_id)[env_id]
    elif system in ['quadrotor_obs']:
        obs_list = get_obs_3d(system, env_id)[env_id].reshape(-1, 3)
    data = load_data(system, env_id, traj_id)
    ref_path = data['path']
    ref_cost = data['cost'].sum()
    start_goal = data['start_goal']
    min_time_steps = params['min_time_steps']
    max_time_steps = params['max_time_steps']
    integration_step = params['integration_step']

    if system == 'quadrotor_obs':
        env_constr = standard_cpp_systems.RectangleObs3D
        env = env_constr(obs_list, params['width'], 'quadrotor')
    else:
        print("unkown system")
        exit(-1)
    planner = _sst_module.SSTWrapper(state_bounds=env.get_state_bounds(),
                                     control_bounds=env.get_control_bounds(),
                                     distance=env.distance_computer(),
                                     start_state=start_goal[0],
                                     goal_state=start_goal[-1],
                                     goal_radius=params['goal_radius'],
                                     random_seed=params['random_seed'],
                                     sst_delta_near=params['sst_delta_near'],
                                     sst_delta_drain=params['sst_delta_drain'])
    solution = planner.get_solution()

    data_cost = np.sum(data['cost'])
    # th = 1.2 * data_cost
    ## start experiment
    tic = time.perf_counter()
    for iteration in tqdm(range(params['number_of_iterations'])):
        planner.step(env, min_time_steps, max_time_steps, integration_step)
        if iteration % 100 == 0:
            solution = planner.get_solution()
            if (solution is not None and solution[2].sum() <= ref_cost * 1.4
                ) or time.perf_counter() - tic > params[
                    'max_planning_time']:  #and np.sum(solution[2]) < th:
                break
    toc = time.perf_counter()

    costs = solution[2].sum() if solution is not None else np.inf
    result = {
        'env_id': env_id,
        'traj_id': traj_id,
        'planning_time': toc - tic,
        'successful': solution is not None,
        'costs': costs
    }

    print("\t{}, time: {} seconds, {}(ref:{}) costs".format(
        result['successful'], result['planning_time'], result['costs'],
        np.sum(data['cost'])))
    return result