def experiment(env_id, traj_id, verbose=False): obs_list = get_obs('acrobot_obs', env_id) data = load_data('acrobot_obs', env_id, traj_id) ref_path = data['path'] start_goal = data['start_goal'] env_vox = torch.from_numpy(np.load('mpnet/sst_envs/acrobot_obs_env_vox.npy')[env_id]).unsqueeze(0).float() tic = time.perf_counter() final_start = ref_path[0] final_goal = ref_path[-1] ## initiate planner params = get_params(final_goal) mpc_mpnet = MPCMPNetPlanner( params, final_start, final_goal, env_vox, system="acrobot_obs", setup="default_norm", #setup="default_norm_aug", #setup="norm_nodiff_noaug_20step2e-2", ep=5000, obs_list=obs_list[env_id], verbose=verbose) mpc_mpnet.mpnet.train() ## start experiment it = 0 while it < params['max_plan_it'] and not mpc_mpnet.reached: mpc_mpnet.reset() for i in range(20): if verbose: print('iteration:', i) mpc_mpnet.step() if mpc_mpnet.reached: break it += 1 toc = time.perf_counter() # print(mpc_mpnet.costs) costs = np.sum(mpc_mpnet.costs) if len(mpc_mpnet.costs) > 0 else np.inf result = { 'env_id': env_id, 'traj_id': traj_id, 'planning_time': toc-tic, 'successful': mpc_mpnet.reached, 'costs': costs } print("env {}, traj {}, {}, time: {} seconds, {}(ref:{}) costs".format( env_id, traj_id, result['successful'], result['planning_time'], result['costs'], data['cost'].sum())) return result
def experiment(env_id, traj_id, verbose=False, system='acrobot_obs', params=None): print("env {}, traj {}".format(env_id, traj_id)) data = load_data(system, env_id, traj_id) ref_path = data['path'] start_goal = data['start_goal'] costs = data['cost'].sum() # print(mpc_mpnet.costs) result = { 'env_id': env_id, 'traj_id': traj_id, 'planning_time': 0, 'successful': 1, 'costs': costs } print("\t{}, time: {} seconds, {}(ref:{}) costs".format( result['successful'], result['planning_time'], result['costs'], data['cost'].sum())) return result
def experiment(env_id, traj_id, verbose=False, system='cartpole_obs', params=None): print("env {}, traj {}".format(env_id, traj_id)) # print(params) if system in ['cartpole_obs', 'acrobot_obs', 'car_obs']: obs_list = get_obs(system, env_id)[env_id].reshape(-1, 2) elif system in ['quadrotor_obs']: obs_list = get_obs_3d(system, env_id)[env_id].reshape(-1, 3) data = load_data(system, env_id, traj_id) ref_path = data['path'] start_goal = data['start_goal'] # print(start_goal) env_vox = np.load('mpnet/sst_envs/data/{}_env_vox.npy'.format(system)) obc = env_vox[env_id] # print(obc.reshape(-1), obc.reshape(-1).shape) # print(obs_list) number_of_iterations = params['number_of_iterations'] # 3000000# min_time_steps = params[ 'min_time_steps'] if 'min_time_steps' in params else 0 max_time_steps = params[ 'max_time_steps'] if 'min_time_steps' in params else 800 integration_step = params['dt'] planner = _deep_smp_module.DSSTMPCWrapper( system_type=system, start_state=np.array(data['start_goal'][0]), # goal_state=np.array(ref_path[-1]), goal_state=np.array(data['start_goal'][1]), goal_radius=params['goal_radius'], random_seed=0, sst_delta_near=params['sst_delta_near'], sst_delta_drain=params['sst_delta_drain'], obs_list=obs_list, width=params['width'], verbose=params['verbose'], mpnet_weight_path=params['mpnet_weight_path'], cost_predictor_weight_path=params['cost_predictor_weight_path'], cost_to_go_predictor_weight_path=params[ 'cost_to_go_predictor_weight_path'], num_sample=params['cost_samples'], shm_max_step=params['shm_max_steps'], np=params['n_problem'], ns=params['n_sample'], nt=params['n_t'], ne=params['n_elite'], max_it=params['max_it'], converge_r=params['converge_r'], mu_u=params['mu_u'], std_u=params['sigma_u'], mu_t=params['mu_t'], std_t=params['sigma_t'], t_max=params['t_max'], step_size=params['step_size'], integration_step=params['dt'], device_id=params['device_id'], refine_lr=params['refine_lr'], weights_array=params['weights_array'], obs_voxel_array=obc.reshape(-1)) solution = planner.get_solution() data_cost = np.sum(data['cost']) # th = 1.2 * data_cost # start experiment tic = time.perf_counter() for iteration in tqdm(range(number_of_iterations)): planner.deep_smp_step( params['refine'], refine_threshold=params['refine_threshold'], using_one_step_cost=params['using_one_step_cost'], cost_reselection=params['cost_reselection'], goal_bias=params['goal_bias'], NP=params['n_problem']) solution = planner.get_solution() # and np.sum(solution[2]) < th: if solution is not None or time.perf_counter( ) - tic > params['max_planning_time']: break toc = time.perf_counter() # if solution is not None: # print(solution[0], solution[2]) # print(mpc_mpnet.costs) costs = solution[2].sum() if solution is not None else np.inf result = { 'env_id': env_id, 'traj_id': traj_id, 'planning_time': toc - tic, 'successful': solution is not None, 'costs': costs, 'traj': solution[0] } print("\t{}, time: {} seconds, {}(ref:{}) costs".format( result['successful'], result['planning_time'], result['costs'], np.sum(data['cost']))) return result
def experiment(env_id, traj_id, verbose=False, system='cartpole_obs', params_module=None): print("env {}, traj {}".format(env_id, traj_id)) obs_list = get_obs(system, env_id)[env_id].reshape(-1, 2) data = load_data(system, env_id, traj_id) ref_path = data['path'] start_goal = data['start_goal'] # print(start_goal) env_vox = np.load('mpnet/sst_envs/{}_env_vox.npy'.format(system)) obc = env_vox[env_id, 0] # print(obs_list) params = params_module.get_params() number_of_iterations = params['number_of_iterations'] #3000000# min_time_steps = params[ 'min_time_steps'] if 'min_time_steps' in params else 80 max_time_steps = params[ 'max_time_steps'] if 'min_time_steps' in params else 400 integration_step = params['dt'] planner = _deep_smp_module.DSSTMPCWrapper( system, start_state=np.array(ref_path[0]), goal_state=np.array(ref_path[-1]), # goal_state=np.array(data['start_goal'][-1]), goal_radius=params['goal_radius'], random_seed=0, sst_delta_near=params['sst_delta_near'], sst_delta_drain=params['sst_delta_drain'], goal_bias=params['goal_bias'], obs_list=obs_list, width=params['width'], verbose=params['verbose'], mpnet_weight_path=params['mpnet_weight_path'], cost_predictor_weight_path=params['cost_predictor_weight_path'], cost_to_go_predictor_weight_path=params[ 'cost_to_go_predictor_weight_path'], num_sample=params['cost_samples'], ns=params['n_sample'], nt=params['n_t'], ne=params['n_elite'], max_it=params['max_it'], converge_r=params['converge_r'], mu_u=params['mu_u'], std_u=params['sigma_u'], mu_t=params['mu_t'], std_t=params['sigma_t'], t_max=params['t_max'], step_size=params['step_size'], integration_step=params['dt'], device_id=params['device_id'], refine_lr=params['refine_lr'], ) solution = planner.get_solution() data_cost = np.sum(data['cost']) th = 1.2 * data_cost ## start experiment tic = time.perf_counter() for iteration in tqdm(range(number_of_iterations)): # planner.step(min_time_steps, max_time_steps, params['dt']) if params['hybrid']: if np.random.rand() < params['hybrid_p']: planner.step(min_time_steps, max_time_steps, integration_step) else: planner.neural_step( obc.reshape(-1), params['refine'], refine_threshold=params['refine_threshold'], using_one_step_cost=params['using_one_step_cost'], cost_reselection=params['cost_reselection']) else: # planner.step(min_time_steps, max_time_steps, integration_step) planner.neural_step( obc.reshape(-1), params['refine'], refine_threshold=params['refine_threshold'], using_one_step_cost=params['using_one_step_cost'], cost_reselection=params['cost_reselection']) # planner.mpc_step(integration_step) solution = planner.get_solution() if solution is not None: #and np.sum(solution[2]) < th: break toc = time.perf_counter() if solution is not None: print(solution[0], solution[2]) # print(mpc_mpnet.costs) costs = solution[2].sum() if solution is not None else np.inf result = { 'env_id': env_id, 'traj_id': traj_id, 'planning_time': toc - tic, 'successful': solution is not None, 'costs': costs } print("\t{}, time: {} seconds, {}(ref:{}) costs".format( result['successful'], result['planning_time'], result['costs'], np.sum(data['cost']))) return result
import importlib from matplotlib import pyplot as plt from sparse_rrt import _deep_smp_module, _sst_module #[(0, 932), (1, 935), (2, 923), (8, 141), (5,931), (7, 927)] # (5,931), (6, 286) from mpnet.sst_envs.utils import load_data, get_obs_3d system = "quadrotor_obs" import sys #print 'Number of arguments:', len(sys.argv), 'arguments.' #print 'Argument List:', str(sys.argv) env_id = int(sys.argv[1]) traj_id = int(sys.argv[2]) data = load_data(system, env_id, traj_id) print(data) cost = data['cost'].sum() print(cost) obs_list = get_obs_3d('quadrotor_obs', "obs")[env_id] obc_list = np.load( '/media/arclabdl1/HD1/YLmiao/YLmiao_from_unicorn/YLmiao/mpc-mpnet-cuda-yinglong/mpnet/sst_envs/{}_env_vox.npy' .format(system)) obc_list = obc_list[env_id].reshape(-1) path_filename = '/media/arclabdl1/HD1/Linjun/mpc-mpnet-py/results/traj/quadrotor_obs/shm/%d/env_%d_id_%d.npy' % ( env_id, env_id, traj_id) mpc_path_path = np.load(path_filename, allow_pickle=True) mpc_tree_path, mpc_tree_control, mpc_tree_time = np.load(
def experiment(env_id, traj_id, verbose=False, system='cartpole_obs', params=None): print("env {}, traj {}".format(env_id, traj_id)) if system in ['cartpole_obs', 'acrobot_obs', 'car_obs']: obs_list = get_obs(system, env_id)[env_id] elif system in ['quadrotor_obs']: obs_list = get_obs_3d(system, env_id)[env_id].reshape(-1, 3) data = load_data(system, env_id, traj_id) ref_path = data['path'] ref_cost = data['cost'].sum() start_goal = data['start_goal'] min_time_steps = params['min_time_steps'] max_time_steps = params['max_time_steps'] integration_step = params['integration_step'] if system == 'quadrotor_obs': env_constr = standard_cpp_systems.RectangleObs3D env = env_constr(obs_list, params['width'], 'quadrotor') else: print("unkown system") exit(-1) planner = _sst_module.SSTWrapper(state_bounds=env.get_state_bounds(), control_bounds=env.get_control_bounds(), distance=env.distance_computer(), start_state=start_goal[0], goal_state=start_goal[-1], goal_radius=params['goal_radius'], random_seed=params['random_seed'], sst_delta_near=params['sst_delta_near'], sst_delta_drain=params['sst_delta_drain']) solution = planner.get_solution() data_cost = np.sum(data['cost']) # th = 1.2 * data_cost ## start experiment tic = time.perf_counter() for iteration in tqdm(range(params['number_of_iterations'])): planner.step(env, min_time_steps, max_time_steps, integration_step) if iteration % 100 == 0: solution = planner.get_solution() if (solution is not None and solution[2].sum() <= ref_cost * 1.4 ) or time.perf_counter() - tic > params[ 'max_planning_time']: #and np.sum(solution[2]) < th: break toc = time.perf_counter() costs = solution[2].sum() if solution is not None else np.inf result = { 'env_id': env_id, 'traj_id': traj_id, 'planning_time': toc - tic, 'successful': solution is not None, 'costs': costs } print("\t{}, time: {} seconds, {}(ref:{}) costs".format( result['successful'], result['planning_time'], result['costs'], np.sum(data['cost']))) return result
def experiment(env_id, traj_id, verbose=False, model='acrobot_obs', params_module=None): print("env {}, traj {}".format(env_id, traj_id)) obs_list = get_obs(model, env_id)[env_id].reshape(-1, 2) data = load_data(model, env_id, traj_id) ref_path = data['path'] start_goal = data['start_goal'] env_vox = np.load('mpnet/sst_envs/data/acrobot_obs_env_vox.npy') obc = env_vox[env_id, 0] width = 6 number_of_iterations = 60000 number_of_refine = 120000 min_time_steps, max_time_steps = 1, 200 integration_step = 1e-2 params = params_module.get_params() planner = _deep_smp_module.DSSTMPCWrapper( start_state=np.array(ref_path[0]), goal_state=np.array(ref_path[-1]), goal_radius=params['goal_radius'], random_seed=0, sst_delta_near=params['sst_delta_near'], sst_delta_drain=params['sst_delta_drain'], obs_list=obs_list, width=params['width'], verbose=params['verbose'], mpnet_weight_path=params['mpnet_weight_path'], cost_predictor_weight_path=params['cost_predictor_weight_path'], cost_to_go_predictor_weight_path=params['cost_to_go_predictor_weight_path'], num_sample=params['cost_samples'], ns=params['n_sample'], nt=params['n_t'], ne=params['n_elite'], max_it=params['max_it'], converge_r=params['converge_r'], mu_u=params['mu_u'], std_u=params['sigma_u'], mu_t=params['mu_t'], std_t=params['sigma_t'], t_max=params['t_max'], step_size=params['step_size'], integration_step=params['dt'], device_id=params['device_id'], refine_lr=params['refine_lr'] ) solution = planner.get_solution() ## start experiment tic = time.perf_counter() for iteration in tqdm(range(number_of_iterations)): planner.mpc_step(integration_step) # planner.step(min_time_steps, max_time_steps, integration_step) solution = planner.get_solution() if solution is not None: break for iteration in tqdm(range(number_of_refine)): solution = planner.get_solution() # if iteration % 1000 == 0: # print(solution[2].sum(), data['cost'].sum() ) if solution[2].sum() < data['cost'].sum() * 1.2: break else: planner.step(min_time_steps, max_time_steps, integration_step) toc = time.perf_counter() # print(mpc_mpnet.costs) costs = solution[2].sum() if solution is not None else np.inf result = { 'env_id': env_id, 'traj_id': traj_id, 'planning_time': toc-tic, 'successful': solution is not None, 'costs': costs } print("\t{}, time: {} seconds, {}(ref:{}) costs".format( result['successful'], result['planning_time'], result['costs'], data['cost'].sum())) return result