def experiment(env_id, traj_id, verbose=False, system='cartpole_obs', params=None):
    """Run one DSST-MPC planning trial and report how it went.

    Loads the obstacle list, the reference data and the voxelised
    environment for ``(system, env_id, traj_id)``, builds a
    ``DSSTMPCWrapper`` and calls ``deep_smp_step`` until the planner
    returns a solution, the iteration budget is spent, or
    ``params['max_planning_time']`` seconds have elapsed.

    Args:
        env_id: Environment index; selects the obstacle list and voxel grid.
        traj_id: Trajectory index forwarded to ``load_data``.
        verbose: Not read by this function; planner verbosity comes from
            ``params['verbose']``.
        system: System name. 2-column obstacles are used for
            ``cartpole_obs``/``acrobot_obs``/``car_obs`` and 3-column
            obstacles for ``quadrotor_obs``.
        params: Mapping of planner settings, indexed by key. Must be given.

    Returns:
        dict with keys ``env_id``, ``traj_id``, ``planning_time`` (seconds),
        ``successful`` (bool), ``costs`` (sum of ``solution[2]`` or
        ``np.inf``) and ``traj`` (``solution[0]`` or ``None`` on failure).

    Raises:
        ValueError: If ``system`` is not one of the supported names.
    """
    print("env {}, traj {}".format(env_id, traj_id))

    if system in ['cartpole_obs', 'acrobot_obs', 'car_obs']:
        obs_list = get_obs(system, env_id)[env_id].reshape(-1, 2)
    elif system in ['quadrotor_obs']:
        obs_list = get_obs_3d(system, env_id)[env_id].reshape(-1, 3)
    else:
        # Fail early with a clear message instead of hitting an unbound
        # ``obs_list`` when the planner is constructed.
        raise ValueError("unsupported system: {!r}".format(system))

    data = load_data(system, env_id, traj_id)
    env_vox = np.load('mpnet/sst_envs/data/{}_env_vox.npy'.format(system))
    obc = env_vox[env_id]

    number_of_iterations = params['number_of_iterations']

    planner = _deep_smp_module.DSSTMPCWrapper(
        system_type=system,
        start_state=np.array(data['start_goal'][0]),
        goal_state=np.array(data['start_goal'][1]),
        goal_radius=params['goal_radius'],
        random_seed=0,
        sst_delta_near=params['sst_delta_near'],
        sst_delta_drain=params['sst_delta_drain'],
        obs_list=obs_list,
        width=params['width'],
        verbose=params['verbose'],
        mpnet_weight_path=params['mpnet_weight_path'],
        cost_predictor_weight_path=params['cost_predictor_weight_path'],
        cost_to_go_predictor_weight_path=params[
            'cost_to_go_predictor_weight_path'],
        num_sample=params['cost_samples'],
        shm_max_step=params['shm_max_steps'],
        np=params['n_problem'],
        ns=params['n_sample'],
        nt=params['n_t'],
        ne=params['n_elite'],
        max_it=params['max_it'],
        converge_r=params['converge_r'],
        mu_u=params['mu_u'],
        std_u=params['sigma_u'],
        mu_t=params['mu_t'],
        std_t=params['sigma_t'],
        t_max=params['t_max'],
        step_size=params['step_size'],
        integration_step=params['dt'],
        device_id=params['device_id'],
        refine_lr=params['refine_lr'],
        weights_array=params['weights_array'],
        obs_voxel_array=obc.reshape(-1))

    # Queried once up front so ``solution`` is defined even with a zero
    # iteration budget.
    solution = planner.get_solution()
    ref_cost = np.sum(data['cost'])

    # start experiment
    tic = time.perf_counter()
    for _ in tqdm(range(number_of_iterations)):
        planner.deep_smp_step(
            params['refine'],
            refine_threshold=params['refine_threshold'],
            using_one_step_cost=params['using_one_step_cost'],
            cost_reselection=params['cost_reselection'],
            goal_bias=params['goal_bias'],
            NP=params['n_problem'])
        solution = planner.get_solution()
        timed_out = time.perf_counter() - tic > params['max_planning_time']
        if solution is not None or timed_out:
            break
    toc = time.perf_counter()

    found = solution is not None
    result = {
        'env_id': env_id,
        'traj_id': traj_id,
        'planning_time': toc - tic,
        'successful': found,
        'costs': solution[2].sum() if found else np.inf,
        # A failed run has no trajectory; indexing ``None`` would raise.
        'traj': solution[0] if found else None,
    }

    print("\t{}, time: {} seconds, {}(ref:{}) costs".format(
        result['successful'], result['planning_time'], result['costs'],
        ref_cost))
    return result
# NOTE(review): ``pickle.load`` can execute arbitrary code while
# unpickling -- confirm that ``sgs`` always comes from a trusted source.
sgs = pickle.load(sgs)

# Build the CEM-based planner for the acrobot system. The start is the
# first entry of ``path`` and the goal is the last entry of the unpickled
# ``sgs``.
planner = _deep_smp_module.DSSTMPCWrapper(
    system_type='acrobot_obs',
    solver_type="cem",
    start_state=np.array(path[0]),
    goal_state=np.array(sgs[-1]),
    goal_radius=params['goal_radius'],
    random_seed=0,
    sst_delta_near=params['sst_delta_near'],
    sst_delta_drain=params['sst_delta_drain'],
    obs_list=obs_list,
    width=params['width'],
    verbose=params['verbose'],
    mpnet_weight_path=params['mpnet_weight_path'],
    cost_predictor_weight_path=params['cost_predictor_weight_path'],
    cost_to_go_predictor_weight_path=params[
        'cost_to_go_predictor_weight_path'],
    num_sample=params['cost_samples'],
    np=params['n_problem'],
    ns=params['n_sample'],
    nt=params['n_t'],
    ne=params['n_elite'],
    max_it=params['max_it'],
    converge_r=params['converge_r'],
    mu_u=params['mu_u'],
    std_u=params['sigma_u'],
    mu_t=params['mu_t'],
    std_t=params['sigma_t'],
    t_max=params['t_max'],
    step_size=params['step_size'],
    integration_step=params['dt'],
    device_id=params['device_id'],
    refine_lr=params['refine_lr'],
    weights_array=params['weights_array'],
    obs_voxel_array=obc_list.reshape(-1),
)
def experiment(env_id, traj_id, verbose=False, system='cartpole_obs', params_module=None):
    """Run one planning trial mixing ``step`` and ``neural_step`` calls.

    Settings come from ``params_module.get_params()``. On every iteration
    the planner takes either a plain ``step`` (only when ``params['hybrid']``
    is truthy and a uniform random draw falls below ``params['hybrid_p']``)
    or a ``neural_step`` fed with the flattened voxel grid. The loop stops
    as soon as the planner returns a solution or the iteration budget
    ``params['number_of_iterations']`` is used up.

    Args:
        env_id: Environment index; selects the obstacle list and voxel grid.
        traj_id: Trajectory index forwarded to ``load_data``.
        verbose: Not read by this function; planner verbosity comes from
            ``params['verbose']``.
        system: System name, used for data loading and passed to the planner.
        params_module: Object exposing ``get_params()``. Must be given.

    Returns:
        dict with keys ``env_id``, ``traj_id``, ``planning_time`` (seconds),
        ``successful`` (bool) and ``costs`` (sum of ``solution[2]`` or
        ``np.inf`` when no solution was found).
    """
    print("env {}, traj {}".format(env_id, traj_id))
    obs_list = get_obs(system, env_id)[env_id].reshape(-1, 2)
    data = load_data(system, env_id, traj_id)
    ref_path = data['path']

    env_vox = np.load('mpnet/sst_envs/{}_env_vox.npy'.format(system))
    # Flatten once; the same voxel vector is passed to every neural step.
    obc_flat = env_vox[env_id, 0].reshape(-1)

    params = params_module.get_params()
    number_of_iterations = params['number_of_iterations']
    # Each bound falls back to its own default independently of the other.
    min_time_steps = params.get('min_time_steps', 80)
    max_time_steps = params.get('max_time_steps', 400)
    integration_step = params['dt']

    planner = _deep_smp_module.DSSTMPCWrapper(
        system,
        start_state=np.array(ref_path[0]),
        goal_state=np.array(ref_path[-1]),
        goal_radius=params['goal_radius'],
        random_seed=0,
        sst_delta_near=params['sst_delta_near'],
        sst_delta_drain=params['sst_delta_drain'],
        goal_bias=params['goal_bias'],
        obs_list=obs_list,
        width=params['width'],
        verbose=params['verbose'],
        mpnet_weight_path=params['mpnet_weight_path'],
        cost_predictor_weight_path=params['cost_predictor_weight_path'],
        cost_to_go_predictor_weight_path=params[
            'cost_to_go_predictor_weight_path'],
        num_sample=params['cost_samples'],
        ns=params['n_sample'],
        nt=params['n_t'],
        ne=params['n_elite'],
        max_it=params['max_it'],
        converge_r=params['converge_r'],
        mu_u=params['mu_u'],
        std_u=params['sigma_u'],
        mu_t=params['mu_t'],
        std_t=params['sigma_t'],
        t_max=params['t_max'],
        step_size=params['step_size'],
        integration_step=params['dt'],
        device_id=params['device_id'],
        refine_lr=params['refine_lr'],
    )

    # Queried once up front so ``solution`` is defined even with a zero
    # iteration budget.
    solution = planner.get_solution()

    # start experiment
    tic = time.perf_counter()
    for _ in tqdm(range(number_of_iterations)):
        # Short-circuit keeps the random draw (and the ``hybrid_p`` lookup)
        # restricted to hybrid mode.
        if params['hybrid'] and np.random.rand() < params['hybrid_p']:
            planner.step(min_time_steps, max_time_steps, integration_step)
        else:
            planner.neural_step(
                obc_flat,
                params['refine'],
                refine_threshold=params['refine_threshold'],
                using_one_step_cost=params['using_one_step_cost'],
                cost_reselection=params['cost_reselection'])
        solution = planner.get_solution()
        if solution is not None:
            break
    toc = time.perf_counter()

    found = solution is not None
    if found:
        print(solution[0], solution[2])
    result = {
        'env_id': env_id,
        'traj_id': traj_id,
        'planning_time': toc - tic,
        'successful': found,
        'costs': solution[2].sum() if found else np.inf,
    }

    print("\t{}, time: {} seconds, {}(ref:{}) costs".format(
        result['successful'], result['planning_time'], result['costs'],
        np.sum(data['cost'])))
    return result
def experiment(env_id, traj_id, verbose=False, model='acrobot_obs', params_module=None):
    """Find a solution with ``mpc_step``, then refine it with ``step``.

    Phase 1 calls ``planner.mpc_step`` for up to 60000 iterations until the
    planner returns a solution. Phase 2 runs only if phase 1 succeeded: it
    calls ``planner.step`` for up to 120000 iterations, stopping once the
    solution cost drops below 1.2 times the summed reference cost.

    Args:
        env_id: Environment index; selects the obstacle list.
        traj_id: Trajectory index forwarded to ``load_data``.
        verbose: Not read by this function; planner verbosity comes from
            ``params['verbose']``.
        model: System name used for loading obstacles and reference data.
        params_module: Object exposing ``get_params()``. Must be given.

    Returns:
        dict with keys ``env_id``, ``traj_id``, ``planning_time`` (seconds),
        ``successful`` (bool) and ``costs`` (sum of ``solution[2]`` or
        ``np.inf`` when no solution was found).
    """
    print("env {}, traj {}".format(env_id, traj_id))
    obs_list = get_obs(model, env_id)[env_id].reshape(-1, 2)
    data = load_data(model, env_id, traj_id)
    ref_path = data['path']

    number_of_iterations = 60000
    number_of_refine = 120000
    min_time_steps, max_time_steps = 1, 200
    integration_step = 1e-2

    params = params_module.get_params()
    planner = _deep_smp_module.DSSTMPCWrapper(
        start_state=np.array(ref_path[0]),
        goal_state=np.array(ref_path[-1]),
        goal_radius=params['goal_radius'],
        random_seed=0,
        sst_delta_near=params['sst_delta_near'],
        sst_delta_drain=params['sst_delta_drain'],
        obs_list=obs_list,
        width=params['width'],
        verbose=params['verbose'],
        mpnet_weight_path=params['mpnet_weight_path'],
        cost_predictor_weight_path=params['cost_predictor_weight_path'],
        cost_to_go_predictor_weight_path=params[
            'cost_to_go_predictor_weight_path'],
        num_sample=params['cost_samples'],
        ns=params['n_sample'],
        nt=params['n_t'],
        ne=params['n_elite'],
        max_it=params['max_it'],
        converge_r=params['converge_r'],
        mu_u=params['mu_u'],
        std_u=params['sigma_u'],
        mu_t=params['mu_t'],
        std_t=params['sigma_t'],
        t_max=params['t_max'],
        step_size=params['step_size'],
        integration_step=params['dt'],
        device_id=params['device_id'],
        refine_lr=params['refine_lr'],
    )
    solution = planner.get_solution()

    # start experiment
    tic = time.perf_counter()

    # Phase 1: search until the planner reports any solution.
    for _ in tqdm(range(number_of_iterations)):
        planner.mpc_step(integration_step)
        solution = planner.get_solution()
        if solution is not None:
            break

    # Phase 2: refine. Skipped when phase 1 found nothing, since there is
    # no cost to compare and indexing ``None`` would raise.
    if solution is not None:
        target_cost = data['cost'].sum() * 1.2
        for _ in tqdm(range(number_of_refine)):
            if solution[2].sum() < target_cost:
                break
            planner.step(min_time_steps, max_time_steps, integration_step)
            # Re-read after every step so the reported cost reflects the
            # last refinement, including the final iteration.
            solution = planner.get_solution()
    toc = time.perf_counter()

    found = solution is not None
    result = {
        'env_id': env_id,
        'traj_id': traj_id,
        'planning_time': toc - tic,
        'successful': found,
        'costs': solution[2].sum() if found else np.inf,
    }

    print("\t{}, time: {} seconds, {}(ref:{}) costs".format(
        result['successful'], result['planning_time'], result['costs'],
        data['cost'].sum()))
    return result