#ctypes.cdll.LoadLibrary('') lib1 = CDLL("/home/yinglong/Documents/kinodynamic/sparse_rrt/deps/trajopt/build/lib/libsco.so") lib2 = CDLL("/home/yinglong/Documents/kinodynamic/sparse_rrt/deps/trajopt/build/lib/libutils.so") #from env.cartpole import CartPole import sparse_rrt from sparse_rrt.systems import standard_cpp_systems from sparse_rrt import _sst_module import numpy as np import time import matplotlib.pyplot as plt from sparse_rrt.systems.pendulum import Pendulum import pickle #obs_list = np.array(obs_list) #system = standard_cpp_systems.PSOPTCartPole() _system = sparse_rrt._sst_module.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(_system, 2, 1, 0) #start = np.array([0., 0.]) #end = np.array([np.pi/2, 0.]) low = [] high = [] state_bounds = _system.get_state_bounds() for i in range(len(state_bounds)): low.append(state_bounds[i][0]) high.append(state_bounds[i][1]) for i in range(10): f = open('data/pendulum/0/path_%d.pkl' % (i), 'rb') state = pickle.load(f) f = open('data/pendulum/0/control_%d.pkl' % (i), 'rb') control = pickle.load(f)
def main(args): # set seed print(args.model_path) torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) # Build the models if torch.cuda.is_available(): torch.cuda.set_device(args.device) # setup evaluation function and load function if args.env_type == 'pendulum': IsInCollision = pendulum.IsInCollision normalize = pendulum.normalize unnormalize = pendulum.unnormalize obs_file = None obc_file = None dynamics = pendulum.dynamics jax_dynamics = pendulum.jax_dynamics enforce_bounds = pendulum.enforce_bounds cae = cae_identity mlp = MLP obs_f = False #system = standard_cpp_systems.PSOPTPendulum() #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) elif args.env_type == 'cartpole_obs': IsInCollision = cartpole.IsInCollision normalize = cartpole.normalize unnormalize = cartpole.unnormalize obs_file = None obc_file = None dynamics = cartpole.dynamics jax_dynamics = cartpole.jax_dynamics enforce_bounds = cartpole.enforce_bounds cae = CAE_acrobot_voxel_2d mlp = mlp_acrobot.MLP obs_f = True #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'cartpole') #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) elif args.env_type == 'acrobot_obs': IsInCollision = acrobot_obs.IsInCollision #IsInCollision = lambda x, obs: False normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) xdot = acrobot_obs.dynamics jax_dynamics = acrobot_obs.jax_dynamics enforce_bounds = acrobot_obs.enforce_bounds cae = CAE_acrobot_voxel_2d mlp = mlp_acrobot.MLP obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 50, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_2': IsInCollision = acrobot_obs.IsInCollision normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) xdot = acrobot_obs.dynamics jax_dynamics = acrobot_obs.jax_dynamics enforce_bounds = acrobot_obs.enforce_bounds cae = CAE_acrobot_voxel_2d_2 mlp = mlp_acrobot.MLP2 obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_3': IsInCollision = acrobot_obs.IsInCollision normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) xdot = acrobot_obs.dynamics jax_dynamics = acrobot_obs.jax_dynamics enforce_bounds = acrobot_obs.enforce_bounds mlp = mlp_acrobot.MLP3 cae = CAE_acrobot_voxel_2d_2 obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_5': IsInCollision = acrobot_obs.IsInCollision normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) xdot = acrobot_obs.dynamics jax_dynamics = acrobot_obs.jax_dynamics enforce_bounds = acrobot_obs.enforce_bounds cae = CAE_acrobot_voxel_2d_3 mlp = mlp_acrobot.MLP obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_6': IsInCollision = acrobot_obs.IsInCollision normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize obs_file = None obc_file = None xdot = acrobot_obs.dynamics system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) jax_dynamics = acrobot_obs.jax_dynamics enforce_bounds = acrobot_obs.enforce_bounds cae = CAE_acrobot_voxel_2d_3 mlp = mlp_acrobot.MLP4 obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_6': IsInCollision = acrobot_obs.IsInCollision normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize obs_file = None obc_file = None xdot = acrobot_obs.dynamics system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) jax_dynamics = acrobot_obs.jax_dynamics enforce_bounds = acrobot_obs.enforce_bounds mlp = mlp_acrobot.MLP5 cae = CAE_acrobot_voxel_2d_3 obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_8': IsInCollision = acrobot_obs.IsInCollision #IsInCollision = lambda x, obs: False normalize = acrobot_obs.normalize unnormalize = acrobot_obs.unnormalize obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) xdot = acrobot_obs.dynamics jax_dynamics = acrobot_obs.jax_dynamics enforce_bounds = acrobot_obs.enforce_bounds cae = CAE_acrobot_voxel_2d_3 mlp = mlp_acrobot.MLP6 obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 #num_steps = 21 num_steps = 21 #args.num_steps*2 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) #traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: #def cem_trajopt(x0, x1, step_sz, num_steps, x_init, u_init, t_init): # u, t = acrobot_obs.trajopt(x0, x1, 500, num_steps, step_sz*1, step_sz*(num_steps-1), x_init, u_init, t_init) # xs, us, dts, valid = propagate(x0, u, t, dynamics=dynamics, enforce_bounds=enforce_bounds, IsInCollision=lambda x: False, system=system, step_sz=step_sz) # return xs, us, dts #traj_opt = cem_trajopt goal_S0 = np.diag([1., 1., 0, 0]) goal_rho0 = 1.0 mpNet0 = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size, cae, mlp) mpNet1 = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size, cae, mlp) # load previously trained model if start epoch > 0 #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps) model_path = 'kmpnet_epoch_%d_direction_0.pkl' % (args.start_epoch) if args.start_epoch > 0: load_net_state(mpNet0, os.path.join(args.model_path, model_path)) torch_seed, np_seed, py_seed = load_seed( os.path.join(args.model_path, model_path)) # set seed after loading torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) if torch.cuda.is_available(): mpNet0.cuda() mpNet0.mlp.cuda() mpNet0.encoder.cuda() if args.opt == 'Adagrad': mpNet0.set_opt(torch.optim.Adagrad, lr=args.learning_rate) elif args.opt == 'Adam': mpNet0.set_opt(torch.optim.Adam, lr=args.learning_rate) elif args.opt == 'SGD': mpNet0.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9) if args.start_epoch > 0: load_opt_state(mpNet0, os.path.join(args.model_path, model_path)) # load previously trained model if start epoch > 0 #model_path='kmpnet_epoch_%d_direction_1_step_%d.pkl' %(args.start_epoch, args.num_steps) model_path = 'kmpnet_epoch_%d_direction_1.pkl' % (args.start_epoch) if args.start_epoch > 0: load_net_state(mpNet1, os.path.join(args.model_path, model_path)) torch_seed, np_seed, py_seed = load_seed( os.path.join(args.model_path, model_path)) # set seed after loading torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) if torch.cuda.is_available(): mpNet1.cuda() mpNet1.mlp.cuda() mpNet1.encoder.cuda() if args.opt == 'Adagrad': mpNet1.set_opt(torch.optim.Adagrad, lr=args.learning_rate) elif args.opt == 'Adam': mpNet1.set_opt(torch.optim.Adam, lr=args.learning_rate) elif args.opt == 'SGD': mpNet1.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9) if args.start_epoch > 0: load_opt_state(mpNet1, os.path.join(args.model_path, model_path)) # define informer circular = system.is_circular_topology() def informer(env, x0, xG, direction): x0_x = torch.from_numpy(x0.x).type(torch.FloatTensor) xG_x = torch.from_numpy(xG.x).type(torch.FloatTensor) x0_x = normalize_func(x0_x) xG_x = normalize_func(xG_x) if torch.cuda.is_available(): x0_x = x0_x.cuda() xG_x = xG_x.cuda() if direction == 0: x = torch.cat([x0_x, xG_x], dim=0) mpNet = mpNet0 if torch.cuda.is_available(): x = x.cuda() next_state = mpNet(x.unsqueeze(0), env.unsqueeze(0)).cpu().data next_state = unnormalize_func(next_state).numpy()[0] delta_x = next_state - x0.x # can be either clockwise or counterclockwise, take shorter one for i in range(len(delta_x)): if circular[i]: delta_x[i] = delta_x[i] - np.floor( delta_x[i] / (2 * np.pi)) * (2 * np.pi) if delta_x[i] > np.pi: delta_x[i] = delta_x[i] - 2 * np.pi # randomly pick either direction rand_d = np.random.randint(2) if rand_d < 1 and np.abs(delta_x[i]) >= np.pi * 0.5: if delta_x[i] > 0.: delta_x[i] = delta_x[i] - 2 * np.pi if delta_x[i] <= 0.: delta_x[i] = delta_x[i] + 2 * np.pi res = Node(x0.x + delta_x) cov = np.diag([0.02, 0.02, 0.02, 0.02]) #mean = next_state #next_state = np.random.multivariate_normal(mean=next_state,cov=cov) mean = np.zeros(next_state.shape) rand_x_init = np.random.multivariate_normal(mean=mean, cov=cov, size=num_steps) rand_x_init[0] = rand_x_init[0] * 0. rand_x_init[-1] = rand_x_init[-1] * 0. x_init = np.linspace(x0.x, x0.x + delta_x, num_steps) + rand_x_init ## TODO: : change this to general case u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps, 1)) u_init = u_init_i #u_init_i = control[max_d_i] cost_i = (num_steps - 1) * step_sz #TOEDIT #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i)) #u_init = u_init + np.random.normal(scale=1., size=u_init.shape) t_init = np.linspace(0, cost_i, num_steps) """ print('init:') print('x_init:') print(x_init) print('u_init:') print(u_init) print('t_init:') print(t_init) print('xw:') print(next_state) """ else: x = torch.cat([x0_x, xG_x], dim=0) mpNet = mpNet1 next_state = mpNet(x.unsqueeze(0), env.unsqueeze(0)).cpu().data next_state = unnormalize_func(next_state).numpy()[0] delta_x = next_state - x0.x # can be either clockwise or counterclockwise, take shorter one for i in range(len(delta_x)): if circular[i]: delta_x[i] = delta_x[i] - np.floor( delta_x[i] / (2 * np.pi)) * (2 * np.pi) if delta_x[i] > np.pi: delta_x[i] = delta_x[i] - 2 * np.pi # randomly pick either direction rand_d = np.random.randint(2) if rand_d < 1 and np.abs(delta_x[i]) >= np.pi * 0.5: if delta_x[i] > 0.: delta_x[i] = delta_x[i] - 2 * np.pi elif delta_x[i] <= 0.: delta_x[i] = delta_x[i] + 2 * np.pi #next_state = state[max_d_i] + delta_x next_state = x0.x + delta_x res = Node(next_state) # initial: from max_d_i to max_d_i+1 x_init = np.linspace(next_state, x0.x, num_steps) + rand_x_init # action: copy over to number of steps u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps, 1)) u_init = u_init_i cost_i = (num_steps - 1) * step_sz #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i)) #u_init = u_init + np.random.normal(scale=1., size=u_init.shape) t_init = np.linspace(0, cost_i, num_steps) return res, x_init, u_init, t_init def init_informer(env, x0, xG, direction): if direction == 0: next_state = xG.x delta_x = next_state - x0.x # can be either clockwise or counterclockwise, take shorter one for i in range(len(delta_x)): if circular[i]: delta_x[i] = delta_x[i] - np.floor( delta_x[i] / (2 * np.pi)) * (2 * np.pi) if delta_x[i] > np.pi: delta_x[i] = delta_x[i] - 2 * np.pi # randomly pick either direction rand_d = np.random.randint(2) #print('inside init_informer') #print('delta_x[%d]: %f' % (i, delta_x[i])) if rand_d < 1 and np.abs(delta_x[i]) >= np.pi * 0.9: if delta_x[i] > 0.: delta_x[i] = delta_x[i] - 2 * np.pi if delta_x[i] <= 0.: delta_x[i] = delta_x[i] + 2 * np.pi res = Node(next_state) cov = np.diag([0.02, 0.02, 0.02, 0.02]) #mean = next_state #next_state = np.random.multivariate_normal(mean=next_state,cov=cov) mean = np.zeros(next_state.shape) rand_x_init = np.random.multivariate_normal(mean=mean, cov=cov, size=num_steps) rand_x_init[0] = rand_x_init[0] * 0. rand_x_init[-1] = rand_x_init[-1] * 0. x_init = np.linspace(x0.x, x0.x + delta_x, num_steps) + rand_x_init ## TODO: : change this to general case u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps, 1)) u_init = u_init_i #u_init_i = control[max_d_i] #cost_i = 10*step_sz cost_i = (num_steps - 1) * step_sz #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i)) #u_init = u_init + np.random.normal(scale=1., size=u_init.shape) t_init = np.linspace(0, cost_i, num_steps) else: next_state = xG.x delta_x = x0.x - next_state # can be either clockwise or counterclockwise, take shorter one for i in range(len(delta_x)): if circular[i]: delta_x[i] = delta_x[i] - np.floor( delta_x[i] / (2 * np.pi)) * (2 * np.pi) if delta_x[i] > np.pi: delta_x[i] = delta_x[i] - 2 * np.pi # randomly pick either direction rand_d = np.random.randint(2) if rand_d < 1 and np.abs(delta_x[i]) >= np.pi * 0.5: if delta_x[i] > 0.: delta_x[i] = delta_x[i] - 2 * np.pi elif delta_x[i] <= 0.: delta_x[i] = delta_x[i] + 2 * np.pi #next_state = state[max_d_i] + delta_x res = Node(next_state) # initial: from max_d_i to max_d_i+1 x_init = np.linspace(next_state, next_state + delta_x, num_steps) + rand_x_init # action: copy over to number of steps u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps, 1)) u_init = u_init_i cost_i = (num_steps - 1) * step_sz #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i)) #u_init = u_init + np.random.normal(scale=1., size=u_init.shape) t_init = np.linspace(0, cost_i, num_steps) return x_init, u_init, t_init # load data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP, args.data_folder, obs_f, args.seen_s, args.seen_sp) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset( args.unseen_N, args.unseen_NP, args.data_folder, obs_f, args.unseen_s, args.unseen_sp) # test # testing print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. T = 1 for _ in range(T): # unnormalize function normalize_func = lambda x: normalize(x, args.world_size) unnormalize_func = lambda x: unnormalize(x, args.world_size) # seen if args.seen_N > 0: time_file = os.path.join( args.model_path, 'time_seen_epoch_%d_mlp.p' % (args.start_epoch)) fes_path_, valid_path_ = eval_tasks( mpNet0, mpNet1, seen_test_data, args.model_path, time_file, IsInCollision, normalize_func, unnormalize_func, informer, init_informer, system, dynamics, xdot, jax_dynamics, enforce_bounds, traj_opt, step_sz, num_steps) valid_path = valid_path_.flatten() fes_path = fes_path_.flatten( ) # notice different environments are involved seen_test_suc_rate += fes_path.sum() / valid_path.sum() # unseen if args.unseen_N > 0: time_file = os.path.join( args.model_path, 'time_unseen_epoch_%d_mlp.p' % (args.start_epoch)) fes_path_, valid_path_ = eval_tasks( mpNet0, mpNet1, unseen_test_data, args.model_path, time_file, IsInCollision, normalize_func, unnormalize_func, informer, init_informer, system, dynamics, xdot, jax_dynamics, enforce_bounds, traj_opt, step_sz, num_steps) valid_path = valid_path_.flatten() fes_path = fes_path_.flatten( ) # notice different environments are involved unseen_test_suc_rate += fes_path.sum() / valid_path.sum() if args.seen_N > 0: seen_test_suc_rate = seen_test_suc_rate / T f = open( os.path.join(args.model_path, 'seen_accuracy_epoch_%d.txt' % (args.start_epoch)), 'w') f.write(str(seen_test_suc_rate)) f.close() if args.unseen_N > 0: unseen_test_suc_rate = unseen_test_suc_rate / T # Save the models f = open( os.path.join(args.model_path, 'unseen_accuracy_epoch_%d.txt' % (args.start_epoch)), 'w') f.write(str(unseen_test_suc_rate)) f.close()
def main(args): # set seed print(args.model_path) torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) #torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) # Build the models #if torch.cuda.is_available(): # torch.cuda.set_device(args.device) # setup evaluation function and load function if args.env_type == 'pendulum': obs_file = None obc_file = None obs_f = False #system = standard_cpp_systems.PSOPTPendulum() #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) elif args.env_type == 'cartpole_obs': step_sz = 0.002 num_steps = 21 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 cost_threshold = 1.2 min_time_steps = 10 max_time_steps = 200 integration_step = 0.002 obs_f = True obs_file = None obc_file = None system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_width = 4.0 IsInCollision = cartpole_IsInCollision enforce_bounds = cartpole_enforce_bounds elif args.env_type == 'acrobot_obs': obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 200, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 IsInCollision = acrobot_IsInCollision enforce_bounds = acrobot_enforce_bounds if args.env_type == 'pendulum': step_sz = 0.002 num_steps = 20 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 0.1 delta_drain = 0.05 # load previously trained model if start epoch > 0 #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps) mlp_path = os.path.join(os.getcwd() + '/c++/', 'acrobot_mlp_annotated_test_gpu.pt') encoder_path = os.path.join(os.getcwd() + '/c++/', 'acrobot_encoder_annotated_test_cpu.pt') print('mlp_path:') print(mlp_path) ##################################################### def plan_one_path(obs_i, obs, obc, detailed_data_path, data_path, start_state, goal_state, goal_inform_state, cost_i, max_iteration, out_queue_t, out_queue_cost, random_seed): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') obs_width = 4.0 psopt_system = _sst_module.PSOPTCartPole() propagate_system = standard_cpp_systems.RectangleObs( obs, 4., 'cartpole') #distance_computer = propagate_system.distance_computer() distance_computer = _sst_module.euclidean_distance( np.array(propagate_system.is_circular_topology())) step_sz = 0.002 num_steps = 21 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 #delta_near=.2 #delta_drain=.1 cost_threshold = 1.05 min_time_steps = 10 max_time_steps = 200 #min_time_steps = 5 #max_time_steps = 400 integration_step = 0.002 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 1.0 delta_drain = 0.5 cost_threshold = 1.05 min_time_steps = 5 max_time_steps = 100 integration_step = 0.02 planner = _sst_module.SSTWrapper( state_bounds=propagate_system.get_state_bounds(), control_bounds=propagate_system.get_control_bounds(), distance=distance_computer, start_state=start_state, goal_state=goal_state, goal_radius=goal_radius, random_seed=random_seed, sst_delta_near=delta_near, sst_delta_drain=delta_drain) #print('creating planner...') # generate a path by using SST to plan for some maximal iterations time0 = time.time() for i in range(max_iteration): planner.step(propagate_system, min_time_steps, max_time_steps, integration_step) # early break for initial path solution = planner.get_solution() if solution is not None: #print('solution found already.') # based on cost break xs, us, ts = solution t_sst = np.sum(ts) #print(t_sst) #print(cost_i) if t_sst <= cost_i * cost_threshold: print('solved in %d iterations' % (i)) break plan_time = time.time() - time0 solution = planner.get_solution() xs, us, ts = solution print(np.linalg.norm(np.array(xs[-1]) - goal_state)) """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) #ax.set_xlim(-30, 30) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dx = 1 dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 #imax = int(2*30./dx) imax = int(2*np.pi/dtheta) jmin = 0 jmax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(jmin, jmax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state xs_to_plot = np.array(detailed_data_path) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(detailed_data_path[i], propagate_system) ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='lightgreen', s=0.5) # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) #plt.waitforbuttonpress() if solution is not None: xs, us, ts = solution # propagate data p_start = xs[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(us)): #state_i.append(len(detail_paths)-1) print(ts[k]) max_steps = int(np.round(ts[k]/step_sz)) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation for step in range(1,max_steps+1): p_start = dynamics(p_start, us[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) #state[-1] = xs[-1] xs_to_plot = np.array(state) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system) ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green', s=0.5) start_state_np = np.array(start_state) goal_state_np = np.array(goal_state) ax.scatter([start_state_np[0]], [start_state_np[1]], c='blue', marker='*') ax.scatter([goal_state_np[0]], [goal_state_np[1]], c='red', marker='*') # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) plt.waitforbuttonpress() """ # validate if the path contains collision if solution is not None: res_x, res_u, res_t = solution print('solution_x:') print(res_x) print('path_x:') print(np.array(data_path)) # propagate data p_start = res_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(res_u)): #state_i.append(len(detail_paths)-1) max_steps = int(np.round(res_t[k] / step_sz)) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation #p_start = res_x[k] #state[-1] = res_x[k] for step in range(1, max_steps + 1): p_start = dynamics(p_start, res_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. # check collision for the new state if IsInCollision(p_start, obs_i): print( 'collision happens at u_index: %d, step: %d' % (k, step)) assert not IsInCollision(p_start, obs_i) #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) #state[-1] = res_x[-1] # validation end print('plan time: %fs' % (plan_time)) if solution is None: print('failed.') out_queue_t.put(-1) out_queue_cost.put(-1) else: print('path succeeded.') out_queue_t.put(plan_time) out_queue_cost.put(t_sst) #################################################################################### # load data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP, args.data_folder, obs_f, args.seen_s, args.seen_sp) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset( args.unseen_N, args.unseen_NP, args.data_folder, obs_f, args.unseen_s, args.unseen_sp) # test # testing queue_t = Queue(1) queue_cost = Queue(1) print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data obc = obc.astype(np.float32) # for all planning, use a flattened vector to store plan_times = [] plan_res_all = [] plan_costs = [] data_costs = [] # store in a 2d vector, for env and path plan_res_env = [] plan_time_env = [] plan_cost_env = [] data_cost_env = [] # directory to save the results res_path = args.res_path res_path = res_path + args.env_type + "_sst_compare_with_mpnet/" if args.env_type == 'acrobot_obs': res_path = '/media/arclabdl1/HD1/YLmiao/mpc-mpnet-cuda-yinglong/results/cpp_full/acrobot_obs/default_small_model_batch/' elif args.env_type == 'cartpole_obs': res_path = '/media/arclabdl1/HD1/YLmiao/mpc-mpnet-cuda-yinglong/results/cpp_full/cartpole_obs/default_small_model_batch/' mpnet_tree_time = np.load(res_path + 'time_10_100.npy', allow_pickle=True) mpnet_tree_sr = np.load(res_path + 'sr_10_100.npy', allow_pickle=True) mpnet_tree_cost = np.load(res_path + 'costs_10_100.npy', allow_pickle=True) if not os.path.exists(res_path): os.makedirs(res_path) for i in range(len(paths)): new_obs_i = [] obs_i = obs[i] plan_res_path = [] plan_time_path = [] plan_cost_path = [] data_cost_path = [] for k in range(len(obs_i)): obs_pt = [] obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) new_obs_i.append(obs_pt) obs_i = new_obs_i #print(obs_i) for j in range(len(paths[i])): start_state = sgs[i][j][0] #goal_inform_state = paths[i][j][-1] goal_inform_state = sgs[i][j][1] goal_state = sgs[i][j][1] # propagate data p_start = paths[i][j][0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(controls[i][j])): #state_i.append(len(detail_paths)-1) #max_steps = int(costs[i][j][k]/step_sz) max_steps = 1000000 accum_cost = 0. for step in range(1, max_steps + 1): p_start = dynamics(p_start, controls[i][j][k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. # check collision for the new state if IsInCollision(p_start, obs_i): print( 'collision happens at u_index: %d, step: %d' % (k, step)) assert not IsInCollision(p_start, obs_i) if np.linalg.norm(p_start - paths[i][j][k + 1]) <= 1e-3: break # validation end cost_i = np.sum(cost) print('data cost:', cost_i) if mpnet_tree_sr[i][j] != 0: # use MPNet tree cost cost_i = mpnet_tree_cost[i][j] print('using mpnet cost: ', cost_i) #cost_i = 100000000. # acrobot: 300000 # cartpole: 500000 print('environment: %d/%d, path: %d/%d' % (i + 1, len(paths), j + 1, len(paths[i]))) plan_t_trials = [] plan_cost_trials = [] for trial in range(1): random_seed = random.randint(0, 100) #random_seed = 0 p = Process(target=plan_one_path, args=(obs_i, obs[i], obc[i], state, paths[i][j], start_state, goal_state, goal_inform_state, cost_i, args.num_iter, queue_t, queue_cost, random_seed)) #plan_one_path(obs_i, obs[i], obc[i], state, paths[i][j], start_state, goal_state, goal_inform_state, cost_i, args.num_iter, queue_t, queue_cost, random_seed) p.start() p.join() plan_t = queue_t.get() plan_cost = queue_cost.get() if plan_t != -1: plan_t_trials.append(plan_t) plan_cost_trials.append(plan_cost) #assert len(plan_ts) == 10 plan_t = np.mean(plan_t_trials) plan_cost = np.mean(plan_cost_trials) if plan_t == -1: # failed, do not record in the flattened list plan_res_all.append(0) # record in the 2d list plan_res_path.append(0) plan_time_path.append(plan_t) plan_cost_path.append(plan_cost) data_cost_path.append(-1.0) else: # record in the flattened list plan_res_all.append(1) plan_times.append(plan_t) plan_costs.append(plan_cost) data_costs.append(cost_i) # record in the 2d list plan_res_path.append(1) plan_time_path.append(plan_t) plan_cost_path.append(plan_cost) data_cost_path.append(cost_i) print('plan costs:') print(plan_costs) print('average accuracy up to now: %f' % (np.array(plan_res_all).flatten().mean())) print('plan average time: %f' % (np.array(plan_times).mean())) print('plan time std: %f' % (np.array(plan_times).std())) print('plan average cost: %f' % (np.array(plan_costs).mean())) print('plan cost std: %f' % (np.array(plan_costs).std())) print('data average cost: %f' % (np.array(data_costs).mean())) print('data cost std: %f' % (np.array(data_costs).std())) # store in the 2d list plan_res_env.append(plan_res_path) plan_time_env.append(plan_time_path) plan_cost_env.append(plan_cost_path) data_cost_env.append(data_cost_path) # for every environment planned, save # save the 2d list # save as numpy array #np.save(res_path+"plan_res.npy", np.array(plan_res_env)) #np.save(res_path+"plan_time.npy", np.array(plan_time_env)) #np.save(res_path+"plan_cost.npy", np.array(plan_cost_env)) #np.save(res_path+"data_cost.npy", np.array(data_cost_env)) print('plan accuracy: %f' % (np.array(plan_res_all).flatten().mean())) print('plan average time: %f' % (np.array(plan_times).mean())) print('plan time std: %f' % (np.array(plan_times).std())) print('plan average cost: %f' % (np.array(plan_costs).mean())) print('plan cost std: %f' % (np.array(plan_costs).std())) print('data average cost: %f' % (np.array(data_costs).mean())) print('data cost std: %f' % (np.array(data_costs).std())) # save the 2d list # save as numpy array plan_res_env = np.array(plan_res_env) plan_time_env = np.array(plan_time_env) plan_cost_env = np.array(plan_cost_env) data_cost_env = np.array(data_cost_env) np.save(res_path + "plan_res.npy", plan_res_env) np.save(res_path + "plan_time.npy", plan_time_env) np.save(res_path + "plan_cost.npy", plan_cost_env) np.save(res_path + "data_cost.npy", data_cost_env)
def plan_one_path(obs_i, obs, obc, detailed_data_path, data_path, start_state, goal_state, goal_inform_state, cost_i, max_iteration, out_queue_t, out_queue_cost, random_seed): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') obs_width = 4.0 psopt_system = _sst_module.PSOPTCartPole() propagate_system = standard_cpp_systems.RectangleObs( obs, 4., 'cartpole') #distance_computer = propagate_system.distance_computer() distance_computer = _sst_module.euclidean_distance( np.array(propagate_system.is_circular_topology())) step_sz = 0.002 num_steps = 21 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 #delta_near=.2 #delta_drain=.1 cost_threshold = 1.05 min_time_steps = 10 max_time_steps = 200 #min_time_steps = 5 #max_time_steps = 400 integration_step = 0.002 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 1.0 delta_drain = 0.5 cost_threshold = 1.05 min_time_steps = 5 max_time_steps = 100 integration_step = 0.02 planner = _sst_module.SSTWrapper( state_bounds=propagate_system.get_state_bounds(), control_bounds=propagate_system.get_control_bounds(), distance=distance_computer, start_state=start_state, goal_state=goal_state, goal_radius=goal_radius, random_seed=random_seed, sst_delta_near=delta_near, sst_delta_drain=delta_drain) #print('creating planner...') # generate a path by using SST to plan for some maximal iterations time0 = time.time() for i in range(max_iteration): planner.step(propagate_system, min_time_steps, max_time_steps, integration_step) # early break for initial path solution = planner.get_solution() if solution is not None: #print('solution found already.') # based on cost break xs, us, ts = solution t_sst = np.sum(ts) #print(t_sst) #print(cost_i) if t_sst <= cost_i * cost_threshold: print('solved in %d iterations' % (i)) break plan_time = time.time() - time0 solution = planner.get_solution() xs, us, ts = solution print(np.linalg.norm(np.array(xs[-1]) - goal_state)) """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) #ax.set_xlim(-30, 30) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dx = 1 dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 #imax = int(2*30./dx) imax = int(2*np.pi/dtheta) jmin = 0 jmax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(jmin, jmax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state xs_to_plot = np.array(detailed_data_path) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(detailed_data_path[i], propagate_system) ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='lightgreen', s=0.5) # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) #plt.waitforbuttonpress() if solution is not None: xs, us, ts = solution # propagate data p_start = xs[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(us)): #state_i.append(len(detail_paths)-1) print(ts[k]) max_steps = int(np.round(ts[k]/step_sz)) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation for step in range(1,max_steps+1): p_start = dynamics(p_start, us[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) #state[-1] = xs[-1] xs_to_plot = np.array(state) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system) ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green', s=0.5) start_state_np = np.array(start_state) goal_state_np = np.array(goal_state) ax.scatter([start_state_np[0]], [start_state_np[1]], c='blue', marker='*') ax.scatter([goal_state_np[0]], [goal_state_np[1]], c='red', marker='*') # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) plt.waitforbuttonpress() """ # validate if the path contains collision if solution is not None: res_x, res_u, res_t = solution print('solution_x:') print(res_x) print('path_x:') print(np.array(data_path)) # propagate data p_start = res_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(res_u)): #state_i.append(len(detail_paths)-1) max_steps = int(np.round(res_t[k] / step_sz)) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation #p_start = res_x[k] #state[-1] = res_x[k] for step in range(1, max_steps + 1): p_start = dynamics(p_start, res_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. # check collision for the new state if IsInCollision(p_start, obs_i): print( 'collision happens at u_index: %d, step: %d' % (k, step)) assert not IsInCollision(p_start, obs_i) #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) #state[-1] = res_x[-1] # validation end print('plan time: %fs' % (plan_time)) if solution is None: print('failed.') out_queue_t.put(-1) out_queue_cost.put(-1) else: print('path succeeded.') out_queue_t.put(plan_time) out_queue_cost.put(t_sst)
def main(args): # set seed print(args.model_path) torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) #torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) # Build the models #if torch.cuda.is_available(): # torch.cuda.set_device(args.device) # setup evaluation function and load function if args.env_type == 'pendulum': obs_file = None obc_file = None obs_f = False #system = standard_cpp_systems.PSOPTPendulum() #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) elif args.env_type == 'cartpole_obs': normalize = cartpole.normalize unnormalize = cartpole.unnormalize obs_file = None obc_file = None #dynamics = cartpole.dynamics #jax_dynamics = cartpole.jax_dynamics #enforce_bounds = cartpole.enforce_bounds cae = CAE_acrobot_voxel_2d mlp = mlp_acrobot.MLP obs_f = True #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'cartpole') #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) elif args.env_type == 'acrobot_obs': obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 20 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(x0, x1, 200, num_steps, step_sz*1, step_sz*(num_steps-1), x_init, u_init, t_init) obs_width = 6.0 step_sz = 0.02 num_steps = 20 goal_radius=2.0 random_seed=0 delta_near=0.1 delta_drain=0.05 elif args.env_type in ['acrobot_obs','acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8']: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 step_sz = 0.02 num_steps = 20 goal_radius=2.0 random_seed=0 delta_near=0.1 delta_drain=0.05 # load previously trained model if start epoch > 0 #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps) mlp_path = os.path.join(os.getcwd()+'/c++/','acrobot_obs_MLP_lr0.010000_epoch_2850_step_20.pt') encoder_path = os.path.join(os.getcwd()+'/c++/','acrobot_obs_encoder_lr0.010000_epoch_2850_step_20.pt') cost_mlp_path = os.path.join(os.getcwd()+'/c++/','costnet_acrobot_obs_8_MLP_epoch_300_step_20.pt') cost_encoder_path = os.path.join(os.getcwd()+'/c++/','costnet_acrobot_obs_8_encoder_epoch_300_step_20.pt') print('mlp_path:') print(mlp_path) ##################################################### def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, max_iteration, data, out_queue): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') system = _sst_module.CartPole() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(x0, x1, 200, num_steps, step_sz*1, step_sz*50, x_init, u_init, t_init) goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type in ['acrobot_obs','acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8']: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs(obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) bvp_wrapper = _sst_module.PSOPTBVPWrapper(psopt_system, 4, 1, 0) step_sz = 0.02 num_steps = 20 psopt_num_steps = 20 psopt_step_sz = 0.02 goal_radius=2 random_seed=0 #delta_near=1.0 #delta_drain=0.5 delta_near=0.1 delta_drain=0.05 #print('creating planner...') planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, 20, psopt_num_steps+1, psopt_step_sz, step_sz, propagate_system, args.device) # generate a path by using SST to plan for some maximal iterations time0 = time.time() #print('obc:') #print(obc.shape) #print(delta_near) #print(delta_drain) #print('start_state:') #print(start_state) #print('goal_state:') #print(goal_state) plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2*np.pi/dtheta) circular = psopt_system.is_circular_topology() for i in range(imin, imax): for j in range(imin, imax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) data = np.array(data) ax.scatter(data[:,0], data[:,1], c='lightblue', s=10) ax.scatter(data[-1,0], data[-1,1], c='red', s=10, marker='*') draw_update_line(ax) state_t = start_state state_t = data[0] for data_i in range(0,len(data),num_steps): print('iteration: %d' % (data_i)) print('state_t:') print(state_t) min_dis_to_goal = 100000. min_xs_to_plot = [] for trials in range(10): x_init, u_init, t_init = init_informer(propagate_system, state_t, data[data_i], psopt_num_steps+1, psopt_step_sz) print('x_init:') print(x_init) bvp_x, bvp_u, bvp_t = bvp_wrapper.solve(state_t, x_init[-1], 20, psopt_num_steps+1, 0.8*psopt_step_sz*psopt_num_steps, 2*psopt_step_sz*psopt_num_steps, \ x_init, u_init, t_init) print('bvp_x:') print(bvp_x) print('bvp_u:') print(bvp_u) print('bvp_t:') print(bvp_t) if len(bvp_u) != 0:# and bvp_t[0] > 0.01: # turn bvp_t off if want to use step_bvp # propagate data #p_start = bvp_x[0] p_start = state_t detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(bvp_t)): #state_i.append(len(detail_paths)-1) max_steps = int(np.round(bvp_t[k]/step_sz)) accum_cost = 0. for step in range(1,max_steps+1): p_start = dynamics(p_start, bvp_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) cost.append(accum_cost) accum_cost = 0. xs_to_plot = np.array(state) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system) delta_x = xs_to_plot[-1] - data[data_i] for i in range(len(delta_x)): if circular[i]: delta_x[i] = delta_x[i] - np.floor(delta_x[i] / (2*np.pi))*(2*np.pi) if delta_x[i] > np.pi: delta_x[i] = delta_x[i] - 2*np.pi dis = np.linalg.norm(delta_x) if dis <= min_dis_to_goal: min_dis_to_goal = dis min_xs_to_plot = xs_to_plot #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green') ax.scatter(min_xs_to_plot[:,0], min_xs_to_plot[:,1], c='green', s=10.0) # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) #state_t = min_xs_to_plot[-1] # try using mpnet_res as new start state_t = data[data_i] #state_t = min_xs_to_plot[-1] print('data_i:') print(data[data_i]) #else: # # in incollision # state_t = data[data_i] #if len(res_x) == 0: # print('failed.') out_queue.put(0) #else: # print('path succeeded.') # out_queue.put(1) #################################################################################### # load data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP, args.data_folder, obs_f, args.seen_s, args.seen_sp) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset(args.unseen_N, args.unseen_NP, args.data_folder, obs_f, args.unseen_s, args.unseen_sp) # test # testing queue = Queue(1) print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data obc = obc.astype(np.float32) #obc = torch.from_numpy(obc) #if torch.cuda.is_available(): # obc = obc.cuda() for i in range(len(paths)): new_obs_i = [] obs_i = obs[i] for k in range(len(obs_i)): obs_pt = [] obs_pt.append(obs_i[k][0]-obs_width/2) obs_pt.append(obs_i[k][1]-obs_width/2) obs_pt.append(obs_i[k][0]-obs_width/2) obs_pt.append(obs_i[k][1]+obs_width/2) obs_pt.append(obs_i[k][0]+obs_width/2) obs_pt.append(obs_i[k][1]+obs_width/2) obs_pt.append(obs_i[k][0]+obs_width/2) obs_pt.append(obs_i[k][1]-obs_width/2) new_obs_i.append(obs_pt) obs_i = new_obs_i #print(obs_i) for j in range(len(paths[i])): start_state = sgs[i][j][0] goal_inform_state = paths[i][j][-1] goal_state = sgs[i][j][1] #p = Process(target=plan_one_path, args=(obs[i], obc[i], start_state, goal_state, 500, queue)) # propagate data p_start = paths[i][j][0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(controls[i][j])): #state_i.append(len(detail_paths)-1) max_steps = int(costs[i][j][k]/step_sz) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation #p_start = paths[i][j][k] #state[-1] = paths[i][j][k] for step in range(1,max_steps+1): p_start = dynamics(p_start, controls[i][j][k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) detail_controls.append(controls[i][j]) detail_costs.append(step_sz) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) control.append(controls[i][j][k]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) state[-1] = paths[i][j][-1] data = state plan_one_path(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, 1000, data, queue)
def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, max_iteration, data, out_queue): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') system = _sst_module.CartPole() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(x0, x1, 200, num_steps, step_sz*1, step_sz*50, x_init, u_init, t_init) goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type in ['acrobot_obs','acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8']: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs(obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) bvp_wrapper = _sst_module.PSOPTBVPWrapper(psopt_system, 4, 1, 0) step_sz = 0.02 num_steps = 20 psopt_num_steps = 20 psopt_step_sz = 0.02 goal_radius=2 random_seed=0 #delta_near=1.0 #delta_drain=0.5 delta_near=0.1 delta_drain=0.05 #print('creating planner...') planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, 20, psopt_num_steps+1, psopt_step_sz, step_sz, propagate_system, args.device) # generate a path by using SST to plan for some maximal iterations time0 = time.time() #print('obc:') #print(obc.shape) #print(delta_near) #print(delta_drain) #print('start_state:') #print(start_state) #print('goal_state:') #print(goal_state) plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2*np.pi/dtheta) circular = psopt_system.is_circular_topology() for i in range(imin, imax): for j in range(imin, imax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) data = np.array(data) ax.scatter(data[:,0], data[:,1], c='lightblue', s=10) ax.scatter(data[-1,0], data[-1,1], c='red', s=10, marker='*') draw_update_line(ax) state_t = start_state state_t = data[0] for data_i in range(0,len(data),num_steps): print('iteration: %d' % (data_i)) print('state_t:') print(state_t) min_dis_to_goal = 100000. min_xs_to_plot = [] for trials in range(10): x_init, u_init, t_init = init_informer(propagate_system, state_t, data[data_i], psopt_num_steps+1, psopt_step_sz) print('x_init:') print(x_init) bvp_x, bvp_u, bvp_t = bvp_wrapper.solve(state_t, x_init[-1], 20, psopt_num_steps+1, 0.8*psopt_step_sz*psopt_num_steps, 2*psopt_step_sz*psopt_num_steps, \ x_init, u_init, t_init) print('bvp_x:') print(bvp_x) print('bvp_u:') print(bvp_u) print('bvp_t:') print(bvp_t) if len(bvp_u) != 0:# and bvp_t[0] > 0.01: # turn bvp_t off if want to use step_bvp # propagate data #p_start = bvp_x[0] p_start = state_t detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(bvp_t)): #state_i.append(len(detail_paths)-1) max_steps = int(np.round(bvp_t[k]/step_sz)) accum_cost = 0. for step in range(1,max_steps+1): p_start = dynamics(p_start, bvp_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) cost.append(accum_cost) accum_cost = 0. xs_to_plot = np.array(state) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system) delta_x = xs_to_plot[-1] - data[data_i] for i in range(len(delta_x)): if circular[i]: delta_x[i] = delta_x[i] - np.floor(delta_x[i] / (2*np.pi))*(2*np.pi) if delta_x[i] > np.pi: delta_x[i] = delta_x[i] - 2*np.pi dis = np.linalg.norm(delta_x) if dis <= min_dis_to_goal: min_dis_to_goal = dis min_xs_to_plot = xs_to_plot #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green') ax.scatter(min_xs_to_plot[:,0], min_xs_to_plot[:,1], c='green', s=10.0) # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) #state_t = min_xs_to_plot[-1] # try using mpnet_res as new start state_t = data[data_i] #state_t = min_xs_to_plot[-1] print('data_i:') print(data[data_i]) #else: # # in incollision # state_t = data[data_i] #if len(res_x) == 0: # print('failed.') out_queue.put(0)
def eval_tasks(mpNet0, mpNet1, env_type, test_data, save_dir, data_type, normalize_func = lambda x:x, unnormalize_func=lambda x: x, dynamics=None, jac_A=None, jac_B=None, enforce_bounds=None, IsInCollision=None): # data_type: seen or unseen obc, obs, paths, sgs, path_lengths, controls, costs = test_data if obs is not None: obc = obc.astype(np.float32) obc = torch.from_numpy(obc) if torch.cuda.is_available(): obc = obc.cuda() if env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 500, num_steps, 1, 20, step_sz) elif env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') system = _sst_module.CartPole() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(x0, x1, 500, num_steps, step_sz*1, step_sz*50, x_init, u_init, t_init) goal_S0 = np.identity(4) goal_rho0 = 1.0 elif env_type in ['acrobot_obs','acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8']: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 system = _sst_module.PSOPTAcrobot() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(x0, x1, 500, num_steps, step_sz*1, step_sz*(num_steps-1), x_init, u_init, t_init) #step_sz = 0.002 goal_S0 = np.diag([1.,1.,0,0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 circular = system.is_circular_topology() def informer(env, x0, xG, direction): x0_x = torch.from_numpy(x0.x).type(torch.FloatTensor) xG_x = torch.from_numpy(xG.x).type(torch.FloatTensor) x0_x = normalize_func(x0_x) xG_x = normalize_func(xG_x) if torch.cuda.is_available(): x0_x = x0_x.cuda() xG_x = xG_x.cuda() if direction == 0: x = torch.cat([x0_x,xG_x], dim=0) mpNet = mpNet0 if torch.cuda.is_available(): x = x.cuda() next_state = mpNet(x.unsqueeze(0), env.unsqueeze(0)).cpu().data next_state = unnormalize_func(next_state).numpy()[0] cov = np.diag([0.01,0.01,0.01,0.01]) #mean = next_state #next_state = np.random.multivariate_normal(mean=next_state,cov=cov) mean = np.zeros(next_state.shape) rand_x_init = np.random.multivariate_normal(mean=mean, cov=cov, size=num_steps) rand_x_init[0] = rand_x_init[0]*0. rand_x_init[-1] = rand_x_init[-1]*0. delta_x = next_state - x0.x # can be either clockwise or counterclockwise, take shorter one for i in range(len(delta_x)): if circular[i]: delta_x[i] = delta_x[i] - np.floor(delta_x[i] / (2*np.pi))*(2*np.pi) if delta_x[i] > np.pi: delta_x[i] = delta_x[i] - 2*np.pi # randomly pick either direction rand_d = np.random.randint(2) if rand_d < 1 and np.abs(delta_x[i]) >= np.pi*0.5: if delta_x[i] > 0.: delta_x[i] = delta_x[i] - 2*np.pi if delta_x[i] <= 0.: delta_x[i] = delta_x[i] + 2*np.pi res = Node(next_state) x_init = np.linspace(x0.x, x0.x+delta_x, num_steps) + rand_x_init ## TODO: : change this to general case u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps,1)) u_init = u_init_i #u_init_i = control[max_d_i] cost_i = (num_steps-1)*step_sz #TOEDIT #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i)) #u_init = u_init + np.random.normal(scale=1., size=u_init.shape) t_init = np.linspace(0, cost_i, num_steps) #t_init = np.append(t_init, 0.) else: x = torch.cat([x0_x,xG_x], dim=0) mpNet = mpNet1 next_state = mpNet(x.unsqueeze(0), env.unsqueeze(0)).cpu().data next_state = unnormalize_func(next_state).numpy()[0] cov = np.diag([0.01,0.01,0.01,0.01]) #mean = next_state #next_state = np.random.multivariate_normal(mean=next_state,cov=cov) mean = np.zeros(next_state.shape) rand_x_init = np.random.multivariate_normal(mean=mean, cov=cov, size=num_steps) rand_x_init[0] = rand_x_init[0]*0. rand_x_init[-1] = rand_x_init[-1]*0. delta_x = x0.x - next_state # can be either clockwise or counterclockwise, take shorter one for i in range(len(delta_x)): if circular[i]: delta_x[i] = delta_x[i] - np.floor(delta_x[i] / (2*np.pi))*(2*np.pi) if delta_x[i] > np.pi: delta_x[i] = delta_x[i] - 2*np.pi # randomly pick either direction rand_d = np.random.randint(2) if rand_d < 1 and np.abs(delta_x[i]) >= np.pi*0.5: if delta_x[i] > 0.: delta_x[i] = delta_x[i] - 2*np.pi elif delta_x[i] <= 0.: delta_x[i] = delta_x[i] + 2*np.pi #next_state = state[max_d_i] + delta_x res = Node(next_state) # initial: from max_d_i to max_d_i+1 x_init = np.linspace(next_state, next_state + delta_x, num_steps) + rand_x_init # action: copy over to number of steps u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps,1)) u_init = u_init_i cost_i = (num_steps-1)*step_sz #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i)) #u_init = u_init + np.random.normal(scale=1., size=u_init.shape) t_init = np.linspace(0, cost_i, num_steps) return res, x_init, u_init, t_init def init_informer(env, x0, xG, direction): if direction == 0: next_state = xG.x delta_x = next_state - x0.x cov = np.diag([0.01,0.01,0.01,0.01]) #mean = next_state #next_state = np.random.multivariate_normal(mean=next_state,cov=cov) mean = np.zeros(next_state.shape) rand_x_init = np.random.multivariate_normal(mean=mean, cov=cov, size=num_steps) rand_x_init[0] = rand_x_init[0]*0. rand_x_init[-1] = rand_x_init[-1]*0. # can be either clockwise or counterclockwise, take shorter one for i in range(len(delta_x)): if circular[i]: delta_x[i] = delta_x[i] - np.floor(delta_x[i] / (2*np.pi))*(2*np.pi) if delta_x[i] > np.pi: delta_x[i] = delta_x[i] - 2*np.pi # randomly pick either direction rand_d = np.random.randint(2) if rand_d < 1 and np.abs(delta_x[i]) >= np.pi*0.5: if delta_x[i] > 0.: delta_x[i] = delta_x[i] - 2*np.pi if delta_x[i] <= 0.: delta_x[i] = delta_x[i] + 2*np.pi res = Node(next_state) x_init = np.linspace(x0.x, x0.x+delta_x, num_steps) + rand_x_init ## TODO: : change this to general case u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps,1)) u_init = u_init_i #u_init_i = control[max_d_i] cost_i = (num_steps-1)*step_sz #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i)) #u_init = u_init + np.random.normal(scale=1., size=u_init.shape) t_init = np.linspace(0, cost_i, num_steps) #t_init = np.append(t_init, 0.) else: next_state = xG.x cov = np.diag([0.01,0.01,0.01,0.01]) #mean = next_state #next_state = np.random.multivariate_normal(mean=next_state,cov=cov) mean = np.zeros(next_state.shape) rand_x_init = np.random.multivariate_normal(mean=mean, cov=cov, size=num_steps) rand_x_init[0] = rand_x_init[0]*0. rand_x_init[-1] = rand_x_init[-1]*0. delta_x = x0.x - next_state # can be either clockwise or counterclockwise, take shorter one for i in range(len(delta_x)): if circular[i]: delta_x[i] = delta_x[i] - np.floor(delta_x[i] / (2*np.pi))*(2*np.pi) if delta_x[i] > np.pi: delta_x[i] = delta_x[i] - 2*np.pi # randomly pick either direction rand_d = np.random.randint(2) if rand_d < 1 and np.abs(delta_x[i]) >= np.pi*0.5: if delta_x[i] > 0.: delta_x[i] = delta_x[i] - 2*np.pi elif delta_x[i] <= 0.: delta_x[i] = delta_x[i] + 2*np.pi #next_state = state[max_d_i] + delta_x res = Node(next_state) # initial: from max_d_i to max_d_i+1 x_init = np.linspace(next_state, next_state + delta_x, num_steps) + rand_x_init # action: copy over to number of steps u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps,1)) u_init = u_init_i cost_i = (num_steps-1)*step_sz #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i)) #u_init = u_init + np.random.normal(scale=1., size=u_init.shape) t_init = np.linspace(0, cost_i, num_steps) return x_init, u_init, t_init fes_env = [] # list of list valid_env = [] time_env = [] time_total = [] for i in range(len(paths)): time_path = [] fes_path = [] # 1 for feasible, 0 for not feasible valid_path = [] # if the feasibility is valid or not # save paths to different files, indicated by i #print(obs, flush=True) # feasible paths for each env for j in range(len(paths[0])): state_i = [] state = paths[i][j] # obtain the sequence p_start = paths[i][j][0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(controls[i][j])): #state_i.append(len(detail_paths)-1) max_steps = int(costs[i][j][k]/step_sz) accum_cost = 0. # modify it because of small difference between data and actual propagation p_start = paths[i][j][k] state[-1] = paths[i][j][k] for step in range(1,max_steps+1): p_start = dynamics(p_start, controls[i][j][k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) detail_controls.append(controls[i][j]) detail_costs.append(step_sz) accum_cost += step_sz if (step % 20 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) control.append(controls[i][j][k]) cost.append(accum_cost) accum_cost = 0. state[-1] = paths[i][j][-1] ############################# time0 = time.time() time_norm = 0. fp = 0 # indicator for feasibility print ("step: i="+str(i)+" j="+str(j)) p1_ind=0 p2_ind=0 p_ind=0 if path_lengths[i][j]==0: # invalid, feasible = 0, and path count = 0 fp = 0 valid_path.append(0) if path_lengths[i][j]>0: fp = 0 valid_path.append(1) #paths[i][j][0][1] = 0. #paths[i][j][path_lengths[i][j]-1][1] = 0. path = [paths[i][j][0], paths[i][j][path_lengths[i][j]-1]] # plot the entire path #plt.plot(paths[i][j][:,0], paths[i][j][:,1]) start = Node(path[0]) goal = Node(path[-1]) #goal = Node(sgs[i][j][1]) goal.S0 = goal_S0 goal.rho0 = goal_rho0 # change this later control = [] time_step = [] step_sz = step_sz MAX_NEURAL_REPLAN = 1 if obs is None: obs_i = None obc_i = None else: obs_i = obs[i] obc_i = obc[i] # convert obs_i center to points new_obs_i = [] for k in range(len(obs_i)): obs_pt = [] obs_pt.append(obs_i[k][0]-obs_width/2) obs_pt.append(obs_i[k][1]-obs_width/2) obs_pt.append(obs_i[k][0]-obs_width/2) obs_pt.append(obs_i[k][1]+obs_width/2) obs_pt.append(obs_i[k][0]+obs_width/2) obs_pt.append(obs_i[k][1]+obs_width/2) obs_pt.append(obs_i[k][0]+obs_width/2) obs_pt.append(obs_i[k][1]-obs_width/2) new_obs_i.append(obs_pt) #obs_i = new_obs_i collision_check = lambda x: IsInCollision(x, new_obs_i) for t in range(MAX_NEURAL_REPLAN): # adaptive step size on replanning attempts res, path_list = plan(obs_i, obc_i, start, goal, detail_paths, informer, init_informer, system, dynamics, \ enforce_bounds, collision_check, traj_opt, jac_A, jac_B, step_sz=step_sz, MAX_LENGTH=300) #print('after neural replan:') #print(path) #path = lvc(path, obc[i], IsInCollision, step_sz=step_sz) #print('after lvc:') #print(path) if res: fp = 1 print('feasible ok!') break #if feasibility_check(bvp_solver, path, obc_i, IsInCollision, step_sz=0.01): # fp = 1 # print('feasible, ok!') # break if fp: # only for successful paths time1 = time.time() - time0 time1 -= time_norm time_path.append(time1) print('test time: %f' % (time1)) # write the path #print('planned path:') #print(path) #path = np.array(path) #np.savetxt('results/path_%d.txt' % (j), path) #np.savetxt('results/control_%d.txt' % (j), np.array(control)) #np.savetxt('results/timestep_%d.txt' % (j), np.array(time_step)) fes_path.append(fp) time_env.append(time_path) time_total += time_path print('average test time up to now: %f' % (np.mean(time_total))) fes_env.append(fes_path) valid_env.append(valid_path) print('accuracy up to now: %f' % (float(np.sum(fes_env)) / np.sum(valid_env))) time_path = save_dir + 'mpnet_%s_time.pkl' % (data_type) pickle.dump(time_env, open(time_path, "wb" )) #print(fp/tp) return np.array(fes_env), np.array(valid_env)
def main(args): # set seed print(args.model_path) torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) # Build the models if torch.cuda.is_available(): torch.cuda.set_device(args.device) # setup evaluation function and load function if args.env_type == 'pendulum': IsInCollision = pendulum.IsInCollision normalize = pendulum.normalize unnormalize = pendulum.unnormalize obs_file = None obc_file = None cae = cae_identity mlp = MLP system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) max_iter = 200 min_time_steps = 20 max_time_steps = 200 integration_step = 0.002 goal_radius = 0.1 random_seed = 0 sst_delta_near = 0.05 sst_delta_drain = 0.02 vel_idx = [1] mpNet = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size, cae, mlp) # load previously trained model if start epoch > 0 model_path = 'kmpnet_epoch_%d.pkl' % (args.start_epoch) if args.start_epoch > 0: load_net_state(mpNet, os.path.join(args.model_path, model_path)) torch_seed, np_seed, py_seed = load_seed( os.path.join(args.model_path, model_path)) # set seed after loading torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) if torch.cuda.is_available(): mpNet.cuda() mpNet.mlp.cuda() mpNet.encoder.cuda() if args.opt == 'Adagrad': mpNet.set_opt(torch.optim.Adagrad, lr=args.learning_rate) elif args.opt == 'Adam': mpNet.set_opt(torch.optim.Adam, lr=args.learning_rate) elif args.opt == 'SGD': mpNet.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9) if args.start_epoch > 0: load_opt_state(mpNet, os.path.join(args.model_path, model_path)) # load train and test data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset( N=args.seen_N, NP=args.seen_NP, s=args.seen_s, sp=args.seen_sp, p_folder=args.path_folder, obs_f=obs_file, obc_f=obc_file) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset( N=args.unseen_N, NP=args.unseen_NP, s=args.unseen_s, sp=args.unseen_sp, p_folder=args.path_folder, obs_f=obs_file, obc_f=obc_file) # test # testing print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. T = 1 obc, obs, paths, path_lengths = seen_test_data if obs is not None: obs = obs.astype(np.float32) obs = torch.from_numpy(obs) fes_env = [] # list of list valid_env = [] time_env = [] time_total = [] normalize_func = lambda x: normalize(x, args.world_size) unnormalize_func = lambda x: unnormalize(x, args.world_size) low = [] high = [] state_bounds = system.get_state_bounds() for i in range(len(state_bounds)): low.append(state_bounds[i][0]) high.append(state_bounds[i][1]) for i in range(len(paths)): time_path = [] fes_path = [] # 1 for feasible, 0 for not feasible valid_path = [] # if the feasibility is valid or not # save paths to different files, indicated by i # feasible paths for each env suc_n = 0 sst_suc_n = 0 for j in range(len(paths[0])): time0 = time.time() time_norm = 0. fp = 0 # indicator for feasibility print("step: i=" + str(i) + " j=" + str(j)) p1_ind = 0 p2_ind = 0 p_ind = 0 if path_lengths[i][j] == 0: # invalid, feasible = 0, and path count = 0 fp = 0 valid_path.append(0) if path_lengths[i][j] > 0: fp = 0 valid_path.append(1) path = [paths[i][j][0], paths[i][j][path_lengths[i][j] - 1]] start = paths[i][j][0] end = paths[i][j][path_lengths[i][j] - 1] start[1] = 0. end[1] = 0. # plot the entire path #plt.plot(paths[i][j][:,0], paths[i][j][:,1]) """ planner = SST( state_bounds=system.get_state_bounds(), control_bounds=system.get_control_bounds(), distance=system.distance_computer(), start_state=start, goal_state=end, goal_radius=goal_radius, random_seed=0, sst_delta_near=sst_delta_near, sst_delta_drain=sst_delta_drain ) """ planner = RRT(state_bounds=system.get_state_bounds(), control_bounds=system.get_control_bounds(), distance=system.distance_computer(), start_state=start, goal_state=end, goal_radius=goal_radius, random_seed=0) control = [] time_step = [] MAX_NEURAL_REPLAN = 11 if obs is None: obs_i = None obc_i = None else: obs_i = obs[i] obc_i = obc[i] print('created RRT') # Run planning and print out solution is some statistics every few iterations. time0 = time.time() start = paths[i][j][0] #end = paths[i][j][path_lengths[i][j]-1] new_sample = start sample = start N_sample = 10 for iteration in range(max_iter // N_sample): #if iteration % 50 == 0: # # from time to time use the goal # sample = end # #planner.step_with_sample(system, sample, 20, 200, 0.002) #else: #planner.step(system, min_time_steps, max_time_steps, integration_step) #sample = np.random.uniform(low=low, high=high) for num_sample in range(N_sample): ip1 = np.concatenate([new_sample, end]) np.expand_dims(ip1, 0) #ip1=torch.cat((obs,start,goal)).unsqueeze(0) time0 = time.time() ip1 = normalize_func(ip1) ip1 = torch.FloatTensor(ip1) time_norm += time.time() - time0 ip1 = to_var(ip1) if obs is not None: obs = torch.FloatTensor(obs).unsqueeze(0) obs = to_var(obs) sample = mpNet(ip1, obs).squeeze(0) # unnormalize to world size sample = sample.data.cpu().numpy() time0 = time.time() sample = unnormalize_func(sample) print('sample:') print(sample) print('start:') print(start) print('goal:') print(end) print('accuracy: %f' % (float(suc_n) / (j + 1))) print('sst accuracy: %f' % (float(sst_suc_n) / (j + 1))) sample = planner.step_with_sample(system, sample, min_time_steps, max_time_steps, 0.002) #planner.step_bvp(system, 10, 200, 0.002) im = planner.visualize_nodes(system) show_image(im, 'nodes', wait=False) new_sample = planner.step_with_sample(system, end, min_time_steps, max_time_steps, 0.002) solution = planner.get_solution() if solution is not None: print('solved.') suc_n += 1 break planner = SST(state_bounds=system.get_state_bounds(), control_bounds=system.get_control_bounds(), distance=system.distance_computer(), start_state=start, goal_state=end, goal_radius=goal_radius, random_seed=0, sst_delta_near=sst_delta_near, sst_delta_drain=sst_delta_drain) # Run planning and print out solution is some statistics every few iterations. time0 = time.time() start = paths[i][j][0] #end = paths[i][j][path_lengths[i][j]-1] new_sample = start sample = start N_sample = 10 for iteration in range(max_iter // N_sample): for k in range(N_sample): sample = np.random.uniform(low=low, high=high) planner.step_with_sample(system, sample, min_time_steps, max_time_steps, integration_step) im = planner.visualize_tree(system) show_image(im, 'tree', wait=False) print('accuracy: %f' % (float(suc_n) / (j + 1))) print('sst accuracy: %f' % (float(sst_suc_n) / (j + 1))) planner.step_with_sample(system, end, min_time_steps, max_time_steps, integration_step) solution = planner.get_solution() if solution is not None: print('solved.') sst_suc_n += 1 break print('accuracy: %f' % (float(suc_n) / (j + 1))) print('sst accuracy: %f' % (float(sst_suc_n) / (j + 1)))
def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, cost_i, max_iteration, data, out_queue): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': obs_width = 4.0 psopt_system = _sst_module.PSOPTCartPole() propagate_system = standard_cpp_systems.RectangleObs( obs, obs_width, 'cartpole') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.002 num_steps = 101 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 device = 3 num_sample = 10 min_time_steps = 10 max_time_steps = 200 mpnet_goal_threshold = 2.0 mpnet_length_threshold = 40 pick_goal_init_threshold = 0.1 pick_goal_end_threshold = 0.8 pick_goal_start_percent = 0.4 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 1.0 delta_drain = 0.5 #print('creating planner...') planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, \ 200, num_steps, step_sz, propagate_system, 3) #cost_threshold = cost_i * 1.1 cost_threshold = 100000000. """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(imin, imax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') for i in range(len(data)): update_line(hl, ax, data[i]) draw_update_line(ax) # visualization end """ # visualize for cartpole plt.ion() fig = plt.figure() ax = fig.add_subplot(111) ax.set_autoscale_on(True) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') def update_line(h, ax, new_data): h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def draw_update_line(ax): ax.relim() ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() # randomly pick up a point in the data, and find similar data in the dataset # plot the next point #ax.set_autoscale_on(True) ax.set_xlim(-30, 30) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2 * np.pi)) * (2 * np.pi) if res[i] > np.pi: res[i] = res[i] - 2 * np.pi return res dx = 1 dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2 * 30. / dx) jmin = 0 jmax = int(2 * np.pi / dtheta) for i in range(imin, imax): for j in range(jmin, jmax): x = np.array([dx * i - 30, 0., dtheta * j - np.pi, 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:, 0], feasible_points[:, 2], c='yellow') ax.scatter(infeasible_points[:, 0], infeasible_points[:, 2], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state # visualization end # generate a path by using SST to plan for some maximal iterations state_t = start_state pick_goal_threshold = .0 ax.scatter(goal_inform_state[0], goal_inform_state[2], marker='*', c='red') # cartpole for i in range(max_iteration): time0 = time.time() # determine if picking goal based on iteration number goal_prob = random.random() #flag=1: using MPNet #flag=0: not using MPNet if goal_prob <= pick_goal_threshold: flag = 0 else: flag = 1 bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_tree_SMP_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \ flag, goal_radius, max_iteration, distance_computer, \ delta_near, delta_drain, cost_threshold) if len( bvp_u ) != 0: # and bvp_t[0] > 0.01: # turn bvp_t off if want to use step_bvp xw_scat = ax.scatter(mpnet_res[0], mpnet_res[2], c='lightgreen') draw_update_line(ax) # propagate data p_start = bvp_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(bvp_u)): #state_i.append(len(detail_paths)-1) max_steps = int(bvp_t[k] / step_sz) accum_cost = 0. for step in range(1, max_steps + 1): p_start = dynamics(p_start, bvp_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) cost.append(accum_cost) accum_cost = 0. xs_to_plot = np.array(state) for j in range(len(xs_to_plot)): xs_to_plot[j] = wrap_angle(xs_to_plot[j], propagate_system) xs_to_plot = xs_to_plot[::5] #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green') # acrobot ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 2], c='green') # cartpole #ax.scatter(bvp_x[:,0], bvp_x[:,1], c='green') print('solution: x') print(bvp_x) print('solution: u') print(bvp_u) print('solution: t') print(bvp_t) # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) #state_t = state[-1] print('state_t:') print(state_t) print('mpnet_res') print(mpnet_res) # based on flag, determine how to change state_t if flag: # only change state_t if in MPNet inform mode #if len(bvp_u) != 0: if True: # try using steered result as next start #if not IsInCollision(state_t, obs_i): state_t = mpnet_res print('after copying to state_t:') print('state_t') print(state_t) # if in collision, then not using it else: print('failure') state_t = start_state # failed BVP, back to origin plan_time = time.time() - time0 print('plan time: %fs' % (plan_time)) if len(res_u) == 0: print('failed.') out_queue.put(-1) else: print('path succeeded.') print('cost: %f' % (np.sum(res_t))) print('cost_threshold: %f' % (cost_threshold)) print('data cost: %f' % (cost_i)) out_queue.put(plan_time)
def main(args): # set seed print(args.model_path) torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) #torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) # Build the models #if torch.cuda.is_available(): # torch.cuda.set_device(args.device) # setup evaluation function and load function if args.env_type == 'acrobot_obs': obs_file = None obc_file = None #cpp_propagator = _sst_module.SystemPropagator() #dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 goal_S0 = np.diag([1.,1.,0,0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(x0, x1, 200, num_steps, step_sz*1, step_sz*(num_steps-1), x_init, u_init, t_init) goal_S0 = np.diag([1.,1.,0,0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 if args.env_type == 'pendulum': step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') step_sz = 0.002 num_steps = 20 goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type in ['acrobot_obs','acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8']: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 step_sz = 0.02 num_steps = 21 goal_radius=10.0 random_seed=0 delta_near=1.0 delta_drain=0.5 # load previously trained model if start epoch > 0 #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps) mlp_path = os.path.join(os.getcwd()+'/c++/','acrobot_obs_MLP_epoch_5000.pt') encoder_path = os.path.join(os.getcwd()+'/c++/','acrobot_obs_encoder_epoch_5000.pt') cost_mlp_path = os.path.join(os.getcwd()+'/c++/','costnet_acrobot_obs_MLP_epoch_800_step_10.pt') cost_encoder_path = os.path.join(os.getcwd()+'/c++/','costnet_acrobot_obs_encoder_epoch_800_step_10.pt') print('mlp_path:') print(mlp_path) ##################################################### def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, cost_i, max_iteration, out_queue): if args.env_type in ['acrobot_obs','acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8']: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs(obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius=2.0 random_seed=0 delta_near=1.0 delta_drain=0.5 device = 3 #print('creating planner...') planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, \ 200, num_steps, step_sz, propagate_system, device) #cost_threshold = cost_i * 1.1 cost_threshold = 100000000. # generate a path by using SST to plan for some maximal iterations time0 = time.time() res_x, res_u, res_t = planner.plan_tree_SMP_cost_gradient("sst", propagate_system, psopt_system, obc.flatten(), start_state, goal_inform_state, goal_inform_state, \ goal_radius, max_iteration, distance_computer, \ delta_near, delta_drain, cost_threshold, 15) plan_time = time.time() - time0 """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(imin, imax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state if len(res_u): # propagate data p_start = res_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(res_u)): #state_i.append(len(detail_paths)-1) max_steps = int(res_t[k]/step_sz) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation p_start = res_x[k] state[-1] = res_x[k] for step in range(1,max_steps+1): p_start = dynamics(p_start, res_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) state[-1] = res_x[-1] xs_to_plot = np.array(state) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system) if IsInCollision(xs_to_plot[i], obs_i): print('in collision') ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green') # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) plt.waitforbuttonpress() """ #im = planner.visualize_nodes(propagate_system) #sec = input('Let us wait for user input') #show_image_opencv(im, "planning_tree", wait=True) # validate if the path contains collision """ if len(res_u): # propagate data p_start = res_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(res_u)): #state_i.append(len(detail_paths)-1) max_steps = int(res_t[k]/step_sz) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation p_start = res_x[k] state[-1] = res_x[k] for step in range(1,max_steps+1): p_start = dynamics(p_start, res_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. # check collision for the new state assert not IsInCollision(p_start, obs_i) #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) state[-1] = res_x[-1] # validation end """ print('plan time: %fs' % (plan_time)) if len(res_x) == 0: print('failed.') out_queue.put(-1) else: print('path succeeded.') print('cost: %f' % (np.sum(res_t))) print('cost_threshold: %f' % (cost_threshold)) print('data cost: %f' % (cost_i)) out_queue.put(plan_time) #################################################################################### # load data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP, args.data_folder, obs_f, args.seen_s, args.seen_sp) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset(args.unseen_N, args.unseen_NP, args.data_folder, obs_f, args.unseen_s, args.unseen_sp) # test # testing queue = Queue(1) print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data obc = obc.astype(np.float32) #obc = torch.from_numpy(obc) #if torch.cuda.is_available(): # obc = obc.cuda() plan_res = [] plan_times = [] plan_res_all = [] for i in range(len(paths)): new_obs_i = [] obs_i = obs[i] plan_res_env = [] plan_time_env = [] for k in range(len(obs_i)): obs_pt = [] obs_pt.append(obs_i[k][0]-obs_width/2) obs_pt.append(obs_i[k][1]-obs_width/2) obs_pt.append(obs_i[k][0]-obs_width/2) obs_pt.append(obs_i[k][1]+obs_width/2) obs_pt.append(obs_i[k][0]+obs_width/2) obs_pt.append(obs_i[k][1]+obs_width/2) obs_pt.append(obs_i[k][0]+obs_width/2) obs_pt.append(obs_i[k][1]-obs_width/2) new_obs_i.append(obs_pt) obs_i = new_obs_i #print(obs_i) for j in range(len(paths[i])): start_state = sgs[i][j][0] goal_inform_state = paths[i][j][-1] goal_state = sgs[i][j][1] cost_i = costs[i][j].sum() #cost_i = 100000000. print('environment: %d/%d, path: %d/%d' % (i+1, len(paths), j+1, len(paths[i]))) p = Process(target=plan_one_path, args=(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, cost_i, 300000, queue)) #plan_one_path(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, cost_i, 300000, queue) p.start() p.join() res = queue.get() if res == -1: plan_res_env.append(0) plan_res_all.append(0) else: plan_res_env.append(1) plan_times.append(res) plan_res_all.append(1) print('average accuracy up to now: %f' % (np.array(plan_res_all).flatten().mean())) print('plan average time: %f' % (np.array(plan_times).mean())) print('plan time std: %f' % (np.array(plan_times).std())) plan_res.append(plan_res_env) print('plan accuracy: %f' % (np.array(plan_res).flatten().mean())) print('plan average time: %f' % (np.array(plan_times).mean())) print('plan time std: %f' % (np.array(plan_times).std()))
def main(args): # set seed print(args.model_path) torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) #torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) # Build the models #if torch.cuda.is_available(): # torch.cuda.set_device(args.device) # setup evaluation function and load function if args.env_type == 'pendulum': obs_file = None obc_file = None obs_f = False #system = standard_cpp_systems.PSOPTPendulum() #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) elif args.env_type == 'cartpole_obs': step_sz = 0.002 num_steps = 21 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 cost_threshold = 1.2 min_time_steps = 10 max_time_steps = 200 integration_step = 0.002 obs_width = 4.0 obs_f = True system = _sst_module.PSOPTCartPole() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'cartpole') #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) elif args.env_type == 'acrobot_obs': obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 200, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 if args.env_type == 'pendulum': step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') step_sz = 0.002 num_steps = 21 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 cost_threshold = 1.2 min_time_steps = 10 max_time_steps = 200 integration_step = 0.002 obs_width = 4.0 obs_f = True IsInCollision = cartpole_IsInCollision enforce_bounds = cartpole_enforce_bounds elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 0.1 delta_drain = 0.05 # load previously trained model if start epoch > 0 #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps) mlp_path = os.path.join( os.getcwd() + '/c++/', '%s_MLP_lr%f_epoch_%d_step_%d.pt' % (args.env_type, args.learning_rate, args.start_epoch, args.num_steps)) encoder_path = os.path.join( os.getcwd() + '/c++/', '%s_encoder_lr%f_epoch_%d_step_%d.pt' % (args.env_type, args.learning_rate, args.start_epoch, args.num_steps)) #mlp_path = os.path.join(os.getcwd()+'/c++/','acrobot_obs_MLP_epoch_5000.pt') #encoder_path = os.path.join(os.getcwd()+'/c++/','acrobot_obs_encoder_epoch_5000.pt') #cost_mlp_path = os.path.join(os.getcwd()+'/c++/','costnet_%s_MLP_lr%f_epoch_%d_step_%d.pt' % (args.env_type, args.learning_rate, args.start_epoch, args.num_steps)) #cost_encoder_path = os.path.join(os.getcwd()+'/c++/','costnet_%s_encoder_lr%f_epoch_%d_step_%d.pt' % (args.env_type, args.learning_rate, args.start_epoch, args.num_steps)) cost_mlp_path = os.path.join( os.getcwd() + '/c++/', 'costnet_acrobot_obs_MLP_epoch_800_step_10.pt') cost_encoder_path = os.path.join( os.getcwd() + '/c++/', 'costnet_acrobot_obs_encoder_epoch_800_step_10.pt') print('mlp_path:') print(mlp_path) ##################################################### def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, cost_i, max_iteration, data, out_queue): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': obs_width = 4.0 psopt_system = _sst_module.PSOPTCartPole() propagate_system = standard_cpp_systems.RectangleObs( obs, obs_width, 'cartpole') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.002 num_steps = 101 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 device = 3 num_sample = 10 min_time_steps = 10 max_time_steps = 200 mpnet_goal_threshold = 2.0 mpnet_length_threshold = 40 pick_goal_init_threshold = 0.1 pick_goal_end_threshold = 0.8 pick_goal_start_percent = 0.4 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 1.0 delta_drain = 0.5 #print('creating planner...') planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, \ 200, num_steps, step_sz, propagate_system, 3) #cost_threshold = cost_i * 1.1 cost_threshold = 100000000. """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(imin, imax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') for i in range(len(data)): update_line(hl, ax, data[i]) draw_update_line(ax) # visualization end """ # visualize for cartpole plt.ion() fig = plt.figure() ax = fig.add_subplot(111) ax.set_autoscale_on(True) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') def update_line(h, ax, new_data): h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def draw_update_line(ax): ax.relim() ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() # randomly pick up a point in the data, and find similar data in the dataset # plot the next point #ax.set_autoscale_on(True) ax.set_xlim(-30, 30) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2 * np.pi)) * (2 * np.pi) if res[i] > np.pi: res[i] = res[i] - 2 * np.pi return res dx = 1 dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2 * 30. / dx) jmin = 0 jmax = int(2 * np.pi / dtheta) for i in range(imin, imax): for j in range(jmin, jmax): x = np.array([dx * i - 30, 0., dtheta * j - np.pi, 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:, 0], feasible_points[:, 2], c='yellow') ax.scatter(infeasible_points[:, 0], infeasible_points[:, 2], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state # visualization end # generate a path by using SST to plan for some maximal iterations state_t = start_state pick_goal_threshold = .0 ax.scatter(goal_inform_state[0], goal_inform_state[2], marker='*', c='red') # cartpole for i in range(max_iteration): time0 = time.time() # determine if picking goal based on iteration number goal_prob = random.random() #flag=1: using MPNet #flag=0: not using MPNet if goal_prob <= pick_goal_threshold: flag = 0 else: flag = 1 bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_tree_SMP_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \ flag, goal_radius, max_iteration, distance_computer, \ delta_near, delta_drain, cost_threshold) if len( bvp_u ) != 0: # and bvp_t[0] > 0.01: # turn bvp_t off if want to use step_bvp xw_scat = ax.scatter(mpnet_res[0], mpnet_res[2], c='lightgreen') draw_update_line(ax) # propagate data p_start = bvp_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(bvp_u)): #state_i.append(len(detail_paths)-1) max_steps = int(bvp_t[k] / step_sz) accum_cost = 0. for step in range(1, max_steps + 1): p_start = dynamics(p_start, bvp_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) cost.append(accum_cost) accum_cost = 0. xs_to_plot = np.array(state) for j in range(len(xs_to_plot)): xs_to_plot[j] = wrap_angle(xs_to_plot[j], propagate_system) xs_to_plot = xs_to_plot[::5] #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green') # acrobot ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 2], c='green') # cartpole #ax.scatter(bvp_x[:,0], bvp_x[:,1], c='green') print('solution: x') print(bvp_x) print('solution: u') print(bvp_u) print('solution: t') print(bvp_t) # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) #state_t = state[-1] print('state_t:') print(state_t) print('mpnet_res') print(mpnet_res) # based on flag, determine how to change state_t if flag: # only change state_t if in MPNet inform mode #if len(bvp_u) != 0: if True: # try using steered result as next start #if not IsInCollision(state_t, obs_i): state_t = mpnet_res print('after copying to state_t:') print('state_t') print(state_t) # if in collision, then not using it else: print('failure') state_t = start_state # failed BVP, back to origin plan_time = time.time() - time0 print('plan time: %fs' % (plan_time)) if len(res_u) == 0: print('failed.') out_queue.put(-1) else: print('path succeeded.') print('cost: %f' % (np.sum(res_t))) print('cost_threshold: %f' % (cost_threshold)) print('data cost: %f' % (cost_i)) out_queue.put(plan_time) #################################################################################### # load data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP, args.data_folder, obs_f, args.seen_s, args.seen_sp) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset( args.unseen_N, args.unseen_NP, args.data_folder, obs_f, args.unseen_s, args.unseen_sp) # test # testing queue = Queue(1) print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data obc = obc.astype(np.float32) #obc = torch.from_numpy(obc) #if torch.cuda.is_available(): # obc = obc.cuda() plan_res = [] plan_times = [] plan_res_all = [] for i in range(len(paths)): new_obs_i = [] obs_i = obs[i] plan_res_env = [] plan_time_env = [] for k in range(len(obs_i)): obs_pt = [] obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) new_obs_i.append(obs_pt) obs_i = new_obs_i #print(obs_i) for j in range(len(paths[i])): start_state = sgs[i][j][0] goal_inform_state = paths[i][j][-1] goal_state = sgs[i][j][1] cost_i = costs[i][j].sum() #cost_i = 100000000. # propagate data p_start = paths[i][j][0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(controls[i][j])): #state_i.append(len(detail_paths)-1) max_steps = int(costs[i][j][k] / step_sz) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation p_start = paths[i][j][k] state[-1] = paths[i][j][k] for step in range(1, max_steps + 1): p_start = dynamics(p_start, controls[i][j][k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) detail_controls.append(controls[i][j]) detail_costs.append(step_sz) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) control.append(controls[i][j][k]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) state[-1] = paths[i][j][-1] data = state # end of propagation print('environment: %d/%d, path: %d/%d' % (i + 1, len(paths), j + 1, len(paths[i]))) #p = Process(target=plan_one_path, args=(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, cost_i, 300000, data, queue)) plan_one_path(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, cost_i, 300000, data, queue) #p.start() #p.join() res = queue.get() if res == -1: plan_res_env.append(0) plan_res_all.append(0) else: plan_res_env.append(1) plan_times.append(res) plan_res_all.append(1) print('average accuracy up to now: %f' % (np.array(plan_res_all).flatten().mean())) print('plan average time: %f' % (np.array(plan_times).mean())) print('plan time std: %f' % (np.array(plan_times).std())) plan_res.append(plan_res_env) print('plan accuracy: %f' % (np.array(plan_res).flatten().mean())) print('plan average time: %f' % (np.array(plan_times).mean())) print('plan time std: %f' % (np.array(plan_times).std()))
def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, max_iteration, data, out_queue): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') system = _sst_module.CartPole() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 200, num_steps, step_sz * 1, step_sz * 50, x_init, u_init, t_init) goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2 random_seed = 0 #delta_near=1.0 #delta_drain=0.5 delta_near = 0.1 delta_drain = 0.05 #print('creating planner...') planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, 20, num_steps, step_sz, propagate_system, args.device) # generate a path by using SST to plan for some maximal iterations time0 = time.time() #print('obc:') #print(obc.shape) #print(delta_near) #print(delta_drain) #print('start_state:') #print(start_state) #print('goal_state:') #print(goal_state) state_t = start_state pick_goal_threshold = 0.10 goal_linear_inc_start_iter = int(0.6 * max_iteration) goal_linear_inc_end_iter = max_iteration goal_linear_inc_end_threshold = 0.95 goal_linear_inc = (goal_linear_inc_end_threshold - pick_goal_threshold) / (goal_linear_inc_end_iter - goal_linear_inc_start_iter) start_time = time.time() plan_time = -1 for i in tqdm(range(max_iteration)): print('iteration: %d' % (i)) print('state_t:') print(state_t) # calculate if using goal or not use_goal_prob = random.random() if i > goal_linear_inc_start_iter: pick_goal_threshold += goal_linear_inc if use_goal_prob <= pick_goal_threshold: flag = 0 else: flag = 1 bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \ flag, goal_radius, max_iteration, distance_computer, \ delta_near, delta_drain) solution = planner.get_solution() if solution is not None: plan_time = time.time() - start_time if plan_time == -1: print('failed.') out_queue.put(-1) else: print('path succeeded.') out_queue.put(plan_time) #if len(res_x) == 0:
def main(args): # set seed print(args.model_path) torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) #torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) # Build the models #if torch.cuda.is_available(): # torch.cuda.set_device(args.device) # setup evaluation function and load function if args.env_type == 'pendulum': obs_file = None obc_file = None obs_f = False #system = standard_cpp_systems.PSOPTPendulum() #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) elif args.env_type == 'cartpole_obs': normalize = cartpole.normalize unnormalize = cartpole.unnormalize obs_file = None obc_file = None #dynamics = cartpole.dynamics #jax_dynamics = cartpole.jax_dynamics #enforce_bounds = cartpole.enforce_bounds cae = CAE_acrobot_voxel_2d mlp = mlp_acrobot.MLP obs_f = True #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'cartpole') #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) elif args.env_type == 'acrobot_obs': obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 200, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) obs_width = 6.0 step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 0.1 delta_drain = 0.05 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 0.1 delta_drain = 0.05 # load previously trained model if start epoch > 0 #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps) mlp_path = os.path.join( os.getcwd() + '/c++/', 'acrobot_obs_MLP_lr0.010000_epoch_2850_step_20.pt') encoder_path = os.path.join( os.getcwd() + '/c++/', 'acrobot_obs_encoder_lr0.010000_epoch_2850_step_20.pt') cost_mlp_path = os.path.join( os.getcwd() + '/c++/', 'costnet_acrobot_obs_8_MLP_epoch_300_step_20.pt') cost_encoder_path = os.path.join( os.getcwd() + '/c++/', 'costnet_acrobot_obs_8_encoder_epoch_300_step_20.pt') print('mlp_path:') print(mlp_path) ##################################################### def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, max_iteration, data, out_queue): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') system = _sst_module.CartPole() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 200, num_steps, step_sz * 1, step_sz * 50, x_init, u_init, t_init) goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2 random_seed = 0 #delta_near=1.0 #delta_drain=0.5 delta_near = 0.1 delta_drain = 0.05 #print('creating planner...') planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, 20, num_steps, step_sz, propagate_system, args.device) # generate a path by using SST to plan for some maximal iterations time0 = time.time() #print('obc:') #print(obc.shape) #print(delta_near) #print(delta_drain) #print('start_state:') #print(start_state) #print('goal_state:') #print(goal_state) state_t = start_state pick_goal_threshold = 0.10 goal_linear_inc_start_iter = int(0.6 * max_iteration) goal_linear_inc_end_iter = max_iteration goal_linear_inc_end_threshold = 0.95 goal_linear_inc = (goal_linear_inc_end_threshold - pick_goal_threshold) / (goal_linear_inc_end_iter - goal_linear_inc_start_iter) start_time = time.time() plan_time = -1 for i in tqdm(range(max_iteration)): print('iteration: %d' % (i)) print('state_t:') print(state_t) # calculate if using goal or not use_goal_prob = random.random() if i > goal_linear_inc_start_iter: pick_goal_threshold += goal_linear_inc if use_goal_prob <= pick_goal_threshold: flag = 0 else: flag = 1 bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \ flag, goal_radius, max_iteration, distance_computer, \ delta_near, delta_drain) solution = planner.get_solution() if solution is not None: plan_time = time.time() - start_time if plan_time == -1: print('failed.') out_queue.put(-1) else: print('path succeeded.') out_queue.put(plan_time) #if len(res_x) == 0: #################################################################################### # load data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP, args.data_folder, obs_f, args.seen_s, args.seen_sp) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset( args.unseen_N, args.unseen_NP, args.data_folder, obs_f, args.unseen_s, args.unseen_sp) # test # testing queue = Queue(1) print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data obc = obc.astype(np.float32) #obc = torch.from_numpy(obc) #if torch.cuda.is_available(): # obc = obc.cuda() for i in range(len(paths)): new_obs_i = [] obs_i = obs[i] for k in range(len(obs_i)): obs_pt = [] obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) new_obs_i.append(obs_pt) obs_i = new_obs_i #print(obs_i) for j in range(len(paths[i])): start_state = sgs[i][j][0] goal_inform_state = paths[i][j][-1] goal_state = sgs[i][j][1] #p = Process(target=plan_one_path, args=(obs[i], obc[i], start_state, goal_state, 500, queue)) # propagate data p_start = paths[i][j][0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(controls[i][j])): #state_i.append(len(detail_paths)-1) max_steps = int(costs[i][j][k] / step_sz) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation p_start = paths[i][j][k] state[-1] = paths[i][j][k] for step in range(1, max_steps + 1): p_start = dynamics(p_start, controls[i][j][k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) detail_controls.append(controls[i][j]) detail_costs.append(step_sz) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) control.append(controls[i][j][k]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) state[-1] = paths[i][j][-1] data = state p = Process(target=plan_one_path, args=(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, 1000, data, queue)) p.start() p.join() res = queue.get()
def main(args): # set seed print(args.model_path) torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) #torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) # Build the models #if torch.cuda.is_available(): # torch.cuda.set_device(args.device) # setup evaluation function and load function if args.env_type == 'pendulum': obs_file = None obc_file = None obs_f = False #system = standard_cpp_systems.PSOPTPendulum() #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) elif args.env_type == 'cartpole_obs': normalize = cartpole.normalize unnormalize = cartpole.unnormalize obs_file = None obc_file = None dynamics = cartpole.dynamics jax_dynamics = cartpole.jax_dynamics enforce_bounds = cartpole.enforce_bounds cae = CAE_acrobot_voxel_2d mlp = mlp_acrobot.MLP obs_f = True #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'cartpole') #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) elif args.env_type == 'acrobot_obs': obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 200, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_2': obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_3': obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_5': obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type == 'acrobot_obs_6': obs_file = None obc_file = None dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 obs_width = 6.0 elif args.env_type == 'acrobot_obs_6': obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 num_steps = 21 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) goal_S0 = np.diag([1., 1., 0, 0]) #goal_S0 = np.identity(4) goal_rho0 = 1.0 obs_width = 6.0 elif args.env_type == 'acrobot_obs_8': obs_file = None obc_file = None system = _sst_module.PSOPTAcrobot() cpp_propagator = _sst_module.SystemPropagator() dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t) obs_f = True bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.02 #num_steps = 21 num_steps = 21 #args.num_steps*2 traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 400, num_steps, step_sz * 1, step_sz * (num_steps - 1), x_init, u_init, t_init) #traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: #def cem_trajopt(x0, x1, step_sz, num_steps, x_init, u_init, t_init): # u, t = acrobot_obs.trajopt(x0, x1, 500, num_steps, step_sz*1, step_sz*(num_steps-1), x_init, u_init, t_init) # xs, us, dts, valid = propagate(x0, u, t, dynamics=dynamics, enforce_bounds=enforce_bounds, IsInCollision=lambda x: False, system=system, step_sz=step_sz) # return xs, us, dts #traj_opt = cem_trajopt obs_width = 6.0 goal_S0 = np.diag([1., 1., 0, 0]) goal_rho0 = 1.0 if args.env_type == 'pendulum': step_sz = 0.002 num_steps = 20 elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') step_sz = 0.002 num_steps = 20 goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 0.1 delta_drain = 0.05 # load previously trained model if start epoch > 0 #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps) mlp_path = os.path.join( os.getcwd() + '/c++/', 'acrobot_obs_MLP_lr0.010000_epoch_2850_step_20.pt') encoder_path = os.path.join( os.getcwd() + '/c++/', 'acrobot_obs_encoder_lr0.010000_epoch_2850_step_20.pt') cost_mlp_path = os.path.join( os.getcwd() + '/c++/', 'costnet_acrobot_obs_8_MLP_epoch_300_step_20.pt') cost_encoder_path = os.path.join( os.getcwd() + '/c++/', 'costnet_acrobot_obs_8_encoder_epoch_300_step_20.pt') print('mlp_path:') print(mlp_path) ##################################################### def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, max_iteration, out_queue): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') system = _sst_module.CartPole() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 200, num_steps, step_sz * 1, step_sz * 50, x_init, u_init, t_init) goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = .1 delta_drain = 0.05 #print('creating planner...') planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, args.bvp_iter, num_steps, step_sz, propagate_system, 3) # generate a path by using SST to plan for some maximal iterations time0 = time.time() #print('obc:') #print(obc.shape) #print(delta_near) #print(delta_drain) #print('start_state:') #print(start_state) #print('goal_state:') #print(goal_state) res_x, res_u, res_t = planner.plan("sst", args.plan_type, propagate_system, psopt_system, obc.flatten(), start_state, goal_inform_state, goal_inform_state, \ goal_radius, max_iteration, distance_computer, \ args.delta_near, args.delta_drain) #res_x, res_u, res_t = planner.plan("sst", propagate_system, psopt_system, obc.flatten(), start_state, goal_state, goal_inform_state, \ # goal_radius, max_iteration, propagate_system.distance_computer(), \ # delta_near, delta_drain) plan_time = time.time() - time0 """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(imin, imax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') if len(res_x) != 0: xs_to_plot = np.array(res_x) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system) ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='orange') print('solution: x') print(res_x) print('solution: u') print(res_u) print('solution: t') print(res_t) # draw start and goal ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) plt.waitforbuttonpress() #im = planner.visualize_nodes(propagate_system) #sec = input('Let us wait for user input') #show_image_opencv(im, "planning_tree", wait=True) """ print('plan time: %fs' % (plan_time)) if len(res_x) == 0: print('failed.') out_queue.put(-1) else: print('path succeeded.') out_queue.put(plan_time) #################################################################################### # load data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP, args.data_folder, obs_f, args.seen_s, args.seen_sp) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset( args.unseen_N, args.unseen_NP, args.data_folder, obs_f, args.unseen_s, args.unseen_sp) # test # testing queue = Queue(1) print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data obc = obc.astype(np.float32) #obc = torch.from_numpy(obc) #if torch.cuda.is_available(): # obc = obc.cuda() plan_res = [] plan_times = [] plan_res_all = [] for i in range(len(paths)): new_obs_i = [] obs_i = obs[i] plan_res_env = [] plan_time_env = [] for k in range(len(obs_i)): obs_pt = [] obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) obs_pt.append(obs_i[k][0] - obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] + obs_width / 2) obs_pt.append(obs_i[k][0] + obs_width / 2) obs_pt.append(obs_i[k][1] - obs_width / 2) new_obs_i.append(obs_pt) obs_i = new_obs_i #print(obs_i) for j in range(len(paths[i])): start_state = sgs[i][j][0] goal_inform_state = paths[i][j][-1] goal_state = sgs[i][j][1] print('environment: %d/%d, path: %d/%d' % (i + 1, len(paths), j + 1, len(paths[i]))) p = Process(target=plan_one_path, args=(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, 1000, queue)) #plan_one_path(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, 500, queue) p.start() p.join() res = queue.get() if res == -1: plan_res_env.append(0) plan_res_all.append(0) else: plan_res_env.append(1) plan_times.append(res) plan_res_all.append(1) print('average accuracy up to now: %f' % (np.array(plan_res_all).flatten().mean())) print('plan average time: %f' % (np.array(plan_times).mean())) print('plan time std: %f' % (np.array(plan_times).std())) plan_res.append(plan_res_env) print('plan accuracy: %f' % (np.array(plan_res).flatten().mean())) print('plan average time: %f' % (np.array(plan_times).mean())) print('plan time std: %f' % (np.array(plan_times).std()))
def main(args): # set seed print(args.model_path) torch_seed = np.random.randint(low=0, high=1000) np_seed = np.random.randint(low=0, high=1000) py_seed = np.random.randint(low=0, high=1000) torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) # Build the models if torch.cuda.is_available(): torch.cuda.set_device(args.device) # setup evaluation function and load function if args.env_type == 'pendulum': IsInCollision = pendulum.IsInCollision normalize = pendulum.normalize unnormalize = pendulum.unnormalize obs_file = None obc_file = None cae = cae_identity mlp = MLP system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) max_iter = 100 min_time_steps = 10 max_time_steps = 200 integration_step = 0.002 goal_radius = 0.1 random_seed = 0 sst_delta_near = 0.05 sst_delta_drain = 0.02 vel_idx = [1] mpNet = KMPNet(args.total_input_size, args.AE_input_size, args.mlp_input_size, args.output_size, cae, mlp) # load previously trained model if start epoch > 0 model_path = 'kmpnet_epoch_%d.pkl' % (args.start_epoch) if args.start_epoch > 0: load_net_state(mpNet, os.path.join(args.model_path, model_path)) torch_seed, np_seed, py_seed = load_seed( os.path.join(args.model_path, model_path)) # set seed after loading torch.manual_seed(torch_seed) np.random.seed(np_seed) random.seed(py_seed) if torch.cuda.is_available(): mpNet.cuda() mpNet.mlp.cuda() mpNet.encoder.cuda() if args.opt == 'Adagrad': mpNet.set_opt(torch.optim.Adagrad, lr=args.learning_rate) elif args.opt == 'Adam': mpNet.set_opt(torch.optim.Adam, lr=args.learning_rate) elif args.opt == 'SGD': mpNet.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9) if args.start_epoch > 0: load_opt_state(mpNet, os.path.join(args.model_path, model_path)) # load train and test data print('loading...') if args.seen_N > 0: seen_test_data = data_loader.load_test_dataset( N=args.seen_N, NP=args.seen_NP, s=args.seen_s, sp=args.seen_sp, p_folder=args.path_folder, obs_f=obs_file, obc_f=obc_file) if args.unseen_N > 0: unseen_test_data = data_loader.load_test_dataset( N=args.unseen_N, NP=args.unseen_NP, s=args.unseen_s, sp=args.unseen_sp, p_folder=args.path_folder, obs_f=obs_file, obc_f=obc_file) # test # testing print('testing...') seen_test_suc_rate = 0. unseen_test_suc_rate = 0. T = 1 obc, obs, paths, path_lengths = seen_test_data if obs is not None: obs = obs.astype(np.float32) obs = torch.from_numpy(obs) fes_env = [] # list of list valid_env = [] time_env = [] time_total = [] normalize_func = lambda x: normalize(x, args.world_size) unnormalize_func = lambda x: unnormalize(x, args.world_size) for i in range(len(paths)): time_path = [] fes_path = [] # 1 for feasible, 0 for not feasible valid_path = [] # if the feasibility is valid or not # save paths to different files, indicated by i # feasible paths for each env suc_n = 0 for j in range(len(paths[0])): plt.ion() fig = plt.figure() ax = fig.add_subplot(111) ax.set_autoscale_on(True) hl, = ax.plot([], [], 'black') hl_real, = ax.plot([], [], 'yellow') time0 = time.time() time_norm = 0. fp = 0 # indicator for feasibility print("step: i=" + str(i) + " j=" + str(j)) p1_ind = 0 p2_ind = 0 p_ind = 0 if path_lengths[i][j] == 0: # invalid, feasible = 0, and path count = 0 fp = 0 valid_path.append(0) if path_lengths[i][j] > 0: fp = 0 valid_path.append(1) path = [paths[i][j][0], paths[i][j][path_lengths[i][j] - 1]] start = paths[i][j][0] end = paths[i][j][path_lengths[i][j] - 1] #start[1] = 0. #end[1] = 0. # plot the entire path #plt.plot(paths[i][j][:,0], paths[i][j][:,1]) control = [] time_step = [] MAX_NEURAL_REPLAN = 11 if obs is None: obs_i = None obc_i = None else: obs_i = obs[i] obc_i = obc[i] for k in range(path_lengths[i][j]): update_line(hl, ax, fig, paths[i][j][k]) print('created RRT') # Run planning and print out solution is some statistics every few iterations. time0 = time.time() start = paths[i][j][0] #end = paths[i][j][path_lengths[i][j]-1] new_sample = start print(new_sample) ax.scatter(new_sample[0], new_sample[1], c='r') ax.scatter(end[0], end[1], c='g') for iteration in range(max_iter): clear_line(hl_real, ax, fig) #hl_real, = ax.plot([], [], 'yellow') ip1 = np.concatenate([new_sample, end]) np.expand_dims(ip1, 0) #ip1=torch.cat((obs,start,goal)).unsqueeze(0) time0 = time.time() ip1 = normalize_func(ip1) ip1 = torch.FloatTensor(ip1) time_norm += time.time() - time0 ip1 = to_var(ip1) if obs is not None: obs = torch.FloatTensor(obs).unsqueeze(0) obs = to_var(obs) sample = mpNet(ip1, obs).squeeze(0) # unnormalize to world size sample = sample.data.cpu().numpy() time0 = time.time() sample = unnormalize_func(sample) ax.scatter(sample[0], sample[1], c='b') plt.pause(0.01) steer, steer_state, steer_control, steer_time_step = plan_general.steerTo( bvp_solver, start, sample, None, None, step_sz=0.02) for k in range(len(steer_state)): update_line(hl_real, ax, fig, steer_state[k]) plt.waitforbuttonpress()
def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, max_iteration, out_queue): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') system = _sst_module.CartPole() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 200, num_steps, step_sz * 1, step_sz * 50, x_init, u_init, t_init) goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = .1 delta_drain = 0.05 #print('creating planner...') planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, args.bvp_iter, num_steps, step_sz, propagate_system, 3) # generate a path by using SST to plan for some maximal iterations time0 = time.time() #print('obc:') #print(obc.shape) #print(delta_near) #print(delta_drain) #print('start_state:') #print(start_state) #print('goal_state:') #print(goal_state) res_x, res_u, res_t = planner.plan("sst", args.plan_type, propagate_system, psopt_system, obc.flatten(), start_state, goal_inform_state, goal_inform_state, \ goal_radius, max_iteration, distance_computer, \ args.delta_near, args.delta_drain) #res_x, res_u, res_t = planner.plan("sst", propagate_system, psopt_system, obc.flatten(), start_state, goal_state, goal_inform_state, \ # goal_radius, max_iteration, propagate_system.distance_computer(), \ # delta_near, delta_drain) plan_time = time.time() - time0 """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(imin, imax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') if len(res_x) != 0: xs_to_plot = np.array(res_x) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system) ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='orange') print('solution: x') print(res_x) print('solution: u') print(res_u) print('solution: t') print(res_t) # draw start and goal ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) plt.waitforbuttonpress() #im = planner.visualize_nodes(propagate_system) #sec = input('Let us wait for user input') #show_image_opencv(im, "planning_tree", wait=True) """ print('plan time: %fs' % (plan_time)) if len(res_x) == 0: print('failed.') out_queue.put(-1) else: print('path succeeded.') out_queue.put(plan_time)
def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, max_iteration, data, out_queue): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') system = _sst_module.CartPole() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve( x0, x1, 200, num_steps, step_sz * 1, step_sz * 50, x_init, u_init, t_init) goal_S0 = np.identity(4) goal_rho0 = 1.0 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) psopt_step_sz = 0.02 psopt_num_steps = 21 step_sz = 0.02 num_steps = 21 goal_radius = 2 random_seed = 0 #delta_near=1.0 #delta_drain=0.5 delta_near = 0.1 delta_drain = 0.05 #print('creating planner...') planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, 20, psopt_num_steps, psopt_step_sz, step_sz, propagate_system, args.device) # generate a path by using SST to plan for some maximal iterations time0 = time.time() #print('obc:') #print(obc.shape) #print(delta_near) #print(delta_drain) #print('start_state:') #print(start_state) #print('goal_state:') #print(goal_state) plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2 * np.pi)) * (2 * np.pi) if res[i] > np.pi: res[i] = res[i] - 2 * np.pi return res dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2 * np.pi / dtheta) for i in range(imin, imax): for j in range(imin, imax): x = np.array([dtheta * i - np.pi, dtheta * j - np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:, 0], feasible_points[:, 1], c='yellow') ax.scatter(infeasible_points[:, 0], infeasible_points[:, 1], c='pink') for i in range(len(data)): update_line(hl, ax, data[i]) draw_update_line(ax) state_t = start_state pick_goal_threshold = 0.10 goal_linear_inc_start_iter = int(0.6 * max_iteration) goal_linear_inc_end_iter = max_iteration goal_linear_inc_end_threshold = 0.95 goal_linear_inc = (goal_linear_inc_end_threshold - pick_goal_threshold) / (goal_linear_inc_end_iter - goal_linear_inc_start_iter) for i in range(max_iteration): print('iteration: %d' % (i)) print('state_t:') print(state_t) # calculate if using goal or not use_goal_prob = random.random() if i > goal_linear_inc_start_iter: pick_goal_threshold += goal_linear_inc if use_goal_prob <= pick_goal_threshold: flag = 0 else: flag = 1 bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \ flag, goal_radius, max_iteration, distance_computer, \ delta_near, delta_drain) plan_time = time.time() - time0 if len( bvp_u ) != 0: # and bvp_t[0] > 0.01: # turn bvp_t off if want to use step_bvp # propagate data p_start = bvp_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(bvp_u)): #state_i.append(len(detail_paths)-1) max_steps = int(bvp_t[k] / step_sz) accum_cost = 0. for step in range(1, max_steps + 1): p_start = dynamics(p_start, bvp_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) cost.append(accum_cost) accum_cost = 0. xs_to_plot = np.array(state) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system) #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green') ax.scatter(bvp_x[:, 0], bvp_x[:, 1], c='green', s=10.0) print('solution: x') print(bvp_x) print('solution: u') print(bvp_u) print('solution: t') print(bvp_t) # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) xw_scat = ax.scatter(mpnet_res[0], mpnet_res[1], c='brown', s=10.) draw_update_line(ax) #state_t = state[-1] # try using mpnet_res as new start state_t = mpnet_res else: # in incollision state_t = start_state #if len(res_x) == 0: # print('failed.') out_queue.put(0)