def plan_one_path_bvp(env, start, end, out_queue, path_file, control_file, cost_file, time_file): planner = _sst_module.SSTWrapper( state_bounds=env.get_state_bounds(), control_bounds=env.get_control_bounds(), distance=env.distance_computer(), start_state=start, goal_state=end, goal_radius=goal_radius, random_seed=random_seed, sst_delta_near=sst_delta_near, sst_delta_drain=sst_delta_drain) # generate a path by using SST to plan for some maximal iterations time0 = time.time() #print('obs: %d, path: %d' % (i, j)) for iter in range(args.max_iter): if iter % 1000 == 0: print('still alive after %d iterations' % (iter)) if iter % 100 == 0: # from time to time use the goal sample = end #planner.step_with_sample(env, sample, min_time_steps, max_time_steps, integration_step) else: sample = np.random.uniform(low=low, high=high) #planner.step_with_sample(env, sample, min_time_steps, max_time_steps, integration_step) #planner.step(env, min_time_steps, max_time_steps, integration_step) planner.step_with_sample(env, sample, min_time_steps, max_time_steps, integration_step) solution = planner.get_solution() # don't break the searching to find better solutions #if solution is not None: # break plan_time = time.time() - time0 if solution is None: out_queue.put(0) else: print('path succeeded.') path, controls, cost = solution print(path) path = np.array(path) controls = np.array(controls) cost = np.array(cost) file = open(path_file, 'wb') pickle.dump(path, file) file.close() file = open(control_file, 'wb') pickle.dump(controls, file) file.close() file = open(cost_file, 'wb') pickle.dump(cost, file) file.close() file = open(time_file, 'wb') pickle.dump(plan_time, file) file.close() out_queue.put(1)
def plan_one_path_sst(env, start, end, path_file, control_file, cost_file, time_file): planner = _sst_module.SSTWrapper( state_bounds=env.get_state_bounds(), control_bounds=env.get_control_bounds(), distance=_sst_module.TwoLinkAcrobotDistance(), start_state=start, goal_state=end, goal_radius=goal_radius, random_seed=random_seed, sst_delta_near=sst_delta_near, sst_delta_drain=sst_delta_drain) # generate a path by using SST to plan for some maximal iterations time0 = time.time() #print('obs: %d, path: %d' % (i, j)) for iter in range(args.max_iter): print('iteration: %d' % (iter)) planner.step(env, min_time_steps, max_time_steps, integration_step) print('after step') #planner.step_with_sample(env, sample, min_time_steps, max_time_steps, integration_step) #solution = planner.get_solution() # don't break the searching to find better solutions #if solution is not None: # break solution = planner.get_solution() plan_time = time.time() - time0 if solution is None: return 0 else: print('path succeeded.') path, controls, cost = solution print(path) path = np.array(path) controls = np.array(controls) cost = np.array(cost) file = open(path_file, 'wb') pickle.dump(path, file) file.close() file = open(control_file, 'wb') pickle.dump(controls, file) file.close() file = open(cost_file, 'wb') pickle.dump(cost, file) file.close() file = open(time_file, 'wb') pickle.dump(plan_time, file) file.close() return 1
def plan(obs, env, x0, xG, data, informer, init_informer, system, dynamics, enforce_bounds, IsInCollisionWithObs, traj_opt, step_sz=0.02, num_steps=21, MAX_LENGTH=1000): """ For each node xt, we record how many explorations have been made from xt. We do planning according to the following rules: 1. if n_explored >= n_max_explore: if not the root, backtrace to the previous node hat(x)_t+1 = informer(xt, G) x_t+1, tau = BVP(xt, hat(x)_t+1) n_exlored(x_t) += 1 2. if explored(x_t+1) (w.r.t. some regions as defined in SST paper) ignore x_t+1 # may also update the "score" using the already explored region 3. if too_far(x_t+1, hat(x)_t+1) ignore x_t+1 4. if inCollision(x_t+1) ignore x_t+1 # may also update the "score" of points # otherwise it is safe establish the connections between x_t and x_t+1, move to x_t+1 """ env_constr = standard_cpp_systems.RectangleObs propagate_system = env_constr(obs, 6., 'acrobot') planner = _sst_module.SSTWrapper( state_bounds=propagate_system.get_state_bounds(), control_bounds=propagate_system.get_control_bounds(), distance=propagate_system.distance_computer(), start_state=x0.x, goal_state=xG.x, goal_radius=2., random_seed=0, sst_delta_near=1.5, sst_delta_drain=1.) # visualization obs_width = 6. new_obs_i = [] for k in range(len(obs)): obs_pt = [] obs_pt.append(obs[k][0] - obs_width / 2) obs_pt.append(obs[k][1] - obs_width / 2) obs_pt.append(obs[k][0] - obs_width / 2) obs_pt.append(obs[k][1] + obs_width / 2) obs_pt.append(obs[k][0] + obs_width / 2) obs_pt.append(obs[k][1] + obs_width / 2) obs_pt.append(obs[k][0] + obs_width / 2) obs_pt.append(obs[k][1] - obs_width / 2) new_obs_i.append(obs_pt) IsInCollision = lambda x: IsInCollisionWithObs(x, new_obs_i) # visualization """ print('step_sz: %f' % (step_sz)) params = {} params['obs_w'] = 6. params['obs_h'] = 6. params['integration_step'] = step_sz vis = AcrobotVisualizer(Acrobot(), params) vis.obs = obs plt.ion() fig = plt.figure() ax = fig.add_subplot(121) ax.set_autoscale_on(True) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') ax_ani = fig.add_subplot(122) vis._init(ax_ani) print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): ax.relim() ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def animation(states, actions): vis._animate(states[-1], ax_ani) draw_update_line(ax_ani) dtheta = 0.1 feasible_points = [] infeasible_points = [] goal_region = [] imin = 0 imax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(imin, imax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x): infeasible_points.append(x) else: feasible_points.append(x) if goal_check(Node(x), xG, system): goal_region.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) goal_region = np.array(goal_region) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') ax.scatter(goal_region[:,0], goal_region[:,1], c='green') for i in range(len(data)): update_line(hl, ax, data[i]) draw_update_line(ax) update_line(hl_for, ax, x0.x) draw_update_line(ax) update_line(hl_back, ax, xG.x) draw_update_line(ax) """ env = torch.FloatTensor(env) env = to_var(env) #explored_nodes = [x0] xt = x0 max_explore = 1 xt.n_explored = 0 xt.cost = 0. xw_scat = None fes = False frontier_nodes = [] tie_breaker = 0 entry = (x0.cost + h_cost(x0, xG, system), tie_breaker, x0) heapq.heappush(frontier_nodes, entry) for itr in range(MAX_LENGTH): if xw_scat is not None: xw_scat.remove() xw_scat = None # pop nodes from frontier_nodes if len(frontier_nodes) == 0: tie_breaker = 0 entry = (x0.cost + h_cost(x0, xG, system), tie_breaker, x0) heapq.heappush(frontier_nodes, entry) # push it back entry = heapq.heappop(frontier_nodes) print('popping...') print("entry cost:") print(entry[0]) xt = entry[2] if xt.n_explored >= max_explore and xt is not x0: xt = xt.prev #print('retreating...') continue # try connecting to goal every now and then x_init, u_init, t_init = init_informer(env, xt, xG, direction=0) x_G_, edge, valid = pathSteerTo(xt, xG, x_init, u_init, t_init, dynamics, enforce_bounds, IsInCollision, \ traj_opt, step_sz=step_sz, num_steps=num_steps, system=system, direction=0, propagating=True) """ xs_to_plot = np.array(edge.xs[::10]) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], system) ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='orange') draw_update_line(ax) """ if edge is not None and goal_check(x_G_, xG, system): print('bingo!') fes = True break """ xs, us, dts = planner.step_bvp(propagate_system, system, xt.x, x_init[-1], 400, num_steps, step_sz, x_init, u_init, t_init) if len(us) != 0: #xs_to_plot = np.array(edge.xs[::10]) #for i in range(len(xs_to_plot)): # xs_to_plot[i] = wrap_angle(xs_to_plot[i], system) x_t_1 = xs[-1] ax.scatter(x_t_1[0], x_t_1[1], c='orange') #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='orange') draw_update_line(ax) edge_dt = np.sum(dts) start = xt goal = Node(wrap_angle(xs[-1], system)) # after trajopt, make actions of dimension 2 us = us.reshape(len(us), -1) # notice that controller time starts from 0, hence locally need to shift the time by minusing t0_edges # start from 0 time_knot = np.cumsum(dts) time_knot = np.insert(time_knot, 0, 0.) # can also change the resolution by the following function (for instance, every 10) edge = Edge(xs, us, dts, time_knot, edge_dt) edge.next = goal start.edge = edge start.next = goal goal.prev = start goal.n_explored = 0 goal.cost = xt.cost + xt.edge.dt # push to frontier tie_breaker += 1 entry = (goal.cost+h_cost(goal,xG,system), tie_breaker, goal) heapq.heappush(frontier_nodes, entry) if goal_check(Node(x_t_1), xG, system): print('bingo!') fes = True break """ for i in range(max_explore): xw, x_init, u_init, t_init = informer(env, xt, xG, direction=0) """ xw_scat = ax.scatter(xw.x[0], xw.x[1], c='lightgreen') draw_update_line(ax) """ xs, us, dts = planner.step_bvp(propagate_system, system, xt.x, xw.x, 400, num_steps, step_sz, x_init, u_init, t_init) #print('xs:') #print(xs) # stop at the last node that is not in collision if len(us) == 0: # the same node, it hasn't changed continue xt.n_explored += 1 # establish connections to x_t+1 edge_dt = np.sum(dts) start = xt goal = Node(wrap_angle(xs[-1], system)) # after trajopt, make actions of dimension 2 us = us.reshape(len(us), -1) # notice that controller time starts from 0, hence locally need to shift the time by minusing t0_edges # start from 0 time_knot = np.cumsum(dts) time_knot = np.insert(time_knot, 0, 0.) # can also change the resolution by the following function (for instance, every 10) edge = Edge(xs, us, dts, time_knot, edge_dt) edge.next = goal start.edge = edge start.next = goal goal.prev = start #print('n_explored: %d' % (xt.n_explored)) #for i in range(len(edge.xs)): # update_line(hl_for, ax, edge.xs[i]) """ xs_to_plot = np.array(xs[::5]) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], system) ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='g') draw_update_line(ax) animation(xs, us) """ #x_t_1.prev = xt goal.n_explored = 0 goal.cost = xt.cost + xt.edge.dt # push to frontier tie_breaker += 1 entry = (goal.cost + h_cost(goal, xG, system), tie_breaker, goal) heapq.heappush(frontier_nodes, entry) # check if the new node is near goal if goal_check(goal, xG, system): print("success") fes = True break if fes: break # check if the new node is near goal if goal_check(xt, xG, system): print("success") fes = True break return fes
def plan_one_path(obs_i, obs, obc, detailed_data_path, data_path, start_state, goal_state, goal_inform_state, cost_i, max_iteration, out_queue_t, out_queue_cost, random_seed): if args.env_type == 'pendulum': system = standard_cpp_systems.PSOPTPendulum() bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0) step_sz = 0.002 num_steps = 20 traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz) elif args.env_type == 'cartpole_obs': #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole') obs_width = 4.0 psopt_system = _sst_module.PSOPTCartPole() propagate_system = standard_cpp_systems.RectangleObs( obs, 4., 'cartpole') #distance_computer = propagate_system.distance_computer() distance_computer = _sst_module.euclidean_distance( np.array(propagate_system.is_circular_topology())) step_sz = 0.002 num_steps = 21 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 #delta_near=.2 #delta_drain=.1 cost_threshold = 1.05 min_time_steps = 10 max_time_steps = 200 #min_time_steps = 5 #max_time_steps = 400 integration_step = 0.002 elif args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 1.0 delta_drain = 0.5 cost_threshold = 1.05 min_time_steps = 5 max_time_steps = 100 integration_step = 0.02 planner = _sst_module.SSTWrapper( state_bounds=propagate_system.get_state_bounds(), control_bounds=propagate_system.get_control_bounds(), distance=distance_computer, start_state=start_state, goal_state=goal_state, goal_radius=goal_radius, random_seed=random_seed, sst_delta_near=delta_near, sst_delta_drain=delta_drain) #print('creating planner...') # generate a path by using SST to plan for some maximal iterations time0 = time.time() for i in range(max_iteration): planner.step(propagate_system, min_time_steps, max_time_steps, integration_step) # early break for initial path solution = planner.get_solution() if solution is not None: #print('solution found already.') # based on cost break xs, us, ts = solution t_sst = np.sum(ts) #print(t_sst) #print(cost_i) if t_sst <= cost_i * cost_threshold: print('solved in %d iterations' % (i)) break plan_time = time.time() - time0 solution = planner.get_solution() xs, us, ts = solution print(np.linalg.norm(np.array(xs[-1]) - goal_state)) """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) #ax.set_xlim(-30, 30) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dx = 1 dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 #imax = int(2*30./dx) imax = int(2*np.pi/dtheta) jmin = 0 jmax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(jmin, jmax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state xs_to_plot = np.array(detailed_data_path) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(detailed_data_path[i], propagate_system) ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='lightgreen', s=0.5) # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) #plt.waitforbuttonpress() if solution is not None: xs, us, ts = solution # propagate data p_start = xs[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(us)): #state_i.append(len(detail_paths)-1) print(ts[k]) max_steps = int(np.round(ts[k]/step_sz)) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation for step in range(1,max_steps+1): p_start = dynamics(p_start, us[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) #state[-1] = xs[-1] xs_to_plot = np.array(state) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system) ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green', s=0.5) start_state_np = np.array(start_state) goal_state_np = np.array(goal_state) ax.scatter([start_state_np[0]], [start_state_np[1]], c='blue', marker='*') ax.scatter([goal_state_np[0]], [goal_state_np[1]], c='red', marker='*') # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) plt.waitforbuttonpress() """ # validate if the path contains collision if solution is not None: res_x, res_u, res_t = solution print('solution_x:') print(res_x) print('path_x:') print(np.array(data_path)) # propagate data p_start = res_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(res_u)): #state_i.append(len(detail_paths)-1) max_steps = int(np.round(res_t[k] / step_sz)) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation #p_start = res_x[k] #state[-1] = res_x[k] for step in range(1, max_steps + 1): p_start = dynamics(p_start, res_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. # check collision for the new state if IsInCollision(p_start, obs_i): print( 'collision happens at u_index: %d, step: %d' % (k, step)) assert not IsInCollision(p_start, obs_i) #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) #state[-1] = res_x[-1] # validation end print('plan time: %fs' % (plan_time)) if solution is None: print('failed.') out_queue_t.put(-1) out_queue_cost.put(-1) else: print('path succeeded.') out_queue_t.put(plan_time) out_queue_cost.put(t_sst)
from sparse_rrt import _sst_module from sparse_rrt.systems import standard_cpp_systems import numpy as np import time from sparse_rrt.systems.acrobot import Acrobot, AcrobotDistance from sparse_rrt.systems.point import Point # this is a test code for SST in CartPole environment system = standard_cpp_systems.Point() planner = _sst_module.SSTWrapper(state_bounds=system.get_state_bounds(), control_bounds=system.get_control_bounds(), distance=system.distance_computer(), start_state=np.array([0., 0.]), goal_state=np.array([9., 9.]), goal_radius=0.5, random_seed=0, sst_delta_near=0.4, sst_delta_drain=0.2) number_of_iterations = 10000 min_time_steps = 20 max_time_steps = 200 integration_step = 0.002 print("Starting the planner.") start_time = time.time() state_bounds = system.get_state_bounds()
def plan_one_path_sst(env, start, end, out_queue, path_file, control_file, cost_file, time_file, env_id=None, id_list=None): planner = _sst_module.SSTWrapper( state_bounds=env.get_state_bounds(), control_bounds=env.get_control_bounds(), distance=env.distance_computer(), start_state=start, goal_state=end, goal_radius=goal_radius, random_seed=random_seed, sst_delta_near=sst_delta_near, sst_delta_drain=sst_delta_drain) # generate a path by using SST to plan for some maximal iterations time0 = time.time() #print('obs: %d, path: %d' % (i, j)) for iter in range(args.max_iter): planner.step(env, min_time_steps, max_time_steps, integration_step) # if iter%5000 == 0: # if planner.get_solution() is not None: # print(time.time() - time0) if time.time() - time0 > args.max_time: break solution = planner.get_solution() plan_time = time.time() - time0 if solution is None: out_queue.put(0) else: print('path succeeded.') path, controls, cost = solution print(path) path = np.array(path) controls = np.array(controls) cost = np.array(cost) file = open(path_file, 'wb') pickle.dump(path, file) file.close() file = open(control_file, 'wb') pickle.dump(controls, file) file.close() file = open(cost_file, 'wb') pickle.dump(cost, file) file.close() file = open(time_file, 'wb') pickle.dump(plan_time, file) file.close() assert env_id is not None assert id_list is not None saved = False while not saved: try: table = load_occ_table(env_id) table.add(tuple(id_list)) save_occ_table(env_id, table) saved = True except EOFError: time.sleep(3) #pass out_queue.put(1)
def experiment(env_id, traj_id, verbose=False, system='cartpole_obs', params=None): print("env {}, traj {}".format(env_id, traj_id)) if system in ['cartpole_obs', 'acrobot_obs', 'car_obs']: obs_list = get_obs(system, env_id)[env_id] elif system in ['quadrotor_obs']: obs_list = get_obs_3d(system, env_id)[env_id].reshape(-1, 3) data = load_data(system, env_id, traj_id) ref_path = data['path'] ref_cost = data['cost'].sum() start_goal = data['start_goal'] min_time_steps = params['min_time_steps'] max_time_steps = params['max_time_steps'] integration_step = params['integration_step'] if system == 'quadrotor_obs': env_constr = standard_cpp_systems.RectangleObs3D env = env_constr(obs_list, params['width'], 'quadrotor') else: print("unkown system") exit(-1) planner = _sst_module.SSTWrapper(state_bounds=env.get_state_bounds(), control_bounds=env.get_control_bounds(), distance=env.distance_computer(), start_state=start_goal[0], goal_state=start_goal[-1], goal_radius=params['goal_radius'], random_seed=params['random_seed'], sst_delta_near=params['sst_delta_near'], sst_delta_drain=params['sst_delta_drain']) solution = planner.get_solution() data_cost = np.sum(data['cost']) # th = 1.2 * data_cost ## start experiment tic = time.perf_counter() for iteration in tqdm(range(params['number_of_iterations'])): planner.step(env, min_time_steps, max_time_steps, integration_step) if iteration % 100 == 0: solution = planner.get_solution() if (solution is not None and solution[2].sum() <= ref_cost * 1.4 ) or time.perf_counter() - tic > params[ 'max_planning_time']: #and np.sum(solution[2]) < th: break toc = time.perf_counter() costs = solution[2].sum() if solution is not None else np.inf result = { 'env_id': env_id, 'traj_id': traj_id, 'planning_time': toc - tic, 'successful': solution is not None, 'costs': costs } print("\t{}, time: {} seconds, {}(ref:{}) costs".format( result['successful'], result['planning_time'], result['costs'], np.sum(data['cost']))) return result
def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, cost_i, max_iteration, out_queue_t, out_queue_cost): if args.env_type in [ 'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8' ]: #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot') obs_width = 6.0 psopt_system = _sst_module.PSOPTAcrobot() propagate_system = standard_cpp_systems.RectangleObs( obs, 6., 'acrobot') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.02 num_steps = 21 goal_radius = 2.0 random_seed = 0 delta_near = 1.0 delta_drain = 0.5 device = 3 num_sample = 10 min_time_steps = 5 max_time_steps = 100 mpnet_goal_threshold = 2.0 mpnet_length_threshold = 30 pick_goal_init_threshold = 0.1 pick_goal_end_threshold = 0.8 pick_goal_start_percent = 0.4 elif args.env_type == 'cartpole_obs': obs_width = 4.0 psopt_system = _sst_module.PSOPTCartPole() propagate_system = standard_cpp_systems.RectangleObs( obs, obs_width, 'cartpole') distance_computer = propagate_system.distance_computer() #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology())) step_sz = 0.002 integration_step = 0.002 num_steps = 101 goal_radius = 1.5 random_seed = 0 delta_near = 2.0 delta_drain = 1.2 #delta_near=1.0 #delta_drain=0.5 device = 3 num_sample = 10 min_time_steps = 10 max_time_steps = 200 mpnet_goal_threshold = 2.0 mpnet_length_threshold = 90 pick_goal_init_threshold = 0.1 pick_goal_end_threshold = 0.8 pick_goal_start_percent = 0.4 #print('creating planner...') #planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, 200, num_steps, step_sz, propagate_system, device) planner = _sst_module.SSTWrapper( state_bounds=propagate_system.get_state_bounds(), control_bounds=propagate_system.get_control_bounds(), distance=distance_computer, start_state=start_state, goal_state=goal_state, goal_radius=goal_radius, random_seed=0, sst_delta_near=delta_near, sst_delta_drain=delta_drain) cost_threshold = cost_i * args.cost_threshold #cost_threshold = 100000000. # generate a path by using SST to plan for some maximal iterations time0 = time.time() print('before plan_tree_SMP...') print('start_state:') print(start_state) print('goal_state:') print(goal_inform_state) res_x, res_u, res_t = planner.plan_tree_SMP("sst", propagate_system, psopt_system, obc.flatten(), start_state, goal_state, goal_inform_state, \ goal_radius, max_iteration, distance_computer, \ delta_near, delta_drain, cost_threshold, \ num_sample, min_time_steps, max_time_steps, \ mpnet_goal_threshold, mpnet_length_threshold, \ pick_goal_init_threshold, pick_goal_end_threshold, \ pick_goal_start_percent) print('after plan_tree_SMP.') plan_time = time.time() - time0 """ # visualization plt.ion() fig = plt.figure() ax = fig.add_subplot(111) #ax.set_autoscale_on(True) ax.set_xlim(-np.pi, np.pi) ax.set_ylim(-np.pi, np.pi) hl, = ax.plot([], [], 'b') #hl_real, = ax.plot([], [], 'r') hl_for, = ax.plot([], [], 'g') hl_back, = ax.plot([], [], 'r') hl_for_mpnet, = ax.plot([], [], 'lightgreen') hl_back_mpnet, = ax.plot([], [], 'salmon') #print(obs) def update_line(h, ax, new_data): new_data = wrap_angle(new_data, propagate_system) h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1])) #h.set_xdata(np.append(h.get_xdata(), new_data[0])) #h.set_ydata(np.append(h.get_ydata(), new_data[1])) def remove_last_k(h, ax, k): h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k]) def draw_update_line(ax): #ax.relim() #ax.autoscale_view() fig.canvas.draw() fig.canvas.flush_events() #plt.show() def wrap_angle(x, system): circular = system.is_circular_topology() res = np.array(x) for i in range(len(x)): if circular[i]: # use our previously saved version res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi) if res[i] > np.pi: res[i] = res[i] - 2*np.pi return res dtheta = 0.1 feasible_points = [] infeasible_points = [] imin = 0 imax = int(2*np.pi/dtheta) for i in range(imin, imax): for j in range(imin, imax): x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.]) if IsInCollision(x, obs_i): infeasible_points.append(x) else: feasible_points.append(x) feasible_points = np.array(feasible_points) infeasible_points = np.array(infeasible_points) print('feasible points') print(feasible_points) print('infeasible points') print(infeasible_points) ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow') ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink') #for i in range(len(data)): # update_line(hl, ax, data[i]) draw_update_line(ax) #state_t = start_state if len(res_u): # propagate data p_start = res_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(res_u)): #state_i.append(len(detail_paths)-1) max_steps = int(res_t[k]/step_sz) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation p_start = res_x[k] state[-1] = res_x[k] for step in range(1,max_steps+1): p_start = dynamics(p_start, res_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) state[-1] = res_x[-1] xs_to_plot = np.array(state) for i in range(len(xs_to_plot)): xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system) if IsInCollision(xs_to_plot[i], obs_i): print('in collision') ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green') # draw start and goal #ax.scatter(start_state[0], goal_state[0], marker='X') draw_update_line(ax) plt.waitforbuttonpress() """ #im = planner.visualize_nodes(propagate_system) #sec = input('Let us wait for user input') #show_image_opencv(im, "planning_tree", wait=True) # validate if the path contains collision """ if len(res_u): # propagate data p_start = res_x[0] detail_paths = [p_start] detail_controls = [] detail_costs = [] state = [p_start] control = [] cost = [] for k in range(len(res_u)): #state_i.append(len(detail_paths)-1) max_steps = int(res_t[k]/step_sz) accum_cost = 0. #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][k]) # modify it because of small difference between data and actual propagation p_start = res_x[k] state[-1] = res_x[k] for step in range(1,max_steps+1): p_start = dynamics(p_start, res_u[k], step_sz) p_start = enforce_bounds(p_start) detail_paths.append(p_start) accum_cost += step_sz if (step % 1 == 0) or (step == max_steps): state.append(p_start) #print('control') #print(controls[i][j]) cost.append(accum_cost) accum_cost = 0. # check collision for the new state assert not IsInCollision(p_start, obs_i) #print('p_start:') #print(p_start) #print('data:') #print(paths[i][j][-1]) state[-1] = res_x[-1] # validation end """ print('plan time: %fs' % (plan_time)) if len(res_x) == 0: print('failed.') out_queue_t.put(-1) out_queue_cost.put(-1.0) else: print('path succeeded.') print('cost: %f' % (np.sum(res_t))) print('cost_threshold: %f' % (cost_threshold)) print('data cost: %f' % (cost_i)) out_queue_t.put(plan_time) out_queue_cost.put(np.sum(res_t))