def __init__(self, sys, sampler, step_size, reached_goal_function, linearize=True):
    self.sys = sys
    self.step_size = step_size
    self.reached_goal_function = reached_goal_function
    pend = Pendulum()
    self.nonlinear_dynamics_step_size = pend.time_step
    self.linearize = linearize

    def plan_collision_free_path_towards(nearest_state, new_state):
        # Roll the nonlinear dynamics forward under an LQR policy that tracks
        # new_state, one nonlinear integration sub-step at a time. An earlier
        # variant enumerated candidate inputs via
        # np.linspace(*sys.input_limits[:, 0], num=3); the LQR rollout
        # replaces that enumeration, so only one candidate trajectory remains.
        best_new_state = None
        best_distance = np.inf
        best_states_list = None
        states_list = [np.atleast_1d(nearest_state)]
        state = np.atleast_1d(nearest_state)
        for step in range(int(self.step_size / self.nonlinear_dynamics_step_size)):
            state = self.sys.lqr_forward_step(
                starting_state=state,
                target_state=new_state,
                u=None,
                modify_system=False,
                return_as_env=False,
                step_size=self.nonlinear_dynamics_step_size)
            states_list.append(state)
        # Alternative to the loop above: the Koopman active-learning rollout.
        # state, al_states_list = alcontrol_step(starting_state=state,
        #     target_state=new_state, states_list=states_list,
        #     sz=self.step_size, ndsz=self.nonlinear_dynamics_step_size)
        # FIXME: the cost is the plain Euclidean distance ||new_state - state||.
        new_distance = np.linalg.norm(new_state - state)
        if new_distance < best_distance:
            best_new_state = state
            best_distance = new_distance
            best_states_list = states_list
        return best_distance, best_new_state, best_states_list

    RGRRT.__init__(self, self.sys.get_current_state(), sampler,
                   reached_goal_function, plan_collision_free_path_towards)
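# --------------------------------------------------------------------------
# Illustrative sketch (not part of the class above): one LQR-steered Euler
# step for the pendulum, approximating the kind of computation that
# sys.lqr_forward_step plausibly performs. The function name and the
# infinite-horizon gain are assumptions, not the library's actual API; the
# pendulum parameters mirror the tests below (m=1, l=0.5, g=9.8, b=0.1).
import numpy as np
from scipy.linalg import solve_continuous_are

def sketch_lqr_forward_step(state, target_state, dt,
                            m=1.0, l=0.5, g=9.8, b=0.1, u_max=1.0):
    state = np.asarray(state, dtype=float)
    theta, theta_dot = state
    # Jacobian of f(x, u) = [theta_dot, (u - b*theta_dot - m*g*l*sin(theta)) / (m*l**2)]
    A = np.array([[0.0, 1.0],
                  [-(g / l) * np.cos(theta), -b / (m * l**2)]])
    B = np.array([[0.0], [1.0 / (m * l**2)]])
    Q, R = np.eye(2), np.eye(1)
    P = solve_continuous_are(A, B, Q, R)
    K = np.linalg.solve(R, B.T @ P)  # K = R^{-1} B^T P
    u = float(-(K @ (state - np.asarray(target_state)))[0])
    u = np.clip(u, -u_max, u_max)    # respect the input limits
    theta_ddot = (u - b * theta_dot - m * g * l * np.sin(theta)) / (m * l**2)
    return state + dt * np.array([theta_dot, theta_ddot])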
def test_pendulum_planning():
    # input_limit, reachable_set_epsilon, goal_tolerance, input_samples and
    # max_iterations are module-level parameters defined elsewhere in the script.
    initial_state = np.zeros(2)
    pendulum_system = Pendulum(initial_state=initial_state,
                               input_limits=np.asarray([[-input_limit], [input_limit]]),
                               m=1, l=0.5, g=9.8, b=0.1)
    goal_state = np.asarray([np.pi, 0.0])
    goal_state_2 = np.asarray([-np.pi, 0.0])
    step_size = 0.2  # 0.075
    nonlinear_dynamic_step_size = 1e-2

    def uniform_sampler():
        rnd = np.random.rand(2)
        rnd[0] = (rnd[0] - 0.5) * 2 * 1.5 * np.pi
        rnd[1] = (rnd[1] - 0.5) * 2 * 12
        return rnd

    def gaussian_mixture_sampler():
        gaussian_ratio = 0.0
        rnd = np.random.rand(2)
        rnd[0] = np.random.normal(goal_state[0], 1)
        rnd[1] = np.random.normal(goal_state[1], 1)
        if np.random.rand(1) > gaussian_ratio:
            return uniform_sampler()
        return rnd

    def ring_sampler():
        theta = np.random.rand(1) * 2 * np.pi
        rnd = np.zeros(2)
        r = np.random.rand(1) + 2.5
        rnd[0] = r * np.cos(theta)
        rnd[1] = r * np.sin(theta)
        return rnd

    def contains_goal_function(reachable_set, goal_state):
        # Test the nearer of the two upright equilibria (theta = +/-pi).
        distance = np.inf
        if np.linalg.norm(reachable_set.parent_state - goal_state) < 2:
            distance, projection = distance_point_polytope(
                reachable_set.polytope_list, goal_state)
        elif np.linalg.norm(reachable_set.parent_state - goal_state_2) < 2:
            distance, projection = distance_point_polytope(
                reachable_set.polytope_list, goal_state_2)
        else:
            return False, None
        if distance > reachable_set_epsilon:
            return False, None
        # Enumerate candidate inputs and roll the nonlinear dynamics forward.
        potential_inputs = np.linspace(pendulum_system.input_limits[0, 0],
                                       pendulum_system.input_limits[1, 0],
                                       input_samples)
        for u_i in potential_inputs:
            state_list = []
            state = reachable_set.parent_state
            for step in range(int(step_size / nonlinear_dynamic_step_size)):
                # Note: 'linearlize' is the keyword spelling used by the
                # system's forward_step implementation.
                state = pendulum_system.forward_step(
                    u=np.atleast_1d(u_i), linearlize=False, modify_system=False,
                    step_size=nonlinear_dynamic_step_size, return_as_env=False,
                    starting_state=state)
                state_list.append(state)
                if np.linalg.norm(goal_state - state) < goal_tolerance:
                    print('Goal error is %f' % np.linalg.norm(goal_state - state))
                    return True, np.asarray(state_list)
                if np.linalg.norm(goal_state_2 - state) < goal_tolerance:
                    print('Goal error is %f' % np.linalg.norm(goal_state_2 - state))
                    return True, np.asarray(state_list)
        return False, None

    rrt = SymbolicSystem_R3T(pendulum_system, uniform_sampler, step_size,
                             contains_goal_function=contains_goal_function,
                             use_true_reachable_set=True, use_convex_hull=True)
    found_goal = False
    experiment_name = datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H-%M-%S')
    duration = 0
    os.makedirs('R3T_Pendulum_' + experiment_name)
    allocated_time = 1.25
    for i in range(max_iterations):
        start_time = time.time()
        if rrt.build_tree_to_goal_state(goal_state,
                                        stop_on_first_reach=False,
                                        allocated_time=allocated_time,
                                        rewire=False,
                                        explore_deterministic_next_state=False,
                                        save_true_dynamics_path=True) is not None:
            found_goal = True
        end_time = time.time()
        duration += (end_time - start_time)

        # Collect the reachable-set polytopes grown so far.
        polytope_reachable_sets = rrt.reachable_set_tree.id_to_reachable_sets.values()
        reachable_polytopes = []
        explored_states = []
        for prs in polytope_reachable_sets:
            reachable_polytopes.append(prs.polytope_list)
            explored_states.append(prs.parent_state)

        # Disambiguate which upright equilibrium was reached (used by the
        # optional state-tree plot).
        goal_override = None
        if found_goal:
            p = rrt.goal_node.parent.state
            if np.linalg.norm(p - np.asarray([np.pi, 0.0])) < \
                    np.linalg.norm(p - np.asarray([-np.pi, 0.0])):
                goal_override = np.asarray([np.pi, 0.0])
            else:
                goal_override = np.asarray([-np.pi, 0.0])

        # Plot the explored reachable sets. (A state-tree plot via
        # visualize_node_tree_2D, as in the RG-RRT test below, is also possible.)
        # FIXME: handle degenerate reachable sets.
        fig = plt.figure()
        ax = fig.add_subplot(111)
        fig, ax = visualize_2D_AH_polytope(reachable_polytopes, fig=fig, ax=ax,
                                           N=200, epsilon=0.01)
        ax.scatter(initial_state[0], initial_state[1], facecolor='red', s=5)
        ax.scatter(goal_state[0], goal_state[1], facecolor='green', s=5)
        ax.scatter(goal_state[0] - 2 * np.pi, goal_state[1], facecolor='green', s=5)
        plt.xlabel(r'$\theta$ (rad)')
        plt.ylabel(r'$\dot{\theta}$ (rad/s)')
        plt.xlim([-4, 4])
        plt.ylim([-10, 10])
        plt.tight_layout()
        plt.title(r'$|u| \leq %.2f$ Reachable Set after %.2fs (%d nodes)'
                  % (input_limit, duration, len(polytope_reachable_sets)))
        plt.savefig('R3T_Pendulum_' + experiment_name +
                    '/%.2f_seconds_reachable_sets.png' % duration, dpi=500)
        plt.clf()
        # if found_goal:
        #     break
        allocated_time *= 2

    # Store the polytopes: each AH-polytope {T x + t : H x <= h} as [T, t, H, h].
    reachable_polytopes_clean = [[p.T, p.t, p.P.H, p.P.h] for p in reachable_polytopes]
    with open('R3T_Pendulum_' + experiment_name +
              '/%.2f_seconds_reachable_sets.p' % duration, "wb") as f:
        pickle.dump(reachable_polytopes_clean, f)
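# --------------------------------------------------------------------------
# Illustrative sketch (not part of the test above): what the pickled entries
# mean. Each stored item [T, t, H, h] encodes an AH-polytope
# {T x + t : H x <= h}, i.e. an affine map of the H-polytope {x : H x <= h}.
# The membership test below is an assumption-laden sketch, valid when T has
# full column rank; the library's own distance_point_polytope should be
# preferred in practice.
import numpy as np

def ah_polytope_contains(T, t, H, h, point, tol=1e-7):
    # Recover a preimage x with T x + t = point (least squares), then check
    # that the reconstruction is exact and that x satisfies H x <= h.
    t, h, point = (np.asarray(a, dtype=float).flatten() for a in (t, h, point))
    x, *_ = np.linalg.lstsq(T, point - t, rcond=None)
    return bool(np.allclose(T @ x + t, point, atol=tol)
                and np.all(H @ x <= h + tol))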
def test_pendulum_planning():
    global best_distance
    best_distance = np.inf
    initial_state = np.zeros(2)
    pendulum_system = Pendulum(initial_state=initial_state,
                               input_limits=np.asarray([[-1], [1]]),
                               m=1, l=0.5, g=9.8, b=0.1)
    goal_state = np.asarray([np.pi, 0.0])
    goal_state_2 = np.asarray([-np.pi, 0.0])
    step_size = 0.08

    def uniform_sampler():
        rnd = np.random.rand(2)
        rnd[0] = (rnd[0] - 0.5) * 2 * 1.5 * np.pi
        rnd[1] = (rnd[1] - 0.5) * 2 * 12
        return rnd

    def gaussian_mixture_sampler():
        gaussian_ratio = 0.0
        rnd = np.random.rand(2)
        rnd[0] = np.random.normal(goal_state[0], 1)
        rnd[1] = np.random.normal(goal_state[1], 1)
        if np.random.rand(1) > gaussian_ratio:
            return uniform_sampler()
        return rnd

    def ring_sampler():
        theta = np.random.rand(1) * 2 * np.pi
        rnd = np.zeros(2)
        r = np.random.rand(1) + 2.5
        rnd[0] = r * np.cos(theta)
        rnd[1] = r * np.sin(theta)
        return rnd

    def big_gaussian_sampler():
        rnd = np.random.rand(2)
        rnd[0] = np.random.normal(0, 1.5)
        rnd[1] = np.random.normal(0, 3)
        goal_bias_rnd = np.random.rand(1)
        if goal_bias_rnd < 0.05:
            return goal_state
        elif goal_bias_rnd < 0.1:
            return np.asarray([-np.pi, 0.0])
        return rnd

    def reached_goal_function(state, goal_state):
        global best_distance
        d1 = np.linalg.norm(state - goal_state)
        d2 = np.linalg.norm(state - goal_state_2)
        best_distance = min(best_distance, d1, d2)
        if d1 < 5e-2 or d2 < 5e-2:
            print('Goal error %f' % min(d1, d2))
            return True
        return False

    rrt = SymbolicSystem_RGRRT(pendulum_system, uniform_sampler, step_size,
                               reached_goal_function=reached_goal_function)
    found_goal = False
    experiment_name = datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H-%M-%S')
    duration = 0
    os.makedirs('RG_RRT_Pendulum_' + experiment_name)
    while True:
        start_time = time.time()
        if rrt.build_tree_to_goal_state(goal_state,
                                        stop_on_first_reach=True,
                                        allocated_time=100,
                                        rewire=False,
                                        explore_deterministic_next_state=False) is not None:
            found_goal = True
        end_time = time.time()
        if found_goal:
            # Disambiguate which upright equilibrium was reached.
            p = rrt.goal_node.parent.state
            if np.linalg.norm(p - np.asarray([np.pi, 0.0])) < \
                    np.linalg.norm(p - np.asarray([-np.pi, 0.0])):
                goal_override = np.asarray([np.pi, 0.0])
            else:
                goal_override = np.asarray([-np.pi, 0.0])
            rrt.goal_node.children = []
            rrt.goal_node.true_dynamics_path = [p, goal_override]

        # Plot the state tree.
        print('Best distance %f' % best_distance)
        fig = plt.figure()
        ax = fig.add_subplot(111)
        if found_goal:
            fig, ax = visualize_node_tree_2D(rrt, fig, ax, s=0.5, linewidths=0.15,
                                             show_path_to_goal=True)  # , goal_override=goal_override
        else:
            fig, ax = visualize_node_tree_2D(rrt, fig, ax, s=0.5, linewidths=0.15,
                                             show_path_to_goal=found_goal)
        ax.scatter(initial_state[0], initial_state[1], facecolor='red', s=5)
        ax.scatter(goal_state[0], goal_state[1], facecolor='green', s=5)
        ax.scatter(goal_state[0] - 2 * np.pi, goal_state[1], facecolor='green', s=5)
        ax.grid(True, which='both')
        y_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False)
        ax.yaxis.set_major_formatter(y_formatter)
        ax.set_yticks(np.arange(-10, 10, 2))
        ax.set_xlim([-4, 4])
        ax.set_ylim([-10, 10])
        ax.set_xlabel(r'$\theta$ (rad)')
        ax.set_ylabel(r'$\dot{\theta}$ (rad/s)')
        duration += (end_time - start_time)
        plt.title('RG-RRT after %.2f seconds (explored %d nodes)'
                  % (duration, rrt.node_tally))
        plt.savefig('RG_RRT_Pendulum_' + experiment_name +
                    '/%.2f_seconds_tree.png' % duration, dpi=500)
        plt.xlim([-4, 4])
        plt.ylim([-10, 10])
        plt.clf()
        plt.close()
        if found_goal:
            break
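# --------------------------------------------------------------------------
# Illustrative note: the test accepts both upright equilibria
# (theta = +/-pi, theta_dot = 0) because the state space is left unwrapped.
# Within the sampled range |theta| <= 1.5*pi, the two-goal check above is
# equivalent to a single check on the wrapped angle error, as in this
# hypothetical helper:
import numpy as np

def upright_error(state):
    # Distance from theta to the set {pi + 2*k*pi}, combined with the
    # velocity error.
    theta_err = abs((state[0] % (2 * np.pi)) - np.pi)
    return float(np.hypot(theta_err, state[1]))

# Example: upright_error(np.array([-np.pi, 0.0])) == 0.0, matching the
# goal_state_2 branch of reached_goal_function.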
def alcontrol_step(starting_state=None, target_state=None, states_list=None, sz=None, ndsz=None):
    pend = Pendulum()
    # The pendulum's timing parameters are used to construct the Koopman
    # operator and the adjoint differential equation.
    koopman_operator = KoopmanOperator(pend.time_step)
    adjoint = Adjoint(pend.time_step)
    simulation_time = 1000
    horizon = 20  # time horizon
    control_reg = np.diag([1.]) * 25  # control regularization
    inv_control_reg = np.linalg.inv(control_reg)  # precompute the inverse
    # Fallback in case the LQR control returns NaN.
    default_action = lambda x: np.random.uniform(-0.1, 0.1, size=(1,))

    state = starting_state  # initialize the state
    task = Task()  # this creates the task
    err = np.zeros([simulation_time, 2])
    al_states_list = states_list
    for t in range(int(sz / ndsz)):
        # Measure the current state and lift it through the Koopman
        # observables (z). (The original measured starting_state here, which
        # kept the controller frozen at the initial measurement.)
        m_state = get_koop_measurement(state)
        t_state = koopman_operator.transform_all_state(m_state)
        err[t, :] = state - target_state  # tracking-error log

        # Grab the current linearization of the lifted dynamics.
        Kx, Ku = koopman_operator.get_linearization()
        # Instantiate an LQR controller over the lifted state.
        lqr_policy = FiniteHorizonLQR(Kx, Ku, task.Q, task.R, task.Qf,
                                      horizon=horizon)
        # Set the target in Koopman observable coordinates.
        lqr_policy.set_target_state(task.cal_target_expanded_state(target_state))
        lqr_policy.sat_val = pend.sat_val  # set the saturation value

        # Forward-simulate the lifted dynamics under the LQR policy; fdx and
        # fdu are just Kx and Ku repeated along the horizon.
        trajectory, fdx, fdu, action_schedule = koopman_operator.simulate(
            t_state, horizon, policy=lqr_policy)
        ldx, ldu = task.get_linearization_from_trajectory(trajectory, action_schedule)
        mudx = lqr_policy.get_linearization_from_trajectory(trajectory)

        # Terminal condition for the adjoint, then simulate it backwards.
        rhof = task.mdx(trajectory[-1])
        rho = adjoint.simulate_adjoint(rhof, ldx, ldu, fdx, fdu, mudx, horizon)

        # Information-maximizing correction on top of the LQR action.
        ustar = -np.dot(inv_control_reg, fdu[0].T.dot(rho[0])) + lqr_policy(t_state)
        ustar = np.clip(ustar, -pend.sat_val, pend.sat_val)  # saturate the control
        if np.isnan(ustar).any():
            ustar = default_action(None)

        # Advance the pendulum subject to ustar.
        next_state = pend.step(state, ustar)
        # Update the Koopman operator from the new (z, u, z') data triple.
        koopman_operator.compute_operator_from_data(
            get_koop_measurement(state), ustar, get_koop_measurement(next_state))
        state = next_state  # backend: update the simulator state
        al_states_list.append(state)
        # Decaying weight on the information gain.
        task.inf_weight = 100.0 * (0.99 ** t)

    return state, al_states_list
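# --------------------------------------------------------------------------
# Illustrative sketch: the least-squares fit behind an update like
# compute_operator_from_data. Given lifted states z, inputs u, and lifted
# next states z', an EDMD-style fit solves min_{A,B} ||Z' - A Z - B U||_F.
# This batch version is an assumption about the interface, not the actual
# KoopmanOperator API.
import numpy as np

def fit_koopman_operator(Z, U, Zp):
    """Z: (n, N) lifted states, U: (m, N) inputs, Zp: (n, N) lifted next
    states; returns A (n, n) and B (n, m)."""
    G = np.vstack([Z, U])        # stacked regressors, shape (n + m, N)
    AB = Zp @ np.linalg.pinv(G)  # least-squares solution, shape (n, n + m)
    n = Z.shape[0]
    return AB[:, :n], AB[:, n:]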