def apply_controller(plant, params, H, policy=None): ''' Starts the plant and applies the current policy to the plant for a duration specified by H (in seconds). @param plant is a class that controls our robot (simulation) @param params is a dictionary with some useful values @param H Horizon for applying controller (in seconds) @param policy is a function pointer to the code that implements your control solution. It will be called as u = policy( state ) ''' # start robot x_t, t = plant.get_plant_state() if plant.noise is not None: # randomize state Sx_t = np.zeros((x_t.shape[0], x_t.shape[0])) L_noise = np.linalg.cholesky(plant.noise) x_t = x_t + np.random.randn(x_t.shape[0]).dot(L_noise) sum_of_error = 0 H_steps = int(np.ceil(H / plant.dt)) for i in xrange(H_steps): # convert input angle dimensions to complex representation x_t_ = gTrig_np(x_t[None, :], params['angle_dims']).flatten() # get command from policy (this should be fast, or at least account for delays in processing): u_t = policy(x_t_) # send command to robot: plant.apply_control(u_t) plant.step() x_t, t = plant.get_plant_state() l = plant.params['l'] st = np.sin(x_t_[3]) ct = np.cos(x_t_[3]) goal = np.array([0, l]) end = np.array([x_t[0] + l * st, -l * ct]) dist = np.sqrt((goal[0] - end[0]) * (goal[0] - end[0]) + (goal[1] - end[1]) * (goal[1] - end[1])) sum_of_error = sum_of_error + dist if plant.noise is not None: # randomize state x_t = x_t + np.random.randn(x_t.shape[0]).dot(L_noise) if plant.done: break print("Error this episode was: ", sum_of_error) # stop robot plant.stop()
def apply_controller(plant, agent_swingup, agent_stable, params, H, horizon=None): x_t, t = plant.get_plant_state() for i in range(H): x_t_ = gTrig_np(x_t[None, :], params['angle_dims']).flatten() if (horizon is None) or i < horizon: u_t = agent_swingup.policy(x_t, i) else: if (horizon is not None) and (i == horizon): print("Reached end of control sequence") print("Final state : {}".format(x_t)) input("Press ENTER to continue...") u_t = agent_stable.policy(x_t, i) plant.apply_control(u_t) plant.step() x_t, t = plant.get_plant_state() if plant.done: break plant.stop()
def apply_controller(plant, params, H, policy=None): ''' Starts the plant and applies the current policy to the plant for a duration specified by H (in seconds). @param plant is a class that controls our robot (simulation) @param params is a dictionary with some useful values @param H Horizon for applying controller (in seconds) @param policy is a function pointer to the code that implements your control solution. It will be called as u = policy( state ) ''' # start robot x_t, t = plant.get_plant_state() if plant.noise is not None: # randomize state Sx_t = np.zeros((x_t.shape[0], x_t.shape[0])) L_noise = np.linalg.cholesky(plant.noise) x_t = x_t + np.random.randn(x_t.shape[0]).dot(L_noise) sum_of_error = 0 integral_pos = 0 integral_angle = 0 error_pos = 0 error_angle = 0 error_prev_pos = 0 error_prev_angle = 0 angle = [] position = [] #x_t_prev=gTrig_np(x_t[None,:], params['angle_dims']).flatten() H_steps = int(np.ceil(H / plant.dt)) for i in range(H_steps): # convert input angle dimensions to complex representation x_t_ = gTrig_np(x_t[None, :], params['angle_dims']).flatten() #print(x_t_[4]) # get command from policy (this should be fast, or at least account for delays in processing): #u_t,integral_pos,integral_angle,error_pos,error_angle,error_prev_pos,error_prev_angle = policy(x_t_,integral_pos,integral_angle,error_pos,error_angle,error_prev_pos,error_prev_angle) u_t = LQR(plant, x_t_) error_prev_pos = error_pos error_prev_angle = error_angle angle.append(x_t_[3]) position.append(x_t_[0]) # send command to robot: plant.apply_control(u_t) plant.step() x_t, t = plant.get_plant_state() l = plant.params['l'] err = np.array([0, l]) - np.array([x_t[0] + l * x_t_[3], -l * x_t_[4]]) dist = np.dot(err, err) sum_of_error = sum_of_error + dist if plant.noise is not None: # randomize state x_t = x_t + np.random.randn(x_t.shape[0]).dot(L_noise) if plant.done: break plt.plot(position) plt.ylim(-1, 1) plt.title("Plot of position values with respect to steps ") plt.ylabel("Positions") plt.xlabel("steps") #plt.plot(position) plt.show() print "Error this episode %f" % (sum_of_error) # stop robot plant.stop()