def reset(self):
    """
    Description: Reset the environment to its start state, render it, and
        return the new state.

    Returns:
        The 12-element state: a 3-element position followed by nine zeros.
    """
    # Draw a random start: x/y within +/- map_width, z between 1.0 and map_height.
    horizontal = random.uniform(generate_key(),
                                shape=(2, ),
                                minval=-self.map_width,
                                maxval=self.map_width)
    altitude = random.uniform(generate_key(),
                              shape=(1, ),
                              minval=1.0,
                              maxval=self.map_height)
    start_position = np.concatenate((horizontal, altitude))

    # NOTE: the sampled start is discarded here in favour of a constant
    # starting position that is used for debugging.
    start_position = np.array([-3, -3, 7])

    remaining_components = np.zeros(9)
    self.state = np.concatenate((start_position, remaining_components))
    self._render()
    return self.state
def test_kalman_filter(steps=100, show_plot=True):
    """
    Description: Run a KalmanFilter controller on noisy observations of a
        constant signal and record the squared error of every prediction.

    Args:
        steps (int): number of observations fed to the controller
        show_plot (bool): if True, plot the per-step losses for 15 seconds
    """

    def squared_error(target, estimate):
        return (target - estimate)**2

    signal_value = 1
    noise_scale = 0.3
    initial_estimate = 0

    kalman = KalmanFilter()
    kalman.initialize(initial_estimate, 1, 0, 1, 1, 0, noise_scale)

    losses = []
    for _ in range(steps):
        # observation = constant signal + scaled standard-normal noise
        perturbation = float(
            random.normal(generate_key(), shape=(1, )) * noise_scale)
        observation = signal_value + perturbation
        estimate = kalman.step(0, observation)
        losses.append(float(squared_error(signal_value, estimate)))

    if show_plot:
        plt.plot(losses)
        plt.title("KalmanFilter controller on constant signal")
        plt.show(block=False)
        plt.pause(15)
        plt.close()

    print("test_kalman_filter passed")
def reset(self):
    """
    Description: Reset the environment to a fresh random start state.

    Returns:
        The new state: 4 values drawn uniformly between -0.05 and 0.05.
    """
    bound = 0.05
    start_state = random.uniform(generate_key(),
                                 minval=-bound,
                                 maxval=bound,
                                 shape=(4, ))
    self._state = start_state
    return self._state
def reset(self):
    """
    Description: Reset the environment to a random start state and clear the
        last applied control.

    Returns:
        The new state [x, y, th, xdot, ydot, thdot].
    """

    def draw(low, high):
        # every component gets its own freshly generated key
        return random.uniform(generate_key(), minval=low, maxval=high)

    # (low, high) bounds in state order; th is limited to +/- 30 degrees
    bounds = (
        (-0.5, 0.5),  # x
        (-0.5, 0.5),  # y
        (-30 * np.pi / 180, 30 * np.pi / 180),  # th
        (-0.1, 0.1),  # xdot
        (-0.1, 0.1),  # ydot
        (-0.1, 0.1),  # thdot
    )
    self.state = np.array([draw(low, high) for low, high in bounds])
    self.last_u = np.array([0.0, 0.0])
    return self.state
def __init__(self, n=3, m=2, d=None, partially_observable=False, noise_distribution=None,
             noise_magnitude=1.0, system_params={}, initial_state=None):
    """
    Description: Randomly initialize the hidden dynamics of the system.

    Args:
        n (int): State dimension.
        m (int): Control dimension.
        d (int): Observation dimension. (Note: MUST specify
            partially_observable=True for d to be used!)
        partially_observable (bool): whether to project x to y
        noise_distribution (None, string, func): if None then no noise. Valid
            strings are 'normal' and 'uniform'. A custom noise function must
            take either no arguments or exactly two arguments (x: state,
            u: action). In a partially observable system its result is
            unpacked as a (state_noise, observation_noise) pair.
        noise_magnitude (float): factor the noise is multiplied by
        system_params (dict): optional 'A', 'B', 'C' and 'D' matrices of the
            system dynamics; missing ones are drawn from a standard normal.
            The dict is copied and never modified.
        initial_state (None, vector): initial x. If None then randomly
            initialized

    Returns:
        None. (A constructor cannot return a value; the first value of the
        time-series is stored in self.initial_observation instead.)
    """
    self.initialized = True
    self.T = 0
    self.n, self.m, self.d = n, m, d
    self.noise_magnitude = noise_magnitude
    params = system_params.copy()  # avoid overwriting system_params input dict

    if d is not None:  # d may only be specified in a partially observable system
        assert partially_observable

    # random helper functions; each call consumes a freshly generated key
    def gaussian(dims):
        return random.normal(generate_key(), shape=dims)

    def uniform(dims):
        return random.uniform(generate_key(), shape=dims, minval=-1, maxval=1)

    # determine the noise function to use, allowing for conditioning on x and u
    if noise_distribution is None:  # case no noise
        if partially_observable:
            self.noise = lambda x, u: (0.0, 0.0)
        else:
            self.noise = lambda x, u: 0.0
    elif noise_distribution == 'normal':  # case normal distribution
        if partially_observable:
            self.noise = lambda x, u: (gaussian((n, )), gaussian((d, )))
        else:
            self.noise = lambda x, u: gaussian((n, ))
    elif noise_distribution == 'uniform':  # case uniform distribution
        if partially_observable:
            self.noise = lambda x, u: (uniform((n, )), uniform((d, )))
        else:
            self.noise = lambda x, u: uniform((n, ))
    else:  # case custom function
        assert callable(noise_distribution), "noise_distribution not valid input"
        # inspect.getargspec was removed in Python 3.11; getfullargspec is
        # its supported replacement and exposes the same `.args` list.
        from inspect import getfullargspec
        arg_sub = getfullargspec(noise_distribution).args
        assert len(arg_sub) == 0 or len(arg_sub) == 2
        if len(arg_sub) == 0:
            self.noise = lambda x, u: noise_distribution()
        else:
            self.noise = lambda x, u: noise_distribution(x, u)

    # fill in every matrix that was not supplied with a random one
    shapes = {'A': (n, n), 'B': (n, m), 'C': (d, n), 'D': (d, m)}
    for matrix, shape in shapes.items():
        if matrix in params:
            assert params[matrix].shape == shape  # check input has valid shape
        elif d is None and matrix in ('C', 'D'):
            continue  # no observation dimension, so nothing to generate
        else:
            params[matrix] = gaussian(shape)

    # rescale M so that its spectral norm (largest singular value) equals k
    normalize = lambda M, k: k * M / np.linalg.norm(M, ord=2)
    self.A = normalize(params['A'], 1.0)
    self.B = normalize(params['B'], 1.0)
    if partially_observable:
        self.C = normalize(params['C'], 1.0)
        self.D = normalize(params['D'], 1.0)

    # initial state
    self.x = gaussian((n, )) if initial_state is None else initial_state

    # different dynamics depending on whether the system is fully observable or not
    if partially_observable:

        def _step(x, u, eps):
            eps_x, eps_y = eps
            next_x = np.dot(self.A, x) + np.dot(self.B, u) + self.noise_magnitude * eps_x
            y = np.dot(self.C, next_x) + np.dot(self.D, u) + self.noise_magnitude * eps_y
            return (next_x, y)
    else:

        def _step(x, u, eps):
            eps_x = eps
            next_x = np.dot(self.A, x) + np.dot(self.B, u) + self.noise_magnitude * eps_x
            return (next_x, next_x)

    self._step = jax.jit(_step)

    if partially_observable:
        # first observation of the initial state under a zero control
        u = np.zeros(m)
        self.prev_noise = self.noise(self.x, u)  # (state_noise, obs_noise)
        self.initial_observation = (np.dot(self.C, self.x) + np.dot(self.D, u) +
                                    self.noise_magnitude * self.prev_noise[1])
    else:
        self.prev_noise = np.zeros(n)
        # fully observable: the first value of the series is the state itself
        self.initial_observation = self.x
def _generate_uniform(d, norm=1.0):
    """
    Description: Draw a random d-dimensional vector and rescale it so that
        its Euclidean norm equals `norm`.

    Args:
        d (int): dimension of the vector
        norm (float): Euclidean norm of the returned vector

    Returns:
        The rescaled vector of shape (d,).
    """
    direction = random.normal(generate_key(), shape=(d, ))
    length = np.linalg.norm(direction)
    return norm * direction / length
def reset(self):
    """
    Description: Reset the environment to a fresh random start state.

    Returns:
        The new state: 4 values drawn uniformly between -0.1 and 0.1.
    """
    bound = 0.1
    start_state = random.uniform(generate_key(),
                                 shape=(4, ),
                                 minval=-bound,
                                 maxval=bound)
    self.state = start_state
    return self.state
def reset(self):
    """
    Description: Reset the environment to a random angle and angular
        velocity, and clear the last applied control.

    Returns:
        The new state [theta, thdot].
    """
    # the angle is drawn first, then the angular velocity, each with its own key
    angle = random.uniform(generate_key(), minval=-np.pi, maxval=np.pi)
    angular_velocity = random.uniform(generate_key(), minval=-1., maxval=1.)

    self._state = np.array([angle, angular_velocity])
    self.last_u = 0.0
    return self._state
def plan(self, x_0, T):
    """
    Description: Search for a control sequence of length T starting from x_0
        by repeatedly rolling out a controller in self.env and keeping the
        cheapest trajectory found.

    Args:
        x_0: initial state
        T (int): planning horizon (number of controls)

    Returns:
        A tuple (controls, pos, u_0): the controls of the cheapest transcript
        found, the number of initial random controls whose first component
        is positive, and the initial random controls themselves.
    """
    dim_x, dim_u = self.dim_x, self.dim_u
    # random initial controls, uniform between the environment's bounds
    u_old = random.uniform(generate_key(), shape=(T, dim_u), minval=self.env.min_bounds, maxval=self.env.max_bounds)
    # count the initial controls whose first component is positive (returned to the caller)
    pos = 0
    for i in range(len(u_old)):
        if u_old[i][0] > 0:
            pos += 1
    u_0 = u_old
    # NOTE: the string below is a no-op expression holding an older way of
    # building x_old by simulating the dynamics; x_old is instead T + 1 copies of x_0.
    '''x_old = [x_0] for t in range(T): x_old.append(self.env.dynamics(x_old[-1], u_old[t]))'''
    x_old = [x_0 for t in range(T + 1)]
    # all-zero gains for the initial controller
    K, k = T * [np.zeros((dim_u, dim_x))], T * [np.zeros((dim_u, ))]
    controller = self.OpenLoopController(u_old, x_old, K, k, 1.0)
    opt_cost = self.total_cost(x_old, u_old)
    count = 0
    transcript_opt = {'x': x_old, 'u': u_old}
    delta = 2.0  # NOTE(review): never read below
    delta_0 = 8.0  # factor by which self.lamb is divided on acceptance / multiplied on rejection
    lamb_min = 1e-6
    lamb_max = 1e5
    # candidate step sizes 1.1**(-i**2) for i = 0..9, i.e. decreasing from 1
    alphas = 1.1**(-np.arange(10)**2)
    converged = False
    accepted = False
    while count < self.max_iterations:
        count += 1
        alpha_count = 0
        accepted = False
        for alpha in alphas:
            alpha_count += 1
            transcript = self.env.rollout(controller, T, dynamics_grad=True, loss_grad=True, loss_hessian=True)
            x_new, u_new = transcript['x'], transcript['u']
            new_cost = self.total_cost(x_new, u_new)
            if alpha_count == 1 and count == 1:
                # the very first rollout always becomes the baseline
                transcript_opt = transcript
                opt_cost = new_cost
            else:
                if new_cost < opt_cost:
                    accepted = True
                    # converged once the relative improvement drops below the threshold
                    if self.threshold and count > 1 and (opt_cost - new_cost) / opt_cost < self.threshold:
                        converged = True
                    transcript_opt = transcript
                    opt_cost = new_cost
                    # presumably self.lamb is a regularization weight read by
                    # _form_next_controller -- TODO confirm
                    self.lamb /= delta_0
                    if self.lamb < lamb_min:
                        self.lamb = 0
                    break  # improvement found: stop trying further step sizes
                else:
                    # no improvement: fall back to the best transcript so far
                    transcript = transcript_opt
            controller = self._form_next_controller(transcript, alpha)
        if not accepted:
            # no step size improved the cost: raise self.lamb (at least lamb_min)
            self.lamb = max(lamb_min, self.lamb * delta_0)
            if self.lamb >= lamb_max:
                break
        if converged:
            break
    return transcript_opt['u'], pos, u_0
def initialize(self, observation_space, action_space):
    """
    Description: Mark the object as initialized and draw a random dense
        weight matrix from a standard normal distribution.

    Args:
        observation_space: indexable; its first entry is used as the number
            of weight columns
        action_space: indexable; its first entry is used as the number of
            weight rows
    """
    weight_shape = (action_space[0], observation_space[0])
    self.initialized = True
    self.weights_dense_w = random.normal(generate_key(), shape=weight_shape)