Example #1
def reset(self):
    # sample a random initial (x, y) position within the map bounds
    xy = random.uniform(generate_key(),
                        minval=-self.map_width,
                        maxval=self.map_width,
                        shape=(2, ))
    # sample a random initial altitude between 1 and the map height
    z = random.uniform(generate_key(),
                       minval=1.0,
                       maxval=self.map_height,
                       shape=(1, ))
    init_pos = np.concatenate((xy, z))
    # NOTE: the line below overrides the random position sampled above
    init_pos = np.array([-3, -3, 7])  # constant starting position for debugging
    self.state = np.concatenate((init_pos, np.zeros(9)))
    self._render()
    return self.state
def test_kalman_filter(steps=100, show_plot=True):
    T = steps

    x_true = 1          # constant signal to track
    env_noise = 0.3     # standard deviation of the observation noise
    x0 = 0              # initial estimate

    controller = KalmanFilter()
    controller.initialize(x0, 1, 0, 1, 1, 0, env_noise)

    # squared error between the true signal and the filter's estimate
    loss = lambda x_true, x_pred: (x_true - x_pred)**2

    results = []
    for i in range(T):
        # noisy observation of the constant signal
        z = x_true + float(
            random.normal(generate_key(), shape=(1, )) * env_noise)
        # step the filter with zero control input and the new observation
        x_pred = controller.step(0, z)
        cur_loss = float(loss(x_true, x_pred))
        results.append(cur_loss)

    if show_plot:
        plt.plot(results)
        plt.title("KalmanFilter controller on constant signal")
        plt.show(block=False)
        plt.pause(15)
        plt.close()
    print("test_kalman_filter passed")
    return
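The test above assumes a KalmanFilter class provided elsewhere in the library; its interface is not shown here. Purely as an illustration, the sketch below is a scalar Kalman filter whose initialize/step calls line up with the test, under the assumption that the positional arguments are (x0, A, B, C, P0, Q, R). That ordering is a guess, not the library's documented API.

class ScalarKalmanFilter:
    """Hypothetical stand-in for the KalmanFilter used in the test (1-D state)."""

    def initialize(self, x0, A, B, C, P0, Q, R):
        # assumed argument order: initial state, dynamics A, control gain B,
        # observation gain C, initial covariance P0, process noise Q, measurement noise R
        self.x, self.A, self.B, self.C = x0, A, B, C
        self.P, self.Q, self.R = P0, Q, R

    def step(self, u, z):
        # predict step
        x_pred = self.A * self.x + self.B * u
        P_pred = self.A * self.P * self.A + self.Q
        # update step with the noisy observation z
        S = self.C * P_pred * self.C + self.R
        K = P_pred * self.C / S
        self.x = x_pred + K * (z - self.C * x_pred)
        self.P = (1.0 - K * self.C) * P_pred
        return self.x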
Example #3
def reset(self):
    """ Description: Reset the environment and return the start state """
    self._state = random.uniform(generate_key(),
                                 shape=(4, ),
                                 minval=-0.05,
                                 maxval=0.05)
    # self._state = np.array([0.0, 0.03, 0.03, 0.03])  # reproducible results
    return self._state
    def reset(self):
        x = random.uniform(generate_key(), minval=-0.5, maxval=0.5)
        y = random.uniform(generate_key(), minval=-0.5, maxval=0.5)
        th = random.uniform(generate_key(),
                            minval=-30 * np.pi / 180,
                            maxval=30 * np.pi / 180)
        xdot = random.uniform(generate_key(), minval=-0.1, maxval=0.1)
        ydot = random.uniform(generate_key(), minval=-0.1, maxval=0.1)
        thdot = random.uniform(generate_key(), minval=-0.1, maxval=0.1)

        self.state = np.array([x, y, th, xdot, ydot, thdot])
        self.last_u = np.array([0.0, 0.0])
        return self.state
Example #5
    def initialize(self, n=3, m=2, d=None, partially_observable=False, noise_distribution=None, \
                   noise_magnitude=1.0, system_params={}, initial_state=None):
        """
        Description: Randomly initialize the hidden dynamics of the system.
        Args:
            n (int): State dimension.
            m (int): Control dimension.
            d (int): Observation dimension. (Note: partially_observable MUST be True for d to be used!)
            partially_observable (bool): whether to project the state x to an observation y
            noise_distribution (None, string, func): if None, no noise is added. Valid strings are
                'normal' and 'uniform'. A valid noise function takes either no arguments or the
                pair (x, u) (state and action) and returns a scalar or an n-dimensional vector.
            noise_magnitude (float): magnitude of the noise
            system_params (dict): optionally specify the A, B, C, and D matrices of the system dynamics
            initial_state (None, vector): initial state x. If None, it is randomly initialized.
        Returns:
            The first value in the time-series
        """
        self.initialized = True
        self.T = 0
        self.n, self.m, self.d = n, m, d
        self.noise_magnitude = noise_magnitude
        params = system_params.copy()  # avoid overwriting the system_params input dict

        if d is not None:  # d may only be specified in a partially observable system
            assert partially_observable

        # random normal helper function
        gaussian = lambda dims: random.normal(generate_key(), shape=dims)

        # determine the noise function to use, allowing for conditioning on the state x and action u
        if noise_distribution is None:  # case: no noise
            if partially_observable:
                self.noise = lambda x, u: (0.0, 0.0)
            else:
                self.noise = lambda x, u: 0.0
        elif noise_distribution == 'normal':  # case: normal distribution
            if partially_observable:
                self.noise = lambda x, u: (gaussian((n, )), gaussian((d, )))
            else:
                self.noise = lambda x, u: gaussian((n, ))
        elif noise_distribution == 'uniform':  # case: uniform distribution
            if partially_observable:
                self.noise = lambda x, u: (random.uniform(generate_key(), shape=(n,), minval=-1, maxval=1), \
                    random.uniform(generate_key(), shape=(d,), minval=-1, maxval=1))
            else:
                self.noise = lambda x, u: random.uniform(
                    generate_key(), shape=(n, ), minval=-1, maxval=1)
        else:  # case: custom noise function
            assert callable(noise_distribution), "noise_distribution not valid input"
            from inspect import getfullargspec
            # the provided function must take either no arguments or exactly (x, u)
            arg_sub = getfullargspec(noise_distribution).args
            assert len(arg_sub) in (0, 2)
            if len(arg_sub) == 0:
                self.noise = lambda x, u: noise_distribution()
            else:
                self.noise = lambda x, u: noise_distribution(x, u)

        # fill in any system matrices not provided in system_params with random Gaussian entries
        for matrix, shape in {
                'A': (n, n),
                'B': (n, m),
                'C': (d, n),
                'D': (d, m)
        }.items():
            if matrix not in params:
                if d is None and matrix in ('C', 'D'): continue
                params[matrix] = gaussian(shape)
            else:
                assert params[matrix].shape == shape  # check that the input has a valid shape
        # scale the spectral norm (largest singular value) of M to k
        normalize = lambda M, k: k * M / np.linalg.norm(M, ord=2)
        self.A = normalize(params['A'], 1.0)
        self.B = normalize(params['B'], 1.0)
        if partially_observable:
            self.C = normalize(params['C'], 1.0)
            self.D = normalize(params['D'], 1.0)

        # initial state
        self.x = gaussian((n, )) if initial_state is None else initial_state

        # different dynamics depending on whether the system is fully observable or not
        if partially_observable:

            def _step(x, u, eps):
                eps_x, eps_y = eps
                next_x = np.dot(self.A, x) + np.dot(
                    self.B, u) + self.noise_magnitude * eps_x
                y = np.dot(self.C, next_x) + np.dot(
                    self.D, u) + self.noise_magnitude * eps_y
                return (next_x, y)

            self._step = jax.jit(_step)
        else:

            def _step(x, u, eps):
                eps_x = eps
                next_x = np.dot(self.A, x) + np.dot(
                    self.B, u) + self.noise_magnitude * eps_x
                return (next_x, next_x)

            self._step = jax.jit(_step)

        if partially_observable:  # return the partially observable state
            u, w = np.zeros(m), np.zeros(d)
            self.prev_noise = self.noise(self.x, u)  # (state_noise, obs_noise)
            y = np.dot(self.C, self.x) + np.dot(self.D, u) \
                + self.noise_magnitude * self.prev_noise[1]
            return y
        self.prev_noise = np.zeros(n)
        return self.x  # fully observable case: the first observation is the state itself
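A rough usage sketch follows; the class name LDS and the bare LDS() constructor are assumptions, and only the argument names come from the snippet above.

env = LDS()  # hypothetical wrapper class exposing the initialize method above

# fully observable 3-state, 2-input system with Gaussian noise
env.initialize(n=3, m=2, noise_distribution='normal', noise_magnitude=0.1)

# partially observable system with a 2-dimensional observation and uniform noise
y0 = env.initialize(n=3, m=2, d=2, partially_observable=True,
                    noise_distribution='uniform')

# custom noise: any callable taking the state x and the action u
env.initialize(n=3, m=2, noise_distribution=lambda x, u: 0.01 * x)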
Example #6
def _generate_uniform(d, norm=1.0):
    # sample a direction uniformly on the d-dimensional sphere by normalizing
    # a standard Gaussian vector, then scale it to the requested norm
    v = random.normal(generate_key(), shape=(d, ))
    v = norm * v / np.linalg.norm(v)
    return v
Example #7
def reset(self):
    self.state = random.uniform(generate_key(), minval=-0.1, maxval=0.1, shape=(4,))
    return self.state
Example #8
def reset(self):
    # random initial angle and angular velocity
    theta = random.uniform(generate_key(), minval=-np.pi, maxval=np.pi)
    thdot = random.uniform(generate_key(), minval=-1., maxval=1.)
    self._state = np.array([theta, thdot])
    self.last_u = 0.0
    return self._state
Example #9
    def plan(self, x_0, T):
        dim_x, dim_u = self.dim_x, self.dim_u
        # random initial control sequence within the environment's action bounds
        u_old = random.uniform(generate_key(),
                               shape=(T, dim_u),
                               minval=self.env.min_bounds,
                               maxval=self.env.max_bounds)
        # count how many initial controls have a positive first component (kept for debugging)
        pos = 0
        for i in range(len(u_old)):
            if u_old[i][0] > 0:
                pos += 1
        u_0 = u_old
        # initial nominal trajectory: hold the start state; feedback gains start at zero
        x_old = [x_0 for t in range(T + 1)]
        K, k = T * [np.zeros((dim_u, dim_x))], T * [np.zeros((dim_u, ))]
        controller = self.OpenLoopController(u_old, x_old, K, k, 1.0)
        opt_cost = self.total_cost(x_old, u_old)
        count = 0
        transcript_opt = {'x': x_old, 'u': u_old}
        # regularization schedule: lamb is divided/multiplied by delta_0 on accept/reject
        delta_0 = 8.0
        lamb_min = 1e-6
        lamb_max = 1e5

        # decreasing line-search step sizes for the control update
        alphas = 1.1**(-np.arange(10)**2)
        converged = False
        accepted = False

        while count < self.max_iterations:
            count += 1
            alpha_count = 0
            accepted = False
            for alpha in alphas:
                alpha_count += 1
                transcript = self.env.rollout(controller,
                                              T,
                                              dynamics_grad=True,
                                              loss_grad=True,
                                              loss_hessian=True)
                x_new, u_new = transcript['x'], transcript['u']

                new_cost = self.total_cost(x_new, u_new)
                if alpha_count == 1 and count == 1:
                    transcript_opt = transcript
                    opt_cost = new_cost
                else:
                    if new_cost < opt_cost:
                        # accept the step
                        accepted = True
                        # declare convergence if the relative improvement falls below the threshold
                        if self.threshold and count > 1 and (
                                opt_cost -
                                new_cost) / opt_cost < self.threshold:
                            converged = True

                        transcript_opt = transcript
                        opt_cost = new_cost
                        # shrink the regularization after an accepted step
                        self.lamb /= delta_0
                        if self.lamb < lamb_min:
                            self.lamb = 0
                        break
                    else:
                        # reject the step and fall back to the best transcript so far
                        transcript = transcript_opt
                controller = self._form_next_controller(transcript, alpha)
            if not accepted:
                # no step size improved the cost: increase the regularization
                self.lamb = max(lamb_min, self.lamb * delta_0)
                if self.lamb >= lamb_max:
                    break
            if converged:
                break
        return transcript_opt['u'], pos, u_0
Example #10
def initialize(self, observation_space, action_space):
    self.initialized = True
    # random initial weight matrix mapping observations to actions
    self.weights_dense_w = random.normal(
        generate_key(), shape=(action_space[0], observation_space[0]))