Example #1
def test_stress(opt_id, show=False):
    environment = tigercontrol.environment('LQR')
    x = environment.reset(p=2, q=0)

    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n=1, m=1, l=5, h=10,
                          optimizer=OGD)  # initialize with class
    controller.predict(1.0)  # call controllers to verify it works
    controller.update(1.0)

    optimizer = OGD(learning_rate=0.001)
    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n=1, m=1, l=3, h=10,
                          optimizer=optimizer)  # reinitialize with instance

    loss = []
    for t in range(1000):
        y_pred = controller.predict(x)
        y_true = environment.step()
        loss.append(mse(y_pred, y_true))
        controller.update(y_true)
        x = y_true

    if show:
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(3)
        plt.close()
    print("test_ogd passed")
Example #2
def test_sgd_lstm(show=False):
    environment = tigercontrol.environment('LDS')
    x = environment.reset(p=2,q=0)

    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n=1, m=1, l=3, h=10, optimizer=SGD) # initialize with class
    controller.predict(1.0) # call controllers to verify it works
    controller.update(1.0)

    optimizer = SGD(learning_rate=0.001)
    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n=1, m=1, l=3, h=10, optimizer=optimizer) # reinitialize with instance

    loss = []
    for t in range(1000):
        y_pred = controller.predict(x)
        y_true = environment.step()
        loss.append(mse(y_pred, y_true))
        controller.update(y_true)
        x = y_true

    if show:
        plt.title("Test SGD on LQR(3) with LSTM controller")
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(3)
        plt.close()
Example #3
def test_ons(show=False):

    #tigercontrol.set_key(0) # consistent randomness

    environment = tigercontrol.environment('LDS')
    x, y_true = environment.reset()

    controllers = []
    labels = ['OGD', 'ONS'] # labels for the two controllers run below

    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n=1, m=1, optimizer=OGD) # initialize with class
    controllers.append(controller)

    #controller = tigercontrol.controllers('AutoRegressor')
    #controller.initialize(optimizer=Adagrad) # initialize with class
    #controllers.append(controller)

    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n=1, m=1, optimizer=ONS) # initialize with class
    controllers.append(controller)

    #controller = tigercontrol.controllers('AutoRegressor')
    #controller.initialize(optimizer=Adam) # initialize with class
    #controllers.append(controller)

    losses = [[] for i in range(len(controllers))]
    update_time = [0.0 for i in range(len(controllers))]
    for t in tqdm(range(2000)):
        for i in range(len(controllers)):
            l, controller = losses[i], controllers[i]
            y_pred = controller.predict(x)
            l.append(mse(y_pred, y_true))

            update_start = time.time()
            controller.update(y_true)
            update_time[i] += time.time() - update_start
        x, y_true = environment.step()

    print("time taken:")
    for t, label in zip(update_time, labels):
        print(label + ": " + str(t))

    if show:
        plt.yscale('log')
        for l, label in zip(losses, labels):
            plt.plot(l, label = label)
            #plt.plot(avg_regret(l), label = label)
        plt.legend()
        plt.title("Autoregressors on ENSO-T6")
        plt.show(block=False)
        plt.pause(300)
        plt.close()
        
    print("test_ons passed")
Example #4
def test_simulator(verbose=False):
    environment = tigercontrol.environment("PyBullet-CartPole")
    obs = environment.reset(render=verbose)

    controller = tigercontrol.controllers("CartPoleNN")
    controller.initialize(environment.get_observation_space(),
                          environment.get_action_space())

    t_start = time.time()
    save_to_mem_ID = -1

    frame = 0
    score = 0
    restart_delay = 0
    while time.time() - t_start < 3:
        a = controller.predict(obs)
        obs, r, done, _ = environment.step(a)
        score += r
        frame += 1
        if verbose:
            time.sleep(1. / 60.)

    if verbose:
        print("about to save state")
    save_to_mem_ID = environment.getState()
    if verbose:
        print("save_state_ID: " + str(save_to_mem_ID))

    # run the forked simulator for 4 more seconds
    environment.loadState(environment.getState())
    sim = environment.fork()

    if verbose:
        print("environment.loadState worked")
    sim_score = score
    sim_frame = frame
    while time.time() - t_start < 7:  # the first phase ran for 3 seconds
        if verbose:
            time.sleep(1. / 60.)
        a = controller.predict(obs)
        obs, r, done, _ = sim.step(a)  # step the forked simulator rather than the environment
        sim_score += r
        sim_frame += 1

    # resume stepping through the environment for 2 more seconds from the saved state
    # (the point at which the simulator was forked)
    environment.loadState(save_to_mem_ID)
    if verbose:
        print("environment.loadState worked")
    while time.time() - t_start < 9:
        a = controller.predict(obs)
        obs, r, done, _ = environment.step(a)
        score += r
        frame += 1
        if verbose:
            time.sleep(1. / 60.)

    environment.close()
    print("test_simulator passed")
Example #5
def test_double_pendulum(verbose=False):
    environment = tigercontrol.environment("DoublePendulum")
    # observe [cos(theta1) sin(theta1) cos(theta2) sin(theta2) thetaDot1 thetaDot2]
    L = lambda x, u: (x[0] - x[2])**2
    dim_x, dim_u = 4, 1
    obs = environment.reset()

    update_period = 75
    T = 75  # horizon
    threshold = 0.01
    lamb = 0.1
    max_iterations = 25

    controller = tigercontrol.controllers("ILQR")
    controller.initialize(environment, L, dim_x, dim_u, update_period,
                          max_iterations, lamb, threshold)

    if verbose:
        print("Running iLQR...")
    # u = controller.plan(obs, T, max_iterations, lamb, threshold)

    index = 0
    for t in range(10 * T):
        if verbose:
            environment.render()
            time.sleep(1. / 15.)
        u = controller.plan(obs)
        obs, r, done, _ = environment.step(u)
        index += 1

        if done:
            if verbose:
                print("solved double pendulum in {} time steps!".format(t + 1))
            obs = environment.reset()
        # if done or index == T:
        #     if verbose:
        #         print("recomputing u...")
        #     u = controller.plan(obs, T, max_iterations, lamb, threshold)
        #     index = 0

    environment.close()
    print("test_double_pendulum passed")
Example #6
def test_grid_search_arma(show=False):
    environment_id = "LDS"
    controller_id = "GPC"
    environment_params = {'n':3, 'm':2}
    controller_params = {}
    loss = lambda a, b: np.sum((a-b)**2)
    search_space = {'optimizer': []} # candidate optimizers to search over for the GPC controller
    opts = [Adam, Adagrad, ONS, OGD]
    lr_start, lr_stop = 0, -4 # search learning rates from 10^start to 10^stop 
    learning_rates = np.logspace(lr_start, lr_stop, 1+2*np.abs(lr_start - lr_stop))
    for opt, lr in itertools.product(opts, learning_rates):
        search_space['optimizer'].append(opt(learning_rate=lr)) # create instance and append

    trials = 15
    hpo = GridSearch() # hyperparameter optimizer
    optimal_params, optimal_loss = hpo.search(controller_id, controller_params, environment_id, environment_params, loss, 
        search_space, trials=trials, smoothing=10, start_steps=100, verbose=show)

    if show:
        print("optimal loss: ", optimal_loss)
        print("optimal params: ", optimal_params)

    # test resulting controller params
    controller = tigercontrol.controllers(controller_id)
    controller.initialize(**optimal_params)
    environment = tigercontrol.environment(environment_id)
    x = environment.reset(**environment_params)
    loss = []
    if show:
        print("run final test with optimal parameters")
    for t in range(5000):
        y_pred = controller.predict(x)
        y_true = environment.step()
        loss.append(mse(y_pred, y_true))
        controller.update(y_true)
        x = y_true

    if show:
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(10)
        plt.close()
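For reference, the np.logspace call in the grid-search example above produces nine learning rates spaced half a decade apart between 10^0 and 10^-4, so the search space holds 4 optimizers x 9 learning rates = 36 candidate instances; a quick standalone check:

import numpy as np

lr_start, lr_stop = 0, -4
learning_rates = np.logspace(lr_start, lr_stop, 1 + 2 * np.abs(lr_start - lr_stop))
print(len(learning_rates))  # 9
print(learning_rates)       # 1.0, 0.316, 0.1, ..., 0.000316, 0.0001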
Example #7
def test_sgd_autoregressor(show=False):
    environment = tigercontrol.environment('LDS')
    x = environment.reset(p=2,q=0)

    optimizer = SGD(learning_rate=0.0003)
    controller = tigercontrol.controllers('AutoRegressor')
    controller.initialize(p=3, optimizer=optimizer) # initialize with optimizer instance

    loss = []
    for t in range(1000):
        y_pred = controller.predict(x)
        y_true = environment.step()
        loss.append(mse(y_pred, y_true))
        controller.update(y_true)
        x = y_true

    if show:
        plt.title("Test SGD on LQR(3) with AutoRegressor controller")
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(3)
        plt.close()
Example #8
def test_cartpole(verbose=False):
    environment = tigercontrol.environment("PyBullet-CartPole")
    obs = environment.reset(render=verbose)

    controller = tigercontrol.controllers("CartPoleNN")
    controller.initialize(environment.get_observation_space(),
                          environment.get_action_space())

    t_start = time.time()
    save_to_mem_ID = -1

    frame = 0
    score = 0
    restart_delay = 0
    saved = False
    while time.time() - t_start < 3:
        time.sleep(1. / 60.)
        a = controller.predict(obs)
        obs, r, done, _ = environment.step(a)

        score += r
        frame += 1
        if time.time() - t_start > 0 and not saved:
            if verbose:
                print("about to save to memory")
            #save_to_mem_ID = environment.getState()
            saved = True
        if not done: continue
        if restart_delay == 0:
            if verbose:
                print("score=%0.2f in %i frames" % (score, frame))
            restart_delay = 60 * 2  # 2 sec at 60 fps
        else:
            restart_delay -= 1
            if restart_delay > 0: continue
            break

    environment.close()
    print("test_cartpole passed")
Example #9
def test_dynaboost_lstm(steps=500, show=True):
    # controller initialize
    T = steps
    controller_id = "LSTM"
    ogd = OGD(learning_rate=0.01)
    controller_params = {'n': 1, 'm': 1, 'l': 5, 'h': 10, 'optimizer': ogd}
    controllers = []
    Ns = [1, 3, 6]
    for n in Ns:  # number of weak learners
        controller = tigercontrol.controllers("DynaBoost")
        controller.initialize(controller_id, controller_params, n,
                              reg=1.0)  # regularization
        controllers.append(controller)

    # regular AutoRegressor for comparison
    autoreg = tigercontrol.controllers("AutoRegressor")
    autoreg.initialize(p=4)  # p = number of past observations used

    # environment initialize
    p, q = 4, 0
    environment = tigercontrol.environment("LDS")
    y_true = environment.reset(p, q, noise_magnitude=0.1)

    # run all boosting controller
    result_list = [[] for n in Ns]
    last_value = []
    autoreg_loss = []
    for i in range(T):
        y_next = environment.step()

        # predictions for every boosting controller
        for result_i, controller_i in zip(result_list, controllers):
            y_pred = controller_i.predict(y_true)
            result_i.append(mse(y_next, y_pred))
            controller_i.update(y_next)

        # last value and autoregressor predictions
        last_value.append(mse(y_true, y_next))
        autoreg_loss.append(mse(autoreg.predict(y_true), y_next))
        autoreg.update(y_next)
        y_true = y_next

    # plot performance
    if show:
        start = 100
        x = np.arange(start, steps)
        plt.figure(figsize=(12, 8))

        # plot every boosting controller loss
        for n, results in zip(Ns, result_list):
            print("Mean loss for n={}: {}".format(
                n, np.mean(np.array(results[start:]))))
            plt.plot(x,
                     avg_regret(results[start:]),
                     label="DynaBoost, n={}".format(n))

        # plot loss for last value and autoregressor controllers
        print("Mean loss for LastValue: {}".format(
            np.mean(np.array(last_value[start:]))))
        plt.plot(x,
                 avg_regret(last_value[start:]),
                 label="Last value controller")
        print("Mean loss for AutoRegressor: {}".format(
            np.mean(np.array(autoreg_loss[start:]))))
        plt.plot(x,
                 avg_regret(autoreg_loss[start:]),
                 label="AutoRegressor controller")

        plt.title("DynaBoost controller on LQR environment")
        plt.legend()
        plt.show(block=False)
        plt.pause(10)
        plt.close()
Example #10
def test_dynaboost_arma(steps=500, show=True):
    # controller initialize
    T = steps
    controller_id = "AutoRegressor"
    controller_params = {'p': 18, 'optimizer': OGD}
    Ns = [64]
    timelines = [6, 9, 12]

    # regular AutoRegressor for comparison
    autoreg = tigercontrol.controllers("AutoRegressor")
    autoreg.initialize(p=18, optimizer=OGD)

    fig, ax = plt.subplots(nrows=1, ncols=3)
    cur = 0

    # run all boosting controller
    for timeline in timelines:

        # environment initialize
        environment = tigercontrol.environment("ENSO")
        x, y_true = environment.reset(input_signals=['oni'], timeline=timeline)
        controllers = []

        for n in Ns:  # number of weak learners
            controller = tigercontrol.controllers("DynaBoost")
            controller.initialize(controller_id, controller_params, n,
                                  reg=0.0)  # regularization
            controllers.append(controller)

        result_list = [[] for n in Ns]
        autoreg_loss = []

        for i in tqdm(range(T)):

            # predictions for every boosting controller
            for result_i, controller_i in zip(result_list, controllers):
                y_pred = controller_i.predict(x)
                result_i.append(mse(y_true, y_pred))
                controller_i.update(y_true)

            # last value and autoregressor predictions
            autoreg_loss.append(mse(autoreg.predict(x), y_true))
            autoreg.update(y_true)
            x, y_true = environment.step()

        # plot performance
        if show:

            start = T // 2

            # plot every boosting controller loss
            for n, results in zip(Ns, result_list):
                print("Mean loss for n={}: {}".format(
                    n, np.mean(np.array(results))))
                ax[cur].plot(avg_regret(results[-start:]),
                             label="DynaBoost, n={}".format(n))

            # plot loss for last value and autoregressor controllers
            print("Mean loss for AutoRegressor: {}".format(
                np.mean(np.array(autoreg_loss))))
            ax[cur].plot(avg_regret(autoreg_loss[-start:]),
                         label="AutoRegressor controller")
            ax[cur].legend(loc="upper right", fontsize=8)

        cur += 1

    fig.tight_layout()
    plt.show()
Example #11
    def __init__(self,
                 env,
                 controller_id,
                 controller_hyperparams,
                 N=3,
                 H=3,
                 cost_fn=quad_loss):
        """
        Description: Initializes autoregressive controller parameters
        Args:
            controller_id (string): id of weak learner controller
            controller_params (dict): dict of params to pass controller
            N (int): default 3. Number of weak learners
        """
        self.initialized = True

        self.n, self.m = env.n, env.m  # State & Action Dimensions
        self.env = env  # System

        # 1. Maintain N copies of the algorithm
        assert N > 0
        self.N, self.H = N, H
        self.controllers = []

        #past state
        self.x = np.zeros((self.n, 1))
        # Past 2H noises
        self.w = np.zeros((2 * H, self.n, 1))

        # 2. Initialize the N weak learners
        weak_controller_class = tigercontrol.controllers(controller_id)
        self.weak_controller = weak_controller_class(controller_hyperparams)
        for _ in range(N):
            new_controller = weak_controller_class(controller_hyperparams)
            self.controllers.append(new_controller)

        self.past_partial_actions = np.zeros((N + 1, H, self.m, 1))

        # Extract the set of actions of previous learners
        def get_partial_actions(x):
            u = np.zeros((self.N + 1, self.m, 1))
            partial_u = np.zeros((self.m, 1))
            for i, controller_i in enumerate(self.controllers):
                eta_i = 2 / (i + 2)
                pred_u = controller_i.get_action(x)
                partial_u = (1 - eta_i) * partial_u + eta_i * pred_u
                u = jax.ops.index_update(u, i + 1, partial_u)
            return u

        self.get_partial_actions = get_partial_actions

        self.grad_action = grad(action_loss)

        # Compute the gradient of the action loss for each weak learner
        def get_grads(partial_actions, w, cost_fn=quad_loss):
            v_list = [
                self.grad_action(partial_actions[i], w, self.H, self.env,
                                 cost_fn) for i in range(self.N)
            ]
            return v_list

        self.get_grads = get_grads

        def linear_loss(controller_i_params, grad_i, w):
            linear_loss_i = 0

            y = np.zeros((self.n, 1))

            for h in range(self.H):
                v = self.weak_controller.determine_action(
                    controller_i_params, y, w[:h + H])
                linear_loss_i += np.dot(grad_i[h], v)
                y = self.env.dyn(y, v) + w[h + H]

            v = self.weak_controller.determine_action(controller_i_params, y,
                                                      w[:h + H])
            linear_loss_i += np.dot(grad_i[h], v)

            return np.sum(linear_loss_i)

        self.grad_linear = grad(linear_loss)
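quad_loss (the default cost_fn) and action_loss (passed to grad) are defined elsewhere in the module and are not shown in this snippet; a minimal sketch of a quadratic cost of the kind quad_loss presumably names, with identity state/action weights as an assumption:

import jax.numpy as jnp

def quad_loss(x, u):
    # quadratic state/action cost; identity weights (Q = R = I) are an assumption
    return jnp.sum(x * x) + jnp.sum(u * u)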