def test_stress(show=False):
    environment = tigercontrol.environment('LDS')
    x = environment.reset(p=2, q=0)

    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n=1, m=1, l=5, h=10, optimizer=OGD)  # initialize with class
    controller.predict(1.0)  # call controller to verify it works
    controller.update(1.0)

    optimizer = OGD(learning_rate=0.001)
    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n=1, m=1, l=3, h=10, optimizer=optimizer)  # reinitialize with instance

    loss = []
    for t in range(1000):
        y_pred = controller.predict(x)
        y_true = environment.step()
        loss.append(mse(y_pred, y_true))
        controller.update(y_true)
        x = y_true

    if show:
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(3)
        plt.close()
    print("test_stress passed")
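# The tests in this file all call an mse(...) helper that is imported from elsewhere in
# the repository. The sketch below is a hypothetical stand-in showing the assumed behavior
# (mean squared error between prediction and target); the name _mse_sketch is made up here
# to avoid shadowing the real helper.
def _mse_sketch(y_pred, y_true):
    diff = np.asarray(y_pred) - np.asarray(y_true)
    return np.mean(diff ** 2)  # average squared prediction error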
def test_sgd_lstm(show=False):
    environment = tigercontrol.environment('LDS')
    x = environment.reset(p=2, q=0)

    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n=1, m=1, l=3, h=10, optimizer=SGD)  # initialize with class
    controller.predict(1.0)  # call controllers to verify it works
    controller.update(1.0)

    optimizer = SGD(learning_rate=0.001)
    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n=1, m=1, l=3, h=10, optimizer=optimizer)  # reinitialize with instance

    loss = []
    for t in range(1000):
        y_pred = controller.predict(x)
        y_true = environment.step()
        loss.append(mse(y_pred, y_true))
        controller.update(y_true)
        x = y_true

    if show:
        plt.title("Test SGD on LQR(3) with LSTM controller")
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(3)
        plt.close()
def test_ons(show=False):
    #tigercontrol.set_key(0) # consistent randomness
    environment = tigercontrol.environment('LDS')
    x, y_true = environment.reset()

    controllers = []
    labels = ['OGD', 'ONS']

    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n=1, m=1, optimizer=OGD)  # initialize with class
    controllers.append(controller)

    #controller = tigercontrol.controllers('AutoRegressor')
    #controller.initialize(optimizer=Adagrad) # initialize with class
    #controllers.append(controller)

    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n=1, m=1, optimizer=ONS)  # initialize with class
    controllers.append(controller)

    #controller = tigercontrol.controllers('AutoRegressor')
    #controller.initialize(optimizer=Adam) # initialize with class
    #controllers.append(controller)

    losses = [[] for i in range(len(controllers))]
    update_time = [0.0 for i in range(len(controllers))]
    for t in tqdm(range(2000)):
        for i in range(len(controllers)):
            l, controller = losses[i], controllers[i]
            y_pred = controller.predict(x)
            l.append(mse(y_pred, y_true))
            t0 = time.time()  # time only the update step
            controller.update(y_true)
            update_time[i] += time.time() - t0
        x, y_true = environment.step()

    print("time taken:")
    for t, label in zip(update_time, labels):
        print(label + ": " + str(t))

    if show:
        plt.yscale('log')
        for l, label in zip(losses, labels):
            plt.plot(l, label=label)
            #plt.plot(avg_regret(l), label = label)
        plt.legend()
        plt.title("LSTM controllers with OGD vs. ONS on LDS")
        plt.show(block=False)
        plt.pause(300)
        plt.close()
    print("test_ons passed")
def test_simulator(verbose=False):
    environment = tigercontrol.environment("PyBullet-CartPole")
    obs = environment.reset(render=verbose)

    controller = tigercontrol.controllers("CartPoleNN")
    controller.initialize(environment.get_observation_space(), environment.get_action_space())

    t_start = time.time()
    save_to_mem_ID = -1
    frame = 0
    score = 0

    # run the real environment for 1 second, then save its state
    while time.time() - t_start < 1:
        a = controller.predict(obs)
        obs, r, done, _ = environment.step(a)
        score += r
        frame += 1
        if verbose:
            time.sleep(1. / 60.)

    if verbose:
        print("about to save state")
    save_to_mem_ID = environment.getState()
    if verbose:
        print("save_state_ID: " + str(save_to_mem_ID))

    # run simulator for 4 seconds
    environment.loadState(environment.getState())
    sim = environment.fork()
    if verbose:
        print("environment.loadState worked")
    sim_score = score
    sim_frame = frame
    while time.time() - t_start < 5:
        if verbose:
            time.sleep(1. / 60.)
        a = controller.predict(obs)
        obs, r, done, _ = environment.step(a)
        sim_score += r
        sim_frame += 1

    # resume stepping through environment for 2 seconds from the point when
    # the simulator was launched (i.e. t = 1)
    environment.loadState(save_to_mem_ID)
    if verbose:
        print("environment.loadState worked")
    while time.time() - t_start < 7:
        a = controller.predict(obs)
        obs, r, done, _ = environment.step(a)
        score += r
        frame += 1
        if verbose:
            time.sleep(1. / 60.)

    environment.close()
    print("test_simulator passed")
def test_double_pendulum(verbose=False):
    # observe [cos(theta1) sin(theta1) cos(theta2) sin(theta2) thetaDot1 thetaDot2]
    environment = tigercontrol.environment("DoublePendulum")
    L = lambda x, u: (x[0] - x[2])**2
    dim_x, dim_u = 4, 1
    obs = environment.reset()

    update_period = 75
    T = 75  # horizon
    threshold = 0.01
    lamb = 0.1
    max_iterations = 25

    controller = tigercontrol.controllers("ILQR")
    controller.initialize(environment, L, dim_x, dim_u, update_period, max_iterations, lamb, threshold)
    if verbose:
        print("Running iLQR...")
    # u = controller.plan(obs, T, max_iterations, lamb, threshold)

    index = 0
    for t in range(10 * T):
        if verbose:
            environment.render()
            time.sleep(1. / 15.)
        u = controller.plan(obs)
        obs, r, done, _ = environment.step(u)
        index += 1

        if done:
            if verbose:
                print("solved double pendulum in {} time steps!".format(t + 1))
            obs = environment.reset()

        '''
        if done or index == T:
            if verbose:
                print("recomputing u...")
            u = controller.plan(obs, T, max_iterations, lamb, threshold)
            index = 0
        '''

    environment.close()
    print("test_double_pendulum passed")
def test_grid_search_arma(show=False):
    environment_id = "LDS"
    controller_id = "GPC"
    environment_params = {'n': 3, 'm': 2}
    controller_params = {}
    loss = lambda a, b: np.sum((a - b)**2)
    search_space = {'optimizer': []}  # parameters for LQR controller

    opts = [Adam, Adagrad, ONS, OGD]
    lr_start, lr_stop = 0, -4  # search learning rates from 10^start to 10^stop
    learning_rates = np.logspace(lr_start, lr_stop, 1 + 2 * np.abs(lr_start - lr_stop))
    for opt, lr in itertools.product(opts, learning_rates):
        search_space['optimizer'].append(opt(learning_rate=lr))  # create instance and append

    trials = 15
    hpo = GridSearch()  # hyperparameter optimizer
    optimal_params, optimal_loss = hpo.search(controller_id, controller_params, environment_id,
                                              environment_params, loss, search_space, trials=trials,
                                              smoothing=10, start_steps=100, verbose=show)
    if show:
        print("optimal loss: ", optimal_loss)
        print("optimal params: ", optimal_params)

    # test resulting controller params
    controller = tigercontrol.controllers(controller_id)
    controller.initialize(**optimal_params)
    environment = tigercontrol.environment(environment_id)
    x = environment.reset(**environment_params)

    loss = []
    if show:
        print("run final test with optimal parameters")
    for t in range(5000):
        y_pred = controller.predict(x)
        y_true = environment.step()
        loss.append(mse(y_pred, y_true))
        controller.update(y_true)
        x = y_true

    if show:
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(10)
        plt.close()
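# A small standalone sketch (not part of the original tests) of the grid that the logspace
# call above expands to: with lr_start=0 and lr_stop=-4 it yields 9 learning rates spaced
# half a decade apart, so the optimizer search space holds 4 optimizers x 9 rates = 36
# candidate instances. The helper name is made up for illustration.
def _learning_rate_grid_sketch():
    lrs = np.logspace(0, -4, 1 + 2 * 4)  # [1e0, 10**-0.5, 1e-1, ..., 1e-4]
    assert len(lrs) == 9
    return lrs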
def test_sgd_autoregressor(show=False):
    environment = tigercontrol.environment('LDS')
    x = environment.reset(p=2, q=0)

    optimizer = SGD(learning_rate=0.0003)
    controller = tigercontrol.controllers('AutoRegressor')
    controller.initialize(p=3, optimizer=optimizer)  # initialize with instance

    loss = []
    for t in range(1000):
        y_pred = controller.predict(x)
        y_true = environment.step()
        loss.append(mse(y_pred, y_true))
        controller.update(y_true)
        x = y_true

    if show:
        plt.title("Test SGD on LQR(3) with AutoRegressor controller")
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(3)
        plt.close()
def test_cartpole(verbose=False):
    environment = tigercontrol.environment("PyBullet-CartPole")
    obs = environment.reset(render=verbose)

    controller = tigercontrol.controllers("CartPoleNN")
    controller.initialize(environment.get_observation_space(), environment.get_action_space())

    t_start = time.time()
    save_to_mem_ID = -1
    frame = 0
    score = 0
    restart_delay = 0
    saved = False

    while time.time() - t_start < 3:
        time.sleep(1. / 60.)
        a = controller.predict(obs)
        obs, r, done, _ = environment.step(a)
        score += r
        frame += 1

        if time.time() - t_start > 0 and not saved:
            if verbose:
                print("about to save to memory")
            #save_to_mem_ID = environment.getState()
            saved = True

        if not done:
            continue
        if restart_delay == 0:
            if verbose:
                print("score=%0.2f in %i frames" % (score, frame))
            restart_delay = 60 * 2  # 2 sec at 60 fps
        else:
            restart_delay -= 1
            if restart_delay > 0:
                continue
            break

    environment.close()
    print("test_cartpole passed")
def test_dynaboost_lstm(steps=500, show=True):
    # controller initialize
    T = steps
    controller_id = "LSTM"
    ogd = OGD(learning_rate=0.01)
    controller_params = {'n': 1, 'm': 1, 'l': 5, 'h': 10, 'optimizer': ogd}
    controllers = []
    Ns = [1, 3, 6]
    for n in Ns:  # number of weak learners
        controller = tigercontrol.controllers("DynaBoost")
        controller.initialize(controller_id, controller_params, n, reg=1.0)  # regularization
        controllers.append(controller)

    # regular AutoRegressor for comparison
    autoreg = tigercontrol.controllers("AutoRegressor")
    autoreg.initialize(p=4)

    # environment initialize
    p, q = 4, 0
    environment = tigercontrol.environment("LDS")
    y_true = environment.reset(p, q, noise_magnitude=0.1)

    # run all boosting controllers
    result_list = [[] for n in Ns]
    last_value = []
    autoreg_loss = []
    for i in range(T):
        y_next = environment.step()

        # predictions for every boosting controller
        for result_i, controller_i in zip(result_list, controllers):
            y_pred = controller_i.predict(y_true)
            result_i.append(mse(y_next, y_pred))
            controller_i.update(y_next)

        # last value and autoregressor predictions
        last_value.append(mse(y_true, y_next))
        autoreg_loss.append(mse(autoreg.predict(y_true), y_next))
        autoreg.update(y_next)
        y_true = y_next

    # plot performance
    if show:
        start = 100
        x = np.arange(start, steps)
        plt.figure(figsize=(12, 8))

        # plot every boosting controller loss
        for n, results in zip(Ns, result_list):
            print("Mean loss for n={}: {}".format(n, np.mean(np.array(results[start:]))))
            plt.plot(x, avg_regret(results[start:]), label="DynaBoost, n={}".format(n))

        # plot loss for last value and autoregressor controllers
        print("Mean loss for LastValue: {}".format(np.mean(np.array(last_value[start:]))))
        plt.plot(x, avg_regret(last_value[start:]), label="Last value controller")
        print("Mean loss for AutoRegressor: {}".format(np.mean(np.array(autoreg_loss[start:]))))
        plt.plot(x, avg_regret(autoreg_loss[start:]), label="AutoRegressor controller")

        plt.title("DynaBoost controller on LDS environment")
        plt.legend()
        plt.show(block=False)
        plt.pause(10)
        plt.close()
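# The plots in the DynaBoost tests call an avg_regret(...) helper imported from elsewhere in
# the repository. The sketch below is a hypothetical stand-in showing the assumed behavior
# (the running average of per-step losses); the name and exact definition are assumptions,
# not the library's implementation.
def _avg_regret_sketch(losses):
    losses = np.asarray(losses, dtype=float)
    return np.cumsum(losses) / np.arange(1, len(losses) + 1)  # average loss up to each step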
def test_dynaboost_arma(steps=500, show=True):
    # controller initialize
    T = steps
    controller_id = "AutoRegressor"
    controller_params = {'p': 18, 'optimizer': OGD}
    Ns = [64]
    timelines = [6, 9, 12]

    # regular AutoRegressor for comparison
    autoreg = tigercontrol.controllers("AutoRegressor")
    autoreg.initialize(p=18, optimizer=OGD)

    fig, ax = plt.subplots(nrows=1, ncols=3)
    cur = 0

    # run all boosting controllers
    for timeline in timelines:

        # environment initialize
        environment = tigercontrol.environment("ENSO")
        x, y_true = environment.reset(input_signals=['oni'], timeline=timeline)

        controllers = []
        for n in Ns:  # number of weak learners
            controller = tigercontrol.controllers("DynaBoost")
            controller.initialize(controller_id, controller_params, n, reg=0.0)  # regularization
            controllers.append(controller)

        result_list = [[] for n in Ns]
        autoreg_loss = []
        for i in tqdm(range(T)):

            # predictions for every boosting controller
            for result_i, controller_i in zip(result_list, controllers):
                y_pred = controller_i.predict(x)
                result_i.append(mse(y_true, y_pred))
                controller_i.update(y_true)

            # autoregressor prediction
            autoreg_loss.append(mse(autoreg.predict(x), y_true))
            autoreg.update(y_true)
            x, y_true = environment.step()

        # plot performance
        if show:
            start = T // 2

            # plot every boosting controller loss
            for n, results in zip(Ns, result_list):
                print("Mean loss for n={}: {}".format(n, np.mean(np.array(results))))
                ax[cur].plot(avg_regret(results[-start:]), label="DynaBoost, n={}".format(n))

            # plot loss for autoregressor controller
            print("Mean loss for AutoRegressor: {}".format(np.mean(np.array(autoreg_loss))))
            ax[cur].plot(avg_regret(autoreg_loss[-start:]), label="AutoRegressor controller")

            ax[cur].legend(loc="upper right", fontsize=8)
            cur += 1

    fig.tight_layout()
    plt.show()
def __init__(self, env, controller_id, controller_hyperparams, N=3, H=3, cost_fn=quad_loss):
    """
    Description: Initializes the DynaBoost controller parameters

    Args:
        env: environment (system) to control
        controller_id (string): id of weak learner controller
        controller_hyperparams (dict): dict of params to pass to each weak learner
        N (int): default 3. Number of weak learners
        H (int): default 3. Horizon length
        cost_fn (function): default quad_loss. Cost function
    """
    self.initialized = True
    self.n, self.m = env.n, env.m  # state & action dimensions
    self.env = env  # system

    # 1. Maintain N copies of the algorithm
    assert N > 0
    self.N, self.H = N, H
    self.controllers = []

    # past state
    self.x = np.zeros((self.n, 1))
    # past 2H noises
    self.w = np.zeros((2 * H, self.n, 1))

    # 2. Initialize the N weak learners
    weak_controller_class = tigercontrol.controllers(controller_id)
    self.weak_controller = weak_controller_class(controller_hyperparams)
    for _ in range(N):
        new_controller = weak_controller_class(controller_hyperparams)
        self.controllers.append(new_controller)

    self.past_partial_actions = np.zeros((N + 1, H, self.m, 1))

    # Extract the set of actions of previous learners
    def get_partial_actions(x):
        u = np.zeros((self.N + 1, self.m, 1))
        partial_u = np.zeros((self.m, 1))
        for i, controller_i in enumerate(self.controllers):
            eta_i = 2 / (i + 2)
            pred_u = controller_i.get_action(x)
            partial_u = (1 - eta_i) * partial_u + eta_i * pred_u
            u = jax.ops.index_update(u, i + 1, partial_u)
        return u
    self.get_partial_actions = get_partial_actions

    self.grad_action = grad(action_loss)

    # Compute the gradient of the cost with respect to each partial action
    def get_grads(partial_actions, w, cost_fn=quad_loss):
        v_list = [
            self.grad_action(partial_actions[i], w, self.H, self.env, cost_fn)
            for i in range(self.N)
        ]
        return v_list
    self.get_grads = get_grads

    # Linearized loss fed to each weak learner
    def linear_loss(controller_i_params, grad_i, w):
        linear_loss_i = 0
        y = np.zeros((self.n, 1))
        for h in range(self.H):
            v = self.weak_controller.determine_action(controller_i_params, y, w[:h + H])
            linear_loss_i += np.dot(grad_i[h], v)
            y = self.env.dyn(y, v) + w[h + H]
        v = self.weak_controller.determine_action(controller_i_params, y, w[:h + H])
        linear_loss_i += np.dot(grad_i[h], v)
        return np.sum(linear_loss_i)
    self.grad_linear = grad(linear_loss)
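# A standalone illustration (not part of the controller above) of the mixing rule used in
# get_partial_actions: each weak learner's action is folded in with weight eta_i = 2/(i+2),
# so partial_u is always a convex combination of the actions seen so far, with later
# learners weighted more heavily. The toy actions below are made up purely to show the recursion.
def _partial_action_mixing_sketch():
    actions = [np.ones((1, 1)) * a for a in (1.0, 2.0, 3.0)]  # hypothetical weak-learner actions
    partial_u = np.zeros((1, 1))
    for i, pred_u in enumerate(actions):
        eta_i = 2 / (i + 2)                                    # 1, 2/3, 1/2, ...
        partial_u = (1 - eta_i) * partial_u + eta_i * pred_u   # running convex combination
    return partial_u  # for actions 1, 2, 3 this evaluates to [[7/3]], i.e. weights (1/6, 1/3, 1/2)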