# Remaining search-space dimensions (the position bounds are assumed to have
# been appended to `bounds` earlier in the file).
bounds.append((-0.025, 0.025))   # velocity
bounds.append((0.4, 0.6))        # goal position
bounds.append((0.055, 0.075))    # max speed
bounds.append((0.0005, 0.0025))  # power magnitude

smooth_details_r1 = []
random_details_r1 = []

# One repetition per provided seed: run Bayesian optimisation on the smooth
# reward predicate and record how often the GP values fall below the
# thresholds of interest, together with the best point found.
for r in rand_nums:
    np.random.seed(r)

    # Single predicate node: the episode reward (smooth, since the
    # closed-loop system is deterministic).
    node0 = pred_node(f=lambda traj: traj[1]['reward'])
    TM = test_module(bounds=bounds, sut=lambda x0: sut(x0),
                     f_tree=node0, init_sample=60, optimize_restarts=5,
                     exp_weight=2, normalizer=True)
    TM.initialize()
    TM.run_BO(140)

    smooth_vals = np.array(TM.f_acqu.find_GP_func())
    smooth_details_r1.append([
        np.sum(smooth_vals < 75),
        np.sum(smooth_vals < 0),
        TM.smooth_min_x,
        TM.smooth_min_val
    ])

    # Same seed again, this time building the predicate node for the
    # cost-function variant (continuation lies outside this chunk).
    np.random.seed(r)
    node0_cf = pred_node(f=lambda traj: traj[1]['reward'])
# Search space: 4 initial-state dimensions plus the physical parameters.
bounds = [(-0.05, 0.05)] * 4   # initial state
bounds.append((0.05, 0.15))    # pole mass
bounds.append((0.4, 0.6))      # pole length
bounds.append((8.00, 12.00))   # force magnitude

smooth_details_r1 = []
random_details_r1 = []

# One BO run (with a random-sampling baseline) per provided seed.
for r in rand_nums:
    np.random.seed(r)

    # Single predicate node: the episode reward.
    node0 = pred_node(f=lambda traj: traj[1]['reward'])
    TM = test_module(bounds=bounds, sut=lambda x0: sut(200, x0, ead=True),
                     f_tree=node0, with_random=True, init_sample=70,
                     optimize_restarts=5, exp_weight=2, seed=r)
    TM.initialize()
    TM.run_BO(130)

    # Counts below the two thresholds, plus the best point/value/location
    # found by the smooth (BO) search ...
    smooth_details_r1.append([
        np.sum(TM.f_acqu.GP.Y < 100),
        np.sum(TM.f_acqu.GP.Y < 150),
        TM.smooth_min_x,
        TM.smooth_min_val,
        TM.smooth_min_loc
    ])
    # ... and the same statistics for the random-sampling baseline.
    random_details_r1.append([
        np.sum(np.array(TM.random_Y) < 100),
        np.sum(np.array(TM.random_Y) < 150),
        TM.rand_min_x,
        TM.rand_min_val,
        TM.rand_min_loc
    ])
# Requirement 1: find the initial state and velocity that minimize the reward.
# A single predicate node suffices: the reward is a smooth function, given
# that the closed-loop system is deterministic.
smooth_details_r1 = []
random_details_r1 = []

# BO with a random-sampling baseline, one run per provided seed.
for r in rand_nums:
    np.random.seed(r)

    node0 = pred_node(f=lambda traj: traj[1]['reward'])
    TM = test_module(bounds=bounds, sut=lambda x0: sut(x0), f_tree=node0,
                     with_random=True, init_sample=70, optimize_restarts=5,
                     exp_weight=10)
    TM.initialize()
    TM.run_BO(180)

    # Record threshold counts and the minimizer for both search strategies.
    smooth_details_r1.append([np.sum(TM.f_acqu.GP.Y < -3.75),
                              TM.smooth_min_x,
                              TM.smooth_min_val])
    random_details_r1.append([np.sum(np.array(TM.random_Y) < -3.75),
                              TM.rand_min_x,
                              TM.rand_min_val])
    print(r, smooth_details_r1[-1], random_details_r1[-1])

# Requirement 2: find the initial state and goal state that minimize the
# amount of time the robot is able to stay upright. Again one node suffices:
# the time taken is a smooth function, given that the closed-loop system is
# deterministic.
smooth_details_r2 = []
return traj def f_prednode(traj): return np.array([5 - t[0] for t in traj]).min() bounds = [(4.5, 5.5)] * 100 x0 = np.array([0., 3.]) rand_nums = [] rand_details = [] smooth_details = [] for _ in range(15): r = np.random.randint(2**32 - 1) np.random.seed(r) node0 = pred_node(f=f_prednode) TM = test_module(bounds=bounds, sut=lambda x: compute_traj(x0, x), f_tree=node0, with_random=True, init_sample=100, optimize_restarts=1, exp_weight=10, kernel_type=GPy.kern.RBF) TM.initialize() TM.run_BO(150) smooth_details.append([TM.smooth_min_val, TM.smooth_count]) rand_details.append([TM.rand_min_val, TM.rand_count]) rand_nums.append(r)
# The reward is smooth, given that the closed-loop system is deterministic.
bounds = [(-np.pi, np.pi)]  # theta
bounds.append((-1., 1.))    # theta dot
bounds.append((7., 9.))     # speed
bounds.append((1.5, 2.5))   # torque magnitude

smooth_details_r1 = []
random_details_r1 = []

# Ten repetitions, each with a freshly drawn seed; this set assumes random
# sampling and checking.
for _ in range(10):
    r = np.random.randint(2**32 - 1)
    np.random.seed(r)

    # Predicate node: episode reward scaled by the horizon length.
    node0 = pred_node(f=lambda traj: traj[1]['reward'] / 200)
    TM = test_module(bounds=bounds, sut=lambda x0: sut(500, x0, ead=True),
                     f_tree=node0, init_sample=70, optimize_restarts=5,
                     exp_weight=10, normalizer=True)
    TM.initialize()
    TM.run_BO(140)

    smooth_details_r1.append([np.sum(TM.f_acqu.GP.Y < -5.),
                              np.sum(TM.f_acqu.GP.Y < -7.5),
                              TM.smooth_min_x,
                              TM.smooth_min_val])

    # Re-seed and rebuild, this time with the cost function / random-sampling
    # baseline enabled (the run itself continues beyond this chunk).
    np.random.seed(r)
    node0 = pred_node(f=lambda traj: traj[1]['reward'] / 200)
    TM = test_module(bounds=bounds, sut=lambda x0: sut(500, x0, ead=True),
                     f_tree=node0, with_random=True, init_sample=70,
                     optimize_restarts=5, exp_weight=10, normalizer=True)
# Fixed seeds so the published runs are reproducible.
random_ints = [
    1414065923, 2448114469, 1906628456, 2644070884, 24268670,
    2664125290, 1182137443, 100813220, 1822558109, 2027862653
]

# Idiom fix: replaced the manual counter (`i = 0` / `i = i + 1`) with
# enumerate. NOTE(review): after the loop `i` is now 9 rather than 10 —
# confirm no later code reads the counter.
for i, r in enumerate(random_ints):
    np.random.seed(r)

    # Objective max(sin(x) + 0.65, cos(x) + 0.65), expressed as two predicate
    # leaves under a max node.
    node1 = pred_node(f=lambda x: np.sin(x) + 0.65)
    node2 = pred_node(f=lambda x: np.cos(x) + 0.65)
    node0 = max_node(children=[node1, node2])

    TM = test_module(bounds=bounds, sut=lambda x: x, f_tree=node0,
                     with_ns=True, with_random=True, init_sample=5,
                     optimizer=opt, optimize_restarts=3, exp_weight=2)
    TM.initialize()
    TM.run_BO(50)

    # Plot the samples proposed after the 5-point initial design, one figure
    # per seed (figures numbered from 1).
    plt.figure(i + 1)
    plt.plot(TM.smooth_X[5:])
    plt.plot(TM.ns_GP.X[5:])
    plt.plot(TM.random_X[5:])
    plt.ylabel('Sample returned in iteration i')
    plt.xlabel('BO iterations')
    plt.title('Finding the minimum of max(sin(x), cos(x))')
# Requirement 1: find the initial state and goal state that minimize the
# reward. A single predicate node suffices: the reward is a smooth function,
# given that the closed-loop system is deterministic.
smooth_details_r1 = []
random_details_r1 = []

# BO with a random-sampling baseline, one run per provided seed.
for r in rand_nums:
    np.random.seed(r)

    node0 = pred_node(f=lambda traj: traj[1]['reward'])
    TM = test_module(bounds=bounds,
                     sut=lambda x0: sut(2048, x0, done_early=True),
                     f_tree=node0, with_random=True, init_sample=70,
                     optimize_restarts=5, exp_weight=10, seed=r)
    TM.initialize()

    # Staged BO: adjust TM.k between phases (semantics of k are defined by
    # test_module — presumably an exploration parameter; confirm there).
    TM.run_BO(30)
    TM.k = 5
    TM.run_BO(50)
    TM.k = 2
    TM.run_BO(50)

    smooth_details_r1.append([np.sum(TM.f_acqu.GP.Y < -10.),
                              TM.smooth_min_x,
                              TM.smooth_min_val])
    random_details_r1.append([np.sum(np.array(TM.random_Y) < -10.),
                              TM.rand_min_x,
                              TM.rand_min_val])
    print(r, smooth_details_r1[-1], random_details_r1[-1])