# Extend the search space (``bounds`` is created earlier in the script) with
# four more dimensions, in this order.
bounds.extend([
    (-0.025, 0.025),   # velocity
    (0.4, 0.6),        # goal position
    (0.055, 0.075),    # max speed
    (0.0005, 0.0025),  # power magnitude
])

# Result rows; the loop below adds one row to ``smooth_details_r1`` per seed.
smooth_details_r1, random_details_r1 = [], []

# This set assumes random sampling and checking
for r in rand_nums:
    # Seed NumPy's global RNG before building and running the test module.
    np.random.seed(r)
    node0 = pred_node(f=lambda traj: traj[1]['reward'])
    TM = test_module(
        bounds=bounds,
        sut=lambda x0: sut(x0),
        f_tree=node0,
        init_sample=60,
        optimize_restarts=5,
        exp_weight=2,
        normalizer=True,
    )
    TM.initialize()
    TM.run_BO(140)

    # Row layout: number of values below 75, number of values below 0,
    # then TM.smooth_min_x and TM.smooth_min_val.
    smooth_vals = np.array(TM.f_acqu.find_GP_func())
    smooth_details_r1.append(
        [np.sum(smooth_vals < limit) for limit in (75, 0)]
        + [TM.smooth_min_x, TM.smooth_min_val]
    )

    # With cost function: reseed with the same value before the second setup.
    np.random.seed(r)
    node0_cf = pred_node(f=lambda traj: traj[1]['reward'])
# Example #2
# 0
# Search space: four state dimensions sharing one interval, followed by the
# pole mass, the pole length and the force magnitude.
bounds = [(-0.05, 0.05)] * 4 + [
    (0.05, 0.15),   # mass of the pole
    (0.4, 0.6),     # length of the pole
    (8.00, 12.00),  # force magnitude
]

# Result rows; the loop below adds one row to each list per seed.
smooth_details_r1, random_details_r1 = [], []

# This set assumes random sampling and checking
for r in rand_nums:
    # The same seed goes to NumPy's global RNG and to the test module.
    np.random.seed(r)
    node0 = pred_node(f=lambda traj: traj[1]['reward'])
    TM = test_module(
        bounds=bounds,
        sut=lambda x0: sut(200, x0, ead=True),
        f_tree=node0,
        with_random=True,
        init_sample=70,
        optimize_restarts=5,
        exp_weight=2,
        seed=r,
    )
    TM.initialize()
    TM.run_BO(130)

    # Row layout: count of values below 100, count below 150, then the
    # min_x / min_val / min_loc attributes reported by the test module.
    smooth_details_r1.append(
        [np.sum(TM.f_acqu.GP.Y < limit) for limit in (100, 150)]
        + [TM.smooth_min_x, TM.smooth_min_val, TM.smooth_min_loc]
    )
    random_details_r1.append(
        [np.sum(np.array(TM.random_Y) < limit) for limit in (100, 150)]
        + [TM.rand_min_x, TM.rand_min_val, TM.rand_min_loc]
    )
# Example #3
# 0

# Requirement 1: search for the initial state and velocity that minimize the
# reward. A single node on the reward is enough; the reward is a smooth
# function given that the closed loop system is deterministic.

# Result rows; the loop below adds one row to each list per seed.
smooth_details_r1, random_details_r1 = [], []


# This set assumes random sampling and checking
for r in rand_nums:
    np.random.seed(r)
    node0 = pred_node(f=lambda traj: traj[1]['reward'])
    TM = test_module(
        bounds=bounds,
        sut=lambda x0: sut(x0),
        f_tree=node0,
        with_random=True,
        init_sample=70,
        optimize_restarts=5,
        exp_weight=10,
    )
    TM.initialize()
    TM.run_BO(180)

    # Row layout: count of values below -3.75, then the min_x and min_val
    # attributes reported by the test module.
    smooth_details_r1.append(
        [np.sum(TM.f_acqu.GP.Y < -3.75), TM.smooth_min_x, TM.smooth_min_val]
    )
    random_details_r1.append(
        [np.sum(np.array(TM.random_Y) < -3.75), TM.rand_min_x, TM.rand_min_val]
    )
    # Progress line: the seed followed by the two rows just recorded.
    print(r, smooth_details_r1[-1], random_details_r1[-1])

# Requirement 2: search for the initial state and goal state that minimize
# the amount of time the robot is able to stay upright. A single node on the
# time is enough; the time taken is a smooth function given that the closed
# loop system is deterministic.

smooth_details_r2 = []
# Example #4
# 0
    return traj


def f_prednode(traj):
    """Return the smallest value of ``5 - point[0]`` over all points in *traj*.

    Each element of *traj* must be indexable; only its first component is
    read.
    """
    first_components = np.array([point[0] for point in traj])
    margins = 5 - first_components
    return margins.min()


# 100 search dimensions, all sharing the interval [4.5, 5.5].
bounds = 100 * [(4.5, 5.5)]
# Fixed first argument handed to compute_traj on every evaluation.
x0 = np.array([0.0, 3.0])

# Per-run records: the seed used, plus one result row per list.
rand_nums, rand_details, smooth_details = [], [], []

for _ in range(15):
    # Draw a fresh seed, then reseed with it so the value stored in
    # ``rand_nums`` identifies the RNG state this run started from.
    r = np.random.randint(2**32 - 1)
    np.random.seed(r)
    node0 = pred_node(f=f_prednode)

    TM = test_module(
        bounds=bounds,
        sut=lambda x: compute_traj(x0, x),
        f_tree=node0,
        with_random=True,
        init_sample=100,
        optimize_restarts=1,
        exp_weight=10,
        kernel_type=GPy.kern.RBF,
    )
    TM.initialize()
    TM.run_BO(150)

    smooth_details.append([TM.smooth_min_val, TM.smooth_count])
    rand_details.append([TM.rand_min_val, TM.rand_count])
    rand_nums.append(r)
# given that the closed loop system is deterministic
# Search space, one interval per dimension.
bounds = [
    (-np.pi, np.pi),  # theta
    (-1., 1.),        # theta dot
    (7., 9.),         # speed
    (1.5, 2.5),       # torque magnitude
]

# Result rows; the loop below adds one row to ``smooth_details_r1`` per run.
smooth_details_r1, random_details_r1 = [], []

# This set assumes random sampling and checking
for _ in range(10):
    # Draw a fresh seed and reseed with it; the same value is reused for the
    # second setup further down.
    r = np.random.randint(2**32 - 1)
    np.random.seed(r)
    node0 = pred_node(f=lambda traj: traj[1]['reward'] / 200)
    TM = test_module(
        bounds=bounds,
        sut=lambda x0: sut(500, x0, ead=True),
        f_tree=node0,
        init_sample=70,
        optimize_restarts=5,
        exp_weight=10,
        normalizer=True,
    )
    TM.initialize()
    TM.run_BO(140)

    # Row layout: count of values below -5, count below -7.5, then
    # TM.smooth_min_x and TM.smooth_min_val.
    smooth_details_r1.append(
        [np.sum(TM.f_acqu.GP.Y < limit) for limit in (-5., -7.5)]
        + [TM.smooth_min_x, TM.smooth_min_val]
    )

    # With cost function
    np.random.seed(r)
    node0 = pred_node(f=lambda traj: traj[1]['reward'] / 200)
    TM = test_module(
        bounds=bounds,
        sut=lambda x0: sut(500, x0, ead=True),
        f_tree=node0,
        with_random=True,
        init_sample=70,
        optimize_restarts=5,
        exp_weight=10,
        normalizer=True,
    )
# Example #6
# 0
# Fixed seeds, one experiment (and one figure) per entry.
random_ints = [
    1414065923, 2448114469, 1906628456, 2644070884, 24268670,
    2664125290, 1182137443, 100813220, 1822558109, 2027862653,
]

# ``i`` is the 1-based matplotlib figure number for the current seed.
for i, r in enumerate(random_ints, start=1):
    np.random.seed(r)

    # Requirement tree: the maximum of two predicate nodes.
    node1 = pred_node(f=lambda x: np.sin(x) + 0.65)
    node2 = pred_node(f=lambda x: np.cos(x) + 0.65)
    node0 = max_node(children=[node1, node2])

    TM = test_module(
        bounds=bounds,
        sut=lambda x: x,
        f_tree=node0,
        with_ns=True,
        with_random=True,
        init_sample=5,
        optimizer=opt,
        optimize_restarts=3,
        exp_weight=2,
    )
    TM.initialize()
    TM.run_BO(50)

    # One figure per seed with three curves. The [5:] slice presumably
    # drops the init_sample=5 initial points -- TODO confirm.
    plt.figure(i)
    for series in (TM.smooth_X, TM.ns_GP.X, TM.random_X):
        plt.plot(series[5:])
    plt.ylabel('Sample returned in iteration i')
    plt.xlabel('BO iterations')
    plt.title('Finding the minimum of max(sin(x), cos(x))')
# Example #7
# 0
# Requirement 1: Find the initial state and goal state that minimize the
# reward. We need only one node for the reward. The reward is a smooth
# function given that the closed loop system is deterministic.

# Result rows; the loop below appends one row to each list per seed.
smooth_details_r1 = []
random_details_r1 = []

# This set assumes random sampling and checking
for r in rand_nums:
    # The same seed goes to NumPy's global RNG and to the test module.
    np.random.seed(r)
    # Single predicate node that reads the 'reward' entry of traj[1].
    node0 = pred_node(f=lambda traj: traj[1]['reward'])
    TM = test_module(bounds=bounds,
                     sut=lambda x0: sut(2048, x0, done_early=True),
                     f_tree=node0,
                     with_random=True,
                     init_sample=70,
                     optimize_restarts=5,
                     exp_weight=10,
                     seed=r)
    TM.initialize()
    # Three stages of 30, 50 and 50 iterations, with TM.k set to 5 and then 2
    # between stages.
    # NOTE(review): TM.k presumably is the exploration weight that
    # exp_weight=10 initialises -- confirm against test_module.
    TM.run_BO(30)
    TM.k = 5
    TM.run_BO(50)
    TM.k = 2
    TM.run_BO(50)
    # Row layout: count of values below -10, then the min_x and min_val
    # attributes reported by the test module.
    smooth_details_r1.append(
        [np.sum(TM.f_acqu.GP.Y < -10.), TM.smooth_min_x, TM.smooth_min_val])
    random_details_r1.append(
        [np.sum(np.array(TM.random_Y) < -10.), TM.rand_min_x, TM.rand_min_val])
    # Progress line: the seed followed by the two rows just recorded.
    print(r, smooth_details_r1[-1], random_details_r1[-1])