# Requirement 1: Find the initial configuration that minimizes the reward
# We need only one node for the reward. The reward is a smooth function
# given that the closed loop system is deterministic
bounds = [(-0.6, -0.4)]  # Bounds on the position
bounds.append((-0.025, 0.025))  # Bounds on the velocity
bounds.append((0.4, 0.6))  # Bounds on the goal position
bounds.append((0.055, 0.075))  # Bounds on the max speed
bounds.append((0.0005, 0.0025))  # Bounds on the power magnitude
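# The sut() wrapper and the rand_nums seed list are defined elsewhere in the
# original script; this excerpt only shows how they are called. Below is a
# minimal, illustrative sketch of the kind of wrapper the call sut(x0) and the
# bounds above suggest. The gym environment, the attribute names, the
# controller() policy and the info-dict layout are assumptions made for
# illustration, not the original implementation.
def sut_sketch(x0):
    import gym
    env = gym.make('MountainCarContinuous-v0')
    env.reset()
    env.unwrapped.state = np.array(x0[0:2])  # initial position and velocity
    env.unwrapped.goal_position = x0[2]      # perturbed environment parameters
    env.unwrapped.max_speed = x0[3]
    env.unwrapped.power = x0[4]
    total_reward, traj, done = 0.0, [env.unwrapped.state.copy()], False
    while not done:
        action = controller(env.unwrapped.state)  # hypothetical closed-loop policy
        state, reward, done, _ = env.step(action)
        total_reward += reward
        traj.append(state)
    # pred_node below reads the scalar it scores from traj[1]['reward'].
    return traj, {'reward': total_reward}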

smooth_details_r1 = []
random_details_r1 = []

# This set assumes random sampling and checking
for r in rand_nums:
    np.random.seed(r)
    node0 = pred_node(f=lambda traj: traj[1]['reward'])
    TM = test_module(bounds=bounds,
                     sut=lambda x0: sut(x0),
                     f_tree=node0,
                     init_sample=60,
                     optimize_restarts=5,
                     exp_weight=2,
                     normalizer=True)
    TM.initialize()

    TM.run_BO(140)
    smooth_vals = np.array(TM.f_acqu.find_GP_func())
    smooth_details_r1.append([
        np.sum(smooth_vals < 75),
        np.sum(smooth_vals < 0), TM.smooth_min_x, TM.smooth_min_val
    ])
Example 2
# Requirement 1: Find the initial configuration that minimizes the reward
# We need only one node for the reward. The reward is a smooth function
# given that the closed loop system is deterministic
bounds = [(-0.05, 0.05)] * 4  # Bounds on the state
bounds.append((0.05, 0.15))  # Bounds on the mass of the pole
bounds.append((0.4, 0.6))  # Bounds on the length of the pole
bounds.append((8.00, 12.00))  # Bounds on the force magnitude
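# As in the previous example, the sut() wrapper is defined elsewhere in the
# original script. The call sut(200, x0, ead=True) suggests the assumed shape
# sketched below: a step budget, the sampled configuration, and an
# "end-after-done" flag that stops the rollout once the environment reports
# termination. Everything here, closed_loop_step in particular, is an
# illustrative placeholder, not the original implementation.
def sut_sketch(max_steps, x0, ead=False):
    total_reward, traj = 0.0, []
    state = np.array(x0[0:4])  # first four dimensions: initial CartPole state
    # x0[4], x0[5], x0[6] would set the pole mass, pole length and force magnitude.
    for _ in range(max_steps):
        state, reward, done = closed_loop_step(state)  # hypothetical one-step rollout
        total_reward += reward
        traj.append(state)
        if ead and done:
            break
    return traj, {'reward': total_reward}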

smooth_details_r1 = []
random_details_r1 = []

# This set assumes random sampling and checking
for r in rand_nums:
    np.random.seed(r)
    node0 = pred_node(f=lambda traj: traj[1]['reward'])
    TM = test_module(bounds=bounds,
                     sut=lambda x0: sut(200, x0, ead=True),
                     f_tree=node0,
                     with_random=True,
                     init_sample=70,
                     optimize_restarts=5,
                     exp_weight=2,
                     seed=r)
    TM.initialize()
    TM.run_BO(130)
    smooth_details_r1.append([
        np.sum(TM.f_acqu.GP.Y < 100),
        np.sum(TM.f_acqu.GP.Y < 150), TM.smooth_min_x, TM.smooth_min_val,
        TM.smooth_min_loc
    ])
Example 3
    return compute_traj(init_state=init_qpos, state_per=state_per,
                        vel_per=vel_per)


# Requirement 1: Find the initial state and velocity that minimize the reward
# We need only one node for the reward. The reward is a smooth function
# given that the closed loop system is deterministic

smooth_details_r1 = []
random_details_r1 = []


# This set assumes random sampling and checking
for r in rand_nums:
    np.random.seed(r)
    node0 = pred_node(f=lambda traj: traj[1]['reward'])
    TM = test_module(bounds=bounds, sut=lambda x0: sut(x0),
                     f_tree=node0, with_random=True, init_sample=70,
                     optimize_restarts=5, exp_weight=10)
    TM.initialize()
    TM.run_BO(180)
    smooth_details_r1.append([np.sum(TM.f_acqu.GP.Y < -3.75),
                              TM.smooth_min_x, TM.smooth_min_val])
    random_details_r1.append([np.sum(np.array(TM.random_Y) < -3.75),
                              TM.rand_min_x, TM.rand_min_val])
    print(r, smooth_details_r1[-1], random_details_r1[-1])

# Requirement 2: Find the initial state and goal state that minimize the amount
# of time the robot is able to stay upright
# We need only one node for the time. The time taken is a smooth function
# given that the closed loop system is deterministic.
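# The code for Requirement 2 is not part of this excerpt. A minimal sketch of
# how it could be phrased with the same machinery follows; the 'up_time' key
# is a hypothetical field assumed to hold the number of steps the robot stays
# upright, not something taken from the original code, and the search space
# would need bounds covering both the initial state and the goal state.
node_up = pred_node(f=lambda traj: traj[1]['up_time'])
TM_up = test_module(bounds=bounds, sut=lambda x0: sut(x0),
                    f_tree=node_up, with_random=True, init_sample=70,
                    optimize_restarts=5, exp_weight=10)
TM_up.initialize()
TM_up.run_BO(180)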
Example 4
    return traj


def f_prednode(traj):
    # Robustness of the requirement "the first state component stays below 5":
    # the worst-case margin 5 - t[0] along the trace; negative means a violation.
    return np.array([5 - t[0] for t in traj]).min()


bounds = [(4.5, 5.5)] * 100  # 100 decision variables, each in [4.5, 5.5]
x0 = np.array([0., 3.])  # fixed first argument handed to compute_traj
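# A quick illustrative check of the predicate (not part of the original
# script): the value is the worst-case margin below 5 along the trace, so any
# trace whose first component ever exceeds 5 gets a negative score, i.e. the
# requirement is falsified.
sample_traj = [np.array([4.9, 0.1]), np.array([5.2, -0.3]), np.array([4.7, 0.2])]
print(f_prednode(sample_traj))  # approximately -0.2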

rand_nums = []
rand_details = []
smooth_details = []
for _ in range(15):
    r = np.random.randint(2**32 - 1)
    np.random.seed(r)
    node0 = pred_node(f=f_prednode)

    TM = test_module(bounds=bounds,
                     sut=lambda x: compute_traj(x0, x),
                     f_tree=node0,
                     with_random=True,
                     init_sample=100,
                     optimize_restarts=1,
                     exp_weight=10,
                     kernel_type=GPy.kern.RBF)
    TM.initialize()
    TM.run_BO(150)
    smooth_details.append([TM.smooth_min_val, TM.smooth_count])
    rand_details.append([TM.rand_min_val, TM.rand_count])
    rand_nums.append(r)
# Requirement 1: Find the initial configuration that minimizes the reward
# We need only one node for the reward. The reward is a smooth function
# given that the closed loop system is deterministic
bounds = [(-np.pi, np.pi)] # Bounds on theta
bounds.append((-1., 1.)) # Bounds on theta dot
bounds.append((7., 9.)) # Bounds on the speed
bounds.append((1.5, 2.5)) # Bounds on the torque magnitude

smooth_details_r1 = []
random_details_r1 = []

# This set assumes random sampling and checking
for _ in range(10):
    r = np.random.randint(2**32-1)
    np.random.seed(r)
    node0 = pred_node(f=lambda traj: traj[1]['reward'] / 200)
    TM = test_module(bounds=bounds, sut=lambda x0: sut(500, x0, ead=True),
                     f_tree=node0, init_sample=70,
                     optimize_restarts=5, exp_weight=10, normalizer=True)
    TM.initialize()
    TM.run_BO(140)
    smooth_details_r1.append([np.sum(TM.f_acqu.GP.Y < -5.),
                              np.sum(TM.f_acqu.GP.Y < -7.5),
                              TM.smooth_min_x, TM.smooth_min_val])


    # With cost function
    np.random.seed(r)
    node0 = pred_node(f=lambda traj: traj[1]['reward'] / 200)
    TM = test_module(bounds=bounds, sut=lambda x0: sut(500, x0, ead=True),
                     f_tree=node0, with_random=True, init_sample=70,
Example 6
# pred_node, max_node and test_module are assumed to be importable from the
# same package as delta_opt (not verified against the library's layout).
from active_testing import delta_opt, pred_node, max_node, test_module

import numpy as np
from matplotlib import pyplot as plt

bounds = [(0, 10)]
opt = delta_opt(bounds=bounds, epsilon=0.0001)

random_ints = [
    1414065923, 2448114469, 1906628456, 2644070884, 24268670, 2664125290,
    1182137443, 100813220, 1822558109, 2027862653
]
i = 0
for r in random_ints:
    np.random.seed(r)
    node1 = pred_node(f=lambda x: np.sin(x) + 0.65)
    node2 = pred_node(f=lambda x: np.cos(x) + 0.65)
    node0 = max_node(children=[node1, node2])

    TM = test_module(bounds=bounds,
                     sut=lambda x: x,
                     f_tree=node0,
                     with_ns=True,
                     with_random=True,
                     init_sample=5,
                     optimizer=opt,
                     optimize_restarts=3,
                     exp_weight=2)
    TM.initialize()
    TM.run_BO(50)
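# A hedged sanity check (not part of the original script): under the usual
# robust-semantics reading, a max over children behaves like a disjunction,
# so the composite spec above is violated only where BOTH sin(x) + 0.65 and
# cos(x) + 0.65 are negative at the same time.
xs = np.linspace(0, 10, 10001)
composite = np.maximum(np.sin(xs) + 0.65, np.cos(xs) + 0.65)
print(xs[composite < 0].min(), xs[composite < 0].max())  # roughly 3.85 and 4.00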
                        goal=goal,
                        state_per=state_per,
                        vel_per=vel_per)


# Requirement 1: Find the initial state and goal state that minimize the reward
# We need only one node for the reward. The reward is a smooth function
# given that the closed loop system is deterministic

smooth_details_r1 = []
random_details_r1 = []

# This set assumes random sampling and checking
for r in rand_nums:
    np.random.seed(r)
    node0 = pred_node(f=lambda traj: traj[1]['reward'])
    TM = test_module(bounds=bounds,
                     sut=lambda x0: sut(2048, x0, done_early=True),
                     f_tree=node0,
                     with_random=True,
                     init_sample=70,
                     optimize_restarts=5,
                     exp_weight=10,
                     seed=r)
    TM.initialize()
    TM.run_BO(30)
    TM.k = 5  # lower the exploration weight before continuing the BO search
    TM.run_BO(50)
    TM.k = 2  # exploit even more aggressively in the final stage
    TM.run_BO(50)
    smooth_details_r1.append(