def test_safe_random_sampling(self):
    """A random safe state can be sampled and has strictly positive measure."""
    x_seed = np.array([1.45, 0.5])
    y_seed = np.array([.8])
    self.env = Hovership(
        random_start=False,
        dynamics_parameters={'shape': (50, 151)},
        default_initial_state=x_seed[:1]
    )
    self.hyperparameters = {
        'outputscale_prior': (0.4, 2),
        'lengthscale_prior': (0.2, 0.2),
        'noise_prior': (0.001, 0.002)
    }
    self.agent = SafetyLearner(
        env=self.env,
        gamma_optimistic=0.6,
        gamma_cautious=0.95,
        lambda_cautious=0,
        x_seed=x_seed,
        y_seed=y_seed,
        gp_params=self.hyperparameters,
    )
    safe_state = self.agent.get_random_safe_state()
    self.assertTrue(safe_state is not None, 'No safe state found')
    # The sampled state should have positive measure under the safety model
    measure = self.agent.safety_model.measure(
        state=safe_state,
        lambda_threshold=0,
        gamma_threshold=self.agent.safety_model.gamma_measure
    )
    self.assertTrue(
        measure > 0,
        f'The measure of state {safe_state} is {measure} and should be > 0'
    )
def test_indexing(self):
    """Query construction from an index yields the expected shapes."""
    env = Hovership(random_start=True,
                    dynamics_parameters={'shape': (100, 5)})
    hyperparameters = {
        'outputscale_prior': (1, 0.1),
        'lengthscale_prior': (0.2, 0.05),
        'noise_prior': (0.001, 0.001)
    }
    gpqlearning = GPQLearning(env, 0.9, 0.9,
                              x_seed=np.array([1., 1.]),
                              y_seed=np.array([1]),
                              gp_params=hyperparameters)
    # Fix the state coordinate at 0.5 and slice across all actions
    query = gpqlearning._get_query_from_index(
        (np.array([0.5]), slice(None, None, None))
    )
    self.assertEqual(query.shape, (5, 2))
    self.assertTrue(np.all(query[:, 0] == 0.5))
    # The GP prediction on the query should be one value per action
    pred = gpqlearning.gp.predict(query).mean.cpu().numpy()
    self.assertEqual(pred.shape, (5, ))
def test_policy_convergence(self):
    """Run epsilon-greedy episodes and compute a greedy policy.

    Only checks that the policy computation runs; the converged values
    themselves are not asserted.
    """
    env = Hovership(random_start=True,
                    dynamics_parameters={'shape': (100, 2)})
    hyperparameters = {
        'outputscale_prior': (1, 0.1),
        'lengthscale_prior': (0.2, 0.05),
        'noise_prior': (0.001, 0.001)
    }
    gpqlearning = GPQLearning(env, 0.9, 0.9,
                              x_seed=np.array([0.85, 1.]),
                              y_seed=np.array([1.]),
                              gp_params=hyperparameters)
    n_actions = env.action_space.index_shape[0]
    eps = 0.1
    for episode in range(3):
        state = env.reset()
        failed = env.has_failed
        n_steps = 0
        while not failed and n_steps < 50:
            # Epsilon-greedy selection over the learned Q-values
            probas = np.ones(n_actions) * eps / n_actions
            probas[np.argmax(gpqlearning[state, :])] += 1 - eps
            action = env.action_space[np.random.choice(n_actions, p=probas)]
            new_state, reward, failed = env.step(action)
            print(f'Step {n_steps} - State {state} - New state {new_state}'
                  f' - Action - {action} - Reward {reward} - Failed '
                  f'{failed}')
            gpqlearning.update(state, action, new_state, reward, failed)
            state = new_state
            n_steps += 1

    def policy_from_gpq(gpq):
        # One-hot greedy policy from the tabulated Q-values
        q_values = gpq[:, :].reshape(gpq.env.stateaction_space.index_shape)
        policy = np.zeros_like(q_values)
        for i, _ in iter(env.state_space):
            policy[i, np.argmax(q_values[i, :])] = 1
        return policy

    policy = policy_from_gpq(gpqlearning)
    print("The computation of the policy works, but "
          "the convergence value is not tested. "
          f"Policy:\n{policy}")
    self.assertTrue(True)
def __init__(self, output_directory, name, max_samples=250,
             gamma_optimistic=0.9, gamma_cautious=0.9, lambda_cautious=0.1,
             lengthscale_prior=(0.1, 0.05), shape=(10, 10),
             hyperparameters=None, ground_truth=None, every=50):
    """Set up the environment, safety agent, and plotters for the simulation.

    `hyperparameters` entries override the built-in GP defaults; when
    `ground_truth` is given it is loaded from the corresponding vibly file.
    """
    x_seed = np.array([1.45, 0.5])
    y_seed = np.array([1.])
    self.env = Hovership(
        random_start=False,
        dynamics_parameters={'shape': shape},
        default_initial_state=x_seed[:1]
    )
    # Merge user-supplied hyperparameters over the defaults
    gp_hyperparameters = {
        'outputscale_prior': (1, 0.1),
        'lengthscale_prior': lengthscale_prior,
        'noise_prior': (0.001, 0.001)
    }
    gp_hyperparameters.update(
        hyperparameters if hyperparameters is not None else {}
    )
    if ground_truth is not None:
        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(ground_truth)
    else:
        self.ground_truth = None
    self.agent = SafetyLearner(
        env=self.env,
        gamma_optimistic=gamma_optimistic,
        gamma_cautious=gamma_cautious,
        lambda_cautious=lambda_cautious,
        x_seed=x_seed,
        y_seed=y_seed,
        gp_params=gp_hyperparameters,
    )
    self.agent.reset()
    plotters = {'Safety': SafetyPlotter(self.agent, self.ground_truth)}
    super(ToySimulation, self).__init__(output_directory, name, plotters)
    self.max_samples = max_samples
    self.every = every
def test_from_vibly(self):
    """Loading a vibly ground-truth file yields consistently shaped sets."""
    truth = SafetyTruth(Hovership())
    truth.from_vibly_file('../data/ground_truth/from_vibly/hover_map.pickle')
    self.assertTrue(isinstance(truth.stateaction_space, StateActionSpace))
    # All derived sets must share the viable set's shape
    reference_shape = truth.viable_set.shape
    self.assertEqual(reference_shape, truth.measure_value.shape)
    self.assertEqual(reference_shape, truth.unviable_set.shape)
    self.assertEqual(reference_shape, truth.failure_set.shape)
def __init__(self, max_samples, gamma_optimistic, gamma_cautious,
             lambda_cautious, shape, every):
    """Build the environment, ground truth, agent, and output directories."""
    self.x_seed = np.array([1.45, 0.5])
    self.y_seed = np.array([.8])
    self.env = Hovership(
        random_start=True,
        dynamics_parameters={'shape': shape},
        default_initial_state=self.x_seed[:1]
    )
    self.ground_truth = SafetyTruth(self.env)
    self.ground_truth.from_vibly_file(
        '../data/ground_truth/from_vibly/hover_map.pickle'
    )
    self.hyperparameters = {
        'outputscale_prior': (0.4, 2),
        'lengthscale_prior': (0.1, 0.1),
        'noise_prior': (0.001, 0.002)
    }
    self.agent = SafetyLearner(
        env=self.env,
        gamma_optimistic=gamma_optimistic,
        gamma_cautious=gamma_cautious,
        lambda_cautious=lambda_cautious,
        x_seed=self.x_seed,
        y_seed=self.y_seed,
        gp_params=self.hyperparameters,
    )
    plotters = {
        'DetailedSafety': DetailedSafetyPlotter(self.agent, self.ground_truth)
    }
    super(OptimisticSimulation, self).__init__(
        'results', 'optimistic', plotters
    )
    self.max_samples = max_samples
    self.every = every
    # Output subdirectories for sampled data and the saved model
    self.samples_path = self.output_directory / 'samples'
    self.samples_path.mkdir(parents=True, exist_ok=True)
    self.model_path = self.output_directory / 'model'
    self.model_path.mkdir(parents=True, exist_ok=True)
    # Keep every third index of the true failure set, mapped back to
    # state-action coordinates
    failure_indexes = np.argwhere(self.ground_truth.failure_set == 1)
    self.failure_set = np.array([
        self.ground_truth.stateaction_space[tuple(idx)]
        for idx in failure_indexes[::3]
    ])
def test_custom_creation(self):
    """Custom dynamics, random resets, and a zero-action step behave sanely."""
    start_state = atleast_1d(0.1)
    hovership = Hovership(
        random_start=True,
        default_initial_state=start_state,
        dynamics_parameters={'ground_gravity': 0, 'gravity_gradient': 0}
    )
    hovership.reset(s=start_state)
    self.routine(hovership, start_state)
    # With random_start, at least one of several resets should move the
    # state away from the default; otherwise the test fails
    for _ in range(10):
        hovership.reset()
        if hovership.s[0] != start_state[0]:
            break
    else:
        self.assertTrue(False)
    # With both gravity parameters zeroed, a zero action is expected to
    # leave the state unchanged
    hovership.reset(s=start_state)
    hovership.step(action=atleast_1d(0.))
    self.assertEqual(hovership.s[0], start_state[0])
def test_stateaction_space_stability(self):
    """States reached by stepping must stay inside the state space."""
    hovership = Hovership()
    # Constant high thrust for a few steps
    for _ in range(10):
        state, _, _ = hovership.step(atleast_1d(.8))
        self.assertTrue(state in hovership.stateaction_space.state_space)
    hovership.reset()
    # Zero thrust for many steps
    for _ in range(100):
        state, _, _ = hovership.step(atleast_1d(0.))
        self.assertTrue(state in hovership.stateaction_space.state_space)
def __init__(self, output_directory, name, max_samples, gamma_optimistic,
             gamma_cautious, lambda_cautious, shape, ground_truth,
             random_start=False, every=50):
    """Set up environment, ground truth, agent, and plotters for the run."""
    x_seed = np.array([1.45, 0.5])
    y_seed = np.array([.8])
    self.env = Hovership(
        random_start=random_start,
        dynamics_parameters={'shape': shape},
        default_initial_state=x_seed[:1]
    )
    self.ground_truth = SafetyTruth(self.env)
    self.ground_truth.from_vibly_file(ground_truth)
    self.hyperparameters = {
        'outputscale_prior': (0.4, 2),
        'lengthscale_prior': (0.2, 0.2),
        'noise_prior': (0.001, 0.002)
    }
    self.agent = SafetyLearner(
        env=self.env,
        gamma_optimistic=gamma_optimistic,
        gamma_cautious=gamma_cautious,
        lambda_cautious=lambda_cautious,
        x_seed=x_seed,
        y_seed=y_seed,
        gp_params=self.hyperparameters,
    )
    self.agent.reset()
    plotters = {'Safety': SafetyPlotter(self.agent, self.ground_truth)}
    super(HyperparametersSimulation, self).__init__(
        output_directory, name, plotters
    )
    self.max_samples = max_samples
    self.every = every
    self.random_start = random_start
def test_get_training_examples(self):
    """Sampled training examples have the right shapes and viability split."""
    truth = SafetyTruth(Hovership())
    truth.from_vibly_file('../data/ground_truth/from_vibly/hover_map.pickle')

    # Default sampling
    train_x, train_y = truth.get_training_examples(n_examples=2000)
    self.assertEqual(train_x.shape[0], train_y.shape[0])
    self.assertEqual(train_x.shape[0], 2000)
    self.assertEqual(train_x.shape[1], truth.stateaction_space.index_dim)

    # Sampling that mixes viable and failure examples
    train_x, train_y = truth.get_training_examples(
        n_examples=2000, from_failure=True, viable_proportion=0.6
    )
    self.assertEqual(train_x.shape[0], train_y.shape[0])
    self.assertEqual(train_x.shape[0], 2000)
    self.assertEqual(train_x.shape[1], truth.stateaction_space.index_dim)
    # 60% of 2000 examples: the first 1200 are viable (positive),
    # the remaining 800 are failures (zero)
    self.assertTrue((train_y[:1200] > 0).all())
    self.assertTrue((train_y[1200:] == 0).all())
def test_save_load(self):
    """Save a MaternSafety model and its samples, reload them, and compare.

    Fix: the original wrote into the hard-coded repo directory 'results/'
    (leaving artifacts behind) and carried dead commented-out tempfile
    code. The test now uses a real temporary directory and cleans it up.
    """
    import os
    import shutil
    import tempfile

    env = Hovership()
    x_seed = np.array([1.45, 0.6])
    y_seed = np.array([0.8])
    x_blank = np.array([0., 0])
    y_blank = np.array([0.])
    hyperparameters = {
        'outputscale_prior': (0.4, 2),
        'lengthscale_prior': (0.2, 0.2),
        'noise_prior': (0.001, 0.002)
    }
    safety = MaternSafety(env, 0.7, x_seed, y_seed, hyperparameters)
    # Trailing separator mirrors the original 'results/' argument, since
    # save()/load() receive a directory-prefix string that gets
    # concatenated with file names.
    tmpdir = tempfile.mkdtemp() + os.sep
    try:
        safety.save(tmpdir)
        safety.save_samples(tmpdir + 'samples.npz')
        # Reload into a blank model and check it matches the saved one
        blank = MaternSafety.load(tmpdir, env, 0.7, x_blank, y_blank)
        blank.load_samples(tmpdir + 'samples.npz')
        self.assertTrue((blank.gp.train_x == safety.gp.train_x).all())
        self.assertEqual(blank.gp.structure_dict, safety.gp.structure_dict)
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)
def test_default_creation(self):
    """A default-constructed Hovership passes the shared routine check."""
    env = Hovership()
    self.routine(env, env.default_initial_state)