コード例 #1
0
    def test_safe_random_sampling(self):
        """Check that a randomly sampled safe state has a positive measure.

        A SafetyLearner is built on a Hovership environment from a single
        seed sample. The agent is asked for a random safe state, and the
        safety model's measure of that state must be strictly positive.
        """
        x_seed = np.array([1.45, 0.5])
        y_seed = np.array([.8])

        dynamics_parameters = {'shape': (50, 151)}
        # Only the first entry of the seed input is passed as the
        # environment's default initial state.
        self.env = Hovership(random_start=False,
                             dynamics_parameters=dynamics_parameters,
                             default_initial_state=x_seed[:1])
        self.hyperparameters = {
            'outputscale_prior': (0.4, 2),
            'lengthscale_prior': (0.2, 0.2),
            'noise_prior': (0.001, 0.002)
        }
        self.agent = SafetyLearner(
            env=self.env,
            gamma_optimistic=0.6,
            gamma_cautious=0.95,
            lambda_cautious=0,
            x_seed=x_seed,
            y_seed=y_seed,
            gp_params=self.hyperparameters,
        )

        safe_state = self.agent.get_random_safe_state()
        # Dedicated assertions report the offending value on failure.
        self.assertIsNotNone(safe_state, 'No safe state found')
        measure = self.agent.safety_model.measure(
            state=safe_state,
            lambda_threshold=0,
            gamma_threshold=self.agent.safety_model.gamma_measure)

        self.assertGreater(
            measure, 0,
            f'The measure of state {safe_state} is {measure} and should be > 0'
        )
コード例 #2
0
    def test_indexing(self):
        """Check the shape of queries and predictions built from an index.

        The index pairs a single state value (0.5) with a full slice. The
        resulting query is expected to have shape (5, 2) with 0.5 in its
        first column, and the GP mean prediction on it to have shape (5,).
        """
        env = Hovership(random_start=True,
                        dynamics_parameters={'shape': (100, 5)})
        gp_params = {
            'outputscale_prior': (1, 0.1),
            'lengthscale_prior': (0.2, 0.05),
            'noise_prior': (0.001, 0.001)
        }
        learner = GPQLearning(env,
                              0.9,
                              0.9,
                              x_seed=np.array([1., 1.]),
                              y_seed=np.array([1]),
                              gp_params=gp_params)

        index = (np.array([0.5]), slice(None))
        query = learner._get_query_from_index(index)
        self.assertEqual(query.shape, (5, 2))
        first_column = query[:, 0]
        self.assertTrue(np.all(first_column == 0.5))

        prediction = learner.gp.predict(query)
        mean = prediction.mean.cpu().numpy()
        self.assertEqual(mean.shape, (5, ))
コード例 #3
0
    def test_policy_convergence(self):
        """Run a few epsilon-greedy episodes and extract a greedy policy.

        This only checks that training and policy extraction run without
        raising; as the final message says, the value the policy converges
        to is not tested.
        """
        env = Hovership(random_start=True,
                        dynamics_parameters={'shape': (100, 2)})
        gp_params = {
            'outputscale_prior': (1, 0.1),
            'lengthscale_prior': (0.2, 0.05),
            'noise_prior': (0.001, 0.001)
        }
        gpqlearning = GPQLearning(env,
                                  0.9,
                                  0.9,
                                  x_seed=np.array([0.85, 1.]),
                                  y_seed=np.array([1.]),
                                  gp_params=gp_params)

        def policy_from_gpq(gpq):
            # One-hot policy: for every state index, put probability 1 on
            # the action with the largest Q-value.
            q_values = gpq[:, :].reshape(gpq.env.stateaction_space.index_shape)
            policy = np.zeros_like(q_values)
            for i, _ in env.state_space:
                best_action = np.argmax(q_values[i, :])
                policy[i, best_action] = 1
            return policy

        n_actions = env.action_space.index_shape[0]
        epsilon = 0.1
        for _ in range(3):
            state = env.reset()
            failed = env.has_failed
            n_steps = 0
            while not failed and n_steps < 50:
                # Epsilon-greedy: uniform mass epsilon over all actions,
                # the remaining 1 - epsilon on the greedy one.
                probas = np.full(n_actions, epsilon / n_actions)
                greedy_action = np.argmax(gpqlearning[state, :])
                probas[greedy_action] += 1 - epsilon
                action_index = np.random.choice(n_actions, p=probas)
                action = env.action_space[action_index]
                new_state, reward, failed = env.step(action)
                print(f'Step {n_steps} - State {state} - '
                      f'New state {new_state} - Action - {action} - '
                      f'Reward {reward} - Failed {failed}')
                gpqlearning.update(state, action, new_state, reward, failed)
                state = new_state
                n_steps += 1

        policy = policy_from_gpq(gpqlearning)
        print("The computation of the policy works, but "
              "the convergence value is not tested. "
              f"Policy:\n{policy}")
        self.assertTrue(True)
コード例 #4
0
    def __init__(self,
                 output_directory,
                 name,
                 max_samples=250,
                 gamma_optimistic=0.9,
                 gamma_cautious=0.9,
                 lambda_cautious=0.1,
                 lengthscale_prior=(0.1, 0.05),
                 shape=(10, 10),
                 hyperparameters=None,
                 ground_truth=None,
                 every=50):
        """Set up the environment, agent and plotter of the simulation.

        :param output_directory: forwarded to the parent constructor
        :param name: forwarded to the parent constructor
        :param max_samples: stored on the instance as ``max_samples``
        :param gamma_optimistic: forwarded to SafetyLearner
        :param gamma_cautious: forwarded to SafetyLearner
        :param lambda_cautious: forwarded to SafetyLearner
        :param lengthscale_prior: default value of the
            ``'lengthscale_prior'`` GP parameter
        :param shape: value of the ``'shape'`` dynamics parameter of the
            Hovership environment
        :param hyperparameters: optional dict whose entries override the
            default GP parameters
        :param ground_truth: optional argument passed to
            ``SafetyTruth.from_vibly_file``; when None, no ground truth
            is loaded
        :param every: stored on the instance as ``every``
        """
        seed_x = np.array([1.45, 0.5])
        seed_y = np.array([1.])

        # The first entry of the seed input is the default initial state.
        self.env = Hovership(random_start=False,
                             dynamics_parameters={'shape': shape},
                             default_initial_state=seed_x[:1])

        # Start from the defaults and let caller-supplied entries win.
        gp_params = {
            'outputscale_prior': (1, 0.1),
            'lengthscale_prior': lengthscale_prior,
            'noise_prior': (0.001, 0.001)
        }
        if hyperparameters is not None:
            gp_params.update(hyperparameters)

        if ground_truth is not None:
            self.ground_truth = SafetyTruth(self.env)
            self.ground_truth.from_vibly_file(ground_truth)
        else:
            self.ground_truth = None

        self.agent = SafetyLearner(
            env=self.env,
            gamma_optimistic=gamma_optimistic,
            gamma_cautious=gamma_cautious,
            lambda_cautious=lambda_cautious,
            x_seed=seed_x,
            y_seed=seed_y,
            gp_params=gp_params,
        )
        self.agent.reset()

        safety_plotter = SafetyPlotter(self.agent, self.ground_truth)
        super(ToySimulation, self).__init__(
            output_directory, name, {'Safety': safety_plotter}
        )

        self.max_samples = max_samples
        self.every = every
コード例 #5
0
    def test_from_vibly(self):
        """Check that a SafetyTruth loaded from a vibly file is consistent.

        After loading, the truth must expose a StateActionSpace, and its
        viable, unviable and failure sets as well as its measure values
        must all share the same shape.
        """
        env = Hovership()
        truth = SafetyTruth(env)

        # NOTE(review): relative path; presumably resolved against the
        # working directory the tests are run from - confirm.
        vibly_file_path = '../data/ground_truth/from_vibly/hover_map.pickle'
        truth.from_vibly_file(vibly_file_path)

        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(truth.stateaction_space, StateActionSpace)
        self.assertEqual(truth.viable_set.shape, truth.measure_value.shape)
        self.assertEqual(truth.viable_set.shape, truth.unviable_set.shape)
        self.assertEqual(truth.viable_set.shape, truth.failure_set.shape)
    def __init__(self, max_samples, gamma_optimistic, gamma_cautious,
                 lambda_cautious, shape, every):
        """Set up the environment, ground truth, agent and output folders.

        :param max_samples: stored on the instance as ``max_samples``
        :param gamma_optimistic: forwarded to SafetyLearner
        :param gamma_cautious: forwarded to SafetyLearner
        :param lambda_cautious: forwarded to SafetyLearner
        :param shape: value of the ``'shape'`` dynamics parameter of the
            Hovership environment
        :param every: stored on the instance as ``every``
        """
        self.x_seed = np.array([1.45, 0.5])
        self.y_seed = np.array([.8])
        # The first entry of the seed input is the default initial state.
        self.env = Hovership(
            random_start=True,
            dynamics_parameters={'shape': shape},
            default_initial_state=self.x_seed[:1]
        )

        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(
            '../data/ground_truth/from_vibly/hover_map.pickle'
        )

        self.hyperparameters = dict(
            outputscale_prior=(0.4, 2),
            lengthscale_prior=(0.1, 0.1),
            noise_prior=(0.001, 0.002),
        )
        self.agent = SafetyLearner(
            env=self.env,
            gamma_optimistic=gamma_optimistic,
            gamma_cautious=gamma_cautious,
            lambda_cautious=lambda_cautious,
            x_seed=self.x_seed,
            y_seed=self.y_seed,
            gp_params=self.hyperparameters,
        )

        detailed_plotter = DetailedSafetyPlotter(self.agent, self.ground_truth)
        super(OptimisticSimulation, self).__init__(
            'results', 'optimistic', {'DetailedSafety': detailed_plotter}
        )

        self.max_samples = max_samples
        self.every = every

        # Output folders for samples and model, created if missing.
        self.samples_path = self.output_directory / 'samples'
        self.samples_path.mkdir(parents=True, exist_ok=True)
        self.model_path = self.output_directory / 'model'
        self.model_path.mkdir(parents=True, exist_ok=True)

        # Keep every third state-action pair flagged in the failure set.
        flagged = np.argwhere(self.ground_truth.failure_set == 1)
        stateactions = self.ground_truth.stateaction_space
        kept = []
        for index in flagged[::3]:
            kept.append(stateactions[tuple(index)])
        self.failure_set = np.array(kept)
コード例 #7
0
ファイル: environments_test.py プロジェクト: sheim/edge
    def test_custom_creation(self):
        """Check a Hovership built with custom dynamics and initial state.

        Three things are verified:

        * the shared ``routine`` check passes from the custom initial state;
        * with ``random_start=True``, at least one of 10 resets lands on a
          state different from the default one;
        * with both gravity parameters set to 0, stepping with a zero
          action leaves the state at its initial value.
        """
        dynamics_parameters = {
            'ground_gravity': 0,
            'gravity_gradient': 0,
        }
        default_initial_state = atleast_1d(0.1)
        hovership = Hovership(random_start=True,
                              default_initial_state=default_initial_state,
                              dynamics_parameters=dynamics_parameters)
        hovership.reset(s=default_initial_state)
        self.routine(hovership, default_initial_state)

        for _ in range(10):
            hovership.reset()
            if hovership.s[0] != default_initial_state[0]:
                break
        else:
            # Every reset landed exactly on the default state.
            self.fail('10 consecutive resets all returned the default '
                      'initial state although random_start=True')

        hovership.reset(s=default_initial_state)
        hovership.step(action=atleast_1d(0.))
        self.assertEqual(hovership.s[0], default_initial_state[0])
コード例 #8
0
ファイル: environments_test.py プロジェクト: sheim/edge
    def test_stateaction_space_stability(self):
        """Check that states returned by ``step`` stay in the state space.

        Runs 10 steps with action 0.8, resets the environment, then runs
        100 steps with action 0.
        """
        hovership = Hovership()

        def assert_states_stay_in_space(action_value, n_steps):
            # Apply the same action repeatedly and check each new state.
            for _ in range(n_steps):
                new_state, _, _ = hovership.step(atleast_1d(action_value))
                state_space = hovership.stateaction_space.state_space
                self.assertTrue(new_state in state_space)

        assert_states_stay_in_space(.8, 10)
        hovership.reset()
        assert_states_stay_in_space(0., 100)
    def __init__(self, output_directory, name, max_samples,
                 gamma_optimistic, gamma_cautious, lambda_cautious,
                 shape, ground_truth,
                 random_start=False, every=50):
        """Set up the environment, ground truth, agent and plotter.

        :param output_directory: forwarded to the parent constructor
        :param name: forwarded to the parent constructor
        :param max_samples: stored on the instance as ``max_samples``
        :param gamma_optimistic: forwarded to SafetyLearner
        :param gamma_cautious: forwarded to SafetyLearner
        :param lambda_cautious: forwarded to SafetyLearner
        :param shape: value of the ``'shape'`` dynamics parameter of the
            Hovership environment
        :param ground_truth: argument passed to
            ``SafetyTruth.from_vibly_file``
        :param random_start: forwarded to Hovership and stored on the
            instance
        :param every: stored on the instance as ``every``
        """
        seed_x = np.array([1.45, 0.5])
        seed_y = np.array([.8])

        # The first entry of the seed input is the default initial state.
        self.env = Hovership(
            random_start=random_start,
            dynamics_parameters={'shape': shape},
            default_initial_state=seed_x[:1]
        )

        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(ground_truth)

        self.hyperparameters = dict(
            outputscale_prior=(0.4, 2),
            lengthscale_prior=(0.2, 0.2),
            noise_prior=(0.001, 0.002),
        )
        self.agent = SafetyLearner(
            env=self.env,
            gamma_optimistic=gamma_optimistic,
            gamma_cautious=gamma_cautious,
            lambda_cautious=lambda_cautious,
            x_seed=seed_x,
            y_seed=seed_y,
            gp_params=self.hyperparameters,
        )
        self.agent.reset()

        safety_plotter = SafetyPlotter(self.agent, self.ground_truth)
        super(HyperparametersSimulation, self).__init__(
            output_directory, name, {'Safety': safety_plotter}
        )

        self.max_samples = max_samples
        self.every = every
        self.random_start = random_start
コード例 #10
0
    def test_get_training_examples(self):
        """Check the shapes and labels of sampled training examples.

        Both calls request 2000 examples. The second one additionally
        asks for ``from_failure=True`` with ``viable_proportion=0.6`` and
        expects the first 1200 targets to be positive and the rest zero.
        """
        env = Hovership()
        truth = SafetyTruth(env)
        truth.from_vibly_file(
            '../data/ground_truth/from_vibly/hover_map.pickle'
        )

        def check_shapes(inputs, targets):
            # One target per input, 2000 rows, one column per index dim.
            self.assertEqual(inputs.shape[0], targets.shape[0])
            self.assertEqual(inputs.shape[0], 2000)
            self.assertEqual(inputs.shape[1],
                             truth.stateaction_space.index_dim)

        train_x, train_y = truth.get_training_examples(n_examples=2000)
        check_shapes(train_x, train_y)

        train_x, train_y = truth.get_training_examples(
            n_examples=2000,
            from_failure=True,
            viable_proportion=0.6,
        )
        check_shapes(train_x, train_y)
        n_viable = 1200
        self.assertTrue((train_y[:n_viable] > 0).all())
        self.assertTrue((train_y[n_viable:] == 0).all())
コード例 #11
0
    def test_save_load(self):
        """Check that a MaternSafety model survives a save/load round trip.

        A seeded model is saved together with its samples, then loaded
        into a model created from blank seed data. The reloaded GP must
        have the same training inputs and the same structure dictionary.
        """
        # Imported locally so the method does not rely on file-level
        # imports that may be missing.
        import os
        import tempfile

        env = Hovership()
        x_seed = np.array([1.45, 0.6])
        y_seed = np.array([0.8])
        x_blank = np.array([0., 0])
        y_blank = np.array([0.])
        hyperparameters = {
            'outputscale_prior': (0.4, 2),
            'lengthscale_prior': (0.2, 0.2),
            'noise_prior': (0.001, 0.002)
        }
        safety = MaternSafety(env, 0.7, x_seed, y_seed, hyperparameters)

        # Use a temporary directory that lives for the whole round trip
        # and is removed afterwards, instead of writing into a hard-coded
        # 'results/' folder relative to the working directory.
        with tempfile.TemporaryDirectory() as tmp_root:
            # Keep a trailing separator, as the original 'results/' had,
            # so that plain string concatenation yields a valid path.
            tmpdir = os.path.join(tmp_root, '')
            samples_file = tmpdir + 'samples.npz'

            safety.save(tmpdir)
            safety.save_samples(samples_file)

            blank = MaternSafety.load(tmpdir, env, 0.7, x_blank, y_blank)
            blank.load_samples(samples_file)

            self.assertTrue((blank.gp.train_x == safety.gp.train_x).all())
            self.assertEqual(blank.gp.structure_dict,
                             safety.gp.structure_dict)
コード例 #12
0
ファイル: environments_test.py プロジェクト: sheim/edge
 def test_default_creation(self):
     """Run the shared routine on a Hovership built with default arguments."""
     env = Hovership()
     self.routine(env, env.default_initial_state)