def f_clw_instantiate_envs(self):
        """ Instantiates both the Training and the Test Gym Envs
        - They provide the same dynamical model and the same reward
        """
        temp = 'gym_quadcopter:quadcopter-v' + str(self.env_desc.get_env_id())
        # TODO FIXME: Some models cannot handle multiple envs.
        N = self.env_desc.get_n_envs()
        if N < 1: 
            raise RuntimeError(f"NumEnvs needs to be >= 1 but got NumEnvs={N}")
        logging.info(f"[SETUP] Creating {N} Training Environments - START")

        # Instantiating all the Envs and storing them into a private var 
        self.__envs_training = [f_fwgym_get_env(
            env_id=temp, used_states=self.used_states, instance_index=i,
            query_classes=self.query_classes, query_class=self.query_class,
            params=self.args.training_params
        ) for i in range(N)]

        # Passing references to the previously created envs. NOTE: `i` is bound
        # as a default argument; a bare `lambda: self.__envs_training[i]` would
        # late-bind `i` and make every entry return the last env.
        self.env = DummyVecEnv([lambda i=i: self.__envs_training[i] for i in range(N)])
        logging.info(f"[SETUP] Creating {N} Training Environments - DONE")
        logging.info(f"[SETUP] Creating 1 Test Environments - START")
        self.env_test = f_fwgym_get_env(
            env_id=temp, used_states=self.used_states, instance_index=0,
            query_classes=self.query_classes, query_class=self.query_class,
            params=self.args.testing_params
        )
        logging.info(f"[SETUP] Creating 1 Test Environments - DONE")
    def test_env_make(self):
        """ Tests that f_fwgym_get_env and GymEnvBase.make build equivalent Envs
        """
        instance_index = 0
        action = np.array([1.0, 2.0, 3.0])
        env_id = 'gym_quadcopter:quadcopter-v2'

        env1 = f_fwgym_get_env(
            env_id=env_id, used_states=['e_p', 'e_q', 'e_r'],
            instance_index=instance_index, query_class='something',
            query_classes={}, params=self.testing_params
        )
        env1.reset()
        self.assertEqual(env1.params, self.testing_params)

        quadcopter = Quadcopter(T=20, dt_commands=0.03, dt=0.01)
        env2 = GymEnvBase.make(
            env_id=env_id, instance_index=instance_index,
            params=self.testing_params, quadcopter=quadcopter,
            used_states=['e_p', 'e_q', 'e_r']
        )
        env2.reset()
        self.assertEqual(env2.params, self.testing_params)
        self.assertEqual(env1.env_id, env2.env_id)
        self.assertEqual(env1.instance_index, env2.instance_index)
        self.assertEqual(env1.continuous, env2.continuous)
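
# A minimal sketch (an assumption, not the project's actual code) of the contract
# the comparison above relies on: a factory in the spirit of f_fwgym_get_env
# resolves the 'package:env-id' string through the gym registry and wires the
# extra attributes onto the returned env. `_sketch_get_env` is hypothetical.
import gym

def _sketch_get_env(env_id, used_states, instance_index, query_classes, query_class, params):
    env = gym.make(env_id)           # 'gym_quadcopter:quadcopter-vN' imports the package, then builds the env
    env.env_id = env_id              # attribute wiring below is assumed from the assertions above
    env.used_states = used_states
    env.instance_index = instance_index
    env.query_classes = query_classes
    env.query_class = query_class
    env.params = params
    return env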
    def run_test(self):
        """ Runs the old tests or the new properties observer based ones
        """
        if self.args.eval_properties_observer['is_active']:
            return self.compute_traces()

        logging.info(f"Start test at {dt.now().strftime('%Y%m%d_%H%M')}")
        logging.info(f"Test Arguments\n{self._args2str(self.args)}")
        self.f_fwtfw_init()

        temp = 'gym_quadcopter:quadcopter-v' + str(self.args.env)

        self.env = f_fwgym_get_env(env_id=temp,
                                   used_states=self.args.used_states,
                                   instance_index=0,
                                   query_classes=self.args.query_classes,
                                   query_class=self.args.query_class,
                                   params=self.args.testing_params)
        logging.info(
            f"[eval_models.py] Instantiated env {temp} with continuous={self.args.continuous}"
        )
        checkpoints = self.range_checkpoints()
        rewards, s_rates = self.f_checkpoints_range_2_mean_performance(
            checkpoints=checkpoints)

        starting_min_reward = -10

        temp_x = [0] + [
            self.args.num_iterations_checkpoint * i for i in range(
                self.args.start_index, self.args.end_index, self.args.step)
        ]
        temp_y = [starting_min_reward] + list(rewards)
        f_iofsw_plot(
            x=temp_x,
            y=temp_y,
            x_ticks=np.array(temp_x),
            y_ticks=np.array(temp_y),
            title="Rewards",
            label_x="Training Timesteps",
            label_y="Average Rewards",
            filename=f"{self.args.plots_dir}/Timesteps_Rewards_{dt.now().strftime('%Y%m%d_%H%M%S')}.png",
        )

        # temp_x (the training-timesteps axis) is reused unchanged from the rewards plot
        temp_y = [0] + list(s_rates)
        f_iofsw_plot(
            x=temp_x,
            y=temp_y,
            x_ticks=np.array(temp_x),
            y_ticks=np.array(temp_y),
            title="Success Rate",
            label_x="Training Timesteps",
            label_y="Average Success Rate",
            filename=f"{self.args.plots_dir}/Timesteps_SuccessRate_{dt.now().strftime('%Y%m%d_%H%M%S')}.png",
        )
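
# A hypothetical stand-in for f_iofsw_plot (the signature is taken from the calls
# above; the matplotlib body is an assumption): saves a labelled x/y line plot.
def _sketch_iofsw_plot(x, y, x_ticks, y_ticks, title, label_x, label_y, filename):
    import matplotlib
    matplotlib.use('Agg')  # headless backend, safe on training machines
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()
    ax.plot(x, y, marker='o')
    ax.set_xticks(x_ticks)
    ax.set_yticks(y_ticks)
    ax.set_title(title)
    ax.set_xlabel(label_x)
    ax.set_ylabel(label_y)
    fig.savefig(filename)
    plt.close(fig)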
    def test_env_set_z_velocity_angles_reset_function(self):
        """ Tests that the z / velocity / angle Reset Functions can be set from the Training Config
        """
        env_desc = EnvDict(env_dict=self.args.env)
        tp_desc = TrainingParamsDict(tp_dict=self.args.training_params)
        self.assertEqual(env_desc.get_env_id(), self.training_config['env']['value'])
        self.assertEqual(self.args.model, self.training_config['model'])
        self.assertEqual(self.args.n_steps, self.training_config['n_steps'])
        self.assertEqual(self.args.training_params, self.training_config['training_params'])

        for i in range(3):
            env_id = f'gym_quadcopter:quadcopter-v{i}'
            env = f_fwgym_get_env(
                env_id=env_id, used_states=['e_p', 'e_q', 'e_r'],
                instance_index=0, query_class='something',
                query_classes={}, params=self.args.training_params
            )
            self.assertEqual(env.params, self.args.training_params)

            env.reset()

            # Each reset_policy entry carries a [min, max] params pair; after a
            # reset, the matching quadcopter attribute must lie in that range.
            # Maps reset_policy keys to quadcopter attribute names.
            checks = {
                'abs_z': 'z',
                'velocity_x': 'velocity_x',
                'velocity_y': 'velocity_y',
                'velocity_z': 'velocity_z',
                'abs_roll': 'abs_roll',
                'abs_pitch': 'abs_pitch',
                'abs_yaw': 'abs_yaw',
                'rate_roll': 'rate_roll',
                'rate_pitch': 'rate_pitch',
                'rate_yaw': 'rate_yaw',
            }
            reset_policy = self.args.training_params['quadcopter']['reset_policy']
            for key, attr in checks.items():
                val_min = float(reset_policy[key]['params'][0])
                val_max = float(reset_policy[key]['params'][1])
                self.assertTrue(
                    val_min <= getattr(env.quadcopter, attr) <= val_max,
                    msg=f"{key} outside its reset range [{val_min}, {val_max}]"
                )
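
# A minimal sketch of how a reset_policy entry could be sampled on reset,
# assuming (from the config shape used above) each entry carries a 'pdf' name
# and a [min, max] 'params' pair; `_sketch_sample_reset` is hypothetical.
import numpy as np

def _sketch_sample_reset(entry, rng=np.random):
    if entry['pdf'] == 'uniform':
        lo, hi = float(entry['params'][0]), float(entry['params'][1])
        return rng.uniform(lo, hi)
    if entry['pdf'] == 'none':
        return 0.0  # deterministic fallback, consistent with the tests below
    raise ValueError(f"Unsupported pdf '{entry['pdf']}'")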
    def test_env_altitude_controller_temporal_consistency(self):
        """ Tests that env.altitude_controller.compute_thrust() has no random component
        """
        for pol in self.reset_policy:
            self.training_config['training_params']['quadcopter']['reset_policy'][pol]['pdf'] = 'none'
        self.training_config['model']['name'] = 'ddpg'
        self.args = training_args_from_configs(base_config=self.base_config, config=self.training_config, debug_info=False)

        env_desc = EnvDict(env_dict=self.args.env)
        tp_desc = TrainingParamsDict(tp_dict=self.args.training_params)
        self.assertEqual(env_desc.get_env_id(), self.training_config['env']['value'])
        self.assertEqual(self.args.model, self.training_config['model'])
        self.assertEqual(self.args.n_steps, self.training_config['n_steps'])
        self.assertEqual(self.args.training_params, self.training_config['training_params'])

        supported_envs = [0]
        for i in supported_envs:
            env_id = f'gym_quadcopter:quadcopter-v{i}'
            print(f"Checking EnvID={env_id}")
            env = f_fwgym_get_env(
                env_id=env_id, used_states=['e_p', 'e_q', 'e_r'],
                instance_index=0, query_class='something',
                query_classes={}, params=self.args.training_params
            )
            self.assertEqual(env.params, self.args.training_params)

            self.assertEqual(env.altitude_controller.p, 3000)
            self.assertEqual(env.altitude_controller.i, 300)
            self.assertEqual(env.altitude_controller.d, 500)

            obs_trace = np.zeros(10)
            exp_trace = np.zeros(10)

            env.reset()
            env.set_target_z(1.0)
            self.assertEqual(env.quadcopter.z, 0.0)
            self.assertEqual(env.target_z, 1.0)

            self.assertEqual(env.altitude_controller.z_integral, 0.0)
            self.assertEqual(env.previous_z_error, 0.0)

            for k in range(obs_trace.shape[0]):
                self.assertEqual(env.quadcopter.z, 0.0)
                exp_trace[k] = env.altitude_controller.compute_thrust(target_z=env.target_z, current_z=env.quadcopter.z)

            env.reset()
            env.set_target_z(1.0)
            self.assertEqual(env.quadcopter.z, 0.0)
            self.assertEqual(env.target_z, 1.0)

            self.assertEqual(env.altitude_controller.z_integral, 0.0)
            self.assertEqual(env.previous_z_error, 0.0)

            for k in range(obs_trace.shape[0]):
                self.assertEqual(env.quadcopter.z, 0.0)
                obs_trace[k] = env.altitude_controller.compute_thrust(target_z=env.target_z, current_z=env.quadcopter.z)

            self.assertTrue(np.allclose(obs_trace, exp_trace, atol=1e-5), msg=f"Temporal Consistency Check: EnvID={env_id} ObsTrace={obs_trace}, ExpTrace={exp_trace}, Delta={obs_trace-exp_trace}")
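
# A minimal PID sketch of what compute_thrust could look like, inferred from the
# state inspected above (p/i/d gains, z_integral, previous_z_error); the real
# controller implementation is not shown here, so treat this as an assumption.
class _SketchAltitudeController:
    def __init__(self, p=3000.0, i=300.0, d=500.0, dt=0.01):
        self.p, self.i, self.d, self.dt = p, i, d, dt
        self.z_integral = 0.0
        self.previous_z_error = 0.0

    def compute_thrust(self, target_z, current_z):
        error = target_z - current_z
        self.z_integral += error * self.dt
        derivative = (error - self.previous_z_error) / self.dt
        self.previous_z_error = error
        # Fully deterministic: same state + same inputs => same thrust,
        # which is what the temporal-consistency test above asserts.
        return self.p * error + self.i * self.z_integral + self.d * derivative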
    def test_env_controller_temporal_consistency_on_altitude(self):
        """ Tests that, given the same initial conditions and target_z, the selected altitude controller leads to the same z after the same number of iterations

        WARNING
        - This test currently fails due to precision issues, so it has been disabled; it revealed an issue that needs deeper investigation, which is why it stays committed in the repo
        """
        for pol in self.reset_policy:
            self.training_config['training_params']['quadcopter']['reset_policy'][pol]['pdf'] = 'none'
        self.training_config['model']['name'] = 'ddpg'
        self.args = training_args_from_configs(base_config=self.base_config, config=self.training_config, debug_info=False)

        env_desc = EnvDict(env_dict=self.args.env)
        tp_desc = TrainingParamsDict(tp_dict=self.args.training_params)
        self.assertEqual(env_desc.get_env_id(), self.training_config['env']['value'])
        self.assertEqual(self.args.model, self.training_config['model'])
        self.assertEqual(self.args.n_steps, self.training_config['n_steps'])
        self.assertEqual(self.args.training_params, self.training_config['training_params'])

        for i in self.supported_envs:
            env_id = f'gym_quadcopter:quadcopter-v{i}'
            print(f"Checking EnvID={env_id}")
            env = f_fwgym_get_env(
                env_id=env_id, used_states=['e_p', 'e_q', 'e_r'],
                instance_index=0, query_class='something',
                query_classes={}, params=self.args.training_params
            )
            self.assertEqual(env.params, self.args.training_params)

            self.assertEqual(env.altitude_controller.p, 3000)
            self.assertEqual(env.altitude_controller.i, 300)
            self.assertEqual(env.altitude_controller.d, 500)

            obs_trace = np.zeros(10)
            exp_trace = np.zeros(10)

            env.reset()
            env.set_target_z(1.0)
            self.assertEqual(env.quadcopter.z, 0.0)
            self.assertEqual(env.target_z, 1.0)
            self.assertEqual(env.z_integral, 0.0)
            self.assertEqual(env.previous_z_error, 0.0)
            for k in range(obs_trace.shape[0]):
                env.step(action=np.zeros(3))
                exp_trace[k] = env.quadcopter.z

            env.reset()
            env.set_target_z(1.0)
            self.assertEqual(env.quadcopter.z, 0.0)
            self.assertEqual(env.target_z, 1.0)
            self.assertEqual(env.z_integral, 0.0)
            self.assertEqual(env.previous_z_error, 0.0)
            for k in range(obs_trace.shape[0]):
                env.step(action=np.zeros(3))
                obs_trace[k] = env.quadcopter.z

            self.assertTrue(np.allclose(obs_trace, exp_trace, atol=1e-5), msg=f"Temporal Consistency Check: EnvID={env_id} ObsTrace={obs_trace}, ExpTrace={exp_trace}, Delta={obs_trace-exp_trace}")
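
# Why the traces are compared with np.allclose(atol=1e-5) rather than exact
# equality: float accumulation is order-sensitive, the kind of precision issue
# the WARNING in the docstring above refers to (illustrative snippet only).
def _demo_float_order_sensitivity():
    a, b, c = 1e16, 1.0, -1e16
    assert (a + b) + c == 0.0  # 1e16 + 1.0 rounds back to 1e16, the 1.0 is lost
    assert (a + c) + b == 1.0  # cancelling the big terms first preserves the 1.0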
    def test_set_saturation(self):
        """ Tests the saturation of the motors
        """
        env_id = 'gym_quadcopter:quadcopter-v0'
        env = f_fwgym_get_env(
            env_id=env_id, instance_index=0,
            query_class='something', query_classes={},
            params=self.args.training_params, used_states=['e_p', 'e_q', 'e_r']
        )
        self.assertSequenceEqual(
            list(env.quadcopter.saturation),
            [self.args.training_params['quadcopter']['saturation_motor'] * 65535., 65535., 65535., 65535.]
        )
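
# A minimal sketch (assumption) of how the saturation vector asserted above would
# be applied: per-motor PWM commands clipped to [0, saturation_i].
import numpy as np

def _sketch_apply_saturation(pwm_commands, saturation):
    return np.clip(pwm_commands, 0.0, np.asarray(saturation))

# e.g. with saturation_motor = 0.9 the first motor caps at 0.9 * 65535 = 58981.5:
# _sketch_apply_saturation(np.array([70000., 70000., 1000., -5.]),
#                          [0.9 * 65535., 65535., 65535., 65535.])
# -> array([58981.5, 65535., 1000., 0.])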
    def test_env(self):
        """ Tests that the Env built from the Training Config exposes the expected params
        """
        env_desc = EnvDict(env_dict=self.args.env)
        tp_desc = TrainingParamsDict(tp_dict=self.args.training_params)
        self.assertEqual(env_desc.get_env_id(), self.training_config['env']['value'])
        self.assertEqual(self.args.model, self.training_config['model'])
        self.assertEqual(self.args.n_steps, self.training_config['n_steps'])
        self.assertEqual(self.args.training_params, self.training_config['training_params'])

        env_id = 'gym_quadcopter:quadcopter-v' + str(env_desc.get_env_id())
        env = f_fwgym_get_env(
            env_id=env_id, used_states=['e_p', 'e_q', 'e_r'],
            instance_index=0, query_class='something', query_classes={},
            params=self.args.training_params
        )
        self.assertEqual(env.params, self.args.training_params)
    def setUp(self):
        """ Prepares the test environment
        """
        with open('run/config/default.yaml', 'r') as f:
            self.base_config = yaml.safe_load(f)
        with open(self.base_config['filenames']['config_training'], 'r') as f:
            self.training_config = yaml.safe_load(f)['training']
        self.testing_params = self.training_config['training_params']
        self.is_continuous = (self.testing_params['query_generation']['value'] == "continuous")
        self.env_id = 'gym_quadcopter:quadcopter-v0'
        self.env = f_fwgym_get_env(
            env_id=self.env_id, used_states=['e_p', 'e_q', 'e_r'],
            instance_index=0, query_class='something',
            query_classes={}, params=self.testing_params
        )
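
# A sketch of the minimal config shape this setUp assumes, reconstructed from the
# keys accessed in these tests (the real YAML files may carry more entries; every
# concrete value below is a placeholder):
_SKETCH_TRAINING_CONFIG = {
    'training': {
        'env': {'value': 0},
        'model': {'name': 'ddpg'},
        'n_steps': 1000,
        'training_params': {
            'query_generation': {'value': 'continuous'},
            'quadcopter': {
                'saturation_motor': 1.0,
                'reset_policy': {
                    'abs_z': {'pdf': 'uniform', 'params': [0.0, 1.0]},
                    # ... velocity_x/y/z, abs_roll/pitch/yaw, rate_roll/pitch/yaw
                    # follow the same {'pdf': ..., 'params': [min, max]} shape
                },
            },
        },
    },
}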
    def compute_traces(self):
        """ For each Stable Baselines Checkpoint in the experiment dir, given a certain Query Generator Configuration, it computes traces

        All the Stable Baselines Checkpoints are loaded and run in an Env with a properly instantiated Query Generator
        """

        # Instantiate the Env for the Tests
        env_id = 'gym_quadcopter:quadcopter-v' + str(self.args.env)
        self.env = f_fwgym_get_env(env_id=env_id,
                                   used_states=self.args.used_states,
                                   instance_index=0,
                                   query_classes=self.args.query_classes,
                                   query_class=self.args.query_class,
                                   params=self.args.testing_params)

        # Register Callback
        self.env.quadcopter.cb_step = self.cb_quadcopter_step
        logging.info(
            f"[eval_models.py] Instantiated env {env_id} with continuous={self.args.continuous}"
        )
        checkpoints = self.range_checkpoints()

        # Iterate over the checkpoints
        for i in checkpoints:
            # Input: Checkpoints Dir
            cp_path = get_checkpoint_path(
                base_path=self.args.training_base_path,
                idx=i,
                suffix=self.args.suffix)
            # Loads the model from the Checkpoint
            model = load_model(path=cp_path,
                               env=self.env,
                               desc=self.args.model['name'])

            # Create a dir for the traces related to a given checkpoint, using `self.args.log_dir` as base
            base_path_cp_id = self.args.log_dir + f"/checkpoint_{i}/"
            Path(base_path_cp_id).mkdir()

            # Iterate over the episodes to test each checkpoint
            for j in range(self.args.n_episodes):

                # Get the evaluation results filenames
                query_full_path, commands_full_path, signals_full_path = get_signals_path(
                    basepath_checkpoint=base_path_cp_id, episode_idx=j)

                # Get the data structures for the evaluated data
                self.query, self.cmds, self.signals = get_signals_containers()

                # Reset the env at the beginning of each episode
                obs = self.env.reset()

                # Simulation loop
                while True:
                    # NOTE: The time granularity here has to be dt_commands, not dt, because this loop is in sync with the step() method
                    query_item = {
                        't': self.env.quadcopter.t.hex(),
                        'query_p': float(self.env.query[0]).hex(),
                        'query_q': float(self.env.query[1]).hex(),
                        'query_r': float(self.env.query[2]).hex()
                    }
                    # NOTE: DataFrame.append is deprecated in recent pandas;
                    # pd.concat([...]) is the modern equivalent
                    self.query = self.query.append(query_item,
                                                   ignore_index=True)

                    # Get the action from the Actor
                    action, _ = model.predict(obs, deterministic=True)
                    obs, reward, done, _ = self.env.step(action)

                    if done:
                        break

                self.query.to_csv(query_full_path, index=False)
                self.cmds.to_csv(commands_full_path, index=False)
                self.signals.to_csv(signals_full_path, index=False)
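
# The traces above store timestamps and query values as float hex strings
# (float.hex()), which round-trip exactly through CSV. A short sketch of reading
# a query trace back; `_sketch_load_query_trace` is hypothetical.
import pandas as pd

def _sketch_load_query_trace(query_csv_path):
    df = pd.read_csv(query_csv_path)
    # float.fromhex inverts float.hex with no precision loss
    for col in ('t', 'query_p', 'query_q', 'query_r'):
        df[col] = df[col].map(float.fromhex)
    return df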