def test_print_config(self):
    """The test config file parses cleanly and print_config() runs without error."""
    try:
        config = BlobbleConfig('testdata/blobble_config_test.ini')
    except Exception as err:
        # Configuration should be syntactically correct, so no exception.
        # (Replaces bare `except: assert ()` — narrow catch, explicit failure message.)
        self.fail('Unexpected exception loading config: {}'.format(err))
    config.print_config()
def test_read_config_learning(self):
    """Learning-section values are read correctly from the test config file."""
    try:
        config = BlobbleConfig('testdata/blobble_config_test.ini')
    except Exception as err:
        # Configuration should be syntactically correct, so no exception.
        # (Replaces bare `except: assert ()` — narrow catch, explicit failure message.)
        self.fail('Unexpected exception loading config: {}'.format(err))
    # Check a value
    self.assertEqual(10, config.get_learning_params()['num_eval_episodes'])
def test_read_config_output(self):
    """Output-section values are read correctly from the test config file."""
    try:
        config = BlobbleConfig('testdata/blobble_config_test.ini')
    except Exception as err:
        # Configuration should be syntactically correct, so no exception.
        # (Replaces bare `except: assert ()` — narrow catch, explicit failure message.)
        self.fail('Unexpected exception loading config: {}'.format(err))
    # Check a value (note: stored as the string 'True', not a bool)
    self.assertEqual('True', config.get_output_params()['demonstration_video'])
def test_read_config_learning_adv(self):
    """Advanced-learning-section values are read correctly from the test config file."""
    try:
        config = BlobbleConfig('testdata/blobble_config_test.ini')
    except Exception as err:
        # Configuration should be syntactically correct, so no exception.
        # (Replaces bare `except: assert ()` — narrow catch, explicit failure message.)
        self.fail('Unexpected exception loading config: {}'.format(err))
    # Check a value (note: stored as the string '5e-3', not a float)
    self.assertEqual('5e-3', config.get_learning_adv_params()['learning_rate'])
def test_read_config_missing_section(self):
    """Loading a config file with a missing section must raise KeyError."""
    # Idiomatic form of the original try/except-KeyError/return + `assert ()`
    # pattern: fails if no KeyError is raised, errors on any other exception.
    with self.assertRaises(KeyError):
        BlobbleConfig('testdata/blobble_config_test_missing_section.ini')
def test_read_invalid_config(self):
    """A non-existent config file yields None learning params rather than raising."""
    # NOTE(review): 'blobble_config_est.ini' looks like a typo of '..._test.ini'
    # but appears deliberate here — the point is that the file does not exist.
    config = BlobbleConfig('testdata/blobble_config_est.ini')
    # assertIsNone is the idiomatic identity check (assertEqual(None, x) relies on __eq__)
    self.assertIsNone(config.get_learning_params())
def __init__(self, env_name='blobble-world-v0'):
    """
    Initialise the agent by training a neural network for the passed tf-agent environment.

    Loads the environment twice (separate train/eval instances), reads
    'blobble_config.ini', builds a neural-network agent and trains it immediately —
    construction is expensive and has console/file side effects.

    :param env_name: Name of the tf-agent gym environment the agent should solve
    """
    self._env_name = env_name

    # Take a timestamp. This will be used for any output files created in the output folder
    self._timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")

    # Create training and evaluation environments
    self._train_py_env = suite_gym.load(self._env_name)
    self._eval_py_env = suite_gym.load(self._env_name)

    # Convert the training and test environments to Tensors
    self._train_env = tf_py_environment.TFPyEnvironment(self._train_py_env)
    self._eval_env = tf_py_environment.TFPyEnvironment(self._eval_py_env)

    # Log the specs of both environments so a training run is self-describing
    print('=====================================================')
    print('Environments created for : ', self._env_name)
    print('Training Environment')
    print(' Observation Spec:')
    print(' ', self._train_env.time_step_spec().observation)
    print(' Reward Spec:')
    print(' ', self._train_env.time_step_spec().reward)
    print(' Action Spec:')
    print(' ', self._train_env.action_spec())
    print('Evaluation Environment')
    print(' Observation Spec:')
    print(' ', self._eval_env.time_step_spec().observation)
    print(' Reward Spec:')
    print(' ', self._eval_env.time_step_spec().reward)
    print(' Action Spec:')
    print(' ', self._eval_env.action_spec())
    print('=====================================================')

    self._config = BlobbleConfig('blobble_config.ini')
    self._config.print_config()

    # Get the demonstration parameters and output folder. We don't need these just yet but it's
    # good to do now in case there is an error in the config file (exception will be thrown)
    self._output_folder = (self._config.get_output_params()['output_folder'])
    self._num_demo_episodes = int(self._config.get_output_params()['num_demonstration_episodes'])
    # Config values are strings; only the literal 'True' enables video output
    demo_video = (self._config.get_output_params()['demonstration_video'])
    if demo_video == 'True':
        self._demo_video = True
    else:
        self._demo_video = False

    # Get and check the advanced learning parameters
    self._learning_rate = float(self._config.get_learning_adv_params()['learning_rate'])
    # NOTE(review): split(',') yields a tuple of *strings* — presumably
    # create_neural_network_agent converts them to ints; confirm.
    self._fc_layer_params = tuple(self._config.get_learning_adv_params()['fc_layer_params'].split(','))

    print('Create and train a neural network agent')
    self._neural_network_agent = create_neural_network_agent(self._train_env,
                                                             self._learning_rate,
                                                             self._fc_layer_params)

    # Train immediately with the (string-valued) parameters from the config file.
    # NOTE(review): numeric params are passed as read — assumes train_neural_network
    # accepts/converts strings; confirm against its signature.
    learning_params = self._config.get_learning_params()
    train_neural_network(self._neural_network_agent,
                         self._train_env,
                         self._eval_env,
                         num_train_iterations=learning_params['training_iterations'],
                         log_interval=learning_params['training_log_interval'],
                         eval_interval=learning_params['eval_interval'],
                         num_eval_episodes=learning_params['num_eval_episodes'],
                         replay_buffer_max_length=learning_params['replay_buffer_max_length'],
                         collect_steps_per_iteration=learning_params['collect_steps_per_iteration'],
                         output_folder=self._output_folder,
                         timestamp=self._timestamp)
class QNetworkAgent:
    """
    Wrapper class to provide a Deep Neural Network agent for any provided tf-agent
    environment.

    Construction trains the agent immediately (expensive; console/file side effects).
    """

    def __init__(self, env_name='blobble-world-v0'):
        """
        Initialise the agent by training a neural network for the passed tf-agent
        environment.

        :param env_name: Name of the tf-agent gym environment the agent should solve
        """
        self._env_name = env_name

        # Take a timestamp. This will be used for any output files created in the output folder
        self._timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")

        # Create training and evaluation environments
        self._train_py_env = suite_gym.load(self._env_name)
        self._eval_py_env = suite_gym.load(self._env_name)

        # Convert the training and test environments to Tensors
        self._train_env = tf_py_environment.TFPyEnvironment(self._train_py_env)
        self._eval_env = tf_py_environment.TFPyEnvironment(self._eval_py_env)

        # Log the specs of both environments so a training run is self-describing
        print('=====================================================')
        print('Environments created for : ', self._env_name)
        print('Training Environment')
        print(' Observation Spec:')
        print(' ', self._train_env.time_step_spec().observation)
        print(' Reward Spec:')
        print(' ', self._train_env.time_step_spec().reward)
        print(' Action Spec:')
        print(' ', self._train_env.action_spec())
        print('Evaluation Environment')
        print(' Observation Spec:')
        print(' ', self._eval_env.time_step_spec().observation)
        print(' Reward Spec:')
        print(' ', self._eval_env.time_step_spec().reward)
        print(' Action Spec:')
        print(' ', self._eval_env.action_spec())
        print('=====================================================')

        self._config = BlobbleConfig('blobble_config.ini')
        self._config.print_config()

        # Get the demonstration parameters and output folder. We don't need these just yet but it's
        # good to do now in case there is an error in the config file (exception will be thrown)
        self._output_folder = (self._config.get_output_params()['output_folder'])
        self._num_demo_episodes = int(self._config.get_output_params()['num_demonstration_episodes'])
        # Config values are strings; only the literal 'True' enables video output
        demo_video = (self._config.get_output_params()['demonstration_video'])
        if demo_video == 'True':
            self._demo_video = True
        else:
            self._demo_video = False

        # Get and check the advanced learning parameters
        self._learning_rate = float(self._config.get_learning_adv_params()['learning_rate'])
        # NOTE(review): split(',') yields a tuple of *strings* — presumably
        # create_neural_network_agent converts them to ints; confirm.
        self._fc_layer_params = tuple(self._config.get_learning_adv_params()['fc_layer_params'].split(','))

        print('Create and train a neural network agent')
        self._neural_network_agent = create_neural_network_agent(self._train_env,
                                                                 self._learning_rate,
                                                                 self._fc_layer_params)

        # Train immediately with the (string-valued) parameters from the config file.
        # NOTE(review): numeric params are passed as read — assumes train_neural_network
        # accepts/converts strings; confirm against its signature.
        learning_params = self._config.get_learning_params()
        train_neural_network(self._neural_network_agent,
                             self._train_env,
                             self._eval_env,
                             num_train_iterations=learning_params['training_iterations'],
                             log_interval=learning_params['training_log_interval'],
                             eval_interval=learning_params['eval_interval'],
                             num_eval_episodes=learning_params['num_eval_episodes'],
                             replay_buffer_max_length=learning_params['replay_buffer_max_length'],
                             collect_steps_per_iteration=learning_params['collect_steps_per_iteration'],
                             output_folder=self._output_folder,
                             timestamp=self._timestamp)

    def get_random_baseline_performance(self, iterations=10):
        """
        Establish a baseline performance based on random behaviour.

        :param iterations: number of episodes to average over
        :return: average return of a random policy on the training environment
        """
        random_policy = random_tf_policy.RandomTFPolicy(self._train_env.time_step_spec(),
                                                        self._train_env.action_spec())
        return compute_avg_return(self._train_env, random_policy, iterations)

    def run_agent(self, fps=2, random=False):
        """
        Run demonstration episodes, optionally recording them to an mp4 video.

        Runs self._num_demo_episodes episodes (from config) in a freshly loaded
        environment, using either the trained policy or a random one. A video is
        written only when the config enabled it (self._demo_video).

        :param fps: Frames per second for video
        :param random: For random behaviour (use a random policy instead of the trained one)
        :return: None
        """
        # Fresh environment so demonstrations don't disturb train/eval env state
        run_py_env = suite_gym.load(self._env_name)
        run_env = tf_py_environment.TFPyEnvironment(run_py_env)

        if not random:
            policy = self._neural_network_agent.policy
        else:
            policy = random_tf_policy.RandomTFPolicy(run_env.time_step_spec(),
                                                     run_env.action_spec())

        if self._num_demo_episodes > 0:
            if self._demo_video:
                # Record every episode frame-by-frame into a single timestamped mp4
                filename = os.path.join(self._output_folder,
                                        self._timestamp + "-demonstration" + ".mp4")
                with imageio.get_writer(filename, fps=fps) as video:
                    for episode in range(self._num_demo_episodes):
                        print('Demonstration Episode: ', episode+1)
                        # Reset the evaluation environment
                        time_step = run_env.reset()
                        while not time_step.is_last():
                            action_step = policy.action(time_step)
                            time_step = run_env.step(action_step.action)
                            tf.print('ACTION: ', action_step.action, time_step)
                            video.append_data(run_py_env.render())
                print('Demonstration video is in: '+filename)
            else:
                # Same episode loop without video capture
                for episode in range(self._num_demo_episodes):
                    print('Demonstration Episode: ', episode+1)
                    # Reset the evaluation environment
                    time_step = run_env.reset()
                    while not time_step.is_last():
                        action_step = policy.action(time_step)
                        time_step = run_env.step(action_step.action)
                        tf.print('ACTION: ', action_step.action, time_step)