def test_print_config(self):
        try:
            config = BlobbleConfig('testdata/blobble_config_test.ini')
        except:
            assert (
            )  # Configuration should be syntactically correct, so no exception

        config.print_config()
    def test_read_config_learning(self):
        """Learning section of the config file must parse and expose values."""
        try:
            config = BlobbleConfig('testdata/blobble_config_test.ini')
        except Exception as err:  # was a bare except with an always-failing `assert ()`
            self.fail('Configuration should be syntactically correct, '
                      'so no exception: {}'.format(err))

        # Check a value
        self.assertEqual(10, config.get_learning_params()['num_eval_episodes'])
    def test_read_config_output(self):
        """Output section of the config file must parse and expose values."""
        try:
            config = BlobbleConfig('testdata/blobble_config_test.ini')
        except Exception as err:  # was a bare except with an always-failing `assert ()`
            self.fail('Configuration should be syntactically correct, '
                      'so no exception: {}'.format(err))

        # Check a value (stored as the string 'True', not a bool)
        self.assertEqual('True',
                         config.get_output_params()['demonstration_video'])
    def test_read_config_learning_adv(self):
        """Advanced-learning section of the config file must parse and expose values."""
        try:
            config = BlobbleConfig('testdata/blobble_config_test.ini')
        except Exception as err:  # was a bare except with an always-failing `assert ()`
            self.fail('Configuration should be syntactically correct, '
                      'so no exception: {}'.format(err))

        # Check a value (stored as the string '5e-3', not a float)
        self.assertEqual('5e-3',
                         config.get_learning_adv_params()['learning_rate'])
    def test_read_config_missing_section(self):
        """A config file with a missing section must raise KeyError."""
        # Replaces the try/except/return + always-failing `assert ()` pattern
        # with the idiomatic unittest context manager.
        with self.assertRaises(KeyError):
            BlobbleConfig('testdata/blobble_config_test_missing_section.ini')
 def test_read_invalid_config(self):
     config = BlobbleConfig('testdata/blobble_config_est.ini')
     self.assertEqual(None, config.get_learning_params())
    def __init__(self,
                 env_name='blobble-world-v0'
                 ):
        """
        Initialise the agent by training a neural network for the passed tf-agent environment.

        :param env_name:
        Name of the gym environment for the agent to solve
        """
        self._env_name = env_name

        # Take a timestamp. This will be used for any output files created in the output folder
        self._timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")

        # Create training and evaluation environments
        self._train_py_env = suite_gym.load(self._env_name)
        self._eval_py_env = suite_gym.load(self._env_name)

        # Convert the training and test environments to Tensors
        self._train_env = tf_py_environment.TFPyEnvironment(self._train_py_env)
        self._eval_env = tf_py_environment.TFPyEnvironment(self._eval_py_env)

        print('=====================================================')
        print('Environments created for : ', self._env_name)
        # Same spec summary for both environments (was two copy-pasted blocks)
        for env_label, env in (('Training Environment', self._train_env),
                               ('Evaluation Environment', self._eval_env)):
            print(env_label)
            print('  Observation Spec:')
            print('    ', env.time_step_spec().observation)
            print('  Reward Spec:')
            print('    ', env.time_step_spec().reward)
            print('  Action Spec:')
            print('    ', env.action_spec())
        print('=====================================================')

        self._config = BlobbleConfig('blobble_config.ini')
        self._config.print_config()

        # Get the demonstration parameters and output folder. We don't need these just yet but it's
        # good to do now in case there is an error in the config file (exception will be thrown).
        # Hoisted: get_output_params() was previously called three times.
        output_params = self._config.get_output_params()
        self._output_folder = output_params['output_folder']
        self._num_demo_episodes = int(output_params['num_demonstration_episodes'])
        self._demo_video = (output_params['demonstration_video'] == 'True')

        # Get and check the advanced learning parameters
        # (hoisted: get_learning_adv_params() was previously called twice)
        adv_params = self._config.get_learning_adv_params()
        self._learning_rate = float(adv_params['learning_rate'])
        # NOTE(review): split(',') yields a tuple of *strings* — confirm that
        # create_neural_network_agent converts these to ints for layer sizes.
        self._fc_layer_params = tuple(adv_params['fc_layer_params'].split(','))

        print('Create and train a neural network agent')
        self._neural_network_agent = create_neural_network_agent(self._train_env,
                                                                 self._learning_rate,
                                                                 self._fc_layer_params)

        learning_params = self._config.get_learning_params()
        train_neural_network(self._neural_network_agent,
                             self._train_env,
                             self._eval_env,
                             num_train_iterations=learning_params['training_iterations'],
                             log_interval=learning_params['training_log_interval'],
                             eval_interval=learning_params['eval_interval'],
                             num_eval_episodes=learning_params['num_eval_episodes'],
                             replay_buffer_max_length=learning_params['replay_buffer_max_length'],
                             collect_steps_per_iteration=learning_params['collect_steps_per_iteration'],
                             output_folder=self._output_folder,
                             timestamp=self._timestamp)
class QNetworkAgent:
    """
    Wrapper class to provide a Deep Neural Network agent for any provided tf-agent environment.

    The agent is built and trained immediately on construction, using
    parameters read from 'blobble_config.ini'.
    """

    def __init__(self,
                 env_name='blobble-world-v0'
                 ):
        """
        Initialise the agent by training a neural network for the passed tf-agent environment.

        :param env_name:
        Name of the gym environment for the agent to solve
        """
        self._env_name = env_name

        # Take a timestamp. This will be used for any output files created in the output folder
        self._timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")

        # Create training and evaluation environments
        self._train_py_env = suite_gym.load(self._env_name)
        self._eval_py_env = suite_gym.load(self._env_name)

        # Convert the training and test environments to Tensors
        self._train_env = tf_py_environment.TFPyEnvironment(self._train_py_env)
        self._eval_env = tf_py_environment.TFPyEnvironment(self._eval_py_env)

        print('=====================================================')
        print('Environments created for : ', self._env_name)
        # Same spec summary for both environments (was two copy-pasted blocks)
        for env_label, env in (('Training Environment', self._train_env),
                               ('Evaluation Environment', self._eval_env)):
            print(env_label)
            print('  Observation Spec:')
            print('    ', env.time_step_spec().observation)
            print('  Reward Spec:')
            print('    ', env.time_step_spec().reward)
            print('  Action Spec:')
            print('    ', env.action_spec())
        print('=====================================================')

        self._config = BlobbleConfig('blobble_config.ini')
        self._config.print_config()

        # Get the demonstration parameters and output folder. We don't need these just yet but it's
        # good to do now in case there is an error in the config file (exception will be thrown).
        # Hoisted: get_output_params() was previously called three times.
        output_params = self._config.get_output_params()
        self._output_folder = output_params['output_folder']
        self._num_demo_episodes = int(output_params['num_demonstration_episodes'])
        self._demo_video = (output_params['demonstration_video'] == 'True')

        # Get and check the advanced learning parameters
        # (hoisted: get_learning_adv_params() was previously called twice)
        adv_params = self._config.get_learning_adv_params()
        self._learning_rate = float(adv_params['learning_rate'])
        # NOTE(review): split(',') yields a tuple of *strings* — confirm that
        # create_neural_network_agent converts these to ints for layer sizes.
        self._fc_layer_params = tuple(adv_params['fc_layer_params'].split(','))

        print('Create and train a neural network agent')
        self._neural_network_agent = create_neural_network_agent(self._train_env,
                                                                 self._learning_rate,
                                                                 self._fc_layer_params)

        learning_params = self._config.get_learning_params()
        train_neural_network(self._neural_network_agent,
                             self._train_env,
                             self._eval_env,
                             num_train_iterations=learning_params['training_iterations'],
                             log_interval=learning_params['training_log_interval'],
                             eval_interval=learning_params['eval_interval'],
                             num_eval_episodes=learning_params['num_eval_episodes'],
                             replay_buffer_max_length=learning_params['replay_buffer_max_length'],
                             collect_steps_per_iteration=learning_params['collect_steps_per_iteration'],
                             output_folder=self._output_folder,
                             timestamp=self._timestamp)

    def get_random_baseline_performance(self, iterations=10):
        """
        Establish a baseline performance based on random behaviour.

        :param iterations:
        Number of episodes to average the random policy's return over
        :return:
        Average return achieved by a purely random policy
        """
        time_step_spec = self._train_env.time_step_spec()
        action_spec = self._train_env.action_spec()
        baseline_policy = random_tf_policy.RandomTFPolicy(time_step_spec, action_spec)
        return compute_avg_return(self._train_env, baseline_policy, iterations)

    def run_agent(self, fps=2, random=False):
        """
        Run demonstration episodes, optionally recording them to video.

        :param fps:
        Frames per second for video
        :param random:
        If True, use a random policy instead of the trained one (for baseline comparison)
        :return:
        """
        # Fresh environment so demonstrations don't disturb the training and
        # evaluation environments created in __init__.
        run_py_env = suite_gym.load(self._env_name)
        run_env = tf_py_environment.TFPyEnvironment(run_py_env)

        # Choose the trained policy, or a random policy when requested.
        if not random:
            policy = self._neural_network_agent.policy
        else:
            policy = random_tf_policy.RandomTFPolicy(run_env.time_step_spec(),
                                                     run_env.action_spec())

        if self._num_demo_episodes > 0:
            if self._demo_video:
                # Record every demonstration episode into a single mp4 named
                # after the timestamp taken at construction time.
                filename = os.path.join(self._output_folder, self._timestamp + "-demonstration" + ".mp4")
                with imageio.get_writer(filename, fps=fps) as video:
                    for episode in range(self._num_demo_episodes):
                        print('Demonstration Episode: ', episode+1)
                        # Reset the evaluation environment
                        time_step = run_env.reset()
                        while not time_step.is_last():
                            action_step = policy.action(time_step)
                            time_step = run_env.step(action_step.action)
                            tf.print('ACTION: ', action_step.action, time_step)
                            # Render from the underlying py env: TF wrapper has no render
                            video.append_data(run_py_env.render())
                print('Demonstration video is in: '+filename)
            else:
                # No video requested: just log the actions/time steps to stdout.
                for episode in range(self._num_demo_episodes):
                    print('Demonstration Episode: ', episode+1)
                    # Reset the evaluation environment
                    time_step = run_env.reset()
                    while not time_step.is_last():
                        action_step = policy.action(time_step)
                        time_step = run_env.step(action_step.action)
                        tf.print('ACTION: ', action_step.action, time_step)