Beispiel #1
0
def setup_REINFORCE_train_algo_generator_with_FAILING_spec():
    """
    Set up a REINFORCE agent built from ``CARTPOLE_HPARAM_FAIL`` and hand
    the training ingredients to the consumer.

    This is a setup/teardown generator: everything before the ``yield`` is
    setup, everything after is teardown.
    NOTE(review): presumably registered as a pytest yield-fixture -- the
    decorator is not visible from here, confirm.

    Setup attaches a run directory and a TensorBoard ``FileWriter`` to the
    agent. Teardown prints the experiment statistics (only when the consumer
    finished without raising) and always closes the writer.

    :return: yields ``(epoch_generator, nb_of_try, env_max_return, exp_spec)``
    """
    nb_of_try = 2
    env_max_return = 200.000

    (epoch_generator, exp_spec,
     consol_print_learning_stats,
     reinforce_agent) = init_spec_and_REINFORCEagent(
        hparam=CARTPOLE_HPARAM_FAIL)

    this_run_dir = setup_commented_run_dir_str(exp_spec, AGENT_ROOT_DIR)
    writer = tf_cv1.summary.FileWriter(this_run_dir,
                                       tf_cv1.get_default_graph())

    reinforce_agent.this_run_dir = this_run_dir
    reinforce_agent.writer = writer

    try:
        yield epoch_generator, nb_of_try, env_max_return, exp_spec

        # Reached only when the consumer resumed the generator normally.
        consol_print_learning_stats.print_experiment_stats(
            print_plot=exp_spec.show_plot)
    finally:
        # Close the writer even if an exception was thrown in at the yield,
        # the generator was closed early, or the stats printing failed.
        reinforce_agent.writer.close()
Beispiel #2
0
    def train(self, render_env: bool = False) -> None:
        """
        Train a REINFORCE agent

        Sets ``self.this_run_dir`` and ``self.writer``, saves the experiment
        spec to ``config.txt`` in the run directory, consumes
        ``self._training_epoch_generator`` until it is exhausted, then prints
        the experiment statistics. The file writer is closed on every exit
        path, including when an error is raised.

        :param render_env: Control over trajectory rendering
        :type render_env: bool
        :raises IOError: if the config file cannot be written in the run directory
        """

        print(":: Environment rendering autorised: {}".format(render_env))

        consol_print_learning_stats = ConsolPrintLearningStats(
            self.exp_spec, self.exp_spec.print_metric_every_what_epoch)

        # ---- Setup run dir name ----
        self.this_run_dir = setup_commented_run_dir_str(
            self.exp_spec, self.agent_root_dir)

        # ---- Create run dir & setup file writer for TensorBoard ----
        self.writer = tf_cv1.summary.FileWriter(self.this_run_dir,
                                                tf_cv1.get_default_graph())

        try:
            # ---- Log experiment spec in run directory ----
            try:
                with open("{}/config.txt".format(self.this_run_dir), "w") as f:
                    f.write(repr(self.exp_spec))
            except IOError as e:
                raise IOError(
                    "The config file cannot be saved in the run directory!") from e

            # ---- Start training agent ----
            # The yielded per-epoch statistics are not used here; the loop
            # only drives the generator to completion.
            for _ in self._training_epoch_generator(
                    consol_print_learning_stats, render_env):
                pass

            # ---- Teardown (success path only) ----
            consol_print_learning_stats.print_experiment_stats(
                print_plot=self.exp_spec.show_plot)
        finally:
            # Always release the writer, even when the config write, the
            # training loop or the stats printing raised.
            self.writer.close()

        return None