Ejemplo n.º 1
0
    def run(self):
        """Run the training loop for ``self.config.total_step`` iterations.

        Each iteration asks the replay memory for a batch.  When one is
        returned, the model takes a step on it; a non-``None`` result from
        that step is passed back to the replay memory together with the
        batch, and the agent is then handed the model's current actor.

        Every 5000th iteration (iteration 0 included) the model is saved
        under the logger's current log path, tagged with the iteration
        number, and the logger's event file is reset.  After the loop ends
        the model is saved once more with the tag ``'final'``.
        """
        checkpoint_interval = 5000

        for step in range(self.config.total_step):
            sampled = self.replay_memory.sample()
            if sampled is not None:
                step_info = self.model.step(sampled)
                if step_info is not None:
                    # Report the step's result back to the replay memory.
                    self.replay_memory.training_info(sampled, step_info)
                self.agent.update_actor(self.model.get_actor())

            # Periodic checkpoint; runs whether or not a batch was available.
            if not step % checkpoint_interval:
                log_dir = GLOBAL_LOGGER.get_log_path()
                self.model.save(log_dir, str(step))
                GLOBAL_LOGGER.reset_event_file()

        # The log path is queried again here rather than reused from the loop.
        log_dir = GLOBAL_LOGGER.get_log_path()
        self.model.save(log_dir, 'final')
Ejemplo n.º 2
0
    def run(self):
        """Run the training loop for ``self.config.total_step`` iterations.

        Each iteration requests a batch from the replay memory with
        ``asynchronization=True``.  When a batch is returned, the model
        takes a step on it; a non-``None`` result from that step is passed
        back to the replay memory together with the batch, the agent is
        handed the model's current actor, and the wall-clock time since the
        start of the iteration is logged as the ``"training_time"`` scalar.
        When no batch is returned the loop sleeps for 10 ms instead; the
        iteration still counts toward ``total_step``.

        Every 5000th iteration (iteration 0 included) the model is saved
        under the logger's current log path, tagged with the iteration
        number, and the logger's event file is reset.  After the loop ends
        the model is saved once more with the tag ``'final'``.
        """
        checkpoint_interval = 5000

        for step in range(self.config.total_step):
            started_at = time.time()
            sampled = self.replay_memory.sample(asynchronization=True)
            if sampled is None:
                # Nothing to train on yet; pause briefly before the next try.
                time.sleep(0.01)
            else:
                step_info = self.model.step(sampled)
                if step_info is not None:
                    # Report the step's result back to the replay memory.
                    self.replay_memory.training_info(sampled, step_info)
                self.agent.update_actor(self.model.get_actor())
                tb_logger = GLOBAL_LOGGER.get_tb_logger()
                tb_logger.add_scalar(
                    "training_time", time.time() - started_at, step)

            # Periodic checkpoint; runs whether or not a batch was available.
            if not step % checkpoint_interval:
                log_dir = GLOBAL_LOGGER.get_log_path()
                self.model.save(log_dir, str(step))
                GLOBAL_LOGGER.reset_event_file()

        # The log path is queried again here rather than reused from the loop.
        log_dir = GLOBAL_LOGGER.get_log_path()
        self.model.save(log_dir, 'final')