def run(self):
    """Main training loop.

    For ``config.total_step`` iterations: sample a batch from the replay
    memory, take one optimization step, feed the resulting training info
    back into the replay memory (e.g. for prioritization), and publish the
    updated actor to the agent. Checkpoints every 5000 steps and once more
    at the end under the tag ``'final'``.

    Side effects: writes model checkpoints into the logger's log path and
    rotates the logger's event file at each periodic checkpoint.
    """
    for x in range(self.config.total_step):
        batch = self.replay_memory.sample()
        if batch is not None:
            training_info = self.model.step(batch)
            if training_info is not None:
                # Report per-sample training statistics back to the memory
                # (presumably priority updates — confirm against ReplayMemory).
                self.replay_memory.training_info(batch, training_info)
            self.agent.update_actor(self.model.get_actor())
        else:
            # Fix: back off briefly instead of busy-spinning when the replay
            # memory has no batch ready (matches the async run() variant).
            time.sleep(0.01)
        # NOTE(review): this also fires at x == 0, saving an untrained model;
        # kept as-is since the original did the same.
        if x % 5000 == 0:
            output_file_path = GLOBAL_LOGGER.get_log_path()
            self.model.save(output_file_path, str(x))
            GLOBAL_LOGGER.reset_event_file()
    # Final checkpoint after the loop completes.
    output_file_path = GLOBAL_LOGGER.get_log_path()
    self.model.save(output_file_path, 'final')
def run(self):
    """Asynchronous training loop with per-step timing.

    Runs ``config.total_step`` iterations. Each iteration tries a
    non-blocking sample (``asynchronization=True``); on success it performs
    one model step, reports training info back to the replay memory, pushes
    the refreshed actor to the agent, and logs the wall-clock step duration
    to TensorBoard. When no batch is available it sleeps briefly to avoid
    busy-waiting. Checkpoints every 5000 steps and once more at the end
    under the tag ``'final'``.
    """
    for step in range(self.config.total_step):
        step_start = time.time()
        sampled = self.replay_memory.sample(asynchronization=True)
        if sampled is None:
            # Nothing to train on yet — yield the CPU briefly.
            time.sleep(0.01)
        else:
            info = self.model.step(sampled)
            if info is not None:
                self.replay_memory.training_info(sampled, info)
            self.agent.update_actor(self.model.get_actor())
            elapsed = time.time() - step_start
            GLOBAL_LOGGER.get_tb_logger().add_scalar("training_time", elapsed, step)
        if step % 5000 == 0:
            # Periodic checkpoint; tag is the step index as a string.
            ckpt_dir = GLOBAL_LOGGER.get_log_path()
            self.model.save(ckpt_dir, str(step))
            GLOBAL_LOGGER.reset_event_file()
    # One last save after the loop finishes.
    ckpt_dir = GLOBAL_LOGGER.get_log_path()
    self.model.save(ckpt_dir, 'final')