def _is_ended(self):
    """
    :return: True if the experiment has ended
    :rtype: bool
    """
    key_found_flag = False
    finished_flag = False
    for key in GlobalConfig().DEFAULT_EXPERIMENT_END_POINT:
        if GlobalConfig().DEFAULT_EXPERIMENT_END_POINT[key] is not None:
            key_found_flag = True
            if get_global_status_collect()(key) >= GlobalConfig().DEFAULT_EXPERIMENT_END_POINT[key]:
                ConsoleLogger().print('info',
                                      'pipeline ended because {}: {} >= end point value {}'.format(
                                          key,
                                          get_global_status_collect()(key),
                                          GlobalConfig().DEFAULT_EXPERIMENT_END_POINT[key]))
                finished_flag = True
    if key_found_flag is False:
        ConsoleLogger().print('warning',
                              '{} in experiment_end_point is not registered with the global status collector: {}, the experiment may not end'.format(
                                  GlobalConfig().DEFAULT_EXPERIMENT_END_POINT,
                                  list(get_global_status_collect()().keys())))
    return finished_flag
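# A hedged sketch (the key name TOTAL_AGENT_TRAIN_SAMPLE_COUNT and GlobalConfig().set()
# are assumptions modeled on the surrounding code, not confirmed by this snippet):
# _is_ended() compares each non-None entry of DEFAULT_EXPERIMENT_END_POINT against the
# matching global status counter, so an experiment meant to stop after 10000 training
# samples could be configured as:
def _example_configure_end_point():
    GlobalConfig().set('DEFAULT_EXPERIMENT_END_POINT',
                       dict(TOTAL_AGENT_TRAIN_SAMPLE_COUNT=10000))
    # the key must be registered with get_global_status_collect(),
    # otherwise the warning branch above fires and the experiment never ends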
def train(self, *args, **kwargs):
    """
    train the agent

    :return: False if the memory buffer did not have enough data to train, in which case training is skipped
    :rtype: bool
    """
    self.set_status('TRAIN')
    self.algo.set_status('TRAIN')
    ConsoleLogger().print('info', 'train agent:')
    try:
        res = self.algo.train(*args, **kwargs)
    except MemoryBufferLessThanBatchSizeError:
        ConsoleLogger().print('warning',
                              'memory buffer did not have enough data to train, skip training')
        return False
    ConsoleLogger().print('info', res)
    if self.algo_saving_scheduler and self.algo_saving_scheduler.value() is True:
        self.algo.save(global_step=self._status.get_specific_info_key_status(
            info_key='update_counter',
            under_status='TRAIN'))
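# A hedged usage sketch (not from the source): because train() returns False only
# when the replay buffer is still smaller than a batch, a warm-up loop can simply
# ignore the skipped iterations. `agent`, `env` and `total_steps` are illustrative names.
def _example_train_loop(agent, env, total_steps):
    for _ in range(total_steps):
        agent.sample(env=env, sample_count=1, store_flag=True)
        agent.train()  # returns False (and skips the update) until the buffer is warm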
def test_console_logger(self):
    self.assertTrue(ConsoleLogger().inited_flag)
    logger = ConsoleLogger()
    self.assertTrue(logger.inited_flag)
    logger.print('info', 'this is for test %s', 'args')
    logger.print('info', 'this is for test {}'.format('args'))
    logger2 = ConsoleLogger()
    self.assertEqual(id(logger), id(logger2))
    logger.flush()
def LogSetup(self):
    Logger().init(config_or_config_dict=GlobalConfig().DEFAULT_LOG_CONFIG_DICT,
                  log_path=GlobalConfig().DEFAULT_LOG_PATH,
                  log_level=GlobalConfig().DEFAULT_LOG_LEVEL)
    ConsoleLogger().init(logger_name='console_logger',
                         to_file_flag=True,
                         level=GlobalConfig().DEFAULT_LOG_LEVEL,
                         to_file_name=os.path.join(Logger().log_dir, 'console.log'))
    self.assertTrue(ConsoleLogger().inited_flag)
    self.assertTrue(Logger().inited_flag)
def sample(self,
           env,
           sample_count: int,
           in_which_status: str = 'TRAIN',
           store_flag=False,
           sample_type: str = 'transition') -> (TransitionData, TrajectoryData):
    """
    sample a certain amount of data from the environment and return the batch data
    to self.train or self.test

    :param env: environment to sample from
    :param sample_count: int, how many samples to draw
    :param in_which_status: string, status of the environment, 'TRAIN' by default
    :param store_flag: whether to store the environment samples, False by default
    :param sample_type: the type of sample, 'transition' by default
    :return: sampled data from the environment
    :rtype: some subclass of SampleData: TrajectoryData or TransitionData
    """
    self.set_status(in_which_status)
    env.set_status(in_which_status)
    self.algo.set_status(in_which_status)
    ConsoleLogger().print('info',
                          "agent sampled {} {} under status {}".format(sample_count, sample_type,
                                                                       self.get_status()))
    batch_data = self.sampler.sample(agent=self,
                                     env=env,
                                     reset_at_start=self.parameters('reset_state_every_sample'),
                                     sample_type=sample_type,
                                     sample_count=sample_count)
    if store_flag is True:
        self.store_samples(samples=batch_data)
    # todo when we have transition/trajectory data here, are the mean or sum results still valid?
    ConsoleLogger().print('info',
                          "sample: mean reward {}, sum reward {}\n".format(
                              batch_data.get_mean_of(set_name='reward_set'),
                              batch_data.get_sum_of(set_name='reward_set')))
    self.recorder.append_to_obj_log(obj=self,
                                    attr_name='average_reward',
                                    status_info=self.get_status(),
                                    value=batch_data.get_mean_of('reward_set'))
    self.recorder.append_to_obj_log(obj=self,
                                    attr_name='sum_reward',
                                    status_info=self.get_status(),
                                    value=batch_data.get_sum_of('reward_set'))
    return batch_data
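# A hedged usage sketch for sample() above, using only the signature it shows;
# `agent` and `env` are illustrative names:
def _example_sampling(agent, env):
    # draw 32 transitions for training and keep them in the agent's buffer
    train_batch = agent.sample(env=env, sample_count=32,
                               in_which_status='TRAIN', store_flag=True)
    # draw one full trajectory for evaluation without storing it
    test_traj = agent.sample(env=env, sample_count=1,
                             in_which_status='TEST', sample_type='trajectory')
    return train_batch, test_traj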
def train(self, *args, **kwargs):
    self.set_status('TRAIN')
    self.algo.set_status('TRAIN')
    if self.step_counter.val % 200 == 0:
        ConsoleLogger().print('info', 'Train at {} steps'.format(self.step_counter.val))
    try:
        res = self.algo.train(*args, **kwargs)
    except MemoryBufferLessThanBatchSizeError:
        ConsoleLogger().print('warning',
                              'memory buffer did not have enough data to train, skip training')
        return False
    ConsoleLogger().print('info', res)
def register_info_key_status(self, obj, info_key: str, return_name: str, under_status=None):
    ConsoleLogger().print('info',
                          'registered obj: {}, key: {}, return name: {}, under status: {}'.format(
                              obj, info_key, return_name, under_status))
    for val in self._register_status_dict:
        assert return_name != val['return_name']
    self._register_status_dict.append(dict(obj=obj,
                                           info_key=info_key,
                                           under_status=under_status,
                                           return_name=return_name))
    try:
        self(info_key)
    except StatusInfoNotRegisteredError:
        ConsoleLogger().print('warning',
                              'newly registered info: obj: {}, key: {}, return name: {}, under status: {} cannot be detected yet'.format(
                                  obj, info_key, return_name, under_status))
def get_specific_info_key_status(self, info_key, under_status, *args, **kwargs):
    res = self._get_specific_info_key_status(info_key=info_key,
                                             under_status=under_status,
                                             *args,
                                             **kwargs)
    if res is None:
        ConsoleLogger().print('error',
                              'failed to access info key status: {} under status {} of obj: {}'.format(
                                  info_key, under_status, self.obj.name))
    else:
        return res
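# A hedged usage sketch: this getter is how train() above retrieves the update
# counter used as the checkpoint step. 'update_counter' is the only key shown in
# the surrounding code; any other key would be an assumption.
def _example_read_counter(status):
    return status.get_specific_info_key_status(info_key='update_counter',
                                               under_status='TRAIN')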
def copy_from(self, obj) -> bool:
    if not isinstance(obj, type(self)):
        raise TypeError('Wrong type of obj %s to be copied, which should be %s' % (type(obj), type(self)))
    self.parameters.copy_from(obj.parameters)
    self._dynamics_model.copy_from(obj._dynamics_model)
    ConsoleLogger().print('info', 'model: {} copied from {}'.format(self, obj))
    return True
def test(self, *args, **kwargs):
    self.set_status('TEST')
    self.algo.set_status('TEST')
    ConsoleLogger().print('info', '\nTest at {} steps'.format(self.step_counter.val))
    env = kwargs['env']
    cyber = kwargs['cyber']
    data_sample = kwargs['data_sample']
    test_reward = kwargs['test_reward']
    num_test = kwargs['num_test']
    max_step_per_episode = kwargs['max_step_per_episode']
    ep_ret_test = 0
    for i in range(num_test):
        obs = env.reset()
        test_step = 0
        while True:
            action = self.predict(obs=obs)
            # action = np.squeeze(action)  # [1.]
            obs_, reward, done, info = cyber.step(obs, action)
            ep_ret_test += reward
            if done or test_step > max_step_per_episode:
                break
            obs = obs_
            test_step += 1
    test_reward.append(ep_ret_test / num_test)
    data_sample.append(self.step_counter.val)
    print("Average test reward at step {}: {}\n".format(self.step_counter.val,
                                                        ep_ret_test / num_test))
    return data_sample, test_reward
def wrap_t_fn(t_fn):
    try:
        return t_fn()
    except StatusInfoNotRegisteredError:
        ConsoleLogger().print('error',
                              'StatusInfoNotRegisteredError occurred, returning 0')
        return 0
def test(self, sample_count, sample_trajectory_flag: bool = False):
    """
    test the agent

    :param sample_count: how many transitions/trajectories are used to evaluate the agent's performance
    :type sample_count: int
    :param sample_trajectory_flag: True for sampling trajectories instead of transitions
    :type sample_trajectory_flag: bool
    """
    self.set_status('TEST')
    self.algo.set_status('TEST')
    ConsoleLogger().print('info',
                          'test: agent with {}, sample_trajectory_flag {}'.format(
                              sample_count, sample_trajectory_flag))
    if sample_trajectory_flag is True:
        left_sample_count = sample_count
        while left_sample_count > 0:
            res = self.sample(env=self.env,
                              sample_count=1,
                              sample_type='trajectory',
                              store_flag=False,
                              in_which_status='TEST')
            self.total_test_samples += len(res)
            left_sample_count -= len(res)
    else:
        res = self.sample(env=self.env,
                          sample_count=sample_count,
                          sample_type='transition',
                          store_flag=False,
                          in_which_status='TEST')
        self.total_test_samples += len(res)
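# A hedged usage sketch: in trajectory mode, test() keeps drawing one trajectory at
# a time and subtracts len(res), the number of transitions it contains, from the
# budget, so the call below evaluates on roughly 500 transitions' worth of complete
# trajectories. `agent` is an illustrative name.
def _example_evaluate(agent):
    agent.test(sample_count=500, sample_trajectory_flag=True)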
def load(self, path_to_model, model_name, global_step=None, **kwargs):
    sess = kwargs['sess'] if 'sess' in kwargs else None
    self.parameters.load(path_to_model=path_to_model,
                         model_name=model_name,
                         global_step=global_step,
                         sess=sess)
    ConsoleLogger().print('info', 'model: {} loaded from {}'.format(model_name, path_to_model))
def sample(self,
           env,
           sample_count: int,
           in_which_status: str,
           store_flag=False,
           sample_type: str = 'transition') -> (TransitionData, TrajectoryData):
    """
    sample a certain amount of data from the environment

    :param env: environment to sample from
    :param sample_count: how many samples to draw
    :param in_which_status: status of the environment
    :param store_flag: whether to store the environment samples, False by default
    :param sample_type: the type of sample, 'transition' by default
    :return: sampled data from the environment, a TransitionData or TrajectoryData object
    """
    self.set_status(in_which_status)
    env.set_status(in_which_status)
    self.algo.set_status(in_which_status)
    ConsoleLogger().print('info',
                          "agent sampled {} samples under status {}".format(sample_count,
                                                                            self.get_status()))
    batch_data = self.sampler.sample(agent=self,
                                     env=env,
                                     sample_type=sample_type,
                                     in_which_status=in_which_status,
                                     sample_count=sample_count)
    if store_flag is True:
        self.store_samples(samples=batch_data)
    # todo when we have transition/trajectory data here, are the mean or sum results still valid?
    ConsoleLogger().print('info',
                          "sample: mean reward {}, sum reward {}".format(
                              batch_data.get_mean_of(set_name='reward_set'),
                              batch_data.get_sum_of(set_name='reward_set')))
    self.recorder.append_to_obj_log(obj=self,
                                    attr_name='average_reward',
                                    status_info=self.get_status(),
                                    log_val=batch_data.get_mean_of('reward_set'))
    self.recorder.append_to_obj_log(obj=self,
                                    attr_name='sum_reward',
                                    status_info=self.get_status(),
                                    log_val=batch_data.get_sum_of('reward_set'))
    return batch_data
def get_specific_info_key_status(self, info_key, *args, **kwargs):
    try:
        return self._info_dict[info_key]
    except KeyError:
        ConsoleLogger().print('error',
                              'failed to access info key status: {} of obj: {}'.format(
                                  info_key, self.obj.name))
        return None
def copy_from(self, obj) -> bool:
    if not isinstance(obj, type(self)):
        raise TypeError('Wrong type of obj %s to be copied, which should be %s' % (type(obj), type(self)))
    self._own_placeholder_input_obj.copy_from(obj._own_placeholder_input_obj)
    for self_param, src_param in zip(self._placeholder_input_list, obj._placeholder_input_list):
        self_param['obj'].copy_from(src_param['obj'])
    ConsoleLogger().print('info', 'model: {} copied from {}'.format(self, obj))
    return True
def setUp(self):
    BaseTestCase.setUp(self)
    try:
        shutil.rmtree(GlobalConfig().DEFAULT_LOG_PATH)
    except FileNotFoundError:
        pass
    os.makedirs(GlobalConfig().DEFAULT_LOG_PATH)
    self.assertFalse(ConsoleLogger().inited_flag)
    self.assertFalse(Logger().inited_flag)
def save(self, global_step, save_path=None, name=None, **kwargs):
    save_path = save_path if save_path else GlobalConfig().DEFAULT_MODEL_CHECKPOINT_PATH
    name = name if name else self.name
    sess = kwargs['sess'] if 'sess' in kwargs else None
    self.parameters.save(save_path=save_path,
                         global_step=global_step,
                         sess=sess,
                         name=name)
    ConsoleLogger().print('info',
                          'model: {}, global step: {}, saved at {}-{}'.format(name, global_step,
                                                                              save_path, global_step))
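# A hedged sketch: save() above and load() earlier form a checkpoint round trip;
# the path and model name below are illustrative, not from the source:
def _example_checkpoint_round_trip(model):
    model.save(global_step=1000, save_path='/tmp/ckpt', name='mlp_dynamics')
    model.load(path_to_model='/tmp/ckpt', model_name='mlp_dynamics', global_step=1000)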
def launch(self) -> bool:
    """
    Launch the flow until it finishes or a system-allowed error is caught
    (e.g., out of GPU memory), to ensure the log will be saved safely.

    :return: Boolean, True if the flow executed and finished correctly.
    """
    try:
        return self._launch()
    except GlobalConfig().DEFAULT_ALLOWED_EXCEPTION_OR_ERROR_LIST as e:
        ConsoleLogger().print('error', 'error {} occurred'.format(e))
        return False
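# A hedged usage sketch: launch() swallows only the errors listed in
# DEFAULT_ALLOWED_EXCEPTION_OR_ERROR_LIST and reports failure through its return
# value, so callers can branch on it. `flow` is an illustrative name.
def _example_launch(flow):
    if not flow.launch():
        ConsoleLogger().print('error', 'flow aborted, see console.log for details')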
def setUp(self):
    BaseTestCase.setUp(self)
    try:
        shutil.rmtree(GlobalConfig().DEFAULT_LOG_PATH)
    except FileNotFoundError:
        pass
    # os.makedirs(GlobalConfig().DEFAULT_LOG_PATH)
    # self.assertFalse(ConsoleLogger().inited_flag)
    # self.assertFalse(Logger().inited_flag)
    Logger().init(config_or_config_dict=GlobalConfig().DEFAULT_LOG_CONFIG_DICT,
                  log_path=GlobalConfig().DEFAULT_LOG_PATH,
                  log_level=GlobalConfig().DEFAULT_LOG_LEVEL)
    ConsoleLogger().init(logger_name='console_logger',
                         to_file_flag=True,
                         level=GlobalConfig().DEFAULT_LOG_LEVEL,
                         to_file_name=os.path.join(Logger().log_dir, 'console.log'))
    self.assertTrue(ConsoleLogger().inited_flag)
    self.assertTrue(Logger().inited_flag)
def single_exp_runner(task_fn,
                      auto_choose_gpu_flag=False,
                      gpu_id: int = 0,
                      seed=None,
                      del_if_log_path_existed=False,
                      keep_session=False,
                      **task_fn_kwargs):
    """
    :param task_fn: task function defined by the user
    :type task_fn: method
    :param auto_choose_gpu_flag: auto-choose the gpu, False by default
    :type auto_choose_gpu_flag: bool
    :param gpu_id: gpu id, 0 by default
    :type gpu_id: int
    :param seed: random seed; if None, one is generated from the system time
    :type seed: int
    :param del_if_log_path_existed: delete the log path if it already exists, False by default
    :type del_if_log_path_existed: bool
    :param keep_session: whether to keep the default session & graph
    :type keep_session: bool
    :param task_fn_kwargs: keyword arguments passed through to task_fn
    :return:
    :rtype:
    """
    os.environ['CUDA_DEVICE_ORDER'] = "PCI_BUS_ID"
    if auto_choose_gpu_flag is True:
        DEVICE_ID_LIST = Gpu.getFirstAvailable()
        DEVICE_ID = DEVICE_ID_LIST[0]
        os.environ["CUDA_VISIBLE_DEVICES"] = str(DEVICE_ID)
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    if not seed:
        seed = int(round(time.time() * 1000)) % (2 ** 32 - 1)
    _reset_global_seed(seed, keep_session)
    print("create log path at {}".format(GlobalConfig().DEFAULT_LOG_PATH), flush=True)
    file.create_path(path=GlobalConfig().DEFAULT_LOG_PATH,
                     del_if_existed=del_if_log_path_existed)
    Logger().init(config_or_config_dict=dict(),
                  log_path=GlobalConfig().DEFAULT_LOG_PATH,
                  log_level=GlobalConfig().DEFAULT_LOG_LEVEL)
    ConsoleLogger().init(
        to_file_flag=GlobalConfig().DEFAULT_WRITE_CONSOLE_LOG_TO_FILE_FLAG,
        to_file_name=os.path.join(GlobalConfig().DEFAULT_LOG_PATH,
                                  GlobalConfig().DEFAULT_CONSOLE_LOG_FILE_NAME),
        level=GlobalConfig().DEFAULT_LOG_LEVEL,
        logger_name=GlobalConfig().DEFAULT_CONSOLE_LOGGER_NAME)
    task_fn(**task_fn_kwargs)
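# A hedged usage sketch: single_exp_runner() wires up the seed, the log path and
# both loggers before invoking the task, so an experiment script only has to
# supply a task function. `my_task` and `env_id` are illustrative names.
def my_task(env_id):
    pass  # build agent, env and flow here, then launch them

# single_exp_runner(my_task, seed=1234, del_if_log_path_existed=True,
#                   env_id='Pendulum-v0')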
def set_status(self, new_status: str):
    if not isinstance(new_status, str):
        raise TypeError("{} is not a string".format(new_status))
    if self._status_list and new_status not in self._status_list:
        ConsoleLogger().print('error',
                              "New status: {} not in the status list: {}".format(new_status,
                                                                                 self._status_list))
    self._status_val = new_status
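# A hedged usage sketch: set_status() only logs an error, it does not raise, when
# the new status is outside the allowed list, so a typo still overwrites the
# current status. 'TRAIN' mirrors the status names used throughout this code.
def _example_set_status(agent):
    agent.set_status('TRAIN')  # accepted, assuming 'TRAIN' is in _status_list
    agent.set_status('TRIAN')  # logs an error but the status is still overwritten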
def save(self, global_step, save_path, name, **kwargs):
    sess = kwargs['sess'] if 'sess' in kwargs else None
    self._own_placeholder_input_obj.parameters.save(save_path=save_path,
                                                    global_step=global_step,
                                                    sess=sess,
                                                    name=name)
    for param in self._placeholder_input_list:
        param['obj'].save(save_path=os.path.join(save_path, param['attr_name']),
                          global_step=global_step,
                          sess=sess,
                          name=param['obj'].name)
    ConsoleLogger().print('info',
                          'model: {}, global step: {}, saved at {}-{}'.format(name, global_step,
                                                                              save_path, global_step))
def sample(self,
           env,
           sample_count: int,
           buffer: (TransitionData, MPC_TransitionData) = MPC_TransitionData,
           num_trajectory: int = 10,
           max_step: int = 1000,
           num_simulated_paths: int = 1000,
           in_which_status: str = 'TRAIN',
           store_flag=False) -> (TransitionData, MPC_TransitionData):
    """
    Sample optimal actions from dyna_mlp and update 'buffer' (rl_buffer);
    return the updated rl_buffer. DAgger aggregation is executed in the workflow.

    :param env: environment to sample from
    :param sample_count: number of on-policy iterations (see comment below)
    :param in_which_status: status of the environment, 'TRAIN' by default
    :param store_flag: whether to store the samples, False by default
    :param buffer: the rl_buffer to be updated
    :param num_trajectory: number of trajectories to roll out, 10 by default
    :param max_step: maximum steps per trajectory, 1000 by default
    :param num_simulated_paths: number of simulated paths for MPC, 1000 by default
    :return: MPC_TransitionData
    """
    self.set_status(in_which_status)
    env.set_status(in_which_status)
    self.algo.set_status(in_which_status)
    # sample_count == on_policy_iter
    ConsoleLogger().print('info',
                          "Agent samples {} {} under status {} as rl_buffer.".format(
                              sample_count,
                              str(type(buffer))[8:-2],
                              self.get_status()))
    for i in range(num_trajectory):
        obs = env.reset()
        for j in range(max_step):
            act = self.predict(obs=obs, is_reward_func=False)
            obs_, rew, done, _ = env.step(act)
            buffer.append(obs, act, obs_, done, rew)
            if done:
                break
            else:
                obs = obs_
            if j % 10 == 0:
                print('num_trajectory:{}/{} step:{}/{}'.format(i, num_trajectory - 1,
                                                               j, max_step - 1))
    return buffer
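# A hedged usage sketch: sample() above appends every transition of num_trajectory
# rollouts to the caller-provided buffer, so the buffer should be passed in
# explicitly rather than left at its default (which is the MPC_TransitionData
# class itself, not an instance). The constructor arguments below are assumptions.
def _example_fill_rl_buffer(agent, env):
    rl_buffer = MPC_TransitionData(env_spec=env.env_spec)  # constructor args assumed
    return agent.sample(env=env, sample_count=10, buffer=rl_buffer,
                        num_trajectory=5, max_step=200)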
def load(self, path_to_model, model_name, global_step=None, **kwargs):
    sess = kwargs['sess'] if 'sess' in kwargs else None
    self._own_placeholder_input_obj.parameters.load(path_to_model=path_to_model,
                                                    model_name=model_name,
                                                    global_step=global_step,
                                                    sess=sess)
    for param in self._placeholder_input_list:
        param['obj'].load(path_to_model=os.path.join(path_to_model, param['attr_name']),
                          global_step=global_step,
                          model_name=param['obj'].name,
                          sess=sess)
    ConsoleLogger().print('info', 'model: {} loaded from {}'.format(model_name, path_to_model))
def test(self, sample_count) -> SampleData:
    """
    test the agent

    :param sample_count: how many trajectories used to evaluate the agent's performance
    :type sample_count: int
    :return: SampleData object
    """
    self.set_status('TEST')
    self.algo.set_status('TEST')
    ConsoleLogger().print('info', '\ntest agent: with {} trajectories'.format(sample_count))
    res = self.sample(env=self.env,
                      sample_count=sample_count,
                      sample_type='trajectory',
                      store_flag=False,
                      in_which_status='TEST')
    return res
from baconian.common.logging import ConsoleLogger
from baconian.config.global_config import GlobalConfig
import os

ConsoleLogger().init(to_file_flag=GlobalConfig().DEFAULT_WRITE_CONSOLE_LOG_TO_FILE_FLAG,
                     to_file_name=os.path.join(GlobalConfig().DEFAULT_LOG_PATH,
                                               GlobalConfig().DEFAULT_CONSOLE_LOG_FILE_NAME),
                     level=GlobalConfig().DEFAULT_LOG_LEVEL,
                     logger_name=GlobalConfig().DEFAULT_CONSOLE_LOGGER_NAME)
ConsoleLogger().print('info', 'test')
def tearDown(self):
    Logger().reset()
    ConsoleLogger().reset()
    BaseTestCase.tearDown(self)
    self.assertFalse(ConsoleLogger().inited_flag)
    self.assertFalse(Logger().inited_flag)
def launch(self) -> bool:
    try:
        return self._launch()
    except GlobalConfig().DEFAULT_ALLOWED_EXCEPTION_OR_ERROR_LIST as e:
        ConsoleLogger().print('error', 'error {} occurred'.format(e))
        return False
def seed(self, seed=None):
    ConsoleLogger().print('warning', 'seed on dynamics model has no effect')