def __init__(
        self,
        name,
        env: (Env, Wrapper),
        algo: Algo,
        env_spec: EnvSpec,
        sampler: Sampler = None,
        noise_adder: AgentActionNoiseWrapper = None,
        reset_noise_every_terminal_state=False,
        reset_state_every_sample=False,
        exploration_strategy: ExplorationStrategy = None,
        algo_saving_scheduler: EventScheduler = None):
    """
    Create an agent that binds an environment, an algorithm and a sampler.

    :param name: name of the agent instance
    :type name: str
    :param env: environment the agent interacts with
    :type env: Env
    :param algo: algorithm driven by the agent
    :type algo: Algo
    :param env_spec: environment specification (action and observation space)
    :type env_spec: EnvSpec
    :param sampler: sampler to use; a default ``Sampler()`` is built when None
    :type sampler: Sampler
    :param noise_adder: wrapper adding action noise for exploration
    :type noise_adder: AgentActionNoiseWrapper
    :param reset_noise_every_terminal_state: reset the noise after every sampled
        trajectory; stored in ``self.parameters``
    :type reset_noise_every_terminal_state: bool
    :param reset_state_every_sample: reset the state on every sample/rollout;
        stored in ``self.parameters``
    :type reset_state_every_sample: bool
    :param exploration_strategy: exploration strategy in action space
    :type exploration_strategy: ExplorationStrategy
    :param algo_saving_scheduler: scheduler stored on the agent as
        ``self.algo_saving_scheduler``
    :type algo_saving_scheduler: EventScheduler
    """
    super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))

    reset_flags = dict(
        reset_noise_every_terminal_state=reset_noise_every_terminal_state,
        reset_state_every_sample=reset_state_every_sample)
    self.parameters = Parameters(parameters=reset_flags)

    self.env = env
    self.algo = algo
    self._env_step_count = 0

    # Fall back to a default sampler when the caller did not supply one.
    self.sampler = Sampler() if sampler is None else sampler
    self.recorder = Recorder(default_obj=self)
    self.env_spec = env_spec

    # Any falsy strategy is normalised to None; a truthy one must have the right type.
    strategy = exploration_strategy or None
    if strategy is not None:
        assert isinstance(strategy, ExplorationStrategy)
    self.explorations_strategy = strategy

    self.noise_adder = noise_adder
    self.algo_saving_scheduler = algo_saving_scheduler
def __init__(
        self,
        name,
        env: Env,
        algo: Algo,
        env_spec: EnvSpec,
        sampler: Sampler = None,
        noise_adder: AgentActionNoiseWrapper = None,
        exploration_strategy: ExplorationStrategy = None,
        algo_saving_scheduler: EventScheduler = None):
    """
    Create an agent that binds an environment, an algorithm and a sampler.

    :param name: the name of the agent instance
    :type name: str
    :param env: environment that interacts with agent
    :type env: Env
    :param algo: algorithm of the agent
    :type algo: Algo
    :param env_spec: environment specifications: action space and observation space
    :type env_spec: EnvSpec
    :param sampler: sampler; when falsy, a ``Sampler`` named ``'<name>_sampler'``
        is created from ``env_spec``
    :type sampler: Sampler
    :param noise_adder: add action noise for exploration in action space
    :type noise_adder: AgentActionNoiseWrapper
    :param exploration_strategy: exploration strategy in action space
    :type exploration_strategy: ExplorationStrategy
    :param algo_saving_scheduler: scheduler stored on the agent as
        ``self.algo_saving_scheduler``
    :type algo_saving_scheduler: EventScheduler
    """
    super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))
    self.parameters = Parameters(parameters=dict())
    self.total_test_samples = 0
    self.total_train_samples = 0
    self.env = env
    self.algo = algo
    self._env_step_count = 0
    self.recorder = Recorder()
    self.env_spec = env_spec
    if exploration_strategy:
        assert isinstance(exploration_strategy, ExplorationStrategy)
        self.explorations_strategy = exploration_strategy
    else:
        self.explorations_strategy = None
    # Assigned exactly once: the former early `self.sampler = sampler` could
    # store None and was always overwritten by this line.
    self.sampler = sampler if sampler else Sampler(
        env_spec=env_spec, name='{}_sampler'.format(name))
    self.noise_adder = noise_adder
    self.algo_saving_scheduler = algo_saving_scheduler
def __init__(self, name: str = 'env'):
    """
    Set up an environment with unset spaces and a fresh recorder.

    :param name: name of the environment instance, 'env' by default
    :type name: str
    """
    super(Env, self).__init__(status=StatusWithSubInfo(obj=self), name=name)

    # Spaces and the step counter start unset.
    self.action_space = self.observation_space = None
    self.step_count = None

    self.recorder = Recorder()
    self._last_reset_point = 0

    # Callable that sums the 'step' info key through the status object.
    self.total_step_count_fn = lambda: self._status.group_specific_info_key(
        info_key='step',
        group_way='sum')
def __init__(self,
             name: str,
             agent: Agent,
             env: Env,
             flow: Flow,
             tuner: Tuner = None,
             register_default_global_status=True):
    """
    Assemble an experiment from an agent, an environment and a control flow.

    :param name: name of the experiment
    :type name: str
    :param agent: agent taking part in the experiment
    :type agent: Agent
    :param env: environment taking part in the experiment
    :type env: Env
    :param flow: control flow of the experiment
    :type flow: Flow
    :param tuner: hyper-parameter tuning method, currently in development
    :type tuner: Tuner
    :param register_default_global_status: when exactly True, register the
        default agent/env counters into the global status collection
    :type register_default_global_status: bool
    """
    super().__init__(status=StatusWithSingleInfo(obj=self), name=name)
    self.agent = agent
    self.env = env
    self.tuner = tuner
    self.recorder = Recorder(flush_by_split_status=False)
    self.flow = flow

    if register_default_global_status is True:
        # (object, info key, status, exported name), registered in this order.
        default_registrations = (
            (agent, 'predict_counter', 'TRAIN', 'TOTAL_AGENT_TRAIN_SAMPLE_COUNT'),
            (agent, 'predict_counter', 'TEST', 'TOTAL_AGENT_TEST_SAMPLE_COUNT'),
            (agent, 'update_counter', 'TRAIN', 'TOTAL_AGENT_UPDATE_COUNT'),
            (env, 'step', 'TEST', 'TOTAL_ENV_STEP_TEST_SAMPLE_COUNT'),
            (env, 'step', 'TRAIN', 'TOTAL_ENV_STEP_TRAIN_SAMPLE_COUNT'),
        )
        for target, key, status, exported_name in default_registrations:
            get_global_status_collect().register_info_key_status(
                obj=target,
                info_key=key,
                under_status=status,
                return_name=exported_name)
def __init__(
        self,
        name,
        env: Env,
        algo: Algo,
        env_spec: EnvSpec,
        sampler: Sampler = None,
        noise_adder: AgentActionNoiseWrapper = None,
        exploration_strategy: ExplorationStrategy = None,
        algo_saving_scheduler: EventSchedule = None):
    """
    Create an agent that binds an environment, an algorithm and a sampler.

    :param name: name of the agent; also used to name the default sampler
    :param env: environment stored as ``self.env``
    :param algo: algorithm stored as ``self.algo``
    :param env_spec: environment specification stored as ``self.env_spec`` and
        passed to the default sampler
    :param sampler: sampler to use; when falsy, a ``Sampler`` named
        ``'<name>_sampler'`` is created from ``env_spec``
    :param noise_adder: stored as ``self.noise_adder``
    :param exploration_strategy: stored as ``self.explorations_strategy`` when
        truthy (must be an ``ExplorationStrategy``), otherwise None is stored
    :param algo_saving_scheduler: stored as ``self.algo_saving_scheduler``
    """
    super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))
    self.parameters = Parameters(parameters=dict())
    self.total_test_samples = 0
    self.total_train_samples = 0
    self.env = env
    self.algo = algo
    self._env_step_count = 0
    self.recorder = Recorder()
    self.env_spec = env_spec
    if exploration_strategy:
        assert isinstance(exploration_strategy, ExplorationStrategy)
        self.explorations_strategy = exploration_strategy
    else:
        self.explorations_strategy = None
    # Assigned exactly once: the former early `self.sampler = sampler` could
    # store None and was always overwritten by this line.
    self.sampler = sampler if sampler else Sampler(
        env_spec=env_spec, name='{}_sampler'.format(name))
    self.noise_adder = noise_adder
    self.algo_saving_scheduler = algo_saving_scheduler
def __init__(self, env_spec: EnvSpec, name: str = 'algo'):
    """
    Initialise the algorithm with its environment specification.

    :param env_spec: environment specifications
    :type env_spec: EnvSpec
    :param name: name of the algorithm, 'algo' by default
    :type name: str
    """
    algo_status = StatusWithSubInfo(obj=self)
    super().__init__(status=algo_status, name=name)
    self.env_spec = env_spec
    self.recorder = Recorder()
def __init__(self,
             name: str,
             agent: Agent,
             env: Env,
             flow: Flow,
             tuner: Tuner = None,
             register_default_global_status=True):
    """
    Assemble an experiment from an agent, an environment and a control flow.

    :param name: name of the experiment
    :param agent: agent stored as ``self.agent``; its counters may be registered globally
    :param env: environment stored as ``self.env``; its step counters may be registered globally
    :param flow: control flow stored as ``self.flow``
    :param tuner: optional tuner stored as ``self.tuner``
    :param register_default_global_status: when exactly True, register the
        default agent/env counters into the global status collection
    """
    super().__init__(status=StatusWithSingleInfo(obj=self), name=name)
    self.agent = agent
    self.env = env
    self.tuner = tuner
    self.recorder = Recorder(flush_by_split_status=False)
    self.flow = flow

    if register_default_global_status is True:
        # (object, info key, status, exported name), registered in this order.
        default_registrations = (
            (agent, 'predict_counter', 'TRAIN', 'TOTAL_AGENT_TRAIN_SAMPLE_COUNT'),
            (agent, 'predict_counter', 'TEST', 'TOTAL_AGENT_TEST_SAMPLE_COUNT'),
            (agent, 'update_counter', 'TRAIN', 'TOTAL_AGENT_UPDATE_COUNT'),
            (env, 'step', 'TEST', 'TOTAL_ENV_STEP_TEST_SAMPLE_COUNT'),
            (env, 'step', 'TRAIN', 'TOTAL_ENV_STEP_TRAIN_SAMPLE_COUNT'),
        )
        for target, key, status, exported_name in default_registrations:
            get_global_status_collect().register_info_key_status(
                obj=target,
                info_key=key,
                under_status=status,
                return_name=exported_name)
def __init__(self,
             env_spec: EnvSpec,
             parameters: Parameters = None,
             init_state=None,
             name='dynamics_model'):
    """
    Initialise the dynamics model's bookkeeping attributes.

    :param env_spec: environment specification stored as ``self.env_spec``
    :param parameters: parameters stored as ``self.parameters``
    :param init_state: initial value of ``self.state``
    :param name: name of the instance, 'dynamics_model' by default
    """
    super().__init__(name=name)
    self.env_spec = env_spec
    self.state = init_state
    self.parameters = parameters

    # Input/output handles start unset.
    self.state_input = self.action_input = self.new_state_output = None

    self.recorder = Recorder(flush_by_split_status=False)
    self._status = StatusWithSingleInfo(obj=self)
def __init__(self, name: str = 'env', copy_from_env=None):
    """
    Set up an environment, optionally copying state from another one.

    :param name: name of the environment instance, 'env' by default
    :type name: str
    :param copy_from_env: optional environment whose action/observation space,
        trajectory-level step count, last reset point and env_spec are copied
        onto this instance
    :type copy_from_env: Env
    """
    super(Env, self).__init__(status=StatusWithSubInfo(obj=self), name=name)
    self.action_space = None
    self.observation_space = None
    self.trajectory_level_step_count = 0
    self.recorder = Recorder(default_obj=self)
    self._last_reset_point = 0
    self.total_step_count_fn = lambda: self._status.group_specific_info_key(
        info_key='step', group_way='sum')
    self.env_spec = None
    if copy_from_env:
        assert isinstance(copy_from_env, Env)
        self.action_space = copy_from_env.action_space
        self.observation_space = copy_from_env.observation_space
        self.trajectory_level_step_count = copy_from_env.trajectory_level_step_count
        # Fix: this value used to be written into trajectory_level_step_count,
        # clobbering the step count and leaving _last_reset_point at 0.
        self._last_reset_point = copy_from_env._last_reset_point
        self.env_spec = copy_from_env.env_spec
def __init__(self, name='ModifiedHalfCheetah'):
    """
    Initialise the modified half-cheetah environment.

    Statement order is kept deliberately: the attributes are set before the
    MuJoCo base initialiser runs, and ``self.init()`` is called last.

    :param name: name of the environment instance
    :type name: str
    """
    print('====> Initiating Modified Half Cheetah with observation space Box(18,)')
    Basic.__init__(self, name=name, status=StatusWithSubInfo(obj=self))

    self.action_space = self.observation_space = None
    self._last_reset_point = 0
    self.trajectory_level_step_count = 0
    self.recorder = Recorder(default_obj=self)
    # Callable that sums the 'step' info key through the status object.
    self.total_step_count_fn = lambda: self._status.group_specific_info_key(
        info_key='step', group_way='sum')
    self.env_spec = None
    self._inited_flag = False  # avoid multiple inits

    # The model XML is resolved relative to this file's directory.
    here = os.path.dirname(os.path.realpath(__file__))
    model_xml = '%s/assets/modified_half_cheetah.xml' % here
    # NOTE(review): the positional 5 is presumably the frame skip; confirm against MujocoEnv.
    mujoco_env.MujocoEnv.__init__(self, model_xml, 5)
    utils.EzPickle.__init__(self)
    self.init()
def __init__(self,
             env_spec: EnvSpec,
             name: str = 'algo',
             warm_up_trajectories_number=0):
    """
    Initialise the algorithm with its environment specification.

    :param env_spec: environment specifications
    :type env_spec: EnvSpec
    :param name: name of the algorithm, 'algo' by default
    :type name: str
    :param warm_up_trajectories_number: how many trajectories are used to warm
        up the training
    :type warm_up_trajectories_number: int
    """
    algo_status = StatusWithSubInfo(obj=self)
    super().__init__(status=algo_status, name=name)
    self.env_spec = env_spec
    # Starts with an empty parameter set.
    self.parameters = Parameters(dict())
    self.recorder = Recorder(default_obj=self)
    self.warm_up_trajectories_number = warm_up_trajectories_number
def __init__(self,
             env_spec: EnvSpec,
             parameters: Parameters = None,
             init_state=None,
             name='dynamics_model',
             state_input_scaler: DataScaler = None,
             action_input_scaler: DataScaler = None,
             state_output_scaler: DataScaler = None):
    """
    Initialise the dynamics model and its data scalers.

    Each scaler that is not supplied (or is falsy) is replaced by an
    ``IdenticalDataScaler`` sized from ``env_spec``.

    :param env_spec: environment specifications, such as observation space and action space
    :type env_spec: EnvSpec
    :param parameters: parameters
    :type parameters: Parameters
    :param init_state: initial state of the dynamics model
    :param name: name of the instance, 'dynamics_model' by default
    :type name: str
    :param state_input_scaler: data preprocessing scaler of state input
    :type state_input_scaler: DataScaler
    :param action_input_scaler: data preprocessing scaler of action input
    :type action_input_scaler: DataScaler
    :param state_output_scaler: data preprocessing scaler of state output
    :type state_output_scaler: DataScaler
    """
    super().__init__(name=name)
    self.env_spec = env_spec
    self.state = init_state
    self.parameters = parameters

    # Input/output handles start unset.
    self.state_input = self.action_input = self.new_state_output = None

    self.recorder = Recorder(flush_by_split_status=False, default_obj=self)
    self._status = StatusWithSingleInfo(obj=self)

    # `or` only builds the identity scaler when the supplied one is falsy.
    self.state_input_scaler = state_input_scaler or IdenticalDataScaler(
        dims=env_spec.flat_obs_dim)
    self.action_input_scaler = action_input_scaler or IdenticalDataScaler(
        dims=env_spec.flat_action_dim)
    self.state_output_scaler = state_output_scaler or IdenticalDataScaler(
        dims=env_spec.flat_obs_dim)
def test_register(self):
    """
    Register the same attributes on two recorders and check that both log
    them and that the recorders do not share their internal containers.
    """
    obj = Foo()

    def _register_val_and_loss(recorder):
        # 'val' is read through a getter, 'loss' is registered as static.
        recorder.register_logging_attribute_by_record(
            obj=obj,
            attr_name='val',
            get_method=lambda x: x['obj'].get_val(),
            static_flag=False)
        recorder.register_logging_attribute_by_record(
            obj=obj,
            attr_name='loss',
            static_flag=True)

    a = Recorder(flush_by_split_status=False)
    _register_val_and_loss(a)
    a.record()
    print(a._obj_log)
    for attr_name in ('val', 'loss'):
        self.assertTrue(attr_name in a._obj_log[obj])
    obj.loss = 10.0
    a.record()

    b = Recorder(flush_by_split_status=False)
    _register_val_and_loss(b)
    b.record()
    for attr_name in ('val', 'loss'):
        self.assertTrue(attr_name in b._obj_log[obj])
    obj.loss = 10.0
    b.record()

    # The two recorders must own distinct containers.
    self.assertTrue(b._obj_log is not a._obj_log)
    self.assertTrue(b._registered_log_attr_by_get_dict is not a._registered_log_attr_by_get_dict)
def __init__(self, env_spec: EnvSpec, name: str = 'algo'):
    """
    Initialise the algorithm with its environment specification.

    :param env_spec: environment specification stored as ``self.env_spec``
    :param name: name of the algorithm, 'algo' by default
    """
    algo_status = StatusWithSubInfo(obj=self)
    super().__init__(status=algo_status, name=name)
    self.env_spec = env_spec
    self.recorder = Recorder()
class Agent(Basic):
    """
    Agent that couples an environment, an algorithm and a sampler, and exposes
    train / test / predict / sample entry points on top of them.
    """
    STATUS_LIST = ('NOT_INIT', 'JUST_INITED', 'TRAIN', 'TEST')
    INIT_STATUS = 'NOT_INIT'
    required_key_dict = {}

    @init_func_arg_record_decorator()
    @typechecked
    def __init__(
            self,
            name,
            env: Env,
            algo: Algo,
            env_spec: EnvSpec,
            sampler: Sampler = None,
            noise_adder: AgentActionNoiseWrapper = None,
            exploration_strategy: ExplorationStrategy = None,
            algo_saving_scheduler: EventScheduler = None):
        """
        :param name: the name of the agent instance
        :type name: str
        :param env: environment that interacts with agent
        :type env: Env
        :param algo: algorithm of the agent
        :type algo: Algo
        :param env_spec: environment specifications: action space and observation space
        :type env_spec: EnvSpec
        :param sampler: sampler; when falsy, a ``Sampler`` named
            ``'<name>_sampler'`` is created from ``env_spec``
        :type sampler: Sampler
        :param noise_adder: add action noise for exploration in action space
        :type noise_adder: AgentActionNoiseWrapper
        :param exploration_strategy: exploration strategy in action space
        :type exploration_strategy: ExplorationStrategy
        :param algo_saving_scheduler: scheduler consulted after each training
            call to decide whether the algorithm is saved
        :type algo_saving_scheduler: EventScheduler
        """
        super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))
        self.parameters = Parameters(parameters=dict())
        self.total_test_samples = 0
        self.total_train_samples = 0
        self.env = env
        self.algo = algo
        self._env_step_count = 0
        self.recorder = Recorder()
        self.env_spec = env_spec
        if exploration_strategy:
            assert isinstance(exploration_strategy, ExplorationStrategy)
            self.explorations_strategy = exploration_strategy
        else:
            self.explorations_strategy = None
        # Assigned exactly once: the former early `self.sampler = sampler` could
        # store None and was always overwritten by this line.
        self.sampler = sampler if sampler else Sampler(
            env_spec=env_spec, name='{}_sampler'.format(name))
        self.noise_adder = noise_adder
        self.algo_saving_scheduler = algo_saving_scheduler

    @register_counter_info_to_status_decorator(increment=1,
                                               info_key='update_counter',
                                               under_status='TRAIN')
    def train(self, *args, **kwargs):
        """
        train the agent

        All arguments are forwarded to ``self.algo.train``.

        :return: False only if the memory buffer did not have enough data to
            train; otherwise nothing is returned
        :rtype: bool
        """
        self.set_status('TRAIN')
        self.algo.set_status('TRAIN')
        ConsoleLogger().print('info', 'train agent:')
        try:
            res = self.algo.train(*args, **kwargs)
        except MemoryBufferLessThanBatchSizeError:
            ConsoleLogger().print(
                'warning',
                'memory buffer did not have enough data to train, skip training'
            )
            return False
        ConsoleLogger().print('info', res)
        # Save the algorithm only when a scheduler exists and reports exactly True.
        if self.algo_saving_scheduler and self.algo_saving_scheduler.value() is True:
            self.algo.save(
                global_step=self._status.get_specific_info_key_status(
                    info_key='update_counter', under_status='TRAIN'))

    def test(self, sample_count, sample_trajectory_flag: bool = False):
        """
        test the agent

        :param sample_count: how many transitions/trajectories used to evaluate the agent's performance
        :type sample_count: int
        :param sample_trajectory_flag: True for sampling trajectory instead of transitions
        :type sample_trajectory_flag: bool
        """
        self.set_status('TEST')
        self.algo.set_status('TEST')
        ConsoleLogger().print(
            'info', 'test: agent with {},sample_trajectory_flag {}'.format(
                sample_count, sample_trajectory_flag))
        if sample_trajectory_flag is True:
            # Sample one trajectory at a time; the remaining budget shrinks by
            # len(res) after each call.
            left_sample_count = sample_count
            while left_sample_count > 0:
                res = self.sample(env=self.env,
                                  sample_count=1,
                                  sample_type='trajectory',
                                  store_flag=False,
                                  in_which_status='TEST')
                self.total_test_samples += len(res)
                left_sample_count -= len(res)
        else:
            res = self.sample(env=self.env,
                              sample_count=sample_count,
                              sample_type='transition',
                              store_flag=False,
                              in_which_status='TEST')
            self.total_test_samples += len(res)

    @register_counter_info_to_status_decorator(increment=1,
                                               info_key='predict_counter',
                                               under_status=('TRAIN', 'TEST'),
                                               ignore_wrong_status=True)
    def predict(self, **kwargs):
        """
        predict the action given the state

        Outside of testing, the exploration strategy (if any) takes precedence,
        then the noise adder (if any); otherwise the algorithm predicts directly.

        :param kwargs: rest parameters, include key: obs
        :return: predicted action
        :rtype: ndarray
        """
        if self.explorations_strategy and not self.is_testing:
            return self.explorations_strategy.predict(**kwargs, algo=self.algo)
        if self.noise_adder and not self.is_testing:
            # Noisy actions are clipped back into the action space.
            return self.env_spec.action_space.clip(
                self.noise_adder(self.algo.predict(**kwargs)))
        return self.algo.predict(**kwargs)

    @register_counter_info_to_status_decorator(increment=1,
                                               info_key='sample_counter',
                                               under_status=('TRAIN', 'TEST'),
                                               ignore_wrong_status=True)
    def sample(
        self,
        env,
        sample_count: int,
        in_which_status: str,
        store_flag=False,
        sample_type: str = 'transition'
    ) -> (TransitionData, TrajectoryData):
        """
        sample a certain number of data from environment

        :param env: environment to sample
        :param sample_count: int, sample count
        :param in_which_status: string, status set on the agent, the environment
            and the algorithm before sampling
        :param store_flag: to store environment samples or not, default False
        :param sample_type: the type of sample, 'transition' by default
        :return: sample data from environment
        :rtype: some subclass of SampleData: TrajectoryData or TransitionData
        """
        self.set_status(in_which_status)
        env.set_status(in_which_status)
        self.algo.set_status(in_which_status)
        ConsoleLogger().print(
            'info', "agent sampled {} {} under status {}".format(
                sample_count, sample_type, self.get_status()))
        batch_data = self.sampler.sample(agent=self,
                                         env=env,
                                         sample_type=sample_type,
                                         in_which_status=in_which_status,
                                         sample_count=sample_count)
        if store_flag is True:
            self.store_samples(samples=batch_data)
        # TODO: with transition/trajectory data here, are the mean or sum
        # results still valid?
        ConsoleLogger().print(
            'info', "sample: mean reward {}, sum reward {}".format(
                batch_data.get_mean_of(set_name='reward_set'),
                batch_data.get_sum_of(set_name='reward_set')))
        self.recorder.append_to_obj_log(
            obj=self,
            attr_name='average_reward',
            status_info=self.get_status(),
            log_val=batch_data.get_mean_of('reward_set'))
        self.recorder.append_to_obj_log(
            obj=self,
            attr_name='sum_reward',
            status_info=self.get_status(),
            log_val=batch_data.get_sum_of('reward_set'))
        return batch_data

    def init(self):
        """
        Initialize the algorithm, and set status to 'JUST_INITED'.
        """
        self.algo.init()
        self.set_status('JUST_INITED')

    @typechecked
    def store_samples(self, samples: SampleData):
        """
        store the samples into memory/replay buffer if the algorithm that agent hold need to do so, like DQN, DDPG

        :param samples: sample data of the experiment
        :type samples: SampleData
        """
        self.algo.append_to_memory(samples=samples)

    @property
    def is_training(self):
        """
        Check whether the agent is training. Return a boolean value.

        :return: true if the agent is training
        :rtype: bool
        """
        return self.get_status()['status'] == 'TRAIN'

    @property
    def is_testing(self):
        """
        Check whether the agent is testing. Return a boolean value.

        :return: true if the agent is testing
        :rtype: bool
        """
        return self.get_status()['status'] == 'TEST'
def __init__(self, name='foo'):
    """
    Create the object with a loss of 1.0 and its own recorder.

    :param name: name passed to the parent initialiser, 'foo' by default
    """
    super().__init__(name=name)
    self.loss = 1.0
    self.recorder = Recorder(flush_by_split_status=False)
def __init__(self):
    """Create the object with a default-configured recorder."""
    self.recorder = Recorder()