Exemple #1
0
    def __init__(
            self,
            name,
            # config_or_config_dict: (DictConfig, dict),
            env: (Env, Wrapper),
            algo: Algo,
            env_spec: EnvSpec,
            sampler: Sampler = None,
            noise_adder: AgentActionNoiseWrapper = None,
            reset_noise_every_terminal_state=False,
            reset_state_every_sample=False,
            exploration_strategy: ExplorationStrategy = None,
            algo_saving_scheduler: EventScheduler = None):
        """
        Wire an agent to its environment, algorithm and sampling helpers.

        :param name: the name of the agent instance
        :type name: str
        :param env: environment that interacts with the agent
        :type env: Env or Wrapper
        :param algo: algorithm of the agent
        :type algo: Algo
        :param env_spec: environment specifications (action space and observation space)
        :type env_spec: EnvSpec
        :param sampler: sampler to use; a default ``Sampler()`` is created when None
        :type sampler: Sampler
        :param noise_adder: adds action noise for exploration in action space
        :type noise_adder: AgentActionNoiseWrapper
        :param reset_noise_every_terminal_state: stored in ``self.parameters``; per the original
            documentation, reset the noise every sampled trajectory
        :type reset_noise_every_terminal_state: bool
        :param reset_state_every_sample: stored in ``self.parameters``; per the original
            documentation, reset the state every time a sample/rollout is performed
        :type reset_state_every_sample: bool
        :param exploration_strategy: exploration strategy in action space
        :type exploration_strategy: ExplorationStrategy
        :param algo_saving_scheduler: scheduler kept on the agent for deciding when to save the algorithm
        :type algo_saving_scheduler: EventScheduler
        """
        super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))
        # The two reset switches are the only entries kept in the parameter set.
        reset_flags = dict(
            reset_noise_every_terminal_state=reset_noise_every_terminal_state,
            reset_state_every_sample=reset_state_every_sample)
        self.parameters = Parameters(parameters=reset_flags)
        self.env = env
        self.algo = algo
        self._env_step_count = 0
        # Fall back to a default sampler only when the caller passed None.
        self.sampler = Sampler() if sampler is None else sampler
        self.recorder = Recorder(default_obj=self)
        self.env_spec = env_spec
        # Note the attribute is spelled "explorations_strategy"; other code reads it by that name.
        self.explorations_strategy = None
        if exploration_strategy:
            assert isinstance(exploration_strategy, ExplorationStrategy)
            self.explorations_strategy = exploration_strategy
        self.noise_adder = noise_adder
        self.algo_saving_scheduler = algo_saving_scheduler
Exemple #2
0
    def __init__(
            self,
            name,
            # config_or_config_dict: (DictConfig, dict),
            env: Env,
            algo: Algo,
            env_spec: EnvSpec,
            sampler: Sampler = None,
            noise_adder: AgentActionNoiseWrapper = None,
            exploration_strategy: ExplorationStrategy = None,
            algo_saving_scheduler: EventScheduler = None):
        """
        Create an agent bound to an environment, an algorithm and a sampler.

        :param name: the name of the agent instance
        :type name: str
        :param env: environment that interacts with agent
        :type env: Env
        :param algo: algorithm of the agent
        :type algo: Algo
        :param env_spec: environment specifications: action space and observation space
        :type env_spec: EnvSpec
        :param sampler: sampler; when falsy a ``Sampler`` named ``'<name>_sampler'`` is built from ``env_spec``
        :type sampler: Sampler
        :param noise_adder: add action noise for exploration in action space
        :type noise_adder: AgentActionNoiseWrapper
        :param exploration_strategy: exploration strategy in action space
        :type exploration_strategy: ExplorationStrategy
        :param algo_saving_scheduler: scheduler kept on the agent for deciding when to save the algorithm
        :type algo_saving_scheduler: EventScheduler
        """
        super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))
        self.parameters = Parameters(parameters=dict())
        # Running totals of collected samples, split by purpose.
        self.total_test_samples = 0
        self.total_train_samples = 0
        self.env = env
        self.algo = algo
        self._env_step_count = 0
        self.recorder = Recorder()
        self.env_spec = env_spec
        if exploration_strategy:
            assert isinstance(exploration_strategy, ExplorationStrategy)
            self.explorations_strategy = exploration_strategy
        else:
            self.explorations_strategy = None
        # The sampler is assigned exactly once, after the default is resolved;
        # the earlier unconditional store (possibly of None) was dead code.
        self.sampler = sampler if sampler else Sampler(
            env_spec=env_spec, name='{}_sampler'.format(name))
        self.noise_adder = noise_adder
        self.algo_saving_scheduler = algo_saving_scheduler
Exemple #3
0
 def __init__(self, name: str = 'env'):
     """
     Set up an environment with empty spaces and a fresh recorder.

     :param name: name of the environment instance, 'env' by default
     :type name: str
     """
     initial_status = StatusWithSubInfo(obj=self)
     super(Env, self).__init__(status=initial_status, name=name)
     # Spaces and the step counter start unset at construction time.
     self.action_space = self.observation_space = None
     self.step_count = None
     self.recorder = Recorder()
     self._last_reset_point = 0
     # Callable that asks the status object for the 'step' info key grouped by 'sum'.
     self.total_step_count_fn = lambda: self._status.group_specific_info_key(
         info_key='step', group_way='sum')
    def __init__(self,
                 name: str,
                 agent: Agent,
                 env: Env,
                 flow: Flow,
                 tuner: Tuner = None,
                 register_default_global_status=True):
        """
        Assemble an experiment from its agent, environment and control flow.

        :param name: name of experiment
        :type name: str
        :param agent: agent of experiment
        :type agent: Agent
        :param env: environment of experiment
        :type env: Env
        :param flow: control flow of the experiment
        :type flow: Flow
        :param tuner: hyper-parameter tuning method
        :type tuner: Tuner
        :param register_default_global_status: when exactly True, register the default
            agent/env counters into the global status collection
        :type register_default_global_status: bool
        """
        super().__init__(status=StatusWithSingleInfo(obj=self), name=name)
        self.agent, self.env, self.tuner = agent, env, tuner
        self.recorder = Recorder(flush_by_split_status=False)
        self.flow = flow
        if register_default_global_status is not True:
            return
        # (object, info key, status, exported name) for every default counter.
        default_counters = (
            (agent, 'predict_counter', 'TRAIN', 'TOTAL_AGENT_TRAIN_SAMPLE_COUNT'),
            (agent, 'predict_counter', 'TEST', 'TOTAL_AGENT_TEST_SAMPLE_COUNT'),
            (agent, 'update_counter', 'TRAIN', 'TOTAL_AGENT_UPDATE_COUNT'),
            (env, 'step', 'TEST', 'TOTAL_ENV_STEP_TEST_SAMPLE_COUNT'),
            (env, 'step', 'TRAIN', 'TOTAL_ENV_STEP_TRAIN_SAMPLE_COUNT'),
        )
        for tracked_obj, key, status_name, exported_name in default_counters:
            get_global_status_collect().register_info_key_status(
                obj=tracked_obj,
                info_key=key,
                under_status=status_name,
                return_name=exported_name)
Exemple #5
0
    def __init__(
            self,
            name,
            # config_or_config_dict: (DictConfig, dict),
            env: Env,
            algo: Algo,
            env_spec: EnvSpec,
            sampler: Sampler = None,
            noise_adder: AgentActionNoiseWrapper = None,
            exploration_strategy: ExplorationStrategy = None,
            algo_saving_scheduler: EventSchedule = None):
        """
        Create an agent bound to an environment, an algorithm and a sampler.

        :param name: the name of the agent instance
        :param env: environment stored on the agent as ``self.env``
        :param algo: algorithm stored on the agent as ``self.algo``
        :param env_spec: environment specification, also used to build the default sampler
        :param sampler: sampler; when falsy a ``Sampler`` named ``'<name>_sampler'`` is created
        :param noise_adder: optional action-noise wrapper stored as ``self.noise_adder``
        :param exploration_strategy: optional strategy; must be an ``ExplorationStrategy`` when given
        :param algo_saving_scheduler: optional scheduler stored as ``self.algo_saving_scheduler``
        """
        super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))
        self.parameters = Parameters(parameters=dict())
        # Running totals of collected samples, split by purpose.
        self.total_test_samples = 0
        self.total_train_samples = 0
        self.env = env
        self.algo = algo
        self._env_step_count = 0
        self.recorder = Recorder()
        self.env_spec = env_spec
        if exploration_strategy:
            assert isinstance(exploration_strategy, ExplorationStrategy)
            self.explorations_strategy = exploration_strategy
        else:
            self.explorations_strategy = None
        # Single assignment of the sampler: the previous extra store of the raw
        # (possibly None) argument was overwritten here and served no purpose.
        self.sampler = sampler if sampler else Sampler(
            env_spec=env_spec, name='{}_sampler'.format(name))
        self.noise_adder = noise_adder
        self.algo_saving_scheduler = algo_saving_scheduler
Exemple #6
0
    def __init__(self, env_spec: EnvSpec, name: str = 'algo'):
        """
        Initialise the algorithm base with its status, recorder and env spec.

        :param env_spec: environment specifications
        :type env_spec: EnvSpec
        :param name: name of the algorithm
        :type name: str
        """
        algo_status = StatusWithSubInfo(obj=self)
        super().__init__(status=algo_status, name=name)
        self.recorder = Recorder()
        self.env_spec = env_spec
Exemple #7
0
    def __init__(self,
                 name: str,
                 agent: Agent,
                 env: Env,
                 flow: Flow,
                 tuner: Tuner = None,
                 register_default_global_status=True):
        """
        Build an experiment and optionally register its default global counters.

        :param name: name of the experiment
        :param agent: agent stored as ``self.agent``
        :param env: environment stored as ``self.env``
        :param flow: control flow stored as ``self.flow``
        :param tuner: optional tuner stored as ``self.tuner``
        :param register_default_global_status: when exactly True, the agent's
            predict/update counters and the env's step counters are registered
            with the global status collector
        """
        super().__init__(status=StatusWithSingleInfo(obj=self), name=name)
        self.agent = agent
        self.env = env
        self.tuner = tuner
        self.recorder = Recorder(flush_by_split_status=False)
        self.flow = flow
        if register_default_global_status is True:
            # One keyword set per default counter, registered in this order.
            registrations = [
                dict(obj=agent, info_key='predict_counter', under_status='TRAIN',
                     return_name='TOTAL_AGENT_TRAIN_SAMPLE_COUNT'),
                dict(obj=agent, info_key='predict_counter', under_status='TEST',
                     return_name='TOTAL_AGENT_TEST_SAMPLE_COUNT'),
                dict(obj=agent, info_key='update_counter', under_status='TRAIN',
                     return_name='TOTAL_AGENT_UPDATE_COUNT'),
                dict(obj=env, info_key='step', under_status='TEST',
                     return_name='TOTAL_ENV_STEP_TEST_SAMPLE_COUNT'),
                dict(obj=env, info_key='step', under_status='TRAIN',
                     return_name='TOTAL_ENV_STEP_TRAIN_SAMPLE_COUNT'),
            ]
            for registration in registrations:
                get_global_status_collect().register_info_key_status(**registration)
Exemple #8
0
 def __init__(self, env_spec: EnvSpec, parameters: Parameters = None, init_state=None, name='dynamics_model'):
     """
     Initialise the dynamics model's bookkeeping attributes.

     :param env_spec: environment specification stored as ``self.env_spec``
     :param parameters: parameters object stored as ``self.parameters``
     :param init_state: initial value of ``self.state``
     :param name: name of the instance, 'dynamics_model' by default
     """
     super().__init__(name=name)
     self.env_spec, self.state, self.parameters = env_spec, init_state, parameters
     # Input/output slots start empty.
     self.state_input = self.action_input = self.new_state_output = None
     self.recorder = Recorder(flush_by_split_status=False)
     # The status object is set here, after the parent initialiser has run.
     self._status = StatusWithSingleInfo(obj=self)
Exemple #9
0
 def __init__(self, name: str = 'env', copy_from_env=None):
     """
     Initialise the environment, optionally cloning state from another Env.

     :param name: name of the environment instance, 'env' by default
     :type name: str
     :param copy_from_env: when truthy, an ``Env`` whose spaces, step counter,
         last reset point and env spec are copied onto this instance
     :type copy_from_env: Env
     """
     super(Env, self).__init__(status=StatusWithSubInfo(obj=self),
                               name=name)
     self.action_space = None
     self.observation_space = None
     self.trajectory_level_step_count = 0
     self.recorder = Recorder(default_obj=self)
     self._last_reset_point = 0
     # Callable that asks the status object for the 'step' info key grouped by 'sum'.
     self.total_step_count_fn = lambda: self._status.group_specific_info_key(
         info_key='step', group_way='sum')
     self.env_spec = None
     if copy_from_env:
         assert isinstance(copy_from_env, Env)
         self.action_space = copy_from_env.action_space
         self.observation_space = copy_from_env.observation_space
         self.trajectory_level_step_count = copy_from_env.trajectory_level_step_count
         # Copy the reset point into its own attribute; it used to overwrite
         # trajectory_level_step_count, losing both values.
         self._last_reset_point = copy_from_env._last_reset_point
         self.env_spec = copy_from_env.env_spec
Exemple #10
0
    def __init__(self, name='ModifiedHalfCheetah'):
        """
        Initialise the modified half-cheetah environment.

        Runs the ``Basic``, ``MujocoEnv`` and ``EzPickle`` initialisers explicitly,
        sets the bookkeeping attributes in between, and finally calls ``self.init()``.

        :param name: name of the environment instance, 'ModifiedHalfCheetah' by default
        :type name: str
        """
        print('====> Initiating Modified Half Cheetah with observation space Box(18,)')

        # Each parent initialiser is invoked explicitly rather than via super().
        Basic.__init__(self, name=name, status=StatusWithSubInfo(obj=self))

        # Bookkeeping attributes are set before MujocoEnv.__init__ runs.
        # NOTE(review): presumably because the Mujoco initialiser touches them - confirm.
        self.action_space = None
        self.observation_space = None
        self._last_reset_point = 0
        self.trajectory_level_step_count = 0
        self.recorder = Recorder(default_obj=self)
        self.total_step_count_fn = lambda: \
            self._status.group_specific_info_key(info_key='step', group_way='sum')  # record step
        self.env_spec = None
        self._inited_flag = False   # avoid multiple inits

        # The model XML is resolved relative to the directory containing this file.
        dir_path = os.path.dirname(os.path.realpath(__file__))
        # NOTE(review): the positional 5 is presumably MujocoEnv's frame_skip - confirm.
        mujoco_env.MujocoEnv.__init__(self, '%s/assets/modified_half_cheetah.xml' % dir_path, 5)
        utils.EzPickle.__init__(self)

        self.init()
Exemple #11
0
    def __init__(self,
                 env_spec: EnvSpec,
                 name: str = 'algo',
                 warm_up_trajectories_number=0):
        """
        Initialise the algorithm base with status, parameters and recorder.

        :param env_spec: environment specifications
        :type env_spec: EnvSpec
        :param name: name of the algorithm
        :type name: str
        :param warm_up_trajectories_number: how many trajectories are used to warm up the training
        :type warm_up_trajectories_number: int
        """
        algo_status = StatusWithSubInfo(obj=self)
        super().__init__(status=algo_status, name=name)
        self.env_spec = env_spec
        # Starts with an empty parameter set.
        empty_parameters = dict()
        self.parameters = Parameters(empty_parameters)
        self.recorder = Recorder(default_obj=self)
        self.warm_up_trajectories_number = warm_up_trajectories_number
    def __init__(self,
                 env_spec: EnvSpec,
                 parameters: Parameters = None,
                 init_state=None,
                 name='dynamics_model',
                 state_input_scaler: DataScaler = None,
                 action_input_scaler: DataScaler = None,
                 state_output_scaler: DataScaler = None):
        """
        Initialise the dynamics model and resolve its data scalers.

        :param env_spec: environment specifications, such as observation space and action space
        :type env_spec: EnvSpec
        :param parameters: parameters
        :type parameters: Parameters
        :param init_state: initial state of the dynamics model
        :param name: name of instance, 'dynamics_model' by default
        :type name: str
        :param state_input_scaler: scaler for the state input; an ``IdenticalDataScaler``
            sized by ``env_spec.flat_obs_dim`` is used when falsy
        :type state_input_scaler: DataScaler
        :param action_input_scaler: scaler for the action input; an ``IdenticalDataScaler``
            sized by ``env_spec.flat_action_dim`` is used when falsy
        :type action_input_scaler: DataScaler
        :param state_output_scaler: scaler for the state output; an ``IdenticalDataScaler``
            sized by ``env_spec.flat_obs_dim`` is used when falsy
        :type state_output_scaler: DataScaler
        """
        super().__init__(name=name)
        self.env_spec, self.state, self.parameters = env_spec, init_state, parameters
        # Input/output slots start empty.
        self.state_input = self.action_input = self.new_state_output = None
        self.recorder = Recorder(flush_by_split_status=False, default_obj=self)
        self._status = StatusWithSingleInfo(obj=self)
        # Defaults are built lazily, so env_spec dimensions are only read when needed.
        self.state_input_scaler = state_input_scaler or IdenticalDataScaler(
            dims=env_spec.flat_obs_dim)
        self.action_input_scaler = action_input_scaler or IdenticalDataScaler(
            dims=env_spec.flat_action_dim)
        self.state_output_scaler = state_output_scaler or IdenticalDataScaler(
            dims=env_spec.flat_obs_dim)
Exemple #13
0
    def test_register(self):
        """
        Two recorders registered on the same object must log independently.

        Each recorder registers 'val' (through a getter) and 'loss' (static),
        records twice, and the test then checks the two recorders do not share
        their log or their registration dictionary.
        """
        target = Foo()

        def build_recorder():
            # Fresh recorder with both attributes of `target` registered.
            rec = Recorder(flush_by_split_status=False)
            rec.register_logging_attribute_by_record(
                obj=target,
                attr_name='val',
                get_method=lambda x: x['obj'].get_val(),
                static_flag=False)
            rec.register_logging_attribute_by_record(
                obj=target, attr_name='loss', static_flag=True)
            return rec

        first = build_recorder()
        first.record()
        print(first._obj_log)
        for attr in ('val', 'loss'):
            self.assertTrue(attr in first._obj_log[target])
        target.loss = 10.0
        first.record()

        second = build_recorder()
        second.record()
        for attr in ('val', 'loss'):
            self.assertTrue(attr in second._obj_log[target])
        target.loss = 10.0
        second.record()

        # Internal containers must be per-instance, never shared.
        self.assertTrue(second._obj_log is not first._obj_log)
        self.assertTrue(second._registered_log_attr_by_get_dict
                        is not first._registered_log_attr_by_get_dict)
Exemple #14
0
 def __init__(self, env_spec: EnvSpec, name: str = 'algo'):
     """
     Initialise the algorithm base with a status object, recorder and env spec.

     :param env_spec: environment specifications
     :param name: name of the algorithm, 'algo' by default
     """
     algo_status = StatusWithSubInfo(obj=self)
     super().__init__(status=algo_status, name=name)
     self.recorder = Recorder()
     self.env_spec = env_spec
Exemple #15
0
class Agent(Basic):
    """
    Couples an algorithm with an environment: it samples data, predicts
    actions (optionally with exploration or action noise), trains and tests.
    """
    STATUS_LIST = ('NOT_INIT', 'JUST_INITED', 'TRAIN', 'TEST')
    INIT_STATUS = 'NOT_INIT'
    required_key_dict = {}

    @init_func_arg_record_decorator()
    @typechecked
    def __init__(
            self,
            name,
            # config_or_config_dict: (DictConfig, dict),
            env: Env,
            algo: Algo,
            env_spec: EnvSpec,
            sampler: Sampler = None,
            noise_adder: AgentActionNoiseWrapper = None,
            exploration_strategy: ExplorationStrategy = None,
            algo_saving_scheduler: EventScheduler = None):
        """
        Create an agent bound to an environment, an algorithm and a sampler.

        :param name: the name of the agent instance
        :type name: str
        :param env: environment that interacts with agent
        :type env: Env
        :param algo: algorithm of the agent
        :type algo: Algo
        :param env_spec: environment specifications: action space and observation space
        :type env_spec: EnvSpec
        :param sampler: sampler; when falsy a ``Sampler`` named ``'<name>_sampler'`` is built from ``env_spec``
        :type sampler: Sampler
        :param noise_adder: add action noise for exploration in action space
        :type noise_adder: AgentActionNoiseWrapper
        :param exploration_strategy: exploration strategy in action space
        :type exploration_strategy: ExplorationStrategy
        :param algo_saving_scheduler: scheduler whose ``value()`` decides, after each
            training call, whether the algorithm is saved
        :type algo_saving_scheduler: EventScheduler
        """
        super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))
        self.parameters = Parameters(parameters=dict())
        # Running totals of collected samples, split by purpose.
        self.total_test_samples = 0
        self.total_train_samples = 0
        self.env = env
        self.algo = algo
        self._env_step_count = 0
        self.recorder = Recorder()
        self.env_spec = env_spec
        if exploration_strategy:
            assert isinstance(exploration_strategy, ExplorationStrategy)
            self.explorations_strategy = exploration_strategy
        else:
            self.explorations_strategy = None
        # The sampler is assigned exactly once, after the default is resolved;
        # the earlier unconditional store (possibly of None) was dead code.
        self.sampler = sampler if sampler else Sampler(
            env_spec=env_spec, name='{}_sampler'.format(name))
        self.noise_adder = noise_adder
        self.algo_saving_scheduler = algo_saving_scheduler

    # @record_return_decorator(which_recorder='self')
    @register_counter_info_to_status_decorator(increment=1,
                                               info_key='update_counter',
                                               under_status='TRAIN')
    def train(self, *args, **kwargs):
        """
        Train the agent by delegating to ``self.algo.train``.

        Sets both the agent and the algorithm to the 'TRAIN' status first. After
        a successful training call the algorithm is saved when the saving
        scheduler is set and its ``value()`` is True.

        :param args: positional arguments forwarded to ``self.algo.train``
        :param kwargs: keyword arguments forwarded to ``self.algo.train``
        :return: False only if the memory buffer did not have enough data to
            train; otherwise nothing is returned (None)
        :rtype: bool or None
        """
        self.set_status('TRAIN')
        self.algo.set_status('TRAIN')
        ConsoleLogger().print('info', 'train agent:')
        try:
            res = self.algo.train(*args, **kwargs)
        except MemoryBufferLessThanBatchSizeError:
            # Not enough data yet is an expected situation: warn and skip.
            ConsoleLogger().print(
                'warning',
                'memory buffer did not have enough data to train, skip training'
            )
            return False

        ConsoleLogger().print('info', res)

        if self.algo_saving_scheduler and self.algo_saving_scheduler.value(
        ) is True:
            # The current TRAIN update counter is used as the global step of the save.
            self.algo.save(
                global_step=self._status.get_specific_info_key_status(
                    info_key='update_counter', under_status='TRAIN'))

    # @record_return_decorator(which_recorder='self')
    def test(self, sample_count, sample_trajectory_flag: bool = False):
        """
        Test the agent by sampling from ``self.env`` without storing the samples.

        :param sample_count: how many transitions/trajectories used to evaluate the agent's performance
        :type sample_count: int
        :param sample_trajectory_flag: True for sampling trajectories instead of transitions
        :type sample_trajectory_flag: bool
        """
        self.set_status('TEST')
        self.algo.set_status('TEST')
        ConsoleLogger().print(
            'info', 'test: agent with {},sample_trajectory_flag {}'.format(
                sample_count, sample_trajectory_flag))
        if sample_trajectory_flag is True:
            # Trajectories are sampled one at a time until the budget is used up.
            # NOTE(review): the loop relies on len(res) being positive - confirm
            # the sampler never returns an empty result here.
            left_sample_count = sample_count
            while left_sample_count > 0:
                res = self.sample(env=self.env,
                                  sample_count=1,
                                  sample_type='trajectory',
                                  store_flag=False,
                                  in_which_status='TEST')
                self.total_test_samples += len(res)
                left_sample_count -= len(res)

        else:
            res = self.sample(env=self.env,
                              sample_count=sample_count,
                              sample_type='transition',
                              store_flag=False,
                              in_which_status='TEST')
            self.total_test_samples += len(res)

    @register_counter_info_to_status_decorator(increment=1,
                                               info_key='predict_counter',
                                               under_status=('TRAIN', 'TEST'),
                                               ignore_wrong_status=True)
    def predict(self, **kwargs):
        """
        Predict the action given the state.

        Outside of testing, the exploration strategy (if any) takes precedence;
        otherwise the noise adder (if any) perturbs the algorithm's prediction
        and the result is clipped to the action space. While testing, or when
        neither is set, the algorithm's raw prediction is returned.

        :param kwargs: rest parameters, include key: obs
        :return: predicted action
        :rtype: ndarray
        """
        if self.explorations_strategy and not self.is_testing:
            return self.explorations_strategy.predict(**kwargs, algo=self.algo)
        else:
            if self.noise_adder and not self.is_testing:
                return self.env_spec.action_space.clip(
                    self.noise_adder(self.algo.predict(**kwargs)))
            else:
                return self.algo.predict(**kwargs)

    @register_counter_info_to_status_decorator(increment=1,
                                               info_key='sample_counter',
                                               under_status=('TRAIN', 'TEST'),
                                               ignore_wrong_status=True)
    def sample(
            self,
            env,
            sample_count: int,
            in_which_status: str,
            store_flag=False,
            sample_type: str = 'transition'
    ) -> (TransitionData, TrajectoryData):
        """
        Sample a certain number of data from the environment.

        The agent, the environment and the algorithm are all switched to
        ``in_which_status`` before sampling. The mean and sum of the sampled
        rewards are printed and appended to the recorder.

        :param env: environment to sample
        :param sample_count: int, sample count
        :param in_which_status: string, status to set on agent, environment and algorithm
        :param store_flag: when exactly True, pass the samples to ``store_samples``; default False
        :param sample_type: the type of sample, 'transition' by default
        :return: sample data from environment
        :rtype: some subclass of SampleData: TrajectoryData or TransitionData
        """
        self.set_status(in_which_status)
        env.set_status(in_which_status)
        self.algo.set_status(in_which_status)
        ConsoleLogger().print(
            'info', "agent sampled {} {} under status {}".format(
                sample_count, sample_type, self.get_status()))
        batch_data = self.sampler.sample(agent=self,
                                         env=env,
                                         sample_type=sample_type,
                                         in_which_status=in_which_status,
                                         sample_count=sample_count)
        if store_flag is True:
            self.store_samples(samples=batch_data)
        # todo when we have transition/ trajectory data here, the mean or sum results are still valid?
        ConsoleLogger().print(
            'info', "sample: mean reward {}, sum reward {}".format(
                batch_data.get_mean_of(set_name='reward_set'),
                batch_data.get_sum_of(set_name='reward_set')))
        self.recorder.append_to_obj_log(
            obj=self,
            attr_name='average_reward',
            status_info=self.get_status(),
            log_val=batch_data.get_mean_of('reward_set'))
        self.recorder.append_to_obj_log(
            obj=self,
            attr_name='sum_reward',
            status_info=self.get_status(),
            log_val=batch_data.get_sum_of('reward_set'))
        return batch_data

    def init(self):
        """
        Initialize the algorithm, and set status to 'JUST_INITED'.
        """
        self.algo.init()
        self.set_status('JUST_INITED')

    @typechecked
    def store_samples(self, samples: SampleData):
        """
        Store the samples into the memory/replay buffer of the algorithm the agent
        holds, for algorithms that need to do so, like DQN, DDPG.

        :param samples: sample data of the experiment
        :type samples: SampleData
        """
        self.algo.append_to_memory(samples=samples)

    @property
    def is_training(self):
        """
        Check whether the agent is training. Return a boolean value.

        :return: true if the agent's current status is 'TRAIN'
        :rtype: bool
        """
        return self.get_status()['status'] == 'TRAIN'

    @property
    def is_testing(self):
        """
        Check whether the agent is testing. Return a boolean value.

        :return: true if the agent's current status is 'TEST'
        :rtype: bool
        """
        return self.get_status()['status'] == 'TEST'
Exemple #16
0
 def __init__(self, name='foo'):
     """
     Create the test helper object with a starting loss and its own recorder.

     :param name: instance name passed to the parent initialiser, 'foo' by default
     """
     super().__init__(name=name)
     initial_loss = 1.0
     self.loss = initial_loss
     own_recorder = Recorder(flush_by_split_status=False)
     self.recorder = own_recorder
Exemple #17
0
 def __init__(self):
     """Attach a freshly constructed default Recorder to the instance."""
     fresh_recorder = Recorder()
     self.recorder = fresh_recorder