Example #1
    def get_output_head(self,
                        head_params: HeadParameters,
                        head_idx: int,
                        loss_weight: float = 1.):
        """
        Given a head type, creates the head and returns it
        :param head_params: the parameters of the head to create. The path to the head class should be given
                            in the following structure: <module_path>:<class_path>
        :param head_idx: the head index
        :param loss_weight: the weight to assign to the head's loss
        :return: the head
        """

        head_params_copy = copy.copy(head_params)
        head_params_copy.activation_function = self.get_activation_function(
            head_params_copy.activation_function)
        return dynamic_import_and_instantiate_module_from_params(
            head_params_copy,
            extra_kwargs={
                'agent_parameters': self.ap,
                'spaces': self.spaces,
                'network_name': self.network_wrapper_name,
                'head_idx': head_idx,
                'loss_weight': loss_weight,
                'is_local': self.network_is_local
            })
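These examples all delegate the actual instantiation to dynamic_import_and_instantiate_module_from_params, which is not shown on this page. As a rough orientation only (not the actual rl_coach implementation), a helper of this kind could be sketched as follows, assuming the parameters object carries a path of the form <module_path>:<class_path> and that its remaining fields map onto the constructor's keyword arguments:

import importlib

def dynamic_import_and_instantiate_module_from_params(params, path=None,
                                                      positional_args=None,
                                                      extra_kwargs=None):
    # Sketch only: resolve "<module_path>:<class_path>" into a class and
    # instantiate it with the parameter fields plus any extra keyword arguments.
    module_path, class_name = (path if path is not None else params.path).split(':')
    cls = getattr(importlib.import_module(module_path), class_name)
    kwargs = dict(vars(params))          # assumes the fields match the constructor
    kwargs.pop('path', None)
    kwargs.update(extra_kwargs or {})
    return cls(*(positional_args or []), **kwargs)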
Example #2
    def get_input_embedder(self, input_name: str, embedder_params: InputEmbedderParameters):
        """
        Given an input embedder parameters class, creates the input embedder and returns it
        :param input_name: the name of the input to the embedder (used for retrieving the shape). The input should
                           be a value within the state or the action.
        :param embedder_params: the parameters of the embedder class
        :return: the embedder instance
        """
        allowed_inputs = copy.copy(self.spaces.state.sub_spaces)
        allowed_inputs["action"] = copy.copy(self.spaces.action)
        allowed_inputs["goal"] = copy.copy(self.spaces.goal)

        if input_name not in allowed_inputs.keys():
            raise ValueError("The key for the input embedder ({}) must match one of the following keys: {}"
                             .format(input_name, allowed_inputs.keys()))

        type = "vector"
        if isinstance(allowed_inputs[input_name], PlanarMapsObservationSpace):
            type = "image"

        embedder_path = 'rl_coach.architectures.tensorflow_components.embedders.' + embedder_params.path[type]
        embedder_params_copy = copy.copy(embedder_params)
        embedder_params_copy.activation_function = self.get_activation_function(embedder_params.activation_function)
        embedder_params_copy.input_rescaling = embedder_params_copy.input_rescaling[type]
        embedder_params_copy.input_offset = embedder_params_copy.input_offset[type]
        embedder_params_copy.name = input_name
        module = dynamic_import_and_instantiate_module_from_params(embedder_params_copy,
                                                                   path=embedder_path,
                                                                   positional_args=[allowed_inputs[input_name].shape])
        return module
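A usage sketch for the method above, assuming the network's state space contains an 'observation' sub-space; the input name and parameter values are illustrative, not taken from a specific preset:

# Hypothetical usage: create an embedder for the 'observation' input.
embedder_params = InputEmbedderParameters(activation_function='relu')
observation_embedder = network.get_input_embedder('observation', embedder_params)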
Example #3
 def get_middleware(self, middleware_params: MiddlewareParameters):
     """
     Given a middleware type, creates the middleware and returns it
     :param middleware_params: the parameters of the middleware class
     :return: the middleware instance
     """
     middleware_params_copy = copy.copy(middleware_params)
     middleware_params_copy.activation_function = self.get_activation_function(middleware_params.activation_function)
     module = dynamic_import_and_instantiate_module_from_params(middleware_params_copy)
     return module
Example #4
    def init_environment_dependent_modules(self) -> None:
        """
        Initialize any modules that depend on knowing information about the environment such as the action space or
        the observation space
        :return: None
        """
        # initialize exploration policy
        self.ap.exploration.action_space = self.spaces.action
        self.exploration_policy = dynamic_import_and_instantiate_module_from_params(
            self.ap.exploration)

        # create all the networks of the agent
        self.networks = self.create_networks()
Example #5
 def get_middleware(self, middleware_params: MiddlewareParameters):
     """
     Given a middleware type, creates the middleware and returns it
     :param middleware_params: the parameters of the middleware class
     :return: the middleware instance
     """
     mod_name = middleware_params.parameterized_class_name
     middleware_path = 'rl_coach.architectures.tensorflow_components.middlewares:' + mod_name
     middleware_params_copy = copy.copy(middleware_params)
     middleware_params_copy.activation_function = utils.get_activation_function(
         middleware_params.activation_function)
     module = dynamic_import_and_instantiate_module_from_params(
         middleware_params_copy, path=middleware_path)
     return module
Example #6
 def get_output_head(self, head_params: HeadParameters, head_idx: int):
     """
     Given a head type, creates the head and returns it
     :param head_params: the parameters of the head to create
     :param head_idx: the head index
     :return: the head
     """
     mod_name = head_params.parameterized_class_name
     head_path = head_params.path
     head_params_copy = copy.copy(head_params)
     head_params_copy.activation_function = utils.get_activation_function(head_params_copy.activation_function)
     return dynamic_import_and_instantiate_module_from_params(head_params_copy, path=head_path, extra_kwargs={
         'agent_parameters': self.ap, 'spaces': self.spaces, 'network_name': self.network_wrapper_name,
         'head_idx': head_idx, 'is_local': self.network_is_local})
Example #7
 def get_middleware(self, middleware_params: MiddlewareParameters):
     """
     Given a middleware type, creates the middleware and returns it
     :param middleware_params: the parameters of the middleware class
     :return: the middleware instance
     """
     mod_name = middleware_params.parameterized_class_name
     middleware_path = middleware_params.path
     middleware_params_copy = copy.copy(middleware_params)
     middleware_params_copy.activation_function = utils.get_activation_function(
         middleware_params.activation_function)
     middleware_params_copy.is_training = self.is_training
     module = dynamic_import_and_instantiate_module_from_params(
         middleware_params_copy, path=middleware_path)
     return module
Example #8
    def __init__(self, action_space: ActionSpace, epsilon_schedule: Schedule,
                 evaluation_epsilon: float,
                 continuous_exploration_policy_parameters: ExplorationParameters=AdditiveNoiseParameters()):
        """
        :param action_space: the action space used by the environment
        :param epsilon_schedule: a schedule for the epsilon values
        :param evaluation_epsilon: the epsilon value to use for evaluation phases
        :param continuous_exploration_policy_parameters: the parameters of the continuous exploration policy to use
                                                         if the e-greedy is used for a continuous policy
        """
        super().__init__(action_space)
        self.epsilon_schedule = epsilon_schedule
        self.evaluation_epsilon = evaluation_epsilon

        if isinstance(self.action_space, BoxActionSpace):
            # for continuous e-greedy (see http://www.cs.ubc.ca/~van/papers/2017-TOG-deepLoco/2017-TOG-deepLoco.pdf)
            continuous_exploration_policy_parameters.action_space = action_space
            self.continuous_exploration_policy = \
                dynamic_import_and_instantiate_module_from_params(continuous_exploration_policy_parameters)

        self.current_random_value = np.random.rand()
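A construction sketch, assuming the __init__ above belongs to the e-greedy exploration policy class (referred to as EGreedy below) and that LinearSchedule(initial_value, final_value, decay_steps) is available from rl_coach.schedules; the concrete values are illustrative:

from rl_coach.schedules import LinearSchedule

# Illustrative values: decay epsilon from 1.0 to 0.01 over 10,000 steps and
# act almost greedily (epsilon=0.001) during evaluation phases.
exploration_policy = EGreedy(action_space=action_space,
                             epsilon_schedule=LinearSchedule(1.0, 0.01, 10000),
                             evaluation_epsilon=0.001)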
Example #9
    def init_environment_dependent_modules(self) -> None:
        """
        Initialize any modules that depend on knowing information about the environment such as the action space or
        the observation space

        :return: None
        """
        # initialize exploration policy
        if isinstance(self.ap.exploration, dict):
            if self.spaces.action.__class__ in self.ap.exploration.keys():
                self.ap.exploration = self.ap.exploration[self.spaces.action.__class__]
            else:
                raise ValueError("The exploration parameters were defined as a mapping between action space types and "
                                 "exploration types, but the action space used by the environment ({}) was not part of "
                                 "the exploration parameters dictionary keys ({})"
                                 .format(self.spaces.action.__class__, list(self.ap.exploration.keys())))
        self.ap.exploration.action_space = self.spaces.action
        self.exploration_policy = dynamic_import_and_instantiate_module_from_params(self.ap.exploration)

        # create all the networks of the agent
        self.networks = self.create_networks()
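The dictionary form of the exploration parameters handled above maps action space classes to exploration parameter objects. A sketch of what such a mapping might look like in a preset; the import paths and the specific pairing are assumptions for illustration:

from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters

# Illustrative mapping: e-greedy for discrete actions, additive noise for continuous ones.
agent_params.exploration = {
    DiscreteActionSpace: EGreedyParameters(),
    BoxActionSpace: AdditiveNoiseParameters(),
}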
Example #10
    def __init__(self,
                 agent_parameters: AgentParameters,
                 parent: Union['LevelManager', 'CompositeAgent'] = None):
        """
        :param agent_parameters: A Preset class instance with all the running parameters
        :param parent: the parent LevelManager or CompositeAgent that contains this agent, if any
        """
        super().__init__()
        self.ap = agent_parameters
        self.task_id = self.ap.task_parameters.task_index
        self.is_chief = self.task_id == 0
        self.shared_memory = type(agent_parameters.task_parameters) == DistributedTaskParameters \
                             and self.ap.memory.shared_memory
        if self.shared_memory:
            self.shared_memory_scratchpad = self.ap.task_parameters.shared_memory_scratchpad
        self.name = agent_parameters.name
        self.parent = parent
        self.parent_level_manager = None
        self.full_name_id = agent_parameters.full_name_id = self.name

        if type(agent_parameters.task_parameters) == DistributedTaskParameters:
            screen.log_title(
                "Creating agent - name: {} task id: {} (may take up to 30 seconds due to "
                "tensorflow wake up time)".format(self.full_name_id,
                                                  self.task_id))
        else:
            screen.log_title("Creating agent - name: {}".format(
                self.full_name_id))
        self.imitation = False
        self.agent_logger = Logger()
        self.agent_episode_logger = EpisodeLogger()

        # get the memory
        # - distributed training + shared memory:
        #   * is chief?  -> create the memory and add it to the scratchpad
        #   * not chief? -> wait for the chief to create the memory and then fetch it
        # - non distributed training / not shared memory:
        #   * create memory
        memory_name = self.ap.memory.path.split(':')[1]
        self.memory_lookup_name = self.full_name_id + '.' + memory_name
        if self.shared_memory and not self.is_chief:
            self.memory = self.shared_memory_scratchpad.get(
                self.memory_lookup_name)
        else:
            # modules
            if agent_parameters.memory.load_memory_from_file_path:
                screen.log_title(
                    "Loading replay buffer from pickle. Pickle path: {}".
                    format(agent_parameters.memory.load_memory_from_file_path))
                self.memory = read_pickle(
                    agent_parameters.memory.load_memory_from_file_path)
            else:
                self.memory = dynamic_import_and_instantiate_module_from_params(
                    self.ap.memory)

            if self.shared_memory and self.is_chief:
                self.shared_memory_scratchpad.add(self.memory_lookup_name,
                                                  self.memory)

        # set devices
        if type(agent_parameters.task_parameters) == DistributedTaskParameters:
            self.has_global = True
            self.replicated_device = agent_parameters.task_parameters.device
            self.worker_device = "/job:worker/task:{}".format(self.task_id)
        else:
            self.has_global = False
            self.replicated_device = None
            self.worker_device = ""
        if agent_parameters.task_parameters.use_cpu:
            self.worker_device += "/cpu:0"
        else:
            self.worker_device += "/device:GPU:0"

        # filters
        self.input_filter = self.ap.input_filter
        self.output_filter = self.ap.output_filter
        self.pre_network_filter = self.ap.pre_network_filter
        device = self.replicated_device if self.replicated_device else self.worker_device
        self.input_filter.set_device(device)
        self.output_filter.set_device(device)
        self.pre_network_filter.set_device(device)

        # initialize all internal variables
        self._phase = RunPhase.HEATUP
        self.total_shaped_reward_in_current_episode = 0
        self.total_reward_in_current_episode = 0
        self.total_steps_counter = 0
        self.running_reward = None
        self.training_iteration = 0
        self.last_target_network_update_step = 0
        self.last_training_phase_step = 0
        self.current_episode = self.ap.current_episode = 0
        self.curr_state = {}
        self.current_hrl_goal = None
        self.current_episode_steps_counter = 0
        self.episode_running_info = {}
        self.last_episode_evaluation_ran = 0
        self.running_observations = []
        self.agent_logger.set_current_time(self.current_episode)
        self.exploration_policy = None
        self.networks = {}
        self.last_action_info = None
        self.running_observation_stats = None
        self.running_reward_stats = None
        self.accumulated_rewards_across_evaluation_episodes = 0
        self.accumulated_shaped_rewards_across_evaluation_episodes = 0
        self.num_successes_across_evaluation_episodes = 0
        self.num_evaluation_episodes_completed = 0
        self.current_episode_buffer = Episode(
            discount=self.ap.algorithm.discount)
        # TODO: add agents observation rendering for debugging purposes (not the same as the environment rendering)

        # environment parameters
        self.spaces = None
        self.in_action_space = self.ap.algorithm.in_action_space

        # signals
        self.episode_signals = []
        self.step_signals = []
        self.loss = self.register_signal('Loss')
        self.curr_learning_rate = self.register_signal('Learning Rate')
        self.unclipped_grads = self.register_signal('Grads (unclipped)')
        self.reward = self.register_signal('Reward',
                                           dump_one_value_per_episode=False,
                                           dump_one_value_per_step=True)
        self.shaped_reward = self.register_signal(
            'Shaped Reward',
            dump_one_value_per_episode=False,
            dump_one_value_per_step=True)
        if isinstance(self.in_action_space, GoalsSpace):
            self.distance_from_goal = self.register_signal(
                'Distance From Goal', dump_one_value_per_step=True)

        # use seed
        if self.ap.task_parameters.seed is not None:
            random.seed(self.ap.task_parameters.seed)
            np.random.seed(self.ap.task_parameters.seed)