def get_output_head(self, head_params: HeadParameters, head_idx: int, loss_weight: float = 1.):
    """
    Given the head parameters, creates the head and returns it.

    The head class is resolved dynamically from ``head_params`` (fix: the old docstring
    documented a ``head_type`` parameter that does not exist in the signature).

    :param head_params: the parameters of the head to create
    :param head_idx: the index of the head within the network
    :param loss_weight: the weight to assign for the head's loss
    :return: the instantiated head
    """
    # work on a shallow copy so the caller's parameters object is not mutated
    head_params_copy = copy.copy(head_params)
    head_params_copy.activation_function = self.get_activation_function(head_params_copy.activation_function)
    return dynamic_import_and_instantiate_module_from_params(head_params_copy, extra_kwargs={
        'agent_parameters': self.ap,
        'spaces': self.spaces,
        'network_name': self.network_wrapper_name,
        'head_idx': head_idx,
        'loss_weight': loss_weight,
        'is_local': self.network_is_local})
def get_input_embedder(self, input_name: str, embedder_params: InputEmbedderParameters):
    """
    Given an input embedder parameters class, creates the input embedder and returns it.

    :param input_name: the name of the input to the embedder (used for retrieving the shape).
                       The input should be a value within the state or the action.
    :param embedder_params: the parameters of the class of the embedder
    :return: the embedder instance
    :raises ValueError: if input_name is not a state sub-space, "action" or "goal"
    """
    # the embedder input may come from the state, the action or the goal space
    allowed_inputs = copy.copy(self.spaces.state.sub_spaces)
    allowed_inputs["action"] = copy.copy(self.spaces.action)
    allowed_inputs["goal"] = copy.copy(self.spaces.goal)

    if input_name not in allowed_inputs.keys():
        raise ValueError("The key for the input embedder ({}) must match one of the following keys: {}"
                         .format(input_name, allowed_inputs.keys()))

    # select the embedder flavor by the observation space type
    # (fix: the local was previously named `type`, shadowing the builtin)
    embedder_type = "vector"
    if isinstance(allowed_inputs[input_name], PlanarMapsObservationSpace):
        embedder_type = "image"

    embedder_path = 'rl_coach.architectures.tensorflow_components.embedders.' + embedder_params.path[embedder_type]

    # shallow-copy the parameters so the per-type selections below don't mutate the caller's object
    embedder_params_copy = copy.copy(embedder_params)
    embedder_params_copy.activation_function = self.get_activation_function(embedder_params.activation_function)
    embedder_params_copy.input_rescaling = embedder_params_copy.input_rescaling[embedder_type]
    embedder_params_copy.input_offset = embedder_params_copy.input_offset[embedder_type]
    embedder_params_copy.name = input_name
    module = dynamic_import_and_instantiate_module_from_params(embedder_params_copy, path=embedder_path,
                                                               positional_args=[allowed_inputs[input_name].shape])
    return module
def get_middleware(self, middleware_params: MiddlewareParameters):
    """
    Given a middleware type, creates the middleware and returns it.

    :param middleware_params: the parameters of the middleware class (typo fixed: was "paramaeters")
    :return: the middleware instance
    """
    # shallow-copy so that resolving the activation function does not mutate the caller's parameters
    middleware_params_copy = copy.copy(middleware_params)
    middleware_params_copy.activation_function = self.get_activation_function(middleware_params.activation_function)
    module = dynamic_import_and_instantiate_module_from_params(middleware_params_copy)
    return module
def init_environment_dependent_modules(self) -> None:
    """
    Initialize any modules that depend on knowing information about the environment,
    such as the action space or the observation space.

    :return: None
    """
    # the exploration policy can only be built once the action space is known
    exploration_params = self.ap.exploration
    exploration_params.action_space = self.spaces.action
    self.exploration_policy = dynamic_import_and_instantiate_module_from_params(exploration_params)

    # with the spaces available, build all of the agent's networks
    self.networks = self.create_networks()
def get_middleware(self, middleware_params: MiddlewareParameters):
    """
    Given a middleware type, creates the middleware and returns it.

    :param middleware_params: the parameters of the middleware class (typo fixed: was "paramaeters")
    :return: the middleware instance
    """
    # resolve the middleware class from the tensorflow middlewares package
    mod_name = middleware_params.parameterized_class_name
    middleware_path = 'rl_coach.architectures.tensorflow_components.middlewares:' + mod_name
    # shallow-copy so that resolving the activation function does not mutate the caller's parameters
    middleware_params_copy = copy.copy(middleware_params)
    middleware_params_copy.activation_function = utils.get_activation_function(
        middleware_params.activation_function)
    module = dynamic_import_and_instantiate_module_from_params(middleware_params_copy, path=middleware_path)
    return module
def get_output_head(self, head_params: HeadParameters, head_idx: int):
    """
    Given a head type, creates the head and returns it.

    :param head_params: the parameters of the head to create; its ``path`` locates the head class
    :param head_idx: the head index
    :return: the head
    """
    # fix: removed the unused local `mod_name` (parameterized_class_name was read but never used)
    head_path = head_params.path
    # shallow-copy so that resolving the activation function does not mutate the caller's parameters
    head_params_copy = copy.copy(head_params)
    head_params_copy.activation_function = utils.get_activation_function(head_params_copy.activation_function)
    return dynamic_import_and_instantiate_module_from_params(head_params_copy, path=head_path, extra_kwargs={
        'agent_parameters': self.ap,
        'spaces': self.spaces,
        'network_name': self.network_wrapper_name,
        'head_idx': head_idx,
        'is_local': self.network_is_local})
def get_middleware(self, middleware_params: MiddlewareParameters):
    """
    Given a middleware type, creates the middleware and returns it.

    :param middleware_params: the parameters of the middleware class (typo fixed: was "paramaeters")
    :return: the middleware instance
    """
    # fix: removed the unused local `mod_name` (parameterized_class_name was read but never used)
    middleware_path = middleware_params.path
    # shallow-copy so that the mutations below do not affect the caller's parameters
    middleware_params_copy = copy.copy(middleware_params)
    middleware_params_copy.activation_function = utils.get_activation_function(
        middleware_params.activation_function)
    middleware_params_copy.is_training = self.is_training
    module = dynamic_import_and_instantiate_module_from_params(middleware_params_copy, path=middleware_path)
    return module
def __init__(self, action_space: ActionSpace, epsilon_schedule: Schedule, evaluation_epsilon: float,
             continuous_exploration_policy_parameters: ExplorationParameters = None):
    """
    :param action_space: the action space used by the environment
    :param epsilon_schedule: a schedule for the epsilon values
    :param evaluation_epsilon: the epsilon value to use for evaluation phases
    :param continuous_exploration_policy_parameters: the parameters of the continuous exploration
        policy to use if the e-greedy is used for a continuous policy. Defaults to a fresh
        AdditiveNoiseParameters() per instance.
    """
    super().__init__(action_space)
    self.epsilon_schedule = epsilon_schedule
    self.evaluation_epsilon = evaluation_epsilon

    if isinstance(self.action_space, BoxActionSpace):
        # fix: the default used to be `AdditiveNoiseParameters()` in the signature — a mutable
        # default evaluated once at def time and then mutated below (`.action_space = ...`),
        # so all instances silently shared (and overwrote) one parameters object.
        if continuous_exploration_policy_parameters is None:
            continuous_exploration_policy_parameters = AdditiveNoiseParameters()
        # for continuous e-greedy (see http://www.cs.ubc.ca/~van/papers/2017-TOG-deepLoco/2017-TOG-deepLoco.pdf)
        continuous_exploration_policy_parameters.action_space = action_space
        self.continuous_exploration_policy = \
            dynamic_import_and_instantiate_module_from_params(continuous_exploration_policy_parameters)

    self.current_random_value = np.random.rand()
def init_environment_dependent_modules(self) -> None:
    """
    Initialize any modules that depend on knowing information about the environment,
    such as the action space or the observation space.

    :return: None
    """
    # the exploration parameters may be given as a mapping from action space type to
    # exploration parameters; if so, select the entry matching the environment's action space
    if isinstance(self.ap.exploration, dict):
        action_space_type = self.spaces.action.__class__
        if action_space_type not in self.ap.exploration.keys():
            raise ValueError("The exploration parameters were defined as a mapping between action space types and "
                             "exploration types, but the action space used by the environment ({}) was not part of "
                             "the exploration parameters dictionary keys ({})"
                             .format(action_space_type, list(self.ap.exploration.keys())))
        self.ap.exploration = self.ap.exploration[action_space_type]

    self.ap.exploration.action_space = self.spaces.action
    self.exploration_policy = dynamic_import_and_instantiate_module_from_params(self.ap.exploration)

    # create all the networks of the agent
    self.networks = self.create_networks()
def __init__(self, agent_parameters: AgentParameters, parent: Union['LevelManager', 'CompositeAgent'] = None):
    """
    Build the agent from its parameters: memory, devices, filters, internal counters and signals.

    :param agent_parameters: A Preset class instance with all the running parameters
    :param parent: the hierarchy component (level manager or composite agent) that owns this agent
    """
    super().__init__()
    self.ap = agent_parameters
    self.task_id = self.ap.task_parameters.task_index
    # task 0 is the chief worker in distributed training
    self.is_chief = self.task_id == 0
    # shared memory is only meaningful when training is distributed AND the memory asks for it
    self.shared_memory = type(agent_parameters.task_parameters) == DistributedTaskParameters \
                         and self.ap.memory.shared_memory
    if self.shared_memory:
        self.shared_memory_scratchpad = self.ap.task_parameters.shared_memory_scratchpad
    self.name = agent_parameters.name
    self.parent = parent
    self.parent_level_manager = None
    self.full_name_id = agent_parameters.full_name_id = self.name

    if type(agent_parameters.task_parameters) == DistributedTaskParameters:
        screen.log_title("Creating agent - name: {} task id: {} (may take up to 30 seconds due to "
                         "tensorflow wake up time)".format(self.full_name_id, self.task_id))
    else:
        screen.log_title("Creating agent - name: {}".format(self.full_name_id))
    self.imitation = False
    self.agent_logger = Logger()
    self.agent_episode_logger = EpisodeLogger()

    # get the memory
    # - distributed training + shared memory:
    #   * is chief? -> create the memory and add it to the scratchpad
    #   * not chief? -> wait for the chief to create the memory and then fetch it
    # - non distributed training / not shared memory:
    #   * create memory
    memory_name = self.ap.memory.path.split(':')[1]
    self.memory_lookup_name = self.full_name_id + '.' + memory_name
    if self.shared_memory and not self.is_chief:
        # non-chief worker: fetch the memory the chief placed on the scratchpad
        self.memory = self.shared_memory_scratchpad.get(self.memory_lookup_name)
    else:
        # modules
        if agent_parameters.memory.load_memory_from_file_path:
            # pre-filled replay buffer loaded from disk
            # NOTE(review): pickle is only safe for trusted files — this path is user-supplied
            screen.log_title("Loading replay buffer from pickle. Pickle path: {}".format(
                agent_parameters.memory.load_memory_from_file_path))
            self.memory = read_pickle(agent_parameters.memory.load_memory_from_file_path)
        else:
            self.memory = dynamic_import_and_instantiate_module_from_params(self.ap.memory)

        if self.shared_memory and self.is_chief:
            # chief worker: publish the memory so the other workers can fetch it
            self.shared_memory_scratchpad.add(self.memory_lookup_name, self.memory)

    # set devices
    if type(agent_parameters.task_parameters) == DistributedTaskParameters:
        self.has_global = True
        self.replicated_device = agent_parameters.task_parameters.device
        self.worker_device = "/job:worker/task:{}".format(self.task_id)
    else:
        self.has_global = False
        self.replicated_device = None
        self.worker_device = ""
        if agent_parameters.task_parameters.use_cpu:
            self.worker_device += "/cpu:0"
        else:
            self.worker_device += "/device:GPU:0"

    # filters
    self.input_filter = self.ap.input_filter
    self.output_filter = self.ap.output_filter
    self.pre_network_filter = self.ap.pre_network_filter
    # filters run on the replicated (parameter-server) device when one exists, else locally
    device = self.replicated_device if self.replicated_device else self.worker_device
    self.input_filter.set_device(device)
    self.output_filter.set_device(device)
    self.pre_network_filter.set_device(device)

    # initialize all internal variables
    self._phase = RunPhase.HEATUP
    self.total_shaped_reward_in_current_episode = 0
    self.total_reward_in_current_episode = 0
    self.total_steps_counter = 0
    self.running_reward = None
    self.training_iteration = 0
    self.last_target_network_update_step = 0
    self.last_training_phase_step = 0
    self.current_episode = self.ap.current_episode = 0
    self.curr_state = {}
    self.current_hrl_goal = None
    self.current_episode_steps_counter = 0
    self.episode_running_info = {}
    self.last_episode_evaluation_ran = 0
    self.running_observations = []
    self.agent_logger.set_current_time(self.current_episode)
    self.exploration_policy = None
    self.networks = {}
    self.last_action_info = None
    self.running_observation_stats = None
    self.running_reward_stats = None
    self.accumulated_rewards_across_evaluation_episodes = 0
    self.accumulated_shaped_rewards_across_evaluation_episodes = 0
    self.num_successes_across_evaluation_episodes = 0
    self.num_evaluation_episodes_completed = 0
    self.current_episode_buffer = Episode(discount=self.ap.algorithm.discount)
    # TODO: add agents observation rendering for debugging purposes (not the same as the environment rendering)

    # environment parameters
    # spaces are filled in later, once the environment is known (init_environment_dependent_modules)
    self.spaces = None
    self.in_action_space = self.ap.algorithm.in_action_space

    # signals
    self.episode_signals = []
    self.step_signals = []
    self.loss = self.register_signal('Loss')
    self.curr_learning_rate = self.register_signal('Learning Rate')
    self.unclipped_grads = self.register_signal('Grads (unclipped)')
    self.reward = self.register_signal('Reward', dump_one_value_per_episode=False, dump_one_value_per_step=True)
    self.shaped_reward = self.register_signal('Shaped Reward', dump_one_value_per_episode=False,
                                              dump_one_value_per_step=True)
    if isinstance(self.in_action_space, GoalsSpace):
        # HRL: track how far the agent currently is from its goal
        self.distance_from_goal = self.register_signal('Distance From Goal', dump_one_value_per_step=True)

    # use seed
    # seeds both Python's and NumPy's RNGs for reproducibility
    if self.ap.task_parameters.seed is not None:
        random.seed(self.ap.task_parameters.seed)
        np.random.seed(self.ap.task_parameters.seed)