def save(
    self,
    path: Union[str, pathlib.Path, io.BufferedIOBase],
    exclude: Optional[Iterable[str]] = None,
    include: Optional[Iterable[str]] = None,
) -> None:
    """
    Write the attributes of the object and the model parameters to a zip-file.

    :param path: path to the file where the rl agent should be saved
    :param exclude: names of attributes to leave out, in addition to the default exclusions
    :param include: names of attributes that must be kept even if they would otherwise be excluded
    """
    # Snapshot the attributes first so the instance's own dict is never modified
    snapshot = self.__dict__.copy()

    # Caller-supplied exclusions (if any) merged with the standard ones
    skipped = set() if exclude is None else set(exclude)
    skipped.update(self.excluded_save_params())

    # Explicitly included names always win over exclusions
    if include is not None:
        skipped.difference_update(include)

    state_dicts_names, tensors_names = self.get_torch_variables()

    # Torch variables are stored separately, so the top-level attribute that
    # owns each of them must not also end up in ``data``
    for qualified_name in state_dicts_names + tensors_names:
        skipped.add(qualified_name.split(".")[0])

    data = {key: value for key, value in snapshot.items() if key not in skipped}

    # Plain tensors, keyed by their (possibly dotted) attribute path
    tensors = (
        None
        if tensors_names is None
        else {name: recursive_getattr(self, name) for name in tensors_names}
    )

    # State dicts of every registered module/optimizer
    params_to_save = {
        name: recursive_getattr(self, name).state_dict() for name in state_dicts_names
    }

    save_to_zip_file(path, data=data, params=params_to_save, tensors=tensors)
def save(self, path: str, exclude: Optional[List[str]] = None, include: Optional[List[str]] = None) -> None:
    """
    Save all the attributes of the object and the model parameters in a zip-file.

    Neither the ``exclude`` list passed by the caller nor the list returned by
    ``excluded_save_params()`` is modified; exclusions are collected in a
    private set.

    :param path: path to the file where the rl agent should be saved
    :param exclude: name of parameters that should be excluded in addition to the default one
    :param include: name of parameters that might be excluded but should be included anyway
    """
    # copy parameter list so we don't mutate the original dict
    data = self.__dict__.copy()

    # Exclusions = standard ones + the ones given by the caller.
    # A fresh set is used so no list owned by someone else is mutated.
    excluded = set(self.excluded_save_params())
    if exclude is not None:
        excluded.update(exclude)

    # do not exclude params if they are specifically included
    if include is not None:
        excluded.difference_update(include)

    state_dicts_names, tensors_names = self.get_torch_variables()

    # any params that are in the save vars must not be saved by data;
    # ``tensors_names`` may be None, meaning there are no extra tensors
    torch_variables = list(state_dicts_names)
    if tensors_names is not None:
        torch_variables.extend(tensors_names)
    # we only need the name of the top most module, as that is what gets removed
    excluded.update(torch_var.split(".")[0] for torch_var in torch_variables)

    # Remove parameter entries of parameters which are to be excluded
    for param_name in excluded:
        data.pop(param_name, None)

    # Build dict of tensor variables
    tensors = None
    if tensors_names is not None:
        tensors = {name: recursive_getattr(self, name) for name in tensors_names}

    # Build dict of state_dicts
    params_to_save = {name: recursive_getattr(self, name).state_dict() for name in state_dicts_names}

    self._save_to_file_zip(path, data=data, params=params_to_save, tensors=tensors)
def load(cls, load_path: str, env: Optional[GymEnv] = None, **kwargs):
    """
    Load the model from a zip-file

    :param load_path: the location of the saved data
    :param env: the new environment to run the loaded model on
        (can be None if you only need prediction from a trained model) has priority over any saved environment
    :param kwargs: extra arguments to change the model when loading
    :return: the model built from the stored data
    :raises ValueError: if ``policy_kwargs`` is given and differs from the stored one,
        or if the stored data contains neither the spaces nor an environment
    :raises KeyError: if a new ``env`` is given but the stored data has no
        observation/action space to check it against
    :raises NotImplementedError: if parameters were stored but the model has no ``_setup_model()``
    """
    data, params, tensors = load_from_zip_file(load_path)

    if "policy_kwargs" in data:
        # The model is rebuilt below with its own ``device`` argument, so a
        # stored device entry is dropped before comparing or reusing the kwargs
        data["policy_kwargs"].pop("device", None)

        # Only compare when something was actually stored, so a missing
        # entry cannot be indexed by accident
        if "policy_kwargs" in kwargs and kwargs["policy_kwargs"] != data["policy_kwargs"]:
            raise ValueError(f"The specified policy kwargs do not equal the stored policy kwargs."
                             f"Stored kwargs: {data['policy_kwargs']}, specified kwargs: {kwargs['policy_kwargs']}")

    # check if observation space and action space are part of the saved parameters
    has_spaces = "observation_space" in data and "action_space" in data
    if not has_spaces and "env" not in data:
        raise ValueError("The observation_space and action_space was not given, can't verify new environments")

    if env is not None:
        # A new env can only be validated against stored spaces; fail with an
        # explicit message instead of a bare dictionary lookup error
        if not has_spaces:
            raise KeyError("The observation_space and action_space were not given, can't verify new environments")
        check_for_correct_spaces(env, data["observation_space"], data["action_space"])
    elif "env" in data:
        # if no new env was given use stored env if possible
        env = data["env"]

    # noinspection PyArgumentList
    model = cls(policy=data["policy_class"], env=env, device='auto', _init_setup_model=False)

    # load parameters
    model.__dict__.update(data)
    model.__dict__.update(kwargs)

    if hasattr(model, "_setup_model"):
        model._setup_model()
    elif len(params) > 0:
        # Stored parameters need the modules created by ``_setup_model()``
        raise NotImplementedError(f"{cls} has no ``_setup_model()`` method")

    # put state_dicts back in place
    for name in params:
        attr = recursive_getattr(model, name)
        attr.load_state_dict(params[name])

    # put tensors back in place
    if tensors is not None:
        for name in tensors:
            recursive_setattr(model, name, tensors[name])

    # Sample gSDE exploration matrix, so it uses the right device
    # see issue #44
    if model.use_sde:
        model.policy.reset_noise()
    return model
def get_parameters(self) -> Dict[str, Dict]:
    """
    Collect the state-dicts of every object listed by ``_get_torch_save_params``.

    Only the first element returned by ``_get_torch_save_params`` (the names of
    objects exposing ``state_dict()``) is used; the second one is ignored.

    :return: Mapping from object names to the state-dict of that object.
    """
    module_names = self._get_torch_save_params()[0]
    # Names may be dotted paths, hence the recursive attribute lookup
    return {name: recursive_getattr(self, name).state_dict() for name in module_names}
def save(model, path, exclude=None, include=None):
    """
    Write the attributes of ``model`` and its model parameters to a zip-file.

    :param model: the object whose attributes and torch parameters are saved
    :param path: path to the file where the rl agent should be saved
    :param exclude: names of attributes to leave out, in addition to the default ones
    :param include: names of attributes that must be kept even if they would otherwise be excluded
    """
    # Work on a snapshot so the model's own attribute dict stays untouched
    attributes = model.__dict__.copy()

    # Caller-supplied exclusions (if any) merged with the standard ones
    skipped = set() if exclude is None else set(exclude)
    skipped.update(model._excluded_save_params())

    # Explicit inclusions override exclusions
    if include is not None:
        skipped.difference_update(include)

    state_dicts_names, torch_variable_names = model._get_torch_save_params()

    # Anything stored as a torch variable must not be stored again in ``data``;
    # only the top-most attribute name matters because that is what is dropped
    skipped.update(
        qualified_name.split(".")[0]
        for qualified_name in state_dicts_names + torch_variable_names
    )

    data = {key: value for key, value in attributes.items() if key not in skipped}

    # Plain torch variables, keyed by their attribute path
    if torch_variable_names is None:
        pytorch_variables = None
    else:
        pytorch_variables = {
            name: recursive_getattr(model, name) for name in torch_variable_names
        }

    # State dicts of all registered objects
    state_dicts = model.get_parameters()

    save_to_zip_file(path, data=data, params=state_dicts, pytorch_variables=pytorch_variables)
def set_parameters(
    self,
    load_path_or_dict: Union[str, Dict[str, Dict]],
    exact_match: bool = True,
    device: Union[th.device, str] = "auto",
) -> None:
    """
    Load parameters from a given zip-file or a nested dictionary containing
    parameters for different modules (see ``get_parameters``).

    :param load_path_or_dict: Location of the saved data (path or file-like, see ``save``),
        or a nested dictionary mapping object names to the state-dictionary
        returned by that object's ``state_dict()``.
    :param exact_match: If True, every object listed by ``_get_torch_save_params`` must be
        present in the given parameters (and modules are loaded with ``strict=True``),
        otherwise a ValueError is raised. If False, only the given objects are updated.
    :param device: Device on which the code should run; only forwarded to
        ``load_from_zip_file`` when a path is given.
    :raises ValueError: if a key does not resolve to an attribute, or if
        ``exact_match`` is True and the set of updated objects differs from the expected one.
    """
    if isinstance(load_path_or_dict, dict):
        state_by_name = load_path_or_dict
    else:
        _, state_by_name, _ = load_from_zip_file(load_path_or_dict, device=device)

    # ``_get_torch_save_params`` returns [params, other_pytorch_variables];
    # only the former takes part in the bookkeeping below.
    expected_names = set(self._get_torch_save_params()[0])
    loaded_names = set()

    for name in state_by_name:
        try:
            target = recursive_getattr(self, name)
        except Exception:
            # Deliberately broad: it is unclear which errors the lookup can
            # raise (KeyError, but possibly others for odd keys).
            raise ValueError(f"Key {name} is an invalid object name.")

        if isinstance(target, th.optim.Optimizer):
            # Optimizers have no "strict" keyword and simply replace their
            # whole state. Their state-dict layout also changes over time
            # (e.g. after the first ``optim.step()``) and is deeply nested,
            # so missing keys cannot be detected reliably.
            # The given state-dict is therefore loaded as is, trusting the caller.
            extra_kwargs = {}
        else:
            # Anything else is assumed to be a th.nn.Module
            extra_kwargs = {"strict": exact_match}
        target.load_state_dict(state_by_name[name], **extra_kwargs)
        loaded_names.add(name)

    if not exact_match or loaded_names == expected_names:
        return

    raise ValueError(
        "Names of parameters do not match agents' parameters: "
        f"expected {expected_names}, got {loaded_names}"
    )
def load(
    cls,
    load_path: str,
    env: Optional[GymEnv] = None,
    device: Union[th.device, str] = "auto",
    **kwargs
) -> "BaseAlgorithm":
    """
    Rebuild a model from a zip-file.

    :param load_path: the location of the saved data
    :param env: the new environment to run the loaded model on
        (can be None if you only need prediction from a trained model) has priority over any saved environment
    :param device: Device on which the code should run.
    :param kwargs: extra arguments to change the model when loading
    :return: the model built from the stored data
    :raises ValueError: if ``policy_kwargs`` is given and differs from the stored one
    :raises KeyError: if the stored data lacks the observation or action space
    """
    data, params, tensors = load_from_zip_file(load_path, device=device)

    # The device is passed explicitly below, so a stored one is discarded
    if "policy_kwargs" in data:
        data["policy_kwargs"].pop("device", None)

    requested_policy_kwargs_differ = (
        "policy_kwargs" in kwargs and kwargs["policy_kwargs"] != data["policy_kwargs"]
    )
    if requested_policy_kwargs_differ:
        raise ValueError(
            f"The specified policy kwargs do not equal the stored policy kwargs."
            f"Stored kwargs: {data['policy_kwargs']}, specified kwargs: {kwargs['policy_kwargs']}"
        )

    # Both spaces must have been saved, otherwise a new env cannot be verified
    if not ("observation_space" in data and "action_space" in data):
        raise KeyError("The observation_space and action_space were not given, can't verify new environments")

    if env is not None:
        # A given env must match the stored spaces
        check_for_correct_spaces(env, data["observation_space"], data["action_space"])
    elif "env" in data:
        # No new env: fall back on the stored one when available
        env = data["env"]

    # noinspection PyArgumentList
    model = cls(
        policy=data["policy_class"],
        env=env,
        device=device,
        _init_setup_model=False,  # pytype: disable=not-instantiable,wrong-keyword-args
    )

    # Restore the saved attributes, then let the caller's overrides win
    for overrides in (data, kwargs):
        model.__dict__.update(overrides)
    model._setup_model()

    # Restore the state-dicts of the objects created by ``_setup_model``
    for name in params:
        recursive_getattr(model, name).load_state_dict(params[name])

    # Restore the plain tensors
    if tensors is not None:
        for name in tensors:
            recursive_setattr(model, name, tensors[name])

    # Sample gSDE exploration matrix, so it uses the right device
    # see issue #44
    if model.use_sde:
        model.policy.reset_noise()  # pytype: disable=attribute-error
    return model