def load(cls, load_path: str, env: Optional[GymEnv] = None, **kwargs):
    """
    Load the model from a zip-file

    :param load_path: the location of the saved data
    :param env: the new environment to run the loaded model on
        (can be None if you only need prediction from a trained model)
        has priority over any saved environment
    :param kwargs: extra arguments to change the model when loading
    :return: a new model instance with the stored attributes, parameters
        and tensors restored
    """
    data, params, tensors = load_from_zip_file(load_path)
    # Drop the device stored at save time: the new instance is created with
    # device='auto' below, so the old device must not leak into policy_kwargs.
    if 'policy_kwargs' in data:
        for arg_to_remove in ['device']:
            if arg_to_remove in data['policy_kwargs']:
                del data['policy_kwargs'][arg_to_remove]
    # Refuse to silently override architecture-defining kwargs.
    # NOTE(review): the two f-string pieces are concatenated without a
    # separating space, so the message renders as "...kwargs.Stored kwargs:".
    if 'policy_kwargs' in kwargs and kwargs['policy_kwargs'] != data['policy_kwargs']:
        raise ValueError(f"The specified policy kwargs do not equal the stored policy kwargs."
                         f"Stored kwargs: {data['policy_kwargs']}, specified kwargs: {kwargs['policy_kwargs']}")
    # check if observation space and action space are part of the saved parameters
    if ("observation_space" not in data or "action_space" not in data) and "env" not in data:
        raise ValueError("The observation_space and action_space was not given, can't verify new environments")
    # check if given env is valid
    if env is not None:
        check_for_correct_spaces(env, data["observation_space"], data["action_space"])
    # if no new env was given use stored env if possible
    if env is None and "env" in data:
        env = data["env"]
    # noinspection PyArgumentList
    model = cls(policy=data["policy_class"], env=env, device='auto', _init_setup_model=False)
    # load parameters
    model.__dict__.update(data)
    model.__dict__.update(kwargs)
    # NOTE(review): _setup_model() is invoked unconditionally below, so a class
    # lacking it would still crash there even when ``params`` is empty and this
    # guard does not fire.
    if not hasattr(model, "_setup_model") and len(params) > 0:
        raise NotImplementedError(f"{cls} has no ``_setup_model()`` method")
    model._setup_model()
    # put state_dicts back in place
    for name in params:
        attr = recursive_getattr(model, name)
        attr.load_state_dict(params[name])
    # put tensors back in place
    if tensors is not None:
        for name in tensors:
            recursive_setattr(model, name, tensors[name])
    # Sample gSDE exploration matrix, so it uses the right device
    # see issue #44
    if model.use_sde:
        model.policy.reset_noise()
    return model
def load(cls, load_path: str, env: Optional[GymEnv] = None):
    """
    Re-create a HER model from a saved zip archive.

    The wrapped algorithm is fully re-initialized (``_init_setup_model=True``)
    from the stored constructor arguments; afterwards the HER-specific
    observation/action spaces saved in the archive are written directly onto
    the instance.

    :param load_path: the location of the saved data
    :param env: the new environment to run the loaded model on
        (can be None if you only need prediction from a trained model)
    :return: the re-created model instance
    """
    saved_data, _saved_params, _saved_tensors = load_from_zip_file(load_path)
    constructor_kwargs = {
        'policy': saved_data['policy_class'],
        'env': env,
        'n_sampled_goal': saved_data['n_sampled_goal'],
        'goal_selection_strategy': saved_data['goal_selection_strategy'],
        '_init_setup_model': True,
    }
    model = cls(**constructor_kwargs)
    # Restore the HER spaces verbatim, bypassing any property setters.
    model.__dict__['observation_space'] = saved_data['her_obs_space']
    model.__dict__['action_space'] = saved_data['her_action_space']
    return model
def load_parameters_from_path(model, model_filename, model_cls, bodies, default_wrapper):
    """
    Copy the policy weights stored in ``model_filename`` into ``model``.

    The checkpoint and the target model may have been trained on different
    robot bodies. When CNS/PNS adaptors are in use, adaptor sub-nets are added
    on both sides until the two policies have matching state-dict keys, and
    only then are the loaded weights copied over.

    :param model: the model whose policy receives the loaded weights
    :param model_filename: path to the zip-file produced by SB3 ``save()``
    :param model_cls: algorithm class used to re-instantiate the saved model
    :param bodies: robot body ids the target model is configured for
    :param default_wrapper: wrappers for the dummy envs built while loading
    :return: ``model``, with its policy weights replaced
    """
    args = common.args
    data, params, pytorch_variables = load_from_zip_file(model_filename)
    robot_ids_in_file = []
    if args.cnspns:
        # Scan the saved state dict to discover which robot ids the
        # checkpoint has adaptors for, and sanity-check channel sizes.
        for parameter_name, module in params['policy'].items():
            _match = re.findall(r'pns_sensor_adaptor\.nets\.([0-9]+)\.weight', parameter_name)
            if _match:
                _robot_id = _match[0]
                print(f"Sensor channel for the policy: {module.shape[0]}")
                robot_ids_in_file.append(int(_robot_id))
                assert args.cnspns_sensor_channel == module.shape[
                    0], f"Loading from a model with a different number of sensor channels. Want {args.cnspns_sensor_channel}, the model has {module.shape[0]}."
            # Fixed: the quantifier was outside the capture group
            # (``([0-9])+``), which captures only the LAST digit of a
            # multi-digit adaptor index; now consistent with the
            # sensor-adaptor pattern above.
            _match = re.findall(r'pns_motor_adaptor\.nets\.([0-9]+)\.weight', parameter_name)
            if _match:
                print(f"Motor channel for the policy: {module.shape[1]}")
                assert args.cnspns_motor_channel == module.shape[
                    1], f"Loading from a model with a different number of motor channels. Want {args.cnspns_motor_channel}, the model has {module.shape[1]}."
        # Rebuild one dummy env per body found in the file so the saved model
        # can be re-instantiated with matching spaces.
        fake_env = DummyVecEnv([
            gym_interface.make_env(robot_body=_robot_id,
                                   wrappers=default_wrapper,
                                   render=False,
                                   dataset_folder=args.body_folder)
            for _robot_id in robot_ids_in_file
        ])
    else:
        fake_env = None
    load_model = model_cls.load(model_filename, fake_env)
    if args.cnspns:
        # Grow both policies until their adaptor sets (and hence their
        # state-dict keys) match exactly.
        for robot_id in robot_ids_in_file:
            if robot_id not in bodies:
                model.policy.add_net_to_adaptors(robot_id)
        for robot_id in bodies:
            if robot_id not in robot_ids_in_file:
                load_model.policy.add_net_to_adaptors(robot_id)
    load_weights = load_model.policy.state_dict()
    model.policy.load_state_dict(load_weights)
    # model.policy.rebuild()?
    print(f"Weights loaded from {model_filename}")
    return model
def load(
    cls,
    path: Union[str, pathlib.Path, io.BufferedIOBase],
    env: Optional[GymEnv] = None,
    device: Union[th.device, str] = "auto",
    custom_objects: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> "BaseAlgorithm":
    """
    Load the model from a zip-file

    :param path: path to the file (or a file-like) where to load the agent from
    :param env: the new environment to run the loaded model on
        (can be None if you only need prediction from a trained model)
        has priority over any saved environment
    :param device: Device on which the code should run.
    :param custom_objects: Dictionary of objects to replace upon loading.
        If a variable is present in this dictionary as a key, it will not be
        deserialized and the corresponding item will be used instead.
        Similar to custom_objects in ``keras.models.load_model``. Useful when
        you have an object in file that can not be deserialized.
    :param kwargs: extra arguments to change the model when loading
    :return: new model instance with loaded parameters
    """
    data, params, pytorch_variables = load_from_zip_file(
        path, device=device, custom_objects=custom_objects)
    # Remove stored device information and replace with ours
    if "policy_kwargs" in data:
        if "device" in data["policy_kwargs"]:
            del data["policy_kwargs"]["device"]
    # NOTE(review): the two f-string pieces below are joined without a space,
    # so the message reads "...kwargs.Stored kwargs:".
    if "policy_kwargs" in kwargs and kwargs["policy_kwargs"] != data[
            "policy_kwargs"]:
        raise ValueError(
            f"The specified policy kwargs do not equal the stored policy kwargs."
            f"Stored kwargs: {data['policy_kwargs']}, specified kwargs: {kwargs['policy_kwargs']}"
        )
    if "observation_space" not in data or "action_space" not in data:
        raise KeyError(
            "The observation_space and action_space were not given, can't verify new environments"
        )
    if env is not None:
        # Wrap first if needed
        env = cls._wrap_env(env, data["verbose"])
        # Check if given env is valid
        check_for_correct_spaces(env, data["observation_space"],
                                 data["action_space"])
    else:
        # Use stored env, if one exists. If not, continue as is
        # (can be used for predict)
        if "env" in data:
            env = data["env"]
    # noinspection PyArgumentList
    model = cls(  # pytype: disable=not-instantiable,wrong-keyword-args
        policy=data["policy_class"],
        env=env,
        device=device,
        _init_setup_model=False,  # pytype: disable=not-instantiable,wrong-keyword-args
    )
    # load parameters
    model.__dict__.update(data)
    model.__dict__.update(kwargs)
    model._setup_model()
    # put state_dicts back in place
    model.set_parameters(params, exact_match=True, device=device)
    # put other pytorch variables back in place
    if pytorch_variables is not None:
        for name in pytorch_variables:
            # Set the data attribute directly to avoid issue when using optimizers
            # See https://github.com/DLR-RM/stable-baselines3/issues/391
            recursive_setattr(model, name + ".data", pytorch_variables[name].data)
    # Sample gSDE exploration matrix, so it uses the right device
    # see issue #44
    if model.use_sde:
        model.policy.reset_noise()  # pytype: disable=attribute-error
    return model
def set_parameters(
    self,
    load_path_or_dict: Union[str, Dict[str, Dict]],
    exact_match: bool = True,
    device: Union[th.device, str] = "auto",
) -> None:
    """
    Load parameters from a given zip-file or a nested dictionary containing
    parameters for different modules (see ``get_parameters``).

    :param load_path_or_dict: Location of the saved data (path or file-like,
        see ``save``), or a nested dictionary containing nn.Module parameters
        used by the policy. The dictionary maps object names to a
        state-dictionary returned by ``torch.nn.Module.state_dict()``.
    :param exact_match: If True, the given parameters should include
        parameters for each module and each of their parameters, otherwise
        raises an Exception. If set to False, this can be used to update only
        specific parameters.
    :param device: Device on which the code should run.
    """
    params = None
    if isinstance(load_path_or_dict, dict):
        params = load_path_or_dict
    else:
        _, params, _ = load_from_zip_file(load_path_or_dict, device=device)
    # Keep track which objects were updated.
    # `_get_torch_save_params` returns [params, other_pytorch_variables].
    # We are only interested in former here.
    objects_needing_update = set(self._get_torch_save_params()[0])
    updated_objects = set()
    for name in params:
        attr = None
        try:
            attr = recursive_getattr(self, name)
        except Exception:
            # What errors recursive_getattr could throw? KeyError, but
            # possible something else too (e.g. if key is an int?).
            # Catch anything for now.
            raise ValueError(f"Key {name} is an invalid object name.")
        if isinstance(attr, th.optim.Optimizer):
            # Optimizers do not support "strict" keyword...
            # Seems like they will just replace the whole
            # optimizer state with the given one.
            # On top of this, optimizer state-dict
            # seems to change (e.g. first ``optim.step()``),
            # which makes comparing state dictionary keys
            # invalid (there is also a nesting of dictionaries
            # with lists with dictionaries with ...), adding to the
            # mess.
            #
            # TL;DR: We might not be able to reliably say
            # if given state-dict is missing keys.
            #
            # Solution: Just load the state-dict as is, and trust
            # the user has provided a sensible state dictionary.
            attr.load_state_dict(params[name])
        else:
            # Assume attr is th.nn.Module
            attr.load_state_dict(params[name], strict=exact_match)
        updated_objects.add(name)
    if exact_match and updated_objects != objects_needing_update:
        raise ValueError(
            "Names of parameters do not match agents' parameters: "
            f"expected {objects_needing_update}, got {updated_objects}")
def load(
    cls,
    path: Union[str, pathlib.Path, io.BufferedIOBase],
    env: Optional[GymEnv] = None,
    device: Union[th.device, str] = "auto",
    custom_objects: Optional[Dict[str, Any]] = None,
    print_system_info: bool = False,
    force_reset: bool = True,
    **kwargs,
) -> "BaseAlgorithm":
    """
    Load the model from a zip-file.
    Warning: ``load`` re-creates the model from scratch, it does not update
    it in-place! For an in-place load use ``set_parameters`` instead.

    :param path: path to the file (or a file-like) where to load the agent from
    :param env: the new environment to run the loaded model on
        (can be None if you only need prediction from a trained model)
        has priority over any saved environment
    :param device: Device on which the code should run.
    :param custom_objects: Dictionary of objects to replace upon loading.
        If a variable is present in this dictionary as a key, it will not be
        deserialized and the corresponding item will be used instead.
        Similar to custom_objects in ``keras.models.load_model``. Useful when
        you have an object in file that can not be deserialized.
    :param print_system_info: Whether to print system info from the saved
        model and the current system info (useful to debug loading issues)
    :param force_reset: Force call to ``reset()`` before training
        to avoid unexpected behavior.
        See https://github.com/DLR-RM/stable-baselines3/issues/597
    :param kwargs: extra arguments to change the model when loading
    :return: new model instance with loaded parameters
    """
    if print_system_info:
        print("== CURRENT SYSTEM INFO ==")
        get_system_info()
    data, params, pytorch_variables = load_from_zip_file(
        path,
        device=device,
        custom_objects=custom_objects,
        print_system_info=print_system_info)
    # Remove stored device information and replace with ours
    if "policy_kwargs" in data:
        if "device" in data["policy_kwargs"]:
            del data["policy_kwargs"]["device"]
    if "policy_kwargs" in kwargs and kwargs["policy_kwargs"] != data[
            "policy_kwargs"]:
        raise ValueError(
            f"The specified policy kwargs do not equal the stored policy kwargs."
            f"Stored kwargs: {data['policy_kwargs']}, specified kwargs: {kwargs['policy_kwargs']}"
        )
    if "observation_space" not in data or "action_space" not in data:
        raise KeyError(
            "The observation_space and action_space were not given, can't verify new environments"
        )
    if env is not None:
        # Wrap first if needed
        env = cls._wrap_env(env, data["verbose"])
        # Check if given env is valid
        check_for_correct_spaces(env, data["observation_space"],
                                 data["action_space"])
        # Discard `_last_obs`, this will force the env to reset before training
        # See issue https://github.com/DLR-RM/stable-baselines3/issues/597
        if force_reset and data is not None:
            data["_last_obs"] = None
    else:
        # Use stored env, if one exists. If not, continue as is
        # (can be used for predict)
        if "env" in data:
            env = data["env"]
    # noinspection PyArgumentList
    model = cls(  # pytype: disable=not-instantiable,wrong-keyword-args
        policy=data["policy_class"],
        env=env,
        device=device,
        _init_setup_model=False,  # pytype: disable=not-instantiable,wrong-keyword-args
    )
    # load parameters
    model.__dict__.update(data)
    model.__dict__.update(kwargs)
    model._setup_model()
    # put state_dicts back in place
    model.set_parameters(params, exact_match=True, device=device)
    # put other pytorch variables back in place
    if pytorch_variables is not None:
        for name in pytorch_variables:
            # Skip if PyTorch variable was not defined (to ensure backward compatibility).
            # This happens when using SAC/TQC.
            # SAC has an entropy coefficient which can be fixed or optimized.
            # If it is optimized, an additional PyTorch variable `log_ent_coef` is defined,
            # otherwise it is initialized to `None`.
            if pytorch_variables[name] is None:
                continue
            # Set the data attribute directly to avoid issue when using optimizers
            # See https://github.com/DLR-RM/stable-baselines3/issues/391
            recursive_setattr(model, name + ".data", pytorch_variables[name].data)
    # Sample gSDE exploration matrix, so it uses the right device
    # see issue #44
    if model.use_sde:
        model.policy.reset_noise()  # pytype: disable=attribute-error
    return model
def load(
    cls,
    path: Union[str, pathlib.Path, io.BufferedIOBase],
    env: Optional[GymEnv] = None,
    device: Union[th.device, str] = "auto",
    **kwargs,
) -> "BaseAlgorithm":
    """
    Load the model from a zip-file

    :param path: path to the file (or a file-like) where to load the agent from
    :param env: the new environment to run the loaded model on
        (can be None if you only need prediction from a trained model)
        has priority over any saved environment
    :param device: Device on which the code should run.
    :param kwargs: extra arguments to change the model when loading
    :return: new model instance with loaded parameters
    """
    data, params, pytorch_variables = load_from_zip_file(path, device=device)
    # Remove stored device information and replace with ours
    if "policy_kwargs" in data:
        if "device" in data["policy_kwargs"]:
            del data["policy_kwargs"]["device"]
    if "policy_kwargs" in kwargs and kwargs["policy_kwargs"] != data[
            "policy_kwargs"]:
        raise ValueError(
            f"The specified policy kwargs do not equal the stored policy kwargs."
            f"Stored kwargs: {data['policy_kwargs']}, specified kwargs: {kwargs['policy_kwargs']}"
        )
    if "observation_space" not in data or "action_space" not in data:
        raise KeyError(
            "The observation_space and action_space were not given, can't verify new environments"
        )
    if env is not None:
        # Wrap first if needed
        env = cls._wrap_env(env, data["verbose"])
        # Check if given env is valid
        check_for_correct_spaces(env, data["observation_space"],
                                 data["action_space"])
    else:
        # Use stored env, if one exists. If not, continue as is
        # (can be used for predict)
        if "env" in data:
            env = data["env"]
    # noinspection PyArgumentList
    model = cls(
        policy=data["policy_class"],
        env=env,
        device=device,
        _init_setup_model=False,  # pytype: disable=not-instantiable,wrong-keyword-args
    )
    # load parameters
    model.__dict__.update(data)
    model.__dict__.update(kwargs)
    model._setup_model()
    # put state_dicts back in place
    model.set_parameters(params, exact_match=True, device=device)
    # put other pytorch variables back in place
    if pytorch_variables is not None:
        for name in pytorch_variables:
            recursive_setattr(model, name, pytorch_variables[name])
    # Sample gSDE exploration matrix, so it uses the right device
    # see issue #44
    if model.use_sde:
        model.policy.reset_noise()  # pytype: disable=attribute-error
    return model
def load(
    cls,
    load_path: str,
    env: Optional[GymEnv] = None,
    device: Union[th.device, str] = "auto",
    **kwargs
) -> "BaseAlgorithm":
    """
    Load the model from a zip-file

    :param load_path: the location of the saved data
    :param env: the new environment to run the loaded model on
        (can be None if you only need prediction from a trained model)
        has priority over any saved environment
    :param device: (Union[th.device, str]) Device on which the code should run.
    :param kwargs: extra arguments to change the model when loading
    :return: new model instance with loaded parameters
    """
    data, params, tensors = load_from_zip_file(load_path, device=device)
    # Drop the device recorded at save time so the requested ``device`` wins.
    if "policy_kwargs" in data:
        for arg_to_remove in ["device"]:
            if arg_to_remove in data["policy_kwargs"]:
                del data["policy_kwargs"][arg_to_remove]
    if "policy_kwargs" in kwargs and kwargs["policy_kwargs"] != data["policy_kwargs"]:
        raise ValueError(
            f"The specified policy kwargs do not equal the stored policy kwargs."
            f"Stored kwargs: {data['policy_kwargs']}, specified kwargs: {kwargs['policy_kwargs']}"
        )
    # check if observation space and action space are part of the saved parameters
    if "observation_space" not in data or "action_space" not in data:
        raise KeyError("The observation_space and action_space were not given, can't verify new environments")
    # check if given env is valid
    if env is not None:
        check_for_correct_spaces(env, data["observation_space"], data["action_space"])
    # if no new env was given use stored env if possible
    if env is None and "env" in data:
        env = data["env"]
    # noinspection PyArgumentList
    model = cls(
        policy=data["policy_class"],
        env=env,
        device=device,
        _init_setup_model=False,  # pytype: disable=not-instantiable,wrong-keyword-args
    )
    # load parameters
    model.__dict__.update(data)
    model.__dict__.update(kwargs)
    model._setup_model()
    # put state_dicts back in place
    for name in params:
        attr = recursive_getattr(model, name)
        attr.load_state_dict(params[name])
    # put tensors back in place
    if tensors is not None:
        for name in tensors:
            recursive_setattr(model, name, tensors[name])
    # Sample gSDE exploration matrix, so it uses the right device
    # see issue #44
    if model.use_sde:
        model.policy.reset_noise()  # pytype: disable=attribute-error
    return model
# NOTE(review): this fragment begins mid-way through an evaluation rollout;
# the enclosing loop headers and the definitions of ``done``, ``returns``,
# ``env``, ``timestamp`` and ``agent`` live outside this view, so the leading
# indentation below is a best-effort reconstruction (mirroring the rollout
# loop visible at the end of this fragment).
        if done:
            break
    returns += [env.cum_return]
env.close()
# Summarize the collected episode returns.
print(pd.Series(returns).describe())
# Keep a human-readable description of the environment next to the model.
with open(f'{timestamp}/env.txt', 'w') as f:
    print(str(env), file=f)
env = Monitor(env)
# Build a PPO model whose policy branch is a single linear unit (identity
# activation, pi=[1]) so its weights can be overwritten below.
model = PPO('MlpPolicy',
            env,
            policy_kwargs=dict(activation_fn=nn.Identity,
                               net_arch=[dict(pi=[1], vf=[64, 64])]))
model.save(f'{timestamp}/model')
# Splice hand-crafted weights into the saved archive: the first policy layer
# takes ``agent.policy`` and the action head becomes an identity mapping
# (weight [[1]], bias [0]).
data, params, tensors = load_from_zip_file(f'{timestamp}/model')
params['policy']['mlp_extractor.policy_net.0.weight'] = th.tensor(
    agent.policy, dtype=th.float32)
params['policy']['mlp_extractor.policy_net.0.bias'] = th.tensor(
    [0], dtype=th.float32)
params['policy']['action_net.weight'] = th.tensor([[1]], dtype=th.float32)
params['policy']['action_net.bias'] = th.tensor([0], dtype=th.float32)
save_to_zip_file(f'{timestamp}/model', data, params, tensors)
# Reload so the model now carries the injected weights.
model = model.load(f'{timestamp}/model')
np.random.seed(0)
returns = []
for i in range(100):
    obs = env.reset()
    for _ in range(env.steps_max):
        env.render()
# NOTE(review): this fragment is truncated — the trailing ``else:`` branch
# continues outside this view.
from common import wrapper_custom_align, wrapper_diff, wrapper_mut, wrapper_pns
import common.common as common
import common.wrapper as wrapper
import common.gym_interface as gym_interface
from common.pns import PNSPPO, PNSMlpPolicy
from common.cnspns import CNSPNSPPO, CNSPNSPolicy

if __name__ == "__main__":
    args = common.args
    print(args)
    hyperparams = common.load_hyperparameters(conf_name="PPO")
    # Inspect the checkpoint on CPU without instantiating the algorithm.
    data, params, pytorch_variables = load_from_zip_file(args.model_filename,
                                                         device="cpu")
    if args.cnspns:
        cns_parameter_means = []
        # Derive the adaptor channel sizes from the saved weight shapes so
        # the runtime args match the checkpoint being loaded.
        for parameter_name, module in params['policy'].items():
            _match = re.findall(
                r'pns_(sensor|motor)_adaptor\.nets\.([0-9]+)\.(weight|bias)',
                parameter_name)
            if _match:
                if _match[0][2] == 'weight':
                    if _match[0][0] == "sensor":
                        print(
                            f"Sensor channel for the policy: {module.shape[0]}"
                        )
                        args.cnspns_sensor_channel = module.shape[0]
                    else:
def load(
    cls,
    path: Union[str, pathlib.Path, io.BufferedIOBase],
    env: Optional[GymEnv] = None,
    device: Union[th.device, str] = "auto",
    custom_objects: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> "BaseAlgorithm":
    """
    Load the model from a zip-file

    :param path: path to the file (or a file-like) where to load the agent from
    :param env: the new environment to run the loaded model on
        (can be None if you only need prediction from a trained model)
        has priority over any saved environment
    :param device: Device on which the code should run.
    :param custom_objects: Dictionary of objects to replace upon loading.
        If a variable is present in this dictionary as a key, it will not be
        deserialized and the corresponding item will be used instead.
        Similar to custom_objects in ``keras.models.load_model``. Useful when
        you have an object in file that can not be deserialized.
    :param kwargs: extra arguments to change the model when loading
    :return: new HER model wrapping the restored inner algorithm
    """
    data, params, pytorch_variables = load_from_zip_file(
        path, device=device, custom_objects=custom_objects)
    # Remove stored device information and replace with ours
    if "policy_kwargs" in data:
        if "device" in data["policy_kwargs"]:
            del data["policy_kwargs"]["device"]
    if "policy_kwargs" in kwargs and kwargs["policy_kwargs"] != data[
            "policy_kwargs"]:
        raise ValueError(
            f"The specified policy kwargs do not equal the stored policy kwargs."
            f"Stored kwargs: {data['policy_kwargs']}, specified kwargs: {kwargs['policy_kwargs']}"
        )
    # check if observation space and action space are part of the saved parameters
    if "observation_space" not in data or "action_space" not in data:
        raise KeyError(
            "The observation_space and action_space were not given, can't verify new environments"
        )
    # check if given env is valid
    if env is not None:
        # Wrap first if needed
        env = cls._wrap_env(env, data["verbose"])
        # Check if given env is valid
        check_for_correct_spaces(env, data["observation_space"],
                                 data["action_space"])
    else:
        # Use stored env, if one exists. If not, continue as is
        # (can be used for predict)
        if "env" in data:
            env = data["env"]
    if "use_sde" in data and data["use_sde"]:
        kwargs["use_sde"] = True
    # Keys that cannot be changed
    for key in {"model_class", "online_sampling", "max_episode_length"}:
        if key in kwargs:
            del kwargs[key]
    # Keys that can be changed
    for key in {"n_sampled_goal", "goal_selection_strategy"}:
        if key in kwargs:
            data[key] = kwargs[key]  # pytype: disable=unsupported-operands
            del kwargs[key]
    # noinspection PyArgumentList
    her_model = cls(
        policy=data["policy_class"],
        env=env,
        model_class=data["model_class"],
        n_sampled_goal=data["n_sampled_goal"],
        goal_selection_strategy=data["goal_selection_strategy"],
        online_sampling=data["online_sampling"],
        max_episode_length=data["max_episode_length"],
        policy_kwargs=data["policy_kwargs"],
        _init_setup_model=False,  # pytype: disable=not-instantiable,wrong-keyword-args
        **kwargs,
    )
    # load parameters
    her_model.model.__dict__.update(data)
    her_model.model.__dict__.update(kwargs)
    her_model._setup_model()
    # Mirror the inner model's training counters onto the HER wrapper.
    her_model._total_timesteps = her_model.model._total_timesteps
    her_model.num_timesteps = her_model.model.num_timesteps
    her_model._episode_num = her_model.model._episode_num
    # put state_dicts back in place
    her_model.model.set_parameters(params, exact_match=True, device=device)
    # put other pytorch variables back in place
    if pytorch_variables is not None:
        for name in pytorch_variables:
            recursive_setattr(her_model.model, name, pytorch_variables[name])
    # Sample gSDE exploration matrix, so it uses the right device
    # see issue #44
    if her_model.model.use_sde:
        her_model.model.policy.reset_noise()  # pytype: disable=attribute-error
    return her_model
def __init__(self, load_path):
    """
    Eagerly read a Stable-Baselines3 zip archive onto the CPU.

    The archive's data dictionary, module parameters and extra pytorch
    variables are kept on the instance as ``data``, ``params`` and
    ``pytorch_vars``; the path itself is stored as ``load_path``.

    :param load_path: path to a ``.zip`` file produced by SB3 ``save()``
    """
    self.load_path = load_path
    assert load_path.endswith(".zip"), "bad file name for sb3 load"
    archive_contents = load_from_zip_file(load_path, device="cpu")
    self.data, self.params, self.pytorch_vars = archive_contents
import pickle

from stable_baselines3.common.save_util import load_from_zip_file

if __name__ == '__main__':
    # Gather the replay buffers stored inside 80 pre-trained SAC checkpoints
    # into a single pickle for later offline use.
    # NOTE(review): the experiment path and the checkpoint count (80) are
    # hard-coded; adjust here when pointing at a different run.
    pretrained_path = 'C:/Users/matan/Documents/SAC_MER/experiments__2021_01_13__13_43/'
    replay_mems_path = pretrained_path + 'SAC_no_reset/buffer_50000/final_only/'
    replay_buffers = []
    for i in range(80):
        # ``replay_mems_path`` already ends with '/', so this produces a
        # double slash in the path (harmless on most filesystems).
        zf_name = replay_mems_path + f'/model_{i}.zip'
        data, params, pytorch_variables = load_from_zip_file(zf_name)
        replay_mem = data['replay_buffer']
        replay_buffers.append(replay_mem)
    # NOTE(review): the file handle from open() is never closed explicitly;
    # a ``with`` block would be safer.
    pickle.dump(replay_buffers,
                open(pretrained_path + 'replay_buffers.pkl', 'wb'))
    # NOTE(review): debug leftover with no effect.
    db = 1