def save( self, path: Union[str, pathlib.Path, io.BufferedIOBase], exclude: Optional[Iterable[str]] = None, include: Optional[Iterable[str]] = None, ) -> None: """ Save all the attributes of the object and the model parameters in a zip-file. :param (Union[str, pathlib.Path, io.BufferedIOBase]): path to the file where the rl agent should be saved :param exclude: name of parameters that should be excluded in addition to the default one :param include: name of parameters that might be excluded but should be included anyway """ # copy parameter list so we don't mutate the original dict data = self.__dict__.copy() # Exclude is union of specified parameters (if any) and standard exclusions if exclude is None: exclude = [] exclude = set(exclude).union(self.excluded_save_params()) # Do not exclude params if they are specifically included if include is not None: exclude = exclude.difference(include) state_dicts_names, tensors_names = self.get_torch_variables() # any params that are in the save vars must not be saved by data torch_variables = state_dicts_names + tensors_names for torch_var in torch_variables: # we need to get only the name of the top most module as we'll remove that var_name = torch_var.split(".")[0] exclude.add(var_name) # Remove parameter entries of parameters which are to be excluded for param_name in exclude: data.pop(param_name, None) # Build dict of tensor variables tensors = None if tensors_names is not None: tensors = {} for name in tensors_names: attr = recursive_getattr(self, name) tensors[name] = attr # Build dict of state_dicts params_to_save = {} for name in state_dicts_names: attr = recursive_getattr(self, name) # Retrieve state dict params_to_save[name] = attr.state_dict() save_to_zip_file(path, data=data, params=params_to_save, tensors=tensors)
def save(self, path: str, exclude: Optional[List[str]] = None, include: Optional[List[str]] = None) -> None:
    """
    Save all the attributes of the object and the model parameters in a zip-file.

    :param path: path to the file where the rl agent should be saved
    :param exclude: name of parameters that should be excluded in addition to the default one
    :param include: name of parameters that might be excluded but should be included anyway
    """
    # copy parameter list so we don't mutate the original dict
    data = self.__dict__.copy()

    # Work on a copy of ``exclude``: the previous implementation extended and
    # appended to the caller's list in place, mutating the argument as a side
    # effect of calling save().
    if exclude is None:
        exclude = list(self.excluded_save_params())
    else:
        exclude = list(exclude)
        # append standard exclude params to the given params
        exclude.extend(param for param in self.excluded_save_params() if param not in exclude)

    # do not exclude params if they are specifically included
    if include is not None:
        exclude = [param_name for param_name in exclude if param_name not in include]

    state_dicts_names, tensors_names = self.get_torch_variables()
    # any params that are in the save vars must not be saved by data
    for torch_var in state_dicts_names + tensors_names:
        # we need to get only the name of the top most module as we'll remove that
        var_name = torch_var.split('.')[0]
        exclude.append(var_name)

    # Remove parameter entries of parameters which are to be excluded;
    # pop with a default already handles missing keys, no membership check needed
    for param_name in exclude:
        data.pop(param_name, None)

    # Build dict of tensor variables
    tensors = None
    if tensors_names is not None:
        tensors = {}
        for name in tensors_names:
            tensors[name] = recursive_getattr(self, name)

    # Build dict of state_dicts
    params_to_save = {}
    for name in state_dicts_names:
        # Retrieve state dict of each torch module
        params_to_save[name] = recursive_getattr(self, name).state_dict()

    save_to_zip_file(path, data=data, params=params_to_save, tensors=tensors)
# Log the environment description to an already-open file handle
# (NOTE(review): ``f`` and ``env`` are defined earlier, outside this view).
print(str(env), file=f)
env = Monitor(env)
# Build a PPO agent with an identity activation and a 1-unit policy net
# (presumably so the policy weights can be overwritten by hand below — confirm).
model = PPO('MlpPolicy', env, policy_kwargs=dict(activation_fn=nn.Identity,
                                                net_arch=[dict(pi=[1], vf=[64, 64])]))
model.save(f'{timestamp}/model')

# Reopen the saved zip, surgically replace the policy-network weights with
# hand-crafted values from ``agent.policy``, and write the archive back.
data, params, tensors = load_from_zip_file(f'{timestamp}/model')
params['policy']['mlp_extractor.policy_net.0.weight'] = th.tensor(
    agent.policy, dtype=th.float32)
params['policy']['mlp_extractor.policy_net.0.bias'] = th.tensor(
    [0], dtype=th.float32)
# Action head is forced to pass the single policy unit through unchanged (weight 1, bias 0)
params['policy']['action_net.weight'] = th.tensor([[1]], dtype=th.float32)
params['policy']['action_net.bias'] = th.tensor([0], dtype=th.float32)
save_to_zip_file(f'{timestamp}/model', data, params, tensors)
# Reload so the model in memory carries the patched parameters
model = model.load(f'{timestamp}/model')

# Evaluate the patched policy deterministically over 100 episodes
np.random.seed(0)
returns = []
for i in range(100):
    obs = env.reset()
    # env.steps_max caps the episode length — custom env attribute, not part of the gym API
    for _ in range(env.steps_max):
        env.render()
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        if done:
            break
    # env.cum_return: cumulative episode return tracked by the custom env — TODO confirm
    returns += [env.cum_return]
env.close()
# Summary statistics (count/mean/std/min/max/quartiles) of the 100 episode returns
print(pd.Series(returns).describe())
def save(self, save_path):
    """Write this object's data, params and pytorch variables to ``save_path`` as a zip-file."""
    payload = (self.data, self.params, self.pytorch_vars)
    save_to_zip_file(save_path, *payload)