def load_policy(self):
    """Build an MLPPolicy actor-critic and restore it from a checkpoint.

    The checkpoint path is assembled from ``self.args`` as
    ``load_dir + algo`` / ``phase`` / ``env_name`` /
    ``env_name + tr_itr + ".pt"`` (note: ``load_dir + algo`` is deliberate
    string concatenation, not a path join).  The saved weights are loaded
    onto the CPU, the network is switched to evaluation mode, and the
    saved observation-normalization stats are installed on the robot.

    Returns:
        The restored ``MLPPolicy`` in evaluation mode.
    """
    actor_critic = MLPPolicy(
        self.args.obs_shape[1],
        self.args.full_state_shape[1],
        self.env.robot.action_space,
        symm_policy=self.args.symm_policy,
    )
    # Build the path once; the original recomputed the identical
    # expression for the print and again for the load.
    ckpt_path = os.path.join(
        self.args.load_dir + self.args.algo,
        self.args.phase,
        self.args.env_name,
        self.args.env_name + self.args.tr_itr + ".pt",
    )
    print(ckpt_path)
    # Checkpoint is a 4-tuple; only the state dict and observation
    # running-stats are used here.
    # NOTE(review): torch.load unpickles arbitrary objects — only load
    # trusted checkpoint files.
    state_dict, ob_rms, _st_rms, _ret_rms = torch.load(
        ckpt_path, map_location='cpu')
    actor_critic.load_state_dict(state_dict)
    # eval() is equivalent to train(False); the original called both.
    actor_critic.eval()
    # Reuse the training-time observation mean/std during evaluation.
    self.env.robot.ob_rms = ob_rms
    return actor_critic
# ----[ Load Policy ]----
actor_critic = MLPPolicy(obs_shape[1], full_state_shape[1],
                         env.robot.action_space,
                         symm_policy=args.symm_policy)

# Checkpoint path: <load_dir><algo>/<phase>/<env_name>/<env_name><tr_itr>.pt
# (load_dir + algo is string concatenation, not a path join).
ckpt_file = os.path.join(args.load_dir + args.algo, args.phase,
                         args.env_name,
                         args.env_name + args.tr_itr + ".pt")
print(ckpt_file)

# Checkpoints store a 4-tuple: (state_dict, ob_rms, st_rms, ret_rms).
# state_dict, ob_rms = \
state_dict, ob_rms, st_rms, ret_rms = torch.load(ckpt_file,
                                                 map_location='cpu')
actor_critic.load_state_dict(state_dict)
actor_critic.train(False)
actor_critic.eval()

# TODO
print('ob_rms: ', ob_rms)
# print('av_ob_rms: ', av_ob_rms)
# print('ret_rms: ', ret_rms)
# print('av_ret_rms: ', av_ret_rms)

# Reuse the training-time observation normalization during rollout.
env.robot.ob_rms = ob_rms

epi_rewards = 0

######################
# Load skeleton data #

# def load_train_data(file_path, _shape):
#     load_data = np.loadtxt(file_path).reshape(_shape)