Esempio n. 1
0
    def load_policy(self):
        """Build an MLPPolicy, restore its weights from the checkpoint on
        disk, switch it to eval mode, and install the saved observation
        normalizer stats on the robot.

        Returns:
            The loaded ``MLPPolicy`` in eval mode.
        """
        actor_critic = MLPPolicy(self.args.obs_shape[1],
                                 self.args.full_state_shape[1],
                                 self.env.robot.action_space,
                                 symm_policy=self.args.symm_policy)

        # Build the checkpoint path once instead of duplicating the whole
        # os.path.join expression for the print and the load.
        # NOTE(review): `load_dir + algo` is plain string concatenation, not a
        # path join — presumably load_dir ends with a separator; confirm.
        checkpoint_path = os.path.join(
            self.args.load_dir + self.args.algo, self.args.phase,
            self.args.env_name,
            self.args.env_name + self.args.tr_itr + ".pt")
        print(checkpoint_path)

        # Checkpoint tuple: weights plus obs-/state-/return-normalizer stats.
        # NOTE(review): torch.load unpickles arbitrary objects — only load
        # trusted checkpoint files.
        state_dict, ob_rms, st_rms, ret_rms = torch.load(checkpoint_path,
                                                         map_location='cpu')

        actor_critic.load_state_dict(state_dict)
        actor_critic.eval()  # eval() already implies train(False)

        # Restore observation normalization so inference matches training.
        self.env.robot.ob_rms = ob_rms
        return actor_critic
Esempio n. 2
0
# Build the actor-critic policy and restore its weights from a checkpoint,
# then switch it to eval mode for rollout.
actor_critic = MLPPolicy(obs_shape[1],
                         full_state_shape[1],
                         env.robot.action_space,
                         symm_policy=args.symm_policy)

# Build the checkpoint path once instead of duplicating the whole
# os.path.join expression for the print and the load.
# NOTE(review): `load_dir + algo` is plain string concatenation, not a path
# join — presumably load_dir ends with a separator; confirm against caller.
checkpoint_path = os.path.join(args.load_dir + args.algo, args.phase,
                               args.env_name,
                               args.env_name + args.tr_itr + ".pt")
print(checkpoint_path)

# Checkpoint tuple: weights plus obs-/state-/return-normalizer stats.
# NOTE(review): torch.load unpickles arbitrary objects — only load trusted
# checkpoint files.
state_dict, ob_rms, st_rms, ret_rms = torch.load(checkpoint_path,
                                                 map_location='cpu')

actor_critic.load_state_dict(state_dict)
actor_critic.eval()  # eval() already implies train(False)

print('ob_rms: ', ob_rms)

# Restore the observation normalization stats so inference matches training.
env.robot.ob_rms = ob_rms

epi_rewards = 0
######################
# Load skeleton data
# def load_train_data(file_path, _shape):
#     load_data = np.loadtxt(file_path).reshape(_shape)
#     print('load data shape: ', load_data.shape)
#     return load_data, load_data.shape