def test_load_from_auto_save():
    """Rebuild a PolicyOptTf from a pickled state dict on disk.

    Unpickles the ``policy_checkpoint.ckpt_hyperparams`` file located under
    ``gps_path``, constructs a ``PolicyOptTf`` from the ``'hyperparams'``,
    ``'dO'`` and ``'dU'`` entries of the loaded dict, and then hands the
    whole dict to ``__setstate__``.
    """
    # NOTE(review): a later definition with this same name follows in the
    # file and rebinds it -- consider removing one of the two.
    import pickle

    path_to_dict = gps_path + '/gps/algorithm/policy_opt/tf_checkpoint/policy_checkpoint.ckpt_hyperparams'
    # Use a context manager so the file handle is closed even if
    # unpickling raises.
    with open(path_to_dict, "rb") as state_file:
        state = pickle.load(state_file)

    hyper_params = state['hyperparams']
    deg_obs = state['dO']
    deg_action = state['dU']
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)
    policy_opt.__setstate__(state)
def test_load_from_auto_save():
    """Rebuild a PolicyOptTf from a pickled state dict on disk.

    Unpickles the ``policy_checkpoint.ckpt_hyperparams`` file located under
    ``gps_path``, constructs a ``PolicyOptTf`` from the ``'hyperparams'``,
    ``'dO'`` and ``'dU'`` entries of the loaded dict, and then hands the
    whole dict to ``__setstate__``.
    """
    # NOTE(review): an earlier definition with this same name precedes this
    # one in the file; this later definition is the one that stays bound.
    # Consider removing the duplicate.
    import pickle

    path_to_dict = gps_path + '/gps/algorithm/policy_opt/tf_checkpoint/policy_checkpoint.ckpt_hyperparams'
    # Use a context manager so the file handle is closed even if
    # unpickling raises.
    with open(path_to_dict, "rb") as state_file:
        state = pickle.load(state_file)

    hyper_params = state['hyperparams']
    deg_obs = state['dO']
    deg_action = state['dU']
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)
    policy_opt.__setstate__(state)
def test_unpickle():
    """Call PolicyOptTf.__setstate__ with a hand-built state dict.

    Constructs a ``PolicyOptTf`` for 14 observation and 7 action dimensions,
    derives a scale matrix and bias vector from random observations, stores
    them (plus ``tf_iter``) in the hyperparameters, and passes a state dict
    to ``__setstate__``. The top-level ``'scale'`` and ``'bias'`` entries of
    that state come from the freshly built ``policy_opt.policy``.
    """
    # NOTE(review): a later definition with this same name follows in the
    # file and rebinds it -- consider removing one of the two.

    # Work on a shallow copy: the item assignments below would otherwise be
    # written into the shared POLICY_OPT_TF object and persist after the test.
    hyper_params = dict(POLICY_OPT_TF)
    deg_obs = 14
    deg_action = 7
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)

    N = 20
    T = 30
    obs = np.random.randn(N, T, deg_obs)
    # Flatten the first two axes so statistics are taken per observation dim.
    obs_reshaped = np.reshape(obs, (N*T, deg_obs))
    # Diagonal matrix of 1/std per column; bias is minus the column means of
    # the scaled observations.
    scale = np.diag(1.0 / np.std(obs_reshaped, axis=0))
    bias = -np.mean(obs_reshaped.dot(scale), axis=0)

    hyper_params['scale'] = scale
    hyper_params['bias'] = bias
    hyper_params['tf_iter'] = 100

    policy_opt.__setstate__({
        'hyperparams': hyper_params,
        'dO': deg_obs,
        'dU': deg_action,
        'scale': policy_opt.policy.scale,
        'bias': policy_opt.policy.bias,
        'tf_iter': 100,
    })
def test_unpickle():
    """Call PolicyOptTf.__setstate__ with a hand-built state dict.

    Constructs a ``PolicyOptTf`` for 14 observation and 7 action dimensions,
    derives a scale matrix and bias vector from random observations, stores
    them (plus ``tf_iter``) in the hyperparameters, and passes a state dict
    to ``__setstate__``. The top-level ``'scale'`` and ``'bias'`` entries of
    that state come from the freshly built ``policy_opt.policy``.
    """
    # NOTE(review): an earlier definition with this same name precedes this
    # one in the file; this later definition is the one that stays bound.
    # Consider removing the duplicate.

    # Work on a shallow copy: the item assignments below would otherwise be
    # written into the shared POLICY_OPT_TF object and persist after the test.
    hyper_params = dict(POLICY_OPT_TF)
    deg_obs = 14
    deg_action = 7
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)

    N = 20
    T = 30
    obs = np.random.randn(N, T, deg_obs)
    # Flatten the first two axes so statistics are taken per observation dim.
    obs_reshaped = np.reshape(obs, (N*T, deg_obs))
    # Diagonal matrix of 1/std per column; bias is minus the column means of
    # the scaled observations.
    scale = np.diag(1.0 / np.std(obs_reshaped, axis=0))
    bias = -np.mean(obs_reshaped.dot(scale), axis=0)

    hyper_params['scale'] = scale
    hyper_params['bias'] = bias
    hyper_params['tf_iter'] = 100

    policy_opt.__setstate__({
        'hyperparams': hyper_params,
        'dO': deg_obs,
        'dU': deg_action,
        'scale': policy_opt.policy.scale,
        'bias': policy_opt.policy.bias,
        'tf_iter': 100,
    })