예제 #1
0
def test_load_from_auto_save():
    """Rebuild a PolicyOptTf from the auto-saved state file and restore it.

    Unpickles the state dictionary found at
    ``<gps_path>/gps/algorithm/policy_opt/tf_checkpoint/policy_checkpoint.ckpt_hyperparams``,
    constructs a ``PolicyOptTf`` from its ``'hyperparams'``, ``'dO'`` and
    ``'dU'`` entries, and then hands the whole dictionary to ``__setstate__``.

    The test makes no assertions; it passes when none of these steps raise.
    """
    import pickle
    path_to_dict = gps_path + '/gps/algorithm/policy_opt/tf_checkpoint/policy_checkpoint.ckpt_hyperparams'
    # Context manager guarantees the file handle is closed, even when
    # unpickling fails part-way through.
    # NOTE: pickle can execute arbitrary code while loading; this path must
    # only ever point at a trusted, locally produced checkpoint file.
    with open(path_to_dict, "rb") as state_file:
        state = pickle.load(state_file)
    hyper_params = state['hyperparams']
    deg_obs = state['dO']
    deg_action = state['dU']
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)
    # Called explicitly (rather than via pickle) to exercise the restore path
    # on an already constructed object.
    policy_opt.__setstate__(state)
예제 #2
0
def test_load_from_auto_save():
    """Load the auto-saved policy state from disk and apply it to a new PolicyOptTf.

    Steps performed:
      1. Unpickle the state dictionary stored at
         ``<gps_path>/gps/algorithm/policy_opt/tf_checkpoint/policy_checkpoint.ckpt_hyperparams``.
      2. Build a ``PolicyOptTf`` from the dictionary's ``'hyperparams'``,
         ``'dO'`` and ``'dU'`` values.
      3. Pass the full dictionary to ``PolicyOptTf.__setstate__``.

    There are no assertions; the test succeeds if no step raises.
    """
    import pickle
    path_to_dict = gps_path + '/gps/algorithm/policy_opt/tf_checkpoint/policy_checkpoint.ckpt_hyperparams'
    # Open via ``with`` so the descriptor is released deterministically
    # instead of being left for the garbage collector.
    # NOTE: pickle can execute arbitrary code while loading; only use this
    # with checkpoint files from a trusted source.
    with open(path_to_dict, "rb") as state_file:
        state = pickle.load(state_file)
    hyper_params = state['hyperparams']
    deg_obs = state['dO']
    deg_action = state['dU']
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)
    policy_opt.__setstate__(state)
예제 #3
0
def test_unpickle():
    """Restore a freshly built PolicyOptTf from a hand-assembled state dict.

    Builds a ``PolicyOptTf`` with 14 observation and 7 action dimensions,
    derives a diagonal ``scale`` matrix (reciprocal per-dimension standard
    deviation) and a ``bias`` vector (negated mean of the scaled data) from
    random observations, stores them in the hyperparameters together with
    ``tf_iter = 100``, and calls ``__setstate__`` with the resulting state.

    Note that the ``'scale'`` and ``'bias'`` entries of the state itself are
    taken from ``policy_opt.policy``, not from the values computed here.

    The test makes no assertions; it passes when nothing raises.
    """
    import copy
    # Work on a shallow copy: the item assignments below would otherwise
    # write 'scale', 'bias' and 'tf_iter' into the shared module-level
    # POLICY_OPT_TF and leak into every other user of that config.
    hyper_params = copy.copy(POLICY_OPT_TF)
    deg_obs = 14
    deg_action = 7
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)
    N = 20
    T = 30
    obs = np.random.randn(N, T, deg_obs)
    # Flatten the first two axes so statistics are taken per observation dimension.
    obs_reshaped = np.reshape(obs, (N*T, deg_obs))
    scale = np.diag(1.0 / np.std(obs_reshaped, axis=0))
    bias = -np.mean(obs_reshaped.dot(scale), axis=0)
    hyper_params['scale'] = scale
    hyper_params['bias'] = bias
    hyper_params['tf_iter'] = 100
    policy_opt.__setstate__({'hyperparams': hyper_params, 'dO': deg_obs, 'dU': deg_action,
                             'scale': policy_opt.policy.scale, 'bias': policy_opt.policy.bias, 'tf_iter': 100})
예제 #4
0
def test_unpickle():
    """Exercise ``PolicyOptTf.__setstate__`` with a manually constructed state.

    A ``PolicyOptTf`` is created for 14 observation / 7 action dimensions.
    Random observations of shape ``(20, 30, 14)`` are flattened to
    ``(600, 14)`` and used to compute a diagonal ``scale`` matrix and a
    ``bias`` vector, which are placed in the hyperparameters along with
    ``tf_iter = 100``. The state passed to ``__setstate__`` carries those
    hyperparameters, but its own ``'scale'`` and ``'bias'`` entries come from
    ``policy_opt.policy``.

    No assertions are made; the test succeeds if no exception is raised.
    """
    import copy
    # Copy first so the assignments further down do not mutate the shared
    # module-level POLICY_OPT_TF configuration in place.
    hyper_params = copy.copy(POLICY_OPT_TF)
    deg_obs = 14
    deg_action = 7
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)
    N = 20
    T = 30
    obs = np.random.randn(N, T, deg_obs)
    obs_reshaped = np.reshape(obs, (N*T, deg_obs))
    # Reciprocal of the per-dimension standard deviation on the diagonal.
    scale = np.diag(1.0 / np.std(obs_reshaped, axis=0))
    # Negated mean of the scaled observations.
    bias = -np.mean(obs_reshaped.dot(scale), axis=0)
    hyper_params['scale'] = scale
    hyper_params['bias'] = bias
    hyper_params['tf_iter'] = 100
    policy_opt.__setstate__({'hyperparams': hyper_params, 'dO': deg_obs, 'dU': deg_action,
                             'scale': policy_opt.policy.scale, 'bias': policy_opt.policy.bias, 'tf_iter': 100})