Ejemplo n.º 1
0
def test_policy_opt_live():
    """Run minibatch training of a PolicyOptTf on data loaded from disk.

    Loads the observation, target-mean, target-precision, scale and bias
    arrays from the ``test_data`` directory under ``gps_path``, builds a
    ``PolicyOptTf`` from ``POLICY_OPT_TF``, and then for 200 iterations
    evaluates ``policy.act_op`` on the first sample of the current batch
    and calls ``policy.solver`` on the whole batch. Nothing is asserted;
    the test passes when no call raises.
    """
    test_dir = gps_path + '/tests/tests_tensorflow' + '/test_data/'
    obs = np.load(test_dir + 'obs.npy')
    tgt_mu = np.load(test_dir + 'tgt_mu.npy')
    tgt_prc = np.load(test_dir + 'tgt_prc.npy')
    scale = np.load(test_dir + 'scale_npy.npy')
    bias = np.load(test_dir + 'bias_npy.npy')
    hyper_params = POLICY_OPT_TF
    deg_obs = 4
    deg_action = 2

    policy = PolicyOptTf(hyper_params, deg_obs, deg_action)
    policy.policy.scale = scale
    policy.policy.bias = bias

    # NOTE(review): assumes the saved arrays hold at least 800 samples --
    # confirm against the fixture files.
    num_samples = 800
    iterations = 200
    batch_size = 32
    # Integer floor division keeps every derived index an int.
    batches_per_epoch = num_samples // batch_size
    # np.random.shuffle permutes in place and therefore needs a mutable
    # sequence; a Python 3 ``range`` object would raise TypeError here.
    idx = np.arange(num_samples)
    np.random.shuffle(idx)

    for i in range(iterations):
        # Load in data for this batch; wrap around at the end of an epoch.
        start_idx = (i * batch_size) % (batches_per_epoch * batch_size)
        idx_i = idx[start_idx:start_idx + batch_size]
        feed_dict = {policy.obs_tensor: obs[idx_i],
                     policy.action_tensor: tgt_mu[idx_i],
                     policy.precision_tensor: tgt_prc[idx_i]}
        # Evaluate the action op on a single observation; the result is
        # not inspected, only the call itself is exercised.
        policy.sess.run(policy.act_op,
                        feed_dict={policy.obs_tensor: np.expand_dims(obs[idx_i][0], 0)})
        policy.solver(feed_dict, policy.sess)
Ejemplo n.º 2
0
def test_load_from_auto_save():
    """Rebuild a PolicyOptTf from the pickled auto-save hyperparameter file.

    Reads the state dictionary stored next to the policy checkpoint under
    ``gps_path``, constructs a ``PolicyOptTf`` from its ``'hyperparams'``,
    ``'dO'`` and ``'dU'`` entries, and restores the remaining state through
    ``__setstate__``. Nothing is asserted; the test passes when no call
    raises.
    """
    import pickle
    path_to_dict = gps_path + '/gps/algorithm/policy_opt/tf_checkpoint/policy_checkpoint.ckpt_hyperparams'
    # NOTE: pickle can execute arbitrary code while loading; only use this
    # on checkpoint files produced by a trusted run.
    # The context manager closes the file handle even if loading fails.
    with open(path_to_dict, "rb") as state_file:
        state = pickle.load(state_file)
    hyper_params = state['hyperparams']
    deg_obs = state['dO']
    deg_action = state['dU']
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)
    policy_opt.__setstate__(state)
Ejemplo n.º 3
0
def test_policy_opt_tf_backwards():
    """Call ``prob`` on a PolicyOptTf using random observations.

    Draws a random (20, 30, 14) observation array, sets the policy's
    ``scale`` to the diagonal matrix of inverse per-column standard
    deviations and ``bias`` to the negated per-column mean of the scaled
    observations, then calls ``prob``. Nothing is asserted.
    """
    deg_obs, deg_action = 14, 7
    policy_opt = PolicyOptTf(POLICY_OPT_TF, deg_obs, deg_action)

    num_samples, horizon = 20, 30
    obs = np.random.randn(num_samples, horizon, deg_obs)
    # Flatten the two leading axes so statistics are taken per column.
    flat_obs = obs.reshape((num_samples * horizon, deg_obs))

    policy_opt.policy.scale = np.diag(1.0 / flat_obs.std(axis=0))
    scaled_obs = flat_obs.dot(policy_opt.policy.scale)
    policy_opt.policy.bias = -scaled_obs.mean(axis=0)

    policy_opt.prob(obs=obs)
Ejemplo n.º 4
0
def test_policy_opt_backwards():
    """Call ``update`` on a PolicyOptTf using random training data.

    Sets the optimizer's ``'iterations'`` hyperparameter to 100, draws
    random observations, target means, target precisions and weights for
    a 10 x 10 leading shape, and calls ``update`` once. Nothing is
    asserted.
    """
    deg_obs, deg_action = 14, 7
    policy_opt = PolicyOptTf(POLICY_OPT_TF, deg_obs, deg_action)
    # pylint: disable=W0212
    policy_opt._hyperparams['iterations'] = 100  # 100 for testing.

    num_samples = 10
    horizon = 10
    leading = (num_samples, horizon)
    # Draw order matches the argument order passed to update() below.
    obs = np.random.randn(*(leading + (deg_obs,)))
    tgt_mu = np.random.randn(*(leading + (deg_action,)))
    tgt_prc = np.random.randn(*(leading + (deg_action, deg_action)))
    tgt_wt = np.random.randn(*leading)

    policy_opt.update(obs, tgt_mu, tgt_prc, tgt_wt, itr=0, inner_itr=1)
Ejemplo n.º 5
0
def test_unpickle():
    """Restore a PolicyOptTf from a hand-built state dictionary.

    Builds a ``PolicyOptTf``, computes a scale and bias from random
    observations, stores them together with ``'tf_iter'`` in the
    hyperparameters, and passes a state dictionary to ``__setstate__``.
    Nothing is asserted; the test passes when no call raises.
    """
    # Work on a copy: the entries written below must not leak into the
    # shared module-level POLICY_OPT_TF that other tests also read.
    hyper_params = dict(POLICY_OPT_TF)
    deg_obs = 14
    deg_action = 7
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)
    N = 20
    T = 30
    obs = np.random.randn(N, T, deg_obs)
    obs_reshaped = np.reshape(obs, (N*T, deg_obs))
    scale = np.diag(1.0 / np.std(obs_reshaped, axis=0))
    bias = -np.mean(obs_reshaped.dot(scale), axis=0)
    hyper_params['scale'] = scale
    hyper_params['bias'] = bias
    hyper_params['tf_iter'] = 100
    # The top-level 'scale'/'bias' entries come from the freshly built
    # policy, not from the values computed above.
    policy_opt.__setstate__({'hyperparams': hyper_params, 'dO': deg_obs, 'dU': deg_action,
                             'scale': policy_opt.policy.scale, 'bias': policy_opt.policy.bias, 'tf_iter': 100})
Ejemplo n.º 6
0
def test_pickle():
    """Build a PolicyOptTf and call ``__getstate__`` on it.

    The returned state is not inspected; the test passes when neither
    the constructor nor ``__getstate__`` raises.
    """
    deg_obs, deg_action = 100, 7
    optimizer = PolicyOptTf(POLICY_OPT_TF, deg_obs, deg_action)
    optimizer.__getstate__()
Ejemplo n.º 7
0
def test_auto_save_state():
    """Build a PolicyOptTf and call ``auto_save_state`` on it.

    Nothing is asserted; the test passes when neither the constructor
    nor ``auto_save_state`` raises.
    """
    deg_obs, deg_action = 100, 7
    optimizer = PolicyOptTf(POLICY_OPT_TF, deg_obs, deg_action)
    optimizer.auto_save_state()
Ejemplo n.º 8
0
def test_auto_save_state():
    """Construct a PolicyOptTf from POLICY_OPT_TF and auto-save its state.

    Uses ``deg_obs=100`` and ``deg_action=7``. No result is checked;
    the call completing without an exception is the pass condition.
    """
    dims = (100, 7)  # (deg_obs, deg_action)
    PolicyOptTf(POLICY_OPT_TF, *dims).auto_save_state()
Ejemplo n.º 9
0
def test_pickle():
    """Construct a PolicyOptTf from POLICY_OPT_TF and fetch its state.

    Uses ``deg_obs=100`` and ``deg_action=7``. The value returned by
    ``__getstate__`` is discarded; the call completing without an
    exception is the pass condition.
    """
    dims = (100, 7)  # (deg_obs, deg_action)
    PolicyOptTf(POLICY_OPT_TF, *dims).__getstate__()
Ejemplo n.º 10
0
def test_policy_opt_tf_init():
    """Check that a PolicyOptTf can be constructed without raising.

    Uses the module-level ``POLICY_OPT_TF`` hyperparameters with
    ``deg_obs=100`` and ``deg_action=7``; the instance is discarded.
    """
    deg_obs, deg_action = 100, 7
    PolicyOptTf(POLICY_OPT_TF, deg_obs, deg_action)