import os
import os.path
import pickle
import sys

import numpy as np

# Path setup is an assumption: it mirrors the upstream GPS repo layout, where
# this file lives under <root>/tests/tests_tensorflow/ and the gps package
# lives under <root>/gps/.
gps_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
sys.path.append(gps_path)

from gps.algorithm.policy_opt.config import POLICY_OPT_TF
from gps.algorithm.policy_opt.policy_opt_tf import PolicyOptTf


def test_policy_opt_live():
    test_dir = gps_path + '/tests/tests_tensorflow' + '/test_data/'
    obs = np.load(test_dir + 'obs.npy')
    tgt_mu = np.load(test_dir + 'tgt_mu.npy')
    tgt_prc = np.load(test_dir + 'tgt_prc.npy')
    scale = np.load(test_dir + 'scale_npy.npy')
    bias = np.load(test_dir + 'bias_npy.npy')
    hyper_params = POLICY_OPT_TF
    deg_obs = 4
    deg_action = 2
    policy = PolicyOptTf(hyper_params, deg_obs, deg_action)
    policy.policy.scale = scale
    policy.policy.bias = bias
    iterations = 200
    batch_size = 32
    batches_per_epoch = 800 // batch_size
    # np.random.shuffle cannot shuffle a range object under Python 3, so draw
    # a shuffled index array directly.
    idx = np.random.permutation(800)
    for i in range(iterations):
        # Load in data for this batch.
        start_idx = int(i * batch_size % (batches_per_epoch * batch_size))
        idx_i = idx[start_idx:start_idx + batch_size]
        feed_dict = {policy.obs_tensor: obs[idx_i],
                     policy.action_tensor: tgt_mu[idx_i],
                     policy.precision_tensor: tgt_prc[idx_i]}
        # Run the action op on a single observation to exercise the forward
        # pass alongside training.
        policy.sess.run(policy.act_op,
                        feed_dict={policy.obs_tensor: np.expand_dims(obs[idx_i][0], 0)})
        policy.solver(feed_dict, policy.sess)
def test_load_from_auto_save():
    path_to_dict = gps_path + '/gps/algorithm/policy_opt/tf_checkpoint/policy_checkpoint.ckpt_hyperparams'
    with open(path_to_dict, 'rb') as f:
        state = pickle.load(f)
    hyper_params = state['hyperparams']
    deg_obs = state['dO']
    deg_action = state['dU']
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)
    policy_opt.__setstate__(state)
def test_policy_opt_tf_backwards():
    hyper_params = POLICY_OPT_TF
    deg_obs = 14
    deg_action = 7
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)
    N = 20
    T = 30
    obs = np.random.randn(N, T, deg_obs)
    obs_reshaped = np.reshape(obs, (N * T, deg_obs))
    policy_opt.policy.scale = np.diag(1.0 / np.std(obs_reshaped, axis=0))
    policy_opt.policy.bias = -np.mean(obs_reshaped.dot(policy_opt.policy.scale), axis=0)
    # Smoke test: prob() should run the network forward over all samples.
    policy_opt.prob(obs=obs)
def test_policy_opt_backwards():
    hyper_params = POLICY_OPT_TF
    deg_obs = 14
    deg_action = 7
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)
    # pylint: disable=W0212
    policy_opt._hyperparams['iterations'] = 100  # 100 for testing.
    N = 10
    T = 10
    obs = np.random.randn(N, T, deg_obs)
    tgt_mu = np.random.randn(N, T, deg_action)
    tgt_prc = np.random.randn(N, T, deg_action, deg_action)
    tgt_wt = np.random.randn(N, T)
    policy_opt.update(obs, tgt_mu, tgt_prc, tgt_wt, itr=0, inner_itr=1)
def test_unpickle():
    hyper_params = POLICY_OPT_TF
    deg_obs = 14
    deg_action = 7
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)
    N = 20
    T = 30
    obs = np.random.randn(N, T, deg_obs)
    obs_reshaped = np.reshape(obs, (N * T, deg_obs))
    scale = np.diag(1.0 / np.std(obs_reshaped, axis=0))
    bias = -np.mean(obs_reshaped.dot(scale), axis=0)
    hyper_params['scale'] = scale
    hyper_params['bias'] = bias
    hyper_params['tf_iter'] = 100
    policy_opt.__setstate__({'hyperparams': hyper_params,
                             'dO': deg_obs,
                             'dU': deg_action,
                             'scale': policy_opt.policy.scale,
                             'bias': policy_opt.policy.bias,
                             'tf_iter': 100})
def test_pickle():
    hyper_params = POLICY_OPT_TF
    deg_obs = 100
    deg_action = 7
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)
    # __getstate__ should produce the dict consumed by __setstate__ above.
    policy_opt.__getstate__()
def test_auto_save_state():
    hyper_params = POLICY_OPT_TF
    deg_obs = 100
    deg_action = 7
    policy_opt = PolicyOptTf(hyper_params, deg_obs, deg_action)
    policy_opt.auto_save_state()
def test_policy_opt_tf_init():
    hyper_params = POLICY_OPT_TF
    deg_obs = 100
    deg_action = 7
    PolicyOptTf(hyper_params, deg_obs, deg_action)
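# Minimal sketch of a direct runner, assuming these tests are meant to be
# executed from the command line. The selection and ordering below are an
# assumption: test_policy_opt_live and test_load_from_auto_save depend on
# on-disk data/checkpoint files, so they are left out of the default run.
def main():
    test_policy_opt_tf_init()
    test_pickle()
    test_auto_save_state()
    test_unpickle()
    test_policy_opt_tf_backwards()
    test_policy_opt_backwards()
    print('all tests passed.')


if __name__ == '__main__':
    main()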