def test_policy2():
    # Smoke test: build a PolicyWithQs from the parsed args and a Pendulum env.
    import gym
    from train_script import built_mixedpg_parser
    from policy import PolicyWithQs

    args = built_mixedpg_parser()
    env = gym.make('Pendulum-v0')
    policy_with_value = PolicyWithQs(env.observation_space, env.action_space, args)
def test_trained_model(model_dir, ppc_params_dir, iteration):
    # Load a trained checkpoint plus preprocessing params and report evaluation metrics.
    from train_script import built_mixedpg_parser
    from policy import PolicyWithQs
    from evaluator import Evaluator  # assumed module path for the Evaluator class

    args = built_mixedpg_parser()
    evaluator = Evaluator(PolicyWithQs, args.env_id, args)
    evaluator.load_weights(model_dir, iteration)
    evaluator.load_ppc_params(ppc_params_dir)
    return evaluator.metrics(1000, render=False, reset=False)
def test_policy():
    # Feed random observation/action batches through the Q networks of the path-tracking policy.
    import gym
    import numpy as np
    from train_script import built_mixedpg_parser
    from policy import PolicyWithQs

    args = built_mixedpg_parser()
    print(args.obs_dim, args.act_dim)
    env = gym.make('PathTracking-v0')
    policy = PolicyWithQs(env.observation_space, env.action_space, args)
    obs = np.random.random((128, 6))
    act = np.random.random((128, 2))
    Qs = policy.compute_Qs(obs, act)
    print(Qs)
def test_policy_with_Qs():
    # Check that gradients of the Q value flow back to the policy weights.
    import gym
    import numpy as np
    import tensorflow as tf
    from train_script import built_mixedpg_parser
    from policy import PolicyWithQs

    args = built_mixedpg_parser()
    args.obs_dim = 3
    env = gym.make('Pendulum-v0')
    policy_with_value = PolicyWithQs(env.observation_space, env.action_space, args)
    # print(policy_with_value.policy.trainable_weights)
    # print(policy_with_value.Qs[0].trainable_weights)
    obses = np.array([[1., 2., 3.], [3., 4., 5.]], dtype=np.float32)
    with tf.GradientTape() as tape:
        acts, _ = policy_with_value.compute_action(obses)
        Qs = policy_with_value.compute_Qs(obses, acts)[0]
        print(Qs)
        loss = tf.reduce_mean(Qs)
    gradient = tape.gradient(loss, policy_with_value.policy.trainable_weights)
    print(gradient)
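# Minimal sketch of a convenience runner, assuming gym, tensorflow and
# train_script are importable from this file; it picks the self-contained
# Pendulum-v0 gradient check, which needs no checkpoint or custom env.
if __name__ == '__main__':
    test_policy_with_Qs()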