def testGinConfig(self):
  gin.parse_config_file(
      test_utils.test_src_dir_path(
          'environments/configs/suite_pybullet.gin'))
  env = suite_pybullet.load()
  self.assertIsInstance(env, py_environment.PyEnvironment)
  self.assertIsInstance(env, wrappers.TimeLimit)
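# The gin file above supplies load()'s arguments. A minimal sketch of the
# same idea with an inline binding (illustrative only; the binding and
# environment names here are assumptions, not the contents of
# suite_pybullet.gin):
import gin
from tf_agents.environments import suite_pybullet

gin.parse_config("suite_pybullet.load.environment_name = 'AntBulletEnv-v0'")
env = suite_pybullet.load()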
def get_env():
  # The kwargs are the reward function weights.
  return suite_pybullet.load(
      env_name,
      gym_kwargs={
          'distance_weight': 1.0,
          'energy_weight': 0.001,
          'shake_weight': 0.00,
          'drift_weight': 0.05,
      })
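# Usage sketch: gym_kwargs are forwarded to the underlying Gym constructor,
# so these weights reach the environment's reward function. The env_name
# value is assumed here for illustration (the weight names match the
# Minitaur reward terms); the excerpt defines env_name elsewhere.
env_name = 'MinitaurBulletEnv-v0'
env = get_env()
print('Observation Spec:', env.observation_spec())
print('Action Spec:', env.action_spec())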
actor_learning_rate = 3e-4
alpha_learning_rate = 3e-4
target_update_tau = 0.005
target_update_period = 1
gamma = 0.99
reward_scale_factor = 1.0

actor_fc_layer_params = (256, 256)
critic_joint_fc_layer_params = (256, 256)

log_interval = 5000
num_eval_episodes = 20
eval_interval = 10000
policy_save_interval = 5000

collect_env = tf_py_environment.TFPyEnvironment(suite_pybullet.load(env_name))
eval_env = tf_py_environment.TFPyEnvironment(suite_pybullet.load(env_name))

observation_spec, action_spec, time_step_spec = (
    spec_utils.get_tensor_specs(collect_env))

critic_net = critic_network.CriticNetwork(
    (observation_spec, action_spec),
    observation_fc_layer_params=None,
    action_fc_layer_params=None,
    joint_fc_layer_params=critic_joint_fc_layer_params,
    kernel_initializer=tf.keras.initializers.HeNormal(),
    last_kernel_initializer=tf.keras.initializers.HeNormal())

actor_net = actor_distribution_network.ActorDistributionNetwork(
    observation_spec,
    # The excerpt is cut off here; remaining arguments assumed from the
    # standard TF-Agents SAC setup.
    action_spec,
    fc_layer_params=actor_fc_layer_params)
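# Hedged sketch of the next step: assembling the SAC agent from the pieces
# above (standard TF-Agents pattern, not from the excerpt;
# critic_learning_rate is an assumption, the excerpt does not define it).
from tf_agents.agents.sac import sac_agent
from tf_agents.train.utils import train_utils

critic_learning_rate = 3e-4  # assumption
train_step = train_utils.create_train_step()
tf_agent = sac_agent.SacAgent(
    time_step_spec,
    action_spec,
    actor_network=actor_net,
    critic_network=critic_net,
    actor_optimizer=tf.keras.optimizers.Adam(learning_rate=actor_learning_rate),
    critic_optimizer=tf.keras.optimizers.Adam(learning_rate=critic_learning_rate),
    alpha_optimizer=tf.keras.optimizers.Adam(learning_rate=alpha_learning_rate),
    target_update_tau=target_update_tau,
    target_update_period=target_update_period,
    td_errors_loss_fn=tf.math.squared_difference,
    gamma=gamma,
    reward_scale_factor=reward_scale_factor,
    train_step_counter=train_step)
tf_agent.initialize()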
initial_collect_steps = 1000
collect_steps_per_iteration = 64
replay_buffer_max_length = 100000
batch_size = 64
learning_rate = 1e-3
log_interval = 200
num_eval_episodes = 10
eval_interval = 10
policy_save_interval = 5000

# Load the environment.
env_name = "MinitaurBulletEnv-v0"
env = suite_pybullet.load(env_name)

# Two environments are instantiated: one for training, one for evaluation.
# (Loaded via suite_pybullet, since MinitaurBulletEnv is a Bullet env.)
train_py_env = suite_pybullet.load(env_name)
eval_py_env = suite_pybullet.load(env_name)

# TFPyEnvironment converts NumPy arrays to tensors, making the environments
# compatible with TensorFlow agents and policies.
train_env = tf_py_environment.TFPyEnvironment(train_py_env)
eval_env = tf_py_environment.TFPyEnvironment(eval_py_env)

time_step = env.reset()
observation_spec, action_spec, time_step_spec = (
    spec_utils.get_tensor_specs(train_env))

# Networks.
# conv_layer_params = [(32, 3, 3), (32, 3, 3), (32, 3, 3)]
conv_layer_params = None
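# Hedged sketch of the networks section this excerpt leads into: an actor
# network built from the specs above (fc_layer_params here is a hypothetical
# choice, not from the excerpt).
from tf_agents.networks import actor_distribution_network

actor_net = actor_distribution_network.ActorDistributionNetwork(
    observation_spec,
    action_spec,
    conv_layer_params=conv_layer_params,
    fc_layer_params=(256, 256))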
def testPybulletEnvRegistered(self):
  env = suite_pybullet.load('InvertedPendulumBulletEnv-v0')
  self.assertIsInstance(env, py_environment.PyEnvironment)
  self.assertIsInstance(env, wrappers.TimeLimit)
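# Sketch of what "registered" means here: importing pybullet_envs registers
# the Bullet environments with Gym, so the same id also resolves through
# plain gym.make (illustrative; not part of the test).
import gym
import pybullet_envs  # noqa: F401  (registers Bullet envs with Gym)

gym_env = gym.make('InvertedPendulumBulletEnv-v0')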
from tf_agents.train.utils import strategy_utils
from tf_agents.agents.behavioral_cloning.behavioral_cloning_agent import BehavioralCloningAgent
from tf_agents.networks.sequential import Sequential
from tf_agents.environments import suite_pybullet
from tf_agents.environments import tf_py_environment
from tf_agents.replay_buffers.tf_uniform_replay_buffer import TFUniformReplayBuffer
from tf_agents.metrics.tf_metrics import AverageReturnMetric
from tf_agents.policies import random_tf_policy
from drivers import TFRenderDriver
from tf_agents.policies.actor_policy import ActorPolicy
from tf_agents.specs import tensor_spec
from tf_agents.trajectories import time_step as ts
from tf_agents.policies.policy_saver import PolicySaver

if __name__ == '__main__':
  py_env = suite_pybullet.load('AntBulletEnv-v0')
  py_env.render(mode="human")
  env = tf_py_environment.TFPyEnvironment(py_env)

  strategy = strategy_utils.get_strategy(tpu=False, use_gpu=True)

  replay_buffer_capacity = 2000
  learning_rate = 1e-3
  fc_layer_params = [128, 64, 64]
  num_iterations = 100
  log_interval = 2
  eval_interval = 2

  action_tensor_spec = tensor_spec.from_spec(env.action_spec())
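  # Hedged sketch of a possible next step (an assumption, not the script's
  # actual continuation): a fully connected cloning network built from
  # fc_layer_params, with the output size taken from the action spec.
  import tensorflow as tf
  num_actions = action_tensor_spec.shape.num_elements()
  cloning_net = Sequential(
      [tf.keras.layers.Dense(n, activation='relu') for n in fc_layer_params] +
      [tf.keras.layers.Dense(num_actions, activation='tanh')])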
alpha_learning_rate = 3e-4  # @param {type:"number"}
target_update_tau = 0.005  # @param {type:"number"}
target_update_period = 1  # @param {type:"number"}
gamma = 0.99  # @param {type:"number"}
reward_scale_factor = 1.0  # @param {type:"number"}
gradient_clipping = None  # @param

actor_fc_layer_params = (256, 256)
critic_joint_fc_layer_params = (256, 256)

log_interval = 5000  # @param {type:"integer"}
num_eval_episodes = 30  # @param {type:"integer"}
eval_interval = 10000  # @param {type:"integer"}

env = suite_pybullet.load(env_name)
env.reset()
PIL.Image.fromarray(env.render())

print('Observation Spec:')
print(env.time_step_spec().observation)
print('Action Spec:')
print(env.action_spec())

train_py_env = suite_pybullet.load(env_name)
eval_py_env = suite_pybullet.load(env_name)
train_env = tf_py_environment.TFPyEnvironment(train_py_env)
eval_env = tf_py_environment.TFPyEnvironment(eval_py_env)

observation_spec = train_env.observation_spec()
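# Standard TF-Agents tutorial pattern (a sketch, not from the excerpt):
# average return over num_eval_episodes, here with a random baseline policy.
from tf_agents.policies import random_tf_policy

def compute_avg_return(environment, policy, num_episodes=10):
  total_return = 0.0
  for _ in range(num_episodes):
    time_step = environment.reset()
    episode_return = 0.0
    while not time_step.is_last():
      action_step = policy.action(time_step)
      time_step = environment.step(action_step.action)
      episode_return += time_step.reward
    total_return += episode_return
  avg_return = total_return / num_episodes
  return avg_return.numpy()[0]

random_policy = random_tf_policy.RandomTFPolicy(
    train_env.time_step_spec(), train_env.action_spec())
print('Random policy avg return:',
      compute_avg_return(eval_env, random_policy, num_eval_episodes))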
target_update_tau = 0.005  # @param {type:"number"}
target_update_period = 1  # @param {type:"number"}
gamma = 0.99  # @param {type:"number"}
reward_scale_factor = 1.0  # @param {type:"number"}

actor_fc_layer_params = (256, 256)
critic_joint_fc_layer_params = (256, 256)

log_interval = 1000  # @param {type:"integer"}
num_eval_episodes = 20  # @param {type:"integer"}
eval_interval = 5000  # @param {type:"integer"}
policy_save_interval = 5000  # @param {type:"integer"}

env = suite_pybullet.load(env_name)
env.reset()
PIL.Image.fromarray(env.render())

collect_env = suite_pybullet.load(env_name)
eval_env = suite_pybullet.load(env_name)

strategy = strategy_utils.get_strategy(tpu=False, use_gpu=use_gpu)

observation_spec, action_spec, time_step_spec = (
    spec_utils.get_tensor_specs(collect_env))

with strategy.scope():
  critic_net = critic_network.CriticNetwork(
      (observation_spec, action_spec),
      observation_fc_layer_params=None,
      # The excerpt is cut off here; remaining arguments assumed from the
      # standard TF-Agents SAC tutorial.
      action_fc_layer_params=None,
      joint_fc_layer_params=critic_joint_fc_layer_params,
      kernel_initializer='glorot_uniform',
      last_kernel_initializer='glorot_uniform')
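# Hedged sketch of the matching actor network under the same strategy scope
# (standard TF-Agents SAC tutorial pattern; not part of the excerpt).
from tf_agents.networks import actor_distribution_network
from tf_agents.networks import tanh_normal_projection_network

with strategy.scope():
  actor_net = actor_distribution_network.ActorDistributionNetwork(
      observation_spec,
      action_spec,
      fc_layer_params=actor_fc_layer_params,
      continuous_projection_net=(
          tanh_normal_projection_network.TanhNormalProjectionNetwork))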