Example #1
def testGinConfig(self):
    gin.parse_config_file(
        test_utils.test_src_dir_path(
            'environments/configs/suite_pybullet.gin'))
    env = suite_pybullet.load()
    self.assertIsInstance(env, py_environment.PyEnvironment)
    self.assertIsInstance(env, wrappers.TimeLimit)
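# The test above relies on the bundled suite_pybullet.gin to supply the
# arguments of suite_pybullet.load(), which is why load() is called with no
# arguments here. Purely for illustration (the real file's contents and the
# exact parameter names may differ), such a gin file binds the loader's
# environment name roughly like this:
#
#   import tf_agents.environments.suite_pybullet
#   suite_pybullet.load.env_name = 'AntBulletEnv-v0'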
Example #2
def get_env():
    # The kwargs are the reward function weights
    return suite_pybullet.load(env_name,
                               gym_kwargs={
                                   'distance_weight': 1.0,
                                   'energy_weight': 0.001,
                                   'shake_weight': 0.00,
                                   'drift_weight': 0.05
                               })
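# Usage sketch for the helper above (a sketch, not part of the original
# snippet; assumes env_name is defined elsewhere in the file, e.g.
# 'MinitaurBulletEnv-v0'). The gym_kwargs are forwarded to the underlying
# PyBullet Minitaur constructor as reward-term weights.
from tf_agents.environments import tf_py_environment

train_env = tf_py_environment.TFPyEnvironment(get_env())
print(train_env.action_spec())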
Example #3
    actor_learning_rate = 3e-4
    alpha_learning_rate = 3e-4
    target_update_tau = 0.005
    target_update_period = 1
    gamma = 0.99
    reward_scale_factor = 1.0

    actor_fc_layer_params = (256, 256)
    critic_joint_fc_layer_params = (256, 256)

    log_interval = 5000
    num_eval_episodes = 20
    eval_interval = 10000
    policy_save_interval = 5000

    collect_env = tf_py_environment.TFPyEnvironment(suite_pybullet.load(env_name))
    eval_env = tf_py_environment.TFPyEnvironment(suite_pybullet.load(env_name))

    observation_spec, action_spec, time_step_spec = (spec_utils.get_tensor_specs(collect_env))

    critic_net = critic_network.CriticNetwork(
        (observation_spec, action_spec),
        observation_fc_layer_params=None,
        action_fc_layer_params=None,
        joint_fc_layer_params=critic_joint_fc_layer_params,
        kernel_initializer=tf.keras.initializers.HeNormal(),
        last_kernel_initializer=tf.keras.initializers.HeNormal()
    )

    actor_net = actor_distribution_network.ActorDistributionNetwork(
        observation_spec,
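    # The snippet is cut off mid-call above. For reference, the standard
    # TF-Agents SAC setup builds the actor network roughly as follows (a
    # sketch, not the original author's exact arguments):
    #
    #     actor_net = actor_distribution_network.ActorDistributionNetwork(
    #         observation_spec,
    #         action_spec,
    #         fc_layer_params=actor_fc_layer_params,
    #         continuous_projection_net=(
    #             tanh_normal_projection_network.TanhNormalProjectionNetwork))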
Example #4
initial_collect_steps = 1000
collect_steps_per_iteration = 64
replay_buffer_max_length = 100000

batch_size = 64
learning_rate = 1e-3
log_interval = 200

num_eval_episodes = 10
eval_interval = 10

policy_save_interval = 5000

##### LOAD ENVIRONMENT #####
env_name = "MinitaurBulletEnv-v0"
env = suite_pybullet.load(env_name)

##### Two environments instantiated: one for train, one for eval #####
train_py_env = suite_pybullet.load(env_name)
eval_py_env = suite_pybullet.load(env_name)
# Converts NumPy arrays to tensors so they are compatible with TF-Agents agents and policies
train_env = tf_py_environment.TFPyEnvironment(train_py_env)
eval_env = tf_py_environment.TFPyEnvironment(eval_py_env)

time_step = env.reset()
observation_spec, action_spec, time_step_spec = (
    spec_utils.get_tensor_specs(train_env))

#######Networks#####
#conv_layer_params = [(32,3,3),(32,3,3),(32,3,3)]
conv_layer_params = None
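# A minimal sketch of how the specs and conv_layer_params above could feed a
# network definition (a sketch under assumptions, not the original code; the
# fc_layer_params value here is illustrative):
from tf_agents.networks import actor_distribution_network

actor_net = actor_distribution_network.ActorDistributionNetwork(
    observation_spec,
    action_spec,
    conv_layer_params=conv_layer_params,
    fc_layer_params=(256, 256))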
Example #5
def testPybulletEnvRegistered(self):
    env = suite_pybullet.load('InvertedPendulumBulletEnv-v0')
    self.assertIsInstance(env, py_environment.PyEnvironment)
    self.assertIsInstance(env, wrappers.TimeLimit)
Example #6
from tf_agents.train.utils import strategy_utils
from tf_agents.agents.behavioral_cloning.behavioral_cloning_agent import BehavioralCloningAgent
from tf_agents.networks.sequential import Sequential
from tf_agents.environments import suite_pybullet
from tf_agents.environments import tf_py_environment
from tf_agents.specs import tensor_spec
from tf_agents.replay_buffers.tf_uniform_replay_buffer import TFUniformReplayBuffer
from tf_agents.metrics.tf_metrics import AverageReturnMetric
from tf_agents.policies import random_tf_policy
from drivers import TFRenderDriver

from tf_agents.policies.actor_policy import ActorPolicy
from tf_agents.trajectories import time_step as ts
from tf_agents.policies.policy_saver import PolicySaver

if __name__ == '__main__':

    py_env = suite_pybullet.load('AntBulletEnv-v0')
    py_env.render(mode="human")
    env = tf_py_environment.TFPyEnvironment(py_env)

    strategy = strategy_utils.get_strategy(tpu=False, use_gpu=True)

    replay_buffer_capacity = 2000
    learning_rate = 1e-3
    fc_layer_params = [128, 64, 64]

    num_iterations = 100

    log_interval = 2
    eval_interval = 2

    action_tensor_spec = tensor_spec.from_spec(env.action_spec())
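    # random_tf_policy is imported above but unused in the visible part of
    # the snippet; a common use is providing a baseline policy for evaluation
    # or for seeding the replay buffer (a sketch, not from the original code):
    random_policy = random_tf_policy.RandomTFPolicy(env.time_step_spec(),
                                                    env.action_spec())
    avg_return_metric = AverageReturnMetric()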
Example #7
alpha_learning_rate = 3e-4  # @param {type:"number"}
target_update_tau = 0.005  # @param {type:"number"}
target_update_period = 1  # @param {type:"number"}
gamma = 0.99  # @param {type:"number"}
reward_scale_factor = 1.0  # @param {type:"number"}
gradient_clipping = None  # @param

actor_fc_layer_params = (256, 256)
critic_joint_fc_layer_params = (256, 256)

log_interval = 5000  # @param {type:"integer"}

num_eval_episodes = 30  # @param {type:"integer"}
eval_interval = 10000  # @param {type:"integer"}

env = suite_pybullet.load(env_name)
env.reset()
PIL.Image.fromarray(env.render())

print('Observation Spec:')
print(env.time_step_spec().observation)
print('Action Spec:')
print(env.action_spec())

train_py_env = suite_pybullet.load(env_name)
eval_py_env = suite_pybullet.load(env_name)

train_env = tf_py_environment.TFPyEnvironment(train_py_env)
eval_env = tf_py_environment.TFPyEnvironment(eval_py_env)

observation_spec = train_env.observation_spec()
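# The snippet stops after reading the observation spec; the action spec comes
# from the same environment and, together with critic_joint_fc_layer_params
# above, typically feeds the critic network (a sketch under those
# assumptions, not the original author's code):
from tf_agents.agents.ddpg import critic_network

action_spec = train_env.action_spec()
critic_net = critic_network.CriticNetwork(
    (observation_spec, action_spec),
    observation_fc_layer_params=None,
    action_fc_layer_params=None,
    joint_fc_layer_params=critic_joint_fc_layer_params)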
Example #8
target_update_tau = 0.005  # @param {type:"number"}
target_update_period = 1  # @param {type:"number"}
gamma = 0.99  # @param {type:"number"}
reward_scale_factor = 1.0  # @param {type:"number"}

actor_fc_layer_params = (256, 256)
critic_joint_fc_layer_params = (256, 256)

log_interval = 1000  # @param {type:"integer"}

num_eval_episodes = 20  # @param {type:"integer"}
eval_interval = 5000  # @param {type:"integer"}

policy_save_interval = 5000  # @param {type:"integer"}

env = suite_pybullet.load(env_name)
env.reset()
PIL.Image.fromarray(env.render())

collect_env = suite_pybullet.load(env_name)
eval_env = suite_pybullet.load(env_name)

strategy = strategy_utils.get_strategy(tpu=False, use_gpu=use_gpu)

observation_spec, action_spec, time_step_spec = (
    spec_utils.get_tensor_specs(collect_env))

with strategy.scope():
    critic_net = critic_network.CriticNetwork(
        (observation_spec, action_spec),
        observation_fc_layer_params=None,
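        # (The example is cut off mid-call here; the remaining CriticNetwork
        # arguments typically mirror the full critic_network.CriticNetwork
        # call shown earlier in this document: action_fc_layer_params=None,
        # joint_fc_layer_params=critic_joint_fc_layer_params, plus kernel
        # initializers, all built inside the strategy scope.)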