Example #1
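# Assumed imports for this excerpt (not shown in the original snippet);
# hrl is taken here to be the hobotrl reinforcement-learning package:
#   import sys, gym, tensorflow as tf, hobotrl as hrl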
    def test_run(self):
        tf.reset_default_graph()
        env = gym.make('Pendulum-v0')
        env = hrl.envs.C2DEnvWrapper(env, [5])
        env = hrl.envs.ScaledRewards(env, 0.1)
        state_shape = list(env.observation_space.shape)
        global_step = tf.get_variable('global_step', [],
                                      dtype=tf.int32,
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        def f_net(inputs):
            l2 = 1e-4
            state = inputs[0]
            q = hrl.network.Utils.layer_fcs(state, [200, 100],
                                            env.action_space.n,
                                            l2=l2,
                                            var_scope="q")
            pi = hrl.network.Utils.layer_fcs(state, [200, 100],
                                             env.action_space.n,
                                             activation_out=tf.nn.softmax,
                                             l2=l2,
                                             var_scope="pi")
            return {"q": q, "pi": pi}

        agent = hrl.ActorCritic(
            f_create_net=f_net,
            state_shape=state_shape,
            # ACUpdate arguments
            discount_factor=0.9,
            entropy=hrl.utils.CappedLinear(1e6, 1e-2, 1e-2),
            target_estimator=None,
            max_advantage=100.0,
            # optimizer arguments
            network_optimizer=hrl.network.LocalOptimizer(
                tf.train.AdamOptimizer(1e-3), grad_clip=10.0),
            max_gradient=10.0,
            # sampler arguments
            sampler=None,
            batch_size=8,
            global_step=global_step,
        )
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with agent.create_session(config=config, save_dir=None) as sess:
            runner = hrl.EnvRunner(env,
                                   agent,
                                   evaluate_interval=sys.maxsize,
                                   render_interval=sys.maxsize,
                                   logdir=None)
            runner.episode(50)
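In this test, hrl.envs.C2DEnvWrapper(env, [5]) maps Pendulum's continuous torque onto 5 discrete actions, so the softmax policy head and env.action_space.n make sense, and hrl.envs.ScaledRewards(env, 0.1) presumably scales rewards by a factor of 0.1. The wrapper below is only a rough, hypothetical sketch of the continuous-to-discrete idea using the standard gym.ActionWrapper interface; it is not hrl's actual implementation.

import gym
import numpy as np

class DiscretizeActionWrapper(gym.ActionWrapper):
    """Hypothetical sketch: expose a Box(1,) action space as Discrete(n_bins)."""
    def __init__(self, env, n_bins):
        super(DiscretizeActionWrapper, self).__init__(env)
        low, high = env.action_space.low[0], env.action_space.high[0]
        # Continuous value represented by each discrete index.
        self._values = np.linspace(low, high, n_bins)
        self.action_space = gym.spaces.Discrete(n_bins)

    def action(self, act):
        # Translate the agent's discrete index into a continuous action.
        return np.array([self._values[act]])

# e.g. env = DiscretizeActionWrapper(gym.make('Pendulum-v0'), 5)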
Example #2
def create_agent(n_optimizer, global_step):
    agent = hrl.ActorCritic(
        f_create_net=f_net,
        state_shape=state_shape,
        # ACUpdate arguments
        discount_factor=discount_factor,
        entropy=hrl.utils.CappedLinear(1e6, 4e-2, 4e-2),
        target_estimator=None,
        max_advantage=100.0,
        # optimizer arguments
        network_optimizer=n_optimizer,
        max_gradient=10.0,
        # sampler arguments
        sampler=None,
        batch_size=8,
        global_step=global_step,
    )
    return agent
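Here create_agent acts as a factory: f_net, state_shape and discount_factor come from the enclosing scope, while the optimizer and global step are passed in. Assuming the same pieces as in Example #1, a call might look like the following usage sketch (not part of the original example):

global_step = tf.get_variable('global_step', [], dtype=tf.int32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)
optimizer = hrl.network.LocalOptimizer(tf.train.AdamOptimizer(1e-3),
                                       grad_clip=10.0)
agent = create_agent(optimizer, global_step)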
Example #3
def create_agent(n_optimizer, global_step):
    # All ScheduledParam hyperparameters are mutable objects, so we do not
    # want to share the same object between different Agent instances.
    entropy = hrl.utils.clone_params(self._entropy)
    agent = hrl.ActorCritic(
        f_create_net=self._f_create_net,
        state_shape=state_shape,
        # ACUpdate arguments
        discount_factor=self._discount_factor,
        entropy=entropy,
        target_estimator=None,
        max_advantage=100.0,
        # optimizer arguments
        network_optimizer=n_optimizer,
        # sampler arguments
        sampler=None,
        batch_size=self._batch_size,
        global_step=global_step,
    )
    return agent
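The clone is needed because a ScheduledParam carries mutable internal state that advances as training proceeds; two agents sharing one object would advance the same schedule twice. The toy class below is a hypothetical, hrl-independent illustration of that hazard, not hrl code:

import copy

class ToySchedule(object):
    """Hypothetical stand-in for a ScheduledParam: a counter-driven linear schedule."""
    def __init__(self, start, end, steps):
        self.start, self.end, self.steps = start, end, steps
        self.t = 0  # mutable internal state

    def value(self):
        frac = min(float(self.t) / self.steps, 1.0)
        self.t += 1  # every call advances the schedule
        return self.start + frac * (self.end - self.start)

shared = ToySchedule(1.0, 0.0, 10)
entropy_a = shared                 # both agents hold the same counter,
entropy_b = shared                 # so the schedule decays twice as fast
entropy_c = copy.deepcopy(shared)  # a per-agent clone, in the spirit of clone_params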
FLAGS = tf.app.flags.FLAGS

global_step = tf.get_variable('global_step', [],
                              dtype=tf.int32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)

agent = hrl.ActorCritic(
    f_create_net=f_net,
    state_shape=state_shape,
    # ACUpdate arguments
    discount_factor=0.9,
    entropy=hrl.utils.CappedLinear(1e6, 1e-2, 1e-2),
    target_estimator=None,
    max_advantage=100.0,
    # optimizer arguments
    network_optimizer=hrl.network.LocalOptimizer(tf.train.AdamOptimizer(1e-3),
                                                 grad_clip=10.0),
    max_gradient=10.0,
    # sampler arguments
    sampler=None,
    batch_size=8,
    global_step=global_step,
)

config = tf.ConfigProto(gpu_options=tf.GPUOptions(
    per_process_gpu_memory_fraction=0.3, allow_growth=True),
                        allow_soft_placement=True,
                        log_device_placement=False)

# sv = agent.init_supervisor(
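As in Example #1, the config built above would then be passed to agent.create_session before running episodes; assuming env was constructed and wrapped as in Example #1, the remainder might look like this sketch:

with agent.create_session(config=config, save_dir=None) as sess:
    runner = hrl.EnvRunner(env, agent,
                           evaluate_interval=sys.maxsize,
                           render_interval=sys.maxsize,
                           logdir=None)
    runner.episode(50)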