예제 #1
0
def test_fit_batch_online_pendulum_with_sac():
    make_env = lambda: gym.make("Pendulum-v0")
    env = AsyncBatchEnv([make_env for _ in range(5)])
    eval_env = gym.make("Pendulum-v0")

    algo = SAC()

    buffer = BatchReplayBuffer(1000, env)

    algo.fit_batch_online(
        env,
        buffer,
        n_epochs=1,
        n_steps_per_epoch=500,
        n_updates_per_epoch=1,
        eval_env=eval_env,
        logdir="test_data",
        tensorboard=False,
    )
예제 #2
0
import gym

from d3rlpy.algos import SAC
from d3rlpy.envs import AsyncBatchEnv
from d3rlpy.online.buffers import BatchReplayBuffer

if __name__ == '__main__':
    env = AsyncBatchEnv([lambda: gym.make('Pendulum-v0') for _ in range(10)])
    eval_env = gym.make('Pendulum-v0')

    # setup algorithm
    sac = SAC(batch_size=100, use_gpu=False)

    # replay buffer for experience replay
    buffer = BatchReplayBuffer(maxlen=100000, env=env)

    # start training
    sac.fit_batch_online(env,
                         buffer,
                         n_epochs=100,
                         eval_interval=1,
                         eval_env=eval_env,
                         n_steps_per_epoch=1000,
                         n_updates_per_epoch=1000)