import gym
import pytest

from d3rlpy.algos import SAC
from d3rlpy.online.buffers import ReplayBuffer


def test_fit_online_pendulum_with_sac():
    env = gym.make('Pendulum-v0')
    eval_env = gym.make('Pendulum-v0')
    algo = SAC()
    buffer = ReplayBuffer(1000, env)
    algo.fit_online(env,
                    buffer,
                    n_epochs=1,
                    eval_env=eval_env,
                    logdir='test_data',
                    tensorboard=False)
def test_fit_online_pendulum_with_sac(): env = gym.make("Pendulum-v0") eval_env = gym.make("Pendulum-v0") algo = SAC() buffer = ReplayBuffer(1000, env) algo.fit_online( env, buffer, n_steps=500, eval_env=eval_env, logdir="test_data", )


@pytest.mark.parametrize("timelimit_aware", [True, False])
def test_timelimit_aware(timelimit_aware):
    env = gym.make("Pendulum-v0")
    algo = SAC()
    buffer = ReplayBuffer(1000, env)
    algo.fit_online(
        env,
        buffer,
        n_steps=500,
        logdir="test_data",
        timelimit_aware=timelimit_aware,
    )

    # with timelimit_aware=True, episodes truncated by gym's TimeLimit wrapper
    # are not recorded as terminal, so no terminal flags should appear
    terminal_count = 0
    for i in range(len(buffer)):
        terminal_count += int(buffer.transitions[i].terminal)

    if timelimit_aware:
        assert terminal_count == 0
    else:
        assert terminal_count > 0


import gym

from d3rlpy.algos import SAC
from d3rlpy.online.buffers import ReplayBuffer

env = gym.make('Pendulum-v0')
eval_env = gym.make('Pendulum-v0')

# setup algorithm
sac = SAC(batch_size=100, use_gpu=False)

# replay buffer for experience replay
buffer = ReplayBuffer(maxlen=100000, env=env)

# start training
# probabilistic policies do not need explorers
sac.fit_online(env,
               buffer,
               n_epochs=100,
               eval_env=eval_env,
               n_steps_per_epoch=1000,
               n_updates_per_epoch=100)


import gym

from d3rlpy.algos import SAC
from d3rlpy.online.buffers import ReplayBuffer

env = gym.make('Pendulum-v0')
eval_env = gym.make('Pendulum-v0')

# setup algorithm
sac = SAC(batch_size=100, use_gpu=False)

# replay buffer for experience replay
buffer = ReplayBuffer(maxlen=100000, env=env)

# start training
# probabilistic policies do not need explorers
sac.fit_online(env,
               buffer,
               n_steps=100000,
               eval_env=eval_env,
               n_steps_per_epoch=1000,
               update_start_step=1000)
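

# The comment above notes that probabilistic policies such as SAC's do not need
# an explorer. For contrast, the sketch below shows how an explorer could be
# passed to fit_online for a value-based algorithm with a deterministic greedy
# policy. This is a hedged, hypothetical example: it assumes d3rlpy's DQN, the
# LinearDecayEpsilonGreedy explorer, and a discrete-action environment
# ('CartPole-v0'), none of which appear in the snippets above.
import gym

from d3rlpy.algos import DQN
from d3rlpy.online.buffers import ReplayBuffer
from d3rlpy.online.explorers import LinearDecayEpsilonGreedy

env = gym.make('CartPole-v0')

# setup a value-based algorithm with a deterministic greedy policy
dqn = DQN(batch_size=32, use_gpu=False)

# replay buffer for experience replay
buffer = ReplayBuffer(maxlen=100000, env=env)

# epsilon-greedy exploration with epsilon decayed from 1.0 to 0.1
explorer = LinearDecayEpsilonGreedy(start_epsilon=1.0,
                                    end_epsilon=0.1,
                                    duration=10000)

# deterministic policies rely on the explorer to collect diverse transitions
dqn.fit_online(env,
               buffer,
               explorer=explorer,
               n_steps=100000,
               n_steps_per_epoch=1000,
               update_start_step=1000)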