def main():
    """Train a DQN agent on the project's `Game` environment.

    Builds the environment, starts it, wraps it in a `DQNAgent`, and runs
    mini-batch training. `Game`, `DQNAgent`, and `mini_batch_train` are
    project-level names assumed to be imported elsewhere in this file.
    """
    env = Game()
    env.start()
    agent = DQNAgent(env)

    # Training hyperparameters.
    MAX_EPISODES = 500
    MAX_STEPS = 5000
    BATCH_SIZE = 32

    # mini_batch_train returns the per-episode reward history.
    episode_rewards = mini_batch_train(env, agent, MAX_EPISODES, MAX_STEPS, BATCH_SIZE)
    return episode_rewards


# NOTE(review): main() is defined but never invoked in this view — confirm an
# `if __name__ == "__main__": main()` guard exists elsewhere, or add one:
if __name__ == "__main__":
    main()
"""Train a DDPG agent on the Pendulum-v0 continuous-control task."""

import gym

from common.utils import mini_batch_train
from ddpg import DDPGAgent

env = gym.make("Pendulum-v0")

# Training hyperparameters.
max_episodes = 100
max_steps = 500
batch_size = 32

# Agent hyperparameters: discount, soft-update rate, replay capacity, LRs.
gamma = 0.99
tau = 1e-2
buffer_maxlen = 100000
critic_lr = 1e-3
actor_lr = 1e-3

agent = DDPGAgent(env, gamma, tau, buffer_maxlen, critic_lr, actor_lr)

# mini_batch_train returns the per-episode reward history.
episode_rewards = mini_batch_train(env, agent, max_episodes, max_steps, batch_size)
"""Train a Dueling Double-DQN agent on CartPole-v0."""

import gym

from common.utils import mini_batch_train
from duelingDQN.dueling_ddqn import DuelingAgent

env_id = "CartPole-v0"

# Training hyperparameters.
MAX_EPISODES = 1000
MAX_STEPS = 500
BATCH_SIZE = 32

env = gym.make(env_id)
# use_conv=False: CartPole observations are low-dimensional vectors,
# so the agent uses fully-connected layers instead of a conv net.
agent = DuelingAgent(env, use_conv=False)

# mini_batch_train returns the per-episode reward history.
episode_rewards = mini_batch_train(env, agent, MAX_EPISODES, MAX_STEPS, BATCH_SIZE)
"""Train a SAC agent on Pendulum-v0.

Two SAC variants are supported: the 2018 formulation (separate value
network) and the 2019 formulation (learned entropy coefficient alpha).
The 2019 parameter set and agent construction are kept commented out as
the alternative configuration.
"""

import gym

# NOTE(review): `SACAgent` and `mini_batch_train` are used below but never
# imported in this script — confirm the imports exist elsewhere, e.g.:
#   from common.utils import mini_batch_train
#   from sac import SACAgent

env = gym.make("Pendulum-v0")

# SAC 2018 Params
tau = 0.005
gamma = 0.99
value_lr = 3e-3
q_lr = 3e-3
policy_lr = 3e-3
buffer_maxlen = 1000000

# SAC 2019 Params
# gamma = 0.99
# tau = 0.01
# alpha = 0.2
# a_lr = 3e-4
# q_lr = 3e-4
# p_lr = 3e-4
# buffer_maxlen = 1000000

# NOTE(review): this reset's return value is unused and mini_batch_train
# presumably resets the env itself — verify whether this line is needed.
state = env.reset()

# 2018 agent
agent = SACAgent(env, gamma, tau, value_lr, q_lr, policy_lr, buffer_maxlen)

# 2019 agent
# agent = SACAgent(env, gamma, tau, alpha, q_lr, p_lr, a_lr, buffer_maxlen)

# train: 50 episodes, 500 steps per episode, batch size 64.
episode_rewards = mini_batch_train(env, agent, 50, 500, 64)