Exemple #1
0
def main():
    """Start a ``Game`` environment and train a ``DQNAgent`` on it.

    Training is delegated to ``mini_batch_train``; the run length and
    batch size are fixed inside this function. Nothing is returned.
    """
    # Settings for the training run, handed positionally to mini_batch_train.
    episode_limit = 500
    step_limit = 5000
    minibatch_size = 32

    game = Game()
    game.start()  # started before the agent is constructed around it
    learner = DQNAgent(game)

    # The value returned by the training call is bound but not used further.
    episode_rewards = mini_batch_train(
        game, learner, episode_limit, step_limit, minibatch_size
    )
Exemple #2
0
import gym

from common.utils import mini_batch_train
from ddpg import DDPGAgent

# Script: train a DDPGAgent on the "Pendulum-v0" gym environment.

# Agent settings, forwarded positionally to DDPGAgent below.
gamma, tau = 0.99, 0.01
critic_lr = actor_lr = 0.001
buffer_maxlen = 100000

# Run settings, forwarded positionally to mini_batch_train below.
max_episodes, max_steps, batch_size = 100, 500, 32

env = gym.make("Pendulum-v0")
agent = DDPGAgent(
    env, gamma, tau, buffer_maxlen, critic_lr, actor_lr
)

# Whatever mini_batch_train returns is kept at module level.
episode_rewards = mini_batch_train(
    env, agent, max_episodes, max_steps, batch_size
)
Exemple #3
0
import gym

from duelingDQN.dueling_ddqn import DuelingAgent
from common.utils import mini_batch_train

# Script: train a DuelingAgent on a gym environment selected by id.

# Run settings, forwarded positionally to mini_batch_train below.
MAX_EPISODES, MAX_STEPS, BATCH_SIZE = 1000, 500, 32

env_id = "CartPole-v0"
env = gym.make(env_id)

# The agent is built with use_conv explicitly switched off.
agent = DuelingAgent(env, use_conv=False)

# Whatever mini_batch_train returns is kept at module level.
episode_rewards = mini_batch_train(
    env, agent, MAX_EPISODES, MAX_STEPS, BATCH_SIZE
)
import gym

# Script: train a SACAgent on the "Pendulum-v0" gym environment.
# NOTE(review): SACAgent has no visible import in this part of the file;
# confirm it is brought into scope elsewhere.

# Active settings: the "SAC 2018" parameter set.
# NOTE(review): the three learning rates are 3e-3 here, whereas the disabled
# 2019 set below uses 3e-4 -- confirm the larger value is intentional.
gamma, tau = 0.99, 0.005
value_lr = q_lr = policy_lr = 0.003
buffer_maxlen = 1000000

# Disabled alternative: the "SAC 2019" parameter set. It goes together with
# the commented-out 2019 agent constructor further down.
# gamma = 0.99
# tau = 0.01
# alpha = 0.2
# a_lr = 3e-4
# q_lr = 3e-4
# p_lr = 3e-4
# buffer_maxlen = 1000000

env = gym.make("Pendulum-v0")

# The environment is reset once before the agent is built; `state` is not
# referenced again in this script.
state = env.reset()

# 2018-style agent (active).
agent = SACAgent(
    env, gamma, tau, value_lr, q_lr, policy_lr, buffer_maxlen
)

# 2019-style agent (disabled).
# agent = SACAgent(env, gamma, tau, alpha, q_lr, p_lr, a_lr, buffer_maxlen)

# Train with literal run settings passed positionally: 50, 500, 64.
episode_rewards = mini_batch_train(env, agent, 50, 500, 64)