Example #1
# env_name and the reinforceflow imports are assumed to be defined above this excerpt;
# the opening arguments of this call are inferred from the matching test_env call below.
env = AtariWrapper(env_name,
                   action_repeat=4,
                   obs_stack=4,
                   new_width=84,
                   new_height=84,
                   to_gray=True,
                   noop_action=[1, 0, 0, 0],
                   start_action=[0, 1, 0, 0],
                   clip_rewards=True)

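# Evaluation environment: same 84x84 grayscale preprocessing, 4-frame stacking, and 4-step action repeat as the training environment.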
test_env = AtariWrapper(env_name,
                        action_repeat=4,
                        obs_stack=4,
                        new_width=84,
                        new_height=84,
                        to_gray=True,
                        start_action=[0, 1, 0, 0])

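# Double DQN agent with the Nature CNN architecture and no dueling streams; the target network is synced every 10,000 steps (targetfreq).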
agent = DeepQ(env=env,
              use_double=True,
              model=DeepQModel(nature_arch=True, dueling=False),
              optimizer=RMSProp(7e-4, decay=0.99, epsilon=0.1),
              policy=EGreedyPolicy(1.0, 0.1, 4000000),
              targetfreq=10000)

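# Off-policy training from a replay buffer (the backPropagationReplay object is assumed
# to be constructed above this excerpt); evaluation runs on test_env.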
trainer = ReplayTrainer(env=env,
                        agent=agent,
                        maxsteps=80000000,
                        replay=backPropagationReplay,
                        logdir='tmp/%s/moving_average_bias' % env_name,
                        logfreq=900,
                        test_env=test_env,
                        test_render=False)
trainer.train()
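
Example #1 trains a Double DQN (Nature architecture, no dueling streams) from a replay buffer. Its exploration schedule, EGreedyPolicy(1.0, 0.1, 4000000), reads as ε annealed from 1.0 down to 0.1 over the first 4,000,000 steps. The standalone sketch below shows what such a schedule computes, assuming the annealing is linear; the helper name annealed_epsilon is illustrative only and not part of reinforceflow.

def annealed_epsilon(step, eps_start=1.0, eps_final=0.1, anneal_steps=4_000_000):
    """Linearly anneal epsilon from eps_start to eps_final over anneal_steps."""
    frac = min(step / anneal_steps, 1.0)
    return eps_start + frac * (eps_final - eps_start)

print(annealed_epsilon(0))           # 1.0  (fully random at the start)
print(annealed_epsilon(2_000_000))   # 0.55 (halfway through annealing)
print(annealed_epsilon(10_000_000))  # 0.1  (floor after 4M steps)
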
Example #2
# env, env_name, and the reinforceflow imports are assumed to be defined above this
# excerpt; the opening of this call is inferred from the test_env pattern in Example #1.
test_env = AtariWrapper(env_name,
                        action_repeat=4,
                        obs_stack=4,
                        new_width=84,
                        new_height=84,
                        to_gray=True,
                        start_action=[0, 1, 0, 0, 0, 0])

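# Global (shared) agent: standard DQN with a non-Nature DeepQModel, no double Q and no dueling.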
agent = DeepQ(env,
              use_double=False,
              model=DeepQModel(nature_arch=False, dueling=False),
              optimizer=RMSProp(7e-4, decay=0.99, epsilon=0.1))
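# Sixteen worker threads share the global agent's optimizer, trainable weights, and target
# network; each anneals its own epsilon from 1.0 down to 0.01, 0.1, or 0.5 over 4M steps.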
threads = []
for i, eps in enumerate([0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.5, 0.5] * 2):
    threads.append(
        DeepQ(env.copy(),
              model=DeepQModel(nature_arch=False, dueling=False),
              optimizer=agent.opt,
              trainable_weights=agent.weights,
              target_net=agent.target_net,
              target_weights=agent.target_weights,
              use_double=False,
              targetfreq=10000,
              policy=EGreedyPolicy(1.0, eps, 4000000),
              name='thread%s' % i))

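# AsyncTrainer drives the worker threads and periodically evaluates on test_env.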
trainer = AsyncTrainer(agent,
                       threads,
                       maxsteps=80000000,
                       batch_size=5,
                       logdir='/tmp/rf/AsyncDeepQ/%s' % env_name,
                       logfreq=240,
                       test_env=test_env)
trainer.train()
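
Example #2 switches to asynchronous Q-learning: one global agent owns the optimizer, trainable weights, and target network, and sixteen thread agents are handed those same objects, so every worker's updates land in shared parameters. The toy below sketches that sharing pattern with plain Python threads and a NumPy vector; it is an illustration of the idea only, not reinforceflow's implementation, and its names (shared_weights, worker) are made up for the sketch.

import threading
import numpy as np

shared_weights = np.zeros(4)      # plays the role of agent.weights above
lock = threading.Lock()           # keeps the toy updates race-free

def worker(worker_id, steps=1000):
    rng = np.random.default_rng(worker_id)
    for _ in range(steps):
        grad = rng.normal(size=shared_weights.shape)   # stand-in for a Q-learning gradient
        with lock:
            shared_weights[:] -= 1e-3 * grad           # every thread updates the same weights

threads = [threading.Thread(target=worker, args=(i,)) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(shared_weights)              # reflects updates from all workers
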
Example #3
# The imports for DeepQ, FullyConnected, EGreedyPolicy, and Vectorize are assumed to
# appear above this excerpt.
import reinforceflow
from reinforceflow.core.optimizer import Adam
from reinforceflow.trainers.async_trainer import AsyncTrainer
reinforceflow.set_random_seed(555)

env_name = 'CartPole-v0'

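# Global agent: a fully connected Q-network on the vectorized CartPole environment, optimized with Adam.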
agent = DeepQ(Vectorize(env_name),
              model=FullyConnected(),
              optimizer=Adam(3e-5))

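# Four worker threads (final epsilons 0.8 and 0.4, two of each) reuse the global agent's weights, optimizer, and target network.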
threads = []
for i, eps in enumerate([0.8, 0.4] * 2):
    threads.append(
        DeepQ(Vectorize(env_name),
              model=FullyConnected(),
              optimizer=agent.opt,
              trainable_weights=agent.weights,
              target_net=agent.target_net,
              target_weights=agent.target_weights,
              targetfreq=10000,
              policy=EGreedyPolicy(1, eps, 100000),
              name='Thread%s' % i))

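# A short CartPole run: 100,000 steps in total with batch_size=20.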
trainer = AsyncTrainer(agent,
                       threads,
                       maxsteps=100000,
                       batch_size=20,
                       logdir='/tmp/rf/AsyncDeepQ/%s' % env_name,
                       logfreq=10)
trainer.train()
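
Example #3 repeats the asynchronous setup on CartPole-v0 with fully connected networks, Adam, a fixed random seed, and a much shorter schedule. The thread agents keep targetfreq=10000, which under the usual DQN convention means the target network is refreshed from the online weights every 10,000 training steps; the sketch below illustrates that periodic hard sync and is not taken from the library.

online_weights = [0.0, 0.0]
target_weights = list(online_weights)
targetfreq = 10_000

for step in range(1, 30_001):
    online_weights = [w + 0.001 for w in online_weights]   # stand-in for a gradient step
    if step % targetfreq == 0:
        target_weights = list(online_weights)              # hard copy every targetfreq steps

print(target_weights)   # matches the online weights as of the last sync at step 30,000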