Example #1
0
# History loggers: one FileLogger per output file. Each path lives under
# save/ and embeds `timenow`; all four are created with interval=1.
callback1 = FileLogger(
    filepath='save/history1_{}'.format(timenow),
    interval=1,
)
callback2 = FileLogger(
    filepath='save/history2_{}'.format(timenow),
    interval=1,
)
callback3 = FileLogger(
    filepath='save/history3_{}'.format(timenow),
    interval=1,
)
callback4 = FileLogger(
    filepath='save/history4_{}'.format(timenow),
    interval=1,
)

# Memory and random process that are handed to the agent below.
memory = SequentialMemory(limit=50000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(
    size=nb_actions,
    theta=0.15,
    mu=0.0,
    sigma=0.3,
)

# DDPG agent wired to the existing actor / critic models.
agent = DDPGAgent(
    nb_actions=nb_actions,
    actor=actor,
    critic=critic,
    critic_action_input=action_input,
    memory=memory,
    nb_steps_warmup_critic=1000,
    nb_steps_warmup_actor=1000,
    batch_size=32,
    random_process=random_process,
    gamma=0.99,
    target_model_update=0.001,
)

# compile() is given a two-element list of Adam optimizers with different
# learning rates; both clip the gradient norm at 1.
agent.compile(
    [
        Adam(lr=1e-05, clipnorm=1.0),
        Adam(lr=0.01, clipnorm=1.0),
    ],
    metrics=['mae'],
)

# Train for 200000 steps without rendering; only callback3 is attached to
# this run.
agent.fit(
    env,
    nb_steps=200000,
    visualize=False,
    callbacks=[callback3],
    verbose=2,
)
#time.sleep(1800)
# Logger for this run. Only callback1 is (re)created here; the other history
# loggers are not set up for this run.
callback1 = FileLogger(
    filepath='save/history1_{}'.format(timenow),
    interval=1,
)

# New memory and random process for the agent built below.
memory = SequentialMemory(limit=50000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(
    size=nb_actions,
    theta=0.15,
    mu=0.0,
    sigma=0.3,
)

# DDPG agent around the same `actor` / `critic` names, this time with
# batch_size=96.
agent = DDPGAgent(
    nb_actions=nb_actions,
    actor=actor,
    critic=critic,
    critic_action_input=action_input,
    memory=memory,
    nb_steps_warmup_critic=1000,
    nb_steps_warmup_actor=1000,
    batch_size=96,
    random_process=random_process,
    gamma=0.99,
    target_model_update=0.001,
)

# Two Adam optimizers with different learning rates (no gradient clipping).
agent.compile(
    [Adam(lr=1e-05), Adam(lr=0.001)],
    metrics=['mae'],
)

# NOTE(review): `nb_epsteps` is not a keyword of upstream keras-rl's
# Agent.fit (which expects `nb_steps`); presumably this relies on a modified
# fit() -- confirm before running against stock keras-rl.
agent.fit(
    env,
    nb_epsteps=3000,
    visualize=False,
    callbacks=[callback1],
    verbose=2,
)

# Persist the trained weights (overwriting any existing file) and report it.
weights_path = 'save/ddpg_{}_weights.h5f'.format(ENV_NAME)
agent.save_weights(weights_path, overwrite=True)
print('weights saved!')
    # Fragment of an enclosing function (its header is not visible here):
    # builds the memory, noise process and logger, constructs and compiles a
    # DDPG agent, calls train() with episodes=3000, runs test() and saves the
    # weights.

    # Memory
    # Built from the environment with limit=100000.
    memory = SimpleMemory(env=env, limit=100000)
    #memory = SequentialMemory(limit=10000, window_length=1)
    # Noise
    #random_process = OrnsteinUhlenbeckProcess(size=env.action_space.dim, theta=.15, mu=0., sigma=3.)
    # NOTE(review): random_process is created here, but the `random_process=`
    # argument to DDPGAgent below is commented out, so it is unused in the
    # visible code -- confirm this is intentional.
    random_process = GaussianWhiteNoiseProcess()


    # History logger; the path under save/ embeds `timenow`.
    callback1 = FileLogger(filepath='save/history1_{}'.format(timenow), interval=1)

    # Agent
    # `experiment`, `actor`, `critic` and `env` come from outside this
    # fragment.
    agent = DDPGAgent(
        experiment=experiment,
        actor=actor,
        critic=critic,
        env=env,
        memory=memory,
        #random_process=random_process
    )
    # compile() is called with no arguments here.
    agent.compile()

    # Train without rendering or plots; callback1 is the only callback.
    # `history` is not used again in the visible lines.
    history = agent.train(
        env=env,
        episodes=3000,
        render=False,
        callbacks=[callback1],
        verbosity=2,
        plots=False)

    # Run a test pass on the same environment (`test` is not used again in the
    # visible lines), then save the weights, overwriting any existing file.
    test = agent.test(env=env)
    agent.save_weights('save/ddpg_ContinuousCartpole_weights.h5f', overwrite=True)