# Run 1: DDPG training for 200k steps, logging history via callback3.
# Four history loggers are constructed; only callback3 is attached to this fit.
callback1 = FileLogger(filepath='save/history1_{}'.format(timenow), interval=1)
callback2 = FileLogger(filepath='save/history2_{}'.format(timenow), interval=1)
callback3 = FileLogger(filepath='save/history3_{}'.format(timenow), interval=1)
callback4 = FileLogger(filepath='save/history4_{}'.format(timenow), interval=1)

# Replay buffer and Ornstein-Uhlenbeck exploration noise for the DDPG agent.
memory = SequentialMemory(limit=50000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)

agent = DDPGAgent(
    nb_actions=nb_actions,
    actor=actor,
    critic=critic,
    critic_action_input=action_input,
    memory=memory,
    nb_steps_warmup_critic=1000,
    nb_steps_warmup_actor=1000,
    batch_size=32,
    random_process=random_process,
    gamma=.99,
    target_model_update=1e-3,
)
# Separate optimizers for actor and critic, both gradient-clipped.
agent.compile([Adam(lr=0.00001, clipnorm=1.), Adam(lr=0.01, clipnorm=1.)], metrics=['mae'])
agent.fit(env, nb_steps=200000, visualize=False, callbacks=[callback3], verbose=2)
#time.sleep(1800)
# Run 2: DDPG training (batch_size=96, no gradient clipping), then save weights.
callback1 = FileLogger(filepath='save/history1_{}'.format(timenow), interval=1)
#callback2 = FileLogger(filepath='save/history2_{}'.format(timenow), interval=1)
#callback3 = FileLogger(filepath='save/history3_{}'.format(timenow), interval=1)
#callback4 = FileLogger(filepath='save/history4_{}'.format(timenow), interval=1)

# Replay buffer and Ornstein-Uhlenbeck exploration noise.
memory = SequentialMemory(limit=50000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)

agent = DDPGAgent(
    nb_actions=nb_actions,
    actor=actor,
    critic=critic,
    critic_action_input=action_input,
    memory=memory,
    nb_steps_warmup_critic=1000,
    nb_steps_warmup_actor=1000,
    batch_size=96,
    random_process=random_process,
    gamma=.99,
    target_model_update=1e-3,
)
agent.compile([Adam(lr=0.00001), Adam(lr=0.001)], metrics=['mae'])

# BUG FIX: keras-rl's Agent.fit() has no 'nb_epsteps' keyword, so the original
# call raised TypeError. The sibling run above uses 'nb_steps'; use the same
# keyword here. NOTE(review): if 3000 *episodes* was intended, this should be
# a larger nb_steps or a wrapper that counts episodes -- confirm with author.
agent.fit(env, nb_steps=3000, visualize=False, callbacks=[callback1], verbose=2)

agent.save_weights('save/ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
print('weights saved!')
# Run 3: DDPG training against the ContinuousCartpole environment using the
# experiment-wrapper API (SimpleMemory / agent.train / agent.test).

# Memory
memory = SimpleMemory(env=env, limit=100000)
#memory = SequentialMemory(limit=10000, window_length=1)

# Noise
#random_process = OrnsteinUhlenbeckProcess(size=env.action_space.dim, theta=.15, mu=0., sigma=3.)
random_process = GaussianWhiteNoiseProcess()

# History logger attached to the training loop below.
callback1 = FileLogger(filepath='save/history1_{}'.format(timenow), interval=1)

# Agent
agent = DDPGAgent(
    experiment=experiment,
    actor=actor,
    critic=critic,
    env=env,
    memory=memory,
    #random_process=random_process
)
agent.compile()

history = agent.train(
    env=env,
    episodes=3000,
    render=False,
    callbacks=[callback1],
    verbosity=2,
    plots=False,
)
test = agent.test(env=env)
agent.save_weights('save/ddpg_ContinuousCartpole_weights.h5f', overwrite=True)