# keras-rl DDPG: replay memory, Ornstein-Uhlenbeck exploration noise, agent setup,
# training, and weight saving.
#callback4 = FileLogger(filepath='save/history4_{}'.format(timenow), interval=1)
memory = SequentialMemory(limit=50000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                  batch_size=96, random_process=random_process,
                  gamma=.99, target_model_update=1e-3)
# First optimizer is for the actor, second for the critic.
agent.compile([Adam(lr=0.00001), Adam(lr=0.001)], metrics=['mae'])
# keras-rl's fit() counts environment steps via nb_steps; nb_epsteps is not a valid keyword.
agent.fit(env, nb_steps=3000, visualize=False, callbacks=[callback1], verbose=2)
agent.save_weights('save/ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
print('weights saved!')
#agent.test(env, nb_episodes=500, visualize=True)
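# The block above assumes env, nb_actions, actor, critic, action_input, callback1,
# ENV_NAME, and timenow were defined earlier in the script. A minimal sketch of
# that setup for keras-rl is kept below in a string so it is not executed here;
# the environment name, network sizes, tanh output, and the callback1 path are
# assumptions, not taken from the original script.
'''
import time
import gym
from keras.models import Model, Sequential
from keras.layers import Dense, Flatten, Input, Concatenate
from keras.optimizers import Adam
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess
from rl.callbacks import FileLogger

ENV_NAME = 'Pendulum-v0'  # assumed stand-in; any continuous-action env works
env = gym.make(ENV_NAME)
nb_actions = env.action_space.shape[0]
timenow = time.strftime('%Y%m%d-%H%M%S')
callback1 = FileLogger(filepath='save/history1_{}'.format(timenow), interval=1)

# Actor: observation -> action; tanh bounds actions to [-1, 1] (assumption).
# The (1,) prefix on input_shape matches window_length=1 in SequentialMemory.
actor = Sequential([
    Flatten(input_shape=(1,) + env.observation_space.shape),
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(nb_actions, activation='tanh'),
])

# Critic: (action, observation) -> scalar Q-value.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
x = Concatenate()([action_input, Flatten()(observation_input)])
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(1, activation='linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)

# After training, the saved weights can be reloaded for a greedy evaluation pass
# (keras-rl applies no exploration noise during test); episode count is arbitrary.
agent.load_weights('save/ddpg_{}_weights.h5f'.format(ENV_NAME))
agent.test(env, nb_episodes=5, visualize=True)
'''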
# Alternative run using a custom DDPG implementation. The constructor call is
# truncated in the source; only its trailing arguments survive:
#     agent = ...(
#         env=env, memory=memory,
#         #random_process=random_process
#     )
agent.compile()
history = agent.train(env=env, episodes=3000, render=False,
                      callbacks=[callback1], verbosity=2, plots=False)
test = agent.test(env=env)
agent.save_weights('save/ddpg_ContinuousCartpole_weights.h5f', overwrite=True)
#print(history.history.keys())
'''
agent._run(episodes=20, train=True, render=False, exploration=True,
           plots=False, tensorboard=False, callbacks=callback1,
           verbosity=2, action_repetition=1, nb_max_episode_steps=None,