# serialize model to JSON
model_save = model.to_json()
with open("save/NNmodel1.json", "w") as json_file:
    json_file.write(model_save)
print("Saved model to disk!")

# Finally, we configure and compile our agent. You can use any built-in Keras
# optimizer and any of the built-in metrics.
memory = SequentialMemory(limit=10000, window_length=1)
# policy1 = BoltzmannQPolicy()
policy1 = EpsDisGreedyQPolicy(eps=0.0001, eps_decay=0.999)
policy2 = BoltzmannQPolicy()

callback12 = FileLogger(filepath='save/history12_{}'.format(timenow), interval=1)
dqn8 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, gamma=0.99,
                nb_steps_warmup=2000, target_model_update=1, policy=policy1)
dqn8.compile(Adam(lr=1e-3), metrics=['mae'])
history8 = dqn8.fit(env, nb_epsteps=3000, visualize=False, callbacks=[callback12], verbose=2)
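# Note: EpsDisGreedyQPolicy and the nb_epsteps argument to fit() are not part of
# stock keras-rl (which ships EpsGreedyQPolicy and fit(env, nb_steps=...)); both
# appear to come from a project-local fork.
#
# A minimal sketch of restoring the architecture saved above with Keras'
# model_from_json. The JSON stores only the architecture; weights would need to
# be saved and restored separately via save_weights()/load_weights().
from keras.models import model_from_json

with open("save/NNmodel1.json", "r") as json_file:
    restored_model = model_from_json(json_file.read())
print(restored_model.summary())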
callback1 = FileLogger(filepath='save/nhistory1_{}'.format(timenow), interval=1)
callback2 = FileLogger(filepath='save/nhistory2_{}'.format(timenow), interval=1)
callback3 = FileLogger(filepath='save/nhistory3_{}'.format(timenow), interval=1)
callback4 = FileLogger(filepath='save/nhistory4_{}'.format(timenow), interval=1)
callback5 = FileLogger(filepath='save/nhistory5_{}'.format(timenow), interval=1)

# dqn1 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
#                 target_model_update=1e-2, policy=policy1)
# dqn1.compile(Adam(lr=1e-3), metrics=['mae'])
# history1 = dqn1.fit(env, nb_epsteps=5000, visualize=False, callbacks=[callback1], verbose=2)

dqn2 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=32,
                nb_steps_warmup=1000, target_model_update=1e-2, policy=policy2)
dqn2.compile(Adam(lr=0.01), metrics=['mse'])
# dqn2.save_weights('save/dqn_boltzmann0.8_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
history2 = dqn2.fit(env, nb_steps=200000, visualize=False, callbacks=[callback1], verbose=2)
time.sleep(3600)

# dqn3 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
#                 target_model_update=1e-2, policy=policy1, enable_double_dqn=False)
# dqn3.compile(Adam(lr=1e-3), metrics=['mae'])
# history3 = dqn3.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback3], verbose=2)

# dqn4 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
#                 target_model_update=1e-2, policy=policy2, enable_double_dqn=False)
# dqn4.compile(Adam(lr=1e-3), metrics=['mae'])
# history4 = dqn4.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback4], verbose=2)

# print(history1.history.keys())
# print(len(history1.history['policy_config']))
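# A minimal sketch of loading a FileLogger history back for offline analysis,
# assuming keras-rl's FileLogger format: a JSON dict mapping metric names
# (e.g. 'episode', 'episode_reward', 'duration') to per-episode lists.
import json

from matplotlib import pyplot

with open('save/nhistory1_{}'.format(timenow)) as f:
    log = json.load(f)

pyplot.plot(log['episode'], log['episode_reward'])
pyplot.xlabel('episode')
pyplot.ylabel('episode reward')
pyplot.savefig('save/nhistory1_reward.png')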
# dqn1 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=64, nb_steps_warmup=2000,
#                 target_model_update=1e-2, policy=policy2)
# dqn1.compile(Adam(lr=1e-3), metrics=['mae'])
# dqn1.fit(env, nb_steps=300000, visualize=False, callbacks=[callback1], verbose=2)

# dqn2 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=64, gamma=0.5, nb_steps_warmup=2000,
#                 target_model_update=1e-2, policy=policy2)
# dqn2.compile(Adam(lr=1e-3), metrics=['mae'])
# dqn2.fit(env, nb_steps=300000, visualize=False, callbacks=[callback2], verbose=2)

dqn3 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=64,
                nb_steps_warmup=2000, target_model_update=1e-2, policy=policy1)
dqn3.compile(Adam(lr=1e-3), metrics=['mae'])
dqn3.fit(env, nb_steps=300000, visualize=False, callbacks=[callback3], verbose=2)

'''
pyplot.figure()
pyplot.subplot(2, 1, 1)
pyplot.plot(history1.history['episode_reward'], 'r--', history3.history['episode_reward'], 'b--')
pyplot.subplot(2, 1, 2)
# pyplot.plot(history1.history['nb_steps'], history1.history['episode_reward'], 'r',
#             history2.history['nb_steps'], history2.history['episode_reward'], 'g')
pyplot.plot(history2.history['episode_reward'], 'r', history4.history['episode_reward'], 'b')
pyplot.show()
# pyplot.savefig('save/BoltzmannQPolicy')
'''
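# After training, the agent can be checkpointed and evaluated with the standard
# keras-rl API (a sketch; the weights filename is illustrative, and test() uses
# the agent's test_policy, which is greedy by default):
dqn3.save_weights('save/dqn3_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
dqn3.test(env, nb_episodes=10, visualize=False)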
dqn1 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                target_model_update=1e-2, policy=policy1)
dqn1.compile(Adam(lr=1e-3), metrics=['mae'])
history1 = dqn1.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback1], verbose=2)

dqn2 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                target_model_update=1e-2, policy=policy2)
dqn2.compile(Adam(lr=1e-3), metrics=['mae'])
history2 = dqn2.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback2], verbose=2)

dqn3 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                target_model_update=1e-2, policy=policy1, enable_double_dqn=False)
dqn3.compile(Adam(lr=1e-3), metrics=['mae'])
history3 = dqn3.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback3], verbose=2)

'''
dqn3 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=640, nb_steps_warmup=20000,
                target_model_update=1e-2, policy=policy1)
dqn3.compile(Adam(lr=1e-2), metrics=['mae'])
history3 = dqn3.fit(env, nb_steps=500000, visualize=False, callbacks=[callback1], verbose=2)
# dqn3.save_weights('save/dqn4_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
'''
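# An alternative to the custom EpsDisGreedyQPolicy used above: stock keras-rl
# can anneal epsilon linearly over training with LinearAnnealedPolicy. A sketch
# only; this policy is not used by the experiments in this file, and the
# schedule values are illustrative.
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

annealed_policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                       value_max=1.0, value_min=0.1,
                                       value_test=0.05, nb_steps=10000)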
# serialize model to JSON
model_save = model.to_json()
with open("save/NNmodel1.json", "w") as json_file:
    json_file.write(model_save)
print("Saved model to disk!")

# Finally, we configure and compile our agent. You can use any built-in Keras
# optimizer and any of the built-in metrics.
memory = SequentialMemory(limit=10000, window_length=1)
# policy1 = BoltzmannQPolicy()
policy1 = EpsDisGreedyQPolicy(eps=0.0001, eps_decay=0.999)
policy2 = BoltzmannQPolicy()

callback10 = FileLogger(filepath='save/history10_{}'.format(timenow), interval=1)
dqn10 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, gamma=0.99,
                 nb_steps_warmup=2000, target_model_update=1e-2, policy=policy2)
dqn10.compile(Adam(lr=1e-3), metrics=['mae'])
history10 = dqn10.fit(env, nb_epsteps=3000, visualize=False, callbacks=[callback10], verbose=2)
# dqn3 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
#                 target_model_update=1e-2, policy=policy1, enable_double_dqn=False)
# dqn3.compile(Adam(lr=1e-3), metrics=['mae'])
# history3 = dqn3.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback3], verbose=2)

dqn4 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=64,
                nb_steps_warmup=2000, target_model_update=1e-2, policy=policy2)
dqn4.compile(Adam(lr=1e-3), metrics=['mae'])
history4 = dqn4.fit(env, nb_epsteps=3000, visualize=False, callbacks=[callback4], verbose=2)

'''
pyplot.figure()
pyplot.subplot(2, 1, 1)
pyplot.plot(history1.history['episode_reward'], 'r--', history3.history['episode_reward'], 'b--')
pyplot.subplot(2, 1, 2)
# pyplot.plot(history1.history['nb_steps'], history1.history['episode_reward'], 'r',
#             history2.history['nb_steps'], history2.history['episode_reward'], 'g')
pyplot.plot(history2.history['episode_reward'], 'r', history4.history['episode_reward'], 'b')
pyplot.show()
# pyplot.savefig('save/BoltzmannQPolicy')
'''
model.add(Activation('linear'))
print(model.summary())

# serialize model to JSON
model_save = model.to_json()
with open("save/NNmodel1.json", "w") as json_file:
    json_file.write(model_save)
print("Saved model to disk!")

memory = SequentialMemory(limit=10000, window_length=1)
# policy1 = BoltzmannQPolicy()
policy1 = EpsDisGreedyQPolicy(eps=0.0001, eps_decay=0.999)
policy2 = BoltzmannQPolicy()

callback6 = FileLogger(filepath='save/history6_{}'.format(timenow), interval=1)
dqn6 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, gamma=0.1,
                nb_steps_warmup=2000, target_model_update=1e-2, policy=policy2)
dqn6.compile(Adam(lr=1e-3), metrics=['mae'])
history6 = dqn6.fit(env, nb_epsteps=3000, visualize=False, callbacks=[callback6], verbose=2)
# dqn4 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
#                 target_model_update=1e-2, policy=policy2, enable_double_dqn=False)
# dqn4.compile(Adam(lr=1e-3), metrics=['mae'])
# history4 = dqn4.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback4], verbose=2)

dqn5 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=96,
                nb_steps_warmup=2000, target_model_update=1e-2, policy=policy2)
dqn5.compile(Adam(lr=1e-3), metrics=['mae'])
history5 = dqn5.fit(env, nb_epsteps=3000, visualize=False, callbacks=[callback5], verbose=2)

'''
pyplot.figure()
pyplot.subplot(2, 1, 1)
pyplot.plot(history1.history['episode_reward'], 'r--', history3.history['episode_reward'], 'b--')
pyplot.subplot(2, 1, 2)
# pyplot.plot(history1.history['nb_steps'], history1.history['episode_reward'], 'r',
#             history2.history['nb_steps'], history2.history['episode_reward'], 'g')
pyplot.plot(history2.history['episode_reward'], 'r', history4.history['episode_reward'], 'b')
pyplot.show()
# pyplot.savefig('save/BoltzmannQPolicy')
'''
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# serialize model to JSON
model_save = model.to_json()
with open("save/NNmodel1.json", "w") as json_file:
    json_file.write(model_save)
print("Saved model to disk!")

# Finally, we configure and compile our agent. You can use any built-in Keras
# optimizer and any of the built-in metrics.
memory = SequentialMemory(limit=10000, window_length=1)
# policy1 = BoltzmannQPolicy()
policy1 = EpsDisGreedyQPolicy(eps=0.0001, eps_decay=0.999)
policy2 = BoltzmannQPolicy()

callback9 = FileLogger(filepath='save/history9_{}'.format(timenow), interval=1)
dqn9 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, gamma=0.99,
                nb_steps_warmup=2000, target_model_update=1e-2, policy=policy1)
dqn9.compile(Adam(lr=1e-3), metrics=['mae'])
history9 = dqn9.fit(env, nb_epsteps=3000, visualize=False, callbacks=[callback9], verbose=2)
model.add(Activation('linear'))
print(model.summary())

# serialize model to JSON
model_save = model.to_json()
with open("save/NNmodel1.json", "w") as json_file:
    json_file.write(model_save)
print("Saved model to disk!")

memory = SequentialMemory(limit=10000, window_length=1)
# policy1 = BoltzmannQPolicy()
policy1 = EpsDisGreedyQPolicy(eps=0.0001, eps_decay=0.999)
policy2 = BoltzmannQPolicy()

callback7 = FileLogger(filepath='save/history7_{}'.format(timenow), interval=1)
dqn7 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, gamma=0.5,
                nb_steps_warmup=2000, target_model_update=1e-2, policy=policy2)
dqn7.compile(Adam(lr=1e-3), metrics=['mae'])
history7 = dqn7.fit(env, nb_epsteps=3000, visualize=False, callbacks=[callback7], verbose=2)
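# Several fragments above begin mid-model (model.add(Activation('linear')) with
# no preceding definition). For reference, a minimal sketch of a complete
# definition in keras-rl's usual MLP style; the layer count and hidden sizes
# are illustrative assumptions, not the project's actual values.
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))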