# Persist the network architecture to disk as JSON before training starts.
model_save = model.to_json()
with open("save/NNmodel1.json", "w") as json_file:
    json_file.write(model_save)
print("Saved model to disk!")

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=10000, window_length=1)
#policy1 = BoltzmannQPolicy()
# NOTE(review): EpsDisGreedyQPolicy is not part of stock keras-rl -- presumably a
# project-local decaying epsilon-greedy policy; confirm eps/eps_decay semantics.
policy1 = EpsDisGreedyQPolicy(eps=0.0001, eps_decay=0.999)
policy2 = BoltzmannQPolicy()

# Per-episode training metrics are appended to this file by keras-rl's FileLogger.
callback12 = FileLogger(filepath='save/history12_{}'.format(timenow), interval=1)

# target_model_update=1 -> the target network is synced every step (hard update).
dqn8 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, gamma=0.99,
                nb_steps_warmup=2000, target_model_update=1, policy=policy1)
dqn8.compile(Adam(lr=1e-3), metrics=['mae'])
# NOTE(review): vanilla keras-rl fit() accepts nb_steps, not nb_epsteps -- this
# assumes a forked keras-rl; verify before upgrading the dependency.
history8 = dqn8.fit(env, nb_epsteps=3000, visualize=False,
                    callbacks=[callback12], verbose=2)
# One FileLogger per experiment so runs don't overwrite each other's history.
callback3 = FileLogger(filepath='save/nhistory3_{}'.format(timenow), interval=1)
callback4 = FileLogger(filepath='save/nhistory4_{}'.format(timenow), interval=1)

# Earlier experiment configurations, kept disabled for reference:
#dqn1 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=64, nb_steps_warmup=2000,
#                target_model_update=1e-2, policy=policy2)
#dqn1.compile(Adam(lr=1e-3), metrics=['mae'])
#dqn1.fit(env, nb_steps=300000, visualize=False, callbacks=[callback1], verbose=2)
#dqn2 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=64, gamma=0.5, nb_steps_warmup=2000,
#                target_model_update=1e-2, policy=policy2)
#dqn2.compile(Adam(lr=1e-3), metrics=['mae'])
#dqn2.fit(env, nb_steps=300000, visualize=False, callbacks=[callback2], verbose=2)

# Active experiment: soft target updates (1e-2) with policy1, 300k env steps.
dqn3 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=64,
                nb_steps_warmup=2000, target_model_update=1e-2, policy=policy1)
dqn3.compile(Adam(lr=1e-3), metrics=['mae'])
dqn3.fit(env, nb_steps=300000, visualize=False, callbacks=[callback3], verbose=2)

# Dead plotting code: this is a bare string literal, never evaluated at runtime.
'''
pyplot.figure()
pyplot.subplot(2, 1, 1)
pyplot.plot(history1.history['episode_reward'], 'r--', history3.history['episode_reward'], 'b--')
pyplot.subplot(2, 1, 2)
#pyplot.plot(history1.history['nb_steps'], history1.history['episode_reward'], 'r', history2.history['nb_steps'], history2.history['episode_reward'], 'g')
pyplot.plot(history2.history['episode_reward'], 'r', history4.history['episode_reward'], 'b')
pyplot.show()
#pyplot.savefig('save/BoltzmannQPolicy')
'''
# Restore the previously serialized network architecture from disk.
with open('save/NNmodel1.json', 'r') as json_file:
    loaded_model1 = json_file.read()
model = model_from_json(loaded_model1)
print("Loaded model from disk!")

ENV_NAME = 'GazeboCircuit2TurtlebotLidar-v1'
env = gym.make(ENV_NAME)
nb_actions = env.action_space.n

memory = SequentialMemory(limit=10000, window_length=1)
policy = BoltzmannQPolicy()

#dqn1 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=2000,
#                target_model_update=1e-2, policy=policy)
dqn2 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=2000,
                target_model_update=1e-2)
#dqn3 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
#                target_model_update=1e-2, policy=policy, enable_double_dqn=False)
#dqn4 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
#                target_model_update=1e-2, enable_double_dqn=False)

#dqn1.compile(Adam, metrics=['mae'])
# BUG FIX: the original passed the Adam *class* instead of an instance;
# keras-rl clones the optimizer in compile() and rejects a bare class.
dqn2.compile(Adam(), metrics=['mae'])
#dqn3.compile(SGD, metrics=['mae'])
#dqn4.compile(SGD, metrics=['mae'])

#dqn1.load_weights('save/dqn2_{}_weights.h5f'.format(ENV_NAME))
# Weights saved by the Boltzmann(tau=0.8) training run.
dqn2.load_weights(
    'save/dqn_blotzmann0.8_GazeboCircuit2TurtlebotLidar-v1_weights.h5f')
# Replay buffer and candidate exploration policies for this batch of runs.
memory = SequentialMemory(limit=50000, window_length=1)
# policy1 = BoltzmannQPolicy()
policy1 = EpsDisGreedyQPolicy(eps=0.05, eps_decay=0.999)
policy2 = BoltzmannQPolicy(tau=0.8)

# A logger per planned experiment (most are currently disabled below).
callback1 = FileLogger(filepath='save/nhistory1_{}'.format(timenow), interval=1)
callback2 = FileLogger(filepath='save/nhistory2_{}'.format(timenow), interval=1)
callback3 = FileLogger(filepath='save/nhistory3_{}'.format(timenow), interval=1)
callback4 = FileLogger(filepath='save/nhistory4_{}'.format(timenow), interval=1)
callback5 = FileLogger(filepath='save/nhistory5_{}'.format(timenow), interval=1)

# dqn1 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
#                 target_model_update=1e-2, policy=policy1)
# dqn1.compile(Adam(lr=1e-3), metrics=['mae'])
# history1 = dqn1.fit(env, nb_epsteps=5000, visualize=False, callbacks=[callback1], verbose=2)

# Active experiment: Boltzmann(tau=0.8) exploration, 200k steps.
dqn2 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=32,
                nb_steps_warmup=1000, target_model_update=1e-2, policy=policy2)
dqn2.compile(Adam(lr=0.01), metrics=['mse'])
# dqn2.save_weights('save/dqn_blotzmann0.8_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
# NOTE(review): this dqn2 run logs to callback1 (nhistory1); callback2 is unused --
# looks like a copy-paste slip, confirm which history file is intended.
history2 = dqn2.fit(env, nb_steps=200000, visualize=False,
                    callbacks=[callback1], verbose=2)
# Pause one hour after training -- presumably to let the simulator settle or to
# stagger runs; confirm whether this is still needed.
time.sleep(3600)

# dqn3 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
#                 target_model_update=1e-2, policy=policy1, enable_double_dqn=False)
# dqn3.compile(Adam(lr=1e-3), metrics=['mae'])
# history3 = dqn3.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback3], verbose=2)
# dqn4 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
#                 target_model_update=1e-2, policy=policy2, enable_double_dqn=False)
# dqn4.compile(Adam(lr=1e-3), metrics=['mae'])
# history4 = dqn4.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback4], verbose=2)
# Network head: two 24-unit ReLU hidden layers and a linear Q-value output,
# one unit per discrete action.
model.add(Dense(24))
model.add(Activation('relu'))
model.add(Dense(24))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# serialize model to JSON
model_save = model.to_json()
with open("save/NNmodel1.json", "w") as json_file:
    json_file.write(model_save)
print("Saved model to disk!")

memory = SequentialMemory(limit=100000, window_length=1)
policy1 = BoltzmannQPolicy()
policy2 = EpsGreedyQPolicy()

callback5 = FileLogger(filepath='save/history5_{}'.format(timenow), interval=1)

# Epsilon-greedy run: hard target sync every 50000 steps, large batch of 320.
dqn5 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=320,
                nb_steps_warmup=10000, target_model_update=50000, policy=policy2)
dqn5.compile(Adam(lr=1e-3), metrics=['mae'])
# NOTE(review): result of the dqn5 run is stored in 'history4' -- confusing but
# kept as-is since later code may read that name. nb_epsteps assumes a forked
# keras-rl fit() signature; vanilla keras-rl uses nb_steps.
history4 = dqn5.fit(env, nb_epsteps=2500, visualize=False,
                    callbacks=[callback5], verbose=2)
#dqn4.save_weights('save/dqn4_{}_weights.h5f'.format(ENV_NAME), overwrite=True)