# NOTE(review): this chunk begins mid-statement — the assignment target and
# call head (presumably `dqn_only_embedding = DQNAgent(`) lie before the
# visible source; only the keyword arguments and closing paren are here.
model=model, nb_actions=action_size, memory=memory, nb_steps_warmup=500,
    target_model_update=1e-2, policy=policy, test_policy=policy)

# Compile and train the embedding agent; Adam with MAE as a monitoring metric.
dqn_only_embedding.compile(Adam(lr=1e-3), metrics=['mae'])
dqn_only_embedding.fit(env, nb_steps=50000, visualize=False, verbose=1,
                       nb_max_episode_steps=100, log_interval=10000,
                       start_step_policy=policy)

# Collect Q-values for every discrete state id in [0, state_size):
# one row per state, stacked vertically into a (state_size, n_actions) array.
q_values = dqn_only_embedding.compute_batch_q_values([0])
for i in range(1, state_size):
    q_values = np.vstack(
        (q_values, dqn_only_embedding.compute_batch_q_values([i])))

#dqn_only_embedding.test(env, nb_episodes=5, visualize=False, verbose=1, nb_max_episode_steps=100,
#                        start_step_policy=policy)

# Path for car 0 to MI (translated from Portuguese: "Caminho para o carro 0 até MI").
# Replays a fixed, hand-scripted action sequence through the environment.
env.reset()
env.step(21)
env.step(4)
env.step(22)
env.step(5)
env.step(19)
env.step(2)
class DQNPolicy:
    """Thin wrapper around a pre-trained keras-rl DQN agent.

    Rebuilds the exact MLP architecture the saved weights were trained with
    (three 48-unit ReLU hidden layers, zero-initialized linear output), then
    loads the weights from ``model_path`` and exposes Q-value / greedy-action
    prediction helpers.
    """

    def __init__(self, env, model_path):
        """Build the network and load the pre-trained weights.

        Args:
            env: Gym-style environment; only ``action_space.n`` and
                ``observation_space.shape`` are read here.
            model_path: Directory (with trailing separator) containing the
                saved ``dqn_<env_name>_weights.h5f`` file.
        """
        self.env = env
        self.model_path = model_path
        self.num_actions = env.action_space.n
        self.input_shape = env.observation_space.shape
        # NOTE(review): env name is hard-coded; the weights filename below is
        # derived from it, so it must match the file on disk.
        self.env_name = 'MountainCar-v0'
        self.dqn = None

        # Architecture must match training exactly or load_weights will fail.
        # window_length=1 upstream means inputs arrive as (1,) + obs_shape.
        self.model = Sequential()
        self.model.add(
            Flatten(input_shape=(1, ) + self.input_shape, name='input'))
        self.model.add(Dense(48))
        self.model.add(Activation('relu'))
        self.model.add(Dense(48))
        self.model.add(Activation('relu'))
        self.model.add(Dense(48))
        self.model.add(Activation('relu'))
        self.model.add(
            Dense(self.num_actions,
                  kernel_initializer='zeros',
                  name='output_weights'))
        self.model.add(Activation('linear'))
        print(self.model.summary())

        # Memory/policy are required by the DQNAgent constructor even though
        # this wrapper only does inference, never training.
        memory = SequentialMemory(limit=50000, window_length=1)
        policy = BoltzmannQPolicy()
        self.dqn = DQNAgent(model=self.model,
                            nb_actions=self.num_actions,
                            memory=memory,
                            nb_steps_warmup=10,
                            target_model_update=1e-2,
                            policy=policy)
        self.dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        weights_filename = 'dqn_{}_weights.h5f'.format(self.env_name)
        self.dqn.load_weights(self.model_path + weights_filename)

    def predict_q(self, imgs):
        """Return Q-values for each state in ``imgs``.

        Args:
            imgs: Iterable of single states (numpy arrays matching the
                environment's observation shape — TODO confirm with caller).

        Returns:
            np.ndarray of shape (len(imgs), num_actions).
        """
        q_rows = []
        for state in imgs:
            # keras-rl's compute_batch_q_values expects a list wrapping a
            # (batch=1, window=1) + obs-shaped array.
            batch = np.expand_dims(np.expand_dims(state, axis=0), axis=0)
            qs = self.dqn.compute_batch_q_values(list(batch))
            q_rows.append(qs.tolist()[0])
        return np.asarray(q_rows)

    def predict(self, imgs):
        """Return greedy action(s) for the given state(s).

        Args:
            imgs: Either a list of states (returns a list of one-element
                arrays, one per state) or a single ndarray state (returns
                a single one-element array).

        Returns:
            list[np.ndarray] or np.ndarray, mirroring the input kind; an
            empty list for any other input type (preserved original
            behavior).
        """
        if isinstance(imgs, list):  # idiom fix: was `type(imgs) == list`
            acts = []
            for state in imgs:
                # NOTE(review): this branch adds THREE leading axes while the
                # ndarray branch below adds two — preserved as-is since the
                # expected element shape of `imgs` is not visible here.
                current_state = np.expand_dims(np.expand_dims(state, axis=0),
                                               axis=0)
                current_state = list(np.expand_dims(current_state, axis=0))
                acts.append(
                    np.asarray(
                        [np.argmax(self.dqn.model.predict(current_state))]))
            return acts
        if isinstance(imgs, np.ndarray):
            current_state = np.expand_dims(imgs, axis=0)
            current_state = list(np.expand_dims(current_state, axis=0))
            return np.asarray(
                [np.argmax(self.dqn.model.predict(current_state))])
        # Unsupported input types fall through to an empty list, exactly as
        # the original `acts = []` initialization did.
        return []
# Okay, now it's time to learn something! We visualize the training here for show, but this # slows down training quite a lot. You can always safely abort the training prematurely using # Ctrl + C. env.is_train = True dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME)) dqn.fit(env, nb_steps=100000, visualize=False, verbose=2) # After training is done, we save the final weights. dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True) with open('dqn_action.json', 'w') as fw: observation = status.tolist() action = [ float(actions[dqn.forward(np.array([obs]))]) for obs in observation ] json.dump({'observation': observation, 'action': action}, fw) state_batch = status.reshape([-1, 1, 1]) q_val = pd.DataFrame(dqn.compute_batch_q_values(state_batch)) q_val.to_csv('dqn_qvalue.csv') env.is_train = False env.plot_row = 1 env.plot_col = 5 # Finally, evaluate our algorithm for 5 episodes. dqn.test(env, nb_episodes=5, visualize=True) env.plt.ioff() env.plt.show()