dqn_only_embedding = DQNAgent(model=model,
                              nb_actions=action_size,
                              memory=memory,
                              nb_steps_warmup=500,
                              target_model_update=1e-2,
                              policy=policy,
                              test_policy=policy)
dqn_only_embedding.compile(Adam(lr=1e-3), metrics=['mae'])
dqn_only_embedding.fit(env,
                       nb_steps=50000,
                       visualize=False,
                       verbose=1,
                       nb_max_episode_steps=100,
                       log_interval=10000,
                       start_step_policy=policy)
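
# Stack the Q-values the trained agent assigns to every discrete state,
# one row per state index.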
q_values = dqn_only_embedding.compute_batch_q_values([0])
for i in range(1, state_size):
    q_values = np.vstack(
        (q_values, dqn_only_embedding.compute_batch_q_values([i])))

#dqn_only_embedding.test(env, nb_episodes=5, visualize=False, verbose=1, nb_max_episode_steps=100,
#                     start_step_policy=policy)

# Path for car 0 to MI
env.reset()
env.step(21)
env.step(4)
env.step(22)
env.step(5)
env.step(19)
env.step(2)
class DQNPolicy:
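    """Rebuild the MountainCar-v0 DQN, reload its saved weights, and expose
    Q-value and greedy-action queries for batches of states."""
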
    def __init__(self, env, model_path):
        # Setup
        self.env = env
        self.model_path = model_path
        self.num_actions = env.action_space.n
        self.input_shape = env.observation_space.shape
        self.env_name = 'MountainCar-v0'
        self.dqn = None

        self.model = Sequential()
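        # Q-network: Flatten -> three Dense(48) + ReLU layers -> linear output
        # with one unit per action.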
        self.model.add(
            Flatten(input_shape=(1, ) + self.input_shape, name='input'))
        self.model.add(Dense(48))
        self.model.add(Activation('relu'))
        self.model.add(Dense(48))
        self.model.add(Activation('relu'))
        self.model.add(Dense(48))
        self.model.add(Activation('relu'))
        self.model.add(
            Dense(self.num_actions,
                  kernel_initializer='zeros',
                  name='output_weights'))
        self.model.add(Activation('linear'))
        print(self.model.summary())

        memory = SequentialMemory(limit=50000, window_length=1)
        policy = BoltzmannQPolicy()
        self.dqn = DQNAgent(model=self.model,
                            nb_actions=self.num_actions,
                            memory=memory,
                            nb_steps_warmup=10,
                            target_model_update=1e-2,
                            policy=policy)
        self.dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        weights_filename = 'dqn_{}_weights.h5f'.format(self.env_name)
        # if args.weights:
        #     weights_filename = args.weights
        self.dqn.load_weights(self.model_path + weights_filename)

    def predict_q(self, imgs):
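        """Return an array with one row of Q-values per state in imgs."""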
        qs1 = []
        for state in imgs:
            current_state = np.expand_dims(state, axis=0)
            current_state = list(np.expand_dims(current_state, axis=0))
            qs = self.dqn.compute_batch_q_values(current_state)
            qs1.append(qs.tolist()[0])
        qs1 = np.asarray(qs1)

        return qs1

    def predict(self, imgs):
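        """Return the greedy (argmax-Q) action for each state in imgs."""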
        acts = []
        if isinstance(imgs, list):
            for state in imgs:
                current_state = np.expand_dims(np.expand_dims(state, axis=0),
                                               axis=0)
                current_state = list(np.expand_dims(current_state, axis=0))
                acts.append(
                    np.asarray(
                        [np.argmax(self.dqn.model.predict(current_state))]))
        elif isinstance(imgs, np.ndarray):
            # print(imgs)
            current_state = np.expand_dims(imgs, axis=0)
            current_state = list(np.expand_dims(current_state, axis=0))
            acts = np.asarray(
                [np.argmax(self.dqn.model.predict(current_state))])

        return acts

# Okay, now it's time to learn something! We don't visualize the training here,
# since visualization slows it down quite a lot. You can always safely abort
# the training prematurely using Ctrl + C.
env.is_train = True

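# Start from the previously saved weights and continue training.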
dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))
dqn.fit(env, nb_steps=100000, visualize=False, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

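# Save each observation together with the action the trained agent picks for it.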
with open('dqn_action.json', 'w') as fw:
    observation = status.tolist()
    action = [
        float(actions[dqn.forward(np.array([obs]))]) for obs in observation
    ]
    json.dump({'observation': observation, 'action': action}, fw)

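# Compute the Q-values for every recorded state and export them to CSV.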
state_batch = status.reshape([-1, 1, 1])
q_val = pd.DataFrame(dqn.compute_batch_q_values(state_batch))
q_val.to_csv('dqn_qvalue.csv')

env.is_train = False
env.plot_row = 1
env.plot_col = 5
# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)

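# Leave interactive plotting mode and keep the final figure window open.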
env.plt.ioff()
env.plt.show()