def test_already_trained_model(trained_model):
    """Evaluate a trained Q-network greedily on the gym environment.

    Runs 100 episodes (capped at 1000 steps each), always taking the
    arg-max action from the model's Q-value predictions, and logs the
    total reward of each episode.

    Args:
        trained_model: a Keras model mapping a (1, obs_dim) state to
            per-action Q-values.

    Returns:
        List of total rewards, one entry per test episode.
    """
    episode_rewards = []
    max_steps = 1000
    test_env = gym.make(GYM_ENV)
    logger_main.info("Starting Testing of the trained model...")
    for episode_idx in range(100):
        obs = test_env.reset()
        obs_dim = test_env.observation_space.shape[0]
        # Model expects a batch dimension: (1, obs_dim).
        obs = np.reshape(obs, [1, obs_dim])
        total_reward = 0
        for _ in range(max_steps):
            test_env.render()
            # Greedy policy: no exploration during evaluation.
            action = np.argmax(trained_model.predict(obs)[0])
            next_obs, reward, done, _ = test_env.step(action)
            obs = np.reshape(next_obs, [1, obs_dim])
            total_reward += reward
            if done:
                break
        episode_rewards.append(total_reward)
        logger_main.info(
            f"{episode_idx} \t: Episode || Reward: {total_reward}")
    return episode_rewards
def initialize_model(self):
    """Build and compile the Q-network for this agent.

    Architecture: two ReLU hidden layers (512, 256) feeding a linear
    output layer with one Q-value per action. Compiled with MSE loss
    and Adam at the agent's learning rate.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    model = Sequential()
    model.add(
        Dense(512, input_dim=self.num_observation_space, activation=relu))
    model.add(Dense(256, activation=relu))
    model.add(Dense(self.num_action_space, activation=linear))
    # Compile the model
    model.compile(loss=mean_squared_error, optimizer=Adam(lr=self.lr))
    # Fix: Model.summary() prints to stdout and returns None, so the
    # previous logger_main.info(model.summary()) call only logged "None".
    # Route each summary line through the logger instead.
    model.summary(print_fn=logger_main.info)
    return model
def run_experiment_for_gamma(images_dir):
    """Hyperparameter sweep over the discount factor gamma.

    Trains one DQN per gamma in [0.99, 0.9, 0.8, 0.7] with all other
    hyperparameters fixed, pickles the per-episode reward curves to
    ``rewards_dir`` (module-level), and plots them as Figure 4.

    Args:
        images_dir: directory/prefix where the output figure is written.
    """
    logger_main.info('Running Experiment for gamma...')
    env = gym.make(GYM_ENV)
    # Fixed seeds so the runs are comparable across gamma values.
    env.seed(21)
    np.random.seed(21)

    # setting up params (everything fixed except gamma)
    lr = 0.001
    epsilon = 1.0
    epsilon_decay = 0.995
    gamma_list = [0.99, 0.9, 0.8, 0.7]
    training_episodes = 1000

    rewards_list_for_gammas = []
    for gamma_value in gamma_list:
        model = DQN(env, lr, gamma_value, epsilon, epsilon_decay)
        logger_main.info(f"Training model for Gamma: {gamma_value}")
        model.train(training_episodes, False)
        rewards_list_for_gammas.append(model.rewards_list)

    # Fix: the original passed bare open(...) handles straight to
    # pickle.dump/load, leaking file descriptors; context managers
    # guarantee the files are flushed and closed.
    rewards_path = rewards_dir + f"{GYM_ENV}_rewards_list_for_gammas.p"
    with open(rewards_path, "wb") as fh:
        pickle.dump(rewards_list_for_gammas, fh)
    with open(rewards_path, "rb") as fh:
        rewards_list_for_gammas = pickle.load(fh)

    # One column per gamma, indexed by episode number (1-based).
    gamma_rewards_pd = pd.DataFrame(
        index=pd.Series(range(1, training_episodes + 1)))
    for i, gamma_value in enumerate(gamma_list):
        gamma_rewards_pd["gamma=" + str(gamma_value)] = \
            rewards_list_for_gammas[i]
    plot_experiments(
        gamma_rewards_pd,
        images_dir + "Figure 4: Rewards per episode for different gamma values",
        "Figure 4: Rewards per episode for different gamma values",
        "Episodes", "Reward", (-600, 300))
def run_experiment_for_ed(images_dir):
    """Hyperparameter sweep over the epsilon decay rate.

    Trains one DQN per decay value in [0.999, 0.995, 0.990, 0.9] with
    all other hyperparameters fixed, pickles the per-episode reward
    curves to ``rewards_dir`` (module-level), and plots them as Figure 5.

    Args:
        images_dir: directory/prefix where the output figure is written.
    """
    logger_main.info('Running Experiment for epsilon decay...')
    env = gym.make(GYM_ENV)
    # Fixed seeds so the runs are comparable across decay values.
    env.seed(21)
    np.random.seed(21)

    # setting up params (everything fixed except epsilon decay)
    lr = 0.001
    epsilon = 1.0
    ed_values = [0.999, 0.995, 0.990, 0.9]
    gamma = 0.99
    training_episodes = 1000

    rewards_list_for_ed = []
    for ed in ed_values:
        # (dropped unused local ``save_dir`` from the original)
        model = DQN(env, lr, gamma, epsilon, ed)
        # Fix: the original logged the literal string "Training model for
        # ED: {ed}" because the f-prefix was missing.
        logger_main.info(f"Training model for ED: {ed}")
        model.train(training_episodes, False)
        rewards_list_for_ed.append(model.rewards_list)

    # Fix: use context managers instead of leaking open(...) handles
    # passed directly to pickle.dump/load.
    rewards_path = rewards_dir + f"{GYM_ENV}_rewards_list_for_ed.p"
    with open(rewards_path, "wb") as fh:
        pickle.dump(rewards_list_for_ed, fh)
    with open(rewards_path, "rb") as fh:
        rewards_list_for_ed = pickle.load(fh)

    # One column per decay value, indexed by episode number (1-based).
    ed_rewards_pd = pd.DataFrame(
        index=pd.Series(range(1, training_episodes + 1)))
    for i, ed in enumerate(ed_values):
        ed_rewards_pd["epsilon_decay = " + str(ed)] = rewards_list_for_ed[i]
    plot_experiments(
        ed_rewards_pd,
        images_dir + "Figure 5: Rewards per episode for different epsilon(ε) decay",
        "Figure 5: Rewards per episode for different epsilon(ε) decay values",
        "Episodes", "Reward", (-600, 300))
def train(self, num_episodes=2000, can_stop=True):
    """Run the DQN training loop.

    For each episode: roll out up to 1000 steps with the epsilon-greedy
    policy from ``get_action``, store each transition in replay memory,
    and update the network via ``learn_and_update_weights_by_reply``.
    Epsilon is multiplicatively decayed once per episode down to
    ``self.epsilon_min``. Per-episode rewards accumulate in
    ``self.rewards_list``.

    Args:
        num_episodes: maximum number of training episodes.
        can_stop: if True, stop early once the mean reward over the last
            100 episodes exceeds 200 (the LunarLander "solved" bar).

    NOTE(review): this method reads a module-level ``env``, not
    ``self.env`` — presumably the same environment passed to the DQN
    constructor elsewhere in the file; confirm before refactoring.
    """
    for episode in range(num_episodes):
        state = env.reset()
        reward_for_episode = 0
        num_steps = 1000
        # Add a batch dimension: the network expects (1, obs_dim).
        state = np.reshape(state, [1, self.num_observation_space])
        for step in range(num_steps):
            env.render()
            received_action = self.get_action(state)
            # logger_main.info("received_action:", received_action)
            next_state, reward, done, info = env.step(received_action)
            next_state = np.reshape(next_state,
                                    [1, self.num_observation_space])
            # Store the experience in replay memory
            self.add_to_replay_memory(state, received_action, reward,
                                      next_state, done)
            # add up rewards
            reward_for_episode += reward
            state = next_state
            self.update_counter()
            # Sample a minibatch from replay memory and fit the network.
            self.learn_and_update_weights_by_reply()
            if done:
                break
        self.rewards_list.append(reward_for_episode)
        # Decay the epsilon after each experience completion
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        # Check for breaking condition
        last_rewards_mean = np.mean(self.rewards_list[-100:])
        if last_rewards_mean > 200 and can_stop:
            logger_main.info("DQN Training Complete...")
            break
        logger_main.info(
            f"{episode} \t: Episode || Reward: {reward_for_episode} \t|| Average Reward: {last_rewards_mean} \t epsilon: {self.epsilon:.10f}"
        )
def run_experiment_for_lr(images_dir):
    """Hyperparameter sweep over the learning rate.

    Trains one DQN per learning rate in [0.0001, 0.001, 0.01, 0.1] with
    all other hyperparameters fixed, pickles the per-episode reward
    curves to ``rewards_dir`` (module-level), and plots them as Figure 3.

    Args:
        images_dir: directory/prefix where the output figure is written.
    """
    logger_main.info('Running Experiment for learning rate...')
    env = gym.make(GYM_ENV)
    # Fixed seeds so the runs are comparable across learning rates.
    env.seed(21)
    np.random.seed(21)

    # setting up params (everything fixed except the learning rate)
    lr_values = [0.0001, 0.001, 0.01, 0.1]
    epsilon = 1.0
    epsilon_decay = 0.995
    gamma = 0.99
    training_episodes = 1000

    rewards_list_for_lrs = []
    for lr_value in lr_values:
        model = DQN(env, lr_value, gamma, epsilon, epsilon_decay)
        logger_main.info(f"Training model for LR: {lr_value}")
        model.train(training_episodes, False)
        rewards_list_for_lrs.append(model.rewards_list)

    # Fix: the original passed bare open(...) handles straight to
    # pickle.dump/load, leaking file descriptors; context managers
    # guarantee the files are flushed and closed.
    rewards_path = rewards_dir + f"{GYM_ENV}_rewards_list_for_lrs.p"
    with open(rewards_path, "wb") as fh:
        pickle.dump(rewards_list_for_lrs, fh)
    with open(rewards_path, "rb") as fh:
        rewards_list_for_lrs = pickle.load(fh)

    # One column per learning rate, indexed by episode number (1-based).
    lr_rewards_pd = pd.DataFrame(
        index=pd.Series(range(1, training_episodes + 1)))
    for i, lr_value in enumerate(lr_values):
        lr_rewards_pd["lr=" + str(lr_value)] = rewards_list_for_lrs[i]
    plot_experiments(
        lr_rewards_pd,
        images_dir + "Figure 3: Rewards per episode for different learning rates",
        "Figure 3: Rewards per episode for different learning rates",
        "Episodes", "Reward", (-2000, 300))
# --- Script driver: train (optionally) and then evaluate the agent. ---
# Directory for pickled reward curves; also read as a global by the
# run_experiment_for_* functions.
rewards_dir = "./rewards/" + GYM_ENV + "/"
# NOTE(review): save_dir, images_dir, env and enable_train are not defined
# in this chunk — presumably set earlier in the script; confirm.
pathlib.Path(save_dir).mkdir(parents=True, exist_ok=True)
pathlib.Path(images_dir).mkdir(parents=True, exist_ok=True)
# set seeds
env.seed(21)
np.random.seed(21)
# setting up params
lr = 0.001
epsilon = 1.0
epsilon_decay = 0.995
gamma = 0.99
training_episodes = 2000
if enable_train:
    logger_main.info('Start train')
    model = DQN(env, lr, gamma, epsilon, epsilon_decay)
    # can_stop=True: training may end early once the agent "solves" the env.
    model.train(training_episodes, True)
    # Save trained model
    model.save(save_dir + f"{GYM_ENV}_trained_model.h5")
    # Save Rewards list
    pickle.dump(model.rewards_list,
                open(save_dir + f"{GYM_ENV}_train_rewards_list.p", "wb"))
    logger_main.info('End train')
logger_main.info('Start testing')
# Reload the rewards saved above (or by a previous run when training is
# disabled).
rewards_list = pickle.load(
    open(save_dir + f"{GYM_ENV}_train_rewards_list.p", "rb"))
# plot reward in graph
reward_df = pd.DataFrame(rewards_list)