Example #1
def test_already_trained_model(trained_model):
    rewards_list = []
    num_test_episode = 100
    env = gym.make(GYM_ENV)
    logger_main.info("Starting Testing of the trained model...")

    step_count = 1000

    for test_episode in range(num_test_episode):
        current_state = env.reset()
        num_observation_space = env.observation_space.shape[0]
        current_state = np.reshape(current_state, [1, num_observation_space])
        reward_for_episode = 0
        for step in range(step_count):
            env.render()
            selected_action = np.argmax(
                trained_model.predict(current_state)[0])
            new_state, reward, done, info = env.step(selected_action)
            new_state = np.reshape(new_state, [1, num_observation_space])
            current_state = new_state
            reward_for_episode += reward
            if done:
                break
        rewards_list.append(reward_for_episode)
        logger_main.info(
            f"{test_episode} \t: Episode || Reward: {reward_for_episode}")

    return rewards_list
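
A minimal usage sketch for the test function above, assuming the trained network was saved as a Keras .h5 file (the path below is hypothetical) and that GYM_ENV, np, and logger_main are defined at module level:

from tensorflow.keras.models import load_model

# Hypothetical path; point this at wherever the training script saved the model.
trained_model = load_model("./saved_models/" + GYM_ENV + "_trained_model.h5")
test_rewards = test_already_trained_model(trained_model)
logger_main.info(f"Mean reward over {len(test_rewards)} test episodes: {np.mean(test_rewards):.2f}")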
Example #2
    def initialize_model(self):
        model = Sequential()
        model.add(
            Dense(512, input_dim=self.num_observation_space, activation=relu))
        model.add(Dense(256, activation=relu))
        model.add(Dense(self.num_action_space, activation=linear))

        # Compile the model
        model.compile(loss=mean_squared_error, optimizer=Adam(lr=self.lr))
        model.summary(print_fn=logger_main.info)
        return model
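
The method above references Sequential, Dense, relu, linear, mean_squared_error, and Adam without showing their imports. A minimal set that makes it run, assuming the TensorFlow-bundled Keras (an older release, where Adam still accepts the lr alias used here):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.activations import relu, linear
from tensorflow.keras.losses import mean_squared_error
from tensorflow.keras.optimizers import Adam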
Example #3
def run_experiment_for_gamma(images_dir):
    logger_main.info('Running Experiment for gamma...')
    env = gym.make(GYM_ENV)

    # set seeds
    env.seed(21)
    np.random.seed(21)

    # setting up params
    lr = 0.001
    epsilon = 1.0
    epsilon_decay = 0.995
    gamma_list = [0.99, 0.9, 0.8, 0.7]
    training_episodes = 1000

    rewards_list_for_gammas = []
    for gamma_value in gamma_list:
        # save_dir = "hp_gamma_"+ str(gamma_value) + "_"
        model = DQN(env, lr, gamma_value, epsilon, epsilon_decay)
        logger_main.info(f"Training model for Gamma: {gamma_value}")
        model.train(training_episodes, False)
        rewards_list_for_gammas.append(model.rewards_list)

    pickle.dump(
        rewards_list_for_gammas,
        open(rewards_dir + f"{GYM_ENV}_rewards_list_for_gammas.p", "wb"))
    rewards_list_for_gammas = pickle.load(
        open(rewards_dir + f"{GYM_ENV}_rewards_list_for_gammas.p", "rb"))

    gamma_rewards_pd = pd.DataFrame(
        index=pd.Series(range(1, training_episodes + 1)))
    for i in range(len(gamma_list)):
        col_name = "gamma=" + str(gamma_list[i])
        gamma_rewards_pd[col_name] = rewards_list_for_gammas[i]
    plot_experiments(
        gamma_rewards_pd, images_dir +
        "Figure 4: Rewards per episode for different gamma values",
        "Figure 4: Rewards per episode for different gamma values", "Episodes",
        "Reward", (-600, 300))
Example #4
def run_experiment_for_ed(images_dir):
    logger_main.info('Running Experiment for epsilon decay...')
    env = gym.make(GYM_ENV)

    # set seeds
    env.seed(21)
    np.random.seed(21)

    # setting up params
    lr = 0.001
    epsilon = 1.0
    ed_values = [0.999, 0.995, 0.990, 0.9]
    gamma = 0.99
    training_episodes = 1000

    rewards_list_for_ed = []
    for ed in ed_values:
        save_dir = "hp_ed_" + str(ed) + "_"
        model = DQN(env, lr, gamma, epsilon, ed)
        logger_main.info(f"Training model for ED: {ed}")
        model.train(training_episodes, False)
        rewards_list_for_ed.append(model.rewards_list)

    pickle.dump(rewards_list_for_ed,
                open(rewards_dir + f"{GYM_ENV}_rewards_list_for_ed.p", "wb"))
    rewards_list_for_ed = pickle.load(
        open(rewards_dir + f"{GYM_ENV}_rewards_list_for_ed.p", "rb"))

    ed_rewards_pd = pd.DataFrame(
        index=pd.Series(range(1, training_episodes + 1)))
    for i in range(len(ed_values)):
        col_name = "epsilon_decay = " + str(ed_values[i])
        ed_rewards_pd[col_name] = rewards_list_for_ed[i]
    plot_experiments(
        ed_rewards_pd, images_dir +
        "Figure 5: Rewards per episode for different epsilon(ε) decay",
        "Figure 5: Rewards per episode for different epsilon(ε) decay values",
        "Episodes", "Reward", (-600, 300))
Example #5
    def train(self, num_episodes=2000, can_stop=True):
        for episode in range(num_episodes):
            state = self.env.reset()
            reward_for_episode = 0
            num_steps = 1000
            state = np.reshape(state, [1, self.num_observation_space])
            for step in range(num_steps):
                self.env.render()
                received_action = self.get_action(state)
                # logger_main.info("received_action:", received_action)
                next_state, reward, done, info = self.env.step(received_action)
                next_state = np.reshape(next_state,
                                        [1, self.num_observation_space])
                # Store the experience in replay memory
                self.add_to_replay_memory(state, received_action, reward,
                                          next_state, done)
                # add up rewards
                reward_for_episode += reward
                state = next_state
                self.update_counter()
                self.learn_and_update_weights_by_reply()

                if done:
                    break
            self.rewards_list.append(reward_for_episode)

            # Decay epsilon after each episode
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay

            # Check for breaking condition
            last_rewards_mean = np.mean(self.rewards_list[-100:])
            if last_rewards_mean > 200 and can_stop:
                logger_main.info("DQN Training Complete...")
                break
            logger_main.info(
                f"{episode} \t: Episode || Reward: {reward_for_episode} \t|| Average Reward: {last_rewards_mean} \t epsilon: {self.epsilon:.10f}"
            )
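
train() calls get_action, add_to_replay_memory, update_counter, and learn_and_update_weights_by_reply, none of which appear in these excerpts. A minimal sketch of the two central ones, assuming the class stores the Keras network in self.model and its transitions in a deque named self.replay_memory_buffer, and uses the usual ε-greedy policy with experience-replay updates (all assumptions):

import random

import numpy as np

    def get_action(self, state):
        # Explore with probability epsilon, otherwise act greedily on the Q-network.
        if np.random.rand() < self.epsilon:
            return random.randrange(self.num_action_space)
        return np.argmax(self.model.predict(state)[0])

    def learn_and_update_weights_by_reply(self, batch_size=64):
        # Wait until enough transitions have been collected before learning.
        if len(self.replay_memory_buffer) < batch_size:
            return
        sample = random.sample(self.replay_memory_buffer, batch_size)
        states = np.squeeze(np.array([t[0] for t in sample]))
        actions = np.array([t[1] for t in sample])
        rewards = np.array([t[2] for t in sample])
        next_states = np.squeeze(np.array([t[3] for t in sample]))
        dones = np.array([t[4] for t in sample])
        # Bellman targets: r + gamma * max_a' Q(s', a') for non-terminal transitions.
        targets = rewards + self.gamma * np.amax(
            self.model.predict_on_batch(next_states), axis=1) * (1 - dones)
        target_vec = self.model.predict_on_batch(states)
        target_vec[np.arange(batch_size), actions] = targets
        self.model.fit(states, target_vec, epochs=1, verbose=0)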
Example #6
def run_experiment_for_lr(images_dir):
    logger_main.info('Running Experiment for learning rate...')
    env = gym.make(GYM_ENV)

    # set seeds
    env.seed(21)
    np.random.seed(21)

    # setting up params
    lr_values = [0.0001, 0.001, 0.01, 0.1]
    epsilon = 1.0
    epsilon_decay = 0.995
    gamma = 0.99
    training_episodes = 1000
    rewards_list_for_lrs = []
    for lr_value in lr_values:
        model = DQN(env, lr_value, gamma, epsilon, epsilon_decay)
        logger_main.info(f"Training model for LR: {lr_value}")
        model.train(training_episodes, False)
        rewards_list_for_lrs.append(model.rewards_list)

    pickle.dump(rewards_list_for_lrs,
                open(rewards_dir + f"{GYM_ENV}_rewards_list_for_lrs.p", "wb"))
    rewards_list_for_lrs = pickle.load(
        open(rewards_dir + f"{GYM_ENV}_rewards_list_for_lrs.p", "rb"))

    lr_rewards_pd = pd.DataFrame(
        index=pd.Series(range(1, training_episodes + 1)))
    for i in range(len(lr_values)):
        col_name = "lr=" + str(lr_values[i])
        lr_rewards_pd[col_name] = rewards_list_for_lrs[i]
    plot_experiments(
        lr_rewards_pd, images_dir +
        "Figure 3: Rewards per episode for different learning rates",
        "Figure 3: Rewards per episode for different learning rates",
        "Episodes", "Reward", (-2000, 300))
Example #7
    rewards_dir = "./rewards/" + GYM_ENV + "/"
    pathlib.Path(save_dir).mkdir(parents=True, exist_ok=True)
    pathlib.Path(images_dir).mkdir(parents=True, exist_ok=True)

    # set seeds
    env.seed(21)
    np.random.seed(21)

    # setting up params
    lr = 0.001
    epsilon = 1.0
    epsilon_decay = 0.995
    gamma = 0.99
    training_episodes = 2000
    if enable_train:
        logger_main.info('Start train')
        model = DQN(env, lr, gamma, epsilon, epsilon_decay)
        model.train(training_episodes, True)
        # Save trained model
        model.save(save_dir + f"{GYM_ENV}_trained_model.h5")
        # Save Rewards list
        pickle.dump(model.rewards_list,
                    open(save_dir + f"{GYM_ENV}_train_rewards_list.p", "wb"))
        logger_main.info('End train')

    logger_main.info('Start testing')
    rewards_list = pickle.load(
        open(save_dir + f"{GYM_ENV}_train_rewards_list.p", "rb"))

    # plot reward in graph
    reward_df = pd.DataFrame(rewards_list)
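
The excerpt ends just after the training rewards are loaded into a DataFrame. A standalone sketch of plotting that per-episode curve with a 100-episode rolling mean, assuming matplotlib and reusing images_dir from above (the column naming and output file name are assumptions):

import matplotlib.pyplot as plt

reward_df.columns = ["reward"]
# Smooth the noisy per-episode rewards with a rolling mean over 100 episodes.
reward_df["rolling_mean"] = reward_df["reward"].rolling(100).mean()
ax = reward_df.plot(figsize=(12, 6), title="Reward per training episode")
ax.set_xlabel("Episodes")
ax.set_ylabel("Reward")
plt.savefig(images_dir + "training_rewards.png")
plt.close()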