Example #1
    def random_play_and_save(self, env, current_state, act_list):

        # Decide action
        action_index = env.action_space.sample()

        # Check if all previous actions are NOOP
        all_zeros = all(p == 0 for p in act_list)

        # Change action if all previous actions are NOOP and current is NOOP
        if all_zeros and action_index == 0:
            action_index = random.randint(1, env.action_space.n - 1)

        # Build action vector
        action = np.eye(self.action_size, dtype=np.int8)[action_index]

        # Advance the game to the next state based on the action.
        obs, reward, is_done, _ = env.step(action_index)

        # Pre-process observation
        obs = preprocess(obs)

        # Build next state
        next_state = get_next_state(current_state, obs)

        # Pre-process reward
        transformed_reward = transform_reward(reward)

        # Remember the previous state, action, reward, and done
        self.memory.append((current_state, action, transformed_reward, next_state, is_done))

        return next_state, reward, is_done, action_index
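
The snippet above relies on three helper functions that are not shown: preprocess, get_next_state and transform_reward. The sketch below shows one plausible implementation in the usual Atari DQN style (grayscale downsampling, a stack of the last four frames, reward clipping); the exact frame size, stack depth and clipping scheme are assumptions, not taken from the original code.

    import numpy as np

    def preprocess(obs):
        # Assumed: convert the raw RGB frame to grayscale and downsample it.
        # A real pipeline would resize/crop to a fixed 84x84 frame.
        gray = np.mean(obs, axis=2).astype(np.uint8)
        return gray[::2, ::2]

    def get_next_state(current_state, obs):
        # Assumed: the state is a stack of the last 4 preprocessed frames.
        # Drop the oldest frame and append the newest one on the last axis.
        return np.append(current_state[:, :, 1:], obs[:, :, np.newaxis], axis=2)

    def transform_reward(reward):
        # Assumed: clip the reward to {-1, 0, +1}, as in the DQN paper.
        return np.sign(reward)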
Example #2
    def q_iteration(self, env, current_state, act_list):

        # Choose the action
        if random.random() < self.epsilon:
            action_index = env.action_space.sample()

        else:
            action_index = self.choose_best_action(current_state)

        # Check if all previous actions are NOOP
        all_zeros = all(p == 0 for p in act_list)

        # Change action if all previous actions are NOOP and current is NOOP
        if all_zeros and action_index == 0:
            action_index = random.randint(1, env.action_space.n - 1)

        # Build action vector
        action = np.eye(self.action_size, dtype=np.int8)[action_index]

        # Play one game iteration
        obs, reward, is_done, _ = env.step(action_index)

        # Pre-process observation
        obs = preprocess(obs)

        # Build next state
        next_state = get_next_state(current_state, obs)

        # Pre-process reward
        transformed_reward = transform_reward(reward)

        # Remember the previous state, action, reward, and done
        self.memory.append((current_state, action, transformed_reward, next_state, is_done))

        # Sample and fit
        batch = self.memory.sample_batch(self.batch_size)
        self.fit_batch(batch)

        return next_state, reward, is_done, action_index
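
q_iteration ends with a call to fit_batch, which is not shown here. Below is a minimal sketch of a standard DQN update, assuming self.model is a Keras-style network that maps a stacked-frame state to one Q-value per action and self.gamma is the discount factor; the original fit_batch may differ (for example, it may use a separate target network).

    def fit_batch(self, batch):
        # Unpack the sampled transitions into arrays.
        states, actions, rewards, next_states, dones = map(np.array, zip(*batch))

        # Bellman targets: r + gamma * max_a' Q(s', a'), with no bootstrapping
        # on terminal transitions.
        next_q = self.model.predict(next_states)
        targets = rewards + self.gamma * np.max(next_q, axis=1) * (1.0 - dones)

        # Only the Q-value of the action actually taken is moved towards the
        # target; the other outputs keep their current predictions.
        q_values = self.model.predict(states)
        q_values[np.arange(len(batch)), np.argmax(actions, axis=1)] = targets

        self.model.fit(states, q_values, verbose=0)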
Example #3
    def nonrandom_play_and_save(self, env, current_state, act_list):
        """This method is designed to be used when you need to continue training after a (initial)training session is
        finished.
        I have found that initializing the memory with random play in that situation can make the agent to diverge."""

        # Choose the action according to the behaviour policy
        if random.random() < 0.05:
            action_index = env.action_space.sample()
        else:
            action_index = self.choose_best_action(current_state)

        # Check if all previous actions are NOOP
        all_zeros = all(p == 0 for p in act_list)

        # Change action if all previous actions are NOOP and current is NOOP
        if all_zeros and action_index == 0:
            action_index = random.randint(1, env.action_space.n - 1)

        # Build action vector
        action = np.eye(self.action_size, dtype=np.int8)[action_index]

        # Advance the game to the next state based on the action.
        obs, reward, is_done, _ = env.step(action_index)

        # Pre-process observation
        obs = preprocess(obs)

        # Build next state
        next_state = get_next_state(current_state, obs)

        # Pre-process reward
        transformed_reward = transform_reward(reward)

        # Remember the previous state, action, reward, and done
        self.memory.append((current_state, action, transformed_reward, next_state, is_done))

        return next_state, reward, is_done, action_index
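
All three methods delegate greedy action selection to choose_best_action, which is also not shown. One plausible sketch, assuming self.model expects a leading batch dimension on the stacked-frame state:

    def choose_best_action(self, state):
        # Assumed: predict Q-values for the single state and pick the argmax.
        q_values = self.model.predict(state[np.newaxis, ...])
        return int(np.argmax(q_values[0]))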
Example #4
    for time_step in xrange(20000):
        # print "episode:", e, "time_step:", time_step

        # turn this on if you want to render
        # env.render()

        # Choose the action according to the behaviour policy
        if random.random() < 0.05:
            action_index = env.action_space.sample()
        else:
            action_index = agent.choose_best_action(current_state)

        # Play one game iteration
        raw_obs, reward, is_done, _ = env.step(action_index)
        obs = preprocess(raw_obs)
        next_state = get_next_state(current_state, obs)

        # make next_state the new current state for the next frame.
        current_state = next_state

        # Update return
        episode_return += reward
        # episode_return += transform_reward(reward)

        # imgplot = plt.imshow(raw_obs)
        # plt.show()
        # raw_input("Press Enter to continue...")

        # is_done becomes True when the game ends
        if is_done:
            # print the score and break out of the loop