Example #1
class Environment:
    def __init__(self, display=Config.SHOW_MODE):
        # Config, GameManager and process_frame come from the surrounding project.
        self.game = GameManager(display=display)
        self.previous_state = None
        self.current_state = None
        self.available = None
        self.total_reward = 0
        self.envs_mean = None
        self.envs_std = None
        self.num_steps = 0

        # Running statistics for the optional observation normalization below.
        self.state_mean = 0
        self.state_std = 0
        self.alpha = 0.9999  # EMA decay factor

        #self.reset()

    def get_num_actions(self):
        return self.game.get_num_actions()

    def get_num_states(self):
        return self.game.get_num_state()

    def _observation(self, observation):
        # Normalize frames using exponential moving averages of the frame
        # mean/std, bias-corrected as in Adam.
        self.num_steps += 1
        self.state_mean = self.state_mean * self.alpha + \
            observation.mean() * (1 - self.alpha)
        self.state_std = self.state_std * self.alpha + \
            observation.std() * (1 - self.alpha)

        unbiased_mean = self.state_mean / (1 - pow(self.alpha, self.num_steps))
        unbiased_std = self.state_std / (1 - pow(self.alpha, self.num_steps))

        return (observation - unbiased_mean) / (unbiased_std + 1e-8)

    def reset(self):
        self.total_reward = 0
        observation, available = self.game.reset()
        self.previous_state = None
        self.current_state = process_frame(observation)
        return self.current_state, available

    def step(self, action):
        observation, reward, done, available, envs_mean, envs_std = self.game.step(
            action)

        self.available = available
        self.total_reward += reward
        self.envs_mean = envs_mean
        self.envs_std = envs_std

        self.previous_state = self.current_state
        self.current_state = process_frame(observation)
        # self.current_state = self._observation(process_frame(observation))

        return self.current_state, reward, done, available, envs_mean, envs_std
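
The normalization in _observation (wired in via the commented-out line in step) keeps exponential moving averages of the frame mean and std, then divides by 1 - alpha^t to undo the zero-initialization bias, exactly as Adam does for its moment estimates. A minimal standalone sketch of that correction, with made-up per-frame means:

alpha = 0.9999
state_mean, num_steps = 0.0, 0

for frame_mean in [0.5, 0.4, 0.6]:     # made-up per-frame means
    num_steps += 1
    state_mean = state_mean * alpha + frame_mean * (1 - alpha)
    # The EMA starts near 0 because of the zero init; dividing by
    # (1 - alpha ** t) removes that startup bias.
    unbiased = state_mean / (1 - alpha ** num_steps)
    print(num_steps, unbiased)         # 0.5, then ~0.45, then ~0.5

At step 1 the raw EMA is only 0.5 * (1 - alpha), but dividing by (1 - alpha**1) recovers the true mean 0.5 exactly.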
Example #2
import numpy as np
from queue import Queue
from scipy import misc  # note: scipy.misc.imresize was removed in SciPy >= 1.3


class Environment:
    def __init__(self):
        # Config and GameManager come from the surrounding project.
        self.game = GameManager(Config.ATARI_GAME, display=Config.PLAY_MODE)
        self.nb_frames = Config.STACKED_FRAMES
        self.frame_q = Queue(maxsize=self.nb_frames)
        self.previous_states = []
        self.current_state = None
        self.total_reward = 0

        self.reset()

    @staticmethod
    def _rgb2gray(rgb):
        return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])

    @staticmethod
    def _preprocess(image):
        image = Environment._rgb2gray(image)
        image = misc.imresize(image, [Config.IMAGE_HEIGHT, Config.IMAGE_WIDTH],
                              'bilinear')
        image = image.astype(np.float32) / 128.0 - 1.0
        return image

    def _get_current_state(self):
        if not self.frame_q.full():
            return None  # frame queue is not full yet.
        x_ = np.array(self.frame_q.queue)      # (nb_frames, H, W)
        x_ = np.transpose(x_, [1, 2, 0])       # -> (H, W, nb_frames), channels last
        return x_

    def _update_frame_q(self, frame):
        if self.frame_q.full():
            self.frame_q.get()
        image = Environment._preprocess(frame)
        self.frame_q.put(image)

    def get_num_actions(self):
        return self.game.get_num_actions()

    def num_basic_actions(self):
        return self.game.num_basic_actions()

    def reset(self):
        self.total_reward = 0
        self.frame_q.queue.clear()
        self._update_frame_q(self.game.reset())
        self.previous_states = []
        self.current_state = None

    def step(self, actions):
        observations, rewards, done, _ = self.game.step(actions)
        self.total_reward += sum(rewards)
        self.previous_states = [self.current_state]
        #self._update_frame_q(observations[-1])
        #temp = [Environment._preprocess(frame) for frame in observations[:-1]]
        for frame in observations[:-1]:
            self._update_frame_q(frame)
            self.previous_states.append(self._get_current_state())
        self._update_frame_q(observations[-1])
        self.current_state = self._get_current_state()
        return rewards, done
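
The heart of Example #2 is the rolling frame queue: each step drops the oldest preprocessed frame and stacks the newest nb_frames of them channel-last. A minimal self-contained sketch of that logic with plain numpy arrays; the 84x84 frame size and stack of 4 are assumptions, not values read from Config:

import numpy as np
from queue import Queue

STACK = 4                      # assumed stand-in for Config.STACKED_FRAMES
frame_q = Queue(maxsize=STACK)

def update_frame_q(frame):
    if frame_q.full():
        frame_q.get()          # drop the oldest frame
    frame_q.put(frame)

def get_current_state():
    if not frame_q.full():
        return None            # not enough history yet
    x = np.array(frame_q.queue)            # (STACK, H, W)
    return np.transpose(x, [1, 2, 0])      # (H, W, STACK), channels last

for t in range(6):             # push six dummy 84x84 "frames"
    update_frame_q(np.full((84, 84), t, dtype=np.float32))

state = get_current_state()
print(state.shape)             # (84, 84, 4)
print(state[0, 0, :])          # [2. 3. 4. 5.] -> the four most recent frames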
Example #3
    def get_num_actions(self):
        return GameManager.get_num_actions()
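
Unlike the first two examples, this variant calls get_num_actions on the GameManager class itself rather than on an instance, so it only works if that method is a staticmethod or classmethod. A hypothetical stub illustrating the required shape (the action count is made up, not taken from the source project):

class GameManager:
    NUM_ACTIONS = 18  # hypothetical: size of the full Atari action set

    @staticmethod
    def get_num_actions():
        return GameManager.NUM_ACTIONS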