Example #1
def main():
    with tf.Session() as sess:
        print('Creating environment...')
        env = TFBatchedEnv(sess, Pong(), 1)
        env = BatchedFrameStack(env)

        print('Creating model...')
        model = CNN(sess,
                    gym_space_distribution(env.action_space),
                    gym_space_vectorizer(env.observation_space))

        print('Creating roller...')
        roller = TruncatedRoller(env, model, 1)

        print('Initializing variables...')
        sess.run(tf.global_variables_initializer())

        if os.path.exists('params.pkl'):
            print('Loading parameters...')
            with open('params.pkl', 'rb') as in_file:
                params = pickle.load(in_file)
            for var, val in zip(tf.trainable_variables(), params):
                sess.run(tf.assign(var, val))
        else:
            print('Warning: parameter file does not exist!')

        print('Running agent...')
        viewer = SimpleImageViewer()
        while True:
            for obs in roller.rollouts()[0].step_observations:
                viewer.imshow(obs[..., -3:])
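
A note on params.pkl: the loader above expects a pickled list of numpy arrays in the same order as tf.trainable_variables(). A minimal sketch of how such a file could be written after training (an assumption; the saving code is not part of this example):

def save_params(sess, path='params.pkl'):
    # Evaluate every trainable variable and pickle the resulting list of
    # arrays in the order the loader zips them back onto the variables.
    params = sess.run(tf.trainable_variables())
    with open(path, 'wb') as out_file:
        pickle.dump(params, out_file)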
Example #2
class SimpleRenderAgent(RL.Agent):
    def __init__(self,
                 plotfig_getter=None,
                 image_getter=None,
                 render_fn=None) -> None:
        self.render_fn = render_fn
        self.image_getter = image_getter
        self.plotfig_getter = plotfig_getter
        self.viewer = None

    def post_act(self):
        try:
            if self.render_fn is not None:
                self.render_fn()
            elif self.image_getter is not None:
                if self.viewer is None:
                    self.viewer = SimpleImageViewer()
                img = self.image_getter()
                self.viewer.imshow(img)
            elif self.plotfig_getter is not None:
                if self.viewer is None:
                    self.viewer = SimpleImageViewer()
                fig = self.plotfig_getter()  # type: Figure
                data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
                img = data.reshape(fig.canvas.get_width_height()[::-1] + (3, ))
                self.viewer.imshow(img)
            else:
                self.env.render()
        except Exception:
            logging.getLogger(__name__).exception(
                f'{self.name}: Unable to render. Disabling agent!')
            self.disable()
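
A minimal usage sketch for SimpleRenderAgent, assuming an environment whose render('rgb_array') call returns an RGB frame (the surrounding RL framework that calls post_act() is not shown here):

# Pull frames from the wrapped environment and show them in a viewer window.
agent = SimpleRenderAgent(image_getter=lambda: env.render('rgb_array'))
# With no getters at all, post_act() falls back to self.env.render().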
Example #3
class PyBullet(EnvExt):
    def __init__(
            self,
            name: str = 'Hopper',
            add_timestep: bool = False,
            nosuffix: bool = False
    ) -> None:
        self.name = name
        try:
            import pybullet_envs  # noqa
        except ImportError:
            raise ImportError('pybullet is not installed')
        if not nosuffix:
            name += 'BulletEnv-v0'
        env = gym.make(name)
        if add_timestep:
            env = AddTimeStep(env)
        super().__init__(RewardMonitor(env))
        self.viewer = None
        self.spec.use_reward_monitor = True

    def render(self, mode: str = 'human') -> Optional[ndarray]:
        if mode == 'human':
            arr = self._env.render('rgb_array')
            if self.viewer is None:
                from gym.envs.classic_control.rendering import SimpleImageViewer
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(arr)  # type: ignore
            return None
        else:
            return self._env.render(mode)
Example #4
class Runner:
    def __init__(self, env, model, num_steps, discount_rate, summary_frequency,
                 performance_num_episodes, summary_log_dir):
        self.env = env
        self.model = model
        self.discount_rate = discount_rate
        self.observation = env.reset()
        self.num_steps = num_steps
        self.stats_recorder = StatsRecorder(
            summary_frequency=summary_frequency,
            performance_num_episodes=performance_num_episodes,
            summary_log_dir=summary_log_dir,
            save=True)
        self.viewer = SimpleImageViewer()

    def render(self):
        columns = []
        for i in range(80):
            rows = []
            for j in range(80):
                if self.observation[i][j] == 1:
                    rows.append([255, 255, 255])
                else:
                    rows.append([0, 0, 0])
            columns.append(rows)
        self.viewer.imshow(np.asarray(columns, dtype=np.uint8))

    def run(self):
        observations = []
        rewards = []
        actions = []
        terminals = []
        values = []

        for _ in range(self.num_steps):
            action_index, value = self.model.predict([self.observation])
            observations.append(self.observation)
            action = action_with_index(action_index)
            values.append(value)

            self.observation, reward, terminal = self.env.step(action)
            self.stats_recorder.after_step(reward=reward, terminal=terminal)

            rewards.append(reward)
            actions.append(action_index)
            terminals.append(terminal)

            if terminal:
                self.observation = self.env.reset()

        if terminals[-1] == 0:
            next_value = self.model.predict_value([self.observation])[0]
            discounted_rewards = discount(rewards + [next_value],
                                          terminals + [False],
                                          self.discount_rate)[:-1]
        else:
            discounted_rewards = discount(rewards, terminals,
                                          self.discount_rate)

        self.model.train(observations, discounted_rewards, actions, values)
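
The discount helper is not shown above; a plausible implementation consistent with how it is called (equal-length reward and terminal lists, returning per-step discounted returns, with the appended bootstrap value stripped by the caller) could look like this. This is a sketch, not the project's actual code:

def discount(rewards, terminals, discount_rate):
    # Walk backwards, resetting the running return at terminal steps so
    # credit does not leak across episode boundaries.
    returns = []
    running = 0.0
    for reward, terminal in zip(reversed(rewards), reversed(terminals)):
        if terminal:
            running = 0.0
        running = reward + discount_rate * running
        returns.append(running)
    return returns[::-1]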
Example #5
def main():
    global restart, action
    parser = argparse.ArgumentParser()
    parser.add_argument("--bot", type=int, default=0,
                        help="Number of bot cars_full in environment.")
    parser.add_argument("--track", type=int, default=0,
                        help="Track for agents cars_full in environment.")
    parser.add_argument("--discrete", type=int, default=1, help="Apply discrete wrapper?")
    parser.add_argument("--sleep", type=float, default=None, help="time in s between actions")
    parser.add_argument("--debug", action='store_true', default=False, help="debug mode")
    parser.add_argument(
        "--env-settings",
        type=str,
        default='envs/gym_car_intersect_fixed/settings_sets/env_settings__basic_small_rotation.json',
        help="debug mode"
    )

    args = parser.parse_args()

    env = CarRacingHackatonContinuousFixed(args.env_settings)
    env = DiscreteWrapper(env)

    env.reset()
    time.sleep(3.0)

    viewer = SimpleImageViewer()
    viewer.imshow(env.get_true_picture())
    viewer.window.on_key_press = key_press
    viewer.window.on_key_release = key_release
    # while True:
    env.reset()
    total_reward = 0.0
    steps = 0
    restart = False
    while True:
        s = None
        done = None
        info = {}
        for _ in range(1):
            s, r, done, info = env.step(action)
            total_reward += r
        print("\naction " + str(action))
        print("step {} total_reward {:+0.2f}".format(steps, total_reward))
        print(info)

        steps += 1
        viewer.imshow(env.get_true_picture())

        if done or restart or 'need_restart' in info.keys():
            print('restart')
            break
Example #6
class BaseEnv(gym.Env, ABC):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 3
    }
    reward_range = (-float('inf'), float('inf'))

    def __init__(self):
        self.viewer = None
        self.seed()

    @abstractmethod
    def step(self, action):
        pass

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    @abstractmethod
    def reset(self):
        pass

    @abstractmethod
    def get_image(self):
        pass

    def render(self, mode='rgb_array', max_width=20):
        img = self.get_image()
        img = np.asarray(img).astype(np.uint8)
        img_height, img_width = img.shape[:2]
        ratio = max_width / img_width
        img = Image.fromarray(img).resize(
            [int(ratio * img_width),
             int(ratio * img_height)])
        img = np.asarray(img)
        if mode == 'rgb_array':
            # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            return img
        elif mode == 'human':
            from gym.envs.classic_control.rendering import SimpleImageViewer
            if self.viewer is None:
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(img)

            return self.viewer.isopen

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
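
A hypothetical minimal subclass, only to illustrate what the abstract methods must provide for render() above to work (the grid contents and dynamics here are placeholders, not part of the original environment):

class ToyGridEnv(BaseEnv):
    def __init__(self):
        super().__init__()
        self.grid = np.zeros((8, 8, 3), dtype=np.uint8)

    def reset(self):
        self.grid[:] = 0
        return self.grid.copy()

    def step(self, action):
        # No real dynamics; brighten one cell so the viewer shows a change.
        self.grid[0, int(action) % 8] = 255
        return self.grid.copy(), 0.0, False, {}

    def get_image(self):
        # render() expects an RGB array it can resize and display.
        return self.grid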
Example #7
def main():
    args = arg_parser().parse_args()

    conn = redis.StrictRedis(host=args.redis_host, port=args.redis_port)
    pubsub = conn.pubsub()
    pubsub.subscribe(args.channel + ':state:' + args.env_id)
    viewer = SimpleImageViewer()
    for msg in pubsub.listen():
        if msg['type'] != 'message':
            continue
        img = np.frombuffer(msg['data'][:3 * (args.obs_size**2)],
                            dtype='uint8')
        img = img.reshape([args.obs_size] * 2 + [3])
        viewer.imshow(img)
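
The publishing side is not part of this example, but given how the bytes are decoded above, a matching publisher would push raw uint8 RGB frames onto the same channel. A hedged sketch (the channel naming mirrors the subscriber; everything else is an assumption):

def publish_frame(conn, channel, env_id, obs):
    # obs: an (obs_size, obs_size, 3) uint8 RGB frame, sent as raw bytes.
    payload = np.ascontiguousarray(obs, dtype=np.uint8).tobytes()
    conn.publish(channel + ':state:' + env_id, payload)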
Example #8
class SpecialWrapper(gym.Wrapper):
    metadata = {'render.modes': ['human', 'rgb_array', 'encoding']}

    def __init__(self, env, terminal_condition=None):
        super(SpecialWrapper, self).__init__(env)
        self.terminal_condition = terminal_condition
        # Lazily created in render(); initialized here so the attribute exists.
        self.viewer = None

    def reset(self):
        return self.env.reset()

    def step(self, action):
        observation, reward, terminal, info = self.env.step(action)
        if not terminal and self.terminal_condition is not None:
            terminal = self.terminal_condition.isterminal(
                reward, terminal, info)
        return observation, reward, terminal, info

    def render(self, mode='human', **kwargs):
        if mode == 'encoding':
            if 'encoder' not in kwargs:
                raise TypeError('Expected an encoder model `encoder`')
            if 'observation' not in kwargs:
                raise TypeError('Expected previous observation `observation`')

            encoder = kwargs['encoder']
            observation = kwargs['observation']
            encoding = encoder.predict(np.expand_dims(observation, axis=0))[0]
            encoding = (encoding - encoding.min()) / (encoding.max() -
                                                      encoding.min())
            image = np.repeat(np.expand_dims(encoding, axis=-1), 3, axis=-1)
            image = np.uint8(image * 255)
            image = cv2.resize(image, (210, 210), interpolation=cv2.INTER_AREA)
            image = np.concatenate((observation, image), axis=1)

            if self.viewer is None:
                from gym.envs.classic_control.rendering import SimpleImageViewer
                self.viewer = SimpleImageViewer()

            self.viewer.imshow(image)
        else:
            return self.env.render(mode, **kwargs)
Example #9
def test():
    with open(os.path.join("demo", "Pong.demo"), "rb") as f:
        dat = pickle.load(f)
    viewer = SimpleImageViewer()
    env = gym.make('PongNoFrameskip-v4')
    checkpoint = dat['checkpoints'][18]
    checkpoint_action_nr = dat['checkpoint_action_nr'][18]
    env.reset()
    env.unwrapped.restore_state(checkpoint)

    t = 0
    while True:
        print("t ", t)
        action = dat['actions'][checkpoint_action_nr + t]
        observation, reward, done, _ = env.step(action)
        viewer.imshow(observation)
        if reward != 0:
            print("*** reset ***")
            env.reset()
            break
        time.sleep(0.5)
        t += 1
Example #10
def watch_random(env, frame_rate=60.0):
    """
    Watch random agent play an environment.
    """
    init_state = env.reset(1)
    states = tf.placeholder(init_state.dtype, shape=init_state.get_shape())
    actions = tf.random_uniform(shape=[1],
                                minval=0,
                                maxval=env.num_actions,
                                dtype=tf.int32)
    new_states, rews, dones = env.step(states, actions)
    image = env.observe_visual(states)
    viewer = SimpleImageViewer()
    with tf.Session() as sess:
        cur_states = sess.run(init_state)
        while True:
            cur_states, cur_rews, cur_dones = sess.run(
                [new_states, rews, dones], feed_dict={states: cur_states})
            cur_image = sess.run(image, feed_dict={states: cur_states})
            viewer.imshow(cur_image[0])
            if cur_dones[0]:
                print('done with reward: %f' % cur_rews[0])
            time.sleep(1.0 / frame_rate)
Example #11
class Water(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array'],
                'video.frames_per_second': 3}
    FIELD = [
        'M',  # 0 agent
        'S',  # 1 start
        'G',  # 2 goal
        'W',  # 3 water
        'N',  # 4 nothing
    ]
    # this is the restriction of over iteration
    MAX_STEPS = 5000

    def __init__(self):
        super().__init__()
        self.viewer = None
        self.radius = 5
        self.rotation = 10
        self.ellipce_r = 10
        self.ellipce_c = 12
        self.x_shape = 10 * self.radius
        self.y_shape = 10 * self.radius
        self.MAP_shape = (self.x_shape, self.y_shape)
        # set an action space
        self.action_space = gym.spaces.Discrete(4)
        self.observation_space = gym.spaces.Box(
            low=0,
            high=len(self.FIELD),
            shape=self.MAP_shape
        )
        nrows, ncols = self.MAP_shape
        self.reward_range = [-1., 1.]
        self.reset()

    def reset(self):
        self.map = self.ellipse_map
        nrows, ncols = self.MAP_shape
        self.pos = self.find_pos('S')[0]
        # self.goal = self.find_pos('G')[0]
        self.done = False
        self.reward = 0
        self.steps = 0
        self.visited = []
        return self.observe()
        
    # depict the map
    def ellipse_map(self):
        self.x = np.ones((self.x_shape, self.y_shape), dtype=np.uint8)
        self.x[self.x == 1] = 4
        # Start
        self.x[(0, 0)] = 1
        self.x_a, self.y_a = ellipse(self.x_shape/2, self.y_shape/2, self.ellipce_r, self.ellipce_c, rotation=np.deg2rad(self.rotation))
        self.x[(self.x_a, self.y_a)] = 3
        return self.x

    def is_movable(self, pos):
        return (0 <= pos[0] < self.x_shape) and (0 <= pos[1] < self.y_shape)

    # judge whether the agent has reached the goal
    def is_goal(self, show=False):
        nrows, ncols = self.MAP_shape
        if self.pos[0] == nrows - 1 and self.pos[1] == ncols - 1:
            if show:
                print("Goal")
            return True
        else:
            return False

    def is_done(self, show=False):
        # is_movable needs the current position; calling it keeps this check meaningful
        return (not self.is_movable(self.pos)) or self.is_goal(show) or self.steps > self.MAX_STEPS

    def observe(self):
        # to copy the map with the place of the agent
        observation = np.copy(self.map())
        observation[tuple(self.pos)] = self.FIELD.index('M')
        return observation

    def point_finder(self):
        flat_space = np.reshape(self.observe(), [-1, 1])
        #print(flat_space)
        point = np.where(flat_space == 0)
        return int(point[0])

    def trace(self):
        self.row, self.col = np.where(self.observe() == 0)
        self.visited.append((int(self.row), int(self.col)))
        return self.visited

    def get_reward(self, pos, moved):
        nrows, ncols = self.MAP_shape
        if moved:
            if self.map()[tuple(pos)] == self.FIELD.index('W'):
                self.reward -= 10
            elif self.map()[tuple(pos)] == self.FIELD.index('N'):
                self.reward -= 0.3
        else:
            self.reward -= 0.5
        # Goal
        if self.is_goal():
            self.reward += 15
        return self.reward

    def find_pos(self, field_type):
        return np.array([np.where(self.map() == self.FIELD.index(field_type))])

    def step(self, action):
        nrows, ncols = self.MAP_shape
        if action == 0:
            next_pos = [x + y for (x, y) in zip(self.pos, [0, 1])]
        elif action == 1:
            next_pos = [x + y for (x, y) in zip(self.pos, [-1, 0])]
        elif action == 2:
            next_pos = [x + y for (x, y) in zip(self.pos, [1, 0])]
        elif action == 3:
            # opposite of action 0 (the remaining cardinal direction)
            next_pos = [x + y for (x, y) in zip(self.pos, [0, -1])]

        if self.is_movable(next_pos):
            self.pos = next_pos
            moved = True
        else:
            moved = False
        reward = self.get_reward(self.pos, moved)
        observation = self.observe()
        trace = self.trace()
        state = self.point_finder()
        done = self.is_done(True)
        return trace, state, reward, observation, done

    def show(self):
        # plt.grid('on')
        ims = []
        nrows, ncols = self.MAP_shape
        fig = plt.figure()
        ax = plt.gca()  # axes of the figure just created
        ax.set_xticks(np.arange(0.5, nrows, 1))
        ax.set_yticks(np.arange(0.5, ncols, 1))
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        canvas = np.copy(self.map())
        for row, col in self.visited:
            canvas[(row, col)] = self.FIELD.index('M')
            img1 = plt.imshow(canvas, interpolation="bilinear", cmap=cm.GnBu)
            ims.append([img1])
        img = plt.imshow(canvas, interpolation="bilinear", cmap=cm.GnBu, animated=True)
        ani = animation.ArtistAnimation(fig, ims, interval=100, blit=True, repeat_delay=1000)
        plt.show()
        return

    @abstractmethod
    def get_image(self):
        pass

    def render(self, mode='human', max_width=500):
        img = self.get_image()
        img = np.asarray(img).astype(np.uint8)
        img_height, img_width = img.shape[:2]
        ratio = max_width / img_width
        #img = Image.fromarray(img).resize([int(ratio * img_width), int(ratio * img_height)])
        img = np.asarray(img)
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control.rendering import SimpleImageViewer
            if self.viewer is None:
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(img)

            return self.viewer.isopen

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
Example #12
class MultiagentVecEnv(ABC):
    def __init__(self, num_envs: int, num_agents: int, height: int, width: int,
                 dtype: torch.dtype, device: str):
        self.num_envs = num_envs
        self.num_agents = num_agents
        self.height = height
        self.width = width
        self.dtype = dtype
        self.device = device
        self.viewer = None
        self.render_args = {'num_rows': 1, 'num_cols': 1, 'size': 256}

        # This Tensor represents the location of each agent in each environment. It should contain
        # only one non-zero entry for each sub array along dimension 0.
        self.agents = torch.zeros((num_envs * num_agents, 1, height, width),
                                  dtype=dtype,
                                  device=device,
                                  requires_grad=False)

        # This Tensor represents the current alive/dead state of each agent in each environment
        self.dones = torch.zeros(self.num_envs * self.num_agents,
                                 dtype=torch.uint8,
                                 device=device,
                                 requires_grad=False)

        # This tensor represents whether a particular environment has experienced an exception in the most recent
        # step. This is useful for resetting environments that have an exception
        self.errors = torch.zeros(self.num_envs,
                                  dtype=torch.uint8,
                                  device=device,
                                  requires_grad=False)

    @abstractmethod
    def step(
        self,
        actions: Dict[str, torch.Tensor],
        return_observations: bool = False
    ) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor],
               Dict[str, torch.Tensor], dict]:
        raise NotImplementedError

    @abstractmethod
    def reset(self,
              done: torch.Tensor = None,
              return_observations: bool = True
              ) -> Optional[Dict[str, torch.Tensor]]:
        raise NotImplementedError

    @abstractmethod
    def _get_env_images(self) -> torch.Tensor:
        """Gets RGB arrays for each environment.

        Returns:
            img: A Tensor of shape (num_envs, 3, height, width) and dtype torch.short i.e.
                an RGB rendering of each environment
        """
        raise NotImplementedError

    def render(self, mode: str = 'human', env: Optional[int] = None) -> Any:
        if self.viewer is None and mode == 'human':
            # Lazy importing because this breaks EC2 instances that don't have a screen/viewing device
            from gym.envs.classic_control.rendering import SimpleImageViewer
            self.viewer = SimpleImageViewer(maxwidth=1080)

        img = self._get_env_images()
        img = build_render_rgb(img=img,
                               num_envs=self.num_envs,
                               env_height=self.height,
                               env_width=self.width,
                               env=env,
                               num_rows=self.render_args['num_rows'],
                               num_cols=self.render_args['num_cols'],
                               render_size=self.render_args['size'])

        if mode == 'human':
            self.viewer.imshow(img)
            return self.viewer.isopen
        elif mode == 'rgb_array':
            return img
        else:
            raise ValueError('Render mode not recognised.')

    @abstractmethod
    def check_consistency(self):
        raise NotImplementedError
Example #13
File: retro_env.py  Project: PorkPy/retro
class RetroEnv(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 60.0
    }

    def compute_step(self, image):
        reward = self.data.current_reward()
        done = self.data.is_done()
        return reward, done, self.data.lookup_all()

    def record_movie(self, path):
        self.movie = retro.Movie(path, True)
        self.movie.configure(self.gamename, self.em)
        if self.initial_state:
            self.movie.set_state(self.initial_state)

    def stop_record(self):
        self.movie_path = None
        self.movie_id = 0
        if self.movie:
            self.movie.close()
            self.movie = None

    def auto_record(self, path=None):
        if not path:
            path = os.getcwd()
        self.movie_path = path

    def __init__(self,
                 game,
                 state=retro.STATE_DEFAULT,
                 scenario=None,
                 info=None,
                 use_restricted_actions=retro.ACTIONS_FILTERED,
                 record=False):
        if not hasattr(self, 'spec'):
            self.spec = None
        self.img = None
        self.viewer = None
        self.gamename = game
        self.statename = state

        game_path = retro.get_game_path(game)
        rom_path = retro.get_romfile_path(game)
        metadata_path = os.path.join(game_path, 'metadata.json')

        if state == retro.STATE_NONE:
            self.initial_state = None
        elif state == retro.STATE_DEFAULT:
            self.initial_state = None
            try:
                with open(metadata_path) as f:
                    metadata = json.load(f)
                if 'default_state' in metadata:
                    with gzip.open(
                            os.path.join(game_path, metadata['default_state'])
                            + '.state', 'rb') as fh:
                        self.initial_state = fh.read()
            except (IOError, json.JSONDecodeError):
                pass
        else:
            if not state.endswith('.state'):
                state += '.state'

            with gzip.open(os.path.join(game_path, state), 'rb') as fh:
                self.initial_state = fh.read()

        self.data = GameData()

        if info is None:
            info = 'data'

        if info.endswith('.json'):
            # assume it's a path
            info_path = info
        else:
            info_path = os.path.join(game_path, info + '.json')

        if scenario is None:
            scenario = 'scenario'

        if scenario.endswith('.json'):
            # assume it's a path
            scenario_path = scenario
        else:
            scenario_path = os.path.join(game_path, scenario + '.json')

        system = retro.get_romfile_system(rom_path)

        # We can't have more than one emulator per process. Before creating an
        # emulator, ensure that unused ones are garbage-collected
        gc.collect()
        self.em = retro.RetroEmulator(rom_path)
        self.em.configure_data(self.data)
        self.em.step()
        img = self.em.get_screen()

        core = retro.get_system_info(system)
        self.BUTTONS = core['buttons']
        self.NUM_BUTTONS = len(self.BUTTONS)
        self.BUTTON_COMBOS = self.data.valid_actions()

        try:
            assert self.data.load(
                info_path,
                scenario_path), 'Failed to load info (%s) or scenario (%s)' % (
                    info_path, scenario_path)
        except Exception:
            del self.em
            raise

        if use_restricted_actions == retro.ACTIONS_DISCRETE:
            combos = 1
            for combo in self.BUTTON_COMBOS:
                combos *= len(combo)
            self.action_space = gym.spaces.Discrete(combos)
        elif use_restricted_actions == retro.ACTIONS_MULTI_DISCRETE:
            self.action_space = gym.spaces.MultiDiscrete([
                len(combos) if gym_version >= (0, 9, 6) else
                (0, len(combos) - 1) for combos in self.BUTTON_COMBOS
            ])
        else:
            self.action_space = gym.spaces.MultiBinary(self.NUM_BUTTONS)

        kwargs = {}
        if gym_version >= (0, 9, 6):
            kwargs['dtype'] = np.uint8
        self.observation_space = gym.spaces.Box(low=0,
                                                high=255,
                                                shape=img.shape,
                                                **kwargs)

        self.use_restricted_actions = use_restricted_actions
        self.movie = None
        self.movie_id = 0
        self.movie_path = None
        if record is True:
            self.auto_record()
        elif record is not False:
            self.auto_record(record)
        self.seed()
        if gym_version < (0, 9, 6):
            self._seed = self.seed
            self._step = self.step
            self._reset = self.reset
            self._render = self.render
            self._close = self.close

    def step(self, a):
        if self.img is None:
            raise RuntimeError('Please call env.reset() before env.step()')

        action = 0
        if self.use_restricted_actions == retro.ACTIONS_DISCRETE:
            for combo in self.BUTTON_COMBOS:
                current = a % len(combo)
                a //= len(combo)
                action |= combo[current]
        elif self.use_restricted_actions == retro.ACTIONS_MULTI_DISCRETE:
            for i in range(len(a)):
                buttons = self.BUTTON_COMBOS[i]
                action |= buttons[a[i]]
        else:
            for i in range(len(a)):
                action |= int(a[i]) << i
            if self.use_restricted_actions == retro.ACTIONS_FILTERED:
                action = self.data.filter_action(action)
        a = np.zeros([16], np.uint8)
        for i in range(16):
            a[i] = (action >> i) & 1
            if self.movie:
                self.movie.set_key(i, a[i])
        if self.movie:
            self.movie.step()
        self.em.set_button_mask(a)
        self.em.step()
        self.img = ob = self.em.get_screen()
        self.data.update_ram()
        rew, done, info = self.compute_step(ob)
        return ob, float(rew), bool(done), dict(info)

    def reset(self):
        if self.initial_state:
            self.em.set_state(self.initial_state)
        self.em.set_button_mask(np.zeros([16], np.uint8))
        self.em.step()
        if self.movie_path is not None:
            self.record_movie(
                os.path.join(
                    self.movie_path, '%s-%s-%04d.bk2' %
                    (self.gamename, self.statename, self.movie_id)))
            self.movie_id += 1
        if self.movie:
            self.movie.step()
        self.img = ob = self.em.get_screen()
        self.data.reset()
        self.data.update_ram()
        return ob

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        return [seed1, seed2]

    def render(self, mode='human', close=False):
        if close:
            if self.viewer:
                self.viewer.close()
            return
        if mode == "rgb_array":
            return self.em.get_screen() if self.img is None else self.img
        elif mode == "human":
            if self.viewer is None:
                from gym.envs.classic_control.rendering import SimpleImageViewer
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(self.img)
            return self.viewer.isopen

    def close(self):
        if hasattr(self, 'em'):
            del self.em

        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
Example #14
class RetroEnv(gym.Env):
    """
    Gym Retro environment class

    Provides a Gym interface to classic video games
    """
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 60.0
    }

    def __init__(self,
                 game,
                 state=retro.State.DEFAULT,
                 scenario=None,
                 info=None,
                 use_restricted_actions=retro.Actions.FILTERED,
                 record=False,
                 players=1,
                 inttype=retro.data.Integrations.STABLE,
                 obs_type=retro.Observations.IMAGE,
                 naudio_samples=None,
                 make_video=False,
                 is_baseline=False):
        if not hasattr(self, 'spec'):
            self.spec = None
        self._obs_type = obs_type
        self.img = None
        self.ram = None
        self.viewer = None
        self.gamename = game
        self.statename = state
        self.initial_state = None
        self.players = players
        self.naudio_samples = naudio_samples
        self.audio_clip = []
        self.make_video = make_video
        self.is_baseline = is_baseline

        metadata = {}
        rom_path = retro.data.get_romfile_path(game, inttype)
        metadata_path = retro.data.get_file_path(game, 'metadata.json',
                                                 inttype)

        if state == retro.State.NONE:
            self.statename = None
        elif state == retro.State.DEFAULT:
            self.statename = None
            try:
                with open(metadata_path) as f:
                    metadata = json.load(f)
                if 'default_player_state' in metadata and self.players <= len(
                        metadata['default_player_state']):
                    self.statename = metadata['default_player_state'][
                        self.players - 1]
                elif 'default_state' in metadata:
                    self.statename = metadata['default_state']
                else:
                    self.statename = None
            except (IOError, json.JSONDecodeError):
                pass

        if self.statename:
            self.load_state(self.statename, inttype)

        self.data = retro.data.GameData()

        if info is None:
            info = 'data'

        if info.endswith('.json'):
            # assume it's a path
            info_path = info
        else:
            info_path = retro.data.get_file_path(game, info + '.json', inttype)

        if scenario is None:
            scenario = 'scenario'

        if scenario.endswith('.json'):
            # assume it's a path
            scenario_path = scenario
        else:
            scenario_path = retro.data.get_file_path(game, scenario + '.json',
                                                     inttype)

        self.system = retro.get_romfile_system(rom_path)

        # We can't have more than one emulator per process. Before creating an
        # emulator, ensure that unused ones are garbage-collected
        gc.collect()
        self.em = retro.RetroEmulator(rom_path)
        self.em.configure_data(self.data)
        self.em.step()

        core = retro.get_system_info(self.system)
        self.buttons = core['buttons']
        self.num_buttons = len(self.buttons)
        self.button_combos = self.data.valid_actions()

        try:
            assert self.data.load(
                info_path,
                scenario_path), 'Failed to load info (%s) or scenario (%s)' % (
                    info_path, scenario_path)
        except Exception:
            del self.em
            raise

        if use_restricted_actions == retro.Actions.DISCRETE:
            combos = 1
            for combo in self.button_combos:
                combos *= len(combo)
            self.action_space = gym.spaces.Discrete(combos**players)
        elif use_restricted_actions == retro.Actions.MULTI_DISCRETE:
            self.action_space = gym.spaces.MultiDiscrete([
                len(combos) if gym_version >= (0, 9, 6) else
                (0, len(combos) - 1) for combos in self.button_combos
            ] * players)
        else:
            self.action_space = gym.spaces.MultiBinary(self.num_buttons *
                                                       players)

        kwargs = {}
        if gym_version >= (0, 9, 6):
            kwargs['dtype'] = np.uint8

        if self._obs_type == retro.Observations.RAM:
            shape = self.get_ram().shape
        else:
            img = [self.get_screen(p) for p in range(players)]
            shape = img[0].shape
        self.observation_space = gym.spaces.Box(low=0,
                                                high=255,
                                                shape=shape,
                                                **kwargs)

        self.use_restricted_actions = use_restricted_actions
        self.movie = None
        self.movie_id = 0
        self.movie_path = None
        if record is True:
            self.auto_record()
        elif record is not False:
            self.auto_record(record)
        self.seed()
        if gym_version < (0, 9, 6):
            self._seed = self.seed
            self._step = self.step
            self._reset = self.reset
            self._render = self.render
            self._close = self.close

    def _update_obs(self):
        if self._obs_type == retro.Observations.RAM:
            self.ram = self.get_ram()
            return self.ram
        elif self._obs_type == retro.Observations.IMAGE:
            self.img = self.get_screen()
            return self.img
        else:
            raise ValueError('Unrecognized observation type: {}'.format(
                self._obs_type))

    def action_to_array(self, a):
        actions = []
        for p in range(self.players):
            action = 0
            if self.use_restricted_actions == retro.Actions.DISCRETE:
                for combo in self.button_combos:
                    current = a % len(combo)
                    a //= len(combo)
                    action |= combo[current]
            elif self.use_restricted_actions == retro.Actions.MULTI_DISCRETE:
                ap = a[self.num_buttons * p:self.num_buttons * (p + 1)]
                for i in range(len(ap)):
                    buttons = self.button_combos[i]
                    action |= buttons[ap[i]]
            else:
                ap = a[self.num_buttons * p:self.num_buttons * (p + 1)]
                for i in range(len(ap)):
                    action |= int(ap[i]) << i
                if self.use_restricted_actions == retro.Actions.FILTERED:
                    action = self.data.filter_action(action)
            ap = np.zeros([self.num_buttons], np.uint8)
            for i in range(self.num_buttons):
                ap[i] = (action >> i) & 1
            actions.append(ap)
        return actions

    def step(self, a):
        if self.img is None and self.ram is None:
            raise RuntimeError('Please call env.reset() before env.step()')

        for p, ap in enumerate(self.action_to_array(a)):
            if self.movie:
                for i in range(self.num_buttons):
                    self.movie.set_key(i, ap[i], p)
            self.em.set_button_mask(ap, p)

        if self.movie:
            self.movie.step()
        self.em.step()
        self.data.update_ram()
        ob = self._update_obs()
        rew, done, info = self.compute_step()

        sample = self.em.get_audio()

        if self.naudio_samples is not None:
            info['audio'] = librosa.util.fix_length(sample.T,
                                                    int(self.naudio_samples)).T

            if self.make_video:
                self.audio_clip.extend(sample)

        if self.make_video:
            baseline_str = 'b-' if self.is_baseline else ''
            cv2.imwrite(
                'video_frames/' + baseline_str + self.gamename + '-' +
                str(self.n) + '.png', cv2.cvtColor(ob, cv2.COLOR_RGB2BGR))
        self.n += 1
        return ob, rew, bool(done), dict(info)

    def reset(self):
        if self.audio_clip:
            if self.make_video:
                baseline_str = 'b-' if self.is_baseline else ''
                path = baseline_str + self.gamename + '_audio.wav'
                numpy_audio = np.asarray(self.audio_clip)
                wv.write(path, int(self.em.get_audio_rate()), numpy_audio)

                # Combine all saved frames into video
                cmd1 = 'ffmpeg -y -r 60 -f image2 -i video_frames/' + baseline_str + self.gamename + '-%d.png -vcodec libx264 -crf 25  -pix_fmt yuv420p ' + baseline_str + self.gamename + '_noaudio.mp4 -hide_banner -loglevel panic'
                # Add audio to video
                os.system(cmd1)
                cmd = "ffmpeg -y -i " + baseline_str + self.gamename + "_noaudio.mp4 -i " + path + " -y -c:v copy -c:a aac -strict experimental -hide_banner -loglevel panic " + baseline_str + self.gamename + '-' + str(
                    self.n) + '.mp4'
                os.system(cmd)
                os.system('rm video_frames/' + baseline_str + self.gamename +
                          '*.png')
                print(
                    'saved video to ',
                    baseline_str + self.gamename + '-' + str(self.n) + '.mp4')
                sys.exit(0)

        self.audio_clip = []
        self.n = 0

        if self.initial_state:
            self.em.set_state(self.initial_state)
        for p in range(self.players):
            self.em.set_button_mask(np.zeros([self.num_buttons], np.uint8), p)
        self.em.step()
        if self.movie_path is not None:
            rel_statename = os.path.splitext(os.path.basename(
                self.statename))[0]
            self.record_movie(
                os.path.join(
                    self.movie_path, '%s-%s-%06d.bk2' %
                    (self.gamename, rel_statename, self.movie_id)))
            self.movie_id += 1
        if self.movie:
            self.movie.step()
        self.data.reset()
        self.data.update_ram()

        return self._update_obs()

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        return [seed1, seed2]

    def render(self, mode='human', close=False):
        if close:
            if self.viewer:
                self.viewer.close()
            return

        img = self.get_screen() if self.img is None else self.img
        if mode == "rgb_array":
            return img
        elif mode == "human":
            if self.viewer is None:
                from gym.envs.classic_control.rendering import SimpleImageViewer
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(img)
            return self.viewer.isopen

    def close(self):
        if hasattr(self, 'em'):
            del self.em

    def get_action_meaning(self, act):
        actions = []
        for p, action in enumerate(self.action_to_array(act)):
            actions.append([
                self.buttons[i]
                for i in np.extract(action, np.arange(len(action)))
            ])
        if self.players == 1:
            return actions[0]
        return actions

    def get_ram(self):
        blocks = []
        for offset in sorted(self.data.memory.blocks):
            arr = np.frombuffer(self.data.memory.blocks[offset],
                                dtype=np.uint8)
            blocks.append(arr)
        return np.concatenate(blocks)

    def get_screen(self, player=0):
        img = self.em.get_screen()
        x, y, w, h = self.data.crop_info(player)
        if not w or x + w > img.shape[1]:
            w = img.shape[1]
        else:
            w += x
        if not h or y + h > img.shape[0]:
            h = img.shape[0]
        else:
            h += y
        if x == 0 and y == 0 and w == img.shape[1] and h == img.shape[0]:
            return img
        return img[y:h, x:w]

    def load_state(self, statename, inttype=retro.data.Integrations.DEFAULT):
        if not statename.endswith('.state'):
            statename += '.state'

        with gzip.open(
                retro.data.get_file_path(self.gamename, statename, inttype),
                'rb') as fh:
            self.initial_state = fh.read()

        self.statename = statename

    def compute_step(self):
        if self.players > 1:
            reward = [self.data.current_reward(p) for p in range(self.players)]
        else:
            reward = self.data.current_reward()
        done = self.data.is_done()
        return reward, done, self.data.lookup_all()

    def record_movie(self, path):
        self.movie = retro.Movie(path, True, self.players)
        self.movie.configure(self.gamename, self.em)
        if self.initial_state:
            self.movie.set_state(self.initial_state)

    def stop_record(self):
        self.movie_path = None
        self.movie_id = 0
        if self.movie:
            self.movie.close()
            self.movie = None

    def auto_record(self, path=None):
        if not path:
            path = os.getcwd()
        self.movie_path = path
Example #15
class RetroEnv(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 60.0
    }

    def compute_step(self):
        if self.players > 1:
            reward = [self.data.current_reward(p) for p in range(self.players)]
        else:
            reward = self.data.current_reward()
        done = self.data.is_done()
        return reward, done, self.data.lookup_all()

    def record_movie(self, path):
        self.movie = retro.Movie(path, True, self.players)
        self.movie.configure(self.gamename, self.em)
        if self.initial_state:
            self.movie.set_state(self.initial_state)

    def stop_record(self):
        self.movie_path = None
        self.movie_id = 0
        if self.movie:
            self.movie.close()
            self.movie = None

    def auto_record(self, path=None):
        if not path:
            path = os.getcwd()
        self.movie_path = path

    def __init__(self,
                 game,
                 state=retro.State.DEFAULT,
                 scenario=None,
                 info=None,
                 use_restricted_actions=retro.Actions.FILTERED,
                 record=False,
                 players=1,
                 inttype=retro.data.Integrations.STABLE):
        if not hasattr(self, 'spec'):
            self.spec = None
        self.img = None
        self.viewer = None
        self.gamename = game
        self.statename = state
        self.initial_state = None
        self.players = players

        metadata = {}
        rom_path = retro.data.get_romfile_path(game, inttype)
        metadata_path = retro.data.get_file_path(game, 'metadata.json',
                                                 inttype)

        if state == retro.State.NONE:
            self.statename = None
        elif state == retro.State.DEFAULT:
            self.statename = None
            try:
                with open(metadata_path) as f:
                    metadata = json.load(f)
                if 'default_player_state' in metadata and self.players <= len(
                        metadata['default_player_state']):
                    self.statename = metadata['default_player_state'][
                        self.players - 1]
                elif 'default_state' in metadata:
                    self.statename = metadata['default_state']
                else:
                    self.statename = None
            except (IOError, json.JSONDecodeError):
                pass

        if self.statename:
            if not self.statename.endswith('.state'):
                self.statename += '.state'

            with gzip.open(
                    retro.data.get_file_path(game, self.statename, inttype),
                    'rb') as fh:
                self.initial_state = fh.read()

        self.data = retro.data.GameData()

        if info is None:
            info = 'data'

        if info.endswith('.json'):
            # assume it's a path
            info_path = info
        else:
            info_path = retro.data.get_file_path(game, info + '.json', inttype)

        if scenario is None:
            scenario = 'scenario'

        if scenario.endswith('.json'):
            # assume it's a path
            scenario_path = scenario
        else:
            scenario_path = retro.data.get_file_path(game, scenario + '.json',
                                                     inttype)

        self.system = retro.get_romfile_system(rom_path)

        # We can't have more than one emulator per process. Before creating an
        # emulator, ensure that unused ones are garbage-collected
        gc.collect()
        self.em = retro.RetroEmulator(rom_path)
        self.em.configure_data(self.data)
        self.em.step()

        core = retro.get_system_info(self.system)
        self.buttons = core['buttons']
        self.num_buttons = len(self.buttons)
        self.button_combos = self.data.valid_actions()

        try:
            assert self.data.load(
                info_path,
                scenario_path), 'Failed to load info (%s) or scenario (%s)' % (
                    info_path, scenario_path)
        except Exception:
            del self.em
            raise

        img = [self.get_screen(p) for p in range(players)]

        if use_restricted_actions == retro.Actions.DISCRETE:
            combos = 1
            for combo in self.button_combos:
                combos *= len(combo)
            self.action_space = gym.spaces.Discrete(combos**players)
        elif use_restricted_actions == retro.Actions.MULTI_DISCRETE:
            self.action_space = gym.spaces.MultiDiscrete([
                len(combos) if gym_version >= (0, 9, 6) else
                (0, len(combos) - 1) for combos in self.button_combos
            ] * players)
        else:
            self.action_space = gym.spaces.MultiBinary(self.num_buttons *
                                                       players)

        kwargs = {}
        if gym_version >= (0, 9, 6):
            kwargs['dtype'] = np.uint8
        self.observation_space = gym.spaces.Box(low=0,
                                                high=255,
                                                shape=img[0].shape,
                                                **kwargs)

        self.use_restricted_actions = use_restricted_actions
        self.movie = None
        self.movie_id = 0
        self.movie_path = None
        if record is True:
            self.auto_record()
        elif record is not False:
            self.auto_record(record)
        self.seed()
        if gym_version < (0, 9, 6):
            self._seed = self.seed
            self._step = self.step
            self._reset = self.reset
            self._render = self.render
            self._close = self.close

    def action_to_array(self, a):
        actions = []
        for p in range(self.players):
            action = 0
            if self.use_restricted_actions == retro.Actions.DISCRETE:
                for combo in self.button_combos:
                    current = a % len(combo)
                    a //= len(combo)
                    action |= combo[current]
            elif self.use_restricted_actions == retro.Actions.MULTI_DISCRETE:
                ap = a[self.num_buttons * p:self.num_buttons * (p + 1)]
                for i in range(len(ap)):
                    buttons = self.button_combos[i]
                    action |= buttons[ap[i]]
            else:
                ap = a[self.num_buttons * p:self.num_buttons * (p + 1)]
                for i in range(len(ap)):
                    action |= int(ap[i]) << i
                if self.use_restricted_actions == retro.Actions.FILTERED:
                    action = self.data.filter_action(action)
            ap = np.zeros([self.num_buttons], np.uint8)
            for i in range(self.num_buttons):
                ap[i] = (action >> i) & 1
            actions.append(ap)
        return actions

    def step(self, a):
        if self.img is None:
            raise RuntimeError('Please call env.reset() before env.step()')

        for p, ap in enumerate(self.action_to_array(a)):
            if self.movie:
                for i in range(self.num_buttons):
                    self.movie.set_key(i, ap[i], p)
            self.em.set_button_mask(ap, p)

        if self.movie:
            self.movie.step()
        self.em.step()
        self.img = ob = self.get_screen()
        self.data.update_ram()
        rew, done, info = self.compute_step()
        return ob, rew, bool(done), dict(info)

    def reset(self):
        if self.initial_state:
            self.em.set_state(self.initial_state)
        for p in range(self.players):
            self.em.set_button_mask(np.zeros([self.num_buttons], np.uint8), p)
        self.em.step()
        if self.movie_path is not None:
            rel_statename = os.path.splitext(os.path.basename(
                self.statename))[0]
            self.record_movie(
                os.path.join(
                    self.movie_path, '%s-%s-%06d.bk2' %
                    (self.gamename, rel_statename, self.movie_id)))
            self.movie_id += 1
        if self.movie:
            self.movie.step()
        self.img = ob = self.get_screen()
        self.data.reset()
        self.data.update_ram()
        return ob

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        return [seed1, seed2]

    def render(self, mode='human', close=False):
        if close:
            if self.viewer:
                self.viewer.close()
            return
        if mode == "rgb_array":
            return self.get_screen() if self.img is None else self.img
        elif mode == "human":
            if self.viewer is None:
                from gym.envs.classic_control.rendering import SimpleImageViewer
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(self.img)
            return self.viewer.isopen

    def close(self):
        if hasattr(self, 'em'):
            del self.em

    def get_action_meaning(self, act):
        actions = []
        for p, action in enumerate(self.action_to_array(act)):
            actions.append([
                self.buttons[i]
                for i in np.extract(action, np.arange(len(action)))
            ])
        if self.players == 1:
            return actions[0]
        return actions

    def get_screen(self, player=0):
        img = self.em.get_screen()
        x, y, w, h = self.data.crop_info(player)
        if not w or x + w > img.shape[1]:
            w = img.shape[1]
        else:
            w += x
        if not h or y + h > img.shape[0]:
            h = img.shape[0]
        else:
            h += y
        if x == 0 and y == 0 and w == img.shape[1] and h == img.shape[0]:
            return img
        return img[y:h, x:w]
Example #16
class BaseEnv(gym.Env, ABC):
    """Base class for all mazelab environments. 
    
    The subclass should implement at least the following:
    
    - :meth:`step`
    - :meth:`reset`
    - :meth:`get_image`

    """
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 3
    }

    def __init__(self, maze, motion):
        self.maze = maze
        self.motion = motion

        self.observation_space = spaces.Box(low=-np.inf,
                                            high=np.inf,
                                            shape=self.maze.size,
                                            dtype=np.float32)
        self.action_space = spaces.Discrete(self.motion.size)

        self.viewer = None

        self.seed()

    @abstractmethod
    def step(self, action):
        pass

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    @abstractmethod
    def reset(self):
        pass

    @abstractmethod
    def get_image(self):
        pass

    def render(self, mode='human', max_width=500):
        img = self.get_image()
        img = np.asarray(img).astype(np.uint8)
        img_height, img_width = img.shape[:2]
        ratio = max_width / img_width
        img = Image.fromarray(img).resize(
            [int(ratio * img_width),
             int(ratio * img_height)])
        img = np.asarray(img)
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control.rendering import SimpleImageViewer
            if self.viewer is None:
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(img)

            return self.viewer.isopen

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
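
Given a concrete subclass that implements step, reset and get_image, the human render mode above opens a SimpleImageViewer window. A sketch of the typical loop, assuming an instance env whose step() follows the usual gym 4-tuple convention:

env.reset()
for _ in range(100):
    obs, reward, done, info = env.step(env.action_space.sample())
    still_open = env.render(mode='human')
    if done or not still_open:
        break
env.close()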
Example #17
File: sampler.py  Project: yyz940922/ray
def _env_runner(
    worker: "RolloutWorker",
    base_env: BaseEnv,
    extra_batch_callback: Callable[[SampleBatchType], None],
    policies: Dict[PolicyID, Policy],
    policy_mapping_fn: Callable[[AgentID], PolicyID],
    rollout_fragment_length: int,
    horizon: int,
    preprocessors: Dict[PolicyID, Preprocessor],
    obs_filters: Dict[PolicyID, Filter],
    clip_rewards: bool,
    clip_actions: bool,
    multiple_episodes_in_batch: bool,
    callbacks: "DefaultCallbacks",
    tf_sess: Optional["tf.Session"],
    perf_stats: _PerfStats,
    soft_horizon: bool,
    no_done_at_end: bool,
    observation_fn: "ObservationFunction",
    sample_collector: Optional[SampleCollector] = None,
    render: bool = None,
) -> Iterable[SampleBatchType]:
    """This implements the common experience collection logic.

    Args:
        worker (RolloutWorker): Reference to the current rollout worker.
        base_env (BaseEnv): Env implementing BaseEnv.
        extra_batch_callback (fn): function to send extra batch data to.
        policies (Dict[PolicyID, Policy]): Map of policy ids to Policy
            instances.
        policy_mapping_fn (func): Function that maps agent ids to policy ids.
            This is called when an agent first enters the environment. The
            agent is then "bound" to the returned policy for the episode.
        rollout_fragment_length (int): Number of episode steps before
            `SampleBatch` is yielded. Set to infinity to yield complete
            episodes.
        horizon (int): Horizon of the episode.
        preprocessors (dict): Map of policy id to preprocessor for the
            observations prior to filtering.
        obs_filters (dict): Map of policy id to filter used to process
            observations for the policy.
        clip_rewards (bool): Whether to clip rewards before postprocessing.
        multiple_episodes_in_batch (bool): Whether to pack multiple
            episodes into each batch. This guarantees batches will be exactly
            `rollout_fragment_length` in size.
        clip_actions (bool): Whether to clip actions to the space range.
        callbacks (DefaultCallbacks): User callbacks to run on episode events.
        tf_sess (Session|None): Optional tensorflow session to use for batching
            TF policy evaluations.
        perf_stats (_PerfStats): Record perf stats into this object.
        soft_horizon (bool): Calculate rewards but don't reset the
            environment when the horizon is hit.
        no_done_at_end (bool): Ignore the done=True at the end of the episode
            and instead record done=False.
        observation_fn (ObservationFunction): Optional multi-agent
            observation func to use for preprocessing observations.
        sample_collector (Optional[SampleCollector]): An optional
            SampleCollector object to use.
        render (bool): Whether to try to render the environment after each
            step.

    Yields:
        rollout (SampleBatch): Object containing state, action, reward,
            terminal condition, and other fields as dictated by `policy`.
    """

    # May be populated with a SimpleImageViewer used for image rendering.
    simple_image_viewer: Optional["SimpleImageViewer"] = None

    # Try to get Env's `max_episode_steps` prop. If it doesn't exist, ignore
    # error and continue with max_episode_steps=None.
    max_episode_steps = None
    try:
        max_episode_steps = base_env.get_unwrapped()[0].spec.max_episode_steps
    except Exception:
        pass

    # Trainer has a given `horizon` setting.
    if horizon:
        # `horizon` is larger than env's limit.
        if max_episode_steps and horizon > max_episode_steps:
            # Try to override the env's own max-step setting with our horizon.
            # If this won't work, throw an error.
            try:
                base_env.get_unwrapped()[0].spec.max_episode_steps = horizon
                base_env.get_unwrapped()[0]._max_episode_steps = horizon
            except Exception:
                raise ValueError(
                    "Your `horizon` setting ({}) is larger than the Env's own "
                    "timestep limit ({}), which seems to be unsettable! Try "
                    "to increase the Env's built-in limit to be at least as "
                    "large as your wanted `horizon`.".format(
                        horizon, max_episode_steps))
    # Otherwise, set Trainer's horizon to env's max-steps.
    elif max_episode_steps:
        horizon = max_episode_steps
        logger.debug(
            "No episode horizon specified, setting it to Env's limit ({}).".
            format(max_episode_steps))
    # No horizon/max_episode_steps -> Episodes may be infinitely long.
    else:
        horizon = float("inf")
        logger.debug("No episode horizon specified, assuming inf.")

    # Pool of batch builders, which can be shared across episodes to pack
    # trajectory data.
    batch_builder_pool: List[MultiAgentSampleBatchBuilder] = []

    def get_batch_builder():
        if batch_builder_pool:
            return batch_builder_pool.pop()
        else:
            return None

    def new_episode(env_id):
        episode = MultiAgentEpisode(policies,
                                    policy_mapping_fn,
                                    get_batch_builder,
                                    extra_batch_callback,
                                    env_id=env_id)
        # Call each policy's Exploration.on_episode_start method.
        # type: Policy
        for p in policies.values():
            if getattr(p, "exploration", None) is not None:
                p.exploration.on_episode_start(policy=p,
                                               environment=base_env,
                                               episode=episode,
                                               tf_sess=getattr(
                                                   p, "_sess", None))
        callbacks.on_episode_start(
            worker=worker,
            base_env=base_env,
            policies=policies,
            episode=episode,
            env_index=env_id,
        )
        return episode

    active_episodes: Dict[str, MultiAgentEpisode] = \
        NewEpisodeDefaultDict(new_episode)

    while True:
        perf_stats.iters += 1
        t0 = time.time()
        # Get observations from all ready agents.
        # type: MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict, ...
        unfiltered_obs, rewards, dones, infos, off_policy_actions = \
            base_env.poll()
        perf_stats.env_wait_time += time.time() - t0

        if log_once("env_returns"):
            logger.info("Raw obs from env: {}".format(
                summarize(unfiltered_obs)))
            logger.info("Info return from env: {}".format(summarize(infos)))

        # Process observations and prepare for policy evaluation.
        t1 = time.time()
        # type: Set[EnvID], Dict[PolicyID, List[PolicyEvalData]],
        #       List[Union[RolloutMetrics, SampleBatchType]]
        active_envs, to_eval, outputs = \
            _process_observations(
                worker=worker,
                base_env=base_env,
                policies=policies,
                active_episodes=active_episodes,
                unfiltered_obs=unfiltered_obs,
                rewards=rewards,
                dones=dones,
                infos=infos,
                horizon=horizon,
                preprocessors=preprocessors,
                obs_filters=obs_filters,
                multiple_episodes_in_batch=multiple_episodes_in_batch,
                callbacks=callbacks,
                soft_horizon=soft_horizon,
                no_done_at_end=no_done_at_end,
                observation_fn=observation_fn,
                sample_collector=sample_collector,
            )
        perf_stats.raw_obs_processing_time += time.time() - t1
        for o in outputs:
            yield o

        # Do batched policy eval (across vectorized envs).
        t2 = time.time()
        # type: Dict[PolicyID, Tuple[TensorStructType, StateBatch, dict]]
        eval_results = _do_policy_eval(
            to_eval=to_eval,
            policies=policies,
            sample_collector=sample_collector,
            active_episodes=active_episodes,
            tf_sess=tf_sess,
        )
        perf_stats.inference_time += time.time() - t2

        # Process results and update episode state.
        t3 = time.time()
        actions_to_send: Dict[EnvID, Dict[AgentID, EnvActionType]] = \
            _process_policy_eval_results(
                to_eval=to_eval,
                eval_results=eval_results,
                active_episodes=active_episodes,
                active_envs=active_envs,
                off_policy_actions=off_policy_actions,
                policies=policies,
                clip_actions=clip_actions,
            )
        perf_stats.action_processing_time += time.time() - t3

        # Return computed actions to ready envs. We also send to envs that have
        # taken off-policy actions; those envs are free to ignore the action.
        t4 = time.time()
        base_env.send_actions(actions_to_send)
        perf_stats.env_wait_time += time.time() - t4

        # Try to render the env, if required.
        if render:
            t5 = time.time()
            # Render can either return an RGB image (uint8 [w x h x 3] numpy
            # array) or take care of rendering itself (returning True).
            rendered = base_env.try_render()
            # Rendering returned an image -> Display it in a SimpleImageViewer.
            if isinstance(rendered, np.ndarray) and len(rendered.shape) == 3:
                # ImageViewer not defined yet, try to create one.
                if simple_image_viewer is None:
                    try:
                        from gym.envs.classic_control.rendering import \
                            SimpleImageViewer
                        simple_image_viewer = SimpleImageViewer()
                    except (ImportError, ModuleNotFoundError):
                        render = False  # disable rendering
                        logger.warning(
                            "Could not import gym.envs.classic_control."
                            "rendering! Try `pip install gym[all]`.")
                if simple_image_viewer:
                    simple_image_viewer.imshow(rendered)
            perf_stats.env_render_time += time.time() - t5
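The rendering branch above follows a pattern that recurs throughout these examples: create the SimpleImageViewer lazily on the first RGB frame and disable rendering altogether if the import fails. A stripped-down sketch of that pattern in isolation (the class name is made up):

import numpy as np

class LazyRenderer:
    """Lazily create a SimpleImageViewer; disable rendering if it can't be imported."""

    def __init__(self):
        self.viewer = None
        self.enabled = True

    def maybe_render(self, frame):
        if not self.enabled:
            return
        # Only RGB arrays (H x W x 3) are displayed; anything else is ignored.
        if isinstance(frame, np.ndarray) and frame.ndim == 3:
            if self.viewer is None:
                try:
                    from gym.envs.classic_control.rendering import SimpleImageViewer
                    self.viewer = SimpleImageViewer()
                except (ImportError, ModuleNotFoundError):
                    self.enabled = False  # e.g. pyglet missing; stop trying
                    return
            self.viewer.imshow(frame)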
Example #18
0
class N64Env(gym.Env):
    """
    Nintendo 64 environment.

    We can't use the typical retro environment because n64 uses dynamic memory addresses.
    So we have to read and interpret the ram differently, which we handle in this class.
    """
    metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 60.0}

    def __init__(self,
                 game,
                 state=retro.State.DEFAULT,
                 scenario=None,
                 info=None,
                 use_restricted_actions=retro.Actions.FILTERED,
                 record=False,
                 players=1,
                 inttype=retro.data.Integrations.STABLE,
                 obs_type=retro.Observations.IMAGE):
        if not hasattr(self, 'spec'):
            self.spec = None
        self._obs_type = obs_type
        self.img = None
        self.ram = None
        self.viewer = None
        self.gamename = game
        self.statename = state
        self.initial_state = None
        self.players = players

        if game != "SuperSmashBros-N64":
            raise NotImplementedError("Only ssb64 supported so far")
        self.ssb64_game_data = retro.data.SSB64GameData()

        metadata = {}
        rom_path = retro.data.get_romfile_path(game, inttype)
        metadata_path = retro.data.get_file_path(game, 'metadata.json', inttype)

        if state == retro.State.NONE:
            self.statename = None
        elif state == retro.State.DEFAULT:
            self.statename = None
            try:
                with open(metadata_path) as f:
                    metadata = json.load(f)
                if 'default_player_state' in metadata and self.players <= len(
                        metadata['default_player_state']):
                    self.statename = metadata['default_player_state'][self.players - 1]
                elif 'default_state' in metadata:
                    self.statename = metadata['default_state']
                else:
                    self.statename = None
            except (IOError, json.JSONDecodeError):
                pass

        if self.statename:
            self.load_state(self.statename, inttype)

        self.data = retro.data.GameData()

        if info is None:
            info = 'data'

        if info.endswith('.json'):
            # assume it's a path
            info_path = info
        else:
            info_path = retro.data.get_file_path(game, info + '.json', inttype)

        if scenario is None:
            scenario = 'scenario'

        if scenario.endswith('.json'):
            # assume it's a path
            scenario_path = scenario
        else:
            scenario_path = retro.data.get_file_path(game, scenario + '.json', inttype)

        self.system = retro.get_romfile_system(rom_path)

        # We can't have more than one emulator per process. Before creating an
        # emulator, ensure that unused ones are garbage-collected
        gc.collect()
        self.em = retro.RetroEmulator(rom_path)
        self.em.configure_data(self.data)
        self.em.step()

        core = retro.get_system_info(self.system)
        self.buttons = core['buttons']
        self.num_buttons = len(self.buttons)

        try:
            assert self.data.load(
                info_path,
                scenario_path), 'Failed to load info (%s) or scenario (%s)' % (info_path,
                                                                               scenario_path)
        except Exception:
            del self.em
            raise

        self.button_combos = self.data.valid_actions()
        if use_restricted_actions == retro.Actions.DISCRETE:
            combos = 1
            for combo in self.button_combos:
                combos *= len(combo)
            self.action_space = gym.spaces.Discrete(combos**players)
        elif use_restricted_actions == retro.Actions.MULTI_DISCRETE:
            self.action_space = gym.spaces.MultiDiscrete([
                len(combos) if gym_version >= (0, 9, 6) else (0, len(combos) - 1)
                for combos in self.button_combos
            ] * players)
        else:
            self.action_space = gym.spaces.MultiBinary(self.num_buttons * players)

        kwargs = {}
        if gym_version >= (0, 9, 6):
            kwargs['dtype'] = np.uint8

        if self._obs_type == retro.Observations.RAM:
            shape = self.get_ram().shape
        else:
            img = [self.get_screen(p) for p in range(players)]
            shape = img[0].shape
        self.observation_space = gym.spaces.Box(low=0, high=255, shape=shape, **kwargs)

        self.use_restricted_actions = use_restricted_actions
        self.movie = None
        self.movie_id = 0
        self.movie_path = None
        if record is True:
            self.auto_record()
        elif record is not False:
            self.auto_record(record)
        self.seed()
        if gym_version < (0, 9, 6):
            self._seed = self.seed
            self._step = self.step
            self._reset = self.reset
            self._render = self.render
            self._close = self.close

    def _update_obs(self):
        self.ram = self.get_ram()
        self.img = self.get_screen()

        if self._obs_type == retro.Observations.RAM:
            return self.ram
        elif self._obs_type == retro.Observations.IMAGE:
            return self.img
        else:
            raise ValueError('Unrecognized observation type: {}'.format(self._obs_type))

    def action_to_array(self, a):
        actions = []
        for p in range(self.players):
            action = 0
            if self.use_restricted_actions == retro.Actions.DISCRETE:
                for combo in self.button_combos:
                    current = a % len(combo)
                    a //= len(combo)
                    action |= combo[current]
            elif self.use_restricted_actions == retro.Actions.MULTI_DISCRETE:
                # The upstream implementation (commented out below) sliced by
                # num_buttons and indexed button_combos past its length, which
                # appears to be a bug worth a pull request; the corrected
                # version slices by the number of button combos instead.
                # ap = a[self.num_buttons * p:self.num_buttons * (p + 1)]
                # for i in range(len(ap)):
                #     buttons = self.button_combos[i % len(self.button_combos)]
                #     action |= buttons[ap[i]]
                num_combos = len(self.button_combos)
                ap = a[num_combos * p:num_combos * (p + 1)]
                for i in range(len(ap)):
                    buttons = self.button_combos[i]
                    action |= buttons[ap[i]]
            else:
                ap = a[self.num_buttons * p:self.num_buttons * (p + 1)]
                for i in range(len(ap)):
                    action |= int(ap[i]) << i
                if self.use_restricted_actions == retro.Actions.FILTERED:
                    action = self.data.filter_action(action)
            ap = np.zeros([self.num_buttons], np.uint8)
            for i in range(self.num_buttons):
                ap[i] = (action >> i) & 1
            actions.append(ap)
        return actions

    def step(self, a):
        if self.img is None and self.ram is None:
            raise RuntimeError('Please call env.reset() before env.step()')

        for p, ap in enumerate(self.action_to_array(a)):
            if self.movie:
                for i in range(self.num_buttons):
                    self.movie.set_key(i, ap[i], p)
            self.em.set_button_mask(ap, p)

        if self.movie:
            self.movie.step()
        self.em.step()
        self.data.update_ram()
        ob = self._update_obs()
        rew, done, info = self.compute_step()
        return ob, rew, bool(done), dict(info)

    def reset(self):
        if self.initial_state:
            self.em.set_state(self.initial_state)
        for p in range(self.players):
            self.em.set_button_mask(np.zeros([self.num_buttons], np.uint8), p)
        self.em.step()
        if self.movie_path is not None:
            rel_statename = os.path.splitext(os.path.basename(self.statename))[0]
            self.record_movie(
                os.path.join(self.movie_path,
                             '%s-%s-%06d.bk2' % (self.gamename, rel_statename, self.movie_id)))
            self.movie_id += 1
        if self.movie:
            self.movie.step()
        self.data.reset()
        self.ssb64_game_data.reset()
        self.data.update_ram()
        return self._update_obs()

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        return [seed1, seed2]

    def render(self, mode='human', close=False):
        if close:
            if self.viewer:
                self.viewer.close()
            return

        img = self.get_screen() if self.img is None else self.img
        if mode == "rgb_array":
            return img
        elif mode == "human":
            if self.viewer is None:
                from gym.envs.classic_control.rendering import SimpleImageViewer
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(img)
            return self.viewer.isopen

    def close(self):
        if hasattr(self, 'em'):
            del self.em

    def get_action_meaning(self, act):
        actions = []
        for p, action in enumerate(self.action_to_array(act)):
            actions.append([self.buttons[i] for i in np.extract(action, np.arange(len(action)))])
        if self.players == 1:
            return actions[0]
        return actions

    def get_ram(self):
        blocks = []
        for offset in sorted(self.data.memory.blocks):
            arr = np.frombuffer(self.data.memory.blocks[offset], dtype=np.uint8)
            blocks.append(arr)
        return np.concatenate(blocks)

    def get_screen(self, player=0):
        img = self.em.get_screen()
        # OpenGL returns the image vertically flipped; it's unclear how to fix that
        # at the source, so flip it back here.
        img = np.flipud(img)

        x, y, w, h = self.data.crop_info(player)
        if not w or x + w > img.shape[1]:
            w = img.shape[1]
        else:
            w += x
        if not h or y + h > img.shape[0]:
            h = img.shape[0]
        else:
            h += y
        if x == 0 and y == 0 and w == img.shape[1] and h == img.shape[0]:
            return img
        return img[y:h, x:w]

    def load_state(self, statename, inttype=retro.data.Integrations.DEFAULT):
        if not statename.endswith('.state'):
            statename += '.state'

        with gzip.open(retro.data.get_file_path(self.gamename, statename, inttype), 'rb') as fh:
            self.initial_state = fh.read()

        self.statename = statename

    def compute_step(self):
        """Specific to ssb64 for now."""
        self.ssb64_game_data.update(self.ram)
        if self.players > 1:
            # Make the reward a numpy array so that certain wrappers work with it.
            reward = np.array([self.ssb64_game_data.current_reward(p) for p in range(self.players)])
        else:
            reward = self.ssb64_game_data.current_reward()
        done = self.ssb64_game_data.is_done()
        return reward, done, self.ssb64_game_data.lookup_all()

    def record_movie(self, path):
        self.movie = retro.Movie(path, True, self.players)
        self.movie.configure(self.gamename, self.em)
        if self.initial_state:
            self.movie.set_state(self.initial_state)

    def stop_record(self):
        self.movie_path = None
        self.movie_id = 0
        if self.movie:
            self.movie.close()
            self.movie = None

    def auto_record(self, path=None):
        if not path:
            path = os.getcwd()
        self.movie_path = path
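Whatever the action space, action_to_array above finishes by unpacking an integer button mask into a per-button uint8 array. A small sketch of that final step on its own (the button list is illustrative, not the real N64 layout):

import numpy as np

buttons = ['B', 'A', 'START', 'UP', 'DOWN', 'LEFT', 'RIGHT', 'Z']  # illustrative only

def bitmask_to_button_array(action, num_buttons=len(buttons)):
    """Unpack an integer bitmask into a uint8 array with one entry per button."""
    ap = np.zeros([num_buttons], np.uint8)
    for i in range(num_buttons):
        ap[i] = (action >> i) & 1
    return ap

mask = (1 << 1) | (1 << 6)            # press A (bit 1) and RIGHT (bit 6)
print(bitmask_to_button_array(mask))  # [0 1 0 0 0 0 1 0]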
Example #19
0
class ToyboxBaseEnv(AtariEnv, ABC):
    metadata = {'render.modes': ['human']}

    def __init__(self,
                 toybox,
                 game,
                 frameskip=(2, 5),
                 repeat_action_probability=0.,
                 grayscale=True,
                 alpha=False,
                 actions=None):
        assert (toybox.rstate)
        self.toybox = toybox
        # This is a workaround for issues with Gym wrappers
        # resetting state prematurely
        self.cached_state = None
        self.score = self.toybox.get_score()
        self.viewer = None

        # Required for compatibility with OpenAI Gym's Atari wrappers
        self.np_random = np_random
        self.ale = MockALE(toybox)
        utils.EzPickle.__init__(self, game, 'human', frameskip,
                                repeat_action_probability)

        # By default, we don't need actions passed in:
        if actions is None:
            actions = toybox.get_legal_action_set()
        assert (actions is not None)
        self._action_set = actions
        self._obs_type = 'image'
        self._rgba = 1 if grayscale else 4 if alpha else 3
        self._pixel_high = 255

        self._height = self.toybox.get_height()
        self._width = self.toybox.get_width()
        self._dim = (self._height, self._width, self._rgba
                     )  # * len(self.toybox.get_state()))

        self.reward_range = (0, float('inf'))
        self.action_space = spaces.Discrete(len(self._action_set))
        self.observation_space = spaces.Box(low=0,
                                            high=self._pixel_high,
                                            shape=self._dim,
                                            dtype='uint8')

    def seed(self, seed=None):
        """
        This mirrors the seed implementation of AtariEnv in openai/gym.
        """
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        # Toybox takes a uint seed, but we copy the ALE seeding scheme for the
        # reasons above. It's unclear which layer does the checking, so stay safe here.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        self.toybox.set_seed(seed2)
        # Start a new game to ensure that the seed actually gets used.
        self.toybox.new_game()
        return [seed1, seed2]

    # This is required to "trick" baselines into treating us as a regular Atari game
    # Implementation copied from baselines
    def get_action_meanings(self):
        #return [ACTION_MEANING[i] for i in self._action_set]
        return list(ACTION_MEANING.values())

    # From OpenAI Gym Baselines
    # https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py
    def _get_obs(self):
        return self.toybox.get_state()

    def step(self, action_index):
        obs = None
        reward = None
        done = False
        info = {}

        # Sometimes the action_index is a numpy integer...
        #print('Action index and type', action_index, type(action_index))
        assert (action_index < len(self._action_set))
        assert (type(self._action_set) == list)

        self.toybox.apply_ale_action(self._action_set[action_index])

        if self.ale.game_over():
            print('GAME OVER')
            info['cached_state'] = self.toybox.to_state_json()

        obs = self._get_obs()

        # Compute the reward from the current score and reset the current score.
        score = self.toybox.get_score()
        reward = max(score - self.score, 0)
        self.score = score

        # Check whether the episode is done
        # use "ale" semantics here
        done = self.ale.game_over()

        # Send back diagnostic information
        info['lives'] = self.toybox.get_lives()
        #info['frame'] = frame
        info['score'] = 0 if done else self.score

        return obs, reward, done, info

    def reset(self):
        self.cached_state = self.toybox.to_state_json()
        self.toybox.new_game()
        self.score = self.toybox.get_score()
        obs = self._get_obs()
        return obs

    def render(self, mode='human', close=False):
        if mode == 'human':
            # the following is copied from gym's AtariEnv
            if self.viewer is None:
                from gym.envs.classic_control.rendering import SimpleImageViewer
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(self.toybox.get_rgb_frame())
            return self.viewer.isopen
        elif mode == 'rgb_array':
            return self.toybox.get_rgb_frame()

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
        del self.toybox
        self.toybox = None
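The step method above derives the reward from the change in score, clipped at zero, rather than from the raw score. A tiny sketch of that bookkeeping in isolation, with made-up score readings:

class ScoreDeltaReward:
    """Turn a running score stream into per-step rewards, clipped at zero."""

    def __init__(self, initial_score=0):
        self.score = initial_score

    def step(self, score):
        reward = max(score - self.score, 0)
        self.score = score
        return reward

tracker = ScoreDeltaReward()
for s in [0, 10, 10, 30]:   # fake score readings
    print(tracker.step(s))  # 0, 10, 0, 20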
Example #20
0
class SuperMarioKartEnv(RetroEnv):

    def __init__(self, game, state=retro.State.DEFAULT, scenario=None, sprite_buffer=20,
                 **kwargs):
        RetroEnv.__init__(self, game, state, scenario, **kwargs)
        self.map = None
        self.sprite_buffer = sprite_buffer # defines visual area around kart; values 5-50 probably fine
        # TODO: update observation space here

    def get_screen(self, player=0):
        # Check the game mode
        game_mode_var = self.data.get_variable("game_mode")
        game_mode = self.data.memory.extract(game_mode_var["address"], game_mode_var["type"])

        # Get kart direction
        direction = self.data.memory.extract(int("0x95", 16), "|u1")

        # adjust the direction
        direction = (direction / 255) * 360

        if game_mode != 28:
            # return np.zeros((32, 32, 3)).astype("uint8")
            return np.zeros((128, 128, 3)).astype("uint8")
        # If we're in gameplay
        else:
            # Update the base map layer if necessary
            if self.map is None:
                self.map = self.read_map()
            # Make a copy and update the map with player position
            this_map = np.copy(self.map)
            # Get scaled player position on map
            player_position_east = self.data.memory.extract(8257672, "<u2")
            player_position_south = self.data.memory.extract(8257676, "<u2")
            player_position_east_relative = math.floor((player_position_east / 4100) * 128)
            player_position_south_relative = math.floor((player_position_south / 4100) * 128)
            # Scale the RGB inputs to get a greyscale map
            this_map = np.floor(((this_map + np.abs(np.min(this_map))) / np.max(this_map)) * 255).astype("uint8")
            this_map = np.reshape(this_map, (128,128,1))
            this_map = np.concatenate((this_map, this_map, this_map), axis=2)
            # Update the player position to be red
            this_map[player_position_south_relative, player_position_east_relative, 0] = 255
            this_map[player_position_south_relative, player_position_east_relative, 1:3] = 0
            # add enemy karts
            positions = self.get_cpu_kart_pos()
            for player_num, position_dict in positions.items():
                if position_dict["east"] == player_position_east and position_dict["south"] == player_position_south:
                    continue
                else:
                    p_south_rel = math.floor(((position_dict["south"] / 4100.0) * 128))
                    p_east_rel = math.floor(((position_dict["east"] / 4100.0) * 128))
                    this_map[p_south_rel, p_east_rel,2] = 255
                    this_map[p_south_rel, p_east_rel, 0] = 0
                    this_map[p_south_rel, p_east_rel, 1] = 0
            this_map = this_map.astype("uint8")
            # return this_map

            # Rotate
            # Pad using sprite buffer
            # Original image generated from RAM is 128x128
            a = np.concatenate((np.zeros((128, self.sprite_buffer, 3)), this_map), axis=1)
            a = np.concatenate((a, np.zeros((128, self.sprite_buffer, 3))), axis=1)
            a = np.concatenate((a, np.zeros((self.sprite_buffer, 128+self.sprite_buffer*2, 3))), axis=0)
            a = np.concatenate((np.zeros((self.sprite_buffer, 128+self.sprite_buffer*2, 3)), a), axis=0)
            a = a.astype("uint8")

            # Now need to account for padding
            smallmap = a[player_position_south_relative:player_position_south_relative + self.sprite_buffer*2,
                   player_position_east_relative:player_position_east_relative+self.sprite_buffer*2, :]
            smallmap = rotate_image(smallmap, direction).astype("uint8")

            # Scale back to 128x128 dimensions CNN can handle
            dim = (128, 128)
            smallbigmap = cv2.resize(smallmap, dim, interpolation=cv2.INTER_AREA)
            return smallbigmap




    def read_map(self):
        # This base tile contains the first spritemap byte.
        # We read them all into a 128x128 matrix to represent the overhead map
        base_tile_address = 8323072
        base_physics_address = int("0xB00", 16)
        map = np.zeros((128,128))
        for x in range(1,128):
            for y in range(1,128):
                address = base_tile_address+((x-1)+(y-1)*128)*1
                tile = self.data.memory.extract(address, "|u1")
                # extract physics elements of each tile
                # physics = self.get_road_physics(self.data.memory.extract(base_physics_address+tile, "|u1"))
                physics = self.get_physics(self.data.memory.extract(base_physics_address+tile, "|u1"))
                map[x-1, y-1] = physics
        map = np.fliplr(map)
        map = np.rot90(map)
        return map

    def get_cpu_kart_pos(self):
        pos = {}
        for k in range(2,9):
            base = int("0xF00", 16) + int("0x100", 16) * k
            x = self.data.memory.extract(int("0x18", 16) + base, "<u2") * 4
            y = self.data.memory.extract(int("0x1C", 16) + base, "<u2") * 4
            pos[k] = {"east": x, "south": y}
        return pos


    def render(self, mode='human', close=False):
        # Mimics functionality of parent render method, but adds lowres overlay
        if close:
            if self.viewer:
                self.viewer.close()
            return
        # Get game and overlay screens
        game_img = RetroEnv.get_screen(self)
        game_img_shape = game_img.shape
        lowres_overhead = self.get_screen()
        lowres_shape = lowres_overhead.shape
        # Extend the image
        actual_game_image = np.concatenate((game_img,np.zeros((game_img_shape[0], lowres_shape[1], 3))), axis=1)
        actual_game_image[0:lowres_shape[0],
                          game_img_shape[1]:game_img_shape[1] + lowres_shape[1],
                          :] = lowres_overhead
        actual_game_image = actual_game_image.astype("uint8")

        # Scale
        scale_percent = 400
        width = int(actual_game_image.shape[1] * scale_percent / 100)
        height = int(actual_game_image.shape[0] * scale_percent / 100)
        dim = (width, height)

        # resize image
        actual_game_image = cv2.resize(actual_game_image, dim, interpolation=cv2.INTER_AREA)


        if mode == "rgb_array":
            return actual_game_image
        elif mode == "human":
            if self.viewer is None:
                from gym.envs.classic_control.rendering import SimpleImageViewer
                self.viewer = SimpleImageViewer(maxwidth=width)
            self.viewer.imshow(actual_game_image)
            return self.viewer.isopen

    def get_road_physics(self, physics):
        if physics == int("0x40", 16):  # --road
            return 1
        elif physics == int("0x46",16): # --dirt road
            return 1
        elif physics == int("0x42",16): # --ghost road
            return 1
        elif physics == int("0x4E",16): # --light ghost road
            return 1
        elif physics == int("0x50",16): # --wood bridge
            return 1
        elif physics == int("0x1E",16): # --starting line
            return 1
        elif physics == int("0x44",16): # --castle road
            return 1
        elif physics == int("0x16",16): # --speed boost
            return 2
        elif physics == int("0x10",16): # --jump pad
            return 1.5
        elif physics == int("0x4C",16): # --choco road
            return 1
        elif physics == int("0x4A",16): # --sand road
            return 1
        else:
            return 0

    def get_physics(self, physics):
        if physics == int("0x54",16): # --dirt
            return 0
        elif physics == int("0x5A",16): # --lily pads/grass
            return 0
        elif physics == int("0x5C",16): # --shallow water
            return 0
        elif physics == int("0x58",16): # --snow
            return 0
        elif physics == int("0x56",16): # --chocodirt
            return -0.5
        elif physics == int("0x40",16): # --road
            return 1
        elif physics == int("0x46",16): # --dirt road
            return 0.75
        elif physics == int("0x52",16): # --loose dirt
            return 0.5
        elif physics == int("0x42",16): # --ghost road
            return 1
        elif physics == int("0x10",16): # --jump pad
            return 1.5
        elif physics == int("0x4E",16): # --light ghost road
            return 1
        elif physics == int("0x50",16): # --wood bridge
            return 1
        elif physics == int("0x1E",16): # --starting line
            return 1
        elif physics == int("0x44",16): # --castle road
            return 1
        elif physics == int("0x16",16): # --speed boost
            return 2
        elif physics == int("0x80",16): # --wall
            return -1.5
        elif physics == int("0x26",16): #	--oob grass
            return -1.5
        elif physics == int("0x22",16): # --deep water
            return -1
        elif physics == int("0x20",16): # --pit
            return -2
        elif physics == int("0x82",16): # --ghost house border
            return -1.5
        elif physics == int("0x24",16): # --lava
            return -2
        elif physics == int("0x4C",16): # --choco road
            return 1
        elif physics == int("0x12",16): # --choco bump
            return 0.75
        elif physics == int("0x1C",16): # --choco bump
            return 0.75
        elif physics == int("0x5E",16): # --mud
            return 0.5
        elif physics == int("0x48",16): # --wet sand
            return 0.75
        elif physics == int("0x4A",16): # --sand road
            return 1
        elif physics == int("0x84",16): # --ice blocks
            return -1.5
        elif physics == int("0x28",16): # --unsure
            return -1
        elif physics == int("0x14",16): # --? box
            return 1.5
        elif physics == int("0x1A",16): # --coin
            return 1.25
        elif physics == int("0x18",16): # --oil spill
            return -0.75
        else:
            raise(Exception("Unknown physics: {}".format(physics)))
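The long elif chain in get_physics could equally be written as a table lookup. A hedged sketch of that alternative, with only a subset of the codes copied from the chain above:

# Partial table copied from the elif chain above; unknown codes still raise.
PHYSICS_VALUES = {
    0x40: 1,      # road
    0x46: 0.75,   # dirt road
    0x54: 0,      # dirt
    0x16: 2,      # speed boost
    0x10: 1.5,    # jump pad
    0x80: -1.5,   # wall
    0x20: -2,     # pit
    0x24: -2,     # lava
}

def get_physics(physics):
    try:
        return PHYSICS_VALUES[physics]
    except KeyError:
        raise Exception("Unknown physics: {}".format(physics))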
Example #21
0
File: base.py Project: LLT1/marlgrid
class MultiGridEnv(gym.Env):
    def __init__(
        self,
        agents,
        grid_size=None,
        width=None,
        height=None,
        max_steps=100,
        see_through_walls=False,
        done_condition=None,
        seed=1337,
    ):

        if grid_size is not None:
            assert width is None and height is None
            width, height = grid_size, grid_size

        if done_condition is not None and done_condition not in ("any", "all"):
            raise ValueError("done_condition must be one of ['any', 'all', None].")
        self.done_condition = done_condition

        self.num_agents = len(agents)
        self.agents = agents

        self.action_space = gym.spaces.Tuple(
            tuple(gym.spaces.Discrete(len(agent.actions)) for agent in self.agents)
        )
        self.observation_space = gym.spaces.Tuple(
            tuple(
                gym.spaces.Box(
                    low=0,
                    high=255,
                    shape=(agent.view_size, agent.view_size, 3),
                    dtype="uint8",
                )
                for agent in self.agents
            )
        )
        self.reward_range = [(0, 1) for _ in range(len(self.agents))]

        self.window = None

        self.width = width
        self.height = height
        self.max_steps = max_steps
        self.see_through_walls = see_through_walls

        self.seed(seed=seed)

        self.reset()

    def seed(self, seed=1337):
        # Seed the random number generator
        self.np_random, _ = gym.utils.seeding.np_random(seed)
        return [seed]

    def _rand_int(self, low, high):
        """
        Generate a random integer in [low, high)
        """

        return self.np_random.randint(low, high)

    def _rand_float(self, low, high):
        """
        Generate a random float in [low, high)
        """

        return self.np_random.uniform(low, high)

    def _rand_bool(self):
        """
        Generate random boolean value
        """

        return self.np_random.randint(0, 2) == 0

    def _rand_elem(self, iterable):
        """
        Pick a random element in a list
        """

        lst = list(iterable)
        idx = self._rand_int(0, len(lst))
        return lst[idx]

    def reset(self):
        for agent in self.agents:
            agent.reset()

        self._gen_grid(self.width, self.height)

        for agent in self.agents:
            # Make sure _gen_grid initialized agent positions
            assert (agent.pos is not None) and (agent.dir is not None)
            # Make sure the agent doesn't overlap with an object
            start_cell = self.grid.get(*agent.pos)
            # assert start_cell is None or start_cell.can_overlap()
            assert start_cell is agent

        self.step_count = 0

        obs = self.gen_obs()
        return obs

    def gen_obs_grid(self, agent):
        topX, topY, botX, botY = agent.get_view_exts()

        grid = self.grid.slice(
            topX, topY, agent.view_size, agent.view_size, rot_k=agent.dir + 1
        )

        # Process occluders and visibility
        # Note that this incurs some performance cost
        if not self.see_through_walls:
            vis_mask = grid.process_vis(
                agent_pos=(agent.view_size // 2, agent.view_size - 1)
            )
        else:
            vis_mask = np.ones(shape=(grid.width, grid.height), dtype=np.bool)

        return grid, vis_mask

    def gen_agent_obs(self, agent):
        grid, vis_mask = self.gen_obs_grid(agent)
        return grid.render(tile_size=agent.view_tile_size)  # ,highlight_mask=~vis_mask)

    def gen_obs(self):
        """
        Generate the agent's view (partially observable, low-resolution encoding)
        """
        # obs_list = []
        # for agent in self.agents:
        #     grid, vis_mask = self.gen_obs_grid(agent)

        #     obs_list.append({
        #         'image': grid.encode(vis_mask),
        #         'direction': agent.dir,
        #         'mission': agent.mission
        #     })

        return [self.gen_agent_obs(agent) for agent in self.agents]
        # return obs_list

    # def get_obs_render(self, obs, agent, tile_size=TILE_PIXELS//2):
    #     grid, vis_mask = MultiGrid.decode(obs)

    def __str__(self):
        return self.grid.__str__()

    def step(self, actions):
        assert len(actions) == len(self.agents)
        rewards = np.zeros((len(self.agents,)), dtype=np.float)

        self.step_count += 1

        wasteds = []

        for agent_no, (agent, action) in enumerate(zip(self.agents, actions)):
            wasted = False
            if agent.active:

                cur_pos = agent.pos
                cur_cell = self.grid.get(*cur_pos)
                fwd_pos = agent.front_pos
                fwd_cell = self.grid.get(*fwd_pos)

                # Rotate left
                if action == agent.actions.left:
                    agent.dir = (agent.dir - 1) % 4

                # Rotate right
                elif action == agent.actions.right:
                    agent.dir = (agent.dir + 1) % 4

                # Move forward
                elif action == agent.actions.forward:
                    # Under these conditions, the agent can move forward.
                    if (fwd_cell is None) or fwd_cell.can_overlap():

                        # Move the agent to the forward cell
                        agent.pos = fwd_pos

                        if fwd_cell is None:
                            self.grid.set(*fwd_pos, agent)
                        elif fwd_cell.can_overlap():
                            fwd_cell.agent = agent

                        if cur_cell == agent:
                            self.grid.set(*cur_pos, None)
                        else:
                            cur_cell.agent = None
                    else:
                        wasted = True

                    if isinstance(fwd_cell, Goal):  # No extra wasting logic
                        rewards[agent_no] += fwd_cell.reward
                        agent.done = True
                        fwd_cell.agent = None

                    if isinstance(fwd_cell, Lava):
                        agent.done = True

                # Pick up an object
                elif action == agent.actions.pickup:
                    if fwd_cell and fwd_cell.can_pickup():
                        if agent.carrying is None:
                            agent.carrying = fwd_cell
                            agent.carrying.cur_pos = np.array([-1, -1])
                            self.grid.set(*fwd_pos, None)
                    else:
                        wasted = True

                # Drop an object
                elif action == agent.actions.drop:
                    if not fwd_cell and agent.carrying:
                        self.grid.set(*fwd_pos, agent.carrying)
                        agent.carrying.cur_pos = fwd_pos
                        agent.carrying = None
                    else:
                        wasted = True

                # Toggle/activate an object
                elif action == agent.actions.toggle:
                    if fwd_cell:
                        wasted = bool(fwd_cell.toggle(agent, fwd_pos))
                    else:
                        wasted = True

                # Done action (not used by default)
                elif action == agent.actions.done:
                    # dones[agent_no] = True
                    wasted = True

                else:
                    raise ValueError(f"Environment can't handle action {action}.")
            wasteds.append(wasted)

        done = np.array([agent.done for agent in self.agents], dtype=np.bool)
        if self.step_count >= self.max_steps:
            done[:] = True

        if self.done_condition is None:
            pass
        elif self.done_condition == "any":
            done = any(done)
        elif self.done_condition == "all":
            done = all(done)

        obs = [self.gen_agent_obs(agent) for agent in self.agents]

        wasteds = np.array(wasteds, dtype=np.bool)

        return obs, rewards, done, wasteds

    @property
    def agent_positions(self):
        return [
            tuple(agent.pos) if agent.pos is not None else None for agent in self.agents
        ]

    def place_obj(self, obj, top=None, size=None, reject_fn=None, max_tries=math.inf):
        max_tries = int(max(1, min(max_tries, 1e5)))
        if top is None:
            top = (0, 0)
        else:
            top = (max(top[0], 0), max(top[1], 0))
        if size is None:
            size = (self.grid.width, self.grid.height)

        agent_positions = self.agent_positions
        for try_no in range(max_tries):
            pos = (
                self._rand_int(top[0], min(top[0] + size[0], self.grid.width)),
                self._rand_int(top[1], min(top[1] + size[1], self.grid.height)),
            )

            if (
                (self.grid.get(*pos) is None)
                and (pos not in agent_positions)
                and (reject_fn is None or (not reject_fn(pos)))
            ):
                break
        else:
            raise RecursionError("Rejection sampling failed in place_obj.")

        self.grid.set(*pos, obj)
        if obj is not None:
            obj.init_pos = pos
            obj.cur_pos = pos

        return pos

    def put_obj(self, obj, i, j):
        """
        Put an object at a specific position in the grid
        """

        self.grid.set(i, j, obj)
        obj.init_pos = (i, j)
        obj.cur_pos = (i, j)

    def place_agent(self, agent, top=None, size=None, rand_dir=True, max_tries=100):
        agent.pos = self.place_obj(agent, top=top, size=size, max_tries=max_tries)
        if rand_dir:
            agent.dir = self._rand_int(0, 4)
        return agent

    def place_agents(self, top=None, size=None, rand_dir=True, max_tries=100):
        for agent in self.agents:
            self.place_agent(
                agent, top=top, size=size, rand_dir=rand_dir, max_tries=max_tries
            )
            if hasattr(self, "mission"):
                agent.mission = self.mission

    def render(
        self,
        mode="human",
        close=False,
        highlight=True,
        tile_size=TILE_PIXELS,
        show_agent_views=True,
        max_agents_per_col=3,
    ):
        """
        Render the whole-grid human view
        """

        if close:
            if self.window:
                self.window.close()
            return

        if mode == "human" and not self.window:
            from gym.envs.classic_control.rendering import SimpleImageViewer

            self.window = SimpleImageViewer()
            # self.window.show(block=False)

        # Compute which cells are visible to the agent
        highlight_mask = np.full((self.width, self.height), False, dtype=np.bool)
        for agent in self.agents:
            xlow, ylow, xhigh, yhigh = agent.get_view_exts()
            if agent.active:
                highlight_mask[
                    max(0, xlow) : min(self.grid.width, xhigh),
                    max(0, ylow) : min(self.grid.height, yhigh),
                ] = True

        # Render the whole grid
        img = self.grid.render(
            tile_size, highlight_mask=highlight_mask if highlight else None
        )
        rescale = lambda X, rescale_factor=2: np.kron(
            X, np.ones((rescale_factor, rescale_factor, 1))
        )

        if show_agent_views:
            agent_no = 0
            cols = []
            rescale_factor = None

            for col_no in range(len(self.agents) // (max_agents_per_col + 1) + 1):
                col_count = min(max_agents_per_col, len(self.agents) - agent_no)
                views = []
                for row_no in range(col_count):
                    tmp = self.gen_agent_obs(self.agents[agent_no])
                    if rescale_factor is None:
                        rescale_factor = img.shape[0] // (
                            min(3, col_count) * tmp.shape[1]
                        )
                    views.append(rescale(tmp, rescale_factor))
                    agent_no += 1

                col_width = max([v.shape[1] for v in views])
                img_col = np.zeros((img.shape[0], col_width, 3), dtype=np.uint8)
                for k, view in enumerate(views):
                    start_x = (k * img.shape[0]) // len(views)
                    start_y = 0  # (k*img.shape[1])//len(views)
                    dx, dy = view.shape[:2]
                    img_col[start_x : start_x + dx, start_y : start_y + dy, :] = view
                cols.append(img_col)
            img = np.concatenate((img, *cols), axis=1)

        if mode == "human":
            self.window.imshow(img)

        return img
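The rescale helper in render uses np.kron to blow each pixel up into a rescale_factor x rescale_factor block. A quick self-contained check of that trick:

import numpy as np

def rescale(X, rescale_factor=2):
    # Repeat every pixel of an (H, W, C) image into a factor x factor block.
    return np.kron(X, np.ones((rescale_factor, rescale_factor, 1)))

img = np.arange(12).reshape(2, 2, 3)   # tiny 2x2 RGB image
big = rescale(img, rescale_factor=3)
print(img.shape, '->', big.shape)      # (2, 2, 3) -> (6, 6, 3)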
Example #22
0
class PyColabEnv(gym.Env):

    metadata = {
        'render.modes': ['human', 'rgb_array'],
    }

    def __init__(self,
                 max_iterations,
                 obs_type,
                 default_reward,
                 action_space,
                 act_null_value=4,
                 delay=30,
                 resize_scale=8,
                 crop_window=[5, 5]):
        """Create an `PyColabEnv` adapter to a `pycolab` game as a `gym.Env`.

        You can access the `pycolab.Engine` instance with `env.current_game`.

        Args:
            max_iterations: maximum number of steps.
            obs_type: type of observation to return.
            default_reward: default reward if reward is None returned by the
                `pycolab` game.
            action_space: the action `Space` of the environment.
            delay: renderer delay.
            resize_scale: number of pixels per observation pixel.
                Used only by the renderer.
            crop_window: dimensions of observation cropping.
        """
        assert max_iterations > 0
        assert isinstance(default_reward, numbers.Number)

        self._max_iterations = max_iterations
        self._default_reward = default_reward

        # At this point, the game would only want to access the random
        # property, although it is set to None initially.
        self.np_random = None

        self._colors = self.make_colors()
        test_game = self.make_game()
        test_game.the_plot.info = {}
        observations, _, _ = test_game.its_showtime()
        layers = list(observations.layers.keys())
        not_ordered = list(set(layers) - set(test_game.z_order))
        self._render_order = list(reversed(not_ordered + test_game.z_order))

        # Create the observation space.
        self.obs_type = obs_type

        if self.obs_type == 'mask':
            self.observation_space = spaces.Box(
                0., 1., [len(self.state_layer_chars)] +
                crop_window)  # don't count empty space layer
        elif self.obs_type == 'rgb':
            self.observation_space = spaces.Box(
                0., 255.,
                [crop_window[0] * resize_scale, crop_window[1] * resize_scale
                 ] + [3])
        self.action_space = action_space
        self.act_null_value = act_null_value

        self.current_game = None
        self._croppers = []
        self._state = None

        self._last_uncropped_observations = None
        self._empty_uncropped_board = None
        self._last_cropped_observations = None
        self._empty_cropped_board = None

        self._last_reward = None
        self._game_over = False

        self.viewer = None
        self.resize_scale = resize_scale
        self.delay = delay

        # Metrics
        self.visitation_frequency = {char: 0 for char in self.objects}
        self.first_visit_time = {char: 500 for char in self.objects}

        # Heatmaps
        self.episodes = 0  # number of episodes run (to determine when to save heatmaps)
        self.heatmap_save_freq = 3  # save heatmaps every 3 episodes
        self.heatmap = np.ones(
            (5, 5))  # stores counts each episode (5x5 is a placeholder)

    def pycolab_init(self, logdir, log_heatmaps):
        self.log_heatmaps = log_heatmaps
        root_path = os.path.abspath(__file__).split('/')[1:]
        root_path = root_path[:root_path.index('curiosity_baselines') + 1]
        self.heatmap_path = '/' + '/'.join(root_path) + '/' + '/'.join(
            logdir.split('/')[1:]) + '/heatmaps'
        if log_heatmaps and not os.path.isdir(self.heatmap_path):
            os.makedirs(self.heatmap_path)

    @abc.abstractmethod
    def make_game(self):
        """Function that creates a new pycolab game.

        Returns:
            pycolab.Engine.
        """
        pass

    def make_colors(self):
        """Functions that returns colors.

        Returns:
            Dictionary mapping key name to `tuple(R, G, B)`.
        """

        return {
            'P': (255., 255., 255.),
            'a': (175., 255., 15.),
            'b': (21., 0., 255.),
            'c': (250., 0., 129.),
            'd': (0., 250., 71.),
            'e': (255., 0., 0.),
            'f': (252., 28., 3.),
            'g': (136., 3., 252.),
            'h': (20., 145., 60.),
            '#': (61., 61., 61.),
            '@': (255., 255., 0.),
            ' ': (0., 0., 0.)
        }

    def _paint_board(self, layers, cropped=False):
        """Method to privately paint layers to RGB.

        Args:
            layers: a dictionary mapping a character to the respective curtain.
            cropped: whether or not this is being called to paint cropped or
                     uncropped images.

        Returns:
            3D np.array (np.uint32) representing the RGB of the observation
                layers.
        """
        if not cropped:
            board_shape = self._last_uncropped_observations.board.shape
        else:
            board_shape = self._last_cropped_observations.board.shape

        board = np.zeros(list(board_shape) + [3], np.uint32)
        board_mask = np.zeros(list(board_shape) + [3], np.bool)

        for key in self._render_order:
            color = self._colors.get(key, (0, 0, 0))
            color = np.reshape(color, [1, 1, -1]).astype(np.uint32)

            # Broadcast the layer to [H, W, C].
            board_layer_mask = np.array(layers[key])[..., None]
            board_layer_mask = np.repeat(board_layer_mask, 3, axis=-1)

            # Update the board with the new layer.
            board = np.where(np.logical_not(board_mask),
                             board_layer_mask * color, board)

            # Update the mask.
            board_mask = np.logical_or(board_layer_mask, board_mask)
        return board

    def _update_for_game_step(self, observations, reward):
        """Update internal state with data from an environment interaction."""
        # disentangled one hot state

        if self.obs_type == 'mask':
            self._state = []
            for char in self.state_layer_chars:
                if char != ' ':
                    mask = observations.layers[char].astype(float)
                    if char in self.objects and 1. in mask:
                        self.visitation_frequency[char] += 1
                    self._state.append(mask)
            self._state = np.array(self._state)

        elif self.obs_type == 'rgb':
            rgb_img = self._paint_board(observations.layers,
                                        cropped=True).astype(float)
            self._state = self.resize(rgb_img)
            for char in self.state_layer_chars:
                if char != ' ':
                    mask = observations.layers[char].astype(float)
                    if char in self.objects and 1. in mask:
                        self.visitation_frequency[char] += 1

        # update heatmap metric
        if self.log_heatmaps == True:
            pr, pc = self.current_game.things['P'].position
            self.heatmap[pr, pc] += 1

        self._last_reward = reward if reward is not None else \
            self._default_reward

        self._game_over = self.current_game.game_over

        if self.current_game.the_plot.frame >= self._max_iterations:
            self._game_over = True

    def reset(self):
        """Start a new episode."""
        self.current_game = self.make_game()
        for cropper in self._croppers:
            cropper.set_engine(self.current_game)
        self._colors = self.make_colors()
        self.current_game.the_plot.info = {}
        self._game_over = None
        self._last_observations = None
        self._last_reward = None

        observations, reward, _ = self.current_game.its_showtime()
        self._last_uncropped_observations = observations
        self._empty_uncropped_board = np.zeros_like(
            self._last_uncropped_observations.board)
        if len(self._croppers) > 0:
            observations = [
                cropper.crop(observations) for cropper in self._croppers
            ][0]
            self._last_cropped_observations = observations
            self._empty_cropped_board = np.zeros_like(
                self._last_cropped_observations.board)

        # save and reset metrics
        self.visitation_frequency = {char: 0 for char in self.objects}
        if self.log_heatmaps and self.episodes % self.heatmap_save_freq == 0:
            np.save('{}/{}.npy'.format(self.heatmap_path, self.episodes),
                    self.heatmap)
            heatmap_normed = self.heatmap / np.linalg.norm(self.heatmap)
            plt.imsave('{}/{}.png'.format(self.heatmap_path, self.episodes),
                       heatmap_normed,
                       cmap='afmhot',
                       vmin=0.0,
                       vmax=1.0)
        self.episodes += 1
        self.heatmap = np.zeros(self._last_uncropped_observations.board.shape)

        # run update
        self._update_for_game_step(observations, reward)
        return self._state

    def step(self, action):
        """Apply action, step the world forward, and return observations.

        Args:
            action: the desired action to apply to the environment.

        Returns:
            state, reward, done, info.
        """
        if self.current_game is None:
            logger.warn("Episode has already ended, call `reset` instead..")
            self._state = None
            reward = self._last_reward
            done = self._game_over
            return self._state, reward, done, {}

        # Execute the action in pycolab.
        self.current_game.the_plot.info = {}
        observations, reward, _ = self.current_game.play(action)
        self._last_uncropped_observations = observations
        self._empty_uncropped_board = np.zeros_like(
            self._last_uncropped_observations.board)

        # Crop and update
        if len(self._croppers) > 0:
            observations = [
                cropper.crop(observations) for cropper in self._croppers
            ][0]
            self._last_cropped_observations = observations
            self._empty_cropped_board = np.zeros_like(
                self._last_cropped_observations.board)

        self._update_for_game_step(observations, reward)
        info = self.current_game.the_plot.info

        # Add custom metrics
        info['visitation_frequency'] = self.visitation_frequency
        info['first_time_visit'] = self.first_visit_time

        # Check the current status of the game.
        reward = self._last_reward
        done = self._game_over

        if self._game_over:
            self.current_game = None

        return self._state, reward, done, info

    def render(self, mode='rgb_array', close=False):
        """Render the board to an image viewer or an np.array.

        Args:
            mode: One of the following modes:
                - 'human': render to an image viewer.
                - 'rgb_array': render to an RGB np.array (np.uint8)

        Returns:
            3D np.array (np.uint8) in 'rgb_array' mode, or the boolean
                `viewer.isopen` flag in 'human' mode.
        """
        img = self._empty_uncropped_board
        if self._last_uncropped_observations:
            img = self._last_uncropped_observations.board
            layers = self._last_uncropped_observations.layers
            if self._colors:
                img = self._paint_board(layers, cropped=False)
            else:
                assert img is not None, '`board` must not be `None`.'

        img = self.resize(img)

        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            if self.viewer is None:
                from gym.envs.classic_control.rendering import (
                    SimpleImageViewer)
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(img)
            time.sleep(self.delay / 1e3)
            return self.viewer.isopen

    def resize(self, img):
        img = _repeat_axes(img, self.resize_scale, axis=[0, 1])
        if len(img.shape) != 3:
            img = np.repeat(img[..., None], 3, axis=-1)
        return img.astype(np.uint8)

    def seed(self, seed=None):
        """Seeds the environment.

        Args:
            seed: seed of the random engine.

        Returns:
            [seed].
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def close(self):
        """Tears down the renderer."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None
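

# ---------------------------------------------------------------------------
# Illustration only (not part of the original snippet): `_repeat_axes` is a
# module-level helper used by `resize` above but defined elsewhere in this
# file. Judging from how it is called, it performs nearest-neighbour
# upscaling by integer repetition along the given axes; a minimal sketch
# consistent with that usage is shown below.
def _repeat_axes_sketch(x, factor, axis=(0, 1)):
    """Repeat `x` `factor` times along each axis in `axis` (assumed behaviour)."""
    for ax in axis:
        x = np.repeat(x, factor, axis=ax)
    return x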
Example #23
0
class KrazyGridWorld:
    def __init__(self,
                 screen_height,
                 grid_squares_per_row=10,
                 one_hot_obs=True,
                 seed=42,
                 task_seed=None,
                 init_pos_seed=None,
                 death_square_percentage=0.1,
                 ice_sq_perc=0.05,
                 num_goals=3,
                 min_goal_distance=2,
                 max_goal_distance=np.inf,
                 num_steps_before_energy_needed=11,
                 energy_replenish=8,
                 energy_sq_perc=0.05,
                 num_transporters=1,
                 sparse_rewards=True,
                 image_obs=True,
                 use_local_obs=False):

        if task_seed is None:
            task_seed = seed

        if init_pos_seed is None:
            init_pos_seed = seed

        self.init_pos_rng = np.random.RandomState(init_pos_seed)
        self.task_rng = np.random.RandomState(task_seed)
        random.seed(task_seed)

        self.one_hot_obs = one_hot_obs
        self.image_obs = image_obs
        self.use_local_obs = use_local_obs
        self.screen_dim = (screen_height, screen_height)  # width and height

        self.tile_types = TileTypes()
        self.agent = Agent(
            num_steps_until_energy_needed=num_steps_before_energy_needed,
            energy_replenish=energy_replenish)
        self.game_grid = GameGrid(grid_squares_per_row=grid_squares_per_row,
                                  tile_types=self.tile_types,
                                  agent=self.agent,
                                  task_rng=self.task_rng,
                                  death_sq_perc=death_square_percentage,
                                  energy_sq_perc=energy_sq_perc,
                                  ice_sq_perc=ice_sq_perc,
                                  num_goals=num_goals,
                                  min_goal_distance=min_goal_distance,
                                  max_goal_distance=max_goal_distance,
                                  num_transporters=num_transporters)

        self.num_goals_obtained = 0
        self.sparse_reward = sparse_rewards

        self.reset_task()

        self.simple_image_viewer = None
        self.last_im_obs = None

    def reset(self,
              reset_agent_start_pos=False,
              reset_board=False,
              reset_colors=False,
              reset_dynamics=False):
        self.agent.dead = False
        self.agent.agent_position = copy.deepcopy(
            self.agent.agent_position_init)
        self.agent.num_steps_until_energy_needed = copy.deepcopy(
            self.agent.energy_init)
        self.num_goals_obtained = 0
        self.game_grid.grid_np = copy.deepcopy(self.game_grid.game_grid_init)
        if reset_colors:
            self.tile_types.reset_colors()
        if reset_dynamics:
            self.agent.change_dynamics()
        if reset_board:
            self.reset_task()
        if reset_agent_start_pos:
            self.reset_agent_start_position()
        return self.get_obs()

    def reset_task(self):
        # reset the entire board and agent start position, generating a new MDP.
        self.game_grid.get_new_game_grid()
        self.reset_agent_start_position()

    def reset_agent_start_position(self):
        # keep the previous board but update the agents starting position.
        # keeps the previous MDP but samples x_0.
        new_start = self.game_grid.get_one_non_agent_square()
        self.agent.agent_position = new_start
        self.agent.agent_position_init = new_start

    def get_obs(self):
        if self.image_obs:
            return self.get_img_obs()
        else:
            return None

    def step(self, a, render=False):
        if self.agent.dead is False:
            proposed_step = self.agent.try_step(a)
            if self.game_grid.is_position_legal(proposed_step):
                self.agent.agent_position = proposed_step
            self.check_dead()
            self.check_at_goal()
            self.check_at_energy()
            self.check_at_transporter()

            #  handle sliding over ice squares
            while True:
                if self.check_at_ice_square() is False:
                    break
                else:
                    #  don't take energy for going over ice.
                    self.agent.num_steps_until_energy_needed += 1
                    proposed_step_nu = self.agent.try_step(a)
                    if self.game_grid.is_position_legal(proposed_step_nu):
                        self.step(a)
                    else:
                        break

            if self.agent.num_steps_until_energy_needed < 1:
                self.agent.dead = True

            if render:
                self.render()
        return self.get_obs(), self.get_reward(), self.agent.dead, dict()

    def check_dead(self):
        agent_pos = self.agent.agent_position
        game_grid = self.game_grid.grid_np
        if game_grid[agent_pos[0], agent_pos[1]] == self.tile_types.death:
            self.agent.dead = True

    def check_at_goal(self):
        if self.game_grid.grid_np[
                self.agent.agent_position[0],
                self.agent.agent_position[1]] == self.tile_types.goal:
            self.game_grid.grid_np[
                self.agent.agent_position[0],
                self.agent.agent_position[1]] = self.tile_types.normal
            self.num_goals_obtained += 1

    def check_at_energy(self):
        if self.game_grid.grid_np[
                self.agent.agent_position[0],
                self.agent.agent_position[1]] == self.tile_types.energy:
            self.game_grid.grid_np[
                self.agent.agent_position[0],
                self.agent.agent_position[1]] = self.tile_types.normal
            self.agent.give_energy()

    def check_at_transporter(self):
        transport_sq = None
        if self.game_grid.grid_np[
                self.agent.agent_position[0],
                self.agent.agent_position[1]] == self.tile_types.transporter:
            for tr in self.game_grid.transporters:
                if self.agent.agent_position[0] == tr[0][
                        0] and self.agent.agent_position[1] == tr[0][1]:
                    transport_sq = tr[1]
                elif self.agent.agent_position[0] == tr[1][
                        0] and self.agent.agent_position[1] == tr[1][1]:
                    transport_sq = tr[0]
            if transport_sq is not None:
                self.agent.agent_position = [transport_sq[0], transport_sq[1]]

    def check_at_ice_square(self):
        if self.game_grid.grid_np[
                self.agent.agent_position[0],
                self.agent.agent_position[1]] == self.tile_types.ice:
            return True
        return False

    def render(self):
        if self.simple_image_viewer is None:
            from gym.envs.classic_control.rendering import SimpleImageViewer
            self.simple_image_viewer = SimpleImageViewer()
        im_obs = self.get_img_obs()
        self.simple_image_viewer.imshow(im_obs)
        time.sleep(0.075)

    def get_state_obs(self):
        grid_np = copy.deepcopy(self.game_grid.grid_np)
        agent_p = self.agent.agent_position
        grid_np[agent_p[0], agent_p[1]] = self.tile_types.agent
        grid_np = grid_np.astype(np.uint8)
        #agent_p = np.array(self.agent.agent_position)
        if self.one_hot_obs:
            n_values = np.max(grid_np) + 1
            grid_np = np.eye(n_values)[grid_np]
            #agent_p_temp = np.zeros((self.game_grid.grid_squares_per_row, self.game_grid.grid_squares_per_row, 1))
            #agent_p_temp[agent_p[0], agent_p[1], :] = 1

        if self.use_local_obs:
            neighbors = []
            x, y = self.agent.agent_position
            for _i, _j in [(-1, -1), (0, -1), (1, -1), (1, 0), (1, 1), (0, 1),
                           (-1, 1), (-1, 0)]:
                i, j = (_i + x, _j + y)
                if 0 <= i < self.game_grid.grid_squares_per_row and 0 <= j < self.game_grid.grid_squares_per_row:
                    neighbors.append([j, i])
                else:
                    neighbors.append(None)

            grid_np = np.array(neighbors)

        return grid_np.flatten()

    def get_img_obs(self):
        grid_np = copy.deepcopy(self.game_grid.grid_np)
        grid_np[self.agent.agent_position[0],
                self.agent.agent_position[1]] = self.tile_types.agent
        fake_img = np.zeros((self.game_grid.grid_squares_per_row,
                             self.game_grid.grid_squares_per_row, 3))
        for i in range(len(self.tile_types.all_tt)):
            is_grid_sq_color_i = grid_np == self.tile_types.all_tt[i]
            one_idxs = is_grid_sq_color_i.astype(int)
            one_idxs = np.tile(np.expand_dims(one_idxs, -1), 3)
            one_idxs = one_idxs * np.array(self.tile_types.colors[i].value)
            fake_img += one_idxs

        if self.use_local_obs:
            neighbors = []
            x, y = self.agent.agent_position
            valid_idxs = np.zeros_like(fake_img)
            valid_idxs[x, y] = 1.0
            for _i, _j in [(-1, -1), (0, -1), (1, -1), (1, 0), (1, 1), (0, 1),
                           (-1, 1), (-1, 0)]:
                i, j = (_i + x, _j + y)
                if 0 <= i < self.game_grid.grid_squares_per_row and 0 <= j < self.game_grid.grid_squares_per_row:
                    #neighbors.append([j, i])
                    valid_idxs[i, j] = 1.0
                else:
                    neighbors.append(None)
            fake_img *= valid_idxs

        res = cv2.resize(fake_img,
                         dsize=(256, 256),
                         interpolation=cv2.INTER_NEAREST)
        res = res.astype(np.uint8)
        return res

    def get_reward(self):
        if self.sparse_reward:
            return 0 + self.num_goals_obtained
        else:
            rew = 0
            for goal in self.game_grid.goal_squares:
                dist_1 = abs(goal[0] - self.agent.agent_position[0])
                dist_2 = abs(goal[1] - self.agent.agent_position[1])
                rew = rew + dist_1 + dist_2
            rew = -1.0 * rew
            rew = rew + 3.0 * self.num_goals_obtained
            return rew

    def close(self):
        if self.simple_image_viewer is not None:
            self.simple_image_viewer.close()
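

# ---------------------------------------------------------------------------
# Usage sketch for KrazyGridWorld (added for illustration; not part of the
# original snippet). The action encoding handled by `Agent.try_step` is
# defined elsewhere, so sampling integers in [0, 4) is an assumption.
if __name__ == '__main__':
    env = KrazyGridWorld(screen_height=256, grid_squares_per_row=10,
                         num_goals=3, image_obs=True)
    obs = env.reset()
    done = False
    while not done:
        action = np.random.randint(0, 4)  # hypothetical action range
        obs, reward, done, info = env.step(action, render=False)
    env.close()

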
class PyColabEnv(gym.Env):

    metadata = {
        'render.modes': ['human', 'rgb_array'],
    }

    def __init__(self,
                 max_iterations,
                 obs_type,
                 default_reward,
                 action_space,
                 act_null_value=4,
                 delay=30,
                 resize_scale=8,
                 crop_window=[5, 5],
                 visitable_states=0,
                 color_palette=0,
                 reward_switch=[],
                 reward_config=dict(),
                 switch_perturbations=[],
                 dimensions=(19, 19)):
        """Create an `PyColabEnv` adapter to a `pycolab` game as a `gym.Env`.

        You can access the `pycolab.Engine` instance with `env.current_game`.

        Args:
            max_iterations: maximum number of steps.
            obs_type: type of observation to return.
            default_reward: default reward returned when the `pycolab` game
                returns a reward of None.
            action_space: the action `Space` of the environment.
            delay: renderer delay.
            resize_scale: number of pixels per observation pixel.
                Used only by the renderer.
            crop_window: dimensions of observation cropping.
            visitable_states: number of states the agent can visit.
            color_palette: which color palette to use for objects.
            reward_switch: list of objects or coords if the reward function switches.
            reward_config: list of objects and their associated rewards.
            switch_perturbations: color perturbations if a background switch is applied.
            dimensions: dimensions of the game board.

        A minimal subclassing sketch is provided after this class definition.
        """
        assert max_iterations > 0
        assert isinstance(default_reward, numbers.Number)

        self._max_iterations = max_iterations

        # Reward specs
        self._default_reward = default_reward
        self._switch = 0
        self._reward_switch = reward_switch
        self._reward_target = None
        self._switch_perturbations = switch_perturbations
        self._reward_config = reward_config

        # At this point, the game would only want to access the random
        # property, although it is set to None initially.
        self.np_random = None
        self._color_palette = color_palette
        self._colors = self.make_colors()
        test_game = self.make_game(reward_config=self._reward_config)
        test_game.the_plot.info = {}
        observations, _, _ = test_game.its_showtime()
        layers = list(observations.layers.keys())
        not_ordered = list(set(layers) - set(test_game.z_order))
        self._render_order = list(reversed(not_ordered + test_game.z_order))

        # Prepare observation space.
        self.obs_type = obs_type
        self.height, self.width = dimensions
        self.crop_window = crop_window
        self.action_space = action_space
        if self.obs_type == 'mask':
            self.observation_space = spaces.Box(
                0., 1., [len(self.state_layer_chars)] +
                self.crop_window)  # don't count empty space layer
        elif self.obs_type == 'rgb':
            self.observation_space = spaces.Box(
                0., 255.,
                [self.crop_window[0] * 17, self.crop_window[1] * 17] + [3])
        elif self.obs_type == 'rgb_full':
            if 84 % self.width == 0:
                self.observation_space = spaces.Box(0., 255., [84, 84] + [3])
            else:
                self.observation_space = spaces.Box(0., 255., [85, 85] + [3])
        self.act_null_value = act_null_value
        self.visitable_states = visitable_states

        self.current_game = None
        self._croppers = []
        self._state = None

        self._last_uncropped_observations = None
        self._empty_uncropped_board = None
        self._last_cropped_observations = None
        self._empty_cropped_board = None

        self._last_reward = None
        self._game_over = False

        self.viewer = None
        self.delay = delay

        # Metrics
        self.visitation_frequency = {char: 0 for char in self.objects}
        self.first_visit_time = {char: 500 for char in self.objects}
        self.visitation_entropy = 0
        self.num_obj_eps = {char: 0 for char in self.objects}
        self.coverage = 0

    def heatmap_init(self, logdir, log_heatmaps):
        self.episodes = 0  # number of episodes run (to determine when to save heatmaps)
        self.heatmap_save_freq = 3  # save heatmaps every 3 episodes
        self.heatmap = np.zeros(
            (5, 5))  # stores counts each episode (5x5 is a placeholder)
        self.log_heatmaps = log_heatmaps
        root_path = os.path.abspath(__file__).split('/')[1:]
        root_path = root_path[:root_path.index('curiosity_baselines') + 1]
        self.heatmap_path = '/' + '/'.join(root_path) + '/' + '/'.join(
            logdir.split('/')[1:]) + '/heatmaps'
        self.startup = True
        if not os.path.isdir(self.heatmap_path) and log_heatmaps:
            os.makedirs(self.heatmap_path)
        elif os.path.isdir(self.heatmap_path):
            heatmaps = os.listdir(self.heatmap_path)
            if len(heatmaps) != 0:
                sorted_images = sorted(heatmaps,
                                       key=lambda img: int(img.split('.')[0]))
                last_episode = int(sorted_images[-1].split('.')[0])
                self.episodes = last_episode

    def obs_init(self, resize_scale):
        self.resize_scale = resize_scale

    @abc.abstractmethod
    def make_game(self):
        """Function that creates a new pycolab game.

        Returns:
            pycolab.Engine.
        """
        pass

    def make_colors(self):
        """Functions that returns colors.

        Returns:
            Dictionary mapping key name to `tuple(R, G, B)`.
        """
        if self._color_palette == 0:
            return {
                'P': (255., 255., 255.),
                'a': (175., 255., 15.),
                'b': (21., 0., 255.),
                'c': (255., 0., 0.),
                'd': (19., 139., 67.),
                'e': (250., 0., 129.),
                'f': (114., 206., 227.),
                'g': (136., 3., 252.),
                'h': (245., 119., 34.),
                '#': (61., 61., 61.),
                '@': (90., 90., 90.),
                ' ': (0., 0., 0.),
                '.': (110., 35., 35.)
            }
        elif self._color_palette == 1:
            return {
                'P': (255., 255., 255.),
                'a': (136., 3., 252.),
                'b': (21., 0., 255.),
                'c': (255., 0., 0.),
                'd': (19., 139., 67.),
                'e': (150., 0., 129.),
                '#': (61., 61., 61.),
                '@': (90., 90., 90.),
                ' ': (0., 0., 0.),
                '.': (110., 35., 35.)
            }
        elif self._color_palette == 2:
            return {
                'P': (255., 255., 255.),
                'a': (255., 0., 0.),
                'b': (255., 0., 0.),
                'c': (255., 0., 0.),
                'd': (255., 0., 0.),
                'e': (255., 0., 0.),
                'f': (255., 0., 0.),
                'g': (255., 0., 0.),
                'h': (255., 0., 0.),
                '#': (61., 61., 61.),
                '@': (90., 90., 90.),
                ' ': (0., 0., 0.),
                '.': (110., 35., 35.)
            }
        elif self._color_palette == 3:
            return {
                'P': (255., 255., 255.),
                'a': (30., 60., 90.),
                'b': (90., 60., 30.),
                'c': (90., 30., 60.),
                'd': (10., 100., 70.),
                'e': (10., 10., 160.),
                'f': (25., 130., 25.),
                'g': (50., 40., 90.),
                'h': (130., 25., 25.),
                '#': (61., 61., 61.),
                '@': (90., 90., 90.),
                ' ': (0., 0., 0.),
                '.': (110., 35., 35.)
            }

    def _check_visit(self, char):
        """Private method to check if the player
        has visited "char". A visit is when the
        character is within the 5x5 tile window
        around the player.
        """
        pr, pc = self.current_game.things['P'].position
        cr, cc = self.current_game.things[char].position
        if (pr - 2) <= cr <= (pr + 2) and (pc - 2) <= cc <= (pc + 2):
            return True
        return False

    def _paint_board(self, layers, cropped=False):
        """Method to privately paint layers to RGB.

        Args:
            layers: a dictionary mapping a character to the respective curtain.
            cropped: whether or not this is being called to paint cropped or
                     uncropped images.

        Returns:
            3D np.array (np.uint32) representing the RGB of the observation
                layers.
        """
        if not cropped:
            board_shape = self._last_uncropped_observations.board.shape
        else:
            board_shape = self._last_cropped_observations.board.shape

        board = np.zeros(list(board_shape) + [3], np.uint32)
        board_mask = np.zeros(list(board_shape) + [3], bool)  # np.bool is deprecated

        for key in self._render_order:

            color = self._colors.get(key, (0, 0, 0))
            color = np.reshape(color, [1, 1, -1]).astype(np.uint32)

            # Broadcast the layer to [H, W, C].
            board_layer_mask = np.array(layers[key])[..., None]
            board_layer_mask = np.repeat(board_layer_mask, 3, axis=-1)

            # '@' corresponds to white noise or a changing background
            perturbation = np.zeros(board_layer_mask.shape)
            if key == '@':
                if len(self._reward_switch) > 0:
                    perturbation = self._switch_perturbations[self._switch]
                else:
                    h, w = board_layer_mask.shape[:2]
                    perturbation = np.random.randint(-15, 15, (h, w, 1))

            # Update the board with the new layer.
            board = np.where(np.logical_not(board_mask),
                             board_layer_mask * color + perturbation, board)

            # Update the mask.
            board_mask = np.logical_or(board_layer_mask, board_mask)
        return board

    def _update_for_game_step(self, observations, reward):
        """Update internal state with data from an environment interaction."""
        # disentangled one hot state
        if self.obs_type == 'mask':
            self._state = []
            for char in self.state_layer_chars:
                if char in self.objects:
                    mask = observations.layers[char].astype(float)
                    if char in self.objects and 1. in mask:
                        self.visitation_frequency[char] += 1
                    self._state.append(mask)
            self._state = np.array(self._state)

        elif 'rgb' in self.obs_type:
            if self.obs_type == 'rgb':
                rgb_img = self._paint_board(observations.layers,
                                            cropped=True).astype(float)
            elif self.obs_type == 'rgb_full':
                rgb_img = self._paint_board(observations.layers,
                                            cropped=False).astype(float)
            self._state = self.resize(rgb_img)
            for char in self.state_layer_chars:
                if char in self.objects:
                    mask = observations.layers[char].astype(float)
                    if self._check_visit(char):
                        self.visitation_frequency[char] += 1

        # update heatmap metric
        if self.log_heatmaps:
            pr, pc = self.current_game.things['P'].position
            self.heatmap[pr, pc] += 1
            self.visitation_entropy = entropy(self.heatmap.flatten(),
                                              base=self.visitable_states)
            self.coverage = np.count_nonzero(
                self.heatmap) / self.visitable_states

        # update reward
        self._last_reward = reward if reward is not None else self._default_reward

        self._game_over = self.current_game.game_over

        if self.current_game.the_plot.frame >= self._max_iterations:
            self._game_over = True

    def step(self, action):
        """Apply action, step the world forward, and return observations.

        Args:
            action: the desired action to apply to the environment.

        Returns:
            state, reward, done, info.
        """
        if self.current_game is None:
            logger.warn("Episode has already ended, call `reset` instead..")
            self._state = None
            reward = self._last_reward
            done = self._game_over
            return self._state, reward, done, {}

        # Execute the action in pycolab.
        self.current_game.the_plot.info = {}
        observations, reward, _ = self.current_game.play(action)
        self._last_uncropped_observations = observations
        self._empty_uncropped_board = np.zeros_like(
            self._last_uncropped_observations.board)

        # Crop and update
        if len(self._croppers) > 0:
            observations = [
                cropper.crop(observations) for cropper in self._croppers
            ][0]
            self._last_cropped_observations = observations
            self._empty_cropped_board = np.zeros_like(
                self._last_cropped_observations.board)

        self._update_for_game_step(observations, reward)
        info = self.current_game.the_plot.info

        # Add custom metrics
        info['visitation_frequency'] = self.visitation_frequency
        info['first_time_visit'] = self.first_visit_time
        info['visitation_entropy'] = self.visitation_entropy
        info['coverage'] = self.coverage
        info['episodes'] = self.episodes
        info['num_obj_eps'] = self.num_obj_eps
        for ob in self.objects:
            pushes = getattr(self.current_game.things[ob], 'pushes', None)
            if pushes is not None:
                info['controllable_interactions'] = pushes

        # Check the current status of the game.
        reward = self._last_reward
        done = self._game_over

        if self._game_over:
            self.current_game = None

        return self._state, reward, done, info

    def reset(self):
        """Start a new episode."""
        if len(self._reward_switch) > 0:
            self._switch = np.random.randint(len(self._reward_switch))
            self._reward_target = self._reward_switch[self._switch]
            self._reward_config = {char: 0.0 for char in self._reward_switch}
            self._reward_config[self._reward_switch[self._switch]] = 1.0
        self.current_game = self.make_game(reward_config=self._reward_config)
        for cropper in self._croppers:
            cropper.set_engine(self.current_game)
        self._colors = self.make_colors()
        self.current_game.the_plot.info = {}
        self._game_over = None
        self._last_observations = None
        self._last_reward = None

        observations, reward, _ = self.current_game.its_showtime()
        self._last_uncropped_observations = observations
        self._empty_uncropped_board = np.zeros_like(
            self._last_uncropped_observations.board)
        if len(self._croppers) > 0:
            observations = [
                cropper.crop(observations) for cropper in self._croppers
            ][0]
            self._last_cropped_observations = observations
            self._empty_cropped_board = np.zeros_like(
                self._last_cropped_observations.board)

        # save and reset metrics
        for char in self.objects:
            if self.visitation_frequency[char] > 0:
                self.num_obj_eps[char] += 1
        self.visitation_frequency = {char: 0 for char in self.objects}
        if self.log_heatmaps and self.episodes % self.heatmap_save_freq == 0 and not self.startup:
            np.save('{}/{}.npy'.format(self.heatmap_path, self.episodes),
                    self.heatmap)
            heatmap_normed = self.heatmap / (
                np.linalg.norm(self.heatmap) + 1e-19)
            plt.imsave('{}/{}.png'.format(self.heatmap_path, self.episodes),
                       heatmap_normed,
                       cmap='afmhot',
                       vmin=0.0,
                       vmax=1.0)
        self.episodes += 1
        self.startup = False
        self.heatmap = np.zeros(self._last_uncropped_observations.board.shape)

        # run update
        self._update_for_game_step(observations, reward)
        return self._state

    def render(self, mode='rgb_array', close=False):
        """Render the board to an image viewer or an np.array.

        Args:
            mode: One of the following modes:
                - 'human': render to an image viewer.
                - 'rgb_array': render to an RGB np.array (np.uint8)

        Returns:
            3D np.array (np.uint8) in 'rgb_array' mode, or the boolean
                `viewer.isopen` flag in 'human' mode.
        """
        img = self._empty_uncropped_board
        if self._last_uncropped_observations:
            img = self._last_uncropped_observations.board
            layers = self._last_uncropped_observations.layers
            if self._colors:
                img = self._paint_board(layers, cropped=False)
            else:
                assert img is not None, '`board` must not be `None`.'

        img = self.resize(img, scale=17)

        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            if self.viewer is None:
                from gym.envs.classic_control.rendering import (
                    SimpleImageViewer)
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(img)
            time.sleep(self.delay / 1e3)
            return self.viewer.isopen

    def resize(self, img, scale=None):
        if scale is None:
            img = _repeat_axes(img, self.resize_scale, axis=[0, 1])
        else:
            img = _repeat_axes(img, scale, axis=[0, 1])
        if len(img.shape) != 3:
            img = np.repeat(img[..., None], 3, axis=-1)
        return img.astype(np.uint8)

    def seed(self, seed=None):
        """Seeds the environment.

        Args:
            seed: seed of the random engine.

        Returns:
            [seed].
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def close(self):
        """Tears down the renderer."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None
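

# ---------------------------------------------------------------------------
# Subclassing sketch for PyColabEnv (added for illustration; not from the
# original source). `MyMazeEnv` and `my_maze.make_game` are hypothetical: a
# concrete subclass must provide `make_game`, plus the `objects` and
# `state_layer_chars` attributes that `__init__` and the observation code
# rely on. Note that `heatmap_init` assumes the module lives below a
# `curiosity_baselines` directory, and must be called before `reset`.
class MyMazeEnv(PyColabEnv):
    objects = ['a', 'b']                      # reward-bearing characters (assumption)
    state_layer_chars = ['P', 'a', 'b', '#', ' ']

    def make_game(self, reward_config=None):
        return my_maze.make_game(reward_config)  # hypothetical pycolab builder


env = MyMazeEnv(max_iterations=500,
                obs_type='rgb_full',
                default_reward=0.0,
                action_space=spaces.Discrete(4))
env.heatmap_init(logdir='./logs/run0', log_heatmaps=False)
env.obs_init(resize_scale=8)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())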
Example #25
0
class RetroEnv(gym.Env):
    """
    Gym Retro environment class

    Provides a Gym interface to classic video games
    """
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 60.0
    }

    def __init__(self,
                 game,
                 state=retro.State.DEFAULT,
                 scenario=None,
                 info=None,
                 use_restricted_actions=retro.Actions.FILTERED,
                 record=False,
                 players=1,
                 inttype=retro.data.Integrations.STABLE,
                 obs_type=retro.Observations.IMAGE,
                 retro_run_id=None):
        if not hasattr(self, 'spec'):
            self.spec = None
        self._obs_type = obs_type
        self.shm = None
        self.img = None
        self.ram = None
        self.viewer = None
        self.gamename = game
        self.statename = state
        self.initial_state = None
        self.players = players
        metadata = {}
        rom_path = retro.data.get_romfile_path(game, inttype)
        self.disas = retro.dispel.ingest(rom_path)
        metadata_path = retro.data.get_file_path(game, 'metadata.json',
                                                 inttype)

        if state == retro.State.NONE:
            self.statename = None
        elif state == retro.State.DEFAULT:
            self.statename = None
            try:
                with open(metadata_path) as f:
                    metadata = json.load(f)
                if 'default_player_state' in metadata and self.players <= len(
                        metadata['default_player_state']):
                    self.statename = metadata['default_player_state'][
                        self.players - 1]
                elif 'default_state' in metadata:
                    self.statename = metadata['default_state']
                else:
                    self.statename = None
            except (IOError, json.JSONDecodeError):
                pass

        if self.statename:
            self.load_state(self.statename, inttype)

        self.data = retro.data.GameData()

        if info is None:
            info = 'data'

        if info.endswith('.json'):
            # assume it's a path
            info_path = info
        else:
            info_path = retro.data.get_file_path(game, info + '.json', inttype)

        if scenario is None:
            scenario = 'scenario'

        if scenario.endswith('.json'):
            # assume it's a path
            scenario_path = scenario
        else:
            scenario_path = retro.data.get_file_path(game, scenario + '.json',
                                                     inttype)

        self.system = retro.get_romfile_system(rom_path)

        # Set up the shm if we're using the SNES emulator
        self._init_shm(retro_run_id)

        # We can't have more than one emulator per process. Before creating an
        # emulator, ensure that unused ones are garbage-collected
        gc.collect()
        self.em = retro.RetroEmulator(rom_path)
        self.em.configure_data(self.data)
        self.em.step()

        core = retro.get_system_info(self.system)
        self.buttons = core['buttons']
        self.num_buttons = len(self.buttons)

        try:
            assert self.data.load(
                info_path,
                scenario_path), 'Failed to load info (%s) or scenario (%s)' % (
                    info_path, scenario_path)
        except Exception:
            del self.em
            raise

        self.button_combos = self.data.valid_actions()
        if use_restricted_actions == retro.Actions.DISCRETE:
            combos = 1
            for combo in self.button_combos:
                combos *= len(combo)
            self.action_space = gym.spaces.Discrete(combos**players)
        elif use_restricted_actions == retro.Actions.MULTI_DISCRETE:
            self.action_space = gym.spaces.MultiDiscrete([
                len(combos) if gym_version >= (0, 9, 6) else
                (0, len(combos) - 1) for combos in self.button_combos
            ] * players)
        else:
            self.action_space = gym.spaces.MultiBinary(self.num_buttons *
                                                       players)

        kwargs = {}
        if gym_version >= (0, 9, 6):
            kwargs['dtype'] = np.uint8

        if self._obs_type == retro.Observations.RAM:
            shape = self.get_ram().shape
        else:
            img = [self.get_screen(p) for p in range(players)]
            shape = img[0].shape
        self.observation_space = gym.spaces.Box(low=0,
                                                high=255,
                                                shape=shape,
                                                **kwargs)

        self.use_restricted_actions = use_restricted_actions
        self.movie = None
        self.movie_id = 0
        self.movie_path = None
        if record is True:
            self.auto_record()
        elif record is not False:
            self.auto_record(record)
        self.seed()
        if gym_version < (0, 9, 6):
            self._seed = self.seed
            self._step = self.step
            self._reset = self.reset
            self._render = self.render
            self._close = self.close

    def _init_shm(self, retro_run_id):
        if self.system != 'Snes':
            self.shm = None
            return
        ##### Set up the shared memory segment ##################################################
        # currently only supports Snes
        # Set the identifier that the SNES C code may use to create a shared memory segment
        if retro_run_id is None:
            self.retro_run_id = random.randint(1, 1 << 30)
        else:
            self.retro_run_id = retro_run_id
        os.environ['RETRO_RUN_ID'] = f"{self.retro_run_id}"
        self.shm_key = self.retro_run_id
        shm_size = VISITED_BUFFER_SIZE * WORD_SIZE  # enough to hold 2^15 16-bit words
        try:
            self.shm = ipc.SharedMemory(self.shm_key,
                                        flags=ipc.IPC_CREX,
                                        mode=0o666,
                                        size=shm_size)
        except Exception as e:  # FIXME tighten this except up
            shm = ipc.SharedMemory(self.shm_key, 0, 0)
            ipc.remove_shared_memory(shm.id)
            self.shm = ipc.SharedMemory(self.shm_key,
                                        flags=ipc.IPC_CREX,
                                        mode=0o666,
                                        size=shm_size)
        return

    def _update_obs(self):
        if self._obs_type == retro.Observations.RAM:
            self.ram = self.get_ram()
            return self.ram
        elif self._obs_type == retro.Observations.IMAGE:
            self.img = self.get_screen()
            return self.img
        else:
            raise ValueError('Unrecognized observation type: {}'.format(
                self._obs_type))

    def _read_snes_shm(self):
        self.shm.attach()
        count = struct.unpack(f"<Q", self.shm.read(WORD_SIZE))[0]
        buf = self.shm.read((count + 1) * WORD_SIZE)
        self.shm.detach()
        g = MSG_FMT.iter_unpack(buf)
        _ = next(g)
        return [(addr | bank << 16, offset, bytes(bytecode))
                for (addr, bank, offset, *bytecode) in g]

    def action_to_array(self, a):
        actions = []
        for p in range(self.players):
            action = 0
            if self.use_restricted_actions == retro.Actions.DISCRETE:
                for combo in self.button_combos:
                    current = a % len(combo)
                    a //= len(combo)
                    action |= combo[current]
            elif self.use_restricted_actions == retro.Actions.MULTI_DISCRETE:
                ap = a[self.num_buttons * p:self.num_buttons * (p + 1)]
                for i in range(len(ap)):
                    buttons = self.button_combos[i]
                    action |= buttons[ap[i]]
            else:
                ap = a[self.num_buttons * p:self.num_buttons * (p + 1)]
                for i in range(len(ap)):
                    action |= int(ap[i]) << i
                if self.use_restricted_actions == retro.Actions.FILTERED:
                    action = self.data.filter_action(action)
            ap = np.zeros([self.num_buttons], np.uint8)
            for i in range(self.num_buttons):
                ap[i] = (action >> i) & 1
            actions.append(ap)
        return actions

    def disassemble(self, address, offset=None, bytecode=None):
        bank = address >> 16  #(0x0F & (address >> 16)) | 0x80
        addr = address & 0xFFFF
        if (bank, addr) in self.disas:
            trace = self.disas[(bank, addr)]
            return trace
        else:
            trace = retro.dispel.disas_code(code=bytecode[:offset],
                                            addr=address)[0]
            self.disas[(bank, addr)] = trace
            return trace

    def step(self, a):
        if self.img is None and self.ram is None:
            raise RuntimeError('Please call env.reset() before env.step()')

        for p, ap in enumerate(self.action_to_array(a)):
            if self.movie:
                for i in range(self.num_buttons):
                    self.movie.set_key(i, ap[i], p)
            self.em.set_button_mask(ap, p)

        if self.movie:
            self.movie.step()
        self.em.step()
        self.data.update_ram()
        ob = self._update_obs()
        rew, done, info = self.compute_step()
        info = dict(info)
        if self.system == 'Snes' and 'NOTRACE' not in os.environ:
            #info['trace'] = [self.disassemble(pc, flag, inst) for (pc, flag, inst) in self._read_snes_shm()]
            info['trace'] = self._read_snes_shm()
        return ob, rew, bool(done), info

    def reset(self):
        if self.initial_state:
            self.em.set_state(self.initial_state)
        for p in range(self.players):
            self.em.set_button_mask(np.zeros([self.num_buttons], np.uint8), p)
        self.em.step()
        if self.movie_path is not None:
            rel_statename = os.path.splitext(os.path.basename(
                self.statename))[0]
            self.record_movie(
                os.path.join(
                    self.movie_path, '%s-%s-%06d.bk2' %
                    (self.gamename, rel_statename, self.movie_id)))
            self.movie_id += 1
        if self.movie:
            self.movie.step()
        self.data.reset()
        self.data.update_ram()
        return self._update_obs()

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        return [seed1, seed2]

    def render(self, mode='human', close=False):
        if close:
            if self.viewer:
                self.viewer.close()
            return

        img = self.get_screen() if self.img is None else self.img
        if mode == "rgb_array":
            return img
        elif mode == "human":
            if self.viewer is None:
                from gym.envs.classic_control.rendering import SimpleImageViewer
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(img)
            return self.viewer.isopen

    def close(self):
        if hasattr(self, 'em'):
            del self.em

    def get_action_meaning(self, act):
        actions = []
        for p, action in enumerate(self.action_to_array(act)):
            actions.append([
                self.buttons[i]
                for i in np.extract(action, np.arange(len(action)))
            ])
        if self.players == 1:
            return actions[0]
        return actions

    def get_ram(self):
        blocks = []
        for offset in sorted(self.data.memory.blocks):
            arr = np.frombuffer(self.data.memory.blocks[offset],
                                dtype=np.uint8)
            blocks.append(arr)
        return np.concatenate(blocks)

    def get_screen(self, player=0):
        img = self.em.get_screen()
        x, y, w, h = self.data.crop_info(player)
        if not w or x + w > img.shape[1]:
            w = img.shape[1]
        else:
            w += x
        if not h or y + h > img.shape[0]:
            h = img.shape[0]
        else:
            h += y
        if x == 0 and y == 0 and w == img.shape[1] and h == img.shape[0]:
            return img
        return img[y:h, x:w]

    def load_state(self, statename, inttype=retro.data.Integrations.DEFAULT):
        if not statename.endswith('.state'):
            statename += '.state'

        with gzip.open(
                retro.data.get_file_path(self.gamename, statename, inttype),
                'rb') as fh:
            self.initial_state = fh.read()

        self.statename = statename

    def compute_step(self):
        if self.players > 1:
            reward = [self.data.current_reward(p) for p in range(self.players)]
        else:
            reward = self.data.current_reward()
        done = self.data.is_done()
        return reward, done, self.data.lookup_all()

    def record_movie(self, path):
        self.movie = retro.Movie(path, True, self.players)
        self.movie.configure(self.gamename, self.em)
        if self.initial_state:
            self.movie.set_state(self.initial_state)

    def stop_record(self):
        self.movie_path = None
        self.movie_id = 0
        if self.movie:
            self.movie.close()
            self.movie = None

    def auto_record(self, path=None):
        if not path:
            path = os.getcwd()
        self.movie_path = path

    def __del__(self):
        if self.shm is not None:
            ipc.remove_shared_memory(self.shm.id)
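

# ---------------------------------------------------------------------------
# Usage sketch for this patched RetroEnv (illustration only). It assumes the
# modified gym-retro fork providing `retro.dispel` and the SNES shared-memory
# hooks is installed. 'Airstriker-Genesis' is the ROM bundled with gym-retro;
# any other game has to be integrated into retro's data first.
if __name__ == '__main__':
    env = RetroEnv(game='Airstriker-Genesis')
    obs = env.reset()
    done = False
    while not done:
        obs, rew, done, info = env.step(env.action_space.sample())
    env.close()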
Example #26
0
class NESEnv(gym.Env, gym.utils.EzPickle):
    """An environment for playing NES games in OpenAI Gym using FCEUX."""

    # meta-data about the environment
    metadata = {'render.modes': ['human', 'rgb_array']}

    # a pipe from the emulator (FCEUX) to client (self). use the PID of this
    # python process to ensure the pipe is unique
    _pipe_in_name = '/tmp/smb-pipe-in-{}'.format(os.getpid())
    # a pipe from the client (self) to emulator (FCEUX). use the PID of this
    # python process to ensure the pipe is unique
    _pipe_out_name = '/tmp/smb-pipe-out-{}'.format(os.getpid())

    def __init__(
        self,
        max_episode_steps: int,
        frame_skip: int = 4,
        fceux_args: tuple = ('--nogui', '--sound 0'),
        random_seed: int = 0,
    ) -> None:
        """
        Initialize a new NES environment.

        Args:
            max_episode_steps: the maximum number of steps per episode.
                - pass math.inf to use no max_episode_steps limit
            frame_skip: the number of frames to skip between inputs
            fceux_args: arguments to pass to the FCEUX command
            random_seed: the random seed to start the environment with

        Returns:
            None

        """
        # validate that fceux can be found in the path
        if spawn.find_executable('fceux', os.environ['PATH']) is None:
            msg = 'fceux not found in $PATH. is fceux installed?'
            raise DependencyNotFoundError(msg)
        gym.utils.EzPickle.__init__(self)
        self.max_episode_steps = max_episode_steps
        self.frame_skip = frame_skip
        self.fceux_args = fceux_args
        self.curr_seed = random_seed
        # setup the frame rate based on the frame skip rate
        self.metadata['video.frames_per_second'] = 60 / self.frame_skip
        self.viewer = None
        self.step_number = 0
        # these store the pipe for communicating with the environment
        self.pipe_in = None
        self.pipe_out = None
        # variables for the ROM and FCEUX interface files
        self.rom_file_path = None
        self.lua_interface_path = None
        self.emulator_started = False
        # Setup the observation space
        self.observation_space = gym.spaces.Box(low=0,
                                                high=255,
                                                shape=(SCREEN_HEIGHT,
                                                       SCREEN_WIDTH, 3),
                                                dtype=np.uint8)
        self.screen = self.observation_space.sample()
        # Setup the action space
        self.actions = [
            'U',  # Up
            'D',  # Down
            'L',  # Left
            'R',  # Right
            'UR',  # Up + Right
            'DR',  # Down + Right
            'URA',  # Up + Right + A
            'DRB',  # Down + Right + B
            'A',  # A
            'B',  # B
            'RB',  # Right + B
            'RA'  # Right + A
        ]
        self.action_space = gym.spaces.Discrete(len(self.actions))

    # MARK: FCEUX

    def _start_emulator(self) -> None:
        """Spawn an instance of FCEUX and pass parameters to it."""
        # validate that the rom file and lua interface are defined
        if not self.rom_file_path:
            raise Exception('No rom file specified!')
        if not self.lua_interface_path:
            raise Exception("Must specify a lua interface file to get scores!")
        # setup the environment variables to pass to the emulator instance
        os.environ['frame_skip'] = str(self.frame_skip)
        os.environ['pipe_in_name'] = str(self._pipe_in_name)
        os.environ['pipe_out_name'] = str(self._pipe_out_name)
        # TODO: define and setup different reward schemes to initialize with
        # and activate them here using the environment key 'reward_scheme'

        # open up the pipes to the emulator.
        self._open_pipes()
        # build the FCEUX command
        command = ' '.join([
            'fceux', *self.fceux_args, '--loadlua', self.lua_interface_path,
            self.rom_file_path, '&'
        ])
        # open the FCEUX process
        proc = subprocess.Popen(command, shell=True)
        proc.communicate()
        # open the pipe files
        self.pipe_in = open(self._pipe_in_name, 'rb')
        self.pipe_out = open(self._pipe_out_name, 'w', 1)
        # make sure the emulator sends the ready message
        opcode, _ = self._read_from_pipe()
        assert 'ready' == opcode
        self.emulator_started = True

    def _joypad(self, button: str) -> None:
        """
        Pass a joy-pad command to the emulator

        Args:
            button: the button (or combination) to press on the controller

        Returns:
            None

        """
        self._write_to_pipe('joypad' + SEP + button)

    def _get_state(self) -> tuple:
        """
        Parse a state message from the emulator and return it.

        Returns:
            a tuple of:
            -   the screen from the emulator
            -   the reward from the previous action
            -   the terminal flag denoting if an episode has ended

        """
        # read the initial state from the pipe
        opcode, data = self._read_from_pipe()
        assert opcode == 'state'
        # `data` holds the reward, the done flag, and the screen; the trailing
        # element is the dummy '\n' at the end of each line
        reward, done, screen, _ = data
        reward = int(reward.decode('ascii'))
        done = bool(int(done.decode('ascii')))
        # change the done flag to true if this step passes the episode length
        done = True if self.step_number > self.max_episode_steps else done

        # unwrap the P value representing a frame from the data
        pvs = np.array(struct.unpack('B' * len(screen), screen))
        # use the palette to convert the p values to RGB
        rgb = np.array(PALETTE[pvs - 20], dtype=np.uint8)
        # reshape the screen and assign it to self
        screen = rgb.reshape((SCREEN_HEIGHT, SCREEN_WIDTH, 3))

        return screen, reward, done

    # MARK: Pipes

    def _open_pipes(self) -> None:
        """Open the communication path between self and the emulator"""
        # Open the inbound pipe if it doesn't exist yet
        if not os.path.exists(self._pipe_in_name):
            os.mkfifo(self._pipe_in_name)
        # Open the outbound pipe if it doesn't exist yet
        if not os.path.exists(self._pipe_out_name):
            os.mkfifo(self._pipe_out_name)

    def _write_to_pipe(self, message: str) -> None:
        """Write a message to the outbound pip (emulator)."""
        # write the message to the pipe and flush it
        self.pipe_out.write(message + '\n')
        self.pipe_out.flush()

    def _read_from_pipe(self) -> tuple:
        """
        Read a message from the pipe.

        Returns:
            a tuple of
            -   the opcode
            -   the data with the message (as another tuple)

        """
        # Read a message from the pipe and separate along the delimiter 0xff
        message = self.pipe_in.readline().split(b'\xFF')
        # decode the opcode
        opcode = message[0].decode('ascii')
        # return the opcode and data tuple
        return opcode, message[1:]

    # MARK: OpenAI Gym API

    def step(self, action: int) -> tuple:
        """
        Take a step using the given action.

        Args:
            action: the discrete action to perform. will use the action in
                    `self.actions` indexed by this value

        Returns:
            a tuple of:
            -   the state as a result of the action
            -   the reward achieved by taking the action
            -   a flag denoting whether the episode has ended
            -   a dictionary of additional information

        """
        # unwrap the string action value from the list of actions
        self._joypad(self.actions[action])
        # increment the frame counter
        self.step_number += 1
        # get the screen, reward, and done flag from the emulator
        self.screen, reward, done = self._get_state()

        return self.screen, reward, done, {}

    def reset(self) -> np.ndarray:
        """Reset the emulator and return the initial state."""
        if not self.emulator_started:
            self._start_emulator()
        # write the reset command to the emulator
        self._write_to_pipe('reset' + SEP)
        self.step_number = 0
        # get a state from the emulator. ignore the `reward` and `done` flag
        self.screen, _, _ = self._get_state()

        return self.screen

    def render(self, mode: str = 'human'):
        """
        Render the current screen using the given mode.

        Args:
            mode: the mode to render the screen using
                - 'human': render in a window using GTK
                - 'rgb_array': render in the back-end and return a matrix

        Returns:
            None if mode is 'human' or a matrix if mode is 'rgb_array'

        """
        if mode == 'human':
            if self.viewer is None:
                from gym.envs.classic_control.rendering import SimpleImageViewer
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(self.screen)
        elif mode == 'rgb_array':
            return self.screen

    def close(self) -> None:
        """Close the emulator and shutdown FCEUX."""
        self._write_to_pipe('close')
        self.pipe_in.close()
        self.pipe_out.close()
        self.emulator_started = False

    def seed(self, seed: int = None) -> list:
        """
        Set the seed for this env's random number generator(s).

        Returns:
            A list of seeds used in this env's random number generators.
            There is only one "main" seed in this env.
        """
        self.curr_seed = gym.utils.seeding.hash_seed(seed) % 256
        return [self.curr_seed]
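
A minimal rollout sketch for the pipe-based emulator environment above. The class name and constructor arguments are not visible in this listing, so NESEnv(max_episode_steps=5000) is a hypothetical stand-in; the reset/step/render/close calls and the actions attribute match the methods shown above.

import numpy as np

# NESEnv is a hypothetical name for the emulator environment class above
env = NESEnv(max_episode_steps=5000)
screen = env.reset()
done = False
episode_return = 0.0
while not done:
    # pick a random index into the environment's action list
    action = np.random.randint(len(env.actions))
    screen, reward, done, info = env.step(action)
    episode_return += reward
    env.render(mode='human')
env.close()
print('episode return:', episode_return)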
Example #27
0
class MagnetsEnv(Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, G_const=1.0, acceleration=30.0, time_step=0.01,
                 time_limit=10, friction=10.0, seed=None,
                 boundary_less=-1, boundary_greater=1, num_agents=3):
        # constants
        self.G_const = G_const
        self.acceleration = acceleration
        self.time_step = time_step
        self.time_limit = time_limit
        self.friction = friction
        if (seed is None):
            self.seed = int(time.time())
        else:
            self.seed = seed
        self.boundary_less = boundary_less
        self.boundary_greater = boundary_greater
        self.num_agents = num_agents

        self.action_space = MultiDiscrete([[0, 8] for _ in range(num_agents)])

        # It's unclear what low and high here should be. Set them to 0 so
        # that if anyone tries to use them, it is more likely that obviously
        # wrong things happen.
        self.observation_space = Box(low=0, high=0, shape=(4*(num_agents+1),))

        # variables that change with time
        self.state = State(num_agents, seed)

        self.spec = None
        self.viewer = None

    def _reset(self):
        self.seed += 1
        self.state = State(self.num_agents, self.seed)
        return self.state.to_array()

    def _step(self, action):
        """Evolve the state by one time step."""
        if not isinstance(action, Iterable):  # if we didn't get a list of actions
            action = self._action_scal2vec(action)

        pos_inc = self.state.target_state.vel * self.time_step
        self.state.target_state.pos += pos_inc
        total_acc = np.zeros(2)

        for i in range(self.num_agents):
            diff_i = self.state.target_state.pos - self.state.agent_states[i].pos
            dist_square = (diff_i[0] * diff_i[0]) + (diff_i[1] * diff_i[1])
            total_acc += (self.G_const / dist_square) *\
                (diff_i / math.sqrt(dist_square))
            self.state.agent_states[i].pos += (self.state.agent_states[i].vel *
                                               self.time_step)
            agent_dist = self.state.agent_states[i].pos[0] ** 2 +\
                self.state.agent_states[i].pos[1] ** 2

            if (agent_dist > 2):
                self.state.agent_states[i].pos /= agent_dist
                self.state.agent_states[i].pos *= 2

        self.state.target_state.vel += (total_acc * self.time_step)

        # update agent velocities from the chosen accelerations and friction
        for i in range(self.num_agents):
            # acceleration has constant magnitude and points in one of 8
            # directions; action 8 applies no thrust
            acc_dir = np.zeros(2)
            if (action[i] != 8):
                acc_dir = np.asarray([math.cos((action[i] * math.pi) / 4),
                                     math.sin((action[i] * math.pi) / 4)])
            vel_inc = self.acceleration * acc_dir * self.time_step
            vel_dec = self.friction * self.state.agent_states[i].vel *\
                self.time_step
            self.state.agent_states[i].vel += (vel_inc - vel_dec)

        # end the episode if the target has left the box
        if (not self.state.in_box()):
            return self.state.to_array(), 0, True, {"Msg": "Game over"}

        return self.state.to_array(), 1, False, {"Msg": "Game not over"}

    def print_state(self):
        self.state.print_state()

    def _render_object(self, draw, obj, color):
        obj_x = int(((obj.pos[0] - ENV_LOWER) / ENV_SIDE)*RENDER_WIDTH)
        obj_y = int(((obj.pos[1] - ENV_LOWER) / ENV_SIDE)*RENDER_HEIGHT)
        draw.arc(
            [obj_x - RENDER_AGENT_SIZE/2, obj_y - RENDER_AGENT_SIZE/2,
             obj_x + RENDER_AGENT_SIZE/2, obj_y + RENDER_AGENT_SIZE/2],
            0, 360,
            fill=color
        )

    def _render_objective(self, draw, color):
        BOUND_X = (BOUND_SIDE/ENV_SIDE)*RENDER_WIDTH/2
        BOUND_Y = (BOUND_SIDE/ENV_SIDE)*RENDER_HEIGHT/2
        draw.rectangle(
            [RENDER_WIDTH/2 + BOUND_X, RENDER_HEIGHT/2 + BOUND_Y,
             RENDER_WIDTH/2 - BOUND_X, RENDER_HEIGHT/2 - BOUND_Y],
            outline=color
        )

    def _render_bounds(self, draw, color):
        draw.arc([0, 0, RENDER_WIDTH, RENDER_HEIGHT], 0, 360, fill=color)

    def _render(self, mode='human', close=False):
        img = Image.new('RGB', (RENDER_HEIGHT, RENDER_WIDTH), WHITE)
        draw = ImageDraw.Draw(img)
        for i in range(self.num_agents):
            agent = self.state.agent_states[i]
            self._render_object(draw, agent, RED)
        self._render_object(draw, self.state.target_state, BLUE)
        self._render_objective(draw, GREEN)
        self._render_bounds(draw, BLACK)
        del draw

        if mode == 'human':
            if (self.viewer is None):
                # don't import SimpleImageViewer by default because even importing
                # it requires a display
                from gym.envs.classic_control.rendering import SimpleImageViewer
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(np.asarray(img))
        elif mode == 'rgb_array':
            return np.asarray(img)
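
A short rollout sketch for MagnetsEnv. The class follows the old underscore-method gym API (_reset/_step/_render), so the sketch calls those methods directly instead of relying on a particular gym version to dispatch them; the State class and rendering constants are assumed to come from the surrounding module.

import numpy as np

env = MagnetsEnv(num_agents=3, time_step=0.01)
obs = env._reset()
done, steps = False, 0
while not done and steps < 200:
    # one of 9 discrete choices per agent: 8 compass headings plus
    # action 8, which applies no thrust
    action = np.random.randint(0, 9, size=env.num_agents)
    obs, reward, done, info = env._step(action)
    steps += 1
print('steps survived:', steps)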
Example #28
0
class GridWorld(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self,
                 file_name="map1.txt",
                 fail_rate=0.0,
                 terminal_reward=1.0,
                 move_reward=0.0,
                 bump_reward=-0.5,
                 bomb_reward=-1.0):
        self.viewer = SimpleImageViewer()
        self.n = None
        self.m = None
        self.bombs = []
        self.walls = []
        self.goals = []
        self.start = None
        this_file_path = os.path.dirname(os.path.realpath(__file__))
        file_name = os.path.join(this_file_path, file_name)
        with open(file_name, "r") as f:
            for i, row in enumerate(f):
                row = row.rstrip('\r\n')
                if self.n is not None and len(row) != self.n:
                    raise ValueError(
                        "Map's rows are not of the same dimension...")
                self.n = len(row)
                for j, col in enumerate(row):
                    if col == "x" and self.start is None:
                        self.start = self.n * i + j
                    elif col == "x" and self.start is not None:
                        raise ValueError(
                            "There is more than one starting position in the map..."
                        )
                    elif col == "G":
                        self.goals.append(self.n * i + j)
                    elif col == "B":
                        self.bombs.append(self.n * i + j)
                    elif col == "1":
                        self.walls.append(self.n * i + j)
            self.m = i + 1
        if len(self.goals) == 0:
            raise ValueError("At least one goal needs to be specified...")
        self.n_states = self.n * self.m
        self.n_actions = 4
        self.fail_rate = fail_rate
        self.state = self.start
        self.terminal_reward = terminal_reward
        self.move_reward = move_reward
        self.bump_reward = bump_reward
        self.bomb_reward = bomb_reward
        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Discrete(self.n_states)
        self.done = False

    def step(self, action):
        assert self.action_space.contains(action)
        if self.state in self.goals or np.random.rand() < self.fail_rate:
            return self.state, 0.0, self.done, {}
        else:
            new_state = self.take_action(action)
            reward = self.get_reward(new_state)
            self.state = new_state
            return self.state, reward, self.done, {}

    def reset(self):
        self.done = False
        self.state = self.start
        return self.state

    def render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        # build the RGB grid up front so both render modes can use it
        grid = np.multiply(np.ones((self.n_states, 3), dtype=np.uint8),
                           np.array([0, 255, 0], dtype=np.uint8))
        for g in self.goals:
            grid[g] = np.array([255, 0, 0])
        for b in self.bombs:
            grid[b] = np.array([255, 255, 0])
        for w in self.walls:
            grid[w] = np.array([0, 0, 0])
        grid[self.state] = np.array([0, 0, 255])
        grid = grid.reshape(self.m, self.n, 3)
        if mode == 'human':
            self.viewer.imshow(grid)
            return self.viewer.isopen
        elif mode == "rgb_array":
            return grid
        else:
            return

    def take_action(self, action):
        # decode the flat state index into (row, col); use integer division
        # so the indices stay ints under Python 3
        row = self.state // self.n
        col = self.state % self.n
        if action == DOWN and (row + 1) * self.n + col not in self.walls:
            row = min(row + 1, self.m - 1)
        elif action == UP and (row - 1) * self.n + col not in self.walls:
            row = max(0, row - 1)
        elif action == RIGHT and row * self.n + col + 1 not in self.walls:
            col = min(col + 1, self.n - 1)
        elif action == LEFT and row * self.n + col - 1 not in self.walls:
            col = max(0, col - 1)
        new_state = row * self.n + col
        return new_state

    def get_reward(self, new_state):
        if new_state in self.goals:
            self.done = True
            return self.terminal_reward
        elif new_state in self.bombs:
            return self.bomb_reward
        elif new_state == self.state:
            return self.bump_reward
        return self.move_reward
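
A short usage sketch for GridWorld. It assumes the map file (map1.txt here) sits next to the module, as the constructor expects, and that the action constants UP/DOWN/LEFT/RIGHT used by take_action are defined in the surrounding module.

env = GridWorld(file_name="map1.txt", fail_rate=0.1)
state = env.reset()
done, steps = False, 0
while not done and steps < 500:
    # sample one of the four moves at random
    action = env.action_space.sample()
    state, reward, done, _ = env.step(action)
    steps += 1
# close the viewer window that the constructor created
env.render(close=True)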
Example #29
0
class Runner:
    def __init__(self, env, model, batch_size, timesteps, discount_rate,
                 summary_frequency, performance_num_episodes, summary_log_dir):
        self.env = env
        self.model = model
        self.timesteps = timesteps
        self.discount_rate = discount_rate
        self.observation = env.reset()
        self.batch_size = batch_size
        self.stats_recorder = StatsRecorder(
            summary_frequency=summary_frequency,
            performance_num_episodes=performance_num_episodes,
            summary_log_dir=summary_log_dir,
            save=True)
        self.viewer = SimpleImageViewer()

    def render(self):
        # convert the 80x80 binary observation into a black/white RGB frame
        columns = []
        for i in range(80):
            rows = []
            for j in range(80):
                if self.observation[i][j] == 1:
                    rows.append([255, 255, 255])
                else:
                    rows.append([0, 0, 0])
            columns.append(rows)
        self.viewer.imshow(np.asarray(columns, dtype=np.uint8))

    def run(self):
        observations = []
        rewards = []
        actions = []
        terminals = []

        for t in range(self.timesteps + 1):
            action_index = self.model.predict_action([self.observation])
            observations.append(self.observation)
            action = action_with_index(action_index)

            self.observation, reward, terminal = self.env.step(action)
            self.stats_recorder.after_step(reward=reward, terminal=terminal)

            rewards.append(reward)
            actions.append(action_index)
            terminals.append(terminal)

            if len(rewards) == self.batch_size:
                # once a full batch is collected, compute discounted returns
                # and run a training update
                discounted_rewards = discount(rewards, terminals,
                                              self.discount_rate)

                self.model.train(observations, discounted_rewards, actions)
                observations = []
                rewards = []
                actions = []
                terminals = []

            if terminal:
                self.observation = self.env.reset()

            if t % self.stats_recorder.summary_frequency == 0:
                self.model.save(0)
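
A wiring sketch for this Runner. PongEnv and PolicyModel are hypothetical placeholders for whatever environment and model the surrounding codebase provides, and the keyword values are illustrative only.

env = PongEnv()        # hypothetical environment with reset()/step()
model = PolicyModel()  # hypothetical model with predict_action()/train()/save()
runner = Runner(env, model,
                batch_size=128,
                timesteps=1000000,
                discount_rate=0.99,
                summary_frequency=1000,
                performance_num_episodes=10,
                summary_log_dir='./logs')
runner.run()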
Example #30
0
class TetrisEnv(gym.Env, gym.utils.EzPickle):
    """An environment for playing Tetris in OpenAI Gym."""

    # meta-data about the environment for OpenAI Gym utilities (like Monitor)
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30,
    }

    def __init__(self, max_steps: int, random_state: int = None) -> None:
        """
        Initialize a new Tetris environment.

        Args:
            max_steps: the max number of steps per episode.
            random_state: the random seed to start the environment with

        Returns:
            None

        """
        gym.utils.EzPickle.__init__(self)
        self.max_steps = max_steps
        self.viewer = None
        self.step_number = 0
        # Setup the observation space as RGB game frames
        self.observation_space = gym.spaces.Box(low=0,
                                                high=255,
                                                shape=(SCREEN_HEIGHT,
                                                       SCREEN_WIDTH, 3),
                                                dtype=np.uint8)
        # Setup the action space, the game defines 12 legal actions
        self.action_space = gym.spaces.Discrete(12)
        # setup the game
        self.game = Tetris()
        self.seed(random_state)

    @property
    def screen(self) -> np.ndarray:
        """Return the screen of the game"""
        return self.game.screen

    def reset(self) -> np.ndarray:
        """Reset the emulator and return the initial state."""
        self.game.reset()
        # reset the step count
        self.step_number = 0
        # return the initial screen from the game
        return self.game.screen

    def step(self, action: int) -> tuple:
        """
        Take a step using the given action.

        Args:
            action: the discrete action to perform. will use the action in
                    `self.actions` indexed by this value

        Returns:
            a tuple of:
            -   the state as a result of the action
            -   the reward achieved by taking the action
            -   a flag denoting whether the episode has ended
            -   a dictionary of extra information

        """
        state, reward, done, info = self.game.step(action)
        self.step_number += 1
        # if this step has passed the max number, set the episode to done
        if self.step_number >= self.max_steps:
            done = True
        return state, reward, done, info

    def render(self, mode: str = 'human'):
        """
        Render the current screen using the given mode.

        Args:
            mode: the mode to render the screen using
                - 'human': render in a window using pyglet
                - 'rgb_array': return the frame as a matrix without opening a window

        Returns:
            None if mode is 'human' or a matrix if mode is 'rgb_array'

        """
        # if the mode is RGB, return the screen as a NumPy array
        if mode == 'rgb_array':
            return self.game.screen
        # if the mode is human, create a viewer and display the screen
        elif mode == 'human':
            from pyglet.window import Window
            from gym.envs.classic_control.rendering import SimpleImageViewer
            if self.viewer is None:
                self.viewer = SimpleImageViewer()
                self.viewer.window = Window(
                    width=SCREEN_WIDTH,
                    height=SCREEN_HEIGHT,
                    caption=self.spec.id,
                )
            self.viewer.imshow(self.game.screen)
            return self.viewer.isopen
        # otherwise the render mode is not supported, raise an error
        else:
            raise ValueError('unsupported render mode: {}'.format(repr(mode)))

    def close(self) -> None:
        """Close the emulator."""
        # delete the existing game if there is one
        if isinstance(self.game, Tetris):
            del self.game
        if self.viewer is not None:
            self.viewer.close()
            del self.viewer

    def seed(self, random_state: int = None) -> list:
        """
        Set the seed for this env's random number generator(s).

        Args:
            random_state: the seed to set the random generator to

        Returns:
            A list of seeds used in this env's random number generators

        """
        random.seed(random_state)
        self.curr_seed = random_state

        return [self.curr_seed]

    def get_keys_to_action(self) -> dict:
        """Return the dictionary of keyboard keys to actions."""
        # Map of in game directives to their associated keyboard value
        down = ord('s')
        left = ord('a')
        right = ord('d')
        rot_l = ord('q')
        rot_r = ord('e')
        # A mapping of pressed key combinations to discrete actions
        keys_to_action = {
            (): 0,
            (left, ): 1,
            (right, ): 2,
            (down, ): 3,
            (rot_l, ): 4,
            (rot_r, ): 5,
            tuple(sorted((
                left,
                down,
            ))): 6,
            tuple(sorted((
                right,
                down,
            ))): 7,
            tuple(sorted((
                left,
                rot_l,
            ))): 8,
            tuple(sorted((
                right,
                rot_l,
            ))): 9,
            tuple(sorted((
                left,
                rot_r,
            ))): 10,
            tuple(sorted((
                right,
                rot_r,
            ))): 11,
        }

        return keys_to_action
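
A short sketch of how the key mapping above could drive an interactive session with gym's play helper, assuming a gym version that provides gym.utils.play and that the Tetris game class and screen constants are importable from the surrounding module.

from gym.utils.play import play

env = TetrisEnv(max_steps=5000, random_state=1)
# play() polls the keyboard, looks up pressed key combinations in the
# mapping returned by get_keys_to_action(), and renders 'rgb_array' frames
play(env, fps=30, keys_to_action=env.get_keys_to_action())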