Ejemplo n.º 1
0
def visualize_env(env, mode, max_steps=sys.maxsize, speedup=1):
    """Render ``env`` on screen while driving it according to ``mode``.

    Modes:
        'noop'   -- only render; the environment is never reset or stepped.
        'random' -- reset once, then step with actions drawn from
                    ``env.action_space.sample()``, resetting whenever a
                    step reports ``done``.
        'static' -- reset once and keep rendering in an endless loop.
        'human'  -- interactive control. ``env.start_interactive()`` is
                    used when the environment has it; otherwise keyboard
                    loops for rllab Box2D and Mujoco/Maze environments
                    are tried.

    Args:
        env: Environment object providing whatever the selected mode calls
            (``render``, ``reset``, ``step``, ``action_space``, ...).
        mode: One of the mode strings listed above.
        max_steps: Iteration cap for the 'noop', 'random' and Box2D loops.
            The 'static' and Mujoco/Maze loops do not consult it.
        speedup: Divisor applied to the delay slept after each frame.

    Raises:
        ValueError: If ``mode`` is not one of the supported strings.
        AssertionError: In 'human' mode (with assertions enabled) when the
            environment has no ``start_interactive`` and is neither a
            Box2D nor a Mujoco/Maze environment.
    """
    frame_delay = 0.05  # base seconds between frames, before ``speedup``

    def _pause():
        # Wait one frame period, shortened by the requested speed-up.
        time.sleep(frame_delay / speedup)

    if mode == 'noop':
        # "noop" = no operation: the environment is only displayed.
        for _tick in range(max_steps):
            env.render()
            _pause()
        return

    if mode == 'random':
        env.reset()
        env.render()
        for _tick in range(max_steps):
            sampled = env.action_space.sample()
            # The third element of the step result flags the end of an episode.
            _obs, _reward, finished, _info = env.step(sampled)
            env.render()
            _pause()
            if finished:
                # Episode over: reset and keep sampling until max_steps.
                env.reset()
        return

    if mode == 'static':
        env.reset()
        while True:  # no exit condition; runs until interrupted
            env.render()
            _pause()

    if mode != 'human':
        raise ValueError('Unsupported mode: %s' % mode)

    # ---- interactive ('human') mode ----
    if hasattr(env, 'start_interactive'):
        env.start_interactive()
        return

    env.reset()
    env.render()
    episode_return = 0.

    from rllab.envs.box2d.box2d_env import Box2DEnv
    if isinstance(env, Box2DEnv):
        for _tick in range(max_steps):
            pygame.event.pump()
            pressed = pygame.key.get_pressed()
            chosen = env.action_from_keys(pressed)
            _obs, reward, finished, _info = env.step(chosen)
            episode_return += reward
            env.render()
            _pause()
            if finished:
                episode_return = 0.
                env.reset()
        return

    from rllab.envs.mujoco.mujoco_env import MujocoEnv
    from rllab.envs.mujoco.maze.maze_env import MazeEnv
    if isinstance(env, (MujocoEnv, MazeEnv)):
        # One-element lists let the key callback and the loop share state.
        returns = [episode_return]
        pending = [np.zeros(2)]
        from rllab.mujoco_py import glfw

        def on_key(window, key, scancode, action, mods):
            pending[0] = env.action_from_key(key)

        glfw.set_key_callback(env.viewer.window, on_key)
        while True:
            try:
                # Default to a zero action; a key callback fired during
                # poll_events() replaces it before the step below.
                pending[0] = np.zeros(2)
                glfw.poll_events()
                _obs, reward, finished, _info = env.step(pending[0])
                returns[0] += reward
                env.render()
                # This loop paces itself with the environment's own timestep.
                time.sleep(env.timestep / speedup)
                if finished:
                    returns[0] = 0.
                    env.reset()
            except Exception as err:
                # Errors are printed and the loop carries on.
                print(err)

    # Reached only when no interactive backend matched the environment.
    assert hasattr(env, "start_interactive"), \
        "The environment must implement method start_interactive"

    env.start_interactive()
Ejemplo n.º 2
0
def visualize_env(env, mode, max_steps=sys.maxsize, speedup=1):
    """Display an environment, driving it in the way selected by ``mode``.

    Supported modes are 'noop' (render only), 'random' (step with sampled
    actions), 'static' (reset once, render in an endless loop) and 'human'
    (interactive control through ``env.start_interactive()`` or, failing
    that, the rllab Box2D / Mujoco keyboard loops).

    Args:
        env: Environment to show; it must provide the methods the chosen
            mode calls (``render``, ``reset``, ``step``, ...).
        mode: Mode string, see above.
        max_steps: Number of iterations for the bounded loops ('noop',
            'random', Box2D). The 'static' and Mujoco/Maze loops are
            unbounded.
        speedup: The per-frame sleep is divided by this value.

    Raises:
        ValueError: For any other ``mode`` value.
        AssertionError: In 'human' mode (with assertions enabled) when no
            interactive backend fits the environment.
    """
    timestep = 0.05  # base seconds per frame before ``speedup`` is applied

    def _draw_then_sleep():
        # Show the current state and hold it for one scaled frame period.
        env.render()
        time.sleep(timestep / speedup)

    def _run_noop():
        # Nothing is stepped or reset; the environment is only rendered.
        for _ in range(max_steps):
            _draw_then_sleep()

    def _run_random():
        env.reset()
        env.render()
        for _step_no in range(max_steps):
            act = env.action_space.sample()
            _, _, episode_over, _ = env.step(act)
            _draw_then_sleep()
            if episode_over:
                env.reset()

    def _run_static():
        env.reset()
        while True:  # never returns on its own
            _draw_then_sleep()

    def _drive_box2d():
        # Keyboard control through pygame's current key state.
        total = 0.
        for _ in range(max_steps):
            pygame.event.pump()
            held = pygame.key.get_pressed()
            act = env.action_from_keys(held)
            _ob, rew, episode_over, _ = env.step(act)
            total += rew
            _draw_then_sleep()
            if episode_over:
                total = 0.
                env.reset()

    def _drive_mujoco():
        # Keyboard control through a GLFW key callback; single-item lists
        # act as mutable cells shared with the callback.
        totals = [0.]
        latest = [np.zeros(2)]
        from rllab.mujoco_py import glfw

        def _key_event(window, key, scancode, action, mods):
            latest[0] = env.action_from_key(key)

        glfw.set_key_callback(env.viewer.window, _key_event)
        while True:
            try:
                # Start from a zero action; a key event delivered while
                # polling overrides it before the step.
                latest[0] = np.zeros(2)
                glfw.poll_events()
                _ob, rew, episode_over, _info = env.step(latest[0])
                totals[0] += rew
                env.render()
                # Uses the environment's timestep, not the local constant.
                time.sleep(env.timestep / speedup)
                if episode_over:
                    totals[0] = 0.
                    env.reset()
            except Exception as exc:
                # Report the problem and keep the loop running.
                print(exc)

    def _run_human():
        if hasattr(env, 'start_interactive'):
            env.start_interactive()
            return

        env.reset()
        env.render()

        from rllab.envs.box2d.box2d_env import Box2DEnv
        if isinstance(env, Box2DEnv):
            _drive_box2d()
            return

        from rllab.envs.mujoco.mujoco_env import MujocoEnv
        from rllab.envs.mujoco.maze.maze_env import MazeEnv
        if isinstance(env, (MujocoEnv, MazeEnv)):
            _drive_mujoco()
            return

        # No known backend matched the environment.
        assert hasattr(
            env, "start_interactive"
        ), "The environment must implement method start_interactive"

        env.start_interactive()

    if mode == 'noop':
        _run_noop()
    elif mode == 'random':
        _run_random()
    elif mode == 'static':
        _run_static()
    elif mode == 'human':
        _run_human()
    else:
        raise ValueError('Unsupported mode: %s' % mode)
Ejemplo n.º 3
0
def visualize_env(env, mode, max_steps=sys.maxsize, speedup=1):
    """Render ``env`` while driving it according to ``mode``.

    Modes:
        'noop'   -- only render; the environment is never reset or stepped.
        'random' -- reset once, then step with ``env.action_space.sample()``
                    actions, resetting whenever a step reports ``done``.
        'static' -- reset once and keep rendering in an endless loop.
        'human'  -- keyboard control: pygame key state for rllab Box2D
                    environments, a GLFW key callback for Mujoco/Maze
                    environments, otherwise matplotlib key-press events.

    Args:
        env: Environment providing the methods the selected mode calls
            (``render``, ``reset``, ``step``, ``action_space``, ...).
        mode: One of the mode strings listed above.
        max_steps: Iteration cap for the 'noop', 'random' and Box2D loops.
            The 'static' and Mujoco/Maze loops do not consult it.
        speedup: Divisor applied to the delay slept after each frame.

    Raises:
        ValueError: If ``mode`` is not a supported mode string.
        AssertionError: In 'human' mode (with assertions enabled) when the
            environment is neither Box2D nor Mujoco/Maze and lacks
            ``action_from_key`` or ``matplotlib_figure``.
    """
    timestep = 0.05  # base seconds between frames, before ``speedup``
    if mode == 'noop':
        # Render only: no action is applied to the environment.
        for _ in range(max_steps):
            env.render()
            time.sleep(timestep / speedup)
    elif mode == 'random':
        env.reset()
        env.render()
        for i in range(max_steps):
            action = env.action_space.sample()
            _, _, done, _ = env.step(action)
            env.render()
            time.sleep(timestep / speedup)
            if done:
                env.reset()
    elif mode == 'static':
        env.reset()
        while True:  # no exit condition; runs until interrupted
            env.render()
            time.sleep(timestep / speedup)
    elif mode == 'human':
        env.reset()
        env.render()
        tr = 0.
        from rllab.envs.box2d.box2d_env import Box2DEnv
        if isinstance(env, Box2DEnv):
            for _ in range(max_steps):
                pygame.event.pump()
                keys = pygame.key.get_pressed()
                action = env.action_from_keys(keys)
                # step() yields four values (observation, reward, done,
                # info), exactly as unpacked by the other loops in this
                # function; unpacking only three raised ValueError here.
                ob, r, done, _ = env.step(action)
                tr += r
                env.render()
                time.sleep(timestep / speedup)
                if done:
                    tr = 0.
                    env.reset()
            return

        from rllab.envs.mujoco.mujoco_env import MujocoEnv
        from rllab.envs.mujoco.maze.maze_env import MazeEnv
        if isinstance(env, (MujocoEnv, MazeEnv)):
            # One-element lists let the key callback and the loop share state.
            trs = [tr]
            actions = [np.zeros(2)]
            from rllab.mujoco_py import glfw

            def cb(window, key, scancode, action, mods):
                actions[0] = env.action_from_key(key)

            glfw.set_key_callback(env.viewer.window, cb)
            while True:
                try:
                    # Default to a zero action; a key callback fired during
                    # poll_events() replaces it before the step below.
                    actions[0] = np.zeros(2)
                    glfw.poll_events()
                    ob, r, done, info = env.step(actions[0])
                    trs[0] += r
                    env.render()
                    # This loop paces itself with the environment's timestep.
                    time.sleep(env.timestep / speedup)
                    if done:
                        trs[0] = 0.
                        env.reset()
                except Exception as e:
                    # Best effort: report the error and keep the loop alive.
                    print(e)
            return

        assert hasattr(env, "action_from_key") and hasattr(env, "matplotlib_figure"), \
            "The environment must implement the method action_from_key and have the matplotlib_figure attribute " \
            "available"
        # Assume using matplotlib
        # TODO - make this logic more legit

        env.render()
        import matplotlib.pyplot as plt

        def handle_key_pressed(event):
            # Map the pressed key to an action; keys that map to None are ignored.
            action = env.action_from_key(event.key)
            if action is not None:
                _, _, done, _ = env.step(action)
                if done:
                    # Close the figure when the episode ends.
                    plt.close()
                    return
                env.render()

        env.matplotlib_figure.canvas.mpl_connect('key_press_event', handle_key_pressed)
        plt.ioff()
        plt.show()

    else:
        raise ValueError('Unsupported mode: %s' % mode)
Ejemplo n.º 4
0
 def loop_once(self) -> None:
     """Run one iteration of the display loop: render, swap buffers, poll events."""
     # Draw the current frame; what is drawn is decided by self.render (not shown here).
     self.render()
     # Swap front and back buffers
     glfw.swap_buffers(self.window)
     # Poll for and process events
     glfw.poll_events()
Ejemplo n.º 5
0
 def loop_once(self) -> None:
     """Perform a single render / buffer-swap / event-poll cycle for this window."""
     # Render first; the buffer swap below follows the drawing done by self.render.
     self.render()
     # Swap front and back buffers
     glfw.swap_buffers(self.window)
     # Poll for and process events
     glfw.poll_events()