def test_batched_backend_equivalence(self):
        """
        Tests whether the Python and TensorFlow backends return the same output
        for a standard DQN-style preprocessing stack.
        """
        env_spec = dict(
            type="openai",
            gym_env="Pong-v0",
            frameskip=4,
            max_num_noops=30,
            episodic_life=True
        )
        # Test with batching because we assume vector environments to be the normal case going forward.
        env = SequentialVectorEnv(num_envs=4, env_spec=env_spec, num_background_envs=2)
        in_space = env.state_space

        agent_config = config_from_path("configs/ray_apex_for_pong.json")
        preprocessing_spec = deepcopy(agent_config["preprocessing_spec"])

        # Set up python preprocessor.
        scopes = [preprocessor["scope"] for preprocessor in preprocessing_spec]
        # Set backend to python.
        for spec in preprocessing_spec:
            spec["backend"] = "python"
        python_processor = PreprocessorStack(*preprocessing_spec, backend="python")
        for sub_comp_scope in scopes:
            python_processor.sub_components[sub_comp_scope].create_variables(dict(preprocessing_inputs=in_space))
        python_processor.reset()

        # Use the agent interface for the TensorFlow backend to match the use case considered so far.
        agent_config.pop("type")
        agent = ApexAgent(state_space=env.state_space, action_space=env.action_space, **agent_config)

        # Generate a few states from random start points and test whether the preprocessed states are (almost) equal.
        states = np.asarray(env.reset_all())
        actions, agent_preprocessed_states = agent.get_action(
            states=states, use_exploration=False, extra_returns="preprocessed_states")
        print("TensorFlow preprocessed shape: {}".format(np.asarray(agent_preprocessed_states).shape))
        python_preprocessed_states = python_processor.preprocess(states)
        print("Python preprocessed shape: {}".format(np.asarray(python_preprocessed_states).shape))
        print("Asserting (almost) equal values:")
        for tf_state, python_state in zip(agent_preprocessed_states, python_preprocessed_states):
            flat_tf = np.asarray(tf_state).flatten()
            flat_python = np.asarray(python_state).flatten()
            for x, y in zip(flat_tf, flat_python):
                recursive_assert_almost_equal(x, y, decimals=3)

        states, _, _, _ = env.step(actions)
        actions, agent_preprocessed_states = agent.get_action(
            states=states, use_exploration=False, extra_returns="preprocessed_states")
        print("TensorFlow preprocessed shape: {}".format(np.asarray(agent_preprocessed_states).shape))
        python_preprocessed_states = python_processor.preprocess(states)
        print("Python preprocessed shape: {}".format(np.asarray(python_preprocessed_states).shape))
        print("Asserting (almost) equal values:")
        recursive_assert_almost_equal(agent_preprocessed_states, python_preprocessed_states, decimals=3)
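
# The element-wise comparison loop above can also be expressed with NumPy's batched
# almost-equal check. A minimal sketch, assuming both preprocessed batches convert
# cleanly to float arrays of the same shape (the helper name is illustrative only):
import numpy as np

def assert_preprocessed_batches_almost_equal(tf_batch, python_batch, decimals=3):
    tf_batch = np.asarray(tf_batch, dtype=np.float64)
    python_batch = np.asarray(python_batch, dtype=np.float64)
    # Fail early on shape mismatches, then compare all values at once.
    assert tf_batch.shape == python_batch.shape, \
        "Shape mismatch: {} vs {}".format(tf_batch.shape, python_batch.shape)
    np.testing.assert_allclose(tf_batch, python_batch, atol=10.0 ** (-decimals))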
Example #2
    def __init__(self,
                 agent,
                 env_spec=None,
                 num_envs=1,
                 frameskip=1,
                 render=False,
                 worker_executes_exploration=True,
                 exploration_epsilon=0.1,
                 episode_finish_callback=None):
        """
        Args:
            agent (Agent): Agent to execute environment on.
            env_spec (Optional[Union[callable, dict]]): Either an environment spec or a callable returning a new
                environment.
            num_envs (int): How many single Environments should be run in parallel in a SequentialVectorEnv.
            frameskip (int): How often actions are repeated after retrieving them from the agent.
                This setting can be overwritten in the single calls to the different `execute_..` methods.
            render (bool): Whether to render the environment after each action.
                Default: False.
            worker_executes_exploration (bool): Whether the worker itself executes exploration by sampling actions.
            exploration_epsilon (Optional[float]): Epsilon to use if worker executes exploration.
        """
        super(Worker, self).__init__()
        self.num_environments = num_envs
        self.logger = logging.getLogger(__name__)
        if env_spec is not None:
            self.env_ids = [
                "env_{}".format(i) for i in range_(self.num_environments)
            ]
            self.vector_env = SequentialVectorEnv(
                env_spec=env_spec, num_envs=self.num_environments)
        else:
            self.env_ids = []
            self.vector_env = None
        self.agent = agent
        self.frameskip = frameskip
        self.render = render

        # Update schedule if worker is performing updates.
        self.updating = None
        self.steps_before_update = None
        self.update_interval = None
        self.update_steps = None
        self.sync_interval = None
        self.episodes_since_update = 0

        # Default val or None?
        self.update_mode = "time_steps"

        self.worker_executes_exploration = worker_executes_exploration
        self.exploration_epsilon = exploration_epsilon

        self.episode_finish_callback = episode_finish_callback
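
# The `worker_executes_exploration` / `exploration_epsilon` arguments above imply a
# simple epsilon-greedy scheme on the worker side. A minimal sketch of such sampling,
# assuming a discrete action space of known size; this helper is illustrative and not
# part of the Worker class itself:
import numpy as np

def epsilon_greedy_actions(agent_actions, num_actions, epsilon=0.1):
    agent_actions = np.asarray(agent_actions)
    # With probability `epsilon`, replace each agent action by a uniformly random one.
    random_actions = np.random.randint(0, num_actions, size=agent_actions.shape)
    explore = np.random.rand(*agent_actions.shape) < epsilon
    return np.where(explore, random_actions, agent_actions)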
Example #3
    def test_sequential_vector_env(self):
        vector_env = SequentialVectorEnv(num_environments=self.num_vector_envs,
                                         env_spec=self.env_spec,
                                         num_background_envs=2)
        agent = Agent.from_spec(
            # Uses 2015 DQN parameters as closely as possible.
            config_from_path("configs/dqn_vector_env.json"),
            state_space=vector_env.state_space,
            # Try with "reduced" action space (actually only 3 actions, up, down, no-op)
            action_space=vector_env.action_space)

        states = vector_env.reset_all()
        start = time.monotonic()
        ep_lengths = [0 for _ in range_(self.num_vector_envs)]

        for _ in range_(int(self.samples / self.num_vector_envs)):
            # Sample all envs at once.
            actions, preprocessed_states = agent.get_action(
                states, extra_returns="preprocessed_states")
            states, rewards, terminals, infos = vector_env.step(actions)
            ep_lengths = [ep_length + 1 for ep_length in ep_lengths]

            for i, terminal in enumerate(terminals):
                if terminal:
                    print("reset env {} after {} states".format(
                        i, ep_lengths[i]))
                    vector_env.reset(i)
                    ep_lengths[i] = 0

        runtime = time.monotonic() - start
        tp = self.samples / runtime

        print('Testing vector env {} performance:'.format(
            self.env_spec["gym_env"]))
        print('Ran {} steps, throughput: {} states/s, total time: {} s'.format(
            self.samples, tp, runtime))
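
# To separate environment stepping cost from agent inference cost, the same timing
# loop can be run with random actions instead of `agent.get_action`. A minimal sketch,
# assuming the vector env's action space exposes a `sample(size=...)` method; treat
# this as an illustration, not as the library's API:
import time

def benchmark_env_only(vector_env, num_envs, num_samples):
    vector_env.reset_all()
    start = time.monotonic()
    for _ in range(int(num_samples / num_envs)):
        actions = vector_env.action_space.sample(size=num_envs)
        states, rewards, terminals, infos = vector_env.step(actions)
        for i, terminal in enumerate(terminals):
            if terminal:
                vector_env.reset(i)
    runtime = time.monotonic() - start
    return num_samples / runtime  # Throughput in states per second.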
Example #4
    def __init__(self,
                 agent,
                 env_spec=None,
                 num_environments=1,
                 frameskip=1,
                 render=False,
                 worker_executes_exploration=True,
                 exploration_epsilon=0.1,
                 episode_finish_callback=None,
                 max_timesteps=None):
        """
        Args:
            agent (Agent): Agent to execute environment on.

            env_spec (Optional[Union[callable, dict]]): Either an environment spec or a callable returning a new
                environment.

            num_environments (int): How many single Environments should be run in parallel in a SequentialVectorEnv.

            frameskip (int): How often actions are repeated after retrieving them from the agent.
                This setting can be overwritten in the single calls to the different `execute_..` methods.

            render (bool): Whether to render the environment after each action.
                Default: False.

            worker_executes_exploration (bool): Whether the worker itself executes exploration by sampling actions.
            exploration_epsilon (Optional[float]): Epsilon to use if worker executes exploration.

            max_timesteps (Optional[int]): A max number on the time steps this Worker expects to perform.
                This is not a forced limit, but serves to calculate the `time_percentage` value passed into
                the Agent for time-dependent (decay) parameter calculations.
                If None, Worker will try to infer this value automatically.
        """
        super(Worker, self).__init__()
        self.num_environments = num_environments
        self.logger = logging.getLogger(__name__)

        # VectorEnv was passed in directly -> Use that one.
        if isinstance(env_spec, VectorEnv):
            self.vector_env = env_spec
            self.num_environments = self.vector_env.num_environments
            self.env_ids = [
                "env_{}".format(i) for i in range_(self.num_environments)
            ]
        # `env_spec` is for single envs inside a SequentialVectorEnv.
        elif env_spec is not None:
            self.vector_env = SequentialVectorEnv(
                env_spec=env_spec, num_environments=self.num_environments)
            self.env_ids = [
                "env_{}".format(i) for i in range_(self.num_environments)
            ]
        # No env_spec.
        else:
            self.vector_env = None
            self.env_ids = []

        self.agent = agent
        self.frameskip = frameskip
        self.render = render

        # Update schedule if worker is performing updates.
        self.updating = None
        self.steps_before_update = None
        self.update_interval = None
        self.update_steps = None
        self.sync_interval = None
        self.episodes_since_update = 0

        self.max_timesteps = max_timesteps

        # Default val or None?
        self.update_mode = "time_steps"

        self.worker_executes_exploration = worker_executes_exploration
        self.exploration_epsilon = exploration_epsilon

        self.episode_finish_callback = episode_finish_callback
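
# `max_timesteps` is only used to derive a `time_percentage` for time-dependent
# (decay) parameter calculations. A minimal sketch of that calculation, assuming the
# worker keeps a running step counter; the function below is illustrative, not the
# Worker's actual code:
def compute_time_percentage(timesteps_executed, max_timesteps):
    if max_timesteps is None or max_timesteps <= 0:
        # Leave it to the agent to fall back to its own default behavior.
        return None
    return min(float(timesteps_executed) / max_timesteps, 1.0)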
Example #5
    def test_sequential_vector_env(self):
        num_envs = 4
        env = SequentialVectorEnv(num_environments=num_envs,
                                  env_spec={
                                      "type": "gridworld",
                                      "world": "2x2"
                                  })

        # Simple test runs with fixed actions.
        # X=player's position
        s = env.reset(index=0)  # ["XH", " G"]  X=player's position
        self.assertTrue(s == 0)

        s = env.reset_all()
        self.assertTrue(all(s_ == 0 for s_ in s))

        s, r, t, _ = env.step([2 for _ in range(num_envs)])  # down: [" H", "XG"]
        self.assertTrue(all(s_ == 1 for s_ in s))
        self.assertTrue(all(r_ == -1.0 for r_ in r))
        self.assertTrue(all(not t_ for t_ in t))

        s, r, t, _ = env.step([1 for _ in range(num_envs)])  # right: [" H", " X"]
        self.assertTrue(all(s_ == 3 for s_ in s))
        self.assertTrue(all(r_ == 1.0 for r_ in r))
        self.assertTrue(all(t_ for t_ in t))

        # Reset all envs: ["XH", " G"]  X=player's position
        for i in range(num_envs):
            env.reset(index=i)
        s, r, t, _ = env.step([1 for _ in range(num_envs)])  # right: [" X", " G"] -> in the hole
        self.assertTrue(all(s_ == 2 for s_ in s))
        self.assertTrue(all(r_ == -5.0 for r_ in r))
        self.assertTrue(all(t_ for t_ in t))

        # Run against a wall.
        env.reset_all()  # ["XH", " G"]  X=player's position
        s, r, t, _ = env.step([3 for _ in range(num_envs)])  # left: ["XH", " G"]
        self.assertTrue(all(s_ == 0 for s_ in s))
        self.assertTrue(all(r_ == -1.0 for r_ in r))
        self.assertTrue(all(not t_ for t_ in t))
        s, r, t, _ = env.step([2 for _ in range(num_envs)])  # down: [" H", "XG"]
        self.assertTrue(all(s_ == 1 for s_ in s))
        self.assertTrue(all(r_ == -1.0 for r_ in r))
        self.assertTrue(all(not t_ for t_ in t))
        s, r, t, _ = env.step([0 for _ in range(num_envs)])  # up: ["XH", " G"]
        self.assertTrue(all(s_ == 0 for s_ in s))
        self.assertTrue(all(r_ == -1.0 for r_ in r))
        self.assertTrue(all(not t_ for t_ in t))
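
# The integer states asserted above index the 2x2 grid ["XH", " G"] column-major
# (index = column * height + row), as can be read off the transitions: down from the
# start yields 1, right from there yields 3 (goal), and right from the start yields 2
# (hole). A small sketch of that mapping; derived from the assertions, not from the
# GridWorld implementation itself:
def grid_state_index(row, col, height=2):
    return col * height + row

assert grid_state_index(0, 0) == 0   # Start ("X").
assert grid_state_index(1, 0) == 1   # Below the start.
assert grid_state_index(0, 1) == 2   # Hole ("H").
assert grid_state_index(1, 1) == 3   # Goal ("G").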