def setup_preprocessor(preprocessing_spec, in_space):
    """
    Assemble a python-backend PreprocessorStack from a preprocessing spec.

    Args:
        preprocessing_spec: Sequence of per-preprocessor spec dicts, or None.
            A spec without a "scope" entry gets "preprocessor-<index>".
        in_space: Space passed to the first preprocessor when creating its
            variables; each following preprocessor receives the preprocessed
            space reported by its predecessor.

    Returns:
        The built and reset PreprocessorStack, or None when
        `preprocessing_spec` is None.
    """
    if preprocessing_spec is None:
        return None

    # TODO move ingraph for python component assembly.
    # Operate on deep copies; the spec copies are edited below.
    stage_specs = deepcopy(preprocessing_spec)
    current_space = deepcopy(in_space)

    # Record each stage's scope (filling in a default) and force the
    # python backend on every stage.
    stage_scopes = []
    for index, stage_spec in enumerate(stage_specs):
        stage_spec.setdefault("scope", "preprocessor-{}".format(index))
        stage_scopes.append(stage_spec["scope"])
        stage_spec["backend"] = "python"

    stack = PreprocessorStack(*stage_specs, backend="python")

    # Create variables stage by stage, threading the space through the stack.
    for scope in stage_scopes:
        stage = stack.sub_components[scope]
        stage.create_variables(
            input_spaces=dict(preprocessing_inputs=current_space),
            action_space=None,
        )
        current_space = stage.get_preprocessed_space(current_space)

    stack.reset()
    return stack
Esempio n. 2
0
 def setup_preprocessor(self, preprocessing_spec, in_space):
     """
     Build a python-backend PreprocessorStack from a preprocessing spec.

     Args:
         preprocessing_spec: Sequence of per-preprocessor spec dicts, or None.
             A spec without a "scope" entry is assigned
             "preprocessor-<index>" instead of raising KeyError.
         in_space: Space passed to the first preprocessor when creating its
             variables; each following preprocessor receives the
             preprocessed space reported by its predecessor.

     Returns:
         The built and reset PreprocessorStack, or None when
         `preprocessing_spec` is None.
     """
     if preprocessing_spec is None:
         return None

     # Work on deep copies; the spec copies are edited below.
     preprocessing_spec = deepcopy(preprocessing_spec)
     in_space = deepcopy(in_space)

     # Collect scopes (defaulting missing ones) and set backend to python.
     scopes = []
     for i, preprocessor in enumerate(preprocessing_spec):
         if "scope" not in preprocessor:
             preprocessor["scope"] = "preprocessor-{}".format(i)
         scopes.append(preprocessor["scope"])
         preprocessor["backend"] = "python"

     processor_stack = PreprocessorStack(*preprocessing_spec,
                                         backend="python")

     # Sub-components are looked up by scope; each one is built on the
     # output space of the previous one.
     build_space = in_space
     for sub_comp_scope in scopes:
         sub_component = processor_stack.sub_components[sub_comp_scope]
         sub_component.create_variables(
             input_spaces=dict(preprocessing_inputs=build_space),
             action_space=None)
         build_space = sub_component.get_preprocessed_space(build_space)

     processor_stack.reset()
     return processor_stack
Esempio n. 3
0
    def test_with_final_eval(self):
        """
        Runs an Apex workload on CartPole with n-step adjustment 3, then
        evaluates the local agent for ten episodes without exploration.

        States are preprocessed here through a python-backend
        PreprocessorStack built from the agent config, so the agent is called
        with `apply_preprocessing=False`. (Original note: uses a single
        worker, thus replicating DQN.)
        """
        env_spec = dict(type="openai", gym_env="CartPole-v0")
        agent_config = config_from_path("configs/apex_agent_cartpole.json")

        # Use n-step adjustments on worker, replay and agent.
        ray_spec = agent_config["execution_spec"]["ray_spec"]
        ray_spec["worker_spec"]["n_step_adjustment"] = 3
        ray_spec["apex_replay_spec"]["n_step_adjustment"] = 3
        agent_config["n_step"] = 3

        # Define executor, test assembly.
        executor = ApexExecutor(
            environment_spec=env_spec,
            agent_config=agent_config,
        )
        print("Successfully created executor.")

        # Executes actual workload.
        workload = dict(
            num_timesteps=20000,
            report_interval=1000,
            report_interval_min_seconds=1,
        )
        result = executor.execute_workload(workload=workload)
        print("Finished executing workload:")
        print(result)

        # Local agent and its preprocessing config for the evaluation phase.
        agent = executor.local_agent
        preprocessing_spec = agent_config["preprocessing_spec"]

        # Fresh environment for evaluation.
        env = OpenAIGymEnv.from_spec(env_spec)

        if preprocessing_spec is None:
            processor_stack = None
        else:
            stage_specs = deepcopy(preprocessing_spec)
            build_space = deepcopy(env.state_space.with_batch_rank())

            # Remember every stage's scope and force the python backend.
            scopes = []
            for stage_spec in stage_specs:
                scopes.append(stage_spec["scope"])
            for stage_spec in stage_specs:
                stage_spec["backend"] = "python"

            processor_stack = PreprocessorStack(*stage_specs,
                                                backend="python")

            # Build each stage on the output space of the previous one.
            for scope in scopes:
                stage = processor_stack.sub_components[scope]
                stage.create_variables(
                    input_spaces=dict(preprocessing_inputs=build_space),
                    action_space=None,
                )
                build_space = stage.get_preprocessed_space(build_space)
            processor_stack.reset()

        ep_rewards = []
        print("finished learning, starting eval")
        for _ in range(10):
            observation = env.reset()
            episode_return = 0
            done = False
            while not done:
                batched = agent.state_space.force_batch(observation)
                if processor_stack is not None:
                    batched = processor_stack.preprocess(batched)

                actions = agent.get_action(
                    states=batched,
                    use_exploration=False,
                    apply_preprocessing=False,
                )
                observation, step_reward, done, info = env.step(
                    actions=actions[0])
                episode_return += step_reward
            # The loop above only exits once the episode has terminated.
            ep_rewards.append(episode_return)

        print("Eval episode rewards:")
        print(ep_rewards)