Esempio n. 1
0
 def eval_action(fkd: CompilerEnv, action: int) -> RewardAction:
     """Apply a single action to the environment and capture its reward.

     The environment is closed before returning, whether or not the step
     succeeds.
     """
     try:
         _observation, reward, _done, _info = fkd.step(action)
     finally:
         fkd.close()
     return RewardAction(reward=reward, action=action)
Esempio n. 2
0
 def run_one_environment(self, env: CompilerEnv) -> None:
     """Keep running episodes until stopped or the environment ends.

     Episodes are run back-to-back while ``self.should_run_one_episode``
     stays true; the loop ends early when an episode reports failure. The
     environment is always closed on exit.
     """
     try:
         episode_ok = True
         while episode_ok and self.should_run_one_episode:
             self.total_episode_count += 1
             episode_ok = self.run_one_episode(env)
     finally:
         env.close()
Esempio n. 3
0
def test_step(env: CompilerEnv, action_name: str):
    """Run each action on a single benchmark."""
    env.reward_space = "IrInstructionCount"
    env.observation_space = "Autophase"
    env.reset(benchmark="cbench-v1/crc32")

    chosen_action = env.action_space.from_string(action_name)
    obs, rew, episode_over, _ = env.step(chosen_action)

    assert isinstance(obs, np.ndarray)
    assert obs.shape == (AUTOPHASE_FEATURE_DIM, )
    assert isinstance(rew, float)
    assert isinstance(episode_over, bool)

    try:
        env.close()
    except ServiceError as e:
        # A ServiceError out of close() means the backend service died
        # ungracefully; in that case the step must have set done=True.
        assert episode_over, f"Service error was raised when 'done' flag not set: {e}"
Esempio n. 4
0
def run_random_walk(env: CompilerEnv, step_count: int) -> List[float]:
    """Perform a random walk of the action space.

    :param env: The environment to use. It is always closed before this
        function returns, even if a step raises.
    :param step_count: The number of steps to run. This value is an upper bound -
        fewer steps will be performed if any of the actions lead the
        environment to end the episode.
    :return: The list of observed rewards. Empty if ``step_count < 1``.
    """
    rewards = []

    step_num = 0
    with Timer() as episode_time:
        env.reset()
        try:
            for step_num in range(1, step_count + 1):
                action_index = env.action_space.sample()
                with Timer() as step_time:
                    observation, reward, done, info = env.step(action_index)
                print(f"\n=== Step {humanize.intcomma(step_num)} ===")
                print(f"Action:       {env.action_space.names[action_index]} "
                      f"(changed={not info.get('action_had_no_effect')})")
                rewards.append(reward)
                print(f"Reward:       {reward}")
                if env._eager_observation:
                    print(f"Observation:\n{observation}")
                print(f"Step time:    {step_time}")
                if done:
                    print("Episode ended by environment")
                    break
        finally:
            # Fix: previously env leaked if env.step() raised mid-walk.
            env.close()

    if not rewards:
        # Fix: with step_count < 1 the summary below would raise IndexError
        # on rewards[0]. Nothing to summarize; return the empty walk.
        return rewards

    def reward_delta(reward):
        # Relative change, guarded against division by ~zero rewards.
        delta = rewards[0] / max(reward, 1e-9) - 1
        return emph(f"{'+' if delta >= 0 else ''}{delta:.2%}")

    print(
        f"\nCompleted {emph(humanize.intcomma(step_num))} steps in {episode_time} "
        f"({step_num / episode_time.time:.1f} steps / sec).")
    print(f"Init reward:  {rewards[0]}")
    print(f"Final reward: {rewards[-1]} ({reward_delta(rewards[-1])})")
    print(f"Max reward:   {max(rewards)} ({reward_delta(max(rewards))} "
          f"at step {humanize.intcomma(rewards.index(max(rewards)) + 1)})")
    return rewards
Esempio n. 5
0
def replay_actions_from_logs(env: CompilerEnv,
                             logdir: Path,
                             benchmark=None) -> None:
    """Replay the best action sequence recorded in a log directory.

    Loads the metadata and best-actions files from *logdir*, resets the
    environment to the recorded (or overridden) benchmark, and replays the
    actions.

    :param env: The environment to replay in. Always closed on return, even
        if the replay fails.
    :param logdir: Directory containing the metadata and best-actions files.
    :param benchmark: Optional benchmark override; defaults to the benchmark
        recorded in the metadata file.
    :raises AssertionError: If either expected log file is missing.
    """
    best_actions_path = logdir / logs.BEST_ACTIONS_NAME
    meta_path = logdir / logs.METADATA_NAME

    assert best_actions_path.is_file(), f"File not found: {best_actions_path}"
    assert meta_path.is_file(), f"File not found: {meta_path}"

    with open(meta_path, "rb") as f:
        meta = json.load(f)

    # One action name per non-empty line; iterate the file directly
    # instead of materializing readlines().
    with open(best_actions_path) as f:
        actions = [line.strip() for line in f if line.strip()]

    benchmark = benchmark or meta["benchmark"]
    env.reward_space = meta["reward"]
    env.reset(benchmark=benchmark)
    try:
        replay_actions(env, actions, logdir)
    finally:
        # Fix: previously the environment leaked if replay_actions raised.
        env.close()
Esempio n. 6
0
def run_manual_env(env: CompilerEnv):
    """Run an environment manually.

    The manual environment allows the user to step through the environment,
    selecting observations, rewards, and actions to run as they see fit. This
    is useful for debugging.

    :param env: The environment to run. It is closed before this function
        returns (on every exit path: no benchmarks, episode end, or user
        quitting).
    """
    benchmark = None
    if not env.benchmark:
        # Allow the user to choose a benchmark, with the first choice being
        # to select randomly.
        benchmarks = sorted(env.benchmarks)
        if not benchmarks:
            print(
                "No benchmarks available see https://facebookresearch.github.io/CompilerGym/getting_started.html#installing-benchmarks"
            )
            print("Exiting...")
            env.close()
            return

        # Strip default benchmark:// protocol.
        # NOTE: the loop variable reuses `benchmark`, but it is
        # unconditionally reassigned below, so no stale value escapes.
        for i, benchmark in enumerate(benchmarks):
            if benchmark.startswith("benchmark://"):
                benchmarks[i] = benchmark[len("benchmark://"):]

        benchmark_index = user_input.read_list_index("Benchmark",
                                                     ["random"] + benchmarks)
        if benchmark_index:
            # Offset by one: index 0 is the "random" entry.
            benchmark = benchmarks[benchmark_index - 1]
        else:
            # "random" selected: let env.reset() pick the benchmark.
            benchmark = None

    with Timer() as timer:
        eager_observation = env.reset(benchmark=benchmark)

    print(f"Reset {env.benchmark} environment in {timer}")
    # Show the initial observation if an eager observation space is active.
    if env.observation_space and eager_observation is not None:
        print(
            f"Observation: {env.observation_space.to_string(eager_observation)}"
        )

    observation_names = sorted(env.observation.spaces.keys())
    reward_names = sorted(env.reward.spaces.keys())
    # Tracks the previous eager reward so a delta can be reported per step.
    last_eager_reward: Optional[float] = None
    step_count = 1

    # Outer loop: one iteration per prompt shown to the user.
    while True:
        print(
            f"\nStep {step_count}. Select: [{emph('a')}]ction "
            f"[{emph('o')}]bservation [{emph('r')}]eward "
            f"[{emph('c')}]ommandline [{emph('e')}]nd >>> ",
            end="",
            flush=True,
        )
        # Inner loop: read characters until one matches a known command;
        # unrecognized characters are silently ignored and re-read.
        while True:
            c = user_input.read_char()
            if c == "a":
                print("action", flush=True)
                index = user_input.read_list_index("Actions", ["random"] +
                                                   env.action_space.names)
                step_count += 1
                with Timer() as t:
                    if index == 0:
                        # User selected "random" action.
                        index = env.action_space.sample()
                    else:
                        # Offset to remove "random" action from index.
                        index -= 1
                    eager_observation, eager_reward, done, info = env.step(
                        index)

                # Print the eager observation, if available.
                if env.observation_space and eager_observation is not None:
                    print(
                        f"Observation: {env.observation_space.to_string(eager_observation)}"
                    )

                # Print the eager reward and the diff, if available.
                if env.reward_space and eager_reward is not None:
                    reward_diff = ""
                    if last_eager_reward is not None and eager_reward is not None:
                        reward_diff = (
                            f" (change: {eager_reward - last_eager_reward:.6f})"
                        )
                    print(f"Reward: {eager_reward:.6f}{reward_diff}")
                    last_eager_reward = eager_reward

                print(
                    f"Action {env.action_space.names[index]} in {t}.",
                    " No effect." if info.get("action_had_no_effect") else "",
                    flush=True,
                )
                if done:
                    # Episode over: close the environment and leave entirely.
                    print("Episode ended by environment: ",
                          info["error_details"])
                    env.close()
                    return
                break
            if c == "o":
                print("observation", flush=True)
                observation_name = user_input.read_list_value(
                    "Observable values", observation_names)
                with Timer() as timer:
                    value = env.observation[observation_name]
                print(
                    env.observation.spaces[observation_name].to_string(value))
                print(f"Observation {observation_name} in {timer}")
                break
            elif c == "r":
                print("reward", flush=True)
                reward_name = user_input.read_list_value(
                    "Rewards", reward_names)
                with Timer(f"Reward {reward_name}"):
                    print(f"{env.reward[reward_name]:.6f}")
                break
            elif c == "c":
                # Show the command line equivalent to the actions so far.
                print("commandline")
                print("$", env.commandline(), flush=True)
                break
            elif c == "e":
                # User asked to end the session: close the env and return.
                print("end", flush=True)
                with Timer("Closed environment"):
                    env.close()
                print("Have a nice day!")
                return
Esempio n. 7
0
def test_init_benchmark(env: CompilerEnv, benchmark_name: str):
    """Reset an environment on the given benchmark, verify it, and close it."""
    env.reset(benchmark=benchmark_name)
    selected = env.benchmark
    assert selected == benchmark_name
    env.close()