def __call__(self, env: CompilerEnv, seed: int = 0xCC) -> CompilerEnvState:
    """Autotune the given environment.

    :param env: The environment to autotune.

    :param seed: The random seed for the autotuner.

    :returns: A CompilerEnvState tuple describing the autotuning result.
    """
    # Run the autotuner in a temporary working directory and capture the
    # stdout/stderr.
    with tempfile.TemporaryDirectory(
        dir=transient_cache_path("."), prefix="autotune-"
    ) as tmpdir:
        with temporary_working_directory(Path(tmpdir)):
            with capture_output():
                with Timer() as timer:
                    self.autotune(env, seed=seed, **self.autotune_kwargs)

    return CompilerEnvState(
        benchmark=env.benchmark.uri,
        commandline=env.commandline(),
        walltime=timer.time,
        reward=self.optimization_target.final_reward(env),
    )
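# Usage sketch for the __call__ entry point above. Illustrative only, not part
# of the original code: `autotuner` stands in for an instance of the enclosing
# autotuner class, and the environment/benchmark names assume the standard
# CompilerGym LLVM environment is installed.
def _example_autotune_crc32(autotuner) -> None:
    """Autotune the cbench crc32 benchmark once and print the result."""
    import compiler_gym

    with compiler_gym.make("llvm-v0", benchmark="cbench-v1/crc32") as env:
        state = autotuner(env, seed=0xCC)
        print(f"reward={state.reward} walltime={state.walltime:.1f}s")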
def run_manual_env(env: CompilerEnv):
    """Run an environment manually.

    The manual environment allows the user to step through the environment,
    selecting the observations, rewards, and actions to run as they see fit.
    This is useful for debugging.

    :param env: The environment to run.
    """
    benchmark = None
    if not env.benchmark:
        # Allow the user to choose a benchmark, with the first choice being
        # to select randomly.
        benchmarks = sorted(env.benchmarks)
        if not benchmarks:
            print(
                "No benchmarks available. See "
                "https://facebookresearch.github.io/CompilerGym/getting_started.html#installing-benchmarks"
            )
            print("Exiting...")
            env.close()
            return

        # Strip the default benchmark:// protocol.
        for i, uri in enumerate(benchmarks):
            if uri.startswith("benchmark://"):
                benchmarks[i] = uri[len("benchmark://"):]

        benchmark_index = user_input.read_list_index(
            "Benchmark", ["random"] + benchmarks
        )
        if benchmark_index:
            benchmark = benchmarks[benchmark_index - 1]
        else:
            benchmark = None

    with Timer() as timer:
        eager_observation = env.reset(benchmark=benchmark)

    print(f"Reset {env.benchmark} environment in {timer}")
    if env.observation_space and eager_observation is not None:
        print(f"Observation: {env.observation_space.to_string(eager_observation)}")

    observation_names = sorted(env.observation.spaces.keys())
    reward_names = sorted(env.reward.spaces.keys())
    last_eager_reward: Optional[float] = None
    step_count = 1

    while True:
        print(
            f"\nStep {step_count}. Select: [{emph('a')}]ction "
            f"[{emph('o')}]bservation [{emph('r')}]eward "
            f"[{emph('c')}]ommandline [{emph('e')}]nd >>> ",
            end="",
            flush=True,
        )
        while True:
            c = user_input.read_char()
            if c == "a":
                print("action", flush=True)
                index = user_input.read_list_index(
                    "Actions", ["random"] + env.action_space.names
                )
                step_count += 1
                with Timer() as t:
                    if index == 0:
                        # User selected "random" action.
                        index = env.action_space.sample()
                    else:
                        # Offset to remove "random" action from index.
                        index -= 1
                    eager_observation, eager_reward, done, info = env.step(index)

                # Print the eager observation, if available.
                if env.observation_space and eager_observation is not None:
                    print(
                        f"Observation: {env.observation_space.to_string(eager_observation)}"
                    )

                # Print the eager reward and the diff, if available.
                if env.reward_space and eager_reward is not None:
                    reward_diff = ""
                    if last_eager_reward is not None:
                        reward_diff = (
                            f" (change: {eager_reward - last_eager_reward:.6f})"
                        )
                    print(f"Reward: {eager_reward:.6f}{reward_diff}")
                    last_eager_reward = eager_reward

                print(
                    f"Action {env.action_space.names[index]} in {t}.",
                    " No effect." if info.get("action_had_no_effect") else "",
                    flush=True,
                )
                if done:
                    print("Episode ended by environment:", info["error_details"])
                    env.close()
                    return
                break
            elif c == "o":
                print("observation", flush=True)
                observation_name = user_input.read_list_value(
                    "Observable values", observation_names
                )
                with Timer() as timer:
                    value = env.observation[observation_name]
                print(env.observation.spaces[observation_name].to_string(value))
                print(f"Observation {observation_name} in {timer}")
                break
            elif c == "r":
                print("reward", flush=True)
                reward_name = user_input.read_list_value("Rewards", reward_names)
                with Timer(f"Reward {reward_name}"):
                    print(f"{env.reward[reward_name]:.6f}")
                break
            elif c == "c":
                print("commandline")
                print("$", env.commandline(), flush=True)
                break
            elif c == "e":
                print("end", flush=True)
                with Timer("Closed environment"):
                    env.close()
                print("Have a nice day!")
                return
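# Example invocation of run_manual_env(). Illustrative only: it assumes the
# CompilerGym LLVM environment is available. The benchmark is deliberately
# left unset so the interactive benchmark picker above is exercised.
def _example_manual_session() -> None:
    import compiler_gym

    env = compiler_gym.make("llvm-v0")
    run_manual_env(env)  # run_manual_env() closes the environment itself.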
@classmethod
def from_agent(
    cls,
    env: CompilerEnv,
    agent,
    runtime: bool = True,
    runtimes_count: int = 30,
):
    """Run an agent's policy on the environment and summarize the outcome.

    Records final instruction counts and object sizes against the -O0 and
    -Oz baselines and, optionally, program runtimes before optimization,
    after the agent's actions, and under -O3.
    """
    # We calculate our own reward at the end, no need for incremental
    # rewards during inference.
    env.reward_space = None

    # Run inference on the environment.
    observation, done = env.reset(), False
    with Timer() as inference_timer:
        while not done:
            action = agent.compute_action(observation)
            observation, _, done, _ = env.step(action)

    instruction_count_init = env.unwrapped.observation["IrInstructionCountO0"]
    instruction_count_final = env.unwrapped.observation["IrInstructionCount"]
    instruction_count_oz = env.unwrapped.observation["IrInstructionCountOz"]

    object_size_init = env.unwrapped.observation["ObjectTextSizeO0"]
    object_size_final = env.unwrapped.observation["ObjectTextSizeBytes"]
    object_size_oz = env.unwrapped.observation["ObjectTextSizeOz"]

    runtimes_init = []
    runtimes_o3 = []
    runtimes_final = []

    try:
        if runtime and env.unwrapped.observation["IsRunnable"]:
            env.send_param(
                "llvm.set_runtimes_per_observation_count", str(runtimes_count)
            )
            env.unwrapped.observation["Runtime"]  # warmup
            runtimes_final = env.unwrapped.observation["Runtime"].tolist()
            assert (
                len(runtimes_final) == runtimes_count
            ), f"{len(runtimes_final)} != {runtimes_count}"

            env.reset()
            env.send_param(
                "llvm.set_runtimes_per_observation_count", str(runtimes_count)
            )
            env.unwrapped.observation["Runtime"]  # warmup
            runtimes_init = env.unwrapped.observation["Runtime"].tolist()
            assert (
                len(runtimes_init) == runtimes_count
            ), f"{len(runtimes_init)} != {runtimes_count}"

            env.send_param("llvm.apply_baseline_optimizations", "-O3")
            env.unwrapped.observation["Runtime"]  # warmup
            runtimes_o3 = env.unwrapped.observation["Runtime"].tolist()
            assert (
                len(runtimes_o3) == runtimes_count
            ), f"{len(runtimes_o3)} != {runtimes_count}"
    except Exception as e:  # pylint: disable=broad-except
        logger.warning("Failed to compute runtime: %s", e)

    return cls(
        benchmark=env.benchmark.uri,
        inference_walltime_seconds=inference_timer.time,
        commandline=env.commandline(),
        episode_len=len(env.actions),
        instruction_count_init=instruction_count_init,
        instruction_count_final=instruction_count_final,
        instruction_count_oz=instruction_count_oz,
        instruction_count_reduction=instruction_count_oz
        / max(instruction_count_final, 1),
        object_size_init=object_size_init,
        object_size_final=object_size_final,
        object_size_oz=object_size_oz,
        object_size_reduction=object_size_oz / max(object_size_final, 1),
        runtimes_init=runtimes_init,
        runtimes_final=runtimes_final,
        runtimes_o3=runtimes_o3,
        runtime_reduction=np.median(runtimes_o3 or [0])
        / max(np.median(runtimes_final or [0]), 1),
    )
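# Sketch of driving from_agent() with a trivial stand-in agent. Everything
# here is an assumption for illustration: `InferenceResult` is a hypothetical
# name for the class defining from_agent() above, and the random agent is not
# part of the original code. TimeLimit bounds the episode, since a random
# agent may never trigger `done` on its own.
class _RandomAgent:
    """Agent that picks a random action, ignoring the observation."""

    def __init__(self, env):
        self._action_space = env.action_space

    def compute_action(self, observation):
        return self._action_space.sample()


def _example_eval_random_agent() -> None:
    import compiler_gym
    from compiler_gym.wrappers import TimeLimit

    with compiler_gym.make("llvm-v0", benchmark="cbench-v1/crc32") as env:
        env = TimeLimit(env, max_episode_steps=100)
        result = InferenceResult.from_agent(env, _RandomAgent(env), runtime=False)
        print(result.commandline)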
def test_commandline(env: CompilerEnv):
    env.reset(benchmark="cBench-v0/crc32")
    assert env.commandline() == "opt input.bc -o output.bc"
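# A hedged companion test, not from the original suite: it assumes the
# environment provides the commandline_to_actions() helper, which parses a
# generated commandline back into the action sequence that produced it.
def test_commandline_round_trip(env: CompilerEnv):
    env.reset(benchmark="cBench-v0/crc32")
    env.step(env.action_space.sample())
    actions = env.commandline_to_actions(env.commandline())
    assert actions == env.actions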