def test_service_env_dies_reset(env: CompilerEnv):
    """Check that an environment recovers after its service is killed.

    After the service is closed mid-episode, the next step() must end the
    episode and return default values, and a following reset() must produce
    a usable episode again.
    """
    env.observation_space = "Autophase"
    env.reward_space = "IrInstructionCount"
    env.reset("cbench-v1/crc32")

    # Kill the service. Note killing the service for a ManagedConnection will
    # result in a ServiceError because we have not ended the session we
    # started with env.reset() above. For UnmanagedConnection, this error
    # will not be raised.
    try:
        env.service.close()
    except ServiceError as error:
        assert "Service exited with returncode " in str(error)

    # Stepping a dead service must end the episode rather than crash.
    dead_observation, dead_reward, dead_done, dead_info = env.step(0)
    assert dead_done, dead_info["error_details"]
    assert not env.in_episode

    # Default values are returned in place of real ones.
    np.testing.assert_array_equal(
        dead_observation, np.zeros(AUTOPHASE_FEATURE_DIM)
    )
    assert dead_reward == 0

    # A fresh reset brings the environment back to life.
    env.reset(benchmark="cbench-v1/crc32")
    assert env.in_episode

    live_observation, live_reward, live_done, live_info = env.step(0)
    assert not live_done, live_info["error_details"]
    assert live_observation is not None
    assert live_reward is not None
def test_service_env_dies_reset(env: CompilerEnv):
    """Check that an environment recovers after its service is killed.

    The service is closed mid-episode. The next step() must end the episode
    with default observation and reward values, and reset() must then
    restore a working episode.
    """
    env.observation_space = "Autophase"
    env.reward_space = "IrInstructionCount"
    env.reset("cBench-v0/crc32")

    # Kill the service.
    env.service.close()

    # Stepping a dead service must end the episode rather than crash.
    dead_observation, dead_reward, dead_done, dead_info = env.step(0)
    assert dead_done, dead_info["error_details"]
    assert not env.in_episode

    # Default values are returned in place of real ones.
    np.testing.assert_array_equal(
        dead_observation, np.zeros(AUTOPHASE_FEATURE_DIM)
    )
    assert dead_reward == 0

    # A fresh reset brings the environment back to life.
    env.reset(benchmark="cBench-v0/crc32")
    assert env.in_episode

    live_observation, live_reward, live_done, live_info = env.step(0)
    assert not live_done, live_info["error_details"]
    assert live_observation is not None
    assert live_reward is not None
def test_double_reset(env: CompilerEnv):
    """Test that reset() can be called twice."""
    # First episode: reset, confirm we are in an episode, take one step.
    env.reset()
    assert env.in_episode
    first_action = env.action_space.sample()
    env.step(first_action)

    # Second episode: reset again, step, and confirm we are still in an
    # episode afterwards.
    env.reset()
    second_action = env.action_space.sample()
    env.step(second_action)
    assert env.in_episode
def test_double_reset_with_step(env: CompilerEnv):
    """Test that reset() can be called twice with a step.

    Unlike a plain double reset, each step's ``done`` flag is checked so
    that a step which ends the episode fails the test.
    """
    # First episode.
    env.reset()
    assert env.in_episode
    first_result = env.step(env.action_space.sample())
    _, _, done, info = first_result
    assert not done, info

    # Second episode.
    env.reset()
    second_result = env.step(env.action_space.sample())
    _, _, done, info = second_result
    assert not done, info
    assert env.in_episode
def test_fork(env: CompilerEnv):
    """Test that fork() yields an environment with the same benchmark and
    action history as its parent."""
    env.reset()
    for action in (0, 1):
        env.step(action)

    forked = env.fork()
    try:
        assert env.benchmark == forked.benchmark
        assert forked.actions == [0, 1]
    finally:
        # Always release the forked environment, even if an assertion fails.
        forked.close()
def nevergrad(
    env: CompilerEnv,
    optimization_target: OptimizationTarget,
    search_time_seconds: int,
    seed: int,
    episode_length: int = 100,
    optimizer: str = "DiscreteLenglerOnePlusOne",
    **kwargs
) -> None:
    """Optimize an environment using nevergrad.

    Nevergrad is a gradient-free optimization platform that provides
    implementations of various black box optimizations techniques:

        https://facebookresearch.github.io/nevergrad/

    Candidate action sequences are requested from the optimizer and scored
    by replaying them in ``env`` until the time budget is spent. The
    recommended sequence is then replayed so that ``env`` is left in the
    corresponding state.

    :param env: The environment to optimize.
    :param optimization_target: The target being optimized. Rewards are
        cached for every target except ``OptimizationTarget.RUNTIME``.
    :param search_time_seconds: Wall-clock budget for the ask/tell loop.
    :param seed: Seed for the parametrization's random state.
    :param episode_length: Number of actions in each candidate sequence.
    :param optimizer: Name of the optimizer class, looked up on
        ``ng.optimizers``.
    :param kwargs: Accepted but not used by this function.
    """

    def replay_and_score(actions: Tuple[int]) -> float:
        # Negated because the reward is maximized while the optimizer is
        # told a loss.
        env.reset()
        env.step(actions)
        return -env.episode_reward

    if optimization_target == OptimizationTarget.RUNTIME:
        calculate_negative_reward = replay_and_score
    else:
        # Only cache the deterministic non-runtime rewards.
        calculate_negative_reward = lru_cache(maxsize=int(1e4))(replay_and_score)

    search_space = ng.p.Choice(
        choices=range(env.action_space.n),
        repetitions=episode_length,
        deterministic=True,
    )
    search_space.random_state.seed(seed)

    searcher = getattr(ng.optimizers, optimizer)(
        parametrization=search_space, budget=1, num_workers=1
    )

    deadline = time() + search_time_seconds
    while time() < deadline:
        candidate = searcher.ask()
        searcher.tell(candidate, calculate_negative_reward(candidate.value))

    # Get best solution and replay it.
    best = searcher.provide_recommendation()
    env.reset()
    env.step(best.value)
def test_eager_reward(env: CompilerEnv):
    """Test stepping with the eager reward space set to "codesize"."""
    env.eager_reward_space = "codesize"
    env.reset()

    step_result = env.step(0)
    observation, reward, done, info = step_result

    # No observation space was selected, so only a reward is expected.
    assert observation is None
    assert reward == 0
    assert not done
def run_one_episode(self, env: CompilerEnv) -> bool:
    """Run a single random episode.

    Random actions are taken until ``self._patience`` consecutive steps
    pass without the cumulative reward exceeding ``self.best_returns``.
    Whenever a new best is found, the best-so-far bookkeeping on ``self``
    is updated and the patience counter is refilled.

    :param env: An environment.
    :return: True if the episode ended gracefully, else False.
    """
    env.reset()
    trajectory: List[int] = []
    remaining_patience = self._patience
    cumulative_reward = 0

    while remaining_patience >= 0:
        remaining_patience -= 1
        self.total_step_count += 1

        # === Your agent here! ===
        action_index = env.action_space.sample()
        # === End of agent. ===

        trajectory.append(action_index)
        _, reward, done, _ = env.step(action_index)
        if done:
            # The environment ended the episode before patience ran out.
            return False

        cumulative_reward += reward
        if cumulative_reward > self.best_returns:
            # New best: refill patience and record how we got here.
            remaining_patience = self._patience
            self.best_returns = cumulative_reward
            self.best_actions = trajectory.copy()
            self.best_commandline = env.commandline()
            self.best_found_at_time = time()

    return True
def run_one_trial(
    env: CompilerEnv, reward_space: str, action: int, max_warmup_steps: int
) -> Optional[float]:
    """Run a random number of "warmup" steps in an environment, then compute
    the immediate reward of the given action.

    :param env: The environment to step.
    :param reward_space: Name of the reward space to evaluate.
    :param action: The action whose immediate reward is measured.
    :param max_warmup_steps: Inclusive upper bound on the number of random
        warmup actions.
    :return: An immediate reward, or None if the episode ended during the
        warmup or on the measured action.
    """
    warmup_length = random.randint(0, max_warmup_steps)
    warmup_actions = []
    for _ in range(warmup_length):
        warmup_actions.append(env.action_space.sample())

    env.reward_space = reward_space
    _, _, warmup_done, _ = env.step(warmup_actions)
    if warmup_done:
        return None

    _, (reward,), action_done, _ = env.step(action, rewards=[reward_space])
    if action_done:
        return None
    return reward
def eval_action(fkd: CompilerEnv, action: int) -> RewardAction:
    """Evaluate the given action.

    The environment is closed once the step has been attempted, whether or
    not the step succeeds.

    :param fkd: The environment to step. It is closed by this function.
    :param action: The action to apply.
    :return: The action paired with the reward it produced.
    """
    try:
        step_result = fkd.step(action)
        _, step_reward, _, _ = step_result
    finally:
        fkd.close()
    return RewardAction(reward=step_reward, action=action)
def test_default_reward(env: CompilerEnv):
    """Test default reward space."""
    env.reward_space = "runtime"
    env.reset()

    step_result = env.step(0)
    observation, reward, done, info = step_result

    # No observation space was selected, so only a reward is expected.
    assert observation is None
    assert reward == 0
    assert not done
def test_eager_ir_observation(env: CompilerEnv):
    """Test that the eager "ir" observation is returned by both reset() and
    step()."""
    expected_ir = "Hello, world!"
    env.eager_observation_space = "ir"

    initial_observation = env.reset()
    assert initial_observation == expected_ir

    step_observation, reward, done, info = env.step(0)
    assert step_observation == expected_ir
    # No reward space was selected.
    assert reward is None
    assert not done
def test_default_ir_observation(env: CompilerEnv):
    """Test default observation space."""
    env.observation_space = "ir"

    initial_observation = env.reset()
    assert len(initial_observation) > 0

    step_observation, reward, done, info = env.step(0)
    assert not done, info
    assert len(step_observation) > 0
    # No reward space was selected.
    assert reward is None
def test_default_ir_observation(env: CompilerEnv):
    """Test default observation space."""
    expected_ir = "Hello, world!"
    env.observation_space = "ir"

    initial_observation = env.reset()
    assert initial_observation == expected_ir

    step_observation, reward, done, info = env.step(0)
    assert step_observation == expected_ir
    # No reward space was selected.
    assert reward is None
    assert not done
def test_service_env_dies_reset(env: CompilerEnv):
    """Check that an environment recovers after its service is killed.

    Once the service is closed, step() must end the episode and return None
    for both observation and reward; a subsequent reset() must restore a
    working episode.
    """
    env.observation_space = "Autophase"
    env.reward_space = "IrInstructionCount"
    env.reset("cBench-v0/crc32")

    # Kill the service.
    env.service.close()

    # Stepping a dead service must end the episode rather than crash.
    dead_observation, dead_reward, dead_done, _ = env.step(0)
    assert dead_done
    assert dead_observation is None
    assert dead_reward is None

    # A fresh reset brings the environment back to life.
    env.reset(benchmark="cBench-v0/crc32")
    live_observation, live_reward, live_done, _ = env.step(0)
    assert not live_done
    assert live_observation is not None
    assert live_reward is not None
def test_step(env: CompilerEnv, action_name: str):
    """Run each action on a single benchmark."""
    env.reward_space = "IrInstructionCount"
    env.observation_space = "Autophase"
    env.reset(benchmark="cBench-v0/crc32")

    action = env.action_space.from_string(action_name)
    observation, reward, done, _ = env.step(action)

    # Only the types and shape of the step results are checked here.
    assert isinstance(observation, np.ndarray)
    assert observation.shape == (AUTOPHASE_FEATURE_DIM,)
    assert isinstance(reward, float)
    assert isinstance(done, bool)
def run_one_trial(
    env: CompilerEnv, reward_space: str, action: int, max_warmup_steps: int
) -> Optional[float]:
    """Run a random number of "warmup" steps in an environment, then apply
    the given action and read back the reward.

    :param env: The environment to step.
    :param reward_space: Name of the reward to read from ``env.reward``.
    :param action: The action applied after the warmup.
    :param max_warmup_steps: Inclusive upper bound on the number of random
        warmup steps.
    :return: The value of ``env.reward[reward_space]`` read after the
        action, or None if the episode ended at any point.
    """
    for _ in range(random.randint(0, max_warmup_steps)):
        _, _, warmup_done, _ = env.step(env.action_space.sample())
        if warmup_done:
            return None

    # Force reward calculation.
    reward_before = env.reward[reward_space]
    assert reward_before is not None

    _, _, action_done, _ = env.step(action)
    if action_done:
        return None

    reward_after = env.reward[reward_space]
    assert reward_after is not None
    return reward_after
def apply_random_trajectory(
    env: CompilerEnv, random_trajectory_length_range=(1, 50)
) -> List[Tuple[int, observation_t, float, bool]]:
    """Evaluate and return a random trajectory.

    :param env: The environment to step.
    :param random_trajectory_length_range: Inclusive ``(min, max)`` bounds
        from which the number of steps is drawn.
    :return: One ``(action, observation, reward, done)`` tuple per completed
        step. A step that ends the episode stops the walk and is not
        recorded.
    """
    steps = []
    for _ in range(random.randint(*random_trajectory_length_range)):
        chosen_action = env.action_space.sample()
        observation, reward, done, _ = env.step(chosen_action)
        if done:
            break  # Broken trajectory.
        steps.append((chosen_action, observation, reward, done))
    return steps
def run_one_trial(
    env: CompilerEnv, reward_space: str, min_steps: int, max_steps: int
) -> Optional[float]:
    """Run a random number of random steps in an environment and return the
    cumulative reward.

    :param env: The environment to step.
    :param reward_space: Name of the reward space to select on ``env``.
    :param min_steps: Inclusive lower bound on the number of steps.
    :param max_steps: Inclusive upper bound on the number of steps.
    :return: A cumulative reward, or None if the episode ended.
    """
    step_total = random.randint(min_steps, max_steps)
    random_actions = []
    for _ in range(step_total):
        random_actions.append(env.action_space.sample())

    env.reward_space = reward_space
    _, _, episode_over, _ = env.step(random_actions)
    if episode_over:
        return None
    return env.episode_reward
def test_step(env: CompilerEnv, action_name: str):
    """Run each action on a single benchmark."""
    env.eager_reward_space = "IrInstructionCount"
    env.eager_observation_space = "Autophase"
    env.reset(benchmark="cBench-v0/crc32")

    action = env.action_space.from_string(action_name)
    observation, reward, done, info = env.step(action)

    if not done:
        # A live episode yields a feature vector and a negative reward.
        assert isinstance(observation, np.ndarray)
        assert observation.shape == (56,)
        assert reward < 0
    else:
        # An ended episode yields neither observation nor reward.
        assert observation is None
        assert reward is None
def replay_actions(env: CompilerEnv, action_names: List[str], outdir: Path):
    """Replay a sequence of named actions, logging progress as it goes.

    One line is printed per step and one CSV row per step is written to the
    best-actions progress log in ``outdir``. When LLVM support is enabled
    and ``env`` is an ``LlvmEnv``, the bitcode is written before and after
    the replay and a size summary table is printed.

    :param env: The environment to replay the actions in.
    :param action_names: Names of the actions to apply, in order.
    :param outdir: Directory that receives the log and bitcode files.
    """
    progress_log = outdir / logs.BEST_ACTIONS_PROGRESS_NAME
    started_at = time()

    if config.enable_llvm_env and isinstance(env, LlvmEnv):
        env.write_bitcode(outdir / "unoptimized.bc")

    with open(str(progress_log), "w") as f:
        ep_reward = 0
        for i, action in enumerate(action_names, start=1):
            action_index = env.action_space.names.index(action)
            _, reward, done, _ = env.step(action_index)
            assert not done
            ep_reward += reward
            print(
                f"Step [{i:03d} / {len(action_names):03d}]: reward={reward:.4f} \t"
                f"episode={ep_reward:.4f} \taction={action}"
            )
            entry = RandomSearchProgressLogEntry(
                runtime_seconds=time() - started_at,
                total_episode_count=1,
                total_step_count=i,
                num_passes=i,
                reward=reward,
            )
            print(entry.to_csv(), action, file=f, sep=",")

    if config.enable_llvm_env and isinstance(env, LlvmEnv):
        env.write_bitcode(outdir / "optimized.bc")

        # Compare the final program against the -O0 and -Oz baselines.
        instruction_count_row = (
            "IR instruction count",
            env.observation["IrInstructionCountO0"],
            env.observation["IrInstructionCountOz"],
            env.observation["IrInstructionCount"],
        )
        text_size_row = (
            "Object .text size (bytes)",
            env.observation["ObjectTextSizeO0"],
            env.observation["ObjectTextSizeOz"],
            env.observation["ObjectTextSizeBytes"],
        )
        print(
            tabulate(
                [instruction_count_row, text_size_row],
                headers=("", "-O0", "-Oz", "final"),
            )
        )
def run_one_trial(
    env: CompilerEnv, reward: str, min_steps: int, max_steps: int
) -> Optional[float]:
    """Run a random number of random steps in an environment, then compute
    the relative change in the given reward.

    This function does not call ``env.reset()``; the reward is sampled from
    whatever state ``env`` is in when called.

    :param env: The environment to step.
    :param reward: Name of the reward to read from ``env.reward``.
    :param min_steps: Inclusive lower bound on the number of random steps.
    :param max_steps: Inclusive upper bound on the number of random steps.
    :return: The ratio of reward improvement,
        ``(reward_after - init_reward) / init_reward``. None is returned if
        the episode ended during the walk, or if the initial reward is zero
        and the ratio is therefore undefined.
    """
    num_steps = random.randint(min_steps, max_steps)
    init_reward = env.reward[reward]

    for _ in range(num_steps):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            return None

    reward_after = env.reward[reward]
    assert init_reward is not None
    assert reward_after is not None

    # A zero baseline has no meaningful relative change; report "no result"
    # rather than raising ZeroDivisionError.
    if init_reward == 0:
        return None
    return (reward_after - init_reward) / init_reward
def run_random_walk(env: CompilerEnv, step_count: int) -> List[float]:
    """Perform a random walk of the action space.

    Each step is printed as it happens, followed by a summary of the walk.
    The environment is closed once the walk finishes.

    :param env: The environment to use.
    :param step_count: The number of steps to run. This value is an upper
        bound - fewer steps will be performed if any of the actions lead the
        environment to end the episode.
    :return: The list of observed rewards. Empty if no steps were run.
    """
    rewards = []
    step_num = 0
    with Timer() as episode_time:
        env.reset()
        for step_num in range(1, step_count + 1):
            action_index = env.action_space.sample()
            with Timer() as step_time:
                observation, reward, done, info = env.step(action_index)
            print(f"\n=== Step {humanize.intcomma(step_num)} ===")
            print(
                f"Action: {env.action_space.names[action_index]} "
                f"(changed={not info.get('action_had_no_effect')})"
            )
            rewards.append(reward)
            print(f"Reward: {reward}")
            if env._eager_observation:
                print(f"Observation:\n{observation}")
            print(f"Step time: {step_time}")
            if done:
                print("Episode ended by environment")
                break
        env.close()

    print(
        f"\nCompleted {emph(humanize.intcomma(step_num))} steps in {episode_time} "
        f"({step_num / episode_time.time:.1f} steps / sec)."
    )

    # With no steps taken there is no reward to summarise; indexing or
    # taking max() of the empty list would raise.
    if not rewards:
        return rewards

    def reward_delta(reward):
        # Change relative to the first reward; the divisor is clamped to
        # avoid dividing by zero.
        delta = rewards[0] / max(reward, 1e-9) - 1
        return emph(f"{'+' if delta >= 0 else ''}{delta:.2%}")

    best_reward = max(rewards)
    print(f"Init reward: {rewards[0]}")
    print(f"Final reward: {rewards[-1]} ({reward_delta(rewards[-1])})")
    print(
        f"Max reward: {best_reward} ({reward_delta(best_reward)} "
        f"at step {humanize.intcomma(rewards.index(best_reward) + 1)})"
    )
    return rewards
def apply_random_trajectory(
    env: CompilerEnv,
    random_trajectory_length_range=(1, 50),
    timeout: int = 0,
) -> List[Tuple[int, ObservationType, float, bool]]:
    """Evaluate and return a random trajectory.

    :param env: The environment to step.
    :param random_trajectory_length_range: Inclusive ``(min, max)`` bounds
        from which the number of steps is drawn.
    :param timeout: Time budget in seconds, checked after each recorded
        step. A falsy value disables the check.
    :return: One ``(action, observation, reward, done)`` tuple per completed
        step. A step that ends the episode stops the walk and is not
        recorded.
    """
    deadline = time() + timeout
    step_total = random.randint(*random_trajectory_length_range)

    steps = []
    for _ in range(step_total):
        chosen_action = env.action_space.sample()
        observation, reward, done, _ = env.step(chosen_action)
        if done:
            break  # Broken trajectory.
        steps.append((chosen_action, observation, reward, done))
        out_of_time = timeout and time() > deadline
        if out_of_time:
            break
    return steps
def test_step(env: CompilerEnv, action_name: str):
    """Run each action on a single benchmark."""
    env.reward_space = "IrInstructionCount"
    env.observation_space = "Autophase"
    env.reset(benchmark="cbench-v1/crc32")

    action = env.action_space.from_string(action_name)
    observation, reward, done, _ = env.step(action)

    # Only the types and shape of the step results are checked here.
    assert isinstance(observation, np.ndarray)
    assert observation.shape == (AUTOPHASE_FEATURE_DIM,)
    assert isinstance(reward, float)
    assert isinstance(done, bool)

    try:
        env.close()
    except ServiceError as error:
        # env.close() will raise an error if the service terminated
        # ungracefully. In that case, the "done" flag should have been set.
        assert done, f"Service error was raised when 'done' flag not set: {error}"
def replay_actions(env: CompilerEnv, action_names: List[str], outdir: Path):
    """Replay a sequence of named actions, logging progress as it goes.

    The initial reward and every step's reward are printed and appended as
    CSV rows to the best-actions progress log in ``outdir``. For an
    ``LlvmEnv``, the resulting bitcode file is then copied into ``outdir``.

    :param env: The environment to replay the actions in.
    :param action_names: Names of the actions to apply, in order.
    :param outdir: Directory that receives the log and bitcode files.
    """
    progress_log = outdir / logs.BEST_ACTIONS_PROGRESS_NAME
    started_at = time()

    init_reward = env.reward[env.eager_reward_space]
    print(f"Step [{0:03d} / {len(action_names):03d}]: reward={init_reward:.4f}")

    with open(str(progress_log), "w") as f:
        # Row zero records the state before any action is applied.
        initial_entry = logs.ProgressLogEntry(
            runtime_seconds=time() - started_at,
            total_episode_count=1,
            total_step_count=0,
            num_passes=0,
            reward=init_reward,
        )
        print(initial_entry.to_csv(), "", file=f, sep="")

        previous_reward = init_reward
        for i, action in enumerate(action_names, start=1):
            action_index = env.action_space.names.index(action)
            _, reward, done, _ = env.step(action_index)
            assert not done
            print(
                f"Step [{i:03d} / {len(action_names):03d}]: reward={reward:.4f}, "
                f"change={reward-previous_reward:.4f}, action={action}"
            )
            step_entry = logs.ProgressLogEntry(
                runtime_seconds=time() - started_at,
                total_episode_count=1,
                total_step_count=i,
                num_passes=i,
                reward=reward,
            )
            print(step_entry.to_csv(), action, file=f, sep=",")
            previous_reward = reward

    if isinstance(env, LlvmEnv):
        # Write optimized bitcode to file.
        source_bitcode = env.observation["BitcodeFile"]
        destination = outdir / logs.OPTIMIZED_BITCODE
        # Copy, don't rename, since rename will fail if the paths are on
        # different devices.
        shutil.copyfile(source_bitcode, str(destination))
        os.remove(source_bitcode)
def run_random_walk(env: CompilerEnv, step_count: int) -> None:
    """Perform a random walk of the action space.

    Each step is printed as it happens, followed by a summary of the walk.

    :param env: The environment to use.
    :param step_count: The number of steps to run. This value is an upper
        bound - fewer steps will be performed if any of the actions lead the
        environment to end the episode.
    """
    rewards = []
    step_num = 0
    with Timer() as episode_time:
        env.reset()
        for step_num in range(1, step_count + 1):
            action_index = env.action_space.sample()
            with Timer() as step_time:
                observation, reward, done, info = env.step(action_index)
            print(
                f"\n=== Step {humanize.intcomma(step_num)} ===\n"
                f"Action: {env.action_space.names[action_index]} "
                f"(changed={not info.get('action_had_no_effect')})\n"
                f"Reward: {reward}"
            )
            rewards.append(reward)
            if env.observation_space:
                print(f"Observation:\n{observation}")
            print(f"Step time: {step_time}")
            if done:
                print("Episode ended by environment")
                break

    def reward_percentage(reward, rewards):
        # Share of the total reward contributed by a single step's reward.
        if sum(rewards) == 0:
            return 0
        percentage = reward / sum(rewards)
        return emph(f"{'+' if percentage >= 0 else ''}{percentage:.2%}")

    summary = (
        f"\nCompleted {emph(humanize.intcomma(step_num))} steps in {episode_time} "
        f"({step_num / episode_time.time:.1f} steps / sec).\n"
        f"Total reward: {sum(rewards)}"
    )
    # max() of an empty list raises, so the "Max reward" line is only added
    # when at least one step was taken.
    if rewards:
        best_reward = max(rewards)
        summary += (
            f"\nMax reward: {best_reward} ({reward_percentage(best_reward, rewards)} "
            f"at step {humanize.intcomma(rewards.index(best_reward) + 1)})"
        )
    print(summary)
def run_manual_env(env: CompilerEnv):
    """Run an environment manually.

    The manual environment allows the user to step through the environment,
    selecting observations, rewards, and actions to run as they see fit.
    This is useful for debugging.

    The environment is closed before returning, whether the user ends the
    session, the environment ends the episode, or no benchmarks are
    available.

    :param env: The environment to run.
    """
    protocol = "benchmark://"

    benchmark = None
    if not env.benchmark:
        # Allow the user to choose a benchmark, with the first choice being
        # to select randomly.
        benchmarks = sorted(env.benchmarks)
        if not benchmarks:
            print(
                "No benchmarks available see https://facebookresearch.github.io/CompilerGym/getting_started.html#installing-benchmarks"
            )
            print("Exiting...")
            env.close()
            return

        # Strip default benchmark:// protocol.
        benchmarks = [
            name[len(protocol):] if name.startswith(protocol) else name
            for name in benchmarks
        ]

        benchmark_index = user_input.read_list_index(
            "Benchmark", ["random"] + benchmarks
        )
        # Index 0 is the "random" entry, which leaves the benchmark unset.
        benchmark = benchmarks[benchmark_index - 1] if benchmark_index else None

    with Timer() as timer:
        eager_observation = env.reset(benchmark=benchmark)

    print(f"Reset {env.benchmark} environment in {timer}")
    if env.observation_space and eager_observation is not None:
        print(f"Observation: {env.observation_space.to_string(eager_observation)}")

    observation_names = sorted(env.observation.spaces.keys())
    reward_names = sorted(env.reward.spaces.keys())
    last_eager_reward: Optional[float] = None
    step_count = 1

    while True:
        print(
            f"\nStep {step_count}. Select: [{emph('a')}]ction "
            f"[{emph('o')}]bservation [{emph('r')}]eward "
            f"[{emph('c')}]ommandline [{emph('e')}]nd >>> ",
            end="",
            flush=True,
        )

        # Keep reading characters until one of the menu keys is pressed.
        while True:
            key = user_input.read_char()

            if key == "a":
                print("action", flush=True)
                index = user_input.read_list_index(
                    "Actions", ["random"] + env.action_space.names
                )
                step_count += 1
                with Timer() as t:
                    # Entry 0 is "random"; any other entry is offset by one
                    # to remove the "random" entry from the index.
                    index = env.action_space.sample() if index == 0 else index - 1
                    eager_observation, eager_reward, done, info = env.step(index)

                # Print the eager observation, if available.
                if env.observation_space and eager_observation is not None:
                    print(
                        f"Observation: {env.observation_space.to_string(eager_observation)}"
                    )

                # Print the eager reward and the diff, if available.
                if env.reward_space and eager_reward is not None:
                    reward_diff = ""
                    if last_eager_reward is not None:
                        reward_diff = (
                            f" (change: {eager_reward - last_eager_reward:.6f})"
                        )
                    print(f"Reward: {eager_reward:.6f}{reward_diff}")
                    last_eager_reward = eager_reward

                print(
                    f"Action {env.action_space.names[index]} in {t}.",
                    " No effect." if info.get("action_had_no_effect") else "",
                    flush=True,
                )
                if done:
                    print("Episode ended by environment: ", info["error_details"])
                    env.close()
                    return
                break

            elif key == "o":
                print("observation", flush=True)
                observation_name = user_input.read_list_value(
                    "Observable values", observation_names
                )
                with Timer() as timer:
                    value = env.observation[observation_name]
                print(env.observation.spaces[observation_name].to_string(value))
                print(f"Observation {observation_name} in {timer}")
                break

            elif key == "r":
                print("reward", flush=True)
                reward_name = user_input.read_list_value("Rewards", reward_names)
                with Timer(f"Reward {reward_name}"):
                    print(f"{env.reward[reward_name]:.6f}")
                break

            elif key == "c":
                print("commandline")
                print("$", env.commandline(), flush=True)
                break

            elif key == "e":
                print("end", flush=True)
                with Timer("Closed environment"):
                    env.close()
                print("Have a nice day!")
                return
def test_Step_out_of_range(env: CompilerEnv):
    """Test error handling with an invalid action."""
    env.reset()

    with pytest.raises(ValueError) as exc_info:
        env.step(100)

    # Checked after the block so that the raised exception is available.
    message = str(exc_info.value)
    assert message == "Out-of-range"
def test_step_before_reset(env: CompilerEnv):
    """Taking a step() before reset() is illegal."""
    expected_message = r"Must call reset\(\) before step\(\)"
    with pytest.raises(SessionNotFound, match=expected_message):
        env.step(0)