def test_contains_with_float_scalar_range():
    """Check membership for a length-3 float sequence bounded to [0, 1]."""
    unit_interval = Scalar(name="test", min=0, max=1, dtype=float)
    space = Sequence(
        name="test",
        size_range=(3, 3),
        dtype=float,
        scalar_range=unit_interval,
    )

    accepted = (
        [0.0, 0.0, 0.0],
        [0.1, 1.0, 0.5],
    )
    rejected = (
        [0.0, 0.0, -1.0],  # out of bounds
        [0.0, 0, 0.1],  # wrong dtype
        [0.0, 0],  # wrong shape
    )

    for candidate in accepted:
        assert space.contains(candidate)
    for candidate in rejected:
        assert not space.contains(candidate)
def test_convert_to_double_sequence_space():
    """A float64 ranged sequence converts to a DoubleSequenceSpace."""
    element_range = Scalar(name=None, min=4.0, max=5.0, dtype=np.float64)
    seq = Sequence(
        name=None,
        dtype=np.float64,
        size_range=(1, 2),
        scalar_range=element_range,
    )

    converted_seq = py_converters.convert_to_ranged_sequence_space(seq)

    # The container type and its length bounds.
    assert isinstance(converted_seq, DoubleSequenceSpace)
    length_range = converted_seq.length_range
    assert length_range.min == 1
    assert length_range.max == 2

    # The per-element range.
    scalar_range = converted_seq.scalar_range
    assert isinstance(scalar_range, DoubleRange)
    assert scalar_range.min == 4.0
    assert scalar_range.max == 5.0
def test_convert_to_float_sequence_space():
    """A float32 ranged sequence converts to a FloatSequenceSpace."""
    element_range = Scalar(name=None, min=4, max=5, dtype=np.float32)
    seq = Sequence(
        name=None,
        dtype=np.float32,
        size_range=(1, 2),
        scalar_range=element_range,
    )

    converted_seq = py_converters.convert_to_ranged_sequence_space(seq)

    # The container type and its length bounds.
    assert isinstance(converted_seq, FloatSequenceSpace)
    length_range = converted_seq.length_range
    assert length_range.min == 1
    assert length_range.max == 2

    # The per-element range; bounds are compared approximately.
    scalar_range = converted_seq.scalar_range
    assert isinstance(scalar_range, FloatRange)
    assert np.isclose(scalar_range.min, 4)
    assert np.isclose(scalar_range.max, 5)
def test_convert_to_int64_sequence_space():
    """An int64 ranged sequence converts to an Int64SequenceSpace."""
    element_range = Scalar(name=None, min=4, max=5, dtype=np.int64)
    seq = Sequence(
        name=None,
        dtype=np.int64,
        size_range=(1, 2),
        scalar_range=element_range,
    )

    converted_seq = py_converters.convert_to_ranged_sequence_space(seq)

    # The container type and its length bounds.
    assert isinstance(converted_seq, Int64SequenceSpace)
    length_range = converted_seq.length_range
    assert length_range.min == 1
    assert length_range.max == 2

    # The per-element range.
    scalar_range = converted_seq.scalar_range
    assert isinstance(scalar_range, Int64Range)
    assert scalar_range.min == 4
    assert scalar_range.max == 5
def test_convert_to_boolean_sequence_space():
    """A bool ranged sequence converts to a BooleanSequenceSpace."""
    # NOTE(review): min=True / max=False looks inverted; the assertions below
    # only check that both values round-trip, so it is kept as-is -- confirm
    # whether this ordering is intentional.
    element_range = Scalar(name=None, min=True, max=False, dtype=bool)
    seq = Sequence(
        name=None,
        dtype=bool,
        size_range=(1, 2),
        scalar_range=element_range,
    )

    converted_seq = py_converters.convert_to_ranged_sequence_space(seq)

    # The container type and its length bounds.
    assert isinstance(converted_seq, BooleanSequenceSpace)
    length_range = converted_seq.length_range
    assert length_range.min == 1
    assert length_range.max == 2

    # The per-element range.
    scalar_range = converted_seq.scalar_range
    assert isinstance(scalar_range, BooleanRange)
    assert scalar_range.min == True  # noqa: E712
    assert scalar_range.max == False  # noqa: E712
def test_observation_spaces(env: CompilerEnv):
    """Test that the environment reports the service's observation spaces."""
    env.reset()
    spaces = env.observation.spaces

    assert spaces.keys() == {"ir", "features", "runtime"}

    # Expected space for each observation, checked in declaration order.
    expected = (
        (
            "ir",
            Sequence(
                name="test",
                size_range=(0, None),
                dtype=str,
                opaque_data_format="",
            ),
        ),
        (
            "features",
            Box(name="test", shape=(3,), low=-100, high=100, dtype=int),
        ),
        (
            "runtime",
            Scalar(name="test", min=0, max=np.inf, dtype=float),
        ),
    )
    for key, expected_space in expected:
        assert spaces[key].space == expected_space
def test_observation_spaces_failing_because_of_bug(gcc_bin: str):
    """Test that the environment reports the service's observation spaces."""
    expected_names = {
        "asm_hash",
        "asm_size",
        "asm",
        "choices",
        "command_line",
        "instruction_counts",
        "obj_hash",
        "obj_size",
        "obj",
        "rtl",
        "source",
    }
    expected_obj_size = Scalar(
        name="obj_size",
        min=-1,
        max=np.iinfo(np.int64).max,
        dtype=int,
    )
    expected_asm = Sequence(
        name="asm",
        size_range=(0, None),
        dtype=str,
        opaque_data_format="",
    )

    with gym.make("gcc-v0", gcc_bin=gcc_bin) as env:
        env.reset()
        spaces = env.observation.spaces
        assert spaces.keys() == expected_names
        assert spaces["obj_size"].space == expected_obj_size
        assert spaces["asm"].space == expected_asm
def test_bytes_contains():
    """A bytes sequence accepts bytes of any length but rejects str."""
    space = Sequence(name="test", size_range=(0, None), dtype=bytes)

    for payload in (b"Hello, world!", b""):
        assert space.contains(payload)

    # A str with the same text is not a member.
    assert not space.contains("Hello, world!")
def test_observation_spaces(env: CompilerEnv):
    """Test that the environment reports the service's observation spaces."""
    env.reset()
    spaces = env.observation.spaces

    assert spaces.keys() == {
        "ir",
        "Inst2vec",
        "Autophase",
        "AutophaseDict",
        "Programl",
        "runtime",
        "size",
    }

    max_length = np.iinfo(int).max
    int64_limits = np.iinfo(np.int64)
    feature_count = len(AUTOPHASE_FEATURE_NAMES)

    def full_int64_range():
        # A fresh scalar covering the whole int64 domain.
        return Scalar(
            name=None,
            min=int64_limits.min,
            max=int64_limits.max,
            dtype=np.int64,
        )

    # Unstructured text observations.
    assert spaces["ir"].space == Sequence(
        name="ir",
        size_range=(0, max_length),
        dtype=str,
    )
    # Integer sequences: variable length for Inst2vec, fixed for Autophase.
    assert spaces["Inst2vec"].space == Sequence(
        name="Inst2vec",
        size_range=(0, max_length),
        scalar_range=full_int64_range(),
        dtype=int,
    )
    assert spaces["Autophase"].space == Sequence(
        name="Autophase",
        size_range=(feature_count, feature_count),
        scalar_range=full_int64_range(),
        dtype=int,
    )
    # One non-negative int64 scalar per Autophase feature name.
    assert spaces["AutophaseDict"].space == Dict(
        name="AutophaseDict",
        spaces={
            feature: Scalar(
                name=None,
                min=0,
                max=int64_limits.max,
                dtype=np.int64,
            )
            for feature in AUTOPHASE_FEATURE_NAMES
        },
    )
    assert spaces["Programl"].space == Sequence(
        name="Programl",
        size_range=(0, max_length),
        dtype=str,
    )
    # Non-negative, unbounded float scalars.
    assert spaces["runtime"].space == Scalar(
        name="runtime", min=0, max=np.inf, dtype=float
    )
    assert spaces["size"].space == Scalar(
        name="size", min=0, max=np.inf, dtype=float
    )
def test_str_contains_too_short():
    """A str sequence with minimum length 3 rejects shorter or non-str values."""
    space = Sequence(name="test", size_range=(3, None), dtype=str)

    assert space.contains("Hello, world!")

    # The empty string is below the minimum length; a list is the wrong type.
    for candidate in ("", [1, 2, 3]):
        assert not space.contains(candidate)
def test_int_contains():
    """An int sequence of exact length 5 accepts only 5-element lists."""
    space = Sequence(name="test", size_range=(5, 5), dtype=int)

    # (list length, whether the space should accept it)
    cases = ((4, False), (5, True), (6, False))
    for length, should_contain in cases:
        candidate = list(range(length))
        assert bool(space.contains(candidate)) == should_contain
def __init__(self, *args, **kwargs):
    """Initialise the environment, its datasets, and derived observations.

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor. Afterwards the dataset directories are created, every
    entry of LLVM_DATASETS is registered, the CpuInfo space is replaced with
    a dictionary space, and the inst2vec / Autophase derived observation
    spaces are added.
    """
    super().__init__(*args, **kwargs)
    self.actions: List[int] = []
    self.datasets_site_path = site_data_path("llvm/10.0.0/bitcode_benchmarks")

    # Register the LLVM datasets.
    self.datasets_site_path.mkdir(parents=True, exist_ok=True)
    self.inactive_datasets_site_path.mkdir(parents=True, exist_ok=True)
    for llvm_dataset in LLVM_DATASETS:
        self.register_dataset(llvm_dataset)

    self.inst2vec = _INST2VEC_ENCODER

    # CpuInfo: one free-form string followed by unbounded integer fields.
    integer_fields = (
        "cores_count",
        "l1i_cache_size",
        "l1i_cache_count",
        "l1d_cache_size",
        "l1d_cache_count",
        "l2_cache_size",
        "l2_cache_count",
        "l3_cache_size",
        "l3_cache_count",
        "l4_cache_size",
        "l4_cache_count",
    )
    cpu_info = {"name": Sequence(size_range=(0, None), dtype=str)}
    cpu_info.update(
        (field, Scalar(min=None, max=None, dtype=int))
        for field in integer_fields
    )
    self.observation.spaces["CpuInfo"].space = DictSpace(cpu_info)

    # The callbacks look up self.inst2vec at call time rather than capturing
    # the encoder object now.
    def preprocess_ir(base_observation):
        return self.inst2vec.preprocess(base_observation)

    def encode_ir(base_observation):
        return self.inst2vec.encode(
            self.inst2vec.preprocess(base_observation)
        )

    def embed_ir(base_observation):
        return self.inst2vec.embed(
            self.inst2vec.encode(self.inst2vec.preprocess(base_observation))
        )

    def autophase_to_dict(base_observation):
        return dict(zip(AUTOPHASE_FEATURE_NAMES, base_observation))

    # Default values are built from the "!UNK" vocabulary entry.
    unknown_index = self.inst2vec.vocab["!UNK"]

    self.observation.add_derived_space(
        id="Inst2vecPreprocessedText",
        base_id="Ir",
        space=Sequence(size_range=(0, None), dtype=str),
        cb=preprocess_ir,
        default_value="",
    )
    self.observation.add_derived_space(
        id="Inst2vecEmbeddingIndices",
        base_id="Ir",
        space=Sequence(size_range=(0, None), dtype=np.int32),
        cb=encode_ir,
        default_value=np.array([unknown_index]),
    )
    self.observation.add_derived_space(
        id="Inst2vec",
        base_id="Ir",
        space=Sequence(size_range=(0, None), dtype=np.ndarray),
        cb=embed_ir,
        default_value=np.vstack([self.inst2vec.embeddings[unknown_index]]),
    )

    autophase_space = DictSpace(
        {
            feature: Scalar(min=0, max=None, dtype=int)
            for feature in AUTOPHASE_FEATURE_NAMES
        }
    )
    self.observation.add_derived_space(
        id="AutophaseDict",
        base_id="Autophase",
        space=autophase_space,
        cb=autophase_to_dict,
    )
def test_sample():
    """Sampling from a sequence space raises NotImplementedError."""
    unbounded_ints = Sequence(name="test", size_range=(0, None), dtype=int)
    with pytest.raises(NotImplementedError):
        unbounded_ints.sample()
def test_observation_space_list(env: CompilerEnv):
    """Reset the environment, then replace its observation spaces mapping."""
    env.reset()

    ir_space = Sequence(size_range=(0, None))
    features_space = Box(shape=(3,), low=-100, high=100)
    env.observation.spaces = {
        "ir": ir_space,
        "features": features_space,
    }
def test_convert_to_bytes_sequence_space():
    """A bytes sequence converts to a BytesSequenceSpace, keeping its bounds."""
    seq = Sequence(name=None, dtype=bytes, size_range=(1, 2))

    converted_seq = py_converters.convert_to_bytes_sequence_space(seq)

    assert isinstance(converted_seq, BytesSequenceSpace)
    length_range = converted_seq.length_range
    assert length_range.min == 1
    assert length_range.max == 2
def __init__(
    self,
    *args,
    benchmark: Optional[Union[str, Benchmark]] = None,
    datasets_site_path: Optional[Path] = None,
    **kwargs,
):
    """Construct the LLVM environment with its rewards and derived spaces.

    Args:
        *args: Forwarded unchanged to the parent constructor.
        benchmark: Benchmark to use; when falsy, "cbench-v1/qsort" is passed
            to the parent constructor instead.
        datasets_site_path: Passed to ``_get_llvm_datasets`` as
            ``site_data_base``.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    # The LLVM service depends on binaries that are not included in the
    # pip-installed package, so perform the one-time download first.
    download_llvm_files()
    self.inst2vec = _INST2VEC_ENCODER

    # Arguments for the parent constructor, evaluated in the same order in
    # which they are passed below.
    # Set a default benchmark for use.
    default_benchmark = benchmark or "cbench-v1/qsort"
    datasets = _get_llvm_datasets(site_data_base=datasets_site_path)

    # Each cost function gets four rewards, in this order: the raw cost, a
    # normalized cost, and improvement relative to the O3 and Oz baselines.
    # Rows are (cost function, name stem, platform dependent).
    rewards = []
    for cost_function, stem, platform_dependent in (
        ("IrInstructionCount", "IrInstructionCount", False),
        ("ObjectTextSizeBytes", "ObjectTextSize", True),
        ("TextSizeBytes", "TextSize", True),
    ):
        shared = {
            "cost_function": cost_function,
            "init_cost_function": f"{stem}O0",
            "default_negates_returns": True,
            "deterministic": True,
            "platform_dependent": platform_dependent,
        }
        rewards.append(CostFunctionReward(name=cost_function, **shared))
        rewards.append(NormalizedReward(name=f"{stem}Norm", max=1, **shared))
        for baseline in ("O3", "Oz"):
            rewards.append(
                BaselineImprovementNormalizedReward(
                    name=f"{stem}{baseline}",
                    baseline_cost_function=f"{stem}{baseline}",
                    success_threshold=1,
                    **shared,
                )
            )

    # Default values are built from the "!UNK" vocabulary entry.
    unknown_index = self.inst2vec.vocab["!UNK"]

    derived_observation_spaces = [
        {
            "id": "Inst2vecPreprocessedText",
            "base_id": "Ir",
            "space": Sequence(
                name="Inst2vecPreprocessedText",
                size_range=(0, None),
                dtype=str,
            ),
            "translate": self.inst2vec.preprocess,
            "default_value": "",
        },
        {
            "id": "Inst2vecEmbeddingIndices",
            "base_id": "Ir",
            "space": Sequence(
                name="Inst2vecEmbeddingIndices",
                size_range=(0, None),
                dtype=np.int32,
            ),
            "translate": lambda base_observation: self.inst2vec.encode(
                self.inst2vec.preprocess(base_observation)
            ),
            "default_value": np.array([unknown_index]),
        },
        {
            "id": "Inst2vec",
            "base_id": "Ir",
            "space": Sequence(
                name="Inst2vec",
                size_range=(0, None),
                dtype=np.ndarray,
            ),
            "translate": lambda base_observation: self.inst2vec.embed(
                self.inst2vec.encode(
                    self.inst2vec.preprocess(base_observation)
                )
            ),
            "default_value": np.vstack(
                [self.inst2vec.embeddings[unknown_index]]
            ),
        },
        {
            "id": "InstCountDict",
            "base_id": "InstCount",
            "space": DictSpace(
                {
                    f"{feature}Count": Scalar(
                        name=f"{feature}Count", min=0, max=None, dtype=int
                    )
                    for feature in INST_COUNT_FEATURE_NAMES
                },
                name="InstCountDict",
            ),
            "translate": lambda base_observation: {
                f"{feature}Count": value
                for feature, value in zip(
                    INST_COUNT_FEATURE_NAMES, base_observation
                )
            },
        },
        {
            "id": "InstCountNorm",
            "base_id": "InstCount",
            "space": Box(
                name="InstCountNorm",
                low=0,
                high=1,
                shape=(len(INST_COUNT_FEATURE_NAMES) - 1,),
                dtype=np.float32,
            ),
            # Divide every entry after the first by the first entry, clamped
            # to at least 1 so the division is never by zero.
            "translate": lambda base_observation: (
                base_observation[1:] / max(base_observation[0], 1)
            ).astype(np.float32),
        },
        {
            "id": "InstCountNormDict",
            "base_id": "InstCountNorm",
            # NOTE(review): these scalars are declared dtype=int although the
            # InstCountNorm base space above is float32 -- confirm whether
            # that is intended.
            "space": DictSpace(
                {
                    f"{feature}Density": Scalar(
                        name=f"{feature}Density", min=0, max=None, dtype=int
                    )
                    for feature in INST_COUNT_FEATURE_NAMES[1:]
                },
                name="InstCountNormDict",
            ),
            "translate": lambda base_observation: {
                f"{feature}Density": value
                for feature, value in zip(
                    INST_COUNT_FEATURE_NAMES[1:], base_observation
                )
            },
        },
        {
            "id": "AutophaseDict",
            "base_id": "Autophase",
            "space": DictSpace(
                {
                    feature: Scalar(name=feature, min=0, max=None, dtype=int)
                    for feature in AUTOPHASE_FEATURE_NAMES
                },
                name="AutophaseDict",
            ),
            "translate": lambda base_observation: {
                feature: value
                for feature, value in zip(
                    AUTOPHASE_FEATURE_NAMES, base_observation
                )
            },
        },
    ]

    super().__init__(
        *args,
        **kwargs,
        benchmark=default_benchmark,
        datasets=datasets,
        rewards=rewards,
        derived_observation_spaces=derived_observation_spaces,
    )

    # Mutable runtime configuration options that must be set on every call
    # to reset.
    self._runtimes_per_observation_count: Optional[int] = None
    self._runtimes_warmup_per_observation_count: Optional[int] = None

    # CpuInfo: one free-form string followed by unbounded integer fields,
    # keyed in the dictionary space by each space's own name.
    integer_fields = (
        "cores_count",
        "l1i_cache_size",
        "l1i_cache_count",
        "l1d_cache_size",
        "l1d_cache_count",
        "l2_cache_size",
        "l2_cache_count",
        "l3_cache_size",
        "l3_cache_count",
        "l4_cache_size",
        "l4_cache_count",
    )
    cpu_info_spaces = [Sequence(name="name", size_range=(0, None), dtype=str)]
    cpu_info_spaces.extend(
        Scalar(name=field, min=None, max=None, dtype=int)
        for field in integer_fields
    )
    self.observation.spaces["CpuInfo"].space = DictSpace(
        {space.name: space for space in cpu_info_spaces},
        name="CpuInfo",
    )
def __init__(self, *args, **kwargs):
    """Initialise the parent, then register the derived observation spaces.

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor. A new Inst2vecEncoder is created and used by the
    inst2vec-based derived spaces.
    """
    super().__init__(*args, **kwargs)
    self.inst2vec = Inst2vecEncoder()

    # CpuInfoDict: one free-form string followed by unbounded integer fields.
    integer_fields = (
        "cores_count",
        "l1i_cache_size",
        "l1i_cache_count",
        "l1d_cache_size",
        "l1d_cache_count",
        "l2_cache_size",
        "l2_cache_count",
        "l3_cache_size",
        "l3_cache_count",
        "l4_cache_size",
        "l4_cache_count",
    )
    cpu_info = {"name": Sequence(size_range=(0, None), dtype=str)}
    cpu_info.update(
        (field, Scalar(min=None, max=None, dtype=int))
        for field in integer_fields
    )

    # The callbacks look up self.inst2vec at call time rather than capturing
    # the encoder object now.
    def passthrough(base_observation):
        return base_observation

    def preprocess_ir(base_observation):
        return self.inst2vec.preprocess(base_observation)

    def encode_ir(base_observation):
        return self.inst2vec.encode(
            self.inst2vec.preprocess(base_observation)
        )

    def embed_ir(base_observation):
        return self.inst2vec.embed(
            self.inst2vec.encode(self.inst2vec.preprocess(base_observation))
        )

    def autophase_to_dict(base_observation):
        return dict(zip(AUTOPHASE_FEATURE_NAMES, base_observation))

    self.register_derived_space(
        base_name="CpuInfo",
        derived_name="CpuInfoDict",
        derived_space=DictSpace(cpu_info),
        cb=passthrough,
    )
    self.register_derived_space(
        base_name="Ir",
        derived_name="Inst2vecPreprocessedText",
        derived_space=Sequence(size_range=(0, None), dtype=str),
        cb=preprocess_ir,
    )
    self.register_derived_space(
        base_name="Ir",
        derived_name="Inst2vecEmbeddingIndices",
        derived_space=Sequence(size_range=(0, None), dtype=np.int32),
        cb=encode_ir,
    )
    self.register_derived_space(
        base_name="Ir",
        derived_name="Inst2vec",
        derived_space=Sequence(size_range=(0, None), dtype=np.ndarray),
        cb=embed_ir,
    )

    autophase_space = DictSpace(
        {
            feature: Scalar(min=0, max=None, dtype=int)
            for feature in AUTOPHASE_FEATURE_NAMES
        }
    )
    self.register_derived_space(
        base_name="Autophase",
        derived_name="AutophaseDict",
        derived_space=autophase_space,
        cb=autophase_to_dict,
    )
def test_convert_to_string_space():
    """A str sequence converts to a StringSpace, keeping its length bounds."""
    space = Sequence(name=None, size_range=(1, 2), dtype=str)

    converted_space = py_converters.convert_to_string_space(space)

    assert isinstance(converted_space, StringSpace)
    length_range = converted_space.length_range
    assert length_range.min == 1
    assert length_range.max == 2
def make_seq(scalar_range, dtype, defaults):
    """Build a Sequence whose size range comes from ``scalar_range``.

    The size range is computed by ``scalar_range2tuple(scalar_range,
    defaults)``. The opaque data format is read from ``proto``, which is not
    a parameter and must be supplied by the enclosing scope.
    """
    size_bounds = scalar_range2tuple(scalar_range, defaults)
    data_format = proto.opaque_data_format
    return Sequence(
        size_range=size_bounds,
        dtype=dtype,
        opaque_data_format=data_format,
    )
def test_str_contains_too_long():
    """A str sequence with maximum length 4 rejects longer or non-str values."""
    space = Sequence(size_range=(0, 4), dtype=str)

    # Thirteen characters exceeds the maximum length.
    assert not space.contains("Hello, world!")
    # The empty string is within bounds.
    assert space.contains("")
    # A list is the wrong type.
    assert not space.contains([1, 2, 3])