def get_all_sampler_datasets():
    """
    Collect the "kernels.db" files found in the clgen sampler caches.

    Every entry of "~/.cache/clgen" is checked for a "sampler" directory.
    Each entry inside such a directory that holds a file named
    "kernels.db" contributes the path of that file to the result.

    Returns:
        list: Paths of the "kernels.db" files that were found.
    """
    candidates = []
    for cache_entry in fs.ls(fs.path("~/.cache/clgen"), abspaths=True):
        sampler_root = fs.path(cache_entry, "sampler")
        if not fs.isdir(sampler_root):
            continue
        candidates.extend(fs.ls(sampler_root, abspaths=True))

    db_paths = (fs.path(entry, "kernels.db") for entry in candidates)
    return [db_path for db_path in db_paths if fs.isfile(db_path)]
def read_file(*components, **kwargs):
    """
    Load a JSON data blob.

    Arguments:
        *components (str): Path components of the file to read.
        must_exist (bool, optional): If False, return an empty dict when
            the file cannot be opened. Defaults to True.

    Returns:
        array or dict: JSON data.

    Raises:
        File404: If path does not exist, and must_exist is True (raised
            by fs.must_exist()).
        ValueError: If JSON is malformed.
        IOError: If the file cannot be read, and must_exist is True.
    """
    must_exist = kwargs.get("must_exist", True)

    if must_exist:
        path = fs.must_exist(*components)
    else:
        path = fs.path(*components)

    try:
        with open(path) as infile:
            return loads(infile.read())
    except ValueError as e:
        raise ValueError(
            "malformed JSON file '{path}'. Message from parser: {err}".format(
                path=fs.basename(path), err=str(e)))
    except IOError:
        if not must_exist:
            return {}
        # Propagate the failure: handing the exception object back as a
        # return value would look like successfully loaded data.
        raise
def main():
    """
    Export CLSmith programs of one classification with the header inlined.

    Reads two positional command line arguments, "classification" and
    "outdir". For every distinct program id having a CLSmithResult with
    the requested classification, the program source is written to
    "<outdir>/<program_id>.cl" with the '#include "CLSmith.h"' directive
    replaced by the contents of the "include/clsmith.h" data file.
    Output files that already exist are left untouched.

    The source is expected to contain the include directive exactly once;
    otherwise unpacking the split result raises ValueError.
    """
    parser = ArgumentParser(description=__description__)
    for positional in ("classification", "outdir"):
        parser.add_argument(positional)
    args = parser.parse_args()

    db.init("cc1")
    session = db.make_session()

    id_query = session.query(sql.distinct(CLSmithResult.program_id)) \
        .filter(CLSmithResult.classification == args.classification)
    program_ids = [row[0] for row in id_query.all()]

    header = fs.read_file(dsmith.data_path("include", "clsmith.h"))

    fs.mkdir(args.outdir)

    for program_id in ProgressBar()(program_ids):
        outpath = fs.path(args.outdir, program_id + ".cl")
        if fs.exists(outpath):
            # Already exported by a previous run.
            continue

        program = session.query(CLSmithProgram) \
            .filter(CLSmithProgram.id == program_id).one()

        before, after = program.src.split('#include "CLSmith.h"')
        inlined = before + header + after

        with open(outpath, "w") as outfile:
            print(inlined, file=outfile)
def echo(*args, **kwargs):
    """
    Write a message to a file.

    Arguments:
        args: A list of arguments which make up the message. The last
            argument is the path to the file to write to.
        append (bool, optional): If True, append to the file instead of
            overwriting it. Defaults to False.
        **kwargs: Any remaining keyword arguments are forwarded to
            print().
    """
    msg = args[:-1]
    path = fs.path(args[-1])

    # "append" is consumed here so that it is not forwarded to print().
    mode = "a" if kwargs.pop("append", False) else "w"

    with open(path, mode) as outfile:
        print(*msg, file=outfile, **kwargs)
def root_path(*path) -> Path:
    """
    Path relative to dsmith source repository.

    Arguments:
        *path (List[str]): Path components, joined below ROOT.

    Returns:
        Path: Path.
    """
    components = (ROOT,) + path
    return fs.path(*components)
def keypath(self, key):
    """
    Get the filesystem path for a key.

    The key is passed through self.escape_key() and the result is joined
    onto self.path.

    Arguments:
        key: Key.

    Returns:
        str: Absolute path.
    """
    base = self.path
    escaped = self.escape_key(key)
    return fs.path(base, escaped)
def data_path(*path) -> Path:
    """
    Path to data file.

    The components are joined below "data" and resolved with
    resource_filename() against this module's package name.

    Arguments:
        *path (List[str]): Path components.

    Returns:
        Path: Path.
    """
    relative = fs.path("data", *path)
    return resource_filename(__name__, relative)
def cachepath(*relative_path_components: str) -> pathlib.Path:
    """Return path to file system cache.

    The cache root is taken from the CLGEN_CACHE environment variable,
    falling back to "~/.cache/clgen/". The user-expanded root directory
    is created if it does not exist yet.

    Args:
        *relative_path_components: Relative path of cache.

    Returns:
        Absolute path of file system cache.
    """
    configured_root = os.environ.get("CLGEN_CACHE", "~/.cache/clgen/")
    cache_root = pathlib.Path(configured_root)

    expanded_root = cache_root.expanduser()
    expanded_root.mkdir(parents=True, exist_ok=True)

    # The unexpanded root is what gets joined; fs.path() receives it as-is.
    joined = fs.path(cache_root, *relative_path_components)
    return pathlib.Path(joined)
def test_rmtrash():
    """Check fs.rmtrash() on a file, a missing file, and a directory tree."""
    with tempfile.NamedTemporaryFile(prefix='labm8_') as f:
        assert fs.isfile(f.name)
        fs.rmtrash(f.name)
        assert not fs.isfile(f.name)
        # Trashing or removing a path that is already gone must not fail.
        fs.rmtrash(f.name)
        fs.rm(f.name)
    with tempfile.TemporaryDirectory() as d:
        # Start from a clean slate, then build a nested tree with one file.
        fs.rm(d)
        fs.mkdir(d, "foo/bar")
        system.echo("Hello, world!", fs.path(d, "foo/bar/baz"))
        # The file lives below the directory `d`, not below the temporary
        # file object `f` from the block above.
        assert fs.isfile(d, "foo/bar/baz")
        fs.rmtrash(d)
        assert not fs.isfile(d, "foo/bar/baz")
        assert not fs.isdir(d)
def make(target="all", dir=".", **kwargs):
    """
    Run make.

    Arguments:

        target (str, optional): Name of the target to build. Defaults
          to "all".
        dir (str, optional): Path to directory containing Makefile.
        **kwargs (optional): Any additional arguments to be passed to
          system.run(). A "timeout" of 300 is supplied unless the caller
          provides one.

    Returns:

        (int, str, str): The first element is the return code of the
          make command. The second and third elements are the stdout
          and stderr of the process.

    Raises:

        NoMakefileError: In case a Makefile is not found in the target
          directory.
        NoTargetError: In case the Makefile does not support the
          requested target.
        MakeError: In case the target rule fails.
    """
    if not fs.isfile(fs.path(dir, "Makefile")):
        raise NoMakefileError("No makefile in '{}'".format(fs.abspath(dir)))

    # Default parameters to system.run()
    kwargs.setdefault("timeout", 300)

    fs.cd(dir)
    try:
        ret, out, err = system.run(["make", target], **kwargs)
    finally:
        # Restore the previous working directory even if run() raises, so
        # a failing build cannot leave the process in the wrong directory.
        fs.cdpop()

    # NOTE(review): only positive return codes are treated as failures;
    # confirm whether negative codes can be returned by system.run().
    if ret > 0:
        if re.search(_BAD_TARGET_RE, err):
            raise NoTargetError("No rule for target '{}'".format(target))
        raise MakeError("Target '{}' failed".format(target))

    return ret, out, err
def data_path(*components, exists=True) -> str:
    """
    Return absolute path to unittest data file. Data files are located in
    <package>/test/data.

    The components are joined onto the "data" directory that sits next to
    this source file.

    Arguments:
        *components (str): Relative path.
        exists (bool, optional): If True, require that file exists.

    Returns:
        str: Absolute path.

    Raises:
        Data404: If path doesn't exist and 'exists' is True.
    """
    relative = fs.path(*components)
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    abspath = os.path.join(data_dir, relative)

    if not exists or os.path.exists(abspath):
        return abspath

    raise Data404(abspath)
import re
import subprocess

from phd.lib.labm8 import fs

# Matches a line of the form "<count> instcount - Number of <type>",
# capturing the integer count and the trailing type description. Written
# as a raw string so that "\d" is a regex class, not a string escape.
_LINE_RE = re.compile(r"^(?P<count>\d+) instcount - Number of (?P<type>.+)")

DEFAULT_LLVM_PATH = fs.path(
    "~/src/msc-thesis/skelcl/libraries/llvm/build/bin/")


class Error(Exception):
    """
    LLVM module error.
    """


class ProgramNotFoundError(Error):
    """
    Error thrown if a program is not found.
    """


class ClangError(Error):
    """
    Error thrown if clang exits with non-zero status.
    """
def test_path_homedir():
    """fs.path() expands a leading "~" to the user's home directory."""
    home = os.path.expanduser("~")
    assert home == fs.path("~")
    assert os.path.join(home, "foo") == fs.path("~", "foo")
import dbus.service

from phd.lib.labm8 import fs

# Per-user directory path, expanded from "~/.omnitune".
LOCAL_DIR = fs.path("~/.omnitune")


class Error(Exception):
    """Exception type defined by this module; adds nothing to Exception."""


class Server(dbus.service.Object):
    """dbus.service.Object subclass that defines no members of its own."""
def get_benchmarks(platform):
    """
    Load the benchmark runtimes table for a platform.

    Reads "runtimes/<platform>-benchmarks.csv" and adds two columns:
    "source", the escaped suite name of each "benchmark" value, and
    "synthetic", which is 0 for every row.

    Arguments:
        platform: Platform name used to build the CSV file name.

    Returns:
        The table returned by pd.read_csv(), with the two added columns.
    """
    csv_name = "runtimes/{platform}-benchmarks.csv".format(platform=platform)
    frame = pd.read_csv(fs.path(csv_name))

    frame["source"] = [escape_suite_name(name) for name in frame["benchmark"]]
    frame["synthetic"] = [0] * len(frame)
    return frame
class Server(omnitune.Server):
    """
    SkelCL server: D-Bus methods for requesting stencil workgroup sizes
    and for recording or refusing measured parameter values.
    """

    LLVM_PATH = fs.path("~/src/msc-thesis/skelcl/libraries/llvm/build/bin/")

    def __init__(self, *args, **kwargs):
        """
        Construct a SkelCL server.

        Arguments are forwarded unchanged to the parent constructor.
        """
        # Fail if we can't find the path
        if not fs.isdir(self.LLVM_PATH):
            io.fatal("Could not find llvm path '{0}'".format(self.LLVM_PATH))

        super(Server, self).__init__(*args, **kwargs)
        io.info("Registered server %s/SkelCLServer ..." % SESSION_NAME)

        # Setup persistent database.
        self.db = migrate(Database())
        self.db.status_report()

        # Create an in-memory sample strategy cache.
        self.strategies = cache.TransientCache()

    # NOTE(review): in_signature declares one string, eight ints and three
    # strings, while the parameters below are one string, seven ints,
    # three strings and a final int (compare AddStencilRuntime). Left
    # unchanged because it is part of the D-Bus interface; confirm against
    # the client.
    @dbus.service.method(INTERFACE_NAME, in_signature='siiiiiiiisss',
                         out_signature='(nn)')
    def RequestTrainingStencilParams(self, device_name, device_count,
                                     north, south, east, west,
                                     data_width, data_height,
                                     type_in, type_out, source,
                                     max_wg_size):
        """
        Request training parameter values for a SkelCL stencil operation.

        Determines the parameter values to use for a SkelCL stencil
        operation by iterating over the space of parameter values.

        Args:
            device_name (str): The name of the execution device.
            device_count (int): The number of execution devices.
            north (int): The stencil shape north direction.
            south (int): The stencil shape south direction.
            east (int): The stencil shape east direction.
            west (int): The stencil shape west direction.
            data_width (int): The number of columns of data.
            data_height (int): The number of rows of data.
            type_in (str): The input data type.
            type_out (str): The output data type.
            source (str): The stencil kernel source code.
            max_wg_size (int): The maximum kernel workgroup size.

        Returns:
            A tuple of work group size values, e.g.

            (16,32)
        """
        # Parse arguments. Only max_wg_size is used below; the other
        # conversions still run, so malformed values raise here.
        device_name = util.parse_str(device_name)
        device_count = int(device_count)
        north = int(north)
        south = int(south)
        east = int(east)
        west = int(west)
        data_width = int(data_width)
        data_height = int(data_height)
        type_in = util.parse_str(type_in)
        type_out = util.parse_str(type_out)
        source = util.parse_str(source)
        max_wg_size = int(max_wg_size)

        # Get the next scenario ID to train on.
        wg = training.random_wg_value(max_wg_size)

        return wg

    # NOTE(review): same in_signature mismatch as on
    # RequestTrainingStencilParams; confirm against the client.
    @dbus.service.method(INTERFACE_NAME, in_signature='siiiiiiiisss',
                         out_signature='(nn)')
    def RequestStencilParams(self, device_name, device_count,
                             north, south, east, west,
                             data_width, data_height,
                             type_in, type_out, source,
                             max_wg_size):
        """
        Request parameter values for a SkelCL stencil operation.

        Intended to use a machine learning classifier to predict the
        optimal parameter values from features of the arguments. The
        classification is not implemented yet (see the TODO below): a
        fixed workgroup size is returned.

        Args:
            device_name (str): The name of the execution device.
            device_count (int): The number of execution devices.
            north (int): The stencil shape north direction.
            south (int): The stencil shape south direction.
            east (int): The stencil shape east direction.
            west (int): The stencil shape west direction.
            data_width (int): The number of columns of data.
            data_height (int): The number of rows of data.
            type_in (str): The input data type.
            type_out (str): The output data type.
            source (str): The stencil kernel source code.
            max_wg_size (int): The maximum kernel workgroup size.

        Returns:
            A tuple of work group size values, e.g.

            (16,32)
        """
        start_time = time.time()

        # Parse arguments.
        device_name = util.parse_str(device_name)
        device_count = int(device_count)
        north = int(north)
        south = int(south)
        east = int(east)
        west = int(west)
        data_width = int(data_width)
        data_height = int(data_height)
        source = util.parse_str(source)
        max_wg_size = int(max_wg_size)

        # TODO: Perform feature extraction & classification
        wg = (64, 32)

        end_time = time.time()

        io.debug(("RequestStencilParams() -> "
                  "({c}, {r}) [{t:.3f}s]".format(c=wg[0], r=wg[1],
                                                 t=end_time - start_time)))

        return wg

    @dbus.service.method(INTERFACE_NAME, in_signature='siiiiiiisssiiid',
                         out_signature='')
    def AddStencilRuntime(self, device_name, device_count,
                          north, south, east, west,
                          data_width, data_height,
                          type_in, type_out, source,
                          max_wg_size, wg_c, wg_r, runtime):
        """
        Add a new stencil runtime.

        Args:
            device_name (str): The name of the execution device.
            device_count (int): The number of execution devices.
            north (int): The stencil shape north direction.
            south (int): The stencil shape south direction.
            east (int): The stencil shape east direction.
            west (int): The stencil shape west direction.
            data_width (int): The number of columns of data.
            data_height (int): The number of rows of data.
            type_in (str): The input data type.
            type_out (str): The output data type.
            source (str): The stencil kernel source code.
            max_wg_size (int): The maximum kernel workgroup size.
            wg_c (int): The workgroup size used (columns).
            wg_r (int): The workgroup size used (rows).
            runtime (double): The measured runtime in milliseconds.
        """
        # Parse arguments.
        device_name = util.parse_str(device_name)
        device_count = int(device_count)
        north = int(north)
        south = int(south)
        east = int(east)
        west = int(west)
        data_width = int(data_width)
        data_height = int(data_height)
        type_in = util.parse_str(type_in)
        type_out = util.parse_str(type_out)
        source = util.parse_str(source)
        max_wg_size = int(max_wg_size)
        wg_c = int(wg_c)
        wg_r = int(wg_r)
        runtime = float(runtime)

        # Lookup IDs
        device = self.db.device_id(device_name, device_count)
        kernel = self.db.kernel_id(north, south, east, west,
                                   max_wg_size, source)
        dataset = self.db.datasets_id(data_width, data_height,
                                      type_in, type_out)
        scenario = self.db.scenario_id(device, kernel, dataset)
        params = self.db.params_id(wg_c, wg_r)

        # Add entry into runtimes table.
        self.db.add_runtime(scenario, params, runtime)
        self.db.commit()

        io.debug(("AddStencilRuntime({scenario}, {params}, {runtime})".format(
            scenario=scenario[:8], params=params, runtime=runtime)))

    @dbus.service.method(INTERFACE_NAME, in_signature='siiiiiiisssiii',
                         out_signature='')
    def RefuseStencilParams(self, device_name, device_count,
                            north, south, east, west,
                            data_width, data_height,
                            type_in, type_out, source,
                            max_wg_size, wg_c, wg_r):
        """
        Mark a set of parameters as bad.

        Args:
            device_name (str): The name of the execution device.
            device_count (int): The number of execution devices.
            north (int): The stencil shape north direction.
            south (int): The stencil shape south direction.
            east (int): The stencil shape east direction.
            west (int): The stencil shape west direction.
            data_width (int): The number of columns of data.
            data_height (int): The number of rows of data.
            type_in (str): The input data type.
            type_out (str): The output data type.
            source (str): The stencil kernel source code.
            max_wg_size (int): The maximum kernel workgroup size.
            wg_c (int): The workgroup size used (columns).
            wg_r (int): The workgroup size used (rows).
        """
        # Parse arguments.
        device_name = util.parse_str(device_name)
        device_count = int(device_count)
        north = int(north)
        south = int(south)
        east = int(east)
        west = int(west)
        data_width = int(data_width)
        data_height = int(data_height)
        type_in = util.parse_str(type_in)
        type_out = util.parse_str(type_out)
        source = util.parse_str(source)
        max_wg_size = int(max_wg_size)
        wg_c = int(wg_c)
        wg_r = int(wg_r)

        # Lookup IDs
        device = self.db.device_id(device_name, device_count)
        kernel = self.db.kernel_id(north, south, east, west,
                                   max_wg_size, source)
        dataset = self.db.datasets_id(data_width, data_height,
                                      type_in, type_out)
        scenario = self.db.scenario_id(device, kernel, dataset)
        params = self.db.params_id(wg_c, wg_r)

        # Record the refusal for this scenario/params pair.
        self.db.refuse_params(scenario, params)
        self.db.commit()

        # This method has no `runtime` argument, so none is passed to
        # format(); the message template does not use one.
        io.debug(("RefuseStencilParams({scenario}, {params})".format(
            scenario=scenario[:8], params=params)))
class TestLLVM(TestCase):
    """Tests for the llvm module's program lookup, bitcode and instcounts."""

    LLVM_PATH = fs.path("~/src/msc-thesis/skelcl/libraries/llvm/build/bin/")

    # assert_program_exists():
    def test_assert_program_exists(self):
        outcome = llvm.assert_program_exists(__file__)
        self._test(None, outcome)

    def test_assert_program_exists_fail(self):
        with self.assertRaises(llvm.ProgramNotFoundError):
            llvm.assert_program_exists("/not a real path")

    # bitcode()
    def test_bitcode_cl(self):
        expected = self.stencil_gaussian_kernel_bc
        actual = llvm.bitcode(self.stencil_gaussian_kernel,
                              language="cl", path=self.LLVM_PATH)
        self._test(expected, actual)

    def test_bitcode_error_bad_src(self):
        llvm_path = self.LLVM_PATH
        with self.assertRaises(llvm.ClangError):
            llvm.bitcode("<NOT REAL CODE>", path=llvm_path)

    def test_bitcode_error_bad_lang(self):
        kernel_src = self.stencil_gaussian_kernel
        llvm_path = self.LLVM_PATH
        with self.assertRaises(llvm.ClangError):
            llvm.bitcode(kernel_src, language="foobar", path=llvm_path)

    def test_bitcode_missing_clang(self):
        with self.assertRaises(llvm.ProgramNotFoundError):
            llvm.bitcode("", path="/not a real path")

    # parse_instcounts()
    def test_parse_isntcounts(self):
        expected = self.stencil_gaussian_kernel_ic_json
        actual = llvm.parse_instcounts(self.stencil_gaussian_kernel_ic)
        self._test(expected, actual)

    def test_parse_isntcounts_empty(self):
        parsed = llvm.parse_instcounts("")
        self._test({}, parsed)

    # instcounts()
    def test_instcounts_cl(self):
        expected = self.stencil_gaussian_kernel_ic_json
        actual = llvm.instcounts(self.stencil_gaussian_kernel_bc,
                                 path=self.LLVM_PATH)
        self._test(expected, actual)

    def test_instcounds_missing_opt(self):
        with self.assertRaises(llvm.ProgramNotFoundError):
            llvm.instcounts("", path="/not a real path")

    # instcounts2ratios()
    def test_instcounts2ratios(self):
        expected = self.stencil_gaussian_kernel_ratios_json
        actual = llvm.instcounts2ratios(self.stencil_gaussian_kernel_ic_json)
        self._test(expected, actual)
def load_data_desc(platform, source="B", max_seq_len=1000,
                   atomizer=CharacterAtomizer, quiet=False):
    """
    Load experimental results together with encoded kernel sources.

    Arguments:
        platform: Platform name used to build the runtimes CSV file names.
        source (str, optional): Which rows to load. "B" for benchmarks,
            "S" for synthetics, "N" for the benchmarks whose source is
            "NPB", and "BS" / "NS" for the concatenation of the
            respective pair. Defaults to "B".
        max_seq_len (int, optional): Upper bound on the padded sequence
            length. Defaults to 1000.
        atomizer (optional): Object providing from_text(); the atomizer
            it returns is used to encode the sources.
        quiet (bool, optional): If True, do not print summary statistics.

    Returns:
        dict: With keys "dataframe" (the loaded table with added columns
            "oracle_enc", "benchmark_name", "src", "src_len", "seq_len"
            and "seq"), "seq_length" (the padded sequence length) and
            "atomizer" (the atomizer derived from the sources).

    Raises:
        ValueError: If source is not one of the supported values.
        Exception: If a kernel source does not start with
            "__kernel void A".
    """

    def get_benchmarks(platform):
        B = pd.read_csv(
            fs.path("runtimes/{platform}-benchmarks.csv".format(
                platform=platform)))
        B["source"] = [escape_suite_name(x) for x in B["benchmark"]]
        B["synthetic"] = [0] * len(B)
        return B

    def get_npb_benchmarks(platform):
        B = get_benchmarks(platform)
        msk = B["source"] == "NPB"
        return B[msk]

    def get_synthetics(platform):
        S = pd.read_csv(
            fs.path("runtimes/{platform}-clgen.csv".format(
                platform=platform)))
        S["source"] = ["CLgen"] * len(S)
        S["synthetic"] = [1] * len(S)
        return S

    if source == "B":
        dataframe = get_benchmarks(platform)
    elif source == "S":
        dataframe = get_synthetics(platform)
    elif source == "BS":
        dataframe = pd.concat(
            (get_benchmarks(platform), get_synthetics(platform)))
    elif source == "N":
        dataframe = get_npb_benchmarks(platform)
    elif source == "NS":
        dataframe = pd.concat(
            (get_npb_benchmarks(platform), get_synthetics(platform)))
    else:
        # ValueError is an Exception subclass, so existing handlers that
        # catch Exception keep working.
        raise ValueError(
            "unknown source '{}', expected one of: B, S, BS, N, NS".format(
                source))

    # 1 when the oracle is the GPU, 0 otherwise.
    dataframe["oracle_enc"] = [
        1 if x == "GPU" else 0 for x in dataframe["oracle"].values
    ]
    dataframe["benchmark_name"] = [
        escape_benchmark_name(b) for b in dataframe["benchmark"].values
    ]

    # load source code:
    source_dir = fs.path("kernels")
    srcs = []
    for row in dataframe["benchmark"].values:
        inpath = fs.path(source_dir, row + ".cl")
        with open(inpath) as infile:
            src = infile.read()
        # Every kernel file must begin with this exact prefix.
        if not src.startswith("__kernel void A"):
            print(fs.basename(inpath))
            raise Exception(src)
        srcs.append(src)
    dataframe["src"] = srcs
    dataframe["src_len"] = [len(s) for s in srcs]

    if not quiet:
        print("num instances {} ({} synthetic, {} benchmarks)".format(
            len(dataframe),
            sum(dataframe["synthetic"].values),
            len(dataframe) - sum(dataframe["synthetic"].values)))
        print("unique kernels", len(set(srcs)))

    # encode and pad sequences:
    atomizer = atomizer.from_text(''.join(dataframe["src"].values))

    seqs = [atomizer.atomize(seq) for seq in dataframe["src"].values]
    # Pad to the longest sequence, but never beyond max_seq_len.
    seq_length = min(max(len(s) for s in seqs), max_seq_len)
    pad_val = atomizer.vocab_size + 1
    dataframe["seq_len"] = [len(s) for s in seqs]
    dataframe["seq"] = list(
        pad_sequences(seqs, maxlen=seq_length, value=pad_val))

    if not quiet:
        print("vocab size", atomizer.vocab_size + 1)
        print("pad val", pad_val)
        print("padded seq length", seq_length)

    return {
        "dataframe": dataframe,
        "seq_length": seq_length,
        "atomizer": atomizer
    }
def test_path():
    """fs.path() joins its components with "/" separators."""
    cases = (
        (("foo", "bar"), "foo/bar"),
        (("foo/bar", "car"), "foo/bar/car"),
    )
    for components, expected in cases:
        assert expected == fs.path(*components)
def get_synthetics(platform):
    """
    Load the synthetic (CLgen) runtimes table for a platform.

    Reads "runtimes/<platform>-clgen.csv" and adds two columns: "source",
    which is "CLgen" for every row, and "synthetic", which is 1 for every
    row.

    Arguments:
        platform: Platform name used to build the CSV file name.

    Returns:
        The table returned by pd.read_csv(), with the two added columns.
    """
    csv_name = "runtimes/{platform}-clgen.csv".format(platform=platform)
    frame = pd.read_csv(fs.path(csv_name))

    row_count = len(frame)
    frame["source"] = ["CLgen"] * row_count
    frame["synthetic"] = [1] * row_count
    return frame
from experimental.dsmith import Colors runtime_t = NewType('runtime_t', float) status_t = NewType('status_t', int) return_t = namedtuple('return_t', ['runtime', 'status', 'stdout', 'stderr']) # build paths exec_path = dsmith.root_path("third_party", "clsmith", "build", "CLSmith") cl_launcher_path = dsmith.root_path("third_party", "clsmith", "build", "cl_launcher") include_path = dsmith.root_path("third_party", "clsmith", "runtime") # sanity checks assert fs.isexe(exec_path) assert fs.isexe(cl_launcher_path) assert fs.isfile(fs.path(include_path, "CLSmith.h")) def clsmith_cli(*args, timeout: int = 60, exec_path=exec_path) -> List[str]: return ["timeout", "--signal=9", str(timeout), exec_path] + list(args) def clsmith(*args, exec_path=exec_path) -> return_t: """ Returns: return_t: A named tuple consisting of runtime (float), status (int), stdout (str), and stderr (str). """ start_time = time() cli = clsmith_cli(*args)