def clang_cl_args(target=CLANG_CL_TARGETS[0], error_limit=0):
    """
    Build the Clang command line arguments used to compile OpenCL.

    :param target: Value passed to Clang's ``-target`` option.
    :param error_limit: Value passed to ``-ferror-limit``.
    :return: List of argument strings.
    """
    # Clang warnings that are switched off with -Wno-<name>.
    suppressed = (
        'ignored-pragmas',
        'implicit-function-declaration',
        'incompatible-library-redeclaration',
        'macro-redefined',
    )

    include_dir = fs.path(cfg.libclc(), 'generic', 'include')
    shim_header = smith.package_path(
        fs.path('share', 'include', 'opencl-shim.h'))

    args = [
        '-I' + include_dir,
        '-include', shim_header,
        '-target', target,
        '-ferror-limit={}'.format(error_limit),
        '-xcl',
    ]
    for warning in suppressed:
        args.append('-Wno-{}'.format(warning))
    return args
def get_all_sampler_datasets():
    """
    Collect every sampler "kernels.db" found under ~/.cache/clgen.

    Each entry of ~/.cache/clgen is searched for a "sampler" directory;
    every entry of that directory which holds a "kernels.db" file
    contributes one path to the result.

    :return: List of paths to the kernels.db files found.
    """
    # Phase 1: gather the sampler directories of every cache entry.
    sampler_dirs = []
    for version_dir in fs.ls(fs.path("~/.cache/clgen"), abspaths=True):
        sampler_root = fs.path(version_dir, "sampler")
        if not fs.isdir(sampler_root):
            continue
        sampler_dirs.extend(fs.ls(sampler_root, abspaths=True))

    # Phase 2: keep only those which actually contain a database.
    candidates = [fs.path(d, "kernels.db") for d in sampler_dirs]
    return [db for db in candidates if fs.isfile(db)]
def _init_error(err: Exception) -> None:
    """
    Delete any corpus files created so far, then re-raise the error.

    Note: ``self`` is not a parameter; it is taken from the enclosing
    scope.

    Arguments:
        err (Exception): The error which aborted corpus creation.

    Raises:
        Exception: Always re-raises err.
    """
    log.error("corpus creation failed. Deleting corpus files")

    stale_files = [fs.path(self.contentcache.path, "kernels.db")]
    for name in ("corpus.txt", "tensor.npy", "atomizer.pkl"):
        stale_files.append(fs.path(self.cache.path, name))

    for stale in stale_files:
        if not fs.exists(stale):
            continue
        log.info("removing", stale)
        fs.rm(stale)

    raise err
def test_PullFromRemoteToLocal(
    test_host: machine_spec_pb2.Host,
    test_mirrored_directory: machine_spec_pb2.MirroredDirectory):
  """A file written on the remote side appears locally after a pull."""
  mirror = LocalMirroredDirectory(test_host, test_mirrored_directory)

  # Create the file on the "remote" side.
  remote_file = fs.path(mirror.remote_path, 'a')
  with open(remote_file, 'w') as handle:
    handle.write('Hello, world!')

  mirror.PullFromRemoteToLocal()

  # The local mirror must now contain the same file with the same contents.
  assert pathlib.Path(mirror.local_path).is_dir()
  assert (pathlib.Path(mirror.local_path) / 'a').is_file()
  with open(fs.path(mirror.local_path, 'a')) as handle:
    contents = handle.read()
  assert contents == 'Hello, world!'
def models() -> Iterator[Model]:
    """
    Yield one Model per entry of the "model" cache directory.

    Nothing is yielded when the cache has no "model" directory.

    Returns
    -------
    Iterator[Model]
        Models built from the "META" JSON file of each cached model.
    """
    if not fs.isdir(clgen.cachepath(), "model"):
        return

    model_root = fs.path(clgen.cachepath(), "model")
    for model_dir in fs.ls(model_root, abspaths=True):
        meta_path = fs.path(model_dir, "META")
        yield Model.from_json(jsonutil.read_file(meta_path))
def data_path(*components, **kwargs) -> str:
    """
    Return absolute path to unittest data file.

    The path is resolved against the "data" directory which sits next to
    this module.

    Parameters
    ----------
    *components : str
        Relative path.
    **kwargs
        If 'exists' is True (the default), require that the file exists.

    Returns
    -------
    str
        Absolute path.

    Raises
    ------
    Data404
        If the path doesn't exist and 'exists' is True.
    """
    relative = fs.path(*components)
    must_exist = kwargs.get("exists", True)

    data_dir = os.path.join(os.path.dirname(__file__), "data")
    abspath = os.path.join(data_dir, relative)

    if must_exist and not os.path.exists(abspath):
        raise Data404(abspath)
    return abspath
def main():
    """
    Print a per-device summary of the job list and enumerate every job.

    Reads "joblist.db", prints a table of job counts per device, then
    calls enum_job() for every row of the jobs table with one log file
    per device (written to the "jobs" directory).
    """
    db = _db.Database(fs.path("joblist.db"))

    data = list(db.execute("SELECT device,Count(*) AS count\n"
                           "FROM jobs\n"
                           "GROUP BY device\n"
                           "ORDER BY count"))
    io.info("Job list:")
    print(fmt.table(data, columns=("Device", "Jobs")))
    print()

    jobs = list(db.execute("SELECT * FROM jobs"))

    fs.mkdir("jobs")
    # One log per device. The original dict literal listed "monza" twice,
    # which opened the same file twice and leaked the first handle.
    devices = ("monza", "whz5", "cec", "florence")
    logs = dict(
        (device, open("jobs/{0}.txt".format(device), "w"))
        for device in devices
    )

    try:
        for job in jobs:
            enum_job(logs, db, *job)
    finally:
        # Flush and release the log files even if a job fails.
        for logfile in logs.values():
            logfile.close()

    lab.exit()
def __init__(self, path):
    """
    Initialise a directory lock.

    Arguments:
        path (str): Path to lock file. Stored on the instance after being
            passed through fs.path().
    """
    lock_path = fs.path(path)
    self.path = lock_path
def data_path(*components, exists=True) -> str:
    """
    Return absolute path to unittest data file.

    The path is resolved against the "data" directory which sits next to
    this module.

    Parameters
    ----------
    *components : str
        Relative path.
    exists : bool, optional
        If True, require that file exists.

    Returns
    -------
    str
        Absolute path.

    Raises
    ------
    Data404
        If path doesn't exist and 'exists' is True.
    """
    relative = fs.path(*components)
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    abspath = os.path.join(data_dir, relative)

    if not exists:
        return abspath
    if not os.path.exists(abspath):
        raise Data404(abspath)
    return abspath
def get_all_sampler_datasets(all_clgen_versions: bool=True) -> list:
    """
    Collect the sampler "kernels.db" files found in the CLgen cache.

    Arguments:
        all_clgen_versions (bool, optional): If True, search every entry
            of ~/.cache/clgen. Otherwise search only the entry named
            after clgen.version().

    Returns:
        list: Paths to the kernels.db files found.
    """
    candidates = (
        fs.ls(fs.path("~/.cache/clgen"), abspaths=True)
        if all_clgen_versions
        else [fs.path("~/.cache/clgen", clgen.version())]
    )
    # Only cache entries which contain a "sampler" directory are searched.
    version_dirs = [d for d in candidates if fs.isdir(d, "sampler")]

    found = []
    for version_dir in version_dirs:
        sampler_root = fs.path(version_dir, "sampler")
        for sampler_dir in fs.ls(sampler_root, abspaths=True):
            db_path = fs.path(sampler_dir, "kernels.db")
            if fs.isfile(db_path):
                found.append(db_path)
    return found
def package_data(*path) -> bytes:
    """
    Read package data file.

    Parameters
    ----------
    path : str
        The relative path to the data file, e.g. 'share/foo.txt'.

    Returns
    -------
    bytes
        File contents.

    Raises
    ------
    InternalError
        If reading the resource fails for any reason.
    """
    # Called only for its side effect: package_path() is expected to raise
    # when the file doesn't exist.
    package_path(*path)

    resource = fs.path(*path)
    try:
        contents = resource_string(__name__, resource)
    except Exception:
        message = "failed to read package data '{}'".format(path)
        raise InternalError(message)
    return contents
def main():
    """
    Print a per-device summary of the job list and enumerate every job.

    Reads "joblist.db", prints a table of job counts per device, then
    calls enum_job() for every row of the jobs table with one log file
    per device (written to the "jobs" directory).
    """
    db = _db.Database(fs.path("joblist.db"))

    data = list(db.execute("SELECT device,Count(*) AS count\n"
                           "FROM jobs\n"
                           "GROUP BY device\n"
                           "ORDER BY count"))
    io.info("Job list:")
    print(fmt.table(data, columns=("Device", "Jobs")))
    print()

    jobs = list(db.execute("SELECT * FROM jobs"))

    fs.mkdir("jobs")
    # One log per device. The original dict literal listed "monza" twice,
    # which opened the same file twice and leaked the first handle.
    devices = ("monza", "whz5", "cec", "florence")
    logs = dict(
        (device, open("jobs/{0}.txt".format(device), "w"))
        for device in devices
    )

    try:
        for job in jobs:
            enum_job(logs, db, *job)
    finally:
        # Flush and release the log files even if a job fails.
        for logfile in logs.values():
            logfile.close()

    lab.exit()
def graph_ocl_stars(db_path: str) -> None:
    """
    Plot distribution of stargazers per file.

    Only files with fewer than 50 stars are plotted. The figure is saved
    to "ocl_stars.png" inside IMG_DIR.

    Arguments:
        db_path (str): Path to the database to read.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.set(color_codes=True)

    # The file name must be relative: a leading "/" makes the path join
    # discard IMG_DIR and write to the filesystem root instead.
    out_path = fs.path(IMG_DIR, 'ocl_stars.png')
    print('graph', out_path, '...')

    db = dbutil.connect(db_path)
    c = db.cursor()
    c.execute('SELECT stars FROM ContentMeta LEFT JOIN Repositories '
              'ON ContentMeta.repo_url=Repositories.url')
    stars = [row[0] for row in c.fetchall()]

    # Filter range
    data = [count for count in stars if count < 50]

    sns.distplot(data, bins=20, kde=False)
    plt.xlabel('GitHub Stargazer count')
    plt.ylabel('Number of files')
    plt.title('Stargazers per file')
    plt.savefig(out_path)
def __init__(self, path, tables=None, enable_traces=True):
    """
    Open (creating if necessary) a database and its tables.

    Arguments:
        path (str): The path to the database file.
        tables (dictionary of {str: tuple of str}, optional): A
          dictionary of {name: schema} pairs, where a schema is list
          of tuple pairs, of the form: (name, type). Defaults to no
          tables.
        enable_traces (bool, optional): Enable traces for user defined
          functions and aggregates.
    """
    # A None sentinel avoids sharing one mutable default dict between
    # calls.
    if tables is None:
        tables = {}

    self.path = fs.path(path)

    # Create directory if needed.
    parent_dir = fs.dirname(path)
    if parent_dir:
        fs.mkdir(parent_dir)

    self.connection = sql.connect(self.path)

    for name, schema in six.iteritems(tables):
        self.create_table(name, schema)

    io.debug("Opened connection to '{0}'".format(self.path))

    # Register exit handler
    atexit.register(self.close)

    # Enable traces for user defined functions and aggregates. See:
    #
    # https://docs.python.org/2/library/sqlite3.html#sqlite3.enable_callback_tracebacks
    if enable_traces:
        sql.enable_callback_tracebacks(True)
def from_json(corpus_json: dict):
    """
    Instantiate Corpus from JSON.

    The "path" and "id" keys are removed from corpus_json; whatever
    remains is forwarded to the Corpus constructor as keyword arguments.

    Arguments:
        corpus_json (dict): Specification.

    Returns:
        Corpus: Instantiated corpus.

    Raises:
        clgen.UserError: If neither a path nor an ID is given, if the path
            is not a directory, or if the ID is not in the cache.
    """
    path = corpus_json.pop("path", None)
    uid = corpus_json.pop("id", None)

    if not path and not uid:
        raise clgen.UserError("No corpus path or ID provided")

    if path:
        # A path takes precedence; the ID is derived from its contents.
        path = unpack_directory_if_needed(fs.abspath(path))
        if not fs.isdir(path):
            message = "Corpus path '{}' is not a directory".format(path)
            raise clgen.UserError(message)
        uid = dirhash(path, 'sha1')
    else:
        # Only an ID was given: it must already exist in the cache.
        cache_path = fs.path(cache.ROOT, "corpus", uid)
        if not fs.isdir(cache_path):
            raise clgen.UserError("Corpus {} not found".format(uid))

    return Corpus(uid, path=path, **corpus_json)
def read_file(*components, **kwargs):
    """
    Load a JSON data blob.

    Arguments:
        *components (str): Path components of the file to read.
        must_exist (bool, optional): If False, return empty dict if file
            cannot be read. Defaults to True.

    Returns:
        array or dict: JSON data.

    Raises:
        File404: Presumably raised by fs.must_exist() if the path does not
            exist and must_exist is True.
        IOError: If the file cannot be read and must_exist is True.
        ValueError: If JSON is malformed.
    """
    must_exist = kwargs.get("must_exist", True)

    if must_exist:
        path = fs.must_exist(*components)
    else:
        path = fs.path(*components)

    try:
        with open(path) as infile:
            return loads(infile.read())
    except ValueError as e:
        raise ValueError(
            "malformed JSON file '{path}'. Message from parser: {err}"
            .format(path=fs.basename(path), err=str(e)))
    except IOError:
        if not must_exist:
            return {}
        # Propagate the failure. The original *returned* the exception
        # object, which callers would mistake for JSON data.
        raise
def main():
    """
    Export CLSmith programs of one classification with the header inlined.

    For every distinct program which has a result with the requested
    classification, writes "<outdir>/<program_id>.cl" containing the
    program source with its '#include "CLSmith.h"' line replaced by the
    contents of the clsmith.h data file. Existing files are left alone.
    """
    parser = ArgumentParser(description=__description__)
    for positional in ("classification", "outdir"):
        parser.add_argument(positional)
    args = parser.parse_args()

    db.init("cc1")
    session = db.make_session()

    matching = session.query(sql.distinct(CLSmithResult.program_id)) \
        .filter(CLSmithResult.classification == args.classification)
    program_ids = [row[0] for row in matching.all()]

    header = fs.read_file(dsmith.data_path("include", "clsmith.h"))

    fs.mkdir(args.outdir)

    for program_id in ProgressBar()(program_ids):
        outpath = fs.path(args.outdir, program_id + ".cl")
        if fs.exists(outpath):
            continue

        program = session.query(CLSmithProgram) \
            .filter(CLSmithProgram.id == program_id).one()

        # Splice the header in place of the include directive.
        pre, post = program.src.split('#include "CLSmith.h"')
        with open(outpath, "w") as outfile:
            print(pre + header + post, file=outfile)
def read_file(*components, **kwargs):
    """
    Load a JSON data blob.

    Arguments:
        *components (str): Path components of the file to read.
        must_exist (bool, optional): If False, return empty dict if file
            cannot be read. Defaults to True.

    Returns:
        array or dict: JSON data.

    Raises:
        File404: Presumably raised by fs.must_exist() if the path does not
            exist and must_exist is True.
        IOError: If the file cannot be read and must_exist is True.
        ValueError: If JSON is malformed.
    """
    must_exist = kwargs.get("must_exist", True)

    if must_exist:
        path = fs.must_exist(*components)
    else:
        path = fs.path(*components)

    try:
        with open(path) as infile:
            return loads(infile.read())
    except ValueError as e:
        raise ValueError(
            "malformed JSON file '{path}'. Message from parser: {err}".format(
                path=fs.basename(path), err=str(e)))
    except IOError:
        if not must_exist:
            return {}
        # Propagate the failure. The original *returned* the exception
        # object, which callers would mistake for JSON data.
        raise
def _main(infiles: List[TextIO], dir_mode: bool, summarise: bool,
          fatal_errors: bool, use_shum: bool, quiet: bool,
          no_header: bool) -> None:
    """
    Extract or summarise features for the given inputs.

    Arguments:
        infiles (List[TextIO]): Open input files; only their names are used.
        dir_mode (bool): Treat each input as a directory and process every
            path listed recursively beneath it.
        summarise (bool): Print a CSV summary of the inputs and return
            without extracting features.
        fatal_errors (bool): Forwarded to features.files().
        use_shum (bool): Forwarded to features.files() as ``use_shim``.
            The parameter name is kept as-is for caller compatibility.
        quiet (bool): Forwarded to features.files().
        no_header (bool): If True, features.files() is called with
            ``header=False``.
    """
    from clgen import features

    input_paths = [infile.name for infile in infiles]

    def features_dir(csv_path):
        """Name of the directory which contains csv_path."""
        return fs.basename(fs.dirname(csv_path))

    if summarise:
        stats = [features.summarize(f) for f in input_paths]

        print('dataset', *list(stats[0].keys()), sep=',')
        for path, stat in zip(input_paths, stats):
            print(features_dir(path), *list(stat.values()), sep=',')
        return

    if dir_mode:
        trees = [fs.ls(d, abspaths=True, recursive=True) for d in input_paths]
        paths = [item for sublist in trees for item in sublist]
    else:
        paths = [fs.path(f) for f in input_paths]

    # The original body read the undefined name `use_shim` here, which
    # raised NameError; the value lives in the `use_shum` parameter.
    features.files(paths, fatal_errors=fatal_errors, quiet=quiet,
                   use_shim=use_shum, header=not no_header)
def clang_cl_args(target: str = CLANG_CL_TARGETS[0],
                  use_shim: bool = True,
                  error_limit: int = 0) -> list:
    """
    Build the Clang command line arguments used to compile OpenCL.

    Arguments:
        target (str): LLVM target.
        use_shim (bool, optional): Append ``-include <shim header>``.
        error_limit (int, optional): Limit number of compiler errors.

    Returns:
        str[]: Array of args.
    """
    # Warnings which are switched off with -Wno-<name>.
    suppressed = (
        'ignored-pragmas',
        'implicit-function-declaration',
        'incompatible-library-redeclaration',
        'macro-redefined',
    )

    args = [
        '-I' + fs.path(native.LIBCLC),
        '-target', target,
        '-ferror-limit={}'.format(error_limit),
        '-xcl',
    ]
    args.extend('-Wno-{}'.format(name) for name in suppressed)

    if use_shim:
        args.extend(['-include', native.SHIMFILE])

    return args
def graph_bc_lc(db_path: str) -> None:
    """
    Plot distribution of bytecode line counts.

    Only bytecodes shorter than 500 lines are plotted. The figure is
    saved to "bc_lcs.png" inside IMG_DIR.

    Arguments:
        db_path (str): Path to the database to read.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.set(color_codes=True)

    out_path = fs.path(IMG_DIR, 'bc_lcs.png')
    print('graph', out_path, '...')

    cursor = dbutil.connect(db_path).cursor()
    cursor.execute("SELECT contents FROM Bytecodes")
    rows = cursor.fetchall()

    line_counts = [len(decode(row[0]).split('\n')) for row in rows]

    # Drop outliers so the histogram stays readable.
    shown = [count for count in line_counts if count < 500]

    sns.distplot(shown, bins=20, kde=False)
    plt.xlabel('Line count')
    plt.ylabel('Number of Bytecode files')
    plt.title('Distribution of Bytecode lengths')
    plt.savefig(out_path)
def _create_kernels_db(self, path: str, encoding: str = "default") -> None:
    """
    Build kernels.db from the files under path and store it in the cache.

    Arguments:
        path (str): Directory which is listed recursively for corpus files.
        encoding (str, optional): Encoding name passed to encode().
    """
    log.debug("creating database")

    # Create the database under a temporary name, then hand it to the
    # content cache under its final key.
    staging_path = fs.path(self.contentcache.path, "kernels.db.tmp")
    dbutil.create_db(staging_path)
    self.contentcache["kernels.db"] = staging_path

    # Regular files only: directories are dropped from the listing.
    corpus_files = []
    for entry in fs.ls(path, abspaths=True, recursive=True):
        if fs.isfile(entry):
            corpus_files.append(entry)

    # Pipeline: import -> preprocess -> encode -> print stats. The cache
    # is consulted before every stage, as in the original.
    fetch.fetch_fs(self.contentcache["kernels.db"], corpus_files)
    preprocess.preprocess_db(self.contentcache["kernels.db"])
    encode(self.contentcache["kernels.db"], encoding)
    explore.explore(self.contentcache["kernels.db"])
def to_dist(self, distpath: str, author: str = None) -> str:
    """
    Create a dist file.

    The dist file is a bzip2 tarball holding a "meta.json" entry plus
    every path listed in meta["contents"] under "contents/".

    Arguments:
        distpath (str): Path to dist file, without the ".tar.bz2" suffix.
        author (str, optional): Author name. If given, it is stored in
            the "author" field of the meta data.

    Returns:
        str: Path to generated distfile.

    Raises:
        DistError: If the output file already exists.
        Exception: Any error raised while packing is propagated after the
            partial output has been removed.
    """
    outpath = fs.abspath(distpath) + ".tar.bz2"
    if fs.exists(outpath):
        raise DistError("file {} exists".format(outpath))

    meta = self.meta
    if author is not None:
        meta["author"] = author
    log.debug(clgen.format_json(meta))

    # Tracked outside the try block so cleanup never touches an unbound
    # name (the original handler could raise NameError and hide the real
    # error when opening the tarball or creating the temp file failed).
    metapath = None
    try:
        with tarfile.open(outpath, 'w:bz2') as tar:
            # write meta
            metapath = mktemp(prefix="clgen-", suffix=".json")
            clgen.write_file(metapath, clgen.format_json(meta))
            log.debug("metafile:", metapath)

            # create tarball
            tar.add(metapath, arcname="meta.json")

            # pack contents:
            for path in meta["contents"]:
                abspath = fs.path(cache.ROOT, path)
                log.verbose("packing", abspath)
                tar.add(abspath, arcname=fs.path("contents", path))
    except Exception:
        # Don't leave a truncated distfile behind.
        fs.rm(outpath)
        raise
    finally:
        # tidy up
        if metapath is not None:
            fs.rm(metapath)

    return outpath
def __init__(self, name: str):
    """
    Initialise a filesystem cache and create its directory.

    Arguments:
        name (str): Name of the cache; also its path relative to ROOT.
    """
    cache_dir = fs.path(ROOT, name)

    self.path = cache_dir
    self.name = name

    fs.mkdir(cache_dir)
def extra_args(use_shim=False):
    """
    Return additional compiler arguments.

    Arguments:
        use_shim (bool, optional): If True, define SMITH_FEATURES and
            include the packaged opencl-shim.h header.

    Returns:
        list of str: The extra arguments; empty if use_shim is False.
    """
    if not use_shim:
        return []

    shim_header = smith.package_path(
        fs.path('share', 'include', 'opencl-shim.h'))
    return ["-DSMITH_FEATURES", "-include", shim_header]
def get_instcount(opencl_path):
    """
    Compile an OpenCL file to bitcode and return its instruction counts.

    The bitcode is written to the fixed location /tmp/temp.bc.

    Arguments:
        opencl_path (str): Path to the OpenCL source file.

    Returns:
        The value produced by parse_instcount() for the instcount output.
    """
    io.debug("Reading file '%s'" % opencl_path)

    bitcode_path = fs.path("/tmp/temp.bc")

    # Step 1: OpenCL source -> bitcode.
    compile_cmd = clang_cmd(opencl_path, bitcode_path)
    host.system(compile_cmd)

    # Step 2: run the instruction count pass and parse what it prints.
    raw_output = host.check_output(instcount_cmd(bitcode_path))
    return parse_instcount(raw_output)
def echo(*args, **kwargs):
    """
    Write a message to a file.

    Arguments:
        args A list of arguments which make up the message. The last
          argument is the path to the file to write to.
        append (bool, optional): If True, append to the file rather than
          overwriting it. Any other keyword arguments are passed on to
          print().
    """
    message = args[:-1]
    destination = fs.path(args[-1])

    if kwargs.pop("append", False):
        mode = "a"
    else:
        # The original passed the path through fs.path() a second time on
        # this branch only; that is reproduced here.
        destination = fs.path(destination)
        mode = "w"

    with open(destination, mode) as file:
        print(*message, file=file, **kwargs)
def _create_txt(self) -> None:
    """
    Generate corpus.txt from the cached kernels.db and cache it.

    The text is first written to "corpus.txt.tmp" in the cache directory
    and then stored in the cache under the key "corpus.txt".
    """
    log.debug("creating corpus")

    # TODO: additional options in corpus JSON to accommodate for EOF,
    # different encodings etc.
    staging_path = fs.path(self.cache.path, "corpus.txt.tmp")
    kernels_db = self.contentcache["kernels.db"]
    train(kernels_db, staging_path)
    self.cache["corpus.txt"] = staging_path
def cp_loc(path, name):
    """
    Copy database from local filesystem.

    Arguments:
        path (str): Path of the database to copy.
        name (str): Database name; dst_path(name) gives the destination.
    """
    source = fs.path(path)
    destination = dst_path(name)

    io.info("Copying", source, "->", name)
    fs.cp(source, destination)
def export_model(self, out_basename):
    """
    Export the model as a Graphviz file named "<out_basename>.dot".

    Export is best-effort: a failure is reported on stderr but never
    raised, so callers are not interrupted by a failed export.

    Arguments:
        out_basename: Output path without the ".dot" extension.
    """
    try:
        outfile = fs.path(str(out_basename) + ".dot")
        tree.export_graphviz(self.model, out_file=outfile,
                             # label="none",
                             max_depth=5,
                             filled=True, rounded=True,
                             class_names=["CPU", "GPU"],
                             feature_names=["F1", "F2", "F3", "F4"])
        print("export model to '{}'".format(outfile))
    except Exception as e:
        # Previously swallowed silently, which hid every export failure.
        import sys
        sys.stderr.write("failed to export model '{}': {}\n"
                         .format(out_basename, e))
def keypath(self, key):
    """
    Map a key to its location inside this object's directory.

    Arguments:
        key: Key.

    Returns:
        str: self.path joined with the escaped key.
    """
    escaped = self.escape_key(key)
    return fs.path(self.path, escaped)
def keypath(self, key: str) -> str:
    """
    Map a key to its location inside the cache directory.

    The key is first translated with mapkey(), then escaped.

    Arguments:
        key (str): Key.

    Returns:
        str: self.path joined with the mapped and escaped key.
    """
    mapped = self.mapkey(key)
    escaped = self.escape(mapped)
    return fs.path(self.path, escaped)
def data_path(*path) -> Path:
    """
    Locate a file below this package's "data" directory.

    Arguments:
        *path (List[str]): Path components, relative to "data".

    Returns:
        Path: Value returned by resource_filename() for the data file.
    """
    relative = fs.path("data", *path)
    return resource_filename(__name__, relative)
def root_path(*path) -> Path:
    """
    Build a path below ROOT.

    Arguments:
        *path (List[str]): Path components, relative to ROOT.

    Returns:
        Path: ROOT joined with the given components.
    """
    joined = fs.path(ROOT, *path)
    return joined
def cache(self, model: Model) -> Cache:
    """
    Return the cache for this sampler combined with a model.

    The cache lives under "sampler/<checksum>", where the checksum is
    taken over the sampler hash followed by the model hash.

    Arguments:
        model (Model): CLgen model.

    Returns:
        Cache: Cache.
    """
    combined_hash = self.hash + model.hash
    checksum = clgen.checksum_str(combined_hash)
    return Cache(fs.path("sampler", checksum))
def sql_script(name: str) -> str:
    """
    Load a packaged SQL script as a string.

    Arguments:
        name (str): The name of the SQL script (without file extension).
            The script is read from "data/sql/<name>.sql".

    Returns:
        str: SQL script.
    """
    filename = str(name) + ".sql"
    return package_str(fs.path('data', 'sql', filename))
def cachepath(*relative_path_components: str) -> pathlib.Path:
  """Return path to file system cache.

  The cache root is read from $CLGEN_CACHE and falls back to
  "~/.cache/clgen/". The root directory is created if it does not exist.

  Args:
    *relative_path_components: Relative path of cache.

  Returns:
    Path of the requested location below the cache root.
  """
  configured_root = os.environ.get("CLGEN_CACHE", "~/.cache/clgen/")
  cache_root = pathlib.Path(configured_root)

  # Only the expanded copy is used for directory creation; the unexpanded
  # root is what gets handed to fs.path() below.
  expanded_root = cache_root.expanduser()
  expanded_root.mkdir(parents=True, exist_ok=True)

  joined = fs.path(cache_root, *relative_path_components)
  return pathlib.Path(joined)
def clangformat_ocl(src, id='anon'):
    """
    Format OpenCL source by piping it through clang-format.

    Anything clang-format writes to stderr is printed.

    Arguments:
        src (str): Source code to format.
        id (str, optional): Unused by this function.

    Returns:
        str: The formatted source.

    Raises:
        ClangFormatException: If clang-format exits with non-zero status.
    """
    binary = fs.path(cfg.llvm_path(), "build", "bin", "clang-format")
    style_flag = '-style={}'.format(json.dumps(clangformat_config))

    process = Popen([binary, style_flag],
                    stdin=PIPE, stdout=PIPE, stderr=PIPE,
                    env=cfg.toolchain_env())
    out_bytes, err_bytes = process.communicate(src.encode('utf-8'))

    if err_bytes:
        print(err_bytes.decode('utf-8'))

    failed = process.returncode != 0
    if failed:
        raise ClangFormatException(err_bytes.decode('utf-8'))

    return out_bytes.decode('utf-8')
def make(target="all", dir=".", **kwargs):
    """
    Run make on a target.

    Arguments:

        target (str, optional): Name of the target to build. Defaults
          to "all".
        dir (str, optional): Path to directory containing Makefile.
        **kwargs (optional): Any additional arguments to be passed to
          system.run(). A "timeout" of 300 is used unless one is given.

    Returns:

        (int, str, str): The first element is the return code of the
          make command. The second and third elements are the stdout
          and stderr of the process.

    Raises:

        NoMakefileError: In case a Makefile is not found in the target
          directory.
        NoTargetError: In case the Makefile does not support the
          requested target.
        MakeError: In case the target rule fails.
    """
    if not fs.isfile(fs.path(dir, "Makefile")):
        raise NoMakefileError("No makefile in '{}'".format(fs.abspath(dir)))

    # Default parameters to system.run()
    kwargs.setdefault("timeout", 300)

    fs.cd(dir)
    try:
        ret, out, err = system.run(["make", target], **kwargs)
    finally:
        # Always restore the previous working directory, including when
        # system.run() raises.
        fs.cdpop()

    if ret > 0:
        if re.search(_BAD_TARGET_RE, err):
            raise NoTargetError("No rule for target '{}'"
                                .format(target))
        raise MakeError("Target '{}' failed".format(target))

    return ret, out, err
def run_example_prog(prog, args):
    """
    Run a SkelCL example program.

    The working directory is changed to the program's build directory
    and is not restored. A failing command line is appended to
    /tmp/naughty.txt.

    Arguments:

        prog (str): The name of the program to run
        args (list of str): Any arguments

    Returns:

        The return code reported by system.run().
    """
    build_dir = fs.path(experiment.EXAMPLES_BUILD, prog)
    fs.cd(build_dir)

    cmd = ["./" + prog]
    cmd = cmd + args
    cmd_str = " ".join(cmd)
    io.info("COMMAND:", io.colourise(io.Colours.RED, cmd_str))

    result = system.run(cmd, stdout=system.STDOUT, stderr=system.STDERR)
    ret = result[0]

    if ret:
        system.echo(cmd_str, "/tmp/naughty.txt", append=True)

    return ret
def run_job(i, n, wgsize, program, args):
    """
    Run one example program with a fixed workgroup size.

    The workgroup size is passed to the program through OMNITUNE_*
    environment variables. The outcome is written as a tab separated line
    to `errlog` on non-zero exit and to `runlog` otherwise.

    Arguments:

        i: Index of this job, used for progress output only.
        n: Total number of jobs, used for progress output only.
        wgsize: Hashed workgroup size, decoded with unhash_params().
        program (str): Name of the example program.
        args (str): Whitespace separated program arguments.
    """
    wg_c, wg_r = unhash_params(wgsize)

    # Set environment variable.
    for variable, value in (("OMNITUNE_OFFLINE_TRAINING", "1"),
                            ("OMNITUNE_STENCIL_WG_C", str(wg_c)),
                            ("OMNITUNE_STENCIL_WG_R", str(wg_r))):
        os.environ[variable] = value

    fs.cd(fs.path(experiment.EXAMPLES_BUILD, program))

    cmd_str = "./{} {}".format(program, args.rstrip())
    cmd = cmd_str.split()

    io.info(i, "of", n, " - ", wgsize, "COMMAND:",
            io.colourise(io.Colours.RED, cmd_str))
    ret, _, _ = system.run(cmd, stdout=system.STDOUT, stderr=system.STDERR)

    # Failures and successes are recorded in separate logs.
    destination = errlog if ret else runlog
    print(ret, wgsize, program, args, sep="\t", file=destination)
def speedup_regression(db):
    """
    Evaluate speedup regressors against the speedup statistics dataset.

    Loads ~/data/msc-thesis/csv/speedup_stats.csv and runs the evaluation
    with "4x4" as the baseline.

    Arguments:

        db: Database handle, passed to the dataset loader and evaluation.
    """
    def _rank_fn(predictions):
        """Order predictions by their second element, largest first."""
        # sorted() takes `reverse`; the original `reversed=True` raised
        # TypeError as soon as the rank function was called.
        return sorted(predictions, key=lambda x: x[1], reverse=True)

    path = fs.path("~/data/msc-thesis/csv/speedup_stats.csv")
    dataset = RegressionDataset.load(path, db)

    # db.empty_table("speedup_classification_results")

    baseline = "4x4"
    classifiers = (
        # ml.LinearRegression(),
        ml.RandomForest(),
        # ml.SMOreg(),
        # ml.ZeroR(),
    )

    eval_fn = partial(eval_speedup_regressors, db, classifiers,
                      baseline, _rank_fn, "speedup_classification_results")
    run_eval(db, dataset, eval_fn, "speedup_classification")
"""
Configuration.
"""
from itertools import product

import labm8
from labm8 import io
from labm8 import fs
from labm8 import system

# Everything below DATA_ROOT is derived from it with fs.path().
DATA_ROOT = fs.path("~/data/msc-thesis")
DB_DEST = fs.path(DATA_ROOT, "db")
ORACLE_PATH = fs.path("/usr/share/omnitune/db/skelcl.db")
TAB_ROOT = fs.path(DATA_ROOT, "tab")
IMG_ROOT = fs.path(DATA_ROOT, "img")

# Path to classifier results using cross-validation.
CLASS_XVAL_PATH = fs.path(DATA_ROOT, "class_xval.json")
# Path to classifier results using synthetic training, real validation.
CLASS_SYN_PATH = fs.path(DATA_ROOT, "class_syn.json")

# Source checkout and the SkelCL example directories inside it.
SRC_ROOT = fs.path("~/src/msc-thesis")
EXAMPLES_BUILD = fs.path(SRC_ROOT, "skelcl/build/examples/")
EXAMPLES_SRC = fs.path(SRC_ROOT, "skelcl/examples/")

#################################
# Synthetic benchmark arguments #
#################################
# Two alternatives: no extra argument, or the "-c" flag.
COMPLEXITIES = ([""], ["-c"])
def load(src, loader="weka.core.converters.ArffLoader", **kwargs):
    """
    Load a data file with a Weka loader.

    Arguments:

        src (str): Path of the file to load.
        loader (str, optional): Class name of the Weka loader.
        **kwargs (optional): Extra arguments for the WekaLoader.

    Returns:

        The loaded data, or None if the module is not supported.
    """
    if MODULE_SUPPORTED:
        weka_loader = WekaLoader(classname=loader, **kwargs)
        return weka_loader.load_file(fs.path(src))
    return None
def save(data, dst, saver="weka.core.converters.ArffSaver", **kwargs):
    """
    Save data to a file with a Weka saver.

    Does nothing if the module is not supported.

    Arguments:

        data: The data to save.
        dst (str): Path of the file to write.
        saver (str, optional): Class name of the Weka saver.
        **kwargs (optional): Extra arguments for the WekaSaver.
    """
    if MODULE_SUPPORTED:
        weka_saver = WekaSaver(classname=saver, **kwargs)
        weka_saver.save_file(data, fs.path(dst))
import re
import subprocess

import labm8
from labm8 import fs
from labm8 import io

# Matches one line of instcount output, capturing the count and the
# counted type. A raw string is required: "\d" in a plain string literal
# is an invalid escape sequence.
_LINE_RE = re.compile(r"^(?P<count>\d+) instcount - Number of (?P<type>.+)")

# Default location of the LLVM binaries.
DEFAULT_LLVM_PATH = fs.path("~/src/msc-thesis/skelcl/libraries/llvm/build/bin/")


class Error(Exception):
    """
    LLVM module error.
    """
    pass


class ProgramNotFoundError(Error):
    """
    Error thrown if a program is not found.
    """
    pass


class ClangError(Error):
    """
    Error thrown if clang exits with non-zero status.
    """
    pass
import itertools

import labm8
from labm8 import fs

# Base directory; fs.path() is given a "~"-prefixed path here.
ROOT = fs.path("~/src/msc-thesis/scraps/05-20")
SkelCL benchmark applications. """ import json import os import re import subprocess import sys import labm8 as lab from labm8 import fs from labm8 import io from labm8 import host CLANG = fs.path("/home/chris/src/msc-thesis/llvm/Release+Asserts/bin/clang") OPT = fs.path("/home/chris/src/msc-thesis/llvm/Release+Asserts/bin/opt") def print_help(): print("Get instruction counts for an OpenCL source file.") print() print(" Usage: %s <kernel>.cl" % sys.argv[0]) print() print("Accepts a path to an OpenCL file, and compiles to LLVM bytecode.") print("The LLVM InstCount pass is then performed on this bytecode, and the") print("results printed and returned.") def clang_cmd(opencl_input, bitcode_output): return [CLANG, "-Dcl_clang_storage_class_specifiers", "-isystem", "libclc/generic/include",
def test_path(self):
    """fs.path() joins its components with a path separator."""
    cases = (
        ("foo/bar", ("foo", "bar")),
        ("foo/bar/car", ("foo/bar", "car")),
    )
    for expected, components in cases:
        self._test(expected, fs.path(*components))
def toolchain_env():
    """
    Return the environment variables for running the toolchain.

    :return: Dict which points LD_LIBRARY_PATH at "<llvm_path>/build/lib".
    """
    library_dir = fs.path(llvm_path(), "build", "lib")
    env = {}
    env['LD_LIBRARY_PATH'] = library_dir
    return env
def test_path_homedir(self):
    """fs.path() expands a leading "~" like os.path.expanduser()."""
    home = os.path.expanduser("~")
    self._test(home, fs.path("~"))

    home_foo = os.path.join(os.path.expanduser("~"), "foo")
    self._test(home_foo, fs.path("~", "foo"))
def dst_path(name):
    """
    Get destination path for named database.

    Arguments:

        name (str): Database name, without the ".db" extension.

    Returns:

        str: "<name>.db" inside experiment.DB_DEST.
    """
    filename = name + ".db"
    return fs.path(experiment.DB_DEST, filename)
import itertools

import labm8
from labm8 import io
from labm8 import fs
from labm8 import system

# Base directory; every path below is derived from it with fs.path().
ROOT = fs.path("~/src/msc-thesis")

# SimpleBig example: build directory and binary, source directory and
# host source file.
SIMPLEBIG_BUILD = fs.path(ROOT, "skelcl/build/examples/SimpleBig")
SIMPLEBIG_BUILD_BIN = fs.path(SIMPLEBIG_BUILD, "SimpleBig")
SIMPLEBIG_SRC = fs.path(ROOT, "skelcl/examples/SimpleBig")
SIMPLEBIG_SRC_HOST = fs.path(SIMPLEBIG_SRC, "main.cpp")

# Database files, all located in DATABASE_ROOT.
DATABASE_ROOT = fs.path(ROOT, "data")
DATABASES = [
    fs.path(DATABASE_ROOT, "omnitune.skelcl.cec.db"),
    fs.path(DATABASE_ROOT, "omnitune.skelcl.dhcp-90-060.db"),
    fs.path(DATABASE_ROOT, "omnitune.skelcl.florence.db"),
    fs.path(DATABASE_ROOT, "omnitune.skelcl.monza.db"),
    fs.path(DATABASE_ROOT, "omnitune.skelcl.tim.db"),
    fs.path(DATABASE_ROOT, "omnitune.skelcl.whz5.db")
]
# NOTE(review): a bare file name, unlike the DATABASES entries - confirm
# whether callers join it with DATABASE_ROOT.
DATABASE_ORACLE = "omnitune.oracle.db"

## Arguments
# Two alternatives: no extra argument, or the "-c" flag.
COMPLEXITIES = ([""], ["-c"])
def _plot_scenario_subset(db, category, name, where, title):
    """Render the coverage, safety and oracle plots for one scenario subset."""
    filename = "{0}/{1}.png".format(category, name)
    visualise.coverage(db, fs.path(experiment.IMG_ROOT, "coverage/" + filename),
                       where=where, title=title)
    visualise.safety(db, fs.path(experiment.IMG_ROOT, "safety/" + filename),
                     where=where, title=title)
    visualise.oracle_wgsizes(db, fs.path(experiment.IMG_ROOT,
                                         "oracle/" + filename),
                             where=where, title=title)


def main():
    """
    Regenerate every plot and table from the oracle database.

    Old images and tables are deleted first. Plots are then written below
    experiment.IMG_ROOT: summary pie charts, per-scenario heatmaps, 3D
    bars and trisurfs, the ML visualisations, whole-dataset plots, and
    coverage/safety/oracle plots per device, kernel and dataset.

    While drawing the heatmaps, zero-valued parameter points which are
    below the scenario's maximum workgroup size are recorded in the
    refused_params table.
    """
    db = _db.Database(experiment.ORACLE_PATH)
    ml.start()

    # Delete any old stuff.
    fs.rm(experiment.IMG_ROOT + "/*")
    fs.rm(experiment.TAB_ROOT + "/*")

    # Make directories
    fs.mkdir(experiment.TAB_ROOT)
    image_dirs = (
        "scenarios/bars", "scenarios/heatmap", "scenarios/trisurf",
        "coverage/devices", "coverage/kernels", "coverage/datasets",
        "safety/devices", "safety/kernels", "safety/datasets",
        "oracle/devices", "oracle/kernels", "oracle/datasets",
    )
    for image_dir in image_dirs:
        fs.mkdir(fs.path(experiment.IMG_ROOT, image_dir))

    # Summary pie charts. The misspelt "sceanrios" file names are kept so
    # that existing references to these outputs stay valid.
    visualise.pie(db.num_scenarios_by_device,
                  fs.path(experiment.IMG_ROOT, "num_sceanrios_by_device"))
    visualise.pie(db.num_runtime_stats_by_device,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_device"))
    visualise.pie(db.num_scenarios_by_dataset,
                  fs.path(experiment.IMG_ROOT, "num_sceanrios_by_dataset"))
    visualise.pie(db.num_runtime_stats_by_dataset,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_dataset"))
    # This chart used to be rendered twice to the same file; once is enough.
    visualise.pie(db.num_runtime_stats_by_kernel,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_kernel"))

    # Per-scenario plots
    for row in db.scenario_properties:
        (scenario, device, kernel, north, south, east, west,
         max_wgsize, width, height, tout) = row
        title = ("{device}: {kernel}[{n},{s},{e},{w}]\n"
                 "{width} x {height} {type}s"
                 .format(device=text.truncate(device, 18), kernel=kernel,
                         n=north, s=south, e=east, w=west,
                         width=width, height=height, type=tout))
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/heatmap/{id}.png".format(id=scenario))
        space = _space.ParamSpace.from_dict(db.perf_scenario(scenario))
        max_c = min(25, len(space.c))
        max_r = min(25, len(space.r))
        space.reshape(max_c=max_c, max_r=max_r)

        # Heatmaps.
        mask = _space.ParamSpace(space.c, space.r)
        for j in range(len(mask.r)):
            for i in range(len(mask.c)):
                if space.matrix[j][i] == 0:
                    r, c = space.r[j], space.c[i]
                    # TODO: Get values from refused_params table.
                    if r * c >= max_wgsize:
                        # Illegal
                        mask.matrix[j][i] = -1
                    else:
                        # Refused
                        db.execute("INSERT OR IGNORE INTO refused_params "
                                   "VALUES(?,?)",
                                   (scenario, hash_params(c, r)))
                        space.matrix[j][i] = -1
                        mask.matrix[j][i] = 1

        db.commit()

        # Rows are flipped so that they are drawn in reverse order.
        new_order = list(reversed(range(space.matrix.shape[0])))
        data = space.matrix[:][new_order]

        figsize = (12, 6)

        _, ax = plt.subplots(1, 2, figsize=figsize, sharey=True)
        sns.heatmap(data, ax=ax[0], vmin=-1, vmax=1,
                    xticklabels=space.c,
                    yticklabels=list(reversed(space.r)), square=True)

        ax[0].set_title(title)

        new_order = list(reversed(range(mask.matrix.shape[0])))
        data = mask.matrix[:][new_order]

        sns.heatmap(data, ax=ax[1], vmin=-1, vmax=1,
                    xticklabels=space.c,
                    yticklabels=list(reversed(space.r)), square=True)

        # Set labels.
        ax[0].set_ylabel("Rows")
        ax[0].set_xlabel("Columns")
        ax[1].set_ylabel("Rows")
        ax[1].set_xlabel("Columns")

        # plt.tight_layout()
        # plt.gcf().set_size_inches(*figsize, dpi=300)

        viz.finalise(output)

        # 3D bars.
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/bars/{id}.png".format(id=scenario))
        space.bar3d(output=output, title=title, zlabel="Performance",
                    rotation=45)

        # Trisurfs.
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/trisurf/{id}.png".format(id=scenario))
        space.trisurf(output=output, title=title, zlabel="Performance",
                      rotation=45)

    #####################
    # ML Visualisations #
    #####################
    #features_tab(db, experiment.TAB_ROOT)

    jobs = ("xval", "arch", "xval_real", "synthetic_real")
    for job in jobs:
        visualise_classification_job(db, job)

    # Runtime regression accuracy.
    for job in jobs:
        visualise_regression_job(db, job)

    # Whole-dataset plots
    visualise.runtimes_variance(db, fs.path(experiment.IMG_ROOT,
                                            "runtime_variance.png"),
                                min_samples=30)
    whole_dataset_plots = (
        (visualise.num_samples, "num_samples.png"),
        (visualise.runtimes_range, "runtimes_range.png"),
        (visualise.max_speedups, "max_speedups.png"),
        (visualise.kernel_performance, "kernel_performance.png"),
        (visualise.device_performance, "device_performance.png"),
        (visualise.dataset_performance, "dataset_performance.png"),
        (visualise.num_params_vs_accuracy, "num_params_vs_accuracy.png"),
        (visualise.performance_vs_coverage, "performance_vs_coverage.png"),
        (visualise.performance_vs_max_wgsize, "performance_vs_max_wgsize.png"),
        (visualise.performance_vs_wgsize, "performance_vs_wgsize.png"),
        (visualise.performance_vs_wg_c, "performance_vs_wg_c.png"),
        (visualise.performance_vs_wg_r, "performance_vs_wg_r.png"),
        (visualise.max_wgsizes, "max_wgsizes.png"),
        (visualise.oracle_speedups, "oracle_speedups.png"),
        (visualise.coverage, "coverage/coverage.png"),
        (visualise.safety, "safety/safety.png"),
        (visualise.oracle_wgsizes, "oracle/all.png"),
    )
    for plot_fn, filename in whole_dataset_plots:
        plot_fn(db, fs.path(experiment.IMG_ROOT, filename))

    # Per-device plots
    for i, device in enumerate(db.devices):
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE device='{0}')"
                 .format(device))
        _plot_scenario_subset(db, "devices", "{0}".format(i), where, device)

        where = ("scenario IN (\n"
                 " SELECT id from scenarios WHERE device='{0}'\n"
                 ") AND scenario IN (\n"
                 " SELECT id FROM scenarios WHERE kernel IN (\n"
                 " SELECT id FROM kernel_names WHERE synthetic=0\n"
                 " )\n"
                 ")"
                 .format(device))
        _plot_scenario_subset(db, "devices", "{0}_real".format(i), where,
                              device + ", real")

        where = ("scenario IN (\n"
                 " SELECT id from scenarios WHERE device='{0}'\n"
                 ") AND scenario IN (\n"
                 " SELECT id FROM scenarios WHERE kernel IN (\n"
                 " SELECT id FROM kernel_names WHERE synthetic=1\n"
                 " )\n"
                 ")"
                 .format(device))
        _plot_scenario_subset(db, "devices", "{0}_synthetic".format(i), where,
                              device + ", synthetic")

    # Per-kernel plots
    # .items() works on Python 2 and 3, unlike .iteritems().
    for kernel, ids in db.lookup_named_kernels().items():
        id_wrapped = ['"' + id + '"' for id in ids]
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE kernel IN ({0}))"
                 .format(",".join(id_wrapped)))
        # The oracle plot used to be drawn with visualise.safety(), a
        # copy-paste slip; the helper uses visualise.oracle_wgsizes().
        _plot_scenario_subset(db, "kernels", "{0}".format(kernel), where,
                              kernel)

    # Per-dataset plots
    for i, dataset in enumerate(db.datasets):
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE dataset='{0}')"
                 .format(dataset))
        # Same fix as for the per-kernel oracle plot.
        _plot_scenario_subset(db, "datasets", "{0}".format(i), where, dataset)

    ml.stop()
def assert_exists(*path_components, **kwargs):
    """
    Return the path built from the components, requiring that it exists.

    :param path_components: Components of the path.
    :param exception: (keyword, optional) Exception class to raise if the
        path does not exist. Defaults to SmithException.
    :return: The joined path.
    """
    path = fs.path(*path_components)

    if os.path.exists(path):
        return path

    error_type = kwargs.get("exception", SmithException)
    raise error_type("path '{}' does not exist".format(path))
def visualise_classification_job(db, job):
    """
    Plot and tabulate the classification results of one job.

    Plots are written below "img/classification/<job>/" and the results
    table to "<job>.tex" in experiment.TAB_ROOT.

    Arguments:

        db: Database to read classification results from.
        job (str): Name of the job.
    """
    basedir = "img/classification/{}/".format(job)

    for subdir in ("", "classifiers", "err_fns"):
        fs.mkdir(basedir + subdir)

    visualise.err_fn_performance(db, basedir + "err_fns.png", job=job)

    # Bar plot of all results.
    visualise.classification(db, "img/classification/{}.png".format(job),
                             job=job)

    # Per-classifier plots.
    for index, classifier in enumerate(db.classification_classifiers):
        output = basedir + "classifiers/{}.png".format(index)
        visualise.classifier_speedups(db, classifier, output, job=job)

    # Per-err_fn plots.
    for err_fn in db.err_fns:
        output = basedir + "err_fns/{}.png".format(err_fn)
        visualise.err_fn_speedups(db, err_fn, output, job=job, sort=True)

    # Results table.
    query = db.execute(
        "SELECT classifier,err_fn,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? GROUP BY classifier,err_fn",
        (job,)
    )
    results = []
    for classifier, err_fn, count in query:
        rows = list(db.execute(
            "SELECT correct,illegal,refused,performance,speedup\n"
            "FROM classification_results\n"
            "WHERE job=? AND classifier=? AND err_fn=?",
            (job, classifier, err_fn)
        ))
        # Transpose the rows into one sequence per column.
        correct, illegal, refused, performance, speedup = zip(*rows)

        summary = [classifier, err_fn]
        # Percentage of results per outcome.
        summary += [(sum(column) / count) * 100
                    for column in (correct, illegal, refused)]
        summary += [
            min(performance) * 100,
            labmath.geomean(performance) * 100,
            max(performance) * 100,
            min(speedup),
            labmath.geomean(speedup),
            max(speedup),
        ]
        results.append(summary)

    str_args = {
        "float_format": lambda f: "{:.2f}".format(f)
    }

    # Replace each classifier with its base name for display.
    for result in results:
        result[0] = ml.classifier_basename(result[0])

    columns = (
        "CLASSIFIER",
        "ERR_FN",
        "ACC %",
        "INV %",
        "REF %",
        "Omin %",
        "Oavg %",
        "Omax %",
        "Smin",
        "Savg",
        "Smax",
    )
    latex.table(results, output=fs.path(experiment.TAB_ROOT, job + ".tex"),
                columns=columns, **str_args)