def unpack_directory_if_needed(path: str) -> str:
    """
    Return a directory for `path`, unpacking a tarball if necessary.

    A directory path is returned unchanged. A ".tar.bz2" path is
    extracted first; likewise a missing path for which a sibling
    tarball of the same name exists.

    Parameters
    ----------
    path : str
        Path to directory or tarball.

    Returns
    -------
    str
        Path to directory.

    Raises
    ------
    clgen.InternalError
        If unable to extract archive.
    """
    if fs.isdir(path):
        return path

    if fs.isfile(path) and path.endswith(".tar.bz2"):
        log.info("unpacking '{}'".format(path))
        tar.unpack_archive(path)
        # Strip the archive suffix to get the directory name.
        return re.sub(r'.tar.bz2$', '', path)

    archive = path + ".tar.bz2"
    if fs.isfile(archive):
        log.info("unpacking '{}'".format(archive))
        tar.unpack_archive(archive)
        return path

    raise clgen.InternalError("cannot interpret archive '{path}'"
                              .format(**vars()))
def unpack_directory_if_needed(path: str) -> str:
    """
    If path is a tarball, unpack it. If path doesn't exist but there is a
    tarball with the same name, unpack it.

    Arguments:
        path (str): Path to directory or tarball.

    Returns:
        str: Path to directory.
    """
    if fs.isdir(path):
        return path

    if fs.isfile(path) and path.endswith(".tar.bz2"):
        log.info("unpacking '{}'".format(path))
        clgen.unpack_archive(path)
        # Drop the archive suffix to name the extracted directory.
        return re.sub(r'.tar.bz2$', '', path)

    archive = path + ".tar.bz2"
    if fs.isfile(archive):
        log.info("unpacking '{}'".format(archive))
        clgen.unpack_archive(archive)
        return path

    # Nothing to unpack: hand the path back unchanged.
    return path
def files_from_list(paths: list) -> list:
    """
    Expand a mixed list of file and directory paths into file paths.

    Files are returned as absolute paths; each directory is replaced by
    the files it (recursively) contains.

    Arguments:
        paths (list of str): List of file and directory paths.

    Returns:
        list of str: Absolute file paths.

    Raises:
        File404: If any of the paths do not exist.
    """
    files = []
    for path in paths:
        if fs.isfile(path):
            files.append(fs.abspath(path))
        elif fs.isdir(path):
            files.extend(
                f for f in fs.ls(path, abspaths=True, recursive=True)
                if fs.isfile(f))
        else:
            raise File404(path)
    return files
def test_make_clean(self):
    """Building creates the artifacts; cleaning removes them again."""
    fs.cd("tests/data/makeproj")
    artifacts = ("foo", "foo.o")
    make.make()
    for artifact in artifacts:
        self._test(True, fs.isfile(artifact))
    make.clean()
    for artifact in artifacts:
        self._test(False, fs.isfile(artifact))
    fs.cdpop()
def test_make_clean():
    """Building creates the artifacts; cleaning removes them again."""
    fs.cd("labm8/data/test/makeproj")
    artifacts = ("foo", "foo.o")
    make.make()
    for artifact in artifacts:
        assert fs.isfile(artifact)
    make.clean()
    for artifact in artifacts:
        assert not fs.isfile(artifact)
    fs.cdpop()
def test_rm(self):
    """fs.rm() deletes files, tolerates missing paths, removes trees."""
    system.echo("Hello, world!", "/tmp/labm8.tmp")
    self._test(True, fs.isfile("/tmp/labm8.tmp"))
    fs.rm("/tmp/labm8.tmp")
    self._test(False, fs.isfile("/tmp/labm8.tmp"))
    # Removing an already-deleted path must be a silent no-op.
    fs.rm("/tmp/labm8.tmp")
    fs.rm("/tmp/labm8.tmp")
    # rm() also removes whole directory trees recursively.
    fs.rm("/tmp/labm8.dir")
    fs.mkdir("/tmp/labm8.dir/foo/bar")
    system.echo("Hello, world!", "/tmp/labm8.dir/foo/bar/baz")
    self._test(True, fs.isfile("/tmp/labm8.dir/foo/bar/baz"))
    fs.rm("/tmp/labm8.dir")
    self._test(False, fs.isfile("/tmp/labm8.dir/foo/bar/baz"))
    self._test(False, fs.isfile("/tmp/labm8.dir/"))
def test_rmtrash(self):
    """fs.rmtrash() removes files and trees, tolerating missing paths."""
    system.echo("Hello, world!", "/tmp/labm8.tmp")
    self.assertTrue(fs.isfile("/tmp/labm8.tmp"))
    fs.rmtrash("/tmp/labm8.tmp")
    self.assertFalse(fs.isfile("/tmp/labm8.tmp"))
    # Trashing a path that is already gone must not raise.
    fs.rmtrash("/tmp/labm8.tmp")
    fs.rm("/tmp/labm8.tmp")
    # Whole directory trees can be trashed too.
    fs.rm("/tmp/labm8.dir")
    fs.mkdir("/tmp/labm8.dir/foo/bar")
    system.echo("Hello, world!", "/tmp/labm8.dir/foo/bar/baz")
    self.assertTrue(fs.isfile("/tmp/labm8.dir/foo/bar/baz"))
    fs.rmtrash("/tmp/labm8.dir")
    self.assertFalse(fs.isfile("/tmp/labm8.dir/foo/bar/baz"))
    self.assertFalse(fs.isfile("/tmp/labm8.dir/"))
def test_rm():
    """fs.rm() deletes files, tolerates missing paths, removes trees."""
    system.echo("Hello, world!", "/tmp/labm8.tmp")
    assert fs.isfile("/tmp/labm8.tmp")
    fs.rm("/tmp/labm8.tmp")
    assert not fs.isfile("/tmp/labm8.tmp")
    # Deleting an already-missing file must be a silent no-op.
    for _ in range(2):
        fs.rm("/tmp/labm8.tmp")
    # Directory trees are removed recursively.
    fs.rm("/tmp/labm8.dir")
    fs.mkdir("/tmp/labm8.dir/foo/bar")
    system.echo("Hello, world!", "/tmp/labm8.dir/foo/bar/baz")
    assert fs.isfile("/tmp/labm8.dir/foo/bar/baz")
    fs.rm("/tmp/labm8.dir")
    assert not fs.isfile("/tmp/labm8.dir/foo/bar/baz")
    assert not fs.isfile("/tmp/labm8.dir/")
def test_rmtrash():
    """fs.rmtrash() removes files and trees, tolerating missing paths."""
    with tempfile.NamedTemporaryFile(prefix='labm8_') as f:
        assert fs.isfile(f.name)
        fs.rmtrash(f.name)
        assert not fs.isfile(f.name)
        # Trashing or removing an already-missing file is a no-op.
        fs.rmtrash(f.name)
        fs.rm(f.name)
    with tempfile.TemporaryDirectory() as d:
        fs.rm(d)
        fs.mkdir(d, "foo/bar")
        system.echo("Hello, world!", fs.path(d, "foo/bar/baz"))
        # BUG FIX: the original asserted fs.isfile(f, "foo/bar/baz"),
        # referencing the closed tempfile handle from the previous
        # block; the intended base path is the directory `d`.
        assert fs.isfile(d, "foo/bar/baz")
        fs.rmtrash(d)
        assert not fs.isfile(d, "foo/bar/baz")
        assert not fs.isdir(d)
def test_cp_over_dir(self):
    """Copying a directory over an existing directory merges contents."""
    def check_state(copied):
        # The source tree must always stay intact; the destination
        # gains "foo" only after fs.cp().
        self._test(True, fs.isdir("/tmp/labm8.tmp.src"))
        self._test(True, fs.isfile("/tmp/labm8.tmp.src/foo"))
        self._test(True, fs.isdir("/tmp/labm8.tmp.copy"))
        self._test(copied, fs.isfile("/tmp/labm8.tmp.copy/foo"))

    fs.mkdir("/tmp/labm8.tmp.src")
    system.echo("Hello, world!", "/tmp/labm8.tmp.src/foo")
    fs.rm("/tmp/labm8.tmp.copy")
    fs.mkdir("/tmp/labm8.tmp.copy")
    check_state(False)
    fs.cp("/tmp/labm8.tmp.src", "/tmp/labm8.tmp.copy/")
    check_state(True)
    self._test(fs.read("/tmp/labm8.tmp.src/foo"),
               fs.read("/tmp/labm8.tmp.copy/foo"))
def merge(outpath, inpaths=None):
    """
    Merge kernel datasets.

    Copies the ContentFiles and PreprocessedFiles tables of every input
    database into `outpath` (created first if it does not exist), then
    runs explore.explore() on the merged result.

    Arguments:
        outpath: Path to the output database.
        inpaths (optional): Input database paths. Defaults to all known
            sampler datasets.
    """
    from clgen import explore

    if not fs.isfile(outpath):
        create_db(outpath)
        log.info("created", outpath)

    db = connect(outpath)

    if not inpaths:
        inpaths = get_all_sampler_datasets()

    for inpath in inpaths:
        log.info("merging from", inpath)
        c = db.cursor()
        # Attach the input database and bulk-copy both tables,
        # skipping rows whose keys already exist.
        c.execute("ATTACH '{}' AS rhs".format(inpath))
        c.execute("INSERT OR IGNORE INTO ContentFiles "
                  "SELECT * FROM rhs.ContentFiles")
        c.execute("INSERT OR IGNORE INTO PreprocessedFiles "
                  "SELECT * FROM rhs.PreprocessedFiles")
        # Commit before DETACH: SQLite cannot detach a database that is
        # part of an open transaction.
        db.commit()
        c.execute("DETACH rhs")
        c.close()

    explore.explore(outpath)
def test_cp_over_dir():
    """Copying a directory over an existing directory merges contents."""
    def check_state(copied):
        # Source tree stays intact; the copy gains "foo" only after cp.
        assert fs.isdir("/tmp/labm8.tmp.src")
        assert fs.isfile("/tmp/labm8.tmp.src/foo")
        assert fs.isdir("/tmp/labm8.tmp.copy")
        if copied:
            assert fs.isfile("/tmp/labm8.tmp.copy/foo")
        else:
            assert not fs.isfile("/tmp/labm8.tmp.copy/foo")

    fs.mkdir("/tmp/labm8.tmp.src")
    system.echo("Hello, world!", "/tmp/labm8.tmp.src/foo")
    fs.rm("/tmp/labm8.tmp.copy")
    fs.mkdir("/tmp/labm8.tmp.copy")
    check_state(False)
    fs.cp("/tmp/labm8.tmp.src", "/tmp/labm8.tmp.copy/")
    check_state(True)
    assert (fs.read("/tmp/labm8.tmp.src/foo") == fs.read(
        "/tmp/labm8.tmp.copy/foo"))
def test_cli():
    """End-to-end exercise of the db subcommands through the CLI."""
    fs.rm("kernels.db")
    cli.main("db init kernels.db".split())
    assert fs.exists("kernels.db")

    # Fetch a tiny corpus into the new database and preprocess it.
    corpus_path = tests.archive("tiny", "corpus")
    cli.main("db explore kernels.db".split())
    cli.main(f"fetch fs kernels.db {corpus_path}".split())
    cli.main("preprocess kernels.db".split())
    cli.main("db explore kernels.db".split())

    # Dump kernels into a directory.
    fs.rm("kernels_out")
    cli.main("db dump kernels.db -d kernels_out".split())
    assert fs.isdir("kernels_out")
    assert len(fs.ls("kernels_out")) >= 1

    # Dump kernels into a single file.
    fs.rm("kernels.cl")
    cli.main("db dump kernels.db kernels.cl --file-sep --eof --reverse".split())
    assert fs.isfile("kernels.cl")

    # Dump with --input-samples; expected kernel count is fixed by the
    # "tiny" corpus fixture.
    fs.rm("kernels_out")
    cli.main("db dump kernels.db --input-samples -d kernels_out".split())
    assert fs.isdir("kernels_out")
    assert len(fs.ls("kernels_out")) == 250

    # Clean up test artifacts.
    fs.rm("kernels.db")
    fs.rm("kernels_out")
def _create_kernels_db(self, path: str, encoding: str = "default") -> None:
    """creates and caches kernels.db"""
    log.debug("creating database")

    # Build the database under a temporary name, then publish it into
    # the content cache.
    tmppath = fs.path(self.contentcache.path, "kernels.db.tmp")
    dbutil.create_db(tmppath)
    self.contentcache["kernels.db"] = tmppath

    # Every regular file beneath `path` belongs to the corpus.
    corpus_files = [f for f in fs.ls(path, abspaths=True, recursive=True)
                    if fs.isfile(f)]

    # import files into database
    fetch.fetch_fs(self.contentcache["kernels.db"], corpus_files)
    # preprocess files
    preprocess.preprocess_db(self.contentcache["kernels.db"])
    # encode kernel db
    encode(self.contentcache["kernels.db"], encoding)
    # print database stats
    explore.explore(self.contentcache["kernels.db"])
def test_set_and_get():
    """Storing a file in an FSCache moves it and preserves contents."""
    fs.rm("/tmp/labm8-cache-set-and-get")
    c = cache.FSCache("/tmp/labm8-cache-set-and-get")
    # create file
    system.echo("Hello, world!", "/tmp/labm8.testfile.txt")
    # sanity check
    assert fs.read("/tmp/labm8.testfile.txt") == ["Hello, world!"]
    # insert file into cache
    c['foobar'] = "/tmp/labm8.testfile.txt"
    # file must be in cache
    assert fs.isfile(c.keypath("foobar"))
    # file must have been moved
    assert not fs.isfile("/tmp/labm8.testfile.txt")
    # check file contents
    assert fs.read(c['foobar']) == ["Hello, world!"]
    # __getitem__ and get() must agree
    assert fs.read(c['foobar']) == fs.read(c.get('foobar'))
    c.clear()
def assert_program_exists(path):
    """
    Assert that a program exists.

    If the given path does not exist and is not a file, raises
    ProgramNotFoundError.
    """
    # A valid program path must both exist and be a regular file.
    is_program = fs.exists(path) and fs.isfile(path)
    if not is_program:
        raise ProgramNotFoundError(path)
def get_jobs():
    """Return the list of job lines for this host, or [] if none.

    Reads "jobs/<hostname>.txt" and returns its lines (with newlines
    preserved, as readlines() produces them).
    """
    joblist = "jobs/{}.txt".format(system.HOSTNAME)
    io.debug(joblist)
    if fs.isfile(joblist):
        # Use a context manager so the file handle is closed promptly;
        # the original `open(joblist).readlines()` leaked the handle.
        with open(joblist) as infile:
            return infile.readlines()
    return []
def test_set_and_get(self):
    """Storing a file in a Cache moves it and preserves its contents."""
    c = cache.Cache("__test_set_and_get__")
    # create file
    tests.write_file(tests.data_path("tmp", "file.txt", exists=False),
                     "Hello, world!")
    # sanity check
    self.assertEqual(tests.read_file(tests.data_path("tmp", "file.txt")),
                     "Hello, world!")
    # insert file into cache
    c['foobar'] = tests.data_path("tmp", "file.txt")
    # file must be in cache
    self.assertTrue(fs.isfile(fs.path(c.path, "foobar")))
    # file must have been moved
    self.assertFalse(fs.isfile(tests.data_path("file.txt", exists=False)))
    # check file contents. BUG FIX: assertTrue(x, msg) treated the
    # expected string as a failure message and never compared it;
    # assertEqual performs the actual comparison.
    self.assertEqual(tests.read_file(c['foobar']), "Hello, world!")
    c.empty()
def test_set_and_get(self):
    """Storing a file in an FSCache moves it and preserves contents."""
    fs.rm("/tmp/labm8-cache-set-and-get")
    c = cache.FSCache("/tmp/labm8-cache-set-and-get")
    # create file
    system.echo("Hello, world!", "/tmp/labm8.testfile.txt")
    # sanity check
    self.assertEqual(fs.read("/tmp/labm8.testfile.txt"), ["Hello, world!"])
    # insert file into cache
    c['foobar'] = "/tmp/labm8.testfile.txt"
    # file must be in cache
    self.assertTrue(fs.isfile(c.keypath("foobar")))
    # file must have been moved
    self.assertFalse(fs.isfile("/tmp/labm8.testfile.txt"))
    # check file contents. BUG FIX: assertTrue(x, msg) treated the
    # expected list as a failure message and never compared it;
    # assertEqual performs the actual comparison.
    self.assertEqual(fs.read(c['foobar']), ["Hello, world!"])
    self.assertEqual(fs.read(c['foobar']), fs.read(c.get('foobar')))
    c.clear()
def get_all_sampler_datasets():
    """Collect every sampler "kernels.db" across cached clgen versions."""
    datasets = []
    for versioncache in fs.ls(fs.path("~/.cache/clgen"), abspaths=True):
        samplerdir = fs.path(versioncache, "sampler")
        if not fs.isdir(samplerdir):
            continue
        # Each subdirectory of a sampler dir may hold a kernels.db.
        for sampledir in fs.ls(samplerdir, abspaths=True):
            inpath = fs.path(sampledir, "kernels.db")
            if fs.isfile(inpath):
                datasets.append(inpath)
    return datasets
def test_rm_glob():
    """fs.rm() expands glob patterns unless glob=False."""
    fs.mkdir("/tmp/labm8.glob")
    for path in ("/tmp/labm8.glob/1", "/tmp/labm8.glob/2",
                 "/tmp/labm8.glob/abc"):
        system.echo("Hello, world!", path)

    # Globbing disabled: the literal path "a*" matches nothing.
    fs.rm("/tmp/labm8.glob/a*", glob=False)
    for path in ("/tmp/labm8.glob/1", "/tmp/labm8.glob/2",
                 "/tmp/labm8.glob/abc"):
        assert fs.isfile(path)

    # Globbing enabled: only "abc" matches "a*".
    fs.rm("/tmp/labm8.glob/a*")
    assert fs.isfile("/tmp/labm8.glob/1")
    assert fs.isfile("/tmp/labm8.glob/2")
    assert not fs.isfile("/tmp/labm8.glob/abc")

    # "*" removes everything that is left.
    fs.rm("/tmp/labm8.glob/*")
    for path in ("/tmp/labm8.glob/1", "/tmp/labm8.glob/2",
                 "/tmp/labm8.glob/abc"):
        assert not fs.isfile(path)
def test_rm_glob(self):
    """fs.rm() expands glob patterns unless glob=False."""
    fs.mkdir("/tmp/labm8.glob")
    for path in ("/tmp/labm8.glob/1", "/tmp/labm8.glob/2",
                 "/tmp/labm8.glob/abc"):
        system.echo("Hello, world!", path)

    # Globbing disabled: the literal path "a*" matches nothing.
    fs.rm("/tmp/labm8.glob/a*", glob=False)
    for path in ("/tmp/labm8.glob/1", "/tmp/labm8.glob/2",
                 "/tmp/labm8.glob/abc"):
        self._test(True, fs.isfile(path))

    # Globbing enabled: only "abc" matches "a*".
    fs.rm("/tmp/labm8.glob/a*")
    self._test(True, fs.isfile("/tmp/labm8.glob/1"))
    self._test(True, fs.isfile("/tmp/labm8.glob/2"))
    self._test(False, fs.isfile("/tmp/labm8.glob/abc"))

    # "*" removes everything that is left.
    fs.rm("/tmp/labm8.glob/*")
    for path in ("/tmp/labm8.glob/1", "/tmp/labm8.glob/2",
                 "/tmp/labm8.glob/abc"):
        self._test(False, fs.isfile(path))
def _create_kernels_db(self, path: str) -> None:
    """creates and caches kernels.db"""
    log.debug("creating database")

    # Stage the database under a temporary key path, then publish it
    # into the content cache.
    tmppath = self.contentcache.keypath("kernels.db.tmp")
    dbutil.create_db(tmppath)
    self.contentcache["kernels.db"] = tmppath

    # Every regular file beneath `path` belongs to the corpus.
    corpus_files = [f for f in fs.ls(path, abspaths=True, recursive=True)
                    if fs.isfile(f)]

    # import files into database
    clgen.fetch(self.contentcache["kernels.db"], corpus_files)
def get_all_sampler_datasets(all_clgen_versions: bool=True) -> list:
    """Return every sampler "kernels.db" found in the clgen cache.

    Arguments:
        all_clgen_versions: If True, search the caches of all clgen
            versions; otherwise only the current version's cache.
    """
    if all_clgen_versions:
        versiondirs = fs.ls(fs.path("~/.cache/clgen"), abspaths=True)
    else:
        versiondirs = [fs.path("~/.cache/clgen", clgen.version())]

    datasets = []
    for versiondir in versiondirs:
        # Skip version caches that never ran the sampler.
        if not fs.isdir(versiondir, "sampler"):
            continue
        for samplerdir in fs.ls(fs.path(versiondir, "sampler"),
                                abspaths=True):
            inpath = fs.path(samplerdir, "kernels.db")
            if fs.isfile(inpath):
                datasets.append(inpath)

    return datasets
def make(target="all", dir=".", **kwargs):
    """
    Run make.

    Arguments:
        target (str, optional): Name of the target to build. Defaults
          to "all".
        dir (str, optional): Path to directory containing Makefile.
        **kwargs (optional): Any additional arguments to be passed to
          system.run().

    Returns:
        (int, str, str): The first element is the return code of the
          make command. The second and third elements are the stdout and
          stderr of the process.

    Raises:
        NoMakefileError: In case a Makefile is not found in the target
          directory.
        NoTargetError: In case the Makefile does not support the
          requested target.
        MakeError: In case the target rule fails.
    """
    # BUG FIX: the docstring claimed "Run make clean" although the
    # function builds an arbitrary `target`.
    if not fs.isfile(fs.path(dir, "Makefile")):
        raise NoMakefileError("No makefile in '{}'".format(fs.abspath(dir)))

    fs.cd(dir)

    # Default parameters to system.run()
    if "timeout" not in kwargs:
        kwargs["timeout"] = 300

    ret, out, err = system.run(["make", target], **kwargs)
    fs.cdpop()

    if ret > 0:
        if re.search(_BAD_TARGET_RE, err):
            raise NoTargetError("No rule for target '{}'"
                                .format(target))
        # BUG FIX: removed an unreachable `raise MakeError("Failed")`
        # that followed this exhaustive if/else (both branches raise).
        raise MakeError("Target '{}' failed".format(target))

    return ret, out, err
def make(target="all", dir=".", **kwargs):
    """
    Run make.

    Arguments:
        target (str, optional): Name of the target to build. Defaults
          to "all".
        dir (str, optional): Path to directory containing Makefile.
        **kwargs (optional): Any additional arguments to be passed to
          system.run().

    Returns:
        (int, str, str): The first element is the return code of the
          make command. The second and third elements are the stdout and
          stderr of the process.

    Raises:
        NoMakefileError: In case a Makefile is not found in the target
          directory.
        NoTargetError: In case the Makefile does not support the
          requested target.
        MakeError: In case the target rule fails.
    """
    if not fs.isfile(fs.path(dir, "Makefile")):
        raise NoMakefileError("No makefile in '{}'".format(fs.abspath(dir)))

    fs.cd(dir)

    # Default parameters to system.run()
    if "timeout" not in kwargs:
        kwargs["timeout"] = 300

    ret, out, err = system.run(["make", target], **kwargs)
    fs.cdpop()

    if ret > 0:
        if re.search(_BAD_TARGET_RE, err):
            raise NoTargetError("No rule for target '{}'".format(target))
        # BUG FIX: removed an unreachable `raise MakeError("Failed")`
        # that followed this exhaustive if/else (both branches raise).
        raise MakeError("Target '{}' failed".format(target))

    return ret, out, err
parser.add_argument("--clsmith", action="store_true", help="Only reduce CLSmith results") parser.add_argument("--clgen", action="store_true", help="Only reduce CLgen results") parser.add_argument("--recheck", action="store_true", help="Re-check existing errors") args = parser.parse_args() db.init(args.hostname) # initialize db engine clang = fs.abspath(f"../lib/llvm/build/{args.clang}/bin/clang") if not args.recheck and not fs.isfile(clang): print(f"fatal: clang '{clang}' does not exist") sys.exit(1) if args.clgen and args.clsmith: tablesets = [CLSMITH_TABLES, CLGEN_TABLES] elif args.clsmith: tablesets = [CLSMITH_TABLES] elif args.clgen: tablesets = [CLGEN_TABLES] else: tablesets = [CLSMITH_TABLES, CLGEN_TABLES] with Session(commit=True) as s: def next_batch():
def load_config(path="~/.omnitunerc.json"):
    """Load the omnituner JSON configuration.

    Arguments:
        path (str, optional): Path to the configuration file.

    Returns:
        The parsed JSON document.

    Raises:
        ConfigNotFoundError: If the file does not exist.
    """
    path = fs.abspath(path)
    if fs.isfile(path):
        # BUG FIX: json.load(open(path)) leaked the file handle; a
        # context manager closes it deterministically.
        with open(path) as infile:
            return json.load(infile)
    else:
        raise ConfigNotFoundError("File '{}' not found!".format(path))
runtime_t = NewType('runtime_t', float) status_t = NewType('status_t', int) return_t = namedtuple('return_t', ['runtime', 'status', 'stdout', 'stderr']) # build paths exec_path = dsmith.root_path("third_party", "clsmith", "build", "CLSmith") cl_launcher_path = dsmith.root_path("third_party", "clsmith", "build", "cl_launcher") include_path = dsmith.root_path("third_party", "clsmith", "runtime") # sanity checks assert fs.isexe(exec_path) assert fs.isexe(cl_launcher_path) assert fs.isfile(fs.path(include_path, "CLSmith.h")) def clsmith_cli(*args, timeout: int = 60, exec_path=exec_path) -> List[str]: return ["timeout", "--signal=9", str(timeout), exec_path] + list(args) def clsmith(*args, exec_path=exec_path) -> return_t: """ Returns: return_t: A named tuple consisting of runtime (float), status (int), stdout (str), and stderr (str). """ start_time = time() cli = clsmith_cli(*args)
#!/usr/bin/env python3.6 import sys from progressbar import ProgressBar from labm8 import crypto from labm8 import fs if __name__ == "__main__": inpath = sys.argv[1] outdir = sys.argv[2] print(f"reading from {inpath} into {outdir}") assert fs.isfile(inpath) assert not fs.exists(outdir) or fs.isdir(outdir) fs.mkdir(outdir) with open(inpath) as infile: text = infile.read() kernels = text.split("// ==== START SAMPLE ====") kernels = [kernel.strip() for kernel in kernels if kernel.strip()] print(len(kernels), "kernels") sha1s = [crypto.sha1_str(kernel) for kernel in kernels] for kernel, sha1 in ProgressBar()(list(zip(kernels, sha1s))): with open(f"{outdir}/{sha1}.txt", "w") as outfile: print(kernel, file=outfile)
def test_files_exist(self):
    """Every registered data file must be present on disk."""
    for path in self.FILES:
        self.assertTrue(fs.isfile(path))
def test_isfile(self):
    """isfile() is true for regular files only."""
    cases = (
        (True, __file__),
        (False, "/"),
        (False, "/not/a/real/path (I hope!)"),
    )
    for expected, path in cases:
        self._test(expected, fs.isfile(path))
def test_files_exist():
    """Every registered data file must be present on disk."""
    for path in FILES:
        assert fs.isfile(path)
def test_make(self):
    """make() succeeds, captures output, and builds the artifacts."""
    ret, out, err = make.make(dir="tests/data/makeproj")
    self._test(0, ret)
    self._test(True, out is not None)
    for artifact in ("tests/data/makeproj/foo",
                     "tests/data/makeproj/foo.o"):
        self._test(True, fs.isfile(artifact))
def models_to_tab(*models: List[Model]) -> PrettyTable:
    """
    Pretty print a table of model stats.

    Parameters
    ----------
    models : List[Model]
        Models to tablify.

    Returns
    -------
    PrettyTable
        Formatted table for printing.
    """
    tab = PrettyTable([
        "model",
        "corpus",
        "trained",
        "type",
        "nodes",
        "epochs",
        "lr",
        "dr",
        "gc",
    ])

    # Right-align node counts and sort rows by them.
    tab.align['nodes'] = 'r'
    tab.sortby = "nodes"

    for model in models:
        meta = model.to_json()

        nodes = meta["architecture"]["rnn_size"]
        layers = meta["architecture"]["num_layers"]

        # Epochs completed so far; "stats" is absent before training
        # starts.
        if "stats" in meta:
            num_epochs = len(meta["stats"]["epoch_costs"])
        else:
            num_epochs = 0

        # Training status column: finished, in progress (a LOCK file
        # exists in the model cache), partially trained, or untrained.
        if num_epochs >= meta["train_opts"]["epochs"]:
            trained = "Y"
        elif fs.isfile(fs.path(model.cache.path, "LOCK")):
            trained = f"WIP ({num_epochs}/{meta['train_opts']['epochs']})"
        elif num_epochs > 0:
            trained = f"{num_epochs}/{meta['train_opts']['epochs']}"
        else:
            trained = ""

        tab.add_row([
            model.shorthash,
            model.corpus.shorthash,
            trained,
            meta["architecture"]["model_type"],
            f'{nodes} x {layers}',
            meta["train_opts"]["epochs"],
            "{:.0e}".format(meta["train_opts"]["learning_rate"]),
            meta["train_opts"]["lr_decay_rate"],
            meta["train_opts"]["grad_clip"],
        ])

    return tab
def test_isfile():
    """isfile() is true for regular files only."""
    assert fs.isfile(__file__)
    for non_file in ("/", "/not/a/real/path (I hope!)"):
        assert not fs.isfile(non_file)
def IsRepoMetaFile(f: str):
    """Determine if a path is a GitHubRepoMetadata message."""
    # Must be a regular file before we try to parse it as a proto.
    if not fs.isfile(f):
        return False
    return pbutil.ProtoIsReadable(f, scrape_repos_pb2.GitHubRepoMetadata())
def test_make():
    """make() succeeds, captures output, and builds the artifacts."""
    ret, out, err = make.make(dir="labm8/data/test/makeproj")
    assert not ret
    assert out
    for artifact in ("labm8/data/test/makeproj/foo",
                     "labm8/data/test/makeproj/foo.o"):
        assert fs.isfile(artifact)