def test_write_file():
    """Round-trip a dict through jsonutil.write_file/read_file at two paths."""
    expected = {"a": "1", "b": "2"}
    path_one = "/tmp/labm8.write_file.json"
    path_two = "/tmp/labm8.write_file2.json"
    jsonutil.write_file(path_one, expected)
    roundtrip_one = jsonutil.read_file(path_one)
    fs.rm(path_one)
    jsonutil.write_file(path_two, expected)
    roundtrip_two = jsonutil.read_file(path_two)
    fs.rm(path_two)
    assert expected == roundtrip_one == roundtrip_two
def test_write_file(self):
    """Round-trip a dict through jsonutil.write_file/read_file at two paths."""
    src = {"a": "1", "b": "2"}
    first_path = "/tmp/labm8.write_file.json"
    second_path = "/tmp/labm8.write_file2.json"
    jsonutil.write_file(first_path, src)
    first_copy = jsonutil.read_file(first_path)
    fs.rm(first_path)
    jsonutil.write_file(second_path, src)
    second_copy = jsonutil.read_file(second_path)
    fs.rm(second_path)
    self.assertEqual(src, first_copy)
    self.assertEqual(src, second_copy)
def test_read_file():
    """read_file() parses JSON containing //- and #-style comments."""
    commented_json = """{
        "a": 1, // this has comments
        "b": [1, 2, 3]
    } # end comment
    // begin with comment
    """
    system.echo(commented_json, "/tmp/labm8.loaf.json")
    parsed = jsonutil.read_file("/tmp/labm8.loaf.json")
    assert parsed == {'a': 1, 'b': [1, 2, 3]}
def test_read_file(self):
    """read_file() parses JSON containing //- and #-style comments."""
    commented_json = """{
        "a": 1, // this has comments
        "b": [1, 2, 3]
    } # end comment
    // begin with comment
    """
    system.echo(commented_json, "/tmp/labm8.loaf.json")
    parsed = jsonutil.read_file("/tmp/labm8.loaf.json")
    self.assertEqual(parsed["a"], 1)
    self.assertEqual(parsed["b"], [1, 2, 3])
    self.assertFalse("c" in parsed)
def cache(self, model: clgen.Model):
    """
    Return sampler cache.

    Creates the cache if it does not exist, and validates cached metadata
    against this sampler's current configuration, restoring cached stats.

    Parameters
    ----------
    model : clgen.Model
        CLgen model.

    Returns
    -------
    labm8 FSCache: Cache.

    Raises
    ------
    clgen.InternalError
        If cached metadata does not match this sampler's metadata.
    """
    # Cache identity is derived from both the sampler and the model hashes.
    sampler_model_hash = crypto.sha1_str(self.hash + model.hash)
    cache = clgen.mkcache("sampler", sampler_model_hash)

    # validate metadata against cache
    self.stats = {"time": 0, "progress": 0}
    meta = deepcopy(self.to_json())
    if cache.get("META"):
        cached_meta = jsonutil.read_file(cache["META"])
        if "stats" in cached_meta:
            # Restore stats from a previous run; exclude them from the
            # metadata comparison below.
            self.stats = cached_meta["stats"]
            del cached_meta["stats"]
        # These fields may legitimately differ between runs, so strip them
        # from both sides before comparing. (Replaces three copy-pasted
        # if/del stanzas with one loop.)
        for volatile_key in ("created", "min_samples", "min_kernels"):
            if volatile_key in cached_meta["sampler"]:
                del cached_meta["sampler"][volatile_key]
                del meta["sampler"][volatile_key]
        if meta != cached_meta:
            raise clgen.InternalError("sampler metadata mismatch")
    else:
        # First use of this cache: persist our metadata.
        self._flush_meta(cache)
    return cache
def models() -> Iterator[Model]:
    """
    Iterate over all cached models.

    Returns
    -------
    Iterator[Model]
        An iterable over all cached models.
    """
    cachedir = clgen.cachepath()
    if fs.isdir(cachedir, "model"):
        for modeldir in fs.ls(fs.path(cachedir, "model"), abspaths=True):
            yield Model.from_json(jsonutil.read_file(fs.path(modeldir, "META")))
def _main() -> None:
    """Relocate cached model directories whose names no longer match their hashes."""
    cache_root = clgen.cachepath()

    log.warning("Not Implemented: refresh corpuses")

    if fs.isdir(cache_root, "model"):
        for modeldir in fs.ls(fs.path(cache_root, "model"), abspaths=True):
            dirname_hash = fs.basename(modeldir)
            meta = jsonutil.read_file(fs.path(modeldir, "META"))
            model = clgen.Model.from_json(meta)
            if dirname_hash == model.hash:
                continue  # directory name already matches the computed hash
            log.info(dirname_hash, '->', model.hash)
            # NOTE(review): presumably log.fatal() terminates the process on
            # a cache conflict before the mv below runs — confirm.
            if fs.isdir(model.cache.path):
                log.fatal("cache conflict", file=sys.stderr)
            fs.mv(modeldir, model.cache.path)

    log.warning("Not Implemented: refresh samplers")
def __init__(self, contentid: str, path: str=None, **opts): """ Instantiate a corpus. If this is a new corpus, a number of files will be created, which may take some time. Parameters ---------- contentid : str ID of corpus content. path : str, optional Path to corpus. **opts Keyword options. """ # Validate options for key in opts.keys(): if key not in DEFAULT_CORPUS_OPTS: raise clgen.UserError( "Unsupported corpus option '{}'. Valid keys: {}".format( key, ','.join(sorted(DEFAULT_CORPUS_OPTS.keys())))) self.opts = deepcopy(DEFAULT_CORPUS_OPTS) types.update(self.opts, opts) self.opts["id"] = contentid # check that contentid exists self.language = clgen.Language.from_str(opts.get("language")) if (path is None and not fs.isdir(clgen.cachepath("contentfiles", f"{self.language}-{contentid}"))): raise clgen.UserError("corpus {self.language}-{contentid} not found" .format(**vars())) self.contentid = contentid self.contentcache = clgen.mkcache("contentfiles", f"{self.language}-{contentid}") self.kernels_db = self.contentcache.keypath('kernels.db') self.hash = self._hash(contentid, self.opts) self.cache = clgen.mkcache("corpus", f"{self.language}-{self.hash}") log.debug("contentfiles {self.contentid}".format(**vars())) log.debug("corpus {hash}".format(hash=self.hash)) # validate metadata against cache self.stats = { "preprocess_time": 0 } meta = deepcopy(self.to_json()) if self.cache.get("META"): cached_meta = jsonutil.read_file(self.cache["META"]) self.stats = cached_meta["stats"] # restore stats if "created" in cached_meta: del cached_meta["created"] del meta["created"] if "stats" in cached_meta: del cached_meta["stats"] del meta["stats"] if meta != cached_meta: raise clgen.InternalError("corpus metadata mismatch") else: self._flush_meta() with self.lock.acquire(replace_stale=True): self._create_files(path)
def test_read_file_bad_path(self):
    """read_file() raises File404 for a missing path unless must_exist=False."""
    missing = "/not/a/real/path"
    with self.assertRaises(fs.File404):
        jsonutil.read_file(missing)
    self.assertEqual({}, jsonutil.read_file(missing, must_exist=False))
def __init__(self, corpus: clgen.Corpus, **opts):
    """
    Instantiate model.

    Parameters
    ----------
    corpus : clgen.Corpus
        Corpus instance.
    **opts
        Training options.

    Raises
    ------
    clgen.UserError
        If an option key is unrecognized.
    clgen.InternalError
        If cached model metadata does not match this configuration.
    """
    assert(isinstance(corpus, clgen.Corpus))

    def _hash(corpus: clgen.Corpus, opts: dict) -> str:
        """ compute model hash """
        # Exclude fields that do not affect model identity: creation time
        # and epoch count (a model trained longer is the same model).
        hashopts = deepcopy(opts)
        del hashopts["created"]
        del hashopts["train_opts"]["epochs"]
        return crypto.sha1_list(corpus.hash, *types.dict_values(hashopts))

    # Validate options: reject any key not declared in DEFAULT_MODEL_OPTS.
    for key in opts:
        if key not in DEFAULT_MODEL_OPTS:
            raise clgen.UserError(
                "Unsupported model option '{}'. Valid keys: {}".format(
                    key, ','.join(sorted(DEFAULT_MODEL_OPTS.keys()))))

    # set properties
    self.opts = types.update(deepcopy(DEFAULT_MODEL_OPTS), opts)
    self.corpus = corpus
    self.hash = _hash(self.corpus, self.opts)
    self.cache = clgen.mkcache("model", f"{corpus.language}-{self.hash}")

    log.debug("model", self.hash)

    # validate metadata against cache, and restore stats
    self.stats = {"epoch_times": [], "epoch_costs": [], "epoch_batches": []}
    meta = deepcopy(self.to_json())
    if self.cache.get("META"):
        cached_meta = jsonutil.read_file(self.cache["META"])
        # NOTE(review): unguarded key access — a cached META with no "stats"
        # key would raise KeyError here. Confirm all cached METAs have stats.
        self.stats = cached_meta["stats"]  # restore stats
        # Strip fields that may legitimately differ between runs from both
        # sides before comparing.
        if "created" in cached_meta:
            del cached_meta["created"]
            del meta["created"]
        if "created" in cached_meta["corpus"]:
            del cached_meta["corpus"]["created"]
            del meta["corpus"]["created"]
        if "stats" in cached_meta:
            del cached_meta["stats"]
            del meta["stats"]
        if "epochs" in cached_meta["train_opts"]:
            del cached_meta["train_opts"]["epochs"]
            del meta["train_opts"]["epochs"]
        if meta != cached_meta:
            log.error("Computed META:", jsonutil.format_json(meta))
            raise clgen.InternalError(
                "metadata mismatch in model %s" % self.cache["META"])
    else:
        # First use of this cache: persist our metadata.
        self._flush_meta()
def test_read_file_bad_path():
    """read_file() raises File404 for a missing path unless must_exist=False."""
    missing = "/not/a/real/path"
    with pytest.raises(fs.File404):
        jsonutil.read_file(missing)
    assert not jsonutil.read_file(missing, must_exist=False)