def evaluate(model, sampler):
    """
    Sample from a model and report how much of the sample is usable.

    Runs the sampler on the model, preprocesses the resulting
    "kernels.db" entry of the sampler cache, and gathers row and
    character counts from it.

    Arguments:
        model: Model to sample from.
        sampler: Sampler to run. Its `kernel_opts["args"]` is recorded in
            the result as "argspec".

    Returns:
        dict: Counts, rates, host, date, and the corpus/model/sampler
            cache paths.

    Raises:
        ZeroDivisionError: If the "ContentFiles" row count is zero.
    """
    print("starting sampling")
    sampler.sample(model)

    print("preprocessing sample")
    kernels_db = sampler.cache(model)["kernels.db"]
    preprocess.preprocess_db(kernels_db)

    # Row counts: everything sampled, rows reported as good, and rows
    # with status=2 (counted here as "ugly").
    total = dbutil.num_rows_in(kernels_db, "ContentFiles")
    good = dbutil.num_good_kernels(kernels_db)
    ugly = dbutil.num_rows_in(kernels_db, "PreprocessedFiles",
                              "WHERE status=2")

    good_fraction = good / total
    ugly_fraction = ugly / total
    discard_rate = 1 - good_fraction
    # NOTE(review): this is the fraction of kernels that are *not* ugly,
    # which may not be what "ugly_rate" is meant to convey - confirm.
    ugly_rate = 1 - ugly_fraction

    # Character counts: whole sample versus rows with status=0.
    chars_total = dbutil.cc(kernels_db, "ContentFiles")
    chars_good = dbutil.cc(kernels_db, "PreprocessedFiles",
                           condition="WHERE status=0")

    return {
        "argspec": sampler.kernel_opts["args"],
        "host": system.HOSTNAME,
        "date": time.nowstr(),
        "num_kernels": total,
        "num_good_kernels": good,
        "discard_rate": discard_rate,
        "ugly_rate": ugly_rate,
        "total_charcount": chars_total,
        "good_charcount": chars_good,
        "corpus_dir": model.corpus.cache.path,
        "model_dir": model.cache.path,
        "sampler_dir": sampler.cache(model).path,
    }
def meta(self) -> dict:
    """
    Get trained model metadata.

    Format spec: https://github.com/ChrisCummins/clgen/issues/25

    The result is a deep copy of the model options, extended with the
    clgen version, the packaging date, the corpus metadata, and a
    "contents" mapping of cache-relative file paths to checksums for
    every file in the corpus and model caches.

    Returns:
        dict: Metadata.
    """
    # Paths are reported relative to the cache root. The root is escaped
    # so that regex metacharacters in the path are matched literally.
    cache_root_re = re.compile(r'^' + re.escape(cache.ROOT) + '/')

    def checksums(directory):
        """Map cache-relative path -> checksum for files in directory."""
        return {
            cache_root_re.sub("", path): clgen.checksum_file(path)
            for path in fs.ls(directory, abspaths=True)
        }

    # Corpus files first, then model files; model entries win on a clash.
    contents = checksums(self.corpus.cache.path)
    contents.update(checksums(self.cache.path))

    _meta = deepcopy(self.opts)
    _meta["version"] = clgen.version()
    _meta["date_packaged"] = labtime.nowstr()
    # No trailing comma here: a trailing comma would wrap the corpus
    # metadata in a 1-tuple.
    _meta["corpus"] = self.corpus.meta
    _meta["contents"] = contents
    return _meta
def add_to_log(log, entry, name=None):
    """
    Append a timestamped record to a log.

    Arguments:
        log: Object with an `append` method that receives the record.
        entry: Value stored under the "data" key.
        name: Optional label stored under the "name" key. It is omitted
            from the record when falsy (None, empty string, ...).
    """
    record = {"date": nowstr()}
    if name:
        record["name"] = name
    record["data"] = entry
    log.append(record)